1#include "../alignment/alignment.h"
2#include "../model/model.h"
3#include "updatingnode.h"
63 std::istream& tree_stream,
64 const bool fixed_blengths =
false,
65 std::unique_ptr<cmaple::Params>&& params =
nullptr);
96 const std::string& tree_filename =
"",
97 const bool fixed_blengths =
false,
98 std::unique_ptr<cmaple::Params>&& params =
nullptr);
119 void load(std::istream& tree_stream,
const bool fixed_blengths =
false);
137 void load(
const std::string& tree_filename,
138 const bool fixed_blengths =
false);
185 const bool shallow_tree_search, std::ostream& out_stream = std::cout);
226 const bool shallow_tree_search =
false, std::ostream& out_stream = std::cout);
261 const int num_replicates = 1000,
262 const double epsilon = 0.1,
263 const bool allow_replacing_ML_tree =
true,
264 std::ostream& out_stream = std::cout);
276 const bool show_branch_supports =
true);
285 bool fixed_blengths =
false;
290 cmaple::RealNumType default_blength, min_blength, max_blength,
291 min_blength_mid, min_blength_sensitivity, half_max_blength,
292 half_min_blength_mid, double_min_blength;
298 std::unique_ptr<cmaple::Params> params =
nullptr;
313 cmaple::RealNumType* cumulative_rate =
nullptr;
318 std::vector<std::vector<cmaple::PositionType>> cumulative_base;
323 std::vector<PhyloNode> nodes;
328 std::vector<NodeLh> node_lhs;
333 cmaple::NumSeqsType root_vector_index;
340 std::vector<std::string> seq_names;
346 std::vector<bool> sequence_added;
355 void makeTreeInOutConsistent();
363 const std::string& tree_search_type);
370 static std::string getTreeSearchStr(
const TreeSearchType tree_search_type);
377 static TreeType parseTreeType(
const std::string& tree_type_str);
385 typedef void (
Tree::*LoadTreePtrType)(std::istream&, const bool);
386 LoadTreePtrType loadTreePtr;
391 typedef void (
Tree::*ChangeModelPtrType)(
Model*);
392 ChangeModelPtrType changeModelPtr;
398 ChangeAlnPtrType changeAlnPtr;
404 const bool, std::ostream&);
405 DoInferencePtrType doInferencePtr;
410 typedef void (
Tree::*DoPlacementPtrType)(std::ostream&);
411 DoPlacementPtrType doPlacementPtr;
417 const bool, std::ostream&);
418 ApplySPRPtrType applySPRPtr;
423 typedef void (
Tree::*OptimizeBranchPtrType)(std::ostream&);
424 OptimizeBranchPtrType optimizeBranchPtr;
429 typedef RealNumType (
Tree::*computeLhPtrType)();
430 computeLhPtrType computeLhPtr;
435 typedef void (
Tree::*computeBranchSupportPtrType)(const int,
438 const bool, std::ostream&);
439 computeBranchSupportPtrType computeBranchSupportPtr;
441 typedef void (
Tree::*MakeTreeInOutConsistentPtrType)();
442 MakeTreeInOutConsistentPtrType makeTreeInOutConsistentPtr;
449 template <const cmaple::StateType num_states>
450 void loadTreeTemplate(std::istream& tree_stream,
const bool fixed_blengths);
454 template <const cmaple::StateType num_states>
459 template <const cmaple::StateType num_states>
460 void changeModelTemplate(
Model* model);
464 template <const cmaple::StateType num_states>
466 const bool shallow_tree_search, std::ostream& out_stream);
470 template <const cmaple::StateType num_states>
471 void doPlacementTemplate(std::ostream& out_stream);
475 template <const cmaple::StateType num_states>
477 const bool shallow_tree_search, std::ostream& out_stream);
481 template <const cmaple::StateType num_states>
482 void optimizeBranchTemplate(std::ostream& out_stream);
486 template <const cmaple::StateType num_states>
487 RealNumType computeLhTemplate();
491 template <const cmaple::StateType num_states>
492 void computeBranchSupportTemplate(
const int num_threads,
493 const int num_replicates,
494 const double epsilon,
495 const bool allow_replacing_ML_tree,
496 std::ostream& out_stream);
500 template <const cmaple::StateType num_states>
501 void makeTreeInOutConsistentTemplate();
507 void setupFuncPtrs();
512 void setupBlengthThresh();
528 std::unique_ptr<cmaple::Params>&& params);
534 void computeCumulativeRate();
540 template <const cmaple::StateType num_states>
541 void optimizeTreeTopology(
bool short_range_search =
false);
549 template <const cmaple::StateType num_states>
550 void refreshAllNonLowerLhs();
558 template <const cmaple::StateType num_states>
559 cmaple::RealNumType improveSubTree(
const cmaple::Index index,
561 bool short_range_search);
567 cmaple::RealNumType calculateDerivative(
568 const std::vector<cmaple::RealNumType>& coefficient_vec,
569 const cmaple::RealNumType delta_t);
574 template <const cmaple::StateType num_states>
575 void examineSamplePlacementMidBranch(
576 cmaple::Index& selected_node_index,
577 const std::unique_ptr<SeqRegions>& mid_branch_lh,
578 cmaple::RealNumType& best_lh_diff,
580 cmaple::RealNumType& lh_diff_mid_branch,
581 TraversingNode& current_extended_node,
582 const std::unique_ptr<SeqRegions>& sample_regions);
587 template <const cmaple::StateType num_states>
588 void examineSamplePlacementAtNode(
589 cmaple::Index& selected_node_index,
590 const std::unique_ptr<SeqRegions>& total_lh,
591 cmaple::RealNumType& best_lh_diff,
593 cmaple::RealNumType& lh_diff_at_node,
594 cmaple::RealNumType& lh_diff_mid_branch,
595 cmaple::RealNumType& best_up_lh_diff,
596 cmaple::RealNumType& best_down_lh_diff,
597 cmaple::Index& best_child_index,
598 TraversingNode& current_extended_node,
599 const std::unique_ptr<SeqRegions>& sample_regions);
606 template <const cmaple::StateType num_states>
607 void finetuneSamplePlacementAtNode(
608 const PhyloNode& selected_node,
609 cmaple::RealNumType& best_down_lh_diff,
610 cmaple::Index& best_child_index,
611 const std::unique_ptr<SeqRegions>& sample_regions);
618 template <const cmaple::StateType num_states>
619 void addStartingNodes(
const cmaple::Index& node_index,
621 const cmaple::Index& other_child_node_index,
622 const cmaple::RealNumType best_lh_diff,
623 std::stack<std::unique_ptr<UpdatingNode>>& node_stack);
630 template <const cmaple::StateType num_states>
631 bool examineSubtreePlacementMidBranch(
632 cmaple::Index& best_node_index,
633 PhyloNode& current_node,
634 cmaple::RealNumType& best_lh_diff,
636 cmaple::RealNumType& lh_diff_at_node,
637 cmaple::RealNumType& lh_diff_mid_branch,
638 cmaple::RealNumType& best_up_lh_diff,
639 cmaple::RealNumType& best_down_lh_diff,
640 std::unique_ptr<UpdatingNode>& updating_node,
641 const std::unique_ptr<SeqRegions>& subtree_regions,
642 const cmaple::RealNumType threshold_prob,
643 const cmaple::RealNumType removed_blength,
644 const cmaple::Index top_node_index,
645 std::unique_ptr<SeqRegions>& bottom_regions);
652 template <const cmaple::StateType num_states>
653 bool examineSubTreePlacementAtNode(
654 cmaple::Index& best_node_index,
655 PhyloNode& current_node,
656 cmaple::RealNumType& best_lh_diff,
658 cmaple::RealNumType& lh_diff_at_node,
659 cmaple::RealNumType& lh_diff_mid_branch,
660 cmaple::RealNumType& best_up_lh_diff,
661 cmaple::RealNumType& best_down_lh_diff,
662 std::unique_ptr<UpdatingNode>& updating_node,
663 const std::unique_ptr<SeqRegions>& subtree_regions,
664 const cmaple::RealNumType threshold_prob,
665 const cmaple::RealNumType removed_blength,
666 const cmaple::Index top_node_index);
673 template <const cmaple::StateType num_states>
674 void addChildSeekSubtreePlacement(
675 const cmaple::Index child_1_index,
678 const cmaple::RealNumType& lh_diff_at_node,
679 const std::unique_ptr<UpdatingNode>& updating_node,
680 std::stack<std::unique_ptr<UpdatingNode>>& node_stack,
681 const cmaple::RealNumType threshold_prob);
689 template <const cmaple::StateType num_states>
690 bool addNeighborsSeekSubtreePlacement(
691 PhyloNode& current_node,
692 const cmaple::Index other_child_index,
693 std::unique_ptr<SeqRegions>&& bottom_regions,
694 const cmaple::RealNumType& lh_diff_at_node,
695 const std::unique_ptr<UpdatingNode>& updating_node,
696 std::stack<std::unique_ptr<UpdatingNode>>& node_stack,
697 const cmaple::RealNumType threshold_prob);
705 template <
const cmaple::StateType num_states,
706 cmaple::RealNumType (
Tree::*calculatePlacementCost)(
707 const std::unique_ptr<SeqRegions>&,
708 const std::unique_ptr<SeqRegions>&,
709 const cmaple::RealNumType)>
710 bool tryShorterBranch(
711 const cmaple::RealNumType current_blength,
712 std::unique_ptr<SeqRegions>& best_child_regions,
713 const std::unique_ptr<SeqRegions>& sample,
714 const std::unique_ptr<SeqRegions>& upper_left_right_regions,
715 const std::unique_ptr<SeqRegions>& lower_regions,
716 cmaple::RealNumType& best_split_lh,
717 cmaple::RealNumType& best_branch_length_split,
718 const cmaple::RealNumType new_branch_length,
719 const bool try_first_branch);
726 template <const cmaple::StateType num_states>
727 void tryShorterBranchAtRoot(
const std::unique_ptr<SeqRegions>& sample,
728 const std::unique_ptr<SeqRegions>& lower_regions,
729 std::unique_ptr<SeqRegions>& best_parent_regions,
730 cmaple::RealNumType& best_root_blength,
731 cmaple::RealNumType& best_parent_lh,
732 const cmaple::RealNumType fixed_blength);
738 template <const cmaple::StateType num_states>
739 bool tryShorterNewBranchAtRoot(
740 const std::unique_ptr<SeqRegions>& sample,
741 const std::unique_ptr<SeqRegions>& lower_regions,
742 std::unique_ptr<SeqRegions>& best_parent_regions,
743 cmaple::RealNumType& best_length,
744 cmaple::RealNumType& best_parent_lh,
745 const cmaple::RealNumType fixed_blength);
753 template <const cmaple::StateType num_states>
754 bool tryLongerNewBranchAtRoot(
755 const std::unique_ptr<SeqRegions>& sample,
756 const std::unique_ptr<SeqRegions>& lower_regions,
757 std::unique_ptr<SeqRegions>& best_parent_regions,
758 cmaple::RealNumType& best_length,
759 cmaple::RealNumType& best_parent_lh,
760 const cmaple::RealNumType fixed_blength);
767 template <const cmaple::StateType num_states>
768 void estimateLengthNewBranchAtRoot(
769 const std::unique_ptr<SeqRegions>& sample,
770 const std::unique_ptr<SeqRegions>& lower_regions,
771 std::unique_ptr<SeqRegions>& best_parent_regions,
772 cmaple::RealNumType& best_length,
773 cmaple::RealNumType& best_parent_lh,
774 const cmaple::RealNumType fixed_blength,
775 const cmaple::RealNumType short_blength_thresh,
776 const bool optional_check);
782 template <cmaple::RealNumType (
Tree::*calculatePlacementCost)(
783 const std::unique_ptr<SeqRegions>&,
784 const std::unique_ptr<SeqRegions>&,
785 const cmaple::RealNumType)>
786 bool tryShorterNewBranch(
787 const std::unique_ptr<SeqRegions>& best_child_regions,
788 const std::unique_ptr<SeqRegions>& sample,
789 cmaple::RealNumType& best_blength,
790 cmaple::RealNumType& new_branch_lh,
791 const cmaple::RealNumType short_blength_thresh);
797 template <cmaple::RealNumType (
Tree::*calculatePlacementCost)(
798 const std::unique_ptr<SeqRegions>&,
799 const std::unique_ptr<SeqRegions>&,
800 const cmaple::RealNumType)>
801 void tryLongerNewBranch(const std::unique_ptr<SeqRegions>& best_child_regions,
802 const std::unique_ptr<SeqRegions>& sample,
803 cmaple::RealNumType& best_blength,
804 cmaple::RealNumType& new_branch_lh,
805 const cmaple::RealNumType long_blength_thresh);
810 template <cmaple::RealNumType (
Tree::*calculatePlacementCost)(
811 const std::unique_ptr<SeqRegions>&,
812 const std::unique_ptr<SeqRegions>&,
813 const cmaple::RealNumType)>
814 void estimateLengthNewBranch(
815 const cmaple::RealNumType best_split_lh,
816 const std::unique_ptr<SeqRegions>& best_child_regions,
817 const std::unique_ptr<SeqRegions>& sample,
818 cmaple::RealNumType& best_blength,
819 const cmaple::RealNumType long_blength_thresh,
820 const cmaple::RealNumType short_blength_thresh,
821 const bool optional_check);
828 template <const cmaple::StateType num_states>
829 void connectNewSample2Branch(
830 std::unique_ptr<SeqRegions>& sample,
831 const cmaple::NumSeqsType seq_name_index,
832 const cmaple::Index sibling_node_index,
833 PhyloNode& sibling_node,
834 const cmaple::RealNumType top_distance,
835 const cmaple::RealNumType down_distance,
836 const cmaple::RealNumType best_blength,
837 std::unique_ptr<SeqRegions>& best_child_regions,
838 const std::unique_ptr<SeqRegions>& upper_left_right_regions);
845 template <const cmaple::StateType num_states>
846 void connectNewSample2Root(std::unique_ptr<SeqRegions>& sample,
847 const cmaple::NumSeqsType seq_name_index,
848 const cmaple::Index sibling_node_index,
849 PhyloNode& sibling_node,
850 const cmaple::RealNumType best_root_blength,
851 const cmaple::RealNumType best_length2,
852 std::unique_ptr<SeqRegions>& best_parent_regions);
859 template <const cmaple::StateType num_states>
860 void placeSubTreeAtNode(
const cmaple::Index selected_node_index,
861 const cmaple::Index subtree_index,
863 const std::unique_ptr<SeqRegions>& subtree_regions,
864 const cmaple::RealNumType new_branch_length,
865 const cmaple::RealNumType new_lh);
872 template <const cmaple::StateType num_states>
873 void placeSubTreeMidBranch(
const cmaple::Index selected_node_index,
874 const cmaple::Index subtree_index,
876 const std::unique_ptr<SeqRegions>& subtree_regions,
877 const cmaple::RealNumType new_branch_length,
878 const cmaple::RealNumType new_lh);
886 const cmaple::StateType num_states,
887 void (
Tree::*updateRegionsSubTree)(PhyloNode&,
890 std::unique_ptr<SeqRegions>&&,
891 const std::unique_ptr<SeqRegions>&,
892 const std::unique_ptr<SeqRegions>&,
893 const std::unique_ptr<SeqRegions>&,
894 cmaple::RealNumType&)>
895 void connectSubTree2Branch(
896 const std::unique_ptr<SeqRegions>& subtree_regions,
897 const std::unique_ptr<SeqRegions>& lower_regions,
898 const cmaple::Index subtree_index,
900 const cmaple::Index sibling_node_index,
901 PhyloNode& sibling_node,
902 const cmaple::RealNumType top_distance,
903 const cmaple::RealNumType down_distance,
904 cmaple::RealNumType& best_blength,
905 std::unique_ptr<SeqRegions>&& best_child_regions,
906 const std::unique_ptr<SeqRegions>& upper_left_right_regions);
913 template <const cmaple::StateType num_states>
914 void connectSubTree2Root(
const cmaple::Index subtree_index,
916 const std::unique_ptr<SeqRegions>& subtree_regions,
917 const std::unique_ptr<SeqRegions>& lower_regions,
918 const cmaple::Index sibling_node_index,
919 PhyloNode& sibling_node,
920 const cmaple::RealNumType best_root_blength,
921 const cmaple::RealNumType best_length2,
922 std::unique_ptr<SeqRegions>&& best_parent_regions);
931 template <const cmaple::StateType num_states>
932 void updateRegionsPlaceSubTree(
934 PhyloNode& sibling_node,
936 std::unique_ptr<SeqRegions>&& best_child_regions,
937 const std::unique_ptr<SeqRegions>& subtree_regions,
938 const std::unique_ptr<SeqRegions>& upper_left_right_regions,
939 const std::unique_ptr<SeqRegions>& lower_regions,
940 cmaple::RealNumType& best_blength);
949 template <const cmaple::StateType num_states>
950 void updateRegionsPlaceSubTreeAbove(
952 PhyloNode& sibling_node,
954 std::unique_ptr<SeqRegions>&& best_child_regions,
955 const std::unique_ptr<SeqRegions>& subtree_regions,
956 const std::unique_ptr<SeqRegions>& upper_left_right_regions,
957 const std::unique_ptr<SeqRegions>& lower_regions,
958 cmaple::RealNumType& best_blength);
965 template <const cmaple::StateType num_states>
966 void handlePolytomyPlaceSubTree(
967 const cmaple::Index selected_node_index,
968 PhyloNode& selected_node,
969 const std::unique_ptr<SeqRegions>& subtree_regions,
970 const cmaple::RealNumType new_branch_length,
971 cmaple::RealNumType& best_down_lh_diff,
972 cmaple::Index& best_child_index,
973 cmaple::RealNumType& best_child_blength_split,
974 std::unique_ptr<SeqRegions>& best_child_regions);
981 template <const cmaple::StateType num_states>
982 void updateMidBranchLh(
983 const cmaple::Index node_index,
985 const std::unique_ptr<SeqRegions>& parent_upper_regions,
986 std::stack<cmaple::Index>& node_stack,
987 bool& update_blength);
995 template <const cmaple::StateType num_states>
996 std::unique_ptr<SeqRegions> computeUpperLeftRightRegions(
997 const cmaple::Index node_index,
999 const cmaple::MiniIndex next_node_mini,
1000 const std::unique_ptr<SeqRegions>& parent_upper_regions,
1001 std::stack<cmaple::Index>& node_stack,
1002 bool& update_blength);
1008 bool updateNewPartialIfDifferent(
1010 const cmaple::MiniIndex next_node_mini,
1011 std::unique_ptr<SeqRegions>& upper_left_right_regions,
1012 std::stack<cmaple::Index>& node_stack,
1013 const cmaple::PositionType seq_length);
1021 template <const cmaple::StateType num_states>
1022 void inline handleNullNewRegions(
const cmaple::Index index,
1024 const bool do_update_zeroblength,
1025 std::stack<cmaple::Index>& node_stack,
1026 bool& update_blength,
1027 const std::string err_msg) {
1028 if (do_update_zeroblength) {
1029 updateZeroBlength<num_states>(index, node, node_stack);
1030 update_blength =
true;
1032 throw std::logic_error(err_msg);
1041 template <const cmaple::StateType num_states>
1042 void updatePartialLhFromParent(
1043 const cmaple::Index index,
1045 std::stack<cmaple::Index>& node_stack,
1046 const std::unique_ptr<SeqRegions>& parent_upper_regions,
1047 const cmaple::PositionType seq_length);
1055 template <const cmaple::StateType num_states>
1056 void updatePartialLhFromChildren(
1057 const cmaple::Index index,
1059 std::stack<cmaple::Index>& node_stack,
1060 const std::unique_ptr<SeqRegions>& parent_upper_regions,
1061 const bool is_non_root,
1062 const cmaple::PositionType seq_length);
1069 template <const cmaple::StateType num_states>
1070 inline void computeMidBranchRegions(
1072 std::unique_ptr<SeqRegions>& regions_2_update,
1073 const SeqRegions& parent_upper_lr_lh) {
1074 std::unique_ptr<SeqRegions>& lower_lh = node.getPartialLh(cmaple::TOP);
1075 cmaple::RealNumType half_branch_length = node.getUpperLength() * 0.5;
1076 parent_upper_lr_lh.mergeUpperLower<num_states>(
1077 regions_2_update, half_branch_length, *lower_lh, half_branch_length,
1078 aln, model, params->threshold_prob);
1086 template <const cmaple::StateType num_states>
1087 void refreshNonLowerLhsFromParent(cmaple::Index& node_index,
1088 cmaple::Index& last_node_index);
1095 template <const cmaple::StateType num_states>
1096 void refreshUpperLR(
const cmaple::Index node_index,
1098 const cmaple::Index neighbor_index,
1099 std::unique_ptr<SeqRegions>& replaced_regions,
1100 const SeqRegions& parent_upper_lr_lh);
1105 template <const cmaple::StateType num_states>
1106 void estimateBlength_R_O(
const SeqRegion& seq1_region,
1107 const SeqRegion& seq2_region,
1108 const cmaple::RealNumType total_blength,
1109 const cmaple::PositionType end_pos,
1110 cmaple::RealNumType& coefficient,
1111 std::vector<cmaple::RealNumType>& coefficient_vec);
1116 void estimateBlength_R_ACGT(
1117 const SeqRegion& seq1_region,
1118 const cmaple::StateType seq2_state,
1119 const cmaple::RealNumType total_blength,
1120 const cmaple::PositionType end_pos,
1121 std::vector<cmaple::RealNumType>& coefficient_vec);
1127 template <const cmaple::StateType num_states>
1128 void estimateBlength_O_X(
const SeqRegion& seq1_region,
1129 const SeqRegion& seq2_region,
1130 const cmaple::RealNumType total_blength,
1131 const cmaple::PositionType end_pos,
1132 cmaple::RealNumType& coefficient,
1133 std::vector<cmaple::RealNumType>& coefficient_vec);
1138 template <const cmaple::StateType num_states>
1139 void estimateBlength_ACGT_O(
1140 const SeqRegion& seq1_region,
1141 const SeqRegion& seq2_region,
1142 const cmaple::RealNumType total_blength,
1143 cmaple::RealNumType& coefficient,
1144 std::vector<cmaple::RealNumType>& coefficient_vec);
1150 void estimateBlength_ACGT_RACGT(
1151 const SeqRegion& seq1_region,
1152 const SeqRegion& seq2_region,
1153 const cmaple::RealNumType total_blength,
1154 const cmaple::PositionType end_pos,
1155 std::vector<cmaple::RealNumType>& coefficient_vec);
1160 cmaple::RealNumType estimateBlengthFromCoeffs(
1161 cmaple::RealNumType& coefficient,
1162 const std::vector<cmaple::RealNumType> coefficient_vec);
1169 template <const cmaple::StateType num_states>
1170 void handleBlengthChanged(PhyloNode& node,
1171 const cmaple::Index node_index,
1172 const cmaple::RealNumType best_blength);
1177 template <const cmaple::StateType num_states>
1178 void optimizeBlengthBeforeSeekingSPR(
1180 cmaple::RealNumType& best_blength,
1181 cmaple::RealNumType& best_lh,
1182 bool& blength_changed,
1183 const std::unique_ptr<SeqRegions>& parent_upper_lr_lh,
1184 const std::unique_ptr<SeqRegions>& lower_lh);
1191 template <const cmaple::StateType num_states>
1192 void checkAndApplySPR(
const cmaple::RealNumType best_lh_diff,
1193 const cmaple::RealNumType best_blength,
1194 const cmaple::RealNumType best_lh,
1195 const cmaple::Index node_index,
1197 const cmaple::Index best_node_index,
1198 const cmaple::Index parent_node_index,
1199 const bool is_mid_node,
1200 cmaple::RealNumType& total_improvement,
1201 bool& topology_updated);
1206 inline void createAnInternalNode() { nodes.emplace_back(InternalNode()); }
1211 inline void createALeafNode(
const cmaple::NumSeqsType new_seq_name_index) {
1212 nodes.emplace_back(LeafNode(
1213 new_seq_name_index));
1219 std::unique_ptr<SeqRegions>& getPartialLhAtNode(
const cmaple::Index index);
1226 template <const cmaple::StateType num_states>
1227 bool calculateNNILh(std::stack<cmaple::Index>& node_stack_aLRT,
1228 cmaple::RealNumType& lh_diff,
1229 PhyloNode& current_node,
1234 const cmaple::Index parent_index,
1235 cmaple::RealNumType& lh_at_root,
1236 const bool allow_replacing_ML_tree);
1243 template <const cmaple::StateType num_states>
1244 bool calculateNNILhRoot(std::stack<cmaple::Index>& node_stack_aLRT,
1245 cmaple::RealNumType& lh_diff,
1246 std::unique_ptr<SeqRegions>& parent_new_lower_lh,
1247 const cmaple::RealNumType& child_2_new_blength,
1248 PhyloNode& current_node,
1253 const cmaple::Index parent_index,
1254 cmaple::RealNumType& lh_at_root,
1255 const bool allow_replacing_ML_tree);
1263 template <const cmaple::StateType num_states>
1264 bool calculateNNILhNonRoot(std::stack<cmaple::Index>& node_stack_aLRT,
1265 cmaple::RealNumType& lh_diff,
1266 std::unique_ptr<SeqRegions>& parent_new_lower_lh,
1267 const cmaple::RealNumType& child_2_new_blength,
1268 PhyloNode& current_node,
1273 const cmaple::Index parent_index,
1274 cmaple::RealNumType& lh_at_root,
1275 const bool allow_replacing_ML_tree);
1282 template <const cmaple::StateType num_states>
1283 void replaceMLTreebyNNIRoot(std::stack<cmaple::Index>& node_stack_aLRT,
1284 cmaple::RealNumType& lh_diff,
1285 PhyloNode& current_node,
1290 cmaple::RealNumType& lh_at_root,
1291 const cmaple::RealNumType child_1_best_blength,
1292 const cmaple::RealNumType child_2_best_blength,
1293 const cmaple::RealNumType sibling_best_blength,
1294 const cmaple::RealNumType parent_best_blength);
1302 template <const cmaple::StateType num_states>
1303 void replaceMLTreebyNNINonRoot(
1304 std::stack<cmaple::Index>& node_stack_aLRT,
1305 cmaple::RealNumType& lh_diff,
1306 PhyloNode& current_node,
1311 cmaple::RealNumType& lh_at_root,
1312 const cmaple::RealNumType child_1_best_blength,
1313 const cmaple::RealNumType child_2_best_blength,
1314 const cmaple::RealNumType sibling_best_blength,
1315 const cmaple::RealNumType parent_best_blength,
1316 const cmaple::RealNumType new_parent_best_blength);
1325 template <const cmaple::StateType num_states>
1326 void updateUpperLR(std::stack<cmaple::Index>& node_stack,
1327 std::stack<cmaple::Index>& node_stack_aLRT);
1333 void recompute_aLRT_GrandChildren(PhyloNode& parent,
1334 std::stack<cmaple::Index>& node_stack_aLRT);
1343 template <const cmaple::StateType num_states>
1344 void calculate_aRLT(
const bool allow_replacing_ML_tree);
1351 template <const cmaple::StateType num_states>
1352 cmaple::RealNumType calculateSiteLhs(
1353 std::vector<cmaple::RealNumType>& site_lh_contributions,
1354 std::vector<cmaple::RealNumType>& site_lh_root);
1361 template <const cmaple::StateType num_states>
1362 void calculate_aRLT_SH(
1363 std::vector<cmaple::RealNumType>& site_lh_contributions,
1364 std::vector<cmaple::RealNumType>& site_lh_root,
1365 const cmaple::RealNumType& LT1);
1372 template <const cmaple::StateType num_states>
1373 cmaple::PositionType count_aRLT_SH_branch(
1374 std::vector<cmaple::RealNumType>& site_lh_contributions,
1375 std::vector<cmaple::RealNumType>& site_lh_root,
1377 const cmaple::RealNumType& LT1);
1385 template <const cmaple::StateType num_states>
1386 void calSiteLhDiffRoot(std::vector<cmaple::RealNumType>& site_lh_diff,
1387 std::vector<cmaple::RealNumType>& site_lh_root_diff,
1388 const std::vector<cmaple::RealNumType>& site_lh_root,
1389 std::unique_ptr<SeqRegions>& parent_new_lower_lh,
1390 const cmaple::RealNumType& child_2_new_blength,
1391 PhyloNode& current_node,
1396 const cmaple::Index parent_index);
1404 template <const cmaple::StateType num_states>
1405 void calSiteLhDiffNonRoot(
1406 std::vector<cmaple::RealNumType>& site_lh_diff,
1407 std::vector<cmaple::RealNumType>& site_lh_root_diff,
1408 const std::vector<cmaple::RealNumType>& site_lh_root,
1409 std::unique_ptr<SeqRegions>& parent_new_lower_lh,
1410 const cmaple::RealNumType& child_2_new_blength,
1411 PhyloNode& current_node,
1416 const cmaple::Index parent_index);
1423 template <const cmaple::StateType num_states>
1424 void calSiteLhDiff(std::vector<cmaple::RealNumType>& site_lh_diff,
1425 std::vector<cmaple::RealNumType>& site_lh_root_diff,
1426 const std::vector<cmaple::RealNumType>& site_lh_root,
1427 PhyloNode& current_node,
1432 const cmaple::Index parent_index);
1437 const char readNextChar(std::istream& in,
1438 cmaple::PositionType& in_line,
1439 cmaple::PositionType& in_column,
1440 const char& current_ch = 0)
const;
1448 cmaple::NumSeqsType parseFile(
1449 std::istream& infile,
1451 cmaple::RealNumType& branch_len,
1452 cmaple::PositionType& in_line,
1453 cmaple::PositionType& in_column,
1454 const std::map<std::string, cmaple::NumSeqsType>& map_seqname_index,
1455 bool& missing_blengths);
1461 void collapseAllZeroLeave();
1467 void collapseOneZeroLeaf(PhyloNode& node,
1468 cmaple::Index& node_index,
1469 PhyloNode& neighbor_1,
1470 const cmaple::Index neighbor_1_index,
1471 PhyloNode& neighbor_2);
1476 template <const cmaple::StateType num_states>
1477 void updatePesudoCountModel(PhyloNode& node,
1478 const cmaple::Index node_index,
1479 const cmaple::Index parent_index);
1486 template <const cmaple::StateType num_states>
1487 void expandTreeByOneLessInfoSeq(PhyloNode& node,
1488 const cmaple::Index node_index,
1489 const cmaple::Index parent_index);
1499 template <const cmaple::StateType num_states>
1500 void updateBlengthReplaceMLTree(std::stack<cmaple::Index>& node_stack_aLRT,
1501 cmaple::RealNumType& lh_diff,
1503 const cmaple::Index node_index,
1504 const cmaple::RealNumType best_blength);
1513 template <const cmaple::StateType num_states>
1514 void addLessInfoSeqReplacingMLTree(std::stack<cmaple::Index>& node_stack_aLRT,
1515 cmaple::RealNumType& lh_diff,
1517 const cmaple::Index node_index,
1518 const cmaple::Index parent_index);
1525 std::string exportNodeString(
const bool binary,
1526 const cmaple::NumSeqsType node_vec_index,
1527 const bool show_branch_supports);
1539 bool readTree(std::istream& tree_stream);
1552 void updateModelByAln();
1559 template <const cmaple::StateType num_states>
1560 void updateModelLhAfterLoading();
1565 std::map<std::string, NumSeqsType> initMapSeqNameIndex();
1573 void remarkExistingSeqs();
1584 NumSeqsType markAnExistingSeq(
1585 const std::string& seq_name,
1586 const std::map<std::string, NumSeqsType>& map_name_index);
1591 void resetSeqAdded();
1599 void attachAlnModel(Alignment* aln, ModelBase* model);
1604 std::string
exportNewick(
const bool binary,
const bool show_branch_supports);
1610 template <const cmaple::StateType num_states>
1611 void updateZeroBlength(
const cmaple::Index index,
1613 std::stack<cmaple::Index>& node_stack);
1622 template <const cmaple::StateType num_states>
1623 void updatePartialLh(std::stack<cmaple::Index>& node_stack);
1631 template <const cmaple::StateType num_states>
1632 void seekSamplePlacement(
const cmaple::Index start_node_index,
1633 const cmaple::NumSeqsType seq_name_index,
1634 const std::unique_ptr<SeqRegions>& sample_regions,
1635 cmaple::Index& selected_node_index,
1636 cmaple::RealNumType& best_lh_diff,
1637 bool& is_mid_branch,
1638 cmaple::RealNumType& best_up_lh_diff,
1639 cmaple::RealNumType& best_down_lh_diff,
1640 cmaple::Index& best_child_index);
1648 template <const cmaple::StateType num_states>
1649 void seekSubTreePlacement(
1650 cmaple::Index& best_node_index,
1651 cmaple::RealNumType& best_lh_diff,
1652 bool& is_mid_branch,
1653 cmaple::RealNumType& best_up_lh_diff,
1654 cmaple::RealNumType& best_down_lh_diff,
1655 cmaple::Index& best_child_index,
1656 const bool short_range_search,
1657 const cmaple::Index child_node_index,
1658 cmaple::RealNumType&
1667 template <const cmaple::StateType num_states>
1668 void placeNewSampleMidBranch(
const cmaple::Index& selected_node_index,
1669 std::unique_ptr<SeqRegions>& sample,
1670 const cmaple::NumSeqsType seq_name_index,
1671 const cmaple::RealNumType best_lh_diff);
1678 template <const cmaple::StateType num_states>
1679 void placeNewSampleAtNode(
const cmaple::Index selected_node_index,
1680 std::unique_ptr<SeqRegions>& sample,
1681 const cmaple::NumSeqsType seq_name_index,
1682 const cmaple::RealNumType best_lh_diff,
1683 const cmaple::RealNumType best_up_lh_diff,
1684 const cmaple::RealNumType best_down_lh_diff,
1685 const cmaple::Index best_child_index);
1693 template <const cmaple::StateType num_states>
1694 void applyOneSPR(
const cmaple::Index subtree_index,
1696 const cmaple::Index best_node_index,
1697 const bool is_mid_branch,
1698 const cmaple::RealNumType branch_length,
1699 const cmaple::RealNumType best_lh_diff);
1707 template <const cmaple::StateType num_states>
1708 void refreshAllLhs(
bool avoid_using_upper_lr_lhs =
false);
1715 void resetSPRFlags(
const bool update_outdated,
1716 const bool n_outdated);
1724 template <const cmaple::StateType num_states>
1725 cmaple::RealNumType improveEntireTree(
bool short_range_search);
1733 template <const cmaple::StateType num_states>
1734 cmaple::PositionType optimizeBranchIter();
1740 template <const cmaple::StateType num_states>
1741 cmaple::RealNumType estimateBranchLength(
1742 const std::unique_ptr<SeqRegions>& parent_regions,
1743 const std::unique_ptr<SeqRegions>& child_regions);
1749 template <const cmaple::StateType num_states>
1750 cmaple::RealNumType estimateBranchLengthWithCheck(
1751 const std::unique_ptr<SeqRegions>& upper_lr_regions,
1752 const std::unique_ptr<SeqRegions>& lower_regions,
1753 const cmaple::RealNumType current_blength);
1759 template <const cmaple::StateType num_states>
1760 cmaple::RealNumType calculateSamplePlacementCost(
1761 const std::unique_ptr<SeqRegions>& parent_regions,
1762 const std::unique_ptr<SeqRegions>& child_regions,
1763 const cmaple::RealNumType blength);
1769 template <const cmaple::StateType num_states>
1770 cmaple::RealNumType calculateSubTreePlacementCost(
1771 const std::unique_ptr<SeqRegions>& parent_regions,
1772 const std::unique_ptr<SeqRegions>& child_regions,
1773 const cmaple::RealNumType blength);
1780 template <const cmaple::StateType num_states>
1781 void updateLowerLh(cmaple::RealNumType& total_lh,
1782 std::unique_ptr<SeqRegions>& new_lower_lh,
1784 const std::unique_ptr<SeqRegions>& lower_lh_1,
1785 const std::unique_ptr<SeqRegions>& lower_lh_2,
1786 const cmaple::Index neighbor_1_index,
1787 PhyloNode& neighbor_1,
1788 const cmaple::Index neighbor_2_index,
1789 PhyloNode& neighbor_2,
1790 const cmaple::PositionType& seq_length);
1799 template <const cmaple::StateType num_states>
1800 void updateLowerLhAvoidUsingUpperLRLh(
1801 cmaple::RealNumType& total_lh,
1802 std::unique_ptr<SeqRegions>& new_lower_lh,
1804 const std::unique_ptr<SeqRegions>& lower_lh_1,
1805 const std::unique_ptr<SeqRegions>& lower_lh_2,
1806 const cmaple::Index neighbor_1_index,
1807 PhyloNode& neighbor_1,
1808 const cmaple::Index neighbor_2_index,
1809 PhyloNode& neighbor_2,
1810 const cmaple::PositionType& seq_length);
1817 template <const cmaple::StateType num_states>
1818 void computeLhContribution(cmaple::RealNumType& total_lh,
1819 std::unique_ptr<SeqRegions>& new_lower_lh,
1821 const std::unique_ptr<SeqRegions>& lower_lh_1,
1822 const std::unique_ptr<SeqRegions>& lower_lh_2,
1823 const cmaple::Index neighbor_1_index,
1824 PhyloNode& neighbor_1,
1825 const cmaple::Index neighbor_2_index,
1826 PhyloNode& neighbor_2,
1827 const cmaple::PositionType& seq_length);
1832 template <void (Tree::*task)(cmaple::RealNumType&,
1833 std::unique_ptr<SeqRegions>&,
1835 const std::unique_ptr<SeqRegions>&,
1836 const std::unique_ptr<SeqRegions>&,
1837 const cmaple::Index,
1839 const cmaple::Index,
1841 const cmaple::PositionType&)>
1842 cmaple::RealNumType performDFS();
1848 template <const cmaple::StateType num_states>
1849 void updateModelParams();
1855 void (Tree::*task)(PhyloNode&, const cmaple::Index, const cmaple::Index)>
1856 void performDFSAtLeave();
1861 bool keepTraversing(
const RealNumType& best_lh_diff,
1862 const RealNumType& lh_diff_at_node,
1863 const bool& strict_stop_seeking_placement_subtree,
1864 const std::unique_ptr<UpdatingNode>& updating_node,
1865 const int& failure_limit_subtree,
1866 const RealNumType& thresh_log_lh_subtree,
1867 const bool able2traverse =
true);
1890template <const StateType num_states>
1891void cmaple::Tree::refreshAllLhs(
bool avoid_using_upper_lr_lhs) {
1894 assert(cumulative_rate);
1897 if (avoid_using_upper_lr_lhs) {
1898 performDFS<&cmaple::Tree::updateLowerLhAvoidUsingUpperLRLh<num_states>>();
1900 performDFS<&cmaple::Tree::updateLowerLh<num_states>>();
1904 refreshAllNonLowerLhs<num_states>();
1907template <const StateType num_states>
1908RealNumType cmaple::Tree::improveEntireTree(
bool short_range_search) {
1911 assert(cumulative_rate);
1912 assert(nodes.size() > 0);
1915 std::stack<Index> node_stack;
1916 node_stack.push(Index(root_vector_index, TOP));
1919 RealNumType total_improvement = 0;
1920 PositionType num_nodes = 0;
1921 PositionType count_node_1K = 0;
1924 while (!node_stack.empty()) {
1926 Index index = node_stack.top();
1928 PhyloNode& node = nodes[index.getVectorIndex()];
1935 assert(index.getMiniIndex() == TOP);
1936 if (node.isInternal()) {
1937 node_stack.push(node.getNeighborIndex(RIGHT));
1938 node_stack.push(node.getNeighborIndex(LEFT));
1943 if (node.isOutdated() && node.getSPRCount() <= 5) {
1944 node.setOutdated(
false);
1956 RealNumType improvement =
1957 improveSubTree<num_states>(index, node, short_range_search);
1978 total_improvement += improvement;
1987 if (cmaple::verbose_mode >= cmaple::VB_MED && num_nodes - count_node_1K >= 1000) {
1988 std::cout <<
"Processed topology for " << convertIntToString(num_nodes)
1989 <<
" nodes." << std::endl;
1990 count_node_1K = num_nodes;
1995 return total_improvement;
1998template <const StateType num_states>
1999void cmaple::Tree::updateModelParams() {
2005 performDFSAtLeave<&cmaple::Tree::updatePesudoCountModel<num_states>>();
2008 if (model->updateMutationMatEmpirical()) {
2009 computeCumulativeRate();
2013template <const StateType num_states>
2014void cmaple::Tree::seekSamplePlacement(
2015 const Index start_node_index,
2016 const NumSeqsType seq_name_index,
2017 const std::unique_ptr<SeqRegions>& sample_regions,
2018 Index& selected_node_index,
2019 RealNumType& best_lh_diff,
2020 bool& is_mid_branch,
2021 RealNumType& best_up_lh_diff,
2022 RealNumType& best_down_lh_diff,
2023 Index& best_child_index) {
2024 assert(sample_regions && sample_regions->size() > 0);
2025 assert(seq_name_index >= 0);
2028 assert(cumulative_rate);
2032 selected_node_index = start_node_index;
2034 RealNumType lh_diff_mid_branch = 0;
2035 RealNumType lh_diff_at_node = 0;
2036 PositionType seq_length =
static_cast<PositionType
>(aln->ref_seq.size());
2038 std::stack<TraversingNode> extended_node_stack;
2039 extended_node_stack.push(TraversingNode(start_node_index, 0, MIN_NEGATIVE));
2042 while (!extended_node_stack.empty()) {
2043 TraversingNode current_extended_node = std::move(extended_node_stack.top());
2044 extended_node_stack.pop();
2045 const NumSeqsType current_node_vec =
2046 current_extended_node.getIndex().getVectorIndex();
2047 PhyloNode& current_node = nodes[current_node_vec];
2048 const bool& is_internal = current_node.isInternal();
2062 if ((!is_internal) &&
2063 (current_node.getPartialLh(TOP)->compareWithSample(
2064 *sample_regions, seq_length, aln) == 1)) {
2065 current_node.addLessInfoSeqs(seq_name_index);
2066 selected_node_index = Index();
2070 const RealNumType current_node_blength = current_node.getUpperLength();
2074 if (root_vector_index != current_node_vec && current_node_blength > 0) {
2075 examineSamplePlacementMidBranch<num_states>(
2076 selected_node_index, current_node.getMidBranchLh(), best_lh_diff,
2077 is_mid_branch, lh_diff_mid_branch, current_extended_node,
2082 lh_diff_mid_branch = MIN_NEGATIVE;
2087 if (root_vector_index == current_node_vec || current_node_blength > 0) {
2088 examineSamplePlacementAtNode<num_states>(
2089 selected_node_index, current_node.getTotalLh(), best_lh_diff,
2090 is_mid_branch, lh_diff_at_node, lh_diff_mid_branch, best_up_lh_diff,
2091 best_down_lh_diff, best_child_index, current_extended_node,
2094 lh_diff_at_node = current_extended_node.getLhDiff();
2099 const short int failure_count = current_extended_node.getFailureCount();
2100 if ((params->strict_stop_seeking_placement_sample &&
2101 failure_count <= params->failure_limit_sample &&
2102 lh_diff_at_node > (best_lh_diff - params->thresh_log_lh_sample)) ||
2103 (!params->strict_stop_seeking_placement_sample &&
2104 (failure_count <= params->failure_limit_sample ||
2105 lh_diff_at_node > (best_lh_diff - params->thresh_log_lh_sample)))) {
2110 extended_node_stack.push(
2111 TraversingNode(current_node.getNeighborIndex(RIGHT), failure_count,
2113 extended_node_stack.push(
2114 TraversingNode(current_node.getNeighborIndex(LEFT), failure_count,
2124 best_down_lh_diff = MIN_NEGATIVE;
2125 best_child_index = Index();
2129 if (!is_mid_branch) {
2130 finetuneSamplePlacementAtNode<num_states>(
2131 nodes[selected_node_index.getVectorIndex()], best_down_lh_diff,
2132 best_child_index, sample_regions);
2136template <const StateType num_states>
2137void cmaple::Tree::placeNewSampleAtNode(
const Index selected_node_index,
2138 std::unique_ptr<SeqRegions>& sample,
2139 const NumSeqsType seq_name_index,
2140 const RealNumType best_lh_diff,
2141 const RealNumType best_up_lh_diff,
2142 const RealNumType best_down_lh_diff,
2143 const Index best_child_index) {
2145 RealNumType best_child_lh = MIN_NEGATIVE;
2146 RealNumType best_child_blength_split = 0;
2147 RealNumType best_parent_lh;
2148 RealNumType best_parent_blength_split = 0;
2149 RealNumType best_root_blength = -1;
2150 const RealNumType threshold_prob = params->threshold_prob;
2151 std::unique_ptr<SeqRegions> best_parent_regions =
nullptr;
2152 std::unique_ptr<SeqRegions> best_child_regions =
nullptr;
2154 assert(selected_node_index.getMiniIndex() == TOP);
2155 assert(sample && sample->size() > 0);
2156 assert(seq_name_index >= 0);
2159 assert(cumulative_rate);
2161 const NumSeqsType selected_node_vec_index =
2162 selected_node_index.getVectorIndex();
2163 PhyloNode& selected_node = nodes[selected_node_vec_index];
2166 if (best_child_index.getMiniIndex() != UNDEFINED) {
2167 PhyloNode& best_child = nodes[best_child_index.getVectorIndex()];
2168 best_child_lh = best_down_lh_diff;
2169 assert(best_child_index.getMiniIndex() == TOP);
2170 best_child_blength_split =
2171 0.5 * best_child.getUpperLength();
2172 const std::unique_ptr<SeqRegions>& upper_left_right_regions =
2173 getPartialLhAtNode(best_child.getNeighborIndex(
2176 const std::unique_ptr<SeqRegions>& lower_regions = best_child.getPartialLh(
2181 best_child_regions =
2188 tryShorterBranch<num_states,
2189 &cmaple::Tree::calculateSamplePlacementCost<num_states>>(
2190 best_child.getUpperLength(), best_child_regions, sample,
2191 upper_left_right_regions, lower_regions, best_child_lh,
2192 best_child_blength_split, default_blength,
true);
2195 if (!best_child_regions) {
2196 best_child_regions =
2197 cmaple::make_unique<SeqRegions>(SeqRegions(best_child.getMidBranchLh()));
2202 RealNumType old_root_lh = MIN_NEGATIVE;
2203 if (root_vector_index == selected_node_vec_index) {
2208 const std::unique_ptr<SeqRegions>& lower_regions =
2209 selected_node.getPartialLh(TOP);
2210 old_root_lh = lower_regions->computeAbsoluteLhAtRoot<num_states>(
2211 model, cumulative_base);
2214 RealNumType new_root_lh = lower_regions->mergeTwoLowers<num_states>(
2215 best_parent_regions, default_blength, *sample, default_blength, aln,
2216 model, cumulative_rate, threshold_prob,
true);
2218 new_root_lh += best_parent_regions->computeAbsoluteLhAtRoot<num_states>(
2219 model, cumulative_base);
2220 best_parent_lh = new_root_lh;
2223 best_root_blength = default_blength;
2224 tryShorterBranchAtRoot<num_states>(sample, lower_regions,
2225 best_parent_regions, best_root_blength,
2226 best_parent_lh, default_blength);
2229 best_parent_lh -= old_root_lh;
2233 best_parent_lh = best_up_lh_diff;
2234 best_parent_blength_split =
2235 0.5 * selected_node.getUpperLength();
2241 const std::unique_ptr<SeqRegions>& upper_left_right_regions =
2242 getPartialLhAtNode(selected_node.getNeighborIndex(TOP));
2243 const std::unique_ptr<SeqRegions>& lower_regions =
2244 selected_node.getPartialLh(TOP);
2247 best_parent_regions =
2254 tryShorterBranch<num_states,
2255 &cmaple::Tree::calculateSamplePlacementCost<num_states>>(
2256 selected_node.getUpperLength(), best_parent_regions, sample,
2257 upper_left_right_regions, lower_regions, best_parent_lh,
2258 best_parent_blength_split, default_blength,
false);
2261 if (!best_parent_regions) {
2262 best_parent_regions = cmaple::make_unique<SeqRegions>(
2263 SeqRegions(selected_node.getMidBranchLh()));
2269 if (best_child_lh >= best_parent_lh && best_child_lh >= best_lh_diff) {
2270 assert(best_child_index.getMiniIndex() == TOP);
2271 PhyloNode& best_child = nodes[best_child_index.getVectorIndex()];
2272 const std::unique_ptr<SeqRegions>& upper_left_right_regions =
2273 getPartialLhAtNode(best_child.getNeighborIndex(
2278 RealNumType best_length = default_blength;
2279 estimateLengthNewBranch<
2280 &cmaple::Tree::calculateSamplePlacementCost<num_states>>(
2281 best_child_lh, best_child_regions, sample, best_length, max_blength,
2282 min_blength,
false);
2288 connectNewSample2Branch<num_states>(
2289 sample, seq_name_index, best_child_index, best_child,
2290 best_child_blength_split,
2291 best_child.getUpperLength() - best_child_blength_split, best_length,
2292 best_child_regions, upper_left_right_regions);
2298 if (best_lh_diff >= best_parent_lh) {
2299 best_root_blength = -1;
2300 best_parent_blength_split = -1;
2301 best_parent_lh = best_lh_diff;
2304 best_parent_regions =
nullptr;
2306 if (root_vector_index == selected_node_vec_index) {
2310 selected_node.getPartialLh(TOP)->mergeTwoLowers<num_states>(
2311 best_parent_regions, -1, *sample, default_blength, aln, model,
2312 cumulative_rate, threshold_prob);
2315 SeqRegions seq_regions_clone = SeqRegions(selected_node.getTotalLh());
2316 best_parent_regions =
2317 cmaple::make_unique<SeqRegions>(std::move(seq_regions_clone));
2322 if (root_vector_index == selected_node_vec_index) {
2324 best_parent_lh += old_root_lh;
2325 RealNumType best_length2 = default_blength;
2326 const std::unique_ptr<SeqRegions>& lower_regions =
2327 selected_node.getPartialLh(
2330 estimateLengthNewBranchAtRoot<num_states>(
2331 sample, lower_regions, best_parent_regions, best_length2,
2332 best_parent_lh, best_root_blength, min_blength,
false);
2335 best_parent_lh -= old_root_lh;
2338 connectNewSample2Root<num_states>(
2339 sample, seq_name_index, selected_node_index, selected_node,
2340 best_root_blength, best_length2, best_parent_regions);
2344 const std::unique_ptr<SeqRegions>& upper_left_right_regions =
2345 getPartialLhAtNode(selected_node.getNeighborIndex(
2350 RealNumType best_length = default_blength;
2351 estimateLengthNewBranch<
2352 &cmaple::Tree::calculateSamplePlacementCost<num_states>>(
2353 best_parent_lh, best_parent_regions, sample, best_length, max_blength,
2354 min_blength,
false);
2357 RealNumType down_distance = best_parent_blength_split;
2358 RealNumType top_distance =
2359 selected_node.getUpperLength() -
2361 if (best_parent_blength_split < 0) {
2364 selected_node.getUpperLength();
2371 selected_node.setTotalLh(
nullptr);
2372 selected_node.setMidBranchLh(
nullptr);
2376 connectNewSample2Branch<num_states>(
2377 sample, seq_name_index, selected_node_index, selected_node,
2378 top_distance, down_distance, best_length, best_parent_regions,
2379 upper_left_right_regions);
2390template <const StateType num_states>
2391void cmaple::Tree::placeNewSampleMidBranch(
const Index& selected_node_index,
2392 std::unique_ptr<SeqRegions>& sample,
2393 const NumSeqsType seq_name_index,
2394 const RealNumType best_lh_diff) {
2397 std::unique_ptr<SeqRegions> best_child_regions =
nullptr;
2402 assert(selected_node_index.getMiniIndex() == TOP);
2403 assert(sample && sample->size() > 0);
2404 assert(seq_name_index >= 0);
2407 assert(cumulative_rate);
2409 PhyloNode& selected_node = nodes[selected_node_index.getVectorIndex()];
2410 const std::unique_ptr<SeqRegions>& upper_left_right_regions =
2411 getPartialLhAtNode(selected_node.getNeighborIndex(TOP));
2412 RealNumType best_split_lh = best_lh_diff;
2413 const RealNumType selected_node_blength = selected_node.getUpperLength();
2414 RealNumType best_branch_length_split = 0.5 * selected_node_blength;
2415 best_child_regions =
2418 const std::unique_ptr<SeqRegions>& lower_regions =
2419 selected_node.getPartialLh(TOP);
2422 bool found_new_split =
2423 tryShorterBranch<num_states,
2424 &cmaple::Tree::calculateSamplePlacementCost<num_states>>(
2425 selected_node_blength, best_child_regions, sample,
2426 upper_left_right_regions, lower_regions, best_split_lh,
2427 best_branch_length_split, default_blength,
true);
2429 if (!found_new_split) {
2431 found_new_split = tryShorterBranch<
2432 num_states, &cmaple::Tree::calculateSamplePlacementCost<num_states>>(
2433 selected_node_blength, best_child_regions, sample,
2434 upper_left_right_regions, lower_regions, best_split_lh,
2435 best_branch_length_split, default_blength,
false);
2437 if (found_new_split) {
2438 best_branch_length_split =
2439 selected_node_blength - best_branch_length_split;
2444 if (!best_child_regions) {
2445 best_child_regions = cmaple::make_unique<SeqRegions>(
2446 SeqRegions(selected_node.getMidBranchLh()));
2450 RealNumType best_blength = default_blength;
2451 estimateLengthNewBranch<
2452 &cmaple::Tree::calculateSamplePlacementCost<num_states>>(
2453 best_split_lh, best_child_regions, sample, best_blength, max_blength,
2454 min_blength,
false);
2457 connectNewSample2Branch<num_states>(
2458 sample, seq_name_index, selected_node_index, selected_node,
2459 best_branch_length_split,
2460 selected_node_blength - best_branch_length_split, best_blength,
2461 best_child_regions, upper_left_right_regions);
Definition modelbase.h:17
void doPlacement(std::ostream &out_stream=std::cout)
Do placement (using stepwise addition) to build an initial tree. Model parameters (if not fixed) will...
void load(const std::string &tree_filename, const bool fixed_blengths=false)
Load a tree from a (bifurcating or multifurcating) tree (with/without branch lengths) in NEWICK forma...
std::string exportNewick(const TreeType tree_type=BIN_TREE, const bool show_branch_supports=true)
Export the phylogenetic tree to a string in NEWICK format.
void applySPR(const TreeSearchType tree_search_type, const bool shallow_tree_search, std::ostream &out_stream=std::cout)
Apply SPR moves to optimize the tree.
TreeSearchType
Definition tree.h:17
@ EXHAUSTIVE_TREE_SEARCH
Definition tree.h:21
@ UNKNOWN_TREE_SEARCH
Definition tree.h:22
@ FAST_TREE_SEARCH
Definition tree.h:18
@ NORMAL_TREE_SEARCH
Definition tree.h:19
void computeBranchSupport(const int num_threads=1, const int num_replicates=1000, const double epsilon=0.1, const bool allow_replacing_ML_tree=true, std::ostream &out_stream=std::cout)
Compute branch supports (aLRT-SH) of the current tree, which may or may not contain all taxa in the a...
TreeType
Definition tree.h:28
@ MUL_TREE
Definition tree.h:30
@ BIN_TREE
Definition tree.h:29
@ UNKNOWN_TREE
Definition tree.h:31
RealNumType computeLh()
Compute the log likelihood of the current tree, which may or may not contain all taxa in the alignmen...
void infer(const TreeSearchType tree_search_type=NORMAL_TREE_SEARCH, const bool shallow_tree_search=false, std::ostream &out_stream=std::cout)
Infer a maximum likelihood tree by executing doPlacement(), applySPR(), and optimizeBranch()
void changeModel(Model *model)
Change the substitution model.
void changeAln(Alignment *aln)
Change the alignment.
Tree(Alignment *aln, Model *model, std::istream &tree_stream, const bool fixed_blengths=false, std::unique_ptr< cmaple::Params > &¶ms=nullptr)
Constructor from a stream of a (bifurcating or multifurcating) tree (with/without branch lengths in N...
Tree(Alignment *aln, Model *model, const std::string &tree_filename="", const bool fixed_blengths=false, std::unique_ptr< cmaple::Params > &¶ms=nullptr)
Constructor from an optional (bifurcating or multifurcating) tree (with/without branch lengths in NEW...
void load(std::istream &tree_stream, const bool fixed_blengths=false)
Load a tree from a stream of a (bifurcating or multifurcating) tree (with/without branch lengths) in ...
void optimizeBranch(std::ostream &out_stream=std::cout)
Optimize the branch lengths of the tree.
std::istream & operator>>(std::istream &in_stream, cmaple::Tree &tree)
Customized >> operator to read a tree from a stream.
std::ostream & operator<<(std::ostream &out_stream, cmaple::Tree &tree)
Customized << operator to output the tree string in a (bifurcating) NEWICK format to a stream.