CMAPLE 1.0.0
C++ MAximum Parsimonious Likelihood Estimation
Loading...
Searching...
No Matches
tree.h
1#include "../alignment/alignment.h"
2#include "../model/model.h"
3#include "updatingnode.h"
4#ifdef _OPENMP
5#include <omp.h>
6#endif
7
8#pragma once
9
10namespace cmaple {
12class Tree {
13 public:
23 };
24
28 enum TreeType {
32 };
33
34 // ----------------- BEGIN OF PUBLIC APIs ------------------------------------
35 // //
62 Model* model,
63 std::istream& tree_stream,
64 const bool fixed_blengths = false,
65 std::unique_ptr<cmaple::Params>&& params = nullptr);
66
95 Model* model,
96 const std::string& tree_filename = "",
97 const bool fixed_blengths = false,
98 std::unique_ptr<cmaple::Params>&& params = nullptr);
99
103
119 void load(std::istream& tree_stream, const bool fixed_blengths = false);
120
137 void load(const std::string& tree_filename,
138 const bool fixed_blengths = false);
139
149
157 void changeModel(Model* model);
158
172 void doPlacement(std::ostream& out_stream = std::cout);
173
184 void applySPR(const TreeSearchType tree_search_type,
185 const bool shallow_tree_search, std::ostream& out_stream = std::cout);
186
194 void optimizeBranch(std::ostream& out_stream = std::cout);
195
224 void infer(
225 const TreeSearchType tree_search_type = NORMAL_TREE_SEARCH,
226 const bool shallow_tree_search = false, std::ostream& out_stream = std::cout);
227
236 RealNumType computeLh();
237
260 void computeBranchSupport(const int num_threads = 1,
261 const int num_replicates = 1000,
262 const double epsilon = 0.1,
263 const bool allow_replacing_ML_tree = true,
264 std::ostream& out_stream = std::cout);
265
275 std::string exportNewick(const TreeType tree_type = BIN_TREE,
276 const bool show_branch_supports = true);
277
278 // ----------------- END OF PUBLIC APIs ------------------------------------
279 // //
280
285 bool fixed_blengths = false;
286
290 cmaple::RealNumType default_blength, min_blength, max_blength,
291 min_blength_mid, min_blength_sensitivity, half_max_blength,
292 half_min_blength_mid, double_min_blength;
293
297 // std::optional<cmaple::Params> params;
298 std::unique_ptr<cmaple::Params> params = nullptr;
299
303 Alignment* aln = nullptr;
304
308 ModelBase* model = nullptr;
309
313 cmaple::RealNumType* cumulative_rate = nullptr;
314
318 std::vector<std::vector<cmaple::PositionType>> cumulative_base;
319
323 std::vector<PhyloNode> nodes;
324
328 std::vector<NodeLh> node_lhs;
329
333 cmaple::NumSeqsType root_vector_index;
334
340 std::vector<std::string> seq_names;
341
346 std::vector<bool> sequence_added;
347
355 void makeTreeInOutConsistent();
356
362 static TreeSearchType parseTreeSearchType(
363 const std::string& tree_search_type);
364
370 static std::string getTreeSearchStr(const TreeSearchType tree_search_type);
371
377 static TreeType parseTreeType(const std::string& tree_type_str);
378
381 private:
385 typedef void (Tree::*LoadTreePtrType)(std::istream&, const bool);
386 LoadTreePtrType loadTreePtr;
387
391 typedef void (Tree::*ChangeModelPtrType)(Model*);
392 ChangeModelPtrType changeModelPtr;
393
397 typedef void (Tree::*ChangeAlnPtrType)(Alignment*);
398 ChangeAlnPtrType changeAlnPtr;
399
403 typedef void (Tree::*DoInferencePtrType)(const TreeSearchType,
404 const bool, std::ostream&);
405 DoInferencePtrType doInferencePtr;
406
410 typedef void (Tree::*DoPlacementPtrType)(std::ostream&);
411 DoPlacementPtrType doPlacementPtr;
412
416 typedef void (Tree::*ApplySPRPtrType)(const TreeSearchType,
417 const bool, std::ostream&);
418 ApplySPRPtrType applySPRPtr;
419
423 typedef void (Tree::*OptimizeBranchPtrType)(std::ostream&);
424 OptimizeBranchPtrType optimizeBranchPtr;
425
429 typedef RealNumType (Tree::*computeLhPtrType)();
430 computeLhPtrType computeLhPtr;
431
435 typedef void (Tree::*computeBranchSupportPtrType)(const int,
436 const int,
437 const double,
438 const bool, std::ostream&);
439 computeBranchSupportPtrType computeBranchSupportPtr;
440
441 typedef void (Tree::*MakeTreeInOutConsistentPtrType)();
442 MakeTreeInOutConsistentPtrType makeTreeInOutConsistentPtr;
443
449 template <const cmaple::StateType num_states>
450 void loadTreeTemplate(std::istream& tree_stream, const bool fixed_blengths);
451
454 template <const cmaple::StateType num_states>
455 void changeAlnTemplate(Alignment* aln);
456
459 template <const cmaple::StateType num_states>
460 void changeModelTemplate(Model* model);
461
464 template <const cmaple::StateType num_states>
465 void doInferenceTemplate(const TreeSearchType tree_search_type,
466 const bool shallow_tree_search, std::ostream& out_stream);
467
470 template <const cmaple::StateType num_states>
471 void doPlacementTemplate(std::ostream& out_stream);
472
475 template <const cmaple::StateType num_states>
476 void applySPRTemplate(const TreeSearchType tree_search_type,
477 const bool shallow_tree_search, std::ostream& out_stream);
478
481 template <const cmaple::StateType num_states>
482 void optimizeBranchTemplate(std::ostream& out_stream);
483
486 template <const cmaple::StateType num_states>
487 RealNumType computeLhTemplate();
488
491 template <const cmaple::StateType num_states>
492 void computeBranchSupportTemplate(const int num_threads,
493 const int num_replicates,
494 const double epsilon,
495 const bool allow_replacing_ML_tree,
496 std::ostream& out_stream);
497
500 template <const cmaple::StateType num_states>
501 void makeTreeInOutConsistentTemplate();
502
507 void setupFuncPtrs();
508
512 void setupBlengthThresh();
513
526 void initTree(Alignment* aln,
527 Model* model,
528 std::unique_ptr<cmaple::Params>&& params);
529
534 void computeCumulativeRate();
535
540 template <const cmaple::StateType num_states>
541 void optimizeTreeTopology(bool short_range_search = false);
542
549 template <const cmaple::StateType num_states>
550 void refreshAllNonLowerLhs();
551
558 template <const cmaple::StateType num_states>
559 cmaple::RealNumType improveSubTree(const cmaple::Index index,
560 PhyloNode& node,
561 bool short_range_search);
562
567 cmaple::RealNumType calculateDerivative(
568 const std::vector<cmaple::RealNumType>& coefficient_vec,
569 const cmaple::RealNumType delta_t);
570
574 template <const cmaple::StateType num_states>
575 void examineSamplePlacementMidBranch(
576 cmaple::Index& selected_node_index,
577 const std::unique_ptr<SeqRegions>& mid_branch_lh,
578 cmaple::RealNumType& best_lh_diff,
579 bool& is_mid_branch,
580 cmaple::RealNumType& lh_diff_mid_branch,
581 TraversingNode& current_extended_node,
582 const std::unique_ptr<SeqRegions>& sample_regions);
583
587 template <const cmaple::StateType num_states>
588 void examineSamplePlacementAtNode(
589 cmaple::Index& selected_node_index,
590 const std::unique_ptr<SeqRegions>& total_lh,
591 cmaple::RealNumType& best_lh_diff,
592 bool& is_mid_branch,
593 cmaple::RealNumType& lh_diff_at_node,
594 cmaple::RealNumType& lh_diff_mid_branch,
595 cmaple::RealNumType& best_up_lh_diff,
596 cmaple::RealNumType& best_down_lh_diff,
597 cmaple::Index& best_child_index,
598 TraversingNode& current_extended_node,
599 const std::unique_ptr<SeqRegions>& sample_regions);
600
606 template <const cmaple::StateType num_states>
607 void finetuneSamplePlacementAtNode(
608 const PhyloNode& selected_node,
609 cmaple::RealNumType& best_down_lh_diff,
610 cmaple::Index& best_child_index,
611 const std::unique_ptr<SeqRegions>& sample_regions);
612
618 template <const cmaple::StateType num_states>
619 void addStartingNodes(const cmaple::Index& node_index,
620 PhyloNode& node,
621 const cmaple::Index& other_child_node_index,
622 const cmaple::RealNumType best_lh_diff,
623 std::stack<std::unique_ptr<UpdatingNode>>& node_stack);
624
630 template <const cmaple::StateType num_states>
631 bool examineSubtreePlacementMidBranch(
632 cmaple::Index& best_node_index,
633 PhyloNode& current_node,
634 cmaple::RealNumType& best_lh_diff,
635 bool& is_mid_branch,
636 cmaple::RealNumType& lh_diff_at_node,
637 cmaple::RealNumType& lh_diff_mid_branch,
638 cmaple::RealNumType& best_up_lh_diff,
639 cmaple::RealNumType& best_down_lh_diff,
640 std::unique_ptr<UpdatingNode>& updating_node,
641 const std::unique_ptr<SeqRegions>& subtree_regions,
642 const cmaple::RealNumType threshold_prob,
643 const cmaple::RealNumType removed_blength,
644 const cmaple::Index top_node_index,
645 std::unique_ptr<SeqRegions>& bottom_regions);
646
652 template <const cmaple::StateType num_states>
653 bool examineSubTreePlacementAtNode(
654 cmaple::Index& best_node_index,
655 PhyloNode& current_node,
656 cmaple::RealNumType& best_lh_diff,
657 bool& is_mid_branch,
658 cmaple::RealNumType& lh_diff_at_node,
659 cmaple::RealNumType& lh_diff_mid_branch,
660 cmaple::RealNumType& best_up_lh_diff,
661 cmaple::RealNumType& best_down_lh_diff,
662 std::unique_ptr<UpdatingNode>& updating_node,
663 const std::unique_ptr<SeqRegions>& subtree_regions,
664 const cmaple::RealNumType threshold_prob,
665 const cmaple::RealNumType removed_blength,
666 const cmaple::Index top_node_index);
667
673 template <const cmaple::StateType num_states>
674 void addChildSeekSubtreePlacement(
675 const cmaple::Index child_1_index,
676 PhyloNode& child_1,
677 PhyloNode& child_2,
678 const cmaple::RealNumType& lh_diff_at_node,
679 const std::unique_ptr<UpdatingNode>& updating_node,
680 std::stack<std::unique_ptr<UpdatingNode>>& node_stack,
681 const cmaple::RealNumType threshold_prob);
682
689 template <const cmaple::StateType num_states>
690 bool addNeighborsSeekSubtreePlacement(
691 PhyloNode& current_node,
692 const cmaple::Index other_child_index,
693 std::unique_ptr<SeqRegions>&& bottom_regions,
694 const cmaple::RealNumType& lh_diff_at_node,
695 const std::unique_ptr<UpdatingNode>& updating_node,
696 std::stack<std::unique_ptr<UpdatingNode>>& node_stack,
697 const cmaple::RealNumType threshold_prob);
698
705 template <const cmaple::StateType num_states,
706 cmaple::RealNumType (Tree::*calculatePlacementCost)(
707 const std::unique_ptr<SeqRegions>&,
708 const std::unique_ptr<SeqRegions>&,
709 const cmaple::RealNumType)>
710 bool tryShorterBranch(
711 const cmaple::RealNumType current_blength,
712 std::unique_ptr<SeqRegions>& best_child_regions,
713 const std::unique_ptr<SeqRegions>& sample,
714 const std::unique_ptr<SeqRegions>& upper_left_right_regions,
715 const std::unique_ptr<SeqRegions>& lower_regions,
716 cmaple::RealNumType& best_split_lh,
717 cmaple::RealNumType& best_branch_length_split,
718 const cmaple::RealNumType new_branch_length,
719 const bool try_first_branch);
720
726 template <const cmaple::StateType num_states>
727 void tryShorterBranchAtRoot(const std::unique_ptr<SeqRegions>& sample,
728 const std::unique_ptr<SeqRegions>& lower_regions,
729 std::unique_ptr<SeqRegions>& best_parent_regions,
730 cmaple::RealNumType& best_root_blength,
731 cmaple::RealNumType& best_parent_lh,
732 const cmaple::RealNumType fixed_blength);
733
738 template <const cmaple::StateType num_states>
739 bool tryShorterNewBranchAtRoot(
740 const std::unique_ptr<SeqRegions>& sample,
741 const std::unique_ptr<SeqRegions>& lower_regions,
742 std::unique_ptr<SeqRegions>& best_parent_regions,
743 cmaple::RealNumType& best_length,
744 cmaple::RealNumType& best_parent_lh,
745 const cmaple::RealNumType fixed_blength);
746
753 template <const cmaple::StateType num_states>
754 bool tryLongerNewBranchAtRoot(
755 const std::unique_ptr<SeqRegions>& sample,
756 const std::unique_ptr<SeqRegions>& lower_regions,
757 std::unique_ptr<SeqRegions>& best_parent_regions,
758 cmaple::RealNumType& best_length,
759 cmaple::RealNumType& best_parent_lh,
760 const cmaple::RealNumType fixed_blength);
761
767 template <const cmaple::StateType num_states>
768 void estimateLengthNewBranchAtRoot(
769 const std::unique_ptr<SeqRegions>& sample,
770 const std::unique_ptr<SeqRegions>& lower_regions,
771 std::unique_ptr<SeqRegions>& best_parent_regions,
772 cmaple::RealNumType& best_length,
773 cmaple::RealNumType& best_parent_lh,
774 const cmaple::RealNumType fixed_blength,
775 const cmaple::RealNumType short_blength_thresh,
776 const bool optional_check);
777
782 template <cmaple::RealNumType (Tree::*calculatePlacementCost)(
783 const std::unique_ptr<SeqRegions>&,
784 const std::unique_ptr<SeqRegions>&,
785 const cmaple::RealNumType)>
786 bool tryShorterNewBranch(
787 const std::unique_ptr<SeqRegions>& best_child_regions,
788 const std::unique_ptr<SeqRegions>& sample,
789 cmaple::RealNumType& best_blength,
790 cmaple::RealNumType& new_branch_lh,
791 const cmaple::RealNumType short_blength_thresh);
792
797 template <cmaple::RealNumType (Tree::*calculatePlacementCost)(
798 const std::unique_ptr<SeqRegions>&,
799 const std::unique_ptr<SeqRegions>&,
800 const cmaple::RealNumType)>
801 void tryLongerNewBranch(const std::unique_ptr<SeqRegions>& best_child_regions,
802 const std::unique_ptr<SeqRegions>& sample,
803 cmaple::RealNumType& best_blength,
804 cmaple::RealNumType& new_branch_lh,
805 const cmaple::RealNumType long_blength_thresh);
806
810 template <cmaple::RealNumType (Tree::*calculatePlacementCost)(
811 const std::unique_ptr<SeqRegions>&,
812 const std::unique_ptr<SeqRegions>&,
813 const cmaple::RealNumType)>
814 void estimateLengthNewBranch(
815 const cmaple::RealNumType best_split_lh,
816 const std::unique_ptr<SeqRegions>& best_child_regions,
817 const std::unique_ptr<SeqRegions>& sample,
818 cmaple::RealNumType& best_blength,
819 const cmaple::RealNumType long_blength_thresh,
820 const cmaple::RealNumType short_blength_thresh,
821 const bool optional_check);
822
828 template <const cmaple::StateType num_states>
829 void connectNewSample2Branch(
830 std::unique_ptr<SeqRegions>& sample,
831 const cmaple::NumSeqsType seq_name_index,
832 const cmaple::Index sibling_node_index,
833 PhyloNode& sibling_node,
834 const cmaple::RealNumType top_distance,
835 const cmaple::RealNumType down_distance,
836 const cmaple::RealNumType best_blength,
837 std::unique_ptr<SeqRegions>& best_child_regions,
838 const std::unique_ptr<SeqRegions>& upper_left_right_regions);
839
845 template <const cmaple::StateType num_states>
846 void connectNewSample2Root(std::unique_ptr<SeqRegions>& sample,
847 const cmaple::NumSeqsType seq_name_index,
848 const cmaple::Index sibling_node_index,
849 PhyloNode& sibling_node,
850 const cmaple::RealNumType best_root_blength,
851 const cmaple::RealNumType best_length2,
852 std::unique_ptr<SeqRegions>& best_parent_regions);
853
859 template <const cmaple::StateType num_states>
860 void placeSubTreeAtNode(const cmaple::Index selected_node_index,
861 const cmaple::Index subtree_index,
862 PhyloNode& subtree,
863 const std::unique_ptr<SeqRegions>& subtree_regions,
864 const cmaple::RealNumType new_branch_length,
865 const cmaple::RealNumType new_lh);
866
872 template <const cmaple::StateType num_states>
873 void placeSubTreeMidBranch(const cmaple::Index selected_node_index,
874 const cmaple::Index subtree_index,
875 PhyloNode& subtree,
876 const std::unique_ptr<SeqRegions>& subtree_regions,
877 const cmaple::RealNumType new_branch_length,
878 const cmaple::RealNumType new_lh);
879
885 template <
886 const cmaple::StateType num_states,
887 void (Tree::*updateRegionsSubTree)(PhyloNode&,
888 PhyloNode&,
889 PhyloNode&,
890 std::unique_ptr<SeqRegions>&&,
891 const std::unique_ptr<SeqRegions>&,
892 const std::unique_ptr<SeqRegions>&,
893 const std::unique_ptr<SeqRegions>&,
894 cmaple::RealNumType&)>
895 void connectSubTree2Branch(
896 const std::unique_ptr<SeqRegions>& subtree_regions,
897 const std::unique_ptr<SeqRegions>& lower_regions,
898 const cmaple::Index subtree_index,
899 PhyloNode& subtree,
900 const cmaple::Index sibling_node_index,
901 PhyloNode& sibling_node,
902 const cmaple::RealNumType top_distance,
903 const cmaple::RealNumType down_distance,
904 cmaple::RealNumType& best_blength,
905 std::unique_ptr<SeqRegions>&& best_child_regions,
906 const std::unique_ptr<SeqRegions>& upper_left_right_regions);
907
913 template <const cmaple::StateType num_states>
914 void connectSubTree2Root(const cmaple::Index subtree_index,
915 PhyloNode& subtree,
916 const std::unique_ptr<SeqRegions>& subtree_regions,
917 const std::unique_ptr<SeqRegions>& lower_regions,
918 const cmaple::Index sibling_node_index,
919 PhyloNode& sibling_node,
920 const cmaple::RealNumType best_root_blength,
921 const cmaple::RealNumType best_length2,
922 std::unique_ptr<SeqRegions>&& best_parent_regions);
923
931 template <const cmaple::StateType num_states>
932 void updateRegionsPlaceSubTree(
933 PhyloNode& subtree,
934 PhyloNode& sibling_node,
935 PhyloNode& internal,
936 std::unique_ptr<SeqRegions>&& best_child_regions,
937 const std::unique_ptr<SeqRegions>& subtree_regions,
938 const std::unique_ptr<SeqRegions>& upper_left_right_regions,
939 const std::unique_ptr<SeqRegions>& lower_regions,
940 cmaple::RealNumType& best_blength);
941
949 template <const cmaple::StateType num_states>
950 void updateRegionsPlaceSubTreeAbove(
951 PhyloNode& subtree,
952 PhyloNode& sibling_node,
953 PhyloNode& internal,
954 std::unique_ptr<SeqRegions>&& best_child_regions,
955 const std::unique_ptr<SeqRegions>& subtree_regions,
956 const std::unique_ptr<SeqRegions>& upper_left_right_regions,
957 const std::unique_ptr<SeqRegions>& lower_regions,
958 cmaple::RealNumType& best_blength);
959
965 template <const cmaple::StateType num_states>
966 void handlePolytomyPlaceSubTree(
967 const cmaple::Index selected_node_index,
968 PhyloNode& selected_node,
969 const std::unique_ptr<SeqRegions>& subtree_regions,
970 const cmaple::RealNumType new_branch_length,
971 cmaple::RealNumType& best_down_lh_diff,
972 cmaple::Index& best_child_index,
973 cmaple::RealNumType& best_child_blength_split,
974 std::unique_ptr<SeqRegions>& best_child_regions);
975
981 template <const cmaple::StateType num_states>
982 void updateMidBranchLh(
983 const cmaple::Index node_index,
984 PhyloNode& node,
985 const std::unique_ptr<SeqRegions>& parent_upper_regions,
986 std::stack<cmaple::Index>& node_stack,
987 bool& update_blength);
988
995 template <const cmaple::StateType num_states>
996 std::unique_ptr<SeqRegions> computeUpperLeftRightRegions(
997 const cmaple::Index node_index,
998 PhyloNode& node,
999 const cmaple::MiniIndex next_node_mini,
1000 const std::unique_ptr<SeqRegions>& parent_upper_regions,
1001 std::stack<cmaple::Index>& node_stack,
1002 bool& update_blength);
1003
1008 bool updateNewPartialIfDifferent(
1009 PhyloNode& node,
1010 const cmaple::MiniIndex next_node_mini,
1011 std::unique_ptr<SeqRegions>& upper_left_right_regions,
1012 std::stack<cmaple::Index>& node_stack,
1013 const cmaple::PositionType seq_length);
1014
1021 template <const cmaple::StateType num_states>
1022 void inline handleNullNewRegions(const cmaple::Index index,
1023 PhyloNode& node,
1024 const bool do_update_zeroblength,
1025 std::stack<cmaple::Index>& node_stack,
1026 bool& update_blength,
1027 const std::string err_msg) {
1028 if (do_update_zeroblength) {
1029 updateZeroBlength<num_states>(index, node, node_stack);
1030 update_blength = true;
1031 } else
1032 throw std::logic_error(err_msg);
1033 }
1034
1041 template <const cmaple::StateType num_states>
1042 void updatePartialLhFromParent(
1043 const cmaple::Index index,
1044 PhyloNode& node,
1045 std::stack<cmaple::Index>& node_stack,
1046 const std::unique_ptr<SeqRegions>& parent_upper_regions,
1047 const cmaple::PositionType seq_length);
1048
1055 template <const cmaple::StateType num_states>
1056 void updatePartialLhFromChildren(
1057 const cmaple::Index index,
1058 PhyloNode& node,
1059 std::stack<cmaple::Index>& node_stack,
1060 const std::unique_ptr<SeqRegions>& parent_upper_regions,
1061 const bool is_non_root,
1062 const cmaple::PositionType seq_length);
1063
1069 template <const cmaple::StateType num_states>
1070 inline void computeMidBranchRegions(
1071 PhyloNode& node,
1072 std::unique_ptr<SeqRegions>& regions_2_update,
1073 const SeqRegions& parent_upper_lr_lh) {
1074 std::unique_ptr<SeqRegions>& lower_lh = node.getPartialLh(cmaple::TOP);
1075 cmaple::RealNumType half_branch_length = node.getUpperLength() * 0.5;
1076 parent_upper_lr_lh.mergeUpperLower<num_states>(
1077 regions_2_update, half_branch_length, *lower_lh, half_branch_length,
1078 aln, model, params->threshold_prob);
1079 }
1080
1086 template <const cmaple::StateType num_states>
1087 void refreshNonLowerLhsFromParent(cmaple::Index& node_index,
1088 cmaple::Index& last_node_index);
1089
1095 template <const cmaple::StateType num_states>
1096 void refreshUpperLR(const cmaple::Index node_index,
1097 PhyloNode& node,
1098 const cmaple::Index neighbor_index,
1099 std::unique_ptr<SeqRegions>& replaced_regions,
1100 const SeqRegions& parent_upper_lr_lh);
1101
1105 template <const cmaple::StateType num_states>
1106 void estimateBlength_R_O(const SeqRegion& seq1_region,
1107 const SeqRegion& seq2_region,
1108 const cmaple::RealNumType total_blength,
1109 const cmaple::PositionType end_pos,
1110 cmaple::RealNumType& coefficient,
1111 std::vector<cmaple::RealNumType>& coefficient_vec);
1112
1116 void estimateBlength_R_ACGT(
1117 const SeqRegion& seq1_region,
1118 const cmaple::StateType seq2_state,
1119 const cmaple::RealNumType total_blength,
1120 const cmaple::PositionType end_pos,
1121 std::vector<cmaple::RealNumType>& coefficient_vec);
1122
1127 template <const cmaple::StateType num_states>
1128 void estimateBlength_O_X(const SeqRegion& seq1_region,
1129 const SeqRegion& seq2_region,
1130 const cmaple::RealNumType total_blength,
1131 const cmaple::PositionType end_pos,
1132 cmaple::RealNumType& coefficient,
1133 std::vector<cmaple::RealNumType>& coefficient_vec);
1134
1138 template <const cmaple::StateType num_states>
1139 void estimateBlength_ACGT_O(
1140 const SeqRegion& seq1_region,
1141 const SeqRegion& seq2_region,
1142 const cmaple::RealNumType total_blength,
1143 cmaple::RealNumType& coefficient,
1144 std::vector<cmaple::RealNumType>& coefficient_vec);
1145
1150 void estimateBlength_ACGT_RACGT(
1151 const SeqRegion& seq1_region,
1152 const SeqRegion& seq2_region,
1153 const cmaple::RealNumType total_blength,
1154 const cmaple::PositionType end_pos,
1155 std::vector<cmaple::RealNumType>& coefficient_vec);
1156
1160 cmaple::RealNumType estimateBlengthFromCoeffs(
1161 cmaple::RealNumType& coefficient,
1162 const std::vector<cmaple::RealNumType> coefficient_vec);
1163
1169 template <const cmaple::StateType num_states>
1170 void handleBlengthChanged(PhyloNode& node,
1171 const cmaple::Index node_index,
1172 const cmaple::RealNumType best_blength);
1173
1177 template <const cmaple::StateType num_states>
1178 void optimizeBlengthBeforeSeekingSPR(
1179 PhyloNode& node,
1180 cmaple::RealNumType& best_blength,
1181 cmaple::RealNumType& best_lh,
1182 bool& blength_changed,
1183 const std::unique_ptr<SeqRegions>& parent_upper_lr_lh,
1184 const std::unique_ptr<SeqRegions>& lower_lh);
1185
1191 template <const cmaple::StateType num_states>
1192 void checkAndApplySPR(const cmaple::RealNumType best_lh_diff,
1193 const cmaple::RealNumType best_blength,
1194 const cmaple::RealNumType best_lh,
1195 const cmaple::Index node_index,
1196 PhyloNode& node,
1197 const cmaple::Index best_node_index,
1198 const cmaple::Index parent_node_index,
1199 const bool is_mid_node,
1200 cmaple::RealNumType& total_improvement,
1201 bool& topology_updated);
1202
1206 inline void createAnInternalNode() { nodes.emplace_back(InternalNode()); }
1207
1211 inline void createALeafNode(const cmaple::NumSeqsType new_seq_name_index) {
1212 nodes.emplace_back(LeafNode(
1213 new_seq_name_index)); //(PhyloNode(std::move(LeafNode(new_seq_name_index))));
1214 }
1215
1219 std::unique_ptr<SeqRegions>& getPartialLhAtNode(const cmaple::Index index);
1220
1226 template <const cmaple::StateType num_states>
1227 bool calculateNNILh(std::stack<cmaple::Index>& node_stack_aLRT,
1228 cmaple::RealNumType& lh_diff,
1229 PhyloNode& current_node,
1230 PhyloNode& child_1,
1231 PhyloNode& child_2,
1232 PhyloNode& sibling,
1233 PhyloNode& parent,
1234 const cmaple::Index parent_index,
1235 cmaple::RealNumType& lh_at_root,
1236 const bool allow_replacing_ML_tree);
1237
1243 template <const cmaple::StateType num_states>
1244 bool calculateNNILhRoot(std::stack<cmaple::Index>& node_stack_aLRT,
1245 cmaple::RealNumType& lh_diff,
1246 std::unique_ptr<SeqRegions>& parent_new_lower_lh,
1247 const cmaple::RealNumType& child_2_new_blength,
1248 PhyloNode& current_node,
1249 PhyloNode& child_1,
1250 PhyloNode& child_2,
1251 PhyloNode& sibling,
1252 PhyloNode& parent,
1253 const cmaple::Index parent_index,
1254 cmaple::RealNumType& lh_at_root,
1255 const bool allow_replacing_ML_tree);
1256
1263 template <const cmaple::StateType num_states>
1264 bool calculateNNILhNonRoot(std::stack<cmaple::Index>& node_stack_aLRT,
1265 cmaple::RealNumType& lh_diff,
1266 std::unique_ptr<SeqRegions>& parent_new_lower_lh,
1267 const cmaple::RealNumType& child_2_new_blength,
1268 PhyloNode& current_node,
1269 PhyloNode& child_1,
1270 PhyloNode& child_2,
1271 PhyloNode& sibling,
1272 PhyloNode& parent,
1273 const cmaple::Index parent_index,
1274 cmaple::RealNumType& lh_at_root,
1275 const bool allow_replacing_ML_tree);
1276
1282 template <const cmaple::StateType num_states>
1283 void replaceMLTreebyNNIRoot(std::stack<cmaple::Index>& node_stack_aLRT,
1284 cmaple::RealNumType& lh_diff,
1285 PhyloNode& current_node,
1286 PhyloNode& child_1,
1287 PhyloNode& child_2,
1288 PhyloNode& sibling,
1289 PhyloNode& parent,
1290 cmaple::RealNumType& lh_at_root,
1291 const cmaple::RealNumType child_1_best_blength,
1292 const cmaple::RealNumType child_2_best_blength,
1293 const cmaple::RealNumType sibling_best_blength,
1294 const cmaple::RealNumType parent_best_blength);
1295
1302 template <const cmaple::StateType num_states>
1303 void replaceMLTreebyNNINonRoot(
1304 std::stack<cmaple::Index>& node_stack_aLRT,
1305 cmaple::RealNumType& lh_diff,
1306 PhyloNode& current_node,
1307 PhyloNode& child_1,
1308 PhyloNode& child_2,
1309 PhyloNode& sibling,
1310 PhyloNode& parent,
1311 cmaple::RealNumType& lh_at_root,
1312 const cmaple::RealNumType child_1_best_blength,
1313 const cmaple::RealNumType child_2_best_blength,
1314 const cmaple::RealNumType sibling_best_blength,
1315 const cmaple::RealNumType parent_best_blength,
1316 const cmaple::RealNumType new_parent_best_blength);
1317
1325 template <const cmaple::StateType num_states>
1326 void updateUpperLR(std::stack<cmaple::Index>& node_stack,
1327 std::stack<cmaple::Index>& node_stack_aLRT);
1328
1333 void recompute_aLRT_GrandChildren(PhyloNode& parent,
1334 std::stack<cmaple::Index>& node_stack_aLRT);
1335
1343 template <const cmaple::StateType num_states>
1344 void calculate_aRLT(const bool allow_replacing_ML_tree);
1345
1351 template <const cmaple::StateType num_states>
1352 cmaple::RealNumType calculateSiteLhs(
1353 std::vector<cmaple::RealNumType>& site_lh_contributions,
1354 std::vector<cmaple::RealNumType>& site_lh_root);
1355
1361 template <const cmaple::StateType num_states>
1362 void calculate_aRLT_SH(
1363 std::vector<cmaple::RealNumType>& site_lh_contributions,
1364 std::vector<cmaple::RealNumType>& site_lh_root,
1365 const cmaple::RealNumType& LT1);
1366
1372 template <const cmaple::StateType num_states>
1373 cmaple::PositionType count_aRLT_SH_branch(
1374 std::vector<cmaple::RealNumType>& site_lh_contributions,
1375 std::vector<cmaple::RealNumType>& site_lh_root,
1376 PhyloNode& node,
1377 const cmaple::RealNumType& LT1);
1378
1385 template <const cmaple::StateType num_states>
1386 void calSiteLhDiffRoot(std::vector<cmaple::RealNumType>& site_lh_diff,
1387 std::vector<cmaple::RealNumType>& site_lh_root_diff,
1388 const std::vector<cmaple::RealNumType>& site_lh_root,
1389 std::unique_ptr<SeqRegions>& parent_new_lower_lh,
1390 const cmaple::RealNumType& child_2_new_blength,
1391 PhyloNode& current_node,
1392 PhyloNode& child_1,
1393 PhyloNode& child_2,
1394 PhyloNode& sibling,
1395 PhyloNode& parent,
1396 const cmaple::Index parent_index);
1397
1404 template <const cmaple::StateType num_states>
1405 void calSiteLhDiffNonRoot(
1406 std::vector<cmaple::RealNumType>& site_lh_diff,
1407 std::vector<cmaple::RealNumType>& site_lh_root_diff,
1408 const std::vector<cmaple::RealNumType>& site_lh_root,
1409 std::unique_ptr<SeqRegions>& parent_new_lower_lh,
1410 const cmaple::RealNumType& child_2_new_blength,
1411 PhyloNode& current_node,
1412 PhyloNode& child_1,
1413 PhyloNode& child_2,
1414 PhyloNode& sibling,
1415 PhyloNode& parent,
1416 const cmaple::Index parent_index);
1417
1423 template <const cmaple::StateType num_states>
1424 void calSiteLhDiff(std::vector<cmaple::RealNumType>& site_lh_diff,
1425 std::vector<cmaple::RealNumType>& site_lh_root_diff,
1426 const std::vector<cmaple::RealNumType>& site_lh_root,
1427 PhyloNode& current_node,
1428 PhyloNode& child_1,
1429 PhyloNode& child_2,
1430 PhyloNode& sibling,
1431 PhyloNode& parent,
1432 const cmaple::Index parent_index);
1433
1437 const char readNextChar(std::istream& in,
1438 cmaple::PositionType& in_line,
1439 cmaple::PositionType& in_column,
1440 const char& current_ch = 0) const;
1441
1448 cmaple::NumSeqsType parseFile(
1449 std::istream& infile,
1450 char& ch,
1451 cmaple::RealNumType& branch_len,
1452 cmaple::PositionType& in_line,
1453 cmaple::PositionType& in_column,
1454 const std::map<std::string, cmaple::NumSeqsType>& map_seqname_index,
1455 bool& missing_blengths);
1456
1461 void collapseAllZeroLeave();
1462
1467 void collapseOneZeroLeaf(PhyloNode& node,
1468 cmaple::Index& node_index,
1469 PhyloNode& neighbor_1,
1470 const cmaple::Index neighbor_1_index,
1471 PhyloNode& neighbor_2);
1472
1476 template <const cmaple::StateType num_states>
1477 void updatePesudoCountModel(PhyloNode& node,
1478 const cmaple::Index node_index,
1479 const cmaple::Index parent_index);
1480
1486 template <const cmaple::StateType num_states>
1487 void expandTreeByOneLessInfoSeq(PhyloNode& node,
1488 const cmaple::Index node_index,
1489 const cmaple::Index parent_index);
1490
1499 template <const cmaple::StateType num_states>
1500 void updateBlengthReplaceMLTree(std::stack<cmaple::Index>& node_stack_aLRT,
1501 cmaple::RealNumType& lh_diff,
1502 PhyloNode& node,
1503 const cmaple::Index node_index,
1504 const cmaple::RealNumType best_blength);
1505
1513 template <const cmaple::StateType num_states>
1514 void addLessInfoSeqReplacingMLTree(std::stack<cmaple::Index>& node_stack_aLRT,
1515 cmaple::RealNumType& lh_diff,
1516 PhyloNode& node,
1517 const cmaple::Index node_index,
1518 const cmaple::Index parent_index);
1519
1525 std::string exportNodeString(const bool binary,
1526 const cmaple::NumSeqsType node_vec_index,
1527 const bool show_branch_supports);
1528
1539 bool readTree(std::istream& tree_stream);
1540
1546 bool isComplete();
1547
1552 void updateModelByAln();
1553
1559 template <const cmaple::StateType num_states>
1560 void updateModelLhAfterLoading();
1561
1565 std::map<std::string, NumSeqsType> initMapSeqNameIndex();
1566
1573 void remarkExistingSeqs();
1574
1584 NumSeqsType markAnExistingSeq(
1585 const std::string& seq_name,
1586 const std::map<std::string, NumSeqsType>& map_name_index);
1587
1591 void resetSeqAdded();
1592
1599 void attachAlnModel(Alignment* aln, ModelBase* model);
1600
1604 std::string exportNewick(const bool binary, const bool show_branch_supports);
1605
1610 template <const cmaple::StateType num_states>
1611 void updateZeroBlength(const cmaple::Index index,
1612 PhyloNode& node,
1613 std::stack<cmaple::Index>& node_stack);
1614
1622 template <const cmaple::StateType num_states>
1623 void updatePartialLh(std::stack<cmaple::Index>& node_stack);
1624
1631 template <const cmaple::StateType num_states>
1632 void seekSamplePlacement(const cmaple::Index start_node_index,
1633 const cmaple::NumSeqsType seq_name_index,
1634 const std::unique_ptr<SeqRegions>& sample_regions,
1635 cmaple::Index& selected_node_index,
1636 cmaple::RealNumType& best_lh_diff,
1637 bool& is_mid_branch,
1638 cmaple::RealNumType& best_up_lh_diff,
1639 cmaple::RealNumType& best_down_lh_diff,
1640 cmaple::Index& best_child_index);
1641
1648 template <const cmaple::StateType num_states>
1649 void seekSubTreePlacement(
1650 cmaple::Index& best_node_index,
1651 cmaple::RealNumType& best_lh_diff,
1652 bool& is_mid_branch,
1653 cmaple::RealNumType& best_up_lh_diff,
1654 cmaple::RealNumType& best_down_lh_diff,
1655 cmaple::Index& best_child_index,
1656 const bool short_range_search,
1657 const cmaple::Index child_node_index,
1658 cmaple::RealNumType&
1659 removed_blength); //, bool search_subtree_placement = true,
1660 // SeqRegions* sample_regions = NULL);
1661
1667 template <const cmaple::StateType num_states>
1668 void placeNewSampleMidBranch(const cmaple::Index& selected_node_index,
1669 std::unique_ptr<SeqRegions>& sample,
1670 const cmaple::NumSeqsType seq_name_index,
1671 const cmaple::RealNumType best_lh_diff);
1672
1678 template <const cmaple::StateType num_states>
1679 void placeNewSampleAtNode(const cmaple::Index selected_node_index,
1680 std::unique_ptr<SeqRegions>& sample,
1681 const cmaple::NumSeqsType seq_name_index,
1682 const cmaple::RealNumType best_lh_diff,
1683 const cmaple::RealNumType best_up_lh_diff,
1684 const cmaple::RealNumType best_down_lh_diff,
1685 const cmaple::Index best_child_index);
1686
1693 template <const cmaple::StateType num_states>
1694 void applyOneSPR(const cmaple::Index subtree_index,
1695 PhyloNode& subtree,
1696 const cmaple::Index best_node_index,
1697 const bool is_mid_branch,
1698 const cmaple::RealNumType branch_length,
1699 const cmaple::RealNumType best_lh_diff);
1700
1707 template <const cmaple::StateType num_states>
1708 void refreshAllLhs(bool avoid_using_upper_lr_lhs = false);
1709
1715 void resetSPRFlags(const bool update_outdated,
1716 const bool n_outdated);
1717
1724 template <const cmaple::StateType num_states>
1725 cmaple::RealNumType improveEntireTree(bool short_range_search);
1726
1733 template <const cmaple::StateType num_states>
1734 cmaple::PositionType optimizeBranchIter();
1735
1740 template <const cmaple::StateType num_states>
1741 cmaple::RealNumType estimateBranchLength(
1742 const std::unique_ptr<SeqRegions>& parent_regions,
1743 const std::unique_ptr<SeqRegions>& child_regions);
1744
1749 template <const cmaple::StateType num_states>
1750 cmaple::RealNumType estimateBranchLengthWithCheck(
1751 const std::unique_ptr<SeqRegions>& upper_lr_regions,
1752 const std::unique_ptr<SeqRegions>& lower_regions,
1753 const cmaple::RealNumType current_blength);
1754
1759 template <const cmaple::StateType num_states>
1760 cmaple::RealNumType calculateSamplePlacementCost(
1761 const std::unique_ptr<SeqRegions>& parent_regions,
1762 const std::unique_ptr<SeqRegions>& child_regions,
1763 const cmaple::RealNumType blength);
1764
1769 template <const cmaple::StateType num_states>
1770 cmaple::RealNumType calculateSubTreePlacementCost(
1771 const std::unique_ptr<SeqRegions>& parent_regions,
1772 const std::unique_ptr<SeqRegions>& child_regions,
1773 const cmaple::RealNumType blength);
1774
1780 template <const cmaple::StateType num_states>
1781 void updateLowerLh(cmaple::RealNumType& total_lh,
1782 std::unique_ptr<SeqRegions>& new_lower_lh,
1783 PhyloNode& node,
1784 const std::unique_ptr<SeqRegions>& lower_lh_1,
1785 const std::unique_ptr<SeqRegions>& lower_lh_2,
1786 const cmaple::Index neighbor_1_index,
1787 PhyloNode& neighbor_1,
1788 const cmaple::Index neighbor_2_index,
1789 PhyloNode& neighbor_2,
1790 const cmaple::PositionType& seq_length);
1791
1799 template <const cmaple::StateType num_states>
1800 void updateLowerLhAvoidUsingUpperLRLh(
1801 cmaple::RealNumType& total_lh,
1802 std::unique_ptr<SeqRegions>& new_lower_lh,
1803 PhyloNode& node,
1804 const std::unique_ptr<SeqRegions>& lower_lh_1,
1805 const std::unique_ptr<SeqRegions>& lower_lh_2,
1806 const cmaple::Index neighbor_1_index,
1807 PhyloNode& neighbor_1,
1808 const cmaple::Index neighbor_2_index,
1809 PhyloNode& neighbor_2,
1810 const cmaple::PositionType& seq_length);
1811
1817 template <const cmaple::StateType num_states>
1818 void computeLhContribution(cmaple::RealNumType& total_lh,
1819 std::unique_ptr<SeqRegions>& new_lower_lh,
1820 PhyloNode& node,
1821 const std::unique_ptr<SeqRegions>& lower_lh_1,
1822 const std::unique_ptr<SeqRegions>& lower_lh_2,
1823 const cmaple::Index neighbor_1_index,
1824 PhyloNode& neighbor_1,
1825 const cmaple::Index neighbor_2_index,
1826 PhyloNode& neighbor_2,
1827 const cmaple::PositionType& seq_length);
1828
1832 template <void (Tree::*task)(cmaple::RealNumType&,
1833 std::unique_ptr<SeqRegions>&,
1834 PhyloNode&,
1835 const std::unique_ptr<SeqRegions>&,
1836 const std::unique_ptr<SeqRegions>&,
1837 const cmaple::Index,
1838 PhyloNode&,
1839 const cmaple::Index,
1840 PhyloNode&,
1841 const cmaple::PositionType&)>
1842 cmaple::RealNumType performDFS();
1843
1848 template <const cmaple::StateType num_states>
1849 void updateModelParams();
1850
1854 template <
1855 void (Tree::*task)(PhyloNode&, const cmaple::Index, const cmaple::Index)>
1856 void performDFSAtLeave();
1857
1861 bool keepTraversing(const RealNumType& best_lh_diff,
1862 const RealNumType& lh_diff_at_node,
1863 const bool& strict_stop_seeking_placement_subtree,
1864 const std::unique_ptr<UpdatingNode>& updating_node,
1865 const int& failure_limit_subtree,
1866 const RealNumType& thresh_log_lh_subtree,
1867 const bool able2traverse = true);
1868
1869 // NHANLT: Debug aLRT
1870 // void log_current(std::stack<cmaple::Index>& node_stack_aLRT);
1871};
1872
1881std::ostream& operator<<(std::ostream& out_stream, cmaple::Tree& tree);
1882
1885std::istream& operator>>(std::istream& in_stream, cmaple::Tree& tree);
1886
1890template <const StateType num_states>
1891void cmaple::Tree::refreshAllLhs(bool avoid_using_upper_lr_lhs) {
1892 assert(aln);
1893 assert(model);
1894 assert(cumulative_rate);
1895
1896 // 1. update all the lower lhs along the tree
1897 if (avoid_using_upper_lr_lhs) {
1898 performDFS<&cmaple::Tree::updateLowerLhAvoidUsingUpperLRLh<num_states>>();
1899 } else {
1900 performDFS<&cmaple::Tree::updateLowerLh<num_states>>();
1901 }
1902
1903 // 2. update all the non-lower lhs along the tree
1904 refreshAllNonLowerLhs<num_states>();
1905}
1906
1907template <const StateType num_states>
1908RealNumType cmaple::Tree::improveEntireTree(bool short_range_search) {
1909 assert(aln);
1910 assert(model);
1911 assert(cumulative_rate);
1912 assert(nodes.size() > 0);
1913
1914 // start from the root
1915 std::stack<Index> node_stack;
1916 node_stack.push(Index(root_vector_index, TOP));
1917
1918 // dummy variables
1919 RealNumType total_improvement = 0;
1920 PositionType num_nodes = 0;
1921 PositionType count_node_1K = 0;
1922
1923 // traverse downward the tree
1924 while (!node_stack.empty()) {
1925 // pick the top node from the stack
1926 Index index = node_stack.top();
1927 node_stack.pop();
1928 PhyloNode& node = nodes[index.getVectorIndex()];
1929 // MiniIndex mini_index = index.getMiniIndex();
1930
1931 // add all children of the current nodes to the stack for further traversing
1932 // later
1933 /*for (Index neighbor_index:node.getNeighborIndexes(mini_index))
1934 node_stack.push(neighbor_index);*/
1935 assert(index.getMiniIndex() == TOP);
1936 if (node.isInternal()) {
1937 node_stack.push(node.getNeighborIndex(RIGHT));
1938 node_stack.push(node.getNeighborIndex(LEFT));
1939 }
1940
1941 // only process outdated node to avoid traversing the same part of the tree
1942 // multiple times
1943 if (node.isOutdated() && node.getSPRCount() <= 5) {
1944 node.setOutdated(false);
1945
1946 // if checkEachSPR:
1947 // root=node
1948 // while root.up!=None:
1949 // root=root.up
1950 // #print("Pre-SPR tree: "+createBinaryNewick(root))
1951 // oldTreeLK=calculateTreeLikelihood(root,mutMatrix,checkCorrectness=True)
1952 // #print("Pre-SPR tree likelihood: "+str(oldTreeLK))
1953 // reCalculateAllGenomeLists(root,mutMatrix, checkExistingAreCorrect=True)
1954
1955 // do SPR moves to improve the tree
1956 RealNumType improvement =
1957 improveSubTree<num_states>(index, node, short_range_search);
1958
1959 // if checkEachSPR:
1960 // #print(" apparent improvement "+str(improvement))
1961 // root=node
1962 // while root.up!=None:
1963 // root=root.up
1964 // #print("Post-SPR tree: "+createBinaryNewick(root))
1965 // newTreeLK=calculateTreeLikelihood(root,mutMatrix)
1966 // reCalculateAllGenomeLists(root,mutMatrix,
1967 // checkExistingAreCorrect=True)
1968 // #print("Post-SPR tree likelihood: "+str(newTreeLK))
1969 // if newTreeLK-oldTreeLK < improvement-1.0:
1970 // print("In startTopologyUpdates, LK score of improvement
1971 // "+str(newTreeLK)+" - "+str(oldTreeLK)+" =
1972 // "+str(newTreeLK-oldTreeLK)+" is less than //what is
1973 // supposd to be "+str(improvement))
1974 // exit()
1975 //
1976
1977 // update total_improvement
1978 total_improvement += improvement;
1979
1980 // NHANLT: LOGS FOR DEBUGGING
1981 /*if (params->debug && improvement > 0)
1982 std::cout << num_nodes << ": " << std::setprecision(20) <<
1983 total_improvement << std::endl;*/
1984
1985 // Show log every 1000 nodes
1986 ++num_nodes;
1987 if (cmaple::verbose_mode >= cmaple::VB_MED && num_nodes - count_node_1K >= 1000) {
1988 std::cout << "Processed topology for " << convertIntToString(num_nodes)
1989 << " nodes." << std::endl;
1990 count_node_1K = num_nodes;
1991 }
1992 }
1993 }
1994
1995 return total_improvement;
1996}
1997
1998template <const StateType num_states>
1999void cmaple::Tree::updateModelParams() {
2000 assert(aln);
2001 assert(model);
2002
2003 // perform a DFS -> at each leaf, update the pesudoCount of the model based on
2004 // the sequence of that leaf
2005 performDFSAtLeave<&cmaple::Tree::updatePesudoCountModel<num_states>>();
2006
2007 // update model params based on the pseudo count
2008 if (model->updateMutationMatEmpirical()) {
2009 computeCumulativeRate();
2010 }
2011}
2012
2013template <const StateType num_states>
2014void cmaple::Tree::seekSamplePlacement(
2015 const Index start_node_index,
2016 const NumSeqsType seq_name_index,
2017 const std::unique_ptr<SeqRegions>& sample_regions,
2018 Index& selected_node_index,
2019 RealNumType& best_lh_diff,
2020 bool& is_mid_branch,
2021 RealNumType& best_up_lh_diff,
2022 RealNumType& best_down_lh_diff,
2023 Index& best_child_index) {
2024 assert(sample_regions && sample_regions->size() > 0);
2025 assert(seq_name_index >= 0);
2026 assert(aln);
2027 assert(model);
2028 assert(cumulative_rate);
2029
2030 // init variables
2031 // output variables
2032 selected_node_index = start_node_index;
2033 // dummy variables
2034 RealNumType lh_diff_mid_branch = 0;
2035 RealNumType lh_diff_at_node = 0;
2036 PositionType seq_length = static_cast<PositionType>(aln->ref_seq.size());
2037 // stack of nodes to examine positions
2038 std::stack<TraversingNode> extended_node_stack;
2039 extended_node_stack.push(TraversingNode(start_node_index, 0, MIN_NEGATIVE));
2040
2041 // recursively examine positions for placing the new sample
2042 while (!extended_node_stack.empty()) {
2043 TraversingNode current_extended_node = std::move(extended_node_stack.top());
2044 extended_node_stack.pop();
2045 const NumSeqsType current_node_vec =
2046 current_extended_node.getIndex().getVectorIndex();
2047 PhyloNode& current_node = nodes[current_node_vec];
2048 const bool& is_internal = current_node.isInternal();
2049
2050 // NHANLT: debug
2051 // if (current_node->next && ((current_node->next->neighbor &&
2052 // current_node->next->neighbor->seq_name == "25")
2053 // || (current_node->next->next->neighbor &&
2054 // current_node->next->next->neighbor->seq_name ==
2055 // "25")))
2056 // cout << "fdsfsd";
2057
2058 // if the current node is a leaf AND the new sample/sequence is strictly
2059 // less informative than the current node
2060 // -> add the new sequence into the list of minor sequences of the current
2061 // node + stop seeking the placement
2062 if ((!is_internal) &&
2063 (current_node.getPartialLh(TOP)->compareWithSample(
2064 *sample_regions, seq_length, aln) == 1)) {
2065 current_node.addLessInfoSeqs(seq_name_index);
2066 selected_node_index = Index();
2067 return;
2068 }
2069
2070 const RealNumType current_node_blength = current_node.getUpperLength();
2071
2072 // 1. try first placing as a descendant of the mid-branch point of the
2073 // branch above the current node
2074 if (root_vector_index != current_node_vec && current_node_blength > 0) {
2075 examineSamplePlacementMidBranch<num_states>(
2076 selected_node_index, current_node.getMidBranchLh(), best_lh_diff,
2077 is_mid_branch, lh_diff_mid_branch, current_extended_node,
2078 sample_regions);
2079 }
2080 // otherwise, don't consider mid-branch point
2081 else {
2082 lh_diff_mid_branch = MIN_NEGATIVE;
2083 }
2084
2085 // 2. try to place as descendant of the current node (this is skipped if the
2086 // node has top branch length 0 and so is part of a polytomy).
2087 if (root_vector_index == current_node_vec || current_node_blength > 0) {
2088 examineSamplePlacementAtNode<num_states>(
2089 selected_node_index, current_node.getTotalLh(), best_lh_diff,
2090 is_mid_branch, lh_diff_at_node, lh_diff_mid_branch, best_up_lh_diff,
2091 best_down_lh_diff, best_child_index, current_extended_node,
2092 sample_regions);
2093 } else {
2094 lh_diff_at_node = current_extended_node.getLhDiff();
2095 }
2096
2097 // keep trying to place at children nodes, unless the number of attempts has
2098 // reaches the failure limit
2099 const short int failure_count = current_extended_node.getFailureCount();
2100 if ((params->strict_stop_seeking_placement_sample &&
2101 failure_count <= params->failure_limit_sample &&
2102 lh_diff_at_node > (best_lh_diff - params->thresh_log_lh_sample)) ||
2103 (!params->strict_stop_seeking_placement_sample &&
2104 (failure_count <= params->failure_limit_sample ||
2105 lh_diff_at_node > (best_lh_diff - params->thresh_log_lh_sample)))) {
2106 /*for (Index neighbor_index:current_node.getNeighborIndexes(TOP))
2107 extended_node_stack.push(TraversingNode(neighbor_index,
2108 current_extended_node.getFailureCount(), lh_diff_at_node));*/
2109 if (is_internal) {
2110 extended_node_stack.push(
2111 TraversingNode(current_node.getNeighborIndex(RIGHT), failure_count,
2112 lh_diff_at_node));
2113 extended_node_stack.push(
2114 TraversingNode(current_node.getNeighborIndex(LEFT), failure_count,
2115 lh_diff_at_node));
2116 }
2117 }
2118 }
2119
2120 // exploration of the tree is finished, and we are left with the node found so
2121 // far with the best appending likelihood cost. Now we explore placement just
2122 // below this node for more fine-grained placement within its descendant
2123 // branches.
2124 best_down_lh_diff = MIN_NEGATIVE;
2125 best_child_index = Index();
2126
2127 // if best position so far is the descendant of a node -> explore further at
2128 // its children
2129 if (!is_mid_branch) {
2130 finetuneSamplePlacementAtNode<num_states>(
2131 nodes[selected_node_index.getVectorIndex()], best_down_lh_diff,
2132 best_child_index, sample_regions);
2133 }
2134}
2135
2136template <const StateType num_states>
2137void cmaple::Tree::placeNewSampleAtNode(const Index selected_node_index,
2138 std::unique_ptr<SeqRegions>& sample,
2139 const NumSeqsType seq_name_index,
2140 const RealNumType best_lh_diff,
2141 const RealNumType best_up_lh_diff,
2142 const RealNumType best_down_lh_diff,
2143 const Index best_child_index) {
2144 // dummy variables
2145 RealNumType best_child_lh = MIN_NEGATIVE;
2146 RealNumType best_child_blength_split = 0;
2147 RealNumType best_parent_lh;
2148 RealNumType best_parent_blength_split = 0;
2149 RealNumType best_root_blength = -1;
2150 const RealNumType threshold_prob = params->threshold_prob;
2151 std::unique_ptr<SeqRegions> best_parent_regions = nullptr;
2152 std::unique_ptr<SeqRegions> best_child_regions = nullptr;
2153
2154 assert(selected_node_index.getMiniIndex() == TOP);
2155 assert(sample && sample->size() > 0);
2156 assert(seq_name_index >= 0);
2157 assert(aln);
2158 assert(model);
2159 assert(cumulative_rate);
2160
2161 const NumSeqsType selected_node_vec_index =
2162 selected_node_index.getVectorIndex();
2163 PhyloNode& selected_node = nodes[selected_node_vec_index];
2164
2165 // place the new sample as a descendant of an existing node
2166 if (best_child_index.getMiniIndex() != UNDEFINED) {
2167 PhyloNode& best_child = nodes[best_child_index.getVectorIndex()];
2168 best_child_lh = best_down_lh_diff;
2169 assert(best_child_index.getMiniIndex() == TOP);
2170 best_child_blength_split =
2171 0.5 * best_child.getUpperLength(); // best_child->length;
2172 const std::unique_ptr<SeqRegions>& upper_left_right_regions =
2173 getPartialLhAtNode(best_child.getNeighborIndex(
2174 TOP)); // best_child->neighbor->getPartialLhAtNode(aln,
2175 // model, threshold_prob);
2176 const std::unique_ptr<SeqRegions>& lower_regions = best_child.getPartialLh(
2177 TOP); // ->getPartialLhAtNode(aln, model, threshold_prob);
2178 // best_child_regions = new SeqRegions(best_child->mid_branch_lh);
2179 // SeqRegions best_child_mid_clone =
2180 // SeqRegions(best_child.getMidBranchLh());
2181 best_child_regions =
2182 nullptr; // cmaple::make_unique<SeqRegions>(std::move(best_child_mid_clone));
2183
2184 // try a shorter split
2185 // tryShorterBranch<&cmaple::Tree::calculateSamplePlacementCost<num_states>>(best_child->length,
2186 // best_child_regions, sample, upper_left_right_regions, lower_regions,
2187 // best_child_lh, best_child_blength_split, default_blength, true);
2188 tryShorterBranch<num_states,
2189 &cmaple::Tree::calculateSamplePlacementCost<num_states>>(
2190 best_child.getUpperLength(), best_child_regions, sample,
2191 upper_left_right_regions, lower_regions, best_child_lh,
2192 best_child_blength_split, default_blength, true);
2193
2194 // Delay cloning SeqRegions
2195 if (!best_child_regions) {
2196 best_child_regions =
2197 cmaple::make_unique<SeqRegions>(SeqRegions(best_child.getMidBranchLh()));
2198 }
2199 }
2200
2201 // if node is root, try to place as sibling of the current root.
2202 RealNumType old_root_lh = MIN_NEGATIVE;
2203 if (root_vector_index == selected_node_vec_index) {
2204 /*old_root_lh = selected_node->getPartialLhAtNode(aln, model,
2205 threshold_prob)->computeAbsoluteLhAtRoot(num_states, model); SeqRegions*
2206 lower_regions = selected_node->getPartialLhAtNode(aln, model,
2207 threshold_prob);*/
2208 const std::unique_ptr<SeqRegions>& lower_regions =
2209 selected_node.getPartialLh(TOP);
2210 old_root_lh = lower_regions->computeAbsoluteLhAtRoot<num_states>(
2211 model, cumulative_base);
2212
2213 // merge 2 lower vector into one
2214 RealNumType new_root_lh = lower_regions->mergeTwoLowers<num_states>(
2215 best_parent_regions, default_blength, *sample, default_blength, aln,
2216 model, cumulative_rate, threshold_prob, true);
2217
2218 new_root_lh += best_parent_regions->computeAbsoluteLhAtRoot<num_states>(
2219 model, cumulative_base);
2220 best_parent_lh = new_root_lh;
2221
2222 // try shorter branch lengths
2223 best_root_blength = default_blength;
2224 tryShorterBranchAtRoot<num_states>(sample, lower_regions,
2225 best_parent_regions, best_root_blength,
2226 best_parent_lh, default_blength);
2227
2228 // update best_parent_lh (taking into account old_root_lh)
2229 best_parent_lh -= old_root_lh;
2230 }
2231 // selected_node is not root
2232 else {
2233 best_parent_lh = best_up_lh_diff;
2234 best_parent_blength_split =
2235 0.5 * selected_node.getUpperLength(); // selected_node->length;
2236 /*SeqRegions* upper_left_right_regions =
2237 selected_node->neighbor->getPartialLhAtNode(aln, model, threshold_prob);
2238 SeqRegions* lower_regions = selected_node->getPartialLhAtNode(aln, model,
2239 threshold_prob); best_parent_regions = new
2240 SeqRegions(selected_node->mid_branch_lh);*/
2241 const std::unique_ptr<SeqRegions>& upper_left_right_regions =
2242 getPartialLhAtNode(selected_node.getNeighborIndex(TOP));
2243 const std::unique_ptr<SeqRegions>& lower_regions =
2244 selected_node.getPartialLh(TOP);
2245 // SeqRegions seq_regions_clone =
2246 // SeqRegions(selected_node.getMidBranchLh());
2247 best_parent_regions =
2248 nullptr; // cmaple::make_unique<SeqRegions>(std::move(seq_regions_clone));
2249
2250 // try a shorter split
2251 // tryShorterBranch<&cmaple::Tree::calculateSamplePlacementCost<num_states>>(selected_node->length,
2252 // best_parent_regions, sample, upper_left_right_regions, lower_regions,
2253 // best_parent_lh, best_parent_blength_split, default_blength, false);
2254 tryShorterBranch<num_states,
2255 &cmaple::Tree::calculateSamplePlacementCost<num_states>>(
2256 selected_node.getUpperLength(), best_parent_regions, sample,
2257 upper_left_right_regions, lower_regions, best_parent_lh,
2258 best_parent_blength_split, default_blength, false);
2259
2260 // Delay cloning SeqRegions
2261 if (!best_parent_regions) {
2262 best_parent_regions = cmaple::make_unique<SeqRegions>(
2263 SeqRegions(selected_node.getMidBranchLh()));
2264 }
2265 }
2266
2267 // if the best placement is below the selected_node => add an internal node
2268 // below the selected_node
2269 if (best_child_lh >= best_parent_lh && best_child_lh >= best_lh_diff) {
2270 assert(best_child_index.getMiniIndex() == TOP);
2271 PhyloNode& best_child = nodes[best_child_index.getVectorIndex()];
2272 const std::unique_ptr<SeqRegions>& upper_left_right_regions =
2273 getPartialLhAtNode(best_child.getNeighborIndex(
2274 TOP)); // best_child->neighbor->getPartialLhAtNode(aln,
2275 // model, threshold_prob);
2276
2277 // Estimate the length for the new branch
2278 RealNumType best_length = default_blength;
2279 estimateLengthNewBranch<
2280 &cmaple::Tree::calculateSamplePlacementCost<num_states>>(
2281 best_child_lh, best_child_regions, sample, best_length, max_blength,
2282 min_blength, false);
2283
2284 // create new internal node and append child to it
2285 // connectNewSample2Branch(sample, seq_name_index, best_child,
2286 // best_child_blength_split, best_child->length - best_child_blength_split,
2287 // best_length, best_child_regions, upper_left_right_regions);
2288 connectNewSample2Branch<num_states>(
2289 sample, seq_name_index, best_child_index, best_child,
2290 best_child_blength_split,
2291 best_child.getUpperLength() - best_child_blength_split, best_length,
2292 best_child_regions, upper_left_right_regions);
2293 }
2294 // otherwise, add new parent to the selected_node
2295 else {
2296 // new parent is actually part of a polytomy since best placement is exactly
2297 // at the node
2298 if (best_lh_diff >= best_parent_lh) {
2299 best_root_blength = -1;
2300 best_parent_blength_split = -1;
2301 best_parent_lh = best_lh_diff;
2302 /*if (best_parent_regions) delete best_parent_regions;
2303 best_parent_regions = NULL;*/
2304 best_parent_regions = nullptr;
2305
2306 if (root_vector_index == selected_node_vec_index) {
2307 // selected_node->getPartialLhAtNode(aln, model,
2308 // threshold_prob)->mergeTwoLowers<num_states>(best_parent_regions,
2309 // -1, *sample, default_blength, aln, model, threshold_prob);
2310 selected_node.getPartialLh(TOP)->mergeTwoLowers<num_states>(
2311 best_parent_regions, -1, *sample, default_blength, aln, model,
2312 cumulative_rate, threshold_prob);
2313 } else {
2314 // best_parent_regions = new SeqRegions(selected_node->total_lh);
2315 SeqRegions seq_regions_clone = SeqRegions(selected_node.getTotalLh());
2316 best_parent_regions =
2317 cmaple::make_unique<SeqRegions>(std::move(seq_regions_clone));
2318 }
2319 }
2320
2321 // add parent to the root
2322 if (root_vector_index == selected_node_vec_index) {
2323 // now try different lengths for right branch
2324 best_parent_lh += old_root_lh;
2325 RealNumType best_length2 = default_blength;
2326 const std::unique_ptr<SeqRegions>& lower_regions =
2327 selected_node.getPartialLh(
2328 TOP); // ->getPartialLhAtNode(aln, model, threshold_prob);
2329
2330 estimateLengthNewBranchAtRoot<num_states>(
2331 sample, lower_regions, best_parent_regions, best_length2,
2332 best_parent_lh, best_root_blength, min_blength, false);
2333
2334 // update best_parent_lh (taking into account old_root_lh)
2335 best_parent_lh -= old_root_lh;
2336
2337 // add new sample to a new root
2338 connectNewSample2Root<num_states>(
2339 sample, seq_name_index, selected_node_index, selected_node,
2340 best_root_blength, best_length2, best_parent_regions);
2341 }
2342 // add parent to non-root node
2343 else {
2344 const std::unique_ptr<SeqRegions>& upper_left_right_regions =
2345 getPartialLhAtNode(selected_node.getNeighborIndex(
2346 TOP)); // selected_node->neighbor->getPartialLhAtNode(aln,
2347 // model, threshold_prob);
2348
2349 // now try different lengths for the new branch
2350 RealNumType best_length = default_blength;
2351 estimateLengthNewBranch<
2352 &cmaple::Tree::calculateSamplePlacementCost<num_states>>(
2353 best_parent_lh, best_parent_regions, sample, best_length, max_blength,
2354 min_blength, false);
2355
2356 // now create new internal node and append child to it
2357 RealNumType down_distance = best_parent_blength_split;
2358 RealNumType top_distance =
2359 selected_node.getUpperLength() -
2360 down_distance; // selected_node->length - down_distance;
2361 if (best_parent_blength_split < 0) {
2362 down_distance = -1;
2363 top_distance =
2364 selected_node.getUpperLength(); // selected_node->length;
2365
2366 /*if (selected_node->total_lh) delete selected_node->total_lh;
2367 selected_node->total_lh = NULL;
2368
2369 if (selected_node->mid_branch_lh) delete selected_node->mid_branch_lh;
2370 selected_node->mid_branch_lh = NULL;*/
2371 selected_node.setTotalLh(nullptr);
2372 selected_node.setMidBranchLh(nullptr);
2373
2374 // node.furtherMidNodes=None
2375 }
2376 connectNewSample2Branch<num_states>(
2377 sample, seq_name_index, selected_node_index, selected_node,
2378 top_distance, down_distance, best_length, best_parent_regions,
2379 upper_left_right_regions);
2380 }
2381 }
2382
2383 // delete best_parent_regions and best_child_regions
2384 /*if (best_parent_regions)
2385 delete best_parent_regions;
2386 if (best_child_regions)
2387 delete best_child_regions;*/
2388}
2389
2390template <const StateType num_states>
2391void cmaple::Tree::placeNewSampleMidBranch(const Index& selected_node_index,
2392 std::unique_ptr<SeqRegions>& sample,
2393 const NumSeqsType seq_name_index,
2394 const RealNumType best_lh_diff) {
2395 // dummy variables
2396 // const RealNumType threshold_prob = params->threshold_prob;
2397 std::unique_ptr<SeqRegions> best_child_regions = nullptr;
2398
2399 // selected_node->neighbor->getPartialLhAtNode(aln, model, threshold_prob);
2400 // const MiniIndex seleted_node_mini_index =
2401 // selected_node_index.getMiniIndex();
2402 assert(selected_node_index.getMiniIndex() == TOP);
2403 assert(sample && sample->size() > 0);
2404 assert(seq_name_index >= 0);
2405 assert(aln);
2406 assert(model);
2407 assert(cumulative_rate);
2408
2409 PhyloNode& selected_node = nodes[selected_node_index.getVectorIndex()];
2410 const std::unique_ptr<SeqRegions>& upper_left_right_regions =
2411 getPartialLhAtNode(selected_node.getNeighborIndex(TOP));
2412 RealNumType best_split_lh = best_lh_diff;
2413 const RealNumType selected_node_blength = selected_node.getUpperLength();
2414 RealNumType best_branch_length_split = 0.5 * selected_node_blength;
2415 best_child_regions =
2416 nullptr; // cmaple::make_unique<SeqRegions>(SeqRegions(selected_node.getMidBranchLh()));
2417 // selected_node->getPartialLhAtNode(aln, model, threshold_prob);
2418 const std::unique_ptr<SeqRegions>& lower_regions =
2419 selected_node.getPartialLh(TOP);
2420
2421 // try different positions on the existing branch
2422 bool found_new_split =
2423 tryShorterBranch<num_states,
2424 &cmaple::Tree::calculateSamplePlacementCost<num_states>>(
2425 selected_node_blength, best_child_regions, sample,
2426 upper_left_right_regions, lower_regions, best_split_lh,
2427 best_branch_length_split, default_blength, true);
2428
2429 if (!found_new_split) {
2430 // try on the second branch
2431 found_new_split = tryShorterBranch<
2432 num_states, &cmaple::Tree::calculateSamplePlacementCost<num_states>>(
2433 selected_node_blength, best_child_regions, sample,
2434 upper_left_right_regions, lower_regions, best_split_lh,
2435 best_branch_length_split, default_blength, false);
2436
2437 if (found_new_split) {
2438 best_branch_length_split =
2439 selected_node_blength - best_branch_length_split;
2440 }
2441 }
2442
2443 // Delay cloning SeqRegions
2444 if (!best_child_regions) {
2445 best_child_regions = cmaple::make_unique<SeqRegions>(
2446 SeqRegions(selected_node.getMidBranchLh()));
2447 }
2448
2449 // now try different lengths for the new branch
2450 RealNumType best_blength = default_blength;
2451 estimateLengthNewBranch<
2452 &cmaple::Tree::calculateSamplePlacementCost<num_states>>(
2453 best_split_lh, best_child_regions, sample, best_blength, max_blength,
2454 min_blength, false);
2455
2456 // create new internal node and append child to it
2457 connectNewSample2Branch<num_states>(
2458 sample, seq_name_index, selected_node_index, selected_node,
2459 best_branch_length_split,
2460 selected_node_blength - best_branch_length_split, best_blength,
2461 best_child_regions, upper_left_right_regions);
2462}
2464} // namespace cmaple
Definition alignment.h:9
Definition modelbase.h:17
Definition model.h:9
Definition tree.h:12
void doPlacement(std::ostream &out_stream=std::cout)
Do placement (using stepwise addition) to build an initial tree. Model parameters (if not fixed) will...
void load(const std::string &tree_filename, const bool fixed_blengths=false)
Load a tree from a (bifurcating or multifurcating) tree (with/without branch lengths) in NEWICK forma...
std::string exportNewick(const TreeType tree_type=BIN_TREE, const bool show_branch_supports=true)
Export the phylogenetic tree to a string in NEWICK format.
void applySPR(const TreeSearchType tree_search_type, const bool shallow_tree_search, std::ostream &out_stream=std::cout)
Apply SPR moves to optimize the tree.
TreeSearchType
Definition tree.h:17
@ EXHAUSTIVE_TREE_SEARCH
Definition tree.h:21
@ UNKNOWN_TREE_SEARCH
Definition tree.h:22
@ FAST_TREE_SEARCH
Definition tree.h:18
@ NORMAL_TREE_SEARCH
Definition tree.h:19
void computeBranchSupport(const int num_threads=1, const int num_replicates=1000, const double epsilon=0.1, const bool allow_replacing_ML_tree=true, std::ostream &out_stream=std::cout)
Compute branch supports (aLRT-SH) of the current tree, which may or may not contain all taxa in the a...
TreeType
Definition tree.h:28
@ MUL_TREE
Definition tree.h:30
@ BIN_TREE
Definition tree.h:29
@ UNKNOWN_TREE
Definition tree.h:31
RealNumType computeLh()
Compute the log likelihood of the current tree, which may or may not contain all taxa in the alignmen...
void infer(const TreeSearchType tree_search_type=NORMAL_TREE_SEARCH, const bool shallow_tree_search=false, std::ostream &out_stream=std::cout)
Infer a maximum likelihood tree by executing doPlacement(), applySPR(), and optimizeBranch()
void changeModel(Model *model)
Change the substitution model.
void changeAln(Alignment *aln)
Change the alignment.
Tree(Alignment *aln, Model *model, std::istream &tree_stream, const bool fixed_blengths=false, std::unique_ptr< cmaple::Params > &&params=nullptr)
Constructor from a stream of a (bifurcating or multifurcating) tree (with/without branch lengths in N...
Tree(Alignment *aln, Model *model, const std::string &tree_filename="", const bool fixed_blengths=false, std::unique_ptr< cmaple::Params > &&params=nullptr)
Constructor from an optional (bifurcating or multifurcating) tree (with/without branch lengths in NEW...
void load(std::istream &tree_stream, const bool fixed_blengths=false)
Load a tree from a stream of a (bifurcating or multifurcating) tree (with/without branch lengths) in ...
void optimizeBranch(std::ostream &out_stream=std::cout)
Optimize the branch lengths of the tree.
std::istream & operator>>(std::istream &in_stream, cmaple::Tree &tree)
Customized >> operator to read a tree from a stream.
std::ostream & operator<<(std::ostream &out_stream, cmaple::Tree &tree)
Customized << operator to output the tree string in a (bifurcating) NEWICK format to a stream.