CMAPLE 1.0.0
C++ MAximum Parsimonious Likelihood Estimation
Loading...
Searching...
No Matches
modelbase.h
1
2#pragma once
3
4#include <cassert>
5#include <istream>
6#include <string>
7#include "../alignment/alignment.h"
8#include "../utils/matrix.h"
9#include "../utils/tools.h"
10
11class ModelsBlock; // do not pull in external headers!
12
13namespace cmaple {
14class SeqRegions;
15
17class ModelBase {
18 private:
22 void initEqualStateFreqs();
23
27 cmaple::SeqRegion::SeqType getSeqType();
28
34 void readStateFreq(std::istream& in);
35
41 void updateMutMatbyMutCount();
42
43 protected:
44 // NHANLT: we can change to use unique_ptr(s) instead of normal pointers
49 ModelsBlock* model_block = nullptr;
50
55 void init();
56
60 ModelsBlock* readModelsDefinition(const char* builtin_models);
61
67 bool readParametersString(std::string& model_str);
68
74 void readRates(std::istream& in, const bool is_reversible);
75
80 void normalizeQMatrix();
81
86 virtual void extractRootFreqs(const Alignment* aln);
87
91 void initPointers();
92
97 template <cmaple::StateType num_states>
98 void updateMutationMat();
99
106 template <cmaple::StateType num_states>
107 bool updateMutationMatEmpiricalTemplate();
110 public:
116 enum SubModel {
124
165
168 };
169
174 ModelBase() = delete;
175
179 ModelBase(const SubModel sub_model, const cmaple::StateType num_states);
180
184 virtual ~ModelBase();
185
189 const static std::map<std::string, SubModel> dna_models_mapping;
190
194 const static std::map<std::string, SubModel> aa_models_mapping;
195
196 // NHANLT: we can change to use unique_ptr(s) instead of normal pointers in
197 // the following
201 SubModel sub_model;
202
206 const cmaple::StateType num_states_ = 0;
207
211 cmaple::RealNumType* pseu_mutation_count = nullptr;
212
216 cmaple::RealNumType* root_freqs = nullptr;
217
221 cmaple::RealNumType* mutation_mat = nullptr;
222
226 cmaple::RealNumType* root_log_freqs = nullptr;
227
231 cmaple::RealNumType* diagonal_mut_mat = nullptr;
232
236 cmaple::RealNumType* transposed_mut_mat = nullptr;
237
241 cmaple::RealNumType* inverse_root_freqs = nullptr;
242
246 cmaple::RealNumType* freqi_freqj_qij = nullptr;
247
251 cmaple::RealNumType* freq_j_transposed_ij = nullptr;
252
256 cmaple::StateType* row_index = nullptr;
257
261 cmaple::RealNumType normalized_factor = 1.0;
262
266 bool fixed_params = false;
267
271 std::string getModelName() const;
272
276 std::string exportRootFrequenciesStr();
277
281 std::string exportQMatrixStr();
282
288 void extractRefInfo(const Alignment* aln);
289
294 virtual void initMutationMat(){}
295
303 virtual bool updateMutationMatEmpirical() {
304 return false;
305 }
306
313 virtual void updatePesudoCount(const Alignment* aln,
314 const SeqRegions& regions1,
315 const SeqRegions& regions2);
316
321 static cmaple::SeqRegion::SeqType detectSeqType(const SubModel sub_model);
322
327 static cmaple::ModelBase::SubModel parseModel(const std::string& model_name);
329};
330
332template <StateType num_states>
333void cmaple::ModelBase::updateMutationMat() {
334 // update Mutation matrix regarding the pseudo muation count
335 updateMutMatbyMutCount();
336
337 // compute the total rate regarding the root freqs
338 RealNumType total_rate = 0;
339 total_rate -= dotProduct<num_states>(root_freqs, diagonal_mut_mat);
340
341 // record sum as the normalized factor
342 normalized_factor = total_rate;
343
344 // inverse total_rate
345 total_rate = 1.0 / total_rate;
346
347 // normalize the mutation_mat
348 RealNumType* mutation_mat_row = mutation_mat;
349 RealNumType* freqi_freqj_qij_row = freqi_freqj_qij;
350 for (StateType i = 0; i < num_states_; ++i, mutation_mat_row += num_states_,
351 freqi_freqj_qij_row += num_states_) {
352 for (StateType j = 0; j < num_states_; ++j) {
353 mutation_mat_row[j] *= total_rate;
354 // mutation_mat_row[j] /= total_rate;
355
356 // update freqi_freqj_qij
357 if (i != j) {
358 freqi_freqj_qij_row[j] =
359 root_freqs[i] * inverse_root_freqs[j] * mutation_mat_row[j];
360 // root_freqs[i] / root_freqs[j] * mutation_mat_row[j];
361 } else {
362 freqi_freqj_qij_row[j] = mutation_mat_row[j];
363 }
364
365 // update the transposed mutation matrix
366 transposed_mut_mat[row_index[j] + i] = mutation_mat_row[j];
367 }
368
369 // update diagonal
370 diagonal_mut_mat[i] = mutation_mat_row[i];
371 }
372
373 // pre-compute matrix to speedup
374 RealNumType* transposed_mut_mat_row = transposed_mut_mat;
375 RealNumType* freq_j_transposed_ij_row = freq_j_transposed_ij;
376
377 for (StateType i = 0; i < num_states_; ++i,
378 transposed_mut_mat_row += num_states_,
379 freq_j_transposed_ij_row += num_states_) {
380 setVecByProduct<num_states>(freq_j_transposed_ij_row, root_freqs,
381 transposed_mut_mat_row);
382 }
383}
384
385template <StateType num_states>
386auto cmaple::ModelBase::updateMutationMatEmpiricalTemplate()
387 -> bool {
388 bool update = false;
389
390 if (!fixed_params) {
391 // clone the current mutation matrix
392 RealNumType* tmp_mut_mat = new RealNumType[row_index[num_states_]];
393 memcpy(tmp_mut_mat, mutation_mat,
394 row_index[num_states_] * sizeof(RealNumType));
395
396 // update the mutation matrix regarding the pseu_mutation_count
397 updateMutationMat<num_states>();
398
399 // set update = true if the mutation matrix changes more than a threshold
400 const RealNumType change_thresh = 1e-3;
401 RealNumType sum_change = 0;
402 RealNumType* tmp_mut_mat_ptr = tmp_mut_mat;
403 RealNumType* mutation_mat_ptr = mutation_mat;
404 for (StateType i = 0; i < num_states_;
405 ++i, tmp_mut_mat_ptr += num_states_, mutation_mat_ptr += num_states_) {
406 for (StateType j = 0; j < num_states_; ++j) {
407 sum_change += fabs(tmp_mut_mat_ptr[j] - mutation_mat_ptr[j]);
408 }
409
410 sum_change -= fabs(tmp_mut_mat_ptr[i] - mutation_mat_ptr[i]);
411 }
412
413 update = sum_change > change_thresh;
414
415 // delete tmp_diagonal_mutation_mat
416 delete[] tmp_mut_mat;
417 }
418
419 // return update
420 return update;
421}
423} // namespace cmaple
Definition alignment.h:9
Definition modelbase.h:17
SubModel
Definition modelbase.h:116
@ PMB
Definition modelbase.h:142
@ DEFAULT
Default - GTR for DNA, and LG for Protein data.
Definition modelbase.h:166
@ Q_MAMMAL
Definition modelbase.h:135
@ NQ_INSECT
Definition modelbase.h:161
@ MTINV
Definition modelbase.h:150
@ Q_PFAM
Definition modelbase.h:133
@ HIVW
Definition modelbase.h:154
@ MTVER
Definition modelbase.h:149
@ NQ_MAMMAL
Definition modelbase.h:160
@ FLAVI
Definition modelbase.h:152
@ JTT
Definition modelbase.h:132
@ CPREV
Definition modelbase.h:157
@ Q_PLANT
Definition modelbase.h:137
@ LG
Definition modelbase.h:130
@ HIVB
Definition modelbase.h:153
@ Q_YEAST
Definition modelbase.h:138
@ NQ_PLANT
Definition modelbase.h:162
@ FLU
Definition modelbase.h:155
@ VT
Definition modelbase.h:141
@ UNREST
Definition modelbase.h:122
@ MTMAM
Definition modelbase.h:151
@ Q_BIRD
Definition modelbase.h:134
@ DCMUT
Definition modelbase.h:140
@ MTMET
Definition modelbase.h:148
@ NONREV
Definition modelbase.h:129
@ MTART
Definition modelbase.h:146
@ JC
Definition modelbase.h:120
@ NQ_BIRD
Definition modelbase.h:159
@ MTREV
Definition modelbase.h:145
@ BLOSUM62
Definition modelbase.h:143
@ NQ_PFAM
Definition modelbase.h:158
@ GTR20
Definition modelbase.h:128
@ WAG
Definition modelbase.h:131
@ NQ_YEAST
Definition modelbase.h:163
@ GTR
Definition modelbase.h:121
@ Q_INSECT
Definition modelbase.h:136
@ JTTDCMUT
Definition modelbase.h:139
@ DAYHOFF
Definition modelbase.h:144
@ UNKNOWN
Unknown model.
Definition modelbase.h:167
@ MTZOA
Definition modelbase.h:147
@ RTREV
Definition modelbase.h:156
SeqType
Definition seqregion.h:25