1#include "../utils/timeutil.h"
4#ifndef CMAPLE_ALIGNMENT_H
5#define CMAPLE_ALIGNMENT_H
45 std::istream& aln_stream,
46 const std::string& ref_seq =
"",
68 const std::string& aln_filename,
69 const std::string& ref_seq =
"",
92 std::istream& aln_stream,
93 const std::string& ref_seq =
"",
114 const std::string& aln_filename,
115 const std::string& ref_seq =
"",
143 void write(
const std::string& aln_filename,
145 const bool overwrite =
false);
162 seq_type_ = seq_type;
175 auto readRefSeq(
const std::string& ref_filename,
176 const std::string& ref_name) -> std::string;
184 static char convertState2Char(
const cmaple::StateType& state,
192 static InputType parseAlnFormat(
const std::string& n_format);
197 std::vector<Sequence>
203 std::vector<cmaple::StateType> ref_seq;
208 cmaple::StateType num_states;
218 std::unordered_set<void*> attached_trees;
234 void updateNumStates();
250 cmaple::PositionType computeSeqDistance(Sequence& sequence,
251 cmaple::RealNumType hamming_weight);
259 void sortSeqsByDistances();
267 cmaple::StateType convertChar2State(
char state);
278 void extractMutations(
const cmaple::StrVector& sequences,
279 const cmaple::StrVector& seq_names,
280 const std::string& ref_sequence);
290 void readMaple(std::istream& aln_stream);
300 void readFastaOrPhylip(std::istream& aln_stream,
301 const std::string& ref_seq =
"");
309 void parseRefSeq(std::string& ref_sequence,
bool throw_error);
319 void readFasta(std::istream& aln_stream,
320 cmaple::StrVector& sequences,
321 cmaple::StrVector& seq_names,
322 bool check_min_seqs =
true);
332 void readPhylip(std::istream& aln_stream,
333 cmaple::StrVector& sequences,
334 cmaple::StrVector& seq_names,
335 bool check_min_seqs =
true);
346 void readSequences(std::istream& aln_stream,
347 cmaple::StrVector& sequences,
348 cmaple::StrVector& seq_names,
350 bool check_min_seqs =
true);
360 std::string generateRef(cmaple::StrVector& sequences);
366 void processSeq(std::string& sequence,
368 cmaple::PositionType line_num);
374 void addMutation(Sequence* sequence,
376 cmaple::PositionType pos,
377 cmaple::PositionType length = -1);
383 void writeMAPLE(std::ostream& aln_stream);
389 void writeFASTA(std::ostream& aln_stream);
395 void writePHYLIP(std::ostream& aln_stream);
400 auto getRefSeqStr() -> std::string;
405 auto getSeqString(
const std::string& ref_seq_str, Sequence* sequence) -> std::string;
414 InputType detectMAPLEorFASTA(std::istream& aln_stream);
425 InputType detectInputFile(std::istream& aln_stream);
448extern char symbols_protein[];
449extern char symbols_dna[];
450extern char symbols_rna[];
451extern char symbols_morph[];
Alignment(const std::string &aln_filename, const std::string &ref_seq="", const InputType format=IN_AUTO, const cmaple::SeqRegion::SeqType seqtype=cmaple::SeqRegion::SEQ_AUTO)
Constructs an alignment from a file in FASTA, PHYLIP, or MAPLE format.
void read(std::istream &aln_stream, const std::string &ref_seq="", const InputType format=IN_AUTO, const cmaple::SeqRegion::SeqType seqtype=cmaple::SeqRegion::SEQ_AUTO)
Read an alignment from a stream in FASTA, PHYLIP, or MAPLE format.
Alignment()
Default constructor.
void read(const std::string &aln_filename, const std::string &ref_seq="", const InputType format=IN_AUTO, const cmaple::SeqRegion::SeqType seqtype=cmaple::SeqRegion::SEQ_AUTO)
Read an alignment from a file in FASTA, PHYLIP, or MAPLE format.
void write(const std::string &aln_filename, const InputType &format=IN_MAPLE, const bool overwrite=false)
Write the alignment to a file in FASTA, PHYLIP, or MAPLE format.
void write(std::ostream &aln_stream, const InputType &format=IN_MAPLE)
Write the alignment to a stream in FASTA, PHYLIP, or MAPLE format.
Alignment(std::istream &aln_stream, const std::string &ref_seq="", const InputType format=IN_AUTO, const cmaple::SeqRegion::SeqType seqtype=cmaple::SeqRegion::SEQ_AUTO)
Constructs an alignment from a stream in FASTA, PHYLIP, or MAPLE format.
InputType
Definition alignment.h:14
@ IN_UNKNOWN
Definition alignment.h:20
@ IN_FASTA
Definition alignment.h:15
@ IN_AUTO
Definition alignment.h:19
@ IN_PHYLIP
Definition alignment.h:16
@ IN_MAPLE
Definition alignment.h:17
SeqType
Definition seqregion.h:25
@ SEQ_AUTO
Definition seqregion.h:28
std::istream & operator>>(std::istream &in_stream, cmaple::Tree &tree)
Customized >> operator to read a tree from a stream.
std::ostream & operator<<(std::ostream &out_stream, cmaple::Tree &tree)
Customized << operator that writes the tree string to a stream in (bifurcating) NEWICK format.