![]() |
Version 4.0.0 |
#include <seqpp/PhasedMarkov.h>
Inheritance diagram for PhasedMarkov:
Public Member Functions | |
PhasedMarkov (const string &markov_file, bool calc_rank=false) | |
Constructor 1 : read a configuration file. | |
PhasedMarkov (const SequenceSet &seqset, short phase, short initial_phase=0, bool calc_rank=false) | |
Constructor 2 : Estimate the transition matrices on the sequences of seqset. | |
PhasedMarkov (const Sequence &seq, short phase, short initial_phase=0, bool calc_rank=false) | |
Constructor 3 : Estimate the transition matrices on the sequence seq. | |
PhasedMarkov (const PhasedMarkov &phm) | |
Constructor 4 : Copy constructor. | |
PhasedMarkov () | |
Constructor 5 : Default constructor. | |
PhasedMarkov (short size, short order, short phase) | |
Constructor 6 : Minimal Constructor. | |
PhasedMarkov (const PhasedMarkov &M1, const PhasedMarkov &M2, const float p) | |
Constructor 7 : Creation of a "mixed" Markov chain M = p*M1 + (1-p)*M2. | |
PhasedMarkov (const SequenceSet &seqset, const vector< int > &Indseq, short phase, short initial_phase=0, bool calc_rank=false) | |
Constructor 8 : Estimation of the transition matrix based on the sequences of seqset given in Indseq. | |
PhasedMarkov (const gsl_rng *r, short size, short order, short phase, bool calc_rank=false) | |
Constructor 9 : random markov matrices. | |
PhasedMarkov (unsigned long **count, short size, short order, short phase, short initial_phase=0, bool calc_rank=false) | |
Constructor 10 : Estimate the transition matrices on a word-count. | |
virtual | ~PhasedMarkov () |
Destructor. | |
template<class TSeq> | |
void | estimate (const TSeq &tseq, short phase, short initial_phase, unsigned long beg, unsigned long end, bool calc_rank=false, bool count_again=true) |
Estimate the transition matrices on the sequence/sequenceset tseq. | |
const double ** | markov_matrices () const |
access to the markov matrix(ces) | |
const double * | markov_matrix (short numphase) const |
access to the numphase-th markov matrix | |
void | draw_markov_matrices (const gsl_rng *r) |
draw at random the markov matrices | |
void | free_markov_matrices () |
free the memory allocated for markov matrices | |
double | total_variation (const PhasedMarkov &M) |
Total variation distance between *this and M. | |
void | compute_stat_laws (bool force=false) |
Compute the stationary laws. | |
const double * | stat_law (short numphase=0) const |
access to the stationary distribution in phase numphase | |
void | free_stat_laws () |
free the memory allocated for stationary laws | |
void | compute_init_law (double *MuInit, const SequenceSet &seqset) const |
Get the empirical relative frequency of the first order+1 letters on the set of sequences "seqset". | |
virtual int | compute_rank () |
Computes the rank of convergence of the Markov Chain. | |
virtual long | nb_parameters () const |
return the number of effective parameters | |
void | link_to_translator (const Translator &trans) |
link to a Translator object to use proba methods with strings | |
double | proba_c (const string &word, Coder &coder, short numphase=0) const |
Stationary probability of a word (size greater than _order) conditional on its first letters (call link_to_translator first!). | |
double | proba (const string &word, Coder &coder, short numphase=0) const |
Stationary probability of a word. | |
double | proba_c (const vector< short > &word, Coder &coder, short numphase=0) const |
Stationary probability of a word (size greater than _order) conditional on its first letters. | |
double | proba (const vector< short > &word, Coder &coder, short numphase=0) const |
Stationary probability of a word. | |
double | proba_c (long word, int lw=-1, long jump=-1, short numphase=0) const |
Stationary probability of a word (size greater than _order) conditional on its first letters. | |
double | proba (long word, int lw=-1, long jump=-1, short numphase=0) const |
Stationary probability of a word. | |
double | proba_c (const long *seq, long tbeg, long tend, short numphase=0) const |
Stationary probability of the word seq[tbeg...tend] (size greater than _order) conditional on its first letters. | |
double | proba (const long *seq, long tbeg, long tend, short numphase=0) const |
Stationary probability of the word seq[tbeg...tend]. | |
double | log_likelihood (const SequenceSet &seqset, short initial_phase=0, short numphase=-1) const |
Log-likelihood of a set of sequences. | |
double | log_ratio_likelihood (const SequenceSet &seqset, const PhasedMarkov &M, short initial_phase1=0, short initial_phase2=0) const |
Calculation of the logarithm of the ratio of the probabilities of observing "seqset" under "this" distribution and under "M". | |
double | log_likelihood (const Sequence &seq, short initial_phase=0, short numphase=-1) const |
loglikelihood of a sequence | |
double | log_ratio_likelihood (const Sequence &seq, const PhasedMarkov &M, short initial_phase1=0, short initial_phase2=0) const |
Calculation of the logarithm of the ratio of the probability of observing "seq" under "this" distribution and "M". | |
template<class TSeq> | |
double | BIC (const TSeq &tseq, short initial_phase=0) const |
BIC of sequences (BIC = -2*loglikelihood + nbparam*log(length)). | |
template<class TSeq> | |
double | AIC (const TSeq &tseq, short initial_phase=0) const |
AIC of a set of sequences (AIC = -2*loglikelihood + 2*nbparam). | |
void | print (const string &FileOut) |
Print a summary of the object. | |
void | print (ofstream &Out) const |
Print a summary of the object. | |
int | tell_size () const |
Returns the alphabet size. | |
int | tell_rank () const |
Returns the convergence rank. | |
int | tell_order () const |
Returns the order. | |
int | tell_phase () const |
Returns the phase. | |
int | nMu () const |
size of the stat law vector | |
int | nPi () const |
size of the matrix | |
double | Pi (int i, int p=0) const |
Access to Markov matrix Pi. | |
double & | operator() (int i, int p=0) |
() operator for Markov matrix Pi elements | |
double | Mu (int i, int p=0) const |
Access to stationary vector Mu elements. | |
bool | isPis () const |
_Pis != NULL ? | |
bool | isMus () const |
_Mus != NULL ? | |
short | nextPhase (short p) const |
Give the phase following p. | |
short | prevPhase (short p) const |
Give the phase preceding p. | |
bool | Stochasticity () |
Verify the stochasticity of _Pis[] and rescale it if necessary. | |
Protected Member Functions | |
bool | isNextPhase () const |
_nextPhase != NULL ? | |
bool | isPrevPhase () const |
_prevPhase != NULL ? | |
Protected Attributes | |
short | _phase |
Phase of the model. | |
double ** | _Pis |
"Matrices" (in vector format) of transition probabilities for each phase | |
double ** | _Mus |
Vector of stationary probabilities for each phase. | |
short | _size |
Size of the alphabet. | |
short | _order |
Order of the model (the same at each phase). | |
long | _nPi |
Dim of Pi :_size^(_order+1). | |
long | _nMu |
Dim of Mu :_size^_order. | |
long | _nb_param |
number of effective parameters | |
int | _rank |
How many steps to converge to Mu ? | |
long | _jump |
jump to the codes of _order+1 letters when Sequence-like code | |
short * | _nextPhase |
(Optimization) For each phase, give the next phase | |
short * | _prevPhase |
(Optimization) For each phase, give the previous phase | |
const Translator * | _trans |
link to a translator object for the use of proba methods |
This is a generalization of a Markov chain that uses different transition matrices depending on the position considered in the sequence. The number of phases is variable.
For example, if we consider 3 phases and denote the three transition matrices by Pi1, Pi2 and Pi3 respectively, the Markov sequences are generated by applying the matrices in the cyclic order 123123123123... In DNA modeling (genomics), this is useful to take into account the fact that a coding region is read in blocks of 3 bases. The order of the Markov model, i.e. the number of previous states needed to determine the distribution of the current state, is variable. It is assumed here that this order is the same in all phases.
Methods are implemented for Markovian transition matrix estimation, computation of the stationary distribution, word probabilities, total variation distance between two Markovian matrices, and more. Eigenproblems are solved efficiently using the implicitly restarted Arnoldi algorithm.
Simulations are also possible.
|
Constructor 2 : Estimate the transition matrices on the sequences of seqset.
|
|
Constructor 3 : Estimate the transition matrices on the sequence seq.
|
|
Constructor 6 : Minimal Constructor. Initialises the constants of the model but not the matrices nor the stat laws
|
|
Constructor 7 : Creation of a "mixed" Markov chain M = p*M1 + (1-p)*M2.
|
|
Constructor 8 : Estimation of the transition matrix based on the sequences of seqset given in Indseq.
|
|
Constructor 9 : random markov matrices.
const gsl_rng_type * T;
// Choose a default generator and seed
// from environment variables
gsl_rng_env_setup();
// Newly created instance of the generator
T = gsl_rng_default;
gsl_rng * r = gsl_rng_alloc (T);
// Initialize/seed the random number generator
gsl_rng_set( r, (long)(time( NULL )) );
...
...
gsl_rng_free( r ); |
|
Constructor 10 : Estimate the transition matrices on a word-count.
|
|
AIC of a set of sequences (AIC = -2*loglikelihood + 2*nbparam).
|
|
BIC of sequences (BIC = -2*loglikelihood + nbparam*log(length)).
|
|
draw at random the markov matrices
const gsl_rng_type * T;
// Choose a default generator and seed
// from environment variables
gsl_rng_env_setup();
// Newly created instance of the generator
T = gsl_rng_default;
gsl_rng * r = gsl_rng_alloc (T);
// Initialize/seed the random number generator
gsl_rng_set( r, (long)(time( NULL )) );
...
...
gsl_rng_free( r ); |
|
Estimate the transition matrices on the sequence/sequenceset tseq.
|
|
loglikelihood of a sequence
|
|
Log-likelihood of a set of sequences.
|
|
Calculation of the logarithm of the ratio of the probability of observing "seq" under "this" distribution and "M".
|
|
Calculation of the logarithm of the ratio of the probability of observing "seq" under "this" distribution and "M".
|
|
Access to stationary vector Mu elements.
|
|
() operator for Markov matrix Pi elements
|
|
Access to Markov matrix Pi.
|
|
Print a summary of the object. The estimation results can be saved in the following representation:
# 1 <- Order of the phased Markov chain
# 2 <- Phase
# 4 <- Alphabet size
# 19 steps <- Convergence to the stationnary distribution
# Phase n°0
# Transition matrix:
0.3945322543 0.1652811616 0.1535033485 0.2866832356
etc...........
# Stationnary Probability:
0.3127105148 0.2114684268 0.1783495332 0.2974715251
# Phase n°1
# Transition matrix:
0.3923961961 0.163516403 0.1521005152 0.2919868858
etc................
# Stationnary Probability:
0.3135417652 0.2089660861 0.1771006767 0.300391472 |
|
Stationary probability of the word seq[tbeg...tend].
|
|
Stationary probability of a word.
|
|
Stationary probability of a word.
|
|
Stationary probability of a word.
|
|
Stationary probability of the word seq[tbeg...tend] (size greater than _order) conditional on its first letters.
|
|
Stationary probability of a word (size greater than _order) conditional on its first letters.
|
|
Stationary probability of a word (size greater than _order) conditional on its first letters.
|
|
Stationary probability of a word (size greater than _order) conditional on its first letters (call link_to_translator first!).
|
Download seq++ 4.0.0 |
Download previous versions |
Statistique & Genome Home |
Contributors : M.Baudry, P.Y.Bourguignon, M.Hoebeke, V.Miele, P.Nicolas, G.Nuel, H.Richard, D.Robelin |