00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00028 #ifndef SEQPP_PHASEDMARKOV_H
00029 #define SEQPP_PHASEDMARKOV_H
00030
00031 using namespace std;
00032 #include <seqpp/SequenceSet.h>
00033 #include <seqpp/arnoldi.h>
00034 #include <vector>
00035 #include <string>
00036
00037
00062 class PhasedMarkov
00063 {
00064
00065 public :
00066
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084 PhasedMarkov( const string & markov_file,
00085 bool calc_rank = false );
00086
00088
00094 PhasedMarkov( const SequenceSet & seqset,
00095 short phase, short initial_phase = 0,
00096 bool calc_rank = false );
00097
00099
00105 PhasedMarkov( const Sequence & seq,
00106 short phase, short initial_phase = 0,
00107 bool calc_rank = false );
00108
00110 PhasedMarkov( const PhasedMarkov & phm );
00111
00113 PhasedMarkov();
00114
00116
00122 PhasedMarkov( short size, short order, short phase);
00123
00125
00130 PhasedMarkov(const PhasedMarkov &M1, const PhasedMarkov &M2, const float p);
00131
00133
00140 PhasedMarkov(const SequenceSet & seqset,
00141 const vector<int> &Indseq,
00142 short phase, short initial_phase = 0,
00143 bool calc_rank = false );
00144
00146
00169 PhasedMarkov( const gsl_rng * r,
00170 short size, short order,
00171 short phase,
00172 bool calc_rank = false );
00173
00175
00183 PhasedMarkov( unsigned long * * count,
00184 short size, short order,
00185 short phase, short initial_phase = 0,
00186 bool calc_rank = false );
00187
00189 virtual ~PhasedMarkov();
00190
00191
00193
00202 template <class TSeq>
00203 void estimate( const TSeq & tseq,
00204 short phase, short initial_phase,
00205 unsigned long beg, unsigned long end,
00206 bool calc_rank = false, bool count_again = true )
00207 {
00208 short p,first_phase;
00209 long i,jump;
00210
00211
00212 if (!_Pis){
00213 _Pis = new double*[_phase];
00214 for (p=0; p<_phase; p++)
00215 _Pis[p] = new double[_nPi];
00216 }
00217
00218 jump = tseq.tell_jump();
00219 if (count_again)
00220 tseq.clear_count();
00221 tseq.count_p_occurencies(_phase,initial_phase,beg,end);
00222 for (p=0; p<_phase;p++)
00223 for (i=0; i<_nPi;i++){
00224 _Pis[p][i] = tseq.tell_p_occurencies( i+jump,p );
00225
00226 }
00227
00228
00229
00230 Stochasticity();
00231
00232 if (calc_rank)
00233 this->compute_rank();
00234 }
00235
00236
00238 const double ** markov_matrices( ) const{
00239 return const_cast< const double** >(_Pis);
00240 }
00242 const double * markov_matrix( short numphase ) const{
00243 if ( numphase<_phase )
00244 return _Pis[numphase];
00245 else
00246 return NULL;
00247 }
00248
00250
00269 void draw_markov_matrices(const gsl_rng * r);
00270
00272 inline void free_markov_matrices();
00273
00275 double total_variation( const PhasedMarkov & M );
00276
00277
00278
00280
00281
00282
00283 void compute_stat_laws( bool force = false );
00285 const double * stat_law( short numphase = 0 ) const{
00286 if ( numphase<_phase )
00287 return _Mus[numphase];
00288 else
00289 return NULL;
00290 }
00292 inline void free_stat_laws();
00293
00295
00296
00297
00298
00299 void compute_init_law( double * MuInit, const SequenceSet & seqset ) const;
00300
00301
00302
00304 virtual int compute_rank();
00306 virtual long nb_parameters() const{
00307 return _nb_param;
00308 }
00309
00310
00311
00313 void link_to_translator( const Translator & trans ){
00314 _trans = &trans;
00315 }
00317
00322 double proba_c( const string & word,
00323 Coder & coder, short numphase=0 ) const;
00325
00330 double proba( const string & word,
00331 Coder & coder, short numphase=0 ) const;
00332
00333
00335
00340 double proba_c( const vector<short> & word,
00341 Coder & coder, short numphase=0 ) const;
00343
00348 double proba( const vector<short> & word,
00349 Coder & coder, short numphase=0 ) const;
00350
00351
00353
00359 double proba_c( long word, int lw=-1, long jump=-1, short numphase=0 ) const;
00361
00367 double proba( long word, int lw=-1, long jump=-1, short numphase=0 ) const;
00368
00369
00371
00377 double proba_c(const long * seq, long tbeg, long tend, short numphase =0 ) const;
00378
00380
00386 double proba(const long * seq, long tbeg, long tend, short numphase =0 ) const;
00387
00388
00390
00395 double log_likelihood( const SequenceSet & seqset,
00396 short initial_phase = 0, short numphase=-1 ) const;
00397
00399
00408 double log_ratio_likelihood( const SequenceSet & seqset,
00409 const PhasedMarkov &M,
00410 short initial_phase1=0,
00411 short initial_phase2=0 ) const ;
00412
00413
00415
00420 double log_likelihood( const Sequence & seq,
00421 short initial_phase = 0, short numphase=-1 ) const;
00422
00424
00433 double log_ratio_likelihood( const Sequence & seq,
00434 const PhasedMarkov &M,
00435 short initial_phase1=0,
00436 short initial_phase2=0 ) const ;
00437
00439
00443 template <class TSeq>
00444 double BIC( const TSeq & tseq, short initial_phase=0 ) const{
00445
00446 return( -2*log_likelihood( tseq, initial_phase ) + _nb_param*log( (double)tseq.tell_length() ) );
00447 }
00448
00449
00451
00455 template <class TSeq>
00456 double AIC( const TSeq & tseq, short initial_phase=0 ) const{
00457
00458 return( -2*log_likelihood( tseq, initial_phase ) + 2*_nb_param );
00459 }
00460
00461
00462
00464
00485 inline void print( const string &FileOut );
00486
00488 void print(ofstream &Out) const;
00489
00490
00492 int tell_size() const { return(_size);};
00494 int tell_rank() const { return(_rank);};
00496 int tell_order() const { return(_order);};
00498 int tell_phase() const { return(_phase); }
00499
00500
00502 int nMu() const { return(_nMu); }
00504 int nPi() const { return(_nPi); }
00506
00510 double Pi(int i, int p=0) const {
00511 return(_Pis[p][i]);
00512 }
00514
00518 double & operator() (int i, int p=0){
00519 return(_Pis[p][i]);
00520 }
00521
00523
00527 double Mu(int i, int p=0) const {
00528 return(_Mus[p][i]);
00529 }
00531 bool isPis() const { return(_Pis != NULL);};
00533 bool isMus() const { return(_Mus != NULL);};
00534
00535
00537 inline short nextPhase(short p) const;
00539 inline short prevPhase(short p) const;
00540
00541
00543 bool Stochasticity();
00544
00545 protected :
00546
00548 short _phase;
00549
00551 double **_Pis;
00552
00554 double **_Mus;
00555
00557 short _size;
00558
00560 short _order;
00561
00563 long _nPi;
00564
00566 long _nMu;
00567
00569 long _nb_param;
00570
00572 int _rank;
00573
00575 long _jump;
00576
00578 short *_nextPhase;
00579
00581 short *_prevPhase;
00582
00584 const Translator * _trans;
00585
00586
00587 inline void FreeNextPhase();
00588 inline void FreePrevPhase();
00589
00591 bool isNextPhase() const { return(_nextPhase != NULL);};
00593 bool isPrevPhase() const { return(_prevPhase != NULL);};
00594 };
00595
00596
00597 inline short PhasedMarkov::nextPhase(short p) const
00598 {
00599 return (_nextPhase[p]);
00600 }
00601
00602 inline short PhasedMarkov::prevPhase(short p) const
00603 {
00604 return (_prevPhase[p]);
00605 }
00606
00607
00608 inline void PhasedMarkov::free_markov_matrices()
00609 {
00610 if (_Pis)
00611 {
00612 for (int p=0;p<_phase;p++)
00613 delete []_Pis[p];
00614 delete []_Pis;
00615 }
00616 _Pis = NULL;
00617 }
00618
00619 inline void PhasedMarkov::free_stat_laws()
00620 {
00621 if (_Mus)
00622 {
00623 for (int p=0;p<_phase;p++)
00624 delete []_Mus[p];
00625 delete []_Mus;
00626 }
00627 _Mus = NULL;
00628 }
00629
00630 inline void PhasedMarkov::FreeNextPhase()
00631 {
00632 if (isNextPhase())
00633 delete [] _nextPhase;
00634 _nextPhase = NULL;
00635 }
00636
00637 inline void PhasedMarkov::FreePrevPhase()
00638 {
00639 if (isPrevPhase())
00640 delete [] _prevPhase;
00641 _prevPhase = NULL;
00642 }
00643
00644
00645 inline void PhasedMarkov::print(const string & FileOut )
00646 {
00647 ofstream Out(FileOut.c_str(), ios::out);
00648 print(Out);
00649 }
00650
00651 #endif
00652