Version 4.0.0
Main Page | Class Hierarchy | Class List | File List | Class Members | Related Pages

PhasedMarkov.h

Go to the documentation of this file.
00001 /* seqpp/PhasedMarkov.h
00002  *
00003  * Copyright (C) 2003 Laboratoire Statistique & Génome
00004  *
00005  * This program is free software; you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation; either version 2 of the License, or (at
00008  * your option) any later version.
00009  *
00010  * This program is distributed in the hope that it will be useful, but
00011  * WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  * General Public License for more details.
00014  *
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program; if not, write to the Free Software
00017  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00018  */
00019 
00028 #ifndef SEQPP_PHASEDMARKOV_H
00029 #define SEQPP_PHASEDMARKOV_H
00030 
00031 using namespace std;
00032 #include <seqpp/SequenceSet.h>
00033 #include <seqpp/arnoldi.h>
00034 #include <vector>
00035 #include <string>
00036 
00037 
00062 class PhasedMarkov
00063 {
00064 
00065 public :
00066 
00068   /*
00069     \param markov_file description of an input model in a file (generated by print method) 
00070     \param calc_rank calculus of the convergence rank if true
00071     Exple of file:
00072     \code
00073     # 4 <- Order of the phased Markov chain<br>
00074     # 3 <- Phase<br>
00075     # 4 <- Alphabet size<br>
00076     # Phase n°: 0<br>
00077     # 10 steps <- Convergence to the stationnary distribution (!!Optionnal!!)<br>
00078     # Transition matrix:<br>
00079     ...<br>
00080     # Stationnary Probability: (!!Optionnal!!)<<br>
00081     ...
00082     \endcode
00083   */
00084   PhasedMarkov(  const string & markov_file,
00085                  bool calc_rank = false  );
00086  
00088 
00094   PhasedMarkov( const SequenceSet & seqset,
00095                 short phase, short initial_phase = 0,
00096                 bool calc_rank = false );
00097   
00099 
00105   PhasedMarkov( const Sequence & seq,
00106                 short phase, short initial_phase = 0,
00107                 bool calc_rank = false );
00108 
00110   PhasedMarkov( const PhasedMarkov & phm );
00111 
00113   PhasedMarkov();
00114 
00116 
00122   PhasedMarkov( short size, short order, short phase);
00123 
00125 
00130   PhasedMarkov(const PhasedMarkov &M1, const PhasedMarkov &M2, const float p);
00131 
00133 
00140   PhasedMarkov(const SequenceSet & seqset, 
00141                const vector<int> &Indseq, 
00142                short phase, short initial_phase = 0,
00143                bool calc_rank = false ); 
00144 
00146 
00169   PhasedMarkov( const gsl_rng * r,
00170                 short size, short order, 
00171                 short phase, 
00172                 bool calc_rank = false );
00173 
00175 
00183   PhasedMarkov( unsigned long * * count,
00184                 short size, short order,
00185                 short phase, short initial_phase = 0,
00186                 bool calc_rank = false );
00187 
00189   virtual ~PhasedMarkov();
00190   
00191   //---------------------------------------
00193 
00202   template <class TSeq>
00203   void estimate( const TSeq & tseq,
00204                  short phase, short initial_phase, 
00205                  unsigned long beg, unsigned long end,
00206                  bool calc_rank = false, bool count_again = true )
00207   {
00208     short p,first_phase;
00209     long i,jump;
00210     
00211     //Memory allocation
00212     if (!_Pis){
00213         _Pis = new double*[_phase];
00214         for (p=0; p<_phase; p++)
00215           _Pis[p] = new double[_nPi];
00216       }
00217     
00218     jump = tseq.tell_jump(); 
00219     if (count_again)
00220       tseq.clear_count();  
00221     tseq.count_p_occurencies(_phase,initial_phase,beg,end);
00222     for (p=0; p<_phase;p++)
00223       for (i=0; i<_nPi;i++){
00224         _Pis[p][i] = tseq.tell_p_occurencies( i+jump,p );
00225         //cout<<_Pis[p][i]<<" ";
00226       }
00227     //cout<<endl;
00228     
00229     //Calculate the proportion line by line in Pi
00230     Stochasticity();    
00231     //rank calculus;
00232     if (calc_rank)
00233       this->compute_rank();
00234   }
00235 
00236   //---------------------------------------
00238   const double ** markov_matrices( ) const{
00239     return const_cast< const double** >(_Pis);
00240   }
00242   const double * markov_matrix( short numphase ) const{
00243     if ( numphase<_phase )
00244       return _Pis[numphase];
00245     else 
00246       return NULL;
00247   }
00248   
00250 
00269   void draw_markov_matrices(const  gsl_rng * r);
00270 
00272   inline void free_markov_matrices();
00273 
00275   double total_variation( const PhasedMarkov & M );
00276 
00277 
00278   //---------------------------------------
00280   /*
00281     \param force "true" to force the calculation, even if the laws already exist. Default => "false". 
00282   */ 
00283   void compute_stat_laws( bool force = false ); 
00285   const double * stat_law( short numphase = 0 ) const{
00286     if ( numphase<_phase )
00287       return _Mus[numphase];
00288     else 
00289       return NULL;
00290   }
00292   inline void free_stat_laws();
00293   
00295   /*
00296     \param MuInit initial law, must have allocated
00297     \seqset set of sequences
00298   */
00299   void compute_init_law( double * MuInit, const SequenceSet & seqset ) const;
00300 
00301 
00302   //---------------------------------------
00304   virtual int compute_rank();   
00306   virtual long nb_parameters() const{
00307     return  _nb_param;
00308   }
00309 
00310 
00311   //---------------------------------------
00313   void link_to_translator( const Translator & trans ){
00314     _trans = &trans;
00315   }
00317 
00322   double proba_c( const string & word, 
00323                   Coder & coder, short numphase=0 ) const;
00325 
00330   double proba( const string & word, 
00331                 Coder & coder, short numphase=0 ) const;
00332 
00333 
00335 
00340   double proba_c( const vector<short> & word, 
00341                   Coder & coder, short numphase=0 ) const;
00343 
00348   double proba( const vector<short> & word, 
00349                 Coder & coder, short numphase=0 ) const;
00350 
00351 
00353 
00359   double proba_c( long word, int lw=-1, long jump=-1, short numphase=0 ) const;
00361 
00367   double proba( long word, int lw=-1, long jump=-1, short numphase=0 ) const;
00368 
00369   
00371 
00377   double proba_c(const long * seq, long tbeg, long tend, short numphase =0 ) const;
00378 
00380 
00386   double proba(const long * seq, long tbeg, long tend, short numphase =0 ) const;
00387 
00388   //---------------------------------------
00390 
00395   double log_likelihood( const SequenceSet & seqset,
00396                          short initial_phase = 0, short numphase=-1 ) const; 
00397 
00399 
00408   double log_ratio_likelihood( const SequenceSet & seqset,
00409                                const PhasedMarkov &M,
00410                                short initial_phase1=0,                               
00411                                short initial_phase2=0 ) const ; 
00412 
00413 
00415 
00420   double log_likelihood( const Sequence & seq,
00421                          short initial_phase = 0, short numphase=-1 ) const; 
00422 
00424 
00433   double log_ratio_likelihood( const Sequence & seq,
00434                                const PhasedMarkov &M,
00435                                short initial_phase1=0,                               
00436                                short initial_phase2=0 ) const ;
00437   
00439 
00443   template <class TSeq>
00444     double BIC( const TSeq & tseq, short initial_phase=0 ) const{   
00445     //BIC = -2*loglikelihood + nbparam*log(length)
00446     return( -2*log_likelihood( tseq, initial_phase ) + _nb_param*log( (double)tseq.tell_length() ) );
00447   }
00448 
00449  
00451 
00455   template <class TSeq>
00456     double AIC( const TSeq & tseq, short initial_phase=0 ) const{
00457     //AIC = -2*loglikelihood + 2*nbparam
00458     return( -2*log_likelihood( tseq, initial_phase )  + 2*_nb_param );
00459   }
00460 
00461 
00462   //---------------------------------------
00464 
00485   inline void print( const string &FileOut );
00486   
00488   void print(ofstream &Out) const;
00489 
00490   //---------------------------------------
00492   int tell_size() const { return(_size);};
00494   int tell_rank() const { return(_rank);};
00496   int tell_order() const { return(_order);};
00498   int tell_phase() const { return(_phase); }
00499 
00500   //---------------------------------------
00502   int nMu() const { return(_nMu); }
00504   int nPi() const { return(_nPi); }
00506 
00510   double Pi(int i, int p=0) const { 
00511     return(_Pis[p][i]); 
00512   }
00514 
00518   double & operator() (int i, int p=0){ 
00519     return(_Pis[p][i]); 
00520   }
00521 
00523 
00527   double Mu(int i, int p=0) const { 
00528     return(_Mus[p][i]); 
00529   }  
00531   bool isPis() const { return(_Pis != NULL);};
00533   bool isMus()  const { return(_Mus != NULL);};
00534 
00535 
00537   inline short nextPhase(short p) const;
00539   inline short prevPhase(short p) const;
00540   //--------------------------------------- 
00541 
00543   bool Stochasticity();
00544    
00545  protected :
00546   
00548   short _phase;
00549 
00551   double **_Pis; 
00552   
00554   double **_Mus;   
00555   
00557   short _size; 
00558   
00560   short _order;
00561   
00563   long _nPi; 
00564 
00566   long _nMu; 
00567  
00569   long _nb_param;
00570 
00572   int _rank;
00573 
00575   long _jump;
00576 
00578   short *_nextPhase;
00579 
00581   short *_prevPhase;
00582 
00584   const Translator * _trans;
00585 
00586   
00587   inline void FreeNextPhase();
00588   inline void FreePrevPhase();
00589 
00591   bool isNextPhase()  const { return(_nextPhase != NULL);};
00593   bool isPrevPhase()  const { return(_prevPhase != NULL);}; 
00594 };
00595 
00596 
00597 inline short PhasedMarkov::nextPhase(short p) const
00598 {
00599   return (_nextPhase[p]);
00600 }
00601 
00602 inline short PhasedMarkov::prevPhase(short p) const
00603 {
00604   return (_prevPhase[p]);
00605 }
00606 
00607 
00608 inline void PhasedMarkov::free_markov_matrices()
00609 {
00610    if (_Pis)
00611      {
00612        for (int p=0;p<_phase;p++)
00613          delete []_Pis[p];
00614        delete []_Pis;
00615      }
00616    _Pis = NULL;
00617 }
00618 
00619 inline void PhasedMarkov::free_stat_laws()
00620 {
00621   if (_Mus)
00622     {
00623       for (int p=0;p<_phase;p++)
00624         delete []_Mus[p];
00625       delete []_Mus;
00626     }
00627   _Mus = NULL;
00628 }
00629 
00630 inline void PhasedMarkov::FreeNextPhase()
00631 {
00632   if (isNextPhase())
00633     delete [] _nextPhase;
00634   _nextPhase = NULL;
00635 }
00636 
00637 inline void PhasedMarkov::FreePrevPhase()
00638 {
00639   if (isPrevPhase())
00640     delete [] _prevPhase;
00641   _prevPhase = NULL;
00642 }
00643 
00644 
00645 inline void PhasedMarkov::print(const string & FileOut )
00646 {
00647   ofstream Out(FileOut.c_str(), ios::out);
00648   print(Out);
00649 }
00650 
00651 #endif/*SEQPP_PHASEDMARKOV_H*/
00652 



Download seq++ 4.0.0
Download previous versions
Statistique & Genome Home


Generated on Wed Mar 23 09:25:57 2005 for seqpp by doxygen 1.3.9.1