SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 2 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2007 Soeren Sonnenburg 00008 * Written (W) 1999-2007 Gunnar Raetsch 00009 * Written (W) 2008-2009 Jonas Behr 00010 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00011 */ 00012 00013 #include <shogun/structure/DynProg.h> 00014 #include <shogun/mathematics/Math.h> 00015 #include <shogun/io/SGIO.h> 00016 #include <shogun/lib/config.h> 00017 #include <shogun/features/StringFeatures.h> 00018 #include <shogun/features/Alphabet.h> 00019 #include <shogun/structure/Plif.h> 00020 #include <shogun/structure/IntronList.h> 00021 00022 #include <stdlib.h> 00023 #include <stdio.h> 00024 #include <time.h> 00025 #include <ctype.h> 00026 00027 using namespace shogun; 00028 00029 //#define USE_TMP_ARRAYCLASS 00030 //#define DYNPROG_DEBUG 00031 00032 int32_t CDynProg::word_degree_default[4]={3,4,5,6} ; 00033 int32_t CDynProg::cum_num_words_default[5]={0,64,320,1344,5440} ; 00034 int32_t CDynProg::frame_plifs[3]={4,5,6}; 00035 int32_t CDynProg::num_words_default[4]= {64,256,1024,4096} ; 00036 int32_t CDynProg::mod_words_default[32] = {1,1,1,1,1,1,1,1, 00037 1,1,1,1,1,1,1,1, 00038 0,0,0,0,0,0,0,0, 00039 0,0,0,0,0,0,0,0} ; 00040 bool CDynProg::sign_words_default[16] = {true,true,true,true,true,true,true,true, 00041 false,false,false,false,false,false,false,false} ; // whether to use counts or signum of counts 00042 int32_t CDynProg::string_words_default[16] = {0,0,0,0,0,0,0,0, 00043 1,1,1,1,1,1,1,1} ; // which string should be used 00044 00045 CDynProg::CDynProg(int32_t num_svms /*= 8 */) 00046 : CSGObject(), m_transition_matrix_a_id(1,1), m_transition_matrix_a(1,1), 00047 m_transition_matrix_a_deriv(1,1), m_initial_state_distribution_p(1), 00048 m_initial_state_distribution_p_deriv(1), m_end_state_distribution_q(1), 00049 m_end_state_distribution_q_deriv(1), 00050 00051 // multi svm 00052 m_num_degrees(4), 00053 m_num_svms(num_svms), 00054 m_word_degree(word_degree_default, m_num_degrees, true, true), 00055 m_cum_num_words(cum_num_words_default, m_num_degrees+1, true, true), 00056 m_cum_num_words_array(m_cum_num_words.get_array()), 00057 m_num_words(num_words_default, m_num_degrees, true, true), 00058 m_num_words_array(m_num_words.get_array()), 00059 m_mod_words(mod_words_default, m_num_svms, 2, true, true), 00060 m_mod_words_array(m_mod_words.get_array()), 00061 m_sign_words(sign_words_default, m_num_svms, true, true), 00062 m_sign_words_array(m_sign_words.get_array()), 00063 m_string_words(string_words_default, m_num_svms, true, true), 00064 m_string_words_array(m_string_words.get_array()), 00065 //m_svm_pos_start(m_num_degrees), 00066 m_num_unique_words(m_num_degrees), 00067 m_svm_arrays_clean(true), 00068 00069 m_max_a_id(0), m_observation_matrix(1,1,1), 00070 m_pos(1), 00071 m_seq_len(0), 00072 m_orf_info(1,2), 00073 m_plif_list(1), 00074 m_PEN(1,1), 00075 m_genestr(1), m_wordstr(NULL), m_dict_weights(1,1), m_segment_loss(1,1,2), 00076 m_segment_ids(1), 00077 m_segment_mask(1), 00078 m_my_state_seq(1), 00079 m_my_pos_seq(1), 00080 m_my_scores(1), 00081 m_my_losses(1), 00082 m_scores(1), 00083 m_states(1,1), 00084 m_positions(1,1), 00085 00086 m_seq_sparse1(NULL), 00087 m_seq_sparse2(NULL), 00088 m_plif_matrices(NULL), 00089 00090 m_genestr_stop(1), 00091 m_intron_list(NULL), 00092 m_num_intron_plifs(0), 00093 m_lin_feat(1,1), //by Jonas 00094 m_raw_intensities(NULL), 00095 m_probe_pos(NULL), 00096 m_num_probes_cum(NULL), 00097 m_num_lin_feat_plifs_cum(NULL), 00098 m_num_raw_data(0), 00099 00100 m_long_transitions(true), 00101 m_long_transition_threshold(1000) 00102 { 00103 trans_list_forward = NULL ; 00104 trans_list_forward_cnt = NULL ; 00105 trans_list_forward_val = NULL ; 00106 trans_list_forward_id = NULL ; 00107 trans_list_len = 0 ; 00108 00109 mem_initialized = true ; 00110 00111 m_N=1; 00112 00113 m_raw_intensities = NULL; 00114 m_probe_pos = NULL; 00115 m_num_probes_cum = SG_MALLOC(int32_t, 100); 00116 m_num_probes_cum[0] = 0; 00117 //m_use_tiling=false; 00118 m_num_lin_feat_plifs_cum = SG_MALLOC(int32_t, 100); 00119 m_num_lin_feat_plifs_cum[0] = m_num_svms; 00120 m_num_raw_data = 0; 00121 #ifdef ARRAY_STATISTICS 00122 m_word_degree.set_array_name("word_degree"); 00123 #endif 00124 00125 m_transition_matrix_a_id.set_array_name("transition_matrix_a_id"); 00126 m_transition_matrix_a.set_array_name("transition_matrix_a"); 00127 m_transition_matrix_a_deriv.set_array_name("transition_matrix_a_deriv"); 00128 m_mod_words.set_array_name("mod_words"); 00129 m_orf_info.set_array_name("orf_info"); 00130 m_segment_sum_weights.set_array_name("segment_sum_weights"); 00131 m_PEN.set_array_name("PEN"); 00132 m_PEN_state_signals.set_array_name("PEN_state_signals"); 00133 m_dict_weights.set_array_name("dict_weights"); 00134 m_states.set_array_name("states"); 00135 m_positions.set_array_name("positions"); 00136 m_lin_feat.set_array_name("lin_feat"); 00137 00138 00139 m_observation_matrix.set_array_name("m_observation_matrix"); 00140 m_segment_loss.set_array_name("m_segment_loss"); 00141 m_seg_loss_obj = new CSegmentLoss(); 00142 } 00143 00144 CDynProg::~CDynProg() 00145 { 00146 if (trans_list_forward_cnt) 00147 SG_FREE(trans_list_forward_cnt); 00148 if (trans_list_forward) 00149 { 00150 for (int32_t i=0; i<trans_list_len; i++) 00151 { 00152 if (trans_list_forward[i]) 00153 SG_FREE(trans_list_forward[i]); 00154 } 00155 SG_FREE(trans_list_forward); 00156 } 00157 if (trans_list_forward_val) 00158 { 00159 for (int32_t i=0; i<trans_list_len; i++) 00160 { 00161 if (trans_list_forward_val[i]) 00162 SG_FREE(trans_list_forward_val[i]); 00163 } 00164 SG_FREE(trans_list_forward_val); 00165 } 00166 if (trans_list_forward_id) 00167 { 00168 for (int32_t i=0; i<trans_list_len; i++) 00169 { 00170 if (trans_list_forward_id[i]) 00171 SG_FREE(trans_list_forward_id[i]); 00172 } 00173 SG_FREE(trans_list_forward_id); 00174 } 00175 if (m_raw_intensities) 00176 SG_FREE(m_raw_intensities); 00177 if (m_probe_pos) 00178 SG_FREE(m_probe_pos); 00179 if (m_num_probes_cum) 00180 SG_FREE(m_num_probes_cum); 00181 if (m_num_lin_feat_plifs_cum) 00182 SG_FREE(m_num_lin_feat_plifs_cum); 00183 00184 delete m_intron_list; 00185 00186 SG_UNREF(m_seq_sparse1); 00187 SG_UNREF(m_seq_sparse2); 00188 SG_UNREF(m_plif_matrices); 00189 } 00190 00192 int32_t CDynProg::get_num_svms() 00193 { 00194 return m_num_svms; 00195 } 00196 00197 void CDynProg::precompute_stop_codons() 00198 { 00199 int32_t length=m_genestr.get_dim1(); 00200 00201 m_genestr_stop.resize_array(length) ; 00202 m_genestr_stop.set_const(0) ; 00203 m_genestr_stop.set_array_name("genestr_stop") ; 00204 { 00205 for (int32_t i=0; i<length-2; i++) 00206 if ((m_genestr[i]=='t' || m_genestr[i]=='T') && 00207 (((m_genestr[i+1]=='a' || m_genestr[i+1]=='A') && 00208 (m_genestr[i+2]=='a' || m_genestr[i+2]=='g' || m_genestr[i+2]=='A' || m_genestr[i+2]=='G')) || 00209 ((m_genestr[i+1]=='g'||m_genestr[i+1]=='G') && (m_genestr[i+2]=='a' || m_genestr[i+2]=='A') ))) 00210 { 00211 m_genestr_stop.element(i)=true ; 00212 } 00213 else 00214 m_genestr_stop.element(i)=false ; 00215 m_genestr_stop.element(length-2)=false ; 00216 m_genestr_stop.element(length-1)=false ; 00217 } 00218 } 00219 00220 void CDynProg::set_num_states(int32_t p_N) 00221 { 00222 m_N=p_N ; 00223 00224 m_transition_matrix_a_id.resize_array(m_N,m_N) ; 00225 m_transition_matrix_a.resize_array(m_N,m_N) ; 00226 m_transition_matrix_a_deriv.resize_array(m_N,m_N) ; 00227 m_initial_state_distribution_p.resize_array(m_N) ; 00228 m_initial_state_distribution_p_deriv.resize_array(m_N) ; 00229 m_end_state_distribution_q.resize_array(m_N); 00230 m_end_state_distribution_q_deriv.resize_array(m_N) ; 00231 00232 m_orf_info.resize_array(m_N,2) ; 00233 m_PEN.resize_array(m_N,m_N) ; 00234 } 00235 00236 int32_t CDynProg::get_num_states() 00237 { 00238 return m_N; 00239 } 00240 00241 void CDynProg::init_tiling_data( 00242 int32_t* probe_pos, float64_t* intensities, const int32_t num_probes) 00243 { 00244 m_num_raw_data++; 00245 m_num_probes_cum[m_num_raw_data] = m_num_probes_cum[m_num_raw_data-1]+num_probes; 00246 00247 int32_t* tmp_probe_pos = SG_MALLOC(int32_t, m_num_probes_cum[m_num_raw_data]); 00248 float64_t* tmp_raw_intensities = SG_MALLOC(float64_t, m_num_probes_cum[m_num_raw_data]); 00249 00250 00251 if (m_num_raw_data==1){ 00252 memcpy(tmp_probe_pos, probe_pos, num_probes*sizeof(int32_t)); 00253 memcpy(tmp_raw_intensities, intensities, num_probes*sizeof(float64_t)); 00254 //SG_PRINT("raw_intens:%f \n",*tmp_raw_intensities+2); 00255 }else{ 00256 memcpy(tmp_probe_pos, m_probe_pos, m_num_probes_cum[m_num_raw_data-1]*sizeof(int32_t)); 00257 memcpy(tmp_raw_intensities, m_raw_intensities, m_num_probes_cum[m_num_raw_data-1]*sizeof(float64_t)); 00258 memcpy(tmp_probe_pos+m_num_probes_cum[m_num_raw_data-1], probe_pos, num_probes*sizeof(int32_t)); 00259 memcpy(tmp_raw_intensities+m_num_probes_cum[m_num_raw_data-1], intensities, num_probes*sizeof(float64_t)); 00260 } 00261 SG_FREE(m_probe_pos); 00262 SG_FREE(m_raw_intensities); 00263 m_probe_pos = tmp_probe_pos; //SG_MALLOC(int32_t, num_probes); 00264 m_raw_intensities = tmp_raw_intensities;//SG_MALLOC(float64_t, num_probes); 00265 00266 //memcpy(m_probe_pos, probe_pos, num_probes*sizeof(int32_t)); 00267 //memcpy(m_raw_intensities, intensities, num_probes*sizeof(float64_t)); 00268 00269 } 00270 00271 void CDynProg::init_content_svm_value_array(const int32_t p_num_svms) 00272 { 00273 m_lin_feat.resize_array(p_num_svms, m_seq_len); 00274 00275 // initialize array 00276 for (int s=0; s<p_num_svms; s++) 00277 for (int p=0; p<m_seq_len; p++) 00278 m_lin_feat.set_element(0.0, s, p) ; 00279 } 00280 00281 void CDynProg::resize_lin_feat(const int32_t num_new_feat) 00282 { 00283 int32_t dim1, dim2; 00284 m_lin_feat.get_array_size(dim1, dim2); 00285 ASSERT(dim1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]); 00286 ASSERT(dim2==m_seq_len); // == number of candidate positions 00287 00288 00289 00290 float64_t* arr = m_lin_feat.get_array(); 00291 float64_t* tmp = SG_MALLOC(float64_t, (dim1+num_new_feat)*dim2); 00292 memset(tmp, 0, (dim1+num_new_feat)*dim2*sizeof(float64_t)) ; 00293 for(int32_t j=0;j<m_seq_len;j++) 00294 for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data-1];k++) 00295 tmp[j*(dim1+num_new_feat)+k] = arr[j*dim1+k]; 00296 00297 m_lin_feat.set_array(tmp, dim1+num_new_feat,dim2, true, true);// copy array and free it later 00298 SG_FREE(tmp); 00299 00300 /*for(int32_t j=0;j<5;j++) 00301 { 00302 for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data];k++) 00303 { 00304 SG_PRINT("(%i,%i)%f ",k,j,m_lin_feat.get_element(k,j)); 00305 } 00306 SG_PRINT("\n"); 00307 } 00308 m_lin_feat.get_array_size(dim1,dim2); 00309 SG_PRINT("resize_lin_feat: dim1:%i, dim2:%i\n",dim1,dim2);*/ 00310 00311 //SG_PRINT("resize_lin_feat: done\n"); 00312 } 00313 00314 void CDynProg::precompute_tiling_plifs( 00315 CPlif** PEN, const int32_t* tiling_plif_ids, const int32_t num_tiling_plifs) 00316 { 00317 m_num_lin_feat_plifs_cum[m_num_raw_data] = m_num_lin_feat_plifs_cum[m_num_raw_data-1]+ num_tiling_plifs; 00318 float64_t* tiling_plif = SG_MALLOC(float64_t, num_tiling_plifs); 00319 float64_t* svm_value = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs); 00320 for (int32_t i=0; i<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; i++) 00321 svm_value[i]=0.0; 00322 int32_t* tiling_rows = SG_MALLOC(int32_t, num_tiling_plifs); 00323 for (int32_t i=0; i<num_tiling_plifs; i++) 00324 { 00325 tiling_plif[i]=0.0; 00326 CPlif * plif = PEN[tiling_plif_ids[i]]; 00327 tiling_rows[i] = plif->get_use_svm(); 00328 00329 ASSERT(tiling_rows[i]-1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i) 00330 } 00331 resize_lin_feat(num_tiling_plifs); 00332 00333 00334 int32_t* p_tiling_pos = &m_probe_pos[m_num_probes_cum[m_num_raw_data-1]]; 00335 float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[m_num_raw_data-1]]; 00336 int32_t num=m_num_probes_cum[m_num_raw_data-1]; 00337 00338 for (int32_t pos_idx=0;pos_idx<m_seq_len;pos_idx++) 00339 { 00340 while (num<m_num_probes_cum[m_num_raw_data]&&*p_tiling_pos<m_pos[pos_idx]) 00341 { 00342 for (int32_t i=0; i<num_tiling_plifs; i++) 00343 { 00344 svm_value[m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i]=*p_tiling_data; 00345 CPlif * plif = PEN[tiling_plif_ids[i]]; 00346 ASSERT(m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i==plif->get_use_svm()-1); 00347 plif->set_do_calc(true); 00348 tiling_plif[i]+=plif->lookup_penalty(0,svm_value); 00349 plif->set_do_calc(false); 00350 } 00351 p_tiling_data++; 00352 p_tiling_pos++; 00353 num++; 00354 } 00355 for (int32_t i=0; i<num_tiling_plifs; i++) 00356 m_lin_feat.set_element(tiling_plif[i],tiling_rows[i]-1,pos_idx); 00357 } 00358 SG_FREE(svm_value); 00359 SG_FREE(tiling_plif); 00360 SG_FREE(tiling_rows); 00361 } 00362 00363 void CDynProg::create_word_string() 00364 { 00365 SG_FREE(m_wordstr); 00366 m_wordstr=SG_MALLOC(uint16_t**, 5440); 00367 int32_t k=0; 00368 int32_t genestr_len=m_genestr.get_dim1(); 00369 00370 m_wordstr[k]=SG_MALLOC(uint16_t*, m_num_degrees); 00371 for (int32_t j=0; j<m_num_degrees; j++) 00372 { 00373 m_wordstr[k][j]=NULL ; 00374 { 00375 m_wordstr[k][j]=SG_MALLOC(uint16_t, genestr_len); 00376 for (int32_t i=0; i<genestr_len; i++) 00377 switch (m_genestr[i]) 00378 { 00379 case 'A': 00380 case 'a': m_wordstr[k][j][i]=0 ; break ; 00381 case 'C': 00382 case 'c': m_wordstr[k][j][i]=1 ; break ; 00383 case 'G': 00384 case 'g': m_wordstr[k][j][i]=2 ; break ; 00385 case 'T': 00386 case 't': m_wordstr[k][j][i]=3 ; break ; 00387 default: ASSERT(0) ; 00388 } 00389 CAlphabet::translate_from_single_order(m_wordstr[k][j], genestr_len, m_word_degree[j]-1, m_word_degree[j], 2) ; 00390 } 00391 } 00392 } 00393 00394 void CDynProg::precompute_content_values() 00395 { 00396 for (int32_t s=0; s<m_num_svms; s++) 00397 m_lin_feat.set_element(0.0, s, 0); 00398 00399 for (int32_t p=0 ; p<m_seq_len-1 ; p++) 00400 { 00401 int32_t from_pos = m_pos[p]; 00402 int32_t to_pos = m_pos[p+1]; 00403 float64_t* my_svm_values_unnormalized = SG_MALLOC(float64_t, m_num_svms); 00404 //SG_PRINT("%i(%i->%i) ",p,from_pos, to_pos); 00405 00406 ASSERT(from_pos<=m_genestr.get_dim1()); 00407 ASSERT(to_pos<=m_genestr.get_dim1()); 00408 00409 for (int32_t s=0; s<m_num_svms; s++) 00410 my_svm_values_unnormalized[s]=0.0;//precomputed_svm_values.element(s,p); 00411 00412 for (int32_t i=from_pos; i<to_pos; i++) 00413 { 00414 for (int32_t j=0; j<m_num_degrees; j++) 00415 { 00416 uint16_t word = m_wordstr[0][j][i] ; 00417 for (int32_t s=0; s<m_num_svms; s++) 00418 { 00419 // check if this k-mer should be considered for this SVM 00420 if (m_mod_words.get_element(s,0)==3 && i%3!=m_mod_words.get_element(s,1)) 00421 continue; 00422 my_svm_values_unnormalized[s] += m_dict_weights[(word+m_cum_num_words_array[j])+s*m_cum_num_words_array[m_num_degrees]] ; 00423 } 00424 } 00425 } 00426 for (int32_t s=0; s<m_num_svms; s++) 00427 { 00428 float64_t prev = m_lin_feat.get_element(s, p); 00429 //SG_PRINT("elem (%i, %i, %f)\n", s, p, prev) ; 00430 if (prev<-1e20 || prev>1e20) 00431 { 00432 SG_ERROR("initialization missing (%i, %i, %f)\n", s, p, prev) ; 00433 prev=0 ; 00434 } 00435 m_lin_feat.set_element(prev + my_svm_values_unnormalized[s], s, p+1); 00436 } 00437 SG_FREE(my_svm_values_unnormalized); 00438 } 00439 //for (int32_t j=0; j<m_num_degrees; j++) 00440 // SG_FREE(m_wordstr[0][j]); 00441 //SG_FREE(m_wordstr[0]); 00442 } 00443 00444 void CDynProg::set_p_vector(SGVector<float64_t> p) 00445 { 00446 if (!(p.vlen==m_N)) 00447 SG_ERROR("length of start prob vector p (%i) is not equal to the number of states (%i), N: %i\n",p.vlen, m_N); 00448 00449 m_initial_state_distribution_p.set_array(p.vector, p.vlen, true, true); 00450 } 00451 00452 void CDynProg::set_q_vector(SGVector<float64_t> q) 00453 { 00454 if (!(q.vlen==m_N)) 00455 SG_ERROR("length of end prob vector q (%i) is not equal to the number of states (%i), N: %i\n",q.vlen, m_N); 00456 m_end_state_distribution_q.set_array(q.vector, q.vlen, true, true); 00457 } 00458 00459 void CDynProg::set_a(SGMatrix<float64_t> a) 00460 { 00461 ASSERT(a.num_cols==m_N); 00462 ASSERT(a.num_rows==m_N); 00463 m_transition_matrix_a.set_array(a.matrix, m_N, m_N, true, true); 00464 m_transition_matrix_a_deriv.resize_array(m_N, m_N); 00465 } 00466 00467 void CDynProg::set_a_id(SGMatrix<int32_t> a) 00468 { 00469 ASSERT(a.num_cols==m_N); 00470 ASSERT(a.num_rows==m_N); 00471 m_transition_matrix_a_id.set_array(a.matrix, m_N, m_N, true, true); 00472 m_max_a_id = 0; 00473 for (int32_t i=0; i<m_N; i++) 00474 { 00475 for (int32_t j=0; j<m_N; j++) 00476 m_max_a_id=CMath::max(m_max_a_id, m_transition_matrix_a_id.element(i,j)); 00477 } 00478 } 00479 00480 void CDynProg::set_a_trans_matrix(SGMatrix<float64_t> a_trans) 00481 { 00482 int32_t num_trans=a_trans.num_rows; 00483 int32_t num_cols=a_trans.num_cols; 00484 00485 //CMath::display_matrix(a_trans.matrix,num_trans, num_cols,"a_trans"); 00486 00487 if (!((num_cols==3) || (num_cols==4))) 00488 SG_ERROR("!((num_cols==3) || (num_cols==4)), num_cols: %i\n",num_cols); 00489 00490 SG_FREE(trans_list_forward); 00491 SG_FREE(trans_list_forward_cnt); 00492 SG_FREE(trans_list_forward_val); 00493 SG_FREE(trans_list_forward_id); 00494 00495 trans_list_forward = NULL ; 00496 trans_list_forward_cnt = NULL ; 00497 trans_list_forward_val = NULL ; 00498 trans_list_len = 0 ; 00499 00500 m_transition_matrix_a.set_const(0) ; 00501 m_transition_matrix_a_id.set_const(0) ; 00502 00503 mem_initialized = true ; 00504 00505 trans_list_forward_cnt=NULL ; 00506 trans_list_len = m_N ; 00507 trans_list_forward = SG_MALLOC(T_STATES*, m_N); 00508 trans_list_forward_cnt = SG_MALLOC(T_STATES, m_N); 00509 trans_list_forward_val = SG_MALLOC(float64_t*, m_N); 00510 trans_list_forward_id = SG_MALLOC(int32_t*, m_N); 00511 00512 int32_t start_idx=0; 00513 for (int32_t j=0; j<m_N; j++) 00514 { 00515 int32_t old_start_idx=start_idx; 00516 00517 while (start_idx<num_trans && a_trans.matrix[start_idx+num_trans]==j) 00518 { 00519 start_idx++; 00520 00521 if (start_idx>1 && start_idx<num_trans) 00522 ASSERT(a_trans.matrix[start_idx+num_trans-1] <= a_trans.matrix[start_idx+num_trans]); 00523 } 00524 00525 if (start_idx>1 && start_idx<num_trans) 00526 ASSERT(a_trans.matrix[start_idx+num_trans-1] <= a_trans.matrix[start_idx+num_trans]); 00527 00528 int32_t len=start_idx-old_start_idx; 00529 ASSERT(len>=0); 00530 00531 trans_list_forward_cnt[j] = 0 ; 00532 00533 if (len>0) 00534 { 00535 trans_list_forward[j] = SG_MALLOC(T_STATES, len); 00536 trans_list_forward_val[j] = SG_MALLOC(float64_t, len); 00537 trans_list_forward_id[j] = SG_MALLOC(int32_t, len); 00538 } 00539 else 00540 { 00541 trans_list_forward[j] = NULL; 00542 trans_list_forward_val[j] = NULL; 00543 trans_list_forward_id[j] = NULL; 00544 } 00545 } 00546 00547 for (int32_t i=0; i<num_trans; i++) 00548 { 00549 int32_t from_state = (int32_t)a_trans.matrix[i] ; 00550 int32_t to_state = (int32_t)a_trans.matrix[i+num_trans] ; 00551 float64_t val = a_trans.matrix[i+num_trans*2] ; 00552 int32_t id = 0 ; 00553 if (num_cols==4) 00554 id = (int32_t)a_trans.matrix[i+num_trans*3] ; 00555 //SG_DEBUG( "id=%i\n", id) ; 00556 00557 ASSERT(to_state>=0 && to_state<m_N); 00558 ASSERT(from_state>=0 && from_state<m_N); 00559 00560 trans_list_forward[to_state][trans_list_forward_cnt[to_state]]=from_state ; 00561 trans_list_forward_val[to_state][trans_list_forward_cnt[to_state]]=val ; 00562 trans_list_forward_id[to_state][trans_list_forward_cnt[to_state]]=id ; 00563 trans_list_forward_cnt[to_state]++ ; 00564 m_transition_matrix_a.element(from_state, to_state) = val ; 00565 m_transition_matrix_a_id.element(from_state, to_state) = id ; 00566 //SG_PRINT("from_state:%i to_state:%i trans_matrix_a_id:%i \n",from_state, to_state,m_transition_matrix_a_id.element(from_state, to_state)); 00567 } ; 00568 00569 m_max_a_id = 0 ; 00570 for (int32_t i=0; i<m_N; i++) 00571 for (int32_t j=0; j<m_N; j++) 00572 { 00573 //if (m_transition_matrix_a_id.element(i,j)) 00574 //SG_DEBUG( "(%i,%i)=%i\n", i,j, m_transition_matrix_a_id.element(i,j)) ; 00575 m_max_a_id = CMath::max(m_max_a_id, m_transition_matrix_a_id.element(i,j)) ; 00576 } 00577 //SG_DEBUG( "m_max_a_id=%i\n", m_max_a_id) ; 00578 } 00579 00580 00581 void CDynProg::init_mod_words_array(SGMatrix<int32_t> mod_words_input) 00582 { 00583 //for (int32_t i=0; i<mod_words_input.num_cols; i++) 00584 //{ 00585 // for (int32_t j=0; j<mod_words_input.num_rows; j++) 00586 // SG_PRINT("%i ",mod_words_input[i*mod_words_input.num_rows+j]); 00587 // SG_PRINT("\n"); 00588 //} 00589 m_svm_arrays_clean=false ; 00590 00591 ASSERT(m_num_svms==mod_words_input.num_rows); 00592 ASSERT(mod_words_input.num_cols==2); 00593 00594 m_mod_words.set_array(mod_words_input.matrix, mod_words_input.num_rows, 2, true, true) ; 00595 m_mod_words_array = m_mod_words.get_array() ; 00596 00597 /*SG_DEBUG( "m_mod_words=[") ; 00598 for (int32_t i=0; i<mod_words_input.num_rows; i++) 00599 SG_DEBUG( "%i, ", p_mod_words_array[i]) ; 00600 SG_DEBUG( "]\n") ;*/ 00601 } 00602 00603 bool CDynProg::check_svm_arrays() 00604 { 00605 //SG_DEBUG( "wd_dim1=%d, m_cum_num_words=%d, m_num_words=%d, m_svm_pos_start=%d, num_uniq_w=%d, mod_words_dims=(%d,%d), sign_w=%d,string_w=%d\n m_num_degrees=%d, m_num_svms=%d, m_num_strings=%d", m_word_degree.get_dim1(), m_cum_num_words.get_dim1(), m_num_words.get_dim1(), m_svm_pos_start.get_dim1(), m_num_unique_words.get_dim1(), m_mod_words.get_dim1(), m_mod_words.get_dim2(), m_sign_words.get_dim1(), m_string_words.get_dim1(), m_num_degrees, m_num_svms, m_num_strings); 00606 if ((m_word_degree.get_dim1()==m_num_degrees) && 00607 (m_cum_num_words.get_dim1()==m_num_degrees+1) && 00608 (m_num_words.get_dim1()==m_num_degrees) && 00609 //(word_used.get_dim1()==m_num_degrees) && 00610 //(word_used.get_dim2()==m_num_words[m_num_degrees-1]) && 00611 //(word_used.get_dim3()==m_num_strings) && 00612 // (svm_values_unnormalized.get_dim1()==m_num_degrees) && 00613 // (svm_values_unnormalized.get_dim2()==m_num_svms) && 00614 //(m_svm_pos_start.get_dim1()==m_num_degrees) && 00615 (m_num_unique_words.get_dim1()==m_num_degrees) && 00616 (m_mod_words.get_dim1()==m_num_svms) && 00617 (m_mod_words.get_dim2()==2) && 00618 (m_sign_words.get_dim1()==m_num_svms) && 00619 (m_string_words.get_dim1()==m_num_svms)) 00620 { 00621 m_svm_arrays_clean=true ; 00622 return true ; 00623 } 00624 else 00625 { 00626 if ((m_num_unique_words.get_dim1()==m_num_degrees) && 00627 (m_mod_words.get_dim1()==m_num_svms) && 00628 (m_mod_words.get_dim2()==2) && 00629 (m_sign_words.get_dim1()==m_num_svms) && 00630 (m_string_words.get_dim1()==m_num_svms)) 00631 SG_PRINT("OK\n") ; 00632 else 00633 SG_PRINT("not OK\n") ; 00634 00635 if (!(m_word_degree.get_dim1()==m_num_degrees)) 00636 SG_WARNING("SVM array: word_degree.get_dim1()!=m_num_degrees") ; 00637 if (!(m_cum_num_words.get_dim1()==m_num_degrees+1)) 00638 SG_WARNING("SVM array: m_cum_num_words.get_dim1()!=m_num_degrees+1") ; 00639 if (!(m_num_words.get_dim1()==m_num_degrees)) 00640 SG_WARNING("SVM array: m_num_words.get_dim1()==m_num_degrees") ; 00641 //if (!(m_svm_pos_start.get_dim1()==m_num_degrees)) 00642 // SG_WARNING("SVM array: m_svm_pos_start.get_dim1()!=m_num_degrees") ; 00643 if (!(m_num_unique_words.get_dim1()==m_num_degrees)) 00644 SG_WARNING("SVM array: m_num_unique_words.get_dim1()!=m_num_degrees") ; 00645 if (!(m_mod_words.get_dim1()==m_num_svms)) 00646 SG_WARNING("SVM array: m_mod_words.get_dim1()!=num_svms") ; 00647 if (!(m_mod_words.get_dim2()==2)) 00648 SG_WARNING("SVM array: m_mod_words.get_dim2()!=2") ; 00649 if (!(m_sign_words.get_dim1()==m_num_svms)) 00650 SG_WARNING("SVM array: m_sign_words.get_dim1()!=num_svms") ; 00651 if (!(m_string_words.get_dim1()==m_num_svms)) 00652 SG_WARNING("SVM array: m_string_words.get_dim1()!=num_svms") ; 00653 00654 m_svm_arrays_clean=false ; 00655 return false ; 00656 } 00657 } 00658 00659 void CDynProg::set_observation_matrix(SGNDArray<float64_t> seq) 00660 { 00661 if (seq.num_dims!=3) 00662 SG_ERROR("Expected 3-dimensional Matrix\n"); 00663 00664 int32_t N=seq.dims[0]; 00665 int32_t cand_pos=seq.dims[1]; 00666 int32_t max_num_features=seq.dims[2]; 00667 00668 if (!m_svm_arrays_clean) 00669 { 00670 SG_ERROR( "SVM arrays not clean") ; 00671 return ; 00672 } ; 00673 00674 ASSERT(N==m_N); 00675 ASSERT(cand_pos==m_seq_len); 00676 ASSERT(m_initial_state_distribution_p.get_dim1()==N); 00677 ASSERT(m_end_state_distribution_q.get_dim1()==N); 00678 00679 m_observation_matrix.set_array(seq.array, N, m_seq_len, max_num_features, true, true) ; 00680 } 00681 int32_t CDynProg::get_num_positions() 00682 { 00683 return m_seq_len; 00684 } 00685 00686 void CDynProg::set_content_type_array(SGMatrix<float64_t> seg_path) 00687 { 00688 ASSERT(seg_path.num_rows==2); 00689 ASSERT(seg_path.num_cols==m_seq_len); 00690 00691 if (seg_path.matrix!=NULL) 00692 { 00693 int32_t *segment_ids = SG_MALLOC(int32_t, m_seq_len); 00694 float64_t *segment_mask = SG_MALLOC(float64_t, m_seq_len); 00695 for (int32_t i=0; i<m_seq_len; i++) 00696 { 00697 segment_ids[i] = (int32_t)seg_path.matrix[2*i] ; 00698 segment_mask[i] = seg_path.matrix[2*i+1] ; 00699 } 00700 best_path_set_segment_ids_mask(segment_ids, segment_mask, m_seq_len) ; 00701 SG_FREE(segment_ids); 00702 SG_FREE(segment_mask); 00703 } 00704 else 00705 { 00706 int32_t *izeros = SG_MALLOC(int32_t, m_seq_len); 00707 float64_t *dzeros = SG_MALLOC(float64_t, m_seq_len); 00708 for (int32_t i=0; i<m_seq_len; i++) 00709 { 00710 izeros[i]=0 ; 00711 dzeros[i]=0.0 ; 00712 } 00713 best_path_set_segment_ids_mask(izeros, dzeros, m_seq_len) ; 00714 SG_FREE(izeros); 00715 SG_FREE(dzeros); 00716 } 00717 } 00718 00719 void CDynProg::set_pos(SGVector<int32_t> pos) 00720 { 00721 m_pos.set_array(pos.vector, pos.vlen, true, true) ; 00722 m_seq_len = pos.vlen; 00723 } 00724 00725 void CDynProg::set_orf_info(SGMatrix<int32_t> orf_info) 00726 { 00727 if (orf_info.num_cols!=2) 00728 SG_ERROR( "orf_info size incorrect %i!=2\n", orf_info.num_cols) ; 00729 00730 m_orf_info.set_array(orf_info.matrix, orf_info.num_rows, orf_info.num_cols, true, true) ; 00731 m_orf_info.set_array_name("orf_info") ; 00732 } 00733 00734 void CDynProg::set_sparse_features(CSparseFeatures<float64_t>* seq_sparse1, CSparseFeatures<float64_t>* seq_sparse2) 00735 { 00736 if ((!seq_sparse1 && seq_sparse2) || (seq_sparse1 && !seq_sparse2)) 00737 SG_ERROR("Sparse features must either both be NULL or both NON-NULL\n"); 00738 00739 SG_UNREF(m_seq_sparse1); 00740 SG_UNREF(m_seq_sparse2); 00741 00742 m_seq_sparse1=seq_sparse1; 00743 m_seq_sparse2=seq_sparse2; 00744 SG_REF(m_seq_sparse1); 00745 SG_REF(m_seq_sparse2); 00746 } 00747 00748 void CDynProg::set_plif_matrices(CPlifMatrix* pm) 00749 { 00750 SG_UNREF(m_plif_matrices); 00751 00752 m_plif_matrices=pm; 00753 00754 SG_REF(m_plif_matrices); 00755 } 00756 00757 void CDynProg::set_gene_string(SGVector<char> genestr) 00758 { 00759 ASSERT(genestr.vector); 00760 ASSERT(genestr.vlen>0); 00761 00762 m_genestr.set_array(genestr.vector, genestr.vlen, true, true) ; 00763 } 00764 00765 void CDynProg::set_my_state_seq(int32_t* my_state_seq) 00766 { 00767 ASSERT(my_state_seq && m_seq_len>0); 00768 m_my_state_seq.resize_array(m_seq_len); 00769 for (int32_t i=0; i<m_seq_len; i++) 00770 m_my_state_seq[i]=my_state_seq[i]; 00771 } 00772 00773 void CDynProg::set_my_pos_seq(int32_t* my_pos_seq) 00774 { 00775 ASSERT(my_pos_seq && m_seq_len>0); 00776 m_my_pos_seq.resize_array(m_seq_len); 00777 for (int32_t i=0; i<m_seq_len; i++) 00778 m_my_pos_seq[i]=my_pos_seq[i]; 00779 } 00780 00781 void CDynProg::set_dict_weights(SGMatrix<float64_t> dictionary_weights) 00782 { 00783 if (m_num_svms!=dictionary_weights.num_cols) 00784 { 00785 SG_ERROR( "m_dict_weights array does not match num_svms=%i!=%i\n", 00786 m_num_svms, dictionary_weights.num_cols) ; 00787 } 00788 00789 m_dict_weights.set_array(dictionary_weights.matrix, dictionary_weights.num_rows, m_num_svms, true, true) ; 00790 00791 // initialize, so it does not bother when not used 00792 m_segment_loss.resize_array(m_max_a_id+1, m_max_a_id+1, 2) ; 00793 m_segment_loss.set_const(0) ; 00794 m_segment_ids.resize_array(m_observation_matrix.get_dim2()) ; 00795 m_segment_mask.resize_array(m_observation_matrix.get_dim2()) ; 00796 m_segment_ids.set_const(0) ; 00797 m_segment_mask.set_const(0) ; 00798 } 00799 00800 void CDynProg::best_path_set_segment_loss(SGMatrix<float64_t> segment_loss) 00801 { 00802 int32_t m=segment_loss.num_rows; 00803 int32_t n=segment_loss.num_cols; 00804 // here we need two matrices. Store it in one: 2N x N 00805 if (2*m!=n) 00806 SG_ERROR( "segment_loss should be 2 x quadratic matrix: %i!=%i\n", 2*m, n) ; 00807 00808 if (m!=m_max_a_id+1) 00809 SG_ERROR( "segment_loss size should match m_max_a_id: %i!=%i\n", m, m_max_a_id+1) ; 00810 00811 m_segment_loss.set_array(segment_loss.matrix, m, n/2, 2, true, true) ; 00812 /*for (int32_t i=0; i<n; i++) 00813 for (int32_t j=0; j<n; j++) 00814 SG_DEBUG( "loss(%i,%i)=%f\n", i,j, m_segment_loss.element(0,i,j)) ;*/ 00815 } 00816 00817 void CDynProg::best_path_set_segment_ids_mask( 00818 int32_t* segment_ids, float64_t* segment_mask, int32_t m) 00819 { 00820 00821 if (m!=m_observation_matrix.get_dim2()) 00822 SG_ERROR("size of segment_ids or segment_mask (%i) does not match the size of the feature matrix (%i)", m, m_observation_matrix.get_dim2()); 00823 int32_t max_id = 0; 00824 for (int32_t i=1;i<m;i++) 00825 max_id = CMath::max(max_id,segment_ids[i]); 00826 //SG_PRINT("max_id: %i, m:%i\n",max_id, m); 00827 m_segment_ids.set_array(segment_ids, m, true, true) ; 00828 m_segment_ids.set_array_name("m_segment_ids"); 00829 m_segment_mask.set_array(segment_mask, m, true, true) ; 00830 m_segment_mask.set_array_name("m_segment_mask"); 00831 00832 m_seg_loss_obj->set_segment_mask(&m_segment_mask); 00833 m_seg_loss_obj->set_segment_ids(&m_segment_ids); 00834 m_seg_loss_obj->compute_loss(m_pos.get_array(), m_seq_len); 00835 } 00836 00837 SGVector<float64_t> CDynProg::get_scores() 00838 { 00839 SGVector<float64_t> scores(m_scores.get_dim1()); 00840 memcpy(scores.vector,m_scores.get_array(), sizeof(float64_t)*(m_scores.get_dim1())); 00841 00842 return scores; 00843 } 00844 00845 SGMatrix<int32_t> CDynProg::get_states() 00846 { 00847 SGMatrix<int32_t> states(m_states.get_dim1(), m_states.get_dim2()); 00848 00849 int32_t sz = sizeof(int32_t)*( m_states.get_dim1() * m_states.get_dim2() ); 00850 memcpy(states.matrix ,m_states.get_array(),sz); 00851 00852 return states; 00853 } 00854 00855 SGMatrix<int32_t> CDynProg::get_positions() 00856 { 00857 SGMatrix<int32_t> positions(m_positions.get_dim1(), m_positions.get_dim2()); 00858 00859 int32_t sz = sizeof(int32_t)*(m_positions.get_dim1()*m_positions.get_dim2()); 00860 memcpy(positions.matrix, m_positions.get_array(),sz); 00861 00862 return positions; 00863 } 00864 00865 void CDynProg::get_path_scores(float64_t** scores, int32_t* seq_len) 00866 { 00867 ASSERT(scores && seq_len); 00868 00869 *seq_len=m_my_scores.get_dim1(); 00870 00871 int32_t sz = sizeof(float64_t)*(*seq_len); 00872 00873 *scores = SG_MALLOC(float64_t, *seq_len); 00874 ASSERT(*scores); 00875 00876 memcpy(*scores,m_my_scores.get_array(),sz); 00877 } 00878 00879 void CDynProg::get_path_losses(float64_t** losses, int32_t* seq_len) 00880 { 00881 ASSERT(losses && seq_len); 00882 00883 *seq_len=m_my_losses.get_dim1(); 00884 00885 int32_t sz = sizeof(float64_t)*(*seq_len); 00886 00887 *losses = SG_MALLOC(float64_t, *seq_len); 00888 ASSERT(*losses); 00889 00890 memcpy(*losses,m_my_losses.get_array(),sz); 00891 } 00892 00894 00895 bool CDynProg::extend_orf( 00896 int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos, 00897 int32_t to) 00898 { 00899 #ifdef DYNPROG_TIMING_DETAIL 00900 MyTime.start() ; 00901 #endif 00902 00903 if (start<0) 00904 start=0 ; 00905 if (to<0) 00906 to=0 ; 00907 00908 int32_t orf_target = orf_to-orf_from ; 00909 if (orf_target<0) orf_target+=3 ; 00910 00911 int32_t pos ; 00912 if (last_pos==to) 00913 pos = to-orf_to-3 ; 00914 else 00915 pos=last_pos ; 00916 00917 if (pos<0) 00918 { 00919 #ifdef DYNPROG_TIMING_DETAIL 00920 MyTime.stop() ; 00921 orf_time += MyTime.time_diff_sec() ; 00922 #endif 00923 return true ; 00924 } 00925 00926 for (; pos>=start; pos-=3) 00927 if (m_genestr_stop[pos]) 00928 { 00929 #ifdef DYNPROG_TIMING_DETAIL 00930 MyTime.stop() ; 00931 orf_time += MyTime.time_diff_sec() ; 00932 #endif 00933 return false ; 00934 } 00935 00936 00937 last_pos = CMath::min(pos+3,to-orf_to-3) ; 00938 00939 #ifdef DYNPROG_TIMING_DETAIL 00940 MyTime.stop() ; 00941 orf_time += MyTime.time_diff_sec() ; 00942 #endif 00943 return true ; 00944 } 00945 00946 void CDynProg::compute_nbest_paths(int32_t max_num_signals, bool use_orf, 00947 int16_t nbest, bool with_loss, bool with_multiple_sequences) 00948 { 00949 00950 //FIXME we need checks here if all the fields are of right size 00951 //SG_PRINT("m_seq_len: %i\n", m_seq_len); 00952 //SG_PRINT("m_pos[0]: %i\n", m_pos[0]); 00953 //SG_PRINT("\n"); 00954 00955 //FIXME these variables can go away when compute_nbest_paths uses them 00956 //instead of the local pointers below 00957 const float64_t* seq_array = m_observation_matrix.get_array(); 00958 m_scores.resize_array(nbest) ; 00959 m_states.resize_array(nbest, m_observation_matrix.get_dim2()) ; 00960 m_positions.resize_array(nbest, m_observation_matrix.get_dim2()) ; 00961 00962 for (int32_t i=0; i<nbest; i++) 00963 { 00964 m_scores[i]=-1; 00965 for (int32_t j=0; j<m_observation_matrix.get_dim2(); j++) 00966 { 00967 m_states.element(i,j)=-1; 00968 m_positions.element(i,j)=-1; 00969 } 00970 } 00971 float64_t* prob_nbest=m_scores.get_array(); 00972 int32_t* my_state_seq=m_states.get_array(); 00973 int32_t* my_pos_seq=m_positions.get_array(); 00974 00975 CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix(); 00976 CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals(); 00977 //END FIXME 00978 00979 00980 #ifdef DYNPROG_TIMING 00981 segment_init_time = 0.0 ; 00982 segment_pos_time = 0.0 ; 00983 segment_extend_time = 0.0 ; 00984 segment_clean_time = 0.0 ; 00985 orf_time = 0.0 ; 00986 svm_init_time = 0.0 ; 00987 svm_pos_time = 0.0 ; 00988 svm_clean_time = 0.0 ; 00989 inner_loop_time = 0.0 ; 00990 content_svm_values_time = 0.0 ; 00991 content_plifs_time = 0.0 ; 00992 inner_loop_max_time = 0.0 ; 00993 long_transition_time = 0.0 ; 00994 00995 MyTime2.start() ; 00996 #endif 00997 00998 if (!m_svm_arrays_clean) 00999 { 01000 SG_ERROR( "SVM arrays not clean") ; 01001 return ; 01002 } 01003 01004 #ifdef DYNPROG_DEBUG 01005 m_transition_matrix_a.set_array_name("transition_matrix"); 01006 m_transition_matrix_a.display_array(); 01007 m_mod_words.display_array() ; 01008 m_sign_words.display_array() ; 01009 m_string_words.display_array() ; 01010 //SG_PRINT("use_orf = %i\n", use_orf) ; 01011 #endif 01012 01013 int32_t max_look_back = 1000 ; 01014 bool use_svm = false ; 01015 01016 SG_DEBUG("m_N:%i, m_seq_len:%i, max_num_signals:%i\n",m_N, m_seq_len, max_num_signals) ; 01017 01018 //for (int32_t i=0;i<m_N*m_seq_len*max_num_signals;i++) 01019 // SG_PRINT("(%i)%0.2f ",i,seq_array[i]); 01020 01021 CDynamicObjectArray PEN((CSGObject**) Plif_matrix, m_N, m_N, false, false) ; // 2d, CPlifBase* 01022 PEN.set_array_name("PEN"); 01023 01024 CDynamicObjectArray PEN_state_signals((CSGObject**) Plif_state_signals, m_N, max_num_signals, false, false) ; // 2d, CPlifBase* 01025 PEN_state_signals.set_array_name("state_signals"); 01026 01027 CDynamicArray<float64_t> seq(m_N, m_seq_len) ; // 2d 01028 seq.set_array_name("seq") ; 01029 seq.set_const(0) ; 01030 01031 #ifdef DYNPROG_DEBUG 01032 SG_PRINT("m_num_raw_data: %i\n",m_num_raw_data); 01033 SG_PRINT("m_num_intron_plifs: %i\n", m_num_intron_plifs); 01034 SG_PRINT("m_num_svms: %i\n", m_num_svms); 01035 SG_PRINT("m_num_lin_feat_plifs_cum: "); 01036 for (int i=0; i<=m_num_raw_data; i++) 01037 SG_PRINT(" %i ",m_num_lin_feat_plifs_cum[i]); 01038 SG_PRINT("\n"); 01039 #endif 01040 01041 float64_t* svm_value = SG_MALLOC(float64_t , m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs); 01042 { // initialize svm_svalue 01043 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 01044 svm_value[s]=0 ; 01045 } 01046 01047 { // convert seq_input to seq 01048 // this is independent of the svm values 01049 01050 CDynamicArray<float64_t> *seq_input=NULL ; // 3d 01051 if (seq_array!=NULL) 01052 { 01053 //SG_PRINT("using dense seq_array\n") ; 01054 01055 seq_input=new CDynamicArray<float64_t>(seq_array, m_N, m_seq_len, max_num_signals) ; 01056 seq_input->set_array_name("seq_input") ; 01057 //seq_input.display_array() ; 01058 01059 ASSERT(m_seq_sparse1==NULL) ; 01060 ASSERT(m_seq_sparse2==NULL) ; 01061 } else 01062 { 01063 SG_PRINT("using sparse seq_array\n") ; 01064 01065 ASSERT(m_seq_sparse1!=NULL) ; 01066 ASSERT(m_seq_sparse2!=NULL) ; 01067 ASSERT(max_num_signals==2) ; 01068 } 01069 01070 for (int32_t i=0; i<m_N; i++) 01071 for (int32_t j=0; j<m_seq_len; j++) 01072 seq.element(i,j) = 0 ; 01073 01074 for (int32_t i=0; i<m_N; i++) 01075 for (int32_t j=0; j<m_seq_len; j++) 01076 for (int32_t k=0; k<max_num_signals; k++) 01077 { 01078 if ((PEN_state_signals.element(i,k)==NULL) && (k==0)) 01079 { 01080 // no plif 01081 if (seq_input!=NULL) 01082 seq.element(i,j) = seq_input->element(i,j,k) ; 01083 else 01084 { 01085 if (k==0) 01086 seq.element(i,j) = m_seq_sparse1->get_feature(i,j) ; 01087 if (k==1) 01088 seq.element(i,j) = m_seq_sparse2->get_feature(i,j) ; 01089 } 01090 break ; 01091 } 01092 if (PEN_state_signals.element(i,k)!=NULL) 01093 { 01094 if (seq_input!=NULL) 01095 { 01096 // just one plif 01097 if (CMath::is_finite(seq_input->element(i,j,k))) 01098 seq.element(i,j) += ((CPlifBase*) PEN_state_signals.element(i,k))->lookup_penalty(seq_input->element(i,j,k), svm_value) ; 01099 else 01100 // keep infinity values 01101 seq.element(i,j) = seq_input->element(i, j, k) ; 01102 } 01103 else 01104 { 01105 if (k==0) 01106 { 01107 // just one plif 01108 if (CMath::is_finite(m_seq_sparse1->get_feature(i,j))) 01109 seq.element(i,j) += ((CPlifBase*) PEN_state_signals.element(i,k))->lookup_penalty(m_seq_sparse1->get_feature(i,j), svm_value) ; 01110 else 01111 // keep infinity values 01112 seq.element(i,j) = m_seq_sparse1->get_feature(i, j) ; 01113 } 01114 if (k==1) 01115 { 01116 // just one plif 01117 if (CMath::is_finite(m_seq_sparse2->get_feature(i,j))) 01118 seq.element(i,j) += ((CPlifBase*) PEN_state_signals.element(i,k))->lookup_penalty(m_seq_sparse2->get_feature(i,j), svm_value) ; 01119 else 01120 // keep infinity values 01121 seq.element(i,j) = m_seq_sparse2->get_feature(i, j) ; 01122 } 01123 } 01124 } 01125 else 01126 break ; 01127 } 01128 delete seq_input; 01129 SG_FREE(svm_value); 01130 } 01131 01132 // allow longer transitions than look_back 01133 bool long_transitions = m_long_transitions ; 01134 CDynamicArray<int32_t> long_transition_content_start_position(m_N,m_N) ; // 2d 01135 long_transition_content_start_position.set_array_name("long_transition_content_start_position"); 01136 #ifdef DYNPROG_DEBUG 01137 CDynamicArray<int32_t> long_transition_content_end_position(m_N,m_N) ; // 2d 01138 long_transition_content_end_position.set_array_name("long_transition_content_end_position"); 01139 #endif 01140 CDynamicArray<int32_t> long_transition_content_start(m_N,m_N) ; // 2d 01141 long_transition_content_start.set_array_name("long_transition_content_start"); 01142 01143 CDynamicArray<float64_t> long_transition_content_scores(m_N,m_N) ; // 2d 01144 long_transition_content_scores.set_array_name("long_transition_content_scores"); 01145 #ifdef DYNPROG_DEBUG 01146 01147 CDynamicArray<float64_t> long_transition_content_scores_pen(m_N,m_N) ; // 2d 01148 long_transition_content_scores_pen.set_array_name("long_transition_content_scores_pen"); 01149 01150 CDynamicArray<float64_t> long_transition_content_scores_prev(m_N,m_N) ; // 2d 01151 long_transition_content_scores_prev.set_array_name("long_transition_content_scores_prev"); 01152 01153 CDynamicArray<float64_t> long_transition_content_scores_elem(m_N,m_N) ; // 2d 01154 long_transition_content_scores_elem.set_array_name("long_transition_content_scores_elem"); 01155 #endif 01156 CDynamicArray<float64_t> long_transition_content_scores_loss(m_N,m_N) ; // 2d 01157 long_transition_content_scores_loss.set_array_name("long_transition_content_scores_loss"); 01158 01159 if (nbest!=1) 01160 { 01161 SG_ERROR("Long transitions are not supported for nbest!=1") ; 01162 long_transitions = false ; 01163 } 01164 long_transition_content_scores.set_const(-CMath::INFTY); 01165 #ifdef DYNPROG_DEBUG 01166 long_transition_content_scores_pen.set_const(0) ; 01167 long_transition_content_scores_elem.set_const(0) ; 01168 long_transition_content_scores_prev.set_const(0) ; 01169 #endif 01170 if (with_loss) 01171 long_transition_content_scores_loss.set_const(0) ; 01172 long_transition_content_start.set_const(0) ; 01173 long_transition_content_start_position.set_const(0) ; 01174 #ifdef DYNPROG_DEBUG 01175 long_transition_content_end_position.set_const(0) ; 01176 #endif 01177 01178 svm_value = SG_MALLOC(float64_t , m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs); 01179 { // initialize svm_svalue 01180 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 01181 svm_value[s]=0 ; 01182 } 01183 01184 CDynamicArray<int32_t> look_back(m_N,m_N) ; // 2d 01185 look_back.set_array_name("look_back"); 01186 //CDynamicArray<int32_t> look_back_orig(m_N,m_N) ; 01187 //look_back.set_array_name("look_back_orig"); 01188 01189 01190 { // determine maximal length of look-back 01191 for (int32_t i=0; i<m_N; i++) 01192 for (int32_t j=0; j<m_N; j++) 01193 { 01194 look_back.set_element(INT_MAX, i, j) ; 01195 //look_back_orig.set_element(INT_MAX, i, j) ; 01196 } 01197 01198 for (int32_t j=0; j<m_N; j++) 01199 { 01200 // only consider transitions that are actually allowed 01201 const T_STATES num_elem = trans_list_forward_cnt[j] ; 01202 const T_STATES *elem_list = trans_list_forward[j] ; 01203 01204 for (int32_t i=0; i<num_elem; i++) 01205 { 01206 T_STATES ii = elem_list[i] ; 01207 01208 CPlifBase *penij=(CPlifBase*) PEN.element(j, ii) ; 01209 if (penij==NULL) 01210 { 01211 if (long_transitions) 01212 { 01213 look_back.set_element(m_long_transition_threshold, j, ii) ; 01214 //look_back_orig.set_element(m_long_transition_max, j, ii) ; 01215 } 01216 continue ; 01217 } 01218 01219 /* if the transition is an ORF or we do computation with loss, we have to disable long transitions */ 01220 if ((m_orf_info.element(ii,0)!=-1) || (m_orf_info.element(j,1)!=-1) || (!long_transitions)) 01221 { 01222 look_back.set_element(CMath::ceil(penij->get_max_value()), j, ii) ; 01223 //look_back_orig.set_element(CMath::ceil(penij->get_max_value()), j, ii) ; 01224 if (CMath::ceil(penij->get_max_value()) > max_look_back) 01225 { 01226 SG_DEBUG( "%d %d -> value: %f\n", ii,j,penij->get_max_value()); 01227 max_look_back = (int32_t) (CMath::ceil(penij->get_max_value())); 01228 } 01229 } 01230 else 01231 { 01232 look_back.set_element(CMath::min( (int32_t)CMath::ceil(penij->get_max_value()), m_long_transition_threshold ), j, ii) ; 01233 //look_back_orig.set_element( (int32_t)CMath::ceil(penij->get_max_value()), j, ii) ; 01234 } 01235 01236 if (penij->uses_svm_values()) 01237 use_svm=true ; 01238 } 01239 } 01240 /* make sure max_look_back is at least as long as a long transition */ 01241 if (long_transitions) 01242 max_look_back = CMath::max(m_long_transition_threshold, max_look_back) ; 01243 01244 /* make sure max_look_back is not longer than the whole string */ 01245 max_look_back = CMath::min(m_genestr.get_dim1(), max_look_back) ; 01246 01247 int32_t num_long_transitions = 0 ; 01248 for (int32_t i=0; i<m_N; i++) 01249 for (int32_t j=0; j<m_N; j++) 01250 { 01251 if (look_back.get_element(i,j)==m_long_transition_threshold) 01252 num_long_transitions++ ; 01253 if (look_back.get_element(i,j)==INT_MAX) 01254 { 01255 if (long_transitions) 01256 { 01257 look_back.set_element(m_long_transition_threshold, i, j) ; 01258 //look_back_orig.set_element(m_long_transition_max, i, j) ; 01259 } 01260 else 01261 { 01262 look_back.set_element(max_look_back, i, j) ; 01263 //look_back_orig.set_element(m_long_transition_max, i, j) ; 01264 } 01265 } 01266 } 01267 SG_DEBUG("Using %i long transitions\n", num_long_transitions) ; 01268 } 01269 //SG_PRINT("max_look_back: %i \n", max_look_back) ; 01270 01271 //SG_PRINT("use_svm=%i, genestr_len: \n", use_svm, m_genestr.get_dim1()) ; 01272 SG_DEBUG("use_svm=%i\n", use_svm) ; 01273 01274 SG_DEBUG("maxlook: %d m_N: %d nbest: %d \n", max_look_back, m_N, nbest); 01275 const int32_t look_back_buflen = (max_look_back*m_N+1)*nbest ; 01276 SG_DEBUG("look_back_buflen=%i\n", look_back_buflen) ; 01277 /*const float64_t mem_use = (float64_t)(m_seq_len*m_N*nbest*(sizeof(T_STATES)+sizeof(int16_t)+sizeof(int32_t))+ 01278 look_back_buflen*(2*sizeof(float64_t)+sizeof(int32_t))+ 01279 m_seq_len*(sizeof(T_STATES)+sizeof(int32_t))+ 01280 m_genestr.get_dim1()*sizeof(bool))/(1024*1024);*/ 01281 01282 //bool is_big = (mem_use>200) || (m_seq_len>5000) ; 01283 01284 /*if (is_big) 01285 { 01286 SG_DEBUG("calling compute_nbest_paths: m_seq_len=%i, m_N=%i, lookback=%i nbest=%i\n", 01287 m_seq_len, m_N, max_look_back, nbest) ; 01288 SG_DEBUG("allocating %1.2fMB of memory\n", 01289 mem_use) ; 01290 }*/ 01291 ASSERT(nbest<32000) ; 01292 01293 CDynamicArray<float64_t> delta(m_seq_len, m_N, nbest) ; // 3d 01294 delta.set_array_name("delta"); 01295 float64_t* delta_array = delta.get_array() ; 01296 //delta.set_const(0) ; 01297 01298 CDynamicArray<T_STATES> psi(m_seq_len, m_N, nbest) ; // 3d 01299 psi.set_array_name("psi"); 01300 //psi.set_const(0) ; 01301 01302 CDynamicArray<int16_t> ktable(m_seq_len, m_N, nbest) ; // 3d 01303 ktable.set_array_name("ktable"); 01304 //ktable.set_const(0) ; 01305 01306 CDynamicArray<int32_t> ptable(m_seq_len, m_N, nbest) ; // 3d 01307 ptable.set_array_name("ptable"); 01308 //ptable.set_const(0) ; 01309 01310 CDynamicArray<float64_t> delta_end(nbest) ; 01311 delta_end.set_array_name("delta_end"); 01312 //delta_end.set_const(0) ; 01313 01314 CDynamicArray<T_STATES> path_ends(nbest) ; 01315 path_ends.set_array_name("path_ends"); 01316 //path_ends.set_const(0) ; 01317 01318 CDynamicArray<int16_t> ktable_end(nbest) ; 01319 ktable_end.set_array_name("ktable_end"); 01320 //ktable_end.set_const(0) ; 01321 01322 float64_t * fixedtempvv=SG_MALLOC(float64_t, look_back_buflen); 01323 memset(fixedtempvv, 0, look_back_buflen*sizeof(float64_t)) ; 01324 int32_t * fixedtempii=SG_MALLOC(int32_t, look_back_buflen); 01325 memset(fixedtempii, 0, look_back_buflen*sizeof(int32_t)) ; 01326 01327 CDynamicArray<float64_t> oldtempvv(look_back_buflen) ; 01328 oldtempvv.set_array_name("oldtempvv"); 01329 01330 CDynamicArray<float64_t> oldtempvv2(look_back_buflen) ; 01331 oldtempvv2.set_array_name("oldtempvv2"); 01332 //oldtempvv.set_const(0) ; 01333 //oldtempvv.display_size() ; 01334 01335 CDynamicArray<int32_t> oldtempii(look_back_buflen) ; 01336 oldtempii.set_array_name("oldtempii"); 01337 01338 CDynamicArray<int32_t> oldtempii2(look_back_buflen) ; 01339 oldtempii2.set_array_name("oldtempii2"); 01340 //oldtempii.set_const(0) ; 01341 01342 CDynamicArray<T_STATES> state_seq(m_seq_len) ; 01343 state_seq.set_array_name("state_seq"); 01344 //state_seq.set_const(0) ; 01345 01346 CDynamicArray<int32_t> pos_seq(m_seq_len) ; 01347 pos_seq.set_array_name("pos_seq"); 01348 //pos_seq.set_const(0) ; 01349 01350 01351 m_dict_weights.set_array_name("dict_weights") ; 01352 m_word_degree.set_array_name("word_degree") ; 01353 m_cum_num_words.set_array_name("cum_num_words") ; 01354 m_num_words.set_array_name("num_words") ; 01355 //word_used.set_array_name("word_used") ; 01356 //svm_values_unnormalized.set_array_name("svm_values_unnormalized") ; 01357 //m_svm_pos_start.set_array_name("svm_pos_start") ; 01358 m_num_unique_words.set_array_name("num_unique_words") ; 01359 01360 PEN.set_array_name("PEN") ; 01361 seq.set_array_name("seq") ; 01362 01363 delta.set_array_name("delta") ; 01364 psi.set_array_name("psi") ; 01365 ktable.set_array_name("ktable") ; 01366 ptable.set_array_name("ptable") ; 01367 delta_end.set_array_name("delta_end") ; 01368 path_ends.set_array_name("path_ends") ; 01369 ktable_end.set_array_name("ktable_end") ; 01370 01371 #ifdef USE_TMP_ARRAYCLASS 01372 fixedtempvv.set_array_name("fixedtempvv") ; 01373 fixedtempii.set_array_name("fixedtempvv") ; 01374 #endif 01375 01376 oldtempvv.set_array_name("oldtempvv") ; 01377 oldtempvv2.set_array_name("oldtempvv2") ; 01378 oldtempii.set_array_name("oldtempii") ; 01379 oldtempii2.set_array_name("oldtempii2") ; 01380 01381 01383 01384 #ifdef DYNPROG_DEBUG 01385 state_seq.display_size() ; 01386 pos_seq.display_size() ; 01387 01388 m_dict_weights.display_size() ; 01389 m_word_degree.display_array() ; 01390 m_cum_num_words.display_array() ; 01391 m_num_words.display_array() ; 01392 //word_used.display_size() ; 01393 //svm_values_unnormalized.display_size() ; 01394 //m_svm_pos_start.display_array() ; 01395 m_num_unique_words.display_array() ; 01396 01397 PEN.display_size() ; 01398 PEN_state_signals.display_size() ; 01399 seq.display_size() ; 01400 m_orf_info.display_size() ; 01401 01402 //m_genestr_stop.display_size() ; 01403 delta.display_size() ; 01404 psi.display_size() ; 01405 ktable.display_size() ; 01406 ptable.display_size() ; 01407 delta_end.display_size() ; 01408 path_ends.display_size() ; 01409 ktable_end.display_size() ; 01410 01411 #ifdef USE_TMP_ARRAYCLASS 01412 fixedtempvv.display_size() ; 01413 fixedtempii.display_size() ; 01414 #endif 01415 01416 //oldtempvv.display_size() ; 01417 //oldtempii.display_size() ; 01418 01419 state_seq.display_size() ; 01420 pos_seq.display_size() ; 01421 01422 //seq.set_const(0) ; 01423 01424 #endif //DYNPROG_DEBUG 01425 01427 01428 01429 01430 { 01431 for (int32_t s=0; s<m_num_svms; s++) 01432 ASSERT(m_string_words_array[s]<1) ; 01433 } 01434 01435 01436 //CDynamicArray<int32_t*> trans_matrix_svms(m_N,m_N); // 2d 01437 //CDynamicArray<int32_t> trans_matrix_num_svms(m_N,m_N); // 2d 01438 01439 { // initialization 01440 01441 for (T_STATES i=0; i<m_N; i++) 01442 { 01443 //delta.element(0, i, 0) = get_p(i) + seq.element(i,0) ; // get_p defined in HMM.h to be equiv to initial_state_distribution 01444 delta.element(delta_array, 0, i, 0, m_seq_len, m_N) = get_p(i) + seq.element(i,0) ; // get_p defined in HMM.h to be equiv to initial_state_distribution 01445 psi.element(0,i,0) = 0 ; 01446 if (nbest>1) 01447 ktable.element(0,i,0) = 0 ; 01448 ptable.element(0,i,0) = 0 ; 01449 for (int16_t k=1; k<nbest; k++) 01450 { 01451 int32_t dim1, dim2, dim3 ; 01452 delta.get_array_size(dim1, dim2, dim3) ; 01453 //SG_DEBUG("i=%i, k=%i -- %i, %i, %i\n", i, k, dim1, dim2, dim3) ; 01454 //delta.element(0, i, k) = -CMath::INFTY ; 01455 delta.element(delta_array, 0, i, k, m_seq_len, m_N) = -CMath::INFTY ; 01456 psi.element(0,i,0) = 0 ; // <--- what's this for? 01457 if (nbest>1) 01458 ktable.element(0,i,k) = 0 ; 01459 ptable.element(0,i,k) = 0 ; 01460 } 01461 /* 01462 for (T_STATES j=0; j<m_N; j++) 01463 { 01464 CPlifBase * penalty = PEN.element(i,j) ; 01465 int32_t num_current_svms=0; 01466 int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1}; 01467 if (penalty) 01468 { 01469 SG_PRINT("trans %i -> %i \n",i,j); 01470 penalty->get_used_svms(&num_current_svms, svm_ids); 01471 trans_matrix_svms.set_element(svm_ids,i,j); 01472 for (int32_t l=0;l<num_current_svms;l++) 01473 SG_PRINT("svm_ids[%i]: %i \n",l,svm_ids[l]); 01474 trans_matrix_num_svms.set_element(num_current_svms,i,j); 01475 } 01476 } 01477 */ 01478 01479 } 01480 } 01481 01482 SG_DEBUG("START_RECURSION \n\n"); 01483 01484 // recursion 01485 for (int32_t t=1; t<m_seq_len; t++) 01486 { 01487 //if (is_big && t%(1+(m_seq_len/1000))==1) 01488 // SG_PROGRESS(t, 0, m_seq_len); 01489 //SG_PRINT("%i\n", t) ; 01490 01491 for (T_STATES j=0; j<m_N; j++) 01492 { 01493 if (seq.element(j,t)<=-1e20) 01494 { // if we cannot observe the symbol here, then we can omit the rest 01495 for (int16_t k=0; k<nbest; k++) 01496 { 01497 delta.element(delta_array, t, j, k, m_seq_len, m_N) = seq.element(j,t) ; 01498 psi.element(t,j,k) = 0 ; 01499 if (nbest>1) 01500 ktable.element(t,j,k) = 0 ; 01501 ptable.element(t,j,k) = 0 ; 01502 } 01503 } 01504 else 01505 { 01506 const T_STATES num_elem = trans_list_forward_cnt[j] ; 01507 const T_STATES *elem_list = trans_list_forward[j] ; 01508 const float64_t *elem_val = trans_list_forward_val[j] ; 01509 const int32_t *elem_id = trans_list_forward_id[j] ; 01510 01511 int32_t fixed_list_len = 0 ; 01512 float64_t fixedtempvv_ = CMath::INFTY ; 01513 int32_t fixedtempii_ = 0 ; 01514 bool fixedtemplong = false ; 01515 01516 for (int32_t i=0; i<num_elem; i++) 01517 { 01518 T_STATES ii = elem_list[i] ; 01519 01520 const CPlifBase* penalty = (CPlifBase*) PEN.element(j,ii) ; 01521 01522 /*int32_t look_back = max_look_back ; 01523 if (0) 01524 { // find lookback length 01525 CPlifBase *pen = (CPlifBase*) penalty ; 01526 if (pen!=NULL) 01527 look_back=(int32_t) (CMath::ceil(pen->get_max_value())); 01528 if (look_back>=1e6) 01529 SG_PRINT("%i,%i -> %d from %ld\n", j, ii, look_back, (long)pen) ; 01530 ASSERT(look_back<1e6); 01531 } */ 01532 01533 int32_t look_back_ = look_back.element(j, ii) ; 01534 01535 int32_t orf_from = m_orf_info.element(ii,0) ; 01536 int32_t orf_to = m_orf_info.element(j,1) ; 01537 if((orf_from!=-1)!=(orf_to!=-1)) 01538 SG_DEBUG("j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ; 01539 ASSERT((orf_from!=-1)==(orf_to!=-1)) ; 01540 01541 int32_t orf_target = -1 ; 01542 if (orf_from!=-1) 01543 { 01544 orf_target=orf_to-orf_from ; 01545 if (orf_target<0) 01546 orf_target+=3 ; 01547 ASSERT(orf_target>=0 && orf_target<3) ; 01548 } 01549 01550 int32_t orf_last_pos = m_pos[t] ; 01551 #ifdef DYNPROG_TIMING 01552 MyTime3.start() ; 01553 #endif 01554 int32_t num_ok_pos = 0 ; 01555 01556 for (int32_t ts=t-1; ts>=0 && m_pos[t]-m_pos[ts]<=look_back_; ts--) 01557 { 01558 bool ok ; 01559 //int32_t plen=t-ts; 01560 01561 /*for (int32_t s=0; s<m_num_svms; s++) 01562 if ((fabs(svs.svm_values[s*svs.seqlen+plen]-svs2.svm_values[s*svs.seqlen+plen])>1e-6) || 01563 (fabs(svs.svm_values[s*svs.seqlen+plen]-svs3.svm_values[s*svs.seqlen+plen])>1e-6)) 01564 { 01565 SG_DEBUG( "s=%i, t=%i, ts=%i, %1.5e, %1.5e, %1.5e\n", s, t, ts, svs.svm_values[s*svs.seqlen+plen], svs2.svm_values[s*svs.seqlen+plen], svs3.svm_values[s*svs.seqlen+plen]); 01566 }*/ 01567 01568 if (orf_target==-1) 01569 ok=true ; 01570 else if (m_pos[ts]!=-1 && (m_pos[t]-m_pos[ts])%3==orf_target) 01571 ok=(!use_orf) || extend_orf(orf_from, orf_to, m_pos[ts], orf_last_pos, m_pos[t]) ; 01572 else 01573 ok=false ; 01574 01575 if (ok) 01576 { 01577 01578 float64_t segment_loss = 0.0 ; 01579 if (with_loss) 01580 { 01581 segment_loss = m_seg_loss_obj->get_segment_loss(ts, t, elem_id[i]); 01582 //if (segment_loss!=segment_loss2) 01583 //SG_PRINT("segment_loss:%f segment_loss2:%f\n", segment_loss, segment_loss2); 01584 } 01586 // BEST_PATH_TRANS 01588 01589 int32_t frame = orf_from;//m_orf_info.element(ii,0); 01590 lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame); 01591 01592 float64_t pen_val = 0.0 ; 01593 if (penalty) 01594 { 01595 #ifdef DYNPROG_TIMING_DETAIL 01596 MyTime.start() ; 01597 #endif 01598 pen_val = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ; 01599 01600 #ifdef DYNPROG_TIMING_DETAIL 01601 MyTime.stop() ; 01602 content_plifs_time += MyTime.time_diff_sec() ; 01603 #endif 01604 } 01605 01606 #ifdef DYNPROG_TIMING_DETAIL 01607 MyTime.start() ; 01608 #endif 01609 num_ok_pos++ ; 01610 01611 if (nbest==1) 01612 { 01613 float64_t val = elem_val[i] + pen_val ; 01614 if (with_loss) 01615 val += segment_loss ; 01616 01617 float64_t mval = -(val + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N)) ; 01618 01619 if (mval<fixedtempvv_) 01620 { 01621 fixedtempvv_ = mval ; 01622 fixedtempii_ = ii + ts*m_N; 01623 fixed_list_len = 1 ; 01624 fixedtemplong = false ; 01625 } 01626 } 01627 else 01628 { 01629 for (int16_t diff=0; diff<nbest; diff++) 01630 { 01631 float64_t val = elem_val[i] ; 01632 val += pen_val ; 01633 if (with_loss) 01634 val += segment_loss ; 01635 01636 float64_t mval = -(val + delta.element(delta_array, ts, ii, diff, m_seq_len, m_N)) ; 01637 01638 /* only place -val in fixedtempvv if it is one of the nbest lowest values in there */ 01639 /* fixedtempvv[i], i=0:nbest-1, is sorted so that fixedtempvv[0] <= fixedtempvv[1] <= ...*/ 01640 /* fixed_list_len has the number of elements in fixedtempvv */ 01641 01642 if ((fixed_list_len < nbest) || ((0==fixed_list_len) || (mval < fixedtempvv[fixed_list_len-1]))) 01643 { 01644 if ( (fixed_list_len<nbest) && ((0==fixed_list_len) || (mval>fixedtempvv[fixed_list_len-1])) ) 01645 { 01646 fixedtempvv[fixed_list_len] = mval ; 01647 fixedtempii[fixed_list_len] = ii + diff*m_N + ts*m_N*nbest; 01648 fixed_list_len++ ; 01649 } 01650 else // must have mval < fixedtempvv[fixed_list_len-1] 01651 { 01652 int32_t addhere = fixed_list_len; 01653 while ((addhere > 0) && (mval < fixedtempvv[addhere-1])) 01654 addhere--; 01655 01656 // move everything from addhere+1 one forward 01657 for (int32_t jj=fixed_list_len-1; jj>addhere; jj--) 01658 { 01659 fixedtempvv[jj] = fixedtempvv[jj-1]; 01660 fixedtempii[jj] = fixedtempii[jj-1]; 01661 } 01662 01663 fixedtempvv[addhere] = mval; 01664 fixedtempii[addhere] = ii + diff*m_N + ts*m_N*nbest; 01665 01666 if (fixed_list_len < nbest) 01667 fixed_list_len++; 01668 } 01669 } 01670 } 01671 } 01672 #ifdef DYNPROG_TIMING_DETAIL 01673 MyTime.stop() ; 01674 inner_loop_max_time += MyTime.time_diff_sec() ; 01675 #endif 01676 } 01677 } 01678 #ifdef DYNPROG_TIMING 01679 MyTime3.stop() ; 01680 inner_loop_time += MyTime3.time_diff_sec() ; 01681 #endif 01682 } 01683 for (int32_t i=0; i<num_elem; i++) 01684 { 01685 T_STATES ii = elem_list[i] ; 01686 01687 const CPlifBase* penalty = (CPlifBase*) PEN.element(j,ii) ; 01688 01689 /*int32_t look_back = max_look_back ; 01690 if (0) 01691 { // find lookback length 01692 CPlifBase *pen = (CPlifBase*) penalty ; 01693 if (pen!=NULL) 01694 look_back=(int32_t) (CMath::ceil(pen->get_max_value())); 01695 if (look_back>=1e6) 01696 SG_PRINT("%i,%i -> %d from %ld\n", j, ii, look_back, (long)pen) ; 01697 ASSERT(look_back<1e6); 01698 } */ 01699 01700 int32_t look_back_ = look_back.element(j, ii) ; 01701 //int32_t look_back_orig_ = look_back_orig.element(j, ii) ; 01702 01703 int32_t orf_from = m_orf_info.element(ii,0) ; 01704 int32_t orf_to = m_orf_info.element(j,1) ; 01705 if((orf_from!=-1)!=(orf_to!=-1)) 01706 SG_DEBUG("j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ; 01707 ASSERT((orf_from!=-1)==(orf_to!=-1)) ; 01708 01709 int32_t orf_target = -1 ; 01710 if (orf_from!=-1) 01711 { 01712 orf_target=orf_to-orf_from ; 01713 if (orf_target<0) 01714 orf_target+=3 ; 01715 ASSERT(orf_target>=0 && orf_target<3) ; 01716 } 01717 01718 //int32_t loss_last_pos = t ; 01719 //float64_t last_loss = 0.0 ; 01720 01721 #ifdef DYNPROG_TIMING 01722 MyTime3.start() ; 01723 #endif 01724 01725 /* long transition stuff */ 01726 /* only do this, if 01727 * this feature is enabled 01728 * this is not a transition with ORF restrictions 01729 * the loss is switched off 01730 * nbest=1 01731 */ 01732 #ifdef DYNPROG_TIMING 01733 MyTime3.start() ; 01734 #endif 01735 // long transitions, only when not considering ORFs 01736 if ( long_transitions && orf_target==-1 && look_back_ == m_long_transition_threshold ) 01737 { 01738 01739 // update table for 5' part of the long segment 01740 01741 int32_t start = long_transition_content_start.get_element(ii, j) ; 01742 int32_t end_5p_part = start ; 01743 for (int32_t start_5p_part=start; m_pos[t]-m_pos[start_5p_part] > m_long_transition_threshold ; start_5p_part++) 01744 { 01745 // find end_5p_part, which is greater than start_5p_part and at least m_long_transition_threshold away 01746 while (end_5p_part<=t && m_pos[end_5p_part+1]-m_pos[start_5p_part]<=m_long_transition_threshold) 01747 end_5p_part++ ; 01748 01749 ASSERT(m_pos[end_5p_part+1]-m_pos[start_5p_part] > m_long_transition_threshold || end_5p_part==t) ; 01750 ASSERT(m_pos[end_5p_part]-m_pos[start_5p_part] <= m_long_transition_threshold) ; 01751 01752 float64_t pen_val = 0.0; 01753 /* recompute penalty, if necessary */ 01754 if (penalty) 01755 { 01756 int32_t frame = m_orf_info.element(ii,0); 01757 lookup_content_svm_values(start_5p_part, end_5p_part, m_pos[start_5p_part], m_pos[end_5p_part], svm_value, frame); // * t -> end_5p_part 01758 pen_val = penalty->lookup_penalty(m_pos[end_5p_part]-m_pos[start_5p_part], svm_value) ; 01759 } 01760 01761 /*if (m_pos[start_5p_part]==1003) 01762 { 01763 SG_PRINT("Part1: %i - %i vs %i - %i\n", m_pos[t], m_pos[ts], m_pos[end_5p_part], m_pos[start_5p_part]) ; 01764 SG_PRINT("Part1: ts=%i t=%i start_5p_part=%i m_seq_len=%i\n", m_pos[ts], m_pos[t], m_pos[start_5p_part], m_seq_len) ; 01765 }*/ 01766 01767 float64_t mval_trans = -( elem_val[i] + pen_val*0.5 + delta.element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N) ) ; 01768 //float64_t mval_trans = -( elem_val[i] + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N) ) ; // enable this for the incomplete extra check 01769 01770 float64_t segment_loss_part1=0.0 ; 01771 if (with_loss) 01772 { // this is the loss from the start of the long segment (5' part + middle section) 01773 01774 segment_loss_part1 = m_seg_loss_obj->get_segment_loss(start_5p_part /*long_transition_content_start_position.get_element(ii,j)*/, end_5p_part, elem_id[i]); // * unsure 01775 01776 mval_trans -= segment_loss_part1 ; 01777 } 01778 01779 01780 if (0)//m_pos[end_5p_part] - m_pos[long_transition_content_start_position.get_element(ii, j)] > look_back_orig_/*m_long_transition_max*/) 01781 { 01782 // this restricts the maximal length of segments, 01783 // but the current implementation is not valid since the 01784 // long transition is discarded without loocking if there 01785 // is a second best long transition in between 01786 long_transition_content_scores.set_element(-CMath::INFTY, ii, j) ; 01787 long_transition_content_start_position.set_element(0, ii, j) ; 01788 if (with_loss) 01789 long_transition_content_scores_loss.set_element(0.0, ii, j) ; 01790 #ifdef DYNPROG_DEBUG 01791 long_transition_content_scores_pen.set_element(0.0, ii, j) ; 01792 long_transition_content_scores_elem.set_element(0.0, ii, j) ; 01793 long_transition_content_scores_prev.set_element(0.0, ii, j) ; 01794 long_transition_content_end_position.set_element(0, ii, j) ; 01795 #endif 01796 } 01797 if (with_loss) 01798 { 01799 float64_t old_loss = long_transition_content_scores_loss.get_element(ii, j) ; 01800 float64_t new_loss = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), end_5p_part, elem_id[i]); 01801 float64_t score = long_transition_content_scores.get_element(ii, j) - old_loss + new_loss ; 01802 long_transition_content_scores.set_element(score, ii, j) ; 01803 long_transition_content_scores_loss.set_element(new_loss, ii, j) ; 01804 #ifdef DYNPROG_DEBUG 01805 long_transition_content_end_position.set_element(end_5p_part, ii, j) ; 01806 #endif 01807 01808 } 01809 if (-long_transition_content_scores.get_element(ii, j) > mval_trans ) 01810 { 01811 /* then the old long transition is either too far away or worse than the current one */ 01812 long_transition_content_scores.set_element(-mval_trans, ii, j) ; 01813 long_transition_content_start_position.set_element(start_5p_part, ii, j) ; 01814 if (with_loss) 01815 long_transition_content_scores_loss.set_element(segment_loss_part1, ii, j) ; 01816 #ifdef DYNPROG_DEBUG 01817 long_transition_content_scores_pen.set_element(pen_val*0.5, ii, j) ; 01818 long_transition_content_scores_elem.set_element(elem_val[i], ii, j) ; 01819 long_transition_content_scores_prev.set_element(delta.element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N), ii, j) ; 01820 /*ASSERT(fabs(long_transition_content_scores.get_element(ii, j)-(long_transition_content_scores_pen.get_element(ii, j) + 01821 long_transition_content_scores_elem.get_element(ii, j) + 01822 long_transition_content_scores_prev.get_element(ii, j)))<1e-6) ;*/ 01823 long_transition_content_end_position.set_element(end_5p_part, ii, j) ; 01824 #endif 01825 } 01826 // 01827 // this sets the position where the search for better 5'parts is started the next time 01828 // whithout this the prediction takes ages 01829 // 01830 long_transition_content_start.set_element(start_5p_part, ii, j) ; 01831 } 01832 01833 // consider the 3' part at the end of the long segment: 01834 // * with length = m_long_transition_threshold 01835 // * content prediction and loss only for this part 01836 01837 // find ts > 0 with distance from m_pos[t] greater m_long_transition_threshold 01838 // precompute: only depends on t 01839 int ts = t; 01840 while (ts>0 && m_pos[t]-m_pos[ts-1] <= m_long_transition_threshold) 01841 ts-- ; 01842 01843 if (ts>0) 01844 { 01845 ASSERT((m_pos[t]-m_pos[ts-1] > m_long_transition_threshold) && (m_pos[t]-m_pos[ts] <= m_long_transition_threshold)) ; 01846 01847 01848 /* only consider this transition, if the right position was found */ 01849 float pen_val_3p = 0.0 ; 01850 if (penalty) 01851 { 01852 int32_t frame = orf_from ; //m_orf_info.element(ii, 0); 01853 lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame); 01854 pen_val_3p = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ; 01855 } 01856 01857 float64_t mval = -(long_transition_content_scores.get_element(ii, j) + pen_val_3p*0.5) ; 01858 01859 { 01860 #ifdef DYNPROG_DEBUG 01861 float64_t segment_loss_part2=0.0 ; 01862 float64_t segment_loss_part1=0.0 ; 01863 #endif 01864 float64_t segment_loss_total=0.0 ; 01865 01866 if (with_loss) 01867 { // this is the loss for the 3' end fragment of the segment 01868 // (the 5' end and the middle section loss is already contained in mval) 01869 01870 #ifdef DYNPROG_DEBUG 01871 // this is an alternative, which should be identical, if the loss is additive 01872 segment_loss_part2 = m_seg_loss_obj->get_segment_loss_extend(long_transition_content_end_position.get_element(ii,j), t, elem_id[i]); 01873 //mval -= segment_loss_part2 ; 01874 segment_loss_part1 = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), long_transition_content_end_position.get_element(ii,j), elem_id[i]); 01875 #endif 01876 segment_loss_total = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), t, elem_id[i]); 01877 mval -= (segment_loss_total-long_transition_content_scores_loss.get_element(ii, j)) ; 01878 } 01879 01880 #ifdef DYNPROG_DEBUG 01881 if (m_pos[t]==10108 ||m_pos[t]==12802 ||m_pos[t]== 12561) 01882 { 01883 SG_PRINT("Part2: %i,%i,%i: val=%1.6f pen_val_3p*0.5=%1.6f (t=%i, ts=%i, ts-1=%i, ts+1=%i); scores=%1.6f (pen=%1.6f,prev=%1.6f,elem=%1.6f,loss=%1.1f), positions=%i,%i,%i, loss=%1.1f/%1.1f (%i,%i)\n", 01884 m_pos[t], j, ii, -mval, 0.5*pen_val_3p, m_pos[t], m_pos[ts], m_pos[ts-1], m_pos[ts+1], 01885 long_transition_content_scores.get_element(ii, j), 01886 long_transition_content_scores_pen.get_element(ii, j), 01887 long_transition_content_scores_prev.get_element(ii, j), 01888 long_transition_content_scores_elem.get_element(ii, j), 01889 long_transition_content_scores_loss.get_element(ii, j), 01890 m_pos[long_transition_content_start_position.get_element(ii,j)], 01891 m_pos[long_transition_content_end_position.get_element(ii,j)], 01892 m_pos[long_transition_content_start.get_element(ii,j)], segment_loss_part2, segment_loss_total, long_transition_content_start_position.get_element(ii,j), t) ; 01893 SG_PRINT("fixedtempvv_: %1.6f, from_state:%i from_pos:%i\n ",-fixedtempvv_, (fixedtempii_%m_N), m_pos[(fixedtempii_-(fixedtempii_%(m_N*nbest)))/(m_N*nbest)] ); 01894 } 01895 01896 if (fabs(segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total)>1e-3) 01897 { 01898 SG_ERROR("LOSS: total=%1.1f (%i-%i) part1=%1.1f/%1.1f (%i-%i) part2=%1.1f (%i-%i) sum=%1.1f diff=%1.1f\n", 01899 segment_loss_total, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[t], 01900 long_transition_content_scores_loss.get_element(ii, j), segment_loss_part1, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[long_transition_content_end_position.get_element(ii,j)], 01901 segment_loss_part2, m_pos[long_transition_content_end_position.get_element(ii,j)], m_pos[t], 01902 segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j), 01903 segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total) ; 01904 } 01905 #endif 01906 } 01907 01908 // prefer simpler version to guarantee optimality 01909 // 01910 // original: 01911 /* if ((mval < fixedtempvv_) && 01912 (m_pos[t] - m_pos[long_transition_content_start_position.get_element(ii, j)])<=look_back_orig_) */ 01913 if (mval < fixedtempvv_) 01914 { 01915 /* then the long transition is better than the short one => replace it */ 01916 int32_t fromtjk = fixedtempii_ ; 01917 /*SG_PRINT("%i,%i: Long transition (%1.5f=-(%1.5f+%1.5f+%1.5f+%1.5f), %i) to m_pos %i better than short transition (%1.5f,%i) to m_pos %i \n", 01918 m_pos[t], j, 01919 mval, pen_val_3p*0.5, long_transition_content_scores_pen.get_element(ii, j), long_transition_content_scores_elem.get_element(ii, j), long_transition_content_scores_prev.get_element(ii, j), ii, 01920 m_pos[long_transition_content_position.get_element(ii, j)], 01921 fixedtempvv_, (fromtjk%m_N), m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]) ;*/ 01922 ASSERT((fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)==0 || m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]>=m_pos[long_transition_content_start_position.get_element(ii, j)] || fixedtemplong) ; 01923 01924 fixedtempvv_ = mval ; 01925 fixedtempii_ = ii + m_N*long_transition_content_start_position.get_element(ii, j) ; 01926 fixed_list_len = 1 ; 01927 fixedtemplong = true ; 01928 } 01929 } 01930 } 01931 } 01932 #ifdef DYNPROG_TIMING 01933 MyTime3.stop() ; 01934 long_transition_time += MyTime3.time_diff_sec() ; 01935 #endif 01936 01937 01938 int32_t numEnt = fixed_list_len; 01939 01940 float64_t minusscore; 01941 int64_t fromtjk; 01942 01943 for (int16_t k=0; k<nbest; k++) 01944 { 01945 if (k<numEnt) 01946 { 01947 if (nbest==1) 01948 { 01949 minusscore = fixedtempvv_ ; 01950 fromtjk = fixedtempii_ ; 01951 } 01952 else 01953 { 01954 minusscore = fixedtempvv[k]; 01955 fromtjk = fixedtempii[k]; 01956 } 01957 01958 delta.element(delta_array, t, j, k, m_seq_len, m_N) = -minusscore + seq.element(j,t); 01959 psi.element(t,j,k) = (fromtjk%m_N) ; 01960 if (nbest>1) 01961 ktable.element(t,j,k) = (fromtjk%(m_N*nbest)-psi.element(t,j,k))/m_N ; 01962 ptable.element(t,j,k) = (fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest) ; 01963 } 01964 else 01965 { 01966 delta.element(delta_array, t, j, k, m_seq_len, m_N) = -CMath::INFTY ; 01967 psi.element(t,j,k) = 0 ; 01968 if (nbest>1) 01969 ktable.element(t,j,k) = 0 ; 01970 ptable.element(t,j,k) = 0 ; 01971 } 01972 } 01973 } 01974 } 01975 } 01976 { //termination 01977 int32_t list_len = 0 ; 01978 for (int16_t diff=0; diff<nbest; diff++) 01979 { 01980 for (T_STATES i=0; i<m_N; i++) 01981 { 01982 oldtempvv[list_len] = -(delta.element(delta_array, (m_seq_len-1), i, diff, m_seq_len, m_N)+get_q(i)) ; 01983 oldtempii[list_len] = i + diff*m_N ; 01984 list_len++ ; 01985 } 01986 } 01987 01988 CMath::nmin(oldtempvv.get_array(), oldtempii.get_array(), list_len, nbest) ; 01989 01990 for (int16_t k=0; k<nbest; k++) 01991 { 01992 delta_end.element(k) = -oldtempvv[k] ; 01993 path_ends.element(k) = (oldtempii[k]%m_N) ; 01994 if (nbest>1) 01995 ktable_end.element(k) = (oldtempii[k]-path_ends.element(k))/m_N ; 01996 } 01997 01998 01999 } 02000 02001 { //state sequence backtracking 02002 for (int16_t k=0; k<nbest; k++) 02003 { 02004 prob_nbest[k]= delta_end.element(k) ; 02005 02006 int32_t i = 0 ; 02007 state_seq[i] = path_ends.element(k) ; 02008 int16_t q = 0 ; 02009 if (nbest>1) 02010 q=ktable_end.element(k) ; 02011 pos_seq[i] = m_seq_len-1 ; 02012 02013 while (pos_seq[i]>0) 02014 { 02015 ASSERT(i+1<m_seq_len); 02016 //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ; 02017 state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q); 02018 pos_seq[i+1] = ptable.element(pos_seq[i], state_seq[i], q) ; 02019 if (nbest>1) 02020 q = ktable.element(pos_seq[i], state_seq[i], q) ; 02021 i++ ; 02022 } 02023 //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ; 02024 int32_t num_states = i+1 ; 02025 for (i=0; i<num_states;i++) 02026 { 02027 my_state_seq[i+k*m_seq_len] = state_seq[num_states-i-1] ; 02028 my_pos_seq[i+k*m_seq_len] = pos_seq[num_states-i-1] ; 02029 } 02030 if (num_states<m_seq_len) 02031 { 02032 my_state_seq[num_states+k*m_seq_len]=-1 ; 02033 my_pos_seq[num_states+k*m_seq_len]=-1 ; 02034 } 02035 } 02036 } 02037 02038 //if (is_big) 02039 // SG_PRINT( "DONE. \n") ; 02040 02041 02042 #ifdef DYNPROG_TIMING 02043 MyTime2.stop() ; 02044 02045 //if (is_big) 02046 SG_PRINT("Timing: orf=%1.2f s \n Segment_init=%1.2f s Segment_pos=%1.2f s Segment_extend=%1.2f s Segment_clean=%1.2f s\nsvm_init=%1.2f s svm_pos=%1.2f svm_clean=%1.2f\n content_svm_values_time=%1.2f content_plifs_time=%1.2f\ninner_loop_max_time=%1.2f inner_loop=%1.2f long_transition_time=%1.2f\n total=%1.2f\n", orf_time, segment_init_time, segment_pos_time, segment_extend_time, segment_clean_time, svm_init_time, svm_pos_time, svm_clean_time, content_svm_values_time, content_plifs_time, inner_loop_max_time, inner_loop_time, long_transition_time, MyTime2.time_diff_sec()) ; 02047 #endif 02048 02049 SG_FREE(fixedtempvv); 02050 SG_FREE(fixedtempii); 02051 } 02052 02053 02054 void CDynProg::best_path_trans_deriv( 02055 int32_t *my_state_seq, int32_t *my_pos_seq, 02056 int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals) 02057 { 02058 m_initial_state_distribution_p_deriv.resize_array(m_N) ; 02059 m_end_state_distribution_q_deriv.resize_array(m_N) ; 02060 m_transition_matrix_a_deriv.resize_array(m_N,m_N) ; 02061 //m_my_scores.resize_array(m_my_state_seq.get_array_size()) ; 02062 //m_my_losses.resize_array(m_my_state_seq.get_array_size()) ; 02063 m_my_scores.resize_array(my_seq_len); 02064 m_my_losses.resize_array(my_seq_len); 02065 float64_t* my_scores=m_my_scores.get_array(); 02066 float64_t* my_losses=m_my_losses.get_array(); 02067 CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix(); 02068 CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals(); 02069 02070 if (!m_svm_arrays_clean) 02071 { 02072 SG_ERROR( "SVM arrays not clean") ; 02073 return ; 02074 } ; 02075 //SG_PRINT( "genestr_len=%i, genestr_num=%i\n", genestr_len, genestr_num) ; 02076 //m_mod_words.display() ; 02077 //m_sign_words.display() ; 02078 //m_string_words.display() ; 02079 02080 bool use_svm = false ; 02081 02082 CDynamicObjectArray PEN((CSGObject**) Plif_matrix, m_N, m_N, false, false) ; // 2d, CPlifBase* 02083 PEN.set_array_name("PEN"); 02084 02085 CDynamicObjectArray PEN_state_signals((CSGObject**) Plif_state_signals, m_N, max_num_signals, false, false) ; // 2d, CPlifBase* 02086 PEN_state_signals.set_array_name("PEN_state_signals"); 02087 02088 CDynamicArray<float64_t> seq_input(seq_array, m_N, m_seq_len, max_num_signals) ; 02089 seq_input.set_array_name("seq_input"); 02090 02091 { // determine whether to use svm outputs and clear derivatives 02092 for (int32_t i=0; i<m_N; i++) 02093 for (int32_t j=0; j<m_N; j++) 02094 { 02095 CPlifBase* penij=(CPlifBase*) PEN.element(i,j) ; 02096 if (penij==NULL) 02097 continue ; 02098 02099 if (penij->uses_svm_values()) 02100 use_svm=true ; 02101 penij->penalty_clear_derivative() ; 02102 } 02103 for (int32_t i=0; i<m_N; i++) 02104 for (int32_t j=0; j<max_num_signals; j++) 02105 { 02106 CPlifBase* penij=(CPlifBase*) PEN_state_signals.element(i,j) ; 02107 if (penij==NULL) 02108 continue ; 02109 if (penij->uses_svm_values()) 02110 use_svm=true ; 02111 penij->penalty_clear_derivative() ; 02112 } 02113 } 02114 02115 { // set derivatives of p, q and a to zero 02116 02117 for (int32_t i=0; i<m_N; i++) 02118 { 02119 m_initial_state_distribution_p_deriv.element(i)=0 ; 02120 m_end_state_distribution_q_deriv.element(i)=0 ; 02121 for (int32_t j=0; j<m_N; j++) 02122 m_transition_matrix_a_deriv.element(i,j)=0 ; 02123 } 02124 } 02125 02126 { // clear score vector 02127 for (int32_t i=0; i<my_seq_len; i++) 02128 { 02129 my_scores[i]=0.0 ; 02130 my_losses[i]=0.0 ; 02131 } 02132 } 02133 02134 //int32_t total_len = 0 ; 02135 02136 //m_transition_matrix_a.display_array() ; 02137 //m_transition_matrix_a_id.display_array() ; 02138 02139 // compute derivatives for given path 02140 float64_t* svm_value = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs); 02141 float64_t* svm_value_part1 = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs); 02142 float64_t* svm_value_part2 = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs); 02143 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 02144 { 02145 svm_value[s]=0 ; 02146 svm_value_part1[s]=0 ; 02147 svm_value_part2[s]=0 ; 02148 } 02149 02150 //#ifdef DYNPROG_DEBUG 02151 float64_t total_score = 0.0 ; 02152 float64_t total_loss = 0.0 ; 02153 //#endif 02154 02155 ASSERT(my_state_seq[0]>=0) ; 02156 m_initial_state_distribution_p_deriv.element(my_state_seq[0])++ ; 02157 my_scores[0] += m_initial_state_distribution_p.element(my_state_seq[0]) ; 02158 02159 ASSERT(my_state_seq[my_seq_len-1]>=0) ; 02160 m_end_state_distribution_q_deriv.element(my_state_seq[my_seq_len-1])++ ; 02161 my_scores[my_seq_len-1] += m_end_state_distribution_q.element(my_state_seq[my_seq_len-1]); 02162 02163 //#ifdef DYNPROG_DEBUG 02164 total_score += my_scores[0] + my_scores[my_seq_len-1] ; 02165 //#endif 02166 02167 SG_DEBUG( "m_seq_len=%i\n", my_seq_len) ; 02168 for (int32_t i=0; i<my_seq_len-1; i++) 02169 { 02170 if (my_state_seq[i+1]==-1) 02171 break ; 02172 int32_t from_state = my_state_seq[i] ; 02173 int32_t to_state = my_state_seq[i+1] ; 02174 int32_t from_pos = my_pos_seq[i] ; 02175 int32_t to_pos = my_pos_seq[i+1] ; 02176 02177 int32_t elem_id = m_transition_matrix_a_id.element(from_state, to_state) ; 02178 my_losses[i] = m_seg_loss_obj->get_segment_loss(from_pos, to_pos, elem_id); 02179 02180 #ifdef DYNPROG_DEBUG 02181 02182 02183 if (i>0)// test if segment loss is additive 02184 { 02185 float32_t loss1 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i], elem_id); 02186 float32_t loss2 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i], my_pos_seq[i+1], elem_id); 02187 float32_t loss3 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i+1], elem_id); 02188 SG_PRINT("loss1:%f loss2:%f loss3:%f, diff:%f\n", loss1, loss2, loss3, loss1+loss2-loss3); 02189 if (CMath::abs(loss1+loss2-loss3)>0) 02190 { 02191 SG_PRINT( "%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) ; 02192 } 02193 } 02194 io->set_loglevel(M_DEBUG) ; 02195 SG_DEBUG( "%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) ; 02196 #endif 02197 // increase usage of this transition 02198 m_transition_matrix_a_deriv.element(from_state, to_state)++ ; 02199 my_scores[i] += m_transition_matrix_a.element(from_state, to_state) ; 02200 //SG_PRINT("m_transition_matrix_a.element(%i, %i),%f \n",from_state, to_state, m_transition_matrix_a.element(from_state, to_state)); 02201 #ifdef DYNPROG_DEBUG 02202 SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ; 02203 #endif 02204 02205 /*int32_t last_svm_pos[m_num_degrees] ; 02206 for (int32_t qq=0; qq<m_num_degrees; qq++) 02207 last_svm_pos[qq]=-1 ;*/ 02208 02209 bool is_long_transition = false ; 02210 if (m_long_transitions) 02211 { 02212 if (m_pos[to_pos]-m_pos[from_pos]>m_long_transition_threshold) 02213 is_long_transition = true ; 02214 if (m_orf_info.element(from_state,0)!=-1) 02215 is_long_transition = false ; 02216 } 02217 02218 int32_t from_pos_thresh = from_pos ; 02219 int32_t to_pos_thresh = to_pos ; 02220 02221 if (use_svm) 02222 { 02223 if (is_long_transition) 02224 { 02225 02226 while (from_pos_thresh<to_pos && m_pos[from_pos_thresh+1] - m_pos[from_pos] <= m_long_transition_threshold) // * 02227 from_pos_thresh++ ; 02228 ASSERT(from_pos_thresh<to_pos) ; 02229 ASSERT(m_pos[from_pos_thresh] - m_pos[from_pos] <= m_long_transition_threshold); // * 02230 ASSERT(m_pos[from_pos_thresh+1] - m_pos[from_pos] > m_long_transition_threshold);// * 02231 02232 int32_t frame = m_orf_info.element(from_state,0); 02233 lookup_content_svm_values(from_pos, from_pos_thresh, m_pos[from_pos], m_pos[from_pos_thresh], svm_value_part1, frame); 02234 02235 #ifdef DYNPROG_DEBUG 02236 SG_PRINT("part1: pos1: %i pos2: %i pos3: %i \nsvm_value_part1: ", m_pos[from_pos], m_pos[from_pos_thresh], m_pos[from_pos_thresh+1]) ; 02237 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 02238 SG_PRINT("%1.4f ", svm_value_part1[s]); 02239 SG_PRINT("\n"); 02240 #endif 02241 02242 while (to_pos_thresh>0 && m_pos[to_pos] - m_pos[to_pos_thresh-1] <= m_long_transition_threshold) // * 02243 to_pos_thresh-- ; 02244 ASSERT(to_pos_thresh>0) ; 02245 ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh] <= m_long_transition_threshold) ; // * 02246 ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh-1] > m_long_transition_threshold) ; // * 02247 02248 lookup_content_svm_values(to_pos_thresh, to_pos, m_pos[to_pos_thresh], m_pos[to_pos], svm_value_part2, frame); 02249 02250 #ifdef DYNPROG_DEBUG 02251 SG_PRINT("part2: pos1: %i pos2: %i pos3: %i \nsvm_value_part2: ", m_pos[to_pos], m_pos[to_pos_thresh], m_pos[to_pos_thresh+1]) ; 02252 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 02253 SG_PRINT("%1.4f ", svm_value_part2[s]); 02254 SG_PRINT("\n"); 02255 #endif 02256 } 02257 else 02258 { 02259 /* normal case */ 02260 02261 //SG_PRINT("from_pos: %i; to_pos: %i; m_pos[to_pos]-m_pos[from_pos]: %i \n",from_pos, to_pos, m_pos[to_pos]-m_pos[from_pos]); 02262 int32_t frame = m_orf_info.element(from_state,0); 02263 if (false)//(frame>=0) 02264 { 02265 int32_t num_current_svms=0; 02266 int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1}; 02267 SG_PRINT("penalties(%i, %i), frame:%i ", from_state, to_state, frame); 02268 ((CPlifBase*) PEN.element(to_state, from_state))->get_used_svms(&num_current_svms, svm_ids); 02269 SG_PRINT("\n"); 02270 } 02271 02272 lookup_content_svm_values(from_pos, to_pos, m_pos[from_pos],m_pos[to_pos], svm_value, frame); 02273 #ifdef DYNPROG_DEBUG 02274 SG_PRINT("part2: pos1: %i pos2: %i \nsvm_values: ", m_pos[from_pos], m_pos[to_pos]) ; 02275 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++) 02276 SG_PRINT("%1.4f ", svm_value[s]); 02277 SG_PRINT("\n"); 02278 #endif 02279 } 02280 } 02281 02282 if (PEN.element(to_state, from_state)!=NULL) 02283 { 02284 float64_t nscore = 0 ; 02285 if (is_long_transition) 02286 { 02287 float64_t pen_value_part1 = ((CPlifBase*) PEN.element(to_state, from_state))->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1) ; 02288 float64_t pen_value_part2 = ((CPlifBase*) PEN.element(to_state, from_state))->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2) ; 02289 nscore= 0.5*pen_value_part1 + 0.5*pen_value_part2 ; 02290 } 02291 else 02292 nscore = ((CPlifBase*) PEN.element(to_state, from_state))->lookup_penalty(m_pos[to_pos]-m_pos[from_pos], svm_value) ; 02293 02294 if (false)//(nscore<-1e9) 02295 SG_PRINT("is_long_transition=%i (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f\n", 02296 is_long_transition, m_pos[from_pos], from_state, m_pos[to_pos], to_state, nscore) ; 02297 02298 my_scores[i] += nscore ; 02299 02300 for (int32_t s=m_num_svms;s<m_num_lin_feat_plifs_cum[m_num_raw_data]; s++)/*set tiling plif values to neutral values (that do not influence derivative calculation)*/ 02301 { 02302 svm_value[s]=-CMath::INFTY; 02303 svm_value_part1[s]=-CMath::INFTY; 02304 svm_value_part2[s]=-CMath::INFTY; 02305 } 02306 02307 #ifdef DYNPROG_DEBUG 02308 //SG_DEBUG( "%i. transition penalty: from_state=%i to_state=%i from_pos=%i [%i] to_pos=%i [%i] value=%i\n", i, from_state, to_state, from_pos, m_pos[from_pos], to_pos, m_pos[to_pos], m_pos[to_pos]-m_pos[from_pos]) ; 02309 #endif 02310 if (is_long_transition) 02311 { 02312 #ifdef DYNPROG_DEBUG 02313 float64_t sum_score = 0.0 ; 02314 02315 for (int kk=0; kk<i; kk++) 02316 sum_score += my_scores[i] ; 02317 02318 SG_PRINT("is_long_transition=%i (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f, %1.5f --- 1: %1.6f (%i-%i) 2: %1.6f (%i-%i) \n", 02319 is_long_transition, m_pos[from_pos], from_state, m_pos[to_pos], to_state, 02320 nscore, sum_score, 02321 PEN.element(to_state, from_state)->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1)*0.5, m_pos[from_pos], m_pos[from_pos_thresh], 02322 PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2)*0.5, m_pos[to_pos_thresh], m_pos[to_pos]) ; 02323 #endif 02324 } 02325 02326 if (is_long_transition) 02327 { 02328 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1, 0.5) ; 02329 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2, 0.5) ; 02330 } 02331 else 02332 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(m_pos[to_pos]-m_pos[from_pos], svm_value, 1) ; 02333 02334 //SG_PRINT("m_num_raw_data = %i \n", m_num_raw_data) ; 02335 02336 // for tiling array and rna-seq data every single measurement must be added to the derivative 02337 // in contrast to the content svm predictions where we have a single value per transition; 02338 // content svm predictions have already been added to the derivative, thus we start with d=1 02339 // instead of d=0 02340 if (is_long_transition) 02341 { 02342 for (int32_t d=1; d<=m_num_raw_data; d++) 02343 { 02344 for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++) 02345 svm_value[s]=-CMath::INFTY; 02346 float64_t* intensities = SG_MALLOC(float64_t, m_num_probes_cum[d]); 02347 int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[from_pos_thresh],intensities, d); 02348 for (int32_t k=0;k<num_intensities;k++) 02349 { 02350 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++) 02351 svm_value[j]=intensities[k]; 02352 02353 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ; 02354 02355 } 02356 num_intensities = raw_intensities_interval_query(m_pos[to_pos_thresh], m_pos[to_pos],intensities, d); 02357 for (int32_t k=0;k<num_intensities;k++) 02358 { 02359 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++) 02360 svm_value[j]=intensities[k]; 02361 02362 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ; 02363 02364 } 02365 SG_FREE(intensities); 02366 02367 } 02368 } 02369 else 02370 { 02371 for (int32_t d=1; d<=m_num_raw_data; d++) 02372 { 02373 for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++) 02374 svm_value[s]=-CMath::INFTY; 02375 float64_t* intensities = SG_MALLOC(float64_t, m_num_probes_cum[d]); 02376 int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[to_pos],intensities, d); 02377 //SG_PRINT("m_pos[from_pos]:%i, m_pos[to_pos]:%i, num_intensities:%i\n",m_pos[from_pos],m_pos[to_pos], num_intensities); 02378 for (int32_t k=0;k<num_intensities;k++) 02379 { 02380 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++) 02381 svm_value[j]=intensities[k]; 02382 02383 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(-CMath::INFTY, svm_value, 1) ; 02384 02385 } 02386 SG_FREE(intensities); 02387 } 02388 } 02389 02390 } 02391 #ifdef DYNPROG_DEBUG 02392 SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ; 02393 #endif 02394 02395 //SG_DEBUG( "emmission penalty skipped: to_state=%i to_pos=%i value=%1.2f score=%1.2f\n", to_state, to_pos, seq_input.element(to_state, to_pos), 0.0) ; 02396 for (int32_t k=0; k<max_num_signals; k++) 02397 { 02398 if ((PEN_state_signals.element(to_state,k)==NULL)&&(k==0)) 02399 { 02400 #ifdef DYNPROG_DEBUG 02401 SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i score=%1.2f (no signal plif)\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k)) ; 02402 #endif 02403 my_scores[i] += seq_input.element(to_state, to_pos, k) ; 02404 //if (seq_input.element(to_state, to_pos, k) !=0) 02405 // SG_PRINT("features(%i,%i): %f\n",to_state,to_pos,seq_input.element(to_state, to_pos, k)); 02406 break ; 02407 } 02408 if (PEN_state_signals.element(to_state, k)!=NULL) 02409 { 02410 float64_t nscore = ((CPlifBase*) PEN_state_signals.element(to_state,k))->lookup_penalty(seq_input.element(to_state, to_pos, k), svm_value) ; // this should be ok for long_transitions (svm_value does not matter) 02411 my_scores[i] += nscore ; 02412 #ifdef DYNPROG_DEBUG 02413 if (false)//(nscore<-1e9) 02414 { 02415 SG_PRINT("is_long_transition=%i (from_pos=%i (%i), from_state=%i, to_pos=%i (%i) to_state=%i=> %1.5f, dim3:%i, seq_input.element(to_state, to_pos, k): %1.4f\n", 02416 is_long_transition, m_pos[from_pos], from_pos, from_state, m_pos[to_pos], to_pos, to_state, nscore, k, seq_input.element(to_state, to_pos, k)) ; 02417 for (int x=0; x<23; x++) 02418 { 02419 for (int i=-10; i<10; i++) 02420 SG_PRINT("%1.4f\t", seq_input.element(x, to_pos+i, k)); 02421 SG_PRINT("\n"); 02422 } 02423 02424 } 02425 #endif 02426 //break ; 02427 //int32_t num_current_svms=0; 02428 //int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1}; 02429 //SG_PRINT("PEN_state_signals->id: "); 02430 //PEN_state_signals.element(to_state, k)->get_used_svms(&num_current_svms, svm_ids); 02431 //SG_PRINT("\n"); 02432 //if (nscore != 0) 02433 //SG_PRINT( "%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) ; 02434 #ifdef DYNPROG_DEBUG 02435 SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) ; 02436 #endif 02437 ((CPlifBase*) PEN_state_signals.element(to_state,k))->penalty_add_derivative(seq_input.element(to_state, to_pos, k), svm_value, 1) ; // this should be ok for long_transitions (svm_value does not matter) 02438 } else 02439 break ; 02440 } 02441 02442 //#ifdef DYNPROG_DEBUG 02443 //SG_PRINT( "scores[%i]=%f (final) \n", i, my_scores[i]) ; 02444 //SG_PRINT( "losses[%i]=%f (final) , total_loss: %f \n", i, my_losses[i], total_loss) ; 02445 total_score += my_scores[i] ; 02446 total_loss += my_losses[i] ; 02447 //#endif 02448 } 02449 //#ifdef DYNPROG_DEBUG 02450 //SG_PRINT( "total score = %f \n", total_score) ; 02451 //SG_PRINT( "total loss = %f \n", total_loss) ; 02452 //#endif 02453 SG_FREE(svm_value); 02454 SG_FREE(svm_value_part1); 02455 SG_FREE(svm_value_part2); 02456 } 02457 02458 int32_t CDynProg::raw_intensities_interval_query(const int32_t from_pos, const int32_t to_pos, float64_t* intensities, int32_t type) 02459 { 02460 ASSERT(from_pos<to_pos); 02461 int32_t num_intensities = 0; 02462 int32_t* p_tiling_pos = &m_probe_pos[m_num_probes_cum[type-1]]; 02463 float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[type-1]]; 02464 int32_t last_pos; 02465 int32_t num = m_num_probes_cum[type-1]; 02466 while (*p_tiling_pos<to_pos) 02467 { 02468 if (*p_tiling_pos>=from_pos) 02469 { 02470 intensities[num_intensities] = *p_tiling_data; 02471 num_intensities++; 02472 } 02473 num++; 02474 if (num>=m_num_probes_cum[type]) 02475 break; 02476 last_pos = *p_tiling_pos; 02477 p_tiling_pos++; 02478 p_tiling_data++; 02479 ASSERT(last_pos<*p_tiling_pos); 02480 } 02481 return num_intensities; 02482 } 02483 02484 void CDynProg::lookup_content_svm_values(const int32_t from_state, const int32_t to_state, const int32_t from_pos, const int32_t to_pos, float64_t* svm_values, int32_t frame) 02485 { 02486 #ifdef DYNPROG_TIMING_DETAIL 02487 MyTime.start() ; 02488 #endif 02489 // ASSERT(from_state<to_state); 02490 // if (!(from_pos<to_pos)) 02491 // SG_ERROR("from_pos!<to_pos, from_pos: %i to_pos: %i \n",from_pos,to_pos); 02492 for (int32_t i=0;i<m_num_svms;i++) 02493 { 02494 float64_t to_val = m_lin_feat.get_element(i, to_state); 02495 float64_t from_val = m_lin_feat.get_element(i, from_state); 02496 svm_values[i] = (to_val-from_val)/(to_pos-from_pos); 02497 } 02498 for (int32_t i=m_num_svms;i<m_num_lin_feat_plifs_cum[m_num_raw_data];i++) 02499 { 02500 float64_t to_val = m_lin_feat.get_element(i, to_state); 02501 float64_t from_val = m_lin_feat.get_element(i, from_state); 02502 svm_values[i] = to_val-from_val ; 02503 } 02504 if (m_intron_list) 02505 { 02506 int32_t* support = SG_MALLOC(int32_t, m_num_intron_plifs); 02507 m_intron_list->get_intron_support(support, from_state, to_state); 02508 int32_t intron_list_start = m_num_lin_feat_plifs_cum[m_num_raw_data]; 02509 int32_t intron_list_end = m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; 02510 int32_t cnt = 0; 02511 for (int32_t i=intron_list_start; i<intron_list_end;i++) 02512 { 02513 svm_values[i] = (float64_t) (support[cnt]); 02514 cnt++; 02515 } 02516 //if (to_pos>3990 && to_pos<4010) 02517 // SG_PRINT("from_state:%i to_state:%i support[0]:%i support[1]:%i\n",from_state, to_state, support[0], support[1]); 02518 SG_FREE(support); 02519 } 02520 // find the correct row with precomputed frame predictions 02521 if (frame!=-1) 02522 { 02523 svm_values[frame_plifs[0]] = 1e10; 02524 svm_values[frame_plifs[1]] = 1e10; 02525 svm_values[frame_plifs[2]] = 1e10; 02526 int32_t global_frame = from_pos%3; 02527 int32_t row = ((global_frame+frame)%3)+4; 02528 float64_t to_val = m_lin_feat.get_element(row, to_state); 02529 float64_t from_val = m_lin_feat.get_element(row, from_state); 02530 svm_values[frame+frame_plifs[0]] = (to_val-from_val)/(to_pos-from_pos); 02531 } 02532 #ifdef DYNPROG_TIMING_DETAIL 02533 MyTime.stop() ; 02534 content_svm_values_time += MyTime.time_diff_sec() ; 02535 #endif 02536 } 02537 void CDynProg::set_intron_list(CIntronList* intron_list, int32_t num_plifs) 02538 { 02539 m_intron_list = intron_list; 02540 m_num_intron_plifs = num_plifs; 02541 } 02542