CLAM-Development
1.1
|
00001 /* 00002 * Copyright (c) 2001-2004 MUSIC TECHNOLOGY GROUP (MTG) 00003 * UNIVERSITAT POMPEU FABRA 00004 * 00005 * 00006 * This program is free software; you can redistribute it and/or modify 00007 * it under the terms of the GNU General Public License as published by 00008 * the Free Software Foundation; either version 2 of the License, or 00009 * (at your option) any later version. 00010 * 00011 * This program is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 * GNU General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU General Public License 00017 * along with this program; if not, write to the Free Software 00018 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00019 * 00020 */ 00021 00022 #include "Segmentator.hxx" 00023 #include "Point.hxx" 00024 #include "Segment.hxx" 00025 #include <iostream> 00026 #include "SegmentDescriptors.hxx" 00027 00028 00029 namespace CLAM 00030 { 00031 class SegmentBoundaries 00032 { 00033 public: 00034 Array < Array < PointTmpl < int , TData > > > mArray; 00035 00036 SegmentBoundaries(int size):mArray(size) 00037 { 00038 } 00039 }; 00040 } 00041 00042 using namespace CLAM; 00043 00047 00048 std::ostream& operator << (std::ostream& myStream, const TDescriptorsParams& a) 00049 { 00050 switch (a.id) 00051 { 00052 case SpectralMeanId: 00053 { 00054 myStream << "SpectralMean"; 00055 break; 00056 } 00057 case SpectralGeometricMeanId : 00058 { 00059 myStream << "SpectralGeometricMean"; 00060 break; 00061 } 00062 case SpectralEnergyId: 00063 { 00064 myStream << "SpectralEnergy"; 00065 break; 00066 } 00067 case SpectralCentroidId : 00068 { 00069 myStream << "SpectralCentroid"; 00070 break; 00071 } 00072 case SpectralMoment2Id: 00073 { 00074 myStream << "SpectralMoment2"; 00075 break; 00076 } 00077 case SpectralMoment3Id: 00078 { 00079 myStream << "SpectralMoment3"; 00080 break; 00081 } 00082 case SpectralMoment4Id: 00083 { 00084 myStream << "SpectralMoment4"; 00085 break; 00086 } 00087 case SpectralMoment5Id: 00088 { 00089 myStream << "SpectralMoment5"; 00090 break; 00091 } 00092 case SpectralMoment6Id: 00093 { 00094 myStream << "SpectralMoment6"; 00095 break; 00096 } 00097 case SpectralFlatnessId : 00098 { 00099 myStream << "SpectralFlatness"; 00100 break; 00101 } 00102 case SpectralKurtosisId : 00103 { 00104 myStream << "SpectralKurtosis"; 00105 break; 00106 } 00107 case FundamentalId : 00108 { 00109 myStream << "Fundamental"; 00110 break; 00111 } 00112 default: 00113 { 00114 myStream << "UnknownDescriptor"; 00115 break; 00116 } 00117 } 00118 myStream << "threshold="; 00119 myStream << a.threshold; 00120 myStream << " "; 00121 myStream << "percentil="; 00122 myStream << a.percentil; 00123 return myStream; 00124 } 00125 00126 std::istream& operator >> (std::istream& myStream, const TDescriptorsParams& a) 00127 { 00128 CLAM_ASSERT(false, "TDescriptorParams extractor operator is not implemented"); 00129 return myStream; 00130 } 00131 00132 00133 void SegmentatorConfig::DefaultInit() 00134 { 00135 //AddAll(); 00136 AddDescriptorsParams(); 00137 AddMinSegmentLength(); 00138 UpdateData(); 00139 Array<TDescriptorsParams> tmpArray(0); 00140 SetDescriptorsParams(tmpArray); 00141 SearchArray<TDescriptorsParams> tmpSearch(GetDescriptorsParams()); 00142 SetDescriptorsSearch(tmpSearch); 00143 00144 // Default values 00145 SetMinSegmentLength(0); 00146 }; 00147 00148 void SegmentatorConfig::AddDescParams(const TDescriptorsParams& descParams) 00149 { 00150 TIndex position; 00151 if(GetDescriptorsParams().Size()==0) 00152 GetDescriptorsParams().AddElem(descParams); 00153 else if ((position=GetDescriptorsSearch().Find(descParams))==-1) 00154 { 00155 if(descParams<GetDescriptorsParams()[0]) 00156 GetDescriptorsParams().InsertElem(0,descParams); 00157 else 00158 GetDescriptorsParams().AddElem(descParams); 00159 } 00160 else 00161 GetDescriptorsParams().InsertElem(position,descParams); 00162 } 00163 00164 /*false if descriptor is not found, true if it is*/ 00165 bool SegmentatorConfig::FindDescParams(TDescriptorsParams& descParams) 00166 { 00167 int pos; 00168 if((pos=GetDescriptorsSearch().Find(descParams))!=-1) 00169 { 00170 descParams.percentil=GetDescriptorsParams()[pos].percentil; 00171 descParams.threshold=GetDescriptorsParams()[pos].threshold; 00172 return true; 00173 } 00174 return false; 00175 } 00176 00177 void SegmentatorConfig::ClearDescParams() { 00178 GetDescriptorsParams().Init(); 00179 } 00180 00184 00185 Segmentator::Segmentator() 00186 { 00187 Configure(SegmentatorConfig()); 00188 } 00189 00190 Segmentator::Segmentator(const SegmentatorConfig& c) 00191 { 00192 Configure(c); 00193 } 00194 00195 Segmentator::~Segmentator() 00196 { 00197 } 00198 00199 bool Segmentator::ConcreteConfigure(const ProcessingConfig& c) 00200 { 00201 CopyAsConcreteConfig(mConfig, c); 00202 return true; 00203 } 00204 00205 bool Segmentator::Do() 00206 { 00207 CLAM_DEBUG_ASSERT(IsRunning(), "Segmentator: Do(): Not in execution mode"); 00208 00209 CLAM_ASSERT(false, "Segmentator: Do(): Supervised mode not implemented"); 00210 00211 return false; 00212 } 00213 00214 00215 bool Segmentator::Do(Segment& originalSegment,SegmentDescriptors& descriptors) 00216 { 00217 int nFrames=originalSegment.GetnFrames(); 00218 Matrix descriptorsValues(mConfig.GetDescriptorsParams().Size(),nFrames); 00219 UnwrapDescriptors(originalSegment, descriptors,descriptorsValues); 00220 Algorithm(originalSegment,descriptorsValues); 00221 return true; 00222 } 00223 00224 00225 void Segmentator::UnwrapDescriptors(const Segment& originalSegment, SegmentDescriptors& descriptors,Matrix& descriptorsValues) 00226 { 00227 int nFrames=originalSegment.GetnFrames(); 00228 int nDescriptors=mConfig.GetDescriptorsParams().Size(); 00229 for(int i=0;i<nFrames;i++) 00230 { 00231 /*This looks ugly but right now is the only way to deal with it*/ 00232 int z=0; 00233 TData value; 00234 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMeanId) 00235 { 00236 value=descriptors.GetFrameD(i).GetSpectrumD().GetMean(); 00237 if(value>mConfig.GetDescriptorsParams()[z].threshold) 00238 descriptorsValues.SetAt(z,i,value); 00239 else descriptorsValues.SetAt(z,i,0); 00240 z++; 00241 } 00242 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralGeometricMeanId ) 00243 { 00244 value=descriptors.GetFrameD(i).GetSpectrumD().GetGeometricMean(); 00245 if(value>mConfig.GetDescriptorsParams()[z].threshold) 00246 descriptorsValues.SetAt(z,i,value); 00247 else descriptorsValues.SetAt(z,i,0); 00248 z++; 00249 } 00250 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralEnergyId) 00251 { 00252 value=descriptors.GetFrameD(i).GetSpectrumD().GetEnergy(); 00253 if(value>mConfig.GetDescriptorsParams()[z].threshold) 00254 descriptorsValues.SetAt(z,i,value); 00255 else descriptorsValues.SetAt(z,i,0); 00256 z++; 00257 } 00258 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralCentroidId ) 00259 { 00260 value=descriptors.GetFrameD(i).GetSpectrumD().GetCentroid(); 00261 if(value>mConfig.GetDescriptorsParams()[z].threshold) 00262 descriptorsValues.SetAt(z,i,value); 00263 else descriptorsValues.SetAt(z,i,0); 00264 z++; 00265 } 00266 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment2Id) 00267 { 00268 value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment2(); 00269 if(value>mConfig.GetDescriptorsParams()[z].threshold) 00270 descriptorsValues.SetAt(z,i,value); 00271 else descriptorsValues.SetAt(z,i,0); 00272 z++; 00273 } 00274 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment3Id) 00275 { 00276 value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment3(); 00277 if(value>mConfig.GetDescriptorsParams()[z].threshold) 00278 descriptorsValues.SetAt(z,i,value); 00279 else descriptorsValues.SetAt(z,i,0); 00280 z++; 00281 } 00282 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment4Id) 00283 { 00284 value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment4(); 00285 if(value>mConfig.GetDescriptorsParams()[z].threshold) 00286 descriptorsValues.SetAt(z,i,value); 00287 else descriptorsValues.SetAt(z,i,0); 00288 z++; 00289 } 00290 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment5Id) 00291 { 00292 value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment5(); 00293 if(value>mConfig.GetDescriptorsParams()[z].threshold) 00294 descriptorsValues.SetAt(z,i,value); 00295 else descriptorsValues.SetAt(z,i,0); 00296 z++; 00297 } 00298 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralMoment6Id) 00299 { 00300 value=descriptors.GetFrameD(i).GetSpectrumD().GetMoment6(); 00301 if(value>mConfig.GetDescriptorsParams()[z].threshold) 00302 descriptorsValues.SetAt(z,i,value); 00303 else descriptorsValues.SetAt(z,i,0); 00304 z++; 00305 } 00306 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralFlatnessId ) 00307 { 00308 value=descriptors.GetFrameD(i).GetSpectrumD().GetFlatness(); 00309 if(value>mConfig.GetDescriptorsParams()[z].threshold) 00310 descriptorsValues.SetAt(z,i,value); 00311 else descriptorsValues.SetAt(z,i,0); 00312 z++; 00313 } 00314 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==SpectralKurtosisId ) 00315 { 00316 value=descriptors.GetFrameD(i).GetSpectrumD().GetMagnitudeKurtosis(); 00317 if(value>mConfig.GetDescriptorsParams()[z].threshold) 00318 descriptorsValues.SetAt(z,i,value); 00319 else descriptorsValues.SetAt(z,i,0); 00320 z++; 00321 } 00322 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==FundamentalId ) 00323 { 00324 value=originalSegment.GetFrame(i).GetFundamental().GetFreq(); 00325 if(value>mConfig.GetDescriptorsParams()[z].threshold) 00326 descriptorsValues.SetAt(z,i,value); 00327 else descriptorsValues.SetAt(z,i,0); 00328 z++; 00329 } 00330 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==AudioEnergyId) 00331 { 00332 value=descriptors.GetFrameD(i).GetAudioFrameD().GetEnergy(); 00333 if(value>mConfig.GetDescriptorsParams()[z].threshold) 00334 descriptorsValues.SetAt(z,i,value); 00335 else descriptorsValues.SetAt(z,i,0); 00336 z++; 00337 } 00338 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==AudioVarianceId) 00339 { 00340 value=descriptors.GetFrameD(i).GetAudioFrameD().GetVariance(); 00341 if(value>mConfig.GetDescriptorsParams()[z].threshold) 00342 descriptorsValues.SetAt(z,i,value); 00343 else descriptorsValues.SetAt(z,i,0); 00344 z++; 00345 } 00346 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==AudioCentroidId) 00347 { 00348 value=descriptors.GetFrameD(i).GetAudioFrameD().GetTemporalCentroid(); 00349 if(value>mConfig.GetDescriptorsParams()[z].threshold) 00350 descriptorsValues.SetAt(z,i,value); 00351 else descriptorsValues.SetAt(z,i,0); 00352 z++; 00353 } 00354 if(z<nDescriptors&&mConfig.GetDescriptorsParams()[z]==AudioZeroCrossingRateId) 00355 { 00356 value=descriptors.GetFrameD(i).GetAudioFrameD().GetZeroCrossingRate(); 00357 if(value>mConfig.GetDescriptorsParams()[z].threshold) 00358 descriptorsValues.SetAt(z,i,value); 00359 else descriptorsValues.SetAt(z,i,0); 00360 z++; 00361 } 00362 } 00363 00364 00365 } 00366 00367 00368 void Segmentator::Algorithm(Segment& s,const Matrix& values) 00369 { 00370 00371 // Segmentation objects 00372 // segment boundaries for each parameter 00373 int nFrames=s.GetnFrames(); 00374 int nDescriptors=mConfig.GetDescriptorsParams().Size(); 00375 SegmentBoundaries segmentBoundaries(nDescriptors); 00376 // segment boundaries for each parameter 00377 segmentBoundaries.mArray.SetSize(nDescriptors); 00378 00379 for (int z=0;z<nDescriptors;z++) 00380 { 00381 segmentBoundaries.mArray[z].AddElem(PointTmpl<int,TData>(0,100));//very high value 00382 } 00383 for (int i=0; i<nFrames-4; i++) 00384 { 00385 for (int z=0;z<nDescriptors;z++) 00386 { 00387 const TData & x3 = values.GetAt(z,i+3); 00388 const TData & x2 = values.GetAt(z,i+2); 00389 const TData & x1 = values.GetAt(z,i+1); 00390 const TData & x0 = values.GetAt(z,i); 00391 // Avoid div by 0 00392 if (x2==0) continue; 00393 00394 const TData relevance = fabs((x3-x2)/x2); 00395 const TData & ratio = mConfig.GetDescriptorsParams()[z].percentil/100; 00396 00397 00398 if ((x3/x2)>(1+ratio) || 00399 (x3/x2)<(1-ratio)) 00400 { 00401 /* 00402 if (i>2) 00403 { 00404 if ((x3/x1)>(1+ratio) || (x3/x1)<(1-ratio)) 00405 { 00406 //if((i-segmentBoundaries.mArray[z][segmentBoundaries.mArray[z].Size()-1])>=mConfig.GetMinSegmentLength()) 00407 */ 00408 if (( x3>x2 && x2>x1 && x1>x0 )|| 00409 ( x3<x2 && x2<x1 && x1<x0 )) 00410 { 00411 PointTmpl<int,TData> tmpValue(i+3,relevance/ratio); 00412 segmentBoundaries.mArray[z].AddElem(tmpValue); 00413 } 00414 else if((x3/x2)>(1+2*ratio)|| 00415 (x3/x2)<(1-2*ratio)) 00416 { 00417 PointTmpl<int,TData> tmpValue(i+3,relevance/ratio); 00418 segmentBoundaries.mArray[z].AddElem(tmpValue); 00419 } 00420 00421 } 00422 /* 00423 } 00424 } 00425 else if (i>2) 00426 { 00427 if ((x3/x1)>(1+ratio) || (x3/x1)<(1-ratio)) 00428 { 00429 //if((i-segmentBoundaries.mArray[z][segmentBoundaries.mArray[z].Size()-1])>=mConfig.GetMinSegmentLength()) 00430 segmentBoundaries.mArray[z].AddElem(i); 00431 } 00432 } 00433 else if (i>3) 00434 { 00435 if ((x3/x0)>(1+ratio) || (x3/x0)<(1-ratio)) 00436 { 00437 //if((i-segmentBoundaries.mArray[z][segmentBoundaries.mArray[z].Size()-1])>=mConfig.GetMinSegmentLength()) 00438 segmentBoundaries.mArray[z].AddElem(i); 00439 } 00440 } 00441 */ 00442 /* 00443 if ( x3==0 && x2!=0 ) 00444 { 00445 Point<int,TData> tmpValue(i,100); 00446 segmentBoundaries.mArray[z].AddElem(tmpValue); 00447 } 00448 */ 00449 00450 } 00451 } 00452 DataFusion(s,segmentBoundaries); 00453 } 00454 00455 void Segmentator::DataFusion(Segment& s,const SegmentBoundaries& segmentBoundaries) 00456 { 00457 00458 // DATA FUSION (of the segmentation parameters), taken from Rossignol's Thesis 00459 // DoNothing,1) Generate probability functions for both parameters 00460 const int nFrames=s.GetnFrames(); 00461 const int nDescriptors=mConfig.GetDescriptorsParams().Size(); 00462 TData duration=s.GetFrame(0).GetDuration();/*BEWARE!Assuming equal lengthed frames*/ 00463 TData sampleRate=s.GetSamplingRate(); 00464 00465 /*Initializing Probability Matrix*/ 00466 Matrix probabilityMatrix(nFrames,nDescriptors); 00467 memset(probabilityMatrix.GetBuffer().GetPtr(),0,nFrames*nDescriptors*sizeof(TData)); 00468 00469 /*Setting probability to one wherever a segment boundary was found*/ 00470 for (int z=0;z<nDescriptors;z++) 00471 { 00472 for (int n=0;n<segmentBoundaries.mArray[z].Size();n++) 00473 probabilityMatrix.SetAt(segmentBoundaries.mArray[z][n].GetX(),z,segmentBoundaries.mArray[z][n].GetY()); 00474 } 00475 00476 // Adding probability values of different descriptors 00477 Array<TData> globalProb; 00478 for (int n=0; n<nFrames; n++) 00479 { 00480 TData tmpProb=0; 00481 for(int z=0;z<nDescriptors;z++) 00482 { 00483 tmpProb+=probabilityMatrix.GetAt(n,z); 00484 } 00485 globalProb.AddElem(tmpProb); 00486 } 00487 00488 // MERGE: Two comments, choose one 00489 // 3) Fusion of too near marks (separated 1 or 2 frames) 00490 // 3) Fusion of too near marks (separated less than the minSegmentLength) 00491 // Also compute maximun (to re-use the loop) 00492 Array<TData> prob_fusion(globalProb); 00493 { 00494 int n=0; 00495 while(globalProb[n]<=0) // Find first frame with prob>0 00496 n++; 00497 TData mag=globalProb[n]; 00498 TData gcenter=n*globalProb[n]; 00499 prob_fusion[n]=0; 00500 for (int m=n+1; m<globalProb.Size(); m++) 00501 { 00502 if (globalProb[m]<=0) continue; 00503 if ((m-n)>mConfig.GetMinSegmentLength()) 00504 { 00505 // Store information and begin another search 00506 prob_fusion[(int)(gcenter/mag)]=mag; 00507 mag=0; 00508 gcenter=0; 00509 } 00510 mag+=globalProb[m]; 00511 gcenter+=m*globalProb[m]; 00512 prob_fusion[m]=0; 00513 n=m; 00514 } 00515 } 00516 // 4) DELETE SMALL MARKS (1/7 of the max value, parameter that should be optimized...) 00517 TData max=0; 00518 for (int n=0; n<prob_fusion.Size(); n++) 00519 if (prob_fusion[n]>max) 00520 max=prob_fusion[n]; 00521 for (int n=0; n<prob_fusion.Size(); n++) 00522 // MERGE: cuidado max/100 vs. CLAM04 max/7 00523 if (prob_fusion[n]<=(max/100)) 00524 prob_fusion[n]=0; 00525 00526 Array<TData> finalSegments; // final segment boundaries in samples 00527 for (int n=0; n<prob_fusion.Size(); n++) 00528 { 00529 if (prob_fusion[n]>0) 00530 finalSegments.AddElem(n*duration*sampleRate); 00531 } 00532 00533 // Store segment boundaries information 00534 00535 if (finalSegments.Size()<=0) return; 00536 00537 for (int n=0; n<(finalSegments.Size()-1); n++) 00538 { 00539 Segment tmpSegment; 00540 tmpSegment.SetBeginTime(finalSegments[n] /sampleRate); 00541 tmpSegment.SetEndTime (finalSegments[n+1]/sampleRate); 00542 tmpSegment.SetpParent(&s); 00543 tmpSegment.SetHoldsData( false ); 00544 s.GetChildren().AddElem(tmpSegment); 00545 } 00546 00547 Segment tmpSegment; 00548 tmpSegment.SetBeginTime(finalSegments[finalSegments.Size()-1] /sampleRate); 00549 tmpSegment.SetEndTime(s.GetAudio().GetEndTime()); 00550 tmpSegment.SetpParent(&s); 00551 s.GetChildren().AddElem(tmpSegment); 00552 00553 } 00554