CLAM-Development
1.1
|
00001 /* 00002 * Copyright (c) 2001-2004 MUSIC TECHNOLOGY GROUP (MTG) 00003 * UNIVERSITAT POMPEU FABRA 00004 * 00005 * 00006 * This program is free software; you can redistribute it and/or modify 00007 * it under the terms of the GNU General Public License as published by 00008 * the Free Software Foundation; either version 2 of the License, or 00009 * (at your option) any later version. 00010 * 00011 * This program is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 * GNU General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU General Public License 00017 * along with this program; if not, write to the Free Software 00018 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00019 * 00020 */ 00021 00022 00023 00024 #include <cmath> 00025 #include "AudioDescriptors.hxx" 00026 #include "Audio.hxx" 00027 #include "OSDefines.hxx" 00028 00029 namespace CLAM { 00030 00031 00032 const TData AudioDescriptors::mEpsilon = 1e-5; 00033 00034 AudioDescriptors::AudioDescriptors(Audio* pAudio): DescriptorAbs(eNumAttr) 00035 { 00036 MandatoryInit(); 00037 mpAudio=pAudio; 00038 } 00039 00040 AudioDescriptors::AudioDescriptors(TData initVal):DescriptorAbs(eNumAttr) 00041 { 00042 MandatoryInit(); 00043 AddAll(); 00044 UpdateData(); 00045 SetMean(initVal); 00046 SetTemporalCentroid(initVal); 00047 SetEnergy(initVal); 00048 SetVariance(initVal); 00049 SetZeroCrossingRate(initVal); 00050 SetRiseTime(initVal); 00051 SetLogAttackTime(initVal); 00052 SetDecrease(initVal); 00053 } 00054 00055 void AudioDescriptors::DefaultInit() { 00056 mpAudio=0; 00057 } 00058 00059 00060 void AudioDescriptors::CopyInit(const AudioDescriptors & copied) { 00061 mpAudio=copied.mpAudio; 00062 } 00063 00064 const Audio* AudioDescriptors::GetpAudio() const { 00065 return mpAudio; 00066 } 00067 00068 00069 00070 void AudioDescriptors::SetpAudio(Audio* pAudio) { 00071 mpAudio=pAudio; 00072 //TODO: it may give problems because pointer passed 00073 InitStats(&mpAudio->GetBuffer()); 00074 mIsAttackTimeComputed=false; 00075 } 00076 00077 void AudioDescriptors::ConcreteCompute() 00078 { 00079 if (HasMean()) 00080 SetMean(mpStats->GetMean()); 00081 if (HasTemporalCentroid()) 00082 SetTemporalCentroid(mpStats->GetCentroid()/mpAudio->GetSampleRate()); 00083 if (HasEnergy()) 00084 SetEnergy(mpStats->GetEnergy()); 00085 if(HasVariance()) 00086 SetVariance(mpStats->GetVariance()); 00087 if(HasZeroCrossingRate()) 00088 SetZeroCrossingRate(ComputeZeroCrossingRate()); 00089 if(HasRiseTime()) 00090 SetRiseTime(ComputeAttackTime()); 00091 if(HasLogAttackTime()) 00092 SetLogAttackTime(ComputeLogAttackTime()); 00093 if(HasDecrease()) 00094 SetDecrease(ComputeDecrease()); 00095 } 00096 00097 TData AudioDescriptors::ComputeZeroCrossingRate() 00098 { 00099 DataArray& data = mpAudio->GetBuffer(); 00100 00101 int signChangeCount = 0; 00102 const TSize size = data.Size(); 00103 bool wasPositive = data[0] > 0.0; 00104 00105 for (int i=1; i<size; i++) 00106 { 00107 const bool isPositive = (data[i] > 0.0); 00108 if (wasPositive == isPositive) continue; 00109 00110 signChangeCount++; 00111 wasPositive = isPositive; 00112 } 00113 // Average 00114 return ((TData)signChangeCount)/size; 00115 } 00116 00117 TData AudioDescriptors::ComputeAttackTime() 00118 { 00119 if(mIsAttackTimeComputed) return mComputedAttackTime; 00120 00121 const DataArray& data = mpAudio->GetBuffer(); 00122 const TSize dataSize = mpAudio->GetSize(); 00123 00124 DataArray energyEnv; 00125 energyEnv.Resize(dataSize); 00126 energyEnv.SetSize(dataSize); 00127 00128 // Compute 20Hz lowpass filter coefficients 00129 const TData omega_c = 2*PI*20/mpAudio->GetSampleRate(); 00130 const TData alpha = (1-sin(omega_c)) / cos(omega_c); 00131 00132 const TData b0 = (1-alpha)/2; 00133 const TData a1 = -alpha; 00134 00135 // Find maximum value 00136 energyEnv[0] = b0*CLAM::Abs(data[0]); 00137 TData maxVal = energyEnv[0]; 00138 00139 for (TIndex i=1; i<dataSize; i++) { 00140 energyEnv[i] = b0*(CLAM::Abs(data[i]) + CLAM::Abs(data[i-1])) - a1*energyEnv[i-1]; 00141 if (energyEnv[i] > maxVal) maxVal = energyEnv[i]; 00142 } 00143 00144 // Locate start and stop of attack 00145 const TData startThreshold = 0.02*maxVal; 00146 const TData stopThreshold = 0.80*maxVal; 00147 00148 TIndex startIdx; 00149 for (startIdx=0; startIdx<dataSize; startIdx++) { 00150 if (energyEnv[startIdx] > startThreshold) break; 00151 } 00152 00153 TIndex stopIdx; 00154 for (stopIdx=startIdx; stopIdx<dataSize; stopIdx++) { 00155 if (energyEnv[stopIdx] > stopThreshold) break; 00156 } 00157 00158 mComputedAttackTime=(stopIdx - startIdx) / mpAudio->GetSampleRate(); 00159 mIsAttackTimeComputed=true; 00160 return mComputedAttackTime; 00161 } 00162 00163 00164 TData AudioDescriptors::ComputeLogAttackTime() 00165 { 00166 ComputeAttackTime(); 00167 if (mComputedAttackTime==0) 00168 return log10(mEpsilon); 00169 return log10(mComputedAttackTime); 00170 } 00171 00172 00173 TData AudioDescriptors::ComputeDecrease() 00174 { 00175 const DataArray& data = mpAudio->GetBuffer(); 00176 const TSize dataSize = mpAudio->GetSize(); 00177 00178 // Compute 20Hz lowpass filter coefficients 00179 const double omega_c = 2*PI*20/mpAudio->GetSampleRate(); 00180 const double alpha = (1-sin(omega_c)) / cos(omega_c); 00181 00182 const double b0 = (1-alpha)/2; 00183 const double a1 = -alpha; 00184 00185 // Find maximum value 00186 double y = b0*CLAM::Abs(data[0]); 00187 TData correctedY = y<mEpsilon ? mEpsilon : y; 00188 double logEnv = log10(correctedY); 00189 00190 TData maxVal = logEnv; 00191 TSize maxIdx = 0; 00192 double sumXX = 0; 00193 double sumY = 0; 00194 double sumXY = 0; 00195 00196 for (TIndex i=1; i<dataSize; i++) 00197 { 00198 y = b0*(CLAM::Abs(data[i-1]) + CLAM::Abs(data[i])) - a1*y; 00199 correctedY = y<mEpsilon ? mEpsilon : y; 00200 const double logEnv = log10(correctedY); 00201 00202 if (logEnv > maxVal) 00203 { 00204 maxVal = logEnv; 00205 maxIdx = i; 00206 sumXX = 0; 00207 sumY = 0; 00208 sumXY = 0; 00209 } 00210 sumY += logEnv; 00211 sumXY += i*logEnv; 00212 sumXX += i*i; 00213 } 00214 00215 // Compute means and gradient of decay part 00216 const long N = dataSize - maxIdx; 00217 TData sumX = N*(N + 2*maxIdx - 1)/2; 00218 00219 TData num = N * sumXY - sumX * sumY; 00220 TData den = N * sumXX - sumX * sumX; 00221 00222 return (num / den) * mpAudio->GetSampleRate(); 00223 } 00224 00225 00226 AudioDescriptors operator * (const AudioDescriptors& a,TData mult) 00227 { 00228 00229 AudioDescriptors tmpD(a); 00230 00231 if (a.HasMean()) 00232 { 00233 tmpD.SetMean(a.GetMean()*mult); 00234 } 00235 if (a.HasTemporalCentroid()) 00236 { 00237 tmpD.SetTemporalCentroid(a.GetTemporalCentroid()*mult); 00238 } 00239 if (a.HasEnergy()) 00240 { 00241 tmpD.SetEnergy(a.GetEnergy()*mult); 00242 } 00243 if(a.HasVariance()) 00244 { 00245 tmpD.SetVariance(a.GetVariance()*mult); 00246 } 00247 if(a.HasZeroCrossingRate()) 00248 { 00249 tmpD.SetZeroCrossingRate(a.GetZeroCrossingRate()*mult); 00250 } 00251 if(a.HasRiseTime()) 00252 { 00253 tmpD.SetRiseTime(a.GetRiseTime()*mult); 00254 } 00255 if(a.HasLogAttackTime()) 00256 { 00257 tmpD.SetLogAttackTime(a.GetLogAttackTime()*mult); 00258 } 00259 if(a.HasDecrease()) 00260 { 00261 tmpD.SetDecrease(a.GetDecrease()*mult); 00262 } 00263 return tmpD; 00264 } 00265 00266 AudioDescriptors operator * (TData mult, const AudioDescriptors& a) 00267 { 00268 return a*mult; 00269 } 00270 00271 AudioDescriptors operator * (const AudioDescriptors& a,const AudioDescriptors& b) 00272 { 00273 AudioDescriptors tmpD; 00274 00275 if (a.HasMean() && b.HasMean() ) 00276 { 00277 tmpD.AddMean(); 00278 tmpD.UpdateData(); 00279 tmpD.SetMean(a.GetMean()*b.GetMean() ); 00280 } 00281 if (a.HasTemporalCentroid() && b.HasTemporalCentroid() ) 00282 { 00283 tmpD.AddTemporalCentroid(); 00284 tmpD.UpdateData(); 00285 tmpD.SetTemporalCentroid(a.GetTemporalCentroid()*b.GetTemporalCentroid() ); 00286 } 00287 if (a.HasEnergy() && b.HasEnergy() ) 00288 { 00289 tmpD.AddEnergy(); 00290 tmpD.UpdateData(); 00291 tmpD.SetEnergy(a.GetEnergy()*b.GetEnergy() ); 00292 } 00293 if(a.HasVariance() && b.HasVariance() ) 00294 { 00295 tmpD.AddVariance(); 00296 tmpD.UpdateData(); 00297 tmpD.SetVariance(a.GetVariance()*b.GetVariance() ); 00298 } 00299 if(a.HasZeroCrossingRate() && b.HasZeroCrossingRate() ) 00300 { 00301 tmpD.AddZeroCrossingRate(); 00302 tmpD.UpdateData(); 00303 tmpD.SetZeroCrossingRate(a.GetZeroCrossingRate()*b.GetZeroCrossingRate() ); 00304 } 00305 if(a.HasRiseTime() && b.HasRiseTime() ) 00306 { 00307 tmpD.AddRiseTime(); 00308 tmpD.UpdateData(); 00309 tmpD.SetRiseTime(a.GetRiseTime()*b.GetRiseTime() ); 00310 } 00311 if(a.HasLogAttackTime() && b.HasLogAttackTime() ) 00312 { 00313 tmpD.AddLogAttackTime(); 00314 tmpD.UpdateData(); 00315 tmpD.SetLogAttackTime(a.GetLogAttackTime()*b.GetLogAttackTime() ); 00316 } 00317 if(a.HasDecrease() && b.HasDecrease() ) 00318 { 00319 tmpD.AddDecrease(); 00320 tmpD.UpdateData(); 00321 tmpD.SetDecrease(a.GetDecrease()*b.GetDecrease() ); 00322 } 00323 return tmpD; 00324 } 00325 00326 AudioDescriptors operator + (const AudioDescriptors& a,const AudioDescriptors& b) 00327 { 00328 AudioDescriptors tmpD; 00329 00330 if (a.HasMean() && b.HasMean() ) 00331 { 00332 tmpD.AddMean(); 00333 tmpD.UpdateData(); 00334 tmpD.SetMean(a.GetMean()+b.GetMean() ); 00335 } 00336 if (a.HasTemporalCentroid() && b.HasTemporalCentroid() ) 00337 { 00338 tmpD.AddTemporalCentroid(); 00339 tmpD.UpdateData(); 00340 tmpD.SetTemporalCentroid(a.GetTemporalCentroid()+b.GetTemporalCentroid() ); 00341 } 00342 if (a.HasEnergy() && b.HasEnergy() ) 00343 { 00344 tmpD.AddEnergy(); 00345 tmpD.UpdateData(); 00346 tmpD.SetEnergy(a.GetEnergy()+b.GetEnergy() ); 00347 } 00348 if(a.HasVariance() && b.HasVariance() ) 00349 { 00350 tmpD.AddVariance(); 00351 tmpD.UpdateData(); 00352 tmpD.SetVariance(a.GetVariance()+b.GetVariance() ); 00353 } 00354 if(a.HasZeroCrossingRate() && b.HasZeroCrossingRate() ) 00355 { 00356 tmpD.AddZeroCrossingRate(); 00357 tmpD.UpdateData(); 00358 tmpD.SetZeroCrossingRate(a.GetZeroCrossingRate()+b.GetZeroCrossingRate() ); 00359 } 00360 if(a.HasRiseTime() && b.HasRiseTime() ) 00361 { 00362 tmpD.AddRiseTime(); 00363 tmpD.UpdateData(); 00364 tmpD.SetRiseTime(a.GetRiseTime()+b.GetRiseTime() ); 00365 } 00366 if(a.HasLogAttackTime() && b.HasLogAttackTime() ) 00367 { 00368 tmpD.AddLogAttackTime(); 00369 tmpD.UpdateData(); 00370 tmpD.SetLogAttackTime(a.GetLogAttackTime()+b.GetLogAttackTime() ); 00371 } 00372 if(a.HasDecrease() && b.HasDecrease() ) 00373 { 00374 tmpD.AddDecrease(); 00375 tmpD.UpdateData(); 00376 tmpD.SetDecrease(a.GetDecrease()+b.GetDecrease() ); 00377 } 00378 return tmpD; 00379 00380 } 00381 00382 AudioDescriptors operator - (const AudioDescriptors& a,const AudioDescriptors& b) 00383 { 00384 return a+((-1)*b); 00385 } 00386 00387 AudioDescriptors operator / (const AudioDescriptors& a,TData div) 00388 { 00389 return a*(1/div); 00390 } 00391 00392 } 00393