CLAM-Development
1.1
|
00001 /* 00002 * Copyright (c) 2004 MUSIC TECHNOLOGY GROUP (MTG) 00003 * UNIVERSITAT POMPEU FABRA 00004 * 00005 * 00006 * This program is free software; you can redistribute it and/or modify 00007 * it under the terms of the GNU General Public License as published by 00008 * the Free Software Foundation; either version 2 of the License, or 00009 * (at your option) any later version. 00010 * 00011 * This program is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 * GNU General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU General Public License 00017 * along with this program; if not, write to the Free Software 00018 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00019 * 00020 */ 00021 00022 #include "MelFilterBank.hxx" 00023 #include "ProcessingFactory.hxx" 00024 00025 namespace CLAM 00026 { 00027 00028 namespace Hidden 00029 { 00030 static const char * metadata[] = { 00031 "key", "MelFilterBank", 00032 "category", "Analysis", 00033 "description", "MelFilterBank", 00034 0 00035 }; 00036 static FactoryRegistrator<ProcessingFactory, MelFilterBank> reg = metadata; 00037 } 00038 00039 void MelFilterBankConfig::DefaultInit() 00040 { 00041 AddAll(); 00042 UpdateData(); 00043 SetUsePower(false); 00044 SetSpectrumSize(513); 00045 SetNumBands(20); 00046 SetSpectralRange(22050); 00047 SetLowCutoff(0); 00048 SetHighCutoff(11025); 00049 } 00050 00051 MelFilterBank::MelFilterBank() 00052 : mIn("Spectrum", this) 00053 , mOut("Mel Spectrum", this) 00054 { 00055 Configure(MelFilterBankConfig()); 00056 } 00057 00058 MelFilterBank::MelFilterBank( const MelFilterBankConfig& cfg ) 00059 : mIn("Spectrum", this) 00060 , mOut("Mel Spectrum", this) 00061 { 00062 Configure( cfg ); 00063 } 00064 00065 MelFilterBank::~MelFilterBank() 00066 { 00067 } 00068 00069 bool MelFilterBank::Do() 00070 { 00071 const Spectrum & spectrum = mIn.GetData(); 00072 MelSpectrum & melSpectrum = mOut.GetData(); 00073 bool ok = Do(spectrum,melSpectrum); 00074 mIn.Consume(); 00075 mOut.Produce(); 00076 return ok; 00077 } 00078 00079 bool MelFilterBank::Do( const Spectrum& spec, MelSpectrum& melSpec ) 00080 { 00081 if ( !AbleToExecute() ) return false; // Object is disabled 00082 00083 const DataArray& specMag = spec.GetMagBuffer(); 00084 00085 CLAM_ASSERT( specMag.Size() == mConfig.GetSpectrumSize(), 00086 "Spectrum doesn't have the expected size!" ); 00087 CLAM_ASSERT( spec.GetSpectralRange() == mConfig.GetSpectralRange(), 00088 "Spectrum doesn't have the expected frequency range!" ); 00089 CLAM_ASSERT( spec.GetScale() == EScale::eLinear, 00090 "Spectrum is not in linear scale!" ); 00091 00092 const TSize numBands = mConfig.GetNumBands(); 00093 00094 melSpec.SetNumBands(numBands); 00095 melSpec.SetLowCutoff(mConfig.GetLowCutoff()); 00096 melSpec.SetHighCutoff(mConfig.GetHighCutoff()); 00097 melSpec.SetSpectralRange(spec.GetSpectralRange()); 00098 00099 DataArray& melCoeffs = melSpec.GetCoefficients(); 00100 00101 if ( melCoeffs.Size() != numBands ) { 00102 melCoeffs.Resize( numBands ); 00103 melCoeffs.SetSize( numBands ); 00104 } 00105 for( int i = 0; i < numBands; i++) 00106 melCoeffs[i] = 0; 00107 00108 const bool usePower = mConfig.GetUsePower(); 00109 00110 for (TIndex i=mLowIdx; i<=mHighIdx; i++) { 00111 00112 // NOTE: What is the significance of using square? 00113 TData mag = specMag[i]; 00114 if (usePower) mag *=mag; 00115 00116 // Get the Mel band number. 00117 TIndex bandIdx = mMelBand[i]; 00118 00119 // Weight spectrum sample with triangular window. 00120 TData weightedMag = mFilterWeights[i] * mag; 00121 00122 // Add spectrum sample contribution to apropriate bands. 00123 if (bandIdx >= 0) { 00124 melCoeffs[bandIdx] += weightedMag; 00125 } 00126 00127 if ((bandIdx+1) < numBands) { 00128 melCoeffs[bandIdx+1] += mag - weightedMag; 00129 } 00130 } 00131 return true; 00132 } 00133 00134 bool MelFilterBank::ConcreteConfigure( const ProcessingConfig& cfg ) 00135 { 00136 CopyAsConcreteConfig( mConfig, cfg ); 00137 InitializeTables(); 00138 return true; 00139 } 00140 00141 TData MelFilterBank::Mel( TData linFreq ) 00142 { 00143 /* When using log instead of log10, the scaling factor is 1127 00144 instead of 2595. */ 00145 00146 /* NOTE: The discrepancy between HTK and CLAM originates here, or 00147 more specificaly from multiplying the argument passed to this 00148 method with deltaFreq. (Ref. fb.fres in HTK source code.) */ 00149 00150 return 1127.0*log(1.0 + linFreq/700.0); 00151 } 00152 00153 void MelFilterBank::InitializeTables() 00154 { 00155 00156 /* Filterbank cutoff frequencies in Hz. */ 00157 const TData lowCutoff = mConfig.GetLowCutoff(); 00158 const TData highCutoff = mConfig.GetHighCutoff(); 00159 00160 /* Filterbank cutoff frequencies in Mel scale. */ 00161 const TData melLowCutoff = Mel(lowCutoff); 00162 const TData melHighCutoff = Mel(highCutoff); 00163 const TData melFreqRange = melHighCutoff - melLowCutoff; 00164 00165 /* Index frequency resolution. */ 00166 const TSize specSize = mConfig.GetSpectrumSize(); 00167 const TData specRange = mConfig.GetSpectralRange(); 00168 00169 const TData deltaFreq = specRange/specSize; 00170 00171 00172 /* Spectrum index of lowest filterbank frequency (must be 1 or 00173 more). */ 00174 mLowIdx = (TIndex)(lowCutoff/deltaFreq + 1.5); 00175 00176 00177 if (mLowIdx < 1) mLowIdx = 1; 00178 00179 /* Spectrum index of highest filterbank frequency (must not exceed 00180 spectrum size). */ 00181 mHighIdx = (TIndex)(highCutoff/deltaFreq - 0.5); 00182 00183 if (mHighIdx >= specSize) mHighIdx = specSize-1; 00184 00185 00186 /* Table of filterbank centre frequencies. */ 00187 const TSize maxBands = mConfig.GetNumBands() + 1; 00188 00189 TData* centreFreq = new TData[maxBands]; 00190 00191 for (TIndex i=0; i<maxBands; i++) { 00192 centreFreq[i] = ((i+1)/(TData)maxBands)*melFreqRange + melLowCutoff; 00193 } 00194 00195 00196 /* Table for converting linear indexes to Mel band numbers. */ 00197 mMelBand.Resize( specSize ); 00198 mMelBand.SetSize( specSize ); 00199 00200 TData melFreq = 0; 00201 TIndex bandIdx = 0; 00202 for (TIndex i=0; i<specSize; i++) { 00203 00204 if (i<mLowIdx || i>mHighIdx) { 00205 00206 // Index is outside the desired range. 00207 mMelBand[i] = -1; 00208 } else { 00209 00210 melFreq = Mel((TData)i*deltaFreq); 00211 00212 // Select the band of the closest centre frequency beneath. 00213 00214 // NOTE: The condition bandIdx<maxBands may cause an index out 00215 // of range error in Do(...)!!! 00216 00217 while (centreFreq[bandIdx]<melFreq && bandIdx<maxBands) bandIdx++; 00218 mMelBand[i] = bandIdx-1; 00219 } 00220 00221 } 00222 00223 00224 /* Table of triangular filterbank window weights. */ 00225 mFilterWeights.Resize( specSize ); 00226 mFilterWeights.SetSize( specSize ); 00227 00228 for (TIndex i=0; i<specSize; i++) { 00229 bandIdx = mMelBand[i]; 00230 00231 if (i<mLowIdx || i>mHighIdx) { 00232 mFilterWeights[i] = 0.0; 00233 } else { 00234 00235 if (bandIdx >= 0) { 00236 mFilterWeights[i] = (centreFreq[bandIdx+1] - Mel((TData)i*deltaFreq)) 00237 / (centreFreq[bandIdx+1] - centreFreq[bandIdx]); 00238 00239 } else { 00240 mFilterWeights[i] = (centreFreq[0] - Mel((TData)i*deltaFreq)) 00241 / (centreFreq[0] - melLowCutoff); 00242 00243 } 00244 } 00245 00246 } // End for 00247 00248 delete [] centreFreq; 00249 } 00250 00251 } 00252