CLAM-Development
1.1
|
00001 /* 00002 * Copyright (c) 2004 MUSIC TECHNOLOGY GROUP (MTG) 00003 * UNIVERSITAT POMPEU FABRA 00004 * 00005 * 00006 * This program is free software; you can redistribute it and/or modify 00007 * it under the terms of the GNU General Public License as published by 00008 * the Free Software Foundation; either version 2 of the License, or 00009 * (at your option) any later version. 00010 * 00011 * This program is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 * GNU General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU General Public License 00017 * along with this program; if not, write to the Free Software 00018 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00019 * 00020 */ 00021 00022 #include "CepstralTransform.hxx" 00023 #include "OSDefines.hxx" 00024 #include "Assert.hxx" 00025 #include "ProcessingFactory.hxx" 00026 00027 namespace CLAM 00028 { 00029 namespace Hidden 00030 { 00031 static const char* metadata[] = { 00032 "key", "CepstralTransform", 00033 "category", "Analysis", 00034 "description", "CepstralTransform", 00035 0 00036 }; 00037 static FactoryRegistrator<ProcessingFactory, CepstralTransform> reg = metadata; 00038 } 00039 00040 void CepstralTransformConfig::DefaultInit() 00041 { 00042 AddAll(); 00043 UpdateData(); 00044 SetNumMelCoefficients(20); 00045 SetNumCepstrumCoefficients(20); 00046 SetUseBase10(false); 00047 } 00048 00049 CepstralTransform::CepstralTransform( ) 00050 : mIn("Mel Spectrum", this) 00051 , mOut("Mel Cepstrum", this) 00052 { 00053 Configure(CepstralTransformConfig()); 00054 } 00055 00056 CepstralTransform::CepstralTransform( const CepstralTransformConfig& cfg ) 00057 : mIn("Mel Spectrum", this) 00058 , mOut("Mel Cepstrum", this) 00059 { 00060 Configure( cfg ); 00061 } 00062 00063 bool CepstralTransform::ConcreteConfigure( const ProcessingConfig& cfg ) 00064 { 00065 CopyAsConcreteConfig( mConfig, cfg ); 00066 00067 mLogBuffer.Resize(mConfig.GetNumMelCoefficients()); 00068 mLogBuffer.SetSize(mConfig.GetNumMelCoefficients()); 00069 00070 return true; 00071 } 00072 00073 bool CepstralTransform::Do() 00074 { 00075 const MelSpectrum & spectrum = mIn.GetData(); 00076 MelCepstrum & cepstrum = mOut.GetData(); 00077 bool ok = Do(spectrum, cepstrum); 00078 mIn.Consume(); 00079 mOut.Produce(); 00080 return ok; 00081 } 00082 00083 bool CepstralTransform::ConcreteStart() 00084 { 00085 return true; 00086 } 00087 00088 bool CepstralTransform::Do( const MelSpectrum& melSpec, MelCepstrum& melCeps ) 00089 { 00090 if ( !AbleToExecute() ) return false; // object was disabled 00091 00092 const DataArray& melCoeffs = melSpec.GetCoefficients(); 00093 DataArray& cepstrumCoeffs = melCeps.GetCoefficients(); 00094 const TSize numMelCoefficients = melCoeffs.Size(); 00095 const TSize numCepstrumCoeffs = mConfig.GetNumCepstrumCoefficients(); 00096 00097 if ( cepstrumCoeffs.Size() < numCepstrumCoeffs ) 00098 { 00099 cepstrumCoeffs.Resize(numCepstrumCoeffs); 00100 cepstrumCoeffs.SetSize(numCepstrumCoeffs); 00101 } 00102 00103 melCeps.SetCenterTime(melSpec.GetCenterTime()); 00104 melCeps.SetLowCutoff(melSpec.GetLowCutoff()); 00105 melCeps.SetHighCutoff(melSpec.GetHighCutoff()); 00106 00107 /* Take the logarithm of the Mel spectrum. NOTE: What is the 00108 significance of log vs. log10? */ 00109 const TData melFloor = 1.0; 00110 const bool useBase10 = mConfig.GetUseBase10(); 00111 00112 if ( mLogBuffer.Size()!=numMelCoefficients) 00113 { 00114 // TODO: This has realtime penalty 00115 mLogBuffer.Resize(numMelCoefficients); 00116 mLogBuffer.SetSize(numMelCoefficients); 00117 } 00118 for (TIndex i=0; i<numMelCoefficients; i++) { 00119 if (melCoeffs[i] < melFloor) 00120 mLogBuffer[i] = useBase10 ? log10(melFloor) : log(melFloor); 00121 else 00122 mLogBuffer[i] = useBase10 ? log10(melCoeffs[i]) : log(melCoeffs[i]); 00123 } 00124 00125 /* Take the DCT of the logarithm. */ 00126 const TData piord = PI / (TData)numMelCoefficients; 00127 const TData scale = sqrt(2.0/(TData)numMelCoefficients); 00128 00129 // Compute N-1 last coefficients. 00130 TData freq = 0; 00131 for (TIndex i=1; i<numCepstrumCoeffs; i++) 00132 { 00133 freq = i*piord; 00134 cepstrumCoeffs[i] = 0.0; 00135 for (TIndex j=0; j<numMelCoefficients; j++) 00136 cepstrumCoeffs[i] += mLogBuffer[j] * cos(freq*(j+1 - 0.5)); 00137 cepstrumCoeffs[i] *= scale; 00138 } 00139 00140 // Compute first coefficient. 00141 for (TIndex j=0; j<numMelCoefficients; j++) 00142 cepstrumCoeffs[0] += mLogBuffer[j]; 00143 cepstrumCoeffs[0] *= scale; 00144 00145 00146 /* NOTE: How about implementing cepstral liftering from HTK? Probably not. */ 00147 00148 return true; 00149 } 00150 00151 } 00152