CLAM-Development
1.1
|
00001 /* 00002 * Copyright (c) 2001-2004 MUSIC TECHNOLOGY GROUP (MTG) 00003 * UNIVERSITAT POMPEU FABRA 00004 * 00005 * 00006 * This program is free software; you can redistribute it and/or modify 00007 * it under the terms of the GNU General Public License as published by 00008 * the Free Software Foundation; either version 2 of the License, or 00009 * (at your option) any later version. 00010 * 00011 * This program is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 * GNU General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU General Public License 00017 * along with this program; if not, write to the Free Software 00018 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00019 * 00020 */ 00021 #include "DataTypes.hxx" 00022 #include "CLAM_Math.hxx" 00023 #include "Normalization.hxx" 00024 #include "Stats.hxx" 00025 00026 00027 00028 namespace CLAM 00029 { 00030 00031 00032 void NormalizationConfig::DefaultInit() 00033 { 00034 AddType(); 00035 AddFrameSize(); 00036 UpdateData(); 00037 SetType(1); 00038 SetFrameSize(2205);//0.05s at 44.1k 00039 00040 } 00041 00042 00043 Normalization::Normalization() 00044 : mIsSilenceCtrl( "Silence", this ) 00045 { 00046 Configure(NormalizationConfig()); 00047 } 00048 00049 Normalization::Normalization(NormalizationConfig& c) 00050 : mIsSilenceCtrl( "Silence", this ) 00051 { 00052 Configure(c); 00053 } 00054 00055 Normalization::~Normalization() {} 00056 00057 00058 bool Normalization::ConcreteConfigure(const ProcessingConfig& c) 00059 { 00060 CopyAsConcreteConfig(mConfig,c); 00061 00062 mType=mConfig.GetType(); 00063 mFrameSize=mConfig.GetFrameSize(); 00064 00065 return true; 00066 } 00067 00068 bool Normalization::Do(void) 00069 { 00070 return false; 00071 } 00072 00073 bool Normalization::Do(Audio &in) 00074 { 00075 TData scaleFactor = 0; 00076 00077 //Type #1: normalizes according to the max energy 00078 //Type #2: normalizes according to the average energy 00079 //Type #3: normalizes according to the threshold under which lies percent% of 00080 //the energy values that are not silence 00081 00082 if ( mType == 1 ) 00083 scaleFactor = ComputeScaleFactorFromMaxEnergy( in.GetBuffer() ); 00084 else if ( mType == 2 ) 00085 scaleFactor = ComputeScaleFactorFromAvgEnergy( in.GetBuffer() ); 00086 else if ( mType == 3 ) 00087 scaleFactor = ComputeScaleFactorFromDominantEnergy( in.GetBuffer() ); 00088 00089 const TData invScaleFactor = 1.0 / scaleFactor; 00090 DataArray& inBufferSamples = in.GetBuffer(); 00091 00092 for (int n=0; n<in.GetSize(); n++) 00093 inBufferSamples[n]*=invScaleFactor; 00094 00095 return true; 00096 } 00097 00098 00099 bool Normalization::Do(const Audio& unnorm, Audio& norm) 00100 { 00101 CLAM_ASSERT( unnorm.GetSize() == norm.GetSize(), 00102 "Normalization::Do() : input and output audio sizes must match" ); 00103 00104 TData scaleFactor = 0; 00105 00106 //Type #1: normalizes according to the max energy 00107 //Type #2: normalizes according to the average energy 00108 //Type #3: normalizes according to the threshold under which lies percent% of 00109 //the energy values that are not silence 00110 00111 if ( mType == 1 ) 00112 scaleFactor = ComputeScaleFactorFromMaxEnergy( unnorm.GetBuffer() ); 00113 else if ( mType == 2 ) 00114 scaleFactor = ComputeScaleFactorFromAvgEnergy( unnorm.GetBuffer() ); 00115 else if ( mType == 3 ) 00116 scaleFactor = ComputeScaleFactorFromDominantEnergy( unnorm.GetBuffer() ); 00117 00118 const TData invScaleFactor = 1.0 / scaleFactor; 00119 DataArray& outBufferSamples = norm.GetBuffer(); 00120 const DataArray& inBufferSamples = unnorm.GetBuffer(); 00121 00122 for (int n=0; n<norm.GetSize(); n++) 00123 outBufferSamples[n]=inBufferSamples[n]*invScaleFactor; 00124 00125 return true; 00126 } 00127 00128 00129 TData Normalization::ComputeScaleFactorFromMaxEnergy( DataArray& inAudio ) 00130 { 00131 TIndex p = 0; 00132 const TIndex end = inAudio.Size() - mFrameSize; 00133 DataArray chunk; 00134 TData maxEnergy = 0.0; 00135 const TData quantizationThreshold = 0.3 * TData( mFrameSize ) / TData( 4410 ); 00136 00137 do 00138 { 00139 chunk.SetPtr( inAudio.GetPtr()+p, mFrameSize ); 00140 /* unused: TSize size = chunk.GetSize(); */ 00141 DataArray moments(4); 00142 moments.SetSize(4); 00143 Stats myStats(&chunk); 00144 myStats.GetMoments(moments, FifthOrder); 00145 00146 TData currentChunkEnergy = myStats.GetEnergy(); 00147 00148 //remove silence 00149 if ( currentChunkEnergy > quantizationThreshold ) //seems to be just above noise due to 8 bits quantization 00150 { 00151 if(maxEnergy<currentChunkEnergy) maxEnergy=currentChunkEnergy; 00152 } 00153 00154 p += mFrameSize; 00155 00156 } while (p <= end ); 00157 00158 // Enjoy the Silence... 00159 if ( maxEnergy <= 1e-7 ) 00160 { 00161 mIsSilenceCtrl.SendControl( true ); 00162 return 1.0; 00163 } 00164 00165 mIsSilenceCtrl.SendControl(false); 00166 00167 return CLAM_sqrt( maxEnergy / TData(mFrameSize ) ); 00168 00169 00170 } 00171 00172 TData Normalization::ComputeScaleFactorFromAvgEnergy( DataArray& inAudio ) 00173 { 00174 00175 TIndex p = 0; 00176 const TIndex end = inAudio.Size() - mFrameSize; 00177 DataArray chunk; 00178 TData avgEnergy = 0.0; 00179 const TData quantizationThreshold = 0.3 * TData( mFrameSize ) / TData( 4410 ); 00180 00181 do 00182 { 00183 chunk.SetPtr( inAudio.GetPtr()+p, mFrameSize ); 00184 DataArray moments(4); 00185 moments.SetSize(4); 00186 Stats myStats(&chunk); 00187 myStats.GetMoments(moments, FifthOrder); 00188 00189 TData currentChunkEnergy = myStats.GetEnergy(); 00190 00191 //remove silence 00192 if ( currentChunkEnergy > quantizationThreshold ) //seems to be just above noise due to 8 bits quantization 00193 { 00194 avgEnergy += currentChunkEnergy; 00195 } 00196 00197 p += mFrameSize; 00198 00199 } while (p <= end ); 00200 00201 avgEnergy /= TData( inAudio.Size() ); 00202 00203 // Enjoy the Silence... 00204 if ( avgEnergy <= 1e-7 ) 00205 { 00206 mIsSilenceCtrl.SendControl( true ); 00207 return 1.0; 00208 } 00209 00210 mIsSilenceCtrl.SendControl(false); 00211 00212 return CLAM_sqrt( avgEnergy ); 00213 00214 } 00215 00216 TData Normalization::ComputeScaleFactorFromDominantEnergy( DataArray& inAudio ) 00217 { 00218 TIndex p = 0; 00219 const TIndex end = inAudio.Size() - mFrameSize; 00220 DataArray chunk; 00221 DataArray chunksEnergies; 00222 const TData quantizationThreshold = 0.3 * TData( mFrameSize ) / TData( 4410 ); 00223 00224 do 00225 { 00226 chunk.SetPtr( inAudio.GetPtr()+p, mFrameSize ); 00227 DataArray moments(4); 00228 moments.SetSize(4); 00229 Stats myStats(&chunk); 00230 myStats.GetMoments(moments, FifthOrder); 00231 00232 TData currentChunkEnergy = myStats.GetEnergy(); 00233 00234 //remove silence 00235 if ( currentChunkEnergy > quantizationThreshold ) //seems to be just above noise due to 8 bits quantization 00236 { 00237 chunksEnergies.AddElem( currentChunkEnergy ); 00238 } 00239 00240 p += mFrameSize; 00241 } while (p <= end ); 00242 00243 // Enjoy the silence... 00244 if ( chunksEnergies.Size() == 0 ) 00245 { 00246 mIsSilenceCtrl.SendControl( true ); 00247 return 1.0; 00248 } 00249 00250 std::sort( chunksEnergies.GetPtr(), chunksEnergies.GetPtr()+chunksEnergies.Size() ); 00251 00252 //find the threshold under which lies percent% of the energy values 00253 //that are not silence 00254 00255 int percentage = 90; 00256 00257 int i = ( chunksEnergies.Size()*percentage ) / 100; 00258 00259 i = ( i == 0 ) ? i : i - 1; 00260 00261 mIsSilenceCtrl.SendControl(false); 00262 00263 return CLAM_sqrt( chunksEnergies[i]/TData(mFrameSize) ); 00264 00265 } 00266 00267 void Normalization::CheckSilence( int size ) 00268 { 00269 if (size==0) 00270 mIsSilenceCtrl.SendControl(true); 00271 else 00272 mIsSilenceCtrl.SendControl(false); 00273 } 00274 00275 00276 } 00277