qm-dsp 1.8
MFCC.cpp
Go to the documentation of this file.
1/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3/*
4 QM DSP Library
5
6 Centre for Digital Music, Queen Mary, University of London.
7 This file copyright 2005 Nicolas Chetry, copyright 2008 QMUL.
8
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License as
11 published by the Free Software Foundation; either version 2 of the
12 License, or (at your option) any later version. See the file
13 COPYING included with this distribution for more information.
14*/
15
16#include <cmath>
17#include <cstdlib>
18#include <cstring>
19
20#include "MFCC.h"
21#include "dsp/transforms/FFT.h"
22#include "base/Window.h"
23
25{
26 int i,j;
27
28 /* Calculate at startup */
29 double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
30
31 lowestFrequency = 66.6666666;
32 linearFilters = 13;
33 linearSpacing = 66.66666666;
34 logFilters = 27;
35 logSpacing = 1.0711703;
36
37 /* FFT and analysis window sizes */
38 fftSize = config.fftsize;
39 fft = new FFTReal(fftSize);
40
42 logPower = config.logpower;
43
44 samplingRate = config.FS;
45
46 /* The number of cepstral componenents */
47 nceps = config.nceps;
48
49 /* Set if user want C0 */
50 WANT_C0 = (config.want_c0 ? 1 : 0);
51
52 /* Allocate space for feature vector */
53 if (WANT_C0 == 1) {
54 ceps = (double*)calloc(nceps+1, sizeof(double));
55 } else {
56 ceps = (double*)calloc(nceps, sizeof(double));
57 }
58
59 /* Allocate space for local vectors */
60 mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
61 for (i = 0; i < nceps+1; i++) {
62 mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
63 }
64
65 mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
66 for (i = 0; i < totalFilters; i++) {
67 mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
68 }
69
70 freqs = (double*)calloc(totalFilters+2,sizeof(double));
71
72 lower = (double*)calloc(totalFilters,sizeof(double));
73 center = (double*)calloc(totalFilters,sizeof(double));
74 upper = (double*)calloc(totalFilters,sizeof(double));
75
76 triangleHeight = (double*)calloc(totalFilters,sizeof(double));
77 fftFreqs = (double*)calloc(fftSize,sizeof(double));
78
79 for (i = 0; i < linearFilters; i++) {
80 freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
81 }
82
83 for (i = linearFilters; i < totalFilters+2; i++) {
84 freqs[i] = freqs[linearFilters-1] *
85 pow(logSpacing, (double)(i-linearFilters+1));
86 }
87
88 /* Define lower, center and upper */
89 memcpy(lower, freqs,totalFilters*sizeof(double));
90 memcpy(center, &freqs[1],totalFilters*sizeof(double));
91 memcpy(upper, &freqs[2],totalFilters*sizeof(double));
92
93 for (i=0;i<totalFilters;i++){
94 triangleHeight[i] = 2./(upper[i]-lower[i]);
95 }
96
97 for (i=0;i<fftSize;i++){
98 fftFreqs[i] = ((double) i / ((double) fftSize ) *
99 (double) samplingRate);
100 }
101
102 /* Build now the mccFilterWeight matrix */
103 for (i=0;i<totalFilters;i++){
104
105 for (j=0;j<fftSize;j++) {
106
107 if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
108
109 mfccFilterWeights[i][j] = triangleHeight[i] *
110 (fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
111
112 }
113 else
114 {
115 mfccFilterWeights[i][j] = 0.0;
116 }
117
118 if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
119
121 + triangleHeight[i] * (upper[i]-fftFreqs[j])
122 / (upper[i]-center[i]);
123 }
124 else
125 {
126 mfccFilterWeights[i][j] = mfccFilterWeights[i][j] + 0.0;
127 }
128 }
129
130 }
131
132 /*
133 * We calculate now mfccDCT matrix
134 * NB: +1 because of the DC component
135 */
136
137 const double pi = 3.14159265358979323846264338327950288;
138
139 for (i = 0; i < nceps+1; i++) {
140 for (j = 0; j < totalFilters; j++) {
141 mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
142 * cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
143 }
144 }
145
146 for (j = 0; j < totalFilters; j++){
147 mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
148 }
149
150 /* The analysis window */
151 window = new Window<double>(config.window, fftSize);
152
153 /* Allocate memory for the FFT */
154 realOut = (double*)calloc(fftSize, sizeof(double));
155 imagOut = (double*)calloc(fftSize, sizeof(double));
156
157 earMag = (double*)calloc(totalFilters, sizeof(double));
158 fftMag = (double*)calloc(fftSize/2, sizeof(double));
159
160 free(freqs);
161 free(lower);
162 free(center);
163 free(upper);
164 free(triangleHeight);
165 free(fftFreqs);
166}
167
169{
170 int i;
171
172 /* Free the structure */
173 for (i = 0; i < nceps+1; i++) {
174 free(mfccDCTMatrix[i]);
175 }
176 free(mfccDCTMatrix);
177
178 for (i = 0; i < totalFilters; i++) {
179 free(mfccFilterWeights[i]);
180 }
181 free(mfccFilterWeights);
182
183 /* Free the feature vector */
184 free(ceps);
185
186 /* The analysis window */
187 delete window;
188
189 free(earMag);
190 free(fftMag);
191
192 /* Free the FFT */
193 free(realOut);
194 free(imagOut);
195
196 delete fft;
197}
198
199
200/*
201 *
202 * Extract the MFCC on the input frame
203 *
204 */
205int MFCC::process(const double *inframe, double *outceps)
206{
207 double *inputData = (double *)malloc(fftSize * sizeof(double));
208 for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i];
209
210 window->cut(inputData);
211
212 /* Calculate the fft on the input frame */
213 fft->forward(inputData, realOut, imagOut);
214
215 free(inputData);
216
217 return process(realOut, imagOut, outceps);
218}
219
220int MFCC::process(const double *real, const double *imag, double *outceps)
221{
222 int i, j;
223
224 for (i = 0; i < fftSize/2; ++i) {
225 fftMag[i] = sqrt(real[i] * real[i] + imag[i] * imag[i]);
226 }
227
228 for (i = 0; i < totalFilters; ++i) {
229 earMag[i] = 0.0;
230 }
231
232 /* Multiply by mfccFilterWeights */
233 for (i = 0; i < totalFilters; i++) {
234 double tmp = 0.0;
235 for (j = 0; j < fftSize/2; j++) {
236 tmp = tmp + (mfccFilterWeights[i][j] * fftMag[j]);
237 }
238 if (tmp > 0) earMag[i] = log10(tmp);
239 else earMag[i] = 0.0;
240
241 if (logPower != 1.0) {
242 earMag[i] = pow(earMag[i], logPower);
243 }
244 }
245
246 /*
247 *
248 * Calculate now the cepstral coefficients
249 * with or without the DC component
250 *
251 */
252
253 if (WANT_C0 == 1) {
254
255 for (i = 0; i < nceps+1; i++) {
256 double tmp = 0.;
257 for (j = 0; j < totalFilters; j++){
258 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
259 }
260 outceps[i] = tmp;
261 }
262 }
263 else
264 {
265 for (i = 1; i < nceps+1; i++) {
266 double tmp = 0.;
267 for (j = 0; j < totalFilters; j++){
268 tmp = tmp + mfccDCTMatrix[i][j] * earMag[j];
269 }
270 outceps[i-1] = tmp;
271 }
272 }
273
274 return nceps;
275}
276
Definition FFT.h:47
void forward(const double *realIn, double *realOut, double *imagOut)
Carry out a forward real-to-complex transform of size nsamples, where nsamples is the value provided ...
Definition FFT.cpp:184
Window< double > * window
Definition MFCC.h:83
int fftSize
Definition MFCC.h:67
double linearSpacing
Definition MFCC.h:62
double * ceps
Definition MFCC.h:77
int logFilters
Definition MFCC.h:63
double ** mfccDCTMatrix
Definition MFCC.h:79
int nceps
Definition MFCC.h:74
double ** mfccFilterWeights
Definition MFCC.h:80
double * realOut
Definition MFCC.h:86
double * fftMag
Definition MFCC.h:88
double * earMag
Definition MFCC.h:89
int linearFilters
Definition MFCC.h:61
double logSpacing
Definition MFCC.h:64
FFTReal * fft
Definition MFCC.h:90
virtual ~MFCC()
Definition MFCC.cpp:168
int WANT_C0
Definition MFCC.h:93
double * imagOut
Definition MFCC.h:87
double lowestFrequency
Definition MFCC.h:60
double logPower
Definition MFCC.h:70
MFCC(MFCCConfig config)
Definition MFCC.cpp:24
int totalFilters
Definition MFCC.h:69
int samplingRate
Definition MFCC.h:73
int process(const double *inframe, double *outceps)
Process time-domain input data.
Definition MFCC.cpp:205
Various shaped windows for sample frame conditioning, including cosine windows (Hann etc) and triangu...
Definition Window.h:41
void cut(T *src) const
Definition Window.h:61
double logpower
Definition MFCC.h:27
bool want_c0
Definition MFCC.h:28
int nceps
Definition MFCC.h:26
int fftsize
Definition MFCC.h:25
WindowType window
Definition MFCC.h:29
int FS
Definition MFCC.h:24