SphinxBase 0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1996-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 00038 #include <stdio.h> 00039 #include <math.h> 00040 #include <string.h> 00041 #include <stdlib.h> 00042 #include <assert.h> 00043 00044 #ifdef HAVE_CONFIG_H 00045 #include <config.h> 00046 #endif 00047 00048 #ifdef _MSC_VER 00049 #pragma warning (disable: 4244) 00050 #endif 00051 00052 #include "sphinxbase/prim_type.h" 00053 #include "sphinxbase/ckd_alloc.h" 00054 #include "sphinxbase/byteorder.h" 00055 #include "sphinxbase/fixpoint.h" 00056 #include "sphinxbase/fe.h" 00057 #include "sphinxbase/genrand.h" 00058 #include "sphinxbase/libutil.h" 00059 #include "sphinxbase/err.h" 00060 00061 #include "fe_internal.h" 00062 #include "fe_warp.h" 00063 00064 /* Use extra precision for cosines, Hamming window, pre-emphasis 00065 * coefficient, twiddle factors. */ 00066 #ifdef FIXED_POINT 00067 #define FLOAT2COS(x) FLOAT2FIX_ANY(x,30) 00068 #define COSMUL(x,y) FIXMUL_ANY(x,y,30) 00069 #else 00070 #define FLOAT2COS(x) (x) 00071 #define COSMUL(x,y) ((x)*(y)) 00072 #endif 00073 00074 #ifdef FIXED_POINT 00075 /* Internal log-addition table for natural log with radix point at 8 00076 * bits. Each entry is 256 * log(1 + e^{-n/256}). This is used in the 00077 * log-add computation: 00078 * 00079 * e^z = e^x + e^y 00080 * e^z = e^x(1 + e^{y-x}) = e^y(1 + e^{x-y}) 00081 * z = x + log(1 + e^{y-x}) = y + log(1 + e^{x-y}) 00082 * 00083 * So when y > x, z = y + logadd_table[-(x-y)] 00084 * when x > y, z = x + logadd_table[-(y-x)] 00085 */ 00086 static const unsigned char fe_logadd_table[] = { 00087 177, 177, 176, 176, 175, 175, 174, 174, 173, 173, 00088 172, 172, 172, 171, 171, 170, 170, 169, 169, 168, 00089 168, 167, 167, 166, 166, 165, 165, 164, 164, 163, 00090 163, 162, 162, 161, 161, 161, 160, 160, 159, 159, 00091 158, 158, 157, 157, 156, 156, 155, 155, 155, 154, 00092 154, 153, 153, 152, 152, 151, 151, 151, 150, 150, 00093 149, 149, 148, 148, 147, 147, 147, 146, 146, 145, 00094 145, 144, 144, 144, 143, 143, 142, 142, 141, 141, 00095 141, 140, 140, 139, 139, 138, 138, 138, 137, 137, 00096 136, 136, 136, 135, 135, 134, 134, 134, 133, 133, 00097 132, 132, 131, 131, 131, 130, 130, 129, 129, 129, 00098 128, 128, 128, 127, 127, 126, 126, 126, 125, 125, 00099 124, 124, 124, 123, 123, 123, 122, 122, 121, 121, 00100 121, 120, 120, 119, 119, 119, 118, 118, 118, 117, 00101 117, 117, 116, 116, 115, 115, 115, 114, 114, 114, 00102 113, 113, 113, 112, 112, 112, 111, 111, 110, 110, 00103 110, 109, 109, 109, 108, 108, 108, 107, 107, 107, 00104 106, 106, 106, 105, 105, 105, 104, 104, 104, 103, 00105 103, 103, 102, 102, 102, 101, 101, 101, 100, 100, 00106 100, 99, 99, 99, 98, 98, 98, 97, 97, 97, 00107 96, 96, 96, 96, 95, 95, 95, 94, 94, 94, 00108 93, 93, 93, 92, 92, 92, 92, 91, 91, 91, 00109 90, 90, 90, 89, 89, 89, 89, 88, 88, 88, 00110 87, 87, 87, 87, 86, 86, 86, 85, 85, 85, 00111 85, 84, 84, 84, 83, 83, 83, 83, 82, 82, 00112 82, 82, 81, 81, 81, 80, 80, 80, 80, 79, 00113 79, 79, 79, 78, 78, 78, 78, 77, 77, 77, 00114 77, 76, 76, 76, 75, 75, 75, 75, 74, 74, 00115 74, 74, 73, 73, 73, 73, 72, 72, 72, 72, 00116 71, 71, 71, 71, 71, 70, 70, 70, 70, 69, 00117 69, 69, 69, 68, 68, 68, 68, 67, 67, 67, 00118 67, 67, 66, 66, 66, 66, 65, 65, 65, 65, 00119 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 00120 62, 62, 62, 62, 61, 61, 61, 61, 61, 60, 00121 60, 60, 60, 60, 59, 59, 59, 59, 59, 58, 00122 58, 58, 58, 58, 57, 57, 57, 57, 57, 56, 00123 56, 56, 56, 56, 55, 55, 55, 55, 55, 54, 00124 54, 54, 54, 54, 53, 53, 53, 53, 53, 52, 00125 52, 52, 52, 52, 52, 51, 51, 51, 51, 51, 00126 50, 50, 50, 50, 50, 50, 49, 49, 49, 49, 00127 49, 49, 48, 48, 48, 48, 48, 48, 47, 47, 00128 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 00129 45, 45, 45, 45, 45, 45, 44, 44, 44, 44, 00130 44, 44, 43, 43, 43, 43, 43, 43, 43, 42, 00131 42, 42, 42, 42, 42, 41, 41, 41, 41, 41, 00132 41, 41, 40, 40, 40, 40, 40, 40, 40, 39, 00133 39, 39, 39, 39, 39, 39, 38, 38, 38, 38, 00134 38, 38, 38, 37, 37, 37, 37, 37, 37, 37, 00135 37, 36, 36, 36, 36, 36, 36, 36, 35, 35, 00136 35, 35, 35, 35, 35, 35, 34, 34, 34, 34, 00137 34, 34, 34, 34, 33, 33, 33, 33, 33, 33, 00138 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 00139 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 00140 30, 30, 30, 30, 30, 30, 30, 30, 30, 29, 00141 29, 29, 29, 29, 29, 29, 29, 29, 28, 28, 00142 28, 28, 28, 28, 28, 28, 28, 28, 27, 27, 00143 27, 27, 27, 27, 27, 27, 27, 27, 26, 26, 00144 26, 26, 26, 26, 26, 26, 26, 26, 25, 25, 00145 25, 25, 25, 25, 25, 25, 25, 25, 25, 24, 00146 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 00147 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 00148 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 00149 22, 22, 22, 22, 21, 21, 21, 21, 21, 21, 00150 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, 00151 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 00152 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 00153 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 00154 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, 00155 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 00156 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 00157 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 00158 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 00159 15, 15, 15, 15, 15, 15, 15, 15, 14, 14, 00160 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 00161 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 00162 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 00163 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 00164 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 00165 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 00166 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 00167 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 00168 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 00169 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 00170 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 00171 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 00172 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 00173 9, 9, 9, 9, 9, 9, 9, 9, 8, 8, 00174 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 00175 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 00176 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 00177 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 00178 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 00179 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 00180 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 00181 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 00182 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 00183 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 00184 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 00185 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 00186 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 00187 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 00188 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 00189 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 00190 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 00191 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 00192 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 00193 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 00194 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 00195 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 00196 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 00197 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 00198 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 00199 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 00200 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 00201 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 00202 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 00203 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 00204 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 00205 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 00206 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00207 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00208 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00209 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00210 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00211 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00212 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00213 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00214 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00215 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00216 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00217 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 00218 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 00219 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00220 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00221 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00222 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00223 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00224 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00225 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00226 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00227 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00228 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00229 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00230 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00231 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00232 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00233 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00234 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00235 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00236 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00237 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00238 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00239 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00240 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00241 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00242 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00243 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00244 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00245 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00246 1, 1, 1, 1, 1, 1, 1, 0 00247 }; 00248 static const int fe_logadd_table_size = sizeof(fe_logadd_table) / sizeof(fe_logadd_table[0]); 00249 00250 static fixed32 00251 fe_log_add(fixed32 x, fixed32 y) 00252 { 00253 fixed32 d, r; 00254 00255 if (x > y) { 00256 d = (x - y) >> (DEFAULT_RADIX - 8); 00257 r = x; 00258 } 00259 else { 00260 d = (y - x) >> (DEFAULT_RADIX - 8); 00261 r = y; 00262 } 00263 if (d > fe_logadd_table_size) 00264 return r; 00265 else { 00266 r += ((fixed32)fe_logadd_table[d] << (DEFAULT_RADIX - 8)); 00267 /* 00268 printf("%d + %d = %d | %f + %f = %f | %f + %f = %f\n", 00269 x, y, r, FIX2FLOAT(x), FIX2FLOAT(y), FIX2FLOAT(r), 00270 exp(FIX2FLOAT(x)), exp(FIX2FLOAT(y)), exp(FIX2FLOAT(r))); 00271 */ 00272 return r; 00273 } 00274 } 00275 00276 static fixed32 00277 fe_log(float32 x) 00278 { 00279 if (x <= 0) { 00280 return MIN_FIXLOG; 00281 } 00282 else { 00283 return FLOAT2FIX(log(x)); 00284 } 00285 } 00286 #endif 00287 00288 static float32 00289 fe_mel(melfb_t *mel, float32 x) 00290 { 00291 float32 warped = fe_warp_unwarped_to_warped(mel, x); 00292 00293 return (float32) (2595.0 * log10(1.0 + warped / 700.0)); 00294 } 00295 00296 static float32 00297 fe_melinv(melfb_t *mel, float32 x) 00298 { 00299 float32 warped = (float32) (700.0 * (pow(10.0, x / 2595.0) - 1.0)); 00300 return fe_warp_warped_to_unwarped(mel, warped); 00301 } 00302 00303 int32 00304 fe_build_melfilters(melfb_t *mel_fb) 00305 { 00306 float32 melmin, melmax, melbw, fftfreq; 00307 int n_coeffs, i, j; 00308 00309 /* Filter coefficient matrix, in flattened form. */ 00310 mel_fb->spec_start = ckd_malloc(mel_fb->num_filters * sizeof(*mel_fb->spec_start)); 00311 mel_fb->filt_start = ckd_malloc(mel_fb->num_filters * sizeof(*mel_fb->filt_start)); 00312 mel_fb->filt_width = ckd_malloc(mel_fb->num_filters * sizeof(*mel_fb->filt_width)); 00313 00314 /* First calculate the widths of each filter. */ 00315 /* Minimum and maximum frequencies in mel scale. */ 00316 melmin = fe_mel(mel_fb, mel_fb->lower_filt_freq); 00317 melmax = fe_mel(mel_fb, mel_fb->upper_filt_freq); 00318 00319 /* Width of filters in mel scale */ 00320 melbw = (melmax - melmin) / (mel_fb->num_filters + 1); 00321 if (mel_fb->doublewide) { 00322 melmin -= melbw; 00323 melmax += melbw; 00324 if ((fe_melinv(mel_fb, melmin) < 0) || 00325 (fe_melinv(mel_fb, melmax) > mel_fb->sampling_rate / 2)) { 00326 E_WARN 00327 ("Out of Range: low filter edge = %f (%f)\n", 00328 fe_melinv(mel_fb, melmin), 0.0); 00329 E_WARN 00330 (" high filter edge = %f (%f)\n", 00331 fe_melinv(mel_fb, melmax), mel_fb->sampling_rate / 2); 00332 return FE_INVALID_PARAM_ERROR; 00333 } 00334 } 00335 00336 /* DFT point spacing */ 00337 fftfreq = mel_fb->sampling_rate / (float32) mel_fb->fft_size; 00338 00339 /* Count and place filter coefficients. */ 00340 n_coeffs = 0; 00341 for (i = 0; i < mel_fb->num_filters; ++i) { 00342 float32 freqs[3]; 00343 00344 /* Left, center, right frequencies in Hertz */ 00345 for (j = 0; j < 3; ++j) { 00346 if (mel_fb->doublewide) 00347 freqs[j] = fe_melinv(mel_fb, (i + j * 2) * melbw + melmin); 00348 else 00349 freqs[j] = fe_melinv(mel_fb, (i + j) * melbw + melmin); 00350 /* Round them to DFT points if requested */ 00351 if (mel_fb->round_filters) 00352 freqs[j] = ((int)(freqs[j] / fftfreq + 0.5)) * fftfreq; 00353 } 00354 00355 /* spec_start is the start of this filter in the power spectrum. */ 00356 mel_fb->spec_start[i] = -1; 00357 /* There must be a better way... */ 00358 for (j = 0; j < mel_fb->fft_size/2+1; ++j) { 00359 float32 hz = j * fftfreq; 00360 if (hz < freqs[0]) 00361 continue; 00362 else if (hz > freqs[2] || j == mel_fb->fft_size/2) { 00363 /* filt_width is the width in DFT points of this filter. */ 00364 mel_fb->filt_width[i] = j - mel_fb->spec_start[i]; 00365 /* filt_start is the start of this filter in the filt_coeffs array. */ 00366 mel_fb->filt_start[i] = n_coeffs; 00367 n_coeffs += mel_fb->filt_width[i]; 00368 break; 00369 } 00370 if (mel_fb->spec_start[i] == -1) 00371 mel_fb->spec_start[i] = j; 00372 } 00373 } 00374 00375 /* Now go back and allocate the coefficient array. */ 00376 mel_fb->filt_coeffs = ckd_malloc(n_coeffs * sizeof(*mel_fb->filt_coeffs)); 00377 00378 /* And now generate the coefficients. */ 00379 n_coeffs = 0; 00380 for (i = 0; i < mel_fb->num_filters; ++i) { 00381 float32 freqs[3]; 00382 00383 /* Left, center, right frequencies in Hertz */ 00384 for (j = 0; j < 3; ++j) { 00385 if (mel_fb->doublewide) 00386 freqs[j] = fe_melinv(mel_fb, (i + j * 2) * melbw + melmin); 00387 else 00388 freqs[j] = fe_melinv(mel_fb, (i + j) * melbw + melmin); 00389 /* Round them to DFT points if requested */ 00390 if (mel_fb->round_filters) 00391 freqs[j] = ((int)(freqs[j] / fftfreq + 0.5)) * fftfreq; 00392 } 00393 00394 for (j = 0; j < mel_fb->filt_width[i]; ++j) { 00395 float32 hz, loslope, hislope; 00396 00397 hz = (mel_fb->spec_start[i] + j) * fftfreq; 00398 if (hz < freqs[0] || hz > freqs[2]) { 00399 E_FATAL("WTF, %f < %f > %f\n", freqs[0], hz, freqs[2]); 00400 } 00401 loslope = (hz - freqs[0]) / (freqs[1] - freqs[0]); 00402 hislope = (freqs[2] - hz) / (freqs[2] - freqs[1]); 00403 if (mel_fb->unit_area) { 00404 loslope *= 2 / (freqs[2] - freqs[0]); 00405 hislope *= 2 / (freqs[2] - freqs[0]); 00406 } 00407 if (loslope < hislope) { 00408 #ifdef FIXED_POINT 00409 mel_fb->filt_coeffs[n_coeffs] = fe_log(loslope); 00410 #else 00411 mel_fb->filt_coeffs[n_coeffs] = loslope; 00412 #endif 00413 } 00414 else { 00415 #ifdef FIXED_POINT 00416 mel_fb->filt_coeffs[n_coeffs] = fe_log(hislope); 00417 #else 00418 mel_fb->filt_coeffs[n_coeffs] = hislope; 00419 #endif 00420 } 00421 ++n_coeffs; 00422 } 00423 } 00424 00425 00426 return FE_SUCCESS; 00427 } 00428 00429 int32 00430 fe_compute_melcosine(melfb_t * mel_fb) 00431 { 00432 00433 float64 freqstep; 00434 int32 i, j; 00435 00436 mel_fb->mel_cosine = 00437 (mfcc_t **) ckd_calloc_2d(mel_fb->num_cepstra, 00438 mel_fb->num_filters, 00439 sizeof(mfcc_t)); 00440 00441 freqstep = M_PI / mel_fb->num_filters; 00442 /* NOTE: The first row vector is actually unnecessary but we leave 00443 * it in to avoid confusion. */ 00444 for (i = 0; i < mel_fb->num_cepstra; i++) { 00445 for (j = 0; j < mel_fb->num_filters; j++) { 00446 float64 cosine; 00447 00448 cosine = cos(freqstep * i * (j + 0.5)); 00449 mel_fb->mel_cosine[i][j] = FLOAT2COS(cosine); 00450 } 00451 } 00452 00453 /* Also precompute normalization constants for unitary DCT. */ 00454 mel_fb->sqrt_inv_n = FLOAT2COS(sqrt(1.0 / mel_fb->num_filters)); 00455 mel_fb->sqrt_inv_2n = FLOAT2COS(sqrt(2.0 / mel_fb->num_filters)); 00456 00457 /* And liftering weights */ 00458 if (mel_fb->lifter_val) { 00459 mel_fb->lifter = calloc(mel_fb->num_cepstra, sizeof(*mel_fb->lifter)); 00460 for (i = 0; i < mel_fb->num_cepstra; ++i) { 00461 mel_fb->lifter[i] = FLOAT2MFCC(1 + mel_fb->lifter_val / 2 00462 * sin(i * M_PI / mel_fb->lifter_val)); 00463 } 00464 } 00465 00466 return (0); 00467 } 00468 00469 static void 00470 fe_pre_emphasis(int16 const *in, frame_t * out, int32 len, 00471 float32 factor, int16 prior) 00472 { 00473 int i; 00474 00475 #if defined(FIXED16) 00476 int16 fxd_alpha = (int16)(factor * 0x8000); 00477 int32 tmp1, tmp2; 00478 00479 tmp1 = (int32)in[0] << 15; 00480 tmp2 = (int32)prior * fxd_alpha; 00481 out[0] = (int16)((tmp1 - tmp2) >> 15); 00482 for (i = 1; i < len; ++i) { 00483 tmp1 = (int32)in[i] << 15; 00484 tmp2 = (int32)in[i-1] * fxd_alpha; 00485 out[i] = (int16)((tmp1 - tmp2) >> 15); 00486 } 00487 #elif defined(FIXED_POINT) 00488 fixed32 fxd_alpha = FLOAT2FIX(factor); 00489 out[0] = ((fixed32)in[0] << DEFAULT_RADIX) - (prior * fxd_alpha); 00490 for (i = 1; i < len; ++i) 00491 out[i] = ((fixed32)in[i] << DEFAULT_RADIX) 00492 - (fixed32)in[i-1] * fxd_alpha; 00493 #else 00494 out[0] = (frame_t) in[0] - (frame_t) prior * factor; 00495 for (i = 1; i < len; i++) 00496 out[i] = (frame_t) in[i] - (frame_t) in[i-1] * factor; 00497 #endif 00498 } 00499 00500 static void 00501 fe_short_to_frame(int16 const *in, frame_t * out, int32 len) 00502 { 00503 int i; 00504 00505 #if defined(FIXED16) 00506 memcpy(out, in, len * sizeof(*out)); 00507 #elif defined(FIXED_POINT) 00508 for (i = 0; i < len; i++) 00509 out[i] = (int32) in[i] << DEFAULT_RADIX; 00510 #else /* FIXED_POINT */ 00511 for (i = 0; i < len; i++) 00512 out[i] = (frame_t) in[i]; 00513 #endif /* FIXED_POINT */ 00514 } 00515 00516 void 00517 fe_create_hamming(window_t * in, int32 in_len) 00518 { 00519 int i; 00520 00521 /* Symmetric, so we only create the first half of it. */ 00522 for (i = 0; i < in_len / 2; i++) { 00523 float64 hamm; 00524 hamm = (0.54 - 0.46 * cos(2 * M_PI * i / 00525 ((float64) in_len - 1.0))); 00526 #ifdef FIXED16 00527 in[i] = (int16)(hamm * 0x8000); 00528 #else 00529 in[i] = FLOAT2COS(hamm); 00530 #endif 00531 } 00532 } 00533 00534 static void 00535 fe_hamming_window(frame_t * in, window_t * window, int32 in_len, int32 remove_dc) 00536 { 00537 int i; 00538 00539 if (remove_dc) { 00540 #ifdef FIXED16 00541 int32 mean = 0; /* Use int32 to avoid possibility of overflow */ 00542 #else 00543 frame_t mean = 0; 00544 #endif 00545 00546 for (i = 0; i < in_len; i++) 00547 mean += in[i]; 00548 mean /= in_len; 00549 for (i = 0; i < in_len; i++) 00550 in[i] -= (frame_t)mean; 00551 } 00552 00553 #ifdef FIXED16 00554 for (i = 0; i < in_len/2; i++) { 00555 int32 tmp1, tmp2; 00556 00557 tmp1 = (int32)in[i] * window[i]; 00558 tmp2 = (int32)in[in_len-1-i] * window[i]; 00559 in[i] = (int16)(tmp1 >> 15); 00560 in[in_len-1-i] = (int16)(tmp2 >> 15); 00561 } 00562 #else 00563 for (i = 0; i < in_len/2; i++) { 00564 in[i] = COSMUL(in[i], window[i]); 00565 in[in_len-1-i] = COSMUL(in[in_len-1-i], window[i]); 00566 } 00567 #endif 00568 } 00569 00570 static int 00571 fe_spch_to_frame(fe_t *fe, int len) 00572 { 00573 /* Copy to the frame buffer. */ 00574 if (fe->pre_emphasis_alpha != 0.0) { 00575 fe_pre_emphasis(fe->spch, fe->frame, len, 00576 fe->pre_emphasis_alpha, fe->prior); 00577 if (len >= fe->frame_shift) 00578 fe->prior = fe->spch[fe->frame_shift - 1]; 00579 else 00580 fe->prior = fe->spch[len - 1]; 00581 } 00582 else 00583 fe_short_to_frame(fe->spch, fe->frame, len); 00584 00585 /* Zero pad up to FFT size. */ 00586 memset(fe->frame + len, 0, 00587 (fe->fft_size - len) * sizeof(*fe->frame)); 00588 00589 /* Window. */ 00590 fe_hamming_window(fe->frame, fe->hamming_window, fe->frame_size, 00591 fe->remove_dc); 00592 00593 return len; 00594 } 00595 00596 int 00597 fe_read_frame(fe_t *fe, int16 const *in, int32 len) 00598 { 00599 int i; 00600 00601 if (len > fe->frame_size) 00602 len = fe->frame_size; 00603 00604 /* Read it into the raw speech buffer. */ 00605 memcpy(fe->spch, in, len * sizeof(*in)); 00606 /* Swap and dither if necessary. */ 00607 if (fe->swap) 00608 for (i = 0; i < len; ++i) 00609 SWAP_INT16(&fe->spch[i]); 00610 if (fe->dither) 00611 for (i = 0; i < len; ++i) 00612 fe->spch[i] += (int16) ((!(s3_rand_int31() % 4)) ? 1 : 0); 00613 00614 return fe_spch_to_frame(fe, len); 00615 } 00616 00617 int 00618 fe_shift_frame(fe_t *fe, int16 const *in, int32 len) 00619 { 00620 int offset, i; 00621 00622 if (len > fe->frame_shift) 00623 len = fe->frame_shift; 00624 offset = fe->frame_size - fe->frame_shift; 00625 00626 /* Shift data into the raw speech buffer. */ 00627 memmove(fe->spch, fe->spch + fe->frame_shift, 00628 offset * sizeof(*fe->spch)); 00629 memcpy(fe->spch + offset, in, len * sizeof(*fe->spch)); 00630 /* Swap and dither if necessary. */ 00631 if (fe->swap) 00632 for (i = 0; i < len; ++i) 00633 SWAP_INT16(&fe->spch[offset + i]); 00634 if (fe->dither) 00635 for (i = 0; i < len; ++i) 00636 fe->spch[offset + i] 00637 += (int16) ((!(s3_rand_int31() % 4)) ? 1 : 0); 00638 00639 return fe_spch_to_frame(fe, offset + len); 00640 } 00641 00645 void 00646 fe_create_twiddle(fe_t *fe) 00647 { 00648 int i; 00649 00650 for (i = 0; i < fe->fft_size / 4; ++i) { 00651 float64 a = 2 * M_PI * i / fe->fft_size; 00652 #ifdef FIXED16 00653 fe->ccc[i] = (int16)(cos(a) * 0x8000); 00654 fe->sss[i] = (int16)(sin(a) * 0x8000); 00655 #elif defined(FIXED_POINT) 00656 fe->ccc[i] = FLOAT2COS(cos(a)); 00657 fe->sss[i] = FLOAT2COS(sin(a)); 00658 #else 00659 fe->ccc[i] = cos(a); 00660 fe->sss[i] = sin(a); 00661 #endif 00662 } 00663 } 00664 00665 /* Translated from the FORTRAN (obviously) from "Real-Valued Fast 00666 * Fourier Transform Algorithms" by Henrik V. Sorensen et al., IEEE 00667 * Transactions on Acoustics, Speech, and Signal Processing, vol. 35, 00668 * no.6. The 16-bit version does a version of "block floating 00669 * point" in order to avoid rounding errors. 00670 */ 00671 #if defined(FIXED16) 00672 static int 00673 fe_fft_real(fe_t *fe) 00674 { 00675 int i, j, k, m, n, lz; 00676 frame_t *x, xt, max; 00677 00678 x = fe->frame; 00679 m = fe->fft_order; 00680 n = fe->fft_size; 00681 00682 /* Bit-reverse the input. */ 00683 j = 0; 00684 for (i = 0; i < n - 1; ++i) { 00685 if (i < j) { 00686 xt = x[j]; 00687 x[j] = x[i]; 00688 x[i] = xt; 00689 } 00690 k = n / 2; 00691 while (k <= j) { 00692 j -= k; 00693 k /= 2; 00694 } 00695 j += k; 00696 } 00697 /* Determine how many bits of dynamic range are in the input. */ 00698 max = 0; 00699 for (i = 0; i < n; ++i) 00700 if (abs(x[i]) > max) 00701 max = abs(x[i]); 00702 /* The FFT has a gain of M bits, so we need to attenuate the input 00703 * by M bits minus the number of leading zeroes in the input's 00704 * range in order to avoid overflows. */ 00705 for (lz = 0; lz < m; ++lz) 00706 if (max & (1 << (15-lz))) 00707 break; 00708 00709 /* Basic butterflies (2-point FFT, real twiddle factors): 00710 * x[i] = x[i] + 1 * x[i+1] 00711 * x[i+1] = x[i] + -1 * x[i+1] 00712 */ 00713 /* The quantization error introduced by attenuating the input at 00714 * any given stage of the FFT has a cascading effect, so we hold 00715 * off on it until it's absolutely necessary. */ 00716 for (i = 0; i < n; i += 2) { 00717 int atten = (lz == 0); 00718 xt = x[i] >> atten; 00719 x[i] = xt + (x[i + 1] >> atten); 00720 x[i + 1] = xt - (x[i + 1] >> atten); 00721 } 00722 00723 /* The rest of the butterflies, in stages from 1..m */ 00724 for (k = 1; k < m; ++k) { 00725 int n1, n2, n4; 00726 /* Start attenuating once we hit the number of leading zeros. */ 00727 int atten = (k >= lz); 00728 00729 n4 = k - 1; 00730 n2 = k; 00731 n1 = k + 1; 00732 /* Stride over each (1 << (k+1)) points */ 00733 for (i = 0; i < n; i += (1 << n1)) { 00734 /* Basic butterfly with real twiddle factors: 00735 * x[i] = x[i] + 1 * x[i + (1<<k)] 00736 * x[i + (1<<k)] = x[i] + -1 * x[i + (1<<k)] 00737 */ 00738 xt = x[i] >> atten; 00739 x[i] = xt + (x[i + (1 << n2)] >> atten); 00740 x[i + (1 << n2)] = xt - (x[i + (1 << n2)] >> atten); 00741 00742 /* The other ones with real twiddle factors: 00743 * x[i + (1<<k) + (1<<(k-1))] 00744 * = 0 * x[i + (1<<k-1)] + -1 * x[i + (1<<k) + (1<<k-1)] 00745 * x[i + (1<<(k-1))] 00746 * = 1 * x[i + (1<<k-1)] + 0 * x[i + (1<<k) + (1<<k-1)] 00747 */ 00748 x[i + (1 << n2) + (1 << n4)] = -x[i + (1 << n2) + (1 << n4)] >> atten; 00749 x[i + (1 << n4)] = x[i + (1 << n4)] >> atten; 00750 00751 /* Butterflies with complex twiddle factors. 00752 * There are (1<<k-1) of them. 00753 */ 00754 for (j = 1; j < (1 << n4); ++j) { 00755 frame_t cc, ss, t1, t2; 00756 int i1, i2, i3, i4; 00757 00758 i1 = i + j; 00759 i2 = i + (1 << n2) - j; 00760 i3 = i + (1 << n2) + j; 00761 i4 = i + (1 << n2) + (1 << n2) - j; 00762 00763 /* 00764 * cc = real(W[j * n / (1<<(k+1))]) 00765 * ss = imag(W[j * n / (1<<(k+1))]) 00766 */ 00767 cc = fe->ccc[j << (m - n1)]; 00768 ss = fe->sss[j << (m - n1)]; 00769 00770 /* There are some symmetry properties which allow us 00771 * to get away with only four multiplications here. */ 00772 { 00773 int32 tmp1, tmp2; 00774 tmp1 = (int32)x[i3] * cc + (int32)x[i4] * ss; 00775 tmp2 = (int32)x[i3] * ss - (int32)x[i4] * cc; 00776 t1 = (int16)(tmp1 >> 15) >> atten; 00777 t2 = (int16)(tmp2 >> 15) >> atten; 00778 } 00779 00780 x[i4] = (x[i2] >> atten) - t2; 00781 x[i3] = (-x[i2] >> atten) - t2; 00782 x[i2] = (x[i1] >> atten) - t1; 00783 x[i1] = (x[i1] >> atten) + t1; 00784 } 00785 } 00786 } 00787 00788 /* Return the residual scaling factor. */ 00789 return lz; 00790 } 00791 #else /* !FIXED16 */ 00792 static int 00793 fe_fft_real(fe_t *fe) 00794 { 00795 int i, j, k, m, n; 00796 frame_t *x, xt; 00797 00798 x = fe->frame; 00799 m = fe->fft_order; 00800 n = fe->fft_size; 00801 00802 /* Bit-reverse the input. */ 00803 j = 0; 00804 for (i = 0; i < n - 1; ++i) { 00805 if (i < j) { 00806 xt = x[j]; 00807 x[j] = x[i]; 00808 x[i] = xt; 00809 } 00810 k = n / 2; 00811 while (k <= j) { 00812 j -= k; 00813 k /= 2; 00814 } 00815 j += k; 00816 } 00817 00818 /* Basic butterflies (2-point FFT, real twiddle factors): 00819 * x[i] = x[i] + 1 * x[i+1] 00820 * x[i+1] = x[i] + -1 * x[i+1] 00821 */ 00822 for (i = 0; i < n; i += 2) { 00823 xt = x[i]; 00824 x[i] = (xt + x[i + 1]); 00825 x[i + 1] = (xt - x[i + 1]); 00826 } 00827 00828 /* The rest of the butterflies, in stages from 1..m */ 00829 for (k = 1; k < m; ++k) { 00830 int n1, n2, n4; 00831 00832 n4 = k - 1; 00833 n2 = k; 00834 n1 = k + 1; 00835 /* Stride over each (1 << (k+1)) points */ 00836 for (i = 0; i < n; i += (1 << n1)) { 00837 /* Basic butterfly with real twiddle factors: 00838 * x[i] = x[i] + 1 * x[i + (1<<k)] 00839 * x[i + (1<<k)] = x[i] + -1 * x[i + (1<<k)] 00840 */ 00841 xt = x[i]; 00842 x[i] = (xt + x[i + (1 << n2)]); 00843 x[i + (1 << n2)] = (xt - x[i + (1 << n2)]); 00844 00845 /* The other ones with real twiddle factors: 00846 * x[i + (1<<k) + (1<<(k-1))] 00847 * = 0 * x[i + (1<<k-1)] + -1 * x[i + (1<<k) + (1<<k-1)] 00848 * x[i + (1<<(k-1))] 00849 * = 1 * x[i + (1<<k-1)] + 0 * x[i + (1<<k) + (1<<k-1)] 00850 */ 00851 x[i + (1 << n2) + (1 << n4)] = -x[i + (1 << n2) + (1 << n4)]; 00852 x[i + (1 << n4)] = x[i + (1 << n4)]; 00853 00854 /* Butterflies with complex twiddle factors. 00855 * There are (1<<k-1) of them. 00856 */ 00857 for (j = 1; j < (1 << n4); ++j) { 00858 frame_t cc, ss, t1, t2; 00859 int i1, i2, i3, i4; 00860 00861 i1 = i + j; 00862 i2 = i + (1 << n2) - j; 00863 i3 = i + (1 << n2) + j; 00864 i4 = i + (1 << n2) + (1 << n2) - j; 00865 00866 /* 00867 * cc = real(W[j * n / (1<<(k+1))]) 00868 * ss = imag(W[j * n / (1<<(k+1))]) 00869 */ 00870 cc = fe->ccc[j << (m - n1)]; 00871 ss = fe->sss[j << (m - n1)]; 00872 00873 /* There are some symmetry properties which allow us 00874 * to get away with only four multiplications here. */ 00875 t1 = COSMUL(x[i3], cc) + COSMUL(x[i4], ss); 00876 t2 = COSMUL(x[i3], ss) - COSMUL(x[i4], cc); 00877 00878 x[i4] = (x[i2] - t2); 00879 x[i3] = (-x[i2] - t2); 00880 x[i2] = (x[i1] - t1); 00881 x[i1] = (x[i1] + t1); 00882 } 00883 } 00884 } 00885 00886 /* This isn't used, but return it for completeness. */ 00887 return m; 00888 } 00889 #endif /* !FIXED16 */ 00890 00891 static void 00892 fe_spec_magnitude(fe_t *fe) 00893 { 00894 frame_t *fft; 00895 powspec_t *spec; 00896 int32 j, scale, fftsize; 00897 00898 /* Do FFT and get the scaling factor back (only actually used in 00899 * fixed-point). Note the scaling factor is expressed in bits. */ 00900 scale = fe_fft_real(fe); 00901 00902 /* Convenience pointers to make things less awkward below. */ 00903 fft = fe->frame; 00904 spec = fe->spec; 00905 fftsize = fe->fft_size; 00906 00907 /* We need to scale things up the rest of the way to N. */ 00908 scale = fe->fft_order - scale; 00909 00910 /* The first point (DC coefficient) has no imaginary part */ 00911 { 00912 #ifdef FIXED16 00913 spec[0] = fixlog(abs(fft[0]) << scale) * 2; 00914 #elif defined(FIXED_POINT) 00915 spec[0] = FIXLN(abs(fft[0]) << scale) * 2; 00916 #else 00917 spec[0] = fft[0] * fft[0]; 00918 #endif 00919 } 00920 00921 for (j = 1; j <= fftsize / 2; j++) { 00922 #ifdef FIXED16 00923 int32 rr = fixlog(abs(fft[j]) << scale) * 2; 00924 int32 ii = fixlog(abs(fft[fftsize - j]) << scale) * 2; 00925 spec[j] = fe_log_add(rr, ii); 00926 #elif defined(FIXED_POINT) 00927 int32 rr = FIXLN(abs(fft[j]) << scale) * 2; 00928 int32 ii = FIXLN(abs(fft[fftsize - j]) << scale) * 2; 00929 spec[j] = fe_log_add(rr, ii); 00930 #else 00931 spec[j] = fft[j] * fft[j] + fft[fftsize - j] * fft[fftsize - j]; 00932 #endif 00933 } 00934 } 00935 00936 static void 00937 fe_mel_spec(fe_t * fe) 00938 { 00939 int whichfilt; 00940 powspec_t *spec, *mfspec; 00941 00942 /* Convenience poitners. */ 00943 spec = fe->spec; 00944 mfspec = fe->mfspec; 00945 00946 for (whichfilt = 0; whichfilt < fe->mel_fb->num_filters; whichfilt++) { 00947 int spec_start, filt_start, i; 00948 00949 spec_start = fe->mel_fb->spec_start[whichfilt]; 00950 filt_start = fe->mel_fb->filt_start[whichfilt]; 00951 00952 #ifdef FIXED_POINT 00953 mfspec[whichfilt] = spec[spec_start] + fe->mel_fb->filt_coeffs[filt_start]; 00954 for (i = 1; i < fe->mel_fb->filt_width[whichfilt]; i++) { 00955 mfspec[whichfilt] = fe_log_add(mfspec[whichfilt], 00956 spec[spec_start + i] + 00957 fe->mel_fb->filt_coeffs[filt_start + i]); 00958 } 00959 #else /* !FIXED_POINT */ 00960 mfspec[whichfilt] = 0; 00961 for (i = 0; i < fe->mel_fb->filt_width[whichfilt]; i++) 00962 mfspec[whichfilt] += 00963 spec[spec_start + i] * fe->mel_fb->filt_coeffs[filt_start + i]; 00964 #endif /* !FIXED_POINT */ 00965 } 00966 } 00967 00968 static void 00969 fe_mel_cep(fe_t * fe, mfcc_t *mfcep) 00970 { 00971 int32 i; 00972 powspec_t *mfspec; 00973 00974 /* Convenience pointer. */ 00975 mfspec = fe->mfspec; 00976 00977 for (i = 0; i < fe->mel_fb->num_filters; ++i) { 00978 #ifndef FIXED_POINT /* It's already in log domain for fixed point */ 00979 if (mfspec[i] > 0) 00980 mfspec[i] = log(mfspec[i]); 00981 else /* This number should be smaller than anything 00982 * else, but not too small, so as to avoid 00983 * infinities in the inverse transform (this is 00984 * the frequency-domain equivalent of 00985 * dithering) */ 00986 mfspec[i] = -10.0; 00987 #endif /* !FIXED_POINT */ 00988 } 00989 00990 /* If we are doing LOG_SPEC, then do nothing. */ 00991 if (fe->log_spec == RAW_LOG_SPEC) { 00992 for (i = 0; i < fe->feature_dimension; i++) { 00993 mfcep[i] = (mfcc_t) mfspec[i]; 00994 } 00995 } 00996 /* For smoothed spectrum, do DCT-II followed by (its inverse) DCT-III */ 00997 else if (fe->log_spec == SMOOTH_LOG_SPEC) { 00998 /* FIXME: This is probably broken for fixed-point. */ 00999 fe_dct2(fe, mfspec, mfcep, 0); 01000 fe_dct3(fe, mfcep, mfspec); 01001 for (i = 0; i < fe->feature_dimension; i++) { 01002 mfcep[i] = (mfcc_t) mfspec[i]; 01003 } 01004 } 01005 else if (fe->transform == DCT_II) 01006 fe_dct2(fe, mfspec, mfcep, FALSE); 01007 else if (fe->transform == DCT_HTK) 01008 fe_dct2(fe, mfspec, mfcep, TRUE); 01009 else 01010 fe_spec2cep(fe, mfspec, mfcep); 01011 01012 return; 01013 } 01014 01015 void 01016 fe_spec2cep(fe_t * fe, const powspec_t * mflogspec, mfcc_t * mfcep) 01017 { 01018 int32 i, j, beta; 01019 01020 /* Compute C0 separately (its basis vector is 1) to avoid 01021 * costly multiplications. */ 01022 mfcep[0] = mflogspec[0] / 2; /* beta = 0.5 */ 01023 for (j = 1; j < fe->mel_fb->num_filters; j++) 01024 mfcep[0] += mflogspec[j]; /* beta = 1.0 */ 01025 mfcep[0] /= (frame_t) fe->mel_fb->num_filters; 01026 01027 for (i = 1; i < fe->num_cepstra; ++i) { 01028 mfcep[i] = 0; 01029 for (j = 0; j < fe->mel_fb->num_filters; j++) { 01030 if (j == 0) 01031 beta = 1; /* 0.5 */ 01032 else 01033 beta = 2; /* 1.0 */ 01034 mfcep[i] += COSMUL(mflogspec[j], 01035 fe->mel_fb->mel_cosine[i][j]) * beta; 01036 } 01037 /* Note that this actually normalizes by num_filters, like the 01038 * original Sphinx front-end, due to the doubled 'beta' factor 01039 * above. */ 01040 mfcep[i] /= (frame_t) fe->mel_fb->num_filters * 2; 01041 } 01042 } 01043 01044 void 01045 fe_dct2(fe_t * fe, const powspec_t * mflogspec, mfcc_t * mfcep, int htk) 01046 { 01047 int32 i, j; 01048 01049 /* Compute C0 separately (its basis vector is 1) to avoid 01050 * costly multiplications. */ 01051 mfcep[0] = mflogspec[0]; 01052 for (j = 1; j < fe->mel_fb->num_filters; j++) 01053 mfcep[0] += mflogspec[j]; 01054 if (htk) 01055 mfcep[0] = COSMUL(mfcep[0], fe->mel_fb->sqrt_inv_2n); 01056 else /* sqrt(1/N) = sqrt(2/N) * 1/sqrt(2) */ 01057 mfcep[0] = COSMUL(mfcep[0], fe->mel_fb->sqrt_inv_n); 01058 01059 for (i = 1; i < fe->num_cepstra; ++i) { 01060 mfcep[i] = 0; 01061 for (j = 0; j < fe->mel_fb->num_filters; j++) { 01062 mfcep[i] += COSMUL(mflogspec[j], 01063 fe->mel_fb->mel_cosine[i][j]); 01064 } 01065 mfcep[i] = COSMUL(mfcep[i], fe->mel_fb->sqrt_inv_2n); 01066 } 01067 } 01068 01069 void 01070 fe_lifter(fe_t *fe, mfcc_t *mfcep) 01071 { 01072 int32 i; 01073 01074 if (fe->mel_fb->lifter_val == 0) 01075 return; 01076 01077 for (i = 0; i < fe->num_cepstra; ++i) { 01078 mfcep[i] = MFCCMUL(mfcep[i], fe->mel_fb->lifter[i]); 01079 } 01080 } 01081 01082 void 01083 fe_dct3(fe_t * fe, const mfcc_t * mfcep, powspec_t * mflogspec) 01084 { 01085 int32 i, j; 01086 01087 for (i = 0; i < fe->mel_fb->num_filters; ++i) { 01088 mflogspec[i] = COSMUL(mfcep[0], SQRT_HALF); 01089 for (j = 1; j < fe->num_cepstra; j++) { 01090 mflogspec[i] += COSMUL(mfcep[j], 01091 fe->mel_fb->mel_cosine[j][i]); 01092 } 01093 mflogspec[i] = COSMUL(mflogspec[i], fe->mel_fb->sqrt_inv_2n); 01094 } 01095 } 01096 01097 int32 01098 fe_write_frame(fe_t * fe, mfcc_t * fea) 01099 { 01100 fe_spec_magnitude(fe); 01101 fe_mel_spec(fe); 01102 fe_mel_cep(fe, fea); 01103 fe_lifter(fe, fea); 01104 01105 return 1; 01106 } 01107 01108 void * 01109 fe_create_2d(int32 d1, int32 d2, int32 elem_size) 01110 { 01111 return (void *)ckd_calloc_2d(d1, d2, elem_size); 01112 } 01113 01114 void 01115 fe_free_2d(void *arr) 01116 { 01117 ckd_free_2d((void **)arr); 01118 }