qm-dsp 1.8
ClusterMeltSegmenter.cpp
Go to the documentation of this file.
1/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3/*
4 * ClusterMeltSegmenter.cpp
5 *
6 * Created by Mark Levy on 23/03/2006.
7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
8
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License as
11 published by the Free Software Foundation; either version 2 of the
12 License, or (at your option) any later version. See the file
13 COPYING included with this distribution for more information.
14 */
15
16#include <cfloat>
17#include <cmath>
18
20#include "cluster_segmenter.h"
21#include "segment.h"
22
23#include "dsp/transforms/FFT.h"
26#include "dsp/mfcc/MFCC.h"
27
29 window(NULL),
30 fft(NULL),
31 constq(NULL),
32 mfcc(NULL),
33 featureType(params.featureType),
34 hopSize(params.hopSize),
35 windowSize(params.windowSize),
36 fmin(params.fmin),
37 fmax(params.fmax),
38 nbins(params.nbins),
39 ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c
40 nHMMStates(params.nHMMStates),
41 nclusters(params.nclusters),
42 histogramLength(params.histogramLength),
43 neighbourhoodLimit(params.neighbourhoodLimit),
44 decimator(NULL)
45{
46}
47
49{
50 samplerate = fs;
51
54
55 // run internal processing at 11025 or thereabouts
56 int internalRate = 11025;
57 int decimationFactor = samplerate / internalRate;
58 if (decimationFactor < 1) decimationFactor = 1;
59
60 // must be a power of two
61 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
62
63 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
64 decimationFactor = Decimator::getHighestSupportedFactor();
65 }
66
67 if (decimationFactor > 1) {
68 decimator = new Decimator(getWindowsize(), decimationFactor);
69 }
70
71 CQConfig config;
72 config.FS = samplerate / decimationFactor;
73 config.min = fmin;
74 config.max = fmax;
75 config.BPO = nbins;
76 config.CQThresh = 0.0054;
77
78 constq = new ConstantQ(config);
80
81 ncoeff = constq->getK();
82
84
85 } else if (featureType == FEATURE_TYPE_MFCC) {
86
87 // run internal processing at 22050 or thereabouts
88 int internalRate = 22050;
89 int decimationFactor = samplerate / internalRate;
90 if (decimationFactor < 1) decimationFactor = 1;
91
92 // must be a power of two
93 while (decimationFactor & (decimationFactor - 1)) ++decimationFactor;
94
95 if (decimationFactor > Decimator::getHighestSupportedFactor()) {
96 decimationFactor = Decimator::getHighestSupportedFactor();
97 }
98
99 if (decimationFactor > 1) {
100 decimator = new Decimator(getWindowsize(), decimationFactor);
101 }
102
103 MFCCConfig config(samplerate / decimationFactor);
104 config.fftsize = 2048;
105 config.nceps = 19;
106 config.want_c0 = true;
107
108 mfcc = new MFCC(config);
109 ncoeff = config.nceps + 1;
110 }
111}
112
114{
115 delete window;
116 delete constq;
117 delete decimator;
118 delete fft;
119}
120
121int
123{
124 return static_cast<int>(windowSize * samplerate + 0.001);
125}
126
127int
129{
130 return static_cast<int>(hopSize * samplerate + 0.001);
131}
132
133void ClusterMeltSegmenter::extractFeatures(const double* samples, int nsamples)
134{
137 extractFeaturesConstQ(samples, nsamples);
138 } else if (featureType == FEATURE_TYPE_MFCC) {
139 extractFeaturesMFCC(samples, nsamples);
140 }
141}
142
143void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsamples)
144{
145 if (!constq) {
146 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesConstQ: "
147 << "No const-q: initialise not called?"
148 << std::endl;
149 return;
150 }
151
152 if (nsamples < getWindowsize()) {
153 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
154 return;
155 }
156
157 int fftsize = constq->getfftlength();
158
159 if (!window || window->getSize() != fftsize) {
160 delete window;
161 window = new Window<double>(HammingWindow, fftsize);
162 }
163
164 vector<double> cq(ncoeff);
165
166 for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
167
168 const double *psource = samples;
169 int pcount = nsamples;
170
171 if (decimator) {
172 pcount = nsamples / decimator->getFactor();
173 double *decout = new double[pcount];
174 decimator->process(samples, decout);
175 psource = decout;
176 }
177
178 int origin = 0;
179
180// std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
181
182 int frames = 0;
183
184 double *frame = new double[fftsize];
185 double *real = new double[fftsize];
186 double *imag = new double[fftsize];
187 double *cqre = new double[ncoeff];
188 double *cqim = new double[ncoeff];
189
190 while (origin <= pcount) {
191
192 // always need at least one fft window per block, but after
193 // that we want to avoid having any incomplete ones
194 if (origin > 0 && origin + fftsize >= pcount) break;
195
196 for (int i = 0; i < fftsize; ++i) {
197 if (origin + i < pcount) {
198 frame[i] = psource[origin + i];
199 } else {
200 frame[i] = 0.0;
201 }
202 }
203
204 for (int i = 0; i < fftsize/2; ++i) {
205 double value = frame[i];
206 frame[i] = frame[i + fftsize/2];
207 frame[i + fftsize/2] = value;
208 }
209
210 window->cut(frame);
211
212 fft->forward(frame, real, imag);
213
214 constq->process(real, imag, cqre, cqim);
215
216 for (int i = 0; i < ncoeff; ++i) {
217 cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
218 }
219 ++frames;
220
221 origin += fftsize/2;
222 }
223
224 delete [] cqre;
225 delete [] cqim;
226 delete [] real;
227 delete [] imag;
228 delete [] frame;
229
230 for (int i = 0; i < ncoeff; ++i) {
231 cq[i] /= frames;
232 }
233
234 if (decimator) delete[] psource;
235
236 features.push_back(cq);
237}
238
239void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsamples)
240{
241 if (!mfcc) {
242 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeaturesMFCC: "
243 << "No mfcc: initialise not called?"
244 << std::endl;
245 return;
246 }
247
248 if (nsamples < getWindowsize()) {
249 std::cerr << "ERROR: ClusterMeltSegmenter::extractFeatures: nsamples < windowsize (" << nsamples << " < " << getWindowsize() << ")" << std::endl;
250 return;
251 }
252
253 int fftsize = mfcc->getfftlength();
254
255 vector<double> cc(ncoeff);
256
257 for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
258
259 const double *psource = samples;
260 int pcount = nsamples;
261
262 if (decimator) {
263 pcount = nsamples / decimator->getFactor();
264 double *decout = new double[pcount];
265 decimator->process(samples, decout);
266 psource = decout;
267 }
268
269 int origin = 0;
270 int frames = 0;
271
272 double *frame = new double[fftsize];
273 double *ccout = new double[ncoeff];
274
275 while (origin <= pcount) {
276
277 // always need at least one fft window per block, but after
278 // that we want to avoid having any incomplete ones
279 if (origin > 0 && origin + fftsize >= pcount) break;
280
281 for (int i = 0; i < fftsize; ++i) {
282 if (origin + i < pcount) {
283 frame[i] = psource[origin + i];
284 } else {
285 frame[i] = 0.0;
286 }
287 }
288
289 mfcc->process(frame, ccout);
290
291 for (int i = 0; i < ncoeff; ++i) {
292 cc[i] += ccout[i];
293 }
294 ++frames;
295
296 origin += fftsize/2;
297 }
298
299 delete [] ccout;
300 delete [] frame;
301
302 for (int i = 0; i < ncoeff; ++i) {
303 cc[i] /= frames;
304 }
305
306 if (decimator) delete[] psource;
307
308 features.push_back(cc);
309}
310
312{
313 nclusters = m;
314 segment();
315}
316
317void ClusterMeltSegmenter::setFeatures(const vector<vector<double> >& f)
318{
319 features = f;
321}
322
324{
325 delete constq;
326 constq = 0;
327 delete mfcc;
328 mfcc = 0;
329 delete decimator;
330 decimator = 0;
331
332 if (features.size() < histogramLength) return;
333/*
334 std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
335 << " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
336*/
337 // copy the features to a native array and use the existing C segmenter...
338 double** arrFeatures = new double*[features.size()];
339 for (int i = 0; i < features.size(); i++)
340 {
342 arrFeatures[i] = new double[features[0].size()];
343 for (int j = 0; j < features[0].size(); j++)
344 arrFeatures[i][j] = features[i][j];
345 } else {
346 arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
347 for (int j = 0; j < ncoeff; j++)
348 arrFeatures[i][j] = features[i][j];
349 }
350 }
351
352 q = new int[features.size()];
353
356 cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
358 else
359 constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
361
362 // convert the cluster assignment sequence to a segmentation
363 makeSegmentation(q, features.size());
364
365 // de-allocate arrays
366 delete [] q;
367 for (int i = 0; i < features.size(); i++)
368 delete [] arrFeatures[i];
369 delete [] arrFeatures;
370
371 // clear the features
372 clear();
373}
374
376{
377 segmentation.segments.clear();
380
382 segment.start = 0;
383 segment.type = q[0];
384
385 for (int i = 1; i < len; i++)
386 {
387 if (q[i] != q[i-1])
388 {
389 segment.end = i * getHopsize();
390 segmentation.segments.push_back(segment);
391 segment.type = q[i];
392 segment.start = segment.end;
393 }
394 }
395 segment.end = len * getHopsize();
396 segmentation.segments.push_back(segment);
397}
398
#define NULL
Definition Filter.h:20
@ HammingWindow
Definition Window.h:26
virtual void extractFeatures(const double *samples, int nsamples)
virtual void initialise(int samplerate)
void extractFeaturesMFCC(const double *, int)
ClusterMeltSegmenter(ClusterMeltSegmenterParams params)
void setFeatures(const vector< vector< double > > &f)
void extractFeaturesConstQ(const double *, int)
void makeSegmentation(int *q, int len)
Window< double > * window
void process(const double *FFTRe, const double *FFTIm, double *CQRe, double *CQIm)
int getfftlength()
Definition ConstantQ.h:53
void sparsekernel()
Definition ConstantQ.cpp:76
int getK()
Definition ConstantQ.h:52
Decimator carries out a fast downsample by a power-of-two factor.
Definition Decimator.h:25
static int getHighestSupportedFactor()
Definition Decimator.h:57
int getFactor() const
Definition Decimator.h:56
void process(const double *src, double *dst)
Process inLength samples (as supplied to constructor) from src and write inLength / decFactor samples...
Definition FFT.h:47
void forward(const double *realIn, double *realOut, double *imagOut)
Carry out a forward real-to-complex transform of size nsamples, where nsamples is the value provided ...
Definition FFT.cpp:184
Definition MFCC.h:36
int getfftlength() const
Definition MFCC.h:56
int process(const double *inframe, double *outceps)
Process time-domain input data.
Definition MFCC.cpp:205
int start
Definition Segmenter.h:28
vector< Segment > segments
Definition Segmenter.h:38
Segmentation segmentation
Definition Segmenter.h:58
int samplerate
Definition Segmenter.h:59
virtual void clear()
Definition Segmenter.h:54
vector< vector< double > > features
Definition Segmenter.h:57
Various shaped windows for sample frame conditioning, including cosine windows (Hann etc) and triangu...
Definition Window.h:41
void cut(T *src) const
Definition Window.h:61
int getSize() const
Definition Window.h:67
void constq_segment(int *q, double **features, int frames_read, int bins, int ncoeff, int feature_type, int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
void cluster_segment(int *q, double **features, int frames_read, int feature_length, int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
@ FEATURE_TYPE_UNKNOWN
Definition segment.h:39
@ FEATURE_TYPE_CHROMA
Definition segment.h:41
@ FEATURE_TYPE_MFCC
Definition segment.h:42
@ FEATURE_TYPE_CONSTQ
Definition segment.h:40
double CQThresh
Definition ConstantQ.h:28
double min
Definition ConstantQ.h:25
unsigned int BPO
Definition ConstantQ.h:27
unsigned int FS
Definition ConstantQ.h:24
double max
Definition ConstantQ.h:26
bool want_c0
Definition MFCC.h:28
int nceps
Definition MFCC.h:26
int fftsize
Definition MFCC.h:25