5 * Created by Mark Levy on 06/04/2006.
6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2 of the
11 License, or (at your option) any later version. See the file
12 COPYING included with this distribution for more information.
16 #include "cluster_segmenter.h"
18 extern int readmatarray_size(const char *filepath, int n_array, int* t, int* d);
19 extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr);
21 /* converts constant-Q features to normalised chroma */
22 void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
24 int noct = ncoeff / bins; /* number of complete octaves in constant-Q */
26 //double maxchroma; /* max chroma value at each time, for normalisation */
27 //double sum; /* for normalisation */
29 for (t = 0; t < nframes; t++)
31 for (b = 0; b < bins; b++)
33 for (oct = 0; oct < noct; oct++)
36 for (b = 0; b < bins; b++)
37 chroma[t][b] += fabs(cq[t][ix+b]);
39 /* normalise to unit sum
41 for (b = 0; b < bins; b++)
43 for (b = 0; b < bins; b++)
46 /* normalise to unit max - NO this made results much worse!
48 for (b = 0; b < bins; b++)
49 if (chroma[t][b] > maxchroma)
50 maxchroma = chroma[t][b];
52 for (b = 0; b < bins; b++)
53 chroma[t][b] /= maxchroma;
58 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */
59 void mpeg7_constq(double** features, int nframes, int ncoeff)
66 /* convert const-Q features to dB scale */
67 for (i = 0; i < nframes; i++)
68 for (j = 0; j < ncoeff; j++)
69 features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
71 /* normalise each feature vector and add the norm as an extra feature dimension */
72 for (i = 0; i < nframes; i++)
75 for (j = 0; j < ncoeff; j++)
76 ss += features[i][j] * features[i][j];
78 for (j = 0; j < ncoeff; j++)
79 features[i][j] /= env;
80 features[i][ncoeff] = env;
84 /* normalise the envelopes */
85 for (i = 0; i < nframes; i++)
86 features[i][ncoeff] /= maxenv;
89 /* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */
90 /* NB h is a vector in row major order, as required by cluster_melt() */
91 /* for historical reasons we normalise the histograms by their norm (not to sum to one) */
92 void create_histograms(int* x, int nx, int m, int hlen, double* h)
97 for (i = 0; i < nx*m; i++)
100 for (i = hlen/2; i < nx-hlen/2; i++)
102 for (j = 0; j < m; j++)
104 for (t = i-hlen/2; t <= i+hlen/2; t++)
107 for (j = 0; j < m; j++)
108 norm += h[i*m+j] * h[i*m+j];
109 for (j = 0; j < m; j++)
113 /* duplicate histograms at beginning and end to create one histogram for each data value supplied */
114 for (i = 0; i < hlen/2; i++)
115 for (j = 0; j < m; j++)
116 h[i*m+j] = h[hlen/2*m+j];
117 for (i = nx-hlen/2; i < nx; i++)
118 for (j = 0; j < m; j++)
119 h[i*m+j] = h[(nx-hlen/2-1)*m+j];
122 /* segment using HMM and then histogram clustering */
123 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
124 int histogram_length, int nclusters, int neighbour_limit)
128 /*****************************/
130 /* try just using the predominant bin number as a 'decoded state' */
131 nHMM_states = feature_length + 1; /* allow a 'zero' state */
132 double chroma_thresh = 0.05;
135 for (i = 0; i < frames_read; i++)
138 for (j = 0; j < feature_length; j++)
140 if (features[i][j] > maxval)
142 maxval = features[i][j];
146 if (maxval > chroma_thresh)
149 q[i] = feature_length;
154 /*****************************/
157 /* scale all the features to 'balance covariances' during HMM training */
159 for (i = 0; i < frames_read; i++)
160 for (j = 0; j < feature_length; j++)
161 features[i][j] *= scale;
163 /* train an HMM on the features */
166 model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
168 /* train the model */
169 hmm_train(features, frames_read, model);
171 printf("\n\nafter training:\n");
174 /* decode the hidden state sequence */
175 viterbi_decode(features, frames_read, model, q);
178 /*****************************/
180 /*****************************/
184 fprintf(stderr, "HMM state sequence:\n");
185 for (i = 0; i < frames_read; i++)
186 fprintf(stderr, "%d ", q[i]);
187 fprintf(stderr, "\n\n");
190 /* create histograms of states */
191 double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */
192 create_histograms(q, frames_read, nHMM_states, histogram_length, h);
194 /* cluster the histograms */
195 int nbsched = 20; /* length of inverse temperature schedule */
196 double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */
200 for (i = 1; i < nbsched; i++)
201 bsched[i] = alpha * bsched[i-1];
202 cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
204 /* now q holds a sequence of cluster assignments */
210 /* segment constant-Q or chroma features */
211 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
212 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
218 if (feature_type == FEATURE_TYPE_CONSTQ)
220 /* fprintf(stderr, "Converting to dB and normalising...\n");
222 mpeg7_constq(features, frames_read, ncoeff);
224 fprintf(stderr, "Running PCA...\n");
226 /* do PCA on the features (but not the envelope) */
227 int ncomponents = 20;
228 pca_project(features, frames_read, ncoeff, ncomponents);
230 /* copy the envelope so that it immediatly follows the chosen components */
231 for (i = 0; i < frames_read; i++)
232 features[i][ncomponents] = features[i][ncoeff];
234 feature_length = ncomponents + 1;
236 /**************************************
239 char* dir = "/Users/mark/documents/semma/audio/";
240 char* file_name = (char*) malloc((strlen(dir) + strlen(trackname) + strlen("_features_c20r8h0.2f0.6.mat") + 1)*sizeof(char));
241 strcpy(file_name, dir);
242 strcat(file_name, trackname);
243 strcat(file_name, "_features_c20r8h0.2f0.6.mat");
245 // get the features from Matlab from mat-file
247 readmatarray_size(file_name, 2, &frames_in_file, &feature_length);
248 readmatarray(file_name, 2, frames_in_file, feature_length, features);
249 // copy final frame to ensure that we get as many as we expected
250 int missing_frames = frames_read - frames_in_file;
251 while (missing_frames > 0)
253 for (i = 0; i < feature_length; i++)
254 features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i];
259 ******************************************/
261 cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
264 if (feature_type == FEATURE_TYPE_CHROMA)
267 fprintf(stderr, "Converting to chroma features...\n");
269 /* convert constant-Q to normalised chroma features */
270 chroma = (double**) malloc(frames_read*sizeof(double*));
271 for (i = 0; i < frames_read; i++)
272 chroma[i] = (double*) malloc(bins*sizeof(double));
273 cq2chroma(features, frames_read, ncoeff, bins, chroma);
274 feature_length = bins;
276 cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
278 for (i = 0; i < frames_read; i++)