libs/vamp-plugins/OnsetDetect.cpp

   1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
   2
   3 /*
   4     QM Vamp Plugin Set
   5
   6     Centre for Digital Music, Queen Mary, University of London.
   7
   8     This program is free software; you can redistribute it and/or
   9     modify it under the terms of the GNU General Public License as
  10     published by the Free Software Foundation; either version 2 of the
  11     License, or (at your option) any later version.  See the file
  12     COPYING included with this distribution for more information.
  13 */
  14
  15 #ifdef COMPILER_MSVC
  16 #include <ardourext/float_cast.h>
  17 #endif
  18 #include "OnsetDetect.h"
  19
  20 #include "dsp/onsets/DetectionFunction.h"
  21 #include "dsp/onsets/PeakPicking.h"
  22 #include "dsp/tempotracking/TempoTrack.h"
  23
  24 using std::string;
  25 using std::vector;
  26 using std::cerr;
  27 using std::endl;
  28
  29 float OnsetDetector::m_preferredStepSecs = 0.01161;
  30
  31 class OnsetDetectorData
  32 {
  33 public:
  34     OnsetDetectorData(const DFConfig &config) : dfConfig(config) {
  35         df = new DetectionFunction(config);
  36     }
  37     ~OnsetDetectorData() {
  38         delete df;
  39     }
  40     void reset() {
  41         delete df;
  42         df = new DetectionFunction(dfConfig);
  43         dfOutput.clear();
  44         origin = Vamp::RealTime::zeroTime;
  45     }
  46
  47     DFConfig dfConfig;
  48     DetectionFunction *df;
  49     vector<double> dfOutput;
  50     Vamp::RealTime origin;
  51 };
  52
  53
  54 OnsetDetector::OnsetDetector(float inputSampleRate) :
  55     Vamp::Plugin(inputSampleRate),
  56     m_d(0),
  57     m_dfType(DF_COMPLEXSD),
  58     m_sensitivity(50),
  59     m_whiten(false)
  60 {
  61 }
  62
  63 OnsetDetector::~OnsetDetector()
  64 {
  65     delete m_d;
  66 }
  67
  68 string
  69 OnsetDetector::getIdentifier() const
  70 {
  71     return "qm-onsetdetector";
  72 }
  73
  74 string
  75 OnsetDetector::getName() const
  76 {
  77     return "Note Onset Detector";
  78 }
  79
  80 string
  81 OnsetDetector::getDescription() const
  82 {
  83     return "Estimate individual note onset positions";
  84 }
  85
  86 string
  87 OnsetDetector::getMaker() const
  88 {
  89     return "Queen Mary, University of London";
  90 }
  91
  92 int
  93 OnsetDetector::getPluginVersion() const
  94 {
  95     return 3;
  96 }
  97
  98 string
  99 OnsetDetector::getCopyright() const
 100 {
 101     return "Plugin by Christian Landone, Chris Duxbury and Juan Pablo Bello.  Copyright (c) 2006-2009 QMUL - All Rights Reserved";
 102 }
 103
 104 OnsetDetector::ParameterList
 105 OnsetDetector::getParameterDescriptors() const
 106 {
 107     ParameterList list;
 108
 109     ParameterDescriptor desc;
 110     desc.identifier = "dftype";
 111     desc.name = "Onset Detection Function Type";
 112     desc.description = "Method used to calculate the onset detection function";
 113     desc.minValue = 0;
 114     desc.maxValue = 4;
 115     desc.defaultValue = 3;
 116     desc.isQuantized = true;
 117     desc.quantizeStep = 1;
 118     desc.valueNames.push_back("High-Frequency Content");
 119     desc.valueNames.push_back("Spectral Difference");
 120     desc.valueNames.push_back("Phase Deviation");
 121     desc.valueNames.push_back("Complex Domain");
 122     desc.valueNames.push_back("Broadband Energy Rise");
 123     list.push_back(desc);
 124
 125     desc.identifier = "sensitivity";
 126     desc.name = "Onset Detector Sensitivity";
 127     desc.description = "Sensitivity of peak-picker for onset detection";
 128     desc.minValue = 0;
 129     desc.maxValue = 100;
 130     desc.defaultValue = 50;
 131     desc.isQuantized = true;
 132     desc.quantizeStep = 1;
 133     desc.unit = "%";
 134     desc.valueNames.clear();
 135     list.push_back(desc);
 136
 137     desc.identifier = "whiten";
 138     desc.name = "Adaptive Whitening";
 139     desc.description = "Normalize frequency bin magnitudes relative to recent peak levels";
 140     desc.minValue = 0;
 141     desc.maxValue = 1;
 142     desc.defaultValue = 0;
 143     desc.isQuantized = true;
 144     desc.quantizeStep = 1;
 145     desc.unit = "";
 146     list.push_back(desc);
 147
 148     return list;
 149 }
 150
 151 float
 152 OnsetDetector::getParameter(std::string name) const
 153 {
 154     if (name == "dftype") {
 155         switch (m_dfType) {
 156         case DF_HFC: return 0;
 157         case DF_SPECDIFF: return 1;
 158         case DF_PHASEDEV: return 2;
 159         default: case DF_COMPLEXSD: return 3;
 160         case DF_BROADBAND: return 4;
 161         }
 162     } else if (name == "sensitivity") {
 163         return m_sensitivity;
 164     } else if (name == "whiten") {
 165         return m_whiten ? 1.0 : 0.0;
 166     }
 167     return 0.0;
 168 }
 169
 170 void
 171 OnsetDetector::setParameter(std::string name, float value)
 172 {
 173     if (name == "dftype") {
 174         int dfType = m_dfType;
 175         switch (lrintf(value)) {
 176         case 0: dfType = DF_HFC; break;
 177         case 1: dfType = DF_SPECDIFF; break;
 178         case 2: dfType = DF_PHASEDEV; break;
 179         default: case 3: dfType = DF_COMPLEXSD; break;
 180         case 4: dfType = DF_BROADBAND; break;
 181         }
 182         if (dfType == m_dfType) return;
 183         m_dfType = dfType;
 184         m_program = "";
 185     } else if (name == "sensitivity") {
 186         if (m_sensitivity == value) return;
 187         m_sensitivity = value;
 188         m_program = "";
 189     } else if (name == "whiten") {
 190         if (m_whiten == (value > 0.5)) return;
 191         m_whiten = (value > 0.5);
 192         m_program = "";
 193     }
 194 }
 195
 196 OnsetDetector::ProgramList
 197 OnsetDetector::getPrograms() const
 198 {
 199     ProgramList programs;
 200     programs.push_back("");
 201     programs.push_back("General purpose");
 202     programs.push_back("Soft onsets");
 203     programs.push_back("Percussive onsets");
 204     return programs;
 205 }
 206
 207 std::string
 208 OnsetDetector::getCurrentProgram() const
 209 {
 210     if (m_program == "") return "";
 211     else return m_program;
 212 }
 213
 214 void
 215 OnsetDetector::selectProgram(std::string program)
 216 {
 217     if (program == "General purpose") {
 218         setParameter("dftype", 3); // complex
 219         setParameter("sensitivity", 50);
 220         setParameter("whiten", 0);
 221     } else if (program == "Soft onsets") {
 222         setParameter("dftype", 3); // complex
 223         setParameter("sensitivity", 40);
 224         setParameter("whiten", 1);
 225     } else if (program == "Percussive onsets") {
 226         setParameter("dftype", 4); // broadband energy rise
 227         setParameter("sensitivity", 40);
 228         setParameter("whiten", 0);
 229     } else {
 230         return;
 231     }
 232     m_program = program;
 233 }
 234
 235 bool
 236 OnsetDetector::initialise(size_t channels, size_t stepSize, size_t blockSize)
 237 {
 238     if (m_d) {
 239         delete m_d;
 240         m_d = 0;
 241     }
 242
 243     if (channels < getMinChannelCount() ||
 244         channels > getMaxChannelCount()) {
 245         std::cerr << "OnsetDetector::initialise: Unsupported channel count: "
 246                   << channels << std::endl;
 247         return false;
 248     }
 249
 250     if (stepSize != getPreferredStepSize()) {
 251         std::cerr << "WARNING: OnsetDetector::initialise: Possibly sub-optimal step size for this sample rate: "
 252                   << stepSize << " (wanted " << (getPreferredStepSize()) << ")" << std::endl;
 253     }
 254
 255     if (blockSize != getPreferredBlockSize()) {
 256         std::cerr << "WARNING: OnsetDetector::initialise: Possibly sub-optimal block size for this sample rate: "
 257                   << blockSize << " (wanted " << (getPreferredBlockSize()) << ")" << std::endl;
 258     }
 259
 260     DFConfig dfConfig;
 261     dfConfig.DFType = m_dfType;
 262     dfConfig.stepSize = stepSize;
 263     dfConfig.frameLength = blockSize;
 264     dfConfig.dbRise = 6.0 - m_sensitivity / 16.6667;
 265     dfConfig.adaptiveWhitening = m_whiten;
 266     dfConfig.whiteningRelaxCoeff = -1;
 267     dfConfig.whiteningFloor = -1;
 268
 269     m_d = new OnsetDetectorData(dfConfig);
 270     return true;
 271 }
 272
 273 void
 274 OnsetDetector::reset()
 275 {
 276     if (m_d) m_d->reset();
 277 }
 278
 279 size_t
 280 OnsetDetector::getPreferredStepSize() const
 281 {
 282     size_t step = size_t(m_inputSampleRate * m_preferredStepSecs + 0.0001);
 283     if (step < 1) step = 1;
 284 //    std::cerr << "OnsetDetector::getPreferredStepSize: input sample rate is " << m_inputSampleRate << ", step size is " << step << std::endl;
 285     return step;
 286 }
 287
 288 size_t
 289 OnsetDetector::getPreferredBlockSize() const
 290 {
 291     return getPreferredStepSize() * 2;
 292 }
 293
 294 OnsetDetector::OutputList
 295 OnsetDetector::getOutputDescriptors() const
 296 {
 297     OutputList list;
 298
 299     float stepSecs = m_preferredStepSecs;
 300 //    if (m_d) stepSecs = m_d->dfConfig.stepSecs;
 301
 302     OutputDescriptor onsets;
 303     onsets.identifier = "onsets";
 304     onsets.name = "Note Onsets";
 305     onsets.description = "Perceived note onset positions";
 306     onsets.unit = "";
 307     onsets.hasFixedBinCount = true;
 308     onsets.binCount = 0;
 309     onsets.sampleType = OutputDescriptor::VariableSampleRate;
 310     onsets.sampleRate = 1.0 / stepSecs;
 311
 312     OutputDescriptor df;
 313     df.identifier = "detection_fn";
 314     df.name = "Onset Detection Function";
 315     df.description = "Probability function of note onset likelihood";
 316     df.unit = "";
 317     df.hasFixedBinCount = true;
 318     df.binCount = 1;
 319     df.hasKnownExtents = false;
 320     df.isQuantized = false;
 321     df.sampleType = OutputDescriptor::OneSamplePerStep;
 322
 323     OutputDescriptor sdf;
 324     sdf.identifier = "smoothed_df";
 325     sdf.name = "Smoothed Detection Function";
 326     sdf.description = "Smoothed probability function used for peak-picking";
 327     sdf.unit = "";
 328     sdf.hasFixedBinCount = true;
 329     sdf.binCount = 1;
 330     sdf.hasKnownExtents = false;
 331     sdf.isQuantized = false;
 332
 333     sdf.sampleType = OutputDescriptor::VariableSampleRate;
 334
 335 //!!! SV doesn't seem to handle these correctly in getRemainingFeatures
 336 //    sdf.sampleType = OutputDescriptor::FixedSampleRate;
 337     sdf.sampleRate = 1.0 / stepSecs;
 338
 339     list.push_back(onsets);
 340     list.push_back(df);
 341     list.push_back(sdf);
 342
 343     return list;
 344 }
 345
 346 OnsetDetector::FeatureSet
 347 OnsetDetector::process(const float *const *inputBuffers,
 348                        Vamp::RealTime timestamp)
 349 {
 350     if (!m_d) {
 351         cerr << "ERROR: OnsetDetector::process: "
 352              << "OnsetDetector has not been initialised"
 353              << endl;
 354         return FeatureSet();
 355     }
 356
 357     size_t len = m_d->dfConfig.frameLength / 2 + 1;
 358
 359 //    float mean = 0.f;
 360 //    for (size_t i = 0; i < len; ++i) {
 361 ////        std::cerr << inputBuffers[0][i] << " ";
 362 //        mean += inputBuffers[0][i];
 363 //    }
 364 ////    std::cerr << std::endl;
 365 //    mean /= len;
 366
 367 //    std::cerr << "OnsetDetector::process(" << timestamp << "): "
 368 //              << "dftype " << m_dfType << ", sens " << m_sensitivity
 369 //              << ", len " << len << ", mean " << mean << std::endl;
 370
 371     double *reals = new double[len];
 372     double *imags = new double[len];
 373
 374     // We only support a single input channel
 375
 376     for (size_t i = 0; i < len; ++i) {
 377         reals[i] = inputBuffers[0][i*2];
 378         imags[i] = inputBuffers[0][i*2+1];
 379     }
 380
 381     double output = m_d->df->processFrequencyDomain(reals, imags);
 382
 383     delete[] reals;
 384     delete[] imags;
 385
 386     if (m_d->dfOutput.empty()) m_d->origin = timestamp;
 387
 388     m_d->dfOutput.push_back(output);
 389
 390     FeatureSet returnFeatures;
 391
 392     Feature feature;
 393     feature.hasTimestamp = false;
 394     feature.values.push_back(output);
 395
 396 //    std::cerr << "df: " << output << std::endl;
 397
 398     returnFeatures[1].push_back(feature); // detection function is output 1
 399     return returnFeatures;
 400 }
 401
 402 OnsetDetector::FeatureSet
 403 OnsetDetector::getRemainingFeatures()
 404 {
 405     if (!m_d) {
 406         cerr << "ERROR: OnsetDetector::getRemainingFeatures: "
 407              << "OnsetDetector has not been initialised"
 408              << endl;
 409         return FeatureSet();
 410     }
 411
 412     if (m_dfType == DF_BROADBAND) {
 413         for (size_t i = 0; i < m_d->dfOutput.size(); ++i) {
 414             if (m_d->dfOutput[i] < ((110 - m_sensitivity) *
 415                                     m_d->dfConfig.frameLength) / 200) {
 416                 m_d->dfOutput[i] = 0;
 417             }
 418         }
 419     }
 420
 421     double aCoeffs[] = { 1.0000, -0.5949, 0.2348 };
 422     double bCoeffs[] = { 0.1600,  0.3200, 0.1600 };
 423
 424     FeatureSet returnFeatures;
 425
 426     PPickParams ppParams;
 427     ppParams.length = m_d->dfOutput.size();
 428     // tau and cutoff appear to be unused in PeakPicking, but I've
 429     // inserted some moderately plausible values rather than leave
 430     // them unset.  The QuadThresh values come from trial and error.
 431     // The rest of these are copied from ttParams in the BeatTracker
 432     // code: I don't claim to know whether they're good or not --cc
 433     ppParams.tau = m_d->dfConfig.stepSize / m_inputSampleRate;
 434     ppParams.alpha = 9;
 435     ppParams.cutoff = m_inputSampleRate/4;
 436     ppParams.LPOrd = 2;
 437     ppParams.LPACoeffs = aCoeffs;
 438     ppParams.LPBCoeffs = bCoeffs;
 439     ppParams.WinT.post = 8;
 440     ppParams.WinT.pre = 7;
 441     ppParams.QuadThresh.a = (100 - m_sensitivity) / 1000.0;
 442     ppParams.QuadThresh.b = 0;
 443     ppParams.QuadThresh.c = (100 - m_sensitivity) / 1500.0;
 444
 445     PeakPicking peakPicker(ppParams);
 446
 447     double *ppSrc = new double[ppParams.length];
 448     for (unsigned int i = 0; i < ppParams.length; ++i) {
 449         ppSrc[i] = m_d->dfOutput[i];
 450     }
 451
 452     vector<int> onsets;
 453     peakPicker.process(ppSrc, ppParams.length, onsets);
 454
 455     for (size_t i = 0; i < onsets.size(); ++i) {
 456
 457         size_t index = onsets[i];
 458
 459         if (m_dfType != DF_BROADBAND) {
 460             double prevDiff = 0.0;
 461             while (index > 1) {
 462                 double diff = ppSrc[index] - ppSrc[index-1];
 463                 if (diff < prevDiff * 0.9) break;
 464                 prevDiff = diff;
 465                 --index;
 466             }
 467         }
 468
 469         size_t frame = index * m_d->dfConfig.stepSize;
 470
 471         Feature feature;
 472         feature.hasTimestamp = true;
 473         feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime
 474             (frame, lrintf(m_inputSampleRate));
 475
 476         returnFeatures[0].push_back(feature); // onsets are output 0
 477     }
 478
 479     for (unsigned int i = 0; i < ppParams.length; ++i) {
 480
 481         Feature feature;
 482 //        feature.hasTimestamp = false;
 483         feature.hasTimestamp = true;
 484         size_t frame = i * m_d->dfConfig.stepSize;
 485         feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime
 486             (frame, lrintf(m_inputSampleRate));
 487
 488         feature.values.push_back(ppSrc[i]);
 489         returnFeatures[2].push_back(feature); // smoothed df is output 2
 490     }
 491
 492     return returnFeatures;
 493 }
 494