1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
5 An audio time-stretching and pitch-shifting library.
6 Copyright 2007-2008 Chris Cannam.
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2 of the
11 License, or (at your option) any later version. See the file
12 COPYING included with this distribution for more information.
15 #include "StretchCalculator.h"
30 StretchCalculator::StretchCalculator(size_t sampleRate,
31 size_t inputIncrement,
33 m_sampleRate(sampleRate),
34 m_increment(inputIncrement),
39 m_transientAmnesty(0),
40 m_useHardPeaks(useHardPeaks)
42 // std::cerr << "StretchCalculator::StretchCalculator: useHardPeaks = " << useHardPeaks << std::endl;
45 StretchCalculator::~StretchCalculator()
50 StretchCalculator::calculate(double ratio, size_t inputDuration,
51 const std::vector<float> &phaseResetDf,
52 const std::vector<float> &stretchDf)
54 assert(phaseResetDf.size() == stretchDf.size());
56 m_lastPeaks = findPeaks(phaseResetDf);
57 std::vector<Peak> &peaks = m_lastPeaks;
58 size_t totalCount = phaseResetDf.size();
60 std::vector<int> increments;
62 size_t outputDuration = lrint(inputDuration * ratio);
64 if (m_debugLevel > 0) {
65 std::cerr << "StretchCalculator::calculate(): inputDuration " << inputDuration << ", ratio " << ratio << ", outputDuration " << outputDuration;
68 outputDuration = lrint((phaseResetDf.size() * m_increment) * ratio);
70 if (m_debugLevel > 0) {
71 std::cerr << " (rounded up to " << outputDuration << ")";
72 std::cerr << ", df size " << phaseResetDf.size() << std::endl;
75 std::vector<size_t> fixedAudioChunks;
76 for (size_t i = 0; i < peaks.size(); ++i) {
77 fixedAudioChunks.push_back
78 (lrint((double(peaks[i].chunk) * outputDuration) / totalCount));
81 if (m_debugLevel > 1) {
82 std::cerr << "have " << peaks.size() << " fixed positions" << std::endl;
85 size_t totalInput = 0, totalOutput = 0;
87 // For each region between two consecutive time sync points, we
88 // want to take the number of output chunks to be allocated and
89 // the detection function values within the range, and produce a
90 // series of increments that sum to the number of output chunks,
91 // such that each increment is displaced from the input increment
92 // by an amount inversely proportional to the magnitude of the
93 // stretch detection function at that input step.
95 size_t regionTotalChunks = 0;
97 for (size_t i = 0; i <= peaks.size(); ++i) {
99 size_t regionStart, regionStartChunk, regionEnd, regionEndChunk;
100 bool phaseReset = false;
103 regionStartChunk = 0;
106 regionStartChunk = peaks[i-1].chunk;
107 regionStart = fixedAudioChunks[i-1];
108 phaseReset = peaks[i-1].hard;
111 if (i == peaks.size()) {
112 regionEndChunk = totalCount;
113 regionEnd = outputDuration;
115 regionEndChunk = peaks[i].chunk;
116 regionEnd = fixedAudioChunks[i];
119 size_t regionDuration = regionEnd - regionStart;
120 regionTotalChunks += regionDuration;
122 std::vector<float> dfRegion;
124 for (size_t j = regionStartChunk; j != regionEndChunk; ++j) {
125 dfRegion.push_back(stretchDf[j]);
128 if (m_debugLevel > 1) {
129 std::cerr << "distributeRegion from " << regionStartChunk << " to " << regionEndChunk << " (chunks " << regionStart << " to " << regionEnd << ")" << std::endl;
132 dfRegion = smoothDF(dfRegion);
134 std::vector<int> regionIncrements = distributeRegion
135 (dfRegion, regionDuration, ratio, phaseReset);
137 size_t totalForRegion = 0;
139 for (size_t j = 0; j < regionIncrements.size(); ++j) {
141 int incr = regionIncrements[j];
143 if (j == 0 && phaseReset) increments.push_back(-incr);
144 else increments.push_back(incr);
146 if (incr > 0) totalForRegion += incr;
147 else totalForRegion += -incr;
149 totalInput += m_increment;
152 if (totalForRegion != regionDuration) {
153 std::cerr << "*** WARNING: distributeRegion returned wrong duration " << totalForRegion << ", expected " << regionDuration << std::endl;
156 totalOutput += totalForRegion;
159 if (m_debugLevel > 0) {
160 std::cerr << "total input increment = " << totalInput << " (= " << totalInput / m_increment << " chunks), output = " << totalOutput << ", ratio = " << double(totalOutput)/double(totalInput) << ", ideal output " << size_t(ceil(totalInput * ratio)) << std::endl;
161 std::cerr << "(region total = " << regionTotalChunks << ")" << std::endl;
168 StretchCalculator::calculateSingle(double ratio,
172 if (increment == 0) increment = m_increment;
174 bool isTransient = false;
176 // We want to ensure, as close as possible, that the phase reset
177 // points appear at _exactly_ the right audio frame numbers.
179 // In principle, the threshold depends on chunk size: larger chunk
180 // sizes need higher thresholds. Since chunk size depends on
181 // ratio, I suppose we could in theory calculate the threshold
182 // from the ratio directly. For the moment we're happy if it
183 // works well in common situations.
185 float transientThreshold = 0.35f;
186 if (ratio > 1) transientThreshold = 0.25f;
188 if (m_useHardPeaks && df > m_prevDf * 1.1f && df > transientThreshold) {
192 if (m_debugLevel > 2) {
193 std::cerr << "df = " << df << ", prevDf = " << m_prevDf
194 << ", thresh = " << transientThreshold << std::endl;
199 bool ratioChanged = (ratio != m_prevRatio);
202 if (isTransient && m_transientAmnesty == 0) {
203 if (m_debugLevel > 1) {
204 std::cerr << "StretchCalculator::calculateSingle: transient"
207 m_divergence += increment - (increment * ratio);
209 // as in offline mode, 0.05 sec approx min between transients
211 lrint(ceil(double(m_sampleRate) / (20 * double(increment))));
213 m_recovery = m_divergence / ((m_sampleRate / 10.0) / increment);
214 return -int(increment);
218 m_recovery = m_divergence / ((m_sampleRate / 10.0) / increment);
221 if (m_transientAmnesty > 0) --m_transientAmnesty;
223 int incr = lrint(increment * ratio - m_recovery);
224 if (m_debugLevel > 2 || (m_debugLevel > 1 && m_divergence != 0)) {
225 std::cerr << "divergence = " << m_divergence << ", recovery = " << m_recovery << ", incr = " << incr << ", ";
227 if (incr < lrint((increment * ratio) / 2)) {
228 incr = lrint((increment * ratio) / 2);
229 } else if (incr > lrint(increment * ratio * 2)) {
230 incr = lrint(increment * ratio * 2);
233 double divdiff = (increment * ratio) - incr;
235 if (m_debugLevel > 2 || (m_debugLevel > 1 && m_divergence != 0)) {
236 std::cerr << "divdiff = " << divdiff << std::endl;
239 double prevDivergence = m_divergence;
240 m_divergence -= divdiff;
241 if ((prevDivergence < 0 && m_divergence > 0) ||
242 (prevDivergence > 0 && m_divergence < 0)) {
243 m_recovery = m_divergence / ((m_sampleRate / 10.0) / increment);
250 StretchCalculator::reset()
256 std::vector<StretchCalculator::Peak>
257 StretchCalculator::findPeaks(const std::vector<float> &rawDf)
259 std::vector<float> df = smoothDF(rawDf);
261 // We distinguish between "soft" and "hard" peaks. A soft peak is
262 // simply the result of peak-picking on the smoothed onset
263 // detection function, and it represents any (strong-ish) onset.
264 // We aim to ensure always that soft peaks are placed at the
265 // correct position in time. A hard peak is where there is a very
266 // rapid rise in detection function, and it presumably represents
267 // a more broadband, noisy transient. For these we perform a
268 // phase reset (if in the appropriate mode), and we locate the
269 // reset at the first point where we notice enough of a rapid
270 // rise, rather than necessarily at the peak itself, in order to
271 // preserve the shape of the transient.
273 std::set<size_t> hardPeakCandidates;
274 std::set<size_t> softPeakCandidates;
276 if (m_useHardPeaks) {
278 // 0.05 sec approx min between hard peaks
279 size_t hardPeakAmnesty = lrint(ceil(double(m_sampleRate) /
280 (20 * double(m_increment))));
281 size_t prevHardPeak = 0;
283 if (m_debugLevel > 1) {
284 std::cerr << "hardPeakAmnesty = " << hardPeakAmnesty << std::endl;
287 for (size_t i = 1; i + 1 < df.size(); ++i) {
289 if (df[i] < 0.1) continue;
290 if (df[i] <= df[i-1] * 1.1) continue;
291 if (df[i] < 0.22) continue;
293 if (!hardPeakCandidates.empty() &&
294 i < prevHardPeak + hardPeakAmnesty) {
298 bool hard = (df[i] > 0.4);
300 if (hard && (m_debugLevel > 1)) {
301 std::cerr << "hard peak at " << i << ": " << df[i]
302 << " > absolute " << 0.4
307 hard = (df[i] > df[i-1] * 1.4);
309 if (hard && (m_debugLevel > 1)) {
310 std::cerr << "hard peak at " << i << ": " << df[i]
311 << " > prev " << df[i-1] << " * 1.4"
316 if (!hard && i > 1) {
317 hard = (df[i] > df[i-1] * 1.2 &&
318 df[i-1] > df[i-2] * 1.2);
320 if (hard && (m_debugLevel > 1)) {
321 std::cerr << "hard peak at " << i << ": " << df[i]
322 << " > prev " << df[i-1] << " * 1.2 and "
323 << df[i-1] << " > prev " << df[i-2] << " * 1.2"
328 if (!hard && i > 2) {
329 // have already established that df[i] > df[i-1] * 1.1
330 hard = (df[i] > 0.3 &&
331 df[i-1] > df[i-2] * 1.1 &&
332 df[i-2] > df[i-3] * 1.1);
334 if (hard && (m_debugLevel > 1)) {
335 std::cerr << "hard peak at " << i << ": " << df[i]
336 << " > prev " << df[i-1] << " * 1.1 and "
337 << df[i-1] << " > prev " << df[i-2] << " * 1.1 and "
338 << df[i-2] << " > prev " << df[i-3] << " * 1.1"
345 // (df[i+1] > df[i] && df[i+1] > df[i-1] * 1.8) ||
348 size_t peakLocation = i;
350 if (i + 1 < rawDf.size() &&
351 rawDf[i + 1] > rawDf[i] * 1.4) {
355 if (m_debugLevel > 1) {
356 std::cerr << "pushing hard peak forward to " << peakLocation << ": " << df[peakLocation] << " > " << df[peakLocation-1] << " * " << 1.4 << std::endl;
360 hardPeakCandidates.insert(peakLocation);
361 prevHardPeak = peakLocation;
365 size_t medianmaxsize = lrint(ceil(double(m_sampleRate) /
366 double(m_increment))); // 1 sec ish
368 if (m_debugLevel > 1) {
369 std::cerr << "mediansize = " << medianmaxsize << std::endl;
371 if (medianmaxsize < 7) {
373 if (m_debugLevel > 1) {
374 std::cerr << "adjusted mediansize = " << medianmaxsize << std::endl;
378 int minspacing = lrint(ceil(double(m_sampleRate) /
379 (20 * double(m_increment)))); // 0.05 sec ish
381 std::deque<float> medianwin;
382 std::vector<float> sorted;
383 int softPeakAmnesty = 0;
385 for (size_t i = 0; i < medianmaxsize/2; ++i) {
386 medianwin.push_back(0);
388 for (size_t i = 0; i < medianmaxsize/2 && i < df.size(); ++i) {
389 medianwin.push_back(df[i]);
392 size_t lastSoftPeak = 0;
394 for (size_t i = 0; i < df.size(); ++i) {
396 size_t mediansize = medianmaxsize;
398 if (medianwin.size() < mediansize) {
399 mediansize = medianwin.size();
402 size_t middle = medianmaxsize / 2;
403 if (middle >= mediansize) middle = mediansize-1;
405 size_t nextDf = i + mediansize - middle;
407 if (mediansize < 2) {
408 if (mediansize > medianmaxsize) { // absurd, but never mind that
409 medianwin.pop_front();
411 if (nextDf < df.size()) {
412 medianwin.push_back(df[nextDf]);
414 medianwin.push_back(0);
419 if (m_debugLevel > 2) {
420 // std::cerr << "have " << mediansize << " in median buffer" << std::endl;
424 for (size_t j = 0; j < mediansize; ++j) {
425 sorted.push_back(medianwin[j]);
427 std::sort(sorted.begin(), sorted.end());
429 size_t n = 90; // percentile above which we pick peaks
430 size_t index = (sorted.size() * n) / 100;
431 if (index >= sorted.size()) index = sorted.size()-1;
432 if (index == sorted.size()-1 && index > 0) --index;
433 float thresh = sorted[index];
435 // if (m_debugLevel > 2) {
436 // std::cerr << "medianwin[" << middle << "] = " << medianwin[middle] << ", thresh = " << thresh << std::endl;
437 // if (medianwin[middle] == 0.f) {
438 // std::cerr << "contents: ";
439 // for (size_t j = 0; j < medianwin.size(); ++j) {
440 // std::cerr << medianwin[j] << " ";
442 // std::cerr << std::endl;
446 if (medianwin[middle] > thresh &&
447 medianwin[middle] > medianwin[middle-1] &&
448 medianwin[middle] > medianwin[middle+1] &&
449 softPeakAmnesty == 0) {
451 size_t maxindex = middle;
452 float maxval = medianwin[middle];
454 for (size_t j = middle+1; j < mediansize; ++j) {
455 if (medianwin[j] > maxval) {
456 maxval = medianwin[j];
458 } else if (medianwin[j] < medianwin[middle]) {
463 size_t peak = i + maxindex - middle;
465 // std::cerr << "i = " << i << ", maxindex = " << maxindex << ", middle = " << middle << ", so peak at " << peak << std::endl;
467 if (softPeakCandidates.empty() || lastSoftPeak != peak) {
469 if (m_debugLevel > 1) {
470 std::cerr << "soft peak at " << peak << " ("
471 << peak * m_increment << "): "
472 << medianwin[middle] << " > "
475 << " > " << medianwin[middle-1] << " and "
477 << " > " << medianwin[middle+1]
481 if (peak >= df.size()) {
482 if (m_debugLevel > 2) {
483 std::cerr << "peak is beyond end" << std::endl;
486 softPeakCandidates.insert(peak);
491 softPeakAmnesty = minspacing + maxindex - middle;
492 if (m_debugLevel > 2) {
493 std::cerr << "amnesty = " << softPeakAmnesty << std::endl;
496 } else if (softPeakAmnesty > 0) --softPeakAmnesty;
498 if (mediansize >= medianmaxsize) {
499 medianwin.pop_front();
501 if (nextDf < df.size()) {
502 medianwin.push_back(df[nextDf]);
504 medianwin.push_back(0);
508 std::vector<Peak> peaks;
510 while (!hardPeakCandidates.empty() || !softPeakCandidates.empty()) {
512 bool haveHardPeak = !hardPeakCandidates.empty();
513 bool haveSoftPeak = !softPeakCandidates.empty();
515 size_t hardPeak = (haveHardPeak ? *hardPeakCandidates.begin() : 0);
516 size_t softPeak = (haveSoftPeak ? *softPeakCandidates.begin() : 0);
520 peak.chunk = softPeak;
525 (!haveSoftPeak || hardPeak <= softPeak)) {
527 if (m_debugLevel > 2) {
528 std::cerr << "Hard peak: " << hardPeak << std::endl;
532 peak.chunk = hardPeak;
533 hardPeakCandidates.erase(hardPeakCandidates.begin());
536 if (m_debugLevel > 2) {
537 std::cerr << "Soft peak: " << softPeak << std::endl;
539 if (!peaks.empty() &&
540 peaks[peaks.size()-1].hard &&
541 peaks[peaks.size()-1].chunk + 3 >= softPeak) {
542 if (m_debugLevel > 2) {
543 std::cerr << "(ignoring, as we just had a hard peak)"
550 if (haveSoftPeak && peak.chunk == softPeak) {
551 softPeakCandidates.erase(softPeakCandidates.begin());
555 peaks.push_back(peak);
563 StretchCalculator::smoothDF(const std::vector<float> &df)
565 std::vector<float> smoothedDF;
567 for (size_t i = 0; i < df.size(); ++i) {
568 // three-value moving mean window for simple smoothing
569 float total = 0.f, count = 0;
570 if (i > 0) { total += df[i-1]; ++count; }
571 total += df[i]; ++count;
572 if (i+1 < df.size()) { total += df[i+1]; ++count; }
573 float mean = total / count;
574 smoothedDF.push_back(mean);
581 StretchCalculator::distributeRegion(const std::vector<float> &dfIn,
582 size_t duration, float ratio, bool phaseReset)
584 std::vector<float> df(dfIn);
585 std::vector<int> increments;
587 // The peak for the stretch detection function may appear after
588 // the peak that we're using to calculate the start of the region.
589 // We don't want that. If we find a peak in the first half of
590 // the region, we should set all the values up to that point to
591 // the same value as the peak.
593 // (This might not be subtle enough, especially if the region is
594 // long -- we want a bound that corresponds to acoustic perception
595 // of the audible bounce.)
597 for (size_t i = 1; i < df.size()/2; ++i) {
598 if (df[i] < df[i-1]) {
599 if (m_debugLevel > 1) {
600 std::cerr << "stretch peak offset: " << i-1 << " (peak " << df[i-1] << ")" << std::endl;
602 for (size_t j = 0; j < i-1; ++j) {
611 for (size_t i = 0; i < df.size(); ++i) {
612 if (i == 0 || df[i] > maxDf) maxDf = df[i];
615 // We want to try to ensure the last 100ms or so (if possible) are
616 // tending back towards the maximum df, so that the stretchiness
617 // reduces at the end of the stretched region.
619 int reducedRegion = lrint((0.1 * m_sampleRate) / m_increment);
620 if (reducedRegion > int(df.size()/5)) reducedRegion = df.size()/5;
622 for (int i = 0; i < reducedRegion; ++i) {
623 size_t index = df.size() - reducedRegion + i;
624 df[index] = df[index] + ((maxDf - df[index]) * i) / reducedRegion;
627 long toAllot = long(duration) - long(m_increment * df.size());
629 if (m_debugLevel > 1) {
630 std::cerr << "region of " << df.size() << " chunks, output duration " << duration << ", toAllot " << toAllot << std::endl;
633 size_t totalIncrement = 0;
635 // We place limits on the amount of displacement per chunk. if
636 // ratio < 0, no increment should be larger than increment*ratio
637 // or smaller than increment*ratio/2; if ratio > 0, none should be
638 // smaller than increment*ratio or larger than increment*ratio*2.
639 // We need to enforce this in the assignment of displacements to
640 // allotments, not by trying to respond if something turns out
643 // Note that the ratio is only provided to this function for the
644 // purposes of establishing this bound to the displacement.
647 // maxDisplacement / totalDisplacement > increment * ratio*2 - increment
650 // maxDisplacement / totalDisplacement < increment * ratio/2
653 // then we need to adjust and accommodate
655 bool acceptableSquashRange = false;
657 double totalDisplacement = 0;
658 double maxDisplacement = 0; // min displacement will be 0 by definition
663 while (!acceptableSquashRange) {
665 acceptableSquashRange = true;
666 calculateDisplacements(df, maxDf, totalDisplacement, maxDisplacement,
669 if (m_debugLevel > 1) {
670 std::cerr << "totalDisplacement " << totalDisplacement << ", max " << maxDisplacement << " (maxDf " << maxDf << ", df count " << df.size() << ")" << std::endl;
673 if (totalDisplacement == 0) {
674 // Not usually a problem, in fact
675 // std::cerr << "WARNING: totalDisplacement == 0 (duration " << duration << ", " << df.size() << " values in df)" << std::endl;
676 if (!df.empty() && adj == 0) {
677 acceptableSquashRange = false;
683 int extremeIncrement = m_increment + lrint((toAllot * maxDisplacement) / totalDisplacement);
685 if (extremeIncrement > lrint(ceil(m_increment * ratio))) {
686 std::cerr << "ERROR: extreme increment " << extremeIncrement << " > " << m_increment * ratio << " (this should not happen)" << std::endl;
687 } else if (extremeIncrement < (m_increment * ratio) / 2) {
688 if (m_debugLevel > 0) {
689 std::cerr << "WARNING: extreme increment " << extremeIncrement << " < " << (m_increment * ratio) / 2 << std::endl;
691 acceptableSquashRange = false;
694 if (extremeIncrement > m_increment * ratio * 2) {
695 if (m_debugLevel > 0) {
696 std::cerr << "WARNING: extreme increment " << extremeIncrement << " > " << m_increment * ratio * 2 << std::endl;
698 acceptableSquashRange = false;
699 } else if (extremeIncrement < lrint(floor(m_increment * ratio))) {
700 std::cerr << "ERROR: extreme increment " << extremeIncrement << " < " << m_increment * ratio << " (I thought this couldn't happen?)" << std::endl;
704 if (!acceptableSquashRange) {
705 // Need to make maxDisplacement smaller as a proportion of
706 // the total displacement, yet ensure that the
707 // displacements still sum to the total.
712 for (size_t i = 0; i < df.size(); ++i) {
714 double displacement = maxDf - df[i];
715 if (displacement < 0) displacement -= adj;
716 else displacement += adj;
718 if (i == 0 && phaseReset) {
719 if (df.size() == 1) {
720 increments.push_back(duration);
721 totalIncrement += duration;
723 increments.push_back(m_increment);
724 totalIncrement += m_increment;
726 totalDisplacement -= displacement;
730 double theoreticalAllotment = 0;
732 if (totalDisplacement != 0) {
733 theoreticalAllotment = (toAllot * displacement) / totalDisplacement;
735 int allotment = lrint(theoreticalAllotment);
736 if (i + 1 == df.size()) allotment = toAllot;
738 int increment = m_increment + allotment;
740 if (increment <= 0) {
741 // this is a serious problem, the allocation is quite
742 // wrong if it allows increment to diverge so far from the
744 std::cerr << "*** WARNING: increment " << increment << " <= 0, rounding to zero" << std::endl;
746 allotment = increment - m_increment;
749 increments.push_back(increment);
750 totalIncrement += increment;
752 toAllot -= allotment;
753 totalDisplacement -= displacement;
755 if (m_debugLevel > 2) {
756 std::cerr << "df " << df[i] << ", smoothed " << df[i] << ", disp " << displacement << ", allot " << theoreticalAllotment << ", incr " << increment << ", remain " << toAllot << std::endl;
760 if (m_debugLevel > 2) {
761 std::cerr << "total increment: " << totalIncrement << ", left over: " << toAllot << " to allot, displacement " << totalDisplacement << std::endl;
764 if (totalIncrement != duration) {
765 std::cerr << "*** WARNING: calculated output duration " << totalIncrement << " != expected " << duration << std::endl;
772 StretchCalculator::calculateDisplacements(const std::vector<float> &df,
774 double &totalDisplacement,
775 double &maxDisplacement,
778 totalDisplacement = maxDisplacement = 0;
782 for (size_t i = 0; i < df.size(); ++i) {
783 if (i == 0 || df[i] > maxDf) maxDf = df[i];
786 for (size_t i = 0; i < df.size(); ++i) {
787 double displacement = maxDf - df[i];
788 if (displacement < 0) displacement -= adj;
789 else displacement += adj;
790 totalDisplacement += displacement;
791 if (i == 0 || displacement > maxDisplacement) {
792 maxDisplacement = displacement;