1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
5 An audio time-stretching and pitch-shifting library.
6 Copyright 2007 Chris Cannam.
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2 of the
11 License, or (at your option) any later version. See the file
12 COPYING included with this distribution for more information.
15 #include "StretchCalculator.h"
27 StretchCalculator::StretchCalculator(size_t sampleRate,
28 size_t inputIncrement,
30 m_sampleRate(sampleRate),
31 m_increment(inputIncrement),
36 m_transientAmnesty(0),
37 m_useHardPeaks(useHardPeaks)
39 // std::cerr << "StretchCalculator::StretchCalculator: useHardPeaks = " << useHardPeaks << std::endl;
42 StretchCalculator::~StretchCalculator()
47 StretchCalculator::calculate(double ratio, size_t inputDuration,
48 const std::vector<float> &phaseResetDf,
49 const std::vector<float> &stretchDf)
51 assert(phaseResetDf.size() == stretchDf.size());
53 m_lastPeaks = findPeaks(phaseResetDf);
54 std::vector<Peak> &peaks = m_lastPeaks;
55 size_t totalCount = phaseResetDf.size();
57 std::vector<int> increments;
59 size_t outputDuration = lrint(inputDuration * ratio);
61 if (m_debugLevel > 0) {
62 std::cerr << "StretchCalculator::calculate(): inputDuration " << inputDuration << ", ratio " << ratio << ", outputDuration " << outputDuration;
65 outputDuration = lrint((phaseResetDf.size() * m_increment) * ratio);
67 if (m_debugLevel > 0) {
68 std::cerr << " (rounded up to " << outputDuration << ")";
69 std::cerr << ", df size " << phaseResetDf.size() << std::endl;
72 std::vector<size_t> fixedAudioChunks;
73 for (size_t i = 0; i < peaks.size(); ++i) {
74 fixedAudioChunks.push_back
75 (lrint((double(peaks[i].chunk) * outputDuration) / totalCount));
78 if (m_debugLevel > 1) {
79 std::cerr << "have " << peaks.size() << " fixed positions" << std::endl;
82 size_t totalInput = 0, totalOutput = 0;
84 // For each region between two consecutive time sync points, we
85 // want to take the number of output chunks to be allocated and
86 // the detection function values within the range, and produce a
87 // series of increments that sum to the number of output chunks,
88 // such that each increment is displaced from the input increment
89 // by an amount inversely proportional to the magnitude of the
90 // stretch detection function at that input step.
92 size_t regionTotalChunks = 0;
94 for (size_t i = 0; i <= peaks.size(); ++i) {
96 size_t regionStart, regionStartChunk, regionEnd, regionEndChunk;
97 bool phaseReset = false;
100 regionStartChunk = 0;
103 regionStartChunk = peaks[i-1].chunk;
104 regionStart = fixedAudioChunks[i-1];
105 phaseReset = peaks[i-1].hard;
108 if (i == peaks.size()) {
109 regionEndChunk = totalCount;
110 regionEnd = outputDuration;
112 regionEndChunk = peaks[i].chunk;
113 regionEnd = fixedAudioChunks[i];
116 size_t regionDuration = regionEnd - regionStart;
117 regionTotalChunks += regionDuration;
119 std::vector<float> dfRegion;
121 for (size_t j = regionStartChunk; j != regionEndChunk; ++j) {
122 dfRegion.push_back(stretchDf[j]);
125 if (m_debugLevel > 1) {
126 std::cerr << "distributeRegion from " << regionStartChunk << " to " << regionEndChunk << " (chunks " << regionStart << " to " << regionEnd << ")" << std::endl;
129 dfRegion = smoothDF(dfRegion);
131 std::vector<int> regionIncrements = distributeRegion
132 (dfRegion, regionDuration, ratio, phaseReset);
134 size_t totalForRegion = 0;
136 for (size_t j = 0; j < regionIncrements.size(); ++j) {
138 int incr = regionIncrements[j];
140 if (j == 0 && phaseReset) increments.push_back(-incr);
141 else increments.push_back(incr);
143 if (incr > 0) totalForRegion += incr;
144 else totalForRegion += -incr;
146 totalInput += m_increment;
149 if (totalForRegion != regionDuration) {
150 std::cerr << "*** WARNING: distributeRegion returned wrong duration " << totalForRegion << ", expected " << regionDuration << std::endl;
153 totalOutput += totalForRegion;
156 if (m_debugLevel > 0) {
157 std::cerr << "total input increment = " << totalInput << " (= " << totalInput / m_increment << " chunks), output = " << totalOutput << ", ratio = " << double(totalOutput)/double(totalInput) << ", ideal output " << size_t(ceil(totalInput * ratio)) << std::endl;
158 std::cerr << "(region total = " << regionTotalChunks << ")" << std::endl;
165 StretchCalculator::calculateSingle(double ratio,
166 size_t inputDurationSoFar,
169 bool isTransient = false;
171 // We want to ensure, as close as possible, that the phase reset
172 // points appear at _exactly_ the right audio frame numbers.
174 // In principle, the threshold depends on chunk size: larger chunk
175 // sizes need higher thresholds. Since chunk size depends on
176 // ratio, I suppose we could in theory calculate the threshold
177 // from the ratio directly. For the moment we're happy if it
178 // works well in common situations.
180 float transientThreshold = 0.35;
181 if (ratio > 1) transientThreshold = 0.25;
183 if (m_useHardPeaks && df > m_prevDf * 1.1 && df > transientThreshold) {
187 if (m_debugLevel > 2) {
188 std::cerr << "df = " << df << ", prevDf = " << m_prevDf
189 << ", thresh = " << transientThreshold << std::endl;
194 if (isTransient && m_transientAmnesty == 0) {
195 if (m_debugLevel > 1) {
196 std::cerr << "StretchCalculator::calculateSingle: transient found at "
197 << inputDurationSoFar << std::endl;
199 m_divergence += m_increment - (m_increment * ratio);
201 // as in offline mode, 0.05 sec approx min between transients
203 lrint(ceil(double(m_sampleRate) / (20 * double(m_increment))));
205 m_recovery = m_divergence / ((m_sampleRate / 10.0) / m_increment);
209 if (m_prevRatio != ratio) {
210 m_recovery = m_divergence / ((m_sampleRate / 10.0) / m_increment);
214 if (m_transientAmnesty > 0) --m_transientAmnesty;
216 int incr = lrint(m_increment * ratio - m_recovery);
217 if (m_debugLevel > 2 || (m_debugLevel > 1 && m_divergence != 0)) {
218 std::cerr << "divergence = " << m_divergence << ", recovery = " << m_recovery << ", incr = " << incr << ", ";
220 if (incr < lrint((m_increment * ratio) / 2)) {
221 incr = lrint((m_increment * ratio) / 2);
222 } else if (incr > lrint(m_increment * ratio * 2)) {
223 incr = lrint(m_increment * ratio * 2);
226 double divdiff = (m_increment * ratio) - incr;
228 if (m_debugLevel > 2 || (m_debugLevel > 1 && m_divergence != 0)) {
229 std::cerr << "divdiff = " << divdiff << std::endl;
232 double prevDivergence = m_divergence;
233 m_divergence -= divdiff;
234 if ((prevDivergence < 0 && m_divergence > 0) ||
235 (prevDivergence > 0 && m_divergence < 0)) {
236 m_recovery = m_divergence / ((m_sampleRate / 10.0) / m_increment);
243 StretchCalculator::reset()
249 std::vector<StretchCalculator::Peak>
250 StretchCalculator::findPeaks(const std::vector<float> &rawDf)
252 std::vector<float> df = smoothDF(rawDf);
254 // We distinguish between "soft" and "hard" peaks. A soft peak is
255 // simply the result of peak-picking on the smoothed onset
256 // detection function, and it represents any (strong-ish) onset.
257 // We aim to ensure always that soft peaks are placed at the
258 // correct position in time. A hard peak is where there is a very
259 // rapid rise in detection function, and it presumably represents
260 // a more broadband, noisy transient. For these we perform a
261 // phase reset (if in the appropriate mode), and we locate the
262 // reset at the first point where we notice enough of a rapid
263 // rise, rather than necessarily at the peak itself, in order to
264 // preserve the shape of the transient.
266 std::set<size_t> hardPeakCandidates;
267 std::set<size_t> softPeakCandidates;
269 if (m_useHardPeaks) {
271 // 0.05 sec approx min between hard peaks
272 size_t hardPeakAmnesty = lrint(ceil(double(m_sampleRate) /
273 (20 * double(m_increment))));
274 size_t prevHardPeak = 0;
276 if (m_debugLevel > 1) {
277 std::cerr << "hardPeakAmnesty = " << hardPeakAmnesty << std::endl;
280 for (size_t i = 1; i + 1 < df.size(); ++i) {
282 if (df[i] < 0.1) continue;
283 if (df[i] <= df[i-1] * 1.1) continue;
284 if (df[i] < 0.22) continue;
286 if (!hardPeakCandidates.empty() &&
287 i < prevHardPeak + hardPeakAmnesty) {
291 bool hard = (df[i] > 0.4);
293 if (hard && (m_debugLevel > 1)) {
294 std::cerr << "hard peak at " << i << ": " << df[i]
295 << " > absolute " << 0.4
300 hard = (df[i] > df[i-1] * 1.4);
302 if (hard && (m_debugLevel > 1)) {
303 std::cerr << "hard peak at " << i << ": " << df[i]
304 << " > prev " << df[i-1] << " * 1.4"
309 if (!hard && i > 1) {
310 hard = (df[i] > df[i-1] * 1.2 &&
311 df[i-1] > df[i-2] * 1.2);
313 if (hard && (m_debugLevel > 1)) {
314 std::cerr << "hard peak at " << i << ": " << df[i]
315 << " > prev " << df[i-1] << " * 1.2 and "
316 << df[i-1] << " > prev " << df[i-2] << " * 1.2"
321 if (!hard && i > 2) {
322 // have already established that df[i] > df[i-1] * 1.1
323 hard = (df[i] > 0.3 &&
324 df[i-1] > df[i-2] * 1.1 &&
325 df[i-2] > df[i-3] * 1.1);
327 if (hard && (m_debugLevel > 1)) {
328 std::cerr << "hard peak at " << i << ": " << df[i]
329 << " > prev " << df[i-1] << " * 1.1 and "
330 << df[i-1] << " > prev " << df[i-2] << " * 1.1 and "
331 << df[i-2] << " > prev " << df[i-3] << " * 1.1"
338 // (df[i+1] > df[i] && df[i+1] > df[i-1] * 1.8) ||
341 size_t peakLocation = i;
343 if (i + 1 < rawDf.size() &&
344 rawDf[i + 1] > rawDf[i] * 1.4) {
348 if (m_debugLevel > 1) {
349 std::cerr << "pushing hard peak forward to " << peakLocation << ": " << df[peakLocation] << " > " << df[peakLocation-1] << " * " << 1.4 << std::endl;
353 hardPeakCandidates.insert(peakLocation);
354 prevHardPeak = peakLocation;
358 size_t medianmaxsize = lrint(ceil(double(m_sampleRate) /
359 double(m_increment))); // 1 sec ish
361 if (m_debugLevel > 1) {
362 std::cerr << "mediansize = " << medianmaxsize << std::endl;
364 if (medianmaxsize < 7) {
366 if (m_debugLevel > 1) {
367 std::cerr << "adjusted mediansize = " << medianmaxsize << std::endl;
371 int minspacing = lrint(ceil(double(m_sampleRate) /
372 (20 * double(m_increment)))); // 0.05 sec ish
374 std::deque<float> medianwin;
375 std::vector<float> sorted;
376 int softPeakAmnesty = 0;
378 for (size_t i = 0; i < medianmaxsize/2; ++i) {
379 medianwin.push_back(0);
381 for (size_t i = 0; i < medianmaxsize/2 && i < df.size(); ++i) {
382 medianwin.push_back(df[i]);
385 size_t lastSoftPeak = 0;
387 for (size_t i = 0; i < df.size(); ++i) {
389 size_t mediansize = medianmaxsize;
391 if (medianwin.size() < mediansize) {
392 mediansize = medianwin.size();
395 size_t middle = medianmaxsize / 2;
396 if (middle >= mediansize) middle = mediansize-1;
398 size_t nextDf = i + mediansize - middle;
400 if (mediansize < 2) {
401 if (mediansize > medianmaxsize) { // absurd, but never mind that
402 medianwin.pop_front();
404 if (nextDf < df.size()) {
405 medianwin.push_back(df[nextDf]);
407 medianwin.push_back(0);
412 if (m_debugLevel > 2) {
413 // std::cerr << "have " << mediansize << " in median buffer" << std::endl;
417 for (size_t j = 0; j < mediansize; ++j) {
418 sorted.push_back(medianwin[j]);
420 std::sort(sorted.begin(), sorted.end());
422 size_t n = 90; // percentile above which we pick peaks
423 size_t index = (sorted.size() * n) / 100;
424 if (index >= sorted.size()) index = sorted.size()-1;
425 if (index == sorted.size()-1 && index > 0) --index;
426 float thresh = sorted[index];
428 // if (m_debugLevel > 2) {
429 // std::cerr << "medianwin[" << middle << "] = " << medianwin[middle] << ", thresh = " << thresh << std::endl;
430 // if (medianwin[middle] == 0.f) {
431 // std::cerr << "contents: ";
432 // for (size_t j = 0; j < medianwin.size(); ++j) {
433 // std::cerr << medianwin[j] << " ";
435 // std::cerr << std::endl;
439 if (medianwin[middle] > thresh &&
440 medianwin[middle] > medianwin[middle-1] &&
441 medianwin[middle] > medianwin[middle+1] &&
442 softPeakAmnesty == 0) {
444 size_t maxindex = middle;
445 float maxval = medianwin[middle];
447 for (size_t j = middle+1; j < mediansize; ++j) {
448 if (medianwin[j] > maxval) {
449 maxval = medianwin[j];
451 } else if (medianwin[j] < medianwin[middle]) {
456 size_t peak = i + maxindex - middle;
458 // std::cerr << "i = " << i << ", maxindex = " << maxindex << ", middle = " << middle << ", so peak at " << peak << std::endl;
460 if (softPeakCandidates.empty() || lastSoftPeak != peak) {
462 if (m_debugLevel > 1) {
463 std::cerr << "soft peak at " << peak << " ("
464 << peak * m_increment << "): "
465 << medianwin[middle] << " > "
468 << " > " << medianwin[middle-1] << " and "
470 << " > " << medianwin[middle+1]
474 if (peak >= df.size()) {
475 if (m_debugLevel > 2) {
476 std::cerr << "peak is beyond end" << std::endl;
479 softPeakCandidates.insert(peak);
484 softPeakAmnesty = minspacing + maxindex - middle;
485 if (m_debugLevel > 2) {
486 std::cerr << "amnesty = " << softPeakAmnesty << std::endl;
489 } else if (softPeakAmnesty > 0) --softPeakAmnesty;
491 if (mediansize >= medianmaxsize) {
492 medianwin.pop_front();
494 if (nextDf < df.size()) {
495 medianwin.push_back(df[nextDf]);
497 medianwin.push_back(0);
501 std::vector<Peak> peaks;
503 while (!hardPeakCandidates.empty() || !softPeakCandidates.empty()) {
505 bool haveHardPeak = !hardPeakCandidates.empty();
506 bool haveSoftPeak = !softPeakCandidates.empty();
508 size_t hardPeak = (haveHardPeak ? *hardPeakCandidates.begin() : 0);
509 size_t softPeak = (haveSoftPeak ? *softPeakCandidates.begin() : 0);
513 peak.chunk = softPeak;
518 (!haveSoftPeak || hardPeak <= softPeak)) {
520 if (m_debugLevel > 2) {
521 std::cerr << "Hard peak: " << hardPeak << std::endl;
525 peak.chunk = hardPeak;
526 hardPeakCandidates.erase(hardPeakCandidates.begin());
529 if (m_debugLevel > 2) {
530 std::cerr << "Soft peak: " << softPeak << std::endl;
532 if (!peaks.empty() &&
533 peaks[peaks.size()-1].hard &&
534 peaks[peaks.size()-1].chunk + 3 >= softPeak) {
535 if (m_debugLevel > 2) {
536 std::cerr << "(ignoring, as we just had a hard peak)"
543 if (haveSoftPeak && peak.chunk == softPeak) {
544 softPeakCandidates.erase(softPeakCandidates.begin());
548 peaks.push_back(peak);
556 StretchCalculator::smoothDF(const std::vector<float> &df)
558 std::vector<float> smoothedDF;
560 for (size_t i = 0; i < df.size(); ++i) {
561 // three-value moving mean window for simple smoothing
562 float total = 0.f, count = 0;
563 if (i > 0) { total += df[i-1]; ++count; }
564 total += df[i]; ++count;
565 if (i+1 < df.size()) { total += df[i+1]; ++count; }
566 float mean = total / count;
567 smoothedDF.push_back(mean);
574 StretchCalculator::distributeRegion(const std::vector<float> &dfIn,
575 size_t duration, float ratio, bool phaseReset)
577 std::vector<float> df(dfIn);
578 std::vector<int> increments;
580 // The peak for the stretch detection function may appear after
581 // the peak that we're using to calculate the start of the region.
582 // We don't want that. If we find a peak in the first half of
583 // the region, we should set all the values up to that point to
584 // the same value as the peak.
586 // (This might not be subtle enough, especially if the region is
587 // long -- we want a bound that corresponds to acoustic perception
588 // of the audible bounce.)
590 for (size_t i = 1; i < df.size()/2; ++i) {
591 if (df[i] < df[i-1]) {
592 if (m_debugLevel > 1) {
593 std::cerr << "stretch peak offset: " << i-1 << " (peak " << df[i-1] << ")" << std::endl;
595 for (size_t j = 0; j < i-1; ++j) {
604 for (size_t i = 0; i < df.size(); ++i) {
605 if (i == 0 || df[i] > maxDf) maxDf = df[i];
608 // We want to try to ensure the last 100ms or so (if possible) are
609 // tending back towards the maximum df, so that the stretchiness
610 // reduces at the end of the stretched region.
612 int reducedRegion = lrint((0.1 * m_sampleRate) / m_increment);
613 if (reducedRegion > int(df.size()/5)) reducedRegion = df.size()/5;
615 for (int i = 0; i < reducedRegion; ++i) {
616 size_t index = df.size() - reducedRegion + i;
617 df[index] = df[index] + ((maxDf - df[index]) * i) / reducedRegion;
620 long toAllot = long(duration) - long(m_increment * df.size());
622 if (m_debugLevel > 1) {
623 std::cerr << "region of " << df.size() << " chunks, output duration " << duration << ", toAllot " << toAllot << std::endl;
626 size_t totalIncrement = 0;
628 // We place limits on the amount of displacement per chunk. if
629 // ratio < 0, no increment should be larger than increment*ratio
630 // or smaller than increment*ratio/2; if ratio > 0, none should be
631 // smaller than increment*ratio or larger than increment*ratio*2.
632 // We need to enforce this in the assignment of displacements to
633 // allotments, not by trying to respond if something turns out
636 // Note that the ratio is only provided to this function for the
637 // purposes of establishing this bound to the displacement.
640 // maxDisplacement / totalDisplacement > increment * ratio*2 - increment
643 // maxDisplacement / totalDisplacement < increment * ratio/2
646 // then we need to adjust and accommodate
648 bool acceptableSquashRange = false;
650 double totalDisplacement = 0;
651 double maxDisplacement = 0; // min displacement will be 0 by definition
656 while (!acceptableSquashRange) {
658 acceptableSquashRange = true;
659 calculateDisplacements(df, maxDf, totalDisplacement, maxDisplacement,
662 if (m_debugLevel > 1) {
663 std::cerr << "totalDisplacement " << totalDisplacement << ", max " << maxDisplacement << " (maxDf " << maxDf << ", df count " << df.size() << ")" << std::endl;
666 if (totalDisplacement == 0) {
667 // Not usually a problem, in fact
668 // std::cerr << "WARNING: totalDisplacement == 0 (duration " << duration << ", " << df.size() << " values in df)" << std::endl;
669 if (!df.empty() && adj == 0) {
670 acceptableSquashRange = false;
676 int extremeIncrement = m_increment + lrint((toAllot * maxDisplacement) / totalDisplacement);
678 if (extremeIncrement > lrint(ceil(m_increment * ratio))) {
679 std::cerr << "ERROR: extreme increment " << extremeIncrement << " > " << m_increment * ratio << " (this should not happen)" << std::endl;
680 } else if (extremeIncrement < (m_increment * ratio) / 2) {
681 if (m_debugLevel > 0) {
682 std::cerr << "WARNING: extreme increment " << extremeIncrement << " < " << (m_increment * ratio) / 2 << std::endl;
684 acceptableSquashRange = false;
687 if (extremeIncrement > m_increment * ratio * 2) {
688 if (m_debugLevel > 0) {
689 std::cerr << "WARNING: extreme increment " << extremeIncrement << " > " << m_increment * ratio * 2 << std::endl;
691 acceptableSquashRange = false;
692 } else if (extremeIncrement < lrint(floor(m_increment * ratio))) {
693 std::cerr << "ERROR: extreme increment " << extremeIncrement << " < " << m_increment * ratio << " (I thought this couldn't happen?)" << std::endl;
697 if (!acceptableSquashRange) {
698 // Need to make maxDisplacement smaller as a proportion of
699 // the total displacement, yet ensure that the
700 // displacements still sum to the total.
705 for (size_t i = 0; i < df.size(); ++i) {
707 double displacement = maxDf - df[i];
708 if (displacement < 0) displacement -= adj;
709 else displacement += adj;
711 if (i == 0 && phaseReset) {
712 if (df.size() == 1) {
713 increments.push_back(duration);
714 totalIncrement += duration;
716 increments.push_back(m_increment);
717 totalIncrement += m_increment;
719 totalDisplacement -= displacement;
723 double theoreticalAllotment = 0;
725 if (totalDisplacement != 0) {
726 theoreticalAllotment = (toAllot * displacement) / totalDisplacement;
728 int allotment = lrint(theoreticalAllotment);
729 if (i + 1 == df.size()) allotment = toAllot;
731 int increment = m_increment + allotment;
733 if (increment <= 0) {
734 // this is a serious problem, the allocation is quite
735 // wrong if it allows increment to diverge so far from the
737 std::cerr << "*** WARNING: increment " << increment << " <= 0, rounding to zero" << std::endl;
739 allotment = increment - m_increment;
742 increments.push_back(increment);
743 totalIncrement += increment;
745 toAllot -= allotment;
746 totalDisplacement -= displacement;
748 if (m_debugLevel > 2) {
749 std::cerr << "df " << df[i] << ", smoothed " << df[i] << ", disp " << displacement << ", allot " << theoreticalAllotment << ", incr " << increment << ", remain " << toAllot << std::endl;
753 if (m_debugLevel > 2) {
754 std::cerr << "total increment: " << totalIncrement << ", left over: " << toAllot << " to allot, displacement " << totalDisplacement << std::endl;
757 if (totalIncrement != duration) {
758 std::cerr << "*** WARNING: calculated output duration " << totalIncrement << " != expected " << duration << std::endl;
765 StretchCalculator::calculateDisplacements(const std::vector<float> &df,
767 double &totalDisplacement,
768 double &maxDisplacement,
771 totalDisplacement = maxDisplacement = 0;
775 for (size_t i = 0; i < df.size(); ++i) {
776 if (i == 0 || df[i] > maxDf) maxDf = df[i];
779 for (size_t i = 0; i < df.size(); ++i) {
780 double displacement = maxDf - df[i];
781 if (displacement < 0) displacement -= adj;
782 else displacement += adj;
783 totalDisplacement += displacement;
784 if (i == 0 || displacement > maxDisplacement) {
785 maxDisplacement = displacement;