src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012-2014 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 /** @file  src/lib/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <sndfile.h>
  31 extern "C" {
  32 #include <libavcodec/avcodec.h>
  33 #include <libavformat/avformat.h>
  34 }
  35 #include "filter.h"
  36 #include "exceptions.h"
  37 #include "image.h"
  38 #include "util.h"
  39 #include "log.h"
  40 #include "ffmpeg_decoder.h"
  41 #include "ffmpeg_audio_stream.h"
  42 #include "ffmpeg_subtitle_stream.h"
  43 #include "filter_graph.h"
  44 #include "audio_buffers.h"
  45 #include "ffmpeg_content.h"
  46 #include "image_proxy.h"
  47
  48 #include "i18n.h"
  49
  50 using std::cout;
  51 using std::string;
  52 using std::vector;
  53 using std::stringstream;
  54 using std::list;
  55 using std::min;
  56 using std::pair;
  57 using boost::shared_ptr;
  58 using boost::optional;
  59 using boost::dynamic_pointer_cast;
  60 using dcp::Size;
  61
  62 FFmpegDecoder::FFmpegDecoder (shared_ptr<const FFmpegContent> c, shared_ptr<Log> log)
  63         : VideoDecoder (c)
  64         , AudioDecoder (c)
  65         , FFmpeg (c)
  66         , _log (log)
  67         , _subtitle_codec_context (0)
  68         , _subtitle_codec (0)
  69 {
  70         setup_subtitle ();
  71
  72         /* Audio and video frame PTS values may not start with 0.  We want
  73            to fiddle them so that:
  74
  75            1.  One of them starts at time 0.
  76            2.  The first video PTS value ends up on a frame boundary.
  77
  78            Then we remove big initial gaps in PTS and we allow our
  79            insertion of black frames to work.
  80
  81            We will do pts_to_use = pts_from_ffmpeg + pts_offset;
  82         */
  83
  84         bool const have_video = c->first_video();
  85         bool const have_audio = c->audio_stream () && c->audio_stream()->first_audio;
  86
  87         /* First, make one of them start at 0 */
  88
  89         if (have_audio && have_video) {
  90                 _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
  91         } else if (have_video) {
  92                 _pts_offset = - c->first_video().get();
  93         } else if (have_audio) {
  94                 _pts_offset = - c->audio_stream()->first_audio.get();
  95         }
  96
  97         /* Now adjust both so that the video pts starts on a frame */
  98         if (have_video && have_audio) {
  99                 ContentTime first_video = c->first_video().get() + _pts_offset;
 100                 ContentTime const old_first_video = first_video;
 101                 _pts_offset += first_video.round_up (c->video_frame_rate ()) - old_first_video;
 102         }
 103 }
 104
 105 FFmpegDecoder::~FFmpegDecoder ()
 106 {
 107         boost::mutex::scoped_lock lm (_mutex);
 108
 109         if (_subtitle_codec_context) {
 110                 avcodec_close (_subtitle_codec_context);
 111         }
 112 }
 113
 114 void
 115 FFmpegDecoder::flush ()
 116 {
 117         /* Get any remaining frames */
 118
 119         _packet.data = 0;
 120         _packet.size = 0;
 121
 122         /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 123
 124         while (decode_video_packet ()) {}
 125
 126         if (_ffmpeg_content->audio_stream()) {
 127                 decode_audio_packet ();
 128                 AudioDecoder::flush ();
 129         }
 130 }
 131
 132 bool
 133 FFmpegDecoder::pass ()
 134 {
 135         int r = av_read_frame (_format_context, &_packet);
 136
 137         if (r < 0) {
 138                 if (r != AVERROR_EOF) {
 139                         /* Maybe we should fail here, but for now we'll just finish off instead */
 140                         char buf[256];
 141                         av_strerror (r, buf, sizeof(buf));
 142                         _log->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
 143                 }
 144
 145                 flush ();
 146                 return true;
 147         }
 148
 149         int const si = _packet.stream_index;
 150
 151         if (si == _video_stream) {
 152                 decode_video_packet ();
 153         } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si)) {
 154                 decode_audio_packet ();
 155         } else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si)) {
 156                 decode_subtitle_packet ();
 157         }
 158
 159         av_free_packet (&_packet);
 160         return false;
 161 }
 162
 163 /** @param data pointer to array of pointers to buffers.
 164  *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 165  */
 166 shared_ptr<AudioBuffers>
 167 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
 168 {
 169         assert (_ffmpeg_content->audio_channels());
 170         assert (bytes_per_audio_sample());
 171
 172         /* Deinterleave and convert to float */
 173
 174         assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
 175
 176         int const total_samples = size / bytes_per_audio_sample();
 177         int const frames = total_samples / _ffmpeg_content->audio_channels();
 178         shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
 179
 180         switch (audio_sample_format()) {
 181         case AV_SAMPLE_FMT_U8:
 182         {
 183                 uint8_t* p = reinterpret_cast<uint8_t *> (data[0]);
 184                 int sample = 0;
 185                 int channel = 0;
 186                 for (int i = 0; i < total_samples; ++i) {
 187                         audio->data(channel)[sample] = float(*p++) / (1 << 23);
 188
 189                         ++channel;
 190                         if (channel == _ffmpeg_content->audio_channels()) {
 191                                 channel = 0;
 192                                 ++sample;
 193                         }
 194                 }
 195         }
 196         break;
 197
 198         case AV_SAMPLE_FMT_S16:
 199         {
 200                 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
 201                 int sample = 0;
 202                 int channel = 0;
 203                 for (int i = 0; i < total_samples; ++i) {
 204                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 205
 206                         ++channel;
 207                         if (channel == _ffmpeg_content->audio_channels()) {
 208                                 channel = 0;
 209                                 ++sample;
 210                         }
 211                 }
 212         }
 213         break;
 214
 215         case AV_SAMPLE_FMT_S16P:
 216         {
 217                 int16_t** p = reinterpret_cast<int16_t **> (data);
 218                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 219                         for (int j = 0; j < frames; ++j) {
 220                                 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
 221                         }
 222                 }
 223         }
 224         break;
 225
 226         case AV_SAMPLE_FMT_S32:
 227         {
 228                 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
 229                 int sample = 0;
 230                 int channel = 0;
 231                 for (int i = 0; i < total_samples; ++i) {
 232                         audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
 233
 234                         ++channel;
 235                         if (channel == _ffmpeg_content->audio_channels()) {
 236                                 channel = 0;
 237                                 ++sample;
 238                         }
 239                 }
 240         }
 241         break;
 242
 243         case AV_SAMPLE_FMT_FLT:
 244         {
 245                 float* p = reinterpret_cast<float*> (data[0]);
 246                 int sample = 0;
 247                 int channel = 0;
 248                 for (int i = 0; i < total_samples; ++i) {
 249                         audio->data(channel)[sample] = *p++;
 250
 251                         ++channel;
 252                         if (channel == _ffmpeg_content->audio_channels()) {
 253                                 channel = 0;
 254                                 ++sample;
 255                         }
 256                 }
 257         }
 258         break;
 259
 260         case AV_SAMPLE_FMT_FLTP:
 261         {
 262                 float** p = reinterpret_cast<float**> (data);
 263                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 264                         memcpy (audio->data(i), p[i], frames * sizeof(float));
 265                 }
 266         }
 267         break;
 268
 269         default:
 270                 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
 271         }
 272
 273         return audio;
 274 }
 275
 276 AVSampleFormat
 277 FFmpegDecoder::audio_sample_format () const
 278 {
 279         if (!_ffmpeg_content->audio_stream()) {
 280                 return (AVSampleFormat) 0;
 281         }
 282
 283         return audio_codec_context()->sample_fmt;
 284 }
 285
 286 int
 287 FFmpegDecoder::bytes_per_audio_sample () const
 288 {
 289         return av_get_bytes_per_sample (audio_sample_format ());
 290 }
 291
 292 int
 293 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
 294 {
 295         int frames_read = 0;
 296         optional<ContentTime> last_video;
 297         optional<ContentTime> last_audio;
 298
 299         while (!finished (last_video, last_audio, frames_read)) {
 300                 int r = av_read_frame (_format_context, &_packet);
 301                 if (r < 0) {
 302                         /* We should flush our decoders here, possibly yielding a few more frames,
 303                            but the consequence of having to do that is too hideous to contemplate.
 304                            Instead we give up and say that you can't seek too close to the end
 305                            of a file.
 306                         */
 307                         return frames_read;
 308                 }
 309
 310                 ++frames_read;
 311
 312                 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
 313
 314                 if (_packet.stream_index == _video_stream) {
 315
 316                         avcodec_get_frame_defaults (_frame);
 317
 318                         int got_picture = 0;
 319                         r = avcodec_decode_video2 (video_codec_context(), _frame, &got_picture, &_packet);
 320                         if (r >= 0 && got_picture) {
 321                                 last_video = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
 322                         }
 323
 324                 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, _packet.stream_index)) {
 325                         AVPacket copy_packet = _packet;
 326                         while (copy_packet.size > 0) {
 327
 328                                 int got_frame;
 329                                 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &got_frame, &_packet);
 330                                 if (r >= 0 && got_frame) {
 331                                         last_audio = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
 332                                 }
 333
 334                                 copy_packet.data += r;
 335                                 copy_packet.size -= r;
 336                         }
 337                 }
 338
 339                 av_free_packet (&_packet);
 340         }
 341
 342         return frames_read;
 343 }
 344
 345 bool
 346 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
 347 {
 348         return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
 349 }
 350
 351 bool
 352 FFmpegDecoder::seek_final_finished (int n, int done) const
 353 {
 354         return n == done;
 355 }
 356
 357 void
 358 FFmpegDecoder::seek_and_flush (ContentTime t)
 359 {
 360         ContentTime const u = t - _pts_offset;
 361         int64_t s = u.seconds() / av_q2d (_format_context->streams[_video_stream]->time_base);
 362
 363         if (_ffmpeg_content->audio_stream ()) {
 364                 s = min (
 365                         s, int64_t (u.seconds() / av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base))
 366                         );
 367         }
 368
 369         /* Ridiculous empirical hack */
 370         s--;
 371         if (s < 0) {
 372                 s = 0;
 373         }
 374
 375         av_seek_frame (_format_context, _video_stream, s, 0);
 376
 377         avcodec_flush_buffers (video_codec_context());
 378         if (audio_codec_context ()) {
 379                 avcodec_flush_buffers (audio_codec_context ());
 380         }
 381         if (_subtitle_codec_context) {
 382                 avcodec_flush_buffers (_subtitle_codec_context);
 383         }
 384 }
 385
 386 void
 387 FFmpegDecoder::seek (ContentTime time, bool accurate)
 388 {
 389         VideoDecoder::seek (time, accurate);
 390         AudioDecoder::seek (time, accurate);
 391
 392         /* If we are doing an accurate seek, our initial shot will be 2s (2 being
 393            a number plucked from the air) earlier than we want to end up.  The loop below
 394            will hopefully then step through to where we want to be.
 395         */
 396
 397         ContentTime pre_roll = accurate ? ContentTime::from_seconds (2) : ContentTime (0);
 398         ContentTime initial_seek = time - pre_roll;
 399         if (initial_seek < ContentTime (0)) {
 400                 initial_seek = ContentTime (0);
 401         }
 402
 403         /* Initial seek time in the video stream's timebase */
 404
 405         seek_and_flush (initial_seek);
 406
 407         if (!accurate) {
 408                 /* That'll do */
 409                 return;
 410         }
 411
 412         int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
 413
 414         seek_and_flush (initial_seek);
 415         if (N > 0) {
 416                 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
 417         }
 418 }
 419
 420 void
 421 FFmpegDecoder::decode_audio_packet ()
 422 {
 423         /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
 424            several times.
 425         */
 426
 427         AVPacket copy_packet = _packet;
 428
 429         while (copy_packet.size > 0) {
 430
 431                 int frame_finished;
 432                 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, &copy_packet);
 433
 434                 if (decode_result < 0) {
 435                         _log->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
 436                         return;
 437                 }
 438
 439                 if (frame_finished) {
 440                         ContentTime const ct = ContentTime::from_seconds (
 441                                 av_frame_get_best_effort_timestamp (_frame) *
 442                                 av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base))
 443                                 + _pts_offset;
 444
 445                         int const data_size = av_samples_get_buffer_size (
 446                                 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
 447                                 );
 448
 449                         audio (deinterleave_audio (_frame->data, data_size), ct);
 450                 }
 451
 452                 copy_packet.data += decode_result;
 453                 copy_packet.size -= decode_result;
 454         }
 455 }
 456
 457 bool
 458 FFmpegDecoder::decode_video_packet ()
 459 {
 460         int frame_finished;
 461         if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
 462                 return false;
 463         }
 464
 465         boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 466
 467         shared_ptr<FilterGraph> graph;
 468
 469         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 470         while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
 471                 ++i;
 472         }
 473
 474         if (i == _filter_graphs.end ()) {
 475                 graph.reset (new FilterGraph (_ffmpeg_content, dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
 476                 _filter_graphs.push_back (graph);
 477                 _log->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
 478         } else {
 479                 graph = *i;
 480         }
 481
 482         list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
 483
 484         for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
 485
 486                 shared_ptr<Image> image = i->first;
 487
 488                 if (i->second != AV_NOPTS_VALUE) {
 489                         double const pts = i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _pts_offset.seconds ();
 490                         video (shared_ptr<ImageProxy> (new RawImageProxy (image)), rint (pts * _ffmpeg_content->video_frame_rate ()));
 491                 } else {
 492                         _log->log ("Dropping frame without PTS");
 493                 }
 494         }
 495
 496         return true;
 497 }
 498
 499
 500 void
 501 FFmpegDecoder::setup_subtitle ()
 502 {
 503         boost::mutex::scoped_lock lm (_mutex);
 504
 505         if (!_ffmpeg_content->subtitle_stream()) {
 506                 return;
 507         }
 508
 509         _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
 510         if (_subtitle_codec_context == 0) {
 511                 throw DecodeError (N_("could not find subtitle stream"));
 512         }
 513
 514         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 515
 516         if (_subtitle_codec == 0) {
 517                 throw DecodeError (N_("could not find subtitle decoder"));
 518         }
 519
 520         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 521                 throw DecodeError (N_("could not open subtitle decoder"));
 522         }
 523 }
 524
 525 void
 526 FFmpegDecoder::decode_subtitle_packet ()
 527 {
 528         int got_subtitle;
 529         AVSubtitle sub;
 530         if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
 531                 return;
 532         }
 533
 534         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 535            indicate that the previous subtitle should stop.
 536         */
 537         if (sub.num_rects <= 0) {
 538                 image_subtitle (ContentTime (), ContentTime (), shared_ptr<Image> (), dcpomatic::Rect<double> ());
 539                 return;
 540         } else if (sub.num_rects > 1) {
 541                 throw DecodeError (_("multi-part subtitles not yet supported"));
 542         }
 543
 544         /* Subtitle PTS (within the source, not taking into account any of the
 545            source that we may have chopped off for the DCP)
 546         */
 547         ContentTime packet_time = ContentTime::from_seconds (static_cast<double> (sub.pts) / AV_TIME_BASE) + _pts_offset;
 548
 549         /* hence start time for this sub */
 550         ContentTime const from = packet_time + ContentTime::from_seconds (sub.start_display_time / 1e3);
 551         ContentTime const to = packet_time + ContentTime::from_seconds (sub.end_display_time / 1e3);
 552
 553         AVSubtitleRect const * rect = sub.rects[0];
 554
 555         if (rect->type != SUBTITLE_BITMAP) {
 556                 /* XXX */
 557                 // throw DecodeError (_("non-bitmap subtitles not yet supported"));
 558                 return;
 559         }
 560
 561         /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
 562            G, third B, fourth A.
 563         */
 564         shared_ptr<Image> image (new Image (PIX_FMT_RGBA, dcp::Size (rect->w, rect->h), true));
 565
 566         /* Start of the first line in the subtitle */
 567         uint8_t* sub_p = rect->pict.data[0];
 568         /* sub_p looks up into a BGRA palette which is here
 569            (i.e. first byte B, second G, third R, fourth A)
 570         */
 571         uint32_t const * palette = (uint32_t *) rect->pict.data[1];
 572         /* Start of the output data */
 573         uint32_t* out_p = (uint32_t *) image->data()[0];
 574
 575         for (int y = 0; y < rect->h; ++y) {
 576                 uint8_t* sub_line_p = sub_p;
 577                 uint32_t* out_line_p = out_p;
 578                 for (int x = 0; x < rect->w; ++x) {
 579                         uint32_t const p = palette[*sub_line_p++];
 580                         *out_line_p++ = ((p & 0xff) << 16) | (p & 0xff00) | ((p & 0xff0000) >> 16) | (p & 0xff000000);
 581                 }
 582                 sub_p += rect->pict.linesize[0];
 583                 out_p += image->stride()[0] / sizeof (uint32_t);
 584         }
 585
 586         dcp::Size const vs = _ffmpeg_content->video_size ();
 587
 588         image_subtitle (
 589                 from,
 590                 to,
 591                 image,
 592                 dcpomatic::Rect<double> (
 593                         static_cast<double> (rect->x) / vs.width,
 594                         static_cast<double> (rect->y) / vs.height,
 595                         static_cast<double> (rect->w) / vs.width,
 596                         static_cast<double> (rect->h) / vs.height
 597                         )
 598                 );
 599
 600         avsubtitle_free (&sub);
 601 }