src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 /** @file  src/lib/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 extern "C" {
  32 #include <tiffio.h>
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 #include <libswscale/swscale.h>
  36 #include <libpostproc/postprocess.h>
  37 }
  38 #include <sndfile.h>
  39 #include "film.h"
  40 #include "format.h"
  41 #include "transcoder.h"
  42 #include "job.h"
  43 #include "filter.h"
  44 #include "exceptions.h"
  45 #include "image.h"
  46 #include "util.h"
  47 #include "log.h"
  48 #include "ffmpeg_decoder.h"
  49 #include "filter_graph.h"
  50 #include "subtitle.h"
  51
  52 #include "i18n.h"
  53
  54 using std::cout;
  55 using std::string;
  56 using std::vector;
  57 using std::stringstream;
  58 using std::list;
  59 using boost::shared_ptr;
  60 using boost::optional;
  61 using boost::dynamic_pointer_cast;
  62 using libdcp::Size;
  63
  /** Mutex shared by all FFmpegDecoder instances.  It is held while codec contexts are
   *  opened (in setup_video, setup_audio and setup_subtitle) and closed (in the destructor).
   *  NOTE(review): presumably needed because FFmpeg's codec open/close is not thread-safe; confirm.
   */
boost::mutex FFmpegDecoder::_mutex;
  65
  66 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegContent> c, bool video, bool audio, bool subtitles, bool video_sync)
  67         : Decoder (f)
  68         , VideoDecoder (f)
  69         , AudioDecoder (f)
  70         , _ffmpeg_content (c)
  71         , _format_context (0)
  72         , _video_stream (-1)
  73         , _frame (0)
  74         , _video_codec_context (0)
  75         , _video_codec (0)
  76         , _audio_codec_context (0)
  77         , _audio_codec (0)
  78         , _subtitle_codec_context (0)
  79         , _subtitle_codec (0)
  80         , _decode_video (video)
  81         , _decode_audio (audio)
  82         , _decode_subtitles (subtitles)
  83         , _video_sync (video_sync)
  84 {
  85         setup_general ();
  86         setup_video ();
  87         setup_audio ();
  88         setup_subtitle ();
  89
  90         if (!video_sync) {
  91                 _first_video = 0;
  92         }
  93 }
  94
  95 FFmpegDecoder::~FFmpegDecoder ()
  96 {
  97         boost::mutex::scoped_lock lm (_mutex);
  98
  99         if (_audio_codec_context) {
 100                 avcodec_close (_audio_codec_context);
 101         }
 102
 103         if (_video_codec_context) {
 104                 avcodec_close (_video_codec_context);
 105         }
 106
 107         if (_subtitle_codec_context) {
 108                 avcodec_close (_subtitle_codec_context);
 109         }
 110
 111         av_free (_frame);
 112
 113         avformat_close_input (&_format_context);
 114 }
 115
 116 void
 117 FFmpegDecoder::setup_general ()
 118 {
 119         av_register_all ();
 120
 121         if (avformat_open_input (&_format_context, _ffmpeg_content->file().string().c_str(), 0, 0) < 0) {
 122                 throw OpenFileError (_ffmpeg_content->file().string ());
 123         }
 124
 125         if (avformat_find_stream_info (_format_context, 0) < 0) {
 126                 throw DecodeError (_("could not find stream information"));
 127         }
 128
 129         /* Find video, audio and subtitle streams */
 130
 131         for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
 132                 AVStream* s = _format_context->streams[i];
 133                 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
 134                         _video_stream = i;
 135                 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 136
 137                         /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
 138                            so bodge it here.  No idea why we should have to do this.
 139                         */
 140
 141                         if (s->codec->channel_layout == 0) {
 142                                 s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);
 143                         }
 144
 145                         _audio_streams.push_back (
 146                                 FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channels)
 147                                 );
 148
 149                 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
 150                         _subtitle_streams.push_back (FFmpegSubtitleStream (stream_name (s), i));
 151                 }
 152         }
 153
 154         if (_video_stream < 0) {
 155                 throw DecodeError (N_("could not find video stream"));
 156         }
 157
 158         _frame = avcodec_alloc_frame ();
 159         if (_frame == 0) {
 160                 throw DecodeError (N_("could not allocate frame"));
 161         }
 162 }
 163
 164 void
 165 FFmpegDecoder::setup_video ()
 166 {
 167         boost::mutex::scoped_lock lm (_mutex);
 168
 169         _video_codec_context = _format_context->streams[_video_stream]->codec;
 170         _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
 171
 172         if (_video_codec == 0) {
 173                 throw DecodeError (_("could not find video decoder"));
 174         }
 175
 176         if (avcodec_open2 (_video_codec_context, _video_codec, 0) < 0) {
 177                 throw DecodeError (N_("could not open video decoder"));
 178         }
 179 }
 180
 181 void
 182 FFmpegDecoder::setup_audio ()
 183 {
 184         boost::mutex::scoped_lock lm (_mutex);
 185
 186         if (!_ffmpeg_content->audio_stream ()) {
 187                 return;
 188         }
 189
 190         _audio_codec_context = _format_context->streams[_ffmpeg_content->audio_stream()->id]->codec;
 191         _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
 192
 193         if (_audio_codec == 0) {
 194                 throw DecodeError (_("could not find audio decoder"));
 195         }
 196
 197         if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
 198                 throw DecodeError (N_("could not open audio decoder"));
 199         }
 200 }
 201
 202 void
 203 FFmpegDecoder::setup_subtitle ()
 204 {
 205         boost::mutex::scoped_lock lm (_mutex);
 206
 207         if (!_ffmpeg_content->subtitle_stream() || _ffmpeg_content->subtitle_stream()->id >= int (_format_context->nb_streams)) {
 208                 return;
 209         }
 210
 211         _subtitle_codec_context = _format_context->streams[_ffmpeg_content->subtitle_stream()->id]->codec;
 212         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 213
 214         if (_subtitle_codec == 0) {
 215                 throw DecodeError (_("could not find subtitle decoder"));
 216         }
 217
 218         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 219                 throw DecodeError (N_("could not open subtitle decoder"));
 220         }
 221 }
 222
 223
 224 bool
 225 FFmpegDecoder::pass ()
 226 {
 227         int r = av_read_frame (_format_context, &_packet);
 228
 229         if (r < 0) {
 230                 if (r != AVERROR_EOF) {
 231                         /* Maybe we should fail here, but for now we'll just finish off instead */
 232                         char buf[256];
 233                         av_strerror (r, buf, sizeof(buf));
 234                         _film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
 235                 }
 236
 237                 /* Get any remaining frames */
 238
 239                 _packet.data = 0;
 240                 _packet.size = 0;
 241
 242                 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 243
 244                 int frame_finished;
 245
 246                 if (_decode_video) {
 247                         while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 248                                 filter_and_emit_video (_frame);
 249                         }
 250                 }
 251
 252                 if (_ffmpeg_content->audio_stream() && _decode_audio) {
 253                         decode_audio_packet ();
 254                 }
 255
 256                 return true;
 257         }
 258
 259         avcodec_get_frame_defaults (_frame);
 260
 261         if (_packet.stream_index == _video_stream && _decode_video) {
 262
 263                 int frame_finished;
 264                 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
 265                 if (r >= 0 && frame_finished) {
 266
 267                         if (r != _packet.size) {
 268                                 _film->log()->log (String::compose (N_("Used only %1 bytes of %2 in packet"), r, _packet.size));
 269                         }
 270
 271                         if (_video_sync) {
 272                                 out_with_sync ();
 273                         } else {
 274                                 filter_and_emit_video (_frame);
 275                         }
 276                 }
 277
 278         } else if (_ffmpeg_content->audio_stream() && _packet.stream_index == _ffmpeg_content->audio_stream()->id && _decode_audio) {
 279                 decode_audio_packet ();
 280         } else if (_ffmpeg_content->subtitle_stream() && _packet.stream_index == _ffmpeg_content->subtitle_stream()->id && _decode_subtitles && _first_video) {
 281
 282                 int got_subtitle;
 283                 AVSubtitle sub;
 284                 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
 285                         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 286                            indicate that the previous subtitle should stop.
 287                         */
 288                         if (sub.num_rects > 0) {
 289                                 shared_ptr<TimedSubtitle> ts;
 290                                 try {
 291                                         emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub)));
 292                                 } catch (...) {
 293                                         /* some problem with the subtitle; we probably didn't understand it */
 294                                 }
 295                         } else {
 296                                 emit_subtitle (shared_ptr<TimedSubtitle> ());
 297                         }
 298                         avsubtitle_free (&sub);
 299                 }
 300         }
 301
 302         av_free_packet (&_packet);
 303         return false;
 304 }
 305
 306 /** @param data pointer to array of pointers to buffers.
 307  *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 308  */
 309 shared_ptr<AudioBuffers>
 310 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
 311 {
 312         assert (_ffmpeg_content->audio_channels());
 313         assert (bytes_per_audio_sample());
 314
 315         /* Deinterleave and convert to float */
 316
 317         assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
 318
 319         int const total_samples = size / bytes_per_audio_sample();
 320         int const frames = total_samples / _ffmpeg_content->audio_channels();
 321         shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
 322
 323         switch (audio_sample_format()) {
 324         case AV_SAMPLE_FMT_S16:
 325         {
 326                 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
 327                 int sample = 0;
 328                 int channel = 0;
 329                 for (int i = 0; i < total_samples; ++i) {
 330                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 331
 332                         ++channel;
 333                         if (channel == _ffmpeg_content->audio_channels()) {
 334                                 channel = 0;
 335                                 ++sample;
 336                         }
 337                 }
 338         }
 339         break;
 340
 341         case AV_SAMPLE_FMT_S16P:
 342         {
 343                 int16_t** p = reinterpret_cast<int16_t **> (data);
 344                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 345                         for (int j = 0; j < frames; ++j) {
 346                                 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
 347                         }
 348                 }
 349         }
 350         break;
 351
 352         case AV_SAMPLE_FMT_S32:
 353         {
 354                 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
 355                 int sample = 0;
 356                 int channel = 0;
 357                 for (int i = 0; i < total_samples; ++i) {
 358                         audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
 359
 360                         ++channel;
 361                         if (channel == _ffmpeg_content->audio_channels()) {
 362                                 channel = 0;
 363                                 ++sample;
 364                         }
 365                 }
 366         }
 367         break;
 368
 369         case AV_SAMPLE_FMT_FLT:
 370         {
 371                 float* p = reinterpret_cast<float*> (data[0]);
 372                 int sample = 0;
 373                 int channel = 0;
 374                 for (int i = 0; i < total_samples; ++i) {
 375                         audio->data(channel)[sample] = *p++;
 376
 377                         ++channel;
 378                         if (channel == _ffmpeg_content->audio_channels()) {
 379                                 channel = 0;
 380                                 ++sample;
 381                         }
 382                 }
 383         }
 384         break;
 385
 386         case AV_SAMPLE_FMT_FLTP:
 387         {
 388                 float** p = reinterpret_cast<float**> (data);
 389                 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
 390                         memcpy (audio->data(i), p[i], frames * sizeof(float));
 391                 }
 392         }
 393         break;
 394
 395         default:
 396                 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
 397         }
 398
 399         return audio;
 400 }
 401
 402 float
 403 FFmpegDecoder::video_frame_rate () const
 404 {
 405         AVStream* s = _format_context->streams[_video_stream];
 406
 407         if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
 408                 return av_q2d (s->avg_frame_rate);
 409         }
 410
 411         return av_q2d (s->r_frame_rate);
 412 }
 413
 414 AVSampleFormat
 415 FFmpegDecoder::audio_sample_format () const
 416 {
 417         if (_audio_codec_context == 0) {
 418                 return (AVSampleFormat) 0;
 419         }
 420
 421         return _audio_codec_context->sample_fmt;
 422 }
 423
 424 libdcp::Size
 425 FFmpegDecoder::native_size () const
 426 {
 427         return libdcp::Size (_video_codec_context->width, _video_codec_context->height);
 428 }
 429
 430 PixelFormat
 431 FFmpegDecoder::pixel_format () const
 432 {
 433         return _video_codec_context->pix_fmt;
 434 }
 435
 436 int
 437 FFmpegDecoder::time_base_numerator () const
 438 {
 439         return _video_codec_context->time_base.num;
 440 }
 441
 442 int
 443 FFmpegDecoder::time_base_denominator () const
 444 {
 445         return _video_codec_context->time_base.den;
 446 }
 447
 448 int
 449 FFmpegDecoder::sample_aspect_ratio_numerator () const
 450 {
 451         return _video_codec_context->sample_aspect_ratio.num;
 452 }
 453
 454 int
 455 FFmpegDecoder::sample_aspect_ratio_denominator () const
 456 {
 457         return _video_codec_context->sample_aspect_ratio.den;
 458 }
 459
 460 string
 461 FFmpegDecoder::stream_name (AVStream* s) const
 462 {
 463         stringstream n;
 464
 465         AVDictionaryEntry const * lang = av_dict_get (s->metadata, N_("language"), 0, 0);
 466         if (lang) {
 467                 n << lang->value;
 468         }
 469
 470         AVDictionaryEntry const * title = av_dict_get (s->metadata, N_("title"), 0, 0);
 471         if (title) {
 472                 if (!n.str().empty()) {
 473                         n << N_(" ");
 474                 }
 475                 n << title->value;
 476         }
 477
 478         if (n.str().empty()) {
 479                 n << N_("unknown");
 480         }
 481
 482         return n.str ();
 483 }
 484
 485 int
 486 FFmpegDecoder::bytes_per_audio_sample () const
 487 {
 488         return av_get_bytes_per_sample (audio_sample_format ());
 489 }
 490
 491 void
 492 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
 493 {
 494         boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 495
 496         shared_ptr<FilterGraph> graph;
 497
 498         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 499         while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
 500                 ++i;
 501         }
 502
 503         if (i == _filter_graphs.end ()) {
 504                 graph.reset (new FilterGraph (_film, this, libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format));
 505                 _filter_graphs.push_back (graph);
 506                 _film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), frame->width, frame->height, frame->format));
 507         } else {
 508                 graph = *i;
 509         }
 510
 511         list<shared_ptr<Image> > images = graph->process (frame);
 512
 513         for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
 514                 emit_video (*i, frame_time ());
 515         }
 516 }
 517
 518 bool
 519 FFmpegDecoder::seek (double p)
 520 {
 521         /* This use of AVSEEK_FLAG_BACKWARD is a bit of a hack; without it, if we ask for a seek to the same place as last time
 522            (used when we change decoder parameters and want to re-fetch the frame) we end up going forwards rather than
 523            staying in the same place.
 524         */
 525         bool const backwards = (p == last_content_time());
 526
 527         int64_t const vt = p / av_q2d (_format_context->streams[_video_stream]->time_base);
 528
 529         int const r = av_seek_frame (_format_context, _video_stream, vt, backwards ? AVSEEK_FLAG_BACKWARD : 0);
 530
 531         avcodec_flush_buffers (_video_codec_context);
 532         if (_subtitle_codec_context) {
 533                 avcodec_flush_buffers (_subtitle_codec_context);
 534         }
 535
 536         return r < 0;
 537 }
 538
 539 void
 540 FFmpegDecoder::out_with_sync ()
 541 {
 542         /* Where we are in the output, in seconds */
 543         double const out_pts_seconds = video_frame() / video_frame_rate();
 544
 545         /* Where we are in the source, in seconds */
 546         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 547                 * av_frame_get_best_effort_timestamp(_frame);
 548
 549         _film->log()->log (
 550                 String::compose (N_("Source video frame ready; source at %1, output at %2"), source_pts_seconds, out_pts_seconds),
 551                 Log::VERBOSE
 552                 );
 553
 554         if (!_first_video) {
 555                 _first_video = source_pts_seconds;
 556         }
 557
 558         /* Difference between where we are and where we should be */
 559         double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
 560         double const one_frame = 1 / video_frame_rate();
 561
 562         /* Insert frames if required to get out_pts_seconds up to pts_seconds */
 563         if (delta > one_frame) {
 564                 int const extra = rint (delta / one_frame);
 565                 for (int i = 0; i < extra; ++i) {
 566                         repeat_last_video (frame_time ());
 567                         _film->log()->log (
 568                                 String::compose (
 569                                         N_("Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)"),
 570                                         out_pts_seconds, video_frame(), source_pts_seconds, video_frame_rate()
 571                                         )
 572                                 );
 573                 }
 574         }
 575
 576         if (delta > -one_frame) {
 577                 /* Process this frame */
 578                 filter_and_emit_video (_frame);
 579         } else {
 580                 /* Otherwise we are omitting a frame to keep things right */
 581                 _film->log()->log (String::compose (N_("Frame removed at %1s"), out_pts_seconds));
 582         }
 583 }
 584
 585 void
 586 FFmpegDecoder::film_changed (Film::Property p)
 587 {
 588         switch (p) {
 589         case Film::CROP:
 590         case Film::FILTERS:
 591         {
 592                 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 593                 _filter_graphs.clear ();
 594         }
 595         break;
 596
 597         default:
 598                 break;
 599         }
 600 }
 601
 602 /** @return Length (in video frames) according to our content's header */
 603 ContentVideoFrame
 604 FFmpegDecoder::video_length () const
 605 {
 606         return (double(_format_context->duration) / AV_TIME_BASE) * video_frame_rate();
 607 }
 608
 609 double
 610 FFmpegDecoder::frame_time () const
 611 {
 612         return av_frame_get_best_effort_timestamp(_frame) * av_q2d (_format_context->streams[_video_stream]->time_base);
 613 }
 614
 615 void
 616 FFmpegDecoder::decode_audio_packet ()
 617 {
 618         /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
 619            several times.
 620         */
 621
 622         AVPacket copy_packet = _packet;
 623
 624         while (copy_packet.size > 0) {
 625
 626                 int frame_finished;
 627                 int const decode_result = avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &copy_packet);
 628                 if (decode_result >= 0 && frame_finished) {
 629
 630                         /* Where we are in the source, in seconds */
 631                         double const source_pts_seconds = av_q2d (_format_context->streams[copy_packet.stream_index]->time_base)
 632                                 * av_frame_get_best_effort_timestamp(_frame);
 633
 634                         /* We only decode audio if we've had our first video packet through, and if it
 635                            was before this packet.  Until then audio is thrown away.
 636                         */
 637
 638                         if ((_first_video && _first_video.get() <= source_pts_seconds) || !_decode_video) {
 639
 640                                 if (!_first_audio && _decode_video) {
 641                                         _first_audio = source_pts_seconds;
 642
 643                                         /* This is our first audio frame, and if we've arrived here we must have had our
 644                                            first video frame.  Push some silence to make up any gap between our first
 645                                            video frame and our first audio.
 646                                         */
 647
 648                                         /* frames of silence that we must push */
 649                                         int const s = rint ((_first_audio.get() - _first_video.get()) * _ffmpeg_content->audio_frame_rate ());
 650
 651                                         _film->log()->log (
 652                                                 String::compose (
 653                                                         N_("First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)"),
 654                                                         _first_video.get(), _first_audio.get(), s, _ffmpeg_content->audio_channels(), bytes_per_audio_sample()
 655                                                         )
 656                                                 );
 657
 658                                         if (s) {
 659                                                 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), s));
 660                                                 audio->make_silent ();
 661                                                 Audio (audio);
 662                                         }
 663                                 }
 664
 665                                 int const data_size = av_samples_get_buffer_size (
 666                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 667                                         );
 668
 669                                 assert (_audio_codec_context->channels == _ffmpeg_content->audio_channels());
 670                                 Audio (deinterleave_audio (_frame->data, data_size));
 671                         }
 672                 }
 673
 674                 if (decode_result >= 0) {
 675                         copy_packet.data += decode_result;
 676                         copy_packet.size -= decode_result;
 677                 }
 678         }
 679 }