src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 /** @file  src/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 extern "C" {
  32 #include <tiffio.h>
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 #include <libswscale/swscale.h>
  36 #include <libpostproc/postprocess.h>
  37 }
  38 #include <sndfile.h>
  39 #include "film.h"
  40 #include "format.h"
  41 #include "transcoder.h"
  42 #include "job.h"
  43 #include "filter.h"
  44 #include "options.h"
  45 #include "exceptions.h"
  46 #include "image.h"
  47 #include "util.h"
  48 #include "log.h"
  49 #include "ffmpeg_decoder.h"
  50 #include "filter_graph.h"
  51 #include "subtitle.h"
  52
  53 using std::cout;
  54 using std::string;
  55 using std::vector;
  56 using std::stringstream;
  57 using std::list;
  58 using boost::shared_ptr;
  59 using boost::optional;
  60 using boost::dynamic_pointer_cast;
  61
  62 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const DecodeOptions> o, Job* j)
  63         : Decoder (f, o, j)
  64         , VideoDecoder (f, o, j)
  65         , AudioDecoder (f, o, j)
  66         , _format_context (0)
  67         , _video_stream (-1)
  68         , _frame (0)
  69         , _video_codec_context (0)
  70         , _video_codec (0)
  71         , _audio_codec_context (0)
  72         , _audio_codec (0)
  73         , _subtitle_codec_context (0)
  74         , _subtitle_codec (0)
  75 {
  76         setup_general ();
  77         setup_video ();
  78         setup_audio ();
  79         setup_subtitle ();
  80
  81         if (!o->video_sync) {
  82                 _first_video = 0;
  83         }
  84 }
  85
  86 FFmpegDecoder::~FFmpegDecoder ()
  87 {
  88         if (_audio_codec_context) {
  89                 avcodec_close (_audio_codec_context);
  90         }
  91
  92         if (_video_codec_context) {
  93                 avcodec_close (_video_codec_context);
  94         }
  95
  96         if (_subtitle_codec_context) {
  97                 avcodec_close (_subtitle_codec_context);
  98         }
  99
 100         av_free (_frame);
 101
 102         avformat_close_input (&_format_context);
 103 }
 104
 105 void
 106 FFmpegDecoder::setup_general ()
 107 {
 108         av_register_all ();
 109
 110         if (avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0) < 0) {
 111                 throw OpenFileError (_film->content_path ());
 112         }
 113
 114         if (avformat_find_stream_info (_format_context, 0) < 0) {
 115                 throw DecodeError ("could not find stream information");
 116         }
 117
 118         /* Find video, audio and subtitle streams and choose the first of each */
 119
 120         for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
 121                 AVStream* s = _format_context->streams[i];
 122                 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
 123                         _video_stream = i;
 124                 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 125
 126                         /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
 127                            so bodge it here.  No idea why we should have to do this.
 128                         */
 129
 130                         if (s->codec->channel_layout == 0) {
 131                                 s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);
 132                         }
 133
 134                         _audio_streams.push_back (
 135                                 shared_ptr<AudioStream> (
 136                                         new FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
 137                                         )
 138                                 );
 139
 140                 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
 141                         _subtitle_streams.push_back (
 142                                 shared_ptr<SubtitleStream> (
 143                                         new SubtitleStream (stream_name (s), i)
 144                                         )
 145                                 );
 146                 }
 147         }
 148
 149         if (_video_stream < 0) {
 150                 throw DecodeError ("could not find video stream");
 151         }
 152
 153         _frame = avcodec_alloc_frame ();
 154         if (_frame == 0) {
 155                 throw DecodeError ("could not allocate frame");
 156         }
 157 }
 158
 159 void
 160 FFmpegDecoder::setup_video ()
 161 {
 162         _video_codec_context = _format_context->streams[_video_stream]->codec;
 163         _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
 164
 165         if (_video_codec == 0) {
 166                 throw DecodeError ("could not find video decoder");
 167         }
 168
 169         if (avcodec_open2 (_video_codec_context, _video_codec, 0) < 0) {
 170                 throw DecodeError ("could not open video decoder");
 171         }
 172 }
 173
 174 void
 175 FFmpegDecoder::setup_audio ()
 176 {
 177         if (!_audio_stream) {
 178                 return;
 179         }
 180
 181         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 182         assert (ffa);
 183
 184         _audio_codec_context = _format_context->streams[ffa->id()]->codec;
 185         _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
 186
 187         if (_audio_codec == 0) {
 188                 throw DecodeError ("could not find audio decoder");
 189         }
 190
 191         if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
 192                 throw DecodeError ("could not open audio decoder");
 193         }
 194 }
 195
 196 void
 197 FFmpegDecoder::setup_subtitle ()
 198 {
 199         if (!_subtitle_stream) {
 200                 return;
 201         }
 202
 203         _subtitle_codec_context = _format_context->streams[_subtitle_stream->id()]->codec;
 204         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 205
 206         if (_subtitle_codec == 0) {
 207                 throw DecodeError ("could not find subtitle decoder");
 208         }
 209
 210         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 211                 throw DecodeError ("could not open subtitle decoder");
 212         }
 213 }
 214
 215
 216 bool
 217 FFmpegDecoder::pass ()
 218 {
 219         int r = av_read_frame (_format_context, &_packet);
 220
 221         if (r < 0) {
 222                 if (r != AVERROR_EOF) {
 223                         /* Maybe we should fail here, but for now we'll just finish off instead */
 224                         char buf[256];
 225                         av_strerror (r, buf, sizeof(buf));
 226                         _film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
 227                 }
 228
 229                 /* Get any remaining frames */
 230
 231                 _packet.data = 0;
 232                 _packet.size = 0;
 233
 234                 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 235
 236                 int frame_finished;
 237
 238                 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 239                         filter_and_emit_video (_frame);
 240                 }
 241
 242                 if (_audio_stream && _opt->decode_audio) {
 243                         while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 244                                 int const data_size = av_samples_get_buffer_size (
 245                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 246                                         );
 247
 248                                 assert (_audio_codec_context->channels == _film->audio_channels());
 249                                 Audio (deinterleave_audio (_frame->data[0], data_size));
 250                         }
 251                 }
 252
 253                 return true;
 254         }
 255
 256         avcodec_get_frame_defaults (_frame);
 257
 258         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 259
 260         if (_packet.stream_index == _video_stream) {
 261
 262                 int frame_finished;
 263                 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
 264                 if (r >= 0 && frame_finished) {
 265
 266                         if (r != _packet.size) {
 267                                 _film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
 268                         }
 269
 270                         if (_opt->video_sync) {
 271                                 out_with_sync ();
 272                         } else {
 273                                 filter_and_emit_video (_frame);
 274                         }
 275                 }
 276
 277         } else if (ffa && _packet.stream_index == ffa->id() && _opt->decode_audio) {
 278
 279                 int frame_finished;
 280                 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 281
 282                         /* Where we are in the source, in seconds */
 283                         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 284                                 * av_frame_get_best_effort_timestamp(_frame);
 285
 286                         /* We only decode audio if we've had our first video packet through, and if it
 287                            was before this packet.  Until then audio is thrown away.
 288                         */
 289
 290                         if (_first_video && _first_video.get() <= source_pts_seconds) {
 291
 292                                 if (!_first_audio) {
 293                                         _first_audio = source_pts_seconds;
 294
 295                                         /* This is our first audio frame, and if we've arrived here we must have had our
 296                                            first video frame.  Push some silence to make up any gap between our first
 297                                            video frame and our first audio.
 298                                         */
 299
 300                                         /* frames of silence that we must push */
 301                                         int const s = rint ((_first_audio.get() - _first_video.get()) * ffa->sample_rate ());
 302
 303                                         _film->log()->log (
 304                                                 String::compose (
 305                                                         "First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)",
 306                                                         _first_video.get(), _first_audio.get(), s, ffa->channels(), bytes_per_audio_sample()
 307                                                         )
 308                                                 );
 309
 310                                         if (s) {
 311                                                 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), s));
 312                                                 audio->make_silent ();
 313                                                 Audio (audio);
 314                                         }
 315                                 }
 316
 317                                 int const data_size = av_samples_get_buffer_size (
 318                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 319                                         );
 320
 321                                 assert (_audio_codec_context->channels == _film->audio_channels());
 322                                 Audio (deinterleave_audio (_frame->data[0], data_size));
 323                         }
 324                 }
 325
 326         } else if (_subtitle_stream && _packet.stream_index == _subtitle_stream->id() && _opt->decode_subtitles && _first_video) {
 327
 328                 int got_subtitle;
 329                 AVSubtitle sub;
 330                 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
 331                         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 332                            indicate that the previous subtitle should stop.
 333                         */
 334                         if (sub.num_rects > 0) {
 335                                 shared_ptr<TimedSubtitle> ts;
 336                                 try {
 337                                         emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub)));
 338                                 } catch (...) {
 339                                         /* some problem with the subtitle; we probably didn't understand it */
 340                                 }
 341                         } else {
 342                                 emit_subtitle (shared_ptr<TimedSubtitle> ());
 343                         }
 344                         avsubtitle_free (&sub);
 345                 }
 346         }
 347
 348         av_free_packet (&_packet);
 349         return false;
 350 }
 351
 352 shared_ptr<AudioBuffers>
 353 FFmpegDecoder::deinterleave_audio (uint8_t* data, int size)
 354 {
 355         assert (_film->audio_channels());
 356         assert (bytes_per_audio_sample());
 357
 358         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 359         assert (ffa);
 360
 361         /* Deinterleave and convert to float */
 362
 363         assert ((size % (bytes_per_audio_sample() * ffa->channels())) == 0);
 364
 365         int const total_samples = size / bytes_per_audio_sample();
 366         int const frames = total_samples / _film->audio_channels();
 367         shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), frames));
 368
 369         switch (audio_sample_format()) {
 370         case AV_SAMPLE_FMT_S16:
 371         {
 372                 int16_t* p = (int16_t *) data;
 373                 int sample = 0;
 374                 int channel = 0;
 375                 for (int i = 0; i < total_samples; ++i) {
 376                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 377
 378                         ++channel;
 379                         if (channel == _film->audio_channels()) {
 380                                 channel = 0;
 381                                 ++sample;
 382                         }
 383                 }
 384         }
 385         break;
 386
 387         case AV_SAMPLE_FMT_S32:
 388         {
 389                 int32_t* p = (int32_t *) data;
 390                 int sample = 0;
 391                 int channel = 0;
 392                 for (int i = 0; i < total_samples; ++i) {
 393                         audio->data(channel)[sample] = float(*p++) / (1 << 31);
 394
 395                         ++channel;
 396                         if (channel == _film->audio_channels()) {
 397                                 channel = 0;
 398                                 ++sample;
 399                         }
 400                 }
 401         }
 402
 403         case AV_SAMPLE_FMT_FLTP:
 404         {
 405                 float* p = reinterpret_cast<float*> (data);
 406                 for (int i = 0; i < _film->audio_channels(); ++i) {
 407                         memcpy (audio->data(i), p, frames * sizeof(float));
 408                         p += frames;
 409                 }
 410         }
 411         break;
 412
 413         default:
 414                 assert (false);
 415         }
 416
 417         return audio;
 418 }
 419
 420 float
 421 FFmpegDecoder::frames_per_second () const
 422 {
 423         AVStream* s = _format_context->streams[_video_stream];
 424
 425         if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
 426                 return av_q2d (s->avg_frame_rate);
 427         }
 428
 429         return av_q2d (s->r_frame_rate);
 430 }
 431
 432 AVSampleFormat
 433 FFmpegDecoder::audio_sample_format () const
 434 {
 435         if (_audio_codec_context == 0) {
 436                 return (AVSampleFormat) 0;
 437         }
 438
 439         return _audio_codec_context->sample_fmt;
 440 }
 441
 442 Size
 443 FFmpegDecoder::native_size () const
 444 {
 445         return Size (_video_codec_context->width, _video_codec_context->height);
 446 }
 447
 448 PixelFormat
 449 FFmpegDecoder::pixel_format () const
 450 {
 451         return _video_codec_context->pix_fmt;
 452 }
 453
 454 int
 455 FFmpegDecoder::time_base_numerator () const
 456 {
 457         return _video_codec_context->time_base.num;
 458 }
 459
 460 int
 461 FFmpegDecoder::time_base_denominator () const
 462 {
 463         return _video_codec_context->time_base.den;
 464 }
 465
 466 int
 467 FFmpegDecoder::sample_aspect_ratio_numerator () const
 468 {
 469         return _video_codec_context->sample_aspect_ratio.num;
 470 }
 471
 472 int
 473 FFmpegDecoder::sample_aspect_ratio_denominator () const
 474 {
 475         return _video_codec_context->sample_aspect_ratio.den;
 476 }
 477
 478 string
 479 FFmpegDecoder::stream_name (AVStream* s) const
 480 {
 481         stringstream n;
 482
 483         AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
 484         if (lang) {
 485                 n << lang->value;
 486         }
 487
 488         AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
 489         if (title) {
 490                 if (!n.str().empty()) {
 491                         n << " ";
 492                 }
 493                 n << title->value;
 494         }
 495
 496         if (n.str().empty()) {
 497                 n << "unknown";
 498         }
 499
 500         return n.str ();
 501 }
 502
 503 int
 504 FFmpegDecoder::bytes_per_audio_sample () const
 505 {
 506         return av_get_bytes_per_sample (audio_sample_format ());
 507 }
 508
 509 void
 510 FFmpegDecoder::set_audio_stream (shared_ptr<AudioStream> s)
 511 {
 512         AudioDecoder::set_audio_stream (s);
 513         setup_audio ();
 514 }
 515
 516 void
 517 FFmpegDecoder::set_subtitle_stream (shared_ptr<SubtitleStream> s)
 518 {
 519         VideoDecoder::set_subtitle_stream (s);
 520         setup_subtitle ();
 521         OutputChanged ();
 522 }
 523
 524 void
 525 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
 526 {
 527         boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 528
 529         shared_ptr<FilterGraph> graph;
 530
 531         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 532         while (i != _filter_graphs.end() && !(*i)->can_process (Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
 533                 ++i;
 534         }
 535
 536         if (i == _filter_graphs.end ()) {
 537                 graph.reset (new FilterGraph (_film, this, Size (frame->width, frame->height), (AVPixelFormat) frame->format));
 538                 _filter_graphs.push_back (graph);
 539                 _film->log()->log (String::compose ("New graph for %1x%2, pixel format %3", frame->width, frame->height, frame->format));
 540         } else {
 541                 graph = *i;
 542         }
 543
 544         list<shared_ptr<Image> > images = graph->process (frame);
 545
 546         SourceFrame const sf = av_q2d (_format_context->streams[_video_stream]->time_base)
 547                 * av_frame_get_best_effort_timestamp(_frame) * frames_per_second();
 548
 549         for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
 550                 emit_video (*i, sf);
 551         }
 552 }
 553
 554 bool
 555 FFmpegDecoder::seek (SourceFrame f)
 556 {
 557         int64_t const vt = static_cast<int64_t>(f) / (av_q2d (_format_context->streams[_video_stream]->time_base) * frames_per_second());
 558
 559         /* This AVSEEK_FLAG_BACKWARD is a bit of a hack; without it, if we ask for a seek to the same place as last time
 560            (used when we change decoder parameters and want to re-fetch the frame) we end up going forwards rather than
 561            staying in the same place.
 562         */
 563         int const r = av_seek_frame (_format_context, _video_stream, vt, (f == last_source_frame() ? AVSEEK_FLAG_BACKWARD : 0));
 564
 565         avcodec_flush_buffers (_video_codec_context);
 566         if (_subtitle_codec_context) {
 567                 avcodec_flush_buffers (_subtitle_codec_context);
 568         }
 569
 570         return r < 0;
 571 }
 572
 573 shared_ptr<FFmpegAudioStream>
 574 FFmpegAudioStream::create (string t, optional<int> v)
 575 {
 576         if (!v) {
 577                 /* version < 1; no type in the string, and there's only FFmpeg streams anyway */
 578                 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
 579         }
 580
 581         stringstream s (t);
 582         string type;
 583         s >> type;
 584         if (type != "ffmpeg") {
 585                 return shared_ptr<FFmpegAudioStream> ();
 586         }
 587
 588         return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
 589 }
 590
 591 FFmpegAudioStream::FFmpegAudioStream (string t, optional<int> version)
 592 {
 593         stringstream n (t);
 594
 595         int name_index = 4;
 596         if (!version) {
 597                 name_index = 2;
 598                 int channels;
 599                 n >> _id >> channels;
 600                 _channel_layout = av_get_default_channel_layout (channels);
 601                 _sample_rate = 0;
 602         } else {
 603                 string type;
 604                 /* Current (marked version 1) */
 605                 n >> type >> _id >> _sample_rate >> _channel_layout;
 606                 assert (type == "ffmpeg");
 607         }
 608
 609         for (int i = 0; i < name_index; ++i) {
 610                 size_t const s = t.find (' ');
 611                 if (s != string::npos) {
 612                         t = t.substr (s + 1);
 613                 }
 614         }
 615
 616         _name = t;
 617 }
 618
 619 string
 620 FFmpegAudioStream::to_string () const
 621 {
 622         return String::compose ("ffmpeg %1 %2 %3 %4", _id, _sample_rate, _channel_layout, _name);
 623 }
 624
 625 void
 626 FFmpegDecoder::out_with_sync ()
 627 {
 628         /* Where we are in the output, in seconds */
 629         double const out_pts_seconds = video_frame() / frames_per_second();
 630
 631         /* Where we are in the source, in seconds */
 632         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 633                 * av_frame_get_best_effort_timestamp(_frame);
 634
 635         _film->log()->log (
 636                 String::compose ("Source video frame ready; source at %1, output at %2", source_pts_seconds, out_pts_seconds),
 637                 Log::VERBOSE
 638                 );
 639
 640         if (!_first_video) {
 641                 _first_video = source_pts_seconds;
 642         }
 643
 644         /* Difference between where we are and where we should be */
 645         double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
 646         double const one_frame = 1 / frames_per_second();
 647
 648         /* Insert frames if required to get out_pts_seconds up to pts_seconds */
 649         if (delta > one_frame) {
 650                 int const extra = rint (delta / one_frame);
 651                 for (int i = 0; i < extra; ++i) {
 652                         repeat_last_video ();
 653                         _film->log()->log (
 654                                 String::compose (
 655                                         "Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)",
 656                                         out_pts_seconds, video_frame(), source_pts_seconds, frames_per_second()
 657                                                         )
 658                                 );
 659                 }
 660         }
 661
 662         if (delta > -one_frame) {
 663                 /* Process this frame */
 664                 filter_and_emit_video (_frame);
 665         } else {
 666                 /* Otherwise we are omitting a frame to keep things right */
 667                 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
 668         }
 669 }
 670
 671 void
 672 FFmpegDecoder::film_changed (Film::Property p)
 673 {
 674         switch (p) {
 675         case Film::CROP:
 676         case Film::FILTERS:
 677         {
 678                 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 679                 _filter_graphs.clear ();
 680         }
 681         OutputChanged ();
 682         break;
 683
 684         default:
 685                 break;
 686         }
 687 }
 688
 689 /** @return Length (in video frames) according to our content's header */
 690 SourceFrame
 691 FFmpegDecoder::length () const
 692 {
 693         return (double(_format_context->duration) / AV_TIME_BASE) * frames_per_second();
 694 }
 695