src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
/** @file  src/lib/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 extern "C" {
  32 #include <tiffio.h>
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 #include <libswscale/swscale.h>
  36 #include <libpostproc/postprocess.h>
  37 }
  38 #include <sndfile.h>
  39 #include "film.h"
  40 #include "format.h"
  41 #include "transcoder.h"
  42 #include "job.h"
  43 #include "filter.h"
  44 #include "options.h"
  45 #include "exceptions.h"
  46 #include "image.h"
  47 #include "util.h"
  48 #include "log.h"
  49 #include "ffmpeg_decoder.h"
  50 #include "filter_graph.h"
  51 #include "subtitle.h"
  52
  53 using std::cout;
  54 using std::string;
  55 using std::vector;
  56 using std::stringstream;
  57 using std::list;
  58 using boost::shared_ptr;
  59 using boost::optional;
  60 using boost::dynamic_pointer_cast;
  61
  62 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const DecodeOptions> o, Job* j)
  63         : Decoder (f, o, j)
  64         , VideoDecoder (f, o, j)
  65         , AudioDecoder (f, o, j)
  66         , _format_context (0)
  67         , _video_stream (-1)
  68         , _frame (0)
  69         , _video_codec_context (0)
  70         , _video_codec (0)
  71         , _audio_codec_context (0)
  72         , _audio_codec (0)
  73         , _subtitle_codec_context (0)
  74         , _subtitle_codec (0)
  75 {
  76         setup_general ();
  77         setup_video ();
  78         setup_audio ();
  79         setup_subtitle ();
  80
  81         _film_connection = f->Changed.connect (bind (&FFmpegDecoder::film_changed, this, _1));
  82 }
  83
  84 FFmpegDecoder::~FFmpegDecoder ()
  85 {
  86         if (_audio_codec_context) {
  87                 avcodec_close (_audio_codec_context);
  88         }
  89
  90         if (_video_codec_context) {
  91                 avcodec_close (_video_codec_context);
  92         }
  93
  94         if (_subtitle_codec_context) {
  95                 avcodec_close (_subtitle_codec_context);
  96         }
  97
  98         av_free (_frame);
  99
 100         avformat_close_input (&_format_context);
 101 }
 102
 103 void
 104 FFmpegDecoder::setup_general ()
 105 {
 106         av_register_all ();
 107
 108         if (avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0) < 0) {
 109                 throw OpenFileError (_film->content_path ());
 110         }
 111
 112         if (avformat_find_stream_info (_format_context, 0) < 0) {
 113                 throw DecodeError ("could not find stream information");
 114         }
 115
 116         /* Find video, audio and subtitle streams and choose the first of each */
 117
 118         for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
 119                 AVStream* s = _format_context->streams[i];
 120                 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
 121                         _video_stream = i;
 122                 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 123
 124                         /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
 125                            so bodge it here.  No idea why we should have to do this.
 126                         */
 127
 128                         if (s->codec->channel_layout == 0) {
 129                                 s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);
 130                         }
 131
 132                         _audio_streams.push_back (
 133                                 shared_ptr<AudioStream> (
 134                                         new FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
 135                                         )
 136                                 );
 137
 138                 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
 139                         _subtitle_streams.push_back (
 140                                 shared_ptr<SubtitleStream> (
 141                                         new SubtitleStream (stream_name (s), i)
 142                                         )
 143                                 );
 144                 }
 145         }
 146
 147         if (_video_stream < 0) {
 148                 throw DecodeError ("could not find video stream");
 149         }
 150
 151         _frame = avcodec_alloc_frame ();
 152         if (_frame == 0) {
 153                 throw DecodeError ("could not allocate frame");
 154         }
 155 }
 156
 157 void
 158 FFmpegDecoder::setup_video ()
 159 {
 160         _video_codec_context = _format_context->streams[_video_stream]->codec;
 161         _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
 162
 163         if (_video_codec == 0) {
 164                 throw DecodeError ("could not find video decoder");
 165         }
 166
 167         /* I think this prevents problems with green hash on decodes and
 168            "changing frame properties on the fly is not supported by all filters"
 169            messages with some content.  Although I'm not sure; needs checking.
 170         */
 171         AVDictionary* opts = 0;
 172         av_dict_set (&opts, "threads", "1", 0);
 173
 174         if (avcodec_open2 (_video_codec_context, _video_codec, &opts) < 0) {
 175                 throw DecodeError ("could not open video decoder");
 176         }
 177 }
 178
 179 void
 180 FFmpegDecoder::setup_audio ()
 181 {
 182         if (!_audio_stream) {
 183                 return;
 184         }
 185
 186         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 187         assert (ffa);
 188
 189         _audio_codec_context = _format_context->streams[ffa->id()]->codec;
 190         _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
 191
 192         if (_audio_codec == 0) {
 193                 throw DecodeError ("could not find audio decoder");
 194         }
 195
 196         if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
 197                 throw DecodeError ("could not open audio decoder");
 198         }
 199 }
 200
 201 void
 202 FFmpegDecoder::setup_subtitle ()
 203 {
 204         if (!_subtitle_stream) {
 205                 return;
 206         }
 207
 208         _subtitle_codec_context = _format_context->streams[_subtitle_stream->id()]->codec;
 209         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 210
 211         if (_subtitle_codec == 0) {
 212                 throw DecodeError ("could not find subtitle decoder");
 213         }
 214
 215         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 216                 throw DecodeError ("could not open subtitle decoder");
 217         }
 218 }
 219
 220
 221 bool
 222 FFmpegDecoder::pass ()
 223 {
 224         int r = av_read_frame (_format_context, &_packet);
 225
 226         if (r < 0) {
 227                 if (r != AVERROR_EOF) {
 228                         /* Maybe we should fail here, but for now we'll just finish off instead */
 229                         char buf[256];
 230                         av_strerror (r, buf, sizeof(buf));
 231                         _film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
 232                 }
 233
 234                 /* Get any remaining frames */
 235
 236                 _packet.data = 0;
 237                 _packet.size = 0;
 238
 239                 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 240
 241                 int frame_finished;
 242
 243                 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 244                         filter_and_emit_video (_frame);
 245                 }
 246
 247                 if (_audio_stream && _opt->decode_audio) {
 248                         while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 249                                 int const data_size = av_samples_get_buffer_size (
 250                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 251                                         );
 252
 253                                 assert (_audio_codec_context->channels == _film->audio_channels());
 254                                 Audio (deinterleave_audio (_frame->data[0], data_size));
 255                         }
 256                 }
 257
 258                 return true;
 259         }
 260
 261         avcodec_get_frame_defaults (_frame);
 262
 263         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 264
 265         if (_packet.stream_index == _video_stream) {
 266
 267                 int frame_finished;
 268                 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
 269                 if (r >= 0 && frame_finished) {
 270
 271                         if (r != _packet.size) {
 272                                 _film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
 273                         }
 274
 275                         if (_opt->video_sync) {
 276                                 out_with_sync ();
 277                         } else {
 278                                 filter_and_emit_video (_frame);
 279                         }
 280                 }
 281
 282         } else if (ffa && _packet.stream_index == ffa->id() && _opt->decode_audio) {
 283
 284                 int frame_finished;
 285                 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 286
 287                         /* Where we are in the source, in seconds */
 288                         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 289                                 * av_frame_get_best_effort_timestamp(_frame);
 290
 291                         /* We only decode audio if we've had our first video packet through, and if it
 292                            was before this packet.  Until then audio is thrown away.
 293                         */
 294
 295                         if (_first_video && _first_video.get() <= source_pts_seconds) {
 296
 297                                 if (!_first_audio) {
 298                                         _first_audio = source_pts_seconds;
 299
 300                                         /* This is our first audio frame, and if we've arrived here we must have had our
 301                                            first video frame.  Push some silence to make up any gap between our first
 302                                            video frame and our first audio.
 303                                         */
 304
 305                                         /* frames of silence that we must push */
 306                                         int const s = rint ((_first_audio.get() - _first_video.get()) * ffa->sample_rate ());
 307
 308                                         _film->log()->log (
 309                                                 String::compose (
 310                                                         "First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)",
 311                                                         _first_video.get(), _first_audio.get(), s, ffa->channels(), bytes_per_audio_sample()
 312                                                         )
 313                                                 );
 314
 315                                         if (s) {
 316                                                 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), s));
 317                                                 audio->make_silent ();
 318                                                 Audio (audio);
 319                                         }
 320                                 }
 321
 322                                 int const data_size = av_samples_get_buffer_size (
 323                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 324                                         );
 325
 326                                 assert (_audio_codec_context->channels == _film->audio_channels());
 327                                 Audio (deinterleave_audio (_frame->data[0], data_size));
 328                         }
 329                 }
 330
 331         } else if (_subtitle_stream && _packet.stream_index == _subtitle_stream->id() && _opt->decode_subtitles && _first_video) {
 332
 333                 int got_subtitle;
 334                 AVSubtitle sub;
 335                 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
 336                         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 337                            indicate that the previous subtitle should stop.
 338                         */
 339                         if (sub.num_rects > 0) {
 340                                 shared_ptr<TimedSubtitle> ts;
 341                                 try {
 342                                         emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
 343                                 } catch (...) {
 344                                         /* some problem with the subtitle; we probably didn't understand it */
 345                                 }
 346                         } else {
 347                                 emit_subtitle (shared_ptr<TimedSubtitle> ());
 348                         }
 349                         avsubtitle_free (&sub);
 350                 }
 351         }
 352
 353         av_free_packet (&_packet);
 354         return false;
 355 }
 356
 357 shared_ptr<AudioBuffers>
 358 FFmpegDecoder::deinterleave_audio (uint8_t* data, int size)
 359 {
 360         assert (_film->audio_channels());
 361         assert (bytes_per_audio_sample());
 362
 363         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 364         assert (ffa);
 365
 366         /* Deinterleave and convert to float */
 367
 368         assert ((size % (bytes_per_audio_sample() * ffa->channels())) == 0);
 369
 370         int const total_samples = size / bytes_per_audio_sample();
 371         int const frames = total_samples / _film->audio_channels();
 372         shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), frames));
 373
 374         switch (audio_sample_format()) {
 375         case AV_SAMPLE_FMT_S16:
 376         {
 377                 int16_t* p = (int16_t *) data;
 378                 int sample = 0;
 379                 int channel = 0;
 380                 for (int i = 0; i < total_samples; ++i) {
 381                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 382
 383                         ++channel;
 384                         if (channel == _film->audio_channels()) {
 385                                 channel = 0;
 386                                 ++sample;
 387                         }
 388                 }
 389         }
 390         break;
 391
 392         case AV_SAMPLE_FMT_S32:
 393         {
 394                 int32_t* p = (int32_t *) data;
 395                 int sample = 0;
 396                 int channel = 0;
 397                 for (int i = 0; i < total_samples; ++i) {
 398                         audio->data(channel)[sample] = float(*p++) / (1 << 31);
 399
 400                         ++channel;
 401                         if (channel == _film->audio_channels()) {
 402                                 channel = 0;
 403                                 ++sample;
 404                         }
 405                 }
 406         }
 407
 408         case AV_SAMPLE_FMT_FLTP:
 409         {
 410                 float* p = reinterpret_cast<float*> (data);
 411                 for (int i = 0; i < _film->audio_channels(); ++i) {
 412                         memcpy (audio->data(i), p, frames * sizeof(float));
 413                         p += frames;
 414                 }
 415         }
 416         break;
 417
 418         default:
 419                 assert (false);
 420         }
 421
 422         return audio;
 423 }
 424
 425 float
 426 FFmpegDecoder::frames_per_second () const
 427 {
 428         AVStream* s = _format_context->streams[_video_stream];
 429
 430         if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
 431                 return av_q2d (s->avg_frame_rate);
 432         }
 433
 434         return av_q2d (s->r_frame_rate);
 435 }
 436
 437 AVSampleFormat
 438 FFmpegDecoder::audio_sample_format () const
 439 {
 440         if (_audio_codec_context == 0) {
 441                 return (AVSampleFormat) 0;
 442         }
 443
 444         return _audio_codec_context->sample_fmt;
 445 }
 446
 447 Size
 448 FFmpegDecoder::native_size () const
 449 {
 450         return Size (_video_codec_context->width, _video_codec_context->height);
 451 }
 452
 453 PixelFormat
 454 FFmpegDecoder::pixel_format () const
 455 {
 456         return _video_codec_context->pix_fmt;
 457 }
 458
 459 int
 460 FFmpegDecoder::time_base_numerator () const
 461 {
 462         return _video_codec_context->time_base.num;
 463 }
 464
 465 int
 466 FFmpegDecoder::time_base_denominator () const
 467 {
 468         return _video_codec_context->time_base.den;
 469 }
 470
 471 int
 472 FFmpegDecoder::sample_aspect_ratio_numerator () const
 473 {
 474         return _video_codec_context->sample_aspect_ratio.num;
 475 }
 476
 477 int
 478 FFmpegDecoder::sample_aspect_ratio_denominator () const
 479 {
 480         return _video_codec_context->sample_aspect_ratio.den;
 481 }
 482
 483 string
 484 FFmpegDecoder::stream_name (AVStream* s) const
 485 {
 486         stringstream n;
 487
 488         AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
 489         if (lang) {
 490                 n << lang->value;
 491         }
 492
 493         AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
 494         if (title) {
 495                 if (!n.str().empty()) {
 496                         n << " ";
 497                 }
 498                 n << title->value;
 499         }
 500
 501         if (n.str().empty()) {
 502                 n << "unknown";
 503         }
 504
 505         return n.str ();
 506 }
 507
 508 int
 509 FFmpegDecoder::bytes_per_audio_sample () const
 510 {
 511         return av_get_bytes_per_sample (audio_sample_format ());
 512 }
 513
 514 void
 515 FFmpegDecoder::set_audio_stream (shared_ptr<AudioStream> s)
 516 {
 517         AudioDecoder::set_audio_stream (s);
 518         setup_audio ();
 519 }
 520
 521 void
 522 FFmpegDecoder::set_subtitle_stream (shared_ptr<SubtitleStream> s)
 523 {
 524         VideoDecoder::set_subtitle_stream (s);
 525         setup_subtitle ();
 526 }
 527
 528 void
 529 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
 530 {
 531         boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 532
 533         shared_ptr<FilterGraph> graph;
 534
 535         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 536         while (i != _filter_graphs.end() && !(*i)->can_process (Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
 537                 ++i;
 538         }
 539
 540         if (i == _filter_graphs.end ()) {
 541                 graph.reset (new FilterGraph (_film, this, Size (frame->width, frame->height), (AVPixelFormat) frame->format));
 542                 _filter_graphs.push_back (graph);
 543                 _film->log()->log (String::compose ("New graph for %1x%2, pixel format %3", frame->width, frame->height, frame->format));
 544         } else {
 545                 graph = *i;
 546         }
 547
 548         list<shared_ptr<Image> > images = graph->process (frame);
 549
 550         SourceFrame const sf = av_q2d (_format_context->streams[_video_stream]->time_base)
 551                 * av_frame_get_best_effort_timestamp(_frame) * frames_per_second();
 552
 553         for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
 554                 emit_video (*i, sf);
 555         }
 556 }
 557
 558 bool
 559 FFmpegDecoder::seek (SourceFrame f)
 560 {
 561         int64_t const t = static_cast<int64_t>(f) / (av_q2d (_format_context->streams[_video_stream]->time_base) * frames_per_second());
 562         int const r = av_seek_frame (_format_context, _video_stream, t, 0);
 563         avcodec_flush_buffers (_video_codec_context);
 564         return r < 0;
 565 }
 566
 567 shared_ptr<FFmpegAudioStream>
 568 FFmpegAudioStream::create (string t, optional<int> v)
 569 {
 570         if (!v) {
 571                 /* version < 1; no type in the string, and there's only FFmpeg streams anyway */
 572                 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
 573         }
 574
 575         stringstream s (t);
 576         string type;
 577         s >> type;
 578         if (type != "ffmpeg") {
 579                 return shared_ptr<FFmpegAudioStream> ();
 580         }
 581
 582         return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
 583 }
 584
 585 FFmpegAudioStream::FFmpegAudioStream (string t, optional<int> version)
 586 {
 587         stringstream n (t);
 588
 589         int name_index = 4;
 590         if (!version) {
 591                 name_index = 2;
 592                 int channels;
 593                 n >> _id >> channels;
 594                 _channel_layout = av_get_default_channel_layout (channels);
 595                 _sample_rate = 0;
 596         } else {
 597                 string type;
 598                 /* Current (marked version 1) */
 599                 n >> type >> _id >> _sample_rate >> _channel_layout;
 600                 assert (type == "ffmpeg");
 601         }
 602
 603         for (int i = 0; i < name_index; ++i) {
 604                 size_t const s = t.find (' ');
 605                 if (s != string::npos) {
 606                         t = t.substr (s + 1);
 607                 }
 608         }
 609
 610         _name = t;
 611 }
 612
 613 string
 614 FFmpegAudioStream::to_string () const
 615 {
 616         return String::compose ("ffmpeg %1 %2 %3 %4", _id, _sample_rate, _channel_layout, _name);
 617 }
 618
 619
 620 void
 621 FFmpegDecoder::out_with_sync ()
 622 {
 623         /* Where we are in the output, in seconds */
 624         double const out_pts_seconds = video_frame() / frames_per_second();
 625
 626         /* Where we are in the source, in seconds */
 627         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 628                 * av_frame_get_best_effort_timestamp(_frame);
 629
 630         _film->log()->log (
 631                 String::compose ("Source video frame ready; source at %1, output at %2", source_pts_seconds, out_pts_seconds),
 632                 Log::VERBOSE
 633                 );
 634
 635         if (!_first_video) {
 636                 _first_video = source_pts_seconds;
 637         }
 638
 639         /* Difference between where we are and where we should be */
 640         double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
 641         double const one_frame = 1 / frames_per_second();
 642
 643         /* Insert frames if required to get out_pts_seconds up to pts_seconds */
 644         if (delta > one_frame) {
 645                 int const extra = rint (delta / one_frame);
 646                 for (int i = 0; i < extra; ++i) {
 647                         repeat_last_video ();
 648                         _film->log()->log (
 649                                 String::compose (
 650                                         "Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)",
 651                                         out_pts_seconds, video_frame(), source_pts_seconds, frames_per_second()
 652                                                         )
 653                                 );
 654                 }
 655         }
 656
 657         if (delta > -one_frame) {
 658                 /* Process this frame */
 659                 filter_and_emit_video (_frame);
 660         } else {
 661                 /* Otherwise we are omitting a frame to keep things right */
 662                 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
 663         }
 664 }
 665
 666 void
 667 FFmpegDecoder::film_changed (Film::Property p)
 668 {
 669         switch (p) {
 670         case Film::CROP:
 671         {
 672                 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
 673                 _filter_graphs.clear ();
 674         }
 675         OutputChanged ();
 676         break;
 677
 678         default:
 679                 break;
 680         }
 681 }
 682