src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 /** @file  src/lib/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 extern "C" {
  32 #include <tiffio.h>
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 #include <libswscale/swscale.h>
  36 #include <libpostproc/postprocess.h>
  37 }
  38 #include <sndfile.h>
  39 #include "film.h"
  40 #include "format.h"
  41 #include "transcoder.h"
  42 #include "job.h"
  43 #include "filter.h"
  44 #include "options.h"
  45 #include "exceptions.h"
  46 #include "image.h"
  47 #include "util.h"
  48 #include "log.h"
  49 #include "ffmpeg_decoder.h"
  50 #include "filter_graph.h"
  51 #include "subtitle.h"
  52
  53 using std::cout;
  54 using std::string;
  55 using std::vector;
  56 using std::stringstream;
  57 using std::list;
  58 using boost::shared_ptr;
  59 using boost::optional;
  60 using boost::dynamic_pointer_cast;
  61
  62 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const Options> o, Job* j)
  63         : Decoder (f, o, j)
  64         , VideoDecoder (f, o, j)
  65         , AudioDecoder (f, o, j)
  66         , _format_context (0)
  67         , _video_stream (-1)
  68         , _frame (0)
  69         , _video_codec_context (0)
  70         , _video_codec (0)
  71         , _audio_codec_context (0)
  72         , _audio_codec (0)
  73         , _subtitle_codec_context (0)
  74         , _subtitle_codec (0)
  75 {
  76         setup_general ();
  77         setup_video ();
  78         setup_audio ();
  79         setup_subtitle ();
  80 }
  81
  82 FFmpegDecoder::~FFmpegDecoder ()
  83 {
  84         if (_audio_codec_context) {
  85                 avcodec_close (_audio_codec_context);
  86         }
  87
  88         if (_video_codec_context) {
  89                 avcodec_close (_video_codec_context);
  90         }
  91
  92         if (_subtitle_codec_context) {
  93                 avcodec_close (_subtitle_codec_context);
  94         }
  95
  96         av_free (_frame);
  97
  98         avformat_close_input (&_format_context);
  99 }
 100
 101 void
 102 FFmpegDecoder::setup_general ()
 103 {
 104         av_register_all ();
 105
 106         if (avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0) < 0) {
 107                 throw OpenFileError (_film->content_path ());
 108         }
 109
 110         if (avformat_find_stream_info (_format_context, 0) < 0) {
 111                 throw DecodeError ("could not find stream information");
 112         }
 113
 114         /* Find video, audio and subtitle streams and choose the first of each */
 115
 116         for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
 117                 AVStream* s = _format_context->streams[i];
 118                 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
 119                         _video_stream = i;
 120                 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 121
 122                         /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
 123                            so bodge it here.  No idea why we should have to do this.
 124                         */
 125
 126                         if (s->codec->channel_layout == 0) {
 127                                 s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);
 128                         }
 129
 130                         _audio_streams.push_back (
 131                                 shared_ptr<AudioStream> (
 132                                         new FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
 133                                         )
 134                                 );
 135
 136                 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
 137                         _subtitle_streams.push_back (
 138                                 shared_ptr<SubtitleStream> (
 139                                         new SubtitleStream (stream_name (s), i)
 140                                         )
 141                                 );
 142                 }
 143         }
 144
 145         if (_video_stream < 0) {
 146                 throw DecodeError ("could not find video stream");
 147         }
 148
 149         _frame = avcodec_alloc_frame ();
 150         if (_frame == 0) {
 151                 throw DecodeError ("could not allocate frame");
 152         }
 153 }
 154
 155 void
 156 FFmpegDecoder::setup_video ()
 157 {
 158         _video_codec_context = _format_context->streams[_video_stream]->codec;
 159         _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
 160
 161         if (_video_codec == 0) {
 162                 throw DecodeError ("could not find video decoder");
 163         }
 164
 165         /* I think this prevents problems with green hash on decodes and
 166            "changing frame properties on the fly is not supported by all filters"
 167            messages with some content.  Although I'm not sure; needs checking.
 168         */
 169         AVDictionary* opts = 0;
 170         av_dict_set (&opts, "threads", "1", 0);
 171
 172         if (avcodec_open2 (_video_codec_context, _video_codec, &opts) < 0) {
 173                 throw DecodeError ("could not open video decoder");
 174         }
 175 }
 176
 177 void
 178 FFmpegDecoder::setup_audio ()
 179 {
 180         if (!_audio_stream) {
 181                 return;
 182         }
 183
 184         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 185         assert (ffa);
 186
 187         _audio_codec_context = _format_context->streams[ffa->id()]->codec;
 188         _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
 189
 190         if (_audio_codec == 0) {
 191                 throw DecodeError ("could not find audio decoder");
 192         }
 193
 194         if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
 195                 throw DecodeError ("could not open audio decoder");
 196         }
 197 }
 198
 199 void
 200 FFmpegDecoder::setup_subtitle ()
 201 {
 202         if (!_subtitle_stream) {
 203                 return;
 204         }
 205
 206         _subtitle_codec_context = _format_context->streams[_subtitle_stream->id()]->codec;
 207         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 208
 209         if (_subtitle_codec == 0) {
 210                 throw DecodeError ("could not find subtitle decoder");
 211         }
 212
 213         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 214                 throw DecodeError ("could not open subtitle decoder");
 215         }
 216 }
 217
 218
 219 bool
 220 FFmpegDecoder::pass ()
 221 {
 222         int r = av_read_frame (_format_context, &_packet);
 223
 224         if (r < 0) {
 225                 if (r != AVERROR_EOF) {
 226                         /* Maybe we should fail here, but for now we'll just finish off instead */
 227                         char buf[256];
 228                         av_strerror (r, buf, sizeof(buf));
 229                         _film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
 230                 }
 231
 232                 /* Get any remaining frames */
 233
 234                 _packet.data = 0;
 235                 _packet.size = 0;
 236
 237                 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 238
 239                 int frame_finished;
 240
 241                 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 242                         filter_and_emit_video (_frame);
 243                 }
 244
 245                 if (_audio_stream && _opt->decode_audio) {
 246                         while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 247                                 int const data_size = av_samples_get_buffer_size (
 248                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 249                                         );
 250
 251                                 assert (_audio_codec_context->channels == _film->audio_channels());
 252                                 Audio (deinterleave_audio (_frame->data[0], data_size));
 253                         }
 254                 }
 255
 256                 return true;
 257         }
 258
 259         avcodec_get_frame_defaults (_frame);
 260
 261         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 262
 263         if (_packet.stream_index == _video_stream) {
 264
 265                 int frame_finished;
 266                 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
 267                 if (r >= 0 && frame_finished) {
 268
 269                         if (r != _packet.size) {
 270                                 _film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
 271                         }
 272
 273                         /* Where we are in the output, in seconds */
 274                         double const out_pts_seconds = video_frame() / frames_per_second();
 275
 276                         /* Where we are in the source, in seconds */
 277                         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 278                                 * av_frame_get_best_effort_timestamp(_frame);
 279
 280                         _film->log()->log (
 281                                 String::compose ("Source video frame ready; source at %1, output at %2", source_pts_seconds, out_pts_seconds),
 282                                 Log::VERBOSE
 283                                 );
 284
 285                         if (!_first_video) {
 286                                 _first_video = source_pts_seconds;
 287                         }
 288
 289                         /* Difference between where we are and where we should be */
 290                         double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
 291                         double const one_frame = 1 / frames_per_second();
 292
 293                         /* Insert frames if required to get out_pts_seconds up to pts_seconds */
 294                         if (delta > one_frame) {
 295                                 int const extra = rint (delta / one_frame);
 296                                 for (int i = 0; i < extra; ++i) {
 297                                         repeat_last_video ();
 298                                         _film->log()->log (
 299                                                 String::compose (
 300                                                         "Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)",
 301                                                         out_pts_seconds, video_frame(), source_pts_seconds, frames_per_second()
 302                                                         )
 303                                                 );
 304                                 }
 305                         }
 306
 307                         if (delta > -one_frame) {
 308                                 /* Process this frame */
 309                                 filter_and_emit_video (_frame);
 310                         } else {
 311                                 /* Otherwise we are omitting a frame to keep things right */
 312                                 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
 313                         }
 314                 }
 315
 316         } else if (ffa && _packet.stream_index == ffa->id() && _opt->decode_audio) {
 317
 318                 int frame_finished;
 319                 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 320
 321                         /* Where we are in the source, in seconds */
 322                         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 323                                 * av_frame_get_best_effort_timestamp(_frame);
 324
 325                         /* We only decode audio if we've had our first video packet through, and if it
 326                            was before this packet.  Until then audio is thrown away.
 327                         */
 328
 329                         if (_first_video && _first_video.get() <= source_pts_seconds) {
 330
 331                                 if (!_first_audio) {
 332                                         _first_audio = source_pts_seconds;
 333
 334                                         /* This is our first audio frame, and if we've arrived here we must have had our
 335                                            first video frame.  Push some silence to make up any gap between our first
 336                                            video frame and our first audio.
 337                                         */
 338
 339                                         /* frames of silence that we must push */
 340                                         int const s = rint ((_first_audio.get() - _first_video.get()) * ffa->sample_rate ());
 341
 342                                         _film->log()->log (
 343                                                 String::compose (
 344                                                         "First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)",
 345                                                         _first_video.get(), _first_audio.get(), s, ffa->channels(), bytes_per_audio_sample()
 346                                                         )
 347                                                 );
 348
 349                                         if (s) {
 350                                                 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), s));
 351                                                 audio->make_silent ();
 352                                                 Audio (audio);
 353                                         }
 354                                 }
 355
 356                                 int const data_size = av_samples_get_buffer_size (
 357                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 358                                         );
 359
 360                                 assert (_audio_codec_context->channels == _film->audio_channels());
 361                                 Audio (deinterleave_audio (_frame->data[0], data_size));
 362                         }
 363                 }
 364
 365         } else if (_subtitle_stream && _packet.stream_index == _subtitle_stream->id() && _opt->decode_subtitles && _first_video) {
 366
 367                 int got_subtitle;
 368                 AVSubtitle sub;
 369                 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
 370                         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 371                            indicate that the previous subtitle should stop.
 372                         */
 373                         if (sub.num_rects > 0) {
 374                                 shared_ptr<TimedSubtitle> ts;
 375                                 try {
 376                                         emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
 377                                 } catch (...) {
 378                                         /* some problem with the subtitle; we probably didn't understand it */
 379                                 }
 380                         } else {
 381                                 emit_subtitle (shared_ptr<TimedSubtitle> ());
 382                         }
 383                         avsubtitle_free (&sub);
 384                 }
 385         }
 386
 387         av_free_packet (&_packet);
 388         return false;
 389 }
 390
 391 shared_ptr<AudioBuffers>
 392 FFmpegDecoder::deinterleave_audio (uint8_t* data, int size)
 393 {
 394         assert (_film->audio_channels());
 395         assert (bytes_per_audio_sample());
 396
 397         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 398         assert (ffa);
 399
 400         /* Deinterleave and convert to float */
 401
 402         assert ((size % (bytes_per_audio_sample() * ffa->channels())) == 0);
 403
 404         int const total_samples = size / bytes_per_audio_sample();
 405         int const frames = total_samples / _film->audio_channels();
 406         shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), frames));
 407
 408         switch (audio_sample_format()) {
 409         case AV_SAMPLE_FMT_S16:
 410         {
 411                 int16_t* p = (int16_t *) data;
 412                 int sample = 0;
 413                 int channel = 0;
 414                 for (int i = 0; i < total_samples; ++i) {
 415                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 416
 417                         ++channel;
 418                         if (channel == _film->audio_channels()) {
 419                                 channel = 0;
 420                                 ++sample;
 421                         }
 422                 }
 423         }
 424         break;
 425
 426         case AV_SAMPLE_FMT_S32:
 427         {
 428                 int32_t* p = (int32_t *) data;
 429                 int sample = 0;
 430                 int channel = 0;
 431                 for (int i = 0; i < total_samples; ++i) {
 432                         audio->data(channel)[sample] = float(*p++) / (1 << 31);
 433
 434                         ++channel;
 435                         if (channel == _film->audio_channels()) {
 436                                 channel = 0;
 437                                 ++sample;
 438                         }
 439                 }
 440         }
 441         break;
 442
 443         case AV_SAMPLE_FMT_FLT:
 444         {
 445                 float* p = reinterpret_cast<float*> (data);
 446                 int sample = 0;
 447                 int channel = 0;
 448                 for (int i = 0; i < total_samples; ++i) {
 449                         audio->data(channel)[sample] = *p++;
 450
 451                         ++channel;
 452                         if (channel == _film->audio_channels()) {
 453                                 channel = 0;
 454                                 ++sample;
 455                         }
 456                 }
 457         }
 458         break;
 459
 460         case AV_SAMPLE_FMT_FLTP:
 461         {
 462                 float* p = reinterpret_cast<float*> (data);
 463                 for (int i = 0; i < _film->audio_channels(); ++i) {
 464                         memcpy (audio->data(i), p, frames * sizeof(float));
 465                         p += frames;
 466                 }
 467         }
 468         break;
 469
 470         default:
 471                 throw DecodeError (String::compose ("Unrecognised audio sample format (%1)", static_cast<int> (audio_sample_format())));
 472         }
 473
 474         return audio;
 475 }
 476
 477 float
 478 FFmpegDecoder::frames_per_second () const
 479 {
 480         AVStream* s = _format_context->streams[_video_stream];
 481
 482         if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
 483                 return av_q2d (s->avg_frame_rate);
 484         }
 485
 486         return av_q2d (s->r_frame_rate);
 487 }
 488
 489 AVSampleFormat
 490 FFmpegDecoder::audio_sample_format () const
 491 {
 492         if (_audio_codec_context == 0) {
 493                 return (AVSampleFormat) 0;
 494         }
 495
 496         return _audio_codec_context->sample_fmt;
 497 }
 498
 499 Size
 500 FFmpegDecoder::native_size () const
 501 {
 502         return Size (_video_codec_context->width, _video_codec_context->height);
 503 }
 504
 505 PixelFormat
 506 FFmpegDecoder::pixel_format () const
 507 {
 508         return _video_codec_context->pix_fmt;
 509 }
 510
 511 int
 512 FFmpegDecoder::time_base_numerator () const
 513 {
 514         return _video_codec_context->time_base.num;
 515 }
 516
 517 int
 518 FFmpegDecoder::time_base_denominator () const
 519 {
 520         return _video_codec_context->time_base.den;
 521 }
 522
 523 int
 524 FFmpegDecoder::sample_aspect_ratio_numerator () const
 525 {
 526         return _video_codec_context->sample_aspect_ratio.num;
 527 }
 528
 529 int
 530 FFmpegDecoder::sample_aspect_ratio_denominator () const
 531 {
 532         return _video_codec_context->sample_aspect_ratio.den;
 533 }
 534
 535 string
 536 FFmpegDecoder::stream_name (AVStream* s) const
 537 {
 538         stringstream n;
 539
 540         AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
 541         if (lang) {
 542                 n << lang->value;
 543         }
 544
 545         AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
 546         if (title) {
 547                 if (!n.str().empty()) {
 548                         n << " ";
 549                 }
 550                 n << title->value;
 551         }
 552
 553         if (n.str().empty()) {
 554                 n << "unknown";
 555         }
 556
 557         return n.str ();
 558 }
 559
 560 int
 561 FFmpegDecoder::bytes_per_audio_sample () const
 562 {
 563         return av_get_bytes_per_sample (audio_sample_format ());
 564 }
 565
 566 void
 567 FFmpegDecoder::set_audio_stream (shared_ptr<AudioStream> s)
 568 {
 569         AudioDecoder::set_audio_stream (s);
 570         setup_audio ();
 571 }
 572
 573 void
 574 FFmpegDecoder::set_subtitle_stream (shared_ptr<SubtitleStream> s)
 575 {
 576         VideoDecoder::set_subtitle_stream (s);
 577         setup_subtitle ();
 578 }
 579
 580 void
 581 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
 582 {
 583         shared_ptr<FilterGraph> graph;
 584
 585         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 586         while (i != _filter_graphs.end() && !(*i)->can_process (Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
 587                 ++i;
 588         }
 589
 590         if (i == _filter_graphs.end ()) {
 591                 graph.reset (new FilterGraph (_film, this, _opt->apply_crop, Size (frame->width, frame->height), (AVPixelFormat) frame->format));
 592                 _filter_graphs.push_back (graph);
 593                 _film->log()->log (String::compose ("New graph for %1x%2, pixel format %3", frame->width, frame->height, frame->format));
 594         } else {
 595                 graph = *i;
 596         }
 597
 598         list<shared_ptr<Image> > images = graph->process (frame);
 599
 600         for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
 601                 emit_video (*i);
 602         }
 603 }
 604
 605 shared_ptr<FFmpegAudioStream>
 606 FFmpegAudioStream::create (string t, optional<int> v)
 607 {
 608         if (!v) {
 609                 /* version < 1; no type in the string, and there's only FFmpeg streams anyway */
 610                 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
 611         }
 612
 613         stringstream s (t);
 614         string type;
 615         s >> type;
 616         if (type != "ffmpeg") {
 617                 return shared_ptr<FFmpegAudioStream> ();
 618         }
 619
 620         return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
 621 }
 622
 623 FFmpegAudioStream::FFmpegAudioStream (string t, optional<int> version)
 624 {
 625         stringstream n (t);
 626
 627         int name_index = 4;
 628         if (!version) {
 629                 name_index = 2;
 630                 int channels;
 631                 n >> _id >> channels;
 632                 _channel_layout = av_get_default_channel_layout (channels);
 633                 _sample_rate = 0;
 634         } else {
 635                 string type;
 636                 /* Current (marked version 1) */
 637                 n >> type >> _id >> _sample_rate >> _channel_layout;
 638                 assert (type == "ffmpeg");
 639         }
 640
 641         for (int i = 0; i < name_index; ++i) {
 642                 size_t const s = t.find (' ');
 643                 if (s != string::npos) {
 644                         t = t.substr (s + 1);
 645                 }
 646         }
 647
 648         _name = t;
 649 }
 650
 651 string
 652 FFmpegAudioStream::to_string () const
 653 {
 654         return String::compose ("ffmpeg %1 %2 %3 %4", _id, _sample_rate, _channel_layout, _name);
 655 }
 656