src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 /** @file  src/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 extern "C" {
  32 #include <tiffio.h>
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 #include <libswscale/swscale.h>
  36 #include <libpostproc/postprocess.h>
  37 }
  38 #include <sndfile.h>
  39 #include "film.h"
  40 #include "format.h"
  41 #include "transcoder.h"
  42 #include "job.h"
  43 #include "filter.h"
  44 #include "options.h"
  45 #include "exceptions.h"
  46 #include "image.h"
  47 #include "util.h"
  48 #include "log.h"
  49 #include "ffmpeg_decoder.h"
  50 #include "filter_graph.h"
  51 #include "subtitle.h"
  52
  53 using std::cout;
  54 using std::string;
  55 using std::vector;
  56 using std::stringstream;
  57 using std::list;
  58 using boost::shared_ptr;
  59 using boost::optional;
  60 using boost::dynamic_pointer_cast;
  61
  62 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const Options> o, Job* j)
  63         : Decoder (f, o, j)
  64         , VideoDecoder (f, o, j)
  65         , AudioDecoder (f, o, j)
  66         , _format_context (0)
  67         , _video_stream (-1)
  68         , _frame (0)
  69         , _video_codec_context (0)
  70         , _video_codec (0)
  71         , _audio_codec_context (0)
  72         , _audio_codec (0)
  73         , _subtitle_codec_context (0)
  74         , _subtitle_codec (0)
  75 {
  76         setup_general ();
  77         setup_video ();
  78         setup_audio ();
  79         setup_subtitle ();
  80 }
  81
  82 FFmpegDecoder::~FFmpegDecoder ()
  83 {
  84         if (_audio_codec_context) {
  85                 avcodec_close (_audio_codec_context);
  86         }
  87
  88         if (_video_codec_context) {
  89                 avcodec_close (_video_codec_context);
  90         }
  91
  92         if (_subtitle_codec_context) {
  93                 avcodec_close (_subtitle_codec_context);
  94         }
  95
  96         av_free (_frame);
  97
  98         avformat_close_input (&_format_context);
  99 }
 100
 101 void
 102 FFmpegDecoder::setup_general ()
 103 {
 104         int r;
 105
 106         av_register_all ();
 107
 108         if ((r = avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0)) != 0) {
 109                 throw OpenFileError (_film->content_path ());
 110         }
 111
 112         if (avformat_find_stream_info (_format_context, 0) < 0) {
 113                 throw DecodeError ("could not find stream information");
 114         }
 115
 116         /* Find video, audio and subtitle streams and choose the first of each */
 117
 118         for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
 119                 AVStream* s = _format_context->streams[i];
 120                 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
 121                         _video_stream = i;
 122                 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 123                         _audio_streams.push_back (
 124                                 shared_ptr<AudioStream> (
 125                                         new FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
 126                                         )
 127                                 );
 128                 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
 129                         _subtitle_streams.push_back (
 130                                 shared_ptr<SubtitleStream> (
 131                                         new SubtitleStream (stream_name (s), i)
 132                                         )
 133                                 );
 134                 }
 135         }
 136
 137         if (_video_stream < 0) {
 138                 throw DecodeError ("could not find video stream");
 139         }
 140
 141         _frame = avcodec_alloc_frame ();
 142         if (_frame == 0) {
 143                 throw DecodeError ("could not allocate frame");
 144         }
 145 }
 146
 147 void
 148 FFmpegDecoder::setup_video ()
 149 {
 150         _video_codec_context = _format_context->streams[_video_stream]->codec;
 151         _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
 152
 153         if (_video_codec == 0) {
 154                 throw DecodeError ("could not find video decoder");
 155         }
 156
 157         /* I think this prevents problems with green hash on decodes and
 158            "changing frame properties on the fly is not supported by all filters"
 159            messages with some content.  Although I'm not sure; needs checking.
 160         */
 161         AVDictionary* opts = 0;
 162         av_dict_set (&opts, "threads", "1", 0);
 163
 164         if (avcodec_open2 (_video_codec_context, _video_codec, &opts) < 0) {
 165                 throw DecodeError ("could not open video decoder");
 166         }
 167 }
 168
 169 void
 170 FFmpegDecoder::setup_audio ()
 171 {
 172         if (!_audio_stream) {
 173                 return;
 174         }
 175
 176         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 177         assert (ffa);
 178
 179         _audio_codec_context = _format_context->streams[ffa->id()]->codec;
 180         _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
 181
 182         if (_audio_codec == 0) {
 183                 throw DecodeError ("could not find audio decoder");
 184         }
 185
 186         if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
 187                 throw DecodeError ("could not open audio decoder");
 188         }
 189
 190         /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
 191            so bodge it here.  No idea why we should have to do this.
 192         */
 193
 194         if (_audio_codec_context->channel_layout == 0) {
 195                 _audio_codec_context->channel_layout = av_get_default_channel_layout (ffa->channels());
 196         }
 197 }
 198
 199 void
 200 FFmpegDecoder::setup_subtitle ()
 201 {
 202         if (!_subtitle_stream) {
 203                 return;
 204         }
 205
 206         _subtitle_codec_context = _format_context->streams[_subtitle_stream->id()]->codec;
 207         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 208
 209         if (_subtitle_codec == 0) {
 210                 throw DecodeError ("could not find subtitle decoder");
 211         }
 212
 213         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 214                 throw DecodeError ("could not open subtitle decoder");
 215         }
 216 }
 217
 218
 219 bool
 220 FFmpegDecoder::pass ()
 221 {
 222         int r = av_read_frame (_format_context, &_packet);
 223
 224         if (r < 0) {
 225                 if (r != AVERROR_EOF) {
 226                         /* Maybe we should fail here, but for now we'll just finish off instead */
 227                         char buf[256];
 228                         av_strerror (r, buf, sizeof(buf));
 229                         _film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
 230                 }
 231
 232                 /* Get any remaining frames */
 233
 234                 _packet.data = 0;
 235                 _packet.size = 0;
 236
 237                 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 238
 239                 int frame_finished;
 240
 241                 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 242                         filter_and_emit_video (_frame);
 243                 }
 244
 245                 if (_audio_stream && _opt->decode_audio) {
 246                         while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 247                                 int const data_size = av_samples_get_buffer_size (
 248                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 249                                         );
 250
 251                                 assert (_audio_codec_context->channels == _film->audio_channels());
 252                                 Audio (deinterleave_audio (_frame->data[0], data_size));
 253                         }
 254                 }
 255
 256                 return true;
 257         }
 258
 259         avcodec_get_frame_defaults (_frame);
 260
 261         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 262
 263         if (_packet.stream_index == _video_stream) {
 264
 265                 int frame_finished;
 266                 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
 267                 if (r >= 0 && frame_finished) {
 268
 269                         if (r != _packet.size) {
 270                                 _film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
 271                         }
 272
 273                         /* Where we are in the output, in seconds */
 274                         double const out_pts_seconds = video_frame() / frames_per_second();
 275
 276                         /* Where we are in the source, in seconds */
 277                         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 278                                 * av_frame_get_best_effort_timestamp(_frame);
 279
 280                         if (!_first_video) {
 281                                 _first_video = source_pts_seconds;
 282                         }
 283
 284                         /* Difference between where we are and where we should be */
 285                         double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
 286                         double const one_frame = 1 / frames_per_second();
 287
 288                         /* Insert frames if required to get out_pts_seconds up to pts_seconds */
 289                         if (delta > one_frame) {
 290                                 int const extra = rint (delta / one_frame);
 291                                 for (int i = 0; i < extra; ++i) {
 292                                         repeat_last_video ();
 293                                         _film->log()->log (
 294                                                 String::compose (
 295                                                         "Extra frame inserted at %1s; source frame %2, source PTS %3",
 296                                                         out_pts_seconds, video_frame(), source_pts_seconds
 297                                                         )
 298                                                 );
 299                                 }
 300                         }
 301
 302                         if (delta > -one_frame) {
 303                                 /* Process this frame */
 304                                 filter_and_emit_video (_frame);
 305                         } else {
 306                                 /* Otherwise we are omitting a frame to keep things right */
 307                                 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
 308                         }
 309                 }
 310
 311         } else if (ffa && _packet.stream_index == ffa->id() && _opt->decode_audio) {
 312
 313                 int frame_finished;
 314                 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 315
 316                         /* Where we are in the source, in seconds */
 317                         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 318                                 * av_frame_get_best_effort_timestamp(_frame);
 319
 320                         /* We only decode audio if we've had our first video packet through, and if it
 321                            was before this packet.  Until then audio is thrown away.
 322                         */
 323
 324                         if (_first_video && _first_video.get() <= source_pts_seconds) {
 325
 326                                 if (!_first_audio) {
 327                                         _first_audio = source_pts_seconds;
 328
 329                                         /* This is our first audio frame, and if we've arrived here we must have had our
 330                                            first video frame.  Push some silence to make up any gap between our first
 331                                            video frame and our first audio.
 332                                         */
 333
 334                                         /* frames of silence that we must push */
 335                                         int const s = rint ((_first_audio.get() - _first_video.get()) * ffa->sample_rate ());
 336
 337                                         _film->log()->log (
 338                                                 String::compose (
 339                                                         "First video at %1, first audio at %2, pushing %3 frames of silence for %4 channels (%5 bytes per sample)",
 340                                                         _first_video.get(), _first_audio.get(), s, ffa->channels(), bytes_per_audio_sample()
 341                                                         )
 342                                                 );
 343
 344                                         if (s) {
 345                                                 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), s));
 346                                                 audio->make_silent ();
 347                                                 Audio (audio);
 348                                         }
 349                                 }
 350
 351                                 int const data_size = av_samples_get_buffer_size (
 352                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 353                                         );
 354
 355                                 assert (_audio_codec_context->channels == _film->audio_channels());
 356                                 Audio (deinterleave_audio (_frame->data[0], data_size));
 357                         }
 358                 }
 359
 360         } else if (_subtitle_stream && _packet.stream_index == _subtitle_stream->id() && _opt->decode_subtitles && _first_video) {
 361
 362                 int got_subtitle;
 363                 AVSubtitle sub;
 364                 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
 365                         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 366                            indicate that the previous subtitle should stop.
 367                         */
 368                         if (sub.num_rects > 0) {
 369                                 shared_ptr<TimedSubtitle> ts;
 370                                 try {
 371                                         emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
 372                                 } catch (...) {
 373                                         /* some problem with the subtitle; we probably didn't understand it */
 374                                 }
 375                         } else {
 376                                 emit_subtitle (shared_ptr<TimedSubtitle> ());
 377                         }
 378                         avsubtitle_free (&sub);
 379                 }
 380         }
 381
 382         av_free_packet (&_packet);
 383         return false;
 384 }
 385
 386 shared_ptr<AudioBuffers>
 387 FFmpegDecoder::deinterleave_audio (uint8_t* data, int size)
 388 {
 389         assert (_film->audio_channels());
 390         assert (bytes_per_audio_sample());
 391
 392         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 393         assert (ffa);
 394
 395         /* Deinterleave and convert to float */
 396
 397         assert ((size % (bytes_per_audio_sample() * ffa->channels())) == 0);
 398
 399         int const total_samples = size / bytes_per_audio_sample();
 400         int const frames = total_samples / _film->audio_channels();
 401         shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), frames));
 402
 403         switch (audio_sample_format()) {
 404         case AV_SAMPLE_FMT_S16:
 405         {
 406                 int16_t* p = (int16_t *) data;
 407                 int sample = 0;
 408                 int channel = 0;
 409                 for (int i = 0; i < total_samples; ++i) {
 410                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 411
 412                         ++channel;
 413                         if (channel == _film->audio_channels()) {
 414                                 channel = 0;
 415                                 ++sample;
 416                         }
 417                 }
 418         }
 419         break;
 420
 421         case AV_SAMPLE_FMT_S32:
 422         {
 423                 int32_t* p = (int32_t *) data;
 424                 int sample = 0;
 425                 int channel = 0;
 426                 for (int i = 0; i < total_samples; ++i) {
 427                         audio->data(channel)[sample] = float(*p++) / (1 << 31);
 428
 429                         ++channel;
 430                         if (channel == _film->audio_channels()) {
 431                                 channel = 0;
 432                                 ++sample;
 433                         }
 434                 }
 435         }
 436
 437         case AV_SAMPLE_FMT_FLTP:
 438         {
 439                 float* p = reinterpret_cast<float*> (data);
 440                 for (int i = 0; i < _film->audio_channels(); ++i) {
 441                         memcpy (audio->data(i), p, frames * sizeof(float));
 442                         p += frames;
 443                 }
 444         }
 445         break;
 446
 447         default:
 448                 assert (false);
 449         }
 450
 451         return audio;
 452 }
 453
 454 float
 455 FFmpegDecoder::frames_per_second () const
 456 {
 457         AVStream* s = _format_context->streams[_video_stream];
 458
 459         if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
 460                 return av_q2d (s->avg_frame_rate);
 461         }
 462
 463         return av_q2d (s->r_frame_rate);
 464 }
 465
 466 AVSampleFormat
 467 FFmpegDecoder::audio_sample_format () const
 468 {
 469         if (_audio_codec_context == 0) {
 470                 return (AVSampleFormat) 0;
 471         }
 472
 473         return _audio_codec_context->sample_fmt;
 474 }
 475
 476 Size
 477 FFmpegDecoder::native_size () const
 478 {
 479         return Size (_video_codec_context->width, _video_codec_context->height);
 480 }
 481
 482 PixelFormat
 483 FFmpegDecoder::pixel_format () const
 484 {
 485         return _video_codec_context->pix_fmt;
 486 }
 487
 488 int
 489 FFmpegDecoder::time_base_numerator () const
 490 {
 491         return _video_codec_context->time_base.num;
 492 }
 493
 494 int
 495 FFmpegDecoder::time_base_denominator () const
 496 {
 497         return _video_codec_context->time_base.den;
 498 }
 499
 500 int
 501 FFmpegDecoder::sample_aspect_ratio_numerator () const
 502 {
 503         return _video_codec_context->sample_aspect_ratio.num;
 504 }
 505
 506 int
 507 FFmpegDecoder::sample_aspect_ratio_denominator () const
 508 {
 509         return _video_codec_context->sample_aspect_ratio.den;
 510 }
 511
 512 string
 513 FFmpegDecoder::stream_name (AVStream* s) const
 514 {
 515         stringstream n;
 516
 517         AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
 518         if (lang) {
 519                 n << lang->value;
 520         }
 521
 522         AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
 523         if (title) {
 524                 if (!n.str().empty()) {
 525                         n << " ";
 526                 }
 527                 n << title->value;
 528         }
 529
 530         if (n.str().empty()) {
 531                 n << "unknown";
 532         }
 533
 534         return n.str ();
 535 }
 536
 537 int
 538 FFmpegDecoder::bytes_per_audio_sample () const
 539 {
 540         return av_get_bytes_per_sample (audio_sample_format ());
 541 }
 542
 543 void
 544 FFmpegDecoder::set_audio_stream (shared_ptr<AudioStream> s)
 545 {
 546         AudioDecoder::set_audio_stream (s);
 547         setup_audio ();
 548 }
 549
 550 void
 551 FFmpegDecoder::set_subtitle_stream (shared_ptr<SubtitleStream> s)
 552 {
 553         VideoDecoder::set_subtitle_stream (s);
 554         setup_subtitle ();
 555 }
 556
 557 void
 558 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
 559 {
 560         shared_ptr<FilterGraph> graph;
 561
 562         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 563         while (i != _filter_graphs.end() && !(*i)->can_process (Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
 564                 ++i;
 565         }
 566
 567         if (i == _filter_graphs.end ()) {
 568                 graph.reset (new FilterGraph (_film, this, _opt->apply_crop, Size (frame->width, frame->height), (AVPixelFormat) frame->format));
 569                 _filter_graphs.push_back (graph);
 570                 _film->log()->log (String::compose ("New graph for %1x%2, pixel format %3", frame->width, frame->height, frame->format));
 571         } else {
 572                 graph = *i;
 573         }
 574
 575         list<shared_ptr<Image> > images = graph->process (frame);
 576
 577         for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
 578                 emit_video (*i);
 579         }
 580 }
 581
 582 shared_ptr<FFmpegAudioStream>
 583 FFmpegAudioStream::create (string t, optional<int> v)
 584 {
 585         if (!v) {
 586                 /* version < 1; no type in the string, and there's only FFmpeg streams anyway */
 587                 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
 588         }
 589
 590         stringstream s (t);
 591         string type;
 592         s >> type;
 593         if (type != "ffmpeg") {
 594                 return shared_ptr<FFmpegAudioStream> ();
 595         }
 596
 597         return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
 598 }
 599
 600 FFmpegAudioStream::FFmpegAudioStream (string t, optional<int> version)
 601 {
 602         stringstream n (t);
 603
 604         int name_index = 4;
 605         if (!version) {
 606                 name_index = 2;
 607                 int channels;
 608                 n >> _id >> channels;
 609                 _channel_layout = av_get_default_channel_layout (channels);
 610                 _sample_rate = 0;
 611         } else {
 612                 string type;
 613                 /* Current (marked version 1) */
 614                 n >> type >> _id >> _sample_rate >> _channel_layout;
 615                 assert (type == "ffmpeg");
 616         }
 617
 618         for (int i = 0; i < name_index; ++i) {
 619                 size_t const s = t.find (' ');
 620                 if (s != string::npos) {
 621                         t = t.substr (s + 1);
 622                 }
 623         }
 624
 625         _name = t;
 626 }
 627
 628 string
 629 FFmpegAudioStream::to_string () const
 630 {
 631         return String::compose ("ffmpeg %1 %2 %3 %4", _id, _sample_rate, _channel_layout, _name);
 632 }
 633