src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 /** @file  src/lib/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 extern "C" {
  32 #include <tiffio.h>
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 #include <libswscale/swscale.h>
  36 #include <libpostproc/postprocess.h>
  37 }
  38 #include <sndfile.h>
  39 #include "film.h"
  40 #include "format.h"
  41 #include "transcoder.h"
  42 #include "job.h"
  43 #include "filter.h"
  44 #include "options.h"
  45 #include "exceptions.h"
  46 #include "image.h"
  47 #include "util.h"
  48 #include "log.h"
  49 #include "ffmpeg_decoder.h"
  50 #include "filter_graph.h"
  51 #include "subtitle.h"
  52
  53 using std::cout;
  54 using std::string;
  55 using std::vector;
  56 using std::stringstream;
  57 using std::list;
  58 using boost::shared_ptr;
  59 using boost::optional;
  60 using boost::dynamic_pointer_cast;
  61
  62 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const Options> o, Job* j)
  63         : Decoder (f, o, j)
  64         , VideoDecoder (f, o, j)
  65         , AudioDecoder (f, o, j)
  66         , _format_context (0)
  67         , _video_stream (-1)
  68         , _frame (0)
  69         , _video_codec_context (0)
  70         , _video_codec (0)
  71         , _audio_codec_context (0)
  72         , _audio_codec (0)
  73         , _subtitle_codec_context (0)
  74         , _subtitle_codec (0)
  75 {
  76         setup_general ();
  77         setup_video ();
  78         setup_audio ();
  79         setup_subtitle ();
  80 }
  81
  82 FFmpegDecoder::~FFmpegDecoder ()
  83 {
  84         if (_audio_codec_context) {
  85                 avcodec_close (_audio_codec_context);
  86         }
  87
  88         if (_video_codec_context) {
  89                 avcodec_close (_video_codec_context);
  90         }
  91
  92         if (_subtitle_codec_context) {
  93                 avcodec_close (_subtitle_codec_context);
  94         }
  95
  96         av_free (_frame);
  97
  98         avformat_close_input (&_format_context);
  99 }
 100
 101 void
 102 FFmpegDecoder::setup_general ()
 103 {
 104         av_register_all ();
 105
 106         if (avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0) < 0) {
 107                 throw OpenFileError (_film->content_path ());
 108         }
 109
 110         if (avformat_find_stream_info (_format_context, 0) < 0) {
 111                 throw DecodeError ("could not find stream information");
 112         }
 113
 114         /* Find video, audio and subtitle streams and choose the first of each */
 115
 116         for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
 117                 AVStream* s = _format_context->streams[i];
 118                 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
 119                         _video_stream = i;
 120                 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 121
 122                         /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
 123                            so bodge it here.  No idea why we should have to do this.
 124                         */
 125
 126                         if (s->codec->channel_layout == 0) {
 127                                 s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);
 128                         }
 129
 130                         _audio_streams.push_back (
 131                                 shared_ptr<AudioStream> (
 132                                         new FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
 133                                         )
 134                                 );
 135
 136                 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
 137                         _subtitle_streams.push_back (
 138                                 shared_ptr<SubtitleStream> (
 139                                         new SubtitleStream (stream_name (s), i)
 140                                         )
 141                                 );
 142                 }
 143         }
 144
 145         if (_video_stream < 0) {
 146                 throw DecodeError ("could not find video stream");
 147         }
 148
 149         _frame = avcodec_alloc_frame ();
 150         if (_frame == 0) {
 151                 throw DecodeError ("could not allocate frame");
 152         }
 153 }
 154
 155 void
 156 FFmpegDecoder::setup_video ()
 157 {
 158         _video_codec_context = _format_context->streams[_video_stream]->codec;
 159         _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
 160
 161         if (_video_codec == 0) {
 162                 throw DecodeError ("could not find video decoder");
 163         }
 164
 165         /* I think this prevents problems with green hash on decodes and
 166            "changing frame properties on the fly is not supported by all filters"
 167            messages with some content.  Although I'm not sure; needs checking.
 168         */
 169         AVDictionary* opts = 0;
 170         av_dict_set (&opts, "threads", "1", 0);
 171
 172         if (avcodec_open2 (_video_codec_context, _video_codec, &opts) < 0) {
 173                 throw DecodeError ("could not open video decoder");
 174         }
 175 }
 176
 177 void
 178 FFmpegDecoder::setup_audio ()
 179 {
 180         if (!_audio_stream) {
 181                 return;
 182         }
 183
 184         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 185         assert (ffa);
 186
 187         _audio_codec_context = _format_context->streams[ffa->id()]->codec;
 188         _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
 189
 190         if (_audio_codec == 0) {
 191                 throw DecodeError ("could not find audio decoder");
 192         }
 193
 194         if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
 195                 throw DecodeError ("could not open audio decoder");
 196         }
 197 }
 198
 199 void
 200 FFmpegDecoder::setup_subtitle ()
 201 {
 202         if (!_subtitle_stream) {
 203                 return;
 204         }
 205
 206         _subtitle_codec_context = _format_context->streams[_subtitle_stream->id()]->codec;
 207         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 208
 209         if (_subtitle_codec == 0) {
 210                 throw DecodeError ("could not find subtitle decoder");
 211         }
 212
 213         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 214                 throw DecodeError ("could not open subtitle decoder");
 215         }
 216 }
 217
 218
 219 bool
 220 FFmpegDecoder::pass ()
 221 {
 222         int r = av_read_frame (_format_context, &_packet);
 223
 224         if (r < 0) {
 225                 if (r != AVERROR_EOF) {
 226                         /* Maybe we should fail here, but for now we'll just finish off instead */
 227                         char buf[256];
 228                         av_strerror (r, buf, sizeof(buf));
 229                         _film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
 230                 }
 231
 232                 /* Get any remaining frames */
 233
 234                 _packet.data = 0;
 235                 _packet.size = 0;
 236
 237                 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 238
 239                 int frame_finished;
 240
 241                 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 242                         filter_and_emit_video (_frame);
 243                 }
 244
 245                 if (_audio_stream && _opt->decode_audio) {
 246                         while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 247                                 int const data_size = av_samples_get_buffer_size (
 248                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 249                                         );
 250
 251                                 assert (_audio_codec_context->channels == _film->audio_channels());
 252                                 Audio (deinterleave_audio (_frame->data[0], data_size));
 253                         }
 254                 }
 255
 256                 return true;
 257         }
 258
 259         avcodec_get_frame_defaults (_frame);
 260
 261         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 262
 263         if (_packet.stream_index == _video_stream) {
 264
 265                 int frame_finished;
 266                 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
 267                 if (r >= 0 && frame_finished) {
 268
 269                         if (r != _packet.size) {
 270                                 _film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
 271                         }
 272
 273                         if (_opt->decoder_alignment) {
 274                                 out_careful ();
 275                         } else {
 276                                 filter_and_emit_video (_frame);
 277                         }
 278                 }
 279
 280         } else if (ffa && _packet.stream_index == ffa->id() && _opt->decode_audio) {
 281
 282                 int frame_finished;
 283                 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 284
 285                         /* Where we are in the source, in seconds */
 286                         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 287                                 * av_frame_get_best_effort_timestamp(_frame);
 288
 289                         /* We only decode audio if we've had our first video packet through, and if it
 290                            was before this packet.  Until then audio is thrown away.
 291                         */
 292
 293                         if (_first_video && _first_video.get() <= source_pts_seconds) {
 294
 295                                 if (!_first_audio) {
 296                                         _first_audio = source_pts_seconds;
 297
 298                                         /* This is our first audio frame, and if we've arrived here we must have had our
 299                                            first video frame.  Push some silence to make up any gap between our first
 300                                            video frame and our first audio.
 301                                         */
 302
 303                                         /* frames of silence that we must push */
 304                                         int const s = rint ((_first_audio.get() - _first_video.get()) * ffa->sample_rate ());
 305
 306                                         _film->log()->log (
 307                                                 String::compose (
 308                                                         "First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)",
 309                                                         _first_video.get(), _first_audio.get(), s, ffa->channels(), bytes_per_audio_sample()
 310                                                         )
 311                                                 );
 312
 313                                         if (s) {
 314                                                 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), s));
 315                                                 audio->make_silent ();
 316                                                 Audio (audio);
 317                                         }
 318                                 }
 319
 320                                 int const data_size = av_samples_get_buffer_size (
 321                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 322                                         );
 323
 324                                 assert (_audio_codec_context->channels == _film->audio_channels());
 325                                 Audio (deinterleave_audio (_frame->data[0], data_size));
 326                         }
 327                 }
 328
 329         } else if (_subtitle_stream && _packet.stream_index == _subtitle_stream->id() && _opt->decode_subtitles && _first_video) {
 330
 331                 int got_subtitle;
 332                 AVSubtitle sub;
 333                 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
 334                         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 335                            indicate that the previous subtitle should stop.
 336                         */
 337                         if (sub.num_rects > 0) {
 338                                 shared_ptr<TimedSubtitle> ts;
 339                                 try {
 340                                         emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
 341                                 } catch (...) {
 342                                         /* some problem with the subtitle; we probably didn't understand it */
 343                                 }
 344                         } else {
 345                                 emit_subtitle (shared_ptr<TimedSubtitle> ());
 346                         }
 347                         avsubtitle_free (&sub);
 348                 }
 349         }
 350
 351         av_free_packet (&_packet);
 352         return false;
 353 }
 354
 355 shared_ptr<AudioBuffers>
 356 FFmpegDecoder::deinterleave_audio (uint8_t* data, int size)
 357 {
 358         assert (_film->audio_channels());
 359         assert (bytes_per_audio_sample());
 360
 361         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 362         assert (ffa);
 363
 364         /* Deinterleave and convert to float */
 365
 366         assert ((size % (bytes_per_audio_sample() * ffa->channels())) == 0);
 367
 368         int const total_samples = size / bytes_per_audio_sample();
 369         int const frames = total_samples / _film->audio_channels();
 370         shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), frames));
 371
 372         switch (audio_sample_format()) {
 373         case AV_SAMPLE_FMT_S16:
 374         {
 375                 int16_t* p = (int16_t *) data;
 376                 int sample = 0;
 377                 int channel = 0;
 378                 for (int i = 0; i < total_samples; ++i) {
 379                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 380
 381                         ++channel;
 382                         if (channel == _film->audio_channels()) {
 383                                 channel = 0;
 384                                 ++sample;
 385                         }
 386                 }
 387         }
 388         break;
 389
 390         case AV_SAMPLE_FMT_S32:
 391         {
 392                 int32_t* p = (int32_t *) data;
 393                 int sample = 0;
 394                 int channel = 0;
 395                 for (int i = 0; i < total_samples; ++i) {
 396                         audio->data(channel)[sample] = float(*p++) / (1 << 31);
 397
 398                         ++channel;
 399                         if (channel == _film->audio_channels()) {
 400                                 channel = 0;
 401                                 ++sample;
 402                         }
 403                 }
 404         }
 405
 406         case AV_SAMPLE_FMT_FLTP:
 407         {
 408                 float* p = reinterpret_cast<float*> (data);
 409                 for (int i = 0; i < _film->audio_channels(); ++i) {
 410                         memcpy (audio->data(i), p, frames * sizeof(float));
 411                         p += frames;
 412                 }
 413         }
 414         break;
 415
 416         default:
 417                 assert (false);
 418         }
 419
 420         return audio;
 421 }
 422
 423 float
 424 FFmpegDecoder::frames_per_second () const
 425 {
 426         AVStream* s = _format_context->streams[_video_stream];
 427
 428         if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
 429                 return av_q2d (s->avg_frame_rate);
 430         }
 431
 432         return av_q2d (s->r_frame_rate);
 433 }
 434
 435 AVSampleFormat
 436 FFmpegDecoder::audio_sample_format () const
 437 {
 438         if (_audio_codec_context == 0) {
 439                 return (AVSampleFormat) 0;
 440         }
 441
 442         return _audio_codec_context->sample_fmt;
 443 }
 444
 445 Size
 446 FFmpegDecoder::native_size () const
 447 {
 448         return Size (_video_codec_context->width, _video_codec_context->height);
 449 }
 450
 451 PixelFormat
 452 FFmpegDecoder::pixel_format () const
 453 {
 454         return _video_codec_context->pix_fmt;
 455 }
 456
 457 int
 458 FFmpegDecoder::time_base_numerator () const
 459 {
 460         return _video_codec_context->time_base.num;
 461 }
 462
 463 int
 464 FFmpegDecoder::time_base_denominator () const
 465 {
 466         return _video_codec_context->time_base.den;
 467 }
 468
 469 int
 470 FFmpegDecoder::sample_aspect_ratio_numerator () const
 471 {
 472         return _video_codec_context->sample_aspect_ratio.num;
 473 }
 474
 475 int
 476 FFmpegDecoder::sample_aspect_ratio_denominator () const
 477 {
 478         return _video_codec_context->sample_aspect_ratio.den;
 479 }
 480
 481 string
 482 FFmpegDecoder::stream_name (AVStream* s) const
 483 {
 484         stringstream n;
 485
 486         AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
 487         if (lang) {
 488                 n << lang->value;
 489         }
 490
 491         AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
 492         if (title) {
 493                 if (!n.str().empty()) {
 494                         n << " ";
 495                 }
 496                 n << title->value;
 497         }
 498
 499         if (n.str().empty()) {
 500                 n << "unknown";
 501         }
 502
 503         return n.str ();
 504 }
 505
 506 int
 507 FFmpegDecoder::bytes_per_audio_sample () const
 508 {
 509         return av_get_bytes_per_sample (audio_sample_format ());
 510 }
 511
 512 void
 513 FFmpegDecoder::set_audio_stream (shared_ptr<AudioStream> s)
 514 {
 515         AudioDecoder::set_audio_stream (s);
 516         setup_audio ();
 517 }
 518
 519 void
 520 FFmpegDecoder::set_subtitle_stream (shared_ptr<SubtitleStream> s)
 521 {
 522         VideoDecoder::set_subtitle_stream (s);
 523         setup_subtitle ();
 524 }
 525
 526 void
 527 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
 528 {
 529         shared_ptr<FilterGraph> graph;
 530
 531         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 532         while (i != _filter_graphs.end() && !(*i)->can_process (Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
 533                 ++i;
 534         }
 535
 536         if (i == _filter_graphs.end ()) {
 537                 graph.reset (new FilterGraph (_film, this, _opt->apply_crop, Size (frame->width, frame->height), (AVPixelFormat) frame->format));
 538                 _filter_graphs.push_back (graph);
 539                 _film->log()->log (String::compose ("New graph for %1x%2, pixel format %3", frame->width, frame->height, frame->format));
 540         } else {
 541                 graph = *i;
 542         }
 543
 544         list<shared_ptr<Image> > images = graph->process (frame);
 545
 546         for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
 547                 emit_video (*i);
 548         }
 549 }
 550
 551 bool
 552 FFmpegDecoder::seek (SourceFrame f)
 553 {
 554         int64_t const t = static_cast<int64_t>(f) / (av_q2d (_format_context->streams[_video_stream]->time_base) * frames_per_second());
 555         int const r = av_seek_frame (_format_context, _video_stream, t, 0);
 556         avcodec_flush_buffers (_video_codec_context);
 557         return r < 0;
 558 }
 559
 560 shared_ptr<FFmpegAudioStream>
 561 FFmpegAudioStream::create (string t, optional<int> v)
 562 {
 563         if (!v) {
 564                 /* version < 1; no type in the string, and there's only FFmpeg streams anyway */
 565                 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
 566         }
 567
 568         stringstream s (t);
 569         string type;
 570         s >> type;
 571         if (type != "ffmpeg") {
 572                 return shared_ptr<FFmpegAudioStream> ();
 573         }
 574
 575         return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
 576 }
 577
 578 FFmpegAudioStream::FFmpegAudioStream (string t, optional<int> version)
 579 {
 580         stringstream n (t);
 581
 582         int name_index = 4;
 583         if (!version) {
 584                 name_index = 2;
 585                 int channels;
 586                 n >> _id >> channels;
 587                 _channel_layout = av_get_default_channel_layout (channels);
 588                 _sample_rate = 0;
 589         } else {
 590                 string type;
 591                 /* Current (marked version 1) */
 592                 n >> type >> _id >> _sample_rate >> _channel_layout;
 593                 assert (type == "ffmpeg");
 594         }
 595
 596         for (int i = 0; i < name_index; ++i) {
 597                 size_t const s = t.find (' ');
 598                 if (s != string::npos) {
 599                         t = t.substr (s + 1);
 600                 }
 601         }
 602
 603         _name = t;
 604 }
 605
 606 string
 607 FFmpegAudioStream::to_string () const
 608 {
 609         return String::compose ("ffmpeg %1 %2 %3 %4", _id, _sample_rate, _channel_layout, _name);
 610 }
 611
 612
 613 void
 614 FFmpegDecoder::out_careful ()
 615 {
 616         /* Where we are in the output, in seconds */
 617         double const out_pts_seconds = video_frame() / frames_per_second();
 618
 619         /* Where we are in the source, in seconds */
 620         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 621                 * av_frame_get_best_effort_timestamp(_frame);
 622
 623         _film->log()->log (
 624                 String::compose ("Source video frame ready; source at %1, output at %2", source_pts_seconds, out_pts_seconds),
 625                 Log::VERBOSE
 626                 );
 627
 628         if (!_first_video) {
 629                 _first_video = source_pts_seconds;
 630         }
 631
 632         /* Difference between where we are and where we should be */
 633         double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
 634         double const one_frame = 1 / frames_per_second();
 635
 636         /* Insert frames if required to get out_pts_seconds up to pts_seconds */
 637         if (delta > one_frame) {
 638                 int const extra = rint (delta / one_frame);
 639                 for (int i = 0; i < extra; ++i) {
 640                         repeat_last_video ();
 641                         _film->log()->log (
 642                                 String::compose (
 643                                         "Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)",
 644                                         out_pts_seconds, video_frame(), source_pts_seconds, frames_per_second()
 645                                                         )
 646                                 );
 647                 }
 648         }
 649
 650         if (delta > -one_frame) {
 651                 /* Process this frame */
 652                 filter_and_emit_video (_frame);
 653         } else {
 654                 /* Otherwise we are omitting a frame to keep things right */
 655                 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
 656         }
 657 }