src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 /** @file  src/lib/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 extern "C" {
  32 #include <tiffio.h>
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 #include <libswscale/swscale.h>
  36 #include <libpostproc/postprocess.h>
  37 }
  38 #include <sndfile.h>
  39 #include "film.h"
  40 #include "format.h"
  41 #include "transcoder.h"
  42 #include "job.h"
  43 #include "filter.h"
  44 #include "options.h"
  45 #include "exceptions.h"
  46 #include "image.h"
  47 #include "util.h"
  48 #include "log.h"
  49 #include "ffmpeg_decoder.h"
  50 #include "filter_graph.h"
  51 #include "subtitle.h"
  52
  53 using std::cout;
  54 using std::string;
  55 using std::vector;
  56 using std::stringstream;
  57 using std::list;
  58 using boost::shared_ptr;
  59 using boost::optional;
  60 using boost::dynamic_pointer_cast;
  61
  62 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const Options> o, Job* j)
  63         : Decoder (f, o, j)
  64         , VideoDecoder (f, o, j)
  65         , AudioDecoder (f, o, j)
  66         , _format_context (0)
  67         , _video_stream (-1)
  68         , _frame (0)
  69         , _video_codec_context (0)
  70         , _video_codec (0)
  71         , _audio_codec_context (0)
  72         , _audio_codec (0)
  73         , _subtitle_codec_context (0)
  74         , _subtitle_codec (0)
  75 {
  76         setup_general ();
  77         setup_video ();
  78         setup_audio ();
  79         setup_subtitle ();
  80 }
  81
  82 FFmpegDecoder::~FFmpegDecoder ()
  83 {
  84         if (_audio_codec_context) {
  85                 avcodec_close (_audio_codec_context);
  86         }
  87
  88         if (_video_codec_context) {
  89                 avcodec_close (_video_codec_context);
  90         }
  91
  92         if (_subtitle_codec_context) {
  93                 avcodec_close (_subtitle_codec_context);
  94         }
  95
  96         av_free (_frame);
  97
  98         avformat_close_input (&_format_context);
  99 }
 100
 101 void
 102 FFmpegDecoder::setup_general ()
 103 {
 104         int r;
 105
 106         av_register_all ();
 107
 108         if ((r = avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0)) != 0) {
 109                 throw OpenFileError (_film->content_path ());
 110         }
 111
 112         if (avformat_find_stream_info (_format_context, 0) < 0) {
 113                 throw DecodeError ("could not find stream information");
 114         }
 115
 116         /* Find video, audio and subtitle streams and choose the first of each */
 117
 118         for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
 119                 AVStream* s = _format_context->streams[i];
 120                 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
 121                         _video_stream = i;
 122                 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 123                         _audio_streams.push_back (
 124                                 shared_ptr<AudioStream> (
 125                                         new FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
 126                                         )
 127                                 );
 128                 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
 129                         _subtitle_streams.push_back (
 130                                 shared_ptr<SubtitleStream> (
 131                                         new SubtitleStream (stream_name (s), i)
 132                                         )
 133                                 );
 134                 }
 135         }
 136
 137         if (_video_stream < 0) {
 138                 throw DecodeError ("could not find video stream");
 139         }
 140
 141         _frame = avcodec_alloc_frame ();
 142         if (_frame == 0) {
 143                 throw DecodeError ("could not allocate frame");
 144         }
 145 }
 146
 147 void
 148 FFmpegDecoder::setup_video ()
 149 {
 150         _video_codec_context = _format_context->streams[_video_stream]->codec;
 151         _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
 152
 153         if (_video_codec == 0) {
 154                 throw DecodeError ("could not find video decoder");
 155         }
 156
 157         if (avcodec_open2 (_video_codec_context, _video_codec, 0) < 0) {
 158                 throw DecodeError ("could not open video decoder");
 159         }
 160 }
 161
 162 void
 163 FFmpegDecoder::setup_audio ()
 164 {
 165         if (!_audio_stream) {
 166                 return;
 167         }
 168
 169         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 170         assert (ffa);
 171
 172         _audio_codec_context = _format_context->streams[ffa->id()]->codec;
 173         _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
 174
 175         if (_audio_codec == 0) {
 176                 throw DecodeError ("could not find audio decoder");
 177         }
 178
 179         if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
 180                 throw DecodeError ("could not open audio decoder");
 181         }
 182
 183         /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
 184            so bodge it here.  No idea why we should have to do this.
 185         */
 186
 187         if (_audio_codec_context->channel_layout == 0) {
 188                 _audio_codec_context->channel_layout = av_get_default_channel_layout (ffa->channels());
 189         }
 190 }
 191
 192 void
 193 FFmpegDecoder::setup_subtitle ()
 194 {
 195         if (!_subtitle_stream) {
 196                 return;
 197         }
 198
 199         _subtitle_codec_context = _format_context->streams[_subtitle_stream->id()]->codec;
 200         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 201
 202         if (_subtitle_codec == 0) {
 203                 throw DecodeError ("could not find subtitle decoder");
 204         }
 205
 206         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 207                 throw DecodeError ("could not open subtitle decoder");
 208         }
 209 }
 210
 211
 212 bool
 213 FFmpegDecoder::pass ()
 214 {
 215         int r = av_read_frame (_format_context, &_packet);
 216
 217         if (r < 0) {
 218                 if (r != AVERROR_EOF) {
 219                         /* Maybe we should fail here, but for now we'll just finish off instead */
 220                         char buf[256];
 221                         av_strerror (r, buf, sizeof(buf));
 222                         _film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
 223                 }
 224
 225                 /* Get any remaining frames */
 226
 227                 _packet.data = 0;
 228                 _packet.size = 0;
 229
 230                 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 231
 232                 int frame_finished;
 233
 234                 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 235                         filter_and_emit_video (_frame);
 236                 }
 237
 238                 if (_audio_stream && _opt->decode_audio) {
 239                         while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 240                                 int const data_size = av_samples_get_buffer_size (
 241                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 242                                         );
 243
 244                                 assert (_audio_codec_context->channels == _film->audio_channels());
 245                                 Audio (deinterleave_audio (_frame->data[0], data_size));
 246                         }
 247                 }
 248
 249                 return true;
 250         }
 251
 252         avcodec_get_frame_defaults (_frame);
 253
 254         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 255
 256         if (_packet.stream_index == _video_stream) {
 257
 258                 int frame_finished;
 259                 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
 260                 if (r >= 0 && frame_finished) {
 261
 262                         if (r != _packet.size) {
 263                                 _film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
 264                         }
 265
 266                         /* Where we are in the output, in seconds */
 267                         double const out_pts_seconds = video_frame() / frames_per_second();
 268
 269                         /* Where we are in the source, in seconds */
 270                         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 271                                 * av_frame_get_best_effort_timestamp(_frame);
 272
 273                         _film->log()->log (
 274                                 String::compose ("Source video frame ready; source at %1, output at %2", source_pts_seconds, out_pts_seconds),
 275                                 Log::VERBOSE
 276                                 );
 277
 278                         if (!_first_video) {
 279                                 _first_video = source_pts_seconds;
 280                         }
 281
 282                         /* Difference between where we are and where we should be */
 283                         double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
 284                         double const one_frame = 1 / frames_per_second();
 285
 286                         /* Insert frames if required to get out_pts_seconds up to pts_seconds */
 287                         if (delta > one_frame) {
 288                                 int const extra = rint (delta / one_frame);
 289                                 for (int i = 0; i < extra; ++i) {
 290                                         repeat_last_video ();
 291                                         _film->log()->log (
 292                                                 String::compose (
 293                                                         "Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)",
 294                                                         out_pts_seconds, video_frame(), source_pts_seconds, frames_per_second()
 295                                                         )
 296                                                 );
 297                                 }
 298                         }
 299
 300                         if (delta > -one_frame) {
 301                                 /* Process this frame */
 302                                 filter_and_emit_video (_frame);
 303                         } else {
 304                                 /* Otherwise we are omitting a frame to keep things right */
 305                                 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
 306                         }
 307                 }
 308
 309         } else if (ffa && _packet.stream_index == ffa->id() && _opt->decode_audio) {
 310
 311                 int frame_finished;
 312                 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 313
 314                         /* Where we are in the source, in seconds */
 315                         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 316                                 * av_frame_get_best_effort_timestamp(_frame);
 317
 318                         /* We only decode audio if we've had our first video packet through, and if it
 319                            was before this packet.  Until then audio is thrown away.
 320                         */
 321
 322                         if (_first_video && _first_video.get() <= source_pts_seconds) {
 323
 324                                 if (!_first_audio) {
 325                                         _first_audio = source_pts_seconds;
 326
 327                                         /* This is our first audio frame, and if we've arrived here we must have had our
 328                                            first video frame.  Push some silence to make up any gap between our first
 329                                            video frame and our first audio.
 330                                         */
 331
 332                                         /* frames of silence that we must push */
 333                                         int const s = rint ((_first_audio.get() - _first_video.get()) * ffa->sample_rate ());
 334
 335                                         _film->log()->log (
 336                                                 String::compose (
 337                                                         "First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)",
 338                                                         _first_video.get(), _first_audio.get(), s, ffa->channels(), bytes_per_audio_sample()
 339                                                         )
 340                                                 );
 341
 342                                         if (s) {
 343                                                 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), s));
 344                                                 audio->make_silent ();
 345                                                 Audio (audio);
 346                                         }
 347                                 }
 348
 349                                 int const data_size = av_samples_get_buffer_size (
 350                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 351                                         );
 352
 353                                 assert (_audio_codec_context->channels == _film->audio_channels());
 354                                 Audio (deinterleave_audio (_frame->data[0], data_size));
 355                         }
 356                 }
 357
 358         } else if (_subtitle_stream && _packet.stream_index == _subtitle_stream->id() && _opt->decode_subtitles && _first_video) {
 359
 360                 int got_subtitle;
 361                 AVSubtitle sub;
 362                 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
 363                         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 364                            indicate that the previous subtitle should stop.
 365                         */
 366                         if (sub.num_rects > 0) {
 367                                 shared_ptr<TimedSubtitle> ts;
 368                                 try {
 369                                         emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
 370                                 } catch (...) {
 371                                         /* some problem with the subtitle; we probably didn't understand it */
 372                                 }
 373                         } else {
 374                                 emit_subtitle (shared_ptr<TimedSubtitle> ());
 375                         }
 376                         avsubtitle_free (&sub);
 377                 }
 378         }
 379
 380         av_free_packet (&_packet);
 381         return false;
 382 }
 383
 384 shared_ptr<AudioBuffers>
 385 FFmpegDecoder::deinterleave_audio (uint8_t* data, int size)
 386 {
 387         assert (_film->audio_channels());
 388         assert (bytes_per_audio_sample());
 389
 390         shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
 391         assert (ffa);
 392
 393         /* Deinterleave and convert to float */
 394
 395         assert ((size % (bytes_per_audio_sample() * ffa->channels())) == 0);
 396
 397         int const total_samples = size / bytes_per_audio_sample();
 398         int const frames = total_samples / _film->audio_channels();
 399         shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), frames));
 400
 401         switch (audio_sample_format()) {
 402         case AV_SAMPLE_FMT_S16:
 403         {
 404                 int16_t* p = (int16_t *) data;
 405                 int sample = 0;
 406                 int channel = 0;
 407                 for (int i = 0; i < total_samples; ++i) {
 408                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 409
 410                         ++channel;
 411                         if (channel == _film->audio_channels()) {
 412                                 channel = 0;
 413                                 ++sample;
 414                         }
 415                 }
 416         }
 417         break;
 418
 419         case AV_SAMPLE_FMT_S32:
 420         {
 421                 int32_t* p = (int32_t *) data;
 422                 int sample = 0;
 423                 int channel = 0;
 424                 for (int i = 0; i < total_samples; ++i) {
 425                         audio->data(channel)[sample] = float(*p++) / (1 << 31);
 426
 427                         ++channel;
 428                         if (channel == _film->audio_channels()) {
 429                                 channel = 0;
 430                                 ++sample;
 431                         }
 432                 }
 433         }
 434
 435         case AV_SAMPLE_FMT_FLTP:
 436         {
 437                 float* p = reinterpret_cast<float*> (data);
 438                 for (int i = 0; i < _film->audio_channels(); ++i) {
 439                         memcpy (audio->data(i), p, frames * sizeof(float));
 440                         p += frames;
 441                 }
 442         }
 443         break;
 444
 445         default:
 446                 assert (false);
 447         }
 448
 449         return audio;
 450 }
 451
 452 float
 453 FFmpegDecoder::frames_per_second () const
 454 {
 455         AVStream* s = _format_context->streams[_video_stream];
 456
 457         if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
 458                 return av_q2d (s->avg_frame_rate);
 459         }
 460
 461         return av_q2d (s->r_frame_rate);
 462 }
 463
 464 AVSampleFormat
 465 FFmpegDecoder::audio_sample_format () const
 466 {
 467         if (_audio_codec_context == 0) {
 468                 return (AVSampleFormat) 0;
 469         }
 470
 471         return _audio_codec_context->sample_fmt;
 472 }
 473
 474 Size
 475 FFmpegDecoder::native_size () const
 476 {
 477         return Size (_video_codec_context->width, _video_codec_context->height);
 478 }
 479
 480 PixelFormat
 481 FFmpegDecoder::pixel_format () const
 482 {
 483         return _video_codec_context->pix_fmt;
 484 }
 485
 486 int
 487 FFmpegDecoder::time_base_numerator () const
 488 {
 489         return _video_codec_context->time_base.num;
 490 }
 491
 492 int
 493 FFmpegDecoder::time_base_denominator () const
 494 {
 495         return _video_codec_context->time_base.den;
 496 }
 497
 498 int
 499 FFmpegDecoder::sample_aspect_ratio_numerator () const
 500 {
 501         return _video_codec_context->sample_aspect_ratio.num;
 502 }
 503
 504 int
 505 FFmpegDecoder::sample_aspect_ratio_denominator () const
 506 {
 507         return _video_codec_context->sample_aspect_ratio.den;
 508 }
 509
 510 string
 511 FFmpegDecoder::stream_name (AVStream* s) const
 512 {
 513         stringstream n;
 514
 515         AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
 516         if (lang) {
 517                 n << lang->value;
 518         }
 519
 520         AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
 521         if (title) {
 522                 if (!n.str().empty()) {
 523                         n << " ";
 524                 }
 525                 n << title->value;
 526         }
 527
 528         if (n.str().empty()) {
 529                 n << "unknown";
 530         }
 531
 532         return n.str ();
 533 }
 534
 535 int
 536 FFmpegDecoder::bytes_per_audio_sample () const
 537 {
 538         return av_get_bytes_per_sample (audio_sample_format ());
 539 }
 540
 541 void
 542 FFmpegDecoder::set_audio_stream (shared_ptr<AudioStream> s)
 543 {
 544         AudioDecoder::set_audio_stream (s);
 545         setup_audio ();
 546 }
 547
 548 void
 549 FFmpegDecoder::set_subtitle_stream (shared_ptr<SubtitleStream> s)
 550 {
 551         VideoDecoder::set_subtitle_stream (s);
 552         setup_subtitle ();
 553 }
 554
 555 void
 556 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
 557 {
 558         shared_ptr<FilterGraph> graph;
 559
 560         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 561         while (i != _filter_graphs.end() && !(*i)->can_process (Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
 562                 ++i;
 563         }
 564
 565         if (i == _filter_graphs.end ()) {
 566                 graph.reset (new FilterGraph (_film, this, _opt->apply_crop, Size (frame->width, frame->height), (AVPixelFormat) frame->format));
 567                 _filter_graphs.push_back (graph);
 568                 _film->log()->log (String::compose ("New graph for %1x%2, pixel format %3", frame->width, frame->height, frame->format));
 569         } else {
 570                 graph = *i;
 571         }
 572
 573         list<shared_ptr<Image> > images = graph->process (frame);
 574
 575         for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
 576                 emit_video (*i);
 577         }
 578 }
 579
 580 shared_ptr<FFmpegAudioStream>
 581 FFmpegAudioStream::create (string t, optional<int> v)
 582 {
 583         if (!v) {
 584                 /* version < 1; no type in the string, and there's only FFmpeg streams anyway */
 585                 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
 586         }
 587
 588         stringstream s (t);
 589         string type;
 590         s >> type;
 591         if (type != "ffmpeg") {
 592                 return shared_ptr<FFmpegAudioStream> ();
 593         }
 594
 595         return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
 596 }
 597
 598 FFmpegAudioStream::FFmpegAudioStream (string t, optional<int> version)
 599 {
 600         stringstream n (t);
 601
 602         int name_index = 4;
 603         if (!version) {
 604                 name_index = 2;
 605                 int channels;
 606                 n >> _id >> channels;
 607                 _channel_layout = av_get_default_channel_layout (channels);
 608                 _sample_rate = 0;
 609         } else {
 610                 string type;
 611                 /* Current (marked version 1) */
 612                 n >> type >> _id >> _sample_rate >> _channel_layout;
 613                 assert (type == "ffmpeg");
 614         }
 615
 616         for (int i = 0; i < name_index; ++i) {
 617                 size_t const s = t.find (' ');
 618                 if (s != string::npos) {
 619                         t = t.substr (s + 1);
 620                 }
 621         }
 622
 623         _name = t;
 624 }
 625
 626 string
 627 FFmpegAudioStream::to_string () const
 628 {
 629         return String::compose ("ffmpeg %1 %2 %3 %4", _id, _sample_rate, _channel_layout, _name);
 630 }
 631
 632 /** @return Length (in video frames) according to our content's header */
 633 SourceFrame
 634 FFmpegDecoder::length () const
 635 {
 636         return (double(_format_context->duration) / AV_TIME_BASE) * frames_per_second();
 637 }