src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
/** @file  src/lib/ffmpeg_decoder.cc
 *  @brief A decoder using FFmpeg to decode content.
 */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 extern "C" {
  32 #include <tiffio.h>
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 #include <libswscale/swscale.h>
  36 #include <libpostproc/postprocess.h>
  37 }
  38 #include <sndfile.h>
  39 #include "film.h"
  40 #include "format.h"
  41 #include "transcoder.h"
  42 #include "job.h"
  43 #include "filter.h"
  44 #include "options.h"
  45 #include "exceptions.h"
  46 #include "image.h"
  47 #include "util.h"
  48 #include "log.h"
  49 #include "ffmpeg_decoder.h"
  50 #include "subtitle.h"
  51
  52 using std::cout;
  53 using std::string;
  54 using std::vector;
  55 using std::stringstream;
  56 using boost::shared_ptr;
  57
  58 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const Options> o, Job* j)
  59         : Decoder (f, o, j)
  60         , _format_context (0)
  61         , _video_stream (-1)
  62         , _audio_stream (-1)
  63         , _subtitle_stream (-1)
  64         , _frame (0)
  65         , _video_codec_context (0)
  66         , _video_codec (0)
  67         , _audio_codec_context (0)
  68         , _audio_codec (0)
  69         , _subtitle_codec_context (0)
  70         , _subtitle_codec (0)
  71 {
  72         setup_general ();
  73         setup_video ();
  74         setup_audio ();
  75         setup_subtitle ();
  76 }
  77
  78 FFmpegDecoder::~FFmpegDecoder ()
  79 {
  80         if (_audio_codec_context) {
  81                 avcodec_close (_audio_codec_context);
  82         }
  83
  84         if (_video_codec_context) {
  85                 avcodec_close (_video_codec_context);
  86         }
  87
  88         if (_subtitle_codec_context) {
  89                 avcodec_close (_subtitle_codec_context);
  90         }
  91
  92         av_free (_frame);
  93
  94         avformat_close_input (&_format_context);
  95 }
  96
  97 void
  98 FFmpegDecoder::setup_general ()
  99 {
 100         int r;
 101
 102         av_register_all ();
 103
 104         if ((r = avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0)) != 0) {
 105                 throw OpenFileError (_film->content_path ());
 106         }
 107
 108         if (avformat_find_stream_info (_format_context, 0) < 0) {
 109                 throw DecodeError ("could not find stream information");
 110         }
 111
 112         /* Find video, audio and subtitle streams and choose the first of each */
 113
 114         for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
 115                 AVStream* s = _format_context->streams[i];
 116                 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
 117                         _video_stream = i;
 118                 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 119                         if (_audio_stream == -1) {
 120                                 _audio_stream = i;
 121                         }
 122                         _audio_streams.push_back (AudioStream (stream_name (s), i, s->codec->channels));
 123                 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
 124                         if (_subtitle_stream == -1) {
 125                                 _subtitle_stream = i;
 126                         }
 127                         _subtitle_streams.push_back (SubtitleStream (stream_name (s), i));
 128                 }
 129         }
 130
 131         /* Now override audio and subtitle streams with those from the Film, if it has any */
 132
 133         if (_film->audio_stream_index() != -1) {
 134                 _audio_stream = _film->audio_stream().id();
 135         }
 136
 137         if (_film->subtitle_stream_index() != -1) {
 138                 _subtitle_stream = _film->subtitle_stream().id ();
 139         }
 140
 141         if (_video_stream < 0) {
 142                 throw DecodeError ("could not find video stream");
 143         }
 144
 145         _frame = avcodec_alloc_frame ();
 146         if (_frame == 0) {
 147                 throw DecodeError ("could not allocate frame");
 148         }
 149 }
 150
 151 void
 152 FFmpegDecoder::setup_video ()
 153 {
 154         _video_codec_context = _format_context->streams[_video_stream]->codec;
 155         _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
 156
 157         if (_video_codec == 0) {
 158                 throw DecodeError ("could not find video decoder");
 159         }
 160
 161         /* I think this prevents problems with green hash on decodes and
 162            "changing frame properties on the fly is not supported by all filters"
 163            messages with some content.  Although I'm not sure; needs checking.
 164         */
 165         AVDictionary* opts = 0;
 166         av_dict_set (&opts, "threads", "1", 0);
 167
 168         if (avcodec_open2 (_video_codec_context, _video_codec, &opts) < 0) {
 169                 throw DecodeError ("could not open video decoder");
 170         }
 171 }
 172
 173 void
 174 FFmpegDecoder::setup_audio ()
 175 {
 176         if (_audio_stream < 0) {
 177                 return;
 178         }
 179
 180         _audio_codec_context = _format_context->streams[_audio_stream]->codec;
 181         _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
 182
 183         if (_audio_codec == 0) {
 184                 throw DecodeError ("could not find audio decoder");
 185         }
 186
 187         if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
 188                 throw DecodeError ("could not open audio decoder");
 189         }
 190
 191         /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
 192            so bodge it here.  No idea why we should have to do this.
 193         */
 194
 195         if (_audio_codec_context->channel_layout == 0) {
 196                 _audio_codec_context->channel_layout = av_get_default_channel_layout (audio_channels ());
 197         }
 198 }
 199
 200 void
 201 FFmpegDecoder::setup_subtitle ()
 202 {
 203         if (_subtitle_stream < 0) {
 204                 return;
 205         }
 206
 207         _subtitle_codec_context = _format_context->streams[_subtitle_stream]->codec;
 208         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 209
 210         if (_subtitle_codec == 0) {
 211                 throw DecodeError ("could not find subtitle decoder");
 212         }
 213
 214         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 215                 throw DecodeError ("could not open subtitle decoder");
 216         }
 217 }
 218
 219
 220 bool
 221 FFmpegDecoder::pass ()
 222 {
 223         int r = av_read_frame (_format_context, &_packet);
 224
 225         if (r < 0) {
 226                 if (r != AVERROR_EOF) {
 227                         /* Maybe we should fail here, but for now we'll just finish off instead */
 228                         char buf[256];
 229                         av_strerror (r, buf, sizeof(buf));
 230                         _film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
 231                 }
 232
 233                 /* Get any remaining frames */
 234
 235                 _packet.data = 0;
 236                 _packet.size = 0;
 237
 238                 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 239
 240                 int frame_finished;
 241
 242                 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 243                         process_video (_frame);
 244                 }
 245
 246                 if (_audio_stream >= 0 && _opt->decode_audio && _film->use_source_audio()) {
 247                         while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 248                                 int const data_size = av_samples_get_buffer_size (
 249                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 250                                         );
 251
 252                                 assert (_audio_codec_context->channels == _film->audio_channels());
 253                                 process_audio (deinterleave_audio (_frame->data[0], data_size));
 254                         }
 255                 }
 256
 257                 return true;
 258         }
 259
 260         avcodec_get_frame_defaults (_frame);
 261
 262         if (_packet.stream_index == _video_stream) {
 263
 264                 int frame_finished;
 265                 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
 266                 if (r >= 0 && frame_finished) {
 267
 268                         if (r != _packet.size) {
 269                                 _film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
 270                         }
 271
 272                         /* Where we are in the output, in seconds */
 273                         double const out_pts_seconds = video_frame() / frames_per_second();
 274
 275                         /* Where we are in the source, in seconds */
 276                         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 277                                 * av_frame_get_best_effort_timestamp(_frame);
 278
 279                         if (!_first_video) {
 280                                 _first_video = source_pts_seconds;
 281                         }
 282
 283                         /* Difference between where we are and where we should be */
 284                         double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
 285                         double const one_frame = 1 / frames_per_second();
 286
 287                         /* Insert frames if required to get out_pts_seconds up to pts_seconds */
 288                         if (delta > one_frame) {
 289                                 int const extra = rint (delta / one_frame);
 290                                 for (int i = 0; i < extra; ++i) {
 291                                         repeat_last_video ();
 292                                         _film->log()->log (
 293                                                 String::compose (
 294                                                         "Extra frame inserted at %1s; source frame %2, source PTS %3",
 295                                                         out_pts_seconds, video_frame(), source_pts_seconds
 296                                                         )
 297                                                 );
 298                                 }
 299                         }
 300
 301                         if (delta > -one_frame) {
 302                                 /* Process this frame */
 303                                 process_video (_frame);
 304                         } else {
 305                                 /* Otherwise we are omitting a frame to keep things right */
 306                                 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
 307                         }
 308                 }
 309
 310         } else if (_audio_stream >= 0 && _packet.stream_index == _audio_stream && _opt->decode_audio && _film->use_source_audio()) {
 311
 312                 int frame_finished;
 313                 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 314
 315                         /* Where we are in the source, in seconds */
 316                         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 317                                 * av_frame_get_best_effort_timestamp(_frame);
 318
 319                         /* We only decode audio if we've had our first video packet through, and if it
 320                            was before this packet.  Until then audio is thrown away.
 321                         */
 322
 323                         if (_first_video && _first_video.get() <= source_pts_seconds) {
 324
 325                                 if (!_first_audio) {
 326                                         _first_audio = source_pts_seconds;
 327
 328                                         /* This is our first audio frame, and if we've arrived here we must have had our
 329                                            first video frame.  Push some silence to make up any gap between our first
 330                                            video frame and our first audio.
 331                                         */
 332
 333                                         /* frames of silence that we must push */
 334                                         int const s = rint ((_first_audio.get() - _first_video.get()) * audio_sample_rate ());
 335
 336                                         _film->log()->log (
 337                                                 String::compose (
 338                                                         "First video at %1, first audio at %2, pushing %3 frames of silence for %4 channels (%5 bytes per sample)",
 339                                                         _first_video.get(), _first_audio.get(), s, audio_channels(), bytes_per_audio_sample()
 340                                                         )
 341                                                 );
 342
 343                                         if (s) {
 344                                                 shared_ptr<AudioBuffers> audio (new AudioBuffers (audio_channels(), s));
 345                                                 audio->make_silent ();
 346                                                 process_audio (audio);
 347                                         }
 348                                 }
 349
 350                                 int const data_size = av_samples_get_buffer_size (
 351                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 352                                         );
 353
 354                                 assert (_audio_codec_context->channels == _film->audio_channels());
 355                                 process_audio (deinterleave_audio (_frame->data[0], data_size));
 356                         }
 357                 }
 358
 359         } else if (_subtitle_stream >= 0 && _packet.stream_index == _subtitle_stream && _opt->decode_subtitles && _first_video) {
 360
 361                 int got_subtitle;
 362                 AVSubtitle sub;
 363                 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
 364                         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 365                            indicate that the previous subtitle should stop.
 366                         */
 367                         if (sub.num_rects > 0) {
 368                                 process_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
 369                         } else {
 370                                 process_subtitle (shared_ptr<TimedSubtitle> ());
 371                         }
 372                         avsubtitle_free (&sub);
 373                 }
 374         }
 375
 376         av_free_packet (&_packet);
 377         return false;
 378 }
 379
 380 shared_ptr<AudioBuffers>
 381 FFmpegDecoder::deinterleave_audio (uint8_t* data, int size)
 382 {
 383         assert (_film->audio_channels());
 384         assert (bytes_per_audio_sample());
 385
 386         /* Deinterleave and convert to float */
 387
 388         assert ((size % (bytes_per_audio_sample() * audio_channels())) == 0);
 389
 390         int const total_samples = size / bytes_per_audio_sample();
 391         int const frames = total_samples / _film->audio_channels();
 392         shared_ptr<AudioBuffers> audio (new AudioBuffers (audio_channels(), frames));
 393
 394         switch (audio_sample_format()) {
 395         case AV_SAMPLE_FMT_S16:
 396         {
 397                 int16_t* p = (int16_t *) data;
 398                 int sample = 0;
 399                 int channel = 0;
 400                 for (int i = 0; i < total_samples; ++i) {
 401                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 402
 403                         ++channel;
 404                         if (channel == _film->audio_channels()) {
 405                                 channel = 0;
 406                                 ++sample;
 407                         }
 408                 }
 409         }
 410         break;
 411
 412         case AV_SAMPLE_FMT_S32:
 413         {
 414                 int32_t* p = (int32_t *) data;
 415                 int sample = 0;
 416                 int channel = 0;
 417                 for (int i = 0; i < total_samples; ++i) {
 418                         audio->data(channel)[sample] = float(*p++) / (1 << 31);
 419
 420                         ++channel;
 421                         if (channel == _film->audio_channels()) {
 422                                 channel = 0;
 423                                 ++sample;
 424                         }
 425                 }
 426         }
 427
 428         case AV_SAMPLE_FMT_FLTP:
 429         {
 430                 float* p = reinterpret_cast<float*> (data);
 431                 for (int i = 0; i < _film->audio_channels(); ++i) {
 432                         memcpy (audio->data(i), p, frames * sizeof(float));
 433                         p += frames;
 434                 }
 435         }
 436         break;
 437
 438         default:
 439                 assert (false);
 440         }
 441
 442         return audio;
 443 }
 444
 445 float
 446 FFmpegDecoder::frames_per_second () const
 447 {
 448         AVStream* s = _format_context->streams[_video_stream];
 449
 450         if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
 451                 return av_q2d (s->avg_frame_rate);
 452         }
 453
 454         return av_q2d (s->r_frame_rate);
 455 }
 456
 457 int
 458 FFmpegDecoder::audio_channels () const
 459 {
 460         if (_audio_codec_context == 0) {
 461                 return 0;
 462         }
 463
 464         return _audio_codec_context->channels;
 465 }
 466
 467 int
 468 FFmpegDecoder::audio_sample_rate () const
 469 {
 470         if (_audio_codec_context == 0) {
 471                 return 0;
 472         }
 473
 474         return _audio_codec_context->sample_rate;
 475 }
 476
 477 AVSampleFormat
 478 FFmpegDecoder::audio_sample_format () const
 479 {
 480         if (_audio_codec_context == 0) {
 481                 return (AVSampleFormat) 0;
 482         }
 483
 484         return _audio_codec_context->sample_fmt;
 485 }
 486
 487 int64_t
 488 FFmpegDecoder::audio_channel_layout () const
 489 {
 490         if (_audio_codec_context == 0) {
 491                 return 0;
 492         }
 493
 494         return _audio_codec_context->channel_layout;
 495 }
 496
 497 Size
 498 FFmpegDecoder::native_size () const
 499 {
 500         return Size (_video_codec_context->width, _video_codec_context->height);
 501 }
 502
 503 PixelFormat
 504 FFmpegDecoder::pixel_format () const
 505 {
 506         return _video_codec_context->pix_fmt;
 507 }
 508
 509 int
 510 FFmpegDecoder::time_base_numerator () const
 511 {
 512         return _video_codec_context->time_base.num;
 513 }
 514
 515 int
 516 FFmpegDecoder::time_base_denominator () const
 517 {
 518         return _video_codec_context->time_base.den;
 519 }
 520
 521 int
 522 FFmpegDecoder::sample_aspect_ratio_numerator () const
 523 {
 524         return _video_codec_context->sample_aspect_ratio.num;
 525 }
 526
 527 int
 528 FFmpegDecoder::sample_aspect_ratio_denominator () const
 529 {
 530         return _video_codec_context->sample_aspect_ratio.den;
 531 }
 532
 533 bool
 534 FFmpegDecoder::has_subtitles () const
 535 {
 536         return (_subtitle_stream != -1);
 537 }
 538
 539 vector<AudioStream>
 540 FFmpegDecoder::audio_streams () const
 541 {
 542         return _audio_streams;
 543 }
 544
 545 vector<SubtitleStream>
 546 FFmpegDecoder::subtitle_streams () const
 547 {
 548         return _subtitle_streams;
 549 }
 550
 551 string
 552 FFmpegDecoder::stream_name (AVStream* s) const
 553 {
 554         stringstream n;
 555
 556         AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
 557         if (lang) {
 558                 n << lang->value;
 559         }
 560
 561         AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
 562         if (title) {
 563                 if (!n.str().empty()) {
 564                         n << " ";
 565                 }
 566                 n << title->value;
 567         }
 568
 569         if (n.str().empty()) {
 570                 n << "unknown";
 571         }
 572
 573         return n.str ();
 574 }
 575