src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
   20 /** @file  src/lib/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 extern "C" {
  32 #include <tiffio.h>
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 #include <libswscale/swscale.h>
  36 #include <libpostproc/postprocess.h>
  37 }
  38 #include <sndfile.h>
  39 #include "film.h"
  40 #include "format.h"
  41 #include "transcoder.h"
  42 #include "job.h"
  43 #include "filter.h"
  44 #include "options.h"
  45 #include "exceptions.h"
  46 #include "image.h"
  47 #include "util.h"
  48 #include "log.h"
  49 #include "ffmpeg_decoder.h"
  50 #include "subtitle.h"
  51
  52 using std::cout;
  53 using std::string;
  54 using std::vector;
  55 using std::stringstream;
  56 using boost::shared_ptr;
  57 using boost::optional;
  58
  59 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const Options> o, Job* j)
  60         : Decoder (f, o, j)
  61         , _format_context (0)
  62         , _video_stream (-1)
  63         , _frame (0)
  64         , _video_codec_context (0)
  65         , _video_codec (0)
  66         , _audio_codec_context (0)
  67         , _audio_codec (0)
  68         , _subtitle_codec_context (0)
  69         , _subtitle_codec (0)
  70 {
  71         setup_general ();
  72         setup_video ();
  73         setup_audio ();
  74         setup_subtitle ();
  75 }
  76
  77 FFmpegDecoder::~FFmpegDecoder ()
  78 {
  79         if (_audio_codec_context) {
  80                 avcodec_close (_audio_codec_context);
  81         }
  82
  83         if (_video_codec_context) {
  84                 avcodec_close (_video_codec_context);
  85         }
  86
  87         if (_subtitle_codec_context) {
  88                 avcodec_close (_subtitle_codec_context);
  89         }
  90
  91         av_free (_frame);
  92
  93         avformat_close_input (&_format_context);
  94 }
  95
  96 void
  97 FFmpegDecoder::setup_general ()
  98 {
  99         int r;
 100
 101         av_register_all ();
 102
 103         if ((r = avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0)) != 0) {
 104                 throw OpenFileError (_film->content_path ());
 105         }
 106
 107         if (avformat_find_stream_info (_format_context, 0) < 0) {
 108                 throw DecodeError ("could not find stream information");
 109         }
 110
 111         /* Find video, audio and subtitle streams and choose the first of each */
 112
 113         for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
 114                 AVStream* s = _format_context->streams[i];
 115                 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
 116                         _video_stream = i;
 117                 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 118                         _audio_streams.push_back (AudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout));
 119                 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
 120                         _subtitle_streams.push_back (SubtitleStream (stream_name (s), i));
 121                 }
 122         }
 123
 124         if (_video_stream < 0) {
 125                 throw DecodeError ("could not find video stream");
 126         }
 127
 128         _frame = avcodec_alloc_frame ();
 129         if (_frame == 0) {
 130                 throw DecodeError ("could not allocate frame");
 131         }
 132 }
 133
 134 void
 135 FFmpegDecoder::setup_video ()
 136 {
 137         _video_codec_context = _format_context->streams[_video_stream]->codec;
 138         _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
 139
 140         if (_video_codec == 0) {
 141                 throw DecodeError ("could not find video decoder");
 142         }
 143
 144         /* I think this prevents problems with green hash on decodes and
 145            "changing frame properties on the fly is not supported by all filters"
 146            messages with some content.  Although I'm not sure; needs checking.
 147         */
 148         AVDictionary* opts = 0;
 149         av_dict_set (&opts, "threads", "1", 0);
 150
 151         if (avcodec_open2 (_video_codec_context, _video_codec, &opts) < 0) {
 152                 throw DecodeError ("could not open video decoder");
 153         }
 154 }
 155
 156 void
 157 FFmpegDecoder::setup_audio ()
 158 {
 159         if (!_audio_stream) {
 160                 return;
 161         }
 162
 163         _audio_codec_context = _format_context->streams[_audio_stream.get().id()]->codec;
 164         _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
 165
 166         if (_audio_codec == 0) {
 167                 throw DecodeError ("could not find audio decoder");
 168         }
 169
 170         if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
 171                 throw DecodeError ("could not open audio decoder");
 172         }
 173
 174         /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
 175            so bodge it here.  No idea why we should have to do this.
 176         */
 177
 178         if (_audio_codec_context->channel_layout == 0) {
 179                 _audio_codec_context->channel_layout = av_get_default_channel_layout (_audio_stream.get().channels());
 180         }
 181 }
 182
 183 void
 184 FFmpegDecoder::setup_subtitle ()
 185 {
 186         if (!_subtitle_stream) {
 187                 return;
 188         }
 189
 190         _subtitle_codec_context = _format_context->streams[_subtitle_stream.get().id()]->codec;
 191         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 192
 193         if (_subtitle_codec == 0) {
 194                 throw DecodeError ("could not find subtitle decoder");
 195         }
 196
 197         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 198                 throw DecodeError ("could not open subtitle decoder");
 199         }
 200 }
 201
 202
 203 bool
 204 FFmpegDecoder::pass ()
 205 {
 206         int r = av_read_frame (_format_context, &_packet);
 207
 208         if (r < 0) {
 209                 if (r != AVERROR_EOF) {
 210                         /* Maybe we should fail here, but for now we'll just finish off instead */
 211                         char buf[256];
 212                         av_strerror (r, buf, sizeof(buf));
 213                         _film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
 214                 }
 215
 216                 /* Get any remaining frames */
 217
 218                 _packet.data = 0;
 219                 _packet.size = 0;
 220
 221                 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 222
 223                 int frame_finished;
 224
 225                 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 226                         process_video (_frame);
 227                 }
 228
 229                 if (_audio_stream && _opt->decode_audio && _film->use_content_audio()) {
 230                         while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 231                                 int const data_size = av_samples_get_buffer_size (
 232                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 233                                         );
 234
 235                                 assert (_audio_codec_context->channels == _film->audio_channels());
 236                                 process_audio (deinterleave_audio (_frame->data[0], data_size));
 237                         }
 238                 }
 239
 240                 return true;
 241         }
 242
 243         avcodec_get_frame_defaults (_frame);
 244
 245         if (_packet.stream_index == _video_stream) {
 246
 247                 int frame_finished;
 248                 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
 249                 if (r >= 0 && frame_finished) {
 250
 251                         if (r != _packet.size) {
 252                                 _film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
 253                         }
 254
 255                         /* Where we are in the output, in seconds */
 256                         double const out_pts_seconds = video_frame() / frames_per_second();
 257
 258                         /* Where we are in the source, in seconds */
 259                         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 260                                 * av_frame_get_best_effort_timestamp(_frame);
 261
 262                         if (!_first_video) {
 263                                 _first_video = source_pts_seconds;
 264                         }
 265
 266                         /* Difference between where we are and where we should be */
 267                         double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
 268                         double const one_frame = 1 / frames_per_second();
 269
 270                         /* Insert frames if required to get out_pts_seconds up to pts_seconds */
 271                         if (delta > one_frame) {
 272                                 int const extra = rint (delta / one_frame);
 273                                 for (int i = 0; i < extra; ++i) {
 274                                         repeat_last_video ();
 275                                         _film->log()->log (
 276                                                 String::compose (
 277                                                         "Extra frame inserted at %1s; source frame %2, source PTS %3",
 278                                                         out_pts_seconds, video_frame(), source_pts_seconds
 279                                                         )
 280                                                 );
 281                                 }
 282                         }
 283
 284                         if (delta > -one_frame) {
 285                                 /* Process this frame */
 286                                 process_video (_frame);
 287                         } else {
 288                                 /* Otherwise we are omitting a frame to keep things right */
 289                                 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
 290                         }
 291                 }
 292
 293         } else if (_audio_stream && _packet.stream_index == _audio_stream.get().id() && _opt->decode_audio && _film->use_content_audio()) {
 294
 295                 int frame_finished;
 296                 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 297
 298                         /* Where we are in the source, in seconds */
 299                         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 300                                 * av_frame_get_best_effort_timestamp(_frame);
 301
 302                         /* We only decode audio if we've had our first video packet through, and if it
 303                            was before this packet.  Until then audio is thrown away.
 304                         */
 305
 306                         if (_first_video && _first_video.get() <= source_pts_seconds) {
 307
 308                                 if (!_first_audio) {
 309                                         _first_audio = source_pts_seconds;
 310
 311                                         /* This is our first audio frame, and if we've arrived here we must have had our
 312                                            first video frame.  Push some silence to make up any gap between our first
 313                                            video frame and our first audio.
 314                                         */
 315
 316                                         /* frames of silence that we must push */
 317                                         int const s = rint ((_first_audio.get() - _first_video.get()) * _audio_stream.get().sample_rate ());
 318
 319                                         _film->log()->log (
 320                                                 String::compose (
 321                                                         "First video at %1, first audio at %2, pushing %3 frames of silence for %4 channels (%5 bytes per sample)",
 322                                                         _first_video.get(), _first_audio.get(), s, _audio_stream.get().channels(), bytes_per_audio_sample()
 323                                                         )
 324                                                 );
 325
 326                                         if (s) {
 327                                                 shared_ptr<AudioBuffers> audio (new AudioBuffers (_audio_stream.get().channels(), s));
 328                                                 audio->make_silent ();
 329                                                 process_audio (audio);
 330                                         }
 331                                 }
 332
 333                                 int const data_size = av_samples_get_buffer_size (
 334                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 335                                         );
 336
 337                                 assert (_audio_codec_context->channels == _film->audio_channels());
 338                                 process_audio (deinterleave_audio (_frame->data[0], data_size));
 339                         }
 340                 }
 341
 342         } else if (_subtitle_stream && _packet.stream_index == _subtitle_stream.get().id() && _opt->decode_subtitles && _first_video) {
 343
 344                 int got_subtitle;
 345                 AVSubtitle sub;
 346                 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
 347                         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 348                            indicate that the previous subtitle should stop.
 349                         */
 350                         if (sub.num_rects > 0) {
 351                                 process_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
 352                         } else {
 353                                 process_subtitle (shared_ptr<TimedSubtitle> ());
 354                         }
 355                         avsubtitle_free (&sub);
 356                 }
 357         }
 358
 359         av_free_packet (&_packet);
 360         return false;
 361 }
 362
 363 shared_ptr<AudioBuffers>
 364 FFmpegDecoder::deinterleave_audio (uint8_t* data, int size)
 365 {
 366         assert (_film->audio_channels());
 367         assert (bytes_per_audio_sample());
 368
 369         /* Deinterleave and convert to float */
 370
 371         assert ((size % (bytes_per_audio_sample() * _audio_stream.get().channels())) == 0);
 372
 373         int const total_samples = size / bytes_per_audio_sample();
 374         int const frames = total_samples / _film->audio_channels();
 375         shared_ptr<AudioBuffers> audio (new AudioBuffers (_audio_stream.get().channels(), frames));
 376
 377         switch (audio_sample_format()) {
 378         case AV_SAMPLE_FMT_S16:
 379         {
 380                 int16_t* p = (int16_t *) data;
 381                 int sample = 0;
 382                 int channel = 0;
 383                 for (int i = 0; i < total_samples; ++i) {
 384                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 385
 386                         ++channel;
 387                         if (channel == _film->audio_channels()) {
 388                                 channel = 0;
 389                                 ++sample;
 390                         }
 391                 }
 392         }
 393         break;
 394
 395         case AV_SAMPLE_FMT_S32:
 396         {
 397                 int32_t* p = (int32_t *) data;
 398                 int sample = 0;
 399                 int channel = 0;
 400                 for (int i = 0; i < total_samples; ++i) {
 401                         audio->data(channel)[sample] = float(*p++) / (1 << 31);
 402
 403                         ++channel;
 404                         if (channel == _film->audio_channels()) {
 405                                 channel = 0;
 406                                 ++sample;
 407                         }
 408                 }
 409         }
 410
 411         case AV_SAMPLE_FMT_FLTP:
 412         {
 413                 float* p = reinterpret_cast<float*> (data);
 414                 for (int i = 0; i < _film->audio_channels(); ++i) {
 415                         memcpy (audio->data(i), p, frames * sizeof(float));
 416                         p += frames;
 417                 }
 418         }
 419         break;
 420
 421         default:
 422                 assert (false);
 423         }
 424
 425         return audio;
 426 }
 427
 428 float
 429 FFmpegDecoder::frames_per_second () const
 430 {
 431         AVStream* s = _format_context->streams[_video_stream];
 432
 433         if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
 434                 return av_q2d (s->avg_frame_rate);
 435         }
 436
 437         return av_q2d (s->r_frame_rate);
 438 }
 439
 440 AVSampleFormat
 441 FFmpegDecoder::audio_sample_format () const
 442 {
 443         if (_audio_codec_context == 0) {
 444                 return (AVSampleFormat) 0;
 445         }
 446
 447         return _audio_codec_context->sample_fmt;
 448 }
 449
 450 Size
 451 FFmpegDecoder::native_size () const
 452 {
 453         return Size (_video_codec_context->width, _video_codec_context->height);
 454 }
 455
 456 PixelFormat
 457 FFmpegDecoder::pixel_format () const
 458 {
 459         return _video_codec_context->pix_fmt;
 460 }
 461
 462 int
 463 FFmpegDecoder::time_base_numerator () const
 464 {
 465         return _video_codec_context->time_base.num;
 466 }
 467
 468 int
 469 FFmpegDecoder::time_base_denominator () const
 470 {
 471         return _video_codec_context->time_base.den;
 472 }
 473
 474 int
 475 FFmpegDecoder::sample_aspect_ratio_numerator () const
 476 {
 477         return _video_codec_context->sample_aspect_ratio.num;
 478 }
 479
 480 int
 481 FFmpegDecoder::sample_aspect_ratio_denominator () const
 482 {
 483         return _video_codec_context->sample_aspect_ratio.den;
 484 }
 485
 486 string
 487 FFmpegDecoder::stream_name (AVStream* s) const
 488 {
 489         stringstream n;
 490
 491         AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
 492         if (lang) {
 493                 n << lang->value;
 494         }
 495
 496         AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
 497         if (title) {
 498                 if (!n.str().empty()) {
 499                         n << " ";
 500                 }
 501                 n << title->value;
 502         }
 503
 504         if (n.str().empty()) {
 505                 n << "unknown";
 506         }
 507
 508         return n.str ();
 509 }
 510
 511 int
 512 FFmpegDecoder::bytes_per_audio_sample () const
 513 {
 514         return av_get_bytes_per_sample (audio_sample_format ());
 515 }
 516
 517 void
 518 FFmpegDecoder::set_audio_stream (optional<AudioStream> s)
 519 {
 520         Decoder::set_audio_stream (s);
 521         setup_audio ();
 522 }
 523
 524 void
 525 FFmpegDecoder::set_subtitle_stream (optional<SubtitleStream> s)
 526 {
 527         Decoder::set_subtitle_stream (s);
 528         setup_subtitle ();
 529 }