src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 /** @file  src/lib/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 extern "C" {
  32 #include <tiffio.h>
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 #include <libswscale/swscale.h>
  36 #include <libpostproc/postprocess.h>
  37 }
  38 #include <sndfile.h>
  39 #include "film.h"
  40 #include "format.h"
  41 #include "transcoder.h"
  42 #include "job.h"
  43 #include "filter.h"
  44 #include "options.h"
  45 #include "exceptions.h"
  46 #include "image.h"
  47 #include "util.h"
  48 #include "log.h"
  49 #include "ffmpeg_decoder.h"
  50 #include "filter_graph.h"
  51 #include "subtitle.h"
  52
  53 using std::cout;
  54 using std::string;
  55 using std::vector;
  56 using std::stringstream;
  57 using std::list;
  58 using boost::shared_ptr;
  59 using boost::optional;
  60
  61 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const Options> o, Job* j)
  62         : Decoder (f, o, j)
  63         , _format_context (0)
  64         , _video_stream (-1)
  65         , _frame (0)
  66         , _video_codec_context (0)
  67         , _video_codec (0)
  68         , _audio_codec_context (0)
  69         , _audio_codec (0)
  70         , _subtitle_codec_context (0)
  71         , _subtitle_codec (0)
  72 {
  73         setup_general ();
  74         setup_video ();
  75         setup_audio ();
  76         setup_subtitle ();
  77 }
  78
  79 FFmpegDecoder::~FFmpegDecoder ()
  80 {
  81         if (_audio_codec_context) {
  82                 avcodec_close (_audio_codec_context);
  83         }
  84
  85         if (_video_codec_context) {
  86                 avcodec_close (_video_codec_context);
  87         }
  88
  89         if (_subtitle_codec_context) {
  90                 avcodec_close (_subtitle_codec_context);
  91         }
  92
  93         av_free (_frame);
  94
  95         avformat_close_input (&_format_context);
  96 }
  97
  98 void
  99 FFmpegDecoder::setup_general ()
 100 {
 101         int r;
 102
 103         av_register_all ();
 104
 105         if ((r = avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0)) != 0) {
 106                 throw OpenFileError (_film->content_path ());
 107         }
 108
 109         if (avformat_find_stream_info (_format_context, 0) < 0) {
 110                 throw DecodeError ("could not find stream information");
 111         }
 112
 113         /* Find video, audio and subtitle streams and choose the first of each */
 114
 115         for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
 116                 AVStream* s = _format_context->streams[i];
 117                 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
 118                         _video_stream = i;
 119                 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 120                         _audio_streams.push_back (AudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout));
 121                 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
 122                         _subtitle_streams.push_back (SubtitleStream (stream_name (s), i));
 123                 }
 124         }
 125
 126         if (_video_stream < 0) {
 127                 throw DecodeError ("could not find video stream");
 128         }
 129
 130         _frame = avcodec_alloc_frame ();
 131         if (_frame == 0) {
 132                 throw DecodeError ("could not allocate frame");
 133         }
 134 }
 135
 136 void
 137 FFmpegDecoder::setup_video ()
 138 {
 139         _video_codec_context = _format_context->streams[_video_stream]->codec;
 140         _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
 141
 142         if (_video_codec == 0) {
 143                 throw DecodeError ("could not find video decoder");
 144         }
 145
 146         /* I think this prevents problems with green hash on decodes and
 147            "changing frame properties on the fly is not supported by all filters"
 148            messages with some content.  Although I'm not sure; needs checking.
 149         */
 150         AVDictionary* opts = 0;
 151         av_dict_set (&opts, "threads", "1", 0);
 152
 153         if (avcodec_open2 (_video_codec_context, _video_codec, &opts) < 0) {
 154                 throw DecodeError ("could not open video decoder");
 155         }
 156 }
 157
 158 void
 159 FFmpegDecoder::setup_audio ()
 160 {
 161         if (!_audio_stream) {
 162                 return;
 163         }
 164
 165         _audio_codec_context = _format_context->streams[_audio_stream.get().id()]->codec;
 166         _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
 167
 168         if (_audio_codec == 0) {
 169                 throw DecodeError ("could not find audio decoder");
 170         }
 171
 172         if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
 173                 throw DecodeError ("could not open audio decoder");
 174         }
 175
 176         /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
 177            so bodge it here.  No idea why we should have to do this.
 178         */
 179
 180         if (_audio_codec_context->channel_layout == 0) {
 181                 _audio_codec_context->channel_layout = av_get_default_channel_layout (_audio_stream.get().channels());
 182         }
 183 }
 184
 185 void
 186 FFmpegDecoder::setup_subtitle ()
 187 {
 188         if (!_subtitle_stream) {
 189                 return;
 190         }
 191
 192         _subtitle_codec_context = _format_context->streams[_subtitle_stream.get().id()]->codec;
 193         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 194
 195         if (_subtitle_codec == 0) {
 196                 throw DecodeError ("could not find subtitle decoder");
 197         }
 198
 199         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 200                 throw DecodeError ("could not open subtitle decoder");
 201         }
 202 }
 203
 204
 205 bool
 206 FFmpegDecoder::pass ()
 207 {
 208         int r = av_read_frame (_format_context, &_packet);
 209
 210         if (r < 0) {
 211                 if (r != AVERROR_EOF) {
 212                         /* Maybe we should fail here, but for now we'll just finish off instead */
 213                         char buf[256];
 214                         av_strerror (r, buf, sizeof(buf));
 215                         _film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
 216                 }
 217
 218                 /* Get any remaining frames */
 219
 220                 _packet.data = 0;
 221                 _packet.size = 0;
 222
 223                 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 224
 225                 int frame_finished;
 226
 227                 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 228                         filter_and_emit_video (_frame);
 229                 }
 230
 231                 if (_audio_stream && _opt->decode_audio && _film->use_content_audio()) {
 232                         while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 233                                 int const data_size = av_samples_get_buffer_size (
 234                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 235                                         );
 236
 237                                 assert (_audio_codec_context->channels == _film->audio_channels());
 238                                 Audio (deinterleave_audio (_frame->data[0], data_size));
 239                         }
 240                 }
 241
 242                 return true;
 243         }
 244
 245         avcodec_get_frame_defaults (_frame);
 246
 247         if (_packet.stream_index == _video_stream) {
 248
 249                 int frame_finished;
 250                 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
 251                 if (r >= 0 && frame_finished) {
 252
 253                         if (r != _packet.size) {
 254                                 _film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
 255                         }
 256
 257                         /* Where we are in the output, in seconds */
 258                         double const out_pts_seconds = video_frame() / frames_per_second();
 259
 260                         /* Where we are in the source, in seconds */
 261                         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 262                                 * av_frame_get_best_effort_timestamp(_frame);
 263
 264                         if (!_first_video) {
 265                                 _first_video = source_pts_seconds;
 266                         }
 267
 268                         /* Difference between where we are and where we should be */
 269                         double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
 270                         double const one_frame = 1 / frames_per_second();
 271
 272                         /* Insert frames if required to get out_pts_seconds up to pts_seconds */
 273                         if (delta > one_frame) {
 274                                 int const extra = rint (delta / one_frame);
 275                                 for (int i = 0; i < extra; ++i) {
 276                                         repeat_last_video ();
 277                                         _film->log()->log (
 278                                                 String::compose (
 279                                                         "Extra frame inserted at %1s; source frame %2, source PTS %3",
 280                                                         out_pts_seconds, video_frame(), source_pts_seconds
 281                                                         )
 282                                                 );
 283                                 }
 284                         }
 285
 286                         if (delta > -one_frame) {
 287                                 /* Process this frame */
 288                                 filter_and_emit_video (_frame);
 289                         } else {
 290                                 /* Otherwise we are omitting a frame to keep things right */
 291                                 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
 292                         }
 293                 }
 294
 295         } else if (_audio_stream && _packet.stream_index == _audio_stream.get().id() && _opt->decode_audio && _film->use_content_audio()) {
 296
 297                 int frame_finished;
 298                 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 299
 300                         /* Where we are in the source, in seconds */
 301                         double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
 302                                 * av_frame_get_best_effort_timestamp(_frame);
 303
 304                         /* We only decode audio if we've had our first video packet through, and if it
 305                            was before this packet.  Until then audio is thrown away.
 306                         */
 307
 308                         if (_first_video && _first_video.get() <= source_pts_seconds) {
 309
 310                                 if (!_first_audio) {
 311                                         _first_audio = source_pts_seconds;
 312
 313                                         /* This is our first audio frame, and if we've arrived here we must have had our
 314                                            first video frame.  Push some silence to make up any gap between our first
 315                                            video frame and our first audio.
 316                                         */
 317
 318                                         /* frames of silence that we must push */
 319                                         int const s = rint ((_first_audio.get() - _first_video.get()) * _audio_stream.get().sample_rate ());
 320
 321                                         _film->log()->log (
 322                                                 String::compose (
 323                                                         "First video at %1, first audio at %2, pushing %3 frames of silence for %4 channels (%5 bytes per sample)",
 324                                                         _first_video.get(), _first_audio.get(), s, _audio_stream.get().channels(), bytes_per_audio_sample()
 325                                                         )
 326                                                 );
 327
 328                                         if (s) {
 329                                                 shared_ptr<AudioBuffers> audio (new AudioBuffers (_audio_stream.get().channels(), s));
 330                                                 audio->make_silent ();
 331                                                 Audio (audio);
 332                                         }
 333                                 }
 334
 335                                 int const data_size = av_samples_get_buffer_size (
 336                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 337                                         );
 338
 339                                 assert (_audio_codec_context->channels == _film->audio_channels());
 340                                 Audio (deinterleave_audio (_frame->data[0], data_size));
 341                         }
 342                 }
 343
 344         } else if (_subtitle_stream && _packet.stream_index == _subtitle_stream.get().id() && _opt->decode_subtitles && _first_video) {
 345
 346                 int got_subtitle;
 347                 AVSubtitle sub;
 348                 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
 349                         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 350                            indicate that the previous subtitle should stop.
 351                         */
 352                         if (sub.num_rects > 0) {
 353                                 emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
 354                         } else {
 355                                 emit_subtitle (shared_ptr<TimedSubtitle> ());
 356                         }
 357                         avsubtitle_free (&sub);
 358                 }
 359         }
 360
 361         av_free_packet (&_packet);
 362         return false;
 363 }
 364
 365 shared_ptr<AudioBuffers>
 366 FFmpegDecoder::deinterleave_audio (uint8_t* data, int size)
 367 {
 368         assert (_film->audio_channels());
 369         assert (bytes_per_audio_sample());
 370
 371         /* Deinterleave and convert to float */
 372
 373         assert ((size % (bytes_per_audio_sample() * _audio_stream.get().channels())) == 0);
 374
 375         int const total_samples = size / bytes_per_audio_sample();
 376         int const frames = total_samples / _film->audio_channels();
 377         shared_ptr<AudioBuffers> audio (new AudioBuffers (_audio_stream.get().channels(), frames));
 378
 379         switch (audio_sample_format()) {
 380         case AV_SAMPLE_FMT_S16:
 381         {
 382                 int16_t* p = (int16_t *) data;
 383                 int sample = 0;
 384                 int channel = 0;
 385                 for (int i = 0; i < total_samples; ++i) {
 386                         audio->data(channel)[sample] = float(*p++) / (1 << 15);
 387
 388                         ++channel;
 389                         if (channel == _film->audio_channels()) {
 390                                 channel = 0;
 391                                 ++sample;
 392                         }
 393                 }
 394         }
 395         break;
 396
 397         case AV_SAMPLE_FMT_S32:
 398         {
 399                 int32_t* p = (int32_t *) data;
 400                 int sample = 0;
 401                 int channel = 0;
 402                 for (int i = 0; i < total_samples; ++i) {
 403                         audio->data(channel)[sample] = float(*p++) / (1 << 31);
 404
 405                         ++channel;
 406                         if (channel == _film->audio_channels()) {
 407                                 channel = 0;
 408                                 ++sample;
 409                         }
 410                 }
 411         }
 412
 413         case AV_SAMPLE_FMT_FLTP:
 414         {
 415                 float* p = reinterpret_cast<float*> (data);
 416                 for (int i = 0; i < _film->audio_channels(); ++i) {
 417                         memcpy (audio->data(i), p, frames * sizeof(float));
 418                         p += frames;
 419                 }
 420         }
 421         break;
 422
 423         default:
 424                 assert (false);
 425         }
 426
 427         return audio;
 428 }
 429
 430 float
 431 FFmpegDecoder::frames_per_second () const
 432 {
 433         AVStream* s = _format_context->streams[_video_stream];
 434
 435         if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
 436                 return av_q2d (s->avg_frame_rate);
 437         }
 438
 439         return av_q2d (s->r_frame_rate);
 440 }
 441
 442 AVSampleFormat
 443 FFmpegDecoder::audio_sample_format () const
 444 {
 445         if (_audio_codec_context == 0) {
 446                 return (AVSampleFormat) 0;
 447         }
 448
 449         return _audio_codec_context->sample_fmt;
 450 }
 451
 452 Size
 453 FFmpegDecoder::native_size () const
 454 {
 455         return Size (_video_codec_context->width, _video_codec_context->height);
 456 }
 457
 458 PixelFormat
 459 FFmpegDecoder::pixel_format () const
 460 {
 461         return _video_codec_context->pix_fmt;
 462 }
 463
 464 int
 465 FFmpegDecoder::time_base_numerator () const
 466 {
 467         return _video_codec_context->time_base.num;
 468 }
 469
 470 int
 471 FFmpegDecoder::time_base_denominator () const
 472 {
 473         return _video_codec_context->time_base.den;
 474 }
 475
 476 int
 477 FFmpegDecoder::sample_aspect_ratio_numerator () const
 478 {
 479         return _video_codec_context->sample_aspect_ratio.num;
 480 }
 481
 482 int
 483 FFmpegDecoder::sample_aspect_ratio_denominator () const
 484 {
 485         return _video_codec_context->sample_aspect_ratio.den;
 486 }
 487
 488 string
 489 FFmpegDecoder::stream_name (AVStream* s) const
 490 {
 491         stringstream n;
 492
 493         AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
 494         if (lang) {
 495                 n << lang->value;
 496         }
 497
 498         AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
 499         if (title) {
 500                 if (!n.str().empty()) {
 501                         n << " ";
 502                 }
 503                 n << title->value;
 504         }
 505
 506         if (n.str().empty()) {
 507                 n << "unknown";
 508         }
 509
 510         return n.str ();
 511 }
 512
 513 int
 514 FFmpegDecoder::bytes_per_audio_sample () const
 515 {
 516         return av_get_bytes_per_sample (audio_sample_format ());
 517 }
 518
 519 void
 520 FFmpegDecoder::set_audio_stream (optional<AudioStream> s)
 521 {
 522         Decoder::set_audio_stream (s);
 523         setup_audio ();
 524 }
 525
 526 void
 527 FFmpegDecoder::set_subtitle_stream (optional<SubtitleStream> s)
 528 {
 529         Decoder::set_subtitle_stream (s);
 530         setup_subtitle ();
 531 }
 532
 533 void
 534 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
 535 {
 536         shared_ptr<FilterGraph> graph;
 537
 538         list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
 539         while (i != _filter_graphs.end() && !(*i)->can_process (Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
 540                 ++i;
 541         }
 542
 543         if (i == _filter_graphs.end ()) {
 544                 graph.reset (new FilterGraph (_film, this, _opt->apply_crop, Size (frame->width, frame->height), (AVPixelFormat) frame->format));
 545                 _filter_graphs.push_back (graph);
 546                 _film->log()->log (String::compose ("New graph for %1x%2, pixel format %3", frame->width, frame->height, frame->format));
 547         } else {
 548                 graph = *i;
 549         }
 550
 551         list<shared_ptr<Image> > images = graph->process (frame);
 552
 553         for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
 554                 emit_video (*i);
 555         }
 556 }