src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 /** @file  src/lib/ffmpeg_decoder.cc
  21  *  @brief A decoder using FFmpeg to decode content.
  22  */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 extern "C" {
  32 #include <tiffio.h>
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 #include <libswscale/swscale.h>
  36 #include <libpostproc/postprocess.h>
  37 }
  38 #include <sndfile.h>
  39 #include "film.h"
  40 #include "format.h"
  41 #include "transcoder.h"
  42 #include "job.h"
  43 #include "filter.h"
  44 #include "film_state.h"
  45 #include "options.h"
  46 #include "exceptions.h"
  47 #include "image.h"
  48 #include "util.h"
  49 #include "log.h"
  50 #include "ffmpeg_decoder.h"
  51 #include "subtitle.h"
  52
  53 using namespace std;
  54 using namespace boost;
  55
  56 FFmpegDecoder::FFmpegDecoder (boost::shared_ptr<const FilmState> s, boost::shared_ptr<const Options> o, Job* j, Log* l, bool minimal, bool ignore_length)
  57         : Decoder (s, o, j, l, minimal, ignore_length)
  58         , _format_context (0)
  59         , _video_stream (-1)
  60         , _audio_stream (-1)
  61         , _subtitle_stream (-1)
  62         , _frame (0)
  63         , _video_codec_context (0)
  64         , _video_codec (0)
  65         , _audio_codec_context (0)
  66         , _audio_codec (0)
  67         , _subtitle_codec_context (0)
  68         , _subtitle_codec (0)
  69 {
  70         setup_general ();
  71         setup_video ();
  72         setup_audio ();
  73         setup_subtitle ();
  74 }
  75
  76 FFmpegDecoder::~FFmpegDecoder ()
  77 {
  78         if (_audio_codec_context) {
  79                 avcodec_close (_audio_codec_context);
  80         }
  81
  82         if (_video_codec_context) {
  83                 avcodec_close (_video_codec_context);
  84         }
  85
  86         if (_subtitle_codec_context) {
  87                 avcodec_close (_subtitle_codec_context);
  88         }
  89
  90         av_free (_frame);
  91         avformat_close_input (&_format_context);
  92 }
  93
  94 void
  95 FFmpegDecoder::setup_general ()
  96 {
  97         int r;
  98
  99         av_register_all ();
 100
 101         if ((r = avformat_open_input (&_format_context, _fs->content_path().c_str(), 0, 0)) != 0) {
 102                 throw OpenFileError (_fs->content_path ());
 103         }
 104
 105         if (avformat_find_stream_info (_format_context, 0) < 0) {
 106                 throw DecodeError ("could not find stream information");
 107         }
 108
 109         /* Find video, audio and subtitle streams and choose the first of each */
 110
 111         for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
 112                 AVStream* s = _format_context->streams[i];
 113                 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
 114                         _video_stream = i;
 115                 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 116                         if (_audio_stream == -1) {
 117                                 _audio_stream = i;
 118                         }
 119                         _audio_streams.push_back (AudioStream (stream_name (s), i, s->codec->channels));
 120                 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
 121                         if (_subtitle_stream == -1) {
 122                                 _subtitle_stream = i;
 123                         }
 124                         _subtitle_streams.push_back (SubtitleStream (stream_name (s), i));
 125                 }
 126         }
 127
 128         /* Now override audio and subtitle streams with those from the Film, if it has any */
 129
 130         if (_fs->audio_stream_index() != -1) {
 131                 _audio_stream = _fs->audio_stream().id();
 132         }
 133
 134         if (_fs->subtitle_stream_index() != -1) {
 135                 _subtitle_stream = _fs->subtitle_stream().id ();
 136         }
 137
 138         if (_video_stream < 0) {
 139                 throw DecodeError ("could not find video stream");
 140         }
 141
 142         _frame = avcodec_alloc_frame ();
 143         if (_frame == 0) {
 144                 throw DecodeError ("could not allocate frame");
 145         }
 146 }
 147
 148 void
 149 FFmpegDecoder::setup_video ()
 150 {
 151         _video_codec_context = _format_context->streams[_video_stream]->codec;
 152         _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
 153
 154         if (_video_codec == 0) {
 155                 throw DecodeError ("could not find video decoder");
 156         }
 157
 158         if (avcodec_open2 (_video_codec_context, _video_codec, 0) < 0) {
 159                 throw DecodeError ("could not open video decoder");
 160         }
 161 }
 162
 163 void
 164 FFmpegDecoder::setup_audio ()
 165 {
 166         if (_audio_stream < 0) {
 167                 return;
 168         }
 169
 170         _audio_codec_context = _format_context->streams[_audio_stream]->codec;
 171         _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
 172
 173         if (_audio_codec == 0) {
 174                 throw DecodeError ("could not find audio decoder");
 175         }
 176
 177         if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
 178                 throw DecodeError ("could not open audio decoder");
 179         }
 180
 181         /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
 182            so bodge it here.  No idea why we should have to do this.
 183         */
 184
 185         if (_audio_codec_context->channel_layout == 0) {
 186                 _audio_codec_context->channel_layout = av_get_default_channel_layout (audio_channels ());
 187         }
 188 }
 189
 190 void
 191 FFmpegDecoder::setup_subtitle ()
 192 {
 193         if (_subtitle_stream < 0) {
 194                 return;
 195         }
 196
 197         _subtitle_codec_context = _format_context->streams[_subtitle_stream]->codec;
 198         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 199
 200         if (_subtitle_codec == 0) {
 201                 throw DecodeError ("could not find subtitle decoder");
 202         }
 203
 204         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 205                 throw DecodeError ("could not open subtitle decoder");
 206         }
 207 }
 208
 209
 210 bool
 211 FFmpegDecoder::do_pass ()
 212 {
 213         int r = av_read_frame (_format_context, &_packet);
 214
 215         if (r < 0) {
 216                 if (r != AVERROR_EOF) {
 217                         throw DecodeError ("error on av_read_frame");
 218                 }
 219
 220                 /* Get any remaining frames */
 221
 222                 _packet.data = 0;
 223                 _packet.size = 0;
 224
 225                 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 226
 227                 int frame_finished;
 228
 229                 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 230                         process_video (_frame);
 231                 }
 232
 233                 if (_audio_stream >= 0 && _opt->decode_audio) {
 234                         while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 235                                 int const data_size = av_samples_get_buffer_size (
 236                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 237                                         );
 238
 239                                 assert (_audio_codec_context->channels == _fs->audio_channels());
 240                                 process_audio (_frame->data[0], data_size);
 241                         }
 242                 }
 243
 244                 return true;
 245         }
 246
 247         double const pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base) * _packet.pts;
 248
 249         if (_packet.stream_index == _video_stream) {
 250
 251                 if (!_first_video) {
 252                         _first_video = pts_seconds;
 253                 }
 254
 255                 int frame_finished;
 256                 if (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 257                         process_video (_frame);
 258                 }
 259
 260         } else if (_audio_stream >= 0 && _packet.stream_index == _audio_stream && _opt->decode_audio && _first_video && _first_video.get() <= pts_seconds) {
 261
 262                 /* Note: We only decode audio if we've had our first video packet through, and if it
 263                    was before this packet.  Until then audio is thrown away.
 264                 */
 265
 266                 if (!_first_audio) {
 267                         _first_audio = pts_seconds;
 268
 269                         /* This is our first audio packet, and if we've arrived here we must have had our
 270                            first video packet.  Push some silence to make up the gap between our first
 271                            video packet and our first audio.
 272                         */
 273
 274                         /* frames of silence that we must push */
 275                         int const s = rint ((_first_audio.get() - _first_video.get()) * audio_sample_rate ());
 276
 277                         _log->log (
 278                                 String::compose (
 279                                         "First video at %1, first audio at %2, pushing %3 frames of silence for %4 channels (%5 bytes per sample)",
 280                                         _first_video.get(), _first_audio.get(), s, audio_channels(), bytes_per_audio_sample()
 281                                         )
 282                                 );
 283
 284                         /* hence bytes */
 285                         int const b = s * audio_channels() * bytes_per_audio_sample();
 286
 287                         /* XXX: this assumes that it won't be too much, and there are shaky assumptions
 288                            that all sound representations are silent with memset()ed zero data.
 289                         */
 290                         uint8_t silence[b];
 291                         memset (silence, 0, b);
 292                         process_audio (silence, b);
 293                 }
 294
 295                 avcodec_get_frame_defaults (_frame);
 296
 297                 int frame_finished;
 298                 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 299                         int const data_size = av_samples_get_buffer_size (
 300                                 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 301                                 );
 302
 303                         assert (_audio_codec_context->channels == _fs->audio_channels());
 304                         process_audio (_frame->data[0], data_size);
 305                 }
 306
 307         } else if (_subtitle_stream >= 0 && _packet.stream_index == _subtitle_stream && _opt->decode_subtitles) {
 308
 309                 int got_subtitle;
 310                 AVSubtitle sub;
 311                 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
 312                         /* I'm not entirely sure why, but sometimes we get an AVSubtitle with
 313                            no AVSubtitleRects.
 314                         */
 315                         if (sub.num_rects > 0) {
 316                                 process_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub)));
 317                         }
 318                         avsubtitle_free (&sub);
 319                 }
 320         }
 321
 322         av_free_packet (&_packet);
 323         return false;
 324 }
 325
 326 float
 327 FFmpegDecoder::frames_per_second () const
 328 {
 329         AVStream* s = _format_context->streams[_video_stream];
 330
 331         if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
 332                 return av_q2d (s->avg_frame_rate);
 333         }
 334
 335         return av_q2d (s->r_frame_rate);
 336 }
 337
 338 int
 339 FFmpegDecoder::audio_channels () const
 340 {
 341         if (_audio_codec_context == 0) {
 342                 return 0;
 343         }
 344
 345         return _audio_codec_context->channels;
 346 }
 347
 348 int
 349 FFmpegDecoder::audio_sample_rate () const
 350 {
 351         if (_audio_codec_context == 0) {
 352                 return 0;
 353         }
 354
 355         return _audio_codec_context->sample_rate;
 356 }
 357
 358 AVSampleFormat
 359 FFmpegDecoder::audio_sample_format () const
 360 {
 361         if (_audio_codec_context == 0) {
 362                 return (AVSampleFormat) 0;
 363         }
 364
 365         return _audio_codec_context->sample_fmt;
 366 }
 367
 368 int64_t
 369 FFmpegDecoder::audio_channel_layout () const
 370 {
 371         if (_audio_codec_context == 0) {
 372                 return 0;
 373         }
 374
 375         return _audio_codec_context->channel_layout;
 376 }
 377
 378 Size
 379 FFmpegDecoder::native_size () const
 380 {
 381         return Size (_video_codec_context->width, _video_codec_context->height);
 382 }
 383
 384 PixelFormat
 385 FFmpegDecoder::pixel_format () const
 386 {
 387         return _video_codec_context->pix_fmt;
 388 }
 389
 390 int
 391 FFmpegDecoder::time_base_numerator () const
 392 {
 393         return _video_codec_context->time_base.num;
 394 }
 395
 396 int
 397 FFmpegDecoder::time_base_denominator () const
 398 {
 399         return _video_codec_context->time_base.den;
 400 }
 401
 402 int
 403 FFmpegDecoder::sample_aspect_ratio_numerator () const
 404 {
 405         return _video_codec_context->sample_aspect_ratio.num;
 406 }
 407
 408 int
 409 FFmpegDecoder::sample_aspect_ratio_denominator () const
 410 {
 411         return _video_codec_context->sample_aspect_ratio.den;
 412 }
 413
 414 bool
 415 FFmpegDecoder::has_subtitles () const
 416 {
 417         return (_subtitle_stream != -1);
 418 }
 419
 420 vector<AudioStream>
 421 FFmpegDecoder::audio_streams () const
 422 {
 423         return _audio_streams;
 424 }
 425
 426 vector<SubtitleStream>
 427 FFmpegDecoder::subtitle_streams () const
 428 {
 429         return _subtitle_streams;
 430 }
 431
 432 string
 433 FFmpegDecoder::stream_name (AVStream* s) const
 434 {
 435         stringstream n;
 436
 437         AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
 438         if (lang) {
 439                 n << lang->value;
 440         }
 441
 442         AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
 443         if (title) {
 444                 if (!n.str().empty()) {
 445                         n << " ";
 446                 }
 447                 n << title->value;
 448         }
 449
 450         if (n.str().empty()) {
 451                 n << "unknown";
 452         }
 453
 454         return n.str ();
 455 }
 456