src/lib/ffmpeg_decoder.cc

   1 /*
   2     Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
/** @file  src/lib/ffmpeg_decoder.cc
 *  @brief A decoder using FFmpeg to decode content.
 */
  23
  24 #include <stdexcept>
  25 #include <vector>
  26 #include <sstream>
  27 #include <iomanip>
  28 #include <iostream>
  29 #include <stdint.h>
  30 #include <boost/lexical_cast.hpp>
  31 extern "C" {
  32 #include <tiffio.h>
  33 #include <libavcodec/avcodec.h>
  34 #include <libavformat/avformat.h>
  35 #include <libswscale/swscale.h>
  36 #include <libpostproc/postprocess.h>
  37 }
  38 #include <sndfile.h>
  39 #include "film.h"
  40 #include "format.h"
  41 #include "transcoder.h"
  42 #include "job.h"
  43 #include "filter.h"
  44 #include "options.h"
  45 #include "exceptions.h"
  46 #include "image.h"
  47 #include "util.h"
  48 #include "log.h"
  49 #include "ffmpeg_decoder.h"
  50 #include "subtitle.h"
  51
  52 using std::string;
  53 using std::vector;
  54 using std::stringstream;
  55 using boost::shared_ptr;
  56
  57 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const Options> o, Job* j, bool minimal, bool ignore_length)
  58         : Decoder (f, o, j, minimal, ignore_length)
  59         , _format_context (0)
  60         , _video_stream (-1)
  61         , _audio_stream (-1)
  62         , _subtitle_stream (-1)
  63         , _frame (0)
  64         , _video_codec_context (0)
  65         , _video_codec (0)
  66         , _audio_codec_context (0)
  67         , _audio_codec (0)
  68         , _subtitle_codec_context (0)
  69         , _subtitle_codec (0)
  70 {
  71         setup_general ();
  72         setup_video ();
  73         setup_audio ();
  74         setup_subtitle ();
  75 }
  76
  77 FFmpegDecoder::~FFmpegDecoder ()
  78 {
  79         if (_audio_codec_context) {
  80                 avcodec_close (_audio_codec_context);
  81         }
  82
  83         if (_video_codec_context) {
  84                 avcodec_close (_video_codec_context);
  85         }
  86
  87         if (_subtitle_codec_context) {
  88                 avcodec_close (_subtitle_codec_context);
  89         }
  90
  91         av_free (_frame);
  92         avformat_close_input (&_format_context);
  93 }
  94
  95 void
  96 FFmpegDecoder::setup_general ()
  97 {
  98         int r;
  99
 100         av_register_all ();
 101
 102         if ((r = avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0)) != 0) {
 103                 throw OpenFileError (_film->content_path ());
 104         }
 105
 106         if (avformat_find_stream_info (_format_context, 0) < 0) {
 107                 throw DecodeError ("could not find stream information");
 108         }
 109
 110         /* Find video, audio and subtitle streams and choose the first of each */
 111
 112         for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
 113                 AVStream* s = _format_context->streams[i];
 114                 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
 115                         _video_stream = i;
 116                 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 117                         if (_audio_stream == -1) {
 118                                 _audio_stream = i;
 119                         }
 120                         _audio_streams.push_back (AudioStream (stream_name (s), i, s->codec->channels));
 121                 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
 122                         if (_subtitle_stream == -1) {
 123                                 _subtitle_stream = i;
 124                         }
 125                         _subtitle_streams.push_back (SubtitleStream (stream_name (s), i));
 126                 }
 127         }
 128
 129         /* Now override audio and subtitle streams with those from the Film, if it has any */
 130
 131         if (_film->audio_stream_index() != -1) {
 132                 _audio_stream = _film->audio_stream().id();
 133         }
 134
 135         if (_film->subtitle_stream_index() != -1) {
 136                 _subtitle_stream = _film->subtitle_stream().id ();
 137         }
 138
 139         if (_video_stream < 0) {
 140                 throw DecodeError ("could not find video stream");
 141         }
 142
 143         _frame = avcodec_alloc_frame ();
 144         if (_frame == 0) {
 145                 throw DecodeError ("could not allocate frame");
 146         }
 147 }
 148
 149 void
 150 FFmpegDecoder::setup_video ()
 151 {
 152         _video_codec_context = _format_context->streams[_video_stream]->codec;
 153         _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
 154
 155         if (_video_codec == 0) {
 156                 throw DecodeError ("could not find video decoder");
 157         }
 158
 159         /* I think this prevents problems with green hash on decodes and
 160            "changing frame properties on the fly is not supported by all filters"
 161            messages with some content.  Although I'm not sure; needs checking.
 162         */
 163         AVDictionary* opts = 0;
 164         av_dict_set (&opts, "threads", "1", 0);
 165
 166         if (avcodec_open2 (_video_codec_context, _video_codec, &opts) < 0) {
 167                 throw DecodeError ("could not open video decoder");
 168         }
 169 }
 170
 171 void
 172 FFmpegDecoder::setup_audio ()
 173 {
 174         if (_audio_stream < 0) {
 175                 return;
 176         }
 177
 178         _audio_codec_context = _format_context->streams[_audio_stream]->codec;
 179         _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
 180
 181         if (_audio_codec == 0) {
 182                 throw DecodeError ("could not find audio decoder");
 183         }
 184
 185         if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
 186                 throw DecodeError ("could not open audio decoder");
 187         }
 188
 189         /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
 190            so bodge it here.  No idea why we should have to do this.
 191         */
 192
 193         if (_audio_codec_context->channel_layout == 0) {
 194                 _audio_codec_context->channel_layout = av_get_default_channel_layout (audio_channels ());
 195         }
 196 }
 197
 198 void
 199 FFmpegDecoder::setup_subtitle ()
 200 {
 201         if (_subtitle_stream < 0) {
 202                 return;
 203         }
 204
 205         _subtitle_codec_context = _format_context->streams[_subtitle_stream]->codec;
 206         _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
 207
 208         if (_subtitle_codec == 0) {
 209                 throw DecodeError ("could not find subtitle decoder");
 210         }
 211
 212         if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
 213                 throw DecodeError ("could not open subtitle decoder");
 214         }
 215 }
 216
 217
 218 bool
 219 FFmpegDecoder::do_pass ()
 220 {
 221         int r = av_read_frame (_format_context, &_packet);
 222
 223         if (r < 0) {
 224                 if (r != AVERROR_EOF) {
 225                         throw DecodeError ("error on av_read_frame");
 226                 }
 227
 228                 /* Get any remaining frames */
 229
 230                 _packet.data = 0;
 231                 _packet.size = 0;
 232
 233                 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
 234
 235                 int frame_finished;
 236
 237                 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 238                         process_video (_frame);
 239                 }
 240
 241                 if (_audio_stream >= 0 && _opt->decode_audio) {
 242                         while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 243                                 int const data_size = av_samples_get_buffer_size (
 244                                         0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 245                                         );
 246
 247                                 assert (_audio_codec_context->channels == _film->audio_channels());
 248                                 process_audio (_frame->data[0], data_size);
 249                         }
 250                 }
 251
 252                 return true;
 253         }
 254
 255         double const pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base) * _packet.pts;
 256
 257         if (_packet.stream_index == _video_stream) {
 258
 259                 if (!_first_video) {
 260                         _first_video = pts_seconds;
 261                 }
 262
 263                 int frame_finished;
 264                 if (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 265                         process_video (_frame);
 266                 }
 267
 268         } else if (_audio_stream >= 0 && _packet.stream_index == _audio_stream && _opt->decode_audio && _first_video && _first_video.get() <= pts_seconds) {
 269
 270                 /* Note: We only decode audio if we've had our first video packet through, and if it
 271                    was before this packet.  Until then audio is thrown away.
 272                 */
 273
 274                 if (!_first_audio) {
 275                         _first_audio = pts_seconds;
 276
 277                         /* This is our first audio packet, and if we've arrived here we must have had our
 278                            first video packet.  Push some silence to make up the gap between our first
 279                            video packet and our first audio.
 280                         */
 281
 282                         /* frames of silence that we must push */
 283                         int const s = rint ((_first_audio.get() - _first_video.get()) * audio_sample_rate ());
 284
 285                         _film->log()->log (
 286                                 String::compose (
 287                                         "First video at %1, first audio at %2, pushing %3 frames of silence for %4 channels (%5 bytes per sample)",
 288                                         _first_video.get(), _first_audio.get(), s, audio_channels(), bytes_per_audio_sample()
 289                                         )
 290                                 );
 291
 292                         /* hence bytes */
 293                         int const b = s * audio_channels() * bytes_per_audio_sample();
 294
 295                         /* XXX: this assumes that it won't be too much, and there are shaky assumptions
 296                            that all sound representations are silent with memset()ed zero data.
 297                         */
 298                         uint8_t silence[b];
 299                         memset (silence, 0, b);
 300                         process_audio (silence, b);
 301                 }
 302
 303                 avcodec_get_frame_defaults (_frame);
 304
 305                 int frame_finished;
 306                 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
 307                         int const data_size = av_samples_get_buffer_size (
 308                                 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 309                                 );
 310
 311                         assert (_audio_codec_context->channels == _film->audio_channels());
 312                         process_audio (_frame->data[0], data_size);
 313                 }
 314
 315         } else if (_subtitle_stream >= 0 && _packet.stream_index == _subtitle_stream && _opt->decode_subtitles && _first_video) {
 316
 317                 int got_subtitle;
 318                 AVSubtitle sub;
 319                 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
 320                         /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
 321                            indicate that the previous subtitle should stop.
 322                         */
 323                         if (sub.num_rects > 0) {
 324                                 process_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
 325                         } else {
 326                                 process_subtitle (shared_ptr<TimedSubtitle> ());
 327                         }
 328                         avsubtitle_free (&sub);
 329                 }
 330         }
 331
 332         av_free_packet (&_packet);
 333         return false;
 334 }
 335
 336 float
 337 FFmpegDecoder::frames_per_second () const
 338 {
 339         AVStream* s = _format_context->streams[_video_stream];
 340
 341         if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
 342                 return av_q2d (s->avg_frame_rate);
 343         }
 344
 345         return av_q2d (s->r_frame_rate);
 346 }
 347
 348 int
 349 FFmpegDecoder::audio_channels () const
 350 {
 351         if (_audio_codec_context == 0) {
 352                 return 0;
 353         }
 354
 355         return _audio_codec_context->channels;
 356 }
 357
 358 int
 359 FFmpegDecoder::audio_sample_rate () const
 360 {
 361         if (_audio_codec_context == 0) {
 362                 return 0;
 363         }
 364
 365         return _audio_codec_context->sample_rate;
 366 }
 367
 368 AVSampleFormat
 369 FFmpegDecoder::audio_sample_format () const
 370 {
 371         if (_audio_codec_context == 0) {
 372                 return (AVSampleFormat) 0;
 373         }
 374
 375         return _audio_codec_context->sample_fmt;
 376 }
 377
 378 int64_t
 379 FFmpegDecoder::audio_channel_layout () const
 380 {
 381         if (_audio_codec_context == 0) {
 382                 return 0;
 383         }
 384
 385         return _audio_codec_context->channel_layout;
 386 }
 387
 388 Size
 389 FFmpegDecoder::native_size () const
 390 {
 391         return Size (_video_codec_context->width, _video_codec_context->height);
 392 }
 393
 394 PixelFormat
 395 FFmpegDecoder::pixel_format () const
 396 {
 397         return _video_codec_context->pix_fmt;
 398 }
 399
 400 int
 401 FFmpegDecoder::time_base_numerator () const
 402 {
 403         return _video_codec_context->time_base.num;
 404 }
 405
 406 int
 407 FFmpegDecoder::time_base_denominator () const
 408 {
 409         return _video_codec_context->time_base.den;
 410 }
 411
 412 int
 413 FFmpegDecoder::sample_aspect_ratio_numerator () const
 414 {
 415         return _video_codec_context->sample_aspect_ratio.num;
 416 }
 417
 418 int
 419 FFmpegDecoder::sample_aspect_ratio_denominator () const
 420 {
 421         return _video_codec_context->sample_aspect_ratio.den;
 422 }
 423
 424 bool
 425 FFmpegDecoder::has_subtitles () const
 426 {
 427         return (_subtitle_stream != -1);
 428 }
 429
 430 vector<AudioStream>
 431 FFmpegDecoder::audio_streams () const
 432 {
 433         return _audio_streams;
 434 }
 435
 436 vector<SubtitleStream>
 437 FFmpegDecoder::subtitle_streams () const
 438 {
 439         return _subtitle_streams;
 440 }
 441
 442 string
 443 FFmpegDecoder::stream_name (AVStream* s) const
 444 {
 445         stringstream n;
 446
 447         AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
 448         if (lang) {
 449                 n << lang->value;
 450         }
 451
 452         AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
 453         if (title) {
 454                 if (!n.str().empty()) {
 455                         n << " ";
 456                 }
 457                 n << title->value;
 458         }
 459
 460         if (n.str().empty()) {
 461                 n << "unknown";
 462         }
 463
 464         return n.str ();
 465 }
 466