2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
38 #include "exceptions.h"
42 #include "ffmpeg_decoder.h"
43 #include "filter_graph.h"
44 #include "audio_buffers.h"
45 #include "ffmpeg_content.h"
52 using std::stringstream;
56 using boost::shared_ptr;
57 using boost::optional;
58 using boost::dynamic_pointer_cast;
/* Construct a decoder for FFmpeg content.
   @param f Film (used for logging; held weakly).
   @param c The FFmpeg content to decode.
   @param video true to decode the video stream.
   @param audio true to decode the audio stream.
   NOTE(review): several initialiser-list entries and the opening brace are
   elided in this excerpt; the members shown are flag/zero initialised.
*/
61 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegContent> c, bool video, bool audio)
67 , _subtitle_codec_context (0)
69 , _decode_video (video)
70 , _decode_audio (audio)
71 , _video_pts_offset (0)
72 , _audio_pts_offset (0)
76 /* Audio and video frame PTS values may not start with 0. We want
77 to fiddle them so that:
79 1. One of them starts at time 0.
80 2. The first video PTS value ends up on a frame boundary.
82 Then we remove big initial gaps in PTS and we allow our
83 insertion of black frames to work.
86 audio_pts_to_use = audio_pts_from_ffmpeg + audio_pts_offset;
87 video_pts_to_use = video_pts_from_ffmpeg + video_pts_offset;
/* Which streams are actually present AND requested for decode */
90 bool const have_video = video && c->first_video();
91 bool const have_audio = _decode_audio && c->audio_stream () && c->audio_stream()->first_audio;
93 /* First, make one of them start at 0 */
/* Offset both streams by the same amount so the earlier of the two
   first-PTS values lands at time 0 and relative sync is preserved. */
95 if (have_audio && have_video) {
96 _video_pts_offset = _audio_pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
97 } else if (have_video) {
98 _video_pts_offset = - c->first_video().get();
99 } else if (have_audio) {
100 _audio_pts_offset = - c->audio_stream()->first_audio.get();
103 /* Now adjust both so that the video pts starts on a frame */
104 if (have_video && have_audio) {
105 double first_video = c->first_video().get() + _video_pts_offset;
106 double const old_first_video = first_video;
108 /* Round the first video up to a frame boundary */
/* The 1e-6 epsilon avoids "rounding up" a value that is already on a
   frame boundary except for floating-point noise. */
109 if (fabs (rint (first_video * c->video_frame_rate()) - first_video * c->video_frame_rate()) > 1e-6) {
110 first_video = ceil (first_video * c->video_frame_rate()) / c->video_frame_rate ();
/* Shift both offsets by the same delta so A/V sync is unchanged */
113 _video_pts_offset += first_video - old_first_video;
114 _audio_pts_offset += first_video - old_first_video;
/* Destructor: close the subtitle codec context (if setup_subtitle() opened
   one) under the object mutex.  avcodec_close() releases the codec's
   internal state; the context itself belongs to the AVFormatContext. */
118 FFmpegDecoder::~FFmpegDecoder ()
120 boost::mutex::scoped_lock lm (_mutex);
122 if (_subtitle_codec_context) {
123 avcodec_close (_subtitle_codec_context);
/* Drain any frames still buffered inside the video/audio decoders
   (decoders can hold frames back; calling decode with the now-empty
   _packet flushes them out), then flush the base AudioDecoder. */
128 FFmpegDecoder::flush ()
130 /* Get any remaining frames */
135 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
/* Keep decoding until the video decoder has nothing more to give */
138 while (decode_video_packet ()) {}
141 if (_ffmpeg_content->audio_stream() && _decode_audio) {
142 decode_audio_packet ();
145 AudioDecoder::flush ();
/* Read the next packet from the container and dispatch it to the
   appropriate decode_*_packet() method based on its stream index.
   On EOF (or any read error, which we treat leniently) the decoder
   is flushed instead of failing hard. */
149 FFmpegDecoder::pass ()
151 int r = av_read_frame (_format_context, &_packet);
154 if (r != AVERROR_EOF) {
155 /* Maybe we should fail here, but for now we'll just finish off instead */
/* NOTE(review): `buf` is declared on a line elided from this excerpt —
   presumably a local char array for av_strerror(); confirm. */
157 av_strerror (r, buf, sizeof(buf));
158 shared_ptr<const Film> film = _film.lock ();
160 film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
/* Reset _frame before handing it to a decoder */
167 avcodec_get_frame_defaults (_frame);
169 shared_ptr<const Film> film = _film.lock ();
172 int const si = _packet.stream_index;
/* Route the packet: video / audio / subtitle, honouring the decode flags
   and the film's with-subtitles setting */
174 if (si == _video_stream && _decode_video) {
175 decode_video_packet ();
176 } else if (_ffmpeg_content->audio_stream() && si == _ffmpeg_content->audio_stream()->index (_format_context) && _decode_audio) {
177 decode_audio_packet ();
178 } else if (_ffmpeg_content->subtitle_stream() && si == _ffmpeg_content->subtitle_stream()->index (_format_context) && film->with_subtitles ()) {
179 decode_subtitle_packet ();
/* av_read_frame allocated the packet's payload; release it */
182 av_free_packet (&_packet);
186 /** @param data pointer to array of pointers to buffers.
187 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
/* @param size total size of the sample data in bytes.
   @return AudioBuffers holding the samples converted to float in [-1, 1),
   one plane per channel.  Throws DecodeError on an unhandled sample format.
   NOTE(review): the interleaved cases use `channel`/`sample` counters whose
   declarations and increments are on lines elided from this excerpt. */
189 shared_ptr<AudioBuffers>
190 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
191 /* Sanity: we must know the channel count and sample size */
192 assert (_ffmpeg_content->audio_channels());
193 assert (bytes_per_audio_sample());
195 /* Deinterleave and convert to float */
/* size must be a whole number of (sample * channel) units */
197 assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
199 int const total_samples = size / bytes_per_audio_sample();
200 int const frames = total_samples / _ffmpeg_content->audio_channels();
201 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
203 switch (audio_sample_format()) {
/* Interleaved signed 16-bit: walk all samples, scaling to float */
204 case AV_SAMPLE_FMT_S16:
206 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
209 for (int i = 0; i < total_samples; ++i) {
210 audio->data(channel)[sample] = float(*p++) / (1 << 15);
213 if (channel == _ffmpeg_content->audio_channels()) {
/* Planar signed 16-bit: one input plane per channel */
221 case AV_SAMPLE_FMT_S16P:
223 int16_t** p = reinterpret_cast<int16_t **> (data);
224 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
225 for (int j = 0; j < frames; ++j) {
226 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
/* Interleaved signed 32-bit.
   NOTE(review): (1 << 31) overflows a 32-bit int (undefined behaviour);
   2147483648.0 (or 1LL << 31) would be the safe divisor — confirm/fix. */
232 case AV_SAMPLE_FMT_S32:
234 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
237 for (int i = 0; i < total_samples; ++i) {
238 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
241 if (channel == _ffmpeg_content->audio_channels()) {
/* Interleaved float: already in the right scale, just deinterleave */
249 case AV_SAMPLE_FMT_FLT:
251 float* p = reinterpret_cast<float*> (data[0]);
254 for (int i = 0; i < total_samples; ++i) {
255 audio->data(channel)[sample] = *p++;
258 if (channel == _ffmpeg_content->audio_channels()) {
/* Planar float: straight per-channel memcpy */
266 case AV_SAMPLE_FMT_FLTP:
268 float** p = reinterpret_cast<float**> (data);
269 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
270 memcpy (audio->data(i), p[i], frames * sizeof(float));
276 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
/* @return the sample format of the audio stream's codec context, or
   AVSampleFormat 0 (AV_SAMPLE_FMT_NONE is -1, so this is actually the
   first real format value) when the content has no audio stream. */
283 FFmpegDecoder::audio_sample_format () const
285 if (!_ffmpeg_content->audio_stream()) {
286 return (AVSampleFormat) 0;
289 return audio_codec_context()->sample_fmt;
/* @return bytes per (single-channel) audio sample for the current
   sample format, via FFmpeg's lookup. */
293 FFmpegDecoder::bytes_per_audio_sample () const
295 return av_get_bytes_per_sample (audio_sample_format ());
/* Decode packets (without emitting them) until `finished` — called with the
   last-seen video time, last-seen audio time and the number of frames read —
   returns true.  Used by seek() to step up to a target time.
   NOTE(review): `frames_read` and the local int used for the decoders'
   got-frame flag are declared on lines elided from this excerpt; the
   boost::function parameter is also named `finished`, so `&finished` at
   the decode calls presumably refers to that elided int — confirm. */
299 FFmpegDecoder::minimal_run (boost::function<bool (ContentTime, ContentTime, int)> finished)
302 ContentTime last_video = 0;
303 ContentTime last_audio = 0;
305 while (!finished (last_video, last_audio, frames_read)) {
306 int r = av_read_frame (_format_context, &_packet);
308 /* We should flush our decoders here, possibly yielding a few more frames,
309 but the consequence of having to do that is too hideous to contemplate.
310 Instead we give up and say that you can't seek too close to the end
/* Time base of whichever stream this packet belongs to */
318 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
320 if (_packet.stream_index == _video_stream) {
322 avcodec_get_frame_defaults (_frame);
325 r = avcodec_decode_video2 (video_codec_context(), _frame, &finished, &_packet);
326 if (r >= 0 && finished) {
/* Convert the frame's best-effort PTS to TIME_HZ units, applying our offset */
328 (av_frame_get_best_effort_timestamp (_frame) * time_base + _video_pts_offset) * TIME_HZ
332 } else if (_ffmpeg_content->audio_stream() && _packet.stream_index == _ffmpeg_content->audio_stream()->index (_format_context)) {
/* Audio packets may hold several frames; loop, advancing copy_packet */
333 AVPacket copy_packet = _packet;
334 while (copy_packet.size > 0) {
/* NOTE(review): this passes &_packet while the loop advances copy_packet —
   it almost certainly should pass &copy_packet, otherwise the same data is
   decoded repeatedly.  Flagging rather than changing, as surrounding lines
   are elided. */
337 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &finished, &_packet);
338 if (r >= 0 && finished) {
340 (av_frame_get_best_effort_timestamp (_frame) * time_base + _audio_pts_offset) * TIME_HZ
344 copy_packet.data += r;
345 copy_packet.size -= r;
349 av_free_packet (&_packet);
/* Predicate for minimal_run(): true once either stream has reached or
   passed the seek target time. */
356 FFmpegDecoder::seek_overrun_finished (ContentTime seek, ContentTime last_video, ContentTime last_audio) const
358 return last_video >= seek || last_audio >= seek;
/* Predicate for minimal_run() when stepping to the final seek position:
   finished once `done` of the required `n` frames have been read.
   (Body not visible in this excerpt — presumably `return n == done;`;
   confirm against the full source.) */
362 FFmpegDecoder::seek_final_finished (int n, int done) const
/* Seek the container to (at or before) ContentTime t on both the video
   stream and, if present, the audio stream, then flush all codec buffers
   so no stale frames survive the seek.  The target is converted from
   TIME_HZ units into each stream's own time base, undoing our PTS offsets. */
368 FFmpegDecoder::seek_and_flush (ContentTime t)
370 int64_t const initial_v = ((double (t) / TIME_HZ) - _video_pts_offset) /
371 av_q2d (_format_context->streams[_video_stream]->time_base);
/* AVSEEK_FLAG_BACKWARD: land on the keyframe at or before the target */
373 av_seek_frame (_format_context, _video_stream, initial_v, AVSEEK_FLAG_BACKWARD);
375 shared_ptr<FFmpegAudioStream> as = _ffmpeg_content->audio_stream ();
377 int64_t initial_a = ((double (t) / TIME_HZ) - _audio_pts_offset) /
378 av_q2d (as->stream(_format_context)->time_base);
380 av_seek_frame (_format_context, as->index (_format_context), initial_a, AVSEEK_FLAG_BACKWARD);
/* Discard any decoded-but-undelivered frames in each codec */
383 avcodec_flush_buffers (video_codec_context());
384 if (audio_codec_context ()) {
385 avcodec_flush_buffers (audio_codec_context ());
387 if (_subtitle_codec_context) {
388 avcodec_flush_buffers (_subtitle_codec_context);
/* Seek to ContentTime `time`.  When `accurate`, we deliberately undershoot
   by a pre-roll, then decode forward (minimal_run) counting frames until we
   overrun the target, re-seek, and step exactly N-1 frames so the next
   frame delivered is the one at `time`. */
393 FFmpegDecoder::seek (ContentTime time, bool accurate)
395 Decoder::seek (time, accurate);
397 /* If we are doing an accurate seek, our initial shot will be 200ms (200 being
398 a number plucked from the air) earlier than we want to end up. The loop below
399 will hopefully then step through to where we want to be.
402 ContentTime pre_roll = accurate ? (0.2 * TIME_HZ) : 0;
403 ContentTime initial_seek = time - pre_roll;
/* Clamp: can't seek before the start of the content */
404 if (initial_seek < 0) {
408 /* Initial seek time in the video stream's timebase */
410 seek_and_flush (initial_seek);
412 if (time == 0 || !accurate) {
413 /* We're already there, or we're as close as we need to be */
/* Count how many frames it takes to pass the target from the undershoot */
417 int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
/* Go back to the undershoot and consume exactly N-1 frames */
419 seek_and_flush (initial_seek);
421 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
426 FFmpegDecoder::decode_audio_packet ()
428 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
432 AVPacket copy_packet = _packet;
434 while (copy_packet.size > 0) {
437 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, ©_packet);
439 if (decode_result < 0) {
440 shared_ptr<const Film> film = _film.lock ();
442 film->log()->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
446 if (frame_finished) {
447 ContentTime const t = rint (
448 (av_q2d (_format_context->streams[copy_packet.stream_index]->time_base)
449 * av_frame_get_best_effort_timestamp(_frame) + _audio_pts_offset) * TIME_HZ
452 int const data_size = av_samples_get_buffer_size (
453 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
456 audio (deinterleave_audio (_frame->data, data_size), t);
459 copy_packet.data += decode_result;
460 copy_packet.size -= decode_result;
/* Decode the video payload of _packet.  Returns false when the decoder
   produced no frame (error or need-more-data); otherwise the decoded frame
   is run through a (cached, lazily-created) FilterGraph, optionally
   post-processed, and emitted via video() with its PTS converted to
   TIME_HZ units.
   NOTE(review): `frame_finished` and the function's return statements are
   on lines elided from this excerpt. */
465 FFmpegDecoder::decode_video_packet ()
468 if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
/* The graph cache is shared, so guard it */
472 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
474 shared_ptr<FilterGraph> graph;
/* Look for an existing graph that can handle this frame's size/format */
476 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
477 while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
481 if (i == _filter_graphs.end ()) {
482 shared_ptr<const Film> film = _film.lock ();
/* No match: build a new graph for this size/format and cache it */
485 graph.reset (new FilterGraph (_ffmpeg_content, libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
486 _filter_graphs.push_back (graph);
488 film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
/* A graph may emit zero or more (image, pts) pairs per input frame */
493 list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
495 string post_process = Filter::ffmpeg_strings (_ffmpeg_content->filters()).second;
497 for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
499 shared_ptr<Image> image = i->first;
500 if (!post_process.empty ()) {
501 image = image->post_process (post_process, true);
/* Only emit frames that carry a usable PTS; others are dropped with a log */
504 if (i->second != AV_NOPTS_VALUE) {
505 ContentTime const t = rint ((i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _video_pts_offset) * TIME_HZ);
506 video (image, false, t);
508 shared_ptr<const Film> film = _film.lock ();
510 film->log()->log ("Dropping frame without PTS");
/* Locate and open the subtitle stream's decoder, storing its codec context
   in _subtitle_codec_context (closed later by the destructor).  Does nothing
   if there is no subtitle stream or its index is out of range for this
   format context.  Throws DecodeError on decoder lookup/open failure. */
519 FFmpegDecoder::setup_subtitle ()
521 boost::mutex::scoped_lock lm (_mutex);
523 if (!_ffmpeg_content->subtitle_stream() || _ffmpeg_content->subtitle_stream()->index (_format_context) >= int (_format_context->nb_streams)) {
/* The context is owned by the AVFormatContext; we only open/close it */
527 _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
528 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
530 if (_subtitle_codec == 0) {
531 throw DecodeError (_("could not find subtitle decoder"));
534 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
535 throw DecodeError (N_("could not open subtitle decoder"));
/* Decode the subtitle payload of _packet.  An empty AVSubtitle clears the
   current subtitle; a single bitmap rect is converted from its paletted
   form to an RGBA Image and emitted via subtitle() with its display window
   ([from, to]) and position/size expressed as fractions of the video frame.
   Multi-rect and non-bitmap subtitles are rejected with DecodeError.
   NOTE(review): `sub` and `got_subtitle` are declared on lines elided from
   this excerpt. */
540 FFmpegDecoder::decode_subtitle_packet ()
544 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
548 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
549 indicate that the previous subtitle should stop.
/* Empty image + zero times = "remove the current subtitle" */
551 if (sub.num_rects <= 0) {
552 subtitle (shared_ptr<Image> (), dcpomatic::Rect<double> (), 0, 0);
554 } else if (sub.num_rects > 1) {
555 throw DecodeError (_("multi-part subtitles not yet supported"));
558 /* Subtitle PTS in seconds (within the source, not taking into account any of the
559 source that we may have chopped off for the DCP)
561 double const packet_time = static_cast<double> (sub.pts) / AV_TIME_BASE;
563 /* hence start time for this sub */
/* start/end_display_time are offsets in milliseconds from packet_time */
564 DCPTime const from = (packet_time + (double (sub.start_display_time) / 1e3)) * TIME_HZ;
565 DCPTime const to = (packet_time + (double (sub.end_display_time) / 1e3)) * TIME_HZ;
567 AVSubtitleRect const * rect = sub.rects[0];
569 if (rect->type != SUBTITLE_BITMAP) {
570 throw DecodeError (_("non-bitmap subtitles not yet supported"));
573 shared_ptr<Image> image (new Image (PIX_FMT_RGBA, libdcp::Size (rect->w, rect->h), true));
575 /* Start of the first line in the subtitle */
576 uint8_t* sub_p = rect->pict.data[0];
577 /* sub_p looks up into a RGB palette which is here */
578 uint32_t const * palette = (uint32_t *) rect->pict.data[1];
579 /* Start of the output data */
580 uint32_t* out_p = (uint32_t *) image->data()[0];
/* Expand the paletted bitmap to RGBA, one row at a time, honouring both
   the source linesize and the output image stride */
582 for (int y = 0; y < rect->h; ++y) {
583 uint8_t* sub_line_p = sub_p;
584 uint32_t* out_line_p = out_p;
585 for (int x = 0; x < rect->w; ++x) {
586 *out_line_p++ = palette[*sub_line_p++];
588 sub_p += rect->pict.linesize[0];
589 out_p += image->stride()[0] / sizeof (uint32_t);
/* Express the rect as fractions of the video size so later scaling works */
592 libdcp::Size const vs = _ffmpeg_content->video_size ();
596 dcpomatic::Rect<double> (
597 static_cast<double> (rect->x) / vs.width,
598 static_cast<double> (rect->y) / vs.height,
599 static_cast<double> (rect->w) / vs.width,
600 static_cast<double> (rect->h) / vs.height
/* FFmpeg allocated sub's rects; free them */
607 avsubtitle_free (&sub);