From d628ed776e3ca94b154980e962b1c4a0a8f7540c Mon Sep 17 00:00:00 2001
From: Carl Hetherington
Date: Thu, 11 May 2017 00:46:18 +0100
Subject: [PATCH] Basic audio support.

---
Review notes (this section is commentary only; it is not applied with the diff):

* go(): the end-of-stream flush called avcodec_encode_audio2() on
  _video_codec_context and wrote the resulting packets with stream_index 0.
  It now drains _audio_codec_context and tags the packets with
  _audio_stream_index.  The video flush uses _video_stream_index rather
  than a literal 0, matching video().
* audio_frame(): the inner loop of the S16 case tested j but incremented i,
  so j never advanced.  It now increments j.
* Template arguments that had been stripped from this patch text are
  restored.  Those on added lines follow from the surrounding code
  (int16_t* / float* casts, AudioBuffers for _pending_audio).  Those on
  context lines (Film, Job, PlayerVideo, AudioBuffers) are presumed;
  confirm them against the tree before applying.
* The author address is missing from the From: header above and has not
  been reconstructed.
* No lines were added or removed inside any hunk, so the @@ headers and the
  diffstat below still hold.  The "index" blob ids are those of the
  original patch.

 src/lib/ffmpeg_encoder.cc   | 217 ++++++++++++++++++++++++++++++------
 src/lib/ffmpeg_encoder.h    |  22 +++-
 test/ffmpeg_encoder_test.cc |   2 +-
 3 files changed, 201 insertions(+), 40 deletions(-)

diff --git a/src/lib/ffmpeg_encoder.cc b/src/lib/ffmpeg_encoder.cc
index 7d1597e36..36b2d2fe4 100644
--- a/src/lib/ffmpeg_encoder.cc
+++ b/src/lib/ffmpeg_encoder.cc
@@ -37,6 +37,9 @@ using boost::shared_ptr;
 using boost::bind;
 using boost::weak_ptr;
 
+int FFmpegEncoder::_video_stream_index = 0;
+int FFmpegEncoder::_audio_stream_index = 1;
+
 static AVPixelFormat
 force_pixel_format (AVPixelFormat, AVPixelFormat out)
 {
@@ -45,70 +48,119 @@ force_pixel_format (AVPixelFormat, AVPixelFormat out)
 
 FFmpegEncoder::FFmpegEncoder (shared_ptr<const Film> film, weak_ptr<Job> job, boost::filesystem::path output, Format format)
 	: Encoder (film, job)
-	, _options (0)
+	, _video_options (0)
 	, _history (1000)
 	, _output (output)
+	, _pending_audio (new AudioBuffers (film->audio_channels(), 0))
 {
 	switch (format) {
 	case FORMAT_PRORES:
 		_pixel_format = AV_PIX_FMT_YUV422P10;
-		_codec_name = "prores_ks";
-		av_dict_set (&_options, "profile", "3", 0);
-		av_dict_set (&_options, "threads", "auto", 0);
+		_sample_format = AV_SAMPLE_FMT_S16;
+		_video_codec_name = "prores_ks";
+		_audio_codec_name = "pcm_s16le";
+		av_dict_set (&_video_options, "profile", "3", 0);
+		av_dict_set (&_video_options, "threads", "auto", 0);
 		break;
 	case FORMAT_H264:
 		_pixel_format = AV_PIX_FMT_YUV420P;
-		_codec_name = "libx264";
+		_sample_format = AV_SAMPLE_FMT_FLTP;
+		_video_codec_name = "libx264";
+		_audio_codec_name = "aac";
 		break;
 	}
 }
 
 void
-FFmpegEncoder::go ()
+FFmpegEncoder::setup_video ()
 {
-	AVCodec* codec = avcodec_find_encoder_by_name (_codec_name.c_str());
-	if (!codec) {
-		throw runtime_error (String::compose ("could not find FFmpeg encoder %1", _codec_name));
+	_video_codec = avcodec_find_encoder_by_name (_video_codec_name.c_str());
+	if (!_video_codec) {
+		throw runtime_error (String::compose ("could not find FFmpeg encoder %1", _video_codec_name));
 	}
 
-	_codec_context = avcodec_alloc_context3 (codec);
-	if (!_codec_context) {
-		throw runtime_error ("could not allocate FFmpeg context");
+	_video_codec_context = avcodec_alloc_context3 (_video_codec);
+	if (!_video_codec_context) {
+		throw runtime_error ("could not allocate FFmpeg video context");
 	}
 
-	avcodec_get_context_defaults3 (_codec_context, codec);
+	avcodec_get_context_defaults3 (_video_codec_context, _video_codec);
 
 	/* Variable quantisation */
-	_codec_context->global_quality = 0;
-	_codec_context->width = _film->frame_size().width;
-	_codec_context->height = _film->frame_size().height;
-	_codec_context->time_base = (AVRational) { 1, _film->video_frame_rate() };
-	_codec_context->pix_fmt = _pixel_format;
-	_codec_context->flags |= CODEC_FLAG_QSCALE | CODEC_FLAG_GLOBAL_HEADER;
+	_video_codec_context->global_quality = 0;
+	_video_codec_context->width = _film->frame_size().width;
+	_video_codec_context->height = _film->frame_size().height;
+	_video_codec_context->time_base = (AVRational) { 1, _film->video_frame_rate() };
+	_video_codec_context->pix_fmt = _pixel_format;
+	_video_codec_context->flags |= CODEC_FLAG_QSCALE | CODEC_FLAG_GLOBAL_HEADER;
+}
+
+void
+FFmpegEncoder::setup_audio ()
+{
+	_audio_codec = avcodec_find_encoder_by_name (_audio_codec_name.c_str());
+	if (!_audio_codec) {
+		throw runtime_error (String::compose ("could not find FFmpeg encoder %1", _audio_codec_name));
+	}
+
+	_audio_codec_context = avcodec_alloc_context3 (_audio_codec);
+	if (!_audio_codec_context) {
+		throw runtime_error ("could not allocate FFmpeg audio context");
+	}
+
+	avcodec_get_context_defaults3 (_audio_codec_context, _audio_codec);
+
+	/* XXX: configurable */
+	_audio_codec_context->bit_rate = 256 * 1024;
+	_audio_codec_context->sample_fmt = _sample_format;
+	_audio_codec_context->sample_rate = _film->audio_frame_rate ();
+	_audio_codec_context->channel_layout = av_get_default_channel_layout (_film->audio_channels ());
+	_audio_codec_context->channels = _film->audio_channels ();
+}
+
+void
+FFmpegEncoder::go ()
+{
+	setup_video ();
+	setup_audio ();
 
 	avformat_alloc_output_context2 (&_format_context, 0, 0, _output.string().c_str());
 	if (!_format_context) {
 		throw runtime_error ("could not allocate FFmpeg format context");
 	}
 
-	_video_stream = avformat_new_stream (_format_context, codec);
+	_video_stream = avformat_new_stream (_format_context, _video_codec);
 	if (!_video_stream) {
 		throw runtime_error ("could not create FFmpeg output video stream");
 	}
 
-	/* Note: needs to increment with each stream */
-	_video_stream->id = 0;
-	_video_stream->codec = _codec_context;
+	_audio_stream = avformat_new_stream (_format_context, _audio_codec);
+	if (!_audio_stream) {
+		throw runtime_error ("could not create FFmpeg output audio stream");
+	}
+
+	_video_stream->id = _video_stream_index;
+	_video_stream->codec = _video_codec_context;
+
+	_audio_stream->id = _audio_stream_index;
+	_audio_stream->codec = _audio_codec_context;
 
-	if (avcodec_open2 (_codec_context, codec, &_options) < 0) {
-		throw runtime_error ("could not open FFmpeg codec");
+	if (avcodec_open2 (_video_codec_context, _video_codec, &_video_options) < 0) {
+		throw runtime_error ("could not open FFmpeg video codec");
+	}
+
+	int r = avcodec_open2 (_audio_codec_context, _audio_codec, 0);
+	if (r < 0) {
+		char buffer[256];
+		av_strerror (r, buffer, sizeof(buffer));
+		throw runtime_error (String::compose ("could not open FFmpeg audio codec (%1)", buffer));
 	}
 
 	if (avio_open (&_format_context->pb, _output.c_str(), AVIO_FLAG_WRITE) < 0) {
 		throw runtime_error ("could not open FFmpeg output file");
 	}
 
-	if (avformat_write_header (_format_context, &_options) < 0) {
+	if (avformat_write_header (_format_context, 0) < 0) {
 		throw runtime_error ("could not write header to FFmpeg output file");
 	}
 
@@ -120,26 +172,49 @@ FFmpegEncoder::go ()
 
 	while (!_player->pass ()) {}
 
-	while (true) {
+	if (_pending_audio->frames() > 0) {
+		audio_frame (_pending_audio->frames ());
+	}
+
+	/* Flush: drain packets still buffered in both encoders */
+
+	bool flushed_video = false;
+	bool flushed_audio = false;
+
+	while (!flushed_video || !flushed_audio) {
 		AVPacket packet;
 		av_init_packet (&packet);
 		packet.data = 0;
 		packet.size = 0;
 
 		int got_packet;
-		avcodec_encode_video2 (_codec_context, &packet, 0, &got_packet);
-		if (!got_packet) {
-			break;
+		avcodec_encode_video2 (_video_codec_context, &packet, 0, &got_packet);
+		if (got_packet) {
+			packet.stream_index = _video_stream_index;
+			av_interleaved_write_frame (_format_context, &packet);
+		} else {
+			flushed_video = true;
 		}
+		av_packet_unref (&packet);
 
-		packet.stream_index = 0;
-		av_interleaved_write_frame (_format_context, &packet);
+		av_init_packet (&packet);
+		packet.data = 0;
+		packet.size = 0;
+
+		avcodec_encode_audio2 (_audio_codec_context, &packet, 0, &got_packet);
+		if (got_packet) {
+			packet.stream_index = _audio_stream_index;
+			av_interleaved_write_frame (_format_context, &packet);
+		} else {
+			flushed_audio = true;
+		}
 		av_packet_unref (&packet);
 	}
 
 	av_write_trailer (_format_context);
 
-	avcodec_close (_codec_context);
+	avcodec_close (_video_codec_context);
+	avcodec_close (_audio_codec_context);
 	avio_close (_format_context->pb);
 	avformat_free_context (_format_context);
 }
@@ -155,10 +230,12 @@ FFmpegEncoder::video (shared_ptr<PlayerVideo> video, DCPTime time)
 		);
 
 	AVFrame* frame = av_frame_alloc ();
+	DCPOMATIC_ASSERT (frame);
 
 	for (int i = 0; i < 3; ++i) {
 		size_t const size = image->stride()[i] * image->size().height;
 		AVBufferRef* buffer = av_buffer_alloc (size);
+		DCPOMATIC_ASSERT (buffer);
 		/* XXX: inefficient */
 		memcpy (buffer->data, image->data()[i], size);
 		frame->buf[i] = av_buffer_ref (buffer);
@@ -178,13 +255,12 @@ FFmpegEncoder::video (shared_ptr<PlayerVideo> video, DCPTime time)
 	packet.size = 0;
 
 	int got_packet;
-	if (avcodec_encode_video2 (_codec_context, &packet, frame, &got_packet) < 0) {
+	if (avcodec_encode_video2 (_video_codec_context, &packet, frame, &got_packet) < 0) {
 		throw EncodeError ("FFmpeg video encode failed");
 	}
 
 	if (got_packet && packet.size) {
-		/* XXX: this should not be hard-wired */
-		packet.stream_index = 0;
+		packet.stream_index = _video_stream_index;
 		av_interleaved_write_frame (_format_context, &packet);
 		av_packet_unref (&packet);
 	}
@@ -207,7 +283,76 @@ FFmpegEncoder::video (shared_ptr<PlayerVideo> video, DCPTime time)
 void
 FFmpegEncoder::audio (shared_ptr<AudioBuffers> audio, DCPTime time)
 {
+	_pending_audio->append (audio);
+
+	while (_pending_audio->frames() >= _audio_codec_context->frame_size) {
+		audio_frame (_audio_codec_context->frame_size);
+	}
+}
+
+void
+FFmpegEncoder::audio_frame (int size)
+{
+	AVFrame* frame = av_frame_alloc ();
+	DCPOMATIC_ASSERT (frame);
+
+	int const channels = _audio_codec_context->channels;
+
+	int const buffer_size = av_samples_get_buffer_size (0, channels, size, _audio_codec_context->sample_fmt, 0);
+	DCPOMATIC_ASSERT (buffer_size >= 0);
+
+	void* samples = av_malloc (buffer_size);
+	DCPOMATIC_ASSERT (samples);
+
+	frame->nb_samples = size;
+	int r = avcodec_fill_audio_frame (frame, channels, _audio_codec_context->sample_fmt, (const uint8_t *) samples, buffer_size, 0);
+	DCPOMATIC_ASSERT (r >= 0);
+
+	float** p = _pending_audio->data ();
+	switch (_audio_codec_context->sample_fmt) {
+	case AV_SAMPLE_FMT_S16: /* interleaved: for each sample index, one value per channel */
+	{
+		int16_t* q = reinterpret_cast<int16_t*> (samples);
+		for (int i = 0; i < size; ++i) {
+			for (int j = 0; j < channels; ++j) {
+				*q++ = p[j][i] * 32767;
+			}
+		}
+		break;
+	}
+	case AV_SAMPLE_FMT_FLTP: /* planar: each channel's samples copied as one contiguous run */
+	{
+		float* q = reinterpret_cast<float*> (samples);
+		for (int i = 0; i < channels; ++i) {
+			memcpy (q, p[i], sizeof(float) * size);
+			q += size;
+		}
+		break;
+	}
+	default:
+		DCPOMATIC_ASSERT (false);
+	}
+
+	AVPacket packet;
+	av_init_packet (&packet);
+	packet.data = 0;
+	packet.size = 0;
+
+	int got_packet;
+	if (avcodec_encode_audio2 (_audio_codec_context, &packet, frame, &got_packet) < 0) {
+		throw EncodeError ("FFmpeg audio encode failed");
+	}
+
+	if (got_packet && packet.size) {
+		packet.stream_index = _audio_stream_index;
+		av_interleaved_write_frame (_format_context, &packet);
+		av_packet_unref (&packet);
+	}
+
+	av_free (samples);
+	av_frame_free (&frame);
 
+	_pending_audio->trim_start (size);
 }
 
 void
diff --git a/src/lib/ffmpeg_encoder.h b/src/lib/ffmpeg_encoder.h
index b5715e514..c7929c54f 100644
--- a/src/lib/ffmpeg_encoder.h
+++ b/src/lib/ffmpeg_encoder.h
@@ -52,12 +52,23 @@ private:
 	void audio (boost::shared_ptr<AudioBuffers>, DCPTime);
 	void subtitle (PlayerSubtitles, DCPTimePeriod);
 
-	AVCodecContext* _codec_context;
+	void setup_video ();
+	void setup_audio ();
+
+	void audio_frame (int size);
+
+	AVCodec* _video_codec;
+	AVCodecContext* _video_codec_context;
+	AVCodec* _audio_codec;
+	AVCodecContext* _audio_codec_context;
 	AVFormatContext* _format_context;
 	AVStream* _video_stream;
+	AVStream* _audio_stream;
 	AVPixelFormat _pixel_format;
-	AVDictionary* _options;
-	std::string _codec_name;
+	AVSampleFormat _sample_format;
+	AVDictionary* _video_options;
+	std::string _video_codec_name;
+	std::string _audio_codec_name;
 
 	mutable boost::mutex _mutex;
 	DCPTime _last_time;
@@ -65,6 +76,11 @@ private:
 	EventHistory _history;
 
 	boost::filesystem::path _output;
+
+	boost::shared_ptr<AudioBuffers> _pending_audio;
+
+	static int _video_stream_index;
+	static int _audio_stream_index;
 };
 
 #endif
diff --git a/test/ffmpeg_encoder_test.cc b/test/ffmpeg_encoder_test.cc
index c96e19f92..92cd9ff8e 100644
--- a/test/ffmpeg_encoder_test.cc
+++ b/test/ffmpeg_encoder_test.cc
@@ -40,6 +40,6 @@ BOOST_AUTO_TEST_CASE (ffmpeg_encoder_basic_test)
 	wait_for_jobs ();
 
 	shared_ptr<Job> job (new TranscodeJob (film));
-	FFmpegEncoder encoder (film, job, "build/test/ffmpeg_encoder_basic_test/test.mov", FFmpegEncoder::FORMAT_PRORES);
+	FFmpegEncoder encoder (film, job, "build/test/ffmpeg_encoder_basic_test.mov", FFmpegEncoder::FORMAT_PRORES);
 	encoder.go ();
 }
-- 
2.30.2