WIP: VP9 encoder. 1602-sign-language
authorCarl Hetherington <cth@carlh.net>
Tue, 8 Nov 2022 23:38:33 +0000 (00:38 +0100)
committerCarl Hetherington <cth@carlh.net>
Thu, 22 Dec 2022 23:12:00 +0000 (00:12 +0100)
src/lib/constants.h
src/lib/dcp_encoder.cc
src/lib/dcp_encoder.h
src/lib/vp9_encoder.cc [new file with mode: 0644]
src/lib/vp9_encoder.h [new file with mode: 0644]
src/lib/vp9_pcm_block.cc [new file with mode: 0644]
src/lib/vp9_pcm_block.h [new file with mode: 0644]
src/lib/wscript
test/data
test/sign_language_test.cc [new file with mode: 0644]
test/wscript

index b6476cc99b2c18a28eeca7bf047c946a23f0f4f8..cef8f4cff529f5064905524892b627a331ab1f85 100644 (file)
 #define CERTIFICATE_VALIDITY_PERIOD (10 * 365)
 #define SIGN_LANGUAGE_WIDTH 480
 #define SIGN_LANGUAGE_HEIGHT 640
+#define SIGN_LANGUAGE_VIDEO_FRAME_RATE 24
+#define SIGN_LANGUAGE_AUDIO_FRAME_RATE 48000
+#define SIGN_LANGUAGE_CHUNK_LENGTH_IN_SECONDS 2
+#define AUDIO_BYTES_PER_SAMPLE 3
 
 
 #endif
index 186ec59c2a9d5e49bd5c2d60cf60e29ab0292776..fc59452356d455c37c36cfa414cc4de3ec48b589 100644 (file)
@@ -83,6 +83,8 @@ DCPEncoder::DCPEncoder (shared_ptr<const Film> film, weak_ptr<Job> job)
                        }
                }
        }
+
+       _sign_language = contains_sign_language(film->content());
 }
 
 DCPEncoder::~DCPEncoder ()
@@ -126,13 +128,39 @@ DCPEncoder::video (shared_ptr<PlayerVideo> data, DCPTime time)
 {
        if (data->type() == VideoType::MAIN) {
                _j2k_encoder.encode(data, time);
+       } else {
+               _vp9_encoder.encode(data->image(boost::bind(&PlayerVideo::force, AV_PIX_FMT_YUV420P), VideoRange::VIDEO, false), time);
+       }
+}
+
+
+/** Work out how much of the audio in _pending_audio could be written, given
+ *  how far the VP9 encoder has got, and return early if there is nothing
+ *  pending or nothing new to write.  The actual mixing and writing is not
+ *  implemented yet.
+ */
+void
+DCPEncoder::write_pending_audio()
+{
+       if (_pending_audio.empty()) {
+               return;
+       }
+
+       /* The pending buffers hold audio, so their length in frames must be converted
+        * to a DCPTime using the audio frame rate rather than the video frame rate.
+        */
+       auto const& last_pending_audio = _pending_audio.back();
+       auto const pending_audio_end = last_pending_audio.second + DCPTime::from_frames(last_pending_audio.first->frames(), _film->audio_frame_rate());
+       /* We can only write up to the point that the VP9 encoder has reached */
+       auto const write_until = std::min(pending_audio_end, _vp9_encoder.end());
+       if (write_until == _last_audio_write_to) {
+               return;
        }
+
+       /* XXX: need to mix _pending_audio with the output from vp9_encoder */
 }
 
+
 void
 DCPEncoder::audio (shared_ptr<AudioBuffers> data, DCPTime time)
 {
-       _writer.write(data, time);
+       if (!_sign_language) {
+               _writer.write(data, time);
+       } else {
+               _pending_audio.push_back({data, time});
+               write_pending_audio();
+       }
 
        auto job = _job.lock ();
        DCPOMATIC_ASSERT (job);
index ad77f6951312c03ace8fe91e7fe4d91608f2f743..69ae5b7b29e82b9a03f278dd2eaf0e4a62fb0166 100644 (file)
@@ -25,6 +25,7 @@
 #include "encoder.h"
 #include "player_text.h"
 #include "j2k_encoder.h"
+#include "vp9_encoder.h"
 #include "writer.h"
 #include <dcp/atmos_frame.h>
 
@@ -36,7 +37,6 @@ class Player;
 class PlayerVideo;
 
 
-/** @class DCPEncoder */
 class DCPEncoder : public Encoder
 {
 public:
@@ -60,10 +60,16 @@ private:
        void text (PlayerText, TextType, boost::optional<DCPTextTrack>, dcpomatic::DCPTimePeriod);
        void atmos (std::shared_ptr<const dcp::AtmosFrame>, dcpomatic::DCPTime, AtmosMetadata metadata);
 
+       void write_pending_audio();
+
        Writer _writer;
        J2KEncoder _j2k_encoder;
+       VP9Encoder _vp9_encoder;
        bool _finishing;
        bool _non_burnt_subtitles;
+       bool _sign_language;
+       dcpomatic::DCPTime _last_audio_write_to;
+       std::list<std::pair<std::shared_ptr<AudioBuffers>, dcpomatic::DCPTime>> _pending_audio;
 
        boost::signals2::scoped_connection _player_video_connection;
        boost::signals2::scoped_connection _player_audio_connection;
diff --git a/src/lib/vp9_encoder.cc b/src/lib/vp9_encoder.cc
new file mode 100644 (file)
index 0000000..bb47568
--- /dev/null
@@ -0,0 +1,198 @@
+/*
+    Copyright (C) 2022 Carl Hetherington <cth@carlh.net>
+
+    This file is part of DCP-o-matic.
+
+    DCP-o-matic is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    DCP-o-matic is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.
+
+*/
+
+
+#include "constants.h"
+#include "exceptions.h"
+#include "image.h"
+#include "ffmpeg_wrapper.h"
+#include "vp9_encoder.h"
+#include "util.h"
+#include <dcp/raw_convert.h>
+extern "C" {
+#include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
+#include <libavutil/mem.h>
+}
+#include <boost/thread.hpp>
+#include <memory>
+
+
+#include "i18n.h"
+
+
+using std::shared_ptr;
+using std::string;
+using std::vector;
+using dcp::raw_convert;
+using namespace dcpomatic;
+
+
+/** AVIO write callback, used to capture the WebM header that the format
+ *  context writes.
+ *
+ *  @param opaque The VP9Encoder that was given to avio_alloc_context().
+ *  @param buffer Bytes that libavformat wants to write.
+ *  @param size Number of bytes at @p buffer.
+ *  @return Number of bytes consumed, which is always @p size.
+ */
+int
+write_packet(void* opaque, uint8_t* buffer, int size)
+{
+       /* Here comes the webm header from the format context.  It may be
+        * delivered in more than one call, so append to what we already have
+        * rather than replacing it.
+        */
+       auto& header = static_cast<VP9Encoder*>(opaque)->_header;
+       header.insert(header.end(), buffer, buffer + size);
+       return size;
+}
+
+
+/** Deleter for an AVCodecContext made by avcodec_alloc_context3() */
+static void
+free_codec_context(AVCodecContext* context)
+{
+       avcodec_free_context(&context);
+}
+
+
+/** Deleter for an AVIOContext made by avio_alloc_context().  The context's
+ *  buffer is freed first, as libavformat may have replaced the one that
+ *  was originally supplied.
+ */
+static void
+free_avio_context(AVIOContext* context)
+{
+       av_freep(&context->buffer);
+       avio_context_free(&context);
+}
+
+
+/** Open a libvpx-vp9 encoder configured for sign language video, and capture
+ *  the WebM header that goes with it into _header.
+ *
+ *  @throws EncodeError if any libavcodec/libavformat call fails.
+ *  @throws std::bad_alloc if an allocation fails.
+ */
+VP9Encoder::VP9Encoder()
+{
+       av_log_set_callback(ffmpeg_log_callback);
+
+       auto codec = avcodec_find_encoder_by_name("libvpx-vp9");
+       if (!codec) {
+               throw EncodeError("avcodec_find_encoder_by_name failed for libvpx-vp9");
+       }
+
+       /* Our destructor will not run if this constructor throws, so keep the
+        * context in a guard until everything has succeeded.
+        */
+       std::unique_ptr<AVCodecContext, decltype(&free_codec_context)> codec_context(avcodec_alloc_context3(codec), free_codec_context);
+       if (!codec_context) {
+               throw std::bad_alloc();
+       }
+
+       codec_context->width = SIGN_LANGUAGE_WIDTH;
+       codec_context->height = SIGN_LANGUAGE_HEIGHT;
+       codec_context->time_base = AVRational{1, SIGN_LANGUAGE_VIDEO_FRAME_RATE};
+       codec_context->pix_fmt = AV_PIX_FMT_YUV420P;
+       // We need to take up the same bits as the equivalent length of audio
+       // For some reason the ISDCF example script divides that bitrate by 2;
+       // maybe to give some overhead?
+       codec_context->bit_rate = SIGN_LANGUAGE_AUDIO_FRAME_RATE * AUDIO_BYTES_PER_SAMPLE * 8 / 2;
+
+       /* All these options are taken from
+        * https://github.com/ISDCF/Sign-Language-Video-Encoding/blob/master/encode-vp9-wav
+        */
+
+       AVDictionary* options = nullptr;
+
+       auto constexpr chunk_length_in_video_frames = SIGN_LANGUAGE_CHUNK_LENGTH_IN_SECONDS * SIGN_LANGUAGE_VIDEO_FRAME_RATE;
+
+       av_dict_set_int(&options, "keyint_min", chunk_length_in_video_frames, 0);
+       // GOP size
+       av_dict_set_int(&options, "g", chunk_length_in_video_frames, 0);
+       av_dict_set_int(&options, "speed", 6, 0);
+       av_dict_set_int(&options, "tile-columns", 4, 0);
+       av_dict_set_int(&options, "frame-parallel", 1, 0);
+       av_dict_set_int(&options, "static-thresh", 0, 0);
+       av_dict_set_int(&options, "max-intra-rate", 300, 0);
+       av_dict_set(&options, "deadline", "realtime", 0);
+       av_dict_set_int(&options, "lag-in-frames", 0, 0);
+       av_dict_set_int(&options, "error-resilient", 1, 0);
+       av_dict_set_int(&options, "minrate", codec_context->bit_rate, 0);
+       av_dict_set_int(&options, "maxrate", codec_context->bit_rate, 0);
+
+       int r = avcodec_open2(codec_context.get(), codec, &options);
+       /* avcodec_open2 leaves any options that it did not use in the dictionary, so it must be freed either way */
+       av_dict_free(&options);
+       if (r < 0) {
+               throw EncodeError(N_("avcodec_open2"), N_("VP9Encoder::VP9Encoder"), r);
+       }
+
+       /* The AVIO buffer must come from av_malloc() as libavformat is allowed to free or replace it */
+       int constexpr header_buffer_size = 32768;
+       auto header_buffer = static_cast<uint8_t*>(av_malloc(header_buffer_size));
+       if (!header_buffer) {
+               throw std::bad_alloc();
+       }
+       std::unique_ptr<AVIOContext, decltype(&free_avio_context)> avio_context(
+               avio_alloc_context(header_buffer, header_buffer_size, 1, this, nullptr, write_packet, nullptr),
+               free_avio_context
+               );
+       if (!avio_context) {
+               av_free(header_buffer);
+               throw std::bad_alloc();
+       }
+
+       AVFormatContext* format_context = nullptr;
+       r = avformat_alloc_output_context2(&format_context, nullptr, "webm", nullptr);
+       if (r < 0) {
+               throw EncodeError(N_("avformat_alloc_output_context2"), N_("VP9Encoder::VP9Encoder"), r);
+       }
+       /* Declared after avio_context so that the format context is freed first */
+       std::unique_ptr<AVFormatContext, decltype(&avformat_free_context)> format_context_guard(format_context, avformat_free_context);
+       format_context->pb = avio_context.get();
+
+       av_dict_set(&format_context->metadata, "MAJOR_BRAND", "isom", 0);
+       av_dict_set(&format_context->metadata, "MINOR_VERSION", "512", 0);
+       av_dict_set(&format_context->metadata, "COMPATIBLE_BRANDS", "isomiso2avc1mp41", 0);
+       av_dict_set(&format_context->metadata, "ENCODER", "Lavf59.27.100", 0);
+
+       auto stream = avformat_new_stream(format_context, codec);
+       if (!stream) {
+               throw EncodeError(N_("avformat_new_stream"), N_("VP9Encoder::VP9Encoder"));
+       }
+
+       stream->disposition |= AV_DISPOSITION_DEFAULT;
+       r = avcodec_parameters_from_context(stream->codecpar, codec_context.get());
+       if (r < 0) {
+               throw EncodeError(N_("avcodec_parameters_from_context"), N_("VP9Encoder::VP9Encoder"), r);
+       }
+       stream->avg_frame_rate = { SIGN_LANGUAGE_VIDEO_FRAME_RATE, 1 };
+       stream->codecpar->color_range = AVCOL_RANGE_MPEG;
+       stream->codecpar->chroma_location = AVCHROMA_LOC_LEFT;
+       stream->codecpar->field_order = AV_FIELD_PROGRESSIVE;
+
+       av_dict_set(&stream->metadata, "ENCODER", "Lavc59.37.100 libvpx-vp9", 0);
+       av_dict_set(&stream->metadata, "HANDLER_NAME", "VideoHandler", 0);
+       av_dict_set(&stream->metadata, "VENDOR_ID", "[0][0][0][0]", 0);
+
+       r = avformat_write_header(format_context, nullptr);
+       if (r < 0) {
+               throw EncodeError(N_("avformat_write_header"), N_("VP9Encoder::VP9Encoder"), r);
+       }
+
+       /* Make sure that anything still held in the AVIO buffer is passed to write_packet() */
+       avio_flush(avio_context.get());
+
+       /* Everything worked, so the destructor is now responsible for the codec context */
+       _codec_context = codec_context.release();
+}
+
+
+/** Close the encoder and free the codec context */
+VP9Encoder::~VP9Encoder()
+{
+       /* avcodec_close() would leave the context that we got from
+        * avcodec_alloc_context3() allocated; avcodec_free_context() closes
+        * and frees it, and does nothing if the pointer is null.
+        */
+       avcodec_free_context(&_codec_context);
+}
+
+
+/** Send one frame of sign language video to the VP9 encoder and, if the
+ *  encoder gives us a packet back, wrap that packet in a VP9PCMBlock.
+ *
+ *  @param image Image to encode; it must be YUV420P and of size
+ *  SIGN_LANGUAGE_WIDTH x SIGN_LANGUAGE_HEIGHT.
+ *  @param time Time of this image, from which the frame's PTS is calculated.
+ *  @throws std::bad_alloc if a frame cannot be allocated.
+ *  @throws EncodeError if the encoder reports an error.
+ */
+void
+VP9Encoder::encode(shared_ptr<const Image> image, DCPTime time)
+{
+       DCPOMATIC_ASSERT(image->pixel_format() == AV_PIX_FMT_YUV420P);
+       DCPOMATIC_ASSERT(image->size() == dcp::Size(SIGN_LANGUAGE_WIDTH, SIGN_LANGUAGE_HEIGHT));
+
+       auto frame = av_frame_alloc();
+       if (!frame) {
+               throw std::bad_alloc();
+       }
+
+       try {
+               for (int i = 0; i < 3; ++i) {
+                       /* We own the reference that create_buffer() gives us, so hand it
+                        * straight to the frame.  This avoids taking a second reference
+                        * (which could fail) only to drop the first.
+                        */
+                       auto buffer = _pending_images.create_buffer(image, i);
+                       frame->buf[i] = buffer;
+                       frame->data[i] = buffer->data;
+                       frame->linesize[i] = image->stride()[i];
+               }
+       } catch (...) {
+               /* This also releases any buffers already attached to the frame */
+               av_frame_free(&frame);
+               throw;
+       }
+
+       frame->width = image->size().width;
+       frame->height = image->size().height;
+       frame->format = image->pixel_format();
+       frame->pts = time.get() * SIGN_LANGUAGE_VIDEO_FRAME_RATE / DCPTime::HZ;
+
+       int r = avcodec_send_frame(_codec_context, frame);
+       av_frame_free(&frame);
+       if (r < 0) {
+               throw EncodeError(N_("avcodec_send_frame"), N_("VP9Encoder::encode"), r);
+       }
+
+       ffmpeg::Packet packet;
+       r = avcodec_receive_packet(_codec_context, packet.get());
+       if (r == AVERROR(EAGAIN)) {
+               /* The encoder needs more input before it can give us anything */
+               return;
+       }
+       if (r < 0) {
+               throw EncodeError(N_("avcodec_receive_packet"), N_("VP9Encoder::encode"), r);
+       }
+
+       _pcm_blocks.emplace_back(time, VP9PCMBlock(packet.get()->data, packet.get()->size, _header));
+       _end = time + DCPTime::from_seconds(SIGN_LANGUAGE_CHUNK_LENGTH_IN_SECONDS);
+}
+
diff --git a/src/lib/vp9_encoder.h b/src/lib/vp9_encoder.h
new file mode 100644 (file)
index 0000000..ca5baba
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+    Copyright (C) 2022 Carl Hetherington <cth@carlh.net>
+
+    This file is part of DCP-o-matic.
+
+    DCP-o-matic is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    DCP-o-matic is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.
+
+*/
+
+
+#ifndef DCPOMATIC_VP9_ENCODER_H
+#define DCPOMATIC_VP9_ENCODER_H
+
+
+#include "dcpomatic_time.h"
+#include "image_store.h"
+#include "vp9_pcm_block.h"
+
+
+struct AVCodecContext;
+
+
+/** @class VP9Encoder
+ *  @brief Encoder which takes images and makes VP9PCMBlock objects from them
+ *  using libvpx-vp9.
+ *
+ *  Objects of this class cannot be copied as they own an AVCodecContext.
+ */
+class VP9Encoder
+{
+public:
+       VP9Encoder();
+
+       VP9Encoder(VP9Encoder const&) = delete;
+       VP9Encoder& operator=(VP9Encoder const&) = delete;
+
+       ~VP9Encoder();
+
+       /** Encode an image.
+        *  @param image Image to encode.
+        *  @param time Time of the image.
+        */
+       void encode(std::shared_ptr<const Image> image, dcpomatic::DCPTime time);
+
+       /** @return the end time that was recorded when the encoder last produced a block */
+       dcpomatic::DCPTime end() const {
+               return _end;
+       }
+
+private:
+       /* The AVIO write callback needs to store what it is given in _header */
+       friend int write_packet(void*, uint8_t*, int);
+
+       AVCodecContext* _codec_context = nullptr;
+
+       ImageStore _pending_images;
+       /** Header written by the WebM muxer, which is passed to every VP9PCMBlock */
+       std::vector<uint8_t> _header;
+       /** Blocks that have been encoded, each with the time of the image that produced it */
+       std::vector<std::pair<dcpomatic::DCPTime, VP9PCMBlock>> _pcm_blocks;
+       dcpomatic::DCPTime _end;
+};
+
+
+#endif
+
diff --git a/src/lib/vp9_pcm_block.cc b/src/lib/vp9_pcm_block.cc
new file mode 100644 (file)
index 0000000..a606b4f
--- /dev/null
@@ -0,0 +1,71 @@
+/*
+    Copyright (C) 2022 Carl Hetherington <cth@carlh.net>
+
+    This file is part of DCP-o-matic.
+
+    DCP-o-matic is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    DCP-o-matic is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.
+
+*/
+
+
+#include "constants.h"
+#include "dcpomatic_assert.h"
+#include "util.h"
+#include "vp9_pcm_block.h"
+
+
+using std::vector;
+
+
+auto constexpr pcm_block_size = SIGN_LANGUAGE_AUDIO_FRAME_RATE * AUDIO_BYTES_PER_SAMPLE * SIGN_LANGUAGE_CHUNK_LENGTH_IN_SECONDS;
+
+
+/** Write @p value to the four bytes starting at @p out, most significant byte first */
+static void
+put_uint32_big_endian(uint8_t* out, uint32_t value)
+{
+       out[0] = (value >> 24) & 0xff;
+       out[1] = (value >> 16) & 0xff;
+       out[2] = (value >> 8) & 0xff;
+       out[3] = (value >> 0) & 0xff;
+}
+
+
+/** Build a block of pcm_block_size bytes containing a 20-byte header, then
+ *  the EBML header, then the VP9 segment.  The rest of the block is left as zero.
+ *
+ *  @param vp9_segment VP9 data to store; may only be null if @p vp9_segment_size is 0.
+ *  @param vp9_segment_size Size of @p vp9_segment in bytes; must not be negative.
+ *  @param ebml_header EBML header to store before the VP9 data.
+ *
+ *  The headers and segment must leave at least one byte of the block unused.
+ */
+VP9PCMBlock::VP9PCMBlock(uint8_t const* vp9_segment, int vp9_segment_size, vector<uint8_t> const& ebml_header)
+       : _data(pcm_block_size)
+{
+       size_t constexpr fixed_header_size = 20;
+
+       /* Check everything in one unsigned type, after making sure that nothing is negative
+        * and that no subtraction can wrap around.
+        */
+       DCPOMATIC_ASSERT(vp9_segment_size >= 0);
+       DCPOMATIC_ASSERT(vp9_segment || vp9_segment_size == 0);
+       auto const segment_size = static_cast<size_t>(vp9_segment_size);
+       DCPOMATIC_ASSERT(ebml_header.size() < _data.size() - fixed_header_size);
+       DCPOMATIC_ASSERT(segment_size < _data.size() - fixed_header_size - ebml_header.size());
+
+       auto const out = _data.data();
+
+       // H_1
+       std::fill(out, out + 4, uint8_t{0xff});
+       // Length of VP9 segment, big-endian, unsigned
+       put_uint32_big_endian(out + 4, static_cast<uint32_t>(segment_size));
+       // Length of PCM block, big-endian, unsigned
+       put_uint32_big_endian(out + 8, static_cast<uint32_t>(pcm_block_size));
+       // Length of VP9 EBML header, big-endian, unsigned
+       put_uint32_big_endian(out + 12, static_cast<uint32_t>(ebml_header.size()));
+       // H_2
+       std::fill(out + 16, out + 20, uint8_t{0xff});
+       // VP9 EBML header
+       std::copy(ebml_header.begin(), ebml_header.end(), out + fixed_header_size);
+       // VP9 payload
+       std::copy(vp9_segment, vp9_segment + segment_size, out + fixed_header_size + ebml_header.size());
+}
+
diff --git a/src/lib/vp9_pcm_block.h b/src/lib/vp9_pcm_block.h
new file mode 100644 (file)
index 0000000..10b51e5
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+    Copyright (C) 2022 Carl Hetherington <cth@carlh.net>
+
+    This file is part of DCP-o-matic.
+
+    DCP-o-matic is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    DCP-o-matic is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.
+
+*/
+
+
+#ifndef DCPOMATIC_VP9_PCM_BLOCK_H
+#define DCPOMATIC_VP9_PCM_BLOCK_H
+
+
+#include <vector>
+#include <stdint.h>
+
+
+/** @class VP9PCMBlock
+ *  @brief A block of bytes which holds a VP9 segment and its EBML header.
+ */
+class VP9PCMBlock
+{
+public:
+       /** @param vp9_segment VP9 data to put in the block.
+        *  @param vp9_segment_size Size of @p vp9_segment in bytes.
+        *  @param ebml_header EBML header to put in the block before the VP9 data.
+        */
+       VP9PCMBlock(uint8_t const* vp9_segment, int vp9_segment_size, std::vector<uint8_t> const& ebml_header);
+
+private:
+       /** The bytes of the block */
+       std::vector<uint8_t> _data;
+};
+
+
+#endif
+
index 9d7e5abaef529fd74c274308b8d5ed2991acda41..42aeeb29aef6ff76916cedc6cbaabb12ceea0dd7 100644 (file)
@@ -208,6 +208,8 @@ sources = """
           video_range.cc
           video_ring_buffers.cc
           video_type.cc
+          vp9_encoder.cc
+          vp9_pcm_block.cc
           writer.cc
           zipper.cc
           """
index bedc98152b3ea06a2e73ea14bda4178af652ed64..17ee84d24db14fe45023b02ae42d595562f6e4c3 160000 (submodule)
--- a/test/data
+++ b/test/data
@@ -1 +1 @@
-Subproject commit bedc98152b3ea06a2e73ea14bda4178af652ed64
+Subproject commit 17ee84d24db14fe45023b02ae42d595562f6e4c3
diff --git a/test/sign_language_test.cc b/test/sign_language_test.cc
new file mode 100644 (file)
index 0000000..e15356d
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+    Copyright (C) 2022 Carl Hetherington <cth@carlh.net>
+
+    This file is part of DCP-o-matic.
+
+    DCP-o-matic is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    DCP-o-matic is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.
+
+*/
+
+
+#include "lib/content.h"
+#include "lib/content_factory.h"
+#include "lib/video_content.h"
+#include "lib/video_type.h"
+#include "test.h"
+#include <boost/test/unit_test.hpp>
+
+
+/** Make a DCP from one piece of content whose video is marked as sign language, and verify it */
+BOOST_AUTO_TEST_CASE(simple_sign_language_encode)
+{
+       auto content = content_factory("test/data/10s_sign_language.mp4");
+       BOOST_REQUIRE(!content.empty());
+       /* Fail the test, rather than dereferencing a null pointer, if the content has no video */
+       BOOST_REQUIRE(static_cast<bool>(content[0]->video));
+       auto film = new_test_film2("simple_sign_language_encode", content);
+       content[0]->video->set_type(VideoType::SIGN_LANGUAGE);
+
+       make_and_verify_dcp(film);
+}
+
+
index c31a37a764348e13420a020118fbbe11ee06bcd4..b95fdbcfb46e9f8238d99166d04fd37d8af16b68 100644 (file)
@@ -130,6 +130,7 @@ def build(bld):
                  scaling_test.cc
                  scope_guard_test.cc
                  scoped_temporary_test.cc
+                 sign_language_test.cc
                  silence_padding_test.cc
                  shuffler_test.cc
                  skip_frame_test.cc