WIP: VP9 encoder.
[dcpomatic.git] / src / lib / vp9_encoder.cc
1 /*
2     Copyright (C) 2022 Carl Hetherington <cth@carlh.net>
3
4     This file is part of DCP-o-matic.
5
6     DCP-o-matic is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     DCP-o-matic is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.
18
19 */
20
21
#include "constants.h"
#include "exceptions.h"
#include "ffmpeg_wrapper.h"
#include "image.h"
#include "util.h"
#include "vp9_encoder.h"
#include <dcp/raw_convert.h>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/mem.h>
}
#include <boost/thread.hpp>
#include <memory>
34
35
36 #include "i18n.h"
37
38
39 using std::shared_ptr;
40 using std::string;
41 using std::vector;
42 using dcp::raw_convert;
43 using namespace dcpomatic;
44
45
46 int
47 write_packet(void* opaque, uint8_t* buffer, int size)
48 {
49         /* Here comes the webm header from the format context */
50         auto& header = reinterpret_cast<VP9Encoder*>(opaque)->_header;
51         header.resize(size);
52         memcpy(header.data(), buffer, size);
53         return 0;
54 }
55
56
57 VP9Encoder::VP9Encoder()
58 {
59         av_log_set_callback(ffmpeg_log_callback);
60
61         auto codec = avcodec_find_encoder_by_name("libvpx-vp9");
62         if (!codec) {
63                 throw EncodeError("avcodec_find_encoder_by_name failed for libvpx-vp9");
64         }
65
66         _codec_context = avcodec_alloc_context3(codec);
67         if (!_codec_context) {
68                 throw std::bad_alloc();
69         }
70
71         _codec_context->width = SIGN_LANGUAGE_WIDTH;
72         _codec_context->height = SIGN_LANGUAGE_HEIGHT;
73         _codec_context->time_base = AVRational{1, SIGN_LANGUAGE_VIDEO_FRAME_RATE};
74         _codec_context->pix_fmt = AV_PIX_FMT_YUV420P;
75         // We need to take up the same bits as the equivalent length of audio
76         // For some reason the ISDCF example script divides that bitrate by 2;
77         // maybe to give some overhead?
78         _codec_context->bit_rate = SIGN_LANGUAGE_AUDIO_FRAME_RATE * AUDIO_BYTES_PER_SAMPLE * 8 / 2;
79
80         /* All these options are taken from
81          * https://github.com/ISDCF/Sign-Language-Video-Encoding/blob/master/encode-vp9-wav
82          */
83
84         AVDictionary* options = nullptr;
85
86         auto constexpr chunk_length_in_video_frames = SIGN_LANGUAGE_CHUNK_LENGTH_IN_SECONDS * SIGN_LANGUAGE_VIDEO_FRAME_RATE;
87
88         av_dict_set_int(&options, "keyint_min", chunk_length_in_video_frames, 0);
89         // GOP size
90         av_dict_set_int(&options, "g", chunk_length_in_video_frames, 0);
91         av_dict_set_int(&options, "speed", 6, 0);
92         av_dict_set_int(&options, "tile-columns", 4, 0);
93         av_dict_set_int(&options, "frame-parallel", 1, 0);
94         av_dict_set_int(&options, "static-thresh", 0, 0);
95         av_dict_set_int(&options, "max-intra-rate", 300, 0);
96         av_dict_set(&options, "deadline", "realtime", 0);
97         av_dict_set_int(&options, "lag-in-frames", 0, 0);
98         av_dict_set_int(&options, "error-resilient", 1, 0);
99         av_dict_set_int(&options, "minrate", _codec_context->bit_rate, 0);
100         av_dict_set_int(&options, "maxrate", _codec_context->bit_rate, 0);
101
102         int r = avcodec_open2(_codec_context, codec, &options);
103         if (r < 0) {
104                 throw EncodeError(N_("avcodec_open2"), N_("VP9Encoder::VP9Encoder"));
105         }
106
107         vector<uint8_t> header_buffer(32768);
108         auto avio_context = avio_alloc_context(header_buffer.data(), header_buffer.size(), 1, this, nullptr, write_packet, nullptr);
109         if (!avio_context) {
110                 throw std::bad_alloc();
111         }
112         AVFormatContext* format_context;
113         r = avformat_alloc_output_context2(&format_context, nullptr, "webm", nullptr);
114         if (r < 0) {
115                 throw EncodeError(N_("avformat_alloc_output_context2"), N_("VP9Encoder::VP9Encoder"));
116         }
117         format_context->pb = avio_context;
118
119         av_dict_set(&format_context->metadata, "MAJOR_BRAND", "isom", 0);
120         av_dict_set(&format_context->metadata, "MINOR_VERSION", "512", 0);
121         av_dict_set(&format_context->metadata, "COMPATIBLE_BRANDS", "isomiso2avc1mp41", 0);
122         av_dict_set(&format_context->metadata, "ENCODER", "Lavf59.27.100", 0);
123
124         auto stream = avformat_new_stream(format_context, codec);
125         if (!stream) {
126                 throw EncodeError(N_("avformat_new_stream"), N_("VP9Encoder::VP9Encoder"));
127         }
128
129         stream->disposition |= AV_DISPOSITION_DEFAULT;
130         r = avcodec_parameters_from_context(stream->codecpar, _codec_context);
131         if (r < 0) {
132                 throw EncodeError(N_("avcodec_parameters_from_context"), N_("VP9Encoder::VP9Encoder"), r);
133         }
134         stream->avg_frame_rate = { 24, 1 };
135         stream->codecpar->color_range = AVCOL_RANGE_MPEG;
136         stream->codecpar->chroma_location = AVCHROMA_LOC_LEFT;
137         stream->codecpar->field_order = AV_FIELD_PROGRESSIVE;
138
139         av_dict_set(&stream->metadata, "ENCODER", "Lavc59.37.100 libvpx-vp9", 0);
140         av_dict_set(&stream->metadata, "HANDLER_NAME", "VideoHandler", 0);
141         av_dict_set(&stream->metadata, "VENDOR_ID", "[0][0][0][0]", 0);
142
143         AVDictionary* format_options = nullptr;
144         r = avformat_write_header(format_context, &format_options);
145         if (r < 0) {
146                 throw EncodeError(N_("avformat_write_header"), N_("VP9Encoder::VP9Encoder"));
147         }
148
149         avformat_free_context(format_context);
150 }
151
152
153 VP9Encoder::~VP9Encoder()
154 {
155         avcodec_close(_codec_context);
156 }
157
158
159 void
160 VP9Encoder::encode(shared_ptr<const Image> image, DCPTime time)
161 {
162         DCPOMATIC_ASSERT(image->pixel_format() == AV_PIX_FMT_YUV420P);
163         DCPOMATIC_ASSERT(image->size() == dcp::Size(SIGN_LANGUAGE_WIDTH, SIGN_LANGUAGE_HEIGHT));
164
165         auto frame = av_frame_alloc();
166         if (!frame) {
167                 throw std::bad_alloc();
168         }
169
170         for (int i = 0; i < 3; ++i) {
171                 auto buffer = _pending_images.create_buffer(image, i);
172                 frame->buf[i] = av_buffer_ref(buffer);
173                 frame->data[i] = buffer->data;
174                 frame->linesize[i] = image->stride()[i];
175                 av_buffer_unref(&buffer);
176         }
177
178         frame->width = image->size().width;
179         frame->height = image->size().height;
180         frame->format = image->pixel_format();
181         frame->pts = time.get() * SIGN_LANGUAGE_VIDEO_FRAME_RATE / DCPTime::HZ;
182
183         int r = avcodec_send_frame(_codec_context, frame);
184         av_frame_free(&frame);
185         if (r < 0) {
186                 throw EncodeError(N_("avcodec_send_frame"), N_("VP9Encoder::video"), r);
187         }
188
189         ffmpeg::Packet packet;
190         r = avcodec_receive_packet(_codec_context, packet.get());
191         if (r < 0 && r != AVERROR(EAGAIN)) {
192                 throw EncodeError(N_("avcodec_receive_packet"), N_("VP9Encoder::video"), r);
193         } else if (r >= 0) {
194                 _pcm_blocks.emplace_back(std::make_pair(time, VP9PCMBlock(packet.get()->data, packet.get()->size, _header)));
195                 _end = time + DCPTime::from_seconds(SIGN_LANGUAGE_CHUNK_LENGTH_IN_SECONDS);
196         }
197 }
198