summaryrefslogtreecommitdiff
path: root/src/lib/gpu_j2k_encode_worker.cc
blob: ba0b2036c4457c2883b7f0c9c8c8288b6b87afc5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
/*
    Copyright (C) 2019 Carl Hetherington <cth@carlh.net>

    This file is part of DCP-o-matic.

    DCP-o-matic is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    DCP-o-matic is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.

*/

#include "gpu_j2k_encode_worker.h"
#include "dcp_video.h"
#include "cross.h"
#include "timer.h"
#include "dcpomatic_log.h"
#include <dcp/openjpeg_image.h>
extern "C" {
#include <poznanj2k/config/init_device.h>
#include <poznanj2k/types/image_types.h>
#include <poznanj2k/types/image.h>
#include <poznanj2k/preprocessing/mct.h>
#include <poznanj2k/dwt/dwt.h>
#include <poznanj2k/tier1/quantizer.h>
#include <poznanj2k/tier1/coeff_coder/gpu_coder.h>
#include <poznanj2k/tier2/codestream.h>
#include <poznanj2k/misc/memory_management.cuh>
#include <cuda_runtime_api.h>
}

#include "i18n.h"

using dcp::Data;
using boost::optional;
using boost::shared_ptr;

/** Construct a GPU encode worker, passing device index 0 to poznanj2k's
 *  init_device() so that the device is set up before any frames are encoded.
 */
GPUJ2KEncodeWorker::GPUJ2KEncodeWorker ()
{
	/* Only the first device is ever used at present */
	int const device_index = 0;
	init_device (device_index);
}

/** Encode one video frame to a JPEG2000 codestream using poznanj2k.
 *
 *  The frame is converted to XYZ, each of its three components is converted
 *  from int to float and handed to cuda_memcpy_htd(), and then the tile is
 *  run through poznanj2k's mct / fwt / quantize_tile / encode_tile stages
 *  before encode_codestream() writes the result into a buffer which is copied
 *  into the returned dcp::Data.
 *
 *  @param vf Frame to encode.
 *  @return The encoded frame, or an empty optional if the CUDA runtime reported
 *  an error when synchronising after the encode (NOTE(review): this assumes
 *  callers treat an empty optional as "encode failed" -- confirm against the caller).
 */
optional<Data>
GPUJ2KEncodeWorker::encode (shared_ptr<DCPVideo> vf)
{
	shared_ptr<dcp::OpenJPEGImage> image = DCPVideo::convert_to_xyz(vf->frame(), boost::bind(&Log::dcp_log, dcpomatic_log.get(), _1, _2));
	int const width = image->size().width;
	int const height = image->size().height;

	/* Describe the image to poznanj2k: a single tile covering the whole frame */
	type_image img;
	img.mct_compression_method = 0;
	img.width = width;
	img.height = height;
	img.num_components = 3;
	img.depth = 36;
	img.sign = UNSIGNED;
	/* XXX: 6 for 4K? */
	img.num_dlvls = 5;
	img.wavelet_type = 1;
	img.num_tiles = 1;
	img.tile_w = width;
	img.tile_h = height;
	img.coding_style = CODING_STYLE_PRECINCTS_DEFINED;
	img.prog_order = COMP_POS_RES_LY_PROG;
	img.num_layers = 1;
	img.num_range_bits = 12;
	img.use_mct = 1;
	img.use_part2_mct = 0;

	/* j2k_bandwidth() / 8 / frames_per_second(): presumably bits-per-second -> bytes-per-frame; TODO confirm units */
	set_coding_parameters (&img, (vf->j2k_bandwidth() / 8) / vf->frames_per_second());

	init_tiles (&img, width, height, 5, 5);
	type_tile* tile = &(img.tile[0]);

	PeriodTimer encode("poz-encode");

	// XXX: it's a big shame about this int -> float conversion
	for (int i = 0; i < 3; ++i) {
		type_tile_comp* c = &tile->tile_comp[i];
		c->tile_comp_no = i;
		int const pixels = c->width * c->height;
		/* Fetch the component's plane once rather than once per pixel */
		int const * const source = image->data(i);
		for (int j = 0; j < pixels; ++j) {
			c->img_data[j] = float (source[j]);
		}
		cuda_memcpy_htd (c->img_data, c->img_data_d, pixels * sizeof(type_data));
	}

	mct (&img, 10000, 0.000001, 1.0e-7);
	fwt (tile);
	quantize_tile (tile);
	encode_tile (tile);

	type_buffer buffer;
	init_buffer (&buffer);
	encode_codestream (&buffer, &img);
	/* cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is its replacement.
	   Keep the result so that a failure on the device is not silently ignored.
	*/
	cudaError_t const status = cudaDeviceSynchronize ();

	image_destroy(&img);

	if (status != cudaSuccess) {
		/* Don't hand back a codestream that the GPU failed to produce correctly */
		free (buffer.data);
		return optional<Data>();
	}

	// XXX: remove this memcpy
	dcp::Data encoded (buffer.bytes_count);
	memcpy (encoded.data().get(), buffer.data, buffer.bytes_count);
	free (buffer.data);
	return encoded;
}

/** Emit a LOG_TIMING entry recording that this encoder thread has started.
 *  The message carries the value of thread_id() and is tagged "GPU".
 */
void
GPUJ2KEncodeWorker::log_thread_start ()
{
	/* LOG_TIMING is defined outside this file (presumably in dcpomatic_log.h);
	   %1 is filled in with thread_id().
	*/
	LOG_TIMING ("start-encoder-thread thread=%1 GPU", thread_id());
}