From bdbe925a467f9b7149322ad8d1c090d4c1e6d5c3 Mon Sep 17 00:00:00 2001 From: Carl Hetherington Date: Tue, 22 Sep 2015 16:15:08 +0100 Subject: Use uchardet to guess encoding of subtitle files and reject non-UTF-8. --- wscript | 2 ++ 1 file changed, 2 insertions(+) (limited to 'wscript') diff --git a/wscript b/wscript index 3e84dcac5..150e9cb16 100644 --- a/wscript +++ b/wscript @@ -159,6 +159,8 @@ def configure(conf): else: conf.check_cfg(package='libcurl', args='--cflags --libs', uselib_store='CURL', mandatory=True) + # uchardet + conf.check_cfg(package='uchardet', args='--cflags --libs', uselib_store='UCHARDET', mandatory=True) # libsndfile conf.check_cfg(package='sndfile', args='--cflags --libs', uselib_store='SNDFILE', mandatory=True) -- cgit v1.2.3 From effc88be7dcf3e0848ed9dab8010e8c20cf4bb38 Mon Sep 17 00:00:00 2001 From: Carl Hetherington Date: Fri, 25 Sep 2015 12:45:48 +0100 Subject: Use libicu instead of uchardet and convert subrip files to UTF-8. --- ChangeLog | 5 +++++ cscript | 12 ++++++------ src/lib/subrip.cc | 46 ++++++++++++++++++++++++---------------------- wscript | 3 --- 4 files changed, 35 insertions(+), 31 deletions(-) (limited to 'wscript') diff --git a/ChangeLog b/ChangeLog index aeb563d52..b8e85cc87 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2015-09-25 Carl Hetherington + + * Detect and convert from non-UTF-8 + subtitle encodings. + 2015-09-21 Carl Hetherington * Version 2.3.5 released. diff --git a/cscript b/cscript index 6e2a62dcc..ff1a1582e 100644 --- a/cscript +++ b/cscript @@ -46,7 +46,7 @@ deb_depends['12.04'] = {'libc6': '2.15', 'libcurl3': '7.22.0-3ubuntu4', 'libzip2': '0.10-1ubuntu1', 'libsamplerate0': '0.1.8-4', - 'libuchardet0': '0.0.1-1'} + 'libicu48': '4.8.1.1-3'} deb_depends['14.04'] = {'libc6': '2.19-0ubuntu6', 'libssh-4': '0.6.1-0ubuntu3', @@ -63,7 +63,7 @@ deb_depends['14.04'] = {'libc6': '2.19-0ubuntu6', 'libcurl3': '7.35.0-1ubuntu2', 'libzip2': '0.10.1-1.2', 'libsamplerate0': '0.1.8-7', - 'libuchardet0': '0.0.1-1ubuntu1'} + 'libicu52': '52.1-3'} deb_depends['15.04'] = {'libc6': '2.21-0ubuntu4', 'libssh-4': '0.6.3-3ubuntu3', @@ -81,7 +81,7 @@ deb_depends['15.04'] = {'libc6': '2.21-0ubuntu4', 'libzip2': '0.11.2-1.2', 'libwxgtk3.0-0': '3.0.2-1', 'libsamplerate0': '0.1.8-8', - 'libuchardet0': '0.0.1-1ubuntu1'} + 'libicu52': '52.1-8'} deb_depends['7'] = {'libc6': '2.13', 'libssh-4': '0.5.4', @@ -100,7 +100,7 @@ deb_depends['7'] = {'libc6': '2.13', 'libcairomm-1.0-1': '1.10.0-1', 'libpangomm-1.4-1': '2.28.4-1', 'libsamplerate0': '0.1.8-5', - 'libuchardet': '0.0.1-1'} + 'libicu48': '4.8.1.1-12+deb7u3'} deb_depends['8'] = {'libc6': '2.19-18', 'libssh-4': '0.6.3-4', @@ -120,7 +120,7 @@ deb_depends['8'] = {'libc6': '2.19-18', 'libxcb-xfixes0': '1.10', 'libxcb-shape0': '1.10', 'libsamplerate0': '0.1.8-8', - 'libuchardet': '0.0.1-1'} + 'libicu52': '52.1-8+deb8u2'} deb_depends['unstable'] = {'libc6': '2.13', 'libssh-4': '0.5.4', @@ -137,7 +137,7 @@ deb_depends['unstable'] = {'libc6': '2.13', 'libcurl3': '7.26.0', 'libzip2': '0.10.1', 'libsamplerate0': '0.1.8-8', - 'libuchardet': '0.0.1-1'} + 'libicu52': '52.1-9'} def packages(name, packages, f): s = '%s: ' % name diff --git a/src/lib/subrip.cc b/src/lib/subrip.cc index d4adee428..6df8b236b 100644 --- a/src/lib/subrip.cc +++ b/src/lib/subrip.cc @@ -21,9 +21,11 @@ #include "cross.h" #include "exceptions.h" #include "subrip_content.h" +#include "data.h" #include #include -#include +#include +#include #include #include "i18n.h" @@ -32,34 +34,34 @@ using std::vector; using std::cout; using std::string; using boost::shared_ptr; +using boost::scoped_array; SubRip::SubRip (shared_ptr content) { - FILE* f = fopen_boost (content->path (0), "r"); - if (!f) { - throw OpenFileError (content->path (0)); - } + Data in (content->path (0)); - /* Guess the encoding */ - uchardet_t det = uchardet_new (); - char buffer[1024]; - while (!feof (f)) { - int const n = fread (buffer, 1, sizeof (buffer), f); - if (uchardet_handle_data (det, buffer, n)) { - break; - } - } + UErrorCode status = U_ZERO_ERROR; + UCharsetDetector* detector = ucsdet_open (&status); + ucsdet_setText (detector, reinterpret_cast (in.data().get()), in.size(), &status); - uchardet_data_end (det); - string charset = uchardet_get_charset (det); - uchardet_delete (det); + UCharsetMatch const * match = ucsdet_detect (detector, &status); + char const * in_charset = ucsdet_getName (match, &status); - if (charset != "UTF-8") { - throw TextEncodingError (_("unrecognised character set; please use files encoded in UTF-8")); - } + UConverter* to_utf16 = ucnv_open (in_charset, &status); + /* This is a guess; I think we should be able to encode any input in 4 times its input size */ + scoped_array utf16 (new uint16_t[in.size() * 2]); + int const utf16_len = ucnv_toUChars (to_utf16, utf16.get(), in.size() * 2, reinterpret_cast (in.data().get()), in.size(), &status); + + UConverter* to_utf8 = ucnv_open ("UTF-8", &status); + /* Another guess */ + scoped_array utf8 (new char[utf16_len * 2]); + ucnv_fromUChars (to_utf8, utf8.get(), utf16_len * 2, utf16.get(), utf16_len, &status); + + ucsdet_close (detector); + ucnv_close (to_utf16); + ucnv_close (to_utf8); - rewind (f); - sub::SubripReader reader (f); + sub::SubripReader reader (utf8.get()); _subtitles = sub::collect > (reader.subtitles ()); } diff --git a/wscript b/wscript index 8f6c487ee..245f3353c 100644 --- a/wscript +++ b/wscript @@ -159,9 +159,6 @@ def configure(conf): else: conf.check_cfg(package='libcurl', args='--cflags --libs', uselib_store='CURL', mandatory=True) - # uchardet - conf.check_cfg(package='uchardet', args='--cflags --libs', uselib_store='UCHARDET', mandatory=True) - # libsndfile conf.check_cfg(package='sndfile', args='--cflags --libs', uselib_store='SNDFILE', mandatory=True) -- cgit v1.2.3 From c982ff2fe7a956a79c4e935d78ed6b2c37752d75 Mon Sep 17 00:00:00 2001 From: Carl Hetherington Date: Fri, 25 Sep 2015 14:08:59 +0100 Subject: More build fixes. --- src/lib/wscript | 2 +- src/tools/wscript | 2 +- test/wscript | 2 +- wscript | 3 +++ 4 files changed, 6 insertions(+), 3 deletions(-) (limited to 'wscript') diff --git a/src/lib/wscript b/src/lib/wscript index b615b575d..3daf3ecf4 100644 --- a/src/lib/wscript +++ b/src/lib/wscript @@ -143,7 +143,7 @@ def build(bld): AVCODEC AVUTIL AVFORMAT AVFILTER SWSCALE SWRESAMPLE BOOST_FILESYSTEM BOOST_THREAD BOOST_DATETIME BOOST_SIGNALS2 BOOST_REGEX SNDFILE SAMPLERATE OPENJPEG POSTPROC TIFF MAGICK SSH DCP CXML GLIB LZMA XML++ - CURL ZIP FONTCONFIG PANGOMM CAIROMM XMLSEC SUB UCHARDET + CURL ZIP FONTCONFIG PANGOMM CAIROMM XMLSEC SUB ICU """ if bld.env.TARGET_OSX: diff --git a/src/tools/wscript b/src/tools/wscript index b01eee7ca..72d8ac9a8 100644 --- a/src/tools/wscript +++ b/src/tools/wscript @@ -29,7 +29,7 @@ def configure(conf): def build(bld): uselib = 'BOOST_THREAD BOOST_DATETIME OPENJPEG DCP XMLSEC CXML XMLPP AVFORMAT AVFILTER AVCODEC ' uselib += 'AVUTIL SWSCALE POSTPROC CURL BOOST_FILESYSTEM SSH ZIP CAIROMM FONTCONFIG PANGOMM SUB MAGICK SNDFILE SAMPLERATE BOOST_REGEX ' - uselib += 'UCHARDET ' + uselib += 'ICU ' if bld.env.TARGET_WINDOWS: uselib += 'WINSOCK2' diff --git a/test/wscript b/test/wscript index a92e344eb..6ee995e36 100644 --- a/test/wscript +++ b/test/wscript @@ -31,7 +31,7 @@ def build(bld): obj = bld(features='cxx cxxprogram') obj.name = 'unit-tests' obj.uselib = 'BOOST_TEST BOOST_THREAD BOOST_FILESYSTEM BOOST_DATETIME SNDFILE SAMPLERATE DCP OPENJPEG FONTCONFIG CAIROMM PANGOMM XMLPP ' - obj.uselib += 'AVFORMAT AVFILTER AVCODEC AVUTIL SWSCALE POSTPROC CXML MAGICK SUB GLIB CURL SSH XMLSEC BOOST_REGEX UCHARDET ' + obj.uselib += 'AVFORMAT AVFILTER AVCODEC AVUTIL SWSCALE POSTPROC CXML MAGICK SUB GLIB CURL SSH XMLSEC BOOST_REGEX ICU ' if bld.env.TARGET_WINDOWS: obj.uselib += 'WINSOCK2' obj.use = 'libdcpomatic2' diff --git a/wscript b/wscript index 245f3353c..22ec79c26 100644 --- a/wscript +++ b/wscript @@ -159,6 +159,9 @@ def configure(conf): else: conf.check_cfg(package='libcurl', args='--cflags --libs', uselib_store='CURL', mandatory=True) + # libicu + conf.check_cfg(package='icu-io', args='--cflags --libs', uselib_store='ICU', mandatory=True) + # libsndfile conf.check_cfg(package='sndfile', args='--cflags --libs', uselib_store='SNDFILE', mandatory=True) -- cgit v1.2.3 From fd2f5c71f7789230e3a1645997be72c552414288 Mon Sep 17 00:00:00 2001 From: Carl Hetherington Date: Fri, 25 Sep 2015 14:20:43 +0100 Subject: More build fixes. --- wscript | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'wscript') diff --git a/wscript b/wscript index 22ec79c26..35097734a 100644 --- a/wscript +++ b/wscript @@ -160,7 +160,8 @@ def configure(conf): conf.check_cfg(package='libcurl', args='--cflags --libs', uselib_store='CURL', mandatory=True) # libicu - conf.check_cfg(package='icu-io', args='--cflags --libs', uselib_store='ICU', mandatory=True) + if conf.check_cfg(package='icu-io', args='--cflags --libs', uselib_store='ICU', mandatory=False) andconf.check_cfg(package='icu-io', args='--cflags --libs', uselib_store='ICU', mandatory=False) is None: + Logs.pprint('RED', 'libicu not found') # libsndfile conf.check_cfg(package='sndfile', args='--cflags --libs', uselib_store='SNDFILE', mandatory=True) -- cgit v1.2.3 From 865c4d64b7a770efe12964f95a1bbd8cd3b0539f Mon Sep 17 00:00:00 2001 From: Carl Hetherington Date: Fri, 25 Sep 2015 14:36:26 +0100 Subject: More build fixes. --- wscript | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'wscript') diff --git a/wscript b/wscript index 35097734a..049e51972 100644 --- a/wscript +++ b/wscript @@ -160,7 +160,7 @@ def configure(conf): conf.check_cfg(package='libcurl', args='--cflags --libs', uselib_store='CURL', mandatory=True) # libicu - if conf.check_cfg(package='icu-io', args='--cflags --libs', uselib_store='ICU', mandatory=False) andconf.check_cfg(package='icu-io', args='--cflags --libs', uselib_store='ICU', mandatory=False) is None: + if conf.check_cfg(package='icu-io', args='--cflags --libs', uselib_store='ICU', mandatory=False) and conf.check_cfg(package='icu-io', args='--cflags --libs', uselib_store='ICU', mandatory=False) is None: Logs.pprint('RED', 'libicu not found') # libsndfile -- cgit v1.2.3 From 80eeaa74d20c43ba80da3db00196b9b020e55806 Mon Sep 17 00:00:00 2001 From: Carl Hetherington Date: Fri, 25 Sep 2015 14:47:59 +0100 Subject: More build fixes. --- wscript | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'wscript') diff --git a/wscript b/wscript index 049e51972..b21510b33 100644 --- a/wscript +++ b/wscript @@ -160,7 +160,7 @@ def configure(conf): conf.check_cfg(package='libcurl', args='--cflags --libs', uselib_store='CURL', mandatory=True) # libicu - if conf.check_cfg(package='icu-io', args='--cflags --libs', uselib_store='ICU', mandatory=False) and conf.check_cfg(package='icu-io', args='--cflags --libs', uselib_store='ICU', mandatory=False) is None: + if conf.check_cfg(package='icu-i18n', args='--cflags --libs', uselib_store='ICU', mandatory=False) and conf.check_cfg(package='icu', args='--cflags --libs', uselib_store='ICU', mandatory=False) is None: Logs.pprint('RED', 'libicu not found') # libsndfile -- cgit v1.2.3 From b4d4e8123c00e37f3a1b39e81fed06e0adc6fc81 Mon Sep 17 00:00:00 2001 From: Carl Hetherington Date: Fri, 25 Sep 2015 14:58:23 +0100 Subject: More build fixes. --- wscript | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'wscript') diff --git a/wscript b/wscript index b21510b33..38b6fba74 100644 --- a/wscript +++ b/wscript @@ -160,7 +160,7 @@ def configure(conf): conf.check_cfg(package='libcurl', args='--cflags --libs', uselib_store='CURL', mandatory=True) # libicu - if conf.check_cfg(package='icu-i18n', args='--cflags --libs', uselib_store='ICU', mandatory=False) and conf.check_cfg(package='icu', args='--cflags --libs', uselib_store='ICU', mandatory=False) is None: + if conf.check_cfg(package='icu-i18n', args='--cflags --libs', uselib_store='ICU', mandatory=False) is None and conf.check_cfg(package='icu', args='--cflags --libs', uselib_store='ICU', mandatory=False) is None: Logs.pprint('RED', 'libicu not found') # libsndfile -- cgit v1.2.3 From 7b9dcb418ca39dde37ea2a3fc4b55079e2b174bc Mon Sep 17 00:00:00 2001 From: Carl Hetherington Date: Fri, 25 Sep 2015 15:15:58 +0100 Subject: More build fixes. --- wscript | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'wscript') diff --git a/wscript b/wscript index 38b6fba74..c4e6fdf42 100644 --- a/wscript +++ b/wscript @@ -160,8 +160,9 @@ def configure(conf): conf.check_cfg(package='libcurl', args='--cflags --libs', uselib_store='CURL', mandatory=True) # libicu - if conf.check_cfg(package='icu-i18n', args='--cflags --libs', uselib_store='ICU', mandatory=False) is None and conf.check_cfg(package='icu', args='--cflags --libs', uselib_store='ICU', mandatory=False) is None: - Logs.pprint('RED', 'libicu not found') + if conf.check_cfg(package='icu-i18n', args='--cflags --libs', uselib_store='ICU', mandatory=False) is None: + if conf.check_cfg(package='icu', args='--cflags --libs', uselib_store='ICU', mandatory=False) is None: + Logs.pprint('RED', 'libicu not found') # libsndfile conf.check_cfg(package='sndfile', args='--cflags --libs', uselib_store='SNDFILE', mandatory=True) -- cgit v1.2.3 From 74efa45d8cad7e4cf85c85520d093f5d072d7f04 Mon Sep 17 00:00:00 2001 From: Carl Hetherington Date: Fri, 25 Sep 2015 20:31:05 +0100 Subject: Improve finding of libicu without pkgconfig. --- wscript | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'wscript') diff --git a/wscript b/wscript index c4e6fdf42..2970e8fc5 100644 --- a/wscript +++ b/wscript @@ -162,7 +162,19 @@ def configure(conf): # libicu if conf.check_cfg(package='icu-i18n', args='--cflags --libs', uselib_store='ICU', mandatory=False) is None: if conf.check_cfg(package='icu', args='--cflags --libs', uselib_store='ICU', mandatory=False) is None: - Logs.pprint('RED', 'libicu not found') + conf.check_cxx(fragment=""" + #include + int main(void) { + UErrorCode status = U_ZERO_ERROR; + UCharsetDetector* detector = ucsdet_open (&status); + return 0; }\n + """, + mandatory=True, + msg='Checking for libicu', + okmsg='yes', + libpath=['/usr/local/lib', '/usr/lib', '/usr/lib/x86_64-linux-gnu'], + lib=['icuio', 'icui18n', 'icudata'], + uselib_store='ICU') # libsndfile conf.check_cfg(package='sndfile', args='--cflags --libs', uselib_store='SNDFILE', mandatory=True) -- cgit v1.2.3 From ac2cfae5356809bea66ff6a614f96193f6d86da2 Mon Sep 17 00:00:00 2001 From: Carl Hetherington Date: Fri, 25 Sep 2015 20:39:42 +0100 Subject: Yet more libicu fiddling. --- wscript | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'wscript') diff --git a/wscript b/wscript index 2970e8fc5..fd2830a98 100644 --- a/wscript +++ b/wscript @@ -173,7 +173,7 @@ def configure(conf): msg='Checking for libicu', okmsg='yes', libpath=['/usr/local/lib', '/usr/lib', '/usr/lib/x86_64-linux-gnu'], - lib=['icuio', 'icui18n', 'icudata'], + lib=['icuio', 'icui18n', 'icudata', 'icuuc'], uselib_store='ICU') # libsndfile -- cgit v1.2.3 From e3609d01179f6bfd6d890c3300db1142071c73f9 Mon Sep 17 00:00:00 2001 From: Carl Hetherington Date: Sat, 26 Sep 2015 14:43:58 +0100 Subject: Bump version --- ChangeLog | 4 ++++ debian/changelog | 5 +++-- wscript | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'wscript') diff --git a/ChangeLog b/ChangeLog index b81fc19a5..a9af721eb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2015-09-26 Carl Hetherington + + * Version 2.3.6 released. + 2015-09-25 Carl Hetherington * Fix assertion failure when loading .MTS files (#702). diff --git a/debian/changelog b/debian/changelog index 4cb7d9714..5d06eb766 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,4 @@ -dcpomatic (2.3.5-1) UNRELEASED; urgency=low +dcpomatic (2.3.6-1) UNRELEASED; urgency=low [ Carl Hetherington ] * New upstream release. @@ -312,8 +312,9 @@ dcpomatic (2.3.5-1) UNRELEASED; urgency=low * New upstream release. * New upstream release. * New upstream release. + * New upstream release. - -- Carl Hetherington Mon, 21 Sep 2015 12:26:01 +0100 + -- Carl Hetherington Sat, 26 Sep 2015 14:43:58 +0100 dcpomatic (0.87-1) UNRELEASED; urgency=low diff --git a/wscript b/wscript index fd2830a98..fb300aa51 100644 --- a/wscript +++ b/wscript @@ -26,7 +26,7 @@ import distutils.spawn from waflib import Logs APPNAME = 'dcpomatic' -VERSION = '2.3.5devel' +VERSION = '2.3.6' def options(opt): opt.load('compiler_cxx') -- cgit v1.2.3 From 2590ffa2e829cb3b930481dfa299269698e5902a Mon Sep 17 00:00:00 2001 From: Carl Hetherington Date: Sat, 26 Sep 2015 14:43:58 +0100 Subject: Bump version --- wscript | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'wscript') diff --git a/wscript b/wscript index fb300aa51..0c08e499e 100644 --- a/wscript +++ b/wscript @@ -26,7 +26,7 @@ import distutils.spawn from waflib import Logs APPNAME = 'dcpomatic' -VERSION = '2.3.6' +VERSION = '2.3.6devel' def options(opt): opt.load('compiler_cxx') -- cgit v1.2.3