diff options
| author | Carl Hetherington <cth@carlh.net> | 2015-09-22 16:15:08 +0100 |
|---|---|---|
| committer | Carl Hetherington <cth@carlh.net> | 2015-09-22 16:15:08 +0100 |
| commit | bdbe925a467f9b7149322ad8d1c090d4c1e6d5c3 (patch) | |
| tree | 007f81fe339a08e11afea6b567b79d75a11ad41f /src | |
| parent | a9370ddf1d55ca01307c086950d2294611d9e6a6 (diff) | |
Use uchardet to guess encoding of subtitle files and reject non-UTF-8.
Diffstat (limited to 'src')
| -rw-r--r-- | src/lib/exceptions.h | 8 | ||||
| -rw-r--r-- | src/lib/subrip.cc | 23 | ||||
| -rw-r--r-- | src/tools/wscript | 1 |
3 files changed, 32 insertions, 0 deletions
```diff
diff --git a/src/lib/exceptions.h b/src/lib/exceptions.h
index 7240611ee..6939f81a3 100644
--- a/src/lib/exceptions.h
+++ b/src/lib/exceptions.h
@@ -263,4 +263,12 @@ public:
 	ProgrammingError (std::string file, int line);
 };
 
+class TextEncodingError : public StringError
+{
+public:
+	TextEncodingError (std::string s)
+		: StringError (s)
+	{}
+};
+
 #endif
diff --git a/src/lib/subrip.cc b/src/lib/subrip.cc
index f19867952..d4adee428 100644
--- a/src/lib/subrip.cc
+++ b/src/lib/subrip.cc
@@ -23,10 +23,14 @@
 #include "subrip_content.h"
 #include <sub/subrip_reader.h>
 #include <sub/collect.h>
+#include <uchardet/uchardet.h>
+#include <iostream>
 
 #include "i18n.h"
 
 using std::vector;
+using std::cout;
+using std::string;
 using boost::shared_ptr;
 
 SubRip::SubRip (shared_ptr<const SubRipContent> content)
@@ -36,6 +40,25 @@ SubRip::SubRip (shared_ptr<const SubRipContent> content)
 		throw OpenFileError (content->path (0));
 	}
 
+	/* Guess the encoding */
+	uchardet_t det = uchardet_new ();
+	char buffer[1024];
+	while (!feof (f)) {
+		int const n = fread (buffer, 1, sizeof (buffer), f);
+		if (uchardet_handle_data (det, buffer, n)) {
+			break;
+		}
+	}
+
+	uchardet_data_end (det);
+	string charset = uchardet_get_charset (det);
+	uchardet_delete (det);
+
+	if (charset != "UTF-8") {
+		throw TextEncodingError (_("unrecognised character set; please use files encoded in UTF-8"));
+	}
+
+	rewind (f);
 	sub::SubripReader reader (f);
 	_subtitles = sub::collect<vector<sub::Subtitle> > (reader.subtitles ());
 }
diff --git a/src/tools/wscript b/src/tools/wscript
index 33a631e6e..b01eee7ca 100644
--- a/src/tools/wscript
+++ b/src/tools/wscript
@@ -29,6 +29,7 @@ def configure(conf):
 def build(bld):
     uselib = 'BOOST_THREAD BOOST_DATETIME OPENJPEG DCP XMLSEC CXML XMLPP AVFORMAT AVFILTER AVCODEC '
     uselib += 'AVUTIL SWSCALE POSTPROC CURL BOOST_FILESYSTEM SSH ZIP CAIROMM FONTCONFIG PANGOMM SUB MAGICK SNDFILE SAMPLERATE BOOST_REGEX '
+    uselib += 'UCHARDET '
     if bld.env.TARGET_WINDOWS:
         uselib += 'WINSOCK2'
 
```
