X-Git-Url: https://git.carlh.net/gitweb/?a=blobdiff_plain;f=src%2Flib%2Fstring_text_file.cc;h=76abe547f35045413e1a7b945f72a939d59ea72a;hb=dba7e1137282b52a1bd6ad1d56fe6371a8c97e30;hp=bf19e482c9ea2e10e13773f39985525fc114ed44;hpb=c4403784febdbdd42e9c32e67fadb147f11fe566;p=dcpomatic.git diff --git a/src/lib/string_text_file.cc b/src/lib/string_text_file.cc index bf19e482c..76abe547f 100644 --- a/src/lib/string_text_file.cc +++ b/src/lib/string_text_file.cc @@ -1,5 +1,5 @@ /* - Copyright (C) 2014-2018 Carl Hetherington + Copyright (C) 2014-2020 Carl Hetherington This file is part of DCP-o-matic. @@ -18,81 +18,99 @@ */ -#include "string_text_file.h" + #include "cross.h" #include "exceptions.h" +#include "string_text_file.h" #include "string_text_file_content.h" -#include -#include +#include #include +#include +#include +#include #include #include #include #include "i18n.h" -using std::vector; + using std::cout; +using std::shared_ptr; using std::string; -using boost::shared_ptr; +using std::vector; using boost::scoped_array; using boost::optional; -using dcp::Data; +using dcp::ArrayData; using namespace dcpomatic; + StringTextFile::StringTextFile (shared_ptr content) { - Data in (content->path (0)); - - UErrorCode status = U_ZERO_ERROR; - UCharsetDetector* detector = ucsdet_open (&status); - ucsdet_setText (detector, reinterpret_cast (in.data().get()), in.size(), &status); - - UCharsetMatch const * match = ucsdet_detect (detector, &status); - char const * in_charset = ucsdet_getName (match, &status); - - UConverter* to_utf16 = ucnv_open (in_charset, &status); - /* This is a guess; I think we should be able to encode any input in 4 times its input size */ - scoped_array utf16 (new uint16_t[in.size() * 2]); - int const utf16_len = ucnv_toUChars ( - to_utf16, reinterpret_cast(utf16.get()), in.size() * 2, - reinterpret_cast (in.data().get()), in.size(), - &status - ); - - UConverter* to_utf8 = ucnv_open ("UTF-8", &status); - /* Another guess */ - scoped_array utf8 (new char[utf16_len * 2]); - ucnv_fromUChars (to_utf8, utf8.get(), utf16_len * 2, reinterpret_cast(utf16.get()), utf16_len, &status); - - /* Fix OS X line endings */ - size_t utf8_len = strlen (utf8.get ()); - for (size_t i = 0; i < utf8_len; ++i) { - if (utf8[i] == '\r' && ((i == utf8_len - 1) || utf8[i + 1] != '\n')) { - utf8[i] = '\n'; - } - } + string ext = content->path(0).extension().string(); + transform (ext.begin(), ext.end(), ext.begin(), ::tolower); - ucsdet_close (detector); - ucnv_close (to_utf16); - ucnv_close (to_utf8); + std::unique_ptr reader; - sub::Reader* reader = 0; + if (ext == ".stl") { + dcp::File f(content->path(0), "rb"); + if (!f) { + throw OpenFileError (f.path(), errno, OpenFileError::READ); + } + try { + reader.reset(new sub::STLBinaryReader(f.get())); + } catch (...) { + throw; + } - string ext = content->path(0).extension().string(); - transform (ext.begin(), ext.end(), ext.begin(), ::tolower); + } else { + /* Text-based file; sort out its character encoding before we try to parse it */ + + ArrayData in (content->path (0)); + + UErrorCode status = U_ZERO_ERROR; + UCharsetDetector* detector = ucsdet_open (&status); + ucsdet_setText (detector, reinterpret_cast(in.data()), in.size(), &status); + + UCharsetMatch const * match = ucsdet_detect (detector, &status); + char const * in_charset = ucsdet_getName (match, &status); + + UConverter* to_utf16 = ucnv_open (in_charset, &status); + /* This is a guess; I think we should be able to encode any input in 4 times its input size */ + scoped_array utf16 (new uint16_t[in.size() * 2]); + int const utf16_len = ucnv_toUChars ( + to_utf16, reinterpret_cast(utf16.get()), in.size() * 2, + reinterpret_cast(in.data()), in.size(), + &status + ); + + UConverter* to_utf8 = ucnv_open ("UTF-8", &status); + /* Another guess */ + scoped_array utf8 (new char[utf16_len * 2]); + ucnv_fromUChars (to_utf8, utf8.get(), utf16_len * 2, reinterpret_cast(utf16.get()), utf16_len, &status); + + /* Fix OS X line endings */ + size_t utf8_len = strlen (utf8.get ()); + for (size_t i = 0; i < utf8_len; ++i) { + if (utf8[i] == '\r' && ((i == utf8_len - 1) || utf8[i + 1] != '\n')) { + utf8[i] = '\n'; + } + } + + ucsdet_close (detector); + ucnv_close (to_utf16); + ucnv_close (to_utf8); - if (ext == ".srt") { - reader = new sub::SubripReader (utf8.get()); - } else if (ext == ".ssa" || ext == ".ass") { - reader = new sub::SSAReader (utf8.get()); + if (ext == ".srt") { + reader.reset(new sub::SubripReader(utf8.get())); + } else if (ext == ".ssa" || ext == ".ass") { + reader.reset(new sub::SSAReader(utf8.get())); + } } if (reader) { - _subtitles = sub::collect > (reader->subtitles ()); + _subtitles = sub::collect>(reader->subtitles()); } - - delete reader; } /** @return time of first subtitle, if there is one */ @@ -100,7 +118,7 @@ optional StringTextFile::first () const { if (_subtitles.empty()) { - return optional(); + return {}; } return ContentTime::from_seconds(_subtitles[0].from.all_as_seconds()); @@ -110,7 +128,7 @@ ContentTime StringTextFile::length () const { if (_subtitles.empty ()) { - return ContentTime (); + return {}; } return ContentTime::from_seconds (_subtitles.back().to.all_as_seconds ());