diff options
| -rw-r--r-- | src/scc_reader.cc | 212 | ||||
| -rw-r--r-- | src/sub_time.cc | 12 | ||||
| -rw-r--r-- | src/sub_time.h | 4 | ||||
| -rw-r--r-- | test/scc_reader_test.cc | 24 |
4 files changed, 174 insertions, 78 deletions
diff --git a/src/scc_reader.cc b/src/scc_reader.cc index 10fde09..deb853b 100644 --- a/src/scc_reader.cc +++ b/src/scc_reader.cc @@ -27,16 +27,23 @@ #include <boost/bind/bind.hpp> #include <boost/algorithm/string.hpp> #include <iostream> +#include <set> +#include <unordered_map> using std::pair; +using std::set; using std::string; +using std::unordered_map; using std::vector; using boost::function; using boost::optional; using namespace sub; +static Rational const frame_rate{30000, 1001}; + + /** @param s Subtitle string encoded in UTF-8 */ SCCReader::SCCReader(string s) { @@ -51,7 +58,7 @@ SCCReader::SCCReader(FILE* f) } -boost::optional<Time> +int convert_time(std::string time) { vector<string> parts; @@ -60,12 +67,8 @@ convert_time(std::string time) throw SCCReader(String::compose("Unrecognised time %1", time)); } - return Time::from_hmsf( - raw_convert<int>(parts[0]), - raw_convert<int>(parts[1]), - raw_convert<int>(parts[2]), - raw_convert<int>(parts[3]) - ); + int seconds = raw_convert<int>(parts[0]) * 3600 + raw_convert<int>(parts[1]) * 60 + raw_convert<int>(parts[2]); + return std::round(seconds * frame_rate.fraction() + raw_convert<int>(parts[3])); } @@ -142,22 +145,42 @@ preamble_address_code(string part) PAC pac; switch (bytes[1]) { - case 0x43: - case 0x45: - case 0x46: - case 0x49: - case 0x4a: - case 0x4c: - case 0x51: + /* Column 0 - from http://www.theneitherworld.com/mcpoodle/SCC_TOOLS/DOCS/SCC_FORMAT.HTML */ + case 0xd0: case 0x70: + case 0x51: + case 0xf1: case 0xc2: + case 0x62: + case 0x43: + case 0xe3: case 0xc4: + case 0x64: + case 0x45: + case 0xe5: + case 0x46: + case 0xe6: case 0xc7: + case 0x67: case 0xc8: + case 0x68: + case 0x49: + case 0xe9: + case 0x4a: + case 0xea: case 0xcb: + case 0x6b: + case 0x4c: + case 0xec: case 0xcd: - case 0xd0: - case 0xf1: + case 0x6d: + + /* RESUMEDIRECTCAPTIONING in ccextractor? */ + // case 0x29: + /* No documentation found for these... */ + // case 0x2f: + // case 0x40: + // case 0xe0: pac.row = ((bytes[1] & 0x20) >> 5) + 1; pac.underline = bytes[1] & 0x1; switch (bytes[1] & 0xf) { @@ -197,34 +220,36 @@ preamble_address_code(string part) } switch (bytes[1]) { + /* Columns 4 - 28 */ case 0x52: - case 0x54: - case 0x57: - case 0x58: - case 0x5b: - case 0x5d: - case 0x5e: - case 0x73: - case 0x75: - case 0x76: - case 0x79: - case 0x7a: - case 0x7c: - case 0x7f: + case 0xf2: case 0xd3: + case 0x73: + case 0x54: + case 0xf4: case 0xd5: + case 0x75: case 0xd6: - case 0xd9: - case 0xda: - case 0xdc: - case 0xdf: - case 0xf2: - case 0xf4: + case 0x76: + case 0x57: case 0xf7: + case 0x58: case 0xf8: + case 0xd9: + case 0x79: + case 0xda: + case 0x7a: + case 0x5b: case 0xfb: + case 0xdc: + case 0x7c: + case 0x5d: case 0xfd: + case 0x5e: case 0xfe: + case 0xdf: + case 0x7f: + pac.row = ((bytes[1] & 0x20) >> 5) + 1; pac.underline = bytes[1] & 0x1; pac.column = (bytes[1] & 0xe) * 2; if (adjust(pac, bytes[0])) { @@ -239,18 +264,19 @@ preamble_address_code(string part) void SCCReader::read(function<optional<string> ()> get_line) { - bool got_header = false; - - RawSubtitle displayed; - RawSubtitle non_displayed; + set<string> ignore; + ignore.insert("102f"); + ignore.insert("9420"); // RCL: resume caption loading; the next caption is "pop-on" + ignore.insert("9440"); // should be a line break? - displayed.vertical_position.line = 0; - displayed.vertical_position.reference = TOP_OF_SUBTITLE; + set<string> line_break; + line_break.insert("94e0"); - non_displayed.vertical_position.line = 0; - non_displayed.vertical_position.reference = TOP_OF_SUBTITLE; + unordered_map<string, char> replace; + replace["9229"] = '\''; - optional<sub::Time> time; + bool got_header = false; + int to_fix = 0; while (true) { auto line = get_line (); @@ -258,8 +284,6 @@ SCCReader::read(function<optional<string> ()> get_line) break; } - std::cout << *line << "\n"; - trim_right_if(*line, boost::is_any_of ("\n\r")); if (*line == "Scenarist_SCC V1.0" && !got_header) { @@ -280,53 +304,79 @@ SCCReader::read(function<optional<string> ()> get_line) throw SCCError(String::compose("No tab character found in line %1", *line)); } - time = convert_time(line->substr(0, tab)); + /* XXX: don't fuck about with this stuff, we just add a frame per two-bytes to the initial time, + * and probably do this NDF adjustment. + */ + int time_in_frames = convert_time(line->substr(0, tab)); vector<string> parts; boost::algorithm::split(parts, line->substr(tab + 1), boost::is_any_of(" ")); + vector<RawSubtitle> current_line; + optional<Time> from; + RawSubtitle current; + current.vertical_position.line = 0; + current.vertical_position.reference = TOP_OF_SUBTITLE; + + auto maybe_add = [&]() { + if (!current.text.empty()) { + std::cerr << "Add " << current.from << " " << current.to << " " << current.text << "\n"; + current_line.push_back(current); + } + current.text = ""; + }; + for (auto part: parts) { - if (part == "9420") { - /* RCL: resume caption loading - * The next caption is "pop-on" - */ - std::cout << "-->resume caption loading<--\n"; + if (ignore.find(part) != ignore.end()) { + ++time_in_frames; + continue; + } + std::cerr << "___" << part << "___\n"; + auto replace_iter = replace.find(part); + if (replace_iter != replace.end()) { + current.text = current.text.substr(0, current.text.length() - 1) + replace_iter->second; + ++time_in_frames; continue; + } + + if (line_break.find(part) != line_break.end()) { + maybe_add(); + current.vertical_position.line = current.vertical_position.line.get_value_or(0) + 1; } else if (part == "97a1") { /* TO: tab over * Move one column over */ - non_displayed.text += " "; + current.text += " "; } else if (part == "97a2") { /* TO: tab over * Move two columns over */ - non_displayed.text += " "; + current.text += " "; } else if (part == "9723") { /* TO: tab over * Move three columns over */ - non_displayed.text += " "; + current.text += " "; } else if (part == "94ae") { /* ENM: erase non-displayed memory */ - non_displayed.text = ""; - std::cout << "-->erase non-displayed<--\n"; + current.text = ""; } else if (part == "942f") { /* EOC: end of caption */ - displayed = non_displayed; - SUB_ASSERT(time); - displayed.from = *time; - non_displayed.text = ""; - std::cout << "-->end of caption<--\n"; + from = Time::from_frames(time_in_frames, frame_rate); } else if (part == "942c") { /* EDM: erase displayed memory */ - SUB_ASSERT(time); - displayed.to = *time; - /* XXX: last subtitle finished */ - std::cout << "-->erase displayed<--\n"; + from = Time::from_frames(time_in_frames, frame_rate); + } else if (part == "91ae" || part == "946e" || part == "94ce") { + maybe_add(); + current.italic = true; + } else if (part == "9120") { + maybe_add(); + current.italic = false; } else if (auto pac = preamble_address_code(part)) { - std::cout << "-->PAC<-- " << part << "\n"; - /* XXX */ + // std::cerr << "-" << part << "-PAC row=" << pac->row << "\n"; + maybe_add(); + current.vertical_position.line = pac->row; + current.vertical_position.lines = 16; } else { int text[2]; if (sscanf(part.c_str(), "%2x%2x", text, text + 1) < 2) { @@ -334,13 +384,31 @@ SCCReader::read(function<optional<string> ()> get_line) } for (int i = 0; i < 2; ++i) { if (text[i] != 0x80) { - non_displayed.text += static_cast<unsigned char>(text[i] & 0x7f); + current.text += static_cast<unsigned char>(text[i] & 0x7f); } } - std::cout << "part-as-chars " << part << " " << non_displayed.text << "\n"; + std::cerr << ">" << current.text << "\n"; } - // XXX - // time->add(sub::Time::from_hmsf(0, 0, 0, 1)); + time_in_frames++; } + + maybe_add(); + + for (auto i = _subs.rbegin(); i != _subs.rend(); ++i) { + if (to_fix == 0) { + break; + } + SUB_ASSERT(from); + i->to = *from; + --to_fix; + } + + for (auto sub: current_line) { + SUB_ASSERT(from); + sub.from = *from; + _subs.push_back(sub); + } + + to_fix = current_line.size(); } } diff --git a/src/sub_time.cc b/src/sub_time.cc index 763b4e4..6773e67 100644 --- a/src/sub_time.cc +++ b/src/sub_time.cc @@ -155,15 +155,17 @@ Time::from_hms (int h, int m, int s, int ms) return Time (h * 3600 + m * 60 + s, ms, Rational (1000, 1)); } -/** Create a Time from a number of frames. - * rate must be integer. - */ +/** Create a Time from a number of frames */ Time Time::from_frames (int f, Rational rate) { SUB_ASSERT (rate.denominator != 0); - SUB_ASSERT (rate.integer ()); - return Time (f / rate.integer_fraction(), f % rate.integer_fraction(), rate); + if (rate.integer()) { + return Time(f / rate.integer_fraction(), f % rate.integer_fraction(), rate); + } else { + auto const seconds = std::floor(f / rate.fraction()); + return Time(seconds, std::round(f - seconds * rate.fraction()), rate); + } } double diff --git a/src/sub_time.h b/src/sub_time.h index 93088cb..0ee8559 100644 --- a/src/sub_time.h +++ b/src/sub_time.h @@ -45,6 +45,10 @@ public: void add (Time t); void scale (float f); + boost::optional<Rational> frame_rate() const { + return _rate; + } + static Time from_hmsf (int h, int m, int s, int f, boost::optional<Rational> rate = boost::optional<Rational> ()); static Time from_hms (int h, int m, int s, int ms); static Time from_frames (int frames, Rational rate); diff --git a/test/scc_reader_test.cc b/test/scc_reader_test.cc index 1c55032..10444f7 100644 --- a/test/scc_reader_test.cc +++ b/test/scc_reader_test.cc @@ -26,7 +26,7 @@ #include <boost/filesystem.hpp> -BOOST_AUTO_TEST_CASE(scc_reader_test) +BOOST_AUTO_TEST_CASE(scc_reader_test1) { auto file = private_test / "mir.scc"; auto f = fopen(file.string().c_str(), "r"); @@ -37,6 +37,28 @@ BOOST_AUTO_TEST_CASE(scc_reader_test) for (auto sub: subs) { for (auto line: sub.lines) { + std::cout << ">Line " << *line.vertical_position.line << "\n"; + for (auto block: line.blocks) { + std::cout << block.text << "\n"; + } + } + } +} + + +BOOST_AUTO_TEST_CASE(scc_reader_test2) +{ + auto file = private_test / "NIKI•TOMI•BETO Surround captions_CEA-608_English.scc"; + auto f = fopen(file.string().c_str(), "r"); + BOOST_REQUIRE(f); + sub::SCCReader reader(f); + fclose(f); + auto subs = sub::collect<std::vector<sub::Subtitle>> (reader.subtitles()); + + for (auto sub: subs) { + std::cout << ">Time " << sub.from << " -> " << sub.to << "\n"; + for (auto line: sub.lines) { + std::cout << ">Line " << *line.vertical_position.line << "\n"; for (auto block: line.blocks) { std::cout << block.text << "\n"; } |
