summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/scc_reader.cc 212
-rw-r--r-- src/sub_time.cc 12
-rw-r--r-- src/sub_time.h 4
-rw-r--r-- test/scc_reader_test.cc 24
4 files changed, 174 insertions, 78 deletions
diff --git a/src/scc_reader.cc b/src/scc_reader.cc
index 10fde09..deb853b 100644
--- a/src/scc_reader.cc
+++ b/src/scc_reader.cc
@@ -27,16 +27,23 @@
#include <boost/bind/bind.hpp>
#include <boost/algorithm/string.hpp>
#include <iostream>
+#include <set>
+#include <unordered_map>
using std::pair;
+using std::set;
using std::string;
+using std::unordered_map;
using std::vector;
using boost::function;
using boost::optional;
using namespace sub;
+static Rational const frame_rate{30000, 1001};
+
+
/** @param s Subtitle string encoded in UTF-8 */
SCCReader::SCCReader(string s)
{
@@ -51,7 +58,7 @@ SCCReader::SCCReader(FILE* f)
}
-boost::optional<Time>
+int
convert_time(std::string time)
{
vector<string> parts;
@@ -60,12 +67,8 @@ convert_time(std::string time)
throw SCCReader(String::compose("Unrecognised time %1", time));
}
- return Time::from_hmsf(
- raw_convert<int>(parts[0]),
- raw_convert<int>(parts[1]),
- raw_convert<int>(parts[2]),
- raw_convert<int>(parts[3])
- );
+ int seconds = raw_convert<int>(parts[0]) * 3600 + raw_convert<int>(parts[1]) * 60 + raw_convert<int>(parts[2]);
+ return std::round(seconds * frame_rate.fraction() + raw_convert<int>(parts[3]));
}
@@ -142,22 +145,42 @@ preamble_address_code(string part)
PAC pac;
switch (bytes[1]) {
- case 0x43:
- case 0x45:
- case 0x46:
- case 0x49:
- case 0x4a:
- case 0x4c:
- case 0x51:
+ /* Column 0 - from http://www.theneitherworld.com/mcpoodle/SCC_TOOLS/DOCS/SCC_FORMAT.HTML */
+ case 0xd0:
case 0x70:
+ case 0x51:
+ case 0xf1:
case 0xc2:
+ case 0x62:
+ case 0x43:
+ case 0xe3:
case 0xc4:
+ case 0x64:
+ case 0x45:
+ case 0xe5:
+ case 0x46:
+ case 0xe6:
case 0xc7:
+ case 0x67:
case 0xc8:
+ case 0x68:
+ case 0x49:
+ case 0xe9:
+ case 0x4a:
+ case 0xea:
case 0xcb:
+ case 0x6b:
+ case 0x4c:
+ case 0xec:
case 0xcd:
- case 0xd0:
- case 0xf1:
+ case 0x6d:
+
+ /* RESUMEDIRECTCAPTIONING in ccextractor? */
+ // case 0x29:
+ /* No documentation found for these... */
+ // case 0x2f:
+ // case 0x40:
+ // case 0xe0:
pac.row = ((bytes[1] & 0x20) >> 5) + 1;
pac.underline = bytes[1] & 0x1;
switch (bytes[1] & 0xf) {
@@ -197,34 +220,36 @@ preamble_address_code(string part)
}
switch (bytes[1]) {
+ /* Columns 4 - 28 */
case 0x52:
- case 0x54:
- case 0x57:
- case 0x58:
- case 0x5b:
- case 0x5d:
- case 0x5e:
- case 0x73:
- case 0x75:
- case 0x76:
- case 0x79:
- case 0x7a:
- case 0x7c:
- case 0x7f:
+ case 0xf2:
case 0xd3:
+ case 0x73:
+ case 0x54:
+ case 0xf4:
case 0xd5:
+ case 0x75:
case 0xd6:
- case 0xd9:
- case 0xda:
- case 0xdc:
- case 0xdf:
- case 0xf2:
- case 0xf4:
+ case 0x76:
+ case 0x57:
case 0xf7:
+ case 0x58:
case 0xf8:
+ case 0xd9:
+ case 0x79:
+ case 0xda:
+ case 0x7a:
+ case 0x5b:
case 0xfb:
+ case 0xdc:
+ case 0x7c:
+ case 0x5d:
case 0xfd:
+ case 0x5e:
case 0xfe:
+ case 0xdf:
+ case 0x7f:
+ pac.row = ((bytes[1] & 0x20) >> 5) + 1;
pac.underline = bytes[1] & 0x1;
pac.column = (bytes[1] & 0xe) * 2;
if (adjust(pac, bytes[0])) {
@@ -239,18 +264,19 @@ preamble_address_code(string part)
void
SCCReader::read(function<optional<string> ()> get_line)
{
- bool got_header = false;
-
- RawSubtitle displayed;
- RawSubtitle non_displayed;
+ set<string> ignore;
+ ignore.insert("102f");
+ ignore.insert("9420"); // RCL: resume caption loading; the next caption is "pop-on"
+ ignore.insert("9440"); // should be a line break?
- displayed.vertical_position.line = 0;
- displayed.vertical_position.reference = TOP_OF_SUBTITLE;
+ set<string> line_break;
+ line_break.insert("94e0");
- non_displayed.vertical_position.line = 0;
- non_displayed.vertical_position.reference = TOP_OF_SUBTITLE;
+ unordered_map<string, char> replace;
+ replace["9229"] = '\'';
- optional<sub::Time> time;
+ bool got_header = false;
+ int to_fix = 0;
while (true) {
auto line = get_line ();
@@ -258,8 +284,6 @@ SCCReader::read(function<optional<string> ()> get_line)
break;
}
- std::cout << *line << "\n";
-
trim_right_if(*line, boost::is_any_of ("\n\r"));
if (*line == "Scenarist_SCC V1.0" && !got_header) {
@@ -280,53 +304,79 @@ SCCReader::read(function<optional<string> ()> get_line)
throw SCCError(String::compose("No tab character found in line %1", *line));
}
- time = convert_time(line->substr(0, tab));
+ /* XXX: don't over-complicate the timing here; we just add one frame per two-byte pair to the line's initial time,
+ * and probably do this NDF adjustment.
+ */
+ int time_in_frames = convert_time(line->substr(0, tab));
vector<string> parts;
boost::algorithm::split(parts, line->substr(tab + 1), boost::is_any_of(" "));
+ vector<RawSubtitle> current_line;
+ optional<Time> from;
+ RawSubtitle current;
+ current.vertical_position.line = 0;
+ current.vertical_position.reference = TOP_OF_SUBTITLE;
+
+ auto maybe_add = [&]() {
+ if (!current.text.empty()) {
+ std::cerr << "Add " << current.from << " " << current.to << " " << current.text << "\n";
+ current_line.push_back(current);
+ }
+ current.text = "";
+ };
+
for (auto part: parts) {
- if (part == "9420") {
- /* RCL: resume caption loading
- * The next caption is "pop-on"
- */
- std::cout << "-->resume caption loading<--\n";
+ if (ignore.find(part) != ignore.end()) {
+ ++time_in_frames;
+ continue;
+ }
+ std::cerr << "___" << part << "___\n";
+ auto replace_iter = replace.find(part);
+ if (replace_iter != replace.end()) {
+ current.text = current.text.substr(0, current.text.length() - 1) + replace_iter->second;
+ ++time_in_frames;
continue;
+ }
+
+ if (line_break.find(part) != line_break.end()) {
+ maybe_add();
+ current.vertical_position.line = current.vertical_position.line.get_value_or(0) + 1;
} else if (part == "97a1") {
/* TO: tab over
* Move one column over
*/
- non_displayed.text += " ";
+ current.text += " ";
} else if (part == "97a2") {
/* TO: tab over
* Move two columns over
*/
- non_displayed.text += " ";
+ current.text += " ";
} else if (part == "9723") {
/* TO: tab over
* Move three columns over
*/
- non_displayed.text += " ";
+ current.text += " ";
} else if (part == "94ae") {
/* ENM: erase non-displayed memory */
- non_displayed.text = "";
- std::cout << "-->erase non-displayed<--\n";
+ current.text = "";
} else if (part == "942f") {
/* EOC: end of caption */
- displayed = non_displayed;
- SUB_ASSERT(time);
- displayed.from = *time;
- non_displayed.text = "";
- std::cout << "-->end of caption<--\n";
+ from = Time::from_frames(time_in_frames, frame_rate);
} else if (part == "942c") {
/* EDM: erase displayed memory */
- SUB_ASSERT(time);
- displayed.to = *time;
- /* XXX: last subtitle finished */
- std::cout << "-->erase displayed<--\n";
+ from = Time::from_frames(time_in_frames, frame_rate);
+ } else if (part == "91ae" || part == "946e" || part == "94ce") {
+ maybe_add();
+ current.italic = true;
+ } else if (part == "9120") {
+ maybe_add();
+ current.italic = false;
} else if (auto pac = preamble_address_code(part)) {
- std::cout << "-->PAC<-- " << part << "\n";
- /* XXX */
+ // std::cerr << "-" << part << "-PAC row=" << pac->row << "\n";
+ maybe_add();
+ current.vertical_position.line = pac->row;
+ current.vertical_position.lines = 16;
} else {
int text[2];
if (sscanf(part.c_str(), "%2x%2x", text, text + 1) < 2) {
@@ -334,13 +384,31 @@ SCCReader::read(function<optional<string> ()> get_line)
}
for (int i = 0; i < 2; ++i) {
if (text[i] != 0x80) {
- non_displayed.text += static_cast<unsigned char>(text[i] & 0x7f);
+ current.text += static_cast<unsigned char>(text[i] & 0x7f);
}
}
- std::cout << "part-as-chars " << part << " " << non_displayed.text << "\n";
+ std::cerr << ">" << current.text << "\n";
}
- // XXX
- // time->add(sub::Time::from_hmsf(0, 0, 0, 1));
+ time_in_frames++;
}
+
+ maybe_add();
+
+ for (auto i = _subs.rbegin(); i != _subs.rend(); ++i) {
+ if (to_fix == 0) {
+ break;
+ }
+ SUB_ASSERT(from);
+ i->to = *from;
+ --to_fix;
+ }
+
+ for (auto sub: current_line) {
+ SUB_ASSERT(from);
+ sub.from = *from;
+ _subs.push_back(sub);
+ }
+
+ to_fix = current_line.size();
}
}
diff --git a/src/sub_time.cc b/src/sub_time.cc
index 763b4e4..6773e67 100644
--- a/src/sub_time.cc
+++ b/src/sub_time.cc
@@ -155,15 +155,17 @@ Time::from_hms (int h, int m, int s, int ms)
return Time (h * 3600 + m * 60 + s, ms, Rational (1000, 1));
}
-/** Create a Time from a number of frames.
- * rate must be integer.
- */
+/** Create a Time from a number of frames */
Time
Time::from_frames (int f, Rational rate)
{
SUB_ASSERT (rate.denominator != 0);
- SUB_ASSERT (rate.integer ());
- return Time (f / rate.integer_fraction(), f % rate.integer_fraction(), rate);
+ if (rate.integer()) {
+ return Time(f / rate.integer_fraction(), f % rate.integer_fraction(), rate);
+ } else {
+ auto const seconds = std::floor(f / rate.fraction());
+ return Time(seconds, std::round(f - seconds * rate.fraction()), rate);
+ }
}
double
diff --git a/src/sub_time.h b/src/sub_time.h
index 93088cb..0ee8559 100644
--- a/src/sub_time.h
+++ b/src/sub_time.h
@@ -45,6 +45,10 @@ public:
void add (Time t);
void scale (float f);
+ boost::optional<Rational> frame_rate() const {
+ return _rate;
+ }
+
static Time from_hmsf (int h, int m, int s, int f, boost::optional<Rational> rate = boost::optional<Rational> ());
static Time from_hms (int h, int m, int s, int ms);
static Time from_frames (int frames, Rational rate);
diff --git a/test/scc_reader_test.cc b/test/scc_reader_test.cc
index 1c55032..10444f7 100644
--- a/test/scc_reader_test.cc
+++ b/test/scc_reader_test.cc
@@ -26,7 +26,7 @@
#include <boost/filesystem.hpp>
-BOOST_AUTO_TEST_CASE(scc_reader_test)
+BOOST_AUTO_TEST_CASE(scc_reader_test1)
{
auto file = private_test / "mir.scc";
auto f = fopen(file.string().c_str(), "r");
@@ -37,6 +37,28 @@ BOOST_AUTO_TEST_CASE(scc_reader_test)
for (auto sub: subs) {
for (auto line: sub.lines) {
+ std::cout << ">Line " << *line.vertical_position.line << "\n";
+ for (auto block: line.blocks) {
+ std::cout << block.text << "\n";
+ }
+ }
+ }
+}
+
+
+BOOST_AUTO_TEST_CASE(scc_reader_test2)
+{
+ auto file = private_test / "NIKI•TOMI•BETO Surround captions_CEA-608_English.scc";
+ auto f = fopen(file.string().c_str(), "r");
+ BOOST_REQUIRE(f);
+ sub::SCCReader reader(f);
+ fclose(f);
+ auto subs = sub::collect<std::vector<sub::Subtitle>> (reader.subtitles());
+
+ for (auto sub: subs) {
+ std::cout << ">Time " << sub.from << " -> " << sub.to << "\n";
+ for (auto line: sub.lines) {
+ std::cout << ">Line " << *line.vertical_position.line << "\n";
for (auto block: line.blocks) {
std::cout << block.text << "\n";
}