WIP: SCC support. scc
author Carl Hetherington <cth@carlh.net>
Sat, 30 Sep 2023 10:51:56 +0000 (12:51 +0200)
committer Carl Hetherington <cth@carlh.net>
Sat, 30 Sep 2023 10:51:56 +0000 (12:51 +0200)
src/exceptions.h
src/scc_reader.cc [new file with mode: 0644]
src/scc_reader.h [new file with mode: 0644]
src/wscript
test/scc_reader_test.cc [new file with mode: 0644]
test/wscript

index 1895f110433f88f389193f4a780e8f6148276354..3115f4c76695454460e861dea0b2a86789f6c976 100644 (file)
@@ -106,6 +106,16 @@ public:
        {}
 };
 
+
+/** Exception used to report problems with SCC subtitle data; the
+ *  description given at construction is available through what().
+ */
+class SCCError : public std::runtime_error
+{
+public:
+       SCCError(std::string const& message) : std::runtime_error(message) {}
+};
+
+
 class UnknownFrameRateError : public std::runtime_error
 {
 public:
diff --git a/src/scc_reader.cc b/src/scc_reader.cc
new file mode 100644 (file)
index 0000000..10fde09
--- /dev/null
@@ -0,0 +1,346 @@
+/*
+    Copyright (C) 2023 Carl Hetherington <cth@carlh.net>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+
+#include "compose.hpp"
+#include "exceptions.h"
+#include "raw_convert.h"
+#include "scc_reader.h"
+#include "sub_assert.h"
+#include "util.h"
+#include <boost/bind/bind.hpp>
+#include <boost/algorithm/string.hpp>
+#include <iostream>
+
+
+using std::pair;
+using std::string;
+using std::vector;
+using boost::function;
+using boost::optional;
+using namespace sub;
+
+
+/** Read SCC subtitles from a string.
+ *  @param s Subtitle string encoded in UTF-8
+ */
+SCCReader::SCCReader(string s)
+{
+       /* The getter is bound to our by-value copy of the string, which lives until read() returns */
+       auto next_line = boost::bind(&get_line_string, &s);
+       read(next_line);
+}
+
+
+/** Read SCC subtitles from an already-open file.
+ *  @param f Subtitle file encoded in UTF-8
+ */
+SCCReader::SCCReader(FILE* f)
+{
+       auto next_line = boost::bind(&get_line_file, f);
+       read(next_line);
+}
+
+
+/** Convert the timecode found at the start of an SCC line.
+ *
+ *  @param time Timecode made of four integer fields separated by ':'; the fields
+ *  are handed, in order, to Time::from_hmsf().
+ *  @return The converted time.  Malformed input is reported by throwing rather
+ *  than by returning an empty optional.
+ *  @throws SCCError if @p time does not split into exactly four fields on ':'.
+ */
+boost::optional<Time>
+convert_time(std::string time)
+{
+       vector<string> parts;
+       boost::algorithm::split(parts, time, boost::is_any_of(":"));
+       if (parts.size() != 4) {
+               /* Throw the SCC exception type (not the SCCReader class), as read() does for other malformed input */
+               throw SCCError(String::compose("Unrecognised time %1", time));
+       }
+
+       return Time::from_hmsf(
+               raw_convert<int>(parts[0]),
+               raw_convert<int>(parts[1]),
+               raw_convert<int>(parts[2]),
+               raw_convert<int>(parts[3])
+               );
+}
+
+
+/** Result of decoding a preamble address code: a position plus the
+ *  colour and underline state that accompany it.
+ */
+struct PAC
+{
+       /** Colours that a PAC can select */
+       enum class Colour {
+               WHITE,
+               GREEN,
+               BLUE,
+               CYAN,
+               RED,
+               YELLOW,
+               MAGENTA
+       };
+
+       /** Row 0, column 0, white, not underlined */
+       PAC() = default;
+
+       PAC(int row_, int column_, Colour colour_ = Colour::WHITE, bool underline_ = false)
+               : row(row_)
+               , column(column_)
+               , colour(colour_)
+               , underline(underline_)
+       {}
+
+       int row = 0;
+       int column = 0;
+       Colour colour = Colour::WHITE;
+       bool underline = false;
+};
+
+
+/** Try to interpret one SCC word as a preamble address code (PAC).
+ *
+ *  @param part Two bytes written as hexadecimal digits, e.g. "9470".
+ *  @return The decoded PAC if the second byte is in one of the two tables
+ *  below and the first byte is one of the row selectors known to `adjust`;
+ *  otherwise an empty optional.
+ *  @throws SCCError if two hexadecimal bytes cannot be read from @p part.
+ */
+optional<PAC>
+preamble_address_code(string part)
+{
+       /* %x stores through an unsigned int*, so the destination must be unsigned */
+       unsigned int bytes[2];
+       if (sscanf(part.c_str(), "%2x%2x", &bytes[0], &bytes[1]) < 2) {
+               /* Throw the SCC exception type (not the SCCReader class) */
+               throw SCCError(String::compose("Failed to parse line part %1", part));
+       }
+
+       /* Add the row offset selected by the first byte to pac.row.
+        * Returns false, leaving pac untouched, if the byte is not a known selector.
+        */
+       auto adjust = [](PAC& pac, unsigned int byte) {
+               switch (byte) {
+               case 0x91:
+                       break;
+               case 0x92:
+                       pac.row += 2;
+                       break;
+               case 0x15:
+                       pac.row += 4;
+                       break;
+               case 0x16:
+                       pac.row += 6;
+                       break;
+               case 0x97:
+                       pac.row += 8;
+                       break;
+               case 0x10:
+                       pac.row += 10;
+                       break;
+               case 0x13:
+                       pac.row += 11;
+                       break;
+               case 0x94:
+                       pac.row += 13;
+                       break;
+               default:
+                       return false;
+               }
+
+               return true;
+       };
+
+       PAC pac;
+
+       /* Second bytes that carry a colour: bit 0 is underline, the low
+        * nibble picks the colour and bit 5 picks the first or second row
+        * of the pair chosen by the first byte.
+        */
+       switch (bytes[1]) {
+       case 0x43:
+       case 0x45:
+       case 0x46:
+       case 0x49:
+       case 0x4a:
+       case 0x4c:
+       case 0x51:
+       case 0x70:
+       case 0xc2:
+       case 0xc4:
+       case 0xc7:
+       case 0xc8:
+       case 0xcb:
+       case 0xcd:
+       case 0xd0:
+       case 0xf1:
+               pac.row = static_cast<int>((bytes[1] & 0x20) >> 5) + 1;
+               pac.underline = (bytes[1] & 0x1) != 0;
+               switch (bytes[1] & 0xf) {
+               case 0x0:
+               case 0x1:
+                       pac.colour = PAC::Colour::WHITE;
+                       break;
+               case 0x2:
+               case 0x3:
+                       pac.colour = PAC::Colour::GREEN;
+                       break;
+               case 0x4:
+               case 0x5:
+                       pac.colour = PAC::Colour::BLUE;
+                       break;
+               case 0x6:
+               case 0x7:
+                       pac.colour = PAC::Colour::CYAN;
+                       break;
+               case 0x8:
+               case 0x9:
+                       pac.colour = PAC::Colour::RED;
+                       break;
+               case 0xa:
+               case 0xb:
+                       pac.colour = PAC::Colour::YELLOW;
+                       break;
+               case 0xc:
+               case 0xd:
+                       pac.colour = PAC::Colour::MAGENTA;
+                       break;
+               }
+
+               if (adjust(pac, bytes[0])) {
+                       return pac;
+               }
+       }
+
+       /* Second bytes that carry a column: bit 0 is underline and
+        * bits 1-3, doubled, give the column.
+        */
+       switch (bytes[1]) {
+       case 0x52:
+       case 0x54:
+       case 0x57:
+       case 0x58:
+       case 0x5b:
+       case 0x5d:
+       case 0x5e:
+       case 0x73:
+       case 0x75:
+       case 0x76:
+       case 0x79:
+       case 0x7a:
+       case 0x7c:
+       case 0x7f:
+       case 0xd3:
+       case 0xd5:
+       case 0xd6:
+       case 0xd9:
+       case 0xda:
+       case 0xdc:
+       case 0xdf:
+       case 0xf2:
+       case 0xf4:
+       case 0xf7:
+       case 0xf8:
+       case 0xfb:
+       case 0xfd:
+       case 0xfe:
+               pac.underline = (bytes[1] & 0x1) != 0;
+               pac.column = static_cast<int>((bytes[1] & 0xe) * 2);
+               if (adjust(pac, bytes[0])) {
+                       return pac;
+               }
+       }
+
+       /* Not a PAC that we recognise */
+       return {};
+}
+
+
+/** Read SCC data one line at a time.
+ *
+ *  The first line must be the header "Scenarist_SCC V1.0".  Every later
+ *  non-empty line must be a timecode, a tab, then space-separated words of
+ *  four hexadecimal digits.  Each word is handled as a known control code,
+ *  a preamble address code, or a pair of text characters.
+ *
+ *  This is work in progress: progress is traced to std::cout and the
+ *  captions that are assembled here are not yet passed on anywhere.
+ *
+ *  @param get_line Called repeatedly to fetch the next line; reading stops
+ *  when it returns an empty optional.
+ *  @throws SCCError if the header is missing, a line has no tab, a timecode
+ *  is malformed or a word cannot be parsed as two hexadecimal bytes.
+ */
+void
+SCCReader::read(function<optional<string> ()> get_line)
+{
+       bool got_header = false;
+
+       RawSubtitle displayed;
+       RawSubtitle non_displayed;
+
+       displayed.vertical_position.line = 0;
+       displayed.vertical_position.reference = TOP_OF_SUBTITLE;
+
+       non_displayed.vertical_position.line = 0;
+       non_displayed.vertical_position.reference = TOP_OF_SUBTITLE;
+
+       optional<sub::Time> time;
+
+       while (true) {
+               auto line = get_line ();
+               if (!line) {
+                       break;
+               }
+
+               std::cout << *line << "\n";
+
+               trim_right_if(*line, boost::is_any_of ("\n\r"));
+
+               if (*line == "Scenarist_SCC V1.0" && !got_header) {
+                       got_header = true;
+                       continue;
+               }
+
+               if (!got_header) {
+                       throw SCCError("No header string found");
+               }
+
+               if (line->empty()) {
+                       continue;
+               }
+
+               auto const tab = line->find('\t');
+               if (tab == string::npos) {
+                       throw SCCError(String::compose("No tab character found in line %1", *line));
+               }
+
+               time = convert_time(line->substr(0, tab));
+
+               vector<string> parts;
+               boost::algorithm::split(parts, line->substr(tab + 1), boost::is_any_of(" "));
+
+               for (auto const& part: parts) {
+                       if (part.empty()) {
+                               /* split() yields an empty word for a doubled or trailing
+                                * space; there is nothing to decode in it.
+                                */
+                               continue;
+                       }
+
+                       if (part == "9420") {
+                               /* RCL: resume caption loading
+                                * The next caption is "pop-on"
+                                */
+                               std::cout << "-->resume caption loading<--\n";
+                               continue;
+                       } else if (part == "97a1") {
+                               /* TO: tab over
+                                * Move one column over
+                                */
+                               non_displayed.text += " ";
+                       } else if (part == "97a2") {
+                               /* TO: tab over
+                                * Move two columns over
+                                */
+                               non_displayed.text += "  ";
+                       } else if (part == "9723") {
+                               /* TO: tab over
+                                * Move three columns over
+                                */
+                               non_displayed.text += "   ";
+                       } else if (part == "94ae") {
+                               /* ENM: erase non-displayed memory */
+                               non_displayed.text = "";
+                               std::cout << "-->erase non-displayed<--\n";
+                       } else if (part == "942f") {
+                               /* EOC: end of caption */
+                               displayed = non_displayed;
+                               SUB_ASSERT(time);
+                               displayed.from = *time;
+                               non_displayed.text = "";
+                               std::cout << "-->end of caption<--\n";
+                       } else if (part == "942c") {
+                               /* EDM: erase displayed memory */
+                               SUB_ASSERT(time);
+                               displayed.to = *time;
+                               /* XXX: last subtitle finished */
+                               std::cout << "-->erase displayed<--\n";
+                       } else if (auto pac = preamble_address_code(part)) {
+                               std::cout << "-->PAC<-- " << part << "\n";
+                               /* XXX */
+                       } else {
+                               /* Anything else is taken to be two text characters.
+                                * %x stores through an unsigned int*, so the destination must be unsigned.
+                                */
+                               unsigned int text[2];
+                               if (sscanf(part.c_str(), "%2x%2x", &text[0], &text[1]) < 2) {
+                                       /* Throw the SCC exception type (not the SCCReader class) */
+                                       throw SCCError(String::compose("Failed to parse line part %1", part));
+                               }
+                               for (int i = 0; i < 2; ++i) {
+                                       /* 0x80 is skipped; otherwise the top bit is dropped and the low 7 bits kept */
+                                       if (text[i] != 0x80) {
+                                               non_displayed.text += static_cast<unsigned char>(text[i] & 0x7f);
+                                       }
+                               }
+                               std::cout << "part-as-chars " << part << " " << non_displayed.text << "\n";
+                       }
+                       // XXX
+                       // time->add(sub::Time::from_hmsf(0, 0, 0, 1));
+               }
+       }
+}
diff --git a/src/scc_reader.h b/src/scc_reader.h
new file mode 100644 (file)
index 0000000..d080124
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+    Copyright (C) 2023 Carl Hetherington <cth@carlh.net>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+
+#ifndef LIBSUB_SCC_READER_H
+#define LIBSUB_SCC_READER_H
+
+
+#include "reader.h"
+#include <boost/function.hpp>
+#include <list>
+
+
+namespace sub {
+
+/** Reader for SCC ("Scenarist_SCC V1.0") subtitle data.
+ *  Both constructors read all of their input before returning.
+ */
+class SCCReader : public Reader
+{
+public:
+       /** @param subs SCC data held in a UTF-8 string */
+       SCCReader(std::string subs);
+       /** @param f Open file containing UTF-8 SCC data */
+       SCCReader(FILE* f);
+
+private:
+       /** Parse the lines obtained by calling get_line until it returns an empty optional */
+       void read(boost::function<boost::optional<std::string> ()> get_line);
+};
+
+}
+
+#endif
index 563b3319f8e27ea7fe121d8dd8966245a9a36776..746895fd387fcdcd7900d5e2d73b14638b78d940 100644 (file)
@@ -24,6 +24,7 @@ def build(bld):
                  raw_subtitle.cc
                  reader.cc
                  reader_factory.cc
+                 scc_reader.cc
                  ssa_reader.cc
                  stl_binary_reader.cc
                  stl_binary_tables.cc
@@ -51,6 +52,7 @@ def build(bld):
               raw_subtitle.h
               reader.h
               reader_factory.h
+              scc_reader.h
               ssa_reader.h
               stl_binary_tables.h
               stl_binary_reader.h
diff --git a/test/scc_reader_test.cc b/test/scc_reader_test.cc
new file mode 100644 (file)
index 0000000..1c55032
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+    Copyright (C) 2014-2020 Carl Hetherington <cth@carlh.net>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+#include "scc_reader.h"
+#include "subtitle.h"
+#include "test.h"
+#include "exceptions.h"
+#include "collect.h"
+#include <boost/test/unit_test.hpp>
+#include <boost/filesystem.hpp>
+
+
+/* Read a private SCC test file and print the text of every block that the reader produces */
+BOOST_AUTO_TEST_CASE(scc_reader_test)
+{
+       auto const path = private_test / "mir.scc";
+       auto handle = fopen(path.string().c_str(), "r");
+       BOOST_REQUIRE(handle);
+       sub::SCCReader reader(handle);
+       fclose(handle);
+
+       auto const subs = sub::collect<std::vector<sub::Subtitle>> (reader.subtitles());
+       for (auto const& sub: subs) {
+               for (auto const& line: sub.lines) {
+                       for (auto const& block: line.blocks) {
+                               std::cout << block.text << "\n";
+                       }
+               }
+       }
+}
index 348b13853ebd692fd51d46a97a8a7e34c9298a39..579ad85da388fbdef541806cee31cb5556b11128 100644 (file)
@@ -22,6 +22,7 @@ def build(bld):
     obj.source = """
                  iso6937_test.cc
                  ssa_reader_test.cc
+                 scc_reader_test.cc
                  stl_binary_reader_test.cc
                  stl_binary_writer_test.cc
                  stl_text_reader_test.cc