diff options
| -rw-r--r-- | src/ssa_reader.cc | 5 | ||||
| -rw-r--r-- | src/subrip_reader.cc | 151 | ||||
| -rw-r--r-- | test/subrip_reader_test.cc | 25 |
3 files changed, 104 insertions, 77 deletions
diff --git a/src/ssa_reader.cc b/src/ssa_reader.cc index 4a9be0e..39b99d2 100644 --- a/src/ssa_reader.cc +++ b/src/ssa_reader.cc @@ -24,7 +24,7 @@ #include "subtitle.h" #include "compose.hpp" #include <boost/algorithm/string.hpp> -#include <boost/bind.hpp> +#include <boost/bind/bind.hpp> #include <iostream> #include <vector> @@ -35,6 +35,9 @@ using std::cout; using boost::optional; using boost::function; using namespace boost::algorithm; +#if BOOST_VERSION >= 106100 +using namespace boost::placeholders; +#endif using namespace sub; /** @param s Subtitle string encoded in UTF-8 */ diff --git a/src/subrip_reader.cc b/src/subrip_reader.cc index 8ba7c7d..94e1383 100644 --- a/src/subrip_reader.cc +++ b/src/subrip_reader.cc @@ -190,89 +190,88 @@ SubripReader::convert_time (string t) void SubripReader::convert_line (string t, RawSubtitle& p) { - enum { - TEXT, - TAG - } state = TEXT; - - string tag; - vector<Colour> colours; colours.push_back (Colour (1, 1, 1)); - /* XXX: missing <font> support */ - /* XXX: nesting of tags e.g. <b>foo<i>bar<b>baz</b>fred</i>jim</b> might - not work, I think. - */ + auto has_next = [](string line, size_t& index, string s) { + boost::to_lower(s); + auto next = line.substr(index, s.size()); + boost::to_lower(next); + if (next != s) { + return false; + } - for (size_t i = 0; i < t.size(); ++i) { - switch (state) { - case TEXT: - if (t[i] == '<' || t[i] == '{') { - state = TAG; - } else { - p.text += t[i]; + index += s.size(); + return true; + }; + + size_t i = 0; + while (i < t.size()) { + if (has_next(t, i, "<b>") || has_next(t, i, "{b}")) { + maybe_content (p); + p.bold = true; + } else if (has_next(t, i, "</b>") || has_next(t, i, "{/b}")) { + maybe_content (p); + p.bold = false; + } else if (has_next(t, i, "<i>") || has_next(t, i, "{i}")) { + maybe_content (p); + p.italic = true; + } else if (has_next(t, i, "</i>") || has_next(t, i, "{/i}")) { + maybe_content (p); + p.italic = false; + } else if (has_next(t, i, "<u>") || has_next(t, i, "{u}")) { + maybe_content (p); + p.underline = true; + } else if (has_next(t, i, "</u>") || has_next(t, i, "{/u}")) { + maybe_content (p); + p.underline = false; + } else if (has_next(t, i, "<font") || has_next(t, i, "<Font")) { + maybe_content (p); + boost::regex re (".*color=\"?#([[:xdigit:]]+)\"?"); + boost::smatch match; + string tag; + while (i < t.size() && t[i] != '>') { + tag += t[i]; + ++i; } - break; - case TAG: - if (t[i] == '>' || t[i] == '}') { - if (tag == "b") { - maybe_content (p); - p.bold = true; - } else if (tag == "/b") { - maybe_content (p); - p.bold = false; - } else if (tag == "i") { - maybe_content (p); - p.italic = true; - } else if (tag == "/i") { - maybe_content (p); - p.italic = false; - } else if (tag == "u") { - maybe_content (p); - p.underline = true; - } else if (tag == "/u") { - maybe_content (p); - p.underline = false; - } else if (boost::starts_with (tag, "font")) { - maybe_content (p); - boost::regex re (".*color=\"?#([[:xdigit:]]+)\"?"); - boost::smatch match; - if (boost::regex_search (tag, match, re) && string (match[1]).size() == 6) { - p.colour = Colour::from_rgb_hex (match[1]); - colours.push_back (p.colour); - } else { - re = boost::regex ( - ".*color=\"rgba\\(" - "[[:space:]]*([[:digit:]]+)[[:space:]]*," - "[[:space:]]*([[:digit:]]+)[[:space:]]*," - "[[:space:]]*([[:digit:]]+)[[:space:]]*," - "[[:space:]]*([[:digit:]]+)[[:space:]]*" - "\\)\"" - ); - if (boost::regex_search (tag, match, re) && match.size() == 5) { - p.colour.r = raw_convert<int>(string(match[1])) / 255.0; - p.colour.g = raw_convert<int>(string(match[2])) / 255.0; - p.colour.b = raw_convert<int>(string(match[3])) / 255.0; - colours.push_back (p.colour); - } else { - throw SubripError (tag, "a colour in the format #rrggbb or rgba(rr,gg,bb,aa)", _context); - } - } - } else if (tag == "/font") { - maybe_content (p); - SUB_ASSERT (!colours.empty()); - colours.pop_back (); - p.colour = colours.back (); - } else if (tag.size() > 0 && tag[0] == '\\') { - SSAReader::parse_style (p, tag, 288, 288); - } - tag.clear (); - state = TEXT; + ++i; + if (boost::regex_search (tag, match, re) && string (match[1]).size() == 6) { + p.colour = Colour::from_rgb_hex (match[1]); + colours.push_back (p.colour); } else { - tag += tolower (t[i]); + re = boost::regex ( + ".*color=\"rgba\\(" + "[[:space:]]*([[:digit:]]+)[[:space:]]*," + "[[:space:]]*([[:digit:]]+)[[:space:]]*," + "[[:space:]]*([[:digit:]]+)[[:space:]]*," + "[[:space:]]*([[:digit:]]+)[[:space:]]*" + "\\)\"" + ); + if (boost::regex_search (tag, match, re) && match.size() == 5) { + p.colour.r = raw_convert<int>(string(match[1])) / 255.0; + p.colour.g = raw_convert<int>(string(match[2])) / 255.0; + p.colour.b = raw_convert<int>(string(match[3])) / 255.0; + colours.push_back (p.colour); + } else { + throw SubripError (tag, "a colour in the format #rrggbb or rgba(rr,gg,bb,aa)", _context); + } } - break; + } else if (has_next(t, i, "</font>")) { + maybe_content (p); + SUB_ASSERT (!colours.empty()); + colours.pop_back (); + p.colour = colours.back (); + } else if (has_next(t, i, "{\\")) { + string ssa = "\\"; + while (i < t.size() && t[i] != '}') { + ssa += t[i]; + ++i; + } + ++i; + SSAReader::parse_style (p, ssa, 288, 288); + } else { + p.text += t[i]; + ++i; } } diff --git a/test/subrip_reader_test.cc b/test/subrip_reader_test.cc index 818b6ad..2daf84a 100644 --- a/test/subrip_reader_test.cc +++ b/test/subrip_reader_test.cc @@ -308,6 +308,24 @@ BOOST_AUTO_TEST_CASE (subrip_reader_convert_line_test) r._subs.clear (); rs = sub::RawSubtitle(); + r.convert_line ("<B>This is <I>nesting</I> of subtitles</B>", rs); + BOOST_CHECK_EQUAL (r._subs.size(), 3); + i = r._subs.begin(); + BOOST_CHECK_EQUAL (i->text, "This is "); + BOOST_CHECK_EQUAL (i->bold, true); + BOOST_CHECK_EQUAL (i->italic, false); + ++i; + BOOST_CHECK_EQUAL (i->text, "nesting"); + BOOST_CHECK_EQUAL (i->bold, true); + BOOST_CHECK_EQUAL (i->italic, true); + ++i; + BOOST_CHECK_EQUAL (i->text, " of subtitles"); + BOOST_CHECK_EQUAL (i->bold, true); + BOOST_CHECK_EQUAL (i->italic, false); + ++i; + r._subs.clear (); + + rs = sub::RawSubtitle(); r.convert_line ("<font color=\"#ff00ff\">simple color</font>", rs); BOOST_CHECK_EQUAL (r._subs.size(), 1); BOOST_CHECK_EQUAL (r._subs.front().text, "simple color"); @@ -377,6 +395,13 @@ BOOST_AUTO_TEST_CASE (subrip_reader_convert_line_test) BOOST_CHECK (fabs (i->colour.g) < 0.01); BOOST_CHECK_CLOSE (i->colour.b, 1, 0.1); r._subs.clear (); + + rs = sub::RawSubtitle(); + r.convert_line ("<< angle brackets but no HTML >>", rs); + BOOST_CHECK_EQUAL (r._subs.size(), 1); + i = r._subs.begin (); + BOOST_CHECK_EQUAL (i->text, "<< angle brackets but no HTML >>"); + r._subs.clear(); } /** Test SubripReader::convert_time */ |
