diff options
| author | Carl Hetherington <cth@carlh.net> | 2021-08-01 02:01:06 +0200 |
|---|---|---|
| committer | Carl Hetherington <cth@carlh.net> | 2021-08-01 02:01:06 +0200 |
| commit | 0515e416e1e31f5063fe7b68cb794cc0872ff3a2 (patch) | |
| tree | ce62a6754c4ea4ea31c9d99af9c4f231d968607b /src | |
| parent | c7b4dae773bca9099d2ecdd025e3539516c81966 (diff) | |
Handle angle brackets / tags better in SubRip files.
Previously we assumed that any < starts a tag, and so parsed things
like << some text >> incorrectly. Now we search only for the tags
we are interested in and pass anything else through unchanged.
Diffstat (limited to 'src')
| -rw-r--r-- | src/subrip_reader.cc | 148 |
1 file changed, 76 insertions, 72 deletions
diff --git a/src/subrip_reader.cc b/src/subrip_reader.cc index d8972c1..94e1383 100644 --- a/src/subrip_reader.cc +++ b/src/subrip_reader.cc @@ -190,84 +190,88 @@ SubripReader::convert_time (string t) void SubripReader::convert_line (string t, RawSubtitle& p) { - enum { - TEXT, - TAG - } state = TEXT; - - string tag; - vector<Colour> colours; colours.push_back (Colour (1, 1, 1)); - for (size_t i = 0; i < t.size(); ++i) { - switch (state) { - case TEXT: - if (t[i] == '<' || t[i] == '{') { - state = TAG; - } else { - p.text += t[i]; + auto has_next = [](string line, size_t& index, string s) { + boost::to_lower(s); + auto next = line.substr(index, s.size()); + boost::to_lower(next); + if (next != s) { + return false; + } + + index += s.size(); + return true; + }; + + size_t i = 0; + while (i < t.size()) { + if (has_next(t, i, "<b>") || has_next(t, i, "{b}")) { + maybe_content (p); + p.bold = true; + } else if (has_next(t, i, "</b>") || has_next(t, i, "{/b}")) { + maybe_content (p); + p.bold = false; + } else if (has_next(t, i, "<i>") || has_next(t, i, "{i}")) { + maybe_content (p); + p.italic = true; + } else if (has_next(t, i, "</i>") || has_next(t, i, "{/i}")) { + maybe_content (p); + p.italic = false; + } else if (has_next(t, i, "<u>") || has_next(t, i, "{u}")) { + maybe_content (p); + p.underline = true; + } else if (has_next(t, i, "</u>") || has_next(t, i, "{/u}")) { + maybe_content (p); + p.underline = false; + } else if (has_next(t, i, "<font") || has_next(t, i, "<Font")) { + maybe_content (p); + boost::regex re (".*color=\"?#([[:xdigit:]]+)\"?"); + boost::smatch match; + string tag; + while (i < t.size() && t[i] != '>') { + tag += t[i]; + ++i; } - break; - case TAG: - if (t[i] == '>' || t[i] == '}') { - if (tag == "b") { - maybe_content (p); - p.bold = true; - } else if (tag == "/b") { - maybe_content (p); - p.bold = false; - } else if (tag == "i") { - maybe_content (p); - p.italic = true; - } else if (tag == "/i") { - maybe_content (p); - p.italic = false; - 
} else if (tag == "u") { - maybe_content (p); - p.underline = true; - } else if (tag == "/u") { - maybe_content (p); - p.underline = false; - } else if (boost::starts_with (tag, "font")) { - maybe_content (p); - boost::regex re (".*color=\"?#([[:xdigit:]]+)\"?"); - boost::smatch match; - if (boost::regex_search (tag, match, re) && string (match[1]).size() == 6) { - p.colour = Colour::from_rgb_hex (match[1]); - colours.push_back (p.colour); - } else { - re = boost::regex ( - ".*color=\"rgba\\(" - "[[:space:]]*([[:digit:]]+)[[:space:]]*," - "[[:space:]]*([[:digit:]]+)[[:space:]]*," - "[[:space:]]*([[:digit:]]+)[[:space:]]*," - "[[:space:]]*([[:digit:]]+)[[:space:]]*" - "\\)\"" - ); - if (boost::regex_search (tag, match, re) && match.size() == 5) { - p.colour.r = raw_convert<int>(string(match[1])) / 255.0; - p.colour.g = raw_convert<int>(string(match[2])) / 255.0; - p.colour.b = raw_convert<int>(string(match[3])) / 255.0; - colours.push_back (p.colour); - } else { - throw SubripError (tag, "a colour in the format #rrggbb or rgba(rr,gg,bb,aa)", _context); - } - } - } else if (tag == "/font") { - maybe_content (p); - SUB_ASSERT (!colours.empty()); - colours.pop_back (); - p.colour = colours.back (); - } else if (tag.size() > 0 && tag[0] == '\\') { - SSAReader::parse_style (p, tag, 288, 288); - } - tag.clear (); - state = TEXT; + ++i; + if (boost::regex_search (tag, match, re) && string (match[1]).size() == 6) { + p.colour = Colour::from_rgb_hex (match[1]); + colours.push_back (p.colour); } else { - tag += tolower (t[i]); + re = boost::regex ( + ".*color=\"rgba\\(" + "[[:space:]]*([[:digit:]]+)[[:space:]]*," + "[[:space:]]*([[:digit:]]+)[[:space:]]*," + "[[:space:]]*([[:digit:]]+)[[:space:]]*," + "[[:space:]]*([[:digit:]]+)[[:space:]]*" + "\\)\"" + ); + if (boost::regex_search (tag, match, re) && match.size() == 5) { + p.colour.r = raw_convert<int>(string(match[1])) / 255.0; + p.colour.g = raw_convert<int>(string(match[2])) / 255.0; + p.colour.b = 
raw_convert<int>(string(match[3])) / 255.0; + colours.push_back (p.colour); + } else { + throw SubripError (tag, "a colour in the format #rrggbb or rgba(rr,gg,bb,aa)", _context); + } } - break; + } else if (has_next(t, i, "</font>")) { + maybe_content (p); + SUB_ASSERT (!colours.empty()); + colours.pop_back (); + p.colour = colours.back (); + } else if (has_next(t, i, "{\\")) { + string ssa = "\\"; + while (i < t.size() && t[i] != '}') { + ssa += t[i]; + ++i; + } + ++i; + SSAReader::parse_style (p, ssa, 288, 288); + } else { + p.text += t[i]; + ++i; } } |
