Merge branch '1.0' of git.carlh.net:git/libsub into 1.0
[libsub.git] / src / ssa_reader.cc
index d4908a2eacd0fc989f811d40ff6d1e7bbab1876a..630eb29a47ff8442cd6a4a5ed13c4d93087990ad 100644 (file)
@@ -34,6 +34,7 @@ using std::stringstream;
 using std::vector;
 using std::map;
 using std::cout;
+using std::list;
 using boost::optional;
 using boost::function;
 using namespace boost::algorithm;
@@ -56,17 +57,23 @@ class Style
 {
 public:
        Style ()
-               : font_size (24)
+               : font_size (72)
                , primary_colour (255, 255, 255)
                , bold (false)
                , italic (false)
+               , underline (false)
+               , vertical_reference (BOTTOM_OF_SCREEN)
+               , vertical_margin (0)
        {}
 
        Style (string format_line, string style_line)
-               : font_size (24)
+               : font_size (72)
                , primary_colour (255, 255, 255)
                , bold (false)
                , italic (false)
+               , underline (false)
+               , vertical_reference (BOTTOM_OF_SCREEN)
+               , vertical_margin (0)
        {
                vector<string> keys;
                split (keys, format_line, is_any_of (","));
@@ -94,10 +101,27 @@ public:
                                bold = style[i] == "-1";
                        } else if (keys[i] == "Italic") {
                                italic = style[i] == "-1";
+                       } else if (keys[i] == "Underline") {
+                               underline = style[i] == "-1";
                        } else if (keys[i] == "BorderStyle") {
                                if (style[i] == "1") {
                                        effect = SHADOW;
                                }
+                       } else if (keys[i] == "Alignment") {
+                               /* These values from libass' source code */
+                               switch (raw_convert<int> (style[i]) & 12) {
+                               case 4:
+                                       vertical_reference = TOP_OF_SCREEN;
+                                       break;
+                               case 8:
+                                       vertical_reference = CENTRE_OF_SCREEN;
+                                       break;
+                               case 0:
+                                       vertical_reference = BOTTOM_OF_SCREEN;
+                                       break;
+                               }
+                       } else if (keys[i] == "MarginV") {
+                               vertical_margin = raw_convert<int> (style[i]);
                        }
                }
        }
@@ -110,7 +134,10 @@ public:
        optional<Colour> back_colour;
        bool bold;
        bool italic;
+       bool underline;
        optional<Effect> effect;
+       VerticalReference vertical_reference;
+       int vertical_margin;
 
 private:
        Colour colour (int c) const
@@ -137,6 +164,132 @@ SSAReader::parse_time (string t) const
                );
 }
 
+/** @param base RawSubtitle filled in with any required common values.
+ *  @param line SSA line string (i.e. just the subtitle, possibly with embedded stuff)
+ *  @return List of RawSubtitles to represent line with vertical reference TOP_OF_SUBTITLE.
+ */
+list<RawSubtitle>
+SSAReader::parse_line (RawSubtitle base, string line)
+{
+       enum {
+               TEXT,
+               STYLE,
+               BACKSLASH
+       } state = TEXT;
+
+       list<RawSubtitle> subs;
+       RawSubtitle current = base;
+       string style;
+
+       if (!current.vertical_position.reference) {
+               current.vertical_position.reference = BOTTOM_OF_SCREEN;
+       }
+
+       if (!current.vertical_position.proportional) {
+               current.vertical_position.proportional = 0;
+       }
+
+       /* We must have a font size, as there could be a margin specified
+          in pixels and in that case we must know how big the subtitle
+          lines are to work out the position on screen.
+       */
+       if (!current.font_size.points()) {
+               current.font_size.set_points (72);
+       }
+
+       /* Count the number of line breaks */
+       int line_breaks = 0;
+       for (size_t i = 0; i < line.length() - 1; ++i) {
+               if (line[i] == '\\' && (line[i+1] == 'n' || line[i+1] == 'N')) {
+                       ++line_breaks;
+               }
+       }
+
+       /* Imagine that the screen is 792 points (i.e. 11 inches) high (as with DCP) */
+       double const line_size = current.font_size.proportional(792) * 1.2;
+
+       /* Tweak vertical_position accordingly */
+       switch (current.vertical_position.reference.get()) {
+       case TOP_OF_SCREEN:
+       case TOP_OF_SUBTITLE:
+               /* Nothing to do */
+               break;
+       case CENTRE_OF_SCREEN:
+               current.vertical_position.proportional = current.vertical_position.proportional.get() - ((line_breaks + 1) * line_size) / 2;
+               break;
+       case BOTTOM_OF_SCREEN:
+               current.vertical_position.proportional = current.vertical_position.proportional.get() + line_breaks * line_size;
+               break;
+       }
+
+       for (size_t i = 0; i < line.length(); ++i) {
+               char const c = line[i];
+               switch (state) {
+               case TEXT:
+                       if (c == '{') {
+                               state = STYLE;
+                       } else if (c == '\\') {
+                               state = BACKSLASH;
+                       } else if (c != '\r' && c != '\n') {
+                               current.text += c;
+                       }
+                       break;
+               case STYLE:
+                       if (c == '}' || c == '\\') {
+                               if (!current.text.empty ()) {
+                                       subs.push_back (current);
+                                       current.text = "";
+                               }
+                               if (style == "\\i1") {
+                                       current.italic = true;
+                               } else if (style == "\\i0" || style == "\\i") {
+                                       current.italic = false;
+                               } else if (style == "\\b1") {
+                                       current.bold = true;
+                               } else if (style == "\\b0") {
+                                       current.bold = false;
+                               } else if (style == "\\an1" || style == "\\an2" || style == "\\an3") {
+                                       current.vertical_position.reference = sub::BOTTOM_OF_SCREEN;
+                               } else if (style == "\\an4" || style == "\\an5" || style == "\\an6") {
+                                       current.vertical_position.reference = sub::CENTRE_OF_SCREEN;
+                               } else if (style == "\\an7" || style == "\\an8" || style == "\\an9") {
+                                       current.vertical_position.reference = sub::TOP_OF_SCREEN;
+                               }
+
+                               style = "";
+                       }
+
+                       if (c == '}') {
+                               state = TEXT;
+                       } else {
+                               style += c;
+                       }
+                       break;
+               case BACKSLASH:
+                       if (c == 'n' || c == 'N') {
+                               if (!current.text.empty ()) {
+                                       subs.push_back (current);
+                                       current.text = "";
+                               }
+                               /* Move down one line (1.2 times the font size) */
+                               if (current.vertical_position.reference.get() == BOTTOM_OF_SCREEN) {
+                                       current.vertical_position.proportional = current.vertical_position.proportional.get() - line_size;
+                               } else {
+                                       current.vertical_position.proportional = current.vertical_position.proportional.get() + line_size;
+                               }
+                       }
+                       state = TEXT;
+                       break;
+               }
+       }
+
+       if (!current.text.empty ()) {
+               subs.push_back (current);
+       }
+
+       return subs;
+}
+
 void
 SSAReader::read (function<optional<string> ()> get_line)
 {
@@ -146,6 +299,7 @@ SSAReader::read (function<optional<string> ()> get_line)
                EVENTS
        } part = INFO;
 
+       int play_res_y = 288;
        map<string, Style> styles;
        string style_format_line;
        vector<string> event_format;
@@ -157,6 +311,7 @@ SSAReader::read (function<optional<string> ()> get_line)
                }
 
                trim (*line);
+               remove_unicode_bom (line);
 
                if (starts_with (*line, ";") || line->empty ()) {
                        continue;
@@ -166,7 +321,7 @@ SSAReader::read (function<optional<string> ()> get_line)
                        /* Section heading */
                        if (line.get() == "[Script Info]") {
                                part = INFO;
-                       } else if (line.get() == "[V4 Styles]") {
+                       } else if (line.get() == "[V4 Styles]" || line.get() == "[V4+ Styles]") {
                                part = STYLES;
                        } else if (line.get() == "[Events]") {
                                part = EVENTS;
@@ -176,12 +331,15 @@ SSAReader::read (function<optional<string> ()> get_line)
 
                size_t const colon = line->find (":");
                SUB_ASSERT (colon != string::npos);
-               SUB_ASSERT (line->length() > colon + 1);
                string const type = line->substr (0, colon);
-               string const body = line->substr (colon + 2);
+               string body = line->substr (colon + 1);
+               trim (body);
 
                switch (part) {
                case INFO:
+                       if (type == "PlayResY") {
+                               play_res_y = raw_convert<int> (body);
+                       }
                        break;
                case STYLES:
                        if (type == "Format") {
@@ -203,6 +361,15 @@ SSAReader::read (function<optional<string> ()> get_line)
                                vector<string> event;
                                split (event, body, is_any_of (","));
 
+                               /* There may be commas in the subtitle part; reassemble any extra parts
+                                  from when we just split it.
+                               */
+                               while (event.size() > event_format.size()) {
+                                       string const ex = event.back ();
+                                       event.pop_back ();
+                                       event.back() += "," + ex;
+                               }
+
                                SUB_ASSERT (!event.empty());
                                SUB_ASSERT (event_format.size() == event.size());
 
@@ -215,6 +382,10 @@ SSAReader::read (function<optional<string> ()> get_line)
                                        } else if (event_format[i] == "End") {
                                                sub.to = parse_time (event[i]);
                                        } else if (event_format[i] == "Style") {
+                                               /* libass trims leading '*'s from style names, commenting that
+                                                  "they seem to mean literally nothing".  Go figure...
+                                               */
+                                               trim_left_if (event[i], boost::is_any_of ("*"));
                                                SUB_ASSERT (styles.find(event[i]) != styles.end());
                                                Style style = styles[event[i]];
                                                sub.font = style.font_name;
@@ -223,19 +394,18 @@ SSAReader::read (function<optional<string> ()> get_line)
                                                sub.effect_colour = style.back_colour;
                                                sub.bold = style.bold;
                                                sub.italic = style.italic;
+                                               sub.underline = style.underline;
                                                sub.effect = style.effect;
-
-                                               /* XXX: arbitrary */
-                                               sub.vertical_position.lines = 32;
-                                               sub.vertical_position.reference = TOP_OF_SUBTITLE;
-                                               sub.vertical_position.line = 0;
-
+                                               sub.vertical_position.reference = style.vertical_reference;
+                                               sub.vertical_position.proportional = float(style.vertical_margin) / play_res_y;
+                                       } else if (event_format[i] == "MarginV") {
+                                               sub.vertical_position.proportional = raw_convert<float>(event[i]) / play_res_y;
                                        } else if (event_format[i] == "Text") {
-                                               sub.text = event[i];
+                                               BOOST_FOREACH (sub::RawSubtitle j, parse_line (sub, event[i])) {
+                                                       _subs.push_back (j);
+                                               }
                                        }
                                }
-
-                               _subs.push_back (sub);
                        }
                }