Remove unicode BOM from SSA files.
author Carl Hetherington <cth@carlh.net>
Thu, 2 Jun 2016 16:08:11 +0000 (17:08 +0100)
committer Carl Hetherington <cth@carlh.net>
Thu, 2 Jun 2016 16:08:11 +0000 (17:08 +0100)
src/ssa_reader.cc
src/subrip_reader.cc
src/util.cc
src/util.h

index bac893bbc4da44107b5fec7458b7e85dfa4e6885..859a00ecba18cbb36ab79ba2a51ba01151ae3658 100644 (file)
@@ -227,6 +227,7 @@ SSAReader::read (function<optional<string> ()> get_line)
                }
 
                trim (*line);
+               remove_unicode_bom (line);
 
                if (starts_with (*line, ";") || line->empty ()) {
                        continue;
index a82862cb5bc060000773d3b1edaa172976086bd3..f71843ef144ef198d6e1eb9a33cd79f7a701b97a 100644 (file)
@@ -81,17 +81,7 @@ SubripReader::read (function<optional<string> ()> get_line)
                }
 
                trim_right_if (*line, boost::is_any_of ("\n\r"));
-
-               if (
-                       line->length() >= 3 &&
-                       static_cast<unsigned char> (line.get()[0]) == 0xef &&
-                       static_cast<unsigned char> (line.get()[1]) == 0xbb &&
-                       static_cast<unsigned char> (line.get()[2]) == 0xbf
-                       ) {
-
-                       /* Skip Unicode byte order mark */
-                       line = line->substr (3);
-               }
+               remove_unicode_bom (line);
 
                switch (state) {
                case COUNTER:
index 5510d8ecd59dfcc74074df49c5b0b9fa1578d51c..a736a91048f0a3ef145d35992dad8f3306831330 100644 (file)
@@ -65,3 +65,18 @@ sub::get_line_file (FILE* f)
 
        return string (buffer);
 }
+
+/** Strip a leading UTF-8 byte order mark (EF BB BF) from @p line, if present.
+ *  @param line Line to modify in place; left untouched if unset or BOM-less.
+ */
+void
+sub::remove_unicode_bom (optional<string>& line)
+{
+       /* An unset optional must not be dereferenced, so check it first */
+       if (!line || line->compare (0, 3, "\xef\xbb\xbf") != 0) {
+               return;
+       }
+
+       /* Drop the three BOM bytes in place rather than copying the tail */
+       line->erase (0, 3);
+}
index 9d3edc247039fcfa7892d0778bbbf1ed365c2a0c..dea8023f787b6770cd553aca39bc129383931829 100644 (file)
@@ -23,9 +23,8 @@
 namespace sub {
 
 extern bool empty_or_white_space (std::string s);
+extern void remove_unicode_bom (boost::optional<std::string>& line); /* strips a leading UTF-8 BOM (EF BB BF) from line, in place */
 extern boost::optional<std::string> get_line_stringstream (std::stringstream* str);
 extern boost::optional<std::string> get_line_file (FILE* f);
 
 }
-
-