diff options
| author | Carl Hetherington <cth@carlh.net> | 2021-06-03 16:13:58 +0200 |
|---|---|---|
| committer | Carl Hetherington <cth@carlh.net> | 2021-06-03 16:17:23 +0200 |
| commit | 046ce2e8d22ea384dc7e1e4545bede6fd859d9c3 (patch) | |
| tree | 042523a35297226885f0d10280b612fb6f1fef66 | |
| parent | ceb9ad4b9a6611da0e49e7c3f808dba5e794cec3 (diff) | |
Add "U8" character code table for binary STL.
A file was sent that has "U8" as an STL CCT. I presume this means
"UTF-8" but I can't see any documentation about it anywhere.
This change accepts U8 as a CCT. It doesn't look like this value
is actually used anywhere to change how text is parsed, but at
least it doesn't fail now.
| -rw-r--r-- | src/stl_binary_tables.cc | 1 | ||||
| -rw-r--r-- | src/stl_binary_tables.h | 3 | ||||
| -rw-r--r-- | test/stl_binary_reader_test.cc | 14 |
3 files changed, 17 insertions, 1 deletion
diff --git a/src/stl_binary_tables.cc b/src/stl_binary_tables.cc
index db623bf..2d275df 100644
--- a/src/stl_binary_tables.cc
+++ b/src/stl_binary_tables.cc
@@ -211,6 +211,7 @@ STLBinaryTables::STLBinaryTables ()
 	code<LanguageGroup, string> (_language_group_map, "02", LANGUAGE_GROUP_LATIN_ARABIC, "Latin/Arabic");
 	code<LanguageGroup, string> (_language_group_map, "03", LANGUAGE_GROUP_LATIN_GREEK, "Latin/Greek");
 	code<LanguageGroup, string> (_language_group_map, "04", LANGUAGE_GROUP_LATIN_HEBREW, "Latin/Hebrew");
+	code<LanguageGroup, string> (_language_group_map, "U8", LANGUAGE_GROUP_UTF8, "UTF-8");
 
 	code<Language, string> (_language_map, "00", LANGUAGE_UNKNOWN, "Unknown");
 	code<Language, string> (_language_map, "01", LANGUAGE_ALBANIAN, "Albanian");
diff --git a/src/stl_binary_tables.h b/src/stl_binary_tables.h
index 4a6a7d8..4621413 100644
--- a/src/stl_binary_tables.h
+++ b/src/stl_binary_tables.h
@@ -38,7 +38,8 @@ enum LanguageGroup {
 	LANGUAGE_GROUP_LATIN_CYRILLIC,
 	LANGUAGE_GROUP_LATIN_ARABIC,
 	LANGUAGE_GROUP_LATIN_GREEK,
-	LANGUAGE_GROUP_LATIN_HEBREW
+	LANGUAGE_GROUP_LATIN_HEBREW,
+	LANGUAGE_GROUP_UTF8,
 };
 
 enum Language {
diff --git a/test/stl_binary_reader_test.cc b/test/stl_binary_reader_test.cc
index dfd7c0a..cf97b5a 100644
--- a/test/stl_binary_reader_test.cc
+++ b/test/stl_binary_reader_test.cc
@@ -68,3 +68,17 @@ BOOST_AUTO_TEST_CASE (stl_binary_reader_test2)
 	sub::dump (r, dump_stream);
 	dump_stream.close ();
 }
+
+
+/** Test reading a file which raised "Unknown language group code U8" */
+BOOST_AUTO_TEST_CASE (stl_binary_reader_test3)
+{
+	if (private_test.empty()) {
+		return;
+	}
+
+	auto path = private_test / "hsk.stl";
+	ifstream in (path.string().c_str());
+	auto reader = make_shared<sub::STLBinaryReader>(in);
+}
+
