diff options
| author | Carl Hetherington <cth@carlh.net> | 2021-06-03 16:13:58 +0200 |
|---|---|---|
| committer | Carl Hetherington <cth@carlh.net> | 2021-06-03 16:17:23 +0200 |
| commit | 046ce2e8d22ea384dc7e1e4545bede6fd859d9c3 (patch) | |
| tree | 042523a35297226885f0d10280b612fb6f1fef66 /src | |
| parent | ceb9ad4b9a6611da0e49e7c3f808dba5e794cec3 (diff) | |
Add "U8" character code table for binary STL.
A file was sent that has "U8" as an STL CCT. I presume this means
"UTF-8", but I can't see any documentation about it anywhere.
This change accepts U8 as a CCT. It doesn't look like this value
is actually used anywhere to change how text is parsed, but at
least it doesn't fail now.
Diffstat (limited to 'src')
| -rw-r--r-- | src/stl_binary_tables.cc | 1 | ||||
| -rw-r--r-- | src/stl_binary_tables.h | 3 |
2 files changed, 3 insertions, 1 deletions
```diff
diff --git a/src/stl_binary_tables.cc b/src/stl_binary_tables.cc
index db623bf..2d275df 100644
--- a/src/stl_binary_tables.cc
+++ b/src/stl_binary_tables.cc
@@ -211,6 +211,7 @@ STLBinaryTables::STLBinaryTables ()
 	code<LanguageGroup, string> (_language_group_map, "02", LANGUAGE_GROUP_LATIN_ARABIC, "Latin/Arabic");
 	code<LanguageGroup, string> (_language_group_map, "03", LANGUAGE_GROUP_LATIN_GREEK, "Latin/Greek");
 	code<LanguageGroup, string> (_language_group_map, "04", LANGUAGE_GROUP_LATIN_HEBREW, "Latin/Hebrew");
+	code<LanguageGroup, string> (_language_group_map, "U8", LANGUAGE_GROUP_LATIN_HEBREW, "UTF-8");
 
 	code<Language, string> (_language_map, "00", LANGUAGE_UNKNOWN, "Unknown");
 	code<Language, string> (_language_map, "01", LANGUAGE_ALBANIAN, "Albanian");
diff --git a/src/stl_binary_tables.h b/src/stl_binary_tables.h
index 4a6a7d8..4621413 100644
--- a/src/stl_binary_tables.h
+++ b/src/stl_binary_tables.h
@@ -38,7 +38,8 @@ enum LanguageGroup {
 	LANGUAGE_GROUP_LATIN_CYRILLIC,
 	LANGUAGE_GROUP_LATIN_ARABIC,
 	LANGUAGE_GROUP_LATIN_GREEK,
-	LANGUAGE_GROUP_LATIN_HEBREW
+	LANGUAGE_GROUP_LATIN_HEBREW,
+	LANGUAGE_GROUP_UTF8,
 };
 
 enum Language {
```
