[transliteration] Regenerating transliteration data files
This commit is contained in:
File diff suppressed because one or more lines are too long
@@ -8,47 +8,46 @@ typedef struct script_transliteration_rule {
|
|||||||
|
|
||||||
script_transliteration_rule_t script_transliteration_rules[] = {
|
script_transliteration_rule_t script_transliteration_rules[] = {
|
||||||
{{SCRIPT_THAANA, ""}, {0, 2}},
|
{{SCRIPT_THAANA, ""}, {0, 2}},
|
||||||
{{SCRIPT_GURMUKHI, ""}, {2, 1}},
|
{{SCRIPT_TELUGU, ""}, {2, 1}},
|
||||||
{{SCRIPT_TELUGU, ""}, {3, 1}},
|
{{SCRIPT_CYRILLIC, "be"}, {3, 1}},
|
||||||
{{SCRIPT_CYRILLIC, "be"}, {4, 1}},
|
{{SCRIPT_CYRILLIC, "bg"}, {4, 1}},
|
||||||
{{SCRIPT_CYRILLIC, "bg"}, {5, 1}},
|
{{SCRIPT_CYRILLIC, ""}, {5, 1}},
|
||||||
{{SCRIPT_CYRILLIC, ""}, {6, 1}},
|
{{SCRIPT_CYRILLIC, "ru"}, {6, 1}},
|
||||||
{{SCRIPT_CYRILLIC, "ru"}, {7, 1}},
|
{{SCRIPT_CYRILLIC, "uz"}, {7, 1}},
|
||||||
{{SCRIPT_CYRILLIC, "uz"}, {8, 1}},
|
{{SCRIPT_CYRILLIC, "kk"}, {8, 1}},
|
||||||
{{SCRIPT_CYRILLIC, "kk"}, {9, 1}},
|
{{SCRIPT_CYRILLIC, "sr"}, {9, 1}},
|
||||||
{{SCRIPT_CYRILLIC, "sr"}, {10, 1}},
|
{{SCRIPT_CYRILLIC, "mn"}, {10, 1}},
|
||||||
{{SCRIPT_CYRILLIC, "mn"}, {11, 1}},
|
{{SCRIPT_CYRILLIC, "mk"}, {11, 1}},
|
||||||
{{SCRIPT_CYRILLIC, "mk"}, {12, 1}},
|
{{SCRIPT_CYRILLIC, "uk"}, {12, 1}},
|
||||||
{{SCRIPT_CYRILLIC, "uk"}, {13, 1}},
|
{{SCRIPT_CYRILLIC, "ky"}, {13, 1}},
|
||||||
{{SCRIPT_CYRILLIC, "ky"}, {14, 1}},
|
{{SCRIPT_HANGUL, ""}, {14, 2}},
|
||||||
{{SCRIPT_ORIYA, ""}, {15, 1}},
|
{{SCRIPT_HAN, ""}, {16, 1}},
|
||||||
{{SCRIPT_HANGUL, ""}, {16, 2}},
|
{{SCRIPT_ARMENIAN, ""}, {17, 1}},
|
||||||
{{SCRIPT_GUJARATI, ""}, {18, 1}},
|
{{SCRIPT_TAMIL, ""}, {18, 1}},
|
||||||
{{SCRIPT_HAN, ""}, {19, 1}},
|
{{SCRIPT_MALAYALAM, ""}, {19, 1}},
|
||||||
{{SCRIPT_ARMENIAN, ""}, {20, 1}},
|
{{SCRIPT_GUJARATI, ""}, {20, 1}},
|
||||||
{{SCRIPT_TAMIL, ""}, {21, 1}},
|
{{SCRIPT_LATIN, ""}, {21, 1}},
|
||||||
{{SCRIPT_BENGALI, ""}, {22, 1}},
|
{{SCRIPT_GEORGIAN, ""}, {22, 2}},
|
||||||
{{SCRIPT_MALAYALAM, ""}, {23, 1}},
|
{{SCRIPT_DEVANAGARI, ""}, {24, 1}},
|
||||||
{{SCRIPT_HIRAGANA, ""}, {24, 1}},
|
{{SCRIPT_THAI, ""}, {25, 1}},
|
||||||
{{SCRIPT_KANNADA, ""}, {25, 1}},
|
{{SCRIPT_GREEK, ""}, {26, 3}},
|
||||||
{{SCRIPT_LATIN, ""}, {26, 1}},
|
{{SCRIPT_KATAKANA, ""}, {29, 2}},
|
||||||
{{SCRIPT_GEORGIAN, ""}, {27, 2}},
|
{{SCRIPT_ORIYA, ""}, {31, 1}},
|
||||||
{{SCRIPT_DEVANAGARI, ""}, {29, 1}},
|
{{SCRIPT_HIRAGANA, ""}, {32, 1}},
|
||||||
{{SCRIPT_THAI, ""}, {30, 1}},
|
{{SCRIPT_ARABIC, "fa"}, {33, 1}},
|
||||||
{{SCRIPT_GREEK, ""}, {31, 3}},
|
{{SCRIPT_ARABIC, ""}, {34, 2}},
|
||||||
{{SCRIPT_CANADIAN_ABORIGINAL, ""}, {34, 1}},
|
{{SCRIPT_ARABIC, "ps"}, {36, 1}},
|
||||||
{{SCRIPT_ARABIC, "fa"}, {35, 1}},
|
{{SCRIPT_CANADIAN_ABORIGINAL, ""}, {37, 1}},
|
||||||
{{SCRIPT_ARABIC, ""}, {36, 2}},
|
{{SCRIPT_BENGALI, ""}, {38, 1}},
|
||||||
{{SCRIPT_ARABIC, "ps"}, {38, 1}},
|
{{SCRIPT_GURMUKHI, ""}, {39, 1}},
|
||||||
{{SCRIPT_HEBREW, ""}, {39, 2}},
|
{{SCRIPT_KANNADA, ""}, {40, 1}},
|
||||||
{{SCRIPT_KATAKANA, ""}, {41, 2}},
|
{{SCRIPT_COMMON, ""}, {41, 1}},
|
||||||
{{SCRIPT_COMMON, ""}, {43, 1}}
|
{{SCRIPT_HEBREW, ""}, {42, 2}}
|
||||||
};
|
};
|
||||||
|
|
||||||
char *script_transliterators[] = {
|
char *script_transliterators[] = {
|
||||||
"thaana-latin",
|
"thaana-latin",
|
||||||
"maldivian-latin-bgn",
|
"maldivian-latin-bgn",
|
||||||
"gurmukhi-latin",
|
|
||||||
"telugu-latin",
|
"telugu-latin",
|
||||||
"belarusian-latin-bgn",
|
"belarusian-latin-bgn",
|
||||||
"bulgarian-latin-bgn",
|
"bulgarian-latin-bgn",
|
||||||
@@ -61,17 +60,13 @@ char *script_transliterators[] = {
|
|||||||
"macedonian-latin-bgn",
|
"macedonian-latin-bgn",
|
||||||
"ukrainian-latin-bgn",
|
"ukrainian-latin-bgn",
|
||||||
"kirghiz-latin-bgn",
|
"kirghiz-latin-bgn",
|
||||||
"oriya-latin",
|
|
||||||
"hangul-latin",
|
"hangul-latin",
|
||||||
"korean-latin-bgn",
|
"korean-latin-bgn",
|
||||||
"gujarati-latin",
|
|
||||||
"han-latin",
|
"han-latin",
|
||||||
"armenian-latin-bgn",
|
"armenian-latin-bgn",
|
||||||
"tamil-latin",
|
"tamil-latin",
|
||||||
"bengali-latin",
|
|
||||||
"malayam-latin",
|
"malayam-latin",
|
||||||
"hiragana-latin",
|
"gujarati-latin",
|
||||||
"kannada-latin",
|
|
||||||
"latin-ascii",
|
"latin-ascii",
|
||||||
"georgian-latin",
|
"georgian-latin",
|
||||||
"georgian-latin-bgn",
|
"georgian-latin-bgn",
|
||||||
@@ -80,15 +75,20 @@ char *script_transliterators[] = {
|
|||||||
"greek-latin",
|
"greek-latin",
|
||||||
"greek-latin-bgn",
|
"greek-latin-bgn",
|
||||||
"greek-latin-ungegn",
|
"greek-latin-ungegn",
|
||||||
"canadianaboriginal-latin",
|
"katakana-latin",
|
||||||
|
"katakana-latin-bgn",
|
||||||
|
"oriya-latin",
|
||||||
|
"hiragana-latin",
|
||||||
"persian-latin-bgn",
|
"persian-latin-bgn",
|
||||||
"arabic-latin",
|
"arabic-latin",
|
||||||
"arabic-latin-bgn",
|
"arabic-latin-bgn",
|
||||||
"pashto-latin-bgn",
|
"pashto-latin-bgn",
|
||||||
|
"canadianaboriginal-latin",
|
||||||
|
"bengali-latin",
|
||||||
|
"gurmukhi-latin",
|
||||||
|
"kannada-latin",
|
||||||
|
"latin-ascii",
|
||||||
"hebrew-latin",
|
"hebrew-latin",
|
||||||
"hebrew-latin-bgn",
|
"hebrew-latin-bgn"
|
||||||
"katakana-latin",
|
|
||||||
"katakana-latin-bgn",
|
|
||||||
"latin-ascii"
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -5,6 +5,7 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "constants.h"
|
#include "constants.h"
|
||||||
|
#include "string_utils.h"
|
||||||
#include "utf8proc/utf8proc.h"
|
#include "utf8proc/utf8proc.h"
|
||||||
#include "unicode_script_types.h"
|
#include "unicode_script_types.h"
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user