From b4593b6f8839eff7bb264d0ba77c95560e8c685f Mon Sep 17 00:00:00 2001 From: Al Date: Wed, 23 Sep 2015 00:33:14 -0400 Subject: [PATCH] [unicode/tokenization] Using new character classes including wide chars in scanner --- scripts/geodata/i18n/transliteration_rules.py | 44 ++++++++++--------- src/scanner.re | 16 +++---- 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/scripts/geodata/i18n/transliteration_rules.py b/scripts/geodata/i18n/transliteration_rules.py index 5518a7a3..60451f70 100644 --- a/scripts/geodata/i18n/transliteration_rules.py +++ b/scripts/geodata/i18n/transliteration_rules.py @@ -30,6 +30,7 @@ from scanner import Scanner from unicode_properties import * from unicode_paths import CLDR_DIR from geodata.encoding import safe_decode, safe_encode +from geodata.string_utils import NUM_CODEPOINTS, wide_unichr CLDR_TRANSFORMS_DIR = os.path.join(CLDR_DIR, 'common', 'transforms') @@ -150,9 +151,10 @@ def init_unicode_categories(): global unicode_blocks, unicode_combining_classes, unicode_properties, unicode_property_aliases global unicode_property_value_aliases, unicode_scripts, unicode_script_ids, unicode_word_breaks - for i in xrange(NUM_CHARS): - unicode_categories[unicodedata.category(unichr(i))].append(unichr(i)) - unicode_combining_classes[str(unicodedata.combining(unichr(i)))].append(unichr(i)) + for i in xrange(NUM_CODEPOINTS): + c = wide_unichr(i) + unicode_categories[unicodedata.category(c)].append(c) + unicode_combining_classes[str(unicodedata.combining(c))].append(c) unicode_categories = dict(unicode_categories) unicode_combining_classes = dict(unicode_combining_classes) @@ -165,7 +167,7 @@ def init_unicode_categories(): script_chars = get_chars_by_script() for i, script in enumerate(script_chars): if script: - unicode_scripts[script.lower()].append(unichr(i)) + unicode_scripts[script.lower()].append(wide_unichr(i)) unicode_scripts = dict(unicode_scripts) @@ -379,9 +381,7 @@ char_set_scanner = Scanner([ (r'[^\s]', CHARACTER), ]) -NUM_CHARS = 65536 - -all_chars = set([unichr(i) for i in xrange(NUM_CHARS)]) +all_chars = set([wide_unichr(i) for i in xrange(NUM_CODEPOINTS)]) control_chars = set([c for c in all_chars if unicodedata.category(c) in ('Cc', 'Cn', 'Cs')]) @@ -392,20 +392,22 @@ def get_transforms(): def replace_html_entity(ent): name = ent.strip('&;') - return unichr(htmlentitydefs.name2codepoint[name]) + return wide_unichr(htmlentitydefs.name2codepoint[name]) def parse_regex_char_range(regex): prev_char = None ranges = range_regex.findall(regex) regex = range_regex.sub('', regex) - chars = [ord(c) for c in regex] + chars = [wide_ord(c) for c in regex] for start, end in ranges: + start_ord = wide_ord(start) + end_ord = wide_ord(end) - if ord(end) > ord(start): + if end_ord > start_ord: # Ranges are inclusive - chars.extend([unichr(c) for c in range(ord(start), ord(end) + 1)]) + chars.extend([wide_unichr(c) for c in range(start_ord, end_ord + 1)]) return chars @@ -554,13 +556,15 @@ def parse_regex_char_set(s, current_filter=all_chars): elif token_class == CHARACTER and token not in control_chars: this_group.add(token) real_chars.add(token) - elif token_class == UNICODE_CHARACTER: + elif token_class in (UNICODE_CHARACTER, UNICODE_WIDE_CHARACTER): token = token.decode('unicode-escape') if token not in control_chars: this_group.add(token) real_chars.add(token) - elif token_class in (WIDE_CHARACTER, UNICODE_WIDE_CHARACTER): - continue + elif token_class == WIDE_CHARACTER: + if token not in control_chars: + this_group.add(token) + real_chars.add(token) elif token_class == BRACKETED_CHARACTER: if token.strip('{{}}') not in control_chars: this_group.add(token) @@ -794,11 +798,11 @@ def char_permutations(s, current_filter=all_chars, reverse=False): add_char_type(current_chars, [token]) elif token_type == SINGLE_QUOTE: add_char_type(current_chars, ["'"]) - elif token_type == UNICODE_CHARACTER: + elif token_type in (UNICODE_CHARACTER, UNICODE_WIDE_CHARACTER): token = token.decode('unicode-escape') add_char_type(current_chars, [token]) - elif token_type in (WIDE_CHARACTER, UNICODE_WIDE_CHARACTER): - continue + elif token_type == WIDE_CHARACTER: + add_char_type(current_chars, [token]) if in_group and last_token_group_start: start_group = len(current_chars) @@ -1185,12 +1189,12 @@ EXISTING_STEP = 'EXISTING_STEP' PREPEND_STEP = 'PREPEND_STEP' -html_escapes = {'&{};'.format(name): safe_encode(unichr(value)) +html_escapes = {'&{};'.format(name): safe_encode(wide_unichr(value)) for name, value in htmlentitydefs.name2codepoint.iteritems() } -html_escapes.update({'&#{};'.format(i): safe_encode(unichr(i)) - for i in xrange(NUM_CHARS) +html_escapes.update({'&#{};'.format(i): safe_encode(wide_unichr(i)) + for i in xrange(NUM_CODEPOINTS) }) # [[:Latin] & [:Ll:]] diff --git a/src/scanner.re b/src/scanner.re index 18c6e6ed..a23614c6 100644 --- a/src/scanner.re +++ b/src/scanner.re @@ -61,25 +61,25 @@ non_space = [^ \u00a0\u1680\u180e\u2000-\u200a\u202f\u3000\u0000-\u001f\u007f-\u // TR-29 word segmentation hebrew_letter_chars = [\u05d0-\u05ea\u05f0-\u05f2\ufb1d\ufb1f-\ufb28\ufb2a-\ufb36\ufb38-\ufb3c\ufb3e\ufb40-\ufb41\ufb43-\ufb44\ufb46-\ufb4f]; -format_chars = [\u00ad\u0600-\u0605\u061c\u06dd\u070f\u180e\u200e-\u200f\u202a-\u202e\u2060-\u2064\u2066-\u206f\ufeff\ufff9-\ufffb]; -extend_chars = [\u0300-\u036f\u0483-\u0487\u0488-\u0489\u0591-\u05bd\u05bf\u05c1-\u05c2\u05c4-\u05c5\u05c7\u0610-\u061a\u064b-\u065f\u0670\u06d6-\u06dc\u06df-\u06e4\u06e7-\u06e8\u06ea-\u06ed\u0711\u0730-\u074a\u07a6-\u07b0\u07eb-\u07f3\u0816-\u0819\u081b-\u0823\u0825-\u0827\u0829-\u082d\u0859-\u085b\u08e4-\u0902\u0903\u093a\u093b\u093c\u093e-\u0940\u0941-\u0948\u0949-\u094c\u094d\u094e-\u094f\u0951-\u0957\u0962-\u0963\u0981\u0982-\u0983\u09bc\u09be-\u09c0\u09c1-\u09c4\u09c7-\u09c8\u09cb-\u09cc\u09cd\u09d7\u09e2-\u09e3\u0a01-\u0a02\u0a03\u0a3c\u0a3e-\u0a40\u0a41-\u0a42\u0a47-\u0a48\u0a4b-\u0a4d\u0a51\u0a70-\u0a71\u0a75\u0a81-\u0a82\u0a83\u0abc\u0abe-\u0ac0\u0ac1-\u0ac5\u0ac7-\u0ac8\u0ac9\u0acb-\u0acc\u0acd\u0ae2-\u0ae3\u0b01\u0b02-\u0b03\u0b3c\u0b3e\u0b3f\u0b40\u0b41-\u0b44\u0b47-\u0b48\u0b4b-\u0b4c\u0b4d\u0b56\u0b57\u0b62-\u0b63\u0b82\u0bbe-\u0bbf\u0bc0\u0bc1-\u0bc2\u0bc6-\u0bc8\u0bca-\u0bcc\u0bcd\u0bd7\u0c00\u0c01-\u0c03\u0c3e-\u0c40\u0c41-\u0c44\u0c46-\u0c48\u0c4a-\u0c4d\u0c55-\u0c56\u0c62-\u0c63\u0c81\u0c82-\u0c83\u0cbc\u0cbe\u0cbf\u0cc0-\u0cc4\u0cc6\u0cc7-\u0cc8\u0cca-\u0ccb\u0ccc-\u0ccd\u0cd5-\u0cd6\u0ce2-\u0ce3\u0d01\u0d02-\u0d03\u0d3e-\u0d40\u0d41-\u0d44\u0d46-\u0d48\u0d4a-\u0d4c\u0d4d\u0d57\u0d62-\u0d63\u0d82-\u0d83\u0dca\u0dcf-\u0dd1\u0dd2-\u0dd4\u0dd6\u0dd8-\u0ddf\u0df2-\u0df3\u0e31\u0e34-\u0e3a\u0e47-\u0e4e\u0eb1\u0eb4-\u0eb9\u0ebb-\u0ebc\u0ec8-\u0ecd\u0f18-\u0f19\u0f35\u0f37\u0f39\u0f3e-\u0f3f\u0f71-\u0f7e\u0f7f\u0f80-\u0f84\u0f86-\u0f87\u0f8d-\u0f97\u0f99-\u0fbc\u0fc6\u102b-\u102c\u102d-\u1030\u1031\u1032-\u1037\u1038\u1039-\u103a\u103b-\u103c\u103d-\u103e\u1056-\u1057\u1058-\u1059\u105e-\u1060\u1062-\u1064\u1067-\u106d\u1071-\u1074\u1082\u1083-\u1084\u1085-\u1086\u1087-\u108c\u108d\u108f\u109a-\u109c\u109d\u135d-\u135f\u1712-\u1714\u1732-\u1734\u1752-\u1753\u1772-\u1773\u17b4-\u17b5\u17b6\u17b7-\u17bd\u17be-\u17c5\u17c6\u17c7-\u17c8\u17c9-\u17d3\u17dd\u180b-\u180d\u18a9\u1920-\u1922\u1923-\u1926\u1927-\u1928\u1929-\u192b\u1930-\u1931\u1932\u1933-\u1938\u1939-\u193b\u19b0-\u19c0\u19c8-\u19c9\u1a17-\u1a18\u1a19-\u1a1a\u1a1b\u1a55\u1a56\u1a57\u1a58-\u1a5e\u1a60\u1a61\u1a62\u1a63-\u1a64\u1a65-\u1a6c\u1a6d-\u1a72\u1a73-\u1a7c\u1a7f\u1ab0-\u1abd\u1abe\u1b00-\u1b03\u1b04\u1b34\u1b35\u1b36-\u1b3a\u1b3b\u1b3c\u1b3d-\u1b41\u1b42\u1b43-\u1b44\u1b6b-\u1b73\u1b80-\u1b81\u1b82\u1ba1\u1ba2-\u1ba5\u1ba6-\u1ba7\u1ba8-\u1ba9\u1baa\u1bab-\u1bad\u1be6\u1be7\u1be8-\u1be9\u1bea-\u1bec\u1bed\u1bee\u1bef-\u1bf1\u1bf2-\u1bf3\u1c24-\u1c2b\u1c2c-\u1c33\u1c34-\u1c35\u1c36-\u1c37\u1cd0-\u1cd2\u1cd4-\u1ce0\u1ce1\u1ce2-\u1ce8\u1ced\u1cf2-\u1cf3\u1cf4\u1cf8-\u1cf9\u1dc0-\u1df5\u1dfc-\u1dff\u200c-\u200d\u20d0-\u20dc\u20dd-\u20e0\u20e1\u20e2-\u20e4\u20e5-\u20f0\u2cef-\u2cf1\u2d7f\u2de0-\u2dff\u302a-\u302d\u302e-\u302f\u3099-\u309a\ua66f\ua670-\ua672\ua674-\ua67d\ua69f\ua6f0-\ua6f1\ua802\ua806\ua80b\ua823-\ua824\ua825-\ua826\ua827\ua880-\ua881\ua8b4-\ua8c3\ua8c4\ua8e0-\ua8f1\ua926-\ua92d\ua947-\ua951\ua952-\ua953\ua980-\ua982\ua983\ua9b3\ua9b4-\ua9b5\ua9b6-\ua9b9\ua9ba-\ua9bb\ua9bc\ua9bd-\ua9c0\ua9e5\uaa29-\uaa2e\uaa2f-\uaa30\uaa31-\uaa32\uaa33-\uaa34\uaa35-\uaa36\uaa43\uaa4c\uaa4d\uaa7b\uaa7c\uaa7d\uaab0\uaab2-\uaab4\uaab7-\uaab8\uaabe-\uaabf\uaac1\uaaeb\uaaec-\uaaed\uaaee-\uaaef\uaaf5\uaaf6\uabe3-\uabe4\uabe5\uabe6-\uabe7\uabe8\uabe9-\uabea\uabec\uabed\ufb1e\ufe00-\ufe0f\ufe20-\ufe2d\uff9e-\uff9f]; -katakana_chars = [\u3031-\u3035\u309b-\u309c\u30a0\u30a1-\u30fa\u30fc-\u30fe\u30ff\u31f0-\u31ff\u32d0-\u32fe\u3300-\u3357\uff66-\uff6f\uff70\uff71-\uff9d]; -letter_other_alpha_chars = [\u00aa\u00ba\u01bb\u01c0-\u01c3\u0294\u0620-\u063f\u0641-\u064a\u066e-\u066f\u0671-\u06d3\u06d5\u06ee-\u06ef\u06fa-\u06fc\u06ff\u0710\u0712-\u072f\u074d-\u07a5\u07b1\u07ca-\u07ea\u0800-\u0815\u0840-\u0858\u08a0-\u08b2\u0904-\u0939\u093d\u0950\u0958-\u0961\u0972-\u0980\u0985-\u098c\u098f-\u0990\u0993-\u09a8\u09aa-\u09b0\u09b2\u09b6-\u09b9\u09bd\u09ce\u09dc-\u09dd\u09df-\u09e1\u09f0-\u09f1\u0a05-\u0a0a\u0a0f-\u0a10\u0a13-\u0a28\u0a2a-\u0a30\u0a32-\u0a33\u0a35-\u0a36\u0a38-\u0a39\u0a59-\u0a5c\u0a5e\u0a72-\u0a74\u0a85-\u0a8d\u0a8f-\u0a91\u0a93-\u0aa8\u0aaa-\u0ab0\u0ab2-\u0ab3\u0ab5-\u0ab9\u0abd\u0ad0\u0ae0-\u0ae1\u0b05-\u0b0c\u0b0f-\u0b10\u0b13-\u0b28\u0b2a-\u0b30\u0b32-\u0b33\u0b35-\u0b39\u0b3d\u0b5c-\u0b5d\u0b5f-\u0b61\u0b71\u0b83\u0b85-\u0b8a\u0b8e-\u0b90\u0b92-\u0b95\u0b99-\u0b9a\u0b9c\u0b9e-\u0b9f\u0ba3-\u0ba4\u0ba8-\u0baa\u0bae-\u0bb9\u0bd0\u0c05-\u0c0c\u0c0e-\u0c10\u0c12-\u0c28\u0c2a-\u0c39\u0c3d\u0c58-\u0c59\u0c60-\u0c61\u0c85-\u0c8c\u0c8e-\u0c90\u0c92-\u0ca8\u0caa-\u0cb3\u0cb5-\u0cb9\u0cbd\u0cde\u0ce0-\u0ce1\u0cf1-\u0cf2\u0d05-\u0d0c\u0d0e-\u0d10\u0d12-\u0d3a\u0d3d\u0d4e\u0d60-\u0d61\u0d7a-\u0d7f\u0d85-\u0d96\u0d9a-\u0db1\u0db3-\u0dbb\u0dbd\u0dc0-\u0dc6\u10d0-\u10fa\u10fd-\u1248\u124a-\u124d\u1250-\u1256\u1258\u125a-\u125d\u1260-\u1288\u128a-\u128d\u1290-\u12b0\u12b2-\u12b5\u12b8-\u12be\u12c0\u12c2-\u12c5\u12c8-\u12d6\u12d8-\u1310\u1312-\u1315\u1318-\u135a\u1380-\u138f\u13a0-\u13f4\u1401-\u166c\u166f-\u167f\u1681-\u169a\u16a0-\u16ea\u16f1-\u16f8\u1700-\u170c\u170e-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176c\u176e-\u1770\u1820-\u1842\u1844-\u1877\u1880-\u18a8\u18aa\u18b0-\u18f5\u1900-\u191e\u1a00-\u1a16\u1b83-\u1ba0\u1bae-\u1baf\u1bba-\u1be5\u1c00-\u1c23\u1c4d-\u1c4f\u1c5a-\u1c77\u1ce9-\u1cec\u1cee-\u1cf1\u1cf5-\u1cf6\u2135-\u2138\u2d30-\u2d67\u2d80-\u2d96\u2da0-\u2da6\u2da8-\u2dae\u2db0-\u2db6\u2db8-\u2dbe\u2dc0-\u2dc6\u2dc8-\u2dce\u2dd0-\u2dd6\u2dd8-\u2dde\u303c\u3105-\u312d\u31a0-\u31ba\ua4d0-\ua4f7\ua500-\ua60b\ua610-\ua61f\ua62a-\ua62b\ua66e\ua6a0-\ua6e5\ua7f7\ua7fb-\ua801\ua803-\ua805\ua807-\ua80a\ua80c-\ua822\ua840-\ua873\ua882-\ua8b3\ua8f2-\ua8f7\ua8fb\ua90a-\ua925\ua930-\ua946\uaa00-\uaa28\uaa40-\uaa42\uaa44-\uaa4b\uaae0-\uaaea\uaaf2\uab01-\uab06\uab09-\uab0e\uab11-\uab16\uab20-\uab26\uab28-\uab2e\uabc0-\uabe2\ufb50-\ufbb1\ufbd3-\ufd3d\ufd50-\ufd8f\ufd92-\ufdc7\ufdf0-\ufdfb\ufe70-\ufe74\ufe76-\ufefc]; +format_chars = [\u00ad\u0600-\u0605\u061c\u06dd\u070f\u180e\u200e-\u200f\u202a-\u202e\u2060-\u2064\u2066-\u206f\ufeff\ufff9-\ufffb\U000110bd\U0001bca0-\U0001bca3\U0001d173-\U0001d17a\U000e0001\U000e0020-\U000e007f]; +extend_chars = [\u0300-\u036f\u0483-\u0487\u0488-\u0489\u0591-\u05bd\u05bf\u05c1-\u05c2\u05c4-\u05c5\u05c7\u0610-\u061a\u064b-\u065f\u0670\u06d6-\u06dc\u06df-\u06e4\u06e7-\u06e8\u06ea-\u06ed\u0711\u0730-\u074a\u07a6-\u07b0\u07eb-\u07f3\u0816-\u0819\u081b-\u0823\u0825-\u0827\u0829-\u082d\u0859-\u085b\u08e3-\u0902\u0903\u093a\u093b\u093c\u093e-\u0940\u0941-\u0948\u0949-\u094c\u094d\u094e-\u094f\u0951-\u0957\u0962-\u0963\u0981\u0982-\u0983\u09bc\u09be-\u09c0\u09c1-\u09c4\u09c7-\u09c8\u09cb-\u09cc\u09cd\u09d7\u09e2-\u09e3\u0a01-\u0a02\u0a03\u0a3c\u0a3e-\u0a40\u0a41-\u0a42\u0a47-\u0a48\u0a4b-\u0a4d\u0a51\u0a70-\u0a71\u0a75\u0a81-\u0a82\u0a83\u0abc\u0abe-\u0ac0\u0ac1-\u0ac5\u0ac7-\u0ac8\u0ac9\u0acb-\u0acc\u0acd\u0ae2-\u0ae3\u0b01\u0b02-\u0b03\u0b3c\u0b3e\u0b3f\u0b40\u0b41-\u0b44\u0b47-\u0b48\u0b4b-\u0b4c\u0b4d\u0b56\u0b57\u0b62-\u0b63\u0b82\u0bbe-\u0bbf\u0bc0\u0bc1-\u0bc2\u0bc6-\u0bc8\u0bca-\u0bcc\u0bcd\u0bd7\u0c00\u0c01-\u0c03\u0c3e-\u0c40\u0c41-\u0c44\u0c46-\u0c48\u0c4a-\u0c4d\u0c55-\u0c56\u0c62-\u0c63\u0c81\u0c82-\u0c83\u0cbc\u0cbe\u0cbf\u0cc0-\u0cc4\u0cc6\u0cc7-\u0cc8\u0cca-\u0ccb\u0ccc-\u0ccd\u0cd5-\u0cd6\u0ce2-\u0ce3\u0d01\u0d02-\u0d03\u0d3e-\u0d40\u0d41-\u0d44\u0d46-\u0d48\u0d4a-\u0d4c\u0d4d\u0d57\u0d62-\u0d63\u0d82-\u0d83\u0dca\u0dcf-\u0dd1\u0dd2-\u0dd4\u0dd6\u0dd8-\u0ddf\u0df2-\u0df3\u0e31\u0e34-\u0e3a\u0e47-\u0e4e\u0eb1\u0eb4-\u0eb9\u0ebb-\u0ebc\u0ec8-\u0ecd\u0f18-\u0f19\u0f35\u0f37\u0f39\u0f3e-\u0f3f\u0f71-\u0f7e\u0f7f\u0f80-\u0f84\u0f86-\u0f87\u0f8d-\u0f97\u0f99-\u0fbc\u0fc6\u102b-\u102c\u102d-\u1030\u1031\u1032-\u1037\u1038\u1039-\u103a\u103b-\u103c\u103d-\u103e\u1056-\u1057\u1058-\u1059\u105e-\u1060\u1062-\u1064\u1067-\u106d\u1071-\u1074\u1082\u1083-\u1084\u1085-\u1086\u1087-\u108c\u108d\u108f\u109a-\u109c\u109d\u135d-\u135f\u1712-\u1714\u1732-\u1734\u1752-\u1753\u1772-\u1773\u17b4-\u17b5\u17b6\u17b7-\u17bd\u17be-\u17c5\u17c6\u17c7-\u17c8\u17c9-\u17d3\u17dd\u180b-\u180d\u18a9\u1920-\u1922\u1923-\u1926\u1927-\u1928\u1929-\u192b\u1930-\u1931\u1932\u1933-\u1938\u1939-\u193b\u1a17-\u1a18\u1a19-\u1a1a\u1a1b\u1a55\u1a56\u1a57\u1a58-\u1a5e\u1a60\u1a61\u1a62\u1a63-\u1a64\u1a65-\u1a6c\u1a6d-\u1a72\u1a73-\u1a7c\u1a7f\u1ab0-\u1abd\u1abe\u1b00-\u1b03\u1b04\u1b34\u1b35\u1b36-\u1b3a\u1b3b\u1b3c\u1b3d-\u1b41\u1b42\u1b43-\u1b44\u1b6b-\u1b73\u1b80-\u1b81\u1b82\u1ba1\u1ba2-\u1ba5\u1ba6-\u1ba7\u1ba8-\u1ba9\u1baa\u1bab-\u1bad\u1be6\u1be7\u1be8-\u1be9\u1bea-\u1bec\u1bed\u1bee\u1bef-\u1bf1\u1bf2-\u1bf3\u1c24-\u1c2b\u1c2c-\u1c33\u1c34-\u1c35\u1c36-\u1c37\u1cd0-\u1cd2\u1cd4-\u1ce0\u1ce1\u1ce2-\u1ce8\u1ced\u1cf2-\u1cf3\u1cf4\u1cf8-\u1cf9\u1dc0-\u1df5\u1dfc-\u1dff\u200c-\u200d\u20d0-\u20dc\u20dd-\u20e0\u20e1\u20e2-\u20e4\u20e5-\u20f0\u2cef-\u2cf1\u2d7f\u2de0-\u2dff\u302a-\u302d\u302e-\u302f\u3099-\u309a\ua66f\ua670-\ua672\ua674-\ua67d\ua69e-\ua69f\ua6f0-\ua6f1\ua802\ua806\ua80b\ua823-\ua824\ua825-\ua826\ua827\ua880-\ua881\ua8b4-\ua8c3\ua8c4\ua8e0-\ua8f1\ua926-\ua92d\ua947-\ua951\ua952-\ua953\ua980-\ua982\ua983\ua9b3\ua9b4-\ua9b5\ua9b6-\ua9b9\ua9ba-\ua9bb\ua9bc\ua9bd-\ua9c0\ua9e5\uaa29-\uaa2e\uaa2f-\uaa30\uaa31-\uaa32\uaa33-\uaa34\uaa35-\uaa36\uaa43\uaa4c\uaa4d\uaa7b\uaa7c\uaa7d\uaab0\uaab2-\uaab4\uaab7-\uaab8\uaabe-\uaabf\uaac1\uaaeb\uaaec-\uaaed\uaaee-\uaaef\uaaf5\uaaf6\uabe3-\uabe4\uabe5\uabe6-\uabe7\uabe8\uabe9-\uabea\uabec\uabed\ufb1e\ufe00-\ufe0f\ufe20-\ufe2f\uff9e-\uff9f\U000101fd\U000102e0\U00010376-\U0001037a\U00010a01-\U00010a03\U00010a05-\U00010a06\U00010a0c-\U00010a0f\U00010a38-\U00010a3a\U00010a3f\U00010ae5-\U00010ae6\U00011000\U00011001\U00011002\U00011038-\U00011046\U0001107f-\U00011081\U00011082\U000110b0-\U000110b2\U000110b3-\U000110b6\U000110b7-\U000110b8\U000110b9-\U000110ba\U00011100-\U00011102\U00011127-\U0001112b\U0001112c\U0001112d-\U00011134\U00011173\U00011180-\U00011181\U00011182\U000111b3-\U000111b5\U000111b6-\U000111be\U000111bf-\U000111c0\U000111ca-\U000111cc\U0001122c-\U0001122e\U0001122f-\U00011231\U00011232-\U00011233\U00011234\U00011235\U00011236-\U00011237\U000112df\U000112e0-\U000112e2\U000112e3-\U000112ea\U00011300-\U00011301\U00011302-\U00011303\U0001133c\U0001133e-\U0001133f\U00011340\U00011341-\U00011344\U00011347-\U00011348\U0001134b-\U0001134d\U00011357\U00011362-\U00011363\U00011366-\U0001136c\U00011370-\U00011374\U000114b0-\U000114b2\U000114b3-\U000114b8\U000114b9\U000114ba\U000114bb-\U000114be\U000114bf-\U000114c0\U000114c1\U000114c2-\U000114c3\U000115af-\U000115b1\U000115b2-\U000115b5\U000115b8-\U000115bb\U000115bc-\U000115bd\U000115be\U000115bf-\U000115c0\U000115dc-\U000115dd\U00011630-\U00011632\U00011633-\U0001163a\U0001163b-\U0001163c\U0001163d\U0001163e\U0001163f-\U00011640\U000116ab\U000116ac\U000116ad\U000116ae-\U000116af\U000116b0-\U000116b5\U000116b6\U000116b7\U0001171d-\U0001171f\U00011720-\U00011721\U00011722-\U00011725\U00011726\U00011727-\U0001172b\U00016af0-\U00016af4\U00016b30-\U00016b36\U00016f51-\U00016f7e\U00016f8f-\U00016f92\U0001bc9d-\U0001bc9e\U0001d165-\U0001d166\U0001d167-\U0001d169\U0001d16d-\U0001d172\U0001d17b-\U0001d182\U0001d185-\U0001d18b\U0001d1aa-\U0001d1ad\U0001d242-\U0001d244\U0001da00-\U0001da36\U0001da3b-\U0001da6c\U0001da75\U0001da84\U0001da9b-\U0001da9f\U0001daa1-\U0001daaf\U0001e8d0-\U0001e8d6\U000e0100-\U000e01ef]; +katakana_chars = [\u3031-\u3035\u309b-\u309c\u30a0\u30a1-\u30fa\u30fc-\u30fe\u30ff\u31f0-\u31ff\u32d0-\u32fe\u3300-\u3357\uff66-\uff6f\uff70\uff71-\uff9d\U0001b000]; +letter_other_alpha_chars = [\u00aa\u00ba\u01bb\u01c0-\u01c3\u0294\u0620-\u063f\u0641-\u064a\u066e-\u066f\u0671-\u06d3\u06d5\u06ee-\u06ef\u06fa-\u06fc\u06ff\u0710\u0712-\u072f\u074d-\u07a5\u07b1\u07ca-\u07ea\u0800-\u0815\u0840-\u0858\u08a0-\u08b4\u0904-\u0939\u093d\u0950\u0958-\u0961\u0972-\u0980\u0985-\u098c\u098f-\u0990\u0993-\u09a8\u09aa-\u09b0\u09b2\u09b6-\u09b9\u09bd\u09ce\u09dc-\u09dd\u09df-\u09e1\u09f0-\u09f1\u0a05-\u0a0a\u0a0f-\u0a10\u0a13-\u0a28\u0a2a-\u0a30\u0a32-\u0a33\u0a35-\u0a36\u0a38-\u0a39\u0a59-\u0a5c\u0a5e\u0a72-\u0a74\u0a85-\u0a8d\u0a8f-\u0a91\u0a93-\u0aa8\u0aaa-\u0ab0\u0ab2-\u0ab3\u0ab5-\u0ab9\u0abd\u0ad0\u0ae0-\u0ae1\u0af9\u0b05-\u0b0c\u0b0f-\u0b10\u0b13-\u0b28\u0b2a-\u0b30\u0b32-\u0b33\u0b35-\u0b39\u0b3d\u0b5c-\u0b5d\u0b5f-\u0b61\u0b71\u0b83\u0b85-\u0b8a\u0b8e-\u0b90\u0b92-\u0b95\u0b99-\u0b9a\u0b9c\u0b9e-\u0b9f\u0ba3-\u0ba4\u0ba8-\u0baa\u0bae-\u0bb9\u0bd0\u0c05-\u0c0c\u0c0e-\u0c10\u0c12-\u0c28\u0c2a-\u0c39\u0c3d\u0c58-\u0c5a\u0c60-\u0c61\u0c85-\u0c8c\u0c8e-\u0c90\u0c92-\u0ca8\u0caa-\u0cb3\u0cb5-\u0cb9\u0cbd\u0cde\u0ce0-\u0ce1\u0cf1-\u0cf2\u0d05-\u0d0c\u0d0e-\u0d10\u0d12-\u0d3a\u0d3d\u0d4e\u0d5f-\u0d61\u0d7a-\u0d7f\u0d85-\u0d96\u0d9a-\u0db1\u0db3-\u0dbb\u0dbd\u0dc0-\u0dc6\u10d0-\u10fa\u10fd-\u1248\u124a-\u124d\u1250-\u1256\u1258\u125a-\u125d\u1260-\u1288\u128a-\u128d\u1290-\u12b0\u12b2-\u12b5\u12b8-\u12be\u12c0\u12c2-\u12c5\u12c8-\u12d6\u12d8-\u1310\u1312-\u1315\u1318-\u135a\u1380-\u138f\u1401-\u166c\u166f-\u167f\u1681-\u169a\u16a0-\u16ea\u16f1-\u16f8\u1700-\u170c\u170e-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176c\u176e-\u1770\u1820-\u1842\u1844-\u1877\u1880-\u18a8\u18aa\u18b0-\u18f5\u1900-\u191e\u1a00-\u1a16\u1b83-\u1ba0\u1bae-\u1baf\u1bba-\u1be5\u1c00-\u1c23\u1c4d-\u1c4f\u1c5a-\u1c77\u1ce9-\u1cec\u1cee-\u1cf1\u1cf5-\u1cf6\u2135-\u2138\u2d30-\u2d67\u2d80-\u2d96\u2da0-\u2da6\u2da8-\u2dae\u2db0-\u2db6\u2db8-\u2dbe\u2dc0-\u2dc6\u2dc8-\u2dce\u2dd0-\u2dd6\u2dd8-\u2dde\u303c\u3105-\u312d\u31a0-\u31ba\ua4d0-\ua4f7\ua500-\ua60b\ua610-\ua61f\ua62a-\ua62b\ua66e\ua6a0-\ua6e5\ua78f\ua7f7\ua7fb-\ua801\ua803-\ua805\ua807-\ua80a\ua80c-\ua822\ua840-\ua873\ua882-\ua8b3\ua8f2-\ua8f7\ua8fb\ua8fd\ua90a-\ua925\ua930-\ua946\uaa00-\uaa28\uaa40-\uaa42\uaa44-\uaa4b\uaae0-\uaaea\uaaf2\uab01-\uab06\uab09-\uab0e\uab11-\uab16\uab20-\uab26\uab28-\uab2e\uabc0-\uabe2\ufb50-\ufbb1\ufbd3-\ufd3d\ufd50-\ufd8f\ufd92-\ufdc7\ufdf0-\ufdfb\ufe70-\ufe74\ufe76-\ufefc\U00010000-\U0001000b\U0001000d-\U00010026\U00010028-\U0001003a\U0001003c-\U0001003d\U0001003f-\U0001004d\U00010050-\U0001005d\U00010080-\U000100fa\U00010280-\U0001029c\U000102a0-\U000102d0\U00010300-\U0001031f\U00010330-\U00010340\U00010342-\U00010349\U00010350-\U00010375\U00010380-\U0001039d\U000103a0-\U000103c3\U000103c8-\U000103cf\U00010450-\U0001049d\U00010500-\U00010527\U00010530-\U00010563\U00010600-\U00010736\U00010740-\U00010755\U00010760-\U00010767\U00010800-\U00010805\U00010808\U0001080a-\U00010835\U00010837-\U00010838\U0001083c\U0001083f-\U00010855\U00010860-\U00010876\U00010880-\U0001089e\U000108e0-\U000108f2\U000108f4-\U000108f5\U00010900-\U00010915\U00010920-\U00010939\U00010980-\U000109b7\U000109be-\U000109bf\U00010a00\U00010a10-\U00010a13\U00010a15-\U00010a17\U00010a19-\U00010a33\U00010a60-\U00010a7c\U00010a80-\U00010a9c\U00010ac0-\U00010ac7\U00010ac9-\U00010ae4\U00010b00-\U00010b35\U00010b40-\U00010b55\U00010b60-\U00010b72\U00010b80-\U00010b91\U00010c00-\U00010c48\U00011003-\U00011037\U00011083-\U000110af\U000110d0-\U000110e8\U00011103-\U00011126\U00011150-\U00011172\U00011176\U00011183-\U000111b2\U000111c1-\U000111c4\U000111da\U000111dc\U00011200-\U00011211\U00011213-\U0001122b\U00011280-\U00011286\U00011288\U0001128a-\U0001128d\U0001128f-\U0001129d\U0001129f-\U000112a8\U000112b0-\U000112de\U00011305-\U0001130c\U0001130f-\U00011310\U00011313-\U00011328\U0001132a-\U00011330\U00011332-\U00011333\U00011335-\U00011339\U0001133d\U00011350\U0001135d-\U00011361\U00011480-\U000114af\U000114c4-\U000114c5\U000114c7\U00011580-\U000115ae\U000115d8-\U000115db\U00011600-\U0001162f\U00011644\U00011680-\U000116aa\U000118ff\U00011ac0-\U00011af8\U00012000-\U00012399\U00012480-\U00012543\U00013000-\U0001342e\U00014400-\U00014646\U00016800-\U00016a38\U00016a40-\U00016a5e\U00016ad0-\U00016aed\U00016b00-\U00016b2f\U00016b63-\U00016b77\U00016b7d-\U00016b8f\U00016f00-\U00016f44\U00016f50\U0001bc00-\U0001bc6a\U0001bc70-\U0001bc7c\U0001bc80-\U0001bc88\U0001bc90-\U0001bc99\U0001e800-\U0001e8c4\U0001ee00-\U0001ee03\U0001ee05-\U0001ee1f\U0001ee21-\U0001ee22\U0001ee24\U0001ee27\U0001ee29-\U0001ee32\U0001ee34-\U0001ee37\U0001ee39\U0001ee3b\U0001ee42\U0001ee47\U0001ee49\U0001ee4b\U0001ee4d-\U0001ee4f\U0001ee51-\U0001ee52\U0001ee54\U0001ee57\U0001ee59\U0001ee5b\U0001ee5d\U0001ee5f\U0001ee61-\U0001ee62\U0001ee64\U0001ee67-\U0001ee6a\U0001ee6c-\U0001ee72\U0001ee74-\U0001ee77\U0001ee79-\U0001ee7c\U0001ee7e\U0001ee80-\U0001ee89\U0001ee8b-\U0001ee9b\U0001eea1-\U0001eea3\U0001eea5-\U0001eea9\U0001eeab-\U0001eebb]; mid_letter_chars = [\u003a\u00b7\u02d7\u0387\u05f4\u2027\ufe13\ufe55\uff1a]; mid_number_chars = [\u002c\u003b\u037e\u0589\u060c-\u060d\u066c\u07f8\u2044\ufe10\ufe14\ufe50\ufe54\uff0c\uff1b]; mid_num_letter_chars = [\u002e\u2018\u2019\u2024\ufe52\uff07\uff0e]; -numeric_chars = [\u0030-\u0039\u0660-\u0669\u066b\u06f0-\u06f9\u07c0-\u07c9\u0966-\u096f\u09e6-\u09ef\u0a66-\u0a6f\u0ae6-\u0aef\u0b66-\u0b6f\u0be6-\u0bef\u0c66-\u0c6f\u0ce6-\u0cef\u0d66-\u0d6f\u0de6-\u0def\u0e50-\u0e59\u0ed0-\u0ed9\u0f20-\u0f29\u1040-\u1049\u1090-\u1099\u17e0-\u17e9\u1810-\u1819\u1946-\u194f\u19d0-\u19d9\u1a80-\u1a89\u1a90-\u1a99\u1b50-\u1b59\u1bb0-\u1bb9\u1c40-\u1c49\u1c50-\u1c59\ua620-\ua629\ua8d0-\ua8d9\ua900-\ua909\ua9d0-\ua9d9\ua9f0-\ua9f9\uaa50-\uaa59\uabf0-\uabf9]; +numeric_chars = [\u0030-\u0039\u0660-\u0669\u066b\u06f0-\u06f9\u07c0-\u07c9\u0966-\u096f\u09e6-\u09ef\u0a66-\u0a6f\u0ae6-\u0aef\u0b66-\u0b6f\u0be6-\u0bef\u0c66-\u0c6f\u0ce6-\u0cef\u0d66-\u0d6f\u0de6-\u0def\u0e50-\u0e59\u0ed0-\u0ed9\u0f20-\u0f29\u1040-\u1049\u1090-\u1099\u17e0-\u17e9\u1810-\u1819\u1946-\u194f\u19d0-\u19d9\u1a80-\u1a89\u1a90-\u1a99\u1b50-\u1b59\u1bb0-\u1bb9\u1c40-\u1c49\u1c50-\u1c59\ua620-\ua629\ua8d0-\ua8d9\ua900-\ua909\ua9d0-\ua9d9\ua9f0-\ua9f9\uaa50-\uaa59\uabf0-\uabf9\U000104a0-\U000104a9\U00011066-\U0001106f\U000110f0-\U000110f9\U00011136-\U0001113f\U000111d0-\U000111d9\U000112f0-\U000112f9\U000114d0-\U000114d9\U00011650-\U00011659\U000116c0-\U000116c9\U00011730-\U00011739\U000118e0-\U000118e9\U00016a60-\U00016a69\U00016b50-\U00016b59\U0001d7ce-\U0001d7ff]; extend_num_letter_chars = [\u005f\u203f-\u2040\u2054\ufe33-\ufe34\ufe4d-\ufe4f\uff3f]; -ideographic_numeric_chars = [\u00b2-\u00b3\u00b9\u00bc-\u00be\u2070\u2074-\u2079\u2080-\u2089\u2150-\u215f\u2189\u2460-\u249b\u24ea-\u24ff\u2776-\u2793\u3192-\u3195\u3220-\u3229\u3248-\u324f\u3251-\u325f\u3280-\u3289\u32b1-\u32bf\ua830-\ua835\u2160-\u2182\u2185-\u2188\u09f4-\u09f9\u0b72-\u0b77\u0bf0-\u0bf2\u0c78-\u0c7e\u0d70-\u0d75\u0f2a-\u0f33\u1369-\u137c\u16ee-\u16f0\u17f0-\u17f9\u3007\u3021-\u3029\u3038-\u303a\u2cfd\u19da\ua6e6-\ua6ef]; -ideographic_chars = [\u0e01-\u0e30\u0e31\u0e32-\u0e33\u0e34-\u0e3a\u0e40-\u0e45\u0e46\u0e47-\u0e4e\u0e4f\u0e50-\u0e59\u0e5a-\u0e5b\u0e81-\u0e82\u0e84\u0e87-\u0e88\u0e8a\u0e8d\u0e94-\u0e97\u0e99-\u0e9f\u0ea1-\u0ea3\u0ea5\u0ea7\u0eaa-\u0eab\u0ead-\u0eb0\u0eb1\u0eb2-\u0eb3\u0eb4-\u0eb9\u0ebb-\u0ebc\u0ebd\u0ec0-\u0ec4\u0ec6\u0ec8-\u0ecd\u0ed0-\u0ed9\u0edc-\u0edf\u0f00\u0f01-\u0f03\u0f04-\u0f12\u0f13\u0f14\u0f15-\u0f17\u0f18-\u0f19\u0f1a-\u0f1f\u0f20-\u0f29\u0f2a-\u0f33\u0f34\u0f35\u0f36\u0f37\u0f38\u0f39\u0f3a\u0f3b\u0f3c\u0f3d\u0f3e-\u0f3f\u0f40-\u0f47\u0f49-\u0f6c\u0f71-\u0f7e\u0f7f\u0f80-\u0f84\u0f85\u0f86-\u0f87\u0f88-\u0f8c\u0f8d-\u0f97\u0f99-\u0fbc\u0fbe-\u0fc5\u0fc6\u0fc7-\u0fcc\u0fce-\u0fcf\u0fd0-\u0fd4\u0fd9-\u0fda\u1100-\u11ff\u302e-\u302f\u3131-\u318e\u3200-\u321e\u3260-\u327e\ua960-\ua97c\uac00-\ud7a3\ud7b0-\ud7c6\ud7cb-\ud7fb\uffa0-\uffbe\uffc2-\uffc7\uffca-\uffcf\uffd2-\uffd7\uffda-\uffdc\u3041-\u3096\u309d-\u309e\u309f\u2e80-\u2e99\u2e9b-\u2ef3\u2f00-\u2fd5\u3005\u3007\u3021-\u3029\u3038-\u303a\u303b\u3400-\u4db5\u4e00-\u9fcc\uf900-\ufa6d\ufa70-\ufad9\ua000-\ua014\ua015\ua016-\ua48c\ua490-\ua4c6\u1b00-\u1b03\u1b04\u1b05-\u1b33\u1b34\u1b35\u1b36-\u1b3a\u1b3b\u1b3c\u1b3d-\u1b41\u1b42\u1b43-\u1b44\u1b45-\u1b4b\u1b50-\u1b59\u1b5a-\u1b60\u1b61-\u1b6a\u1b6b-\u1b73\u1b74-\u1b7c\ua980-\ua982\ua983\ua984-\ua9b2\ua9b3\ua9b4-\ua9b5\ua9b6-\ua9b9\ua9ba-\ua9bb\ua9bc\ua9bd-\ua9c0\ua9c1-\ua9cd\ua9d0-\ua9d9\ua9de-\ua9df]; - hangul_syllable_class_LV = [\uac00\uac1c\uac38\uac54\uac70\uac8c\uaca8\uacc4\uace0\uacfc\uad18\uad34\uad50\uad6c\uad88\uada4\uadc0\uaddc\uadf8\uae14\uae30\uae4c\uae68\uae84\uaea0\uaebc\uaed8\uaef4\uaf10\uaf2c\uaf48\uaf64\uaf80\uaf9c\uafb8\uafd4\uaff0\ub00c\ub028\ub044\ub060\ub07c\ub098\ub0b4\ub0d0\ub0ec\ub108\ub124\ub140\ub15c\ub178\ub194\ub1b0\ub1cc\ub1e8\ub204\ub220\ub23c\ub258\ub274\ub290\ub2ac\ub2c8\ub2e4\ub300\ub31c\ub338\ub354\ub370\ub38c\ub3a8\ub3c4\ub3e0\ub3fc\ub418\ub434\ub450\ub46c\ub488\ub4a4\ub4c0\ub4dc\ub4f8\ub514\ub530\ub54c\ub568\ub584\ub5a0\ub5bc\ub5d8\ub5f4\ub610\ub62c\ub648\ub664\ub680\ub69c\ub6b8\ub6d4\ub6f0\ub70c\ub728\ub744\ub760\ub77c\ub798\ub7b4\ub7d0\ub7ec\ub808\ub824\ub840\ub85c\ub878\ub894\ub8b0\ub8cc\ub8e8\ub904\ub920\ub93c\ub958\ub974\ub990\ub9ac\ub9c8\ub9e4\uba00\uba1c\uba38\uba54\uba70\uba8c\ubaa8\ubac4\ubae0\ubafc\ubb18\ubb34\ubb50\ubb6c\ubb88\ubba4\ubbc0\ubbdc\ubbf8\ubc14\ubc30\ubc4c\ubc68\ubc84\ubca0\ubcbc\ubcd8\ubcf4\ubd10\ubd2c\ubd48\ubd64\ubd80\ubd9c\ubdb8\ubdd4\ubdf0\ube0c\ube28\ube44\ube60\ube7c\ube98\ubeb4\ubed0\ubeec\ubf08\ubf24\ubf40\ubf5c\ubf78\ubf94\ubfb0\ubfcc\ubfe8\uc004\uc020\uc03c\uc058\uc074\uc090\uc0ac\uc0c8\uc0e4\uc100\uc11c\uc138\uc154\uc170\uc18c\uc1a8\uc1c4\uc1e0\uc1fc\uc218\uc234\uc250\uc26c\uc288\uc2a4\uc2c0\uc2dc\uc2f8\uc314\uc330\uc34c\uc368\uc384\uc3a0\uc3bc\uc3d8\uc3f4\uc410\uc42c\uc448\uc464\uc480\uc49c\uc4b8\uc4d4\uc4f0\uc50c\uc528\uc544\uc560\uc57c\uc598\uc5b4\uc5d0\uc5ec\uc608\uc624\uc640\uc65c\uc678\uc694\uc6b0\uc6cc\uc6e8\uc704\uc720\uc73c\uc758\uc774\uc790\uc7ac\uc7c8\uc7e4\uc800\uc81c\uc838\uc854\uc870\uc88c\uc8a8\uc8c4\uc8e0\uc8fc\uc918\uc934\uc950\uc96c\uc988\uc9a4\uc9c0\uc9dc\uc9f8\uca14\uca30\uca4c\uca68\uca84\ucaa0\ucabc\ucad8\ucaf4\ucb10\ucb2c\ucb48\ucb64\ucb80\ucb9c\ucbb8\ucbd4\ucbf0\ucc0c\ucc28\ucc44\ucc60\ucc7c\ucc98\uccb4\uccd0\uccec\ucd08\ucd24\ucd40\ucd5c\ucd78\ucd94\ucdb0\ucdcc\ucde8\uce04\uce20\uce3c\uce58\uce74\uce90\uceac\ucec8\ucee4\ucf00\ucf1c\ucf38\ucf54\ucf70\ucf8c\ucfa8\ucfc4\ucfe0\ucffc\ud018\ud034\ud050\ud06c\ud088\ud0a4\ud0c0\ud0dc\ud0f8\ud114\ud130\ud14c\ud168\ud184\ud1a0\ud1bc\ud1d8\ud1f4\ud210\ud22c\ud248\ud264\ud280\ud29c\ud2b8\ud2d4\ud2f0\ud30c\ud328\ud344\ud360\ud37c\ud398\ud3b4\ud3d0\ud3ec\ud408\ud424\ud440\ud45c\ud478\ud494\ud4b0\ud4cc\ud4e8\ud504\ud520\ud53c\ud558\ud574\ud590\ud5ac\ud5c8\ud5e4\ud600\ud61c\ud638\ud654\ud670\ud68c\ud6a8\ud6c4\ud6e0\ud6fc\ud718\ud734\ud750\ud76c\ud788]; hangul_syllable_class_LVT = [\uac01-\uac1b\uac1d-\uac37\uac39-\uac53\uac55-\uac6f\uac71-\uac8b\uac8d-\uaca7\uaca9-\uacc3\uacc5-\uacdf\uace1-\uacfb\uacfd-\uad17\uad19-\uad33\uad35-\uad4f\uad51-\uad6b\uad6d-\uad87\uad89-\uada3\uada5-\uadbf\uadc1-\uaddb\uaddd-\uadf7\uadf9-\uae13\uae15-\uae2f\uae31-\uae4b\uae4d-\uae67\uae69-\uae83\uae85-\uae9f\uaea1-\uaebb\uaebd-\uaed7\uaed9-\uaef3\uaef5-\uaf0f\uaf11-\uaf2b\uaf2d-\uaf47\uaf49-\uaf63\uaf65-\uaf7f\uaf81-\uaf9b\uaf9d-\uafb7\uafb9-\uafd3\uafd5-\uafef\uaff1-\ub00b\ub00d-\ub027\ub029-\ub043\ub045-\ub05f\ub061-\ub07b\ub07d-\ub097\ub099-\ub0b3\ub0b5-\ub0cf\ub0d1-\ub0eb\ub0ed-\ub107\ub109-\ub123\ub125-\ub13f\ub141-\ub15b\ub15d-\ub177\ub179-\ub193\ub195-\ub1af\ub1b1-\ub1cb\ub1cd-\ub1e7\ub1e9-\ub203\ub205-\ub21f\ub221-\ub23b\ub23d-\ub257\ub259-\ub273\ub275-\ub28f\ub291-\ub2ab\ub2ad-\ub2c7\ub2c9-\ub2e3\ub2e5-\ub2ff\ub301-\ub31b\ub31d-\ub337\ub339-\ub353\ub355-\ub36f\ub371-\ub38b\ub38d-\ub3a7\ub3a9-\ub3c3\ub3c5-\ub3df\ub3e1-\ub3fb\ub3fd-\ub417\ub419-\ub433\ub435-\ub44f\ub451-\ub46b\ub46d-\ub487\ub489-\ub4a3\ub4a5-\ub4bf\ub4c1-\ub4db\ub4dd-\ub4f7\ub4f9-\ub513\ub515-\ub52f\ub531-\ub54b\ub54d-\ub567\ub569-\ub583\ub585-\ub59f\ub5a1-\ub5bb\ub5bd-\ub5d7\ub5d9-\ub5f3\ub5f5-\ub60f\ub611-\ub62b\ub62d-\ub647\ub649-\ub663\ub665-\ub67f\ub681-\ub69b\ub69d-\ub6b7\ub6b9-\ub6d3\ub6d5-\ub6ef\ub6f1-\ub70b\ub70d-\ub727\ub729-\ub743\ub745-\ub75f\ub761-\ub77b\ub77d-\ub797\ub799-\ub7b3\ub7b5-\ub7cf\ub7d1-\ub7eb\ub7ed-\ub807\ub809-\ub823\ub825-\ub83f\ub841-\ub85b\ub85d-\ub877\ub879-\ub893\ub895-\ub8af\ub8b1-\ub8cb\ub8cd-\ub8e7\ub8e9-\ub903\ub905-\ub91f\ub921-\ub93b\ub93d-\ub957\ub959-\ub973\ub975-\ub98f\ub991-\ub9ab\ub9ad-\ub9c7\ub9c9-\ub9e3\ub9e5-\ub9ff\uba01-\uba1b\uba1d-\uba37\uba39-\uba53\uba55-\uba6f\uba71-\uba8b\uba8d-\ubaa7\ubaa9-\ubac3\ubac5-\ubadf\ubae1-\ubafb\ubafd-\ubb17\ubb19-\ubb33\ubb35-\ubb4f\ubb51-\ubb6b\ubb6d-\ubb87\ubb89-\ubba3\ubba5-\ubbbf\ubbc1-\ubbdb\ubbdd-\ubbf7\ubbf9-\ubc13\ubc15-\ubc2f\ubc31-\ubc4b\ubc4d-\ubc67\ubc69-\ubc83\ubc85-\ubc9f\ubca1-\ubcbb\ubcbd-\ubcd7\ubcd9-\ubcf3\ubcf5-\ubd0f\ubd11-\ubd2b\ubd2d-\ubd47\ubd49-\ubd63\ubd65-\ubd7f\ubd81-\ubd9b\ubd9d-\ubdb7\ubdb9-\ubdd3\ubdd5-\ubdef\ubdf1-\ube0b\ube0d-\ube27\ube29-\ube43\ube45-\ube5f\ube61-\ube7b\ube7d-\ube97\ube99-\ubeb3\ubeb5-\ubecf\ubed1-\ubeeb\ubeed-\ubf07\ubf09-\ubf23\ubf25-\ubf3f\ubf41-\ubf5b\ubf5d-\ubf77\ubf79-\ubf93\ubf95-\ubfaf\ubfb1-\ubfcb\ubfcd-\ubfe7\ubfe9-\uc003\uc005-\uc01f\uc021-\uc03b\uc03d-\uc057\uc059-\uc073\uc075-\uc08f\uc091-\uc0ab\uc0ad-\uc0c7\uc0c9-\uc0e3\uc0e5-\uc0ff\uc101-\uc11b\uc11d-\uc137\uc139-\uc153\uc155-\uc16f\uc171-\uc18b\uc18d-\uc1a7\uc1a9-\uc1c3\uc1c5-\uc1df\uc1e1-\uc1fb\uc1fd-\uc217\uc219-\uc233\uc235-\uc24f\uc251-\uc26b\uc26d-\uc287\uc289-\uc2a3\uc2a5-\uc2bf\uc2c1-\uc2db\uc2dd-\uc2f7\uc2f9-\uc313\uc315-\uc32f\uc331-\uc34b\uc34d-\uc367\uc369-\uc383\uc385-\uc39f\uc3a1-\uc3bb\uc3bd-\uc3d7\uc3d9-\uc3f3\uc3f5-\uc40f\uc411-\uc42b\uc42d-\uc447\uc449-\uc463\uc465-\uc47f\uc481-\uc49b\uc49d-\uc4b7\uc4b9-\uc4d3\uc4d5-\uc4ef\uc4f1-\uc50b\uc50d-\uc527\uc529-\uc543\uc545-\uc55f\uc561-\uc57b\uc57d-\uc597\uc599-\uc5b3\uc5b5-\uc5cf\uc5d1-\uc5eb\uc5ed-\uc607\uc609-\uc623\uc625-\uc63f\uc641-\uc65b\uc65d-\uc677\uc679-\uc693\uc695-\uc6af\uc6b1-\uc6cb\uc6cd-\uc6e7\uc6e9-\uc703\uc705-\uc71f\uc721-\uc73b\uc73d-\uc757\uc759-\uc773\uc775-\uc78f\uc791-\uc7ab\uc7ad-\uc7c7\uc7c9-\uc7e3\uc7e5-\uc7ff\uc801-\uc81b\uc81d-\uc837\uc839-\uc853\uc855-\uc86f\uc871-\uc88b\uc88d-\uc8a7\uc8a9-\uc8c3\uc8c5-\uc8df\uc8e1-\uc8fb\uc8fd-\uc917\uc919-\uc933\uc935-\uc94f\uc951-\uc96b\uc96d-\uc987\uc989-\uc9a3\uc9a5-\uc9bf\uc9c1-\uc9db\uc9dd-\uc9f7\uc9f9-\uca13\uca15-\uca2f\uca31-\uca4b\uca4d-\uca67\uca69-\uca83\uca85-\uca9f\ucaa1-\ucabb\ucabd-\ucad7\ucad9-\ucaf3\ucaf5-\ucb0f\ucb11-\ucb2b\ucb2d-\ucb47\ucb49-\ucb63\ucb65-\ucb7f\ucb81-\ucb9b\ucb9d-\ucbb7\ucbb9-\ucbd3\ucbd5-\ucbef\ucbf1-\ucc0b\ucc0d-\ucc27\ucc29-\ucc43\ucc45-\ucc5f\ucc61-\ucc7b\ucc7d-\ucc97\ucc99-\uccb3\uccb5-\ucccf\uccd1-\ucceb\ucced-\ucd07\ucd09-\ucd23\ucd25-\ucd3f\ucd41-\ucd5b\ucd5d-\ucd77\ucd79-\ucd93\ucd95-\ucdaf\ucdb1-\ucdcb\ucdcd-\ucde7\ucde9-\uce03\uce05-\uce1f\uce21-\uce3b\uce3d-\uce57\uce59-\uce73\uce75-\uce8f\uce91-\uceab\ucead-\ucec7\ucec9-\ucee3\ucee5-\uceff\ucf01-\ucf1b\ucf1d-\ucf37\ucf39-\ucf53\ucf55-\ucf6f\ucf71-\ucf8b\ucf8d-\ucfa7\ucfa9-\ucfc3\ucfc5-\ucfdf\ucfe1-\ucffb\ucffd-\ud017\ud019-\ud033\ud035-\ud04f\ud051-\ud06b\ud06d-\ud087\ud089-\ud0a3\ud0a5-\ud0bf\ud0c1-\ud0db\ud0dd-\ud0f7\ud0f9-\ud113\ud115-\ud12f\ud131-\ud14b\ud14d-\ud167\ud169-\ud183\ud185-\ud19f\ud1a1-\ud1bb\ud1bd-\ud1d7\ud1d9-\ud1f3\ud1f5-\ud20f\ud211-\ud22b\ud22d-\ud247\ud249-\ud263\ud265-\ud27f\ud281-\ud29b\ud29d-\ud2b7\ud2b9-\ud2d3\ud2d5-\ud2ef\ud2f1-\ud30b\ud30d-\ud327\ud329-\ud343\ud345-\ud35f\ud361-\ud37b\ud37d-\ud397\ud399-\ud3b3\ud3b5-\ud3cf\ud3d1-\ud3eb\ud3ed-\ud407\ud409-\ud423\ud425-\ud43f\ud441-\ud45b\ud45d-\ud477\ud479-\ud493\ud495-\ud4af\ud4b1-\ud4cb\ud4cd-\ud4e7\ud4e9-\ud503\ud505-\ud51f\ud521-\ud53b\ud53d-\ud557\ud559-\ud573\ud575-\ud58f\ud591-\ud5ab\ud5ad-\ud5c7\ud5c9-\ud5e3\ud5e5-\ud5ff\ud601-\ud61b\ud61d-\ud637\ud639-\ud653\ud655-\ud66f\ud671-\ud68b\ud68d-\ud6a7\ud6a9-\ud6c3\ud6c5-\ud6df\ud6e1-\ud6fb\ud6fd-\ud717\ud719-\ud733\ud735-\ud74f\ud751-\ud76b\ud76d-\ud787\ud789-\ud7a3]; hangul_syllable_class_V = [\u1160-\u11a7\ud7b0-\ud7c6]; hangul_syllable_class_L = [\u1100-\u115f\ua960-\ua97c]; hangul_syllable_class_T = [\u11a8-\u11ff\ud7cb-\ud7fb]; +ideographic_numeric_chars = [\u00b2-\u00b3\u00b9\u00bc-\u00be\u2070\u2074-\u2079\u2080-\u2089\u2150-\u215f\u2189\u2460-\u249b\u24ea-\u24ff\u2776-\u2793\u3192-\u3195\u3220-\u3229\u3248-\u324f\u3251-\u325f\u3280-\u3289\u32b1-\u32bf\ua830-\ua835\U00010107-\U00010133\U000102e1-\U000102fb\U0001d360-\U0001d371\U0001f100-\U0001f10c\u2160-\u2182\u2185-\u2188\U00010140-\U00010174\U00010175-\U00010178\U0001018a-\U0001018b\U00010e60-\U00010e7e\u09f4-\u09f9\u0b72-\u0b77\u0bf0-\u0bf2\u0c78-\u0c7e\u0d70-\u0d75\U000111e1-\U000111f4\u0f2a-\u0f33\u1369-\u137c\u16ee-\u16f0\u17f0-\u17f9\u3007\u3021-\u3029\u3038-\u303a\U00010320-\U00010323\U00010341\U0001034a\u2cfd\u19da\U000103d1-\U000103d5\U00010a40-\U00010a47\U00012400-\U0001246e\U00010916-\U0001091b\ua6e6-\ua6ef\U00010858-\U0001085f\U00010a7d-\U00010a7e\U00010b58-\U00010b5f\U00010b78-\U00010b7f\U00011052-\U00011065\U000109bc-\U000109bd\U000109c0-\U000109cf\U000109d2-\U000109ff\U00016b5b-\U00016b61\U00010aeb-\U00010aef\U0001e8c7-\U0001e8cf\U00010a9d-\U00010a9f\U000108a7-\U000108af\U00010879-\U0001087f\U00010ba9-\U00010baf\U000118ea-\U000118f2\U0001173a-\U0001173b\U000108fb-\U000108ff\U00010cfa-\U00010cff]; +ideographic_chars = [\u0e01-\u0e30\u0e31\u0e32-\u0e33\u0e34-\u0e3a\u0e40-\u0e45\u0e46\u0e47-\u0e4e\u0e4f\u0e50-\u0e59\u0e5a-\u0e5b\u0e81-\u0e82\u0e84\u0e87-\u0e88\u0e8a\u0e8d\u0e94-\u0e97\u0e99-\u0e9f\u0ea1-\u0ea3\u0ea5\u0ea7\u0eaa-\u0eab\u0ead-\u0eb0\u0eb1\u0eb2-\u0eb3\u0eb4-\u0eb9\u0ebb-\u0ebc\u0ebd\u0ec0-\u0ec4\u0ec6\u0ec8-\u0ecd\u0ed0-\u0ed9\u0edc-\u0edf\u0f00\u0f01-\u0f03\u0f04-\u0f12\u0f13\u0f14\u0f15-\u0f17\u0f18-\u0f19\u0f1a-\u0f1f\u0f20-\u0f29\u0f2a-\u0f33\u0f34\u0f35\u0f36\u0f37\u0f38\u0f39\u0f3a\u0f3b\u0f3c\u0f3d\u0f3e-\u0f3f\u0f40-\u0f47\u0f49-\u0f6c\u0f71-\u0f7e\u0f7f\u0f80-\u0f84\u0f85\u0f86-\u0f87\u0f88-\u0f8c\u0f8d-\u0f97\u0f99-\u0fbc\u0fbe-\u0fc5\u0fc6\u0fc7-\u0fcc\u0fce-\u0fcf\u0fd0-\u0fd4\u0fd9-\u0fda\u1100-\u11ff\u302e-\u302f\u3131-\u318e\u3200-\u321e\u3260-\u327e\ua960-\ua97c\uac00-\ud7a3\ud7b0-\ud7c6\ud7cb-\ud7fb\uffa0-\uffbe\uffc2-\uffc7\uffca-\uffcf\uffd2-\uffd7\uffda-\uffdc\u3041-\u3096\u309d-\u309e\u309f\U0001b001\U0001f200\u2e80-\u2e99\u2e9b-\u2ef3\u2f00-\u2fd5\u3005\u3007\u3021-\u3029\u3038-\u303a\u303b\u3400-\u4db5\u4e00-\u9fd5\uf900-\ufa6d\ufa70-\ufad9\U00020000-\U0002a6d6\U0002a700-\U0002b734\U0002b740-\U0002b81d\U0002b820-\U0002cea1\U0002f800-\U0002fa1d\ua000-\ua014\ua015\ua016-\ua48c\ua490-\ua4c6\u1b00-\u1b03\u1b04\u1b05-\u1b33\u1b34\u1b35\u1b36-\u1b3a\u1b3b\u1b3c\u1b3d-\u1b41\u1b42\u1b43-\u1b44\u1b45-\u1b4b\u1b50-\u1b59\u1b5a-\u1b60\u1b61-\u1b6a\u1b6b-\u1b73\u1b74-\u1b7c\ua980-\ua982\ua983\ua984-\ua9b2\ua9b3\ua9b4-\ua9b5\ua9b6-\ua9b9\ua9ba-\ua9bb\ua9bc\ua9bd-\ua9c0\ua9c1-\ua9cd\ua9d0-\ua9d9\ua9de-\ua9df]; + single_quote = [\u0027]; double_quote = [\u0022];