[numex] Adding a whole-words-only option for numex languages, e.g. for Latin, so we don't match an initial "D" as the numeral 500

This commit is contained in:
Al
2015-06-12 16:09:45 -04:00
parent 6b60446dbe
commit fc735bb5c3

View File

@@ -73,7 +73,7 @@ ordinal_indicator_template = u'{{"{key}", {gender}, {category}, "{value}"}}'
stopwords_template = u'"{word}"'
language_template = u'{{"{language}", {rule_index}, {num_rules}, {ordinal_indicator_index}, {num_ordinal_indicators}}}'
language_template = u'{{"{language}", {whole_words_only}, {rule_index}, {num_rules}, {ordinal_indicator_index}, {num_ordinal_indicators}}}'
numex_rules_data_template = u'''
numex_rule_source_t numex_rules[] = {{
@@ -108,6 +108,8 @@ def parse_numex_rules(dirname=NUMEX_DATA_DIR, outfile=NUMEX_RULES_FILE):
data = json.load(open(path))
whole_words_only = data.get('whole_words_only', False)
rules = data.get('rules', [])
rule_index = len(all_rules)
@@ -163,6 +165,7 @@ def parse_numex_rules(dirname=NUMEX_DATA_DIR, outfile=NUMEX_RULES_FILE):
all_languages.append(unicode(language_template.format(
language=language,
whole_words_only=int(whole_words_only),
rule_index=rule_index,
num_rules=num_rules,
ordinal_indicator_index=ordinal_indicator_index,