[numex] adding stopword rules with the regular numex rules
This commit is contained in:
@@ -56,6 +56,8 @@ rule_type_map = {
|
|||||||
|
|
||||||
numex_rule_template = u'{{"{key}", (numex_rule_t){{{left_context_type}, {right_context_type}, {rule_type}, {gender}, {radix}, {value}LL}}}}'
|
numex_rule_template = u'{{"{key}", (numex_rule_t){{{left_context_type}, {right_context_type}, {rule_type}, {gender}, {radix}, {value}LL}}}}'
|
||||||
|
|
||||||
|
stopword_rule_template = u'{{"{key}", NUMEX_STOPWORD_RULE}}'
|
||||||
|
|
||||||
ordinal_indicator_template = u'{{{number}, {gender}, "{value}"}}'
|
ordinal_indicator_template = u'{{{number}, {gender}, "{value}"}}'
|
||||||
|
|
||||||
stopwords_template = u'"{word}"'
|
stopwords_template = u'"{word}"'
|
||||||
@@ -71,10 +73,6 @@ ordinal_indicator_t ordinal_indicator_rules[] = {{
|
|||||||
{ordinal_indicator_rules}
|
{ordinal_indicator_rules}
|
||||||
}};
|
}};
|
||||||
|
|
||||||
char *numex_stopwords[] = {{
|
|
||||||
{stopwords}
|
|
||||||
}};
|
|
||||||
|
|
||||||
numex_language_source_t numex_languages[] = {{
|
numex_language_source_t numex_languages[] = {{
|
||||||
{languages}
|
{languages}
|
||||||
}};
|
}};
|
||||||
@@ -101,7 +99,6 @@ def parse_numex_rules(dirname=NUMEX_DATA_DIR, outfile=NUMEX_RULES_FILE):
|
|||||||
|
|
||||||
rules = data.get('rules', [])
|
rules = data.get('rules', [])
|
||||||
rule_index = len(all_rules)
|
rule_index = len(all_rules)
|
||||||
num_rules = len(rules)
|
|
||||||
|
|
||||||
for rule in rules:
|
for rule in rules:
|
||||||
gender = gender_map[rule.get('gender')]
|
gender = gender_map[rule.get('gender')]
|
||||||
@@ -142,7 +139,9 @@ def parse_numex_rules(dirname=NUMEX_DATA_DIR, outfile=NUMEX_RULES_FILE):
|
|||||||
num_stopwords = len(stopwords)
|
num_stopwords = len(stopwords)
|
||||||
|
|
||||||
for stopword in stopwords:
|
for stopword in stopwords:
|
||||||
all_stopwords.append(unicode(stopwords_template.format(word=stopword)))
|
all_rules.append(unicode(stopword_rule_template.format(key=stopword)))
|
||||||
|
|
||||||
|
num_rules = len(rules) + len(stopwords)
|
||||||
|
|
||||||
all_languages.append(unicode(language_template.format(
|
all_languages.append(unicode(language_template.format(
|
||||||
language=language,
|
language=language,
|
||||||
|
|||||||
@@ -15,8 +15,6 @@ typedef struct numex_language_source {
|
|||||||
size_t num_rules;
|
size_t num_rules;
|
||||||
size_t ordinal_indicator_index;
|
size_t ordinal_indicator_index;
|
||||||
size_t num_ordinal_indicators;
|
size_t num_ordinal_indicators;
|
||||||
size_t stopword_index;
|
|
||||||
size_t num_stopwords;
|
|
||||||
} numex_language_source_t;
|
} numex_language_source_t;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user