[numex] Separating rules from keys for Linux gcc compilation
This commit is contained in:
@@ -73,9 +73,10 @@ rule_type_map = {
|
|||||||
'ordinal_indicator': ORDINAL_INDICATOR,
|
'ordinal_indicator': ORDINAL_INDICATOR,
|
||||||
}
|
}
|
||||||
|
|
||||||
numex_rule_template = u'{{"{key}", (numex_rule_t){{{left_context_type}, {right_context_type}, {rule_type}, {gender}, {category}, {radix}, {value}LL}}}}'
|
numex_key_template = u'"{key}"'
|
||||||
|
numex_rule_template = u'{{{left_context_type}, {right_context_type}, {rule_type}, {gender}, {category}, {radix}, {value}LL}}'
|
||||||
|
|
||||||
stopword_rule_template = u'{{"{key}", NUMEX_STOPWORD_RULE}}'
|
stopword_rule = u'NUMEX_STOPWORD_RULE'
|
||||||
|
|
||||||
ordinal_indicator_template = u'{{"{key}", {gender}, {category}, "{value}"}}'
|
ordinal_indicator_template = u'{{"{key}", {gender}, {category}, "{value}"}}'
|
||||||
|
|
||||||
@@ -84,7 +85,11 @@ stopwords_template = u'"{word}"'
|
|||||||
language_template = u'{{"{language}", {whole_words_only}, {rule_index}, {num_rules}, {ordinal_indicator_index}, {num_ordinal_indicators}}}'
|
language_template = u'{{"{language}", {whole_words_only}, {rule_index}, {num_rules}, {ordinal_indicator_index}, {num_ordinal_indicators}}}'
|
||||||
|
|
||||||
numex_rules_data_template = u'''
|
numex_rules_data_template = u'''
|
||||||
numex_rule_source_t numex_rules[] = {{
|
char *numex_keys[] = {{
|
||||||
|
{numex_keys}
|
||||||
|
}};
|
||||||
|
|
||||||
|
numex_rule_t numex_rules[] = {{
|
||||||
{numex_rules}
|
{numex_rules}
|
||||||
}};
|
}};
|
||||||
|
|
||||||
@@ -99,7 +104,9 @@ numex_language_source_t numex_languages[] = {{
|
|||||||
|
|
||||||
|
|
||||||
def parse_numex_rules(dirname=NUMEX_DATA_DIR, outfile=NUMEX_RULES_FILE):
|
def parse_numex_rules(dirname=NUMEX_DATA_DIR, outfile=NUMEX_RULES_FILE):
|
||||||
|
all_keys = []
|
||||||
all_rules = []
|
all_rules = []
|
||||||
|
|
||||||
all_ordinal_indicators = []
|
all_ordinal_indicators = []
|
||||||
all_stopwords = []
|
all_stopwords = []
|
||||||
|
|
||||||
@@ -133,8 +140,8 @@ def parse_numex_rules(dirname=NUMEX_DATA_DIR, outfile=NUMEX_RULES_FILE):
|
|||||||
category = category_map[rule.get('category')]
|
category = category_map[rule.get('category')]
|
||||||
left_context_type = left_context_map[rule.get('left')]
|
left_context_type = left_context_map[rule.get('left')]
|
||||||
right_context_type = right_context_map[rule.get('right')]
|
right_context_type = right_context_map[rule.get('right')]
|
||||||
|
all_keys.append(unicode(numex_key_template.format(key=key)))
|
||||||
all_rules.append(unicode(numex_rule_template.format(
|
all_rules.append(unicode(numex_rule_template.format(
|
||||||
key=key,
|
|
||||||
language=language,
|
language=language,
|
||||||
rule_type=rule_type,
|
rule_type=rule_type,
|
||||||
gender=gender,
|
gender=gender,
|
||||||
@@ -171,7 +178,8 @@ def parse_numex_rules(dirname=NUMEX_DATA_DIR, outfile=NUMEX_RULES_FILE):
|
|||||||
num_stopwords = len(stopwords)
|
num_stopwords = len(stopwords)
|
||||||
|
|
||||||
for stopword in stopwords:
|
for stopword in stopwords:
|
||||||
all_rules.append(unicode(stopword_rule_template.format(key=stopword)))
|
all_keys.append(numex_key_template.format(key=unicode(stopword)))
|
||||||
|
all_rules.append(stopword_rule)
|
||||||
|
|
||||||
num_rules = len(rules) + len(stopwords)
|
num_rules = len(rules) + len(stopwords)
|
||||||
|
|
||||||
@@ -185,6 +193,8 @@ def parse_numex_rules(dirname=NUMEX_DATA_DIR, outfile=NUMEX_RULES_FILE):
|
|||||||
)))
|
)))
|
||||||
|
|
||||||
out.write(safe_encode(numex_rules_data_template.format(
|
out.write(safe_encode(numex_rules_data_template.format(
|
||||||
|
numex_keys=u''',
|
||||||
|
'''.join(all_keys),
|
||||||
numex_rules=u''',
|
numex_rules=u''',
|
||||||
'''.join(all_rules),
|
'''.join(all_rules),
|
||||||
ordinal_indicator_rules=u''',
|
ordinal_indicator_rules=u''',
|
||||||
|
|||||||
10625
src/numex_data.c
10625
src/numex_data.c
File diff suppressed because it is too large
Load Diff
@@ -4,11 +4,6 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include "numex.h"
|
#include "numex.h"
|
||||||
|
|
||||||
typedef struct numex_rule_source {
|
|
||||||
char *key;
|
|
||||||
numex_rule_t rule;
|
|
||||||
} numex_rule_source_t;
|
|
||||||
|
|
||||||
typedef struct numex_language_source {
|
typedef struct numex_language_source {
|
||||||
char *name;
|
char *name;
|
||||||
bool whole_tokens_only;
|
bool whole_tokens_only;
|
||||||
|
|||||||
@@ -36,7 +36,15 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
size_t num_languages = sizeof(numex_languages) / sizeof(numex_language_source_t);
|
size_t num_languages = sizeof(numex_languages) / sizeof(numex_language_source_t);
|
||||||
|
|
||||||
size_t num_source_rules = sizeof(numex_rules) / sizeof(numex_rule_source_t);
|
size_t num_source_keys = sizeof(numex_keys) / sizeof(char *);
|
||||||
|
size_t num_source_rules = sizeof(numex_rules) / sizeof(numex_rule_t);
|
||||||
|
|
||||||
|
if (num_source_keys != num_source_rules) {
|
||||||
|
log_error("num_sourcE_keys != num_source_rules, aborting\n");
|
||||||
|
numex_module_teardown();
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
size_t num_ordinal_indicator_rules = sizeof(ordinal_indicator_rules) / sizeof(ordinal_indicator_t);
|
size_t num_ordinal_indicator_rules = sizeof(ordinal_indicator_rules) / sizeof(ordinal_indicator_t);
|
||||||
|
|
||||||
char_array *key = char_array_new();
|
char_array *key = char_array_new();
|
||||||
@@ -60,15 +68,16 @@ int main(int argc, char **argv) {
|
|||||||
log_info("Doing language=%s\n", lang);
|
log_info("Doing language=%s\n", lang);
|
||||||
|
|
||||||
for (j = rule_index; j < rule_index + num_rules; j++) {
|
for (j = rule_index; j < rule_index + num_rules; j++) {
|
||||||
numex_rule_source_t rule_source = numex_rules[j];
|
char *numex_key = numex_keys[j];
|
||||||
|
numex_rule_t rule = numex_rules[j];
|
||||||
|
|
||||||
value = rule_source.rule.rule_type != NUMEX_STOPWORD ? numex_table->rules->n : NUMEX_STOPWORD_INDEX;
|
value = rule.rule_type != NUMEX_STOPWORD ? numex_table->rules->n : NUMEX_STOPWORD_INDEX;
|
||||||
numex_rule_array_push(numex_table->rules, rule_source.rule);
|
numex_rule_array_push(numex_table->rules, rule);
|
||||||
|
|
||||||
char_array_clear(key);
|
char_array_clear(key);
|
||||||
char_array_cat(key, lang);
|
char_array_cat(key, lang);
|
||||||
char_array_cat(key, NAMESPACE_SEPARATOR_CHAR);
|
char_array_cat(key, NAMESPACE_SEPARATOR_CHAR);
|
||||||
char_array_cat(key, rule_source.key);
|
char_array_cat(key, numex_key);
|
||||||
|
|
||||||
char *str_key = char_array_get_string(key);
|
char *str_key = char_array_get_string(key);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user