[fix] canonical index in address expansion data: it should be -1 for all canonical phrases

This commit is contained in:
Al
2015-12-08 15:09:51 -05:00
parent e1ea2ac704
commit 35db855819
2 changed files with 2 additions and 4 deletions

View File

@@ -35,7 +35,6 @@ comissaria|com
convent|convt convent|convt
correus|corr correus|corr
corretatge corretatge
convent
cooperativa cooperativa
devesa devesa
disseminat disseminat

View File

@@ -149,8 +149,8 @@ def create_address_expansion_rules_file(base_dir=ADDRESS_EXPANSIONS_DIR, output_
else: else:
canonical_index = -1 canonical_index = -1
for p in phrases: for i, p in enumerate(phrases):
language_canonical_dictionaries[(p, canonical_index)].append(dictionary_type) language_canonical_dictionaries[p, canonical_index if i > 0 else -1].append(dictionary_type)
for (phrase, canonical_index), dictionary_types in language_canonical_dictionaries.iteritems(): for (phrase, canonical_index), dictionary_types in language_canonical_dictionaries.iteritems():
max_dictionary_types = max(max_dictionary_types, len(dictionary_types)) max_dictionary_types = max(max_dictionary_types, len(dictionary_types))
@@ -161,7 +161,6 @@ def create_address_expansion_rules_file(base_dir=ADDRESS_EXPANSIONS_DIR, output_
expansion_rules.append(rule_template) expansion_rules.append(rule_template)
num_language_rules += 1 num_language_rules += 1
address_languages.append(address_language_index_template.format(language=quote_string(language), address_languages.append(address_language_index_template.format(language=quote_string(language),
index=language_index, index=language_index,
length=num_language_rules)) length=num_language_rules))