Merge branch 'master' into parser-data
This commit is contained in:
@@ -1 +1,2 @@
|
|||||||
casella postale|cp|c.p.|c.p|c p
|
casella postale|cp|c.p.|c.p|c p|cpt
|
||||||
|
presso|co|c \ o|c / o
|
||||||
|
|||||||
@@ -1,19 +1,38 @@
|
|||||||
accesso|acc.so|accso
|
accesso|acc.so|accso
|
||||||
autostrada|aut.da|autda
|
alto|alt
|
||||||
|
alzaia|alz
|
||||||
|
androna|and
|
||||||
|
arco|arc
|
||||||
|
autostrada|aut.da|autda|aut
|
||||||
audostradale|aut.sle|autsle|aut sle|aut.ale|autale
|
audostradale|aut.sle|autsle|aut sle|aut.ale|autale
|
||||||
autostrade|aut.de|autde
|
autostrade|aut.de|autde
|
||||||
|
baluardo|bdo
|
||||||
|
banchina|bna
|
||||||
|
bivio|biv
|
||||||
|
borgata|bga
|
||||||
|
borgo|b.go|bgo
|
||||||
bretella|br.lla|brlla
|
bretella|br.lla|brlla
|
||||||
bretelle|br.lle|brlle
|
bretelle|br.lle|brlle
|
||||||
calle|c.le|cle
|
brigata|bgt
|
||||||
|
calata|clt
|
||||||
|
calle|cal
|
||||||
|
campiello|cpl
|
||||||
campo|c.po|cpo
|
campo|c.po|cpo
|
||||||
cascina|cna
|
canale|cnl
|
||||||
|
cantone|cat
|
||||||
|
casale|cas
|
||||||
|
cascina|c.na|cna
|
||||||
cavalcavia|cavalc.a|cavalca
|
cavalcavia|cavalc.a|cavalca
|
||||||
chiusa|c.usa|cusa
|
chiusa|c.usa|cusa
|
||||||
circonvallazione|circonv.e|circonve|crv
|
circonvallazione|circonv.e|circonve|crv
|
||||||
|
contrà|con|contra'
|
||||||
contrada|contr.a|contra|c.da|cda|cnt
|
contrada|contr.a|contra|c.da|cda|cnt
|
||||||
corso|c.so|cso
|
corso|c.so|cso
|
||||||
corte|c.te|cte
|
corte|c.te|cte
|
||||||
cortile|c.ile|ctile
|
cortile|c.ile|ctile|c.le|cle
|
||||||
|
cortina|ctn
|
||||||
|
costa|cta
|
||||||
|
crocevia|cra
|
||||||
cupa|cup
|
cupa|cup
|
||||||
discesa|dis
|
discesa|dis
|
||||||
ferrata|f.rata|frata
|
ferrata|f.rata|frata
|
||||||
@@ -61,7 +80,7 @@ rampa|rpa
|
|||||||
rampe|rpe
|
rampe|rpe
|
||||||
regione|reg
|
regione|reg
|
||||||
rio|rii
|
rio|rii
|
||||||
rio tera'|rit
|
rio terà|rit|rio tera'
|
||||||
ripa|ri
|
ripa|ri
|
||||||
riva|rva
|
riva|rva
|
||||||
riviera|riv
|
riviera|riv
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -280,6 +280,8 @@ static string_tree_t *add_string_alternatives(char *str, normalize_options_t opt
|
|||||||
|
|
||||||
token_t token;
|
token_t token;
|
||||||
|
|
||||||
|
size_t added_expansions = 0;
|
||||||
|
|
||||||
if ((value.components & options.address_components) > 0) {
|
if ((value.components & options.address_components) > 0) {
|
||||||
key->n = namespace_len;
|
key->n = namespace_len;
|
||||||
for (int j = phrase.start; j < phrase.start + phrase.len; j++) {
|
for (int j = phrase.start; j < phrase.start + phrase.len; j++) {
|
||||||
@@ -298,8 +300,13 @@ static string_tree_t *add_string_alternatives(char *str, normalize_options_t opt
|
|||||||
address_expansion_array *expansions = address_dictionary_get_expansions(key_str);
|
address_expansion_array *expansions = address_dictionary_get_expansions(key_str);
|
||||||
|
|
||||||
if (expansions != NULL) {
|
if (expansions != NULL) {
|
||||||
|
|
||||||
for (int j = 0; j < expansions->n; j++) {
|
for (int j = 0; j < expansions->n; j++) {
|
||||||
address_expansion_t expansion = expansions->a[j];
|
address_expansion_t expansion = expansions->a[j];
|
||||||
|
if ((expansion.address_components & options.address_components) == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (expansion.canonical_index != NULL_CANONICAL_INDEX) {
|
if (expansion.canonical_index != NULL_CANONICAL_INDEX) {
|
||||||
char *canonical = address_dictionary_get_canonical(expansion.canonical_index);
|
char *canonical = address_dictionary_get_canonical(expansion.canonical_index);
|
||||||
char *canonical_normalized = normalize_string_utf8(canonical, normalize_string_options);
|
char *canonical_normalized = normalize_string_utf8(canonical, normalize_string_options);
|
||||||
@@ -347,10 +354,15 @@ static string_tree_t *add_string_alternatives(char *str, normalize_options_t opt
|
|||||||
}
|
}
|
||||||
cstring_array_terminate(tree->strings);
|
cstring_array_terminate(tree->strings);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
added_expansions++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
} else {
|
}
|
||||||
|
|
||||||
|
if (added_expansions == 0) {
|
||||||
uint32_t start_index = cstring_array_start_token(tree->strings);
|
uint32_t start_index = cstring_array_start_token(tree->strings);
|
||||||
for (int j = phrase.start; j < phrase.start + phrase.len; j++) {
|
for (int j = phrase.start; j < phrase.start + phrase.len; j++) {
|
||||||
token = tokens->a[j];
|
token = tokens->a[j];
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ The normalize module provides several options for preprocessing full strings:
|
|||||||
|
|
||||||
As well as normalizations for individual string tokens:
|
As well as normalizations for individual string tokens:
|
||||||
|
|
||||||
- Replace hyphens with space e.g. "quatre-vignt" => "quatre vignt"
|
- Replace hyphens with space e.g. "quatre-vingt" => "quatre vingt"
|
||||||
- Delete hyphens e.g. "auto-estrada" => "autoestrada"
|
- Delete hyphens e.g. "auto-estrada" => "autoestrada"
|
||||||
- Delete final period "R." => "R"
|
- Delete final period "R." => "R"
|
||||||
- Delete acronym periods: "U.S.A." => "USA"
|
- Delete acronym periods: "U.S.A." => "USA"
|
||||||
@@ -73,4 +73,4 @@ string_tree_t *normalize_string(char *str, uint64_t options);
|
|||||||
string_tree_t *normalize_string_languages(char *str, uint64_t options, size_t num_languages, char **languages);
|
string_tree_t *normalize_string_languages(char *str, uint64_t options, size_t num_languages, char **languages);
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user