Merge branch 'master' into parser-data
This commit is contained in:
@@ -1 +1,2 @@
|
||||
casella postale|cp|c.p.|c.p|c p
|
||||
casella postale|cp|c.p.|c.p|c p|cpt
|
||||
presso|co|c \ o|c / o
|
||||
|
||||
@@ -1,19 +1,38 @@
|
||||
accesso|acc.so|accso
|
||||
autostrada|aut.da|autda
|
||||
alto|alt
|
||||
alzaia|alz
|
||||
androna|and
|
||||
arco|arc
|
||||
autostrada|aut.da|autda|aut
|
||||
autostradale|aut.sle|autsle|aut sle|aut.ale|autale
|
||||
autostrade|aut.de|autde
|
||||
baluardo|bdo
|
||||
banchina|bna
|
||||
bivio|biv
|
||||
borgata|bga
|
||||
borgo|b.go|bgo
|
||||
bretella|br.lla|brlla
|
||||
bretelle|br.lle|brlle
|
||||
calle|c.le|cle
|
||||
brigata|bgt
|
||||
calata|clt
|
||||
calle|cal
|
||||
campiello|cpl
|
||||
campo|c.po|cpo
|
||||
cascina|cna
|
||||
canale|cnl
|
||||
cantone|cat
|
||||
casale|cas
|
||||
cascina|c.na|cna
|
||||
cavalcavia|cavalc.a|cavalca
|
||||
chiusa|c.usa|cusa
|
||||
circonvallazione|circonv.e|circonve|crv
|
||||
contrà|con|contra'
|
||||
contrada|contr.a|contra|c.da|cda|cnt
|
||||
corso|c.so|cso
|
||||
corte|c.te|cte
|
||||
cortile|c.ile|ctile
|
||||
cortile|c.ile|ctile|c.le|cle
|
||||
cortina|ctn
|
||||
costa|cta
|
||||
crocevia|cra
|
||||
cupa|cup
|
||||
discesa|dis
|
||||
ferrata|f.rata|frata
|
||||
@@ -61,7 +80,7 @@ rampa|rpa
|
||||
rampe|rpe
|
||||
regione|reg
|
||||
rio|rii
|
||||
rio tera'|rit
|
||||
rio terà|rit|rio tera'
|
||||
ripa|ri
|
||||
riva|rva
|
||||
riviera|riv
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -280,6 +280,8 @@ static string_tree_t *add_string_alternatives(char *str, normalize_options_t opt
|
||||
|
||||
token_t token;
|
||||
|
||||
size_t added_expansions = 0;
|
||||
|
||||
if ((value.components & options.address_components) > 0) {
|
||||
key->n = namespace_len;
|
||||
for (int j = phrase.start; j < phrase.start + phrase.len; j++) {
|
||||
@@ -298,8 +300,13 @@ static string_tree_t *add_string_alternatives(char *str, normalize_options_t opt
|
||||
address_expansion_array *expansions = address_dictionary_get_expansions(key_str);
|
||||
|
||||
if (expansions != NULL) {
|
||||
|
||||
for (int j = 0; j < expansions->n; j++) {
|
||||
address_expansion_t expansion = expansions->a[j];
|
||||
if ((expansion.address_components & options.address_components) == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (expansion.canonical_index != NULL_CANONICAL_INDEX) {
|
||||
char *canonical = address_dictionary_get_canonical(expansion.canonical_index);
|
||||
char *canonical_normalized = normalize_string_utf8(canonical, normalize_string_options);
|
||||
@@ -347,10 +354,15 @@ static string_tree_t *add_string_alternatives(char *str, normalize_options_t opt
|
||||
}
|
||||
cstring_array_terminate(tree->strings);
|
||||
}
|
||||
|
||||
added_expansions++;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
} else {
|
||||
}
|
||||
|
||||
if (added_expansions == 0) {
|
||||
uint32_t start_index = cstring_array_start_token(tree->strings);
|
||||
for (int j = phrase.start; j < phrase.start + phrase.len; j++) {
|
||||
token = tokens->a[j];
|
||||
|
||||
@@ -9,7 +9,7 @@ The normalize module provides several options for preprocessing full strings:
|
||||
|
||||
As well as normalizations for individual string tokens:
|
||||
|
||||
- Replace hyphens with space e.g. "quatre-vignt" => "quatre vignt"
|
||||
- Replace hyphens with space e.g. "quatre-vingt" => "quatre vingt"
|
||||
- Delete hyphens e.g. "auto-estrada" => "autoestrada"
|
||||
- Delete final period e.g. "R." => "R"
|
||||
- Delete acronym periods: "U.S.A." => "USA"
|
||||
@@ -73,4 +73,4 @@ string_tree_t *normalize_string(char *str, uint64_t options);
|
||||
string_tree_t *normalize_string_languages(char *str, uint64_t options, size_t num_languages, char **languages);
|
||||
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user