[addresses/dictionaries] Updates to Portuguese configs, variations for Brazil

This commit is contained in:
Al
2016-06-25 20:29:36 -04:00
parent 8a5dd26dbf
commit ee27dc5ea1
6 changed files with 149 additions and 14 deletions

View File

@@ -19,10 +19,56 @@ components:
unit:
# If no unit number is specified
null_probability: 0.3
alphanumeric_probability: 0.65
null_probability: 0.7
alphanumeric_probability: 0.25
standalone_probability: 0.05
combinations:
# For unit types like 2/34 (more common in Canada and Australia)
house_number_unit:
components:
- house_number
- unit
label: house_number
separators:
- separator: "-"
probability: 0.9
- separator: " - "
probability: 0.05
- separator: /
probability: 0.05
probability: 0.005
house_number_floor:
components:
- house_number
- unit
label: house_number
separators:
- separator: "-"
probability: 0.9
- separator: " - "
probability: 0.05
- separator: /
probability: 0.05
probability: 0.005
house_number_staircase_unit:
components:
- house_number
- staircase
- unit
label: house_number
separators:
- separator: "-"
probability: 0.9
- separator: " - "
probability: 0.05
- separator: /
probability: 0.05
probability: 0.005
numbers:
default: &numero
canonical: número
@@ -148,6 +194,19 @@ levels:
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
pavimento_terreo: &pavimento_terreo
canonical: pavimento terréo
abbreviated: pt
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
terreo: &terreo
canonical: terréo
sample: true
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
baixos: &baixos
canonical: baixos
abbreviated: bxs
@@ -663,8 +722,16 @@ units:
sample_probability: 0.3
numeric:
direction: left
conjunto: &conjunto
canonical: conjunto
abbreviated: conj
sample: true
canonical_probability: 0.4
abbreviated_probability: 0.3
sample_probability: 0.3
suite: &suite
canonical: suite
sample: true
canonical_probability: 0.7
sample_probability: 0.3
numeric:
@@ -686,6 +753,13 @@ units:
canonical: sala
numeric:
direction: left
unidade: &unidade
canonical: unidade
abbreviated: un
sample: true
canonical_probability: 0.7
abbreviated_probability: 0.1
sample_probability: 0.2
alphanumeric: &unit_alphanumeric
default: *apartamento
@@ -725,17 +799,19 @@ units:
- alternative: *porta
probability: 0.05
- alternative: *letra
probability: 0.02
probability: 0.015
- alternative: *unidade
probability: 0.005
zones:
residential: *unit_alphanumeric
commercial:
default: *suite
default: *sala
probability: 0.6
alternatives:
- alternative: *escritorio
probability: 0.2
- alternative: *sala
- alternative: *suite
probability: 0.2
numeric_probability: 0.9 # e.g. escritório 1
@@ -748,12 +824,12 @@ units:
whitespace_probability: 0.1
alpha:
default: *suite
default: *sala
probability: 0.7
alternatives:
- alternative: *escritorio
probability: 0.15
- alternative: *sala
- alternative: *suite
probability: 0.1
- alternative: *letra
probability: 0.05
@@ -830,16 +906,44 @@ countries:
default: *andar_terreo
probability: 0.4
alternatives:
- alternative: *rez_do_chao
probability: 0.34
- alternative: *terreo
probability: 0.2
- alternative: *baixos
probability: 0.2
- alternative: *rez_do_chao
probability: 0.13
- alternative: *pavimento_terreo
probability: 0.01
# Andar / Piso 0 is uncommon
- alternative: *andar
probability: 0.05
- alternative: *piso
probability: 0.01
postcodes:
alphanumeric:
default:
canonical: código de endereçamento postal
abbreviated: cep
sample: true
canonical_probability: 0.001
abbreviated_probability: 0.995
sample_probability: 0.004
numeric:
# Postcodes in Brazil are sometimes prefixed by CEP
direction: left
numeric_affix:
affix: cep
direction: left
# null_probability is the chance of adding no phrase or affix at all, i.e. emitting just the postal code
null_probability: 0.7
numeric_probability: 0.18
numeric_affix_probability: 0.12
strict_numeric: true
po_boxes:
alphanumeric:
default:
@@ -850,4 +954,30 @@ countries:
abbreviated_probability: 0.6
sample_probability: 0.3
numeric:
direction: left
direction: left
units:
zones:
commercial:
default: *conjunto
probability: 0.8
alternatives:
- alternative: *sala
probability: 0.1
- alternative: *suite
probability: 0.05
- alternative: *escritorio
probability: 0.05
alpha:
default: *conjunto
probability: 0.75
alternatives:
- alternative: *sala
probability: 0.1
- alternative: *suite
probability: 0.05
- alternative: *escritorio
probability: 0.05
- alternative: *letra
probability: 0.05

View File

@@ -1,3 +1,5 @@
andar terréo|at|a.t|a t|andar terreo
baixos|bxs
rés-do-chão|res-do-chao|rés do chão|res do chao|résdochão|resdochao|rc|r / c|r.c|r c|rdc|r / d / c|r/r / chão|r / chao|r-d-c|r d c |r.d.c
rés-do-chão|res-do-chao|rés do chão|res do chao|résdochão|resdochao|rc|r / c|r.c|r c|rdc|r / d / c|r/r / chão|r / chao|r-d-c|r d c |r.d.c
pavimento terréo|pt|p.t|p t|pavimento terreo
terréo|terreo

View File

@@ -1 +1 @@
sem número|sem numero|sn|s.n.|s.n|s / n|s n
sem número|sem numero|sn|s.n.|s.n|s / n|s n|s / nº|s.nº|snº|s / no

View File

@@ -0,0 +1 @@
código de endereçamento postal|cep|c.e.p|c e p|c de e p|c de ep|codigo de enderecamento postal

View File

@@ -1,8 +1,10 @@
apartamento|ap|apt|apto|apt.o
casa
conjunto|conj|cj
letra
lote|lt
parcela
porta|pta
moradia
sala
sala
unidade|un

View File

@@ -26,7 +26,7 @@ class AddressConfig(object):
self.cache = {}
for filename in os.listdir(config_dir):
if filename not in ('en.yaml', 'es.yaml', 'ca.yaml', 'fr.yaml', 'de.yaml', 'nl.yaml', 'da.yaml', 'nb.yaml', 'sv.yaml', 'pt.yaml'):
if filename not in ('en.yaml', 'es.yaml', 'ca.yaml', 'fr.yaml', 'de.yaml', 'nl.yaml', 'da.yaml', 'nb.yaml', 'sv.yaml', 'pt.yaml', 'pl.yaml'):
continue
config = yaml.load(open(os.path.join(ADDRESS_CONFIG_DIR, filename)))