Files
libpostal/resources/addresses/ca.yaml

858 lines
24 KiB
YAML

# ca.yaml
# -------
# Note: Latin-American conventions are used by default (with country overrides
# for Spain as well as any other country-specific norms).
# NOTE(review): confirm that this default is the intended one for Catalan (ca).
# Probability settings for the address components level, staircase, entrance
# and unit. Within each component the listed probabilities sum to 1.0.
components:
level:
# If no floor number is specified (0.6 + 0.35 + 0.05 = 1.0)
null_probability: 0.6
alphanumeric_probability: 0.35
standalone_probability: 0.05
staircase:
# 0.99 + 0.01 = 1.0
null_probability: 0.99
alphanumeric_probability: 0.01
entrance:
# 0.999 + 0.001 = 1.0
null_probability: 0.999
alphanumeric_probability: 0.001
unit:
# If no unit number is specified (0.3 + 0.65 + 0.05 = 1.0)
null_probability: 0.3
alphanumeric_probability: 0.65
standalone_probability: 0.05
# Phrases for the word "number" (canonical "número", abbreviated "nº") and the
# "#" affix form.
numbers:
# Anchored as &numero; aliased as *numero in the house_numbers section.
default: &numero
canonical: número
abbreviated: "nº"
sample: true
# canonical + abbreviated + sample = 0.1 + 0.7 + 0.2 = 1.0
canonical_probability: 0.1
abbreviated_probability: 0.7
sample_probability: 0.2
# "#" is listed as excluded from sampling.
sample_exclude:
- "#"
numeric:
direction: left
numeric_affix:
affix: "#" # e.g. #3, #2F, etc.
probability: 0.5
alternatives:
# NOTE(review): no body for this alternative is distinguishable in this
# flattened view — confirm against the file's real indentation.
- alternative:
direction: left # affix goes on the number's left
# Probabilities for numbers (0.7 + 0.3 = 1.0)
numeric_probability: 0.7
numeric_affix_probability: 0.3
# Phrase for "and": canonical "i", abbreviated "&".
and:
# Anchored as &i; aliased as *i in the cross_streets section.
default: &i
canonical: i
abbreviated: "&"
sample: true
# canonical + abbreviated + sample = 0.5 + 0.4 + 0.1 = 1.0
canonical_probability: 0.5
abbreviated_probability: 0.4
sample_probability: 0.1
# House-number phrases, including the "no number" (s/n) form.
house_numbers:
# "sense número" (s/n) addresses, i.e. addresses without a house number
no_number:
default:
canonical: sense número
abbreviated: s/n
sample: true
# canonical + abbreviated + sample = 0.1 + 0.7 + 0.2 = 1.0
canonical_probability: 0.1
abbreviated_probability: 0.7
sample_probability: 0.2
alphanumeric:
# Reuses the "número" phrase anchored as &numero in the numbers section.
default: *numero
alphanumeric_phrase_probability: 0.01
no_number_probability: 0.1 # With this probability, use sense número if no house_number is specified
# Floor/level phrases.
levels:
# Everywhere except Spain
# Generic floor phrase "pis", anchored as &pis and aliased throughout the
# aliases entries of this section.
floor: &pis
canonical: pis
abbreviated: p
sample: true
# canonical + abbreviated + sample = 0.8 + 0.1 + 0.1 = 1.0
canonical_probability: 0.8
abbreviated_probability: 0.1
sample_probability: 0.1
numeric:
direction: left
add_number_phrase: true # Occasionally add variation of "number", e.g. Pis No 2
add_number_phrase_probability: 0.05
numeric_affix:
affix: p
direction: left # P2
# e.g. 2o pis
ordinal:
direction: right
direction_probability: 0.95 # Let it vary occasionally e.g. Pis 2o
standalone_probability: 0.2 # Let e.g. 5º be the entire floor string
# If ordinal is selected, chance of e.g. just using 2o without "pis"
null_phrase_probability: 0.6
# numeric + numeric_affix + ordinal = 0.2 + 0.05 + 0.75 = 1.0
numeric_probability: 0.2
numeric_affix_probability: 0.05
ordinal_probability: 0.75
# Ground floor
# Three ground-floor phrases; each is aliased from the "0" entry of aliases.
baixos: &baixos
canonical: baixos
abbreviated: bxs
sample: true
# 0.6 + 0.3 + 0.1 = 1.0
canonical_probability: 0.6
abbreviated_probability: 0.3
sample_probability: 0.1
pis_baix: &pis_baix
canonical: pis baix
abbreviated: pb
sample: true
# 0.4 + 0.5 + 0.1 = 1.0
canonical_probability: 0.4
abbreviated_probability: 0.5
sample_probability: 0.1
# No abbreviated form is defined for "sota".
sota: &sota
canonical: sota
sample: true
# 0.8 + 0.2 = 1.0
canonical_probability: 0.8
sample_probability: 0.2
# Used when floor number is < 0 (starts at -1 in all countries)
soterrani: &soterrani
canonical: soterrani
abbreviated: so
sample: true
# 0.5 + 0.3 + 0.2 = 1.0
canonical_probability: 0.5
abbreviated_probability: 0.3
sample_probability: 0.2
# e.g. soterrani 1
numeric:
direction: left
numeric_affix:
affix: so
direction: left
# e.g. segon soterrani
ordinal:
direction: right
# standalone + numeric + numeric_affix + ordinal
# = 0.985 + 0.005 + 0.005 + 0.005 = 1.0
standalone_probability: 0.985
number_abs_value: true
number_min_abs_value: 1
numeric_probability: 0.005
numeric_affix_probability: 0.005
ordinal_probability: 0.005
# NOTE(review): unlike soterrani, no canonical/abbreviated/sample probabilities
# are listed for this phrase even though sample is true — confirm which
# defaults the consumer applies.
sub_soterrani: &sub_soterrani
canonical: sub soterrani
abbreviated: ss
sample: true
# e.g. sub soterrani 1
numeric:
direction: left
numeric_affix:
affix: ss
direction: left
# e.g. segon sub soterrani
ordinal:
direction: right
number_abs_value: true
number_min_abs_value: 2
# Soterrani 2 == Sub-soterrani 1
number_subtract_abs_value: 1
# standalone + numeric + numeric_affix + ordinal
# = 0.985 + 0.005 + 0.005 + 0.005 = 1.0
standalone_probability: 0.985
numeric_probability: 0.005
numeric_affix_probability: 0.005
ordinal_probability: 0.005
# Mezzanine phrase "entresòl"; flagged with half_floors and aliased from the
# half_floors entry of aliases.
entresol: &entresol
canonical: entresòl
abbreviated: entl
half_floors: true
sample: true
# 0.7 + 0.2 + 0.1 = 1.0
canonical_probability: 0.7
abbreviated_probability: 0.2
sample_probability: 0.1
# e.g. entresòl 2
numeric:
direction: left
# e.g. ent2
numeric_affix:
affix: ent
direction: left
# e.g. segon entresòl
ordinal:
direction: right
# numeric + numeric_affix + ordinal + standalone
# = 0.1 + 0.1 + 0.2 + 0.6 = 1.0
numeric_probability: 0.1
numeric_affix_probability: 0.1
ordinal_probability: 0.2
standalone_probability: 0.6
# "pis principal" / "principal" floor phrases.
# NOTE(review): neither anchor (&pis_principal, &principal) is aliased anywhere
# in the visible part of the file — confirm whether they are used elsewhere.
pis_principal: &pis_principal
canonical: pis principal
abbreviated: pis pral
sample: true
# 0.2 + 0.3 + 0.5 = 1.0
canonical_probability: 0.2
abbreviated_probability: 0.3
sample_probability: 0.5
principal: &principal
canonical: principal
abbreviated: pral
sample: true
# 0.2 + 0.6 + 0.2 = 1.0
canonical_probability: 0.2
abbreviated_probability: 0.6
sample_probability: 0.2
# Attic phrase "àtic", anchored as &atic and aliased from the "top" entry of
# aliases.
atic: &atic
canonical: àtic
# Grave accent, matching the canonical form; Catalan never writes "á".
abbreviated: àt
sample: true
# 0.7 + 0.1 + 0.2 = 1.0
canonical_probability: 0.7
abbreviated_probability: 0.1
sample_probability: 0.2
# Only a canonical form is defined for "sobreàtic"; aliased from the "top"
# entry below.
sobreatic: &sobreatic
canonical: sobreàtic
# Maps floor-number keys ("<-1", "-1", "0"), half floors and the top floor to
# the phrases anchored in this section. Each entry's probabilities sum to 1.0.
aliases:
# 0.6 + 0.3995 + 0.0005 = 1.0
"<-1":
default: *soterrani
probability: 0.6
alternatives:
- alternative: *sub_soterrani
probability: 0.3995
- alternative: *pis
probability: 0.0005
# 0.9995 + 0.0005 = 1.0
"-1":
default: *soterrani
probability: 0.9995
alternatives:
- alternative: *pis
probability: 0.0005
# Special token for half-floors
half_floors:
default: *entresol
# 0.495 + 0.395 + 0.1 + 0.01 = 1.0
"0":
default: *baixos
probability: 0.495
alternatives:
- alternative: *pis_baix
probability: 0.395
- alternative: *sota
probability: 0.1
- alternative: *pis
# Pis 0 is uncommon
probability: 0.01
# 0.85 + 0.1 + 0.05 = 1.0
top:
default: *pis
probability: 0.85
alternatives:
- alternative: *atic
probability: 0.1
- alternative: *sobreatic
probability: 0.05
numbering_starts_at: 0
alphanumeric:
default: *pis
add_number_phrase: true
add_number_phrase_probability: 0.05
# 0.99 + 0.01 = 1.0
numeric_probability: 0.99
alpha_probability: 0.01
# Building-block phrase: canonical "bloc", abbreviated "bl".
blocks:
alphanumeric:
default:
canonical: bloc
abbreviated: bl
sample: true
# 0.6 + 0.2 + 0.2 = 1.0
canonical_probability: 0.6
abbreviated_probability: 0.2
sample_probability: 0.2
numeric:
direction: left
# Phrases grouped under the keys near, nearby, near_me and in.
# Each group's default and alternative probabilities sum to 1.0.
categories:
# 0.5 + 0.2 + 0.1 + 0.1 + 0.05 + 0.05 = 1.0
near:
default:
canonical: a prop de
probability: 0.5
alternatives:
- alternative:
canonical: prop de
probability: 0.2
- alternative:
canonical: prop
probability: 0.1
- alternative:
canonical: a prop
probability: 0.1
- alternative:
canonical: proper
probability: 0.05
- alternative:
canonical: proper a
probability: 0.05
# 0.5 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 = 1.0
# Accented and unaccented spellings (aquí / aqui) are listed separately.
nearby:
default:
canonical: proper
probability: 0.5
alternatives:
- alternative:
canonical: a prop
probability: 0.1
- alternative:
canonical: a prop d'aquí
probability: 0.1
- alternative:
canonical: a prop d'aqui
probability: 0.1
- alternative:
canonical: aquí
probability: 0.1
- alternative:
canonical: aqui
probability: 0.1
# Single phrase, no alternatives.
near_me:
default:
canonical: a prop meu
# 0.6 + 0.2 + 0.2 = 1.0
in:
default:
canonical: a
probability: 0.6
alternatives:
- alternative:
canonical: dins
probability: 0.2
- alternative:
canonical: en
probability: 0.2
# Probabilities of each phrase (0.35 + 0.2 + 0.1 + 0.35 = 1.0)
near_probability: 0.35
nearby_probability: 0.2
near_me_probability: 0.1
in_probability: 0.35
# Connective phrases for cross streets / intersections.
cross_streets:
# Reuses the "i" / "&" phrase anchored as &i in the and section.
and: *i
amb: &amb
canonical: amb
a: &a
canonical: a
corner_of: &cantonada_de
canonical: cantonada de
sample: true
# 0.7 + 0.3 = 1.0
canonical_probability: 0.7
sample_probability: 0.3
at_the_corner_of: &a_la_cantonada_de
canonical: a la cantonada de
sample: true
# 0.7 + 0.3 = 1.0
canonical_probability: 0.7
sample_probability: 0.3
corner: &cantonada
canonical: cantonada
sample: true
# 0.7 + 0.3 = 1.0
canonical_probability: 0.7
sample_probability: 0.3
# Selection among the phrases above:
# 0.55 + 0.2 + 0.1 + 0.09 + 0.05 + 0.01 = 1.0
intersection:
default: *i
probability: 0.55
alternatives:
- alternative: *amb
probability: 0.2
- alternative: *a
probability: 0.1
- alternative: *cantonada_de
probability: 0.09
- alternative: *a_la_cantonada_de
probability: 0.05
- alternative: *cantonada
probability: 0.01
between:
canonical: entre
sample: true
# 0.8 + 0.2 = 1.0
canonical_probability: 0.8
sample_probability: 0.2
# NOTE(review): the key below is spelled "probabililty" (extra "l"). Confirm
# which spelling the consumer reads before renaming it.
parentheses_probabililty: 0.5
# PO box phrase ("apartat") and the shape of generated box identifiers.
po_boxes:
apartat: &apartat
canonical: apartat
abbreviated: apt
sample: true
# 0.5 + 0.3 + 0.2 = 1.0
canonical_probability: 0.5
abbreviated_probability: 0.3
sample_probability: 0.2
numeric:
direction: left
add_number_phrase: true
add_number_phrase_probability: 0.4 # Apt No 1234
numeric_probability: 1.0
alphanumeric:
sample: false
default: *apartat
# 0.9 + 0.05 + 0.04 + 0.01 = 1.0
numeric_probability: 0.9 # Apt 123
alpha_probability: 0.05 # Apt A
numeric_plus_alpha_probability: 0.04 # Apt 123G
alpha_plus_numeric_probability: 0.01 # Apt A123
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Digit-length distribution; lengths 1-6 with probabilities
# 0.05 + 0.1 + 0.2 + 0.5 + 0.1 + 0.05 = 1.0
digits:
- length: 1
probability: 0.05
- length: 2
probability: 0.1
- length: 3
probability: 0.2
- length: 4
probability: 0.5
- length: 5
probability: 0.1
- length: 6
probability: 0.05
# Postal-code phrase: canonical "codi postal", abbreviated "cp".
postcodes:
alphanumeric:
default:
canonical: codi postal
abbreviated: cp
sample: true
# 0.01 + 0.95 + 0.04 = 1.0 (the abbreviation "cp" dominates)
canonical_probability: 0.01
abbreviated_probability: 0.95
sample_probability: 0.04
numeric:
# Postcodes in Spain and Latin America are sometimes prefixed by CP
direction: left
numeric_affix:
affix: cp
direction: left
# null_probability means the chance of doing nothing e.g. just the postal code
# null + numeric + numeric_affix = 0.7 + 0.18 + 0.12 = 1.0
null_probability: 0.7
numeric_probability: 0.18
numeric_affix_probability: 0.12
strict_numeric: true
# Relative direction phrases (right, left, rear, front) and the groups that
# select among them. The anchors are also aliased in the entrances and
# staircases sections.
directions:
right: &dreta
canonical: dreta
abbreviated: dta
sample: true
# 0.3 + 0.4 + 0.3 = 1.0
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
numeric:
direction: right
numeric_affix:
affix: d
direction: right
whitespace_probability: 0.1
# 0.4 + 0.6 = 1.0
numeric_probability: 0.4
numeric_affix_probability: 0.6
left: &esquerra
canonical: esquerra
abbreviated: esq
sample: true
# 0.3 + 0.4 + 0.3 = 1.0
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
numeric:
direction: right
numeric_affix:
affix: e
direction: right
whitespace_probability: 0.1
# 0.4 + 0.6 = 1.0
numeric_probability: 0.4
numeric_affix_probability: 0.6
# No numeric_affix is defined for rear or front.
rear: &posterior
canonical: posterior
abbreviated: pos
sample: true
# 0.6 + 0.2 + 0.2 = 1.0
canonical_probability: 0.6
abbreviated_probability: 0.2
sample_probability: 0.2
numeric:
direction: right
front: &front
canonical: front
sample: true
# 0.8 + 0.2 = 1.0
canonical_probability: 0.8
sample_probability: 0.2
numeric:
direction: right
# Selection among all four directions: 0.45 + 0.45 + 0.05 + 0.05 = 1.0
alternatives:
- alternative: *dreta
probability: 0.45
- alternative: *esquerra
probability: 0.45
- alternative: *posterior
probability: 0.05
- alternative: *front
probability: 0.05
# Front/rear only, evenly split.
anteroposterior:
alternatives:
- alternative: *front
probability: 0.5
- alternative: *posterior
probability: 0.5
# Right/left only, evenly split.
lateral:
alternatives:
- alternative: *dreta
probability: 0.5
- alternative: *esquerra
probability: 0.5
# Cardinal direction phrases. All four share the same shape and the same
# probabilities; none sets "sample". The anchors are also aliased in the
# entrances and staircases sections.
cardinal_directions:
east: &est
canonical: est
abbreviated: e
# 0.4 + 0.6 = 1.0
canonical_probability: 0.4
abbreviated_probability: 0.6
numeric:
direction: right
numeric_affix:
affix: e
direction: right
# 0.5 + 0.5 = 1.0
numeric_probability: 0.5
numeric_affix_probability: 0.5
# NOTE(review): "oest" is abbreviated "w" rather than "o" — presumably
# deliberate; confirm.
west: &oest
canonical: oest
abbreviated: w
canonical_probability: 0.4
abbreviated_probability: 0.6
numeric:
direction: right
numeric_affix:
affix: w
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
north: &nord
canonical: nord
abbreviated: n
canonical_probability: 0.4
abbreviated_probability: 0.6
numeric:
direction: right
numeric_affix:
affix: n
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
south: &sud
canonical: sud
abbreviated: s
canonical_probability: 0.4
abbreviated_probability: 0.6
numeric:
direction: right
numeric_affix:
affix: s
direction: right
numeric_probability: 0.5
numeric_affix_probability: 0.5
# Uniform selection among the four directions (4 x 0.25 = 1.0)
alternatives:
- alternative: *nord
probability: 0.25
- alternative: *est
probability: 0.25
- alternative: *sud
probability: 0.25
- alternative: *oest
probability: 0.25
# Entrance phrase: canonical "entrada", abbreviated "entr".
entrances:
entrada: &entrada
canonical: entrada
abbreviated: entr
sample: true
# 0.5 + 0.2 + 0.3 = 1.0
canonical_probability: 0.5
abbreviated_probability: 0.2
sample_probability: 0.3
numeric:
direction: left
# Entrance 1, Entrance A, etc.
alphanumeric:
default: *entrada
# 0.1 + 0.85 + 0.025 + 0.025 = 1.0
numeric_probability: 0.1 # e.g. Entrance 1
alpha_probability: 0.85 # e.g. Entrance A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Direction modifiers drawn from the cardinal_directions and directions
# sections. No per-alternative probabilities are listed here.
directional:
modifier:
alternatives:
- alternative: *nord
- alternative: *sud
- alternative: *est
- alternative: *oest
- alternative: *dreta
- alternative: *esquerra
- alternative: *posterior
- alternative: *front
# Staircase phrase: canonical "escala", abbreviated "esc".
staircases:
escala: &escala
canonical: escala
abbreviated: esc
sample: true
# 0.3 + 0.4 + 0.3 = 1.0
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
numeric:
direction: left
alphanumeric:
# For alphanumerics, Stair A, Stair 1, etc.
default: *escala
# 0.6 + 0.35 + 0.025 + 0.025 = 1.0
numeric_probability: 0.6 # e.g. Escala 1
alpha_probability: 0.35 # e.g. Escala A
numeric_plus_alpha_probability: 0.025 # e.g. 1A
alpha_plus_numeric_probability: 0.025 # e.g. A1
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Direction modifiers drawn from the cardinal_directions and directions
# sections. No per-alternative probabilities are listed here.
directional:
direction: right # e.g. Escala Esq
direction_probability: 0.8
modifier:
alternatives:
- alternative: *nord
- alternative: *sud
- alternative: *est
- alternative: *oest
- alternative: *dreta
- alternative: *esquerra
- alternative: *posterior
- alternative: *front
# Unit phrases (flat, door, letter, office, ...). Each anchor defined here is
# aliased by the selection rules later in the file (unit alphanumeric rules,
# zones, allotments).
units:
flat: &apartament
canonical: apartament
abbreviated: apmt
sample: true
# 0.3 + 0.4 + 0.3 = 1.0
canonical_probability: 0.3
abbreviated_probability: 0.4
sample_probability: 0.3
numeric:
direction: left
door: &porta
canonical: porta
abbreviated: pta
sample: true
# 0.4 + 0.4 + 0.2 = 1.0
canonical_probability: 0.4
abbreviated_probability: 0.4
sample_probability: 0.2
numeric:
direction: left
# If it's just porta B, many times it's just e.g. 3o B for "tercer pis porta B"
null_phrase_probability: 0.15
ordinal:
direction: right
# Feminine gender, matching the noun "porta"
gender: f
direction_probability: 0.95 # Let it vary occasionally e.g. Porta 2a
null_phrase_probability: 0.8 # Let e.g. 5a be the entire unit string
# 0.25 + 0.75 = 1.0
numeric_probability: 0.25
ordinal_probability: 0.75
lletra: &lletra
canonical: lletra
sample: true
# 0.9 + 0.1 = 1.0
canonical_probability: 0.9
sample_probability: 0.1
numeric:
direction: left
office: &oficina
canonical: oficina
abbreviated: of
sample: true
# 0.4 + 0.3 + 0.3 = 1.0
canonical_probability: 0.4
abbreviated_probability: 0.3
sample_probability: 0.3
numeric:
direction: left
# Another word for unit, used more in Colombia
# NOTE(review): confirm that the Colombia remark applies to the Catalan
# phrase "unitat".
unitat: &unitat
canonical: unitat
abbreviated: un
sample: true
# 0.4 + 0.4 + 0.2 = 1.0
canonical_probability: 0.4
abbreviated_probability: 0.4
sample_probability: 0.2
lot: &lot
canonical: lot
abbreviated: lt
sample: true
# 0.6 + 0.2 + 0.2 = 1.0
canonical_probability: 0.6
abbreviated_probability: 0.2
sample_probability: 0.2
parcel: &parcella
canonical: parcel·la
sample: true
# 0.8 + 0.2 = 1.0
canonical_probability: 0.8
sample_probability: 0.2
habitacio: &habitacio
canonical: habitació
sample: true
# 0.8 + 0.2 = 1.0
canonical_probability: 0.8
sample_probability: 0.2
# casa and sala define only a canonical form and a numeric direction.
casa: &casa
canonical: casa
numeric:
direction: left
room: &sala
canonical: sala
numeric:
direction: left
# Phrase-selection rules for units, anchored as &unit_alphanumeric and reused
# for the "residential" zone.
alphanumeric: &unit_alphanumeric
# default + alternatives = 0.8 + 0.1 + 0.1 = 1.0
default: *porta
probability: 0.8
sample: true
alternatives:
- alternative: *apartament
probability: 0.1
- alternative: *casa
probability: 0.1
# Separate random probability for adding directions like 2o Esq, 2 Dta, etc.
add_direction: true
add_direction_probability: 0.1
add_direction_numeric: true # Only for numbers
add_direction_standalone: true # A unit can be as simple as "D"
# 0.7 + 0.01 + 0.01 + 0.28 = 1.0
numeric_probability: 0.7 # e.g. Porta 1a
numeric_plus_alpha_probability: 0.01 # e.g. Porta 1A
alpha_plus_numeric_probability: 0.01 # e.g. Porta A1
alpha_probability: 0.28 # e.g. Porta A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# Separate phrase selection under the "alpha" key:
# 0.8 + 0.12 + 0.05 + 0.01 + 0.01 + 0.01 = 1.0
alpha:
default: *porta
probability: 0.8
alternatives:
- alternative: *lletra
probability: 0.12
- alternative: *apartament
probability: 0.05
- alternative: *casa
probability: 0.01
- alternative: *unitat
probability: 0.01
- alternative: *habitacio
probability: 0.01
# Unit phrase selection per zone type (residential, commercial, industrial,
# university).
zones:
# Residential reuses the unit rules anchored as &unit_alphanumeric.
residential: *unit_alphanumeric
commercial:
# 0.8 + 0.2 = 1.0
default: *oficina
probability: 0.8
alternatives:
- alternative: *sala
probability: 0.2
# 0.9 + 0.01 + 0.01 + 0.08 = 1.0
numeric_probability: 0.9 # e.g. Oficina 1
numeric_plus_alpha_probability: 0.01 # e.g. Oficina 1A
alpha_plus_numeric_probability: 0.01 # e.g. Oficina A1
alpha_probability: 0.08 # e.g. Oficina A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# 0.8 + 0.15 + 0.05 = 1.0
alpha:
default: *oficina
probability: 0.8
alternatives:
- alternative: *sala
probability: 0.15
- alternative: *lletra
probability: 0.05
# Unlike commercial and university, industrial defines no "alpha" sub-section.
industrial:
# 0.5 + 0.3 + 0.19 + 0.01 = 1.0
default: *lot
probability: 0.5
alternatives:
- alternative: *oficina
probability: 0.3
- alternative: *unitat
probability: 0.19
- alternative: *parcella
probability: 0.01
# 0.9 + 0.01 + 0.01 + 0.08 = 1.0
numeric_probability: 0.9 # e.g. Lot 1
numeric_plus_alpha_probability: 0.01 # e.g. Lot 1A
alpha_plus_numeric_probability: 0.01 # e.g. Lot A1
alpha_probability: 0.08 # e.g. Lot A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
university:
# 0.9 + 0.1 = 1.0
default: *sala
probability: 0.9
alternatives:
- alternative: *porta
probability: 0.1
# 0.9 + 0.01 + 0.01 + 0.08 = 1.0
numeric_probability: 0.9 # e.g. Sala 1
numeric_plus_alpha_probability: 0.01 # e.g. Sala 1A
alpha_plus_numeric_probability: 0.01 # e.g. Sala A1
alpha_probability: 0.08 # e.g. Sala A
alpha_plus_numeric:
whitespace_probability: 0.1
numeric_plus_alpha:
whitespace_probability: 0.1
# 0.9 + 0.08 + 0.02 = 1.0
alpha:
default: *sala
probability: 0.9
alternatives:
- alternative: *porta
probability: 0.08
- alternative: *lletra
probability: 0.02
# Lot / parcel phrases, reusing the &lot and &parcella anchors from the units
# section.
allotments:
lot:
default: *lot
# 0.8 + 0.1 + 0.1 = 1.0
numeric_probability: 0.8
alphanumeric_probability: 0.1
alpha_probability: 0.1
parcel:
default: *parcella
# 0.3 + 0.3 + 0.4 = 1.0
numeric_probability: 0.3
alphanumeric_probability: 0.3
alpha_probability: 0.4
# Choice between lot, parcel, or both in either order:
# 0.9 + 0.06 + 0.02 + 0.02 = 1.0
lot_probability: 0.9
parcel_probability: 0.06
lot_plus_parcel_probability: 0.02
parcel_plus_lot_probability: 0.02