Initial fork commit

This commit is contained in:
2025-09-06 22:03:29 -04:00
commit 2d238cd339
1748 changed files with 932506 additions and 0 deletions

View File

@@ -0,0 +1,120 @@
# Weighted choice of which name tag to use when naming a place.
# Each "default"/"probability"/"alternatives" group is one distribution:
# "probability" is the weight of "default", and every list item carries the
# weight of its own "alternative".
# NOTE(review): nesting was reconstructed from a copy of this file whose
# indentation had been lost (flat, it contains duplicate keys). Placing every
# section under "names" is an assumption - confirm against the consumer.
names:
  # Tag distribution used for any place; weights sum to 1.0.
  keys:
    default: name
    probability: 0.75
    alternatives:
      - alternative: short_name  # e.g. NYC
        probability: 0.12
      - alternative: alt_name  # e.g. New York (instead of New York City)
        probability: 0.12
      - alternative: official_name  # e.g. United Kingdom of Great Britain and Northern Ireland
        probability: 0.01

  # Per-component distributions; only "country" is defined here.
  components:
    country:
      # Weights sum to 1.0.
      keys:
        default: name
        probability: 0.87
        alternatives:
          - alternative: ISO3166-1:alpha2
            probability: 0.02
          - alternative: ISO3166-1:alpha3
            probability: 0.01
          - alternative: short_name
            probability: 0.04
          - alternative: alt_name
            probability: 0.04
          - alternative: int_name
            probability: 0.01
          - alternative: official_name  # e.g. United Kingdom of Great Britain and Northern Ireland
            probability: 0.01

  # Group 1 of the pattern is the "<number><ordinal suffix> arrondissement"
  # part, i.e. the match without the leading city name.
  # NOTE(review): presumably the match is replaced by that group with
  # probability 0.5 - confirm against the consumer.
  regex_replacements:
    - country: fr
      pattern: "(?:lyon|paris|marseilles?) ([\\d]+(?:e|er|ème|eme) arrondissement)"
      replace_with_group: 1
      replace_probability: 0.5

  # Language-specific prefixes for a component.
  # The weights below sum to 0.5.
  # NOTE(review): presumably the remaining 0.5 means "no prefix" - confirm.
  prefixes:
    language:
      ru:
        city:
          default:
            prefix: г.
          probability: 0.35
          alternatives:
            - alternative:
                prefix: г
              probability: 0.1
            - alternative:
                prefix: город
              probability: 0.05

  # This section overrides place names
  exceptions:
    # Boroughs of New York City
    - id: 2552485  # New York County (don't use Manhattan)
      type: relation
      default: name  # New York County
      probability: 1.0
    - id: 369518  # Kings County (don't use Brooklyn)
      type: relation
      default: name  # Kings County
      probability: 1.0
    - id: 369519  # Queens County (don't use Queens)
      type: relation
      default: name  # Queens County
      probability: 1.0
    - id: 2552450  # Bronx County (don't use The Bronx)
      type: relation
      default: name  # Bronx County
      probability: 1.0
    - id: 962876  # Richmond County (don't use Staten Island)
      type: relation
      default: name  # Richmond County
      probability: 1.0
    - id: 6577227  # Kingston Parish (always use Kingston)
      type: relation
      default: name  # Kingston
      probability: 1.0
    - id: 30674098  # Sao Paulo
      type: node
      default: name
      probability: 0.9
      alternatives:
        - alternative: alt_name
          probability: 0.09
        - alternative: official_name
          probability: 0.01
    - id: 298285  # Sao Paulo (relation)
      type: relation
      default: name
      probability: 0.9
      alternatives:
        - alternative: alt_name
          probability: 0.09
        - alternative: official_name
          probability: 0.01
    # Same weights as the country distribution, except that name:en is the
    # default and plain "name" becomes a 0.1 alternative.
    - id: 556706  # New Zealand
      type: relation
      default: name:en
      probability: 0.77
      alternatives:
        - alternative: name
          probability: 0.1
        - alternative: ISO3166-1:alpha2
          probability: 0.02
        - alternative: ISO3166-1:alpha3
          probability: 0.01
        - alternative: short_name
          probability: 0.04
        - alternative: alt_name
          probability: 0.04
        - alternative: int_name
          probability: 0.01
        - alternative: official_name
          probability: 0.01
    - id: 2383266  # Melbourne (city center)
      type: relation
      default: alt_name  # Melbourne
      probability: 1.0

View File

@@ -0,0 +1,11 @@
# Leading words that may be removed from a place name to normalize it.
prefixes:
  - stadtteil
  - stadtbezirk
  - gemeinde
  - landkreis
  - kreis
  - grenze
  - freistaat
  - regierungsbezirk
  - gemeindefreies gebiet

View File

@@ -0,0 +1,66 @@
# Leading phrases that may be removed from a place name to normalize it.
# Stripping is randomized: a matching prefix is not removed every time.
prefixes:
  - city of
  - city and borough of
  - city and county of
  - township of
  - town of
  - regional municipality of
  - district municipality of
  - rural city of
  - municipality of
  - borough of
  - london borough of
  - royal borough of
  - shire of
  - parish of
  - free state of
  - autonomous city of
  - territorial waters of
  - county of
  - county
# Trailing phrases that may be removed from a place name to normalize it.
suffixes:
  - township
  - municipality
  - local municipality
  - metropolitan municipality
  - district municipality
  - regional municipality
  - cp
  - civil parish
  - community development council
  - cdc
  - council
  - borough council
  - city council
  - regional council
# Extra prefixes/suffixes keyed by lowercase two-letter country code.
# NOTE(review): presumably these apply in addition to the file-wide lists for
# places in that country - confirm against the consumer.
# Nesting was reconstructed from a copy whose indentation had been lost; flat,
# the repeated "prefixes"/"suffixes" keys collide with each other.
# If a code that YAML 1.1 reads as a boolean is ever added (e.g. Norway's
# "no"), it must be quoted.
countries:
  au:
    prefixes:
      - district of
  gb:
    prefixes:
      - district of
    suffixes:
      - city
  ie:
    suffixes:
      - city
      - municipal district
  in:
    prefixes:
      - district of
  nz:
    prefixes:
      - district of
    suffixes:
      - city

View File

@@ -0,0 +1,10 @@
# Leading phrases that may be removed from a place name.
# "ciudad de" is deliberately absent from this list because it is part of
# proper names; it is listed under prefixes_similarity_only instead.
prefixes:
  - colonia
  - municipio nuestra senora de
prefixes_similarity_only:
  - ciudad de
# Trailing words that may be removed from a place name.
suffixes:
  - colonia

View File

@@ -0,0 +1,5 @@
# Leading phrases that may be removed from a place name.
prefixes:
  - commune de
# Trailing phrases that may be removed; the parentheses are part of the value.
suffixes:
  - '(eaux territoriales)'

View File

@@ -0,0 +1,37 @@
# Romanized leading words that may be removed from a place name.
# The same word appears in several spellings: with macron, hyphenated, plain.
prefixes:
  - ōaza
  - ō-aza
  - oaza
  - aza
  - koaza
# Romanized trailing words that may be removed from a place name.
# Long-vowel spellings are listed with and without the macron (dō/do, chō/cho).
suffixes:
  - ken
  - to
  # NOTE(review): this entry was empty (a null list item) in the copy under
  # review; "dō" is restored from the dō/do pairing and the "# dō" comment in
  # suffixes_no_whitespace - confirm against the original file.
  - dō
  - do
  - fu
  - gun
  - shi
  - machi
  - chō
  - cho
  - mura
  - ku
  - aza
  - oaza
  - koaza
# Native-script suffixes; per the key name these attach to the place name
# without a separating space. Each comment gives the romanized reading.
# NOTE(review): the ten single-character values had been lost in the copy
# under review (each entry was an empty item followed only by its comment,
# i.e. null). They are restored from the romanization comments - confirm
# against the original file.
suffixes_no_whitespace:
  - 県  # ken
  - 都  # to
  - 道  # dō
  - 府  # fu
  - 郡  # gun
  - 市  # shi
  - 町  # machi
  - 村  # mura
  - 区  # ku
  - 字  # aza
  - 大字  # ōaza
  - 小字  # koaza