From 82fb5c1dcaa4a2b94fb9df772735910d2d5bde2d Mon Sep 17 00:00:00 2001
From: Al
Date: Fri, 27 Jan 2017 13:15:18 -0500
Subject: [PATCH] [countries] moving country constants to a separate module

---
 Review notes (text between the "---" line and the first "diff --git" line
 is not part of the commit message):
 * BOSNIA_AND_HERZEGOVINA is 'ba'; 'bq' is the code assigned to BONAIRE.
 * The components.py hunks reference Countries but none of them adds an
   import for it. NOTE(review): confirm components.py imports Countries
   from scripts/geodata/countries/constants.py.
 * KOREA is now Countries.SOUTH_KOREA and UNITED_STATES is only available as
   Countries.UNITED_STATES. NOTE(review): confirm no bare references to the
   removed module-level names remain in components.py.

 scripts/geodata/addresses/components.py |  20 +-
 scripts/geodata/countries/constants.py  | 261 ++++++++++++++++++++++++
 2 files changed, 266 insertions(+), 15 deletions(-)
 create mode 100644 scripts/geodata/countries/constants.py

diff --git a/scripts/geodata/addresses/components.py b/scripts/geodata/addresses/components.py
index da8c7887..687a8239 100644
--- a/scripts/geodata/addresses/components.py
+++ b/scripts/geodata/addresses/components.py
@@ -58,15 +58,6 @@ this_dir = os.path.realpath(os.path.dirname(__file__))
 
 PARSER_DEFAULT_CONFIG = os.path.join(this_dir, os.pardir, os.pardir, os.pardir,
                                      'resources', 'parser', 'default.yaml')
-
-CHINA = 'cn'
-JAPAN = 'jp'
-KOREA = 'kr'
-TAIWAN = 'tw'
-HONG_KONG = 'hk'
-MACAO = 'mo'
-UNITED_STATES = 'us'
-
 
 JAPANESE_ROMAJI = 'ja_rm'
 ENGLISH = 'en'
@@ -76,7 +67,6 @@ CHINESE = 'zh'
 KOREAN = 'ko'
 
 CJK_LANGUAGES = set([CHINESE, JAPANESE, KOREAN])
-CJK_COUNTRIES = set([CHINA, JAPAN, KOREA, TAIWAN, HONG_KONG, MACAO])
 
 
 class AddressComponents(object):
@@ -980,7 +970,7 @@ class AddressComponents(object):
     def add_city_and_equivalent_points(self, grouped_components, containing_components, country, latitude, longitude):
         city_replacements = place_config.city_replacements(country)
 
-        is_japan = country == JAPAN
+        is_japan = country == Countries.JAPAN
         checked_first_suburb = False
 
         first_village = None
@@ -1090,7 +1080,7 @@ class AddressComponents(object):
         for component, components_values in grouped_osm_components.iteritems():
             seen = set()
 
-            if country == JAPAN and component == AddressFormatter.SUBURB:
+            if country == Countries.JAPAN and component == AddressFormatter.SUBURB:
                 components_values = sorted(components_values, key=self.japanese_neighborhood_sort_key)
 
             for component_value in components_values:
@@ -1122,7 +1112,7 @@ class AddressComponents(object):
 
         new_admin_components = {}
 
-        is_japan = country == JAPAN
+        is_japan = country == Countries.JAPAN
 
         for component, vals in poly_components.iteritems():
             if component not in address_components or (non_local_language and random.random() < replace_with_non_local_prob):
@@ -1251,7 +1241,7 @@ class AddressComponents(object):
         self.abbreviate_admin_components(neighborhood_components, country, language)
         address_components.update(neighborhood_components)
 
-        if country == JAPAN and (language_suffix.endswith(JAPANESE_ROMAJI) or non_local_language == ENGLISH):
+        if country == Countries.JAPAN and (language_suffix.endswith(JAPANESE_ROMAJI) or non_local_language == ENGLISH):
             self.format_japanese_neighborhood_romaji(address_components)
 
     def generate_sub_building_component(self, component, address_components, language, country=None, **kw):
@@ -1830,7 +1820,7 @@ class AddressComponents(object):
             lang, script = language.split('_', 1)
             if lang not in CJK_LANGUAGES and script.lower() not in self.valid_scripts:
                 language = lang
-        elif country in CJK_COUNTRIES and (non_local_language == ENGLISH or (language_suffix or '').lstrip(':').lower() == ENGLISH):
+        elif country in Countries.CJK_COUNTRIES and (non_local_language == ENGLISH or (language_suffix or '').lstrip(':').lower() == ENGLISH):
             language = ENGLISH
 
         return address_components, country, language
diff --git a/scripts/geodata/countries/constants.py b/scripts/geodata/countries/constants.py
new file mode 100644
index 00000000..0f2923aa
--- /dev/null
+++ b/scripts/geodata/countries/constants.py
@@ -0,0 +1,261 @@
+
+class Countries(object):
+    """Two-letter lowercase country codes exposed as named constants.
+
+    The codes are intended to be ISO 3166-1 alpha-2 values. Two sets built
+    from them, FORMER_SOVIET_UNION_COUNTRIES and CJK_COUNTRIES, follow.
+    """
+    AFGHANISTAN = 'af'
+    ALAND_ISLANDS = 'ax'
+    ALBANIA = 'al'
+    ALGERIA = 'dz'
+    AMERICAN_SAMOA = 'as'
+    ANDORRA = 'ad'
+    ANGOLA = 'ao'
+    ANGUILLA = 'ai'
+    ANTARCTICA = 'aq'
+    ANTIGUA_AND_BARBUDA = 'ag'
+    ARGENTINA = 'ar'
+    ARMENIA = 'am'
+    ARUBA = 'aw'
+    AUSTRALIA = 'au'
+    AUSTRIA = 'at'
+    AZERBAIJAN = 'az'
+    BAHAMAS = 'bs'
+    BAHRAIN = 'bh'
+    BANGLADESH = 'bd'
+    BARBADOS = 'bb'
+    BELARUS = 'by'
+    BELGIUM = 'be'
+    BELIZE = 'bz'
+    BENIN = 'bj'
+    BERMUDA = 'bm'
+    BHUTAN = 'bt'
+    BOLIVIA = 'bo'
+    BONAIRE = 'bq'
+    BOSNIA_AND_HERZEGOVINA = 'ba'
+    BOTSWANA = 'bw'
+    BOUVET_ISLAND = 'bv'
+    BRAZIL = 'br'
+    BRITISH_INDIAN_OCEAN_TERRITORY = 'io'
+    BRITISH_VIRGIN_ISLANDS = 'vg'
+    BRUNEI_DARUSSALAM = 'bn'
+    BULGARIA = 'bg'
+    BURKINA_FASO = 'bf'
+    BURUNDI = 'bi'
+    CAMBODIA = 'kh'
+    CAMEROON = 'cm'
+    CANADA = 'ca'
+    CAPE_VERDE = 'cv'
+    CAYMAN_ISLANDS = 'ky'
+    CENTRAL_AFRICAN_REPUBLIC = 'cf'
+    CHAD = 'td'
+    CHILE = 'cl'
+    CHINA = 'cn'
+    CHRISTMAS_ISLAND = 'cx'
+    COCOS_KEELING_ISLANDS = 'cc'
+    COLOMBIA = 'co'
+    COMOROS = 'km'
+    COOK_ISLANDS = 'ck'
+    COSTA_RICA = 'cr'
+    COTE_DIVOIRE = 'ci'
+    CROATIA = 'hr'
+    CUBA = 'cu'
+    CURACAO = 'cw'
+    CYPRUS = 'cy'
+    CZECH_REPUBLIC = 'cz'
+    DENMARK = 'dk'
+    DEMOCRATIC_REPUBLIC_OF_THE_CONGO = 'cd'
+    DJIBOUTI = 'dj'
+    DOMINICA = 'dm'
+    DOMINICAN_REPUBLIC = 'do'
+    ECUADOR = 'ec'
+    EGYPT = 'eg'
+    EL_SALVADOR = 'sv'
+    EQUATORIAL_GUINEA = 'gq'
+    ERITREA = 'er'
+    ESTONIA = 'ee'
+    ETHIOPIA = 'et'
+    FALKLAND_ISLANDS_MALVINAS = 'fk'
+    FAROE_ISLANDS = 'fo'
+    FEDERATED_STATES_OF_MICRONESIA = 'fm'
+    FIJI = 'fj'
+    FINLAND = 'fi'
+    FRANCE = 'fr'
+    FRENCH_GUIANA = 'gf'
+    FRENCH_POLYNESIA = 'pf'
+    FRENCH_SOUTHERN_TERRITORIES = 'tf'
+    GABON = 'ga'
+    GAMBIA = 'gm'
+    GEORGIA = 'ge'
+    GERMANY = 'de'
+    GHANA = 'gh'
+    GIBRALTAR = 'gi'
+    GREECE = 'gr'
+    GREENLAND = 'gl'
+    GRENADA = 'gd'
+    GUADELOUPE = 'gp'
+    GUAM = 'gu'
+    GUATEMALA = 'gt'
+    GUERNSEY = 'gg'
+    GUINEA = 'gn'
+    GUINEA_BISSAU = 'gw'
+    GUYANA = 'gy'
+    HAITI = 'ht'
+    HEARD_ISLAND_AND_MCDONALD_ISLANDS = 'hm'
+    HONDURAS = 'hn'
+    HONG_KONG = 'hk'
+    HUNGARY = 'hu'
+    ICELAND = 'is'
+    INDIA = 'in'
+    INDONESIA = 'id'
+    IRAN = 'ir'
+    IRAQ = 'iq'
+    IRELAND = 'ie'
+    ISLE_OF_MAN = 'im'
+    ISRAEL = 'il'
+    ITALY = 'it'
+    JAMAICA = 'jm'
+    JAPAN = 'jp'
+    JERSEY = 'je'
+    JORDAN = 'jo'
+    KAZAKHSTAN = 'kz'
+    KENYA = 'ke'
+    KIRIBATI = 'ki'
+    KUWAIT = 'kw'
+    KYRGYZSTAN = 'kg'
+    LAOS = 'la'
+    LATVIA = 'lv'
+    LEBANON = 'lb'
+    LESOTHO = 'ls'
+    LIBERIA = 'lr'
+    LIBYA = 'ly'
+    LIECHTENSTEIN = 'li'
+    LITHUANIA = 'lt'
+    LUXEMBOURG = 'lu'
+    MACAO = 'mo'
+    MACEDONIA = 'mk'
+    MADAGASCAR = 'mg'
+    MALAWI = 'mw'
+    MALAYSIA = 'my'
+    MALDIVES = 'mv'
+    MALI = 'ml'
+    MALTA = 'mt'
+    MARSHALL_ISLANDS = 'mh'
+    MARTINIQUE = 'mq'
+    MAURITANIA = 'mr'
+    MAURITIUS = 'mu'
+    MAYOTTE = 'yt'
+    MEXICO = 'mx'
+    MOLDOVA = 'md'
+    MONACO = 'mc'
+    MONGOLIA = 'mn'
+    MONTENEGRO = 'me'
+    MONTSERRAT = 'ms'
+    MOROCCO = 'ma'
+    MOZAMBIQUE = 'mz'
+    MYANMAR = 'mm'
+    NAMIBIA = 'na'
+    NAURU = 'nr'
+    NEPAL = 'np'
+    NETHERLANDS = 'nl'
+    NEW_CALEDONIA = 'nc'
+    NEW_ZEALAND = 'nz'
+    NICARAGUA = 'ni'
+    NIGER = 'ne'
+    NIGERIA = 'ng'
+    NIUE = 'nu'
+    NORFOLK_ISLAND = 'nf'
+    NORTH_KOREA = 'kp'
+    NORTHERN_MARIANA_ISLANDS = 'mp'
+    NORWAY = 'no'
+    OMAN = 'om'
+    PAKISTAN = 'pk'
+    PALAU = 'pw'
+    PALESTINE = 'ps'
+    PANAMA = 'pa'
+    PAPUA_NEW_GUINEA = 'pg'
+    PARAGUAY = 'py'
+    PERU = 'pe'
+    PHILIPPINES = 'ph'
+    PITCAIRN_ISLANDS = 'pn'
+    POLAND = 'pl'
+    PORTUGAL = 'pt'
+    PUERTO_RICO = 'pr'
+    QATAR = 'qa'
+    REPUBLIC_OF_CONGO = 'cg'
+    REUNION = 're'
+    ROMANIA = 'ro'
+    RUSSIA = 'ru'
+    RWANDA = 'rw'
+    SAINT_BARTHELEMY = 'bl'
+    SAINT_HELENA_ASCENSION_AND_TRISTAN_DA_CUNHA = 'sh'
+    SAINT_KITTS_AND_NEVIS = 'kn'
+    SAINT_LUCIA = 'lc'
+    SAINT_MARTIN = 'mf'
+    SAINT_PIERRE_AND_MIQUELON = 'pm'
+    SAINT_VINCENT_AND_THE_GRENADINES = 'vc'
+    SAMOA = 'ws'
+    SAN_MARINO = 'sm'
+    SAO_TOME_AND_PRINCIPE = 'st'
+    SAUDI_ARABIA = 'sa'
+    SENEGAL = 'sn'
+    SERBIA = 'rs'
+    SEYCHELLES = 'sc'
+    SIERRA_LEONE = 'sl'
+    SINGAPORE = 'sg'
+    SINT_MAARTEN = 'sx'
+    SLOVAKIA = 'sk'
+    SLOVENIA = 'si'
+    SOLOMON_ISLANDS = 'sb'
+    SOMALIA = 'so'
+    SOUTH_AFRICA = 'za'
+    SOUTH_GEORGIA_AND_THE_SOUTH_SANDWICH_ISLANDS = 'gs'
+    SOUTH_KOREA = 'kr'
+    SOUTH_SUDAN = 'ss'
+    SPAIN = 'es'
+    SRI_LANKA = 'lk'
+    SUDAN = 'sd'
+    SURINAME = 'sr'
+    SVALBARD_AND_JAN_MAYEN = 'sj'
+    SWAZILAND = 'sz'
+    SWEDEN = 'se'
+    SWITZERLAND = 'ch'
+    SYRIA = 'sy'
+    TAIWAN = 'tw'
+    TAJIKISTAN = 'tj'
+    TANZANIA = 'tz'
+    THAILAND = 'th'
+    TIMOR_LESTE = 'tl'
+    TOGO = 'tg'
+    TOKELAU = 'tk'
+    TONGA = 'to'
+    TRINIDAD_AND_TOBAGO = 'tt'
+    TUNISIA = 'tn'
+    TURKEY = 'tr'
+    TURKMENISTAN = 'tm'
+    TURKS_AND_CAICOS_ISLANDS = 'tc'
+    TUVALU = 'tv'
+    UGANDA = 'ug'
+    UKRAINE = 'ua'
+    UNITED_ARAB_EMIRATES = 'ae'
+    UNITED_KINGDOM = 'gb'
+    UNITED_STATES = 'us'
+    UNITED_STATES_MINOR_OUTLYING_ISLANDS = 'um'
+    URUGUAY = 'uy'
+    US_VIRGIN_ISLANDS = 'vi'
+    UZBEKISTAN = 'uz'
+    VANUATU = 'vu'
+    VATICAN = 'va'
+    VENEZUELA = 've'
+    VIETNAM = 'vn'
+    WALLIS_AND_FUTUNA = 'wf'
+    WESTERN_SAHARA = 'eh'
+    YEMEN = 'ye'
+    ZAMBIA = 'zm'
+    ZIMBABWE = 'zw'
+
+    # Codes of the countries grouped here as former Soviet Union members.
+    FORMER_SOVIET_UNION_COUNTRIES = set([RUSSIA, UKRAINE, BELARUS, KAZAKHSTAN, AZERBAIJAN, KYRGYZSTAN, GEORGIA, UZBEKISTAN, ARMENIA, TAJIKISTAN, MOLDOVA, TURKMENISTAN, LATVIA, LITHUANIA, ESTONIA])
+    # Codes AddressComponents tests membership against before switching the language to English.
+    CJK_COUNTRIES = set([CHINA, JAPAN, SOUTH_KOREA, TAIWAN, HONG_KONG, MACAO])