[osm/boundaries] check polygons with an ISO3166-2 as well in the country polygon index in case the country polygon is funky

This commit is contained in:
Al
2017-04-09 02:15:42 -04:00
parent 4ecd6c23c6
commit fc91471434
4 changed files with 23 additions and 3 deletions

View File

@@ -1,3 +1,5 @@
import pycountry
class Countries(object):
AFGHANISTAN = 'af'
@@ -252,3 +254,9 @@ class Countries(object):
FORMER_SOVIET_UNION_COUNTRIES = set([RUSSIA, UKRAINE, BELARUS, KAZAKHSTAN, AZERBAIJAN, KYRGYZSTAN, GEORGIA, UZBEKISTAN, ARMENIA, TAJIKISTAN, MOLDOVA, TURKMENISTAN, LATVIA, LITHUANIA, ESTONIA])
CJK_COUNTRIES = set([CHINA, JAPAN, SOUTH_KOREA, TAIWAN, HONG_KONG, MACAO])
all_country_iso_codes = set([c.alpha2.lower() for c in pycountry.countries])
@classmethod
def is_valid_country_code(cls, alpha2_code):
    """Return True if *alpha2_code* is a known two-letter country code.

    The comparison is case-insensitive: the code is lower-cased and looked
    up in ``cls.all_country_iso_codes``.

    :param alpha2_code: candidate country code; may be ``None`` or empty.
    :return: ``True`` when the lower-cased code is in
        ``cls.all_country_iso_codes``, ``False`` otherwise (including for
        ``None`` and the empty string).
    """
    # Coerce to bool so that empty input yields False rather than leaking
    # the falsy operand itself (None or '') to the caller.
    return bool(alpha2_code) and alpha2_code.lower() in cls.all_country_iso_codes

View File

@@ -315,7 +315,7 @@ class OSMBuildingPolygonReader(OSMPolygonReader):
class OSMCountryPolygonReader(OSMPolygonReader):
    """Polygon reader that selects the polygons used for the country index."""

    def include_polygon(self, props):
        """Decide whether a polygon belongs in the country polygon index.

        A polygon is included when its properties carry an
        ``ISO3166-1:alpha2`` tag, an ``ISO3166-2`` tag, or when its
        ``(type, id)`` pair is listed in ``osm_admin1_ids``.

        :param props: mapping of the polygon's properties/tags.
        :return: ``True`` if the polygon should be indexed, else ``False``.
        """
        # Country-level or admin1-level ISO tags qualify directly.
        if 'ISO3166-1:alpha2' in props or 'ISO3166-2' in props:
            return True

        # Otherwise fall back to the explicit admin1 id list; an element
        # with no 'type' property is treated as a relation.
        osm_key = (props.get('type', 'relation'), safe_encode(props.get('id', '')))
        return osm_key in osm_admin1_ids
class OSMNeighborhoodPolygonReader(OSMPolygonReader):

View File

@@ -123,6 +123,7 @@ PLANET_BORDERS="planet-borders.osm"
PLANET_ADMIN_BORDERS_OSM="planet-admin-borders.osm"
# Tag expressions passed to osmfilter --keep when extracting country polygons:
# objects carrying an ISO3166-1:alpha2 tag and objects carrying an ISO3166-2 tag
# (presumably "key=" matches any value -- confirm against the osmfilter docs).
VALID_COUNTRY_KEYS="ISO3166-1:alpha2="
VALID_ADMIN1_KEYS="ISO3166-2="
# Take the lines of $ADMIN1_FILE that contain "osm", rewrite each one to
# "@id=<relation id>", then join them all into a single "@id=A or @id=B ..." expression.
ADMIN1_LANGUAGE_EXCEPTION_IDS=$(grep "osm" $ADMIN1_FILE | sed 's/^.*relation:\([0-9][0-9]*\).*$/@id=\1/' | xargs echo | sed 's/\s/ or /g')
# Tag expressions kept (together with the locality keys) when filtering the planet borders file.
VALID_ADMIN_BORDER_KEYS="boundary=administrative or boundary=town or boundary=city_limit or boundary=civil_parish or boundary=civil or boundary=ceremonial or boundary=postal_district or place=island or place=city or place=town or place=village or place=hamlet or place=municipality or place=settlement"
@@ -147,7 +148,7 @@ osmconvert $PLANET_BORDERS_O5M --max-objects=1000000000 --all-to-nodes -o=$PLANE
rm $PLANET_BORDERS_O5M
osmfilter $PLANET_BORDERS_LATLONS --keep="$VALID_ADMIN_BORDER_KEYS or $VALID_LOCALITY_KEYS" -o=$PLANET_BORDERS
rm $PLANET_BORDERS_LATLONS
# Extract everything needed for the country polygon index in one pass:
# polygons tagged ISO3166-1:alpha2, polygons tagged ISO3166-2, and the
# admin1 relations listed by id. Running the narrower filter first would only
# produce a $PLANET_COUNTRIES file that this command immediately overwrites.
osmfilter $PLANET_O5M --keep="$VALID_COUNTRY_KEYS or $VALID_ADMIN1_KEYS or $ADMIN1_LANGUAGE_EXCEPTION_IDS" --drop-author --drop-version -o=$PLANET_COUNTRIES
echo "Filtering for neighborhoods"
PLANET_LOCALITIES="planet-localities.osm"

View File

@@ -28,6 +28,7 @@ this_dir = os.path.realpath(os.path.dirname(__file__))
sys.path.append(os.path.realpath(os.path.join(os.pardir, os.pardir)))
from geodata.coordinates.conversion import latlon_to_decimal
# Package name is "geodata" (was misspelled "goedata", which fails at import time).
from geodata.countries.constants import Countries
from geodata.encoding import safe_decode
from geodata.file_utils import ensure_dir, download_file
from geodata.i18n.unicode_properties import get_chars_by_script
@@ -298,6 +299,7 @@ class OSMReverseGeocoder(RTreePolygonIndex):
'name:*',
'ISO3166-1:alpha2',
'ISO3166-1:alpha3',
'ISO3166-2',
'int_name',
'official_name',
'official_name:*',
@@ -492,7 +494,16 @@ class OSMCountryReverseGeocoder(OSMReverseGeocoder):
if country:
break
else:
return None, []
# See if there's an ISO3166-2 code that matches
# in case the country polygon is wacky
for c in osm_components:
admin1 = c.get('ISO3166-2')
if admin1:
# If so, and if the country is valid, use that
country = admin1[:2]
if not Countries.is_valid_country_code(country.lower()):
return None, []
break
country = country.lower()