Initial fork commit
This commit is contained in:
0
scripts/geodata/tests/__init__.py
Normal file
0
scripts/geodata/tests/__init__.py
Normal file
128
scripts/geodata/tests/test_address_configs.py
Normal file
128
scripts/geodata/tests/test_address_configs.py
Normal file
@@ -0,0 +1,128 @@
|
||||
import re
|
||||
import six
|
||||
import unittest
|
||||
|
||||
from geodata.addresses.entrances import *
|
||||
from geodata.addresses.floors import *
|
||||
from geodata.intersections.query import *
|
||||
from geodata.addresses.po_boxes import *
|
||||
from geodata.addresses.postcodes import *
|
||||
from geodata.addresses.staircases import *
|
||||
from geodata.addresses.units import *
|
||||
from geodata.categories.query import *
|
||||
|
||||
from geodata.math.floats import isclose
|
||||
|
||||
|
||||
# Matches the words None, False or True appearing as a whole word in a phrase;
# used by TestAddressConfigs.valid_phrase to reject generated phrases that
# contain one of these stringified Python literals.
invalid_phrase_re = re.compile(r'\b(None|False|True)\b')
|
||||
|
||||
|
||||
class TestAddressConfigs(unittest.TestCase):
    """Sanity checks for the address configs of every language/country pair.

    ``test_configs`` walks ``address_config.address_configs`` and, for each
    language (and each entry under its ``'countries'`` key), runs all of the
    ``check_*`` methods below. Most checks repeatedly generate a random
    phrase and assert that it does not contain a stray ``None``/``False``/
    ``True`` word.
    """

    def valid_phrase(self, phrase):
        """Return True when ``phrase`` is None or has no None/False/True word."""
        return phrase is None or not invalid_phrase_re.search(phrase)

    def _assert_valid_phrase(self, phrase):
        """Fail the test, reporting the phrase, if ``valid_phrase`` rejects it."""
        self.assertTrue(self.valid_phrase(phrase), six.u('phrase was: {}').format(phrase))

    def check_components(self, language, country):
        """Assert that each component's ``*probability`` values sum to 1.0.

        The ``'combinations'`` entry of the components config is skipped.
        """
        conf = address_config.get_property('components', language, country=country)
        for component, value in six.iteritems(conf):
            if component == 'combinations':
                continue

            total_prob = sum(
                (v for k, v in six.iteritems(value) if k.endswith('probability')),
                0.0,
            )

            # Format after six.u(), like every other failure message here.
            message = six.u('language: {}, country: {}, component: {}').format(
                language, country, component)
            self.assertTrue(isclose(total_prob, 1.0), message)

    def check_entrance_phrases(self, language, country=None):
        """Generate 1000 random entrance phrases and validate each one."""
        for _ in six.moves.range(1000):
            phrase = Entrance.phrase(Entrance.random(language, country=country), language, country=country)
            self._assert_valid_phrase(phrase)

    def check_staircase_phrases(self, language, country=None):
        """Generate 1000 random staircase phrases and validate each one."""
        # NOTE(review): this previously called Entrance (copy of the entrance
        # check), so staircases were never exercised. Assumes
        # geodata.addresses.staircases exports ``Staircase`` with the same
        # random()/phrase() classmethods as Entrance -- confirm.
        for _ in six.moves.range(1000):
            phrase = Staircase.phrase(Staircase.random(language, country=country), language, country=country)
            self._assert_valid_phrase(phrase)

    def check_floor_phrases(self, language, country=None):
        """Validate floor phrases for random floors and for a ``None`` floor.

        The ``None`` floor is checked both without and with ``num_floors=3``.
        """
        for _ in six.moves.range(10000):
            phrase = Floor.phrase(Floor.random(language, country=country), language, country=country)
            self._assert_valid_phrase(phrase)

        for _ in six.moves.range(1000):
            phrase = Floor.phrase(None, language, country=country)
            self._assert_valid_phrase(phrase)

        for _ in six.moves.range(1000):
            phrase = Floor.phrase(None, language, country=country, num_floors=3)
            self._assert_valid_phrase(phrase)

    def check_unit_phrases(self, language, country=None):
        """Validate unit phrases for random units, a ``None`` unit, units
        generated with floor/basement counts, and each supported zone."""
        for _ in six.moves.range(10000):
            phrase = Unit.phrase(Unit.random(language, country=country), language, country=country)
            self._assert_valid_phrase(phrase)

        for _ in six.moves.range(1000):
            phrase = Unit.phrase(None, language, country=country)
            self._assert_valid_phrase(phrase)

        for _ in six.moves.range(1000):
            phrase = Unit.phrase(Unit.random(language, country=country, num_floors=3, num_basements=1), language, country=country)
            self._assert_valid_phrase(phrase)

        for zone in ('commercial', 'industrial', 'university'):
            for _ in six.moves.range(1000):
                phrase = Unit.phrase(Unit.random(language, country=country), language, country=country, zone=zone)
                self._assert_valid_phrase(phrase)

    def check_po_boxes(self, language, country=None):
        """Validate up to 1000 random PO box phrases.

        Stops at the first ``None`` phrase.
        """
        for _ in six.moves.range(1000):
            phrase = POBox.phrase(POBox.random(language, country=country), language, country=country)
            if phrase is None:
                break
            self._assert_valid_phrase(phrase)

    def check_postcodes(self, language, country=None):
        """Validate up to 1000 phrases for the fixed postcode ``'12345'``.

        Stops at the first ``None`` phrase.
        """
        for _ in six.moves.range(1000):
            phrase = PostCode.phrase('12345', language, country=country)
            if phrase is None:
                break
            self._assert_valid_phrase(phrase)

    def check_intersection_phrases(self, language, country=None):
        """Validate up to 1000 intersection phrases.

        Stops at the first ``None`` phrase.
        """
        for _ in six.moves.range(1000):
            phrase = Intersection.phrase(language, country=country)
            if phrase is None:
                break
            self._assert_valid_phrase(phrase)

    def check_category_phrases(self, language, country=None):
        """Request up to 1000 ``amenity=restaurant`` category phrases.

        Makes no assertion; it only checks that generation does not raise.
        Stops as soon as a result has ``category`` set to ``None``.
        """
        for _ in six.moves.range(1000):
            phrase = Category.phrase(language, 'amenity', 'restaurant', country=country)
            if phrase.category is None:
                break

    def check_config(self, language, country=None):
        """Run every check for one language/country pair, printing progress."""
        # (label printed after 'Checking ', bound check method), in run order.
        steps = (
            ('components', self.check_components),
            ('entrances', self.check_entrance_phrases),
            ('staircases', self.check_staircase_phrases),
            ('floors', self.check_floor_phrases),
            ('units', self.check_unit_phrases),
            ('intersections', self.check_intersection_phrases),
            ('categories', self.check_category_phrases),
            ('PO boxes', self.check_po_boxes),
            ('postcodes', self.check_postcodes),
        )

        print('Doing lang={}, country={}'.format(language, country))
        for label, check in steps:
            print('Checking {}'.format(label))
            check(language, country=country)

    def test_configs(self):
        """Check each configured language on its own, then with each of its countries."""
        for lang, value in six.iteritems(address_config.address_configs):
            self.check_config(lang)
            for country in value.get('countries', []):
                self.check_config(lang, country)
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()
|
||||
119
scripts/geodata/tests/test_disambiguation.py
Normal file
119
scripts/geodata/tests/test_disambiguation.py
Normal file
@@ -0,0 +1,119 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
# Make the directory two levels above this file importable. The path is
# anchored on this file's location (not the current working directory), so
# the module can be run from anywhere.
this_dir = os.path.realpath(os.path.dirname(__file__))
sys.path.append(os.path.realpath(os.path.join(this_dir, os.pardir, os.pardir)))
|
||||
|
||||
from geodata.i18n.languages import init_languages, get_country_languages, get_regional_languages
|
||||
from geodata.language_id.disambiguation import disambiguate_language, street_types_gazetteer, UNKNOWN_LANGUAGE, AMBIGUOUS_LANGUAGE
|
||||
|
||||
|
||||
# Country-level cases consumed by TestNormalization.test_countries.
# Each entry is (input string, country code, expected language).
country_test_cases = [
    ('Division Street', 'us', 'en'),
    ('Kalfarveien', 'no', 'nb'),
    ('Upper Glenburn Road', 'gb', 'en'),
    ('Zafer Caddesi', 'cy', 'tr'),

    # The US has some Spanish and French street names
    ('Avenue P', 'us', 'en'),
    ('Avenue du Champs', 'us', 'fr'),
    ('Avenida de la Plata', 'us', 'es'),
    ('Pl', 'us', UNKNOWN_LANGUAGE),
    ('No 2 School House', 'us', UNKNOWN_LANGUAGE),
    ('E Thetford Rd', 'us', 'en'),
    ('El Camino', 'us', 'es'),
    ('The El Camino', 'us', 'en'),
    ('Via Antiqua Street', 'us', 'en'),
    ('Salt Evaporator Plan Road', 'us', 'en'),
    ('Calle Las Brisas North', 'us', 'en'),
    ('Chateau Estates', 'us', 'en'),
    ('Grand Boulevard', 'us', 'en'),
    ('Rue Louis Phillippe', 'us', 'fr'),
    ('Calle Street', 'us', 'en'),
    ('Del Rio Avenue', 'us', 'en'),
    ('South Signal Butte Road', 'us', 'en'),
    ('Chief All Over', 'us', UNKNOWN_LANGUAGE),
    ('South Alameda Street', 'us', 'en'),
    ('The Alameda', 'us', 'en'),
    ('Rincon Road', 'us', 'en'),

    # "Avenue" followed by a stopword
    ('Avenue du Bourget-du-Lac', 'je', 'fr'),

    # UAE: English is not the default language; input uses an abbreviation
    ('128 A St', 'ae', 'en'),
    ('128 A St.', 'ae', 'en'),

    # Mixed English / Arabic street address
    ('Omar Street شارع عمر', 'iq', AMBIGUOUS_LANGUAGE),

    # Unrelated second script
    ('Bayard Street - 擺也街', 'us', AMBIGUOUS_LANGUAGE),

    # Brussels addresses
    ('Avenue Paul Héger - Paul Hégerlaan', 'be', AMBIGUOUS_LANGUAGE),
    ('Smaragdstraat', 'be', 'nl'),

    # India
    ('Kidwai nagar', 'in', 'hi'),
    ('Mavoor Rd.', 'in', 'en'),

    # Sri Lanka
    ('Sri Sadathissa Mawatha', 'lk', 'si'),

    # Russian
    ('Фрунзе улица', 'kg', 'ru'),
]
|
||||
|
||||
# Region-level cases consumed by TestNormalization.test_regional.
# Each entry is (input string, country code, region key, region value,
# expected language); the key/value pair is passed to get_regional_languages.
regional_test_cases = [
    # Spain
    ('Carrer de la Morella', 'es', 'qs_a1r', 'Cataluña/Catalunya', 'ca'),
    ('Avinguda Diagonal', 'es', 'qs_a1r', 'Cataluña/Catalunya', 'ca'),
    ('Avinguda de Filipines - Avenida de Filipinas', 'es', 'qs_a1r', 'Cataluña/Catalunya', AMBIGUOUS_LANGUAGE),
    ('Calle de la Morella', 'es', 'qs_a1r', 'Cataluña/Catalunya', 'es'),
    ('autobidea', 'es', 'qs_a1r', 'Comunidad Foral de Navarra', 'eu'),
    ('Calle', 'es', 'qs_a1r', 'Comunidad Foral de Navarra', 'es'),
    ('Txurruka', 'es', 'qs_a1r', 'País Vasco/Euskadi', UNKNOWN_LANGUAGE),

    # Belgium
    ('Lutticherstrasse', 'be', 'qs_a1', 'Liège', 'de'),
    ('Chaussée de Charleroi', 'be', 'qs_a1', 'Namur', 'fr'),

    # France / Occitan
    ('Carriera de Brasinvert', 'fr', 'qs_a1r', 'Rhône-Alpes', 'oc'),
]
|
||||
|
||||
|
||||
class TestNormalization(unittest.TestCase):
    """Table-driven checks of ``disambiguate_language``."""

    def test_countries(self):
        """Every entry of ``country_test_cases`` resolves to its expected language.

        The candidate languages are those returned for the case's country,
        which must be non-empty.
        """
        for text, country_code, want in country_test_cases:
            country_langs = get_country_languages(country_code)
            self.assertTrue(bool(country_langs))

            got = disambiguate_language(text, country_langs.items())
            failure = '{} != {} for {}, langs={}'.format(got, want, text, country_langs.items())
            self.assertEqual(got, want, failure)

    def test_regional(self):
        """Every entry of ``regional_test_cases`` resolves to its expected language.

        The candidates are the regional languages for the case's
        country/key/value, updated in place with the country's languages;
        both lookups must be non-empty.
        """
        for text, country_code, region_key, region_value, want in regional_test_cases:
            country_langs = get_country_languages(country_code)
            self.assertTrue(bool(country_langs))

            candidates = get_regional_languages(country_code, region_key, region_value)
            self.assertTrue(bool(candidates))
            candidates.update(country_langs)

            got = disambiguate_language(text, candidates.items())
            failure = '{} != {} for {}, langs={}'.format(got, want, text, candidates.items())
            self.assertEqual(got, want, failure)
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()
|
||||
Reference in New Issue
Block a user