From d57f9df7ed697bc9ec106db571f7a66f07fcbbba Mon Sep 17 00:00:00 2001 From: Al Date: Tue, 14 Jul 2015 14:04:32 -0400 Subject: [PATCH] [fix] regexes --- scripts/geodata/osm/osm_address_training_data.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/geodata/osm/osm_address_training_data.py b/scripts/geodata/osm/osm_address_training_data.py index c1e37fc6..f3e142b4 100644 --- a/scripts/geodata/osm/osm_address_training_data.py +++ b/scripts/geodata/osm/osm_address_training_data.py @@ -238,6 +238,8 @@ def normalize_osm_name_tag(tag, script=False): WAYS_LANGUAGE_DATA_FILENAME = 'streets_by_language.tsv' +beginning_re = re.compile('^[^0-9\-]+', re.UNICODE) +end_re = re.compile('[^0-9]+$', re.UNICODE) latitude_dms_regex = re.compile(ur'^(-?[0-9]{1,2})[ ]*[ :°ºd][ ]*([0-5]?[0-9])?[ ]*[:\'\u2032m]?[ ]*([0-5]?[0-9](?:\.\d+)?)?[ ]*[:\?\"\u2033s]?[ ]*(N|n|S|s)?$', re.I | re.UNICODE) longitude_dms_regex = re.compile(ur'^(-?1[0-8][0-9]|0?[0-9]{1,2})[ ]*[ :°ºd][ ]*([0-5]?[0-9])?[ ]*[:\'\u2032m]?[ ]*([0-5]?[0-9](?:\.\d+)?)?[ ]*[:\?\"\u2033s]?[ ]*(E|e|W|w)?$', re.I | re.UNICODE)