From cdfa9e11bf7d9bef5cce51a2bd19ba7e159f205c Mon Sep 17 00:00:00 2001
From: Al
Date: Thu, 1 Sep 2016 17:45:12 -0400
Subject: [PATCH] [openaddresses] excluding all streets with "unknown" in the name. Though possibly excluding one or two valid addresses, the gains far outweigh the costs

---
Review note: the new pattern is written as a raw string (r'\bunknown\b').
In a plain string literal Python reads '\b' as a backspace character
(0x08), not as the regex word-boundary assertion, so the pattern would
never match a street name containing the word "unknown". The index line
below still carries the blob ids from before this correction.

 scripts/geodata/openaddresses/formatter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/geodata/openaddresses/formatter.py b/scripts/geodata/openaddresses/formatter.py
index abfbe68e..ee0991a9 100644
--- a/scripts/geodata/openaddresses/formatter.py
+++ b/scripts/geodata/openaddresses/formatter.py
@@ -28,7 +28,7 @@ OPENADDRESSES_FORMAT_DATA_TAGGED_FILENAME = 'openaddresses_formatted_addresses_t
 OPENADDRESSES_FORMAT_DATA_FILENAME = 'openaddresses_formatted_addresses.tsv'
 
 null_regex = re.compile('^\s*(?:null|none)\s*$', re.I)
-unknown_regex = re.compile('^\s*(?:unknown)\s*$', re.I)
+unknown_regex = re.compile(r'\bunknown\b', re.I)
 not_applicable_regex = re.compile('^\s*n\.?\s*/?\s*a\.?\s*$', re.I)
 sin_numero_regex = re.compile('^\s*s\s\s*/\s*n\s*$')
 fraction_regex = re.compile('^\s*[\d]+[\s]*/[\s]*(?:[\d]+|[a-z]|[\d]+[a-z]|[a-z][\d]+)[\s]*$', re.I)