[osm] Treating components that are all punctuation as blank in address parsing (e.g. a single comma)
This commit is contained in:
@@ -244,6 +244,12 @@ class AddressFormatter(object):
|
|||||||
text = re.sub(regex, replacement, text)
|
text = re.sub(regex, replacement, text)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
def tokenize_component(self, v):
    """Tokenize one address component, treating punctuation-only values as blank.

    :param v: the raw text of a single address component.
    :return: a list of the token strings produced by ``tokenize(v)`` when at
        least one token has a class whose ``value`` is below
        ``token_types.PERIOD.value``; otherwise an empty list (e.g. for a
        component that is just a single comma).
    """
    # Tokenize once and keep the result. The previous code referenced an
    # undefined name (``value``) here and then tokenized ``v`` a second time.
    # list() makes the result safe to traverse twice even if tokenize()
    # returns a one-shot iterator.
    tokens = list(tokenize(v))

    # NOTE(review): presumably token classes ordered before PERIOD are the
    # non-punctuation ones (words, numbers, ...) -- confirm against token_types.
    if any(c.value < token_types.PERIOD.value for c, t in tokens):
        return [t for c, t in tokens]
    return []
|
||||||
|
|
||||||
def format_address(self, country, components, minimal_only=True, tag_components=True):
|
def format_address(self, country, components, minimal_only=True, tag_components=True):
|
||||||
template = self.config.get(country.upper())
|
template = self.config.get(country.upper())
|
||||||
if not template:
|
if not template:
|
||||||
@@ -263,10 +269,10 @@ class AddressFormatter(object):
|
|||||||
|
|
||||||
if tag_components:
|
if tag_components:
|
||||||
components = {k: u' '.join([u'{}/{}'.format(t, k.replace(' ', '_'))
|
components = {k: u' '.join([u'{}/{}'.format(t, k.replace(' ', '_'))
|
||||||
for c, t in tokenize(v)])
|
for t in self.tokenize_component(v)])
|
||||||
for k, v in components.iteritems()}
|
for k, v in components.iteritems()}
|
||||||
else:
|
else:
|
||||||
components = {k: u' '.join([t for c, t in tokenize(v)])
|
components = {k: u' '.join(self.tokenize_component(v))
|
||||||
for k, v in components.iteritems()}
|
for k, v in components.iteritems()}
|
||||||
|
|
||||||
text = self.render_template(template_text, **components)
|
text = self.render_template(template_text, **components)
|
||||||
|
|||||||
Reference in New Issue
Block a user