[osm] Postponing punctuation stripping until after address template rendering
This commit is contained in:
@@ -135,6 +135,8 @@ class AddressFormatter(object):
|
|||||||
('road', 'postcode')
|
('road', 'postcode')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
whitespace_component_regex = re.compile('[\r\n]+[\s\r\n]*')
|
||||||
|
|
||||||
splitter = ' | '
|
splitter = ' | '
|
||||||
|
|
||||||
aliases = OrderedDict([
|
aliases = OrderedDict([
|
||||||
@@ -210,7 +212,11 @@ class AddressFormatter(object):
|
|||||||
return selected
|
return selected
|
||||||
output = pystache.render(template, first=render_first,
|
output = pystache.render(template, first=render_first,
|
||||||
**components).strip()
|
**components).strip()
|
||||||
output = re.sub('[\r\n]+[\s\r\n]*', self.splitter, output)
|
|
||||||
|
output = self.splitter.join([
|
||||||
|
self.strip_component(val)
|
||||||
|
for val in self.whitespace_component_regex.split(output)
|
||||||
|
])
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
@@ -244,11 +250,20 @@ class AddressFormatter(object):
|
|||||||
text = re.sub(regex, replacement, text)
|
text = re.sub(regex, replacement, text)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def tokenize_component(self, value):
|
def strip_component(self, value):
|
||||||
tokens = tokenize(value)
|
tokens = tokenize(value)
|
||||||
if sum((1 for c, t in tokens if c.value < token_types.PERIOD.value)) > 0:
|
for i, (c, t) in enumerate(tokens):
|
||||||
return [t for c, t in tokens]
|
if c.value < token_types.PERIOD.value:
|
||||||
return []
|
break
|
||||||
|
|
||||||
|
for j, (c, t) in enumerate(reversed(tokens)):
|
||||||
|
if c.value < token_types.PERIOD.value:
|
||||||
|
break
|
||||||
|
if j == 0:
|
||||||
|
j = None
|
||||||
|
else:
|
||||||
|
j = -j
|
||||||
|
return u' '.join([t for c, t in tokens[i:j]])
|
||||||
|
|
||||||
def format_address(self, country, components, minimal_only=True, tag_components=True):
|
def format_address(self, country, components, minimal_only=True, tag_components=True):
|
||||||
template = self.config.get(country.upper())
|
template = self.config.get(country.upper())
|
||||||
@@ -269,10 +284,10 @@ class AddressFormatter(object):
|
|||||||
|
|
||||||
if tag_components:
|
if tag_components:
|
||||||
components = {k: u' '.join([u'{}/{}'.format(t, k.replace(' ', '_'))
|
components = {k: u' '.join([u'{}/{}'.format(t, k.replace(' ', '_'))
|
||||||
for t in self.tokenize_component(v)])
|
for c, t in tokenize(v)])
|
||||||
for k, v in components.iteritems()}
|
for k, v in components.iteritems()}
|
||||||
else:
|
else:
|
||||||
components = {k: u' '.join(self.tokenize_component(v))
|
components = {k: u' '.join([t for c, t in tokenize(v)])
|
||||||
for k, v in components.iteritems()}
|
for k, v in components.iteritems()}
|
||||||
|
|
||||||
text = self.render_template(template_text, **components)
|
text = self.render_template(template_text, **components)
|
||||||
|
|||||||
Reference in New Issue
Block a user