From 78450bfad91fbb5da92aa95eb1282d10b2a91e52 Mon Sep 17 00:00:00 2001
From: Al <albarrentine@gmail.com>
Date: Sat, 23 Jan 2016 21:36:20 -0500
Subject: [PATCH] [fix] Spaces in abbreviation

---
 scripts/geodata/osm/osm_address_training_data.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/geodata/osm/osm_address_training_data.py b/scripts/geodata/osm/osm_address_training_data.py
index 7b45b668..bca06990 100644
--- a/scripts/geodata/osm/osm_address_training_data.py
+++ b/scripts/geodata/osm/osm_address_training_data.py
@@ -386,7 +386,7 @@ def osm_abbreviate(gazetteer, s, language, abbreviate_prob=0.3, separate_prob=0.
             if random.random() > abbreviate_prob:
                 for j, (t_i, c_i) in enumerate(t):
                     abbreviated.append(tokens[i + j][0])
-                    if i + j < n - 1 and raw_tokens[i + j + 1][1] > raw_tokens[i + j][1]  + 1:
+                    if i + j < n - 1 and raw_tokens[i + j + 1][0] > sum(raw_tokens[i + j][:2]):
                         abbreviated.append(u' ')
                 i += len(t)
                 continue
@@ -412,7 +412,7 @@ def osm_abbreviate(gazetteer, s, language, abbreviate_prob=0.3, separate_prob=0.
                     token = random.choice(abbreviations) if abbreviations else canonical
                     token = recase_abbreviation(token, tokens[i:i + len(t)])
                     abbreviated.append(token)
-                    if t[-1][1] != token_types.IDEOGRAPHIC_CHAR:
+                    if i < n - 1 and raw_tokens[i + len(t)][0] > sum(raw_tokens[i + len(t) - 1][:2]):
                         abbreviated.append(u' ')
                     break
                 elif is_prefix:
@@ -464,13 +464,13 @@ def osm_abbreviate(gazetteer, s, language, abbreviate_prob=0.3, separate_prob=0.
             else:
                 for j, (t_i, c_i) in enumerate(t):
                     abbreviated.append(tokens[i + j][0])
-                    if i + j < n - 1 and raw_tokens[i + j + 1][1] > raw_tokens[i + j][1]  + 1:
+                    if i + j < n - 1 and raw_tokens[i + j + 1][0] > sum(raw_tokens[i + j][:2]):
                         abbreviated.append(u' ')
             i += len(t)
 
         else:
             abbreviated.append(tokens[i][0])
-            if i < n - 1 and raw_tokens[i + 1][1] > raw_tokens[i][1]  + 1:
+            if i < n - 1 and raw_tokens[i + 1][0] > sum(raw_tokens[i][:2]):
                 abbreviated.append(u' ')
             i += 1