[fix] genitive case for Russian/Ukrainian toponyms, not locative (#125)

2016-12-28 14:34:20 -05:00
parent e6e4b28e43
commit 7ee44a584b
2 changed files with 12 additions and 12 deletions
--- a/resources/parser/default.yaml
+++ b/resources/parser/default.yaml
@@ -121,12 +121,12 @@ state:
    full_name_probability: 0.2
    abbreviated_probability: 0.8
-# Currently for Russian and Ukrainian, convert some names to the genitive/locative case
+# Currently for Russian and Ukrainian, convert some names to the genitive case
 slavic_names:
    state:
-        locative_probability: 0.4
+        genitive_probability: 0.4
    state_district:
-        locative_probability: 0.4
+        genitive_probability: 0.4
 country:
    # If no country is specified, pull the country name from CLDR (authoritative country names translated into different languages)
--- a/scripts/geodata/addresses/components.py
+++ b/scripts/geodata/addresses/components.py
@@ -858,31 +858,31 @@ class AddressComponents(object):
        else:
            return self.japanese_node_admin_level_map.get(val.get('place'), 1000)
-    def locative_name(self, name, language):
+    def genitive_name(self, name, language):
        morph = self.slavic_morphology_analyzers.get(language)
        if not morph:
            return None
        norm = []
        words = safe_decode(name).split()
        n = len(words)
-        for i, word in enumerate(words):
+
+        for word in words:
            parsed = morph.parse(word)[0]
-            word_class = {'gent'} if i < n - 1 else {'loct'}
-            inflected = parsed.inflect(word_class)
+            inflected = parsed.inflect({'gent'})
            if inflected and inflected.word:
                norm.append(inflected.word)
            else:
                norm.append(word)
        return six.u(' ').join(norm)
-    def add_locatives(self, address_components, language):
+    def add_genitives(self, address_components, language):
        if language in self.slavic_morphology_analyzers and AddressFormatter.CITY in address_components:
            for component in address_components:
                if component not in AddressFormatter.BOUNDARY_COMPONENTS:
                    continue
-                locative_probability = nested_get(self.config, ('slavic_names', component, 'locative_probability'), default=None)
+                genitive_probability = nested_get(self.config, ('slavic_names', component, 'genitive_probability'), default=None)
-                if locative_probability is not None and random.random() < float(locative_probability):
+                if genitive_probability is not None and random.random() < float(genitive_probability):
-                    address_components[component] = self.locative_name(address_components[component], language)
+                    address_components[component] = self.genitive_name(address_components[component], language)
    def abbreviated_state(self, state, country, language):
        abbreviate_state_prob = float(nested_get(self.config, ('state', 'abbreviated_probability')))
@@ -1719,7 +1719,7 @@ class AddressComponents(object):
        self.drop_invalid_components(address_components, country)
-        self.add_locatives(address_components, language)
+        self.add_genitives(address_components, language)
        if language_suffix and not non_local_language and not language_altered:
            language = language_suffix.lstrip(':').lower()