diff --git a/scripts/geodata/osm/osm_address_training_data.py b/scripts/geodata/osm/osm_address_training_data.py
index 4dacac6b..89e4e5ed 100644
--- a/scripts/geodata/osm/osm_address_training_data.py
+++ b/scripts/geodata/osm/osm_address_training_data.py
@@ -696,6 +696,13 @@ def build_address_format_training_data(admin_rtree, language_rtree, neighborhood
             if place_type == 'borough' or polygon_type == 'local_admin':
                 neighborhood_level = AddressFormatter.CITY_DISTRICT
 
+                # Optimization so we don't use Brooklyn for Kings County
+                city_name = address_components.get(AddressFormatter.CITY)
+                if name == city_name:
+                    name = neighborhood.get(name_key, neighborhood.get(raw_name_key))  # retry with the neighborhood's own name tags
+                    if not name or name == city_name:
+                        continue  # nothing distinct from the city name; skip this entry
+
             neighborhood_levels[neighborhood_level].append(name)
 
         for component, neighborhoods in neighborhood_levels.iteritems():
diff --git a/src/address_parser.h b/src/address_parser.h
index af97ecc3..4f946a39 100644
--- a/src/address_parser.h
+++ b/src/address_parser.h
@@ -71,6 +71,41 @@ with the general error-driven averaged perceptron.
 #define SEPARATOR_LABEL "sep"
 #define FIELD_SEPARATOR_LABEL "fsep"
 
+// Address component bit flags; parenthesized so each expands as a single operand inside larger expressions.
+#define ADDRESS_COMPONENT_HOUSE (1 << 0)
+#define ADDRESS_COMPONENT_HOUSE_NUMBER (1 << 1)
+#define ADDRESS_COMPONENT_ROAD (1 << 4)
+#define ADDRESS_COMPONENT_SUBURB (1 << 7)
+#define ADDRESS_COMPONENT_CITY_DISTRICT (1 << 8)
+#define ADDRESS_COMPONENT_CITY (1 << 9)
+#define ADDRESS_COMPONENT_STATE_DISTRICT (1 << 10)
+#define ADDRESS_COMPONENT_STATE (1 << 11)
+#define ADDRESS_COMPONENT_POSTAL_CODE (1 << 12)
+#define ADDRESS_COMPONENT_COUNTRY (1 << 13)
+// Sequential 0-based index per address component; typedef'd so the header names a type instead of defining a variable.
+typedef enum {
+    ADDRESS_PARSER_HOUSE,
+    ADDRESS_PARSER_HOUSE_NUMBER,
+    ADDRESS_PARSER_ROAD,
+    ADDRESS_PARSER_SUBURB,
+    ADDRESS_PARSER_CITY_DISTRICT,
+    ADDRESS_PARSER_CITY,
+    ADDRESS_PARSER_STATE_DISTRICT,
+    ADDRESS_PARSER_STATE,
+    ADDRESS_PARSER_POSTAL_CODE,
+    ADDRESS_PARSER_COUNTRY,
+    NUM_ADDRESS_PARSER_TYPES    // Count of the enumerators above; keep last
+} address_parser_types;
+// 32-bit value readable whole via `value` or as two 16-bit fields (bit-field placement is implementation-defined).
+typedef union address_parser_types {
+    uint32_t value;
+    struct {
+        uint32_t components:16;     // Bitset of components (presumably ADDRESS_COMPONENT_* flags; TODO confirm)
+        uint32_t most_common:16;    // Most common component as short integer enum value
+    };
+} address_parser_types_t;
+
+
 typedef struct address_parser_context {
     char *language;
     char *country;
@@ -84,6 +119,9 @@ typedef struct address_parser_context {
     phrase_array *geodb_phrases;
     // Index in gedob_phrases or -1
     int64_array *geodb_phrase_memberships;
+    phrase_array *component_phrases;
+    // Index in component_phrases or -1
+    int64_array *component_phrase_memberships;
     tokenized_string_t *tokenized_str;
 } address_parser_context_t;
 
@@ -97,6 +135,7 @@ typedef struct address_parser_response {
 typedef struct address_parser {
     averaged_perceptron_t *model;
     trie_t *vocab;
+    trie_t *phrase_types;   // NOTE(review): presumably maps phrases to address_parser_types_t values; confirm against the loader
 } address_parser_t;
 
 // General usage
@@ -115,7 +154,7 @@ void address_parser_normalize_token(cstring_array *array, char *str, token_t tok
 address_parser_context_t *address_parser_context_new(void);
 void address_parser_context_destroy(address_parser_context_t *self);
 
-void address_parser_context_fill(address_parser_context_t *context, tokenized_string_t *tokenized_str, char *language, char *country);
+void address_parser_context_fill(address_parser_context_t *context, address_parser_t *parser, tokenized_string_t *tokenized_str, char *language, char *country);
 
 // Feature function
 bool address_parser_features(void *self, void *ctx, tokenized_string_t *str, uint32_t i, char *prev, char *prev2);
@@ -131,4 +170,4 @@ bool address_parser_module_setup(char *dir);
 void address_parser_module_teardown(void);
 
 
-#endif
\ No newline at end of file
+#endif