diff --git a/src/near_dupe.c b/src/near_dupe.c
index f2f15c0d..0b2efa7f 100644
--- a/src/near_dupe.c
+++ b/src/near_dupe.c
@@ -231,8 +231,6 @@ cstring_array *name_word_hashes(char *name, libpostal_normalize_options_t normal
 
     char_array *combined_words_no_whitespace = char_array_new();
 
-    bool keep_whitespace = false;
-
     khash_t(str_set) *unique_strings = kh_init(str_set);
     khiter_t k;
     int ret = 0;
@@ -240,11 +238,14 @@ cstring_array *name_word_hashes(char *name, libpostal_normalize_options_t normal
     for (size_t i = 0; i < num_expansions; i++) {
         char *expansion = cstring_array_get_string(name_expansions, i);
         log_debug("expansion = %s\n", expansion);
+        bool keep_whitespace = false;
         tokenize_add_tokens(token_array, expansion, strlen(expansion), keep_whitespace);
         size_t num_tokens = token_array->n;
         token_t *tokens = token_array->a;
         token_t prev_token = NULL_TOKEN;
         char *token_str;
+        char_array_clear(combined_words_no_whitespace);
+
         for (size_t j = 0; j < num_tokens; j++) {
             token_t token = tokens[j];
             bool ideogram = is_ideographic(token.type);
@@ -259,6 +260,8 @@ cstring_array *name_word_hashes(char *name, libpostal_normalize_options_t normal
                 char_array_cat_len(token_string_array, expansion + prev_token.offset, prev_token.len);
             }
 
+            char_array_cat_len(combined_words_no_whitespace, expansion + token.offset, token.len);
+
             // For Latin script, add double metaphone of the words
             if (is_latin && !is_numeric_token(token.type) && !ideogram && !is_punctuation(token.type)) {
                 char_array_clear(token_string_array);
@@ -323,6 +326,18 @@ cstring_array *name_word_hashes(char *name, libpostal_normalize_options_t normal
             prev_token = token;
         }
 
+        char *combined = char_array_get_string(combined_words_no_whitespace);
+        log_debug("combined = %s\n", combined);
+        k = kh_get(str_set, unique_strings, combined);
+
+        if (k == kh_end(unique_strings)) {
+            cstring_array_add_string(strings, combined);
+            k = kh_put(str_set, unique_strings, strdup(combined), &ret);
+            if (ret < 0) {
+                break;
+            }
+        }
+
         token_array_clear(token_array);
     }