From 893745f09b89842ca528ba501ba5acc4db9dd6f5 Mon Sep 17 00:00:00 2001
From: Al <albarrentine@gmail.com>
Date: Fri, 25 Mar 2022 14:05:03 -0400
Subject: [PATCH] [near_dupes] using quadgrams in Latin scripts as well for
 near dupe hashes

---
 src/near_dupe.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/near_dupe.c b/src/near_dupe.c
index 45f7c536..06a89ac4 100644
--- a/src/near_dupe.c
+++ b/src/near_dupe.c
@@ -387,6 +387,7 @@ cstring_array *name_word_hashes(char *name, libpostal_normalize_options_t normal
                 log_debug("token_str = %s\n", token_str);
 
                 add_double_metaphone_to_array_if_unique(token_str, strings, unique_strings, ngrams);
+                add_quadgrams_or_string_to_array_if_unique(token_str, strings, unique_strings, ngrams);
             // For non-Latin words (Arabic, Cyrllic, etc.) just add the word
             // For ideograms, we do two-character shingles, so only add the first character if the string has one token
             } else if (!ideogram || j > 0 || num_tokens == 1) {