[similarity] adding a multi-word alignment algorithm for streets and names like "de la cruz" vs. "dela cruz" or "Oceanwalk Ter" vs. "Ocean Walk Ter"

This commit is contained in:
Al
2018-02-23 01:22:12 -05:00
parent c5bb9d8daa
commit 2b4e7073c2
2 changed files with 92 additions and 0 deletions

View File

@@ -5,6 +5,7 @@
#include <stdlib.h>
#include "collections.h"
#include "trie_search.h"
#define DEFAULT_AFFINE_GAP_OPEN_COST 3
#define DEFAULT_AFFINE_GAP_EXTEND_COST 2
@@ -43,5 +44,6 @@ double jaro_winkler_distance_unicode_prefix_threshold(uint32_array *u1_array, ui
double jaro_winkler_distance(const char *s1, const char *s2);
double jaro_winkler_distance_unicode(uint32_array *u1_array, uint32_array *u2_array);
phrase_array *multi_word_token_alignments(const char *s1, token_array *tokens1, const char *s2, token_array *tokens2);
#endif