From 58661c9f2789bee5a7655e7908dc5438794acb91 Mon Sep 17 00:00:00 2001
From: Al
Date: Sun, 2 Apr 2017 23:32:24 -0400
Subject: [PATCH] [languages] adding replace_hyphens and split_alpha_from_numeric in language classifier input normalization

---
 src/language_features.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/language_features.c b/src/language_features.c
index d98432e9..6d54b166 100644
--- a/src/language_features.c
+++ b/src/language_features.c
@@ -11,8 +11,8 @@
 #define QUADGRAMS 4
 #define OCTAGRAMS 8
 
-#define LANGUAGE_CLASSIFIER_NORMALIZE_STRING_OPTIONS NORMALIZE_STRING_LOWERCASE | NORMALIZE_STRING_LATIN_ASCII
-#define LANGUAGE_CLASSIFIER_NORMALIZE_TOKEN_OPTIONS NORMALIZE_TOKEN_REPLACE_HYPHENS | NORMALIZE_TOKEN_DELETE_FINAL_PERIOD | NORMALIZE_TOKEN_DELETE_ACRONYM_PERIODS
+#define LANGUAGE_CLASSIFIER_NORMALIZE_STRING_OPTIONS NORMALIZE_STRING_LOWERCASE | NORMALIZE_STRING_LATIN_ASCII | NORMALIZE_STRING_REPLACE_HYPHENS
+#define LANGUAGE_CLASSIFIER_NORMALIZE_TOKEN_OPTIONS NORMALIZE_TOKEN_DELETE_FINAL_PERIOD | NORMALIZE_TOKEN_DELETE_ACRONYM_PERIODS | NORMALIZE_TOKEN_SPLIT_ALPHA_FROM_NUMERIC
 
 inline char *language_classifier_normalize_string(char *str) {
     return normalize_string_latin(str, strlen(str), LANGUAGE_CLASSIFIER_NORMALIZE_STRING_OPTIONS);