From 6c39c663ff772877213f0973663ed45133f49b91 Mon Sep 17 00:00:00 2001 From: Al Date: Sat, 28 May 2016 19:25:12 -0400 Subject: [PATCH] [normalize] Adding NORMALIZE_STRING_COMPOSE for NFC unicode normalization --- src/normalize.c | 5 +++++ src/normalize.h | 1 + 2 files changed, 6 insertions(+) diff --git a/src/normalize.c b/src/normalize.c index 515da8ed..8e33a378 100644 --- a/src/normalize.c +++ b/src/normalize.c @@ -19,6 +19,11 @@ char *normalize_string_utf8(char *str, uint64_t options) { utf8proc_options |= UTF8PROC_OPTIONS_NFD; } + if (options & NORMALIZE_STRING_COMPOSE) { + have_utf8proc_options = true; + utf8proc_options |= UTF8PROC_OPTIONS_NFC; + } + if (options & NORMALIZE_STRING_STRIP_ACCENTS) { have_utf8proc_options = true; utf8proc_options |= UTF8PROC_OPTIONS_STRIP_ACCENTS; diff --git a/src/normalize.h b/src/normalize.h index 65d7fc29..782dce76 100644 --- a/src/normalize.h +++ b/src/normalize.h @@ -45,6 +45,7 @@ As well as normalizations for individual string tokens: #define NORMALIZE_STRING_LOWERCASE 1 << 4 #define NORMALIZE_STRING_TRIM 1 << 5 #define NORMALIZE_STRING_REPLACE_HYPHENS 1 << 6 +#define NORMALIZE_STRING_COMPOSE 1 << 7 #define NORMALIZE_TOKEN_REPLACE_HYPHENS 1 << 0 #define NORMALIZE_TOKEN_DELETE_HYPHENS 1 << 1