[strings] reverting to utf8proc v1.3.1, as 2.0 and above can chop off certain sequences

This commit is contained in:
Al
2017-01-01 20:03:23 -05:00
parent fe88630f78
commit 5c56a44faa
3 changed files with 15340 additions and 13186 deletions

View File

@@ -1,6 +1,5 @@
/* -*- mode: c; c-basic-offset: 2; tab-width: 2; indent-tabs-mode: nil -*- */ /* -*- mode: c; c-basic-offset: 2; tab-width: 2; indent-tabs-mode: nil -*- */
/* /*
* Copyright (c) 2015 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
* Copyright (c) 2009 Public Software Group e. V., Berlin, Germany * Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
@@ -128,7 +127,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(
if ((uc - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8; if ((uc - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
if (uc < 0xe0) { // 2-byte sequence if (uc < 0xe0) { // 2-byte sequence
// Must have valid continuation character // Must have valid continuation character
if (str >= end || !utf_cont(*str)) return UTF8PROC_ERROR_INVALIDUTF8; if (!utf_cont(*str)) return UTF8PROC_ERROR_INVALIDUTF8;
*dst = ((uc & 0x1f)<<6) | (*str & 0x3f); *dst = ((uc & 0x1f)<<6) | (*str & 0x3f);
return 2; return 2;
} }
@@ -166,24 +165,24 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, ut
if (uc < 0x00) { if (uc < 0x00) {
return 0; return 0;
} else if (uc < 0x80) { } else if (uc < 0x80) {
dst[0] = (utf8proc_uint8_t) uc; dst[0] = uc;
return 1; return 1;
} else if (uc < 0x800) { } else if (uc < 0x800) {
dst[0] = (utf8proc_uint8_t)(0xC0 + (uc >> 6)); dst[0] = 0xC0 + (uc >> 6);
dst[1] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F)); dst[1] = 0x80 + (uc & 0x3F);
return 2; return 2;
// Note: we allow encoding 0xd800-0xdfff here, so as not to change // Note: we allow encoding 0xd800-0xdfff here, so as not to change
// the API, however, these are actually invalid in UTF-8 // the API, however, these are actually invalid in UTF-8
} else if (uc < 0x10000) { } else if (uc < 0x10000) {
dst[0] = (utf8proc_uint8_t)(0xE0 + (uc >> 12)); dst[0] = 0xE0 + (uc >> 12);
dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F)); dst[1] = 0x80 + ((uc >> 6) & 0x3F);
dst[2] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F)); dst[2] = 0x80 + (uc & 0x3F);
return 3; return 3;
} else if (uc < 0x110000) { } else if (uc < 0x110000) {
dst[0] = (utf8proc_uint8_t)(0xF0 + (uc >> 18)); dst[0] = 0xF0 + (uc >> 18);
dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 12) & 0x3F)); dst[1] = 0x80 + ((uc >> 12) & 0x3F);
dst[2] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F)); dst[2] = 0x80 + ((uc >> 6) & 0x3F);
dst[3] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F)); dst[3] = 0x80 + (uc & 0x3F);
return 4; return 4;
} else return 0; } else return 0;
} }
@@ -193,28 +192,28 @@ static utf8proc_ssize_t unsafe_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t
if (uc < 0x00) { if (uc < 0x00) {
return 0; return 0;
} else if (uc < 0x80) { } else if (uc < 0x80) {
dst[0] = (utf8proc_uint8_t)uc; dst[0] = uc;
return 1; return 1;
} else if (uc < 0x800) { } else if (uc < 0x800) {
dst[0] = (utf8proc_uint8_t)(0xC0 + (uc >> 6)); dst[0] = 0xC0 + (uc >> 6);
dst[1] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F)); dst[1] = 0x80 + (uc & 0x3F);
return 2; return 2;
} else if (uc == 0xFFFF) { } else if (uc == 0xFFFF) {
dst[0] = (utf8proc_uint8_t)0xFF; dst[0] = 0xFF;
return 1; return 1;
} else if (uc == 0xFFFE) { } else if (uc == 0xFFFE) {
dst[0] = (utf8proc_uint8_t)0xFE; dst[0] = 0xFE;
return 1; return 1;
} else if (uc < 0x10000) { } else if (uc < 0x10000) {
dst[0] = (utf8proc_uint8_t)(0xE0 + (uc >> 12)); dst[0] = 0xE0 + (uc >> 12);
dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F)); dst[1] = 0x80 + ((uc >> 6) & 0x3F);
dst[2] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F)); dst[2] = 0x80 + (uc & 0x3F);
return 3; return 3;
} else if (uc < 0x110000) { } else if (uc < 0x110000) {
dst[0] = (utf8proc_uint8_t)(0xF0 + (uc >> 18)); dst[0] = 0xF0 + (uc >> 18);
dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 12) & 0x3F)); dst[1] = 0x80 + ((uc >> 12) & 0x3F);
dst[2] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F)); dst[2] = 0x80 + ((uc >> 6) & 0x3F);
dst[3] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F)); dst[3] = 0x80 + (uc & 0x3F);
return 4; return 4;
} else return 0; } else return 0;
} }
@@ -233,144 +232,48 @@ UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int
return uc < 0 || uc >= 0x110000 ? utf8proc_properties : unsafe_get_property(uc); return uc < 0 || uc >= 0x110000 ? utf8proc_properties : unsafe_get_property(uc);
} }
/* return whether there is a grapheme break between boundclasses lbc and tbc /* return whether there is a grapheme break between boundclasses lbc and tbc */
(according to the definition of extended grapheme clusters) static utf8proc_bool grapheme_break(int lbc, int tbc) {
Rule numbering refers to TR29 Version 29 (Unicode 9.0.0):
http://www.unicode.org/reports/tr29/tr29-29.html
CAVEATS:
Please note that evaluation of GB10 (grapheme breaks within emoji modifier sequences)
and GB 12/13 (regional indicator code points) require knowledge of previous characters
and are thus not handled by this function. This may result in an incorrect break before
an E_Modifier class codepoint and an incorrectly missing break between two
REGIONAL_INDICATOR class code points if such support does not exist in the caller.
See the special support in grapheme_break_extended, for required bookkeeping by the caller.
*/
static utf8proc_bool grapheme_break_simple(int lbc, int tbc) {
return return
(lbc == UTF8PROC_BOUNDCLASS_START) ? true : // GB1 (lbc == UTF8PROC_BOUNDCLASS_START) ? true :
(lbc == UTF8PROC_BOUNDCLASS_CR && // GB3 (lbc == UTF8PROC_BOUNDCLASS_CR &&
tbc == UTF8PROC_BOUNDCLASS_LF) ? false : // --- tbc == UTF8PROC_BOUNDCLASS_LF) ? false :
(lbc >= UTF8PROC_BOUNDCLASS_CR && lbc <= UTF8PROC_BOUNDCLASS_CONTROL) ? true : // GB4 (lbc >= UTF8PROC_BOUNDCLASS_CR && lbc <= UTF8PROC_BOUNDCLASS_CONTROL) ? true :
(tbc >= UTF8PROC_BOUNDCLASS_CR && tbc <= UTF8PROC_BOUNDCLASS_CONTROL) ? true : // GB5 (tbc >= UTF8PROC_BOUNDCLASS_CR && tbc <= UTF8PROC_BOUNDCLASS_CONTROL) ? true :
(lbc == UTF8PROC_BOUNDCLASS_L && // GB6 (tbc == UTF8PROC_BOUNDCLASS_EXTEND) ? false :
(tbc == UTF8PROC_BOUNDCLASS_L || // --- (lbc == UTF8PROC_BOUNDCLASS_L &&
tbc == UTF8PROC_BOUNDCLASS_V || // --- (tbc == UTF8PROC_BOUNDCLASS_L ||
tbc == UTF8PROC_BOUNDCLASS_LV || // --- tbc == UTF8PROC_BOUNDCLASS_V ||
tbc == UTF8PROC_BOUNDCLASS_LVT)) ? false : // --- tbc == UTF8PROC_BOUNDCLASS_LV ||
((lbc == UTF8PROC_BOUNDCLASS_LV || // GB7 tbc == UTF8PROC_BOUNDCLASS_LVT)) ? false :
lbc == UTF8PROC_BOUNDCLASS_V) && // --- ((lbc == UTF8PROC_BOUNDCLASS_LV ||
(tbc == UTF8PROC_BOUNDCLASS_V || // --- lbc == UTF8PROC_BOUNDCLASS_V) &&
tbc == UTF8PROC_BOUNDCLASS_T)) ? false : // --- (tbc == UTF8PROC_BOUNDCLASS_V ||
((lbc == UTF8PROC_BOUNDCLASS_LVT || // GB8 tbc == UTF8PROC_BOUNDCLASS_T)) ? false :
lbc == UTF8PROC_BOUNDCLASS_T) && // --- ((lbc == UTF8PROC_BOUNDCLASS_LVT ||
tbc == UTF8PROC_BOUNDCLASS_T) ? false : // --- lbc == UTF8PROC_BOUNDCLASS_T) &&
(tbc == UTF8PROC_BOUNDCLASS_EXTEND || // GB9 tbc == UTF8PROC_BOUNDCLASS_T) ? false :
tbc == UTF8PROC_BOUNDCLASS_ZWJ || // --- (lbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR &&
tbc == UTF8PROC_BOUNDCLASS_SPACINGMARK || // GB9a tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR) ? false :
lbc == UTF8PROC_BOUNDCLASS_PREPEND) ? false : // GB9b (tbc != UTF8PROC_BOUNDCLASS_SPACINGMARK);
((lbc == UTF8PROC_BOUNDCLASS_E_BASE || // GB10 (requires additional handling below)
lbc == UTF8PROC_BOUNDCLASS_E_BASE_GAZ) && // ----
tbc == UTF8PROC_BOUNDCLASS_E_MODIFIER) ? false : // ----
(lbc == UTF8PROC_BOUNDCLASS_ZWJ && // GB11
(tbc == UTF8PROC_BOUNDCLASS_GLUE_AFTER_ZWJ || // ----
tbc == UTF8PROC_BOUNDCLASS_E_BASE_GAZ)) ? false : // ----
(lbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR && // GB12/13 (requires additional handling below)
tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR) ? false : // ----
true; // GB999
} }
static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t *state) /* return whether there is a grapheme break between codepoints c1 and c2 */
{ UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t c1, utf8proc_int32_t c2) {
int lbc_override = lbc; return grapheme_break(utf8proc_get_property(c1)->boundclass,
if (state && *state != UTF8PROC_BOUNDCLASS_START) utf8proc_get_property(c2)->boundclass);
lbc_override = *state;
utf8proc_bool break_permitted = grapheme_break_simple(lbc_override, tbc);
if (state) {
// Special support for GB 12/13 made possible by GB999. After two RI
// class codepoints we want to force a break. Do this by resetting the
// second RI's bound class to UTF8PROC_BOUNDCLASS_OTHER, to force a break
// after that character according to GB999 (unless of course such a break is
// forbidden by a different rule such as GB9).
if (*state == tbc && tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR)
*state = UTF8PROC_BOUNDCLASS_OTHER;
// Special support for GB10. Fold any EXTEND codepoints into the previous
// boundclass if we're dealing with an emoji base boundclass.
else if ((*state == UTF8PROC_BOUNDCLASS_E_BASE ||
*state == UTF8PROC_BOUNDCLASS_E_BASE_GAZ) &&
tbc == UTF8PROC_BOUNDCLASS_EXTEND)
*state = UTF8PROC_BOUNDCLASS_E_BASE;
else
*state = tbc;
}
return break_permitted;
}
UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break_stateful(
utf8proc_int32_t c1, utf8proc_int32_t c2, utf8proc_int32_t *state) {
return grapheme_break_extended(utf8proc_get_property(c1)->boundclass,
utf8proc_get_property(c2)->boundclass,
state);
}
UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(
utf8proc_int32_t c1, utf8proc_int32_t c2) {
return utf8proc_grapheme_break_stateful(c1, c2, NULL);
}
static utf8proc_int32_t seqindex_decode_entry(const utf8proc_uint16_t **entry)
{
utf8proc_int32_t entry_cp = **entry;
if ((entry_cp & 0xF800) == 0xD800) {
*entry = *entry + 1;
entry_cp = ((entry_cp & 0x03FF) << 10) | (**entry & 0x03FF);
entry_cp += 0x10000;
}
return entry_cp;
}
static utf8proc_int32_t seqindex_decode_index(const utf8proc_uint32_t seqindex)
{
const utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex];
return seqindex_decode_entry(&entry);
}
static utf8proc_ssize_t seqindex_write_char_decomposed(utf8proc_uint16_t seqindex, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
utf8proc_ssize_t written = 0;
const utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex & 0x1FFF];
int len = seqindex >> 13;
if (len >= 7) {
len = *entry;
entry++;
}
for (; len >= 0; entry++, len--) {
utf8proc_int32_t entry_cp = seqindex_decode_entry(&entry);
written += utf8proc_decompose_char(entry_cp, dst+written,
(bufsize > written) ? (bufsize - written) : 0, options,
last_boundclass);
if (written < 0) return UTF8PROC_ERROR_OVERFLOW;
}
return written;
} }
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c) UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c)
{ {
utf8proc_int32_t cl = utf8proc_get_property(c)->lowercase_seqindex; utf8proc_int32_t cl = utf8proc_get_property(c)->lowercase_mapping;
return cl != UINT16_MAX ? seqindex_decode_index(cl) : c; return cl >= 0 ? cl : c;
} }
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c) UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c)
{ {
utf8proc_int32_t cu = utf8proc_get_property(c)->uppercase_seqindex; utf8proc_int32_t cu = utf8proc_get_property(c)->uppercase_mapping;
return cu != UINT16_MAX ? seqindex_decode_index(cu) : c; return cu >= 0 ? cu : c;
}
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c)
{
utf8proc_int32_t cu = utf8proc_get_property(c)->titlecase_seqindex;
return cu != UINT16_MAX ? seqindex_decode_index(cu) : c;
} }
/* return a character width analogous to wcwidth (except portable and /* return a character width analogous to wcwidth (except portable and
@@ -453,20 +356,39 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc,
category == UTF8PROC_CATEGORY_ME) return 0; category == UTF8PROC_CATEGORY_ME) return 0;
} }
if (options & UTF8PROC_CASEFOLD) { if (options & UTF8PROC_CASEFOLD) {
if (property->casefold_seqindex != UINT16_MAX) { if (property->casefold_mapping) {
return seqindex_write_char_decomposed(property->casefold_seqindex, dst, bufsize, options, last_boundclass); const utf8proc_int32_t *casefold_entry;
utf8proc_ssize_t written = 0;
for (casefold_entry = property->casefold_mapping;
*casefold_entry >= 0; casefold_entry++) {
written += utf8proc_decompose_char(*casefold_entry, dst+written,
(bufsize > written) ? (bufsize - written) : 0, options,
last_boundclass);
if (written < 0) return UTF8PROC_ERROR_OVERFLOW;
}
return written;
} }
} }
if (options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) { if (options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) {
if (property->decomp_seqindex != UINT16_MAX && if (property->decomp_mapping &&
(!property->decomp_type || (options & UTF8PROC_COMPAT))) { (!property->decomp_type || (options & UTF8PROC_COMPAT))) {
return seqindex_write_char_decomposed(property->decomp_seqindex, dst, bufsize, options, last_boundclass); const utf8proc_int32_t *decomp_entry;
utf8proc_ssize_t written = 0;
for (decomp_entry = property->decomp_mapping;
*decomp_entry >= 0; decomp_entry++) {
written += utf8proc_decompose_char(*decomp_entry, dst+written,
(bufsize > written) ? (bufsize - written) : 0, options,
last_boundclass);
if (written < 0) return UTF8PROC_ERROR_OVERFLOW;
}
return written;
} }
} }
if (options & UTF8PROC_CHARBOUND) { if (options & UTF8PROC_CHARBOUND) {
utf8proc_bool boundary; utf8proc_bool boundary;
int tbc = property->boundclass; int tbc = property->boundclass;
boundary = grapheme_break_extended(*last_boundclass, tbc, last_boundclass); boundary = grapheme_break(*last_boundclass, tbc);
*last_boundclass = tbc;
if (boundary) { if (boundary) {
if (bufsize >= 1) dst[0] = 0xFFFF; if (bufsize >= 1) dst[0] = 0xFFFF;
if (bufsize >= 2) dst[1] = uc; if (bufsize >= 2) dst[1] = uc;
@@ -480,14 +402,6 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc,
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose( UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options
) {
return utf8proc_decompose_custom(str, strlen, buffer, bufsize, options, NULL, NULL);
}
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_custom(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options,
utf8proc_custom_func custom_func, void *custom_data
) { ) {
/* strlen will be ignored, if UTF8PROC_NULLTERM is set in options */ /* strlen will be ignored, if UTF8PROC_NULLTERM is set in options */
utf8proc_ssize_t wpos = 0; utf8proc_ssize_t wpos = 0;
@@ -514,9 +428,6 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_custom(
rpos += utf8proc_iterate(str + rpos, strlen - rpos, &uc); rpos += utf8proc_iterate(str + rpos, strlen - rpos, &uc);
if (uc < 0) return UTF8PROC_ERROR_INVALIDUTF8; if (uc < 0) return UTF8PROC_ERROR_INVALIDUTF8;
} }
if (custom_func != NULL) {
uc = custom_func(uc, custom_data); /* user-specified custom mapping */
}
decomp_result = utf8proc_decompose_char( decomp_result = utf8proc_decompose_char(
uc, buffer + wpos, (bufsize > wpos) ? (bufsize - wpos) : 0, options, uc, buffer + wpos, (bufsize > wpos) ? (bufsize - wpos) : 0, options,
&boundclass &boundclass
@@ -524,8 +435,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_custom(
if (decomp_result < 0) return decomp_result; if (decomp_result < 0) return decomp_result;
wpos += decomp_result; wpos += decomp_result;
/* prohibiting integer overflows due to too long strings: */ /* prohibiting integer overflows due to too long strings: */
if (wpos < 0 || if (wpos < 0 || wpos > SSIZE_MAX/sizeof(utf8proc_int32_t)/2)
wpos > (utf8proc_ssize_t)(SSIZE_MAX/sizeof(utf8proc_int32_t)/2))
return UTF8PROC_ERROR_OVERFLOW; return UTF8PROC_ERROR_OVERFLOW;
} }
} }
@@ -551,8 +461,9 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_custom(
return wpos; return wpos;
} }
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) { UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
/* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored */ /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored
ASSERT: 'buffer' has one spare byte of free space at the end! */
if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) { if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) {
utf8proc_ssize_t rpos; utf8proc_ssize_t rpos;
utf8proc_ssize_t wpos = 0; utf8proc_ssize_t wpos = 0;
@@ -625,19 +536,13 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *b
if (!starter_property) { if (!starter_property) {
starter_property = unsafe_get_property(*starter); starter_property = unsafe_get_property(*starter);
} }
if (starter_property->comb_index < 0x8000 && if (starter_property->comb1st_index >= 0 &&
current_property->comb_index != UINT16_MAX && current_property->comb2nd_index >= 0) {
current_property->comb_index >= 0x8000) { composition = utf8proc_combinations[
int sidx = starter_property->comb_index; starter_property->comb1st_index +
int idx = (current_property->comb_index & 0x3FFF) - utf8proc_combinations[sidx]; current_property->comb2nd_index
if (idx >= 0 && idx <= utf8proc_combinations[sidx + 1] ) { ];
idx += sidx + 2; if (composition >= 0 && (!(options & UTF8PROC_STABLE) ||
if (current_property->comb_index & 0x4000) {
composition = (utf8proc_combinations[idx] << 16) | utf8proc_combinations[idx+1];
} else
composition = utf8proc_combinations[idx];
if (composition > 0 && (!(options & UTF8PROC_STABLE) ||
!(unsafe_get_property(composition)->comp_exclusion))) { !(unsafe_get_property(composition)->comp_exclusion))) {
*starter = composition; *starter = composition;
starter_property = NULL; starter_property = NULL;
@@ -645,7 +550,6 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *b
} }
} }
} }
}
buffer[wpos] = current_char; buffer[wpos] = current_char;
if (current_property->combining_class) { if (current_property->combining_class) {
if (current_property->combining_class > max_combining_class) { if (current_property->combining_class > max_combining_class) {
@@ -660,14 +564,6 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *b
} }
length = wpos; length = wpos;
} }
return length;
}
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) {
/* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored
ASSERT: 'buffer' has one spare byte of free space at the end! */
length = utf8proc_normalize_utf32(buffer, length, options);
if (length < 0) return length;
{ {
utf8proc_ssize_t rpos, wpos = 0; utf8proc_ssize_t rpos, wpos = 0;
utf8proc_int32_t uc; utf8proc_int32_t uc;
@@ -689,22 +585,15 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map( UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options
) {
return utf8proc_map_custom(str, strlen, dstptr, options, NULL, NULL);
}
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options,
utf8proc_custom_func custom_func, void *custom_data
) { ) {
utf8proc_int32_t *buffer; utf8proc_int32_t *buffer;
utf8proc_ssize_t result; utf8proc_ssize_t result;
*dstptr = NULL; *dstptr = NULL;
result = utf8proc_decompose_custom(str, strlen, NULL, 0, options, custom_func, custom_data); result = utf8proc_decompose(str, strlen, NULL, 0, options);
if (result < 0) return result; if (result < 0) return result;
buffer = (utf8proc_int32_t *) malloc(result * sizeof(utf8proc_int32_t) + 1); buffer = (utf8proc_int32_t *) malloc(result * sizeof(utf8proc_int32_t) + 1);
if (!buffer) return UTF8PROC_ERROR_NOMEM; if (!buffer) return UTF8PROC_ERROR_NOMEM;
result = utf8proc_decompose_custom(str, strlen, buffer, result, options, custom_func, custom_data); result = utf8proc_decompose(str, strlen, buffer, result, options);
if (result < 0) { if (result < 0) {
free(buffer); free(buffer);
return result; return result;
@@ -750,3 +639,4 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str)
UTF8PROC_COMPOSE | UTF8PROC_COMPAT); UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
return retval; return retval;
} }

View File

@@ -1,5 +1,4 @@
/* /*
* Copyright (c) 2015 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
* Copyright (c) 2009 Public Software Group e. V., Berlin, Germany * Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
@@ -28,7 +27,7 @@
* utf8proc is a free/open-source (MIT/expat licensed) C library * utf8proc is a free/open-source (MIT/expat licensed) C library
* providing Unicode normalization, case-folding, and other operations * providing Unicode normalization, case-folding, and other operations
* for strings in the UTF-8 encoding, supporting Unicode version * for strings in the UTF-8 encoding, supporting Unicode version
* 8.0.0. See the utf8proc home page (http://julialang.org/utf8proc/) * 7.0.0. See the utf8proc home page (http://julialang.org/utf8proc/)
* for downloads and other information, or the source code on github * for downloads and other information, or the source code on github
* (https://github.com/JuliaLang/utf8proc). * (https://github.com/JuliaLang/utf8proc).
* *
@@ -63,23 +62,21 @@
* runtime version may append a string like "-dev" to the version number * runtime version may append a string like "-dev" to the version number
* for prerelease versions. * for prerelease versions.
* *
* @note The shared-library version number in the Makefile * @note The shared-library version number in the Makefile may be different,
* (and CMakeLists.txt, and MANIFEST) may be different,
* being based on ABI compatibility rather than API compatibility. * being based on ABI compatibility rather than API compatibility.
*/ */
/** @{ */ /** @{ */
/** The MAJOR version number (increased when backwards API compatibility is broken). */ /** The MAJOR version number (increased when backwards API compatibility is broken). */
#define UTF8PROC_VERSION_MAJOR 2 #define UTF8PROC_VERSION_MAJOR 1
/** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). */ /** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). */
#define UTF8PROC_VERSION_MINOR 1 #define UTF8PROC_VERSION_MINOR 3
/** The PATCH version (increased for fixes that do not change the API). */ /** The PATCH version (increased for fixes that do not change the API). */
#define UTF8PROC_VERSION_PATCH 0 #define UTF8PROC_VERSION_PATCH 0
/** @} */ /** @} */
#include <stdlib.h> #include <stdlib.h>
#include <sys/types.h> #include <sys/types.h>
#if defined(_MSC_VER) && _MSC_VER < 1800 #ifdef _MSC_VER
// MSVC prior to 2013 lacked stdbool.h and inttypes.h
typedef signed char utf8proc_int8_t; typedef signed char utf8proc_int8_t;
typedef unsigned char utf8proc_uint8_t; typedef unsigned char utf8proc_uint8_t;
typedef short utf8proc_int16_t; typedef short utf8proc_int16_t;
@@ -94,18 +91,12 @@ typedef int utf8proc_ssize_t;
typedef unsigned int utf8proc_size_t; typedef unsigned int utf8proc_size_t;
# endif # endif
# ifndef __cplusplus # ifndef __cplusplus
// emulate C99 bool
typedef unsigned char utf8proc_bool; typedef unsigned char utf8proc_bool;
# ifndef __bool_true_false_are_defined enum {false, true};
# define false 0
# define true 1
# define __bool_true_false_are_defined 1
# endif
# else # else
typedef bool utf8proc_bool; typedef bool utf8proc_bool;
# endif # endif
#else #else
# include <stddef.h>
# include <stdbool.h> # include <stdbool.h>
# include <inttypes.h> # include <inttypes.h>
typedef int8_t utf8proc_int8_t; typedef int8_t utf8proc_int8_t;
@@ -115,7 +106,7 @@ typedef uint16_t utf8proc_uint16_t;
typedef int32_t utf8proc_int32_t; typedef int32_t utf8proc_int32_t;
typedef uint32_t utf8proc_uint32_t; typedef uint32_t utf8proc_uint32_t;
typedef size_t utf8proc_size_t; typedef size_t utf8proc_size_t;
typedef ptrdiff_t utf8proc_ssize_t; typedef ssize_t utf8proc_ssize_t;
typedef bool utf8proc_bool; typedef bool utf8proc_bool;
#endif #endif
#include <limits.h> #include <limits.h>
@@ -140,10 +131,6 @@ extern "C" {
#define SSIZE_MAX ((size_t)SIZE_MAX/2) #define SSIZE_MAX ((size_t)SIZE_MAX/2)
#endif #endif
#ifndef UINT16_MAX
# define UINT16_MAX 65535U
#endif
/** /**
* Option flags used by several functions in the library. * Option flags used by several functions in the library.
*/ */
@@ -250,12 +237,13 @@ typedef struct utf8proc_property_struct {
* @see utf8proc_decomp_type_t. * @see utf8proc_decomp_type_t.
*/ */
utf8proc_propval_t decomp_type; utf8proc_propval_t decomp_type;
utf8proc_uint16_t decomp_seqindex; const utf8proc_int32_t *decomp_mapping;
utf8proc_uint16_t casefold_seqindex; const utf8proc_int32_t *casefold_mapping;
utf8proc_uint16_t uppercase_seqindex; utf8proc_int32_t uppercase_mapping;
utf8proc_uint16_t lowercase_seqindex; utf8proc_int32_t lowercase_mapping;
utf8proc_uint16_t titlecase_seqindex; utf8proc_int32_t titlecase_mapping;
utf8proc_uint16_t comb_index; utf8proc_int32_t comb1st_index;
utf8proc_int32_t comb2nd_index;
unsigned bidi_mirrored:1; unsigned bidi_mirrored:1;
unsigned comp_exclusion:1; unsigned comp_exclusion:1;
/** /**
@@ -266,14 +254,13 @@ typedef struct utf8proc_property_struct {
*/ */
unsigned ignorable:1; unsigned ignorable:1;
unsigned control_boundary:1; unsigned control_boundary:1;
/** The width of the codepoint. */
unsigned charwidth:2;
unsigned pad:2;
/** /**
* Boundclass. * Boundclass.
* @see utf8proc_boundclass_t. * @see utf8proc_boundclass_t.
*/ */
unsigned boundclass:8; unsigned boundclass:4;
/** The width of the codepoint. */
unsigned charwidth:2;
} utf8proc_property_t; } utf8proc_property_t;
/** Unicode categories. */ /** Unicode categories. */
@@ -357,7 +344,7 @@ typedef enum {
UTF8PROC_DECOMP_TYPE_COMPAT = 16, /**< Compat */ UTF8PROC_DECOMP_TYPE_COMPAT = 16, /**< Compat */
} utf8proc_decomp_type_t; } utf8proc_decomp_type_t;
/** Boundclass property. (TR29) */ /** Boundclass property. */
typedef enum { typedef enum {
UTF8PROC_BOUNDCLASS_START = 0, /**< Start */ UTF8PROC_BOUNDCLASS_START = 0, /**< Start */
UTF8PROC_BOUNDCLASS_OTHER = 1, /**< Other */ UTF8PROC_BOUNDCLASS_OTHER = 1, /**< Other */
@@ -372,21 +359,8 @@ typedef enum {
UTF8PROC_BOUNDCLASS_LVT = 10, /**< Lvt */ UTF8PROC_BOUNDCLASS_LVT = 10, /**< Lvt */
UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR = 11, /**< Regional indicator */ UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR = 11, /**< Regional indicator */
UTF8PROC_BOUNDCLASS_SPACINGMARK = 12, /**< Spacingmark */ UTF8PROC_BOUNDCLASS_SPACINGMARK = 12, /**< Spacingmark */
UTF8PROC_BOUNDCLASS_PREPEND = 13, /**< Prepend */
UTF8PROC_BOUNDCLASS_ZWJ = 14, /**< Zero Width Joiner */
UTF8PROC_BOUNDCLASS_E_BASE = 15, /**< Emoji Base */
UTF8PROC_BOUNDCLASS_E_MODIFIER = 16, /**< Emoji Modifier */
UTF8PROC_BOUNDCLASS_GLUE_AFTER_ZWJ = 17, /**< Glue_After_ZWJ */
UTF8PROC_BOUNDCLASS_E_BASE_GAZ = 18, /**< E_BASE + GLUE_AFTER_ZWJ */
} utf8proc_boundclass_t; } utf8proc_boundclass_t;
/**
* Function pointer type passed to @ref utf8proc_map_custom and
* @ref utf8proc_decompose_custom, which is used to specify a user-defined
* mapping of codepoints to be applied in conjunction with other mappings.
*/
typedef utf8proc_int32_t (*utf8proc_custom_func)(utf8proc_int32_t codepoint, void *data);
/** /**
* Array containing the byte lengths of a UTF-8 encoded codepoint based * Array containing the byte lengths of a UTF-8 encoded codepoint based
* on the first byte. * on the first byte.
@@ -494,7 +468,6 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(
* `buffer` (which must contain at least `bufsize` entries). In case of * `buffer` (which must contain at least `bufsize` entries). In case of
* success, the number of codepoints written is returned; in case of an * success, the number of codepoints written is returned; in case of an
* error, a negative error code is returned (@ref utf8proc_errmsg). * error, a negative error code is returned (@ref utf8proc_errmsg).
* See @ref utf8proc_decompose_custom to supply additional transformations.
* *
* If the number of written codepoints would be bigger than `bufsize`, the * If the number of written codepoints would be bigger than `bufsize`, the
* required buffer size is returned, while the buffer will be overwritten with * required buffer size is returned, while the buffer will be overwritten with
@@ -505,47 +478,9 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose(
utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options
); );
/**
* The same as @ref utf8proc_decompose, but also takes a `custom_func` mapping function
* that is called on each codepoint in `str` before any other transformations
* (along with a `custom_data` pointer that is passed through to `custom_func`).
* The `custom_func` argument is ignored if it is `NULL`. See also @ref utf8proc_map_custom.
*/
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_custom(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen,
utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options,
utf8proc_custom_func custom_func, void *custom_data
);
/**
* Normalizes the sequence of `length` codepoints pointed to by `buffer`
* in-place (i.e., the result is also stored in `buffer`).
*
* @param buffer the (native-endian UTF-32) unicode codepoints to normalize.
* @param length the length (in codepoints) of the buffer.
* @param options a bitwise or (`|`) of one or more of the following flags:
* - @ref UTF8PROC_NLF2LS - convert LF, CRLF, CR and NEL into LS
* - @ref UTF8PROC_NLF2PS - convert LF, CRLF, CR and NEL into PS
* - @ref UTF8PROC_NLF2LF - convert LF, CRLF, CR and NEL into LF
* - @ref UTF8PROC_STRIPCC - strip or convert all non-affected control characters
* - @ref UTF8PROC_COMPOSE - try to combine decomposed codepoints into composite
* codepoints
* - @ref UTF8PROC_STABLE - prohibit combining characters that would violate
* the unicode versioning stability
*
* @return
* In case of success, the length (in codepoints) of the normalized UTF-32 string is
* returned; otherwise, a negative error code is returned (@ref utf8proc_errmsg).
*
* @warning The entries of the array pointed to by `buffer` have to be in the
* range `0x0000` to `0x10FFFF`. Otherwise, the program might crash!
*/
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options);
/** /**
* Reencodes the sequence of `length` codepoints pointed to by `buffer` * Reencodes the sequence of `length` codepoints pointed to by `buffer`
* as UTF-8 data in-place (i.e., the result is also stored in `buffer`). * as UTF-8 data in-place (i.e., the result is also stored in `buffer`).
* Can optionally normalize the UTF-32 sequence prior to UTF-8 conversion.
* *
* @param buffer the (native-endian UTF-32) unicode codepoints to re-encode. * @param buffer the (native-endian UTF-32) unicode codepoints to re-encode.
* @param length the length (in codepoints) of the buffer. * @param length the length (in codepoints) of the buffer.
@@ -558,12 +493,10 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *b
* codepoints * codepoints
* - @ref UTF8PROC_STABLE - prohibit combining characters that would violate * - @ref UTF8PROC_STABLE - prohibit combining characters that would violate
* the unicode versioning stability * the unicode versioning stability
* - @ref UTF8PROC_CHARBOUND - insert 0xFF bytes before each grapheme cluster
* *
* @return * @return
* In case of success, the length (in bytes) of the resulting nul-terminated * In case of success, the length (in bytes) of the resulting UTF-8 string is
* UTF-8 string is returned; otherwise, a negative error code is returned * returned; otherwise, a negative error code is returned (@ref utf8proc_errmsg).
* (@ref utf8proc_errmsg).
* *
* @warning The amount of free space pointed to by `buffer` must * @warning The amount of free space pointed to by `buffer` must
* exceed the amount of the input data by one byte, and the * exceed the amount of the input data by one byte, and the
@@ -575,26 +508,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
/** /**
* Given a pair of consecutive codepoints, return whether a grapheme break is * Given a pair of consecutive codepoints, return whether a grapheme break is
* permitted between them (as defined by the extended grapheme clusters in UAX#29). * permitted between them (as defined by the extended grapheme clusters in UAX#29).
*
* @param state Beginning with Version 29 (Unicode 9.0.0), this algorithm requires
* state to break graphemes. This state can be passed in as a pointer
* in the `state` argument and should initially be set to 0. If the
* state is not passed in (i.e. a null pointer is passed), UAX#29 rules
* GB10/12/13 which require this state will not be applied, essentially
* matching the rules in Unicode 8.0.0.
*
* @warning If the state parameter is used, `utf8proc_grapheme_break_stateful` must
* be called IN ORDER on ALL potential breaks in a string.
*/ */
UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break_stateful( UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2);
utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2, utf8proc_int32_t *state);
/**
* Same as @ref utf8proc_grapheme_break_stateful, except without support for the
* Unicode 9 additions to the algorithm. Supported for legacy reasons.
*/
UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break(
utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2);
/** /**
@@ -611,13 +526,6 @@ UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c);
*/ */
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c); UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c);
/**
* Given a codepoint `c`, return the codepoint of the corresponding
* title-case character, if any; otherwise (if there is no title-case
* variant, or if `c` is not a valid codepoint) return `c`.
*/
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c);
/** /**
* Given a codepoint, return a character width analogous to `wcwidth(codepoint)`, * Given a codepoint, return a character width analogous to `wcwidth(codepoint)`,
* except that a width of 0 is returned for non-printable codepoints * except that a width of 0 is returned for non-printable codepoints
@@ -650,8 +558,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoi
* in any case the result will be NULL terminated (though it might * in any case the result will be NULL terminated (though it might
* contain NULL characters with the string if `str` contained NULL * contain NULL characters with the string if `str` contained NULL
* characters). Other flags in the `options` field are passed to the * characters). Other flags in the `options` field are passed to the
* functions defined above, and regarded as described. See also * functions defined above, and regarded as described.
* @ref utf8proc_map_custom to supply a custom codepoint transformation.
* *
* In case of success the length of the new string is returned, * In case of success the length of the new string is returned,
* otherwise a negative error code is returned. * otherwise a negative error code is returned.
@@ -663,17 +570,6 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options
); );
/**
* Like @ref utf8proc_map, but also takes a `custom_func` mapping function
* that is called on each codepoint in `str` before any other transformations
* (along with a `custom_data` pointer that is passed through to `custom_func`).
* The `custom_func` argument is ignored if it is `NULL`.
*/
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options,
utf8proc_custom_func custom_func, void *custom_data
);
/** @name Unicode normalization /** @name Unicode normalization
* *
* Returns a pointer to newly allocated memory of a NFD, NFC, NFKD or NFKC * Returns a pointer to newly allocated memory of a NFD, NFC, NFKD or NFKC
@@ -686,9 +582,9 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom(
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str); UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str);
/** NFC normalization (@ref UTF8PROC_COMPOSE). */ /** NFC normalization (@ref UTF8PROC_COMPOSE). */
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str); UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str);
/** NFKD normalization (@ref UTF8PROC_DECOMPOSE and @ref UTF8PROC_COMPAT). */ /** NFKD normalization (@ref UTF8PROC_DECOMPOSE and @ref UTF8PROC_COMPAT). */
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str); UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str);
/** NFKC normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */ /** NFKC normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str); UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str);
/** @} */ /** @} */
@@ -697,3 +593,4 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str);
#endif #endif
#endif #endif

File diff suppressed because it is too large Load Diff