[names] Name affixes can be joined to the name by a hyphen as well as a space, or attached with no separator at all (for ideographic languages)
This commit is contained in:
@@ -28,18 +28,23 @@ class NameAffixes(object):
|
||||
|
||||
conf = yaml.load(open(os.path.join(config_dir, filename)))
|
||||
|
||||
prefixes = conf.get('prefixes', [])
|
||||
name_prefixes = [safe_decode(phrase).lower() for phrase in prefixes]
|
||||
self.language_prefixes[lang] = name_prefixes
|
||||
prefixes = [safe_decode(phrase).lower() for phrase in conf.get('prefixes', [])]
|
||||
prefixes_no_whitespace = [safe_decode(phrase).lower() for phrase in conf.get('prefixes_no_whitespace', [])]
|
||||
|
||||
suffixes = conf.get('suffixes', [])
|
||||
name_suffixes = [safe_decode(phrase).lower() for phrase in suffixes]
|
||||
self.language_suffixes[lang] = name_suffixes
|
||||
self.language_prefixes[lang] = prefixes + prefixes_no_whitespace
|
||||
|
||||
whitespace_phrase = six.u(' ') if conf.get('whitespace', True) else six.u('')
|
||||
suffixes = [safe_decode(phrase).lower() for phrase in conf.get('suffixes', [])]
|
||||
suffixes_no_whitespace = [safe_decode(phrase).lower() for phrase in conf.get('suffixes_no_whitespace', [])]
|
||||
|
||||
prefix_regex = six.u('^(?:{})').format(six.u('|').join(['{}{}'.format(s, whitespace_phrase) for s in name_prefixes]))
|
||||
suffix_regex = six.u('(?:{})$').format(six.u('|').join(['{}{}'.format(whitespace_phrase, s) for s in name_suffixes]))
|
||||
self.language_suffixes[lang] = suffixes + suffixes_no_whitespace
|
||||
|
||||
whitespace_phrase = six.u('[ \-]')
|
||||
|
||||
all_prefixes = [six.u('{}{}').format(s, whitespace_phrase) for s in prefixes] + prefixes_no_whitespace
|
||||
all_suffixes = [six.u('{}{}').format(whitespace_phrase, s) for s in suffixes] + suffixes_no_whitespace
|
||||
|
||||
prefix_regex = six.u('^(?:{})').format(six.u('|').join(all_prefixes))
|
||||
suffix_regex = six.u('(?:{})$').format(six.u('|').join(all_suffixes))
|
||||
|
||||
self.language_prefix_regexes[lang] = re.compile(prefix_regex, re.I | re.UNICODE)
|
||||
self.language_suffix_regexes[lang] = re.compile(suffix_regex, re.I | re.UNICODE)
|
||||
|
||||
Reference in New Issue
Block a user