|
|
|
@@ -34,6 +34,7 @@ typedef enum dictionary_type {
|
|
|
|
DICTIONARY_SYNONYM = 2,
|
|
|
|
DICTIONARY_SYNONYM = 2,
|
|
|
|
DICTIONARY_STOPWORD = 3,
|
|
|
|
DICTIONARY_STOPWORD = 3,
|
|
|
|
DICTIONARY_ELISION = 4,
|
|
|
|
DICTIONARY_ELISION = 4,
|
|
|
|
|
|
|
|
DICTIONARY_AMBIGUOUS_EXPANSION = 5,
|
|
|
|
|
|
|
|
|
|
|
|
DICTIONARY_STREET_NAME = 10,
|
|
|
|
DICTIONARY_STREET_NAME = 10,
|
|
|
|
DICTIONARY_STREET_TYPE = 11,
|
|
|
|
DICTIONARY_STREET_TYPE = 11,
|
|
|
|
@@ -84,38 +85,36 @@ typedef struct named_gazetteer {
|
|
|
|
} named_gazetteer_t;
|
|
|
|
} named_gazetteer_t;
|
|
|
|
|
|
|
|
|
|
|
|
// Only need these for the in-memory dictionaries
|
|
|
|
// Only need these for the in-memory dictionaries
|
|
|
|
named_gazetteer_t gazetteer_config[] = {
|
|
|
|
gazetteer_t gazetteer_config[] = {
|
|
|
|
{"academic_degrees", {DICTIONARY_ACADEMIC_DEGREE, ADDRESS_NAME}},
|
|
|
|
{DICTIONARY_ACADEMIC_DEGREE, ADDRESS_NAME},
|
|
|
|
{"building_types", {DICTIONARY_BUILDING_TYPE, ADDRESS_NAME | ADDRESS_HOUSE_NUMBER | ADDRESS_STREET | ADDRESS_UNIT}},
|
|
|
|
{DICTIONARY_AMBIGUOUS_EXPANSION, ADDRESS_ANY},
|
|
|
|
{"company_types", {DICTIONARY_COMPANY_TYPE, ADDRESS_NAME}},
|
|
|
|
{DICTIONARY_BUILDING_TYPE, ADDRESS_NAME | ADDRESS_HOUSE_NUMBER | ADDRESS_STREET | ADDRESS_UNIT},
|
|
|
|
{"concatenated_prefixes_inseparable", {DICTIONARY_CONCATENATED_PREFIX_SEPARABLE, ADDRESS_STREET}},
|
|
|
|
{DICTIONARY_COMPANY_TYPE, ADDRESS_NAME},
|
|
|
|
{"concatenated_suffixes_inseparable", {DICTIONARY_CONCATENATED_SUFFIX_INSEPARABLE, ADDRESS_STREET}},
|
|
|
|
{DICTIONARY_CONCATENATED_PREFIX_SEPARABLE, ADDRESS_STREET},
|
|
|
|
{"concatenated_suffixes_separable", {DICTIONARY_CONCATENATED_SUFFIX_SEPARABLE, ADDRESS_STREET}},
|
|
|
|
{DICTIONARY_CONCATENATED_SUFFIX_INSEPARABLE, ADDRESS_STREET},
|
|
|
|
{"directionals", {DICTIONARY_DIRECTIONAL, ADDRESS_ANY}},
|
|
|
|
{DICTIONARY_CONCATENATED_SUFFIX_SEPARABLE, ADDRESS_STREET},
|
|
|
|
{"elisions", {DICTIONARY_ELISION, ADDRESS_ANY}},
|
|
|
|
{DICTIONARY_DIRECTIONAL, ADDRESS_ANY},
|
|
|
|
{"given_names", {DICTIONARY_GIVEN_NAME, ADDRESS_STREET | ADDRESS_NAME}},
|
|
|
|
{DICTIONARY_ELISION, ADDRESS_ANY},
|
|
|
|
{"level_types", {DICTIONARY_LEVEL, ADDRESS_HOUSE_NUMBER | ADDRESS_STREET | ADDRESS_UNIT}},
|
|
|
|
{DICTIONARY_GIVEN_NAME, ADDRESS_STREET | ADDRESS_NAME},
|
|
|
|
{"no_number", {DICTIONARY_NO_ADDRESS, ADDRESS_HOUSE_NUMBER | ADDRESS_STREET}},
|
|
|
|
{DICTIONARY_LEVEL, ADDRESS_HOUSE_NUMBER | ADDRESS_STREET | ADDRESS_UNIT},
|
|
|
|
{"nulls", {DICTIONARY_NULL, ADDRESS_ANY}},
|
|
|
|
{DICTIONARY_NO_ADDRESS, ADDRESS_HOUSE_NUMBER | ADDRESS_STREET},
|
|
|
|
{"organizations", {DICTIONARY_NAMED_ORGANIZATION, ADDRESS_NAME}},
|
|
|
|
{DICTIONARY_NULL, ADDRESS_ANY},
|
|
|
|
{"people", {DICTIONARY_NAMED_PERSON, ADDRESS_NAME | ADDRESS_STREET}},
|
|
|
|
{DICTIONARY_NAMED_ORGANIZATION, ADDRESS_NAME},
|
|
|
|
{"personal_suffixes", {DICTIONARY_PERSONAL_SUFFIX, ADDRESS_NAME | ADDRESS_STREET}},
|
|
|
|
{DICTIONARY_NAMED_PERSON, ADDRESS_NAME | ADDRESS_STREET},
|
|
|
|
{"personal_titles", {DICTIONARY_PERSONAL_TITLE, ADDRESS_NAME | ADDRESS_STREET}},
|
|
|
|
{DICTIONARY_PERSONAL_SUFFIX, ADDRESS_NAME | ADDRESS_STREET},
|
|
|
|
{"place_names", {DICTIONARY_PLACE_NAME, ADDRESS_NAME | ADDRESS_STREET}},
|
|
|
|
{DICTIONARY_PERSONAL_TITLE, ADDRESS_NAME | ADDRESS_STREET},
|
|
|
|
{"post_office", {DICTIONARY_POST_OFFICE, ADDRESS_HOUSE_NUMBER | ADDRESS_STREET}},
|
|
|
|
{DICTIONARY_PLACE_NAME, ADDRESS_NAME | ADDRESS_STREET},
|
|
|
|
{"qualifiers", {DICTIONARY_QUALIFIER, ADDRESS_STREET}},
|
|
|
|
{DICTIONARY_POST_OFFICE, ADDRESS_HOUSE_NUMBER | ADDRESS_STREET},
|
|
|
|
{"stopwords", {DICTIONARY_STOPWORD, ADDRESS_ANY}},
|
|
|
|
{DICTIONARY_QUALIFIER, ADDRESS_STREET},
|
|
|
|
{"street_types", {DICTIONARY_STREET_TYPE, ADDRESS_STREET}},
|
|
|
|
{DICTIONARY_STOPWORD, ADDRESS_ANY},
|
|
|
|
{"surnames", {DICTIONARY_SURNAME, ADDRESS_STREET | ADDRESS_NAME}},
|
|
|
|
{DICTIONARY_STREET_TYPE, ADDRESS_STREET},
|
|
|
|
{"synonyms", {DICTIONARY_SYNONYM, ADDRESS_ANY}},
|
|
|
|
{DICTIONARY_SURNAME, ADDRESS_STREET | ADDRESS_NAME},
|
|
|
|
{"toponyms", {DICTIONARY_TOPONYM, ADDRESS_LOCALITY | ADDRESS_ADMIN1 | ADDRESS_ADMIN2 | ADDRESS_ADMIN3 | ADDRESS_ADMIN4 | ADDRESS_ADMIN_OTHER | ADDRESS_NEIGHBORHOOD}},
|
|
|
|
{DICTIONARY_SYNONYM, ADDRESS_ANY},
|
|
|
|
{"unit_types", {DICTIONARY_UNIT, ADDRESS_NAME | ADDRESS_HOUSE_NUMBER | ADDRESS_STREET}}
|
|
|
|
{DICTIONARY_TOPONYM, ADDRESS_LOCALITY | ADDRESS_ADMIN1 | ADDRESS_ADMIN2 | ADDRESS_ADMIN3 | ADDRESS_ADMIN4 | ADDRESS_ADMIN_OTHER | ADDRESS_NEIGHBORHOOD},
|
|
|
|
|
|
|
|
{DICTIONARY_UNIT, ADDRESS_NAME | ADDRESS_HOUSE_NUMBER | ADDRESS_STREET}
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Number of entries in gazetteer_config.
 *
 * The expansion is fully parenthesized so the macro behaves as a single
 * operand inside larger expressions (e.g. `x / NUM_DICTIONARY_TYPES` or
 * `x % NUM_DICTIONARY_TYPES`). The element size is taken from the array
 * itself, so the count stays correct if the element type is renamed.
 * Only valid where gazetteer_config is visible as a true array (not a
 * pointer it has decayed to).
 */
#define NUM_DICTIONARY_TYPES (sizeof(gazetteer_config) / sizeof(gazetteer_config[0]))
|
|
|
|
// NOTE(review): VECTOR_INIT is defined outside this view; presumably it
// declares a vector type named gazetteer_array holding gazetteer_t
// elements -- confirm against the macro's definition.
VECTOR_INIT(gazetteer_array, gazetteer_t)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Number of entries in gazetteer_config.
 *
 * The expansion is fully parenthesized so the macro behaves as a single
 * operand inside larger expressions (e.g. `x / NUM_DICTIONARY_TYPES` or
 * `x % NUM_DICTIONARY_TYPES`). The element size is taken from the array
 * itself, so the count stays correct if the element type is renamed.
 * Only valid where gazetteer_config is visible as a true array (not a
 * pointer it has decayed to).
 */
#define NUM_DICTIONARY_TYPES (sizeof(gazetteer_config) / sizeof(gazetteer_config[0]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
#ifdef __cplusplus
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|