[gazetteers] Adding new gazetteer types/address components

This commit is contained in:
Al
2016-05-28 19:19:18 -04:00
parent acd97a0081
commit c0e8578b9c
3 changed files with 52 additions and 27 deletions

View File

@@ -3,29 +3,45 @@ gazetteer_t gazetteer_config[] = {
{DICTIONARY_ACADEMIC_DEGREE, ADDRESS_NAME},
{DICTIONARY_AMBIGUOUS_EXPANSION, ADDRESS_NONE},
{DICTIONARY_BUILDING_TYPE, ADDRESS_NAME | ADDRESS_HOUSE_NUMBER | ADDRESS_STREET | ADDRESS_UNIT},
{DICTIONARY_CATEGORY, ADDRESS_CATEGORY},
{DICTIONARY_CHAIN, ADDRESS_NAME},
{DICTIONARY_COMPANY_TYPE, ADDRESS_NAME},
{DICTIONARY_CONCATENATED_PREFIX_SEPARABLE, ADDRESS_STREET},
{DICTIONARY_CONCATENATED_SUFFIX_INSEPARABLE, ADDRESS_STREET},
{DICTIONARY_CONCATENATED_SUFFIX_SEPARABLE, ADDRESS_STREET},
{DICTIONARY_CROSS_STREET, ADDRESS_STREET},
{DICTIONARY_DIRECTIONAL, ADDRESS_ANY},
{DICTIONARY_ELISION, ADDRESS_ANY},
{DICTIONARY_ENTRANCE, ADDRESS_ENTRANCE},
{DICTIONARY_GIVEN_NAME, ADDRESS_STREET | ADDRESS_NAME},
{DICTIONARY_LEVEL, ADDRESS_HOUSE_NUMBER | ADDRESS_STREET | ADDRESS_UNIT},
{DICTIONARY_NO_ADDRESS, ADDRESS_HOUSE_NUMBER | ADDRESS_STREET},
{DICTIONARY_HOUSE_NUMBER, ADDRESS_HOUSE_NUMBER},
{DICTIONARY_LEVEL_NUMBERED, ADDRESS_LEVEL},
{DICTIONARY_LEVEL_STANDALONE, ADDRESS_LEVEL},
{DICTIONARY_LEVEL_MEZZANINE, ADDRESS_LEVEL},
{DICTIONARY_LEVEL_BASEMENT, ADDRESS_LEVEL},
{DICTIONARY_LEVEL_SUB_BASEMENT, ADDRESS_LEVEL},
{DICTIONARY_NULL, ADDRESS_ANY},
{DICTIONARY_NAMED_ORGANIZATION, ADDRESS_NAME},
{DICTIONARY_NAMED_PERSON, ADDRESS_NAME | ADDRESS_STREET},
{DICTIONARY_NO_NUMBER, ADDRESS_HOUSE_NUMBER},
{DICTIONARY_NUMBER, ADDRESS_HOUSE_NUMBER | ADDRESS_UNIT | ADDRESS_LEVEL | ADDRESS_STAIRCASE | ADDRESS_ENTRANCE},
{DICTIONARY_PERSONAL_SUFFIX, ADDRESS_NAME | ADDRESS_STREET},
{DICTIONARY_PERSONAL_TITLE, ADDRESS_NAME | ADDRESS_STREET},
{DICTIONARY_PLACE_NAME, ADDRESS_NAME | ADDRESS_STREET},
{DICTIONARY_POST_OFFICE, ADDRESS_HOUSE_NUMBER | ADDRESS_STREET},
{DICTIONARY_POSTAL_CODE, ADDRESS_POSTAL_CODE},
{DICTIONARY_QUALIFIER, ADDRESS_STREET},
{DICTIONARY_STAIRCASE, ADDRESS_STAIRCASE},
{DICTIONARY_STOPWORD, ADDRESS_ANY},
{DICTIONARY_STREET_TYPE, ADDRESS_STREET},
{DICTIONARY_SURNAME, ADDRESS_STREET | ADDRESS_NAME},
{DICTIONARY_SYNONYM, ADDRESS_ANY},
{DICTIONARY_TOPONYM, ADDRESS_NAME | ADDRESS_STREET | ADDRESS_LOCALITY | ADDRESS_ADMIN1 | ADDRESS_ADMIN2 | ADDRESS_ADMIN3 | ADDRESS_ADMIN4 | ADDRESS_ADMIN_OTHER | ADDRESS_NEIGHBORHOOD},
{DICTIONARY_UNIT, ADDRESS_NAME | ADDRESS_HOUSE_NUMBER | ADDRESS_STREET | ADDRESS_UNIT}
{DICTIONARY_TOPONYM, ADDRESS_NAME | ADDRESS_STREET | ADDRESS_TOPONYM},
{DICTIONARY_UNIT_NUMBERED, ADDRESS_UNIT},
{DICTIONARY_UNIT_STANDALONE, ADDRESS_UNIT},
{DICTIONARY_UNIT_DIRECTION, ADDRESS_UNIT}
};
// Number of entries in gazetteer_config.
// The expansion is fully parenthesized so the macro behaves as a single value
// inside larger expressions (e.g. `x / NUM_DICTIONARY_TYPES` or
// `i % NUM_DICTIONARY_TYPES`); without the outer parentheses the division
// would associate with the surrounding operators instead.
// The element size is taken from the array itself so the count stays correct
// if the element type is ever renamed. Only meaningful where gazetteer_config
// is visible as an array (not a pointer).
#define NUM_DICTIONARY_TYPES (sizeof(gazetteer_config) / sizeof(gazetteer_config[0]))

View File

@@ -27,11 +27,22 @@ typedef enum dictionary_type {
DICTIONARY_DIRECTIONAL = 15,
DICTIONARY_QUALIFIER = 16,
DICTIONARY_BUILDING_TYPE = 17,
DICTIONARY_LEVEL = 18,
DICTIONARY_UNIT = 19,
DICTIONARY_POST_OFFICE = 20,
DICTIONARY_NO_ADDRESS = 21,
DICTIONARY_NULL = 22,
DICTIONARY_LEVEL_NUMBERED = 18,
DICTIONARY_LEVEL_STANDALONE = 19,
DICTIONARY_LEVEL_MEZZANINE = 20,
DICTIONARY_LEVEL_BASEMENT = 21,
DICTIONARY_LEVEL_SUB_BASEMENT = 22,
DICTIONARY_UNIT_NUMBERED = 23,
DICTIONARY_UNIT_STANDALONE = 24,
DICTIONARY_UNIT_DIRECTION = 25,
DICTIONARY_ENTRANCE = 26,
DICTIONARY_STAIRCASE = 27,
DICTIONARY_NUMBER = 30,
DICTIONARY_NO_NUMBER = 31,
DICTIONARY_HOUSE_NUMBER = 32,
DICTIONARY_POST_OFFICE = 33,
DICTIONARY_POSTCODE = 34,
DICTIONARY_PLACE_NAME = 50,
DICTIONARY_COMPANY_TYPE = 51,
@@ -44,16 +55,15 @@ typedef enum dictionary_type {
DICTIONARY_NAMED_PERSON = 60,
DICTIONARY_NAMED_ORGANIZATION = 61,
DICTIONARY_LOCALITY = 100,
DICTIONARY_ADMIN1 = 101,
DICTIONARY_ADMIN2 = 102,
DICTIONARY_ADMIN3 = 103,
DICTIONARY_ADMIN4 = 104,
DICTIONARY_ADMIN_OTHER = 105,
DICTIONARY_NEIGHBORHOOD = 106,
DICTIONARY_POSTAL_CODE = 107,
DICTIONARY_COUNTRY = 108,
DICTIONARY_TOPONYM = 109
DICTIONARY_CATEGORY = 70,
DICTIONARY_CHAIN = 71,
DICTIONARY_CROSS_STREET = 80,
DICTIONARY_NULL = 90,
DICTIONARY_TOPONYM = 100,
DICTIONARY_POSTAL_CODE = 101,
} dictionary_type_t;

View File

@@ -22,14 +22,13 @@ Address dictionaries
// Address component flags. Each macro is a single bit so that several
// components can be OR-ed together into one mask.
// NOTE(review): this span looks like a rendered diff with both the old and the
// new definitions present. Bits 7, 8 and 13 are each defined twice below;
// presumably only one name per bit exists in the real header. Confirm against
// the actual file before relying on these values.
#define ADDRESS_HOUSE_NUMBER (1 << 2)
#define ADDRESS_STREET (1 << 3)
#define ADDRESS_UNIT (1 << 4)
#define ADDRESS_LEVEL (1 << 5)
#define ADDRESS_STAIRCASE (1 << 6)
#define ADDRESS_ENTRANCE (1 << 7)
// Same bit as ADDRESS_ENTRANCE above.
#define ADDRESS_LOCALITY (1 << 7)
#define ADDRESS_ADMIN1 (1 << 8)
#define ADDRESS_ADMIN2 (1 << 9)
#define ADDRESS_ADMIN3 (1 << 10)
#define ADDRESS_ADMIN4 (1 << 11)
#define ADDRESS_ADMIN_OTHER (1 << 12)
#define ADDRESS_COUNTRY (1 << 13)
// Same bit as ADDRESS_ADMIN1 above.
#define ADDRESS_CATEGORY (1 << 8)
// Same bit as ADDRESS_COUNTRY above.
#define ADDRESS_TOPONYM (1 << 13)
#define ADDRESS_POSTAL_CODE (1 << 14)
#define ADDRESS_NEIGHBORHOOD (1 << 15)
// Mask with the low 16 bits (0 through 15) all set.
#define ADDRESS_ALL ((1 << 16) - 1)
@@ -37,7 +36,7 @@ Address dictionaries
typedef struct normalize_options {
// List of language codes
char **languages;
int num_languages;
size_t num_languages;
uint16_t address_components;
// String options