/*
 * gazetteers.h
 *
 * Address-component bit flags, dictionary type identifiers, and the table
 * that maps each named gazetteer to its dictionary type and to the address
 * components it is associated with.
 */
#ifndef GAZETTEERS_H
#define GAZETTEERS_H


#ifdef __cplusplus
extern "C" {
#endif

/*
 * NOTE(review): the include targets were missing from the text this header
 * was recovered from. <stdint.h> is required (uint64_t, UINT64_C); the other
 * two are assumed -- confirm against the repository.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

/*
 * Address component flags, one bit each in a 64-bit mask.
 *
 * Every expansion is fully parenthesized so the flag behaves as a single
 * operand whatever operator it is written next to (e.g. ADDRESS_STREET * 2,
 * ADDRESS_NAME + 1, or a cast applied to the macro).
 *
 * Bits 0-4 are used by the first group and bits 32-40 by the second.
 */
#define ADDRESS_ANY          (UINT64_C(1) << 0)
#define ADDRESS_NAME         (UINT64_C(1) << 1)
#define ADDRESS_HOUSE_NUMBER (UINT64_C(1) << 2)
#define ADDRESS_STREET       (UINT64_C(1) << 3)
#define ADDRESS_UNIT         (UINT64_C(1) << 4)

#define ADDRESS_LOCALITY     (UINT64_C(1) << 32)
#define ADDRESS_ADMIN1       (UINT64_C(1) << 33)
#define ADDRESS_ADMIN2       (UINT64_C(1) << 34)
#define ADDRESS_ADMIN3       (UINT64_C(1) << 35)
#define ADDRESS_ADMIN4       (UINT64_C(1) << 36)
#define ADDRESS_ADMIN_OTHER  (UINT64_C(1) << 37)
#define ADDRESS_COUNTRY      (UINT64_C(1) << 38)
#define ADDRESS_POSTAL_CODE  (UINT64_C(1) << 39)
#define ADDRESS_NEIGHBORHOOD (UINT64_C(1) << 40)

/*
 * Identifier for the kind of dictionary a phrase comes from.
 *
 * Values are assigned explicitly and are laid out in numbered ranges
 * (1-4, 10-21, 50-56, 100-109) with gaps between the ranges.
 */
typedef enum dictionary_type {
    DICTIONARY_ANY = 1,
    DICTIONARY_SYNONYM = 2,
    DICTIONARY_STOPWORD = 3,
    DICTIONARY_ELISION = 4,

    DICTIONARY_STREET_NAME = 10,
    DICTIONARY_STREET_TYPE = 11,
    DICTIONARY_CONCATENATED_SEPARABLE = 12,
    DICTIONARY_CONCATENATED_INSEPARABLE = 13,
    DICTIONARY_DIRECTIONAL = 14,
    DICTIONARY_QUALIFIER = 15,
    DICTIONARY_BUILDING_TYPE = 16,
    DICTIONARY_LEVEL = 17,
    DICTIONARY_UNIT = 18,
    DICTIONARY_POST_OFFICE = 19,
    DICTIONARY_NO_ADDRESS = 20,
    DICTIONARY_NULL = 21,

    DICTIONARY_PLACE_NAME = 50,
    DICTIONARY_COMPANY_TYPE = 51,
    DICTIONARY_GIVEN_NAME = 52,
    DICTIONARY_SURNAME = 53,
    DICTIONARY_PERSONAL_TITLE = 54,
    DICTIONARY_PERSONAL_SUFFIX = 55,
    DICTIONARY_ACADEMIC_DEGREE = 56,

    DICTIONARY_LOCALITY = 100,
    DICTIONARY_ADMIN1 = 101,
    DICTIONARY_ADMIN2 = 102,
    DICTIONARY_ADMIN3 = 103,
    DICTIONARY_ADMIN4 = 104,
    DICTIONARY_ADMIN_OTHER = 105,
    DICTIONARY_NEIGHBORHOOD = 106,
    DICTIONARY_POSTAL_CODE = 107,
    DICTIONARY_COUNTRY = 108,
    DICTIONARY_TOPONYM = 109

} dictionary_type_t;

/*
 * One gazetteer table entry.
 *
 *   name                NUL-terminated gazetteer name (at most 63 characters)
 *   type                dictionary type this gazetteer is tagged with
 *   address_components  bitwise OR of the ADDRESS_* flags for this gazetteer
 */
typedef struct gazetteer {
    char name[64];
    dictionary_type_t type;
    uint64_t address_components;
} gazetteer_t;

// Only need these for the in-memory dictionaries
//
// NOTE(review): this is a definition with external linkage inside a header.
// Including the header from more than one translation unit yields duplicate
// definitions at link time. Linkage is left unchanged here; consider moving
// the definition to a .c file with an extern declaration in this header.
gazetteer_t gazetteers[] = {
    {"academic_degrees", DICTIONARY_ACADEMIC_DEGREE, ADDRESS_NAME},
    {"building_types", DICTIONARY_BUILDING_TYPE, ADDRESS_NAME | ADDRESS_HOUSE_NUMBER | ADDRESS_STREET | ADDRESS_UNIT},
    {"company_types", DICTIONARY_COMPANY_TYPE, ADDRESS_NAME},
    {"concatenated_suffixes_inseparable", DICTIONARY_CONCATENATED_INSEPARABLE, ADDRESS_STREET},
    {"concatenated_suffixes_separable", DICTIONARY_CONCATENATED_SEPARABLE, ADDRESS_STREET},
    {"directionals", DICTIONARY_DIRECTIONAL, ADDRESS_ANY},
    {"elisions", DICTIONARY_ELISION, ADDRESS_ANY},
    {"given_names", DICTIONARY_GIVEN_NAME, ADDRESS_STREET | ADDRESS_NAME},
    {"level_types", DICTIONARY_LEVEL, ADDRESS_HOUSE_NUMBER | ADDRESS_STREET | ADDRESS_UNIT},
    {"no_number", DICTIONARY_NO_ADDRESS, ADDRESS_HOUSE_NUMBER | ADDRESS_STREET},
    {"nulls", DICTIONARY_NULL, ADDRESS_ANY},
    {"personal_suffixes", DICTIONARY_PERSONAL_SUFFIX, ADDRESS_NAME | ADDRESS_STREET},
    {"personal_titles", DICTIONARY_PERSONAL_TITLE, ADDRESS_NAME | ADDRESS_STREET},
    {"place_names", DICTIONARY_PLACE_NAME, ADDRESS_NAME | ADDRESS_STREET},
    {"post_office", DICTIONARY_POST_OFFICE, ADDRESS_HOUSE_NUMBER | ADDRESS_STREET},
    {"qualifiers", DICTIONARY_QUALIFIER, ADDRESS_STREET},
    {"stopwords", DICTIONARY_STOPWORD, ADDRESS_ANY},
    {"street_types", DICTIONARY_STREET_TYPE, ADDRESS_STREET},
    {"surnames", DICTIONARY_SURNAME, ADDRESS_STREET | ADDRESS_NAME},
    {"synonyms", DICTIONARY_SYNONYM, ADDRESS_ANY},
    {"toponyms", DICTIONARY_TOPONYM, ADDRESS_LOCALITY | ADDRESS_ADMIN1 | ADDRESS_ADMIN2 | ADDRESS_ADMIN3 | ADDRESS_ADMIN4 | ADDRESS_ADMIN_OTHER | ADDRESS_NEIGHBORHOOD},
    {"unit_types", DICTIONARY_UNIT, ADDRESS_NAME | ADDRESS_HOUSE_NUMBER | ADDRESS_STREET}
};

/*
 * Number of entries in the gazetteers table above (not the number of
 * dictionary_type_t enumerators). Parenthesized so that expressions such as
 * `x / NUM_DICTIONARY_TYPES` or `x % NUM_DICTIONARY_TYPES` group correctly.
 */
#define NUM_DICTIONARY_TYPES (sizeof(gazetteers) / sizeof(gazetteer_t))

#ifdef __cplusplus
}
#endif

#endif