[dictionaries] gazetteers.h has the config for in-memory dictionaries' directory structure
This commit is contained in:
108
src/gazetteers.h
Normal file
108
src/gazetteers.h
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
#ifndef GAZETTEERS_H
|
||||||
|
#define GAZETTEERS_H
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
/*
 * Address-component bit flags.
 *
 * Each flag is a single bit of a uint64_t; flags are OR-ed together to form
 * the address_components mask stored in gazetteer_t.  Every expansion is
 * fully parenthesized so a flag behaves as one operand next to any operator
 * (e.g. ~ADDRESS_NAME, ADDRESS_NAME + 1), not just next to | and &.
 *
 * Note: ADDRESS_ANY is its own bit (bit 0); it is not the union of the
 * other flags.
 */

/* Bits 0-4 */
#define ADDRESS_ANY          (UINT64_C(1) << 0)
#define ADDRESS_NAME         (UINT64_C(1) << 1)
#define ADDRESS_HOUSE_NUMBER (UINT64_C(1) << 2)
#define ADDRESS_STREET       (UINT64_C(1) << 3)
#define ADDRESS_UNIT         (UINT64_C(1) << 4)

/* Bits 32-40: these do not fit in a 32-bit integer, hence UINT64_C */
#define ADDRESS_LOCALITY     (UINT64_C(1) << 32)
#define ADDRESS_ADMIN1       (UINT64_C(1) << 33)
#define ADDRESS_ADMIN2       (UINT64_C(1) << 34)
#define ADDRESS_ADMIN3       (UINT64_C(1) << 35)
#define ADDRESS_ADMIN4       (UINT64_C(1) << 36)
#define ADDRESS_ADMIN_OTHER  (UINT64_C(1) << 37)
#define ADDRESS_COUNTRY      (UINT64_C(1) << 38)
#define ADDRESS_POSTAL_CODE  (UINT64_C(1) << 39)
#define ADDRESS_NEIGHBORHOOD (UINT64_C(1) << 40)
|
||||||
|
|
||||||
|
/*
 * Identifier for each kind of dictionary.
 *
 * Values are assigned explicitly and fall into four numeric ranges
 * (1-4, 10-21, 50-56, 100-109); the numbers in between are not used by
 * this enum.  Keep existing values stable when adding new entries.
 */
typedef enum dictionary_type {
    /* 1-4 */
    DICTIONARY_ANY                      = 1,
    DICTIONARY_SYNONYM                  = 2,
    DICTIONARY_STOPWORD                 = 3,
    DICTIONARY_ELISION                  = 4,

    /* 10-21 */
    DICTIONARY_STREET_NAME              = 10,
    DICTIONARY_STREET_TYPE              = 11,
    DICTIONARY_CONCATENATED_SEPARABLE   = 12,
    DICTIONARY_CONCATENATED_INSEPARABLE = 13,
    DICTIONARY_DIRECTIONAL              = 14,
    DICTIONARY_QUALIFIER                = 15,
    DICTIONARY_BUILDING_TYPE            = 16,
    DICTIONARY_LEVEL                    = 17,
    DICTIONARY_UNIT                     = 18,
    DICTIONARY_POST_OFFICE              = 19,
    DICTIONARY_NO_ADDRESS               = 20,
    DICTIONARY_NULL                     = 21,

    /* 50-56 */
    DICTIONARY_PLACE_NAME               = 50,
    DICTIONARY_COMPANY_TYPE             = 51,
    DICTIONARY_GIVEN_NAME               = 52,
    DICTIONARY_SURNAME                  = 53,
    DICTIONARY_PERSONAL_TITLE           = 54,
    DICTIONARY_PERSONAL_SUFFIX          = 55,
    DICTIONARY_ACADEMIC_DEGREE          = 56,

    /* 100-109 */
    DICTIONARY_LOCALITY                 = 100,
    DICTIONARY_ADMIN1                   = 101,
    DICTIONARY_ADMIN2                   = 102,
    DICTIONARY_ADMIN3                   = 103,
    DICTIONARY_ADMIN4                   = 104,
    DICTIONARY_ADMIN_OTHER              = 105,
    DICTIONARY_NEIGHBORHOOD             = 106,
    DICTIONARY_POSTAL_CODE              = 107,
    DICTIONARY_COUNTRY                  = 108,
    DICTIONARY_TOPONYM                  = 109
} dictionary_type_t;
|
||||||
|
|
||||||
|
typedef struct gazetteer {
|
||||||
|
char name[64];
|
||||||
|
dictionary_type_t type;
|
||||||
|
uint64_t address_components;
|
||||||
|
} gazetteer_t;
|
||||||
|
|
||||||
|
// Only need these for the in-memory dictionaries
|
||||||
|
gazetteer_t gazetteers[] = {
|
||||||
|
{"academic_degrees", DICTIONARY_ACADEMIC_DEGREE, ADDRESS_NAME},
|
||||||
|
{"building_types", DICTIONARY_BUILDING_TYPE, ADDRESS_NAME | ADDRESS_HOUSE_NUMBER | ADDRESS_STREET | ADDRESS_UNIT},
|
||||||
|
{"company_types", DICTIONARY_COMPANY_TYPE, ADDRESS_NAME},
|
||||||
|
{"concatenated_suffixes_inseparable", DICTIONARY_CONCATENATED_INSEPARABLE, ADDRESS_STREET},
|
||||||
|
{"concatenated_suffixes_separable", DICTIONARY_CONCATENATED_SEPARABLE, ADDRESS_STREET},
|
||||||
|
{"directionals", DICTIONARY_DIRECTIONAL, ADDRESS_ANY},
|
||||||
|
{"elisions", DICTIONARY_ELISION, ADDRESS_ANY},
|
||||||
|
{"given_names", DICTIONARY_GIVEN_NAME, ADDRESS_STREET | ADDRESS_NAME},
|
||||||
|
{"level_types", DICTIONARY_LEVEL, ADDRESS_HOUSE_NUMBER | ADDRESS_STREET | ADDRESS_UNIT},
|
||||||
|
{"no_number", DICTIONARY_NO_ADDRESS, ADDRESS_HOUSE_NUMBER | ADDRESS_STREET},
|
||||||
|
{"nulls", DICTIONARY_NULL, ADDRESS_ANY},
|
||||||
|
{"personal_suffixes", DICTIONARY_PERSONAL_SUFFIX, ADDRESS_NAME | ADDRESS_STREET},
|
||||||
|
{"personal_titles", DICTIONARY_PERSONAL_TITLE, ADDRESS_NAME | ADDRESS_STREET},
|
||||||
|
{"place_names", DICTIONARY_PLACE_NAME, ADDRESS_NAME | ADDRESS_STREET},
|
||||||
|
{"post_office", DICTIONARY_POST_OFFICE, ADDRESS_HOUSE_NUMBER | ADDRESS_STREET},
|
||||||
|
{"qualifiers", DICTIONARY_QUALIFIER, ADDRESS_STREET},
|
||||||
|
{"stopwords", DICTIONARY_STOPWORD, ADDRESS_ANY},
|
||||||
|
{"street_types", DICTIONARY_STREET_TYPE, ADDRESS_STREET},
|
||||||
|
{"surnames", DICTIONARY_SURNAME, ADDRESS_STREET | ADDRESS_NAME},
|
||||||
|
{"synonyms", DICTIONARY_SYNONYM, ADDRESS_ANY},
|
||||||
|
{"toponyms", DICTIONARY_TOPONYM, ADDRESS_LOCALITY | ADDRESS_ADMIN1 | ADDRESS_ADMIN2 | ADDRESS_ADMIN3 | ADDRESS_ADMIN4 | ADDRESS_ADMIN_OTHER | ADDRESS_NEIGHBORHOOD},
|
||||||
|
{"unit_types", DICTIONARY_UNIT, ADDRESS_NAME | ADDRESS_HOUSE_NUMBER | ADDRESS_STREET}
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Number of entries in the gazetteers[] table (a size_t constant expression).
 * Parenthesized so it acts as a single operand in expressions such as
 * `i % NUM_DICTIONARY_TYPES`.  Only valid where `gazetteers` is the actual
 * array, not a pointer to it.
 */
#define NUM_DICTIONARY_TYPES (sizeof(gazetteers) / sizeof(gazetteers[0]))
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
Reference in New Issue
Block a user