diff --git a/data/language/regional/adm1.tsv b/data/language/regional/adm1.tsv index 32b5e033..d0b102c3 100644 --- a/data/language/regional/adm1.tsv +++ b/data/language/regional/adm1.tsv @@ -1,3 +1,42 @@ +be name Liège fr 1 +be gn_id 2792411 fr 1 +be osm relation:1407192 fr 1 +be name Liège de 1 +be gn_id 2792411 de 1 +be osm relation:1407192 de 1 +be name Brabant wallon fr 1 +be gn_id 3333251 fr 1 +be osm relation:78748 fr 1 +be name Namur fr 1 +be gn_id 2790469 fr 1 +be osm relation:1311816 fr 1 +be name Limburg nl 1 +be gn_id 2792347 nl 1 +be osm relation:53142 nl 1 +be name West-Vlanderen nl 1 +be gn_id 2783770 nl 1 +be osm relation:416271 nl 1 +be qs_a1r Brussels Hoofstedelijk Gewest#R\xe9gion de Bruxelles-Capitale fr 1 +be gn_id 2800867 fr 1 +be osm relation:54094 fr 1 +be qs_a1r Brussels Hoofstedelijk Gewest#R\xe9gion de Bruxelles-Capitale nl 1 +be gn_id 2800867 nl 1 +be osm relation:54094 nl 1 +be name Antwerpen nl 1 +be gn_id 2803136 nl 1 +be osm relation:53114 nl 1 +be name Hainaut fr 1 +be gn_id 2796741 fr 1 +be osm relation:157559 fr 1 +be name Luxembourg fr 1 +be gn_id 2791993 fr 1 +be osm relation:1412581 fr 1 +be name Vlaams Brabant nl 1 +be gn_id 3333250 nl 1 +be osm relation:58004 nl 1 +be name Oost-Vlanderen nl 1 +be gn_id 2789733 nl 1 +be osm relation:53135 nl 1 ca name Quebec fr 1 ca gn_id 6115047 fr 1 ca osm relation:61549 fr 1 diff --git a/src/gazetteers.h b/src/gazetteers.h index c0e57876..7ad704f7 100644 --- a/src/gazetteers.h +++ b/src/gazetteers.h @@ -34,6 +34,7 @@ typedef enum dictionary_type { DICTIONARY_SYNONYM = 2, DICTIONARY_STOPWORD = 3, DICTIONARY_ELISION = 4, + DICTIONARY_AMBIGUOUS_EXPANSION = 5, DICTIONARY_STREET_NAME = 10, DICTIONARY_STREET_TYPE = 11, @@ -84,38 +85,36 @@ typedef struct named_gazetteer { } named_gazetteer_t; // Only need these for the in-memory dictionaries -named_gazetteer_t gazetteer_config[] = { - {"academic_degrees", {DICTIONARY_ACADEMIC_DEGREE, ADDRESS_NAME}}, - {"building_types", {DICTIONARY_BUILDING_TYPE, 
ADDRESS_NAME | ADDRESS_HOUSE_NUMBER | ADDRESS_STREET | ADDRESS_UNIT}}, - {"company_types", {DICTIONARY_COMPANY_TYPE, ADDRESS_NAME}}, - {"concatenated_prefixes_inseparable", {DICTIONARY_CONCATENATED_PREFIX_SEPARABLE, ADDRESS_STREET}}, - {"concatenated_suffixes_inseparable", {DICTIONARY_CONCATENATED_SUFFIX_INSEPARABLE, ADDRESS_STREET}}, - {"concatenated_suffixes_separable", {DICTIONARY_CONCATENATED_SUFFIX_SEPARABLE, ADDRESS_STREET}}, - {"directionals", {DICTIONARY_DIRECTIONAL, ADDRESS_ANY}}, - {"elisions", {DICTIONARY_ELISION, ADDRESS_ANY}}, - {"given_names", {DICTIONARY_GIVEN_NAME, ADDRESS_STREET | ADDRESS_NAME}}, - {"level_types", {DICTIONARY_LEVEL, ADDRESS_HOUSE_NUMBER | ADDRESS_STREET | ADDRESS_UNIT}}, - {"no_number", {DICTIONARY_NO_ADDRESS, ADDRESS_HOUSE_NUMBER | ADDRESS_STREET}}, - {"nulls", {DICTIONARY_NULL, ADDRESS_ANY}}, - {"organizations", {DICTIONARY_NAMED_ORGANIZATION, ADDRESS_NAME}}, - {"people", {DICTIONARY_NAMED_PERSON, ADDRESS_NAME | ADDRESS_STREET}}, - {"personal_suffixes", {DICTIONARY_PERSONAL_SUFFIX, ADDRESS_NAME | ADDRESS_STREET}}, - {"personal_titles", {DICTIONARY_PERSONAL_TITLE, ADDRESS_NAME | ADDRESS_STREET}}, - {"place_names", {DICTIONARY_PLACE_NAME, ADDRESS_NAME | ADDRESS_STREET}}, - {"post_office", {DICTIONARY_POST_OFFICE, ADDRESS_HOUSE_NUMBER | ADDRESS_STREET}}, - {"qualifiers", {DICTIONARY_QUALIFIER, ADDRESS_STREET}}, - {"stopwords", {DICTIONARY_STOPWORD, ADDRESS_ANY}}, - {"street_types", {DICTIONARY_STREET_TYPE, ADDRESS_STREET}}, - {"surnames", {DICTIONARY_SURNAME, ADDRESS_STREET | ADDRESS_NAME}}, - {"synonyms", {DICTIONARY_SYNONYM, ADDRESS_ANY}}, - {"toponyms", {DICTIONARY_TOPONYM, ADDRESS_LOCALITY | ADDRESS_ADMIN1 | ADDRESS_ADMIN2 | ADDRESS_ADMIN3 | ADDRESS_ADMIN4 | ADDRESS_ADMIN_OTHER | ADDRESS_NEIGHBORHOOD}}, - {"unit_types", {DICTIONARY_UNIT, ADDRESS_NAME | ADDRESS_HOUSE_NUMBER | ADDRESS_STREET}} +gazetteer_t gazetteer_config[] = { + {DICTIONARY_ACADEMIC_DEGREE, ADDRESS_NAME}, + {DICTIONARY_AMBIGUOUS_EXPANSION, ADDRESS_ANY}, + 
{DICTIONARY_BUILDING_TYPE, ADDRESS_NAME | ADDRESS_HOUSE_NUMBER | ADDRESS_STREET | ADDRESS_UNIT}, + {DICTIONARY_COMPANY_TYPE, ADDRESS_NAME}, + {DICTIONARY_CONCATENATED_PREFIX_SEPARABLE, ADDRESS_STREET}, + {DICTIONARY_CONCATENATED_SUFFIX_INSEPARABLE, ADDRESS_STREET}, + {DICTIONARY_CONCATENATED_SUFFIX_SEPARABLE, ADDRESS_STREET}, + {DICTIONARY_DIRECTIONAL, ADDRESS_ANY}, + {DICTIONARY_ELISION, ADDRESS_ANY}, + {DICTIONARY_GIVEN_NAME, ADDRESS_STREET | ADDRESS_NAME}, + {DICTIONARY_LEVEL, ADDRESS_HOUSE_NUMBER | ADDRESS_STREET | ADDRESS_UNIT}, + {DICTIONARY_NO_ADDRESS, ADDRESS_HOUSE_NUMBER | ADDRESS_STREET}, + {DICTIONARY_NULL, ADDRESS_ANY}, + {DICTIONARY_NAMED_ORGANIZATION, ADDRESS_NAME}, + {DICTIONARY_NAMED_PERSON, ADDRESS_NAME | ADDRESS_STREET}, + {DICTIONARY_PERSONAL_SUFFIX, ADDRESS_NAME | ADDRESS_STREET}, + {DICTIONARY_PERSONAL_TITLE, ADDRESS_NAME | ADDRESS_STREET}, + {DICTIONARY_PLACE_NAME, ADDRESS_NAME | ADDRESS_STREET}, + {DICTIONARY_POST_OFFICE, ADDRESS_HOUSE_NUMBER | ADDRESS_STREET}, + {DICTIONARY_QUALIFIER, ADDRESS_STREET}, + {DICTIONARY_STOPWORD, ADDRESS_ANY}, + {DICTIONARY_STREET_TYPE, ADDRESS_STREET}, + {DICTIONARY_SURNAME, ADDRESS_STREET | ADDRESS_NAME}, + {DICTIONARY_SYNONYM, ADDRESS_ANY}, + {DICTIONARY_TOPONYM, ADDRESS_LOCALITY | ADDRESS_ADMIN1 | ADDRESS_ADMIN2 | ADDRESS_ADMIN3 | ADDRESS_ADMIN4 | ADDRESS_ADMIN_OTHER | ADDRESS_NEIGHBORHOOD}, + {DICTIONARY_UNIT, ADDRESS_NAME | ADDRESS_HOUSE_NUMBER | ADDRESS_STREET} }; - -VECTOR_INIT(gazetteer_array, gazetteer_t) - -#define NUM_DICTIONARY_TYPES sizeof(gazetteer_config) / sizeof(named_gazetteer_t) +#define NUM_DICTIONARY_TYPES (sizeof(gazetteer_config) / sizeof(gazetteer_config[0])) /* entry count of gazetteer_config; parenthesized so it is safe inside larger expressions, and sized from the array's own element so it cannot drift from the element type */ #ifdef __cplusplus }