From 642d3697d44f534280eee7271b943f8f8eb66c2a Mon Sep 17 00:00:00 2001 From: Al Date: Sun, 8 Mar 2015 17:55:57 -0400 Subject: [PATCH] [dictionaries] additions to German dictionaries, including a separable prefix dictionary --- .../de/academic_degrees/academic_degrees.txt | 3 +- .../de/company_types/company_types.txt | 8 ++++-- .../concatenated_prefixes_separable.txt | 5 ++++ .../concatenated_suffixes_inseparable | 9 +++++- .../concatenated_suffixes_separable.txt | 7 ++--- .../de/directionals/directionals.txt | 17 +++++++++++ .../de/level_types/level_types.txt | 1 + .../de/personal_titles/personal_titles.txt | 11 ++++++++ .../de/place_names/place_names.txt | 28 ++++++++++++++++++- data/dictionaries/de/stopwords/stopwords.txt | 5 ++-- .../de/street_types/street_types.txt | 5 ++-- data/dictionaries/de/synonyms/synonyms.txt | 13 ++++++++- data/dictionaries/de/toponyms/toponyms.txt | 3 +- .../dictionaries/de/unit_types/unit_types.txt | 7 ++++- src/gazetteers.h | 28 ++++++++++--------- 15 files changed, 119 insertions(+), 31 deletions(-) create mode 100644 data/dictionaries/de/concatenated_prefixes_separable/concatenated_prefixes_separable.txt create mode 100644 data/dictionaries/de/directionals/directionals.txt create mode 100644 data/dictionaries/de/level_types/level_types.txt diff --git a/data/dictionaries/de/academic_degrees/academic_degrees.txt b/data/dictionaries/de/academic_degrees/academic_degrees.txt index ca8a6770..eb722d57 100644 --- a/data/dictionaries/de/academic_degrees/academic_degrees.txt +++ b/data/dictionaries/de/academic_degrees/academic_degrees.txt @@ -1,4 +1,5 @@ diplom ingenieur|dipl ing diplom kaufmann|dipl kfm doktor de medizin|dr med -doktor der philosophie|dr phil \ No newline at end of file +doktor der philosophie|dr phil +magister|mag \ No newline at end of file diff --git a/data/dictionaries/de/company_types/company_types.txt b/data/dictionaries/de/company_types/company_types.txt index 3a8099e3..eef78453 100644 --- 
a/data/dictionaries/de/company_types/company_types.txt +++ b/data/dictionaries/de/company_types/company_types.txt @@ -1,4 +1,8 @@ -aktiengesellschaft|ag +ag|aktiengesellschaft +eingetragener verein|e v firma|fa fussball club|fc|f c -gmbh \ No newline at end of file +gmbh|gesellschaft mit beschrankter haftung +gesellschaft|ges +kg|kommanditgesellschaft +ohg|offene handelsgesellschaft \ No newline at end of file diff --git a/data/dictionaries/de/concatenated_prefixes_separable/concatenated_prefixes_separable.txt b/data/dictionaries/de/concatenated_prefixes_separable/concatenated_prefixes_separable.txt new file mode 100644 index 00000000..d41caa53 --- /dev/null +++ b/data/dictionaries/de/concatenated_prefixes_separable/concatenated_prefixes_separable.txt @@ -0,0 +1,5 @@ +hinter +klein +ober +unter +vorder \ No newline at end of file diff --git a/data/dictionaries/de/concatenated_suffixes_inseparable/concatenated_suffixes_inseparable b/data/dictionaries/de/concatenated_suffixes_inseparable/concatenated_suffixes_inseparable index fb4a2a70..8645cb2b 100644 --- a/data/dictionaries/de/concatenated_suffixes_inseparable/concatenated_suffixes_inseparable +++ b/data/dictionaries/de/concatenated_suffixes_inseparable/concatenated_suffixes_inseparable @@ -1,3 +1,10 @@ berg|bg burg|bg -dorf|df \ No newline at end of file +dorf|df +kogel|kg +kreuz|kz +platz|pl +siedlung|sdlg +stiege|stg +strasse|str +wiese|ws \ No newline at end of file diff --git a/data/dictionaries/de/concatenated_suffixes_separable/concatenated_suffixes_separable.txt b/data/dictionaries/de/concatenated_suffixes_separable/concatenated_suffixes_separable.txt index 5c90071e..789900da 100644 --- a/data/dictionaries/de/concatenated_suffixes_separable/concatenated_suffixes_separable.txt +++ b/data/dictionaries/de/concatenated_suffixes_separable/concatenated_suffixes_separable.txt @@ -4,9 +4,6 @@ burgermeister|bgm damm gasse hof -kamp -pfad -platz|pl -quelle|qu -strasse|str +quelle|qu|q +rundwanderweg|rww weg|w \ No 
newline at end of file diff --git a/data/dictionaries/de/directionals/directionals.txt b/data/dictionaries/de/directionals/directionals.txt new file mode 100644 index 00000000..e0d25a97 --- /dev/null +++ b/data/dictionaries/de/directionals/directionals.txt @@ -0,0 +1,17 @@ +nord|n +norden +nordliche|nordl +nordost|no +nordwest|nw +ost|o +osten +ostliche|ostl +suden|s +sudliche|sudl +sudost|so +sudosten +sudwest|sw +sudwesten +west|w +westen +westliche|westl \ No newline at end of file diff --git a/data/dictionaries/de/level_types/level_types.txt b/data/dictionaries/de/level_types/level_types.txt new file mode 100644 index 00000000..1e552a0e --- /dev/null +++ b/data/dictionaries/de/level_types/level_types.txt @@ -0,0 +1 @@ +untergeschoss|ug|u g \ No newline at end of file diff --git a/data/dictionaries/de/personal_titles/personal_titles.txt b/data/dictionaries/de/personal_titles/personal_titles.txt index 58fce466..8899d9d0 100644 --- a/data/dictionaries/de/personal_titles/personal_titles.txt +++ b/data/dictionaries/de/personal_titles/personal_titles.txt @@ -1,7 +1,18 @@ doktor|dr burgermeister|bgm direktor|dir +frau|fr +fraulein|frl +heilige|hl +heiliger +heiliges herr|hr herrn|hrn ingnieur|ing +kapitanleutnant|ka leut +mitglied des bundestages|mdb +mitglied des landtages|mdl +oberburgermeister|ob|obgm +pater +professor|prof sankt|st \ No newline at end of file diff --git a/data/dictionaries/de/place_names/place_names.txt b/data/dictionaries/de/place_names/place_names.txt index 42c27025..9b8c332c 100644 --- a/data/dictionaries/de/place_names/place_names.txt +++ b/data/dictionaries/de/place_names/place_names.txt @@ -19,10 +19,36 @@ hafen haus handelsakademie|hak handelsschule|hasch +hauptbahnhof|hbf +hohle +hohere technische lehranstalt|htl|h t l hotel +hutte|htt +jagdhaus +jagdhutte|jhtt jungenherberge|jh|j h kaffee +kapelle|kap|kpl +klaranlage|ka +konzentrationslager|kz|kl +magistratsabteilung|ma markt|mkt marktplatz|markt platz|markt pl|mkt pl|marktpl 
+nationalpark|np|national park +naturschutzgebiet|nsg +neue mittelschule|nms +padagogische hochschule|ph sportplatz|sport platz|sport pl|sportpl -restaurant \ No newline at end of file +reservoir|res +restaurant +ruine +schloss|schl +steinbruch|stb +technische universitat|tu|t u +truppenubungsplatz|tupl +universitat|uni +universitatsbibliothek|ub|u b +volksschule|vs +wald +wasserfall|wsf|wssf +wirtshaus|wh \ No newline at end of file diff --git a/data/dictionaries/de/stopwords/stopwords.txt b/data/dictionaries/de/stopwords/stopwords.txt index c8636cef..5326dbd9 100644 --- a/data/dictionaries/de/stopwords/stopwords.txt +++ b/data/dictionaries/de/stopwords/stopwords.txt @@ -18,18 +18,19 @@ fur gegenuber im|i in|i -in der|id|i d +in der|i d mit nach nachst neben ob|o +oder|od uber und|& unter vor|v von|v -von der|v d|vd +von der|v d zu zu der zur diff --git a/data/dictionaries/de/street_types/street_types.txt b/data/dictionaries/de/street_types/street_types.txt index 756efb54..a5ad235d 100644 --- a/data/dictionaries/de/street_types/street_types.txt +++ b/data/dictionaries/de/street_types/street_types.txt @@ -14,7 +14,6 @@ graben|gr grosser grosse|gr|g grosses -hauptbahnhof|hbf heiligen hof|h kamp|k @@ -24,10 +23,10 @@ kleines obere|ob oberer|ob oberes -pfad|p +pfad platz|pl quelle|qu -rhein|rh +rundwanderweg|rww strasse|str untere|u unterer diff --git a/data/dictionaries/de/synonyms/synonyms.txt b/data/dictionaries/de/synonyms/synonyms.txt index 78a5de00..6c204a1f 100644 --- a/data/dictionaries/de/synonyms/synonyms.txt +++ b/data/dictionaries/de/synonyms/synonyms.txt @@ -1,3 +1,14 @@ deutsch|dt ehemalige|ehem -haltestelle|hst \ No newline at end of file +gebruder|gebr +haltestelle|hst +hinter|hint|ht +internationale|int +kleine|kl +kogel|kg +niedere|nd +rhein|rh +see|s +spitze|sp +vordere|vd|vord +wiese|ws \ No newline at end of file diff --git a/data/dictionaries/de/toponyms/toponyms.txt b/data/dictionaries/de/toponyms/toponyms.txt index 6d9657ab..cbeffd80 100644 --- 
a/data/dictionaries/de/toponyms/toponyms.txt +++ b/data/dictionaries/de/toponyms/toponyms.txt @@ -1 +1,2 @@ -burgenlaendische|burgenlandische|bgld \ No newline at end of file +burgenlaendische|burgenlandische|bgld +wiener|wr \ No newline at end of file diff --git a/data/dictionaries/de/unit_types/unit_types.txt b/data/dictionaries/de/unit_types/unit_types.txt index f6c95ae7..93a81dc1 100644 --- a/data/dictionaries/de/unit_types/unit_types.txt +++ b/data/dictionaries/de/unit_types/unit_types.txt @@ -1,2 +1,7 @@ abteilung|abt -buro \ No newline at end of file +buro +nummer|nr|# +stiege|stg +wohnung|whg +zahl|z +zimmer|zi \ No newline at end of file diff --git a/src/gazetteers.h b/src/gazetteers.h index d923d1f3..c031bca8 100644 --- a/src/gazetteers.h +++ b/src/gazetteers.h @@ -34,17 +34,18 @@ typedef enum dictionary_type { DICTIONARY_STREET_NAME = 10, DICTIONARY_STREET_TYPE = 11, - DICTIONARY_CONCATENATED_SEPARABLE = 12, - DICTIONARY_CONCATENATED_INSEPARABLE = 13, - DICTIONARY_DIRECTIONAL = 14, - DICTIONARY_QUALIFIER = 15, - DICTIONARY_BUILDING_TYPE = 16, - DICTIONARY_LEVEL = 17, - DICTIONARY_UNIT = 18, - DICTIONARY_POST_OFFICE = 19, - DICTIONARY_NO_ADDRESS = 20, - DICTIONARY_NULL = 21, - + DICTIONARY_CONCATENATED_PREFIX_SEPARABLE = 12, + DICTIONARY_CONCATENATED_SUFFIX_SEPARABLE = 13, + DICTIONARY_CONCATENATED_SUFFIX_INSEPARABLE = 14, + DICTIONARY_DIRECTIONAL = 15, + DICTIONARY_QUALIFIER = 16, + DICTIONARY_BUILDING_TYPE = 17, + DICTIONARY_LEVEL = 18, + DICTIONARY_UNIT = 19, + DICTIONARY_POST_OFFICE = 20, + DICTIONARY_NO_ADDRESS = 21, + DICTIONARY_NULL = 22, + DICTIONARY_PLACE_NAME = 50, DICTIONARY_COMPANY_TYPE = 51, DICTIONARY_GIVEN_NAME = 52, @@ -77,8 +78,9 @@ gazetteer_t gazetteers[] = { {"academic_degrees", DICTIONARY_ACADEMIC_DEGREE, ADDRESS_NAME}, {"building_types", DICTIONARY_BUILDING_TYPE, ADDRESS_NAME | ADDRESS_HOUSE_NUMBER | ADDRESS_STREET | ADDRESS_UNIT}, {"company_types", DICTIONARY_COMPANY_TYPE, ADDRESS_NAME}, - {"concatenated_suffixes_inseparable", 
DICTIONARY_CONCATENATED_INSEPARABLE, ADDRESS_STREET}, - {"concatenated_suffixes_separable", DICTIONARY_CONCATENATED_SEPARABLE, ADDRESS_STREET}, + {"concatenated_prefixes_separable", DICTIONARY_CONCATENATED_PREFIX_SEPARABLE, ADDRESS_STREET}, + {"concatenated_suffixes_inseparable", DICTIONARY_CONCATENATED_SUFFIX_INSEPARABLE, ADDRESS_STREET}, + {"concatenated_suffixes_separable", DICTIONARY_CONCATENATED_SUFFIX_SEPARABLE, ADDRESS_STREET}, {"directionals", DICTIONARY_DIRECTIONAL, ADDRESS_ANY}, {"elisions", DICTIONARY_ELISION, ADDRESS_ANY}, {"given_names", DICTIONARY_GIVEN_NAME, ADDRESS_STREET | ADDRESS_NAME},