diff --git a/resources/dictionaries/all/ambiguous_expansions.txt b/resources/dictionaries/all/ambiguous_expansions.txt new file mode 100644 index 00000000..1f630bb0 --- /dev/null +++ b/resources/dictionaries/all/ambiguous_expansions.txt @@ -0,0 +1 @@ +ck diff --git a/resources/dictionaries/all/chains.txt b/resources/dictionaries/all/chains.txt index 8725646b..5974e524 100644 --- a/resources/dictionaries/all/chains.txt +++ b/resources/dictionaries/all/chains.txt @@ -1,4 +1,4 @@ -7-eleven|7 eleven|7-11|seven-eleven|seven eleven|seveneleven|seven-11|seven 11|7-elevens|7 elevens|7-11s|seven-elevens|seven elevens|sevenelevens|seven-11s|seven 11s +7-eleven|7 eleven|7-11|seven-eleven|seven eleven|seveneleven|seven-11|seven 11|7-elevens|7 elevens|7-11s|seven-elevens|seven elevens|sevenelevens|seven-11s|seven 11s|sevel a&w|a & w|a and w|a&ws|a & ws|a and ws|a&w restaurants|a & w restaurants|a and w restaurants ace hardware|ace hardwares adidas @@ -9,7 +9,7 @@ albertsons|albertson's aldi aldi nord aldi süd|aldi sued -alfamart +alfamart|alfamar allianz alpha bank anz @@ -108,7 +108,7 @@ chuck e. cheese's|chuck e cheese's|chuck e. cheeses|chuck e cheeses|chuck e. 
che church's chicken|churchs chicken cibc|canadian imperial bank of commerce cici's pizza|ci ci's pizza|cici's|cicis pizza|cicis -circle k|circle-k +circle k|circle-k|circlek|ck citgo citibank|citi bank|citibanks|citi banks citroën|citroen diff --git a/resources/dictionaries/id/academic_degrees.txt b/resources/dictionaries/id/academic_degrees.txt new file mode 100644 index 00000000..bf61a2c2 --- /dev/null +++ b/resources/dictionaries/id/academic_degrees.txt @@ -0,0 +1,78 @@ +magister administrasi bisnis|mab|m ab +magister administrasi publik|map|m ap +magister administrasi rumah sakit|mars|m a r s +magister agama|mag|m ag +magister akuntansi|mak|m ak +magister epidemiologi|mepid|m epid +magister farmasi|mfarm|m farm|m farmasi +magister hukum|m h|mh +magister humaniora|mhum|m hum +magister ilmu biomedik|mbiomedik|m biomedik +magister ilmu komputer|mkom|m kom +magister kehutanan|mhut|m hut +magister kenotariatan|mkn|m kn +magister keperawatan|mkep|m kep +magister kesehatan|mkes|m kes +magister manajemen|mm|m m +magister manajemen pendidikan|mmpd|m mpd +magister manajemen sistem informasi|mmsi|m msi +magister marine|mmar|m mar +magister pendidikan|mpd|m pd +magister psikologi|mpsi|m psi +magister sains bidang ilmu pertahanan|msihan|m si han +magister seni|msn|m sn +magister statistik|mstat|m stat +magister teknik|mt|m t +magister teknologi informasi|mti|m ti +magister veteriner|mvet|m vet +sarjana administrasi bisnis|sab|s ab +sarjana administrasi publik|sap|s ap +sarjana agama|s ag|sag|sa g|s a g +sarjana agroteknologi|sagr|s agr +sarjana antropologi|sant|s ant +sarjana arsitektur|sars|s ars +sarjana desain|sds|s ds +sarjana ekonomi islam|sei|s e i +sarjana ekonomi|se|s e +sarjana farmasi|sfarm|s farm +sarjana hubungan internasional|shint|s h int|sh int +sarjana hukum islam|shi|s h i|sh i|s hi +sarjana hukum|sh|s h +sarjana humaniora|shum|s hum +sarjana ilmu gizi|sgz|s gz +sarjana ilmu kelautan|skel|s kel +sarjana ilmu kepolisian|s ik|sik|si k +sarjana ilmu perpustakaan|sip|s
ip +sarjana ilmu politik|sarjana ilmu pemerintahan|sip|s i p +sarjana ilmu komunikasi|sikom|s i kom|si kom +sarjana intelijen|sin|s in +sarjana kedokteran gigi|skg|s kg +sarjana kedokteran hewan|skh|s kh +sarjana kedokteran|s ked|sked +sarjana kehutanan|shut|s hut +sarjana komputer|skom|s kom +sarjana komunikasi dan pengembangan masyarakat|skpm|s kpm +sarjana manajemen bisnis|smb|s mb +sarjana pariwisata|spar|s par +sarjana pendidikan islam|spdi|s pd i|s pdi +sarjana pendidikan sekolah dasar|spdsd|s pd sd +sarjana pendidikan|spd|s pd|sp d|s p d +sarjana pertahanan|shan|s han +sarjana pertanian|sp|s p +sarjana peternakan|spt|s pt +sarjana psikologi|s psi|spsi +sarjana sains|s si|ssi +sarjana sains|ssi|s si +sarjana sains terapan pemerintahan|sstp|s stp +sarjana sastra|ss|s s +sarjana seni|s sn|ssn +sarjana sistem informasi|ssi|s si +sarjana sosial|ssos|s sos +sarjana syari ah|s sy|ssy +sarjana teknik|st|s t +sarjana teknologi informasi|sti|s ti|s t i +sarjana teknologi pertanian|stp|s tp|st p|s t p +sarjana teologi islam|s thi|s th i|sthi +sarjana teologi kristen|sthk|s th k|s thk +sarjana teologi|s teol|steol +sarjana terapan kepolisian|strk|s trk diff --git a/resources/dictionaries/id/ambiguous_expansions.txt b/resources/dictionaries/id/ambiguous_expansions.txt new file mode 100644 index 00000000..02700b38 --- /dev/null +++ b/resources/dictionaries/id/ambiguous_expansions.txt @@ -0,0 +1,63 @@ +bg +bu +di +dn +dr +ds +fa +f a +gd +gg +gn +gp +jl +kb +ke +kp +kv +l +lt +mb +mh +m h +mm +m m +ms +mt +m t +n +no +nr +pd +p d +pj +p j +pl +po +p o +pt +p t +pu +p u +r +rm +r m +rr +r r +rs +r s +rt +rw +se +s e +sh +s h +sp +s p +ss +s s +st +s t +ud +u d +vl diff --git a/resources/dictionaries/id/building_types.txt b/resources/dictionaries/id/building_types.txt new file mode 100644 index 00000000..72c28d78 --- /dev/null +++ b/resources/dictionaries/id/building_types.txt @@ -0,0 +1,7 @@ +apartemen|apart|aprtmn +garasi|grs +gedung|gd|gdg +gudang|gdang +kebun|kebon|kbn
+rumah|rmah|rmh +vila|vla|vl diff --git a/resources/dictionaries/id/chains.txt b/resources/dictionaries/id/chains.txt new file mode 100644 index 00000000..013777b4 --- /dev/null +++ b/resources/dictionaries/id/chains.txt @@ -0,0 +1,5 @@ +indomart|indomaret +superindo|super indo +lottemart|lotte mart +bonjour +bright diff --git a/resources/dictionaries/id/company_types.txt b/resources/dictionaries/id/company_types.txt new file mode 100644 index 00000000..61f017ff --- /dev/null +++ b/resources/dictionaries/id/company_types.txt @@ -0,0 +1,15 @@ +asosiasi +dan rekan|& rekan +firma|fa|f a +koperasi|kop|kprs +koperasi usaha dagang|kud|k ud +persekutuan +perseroan komanditer|commanditaire vennootschap|comanditaire venotschap|cv|c v +perseroan terbatas|pt|p t +perusahaan daerah|pd +perusahaan dagang|pd|p d +perusahaan jawatan|pj|pjaw|p jaw|p j +perusahaan otobus|po|p o +perusahaan umum|perum|p u +usaha dagang|ud|u d +yayasan diff --git a/resources/dictionaries/id/concatenated_prefixes_separable.txt b/resources/dictionaries/id/concatenated_prefixes_separable.txt new file mode 100644 index 00000000..3f4d6c59 --- /dev/null +++ b/resources/dictionaries/id/concatenated_prefixes_separable.txt @@ -0,0 +1,2 @@ +jl. +jln.
diff --git a/resources/dictionaries/id/cross_streets.txt b/resources/dictionaries/id/cross_streets.txt new file mode 100644 index 00000000..023bd4a1 --- /dev/null +++ b/resources/dictionaries/id/cross_streets.txt @@ -0,0 +1,6 @@ +& +dan +di +di pojok|d pjk +pojok|pjk +di antara|d antr diff --git a/resources/dictionaries/id/directionals.txt b/resources/dictionaries/id/directionals.txt index f704bd7f..6ccb8652 100644 --- a/resources/dictionaries/id/directionals.txt +++ b/resources/dictionaries/id/directionals.txt @@ -1,9 +1,9 @@ -barat -barat daya -barat laut -selatan -tengah -tenggara -timur -timur laut -utara \ No newline at end of file +barat|bar|brt|brat|kulon|kln|klon +barat daya|brt dy|bar day|brat dya +barat laut|brt lt|bar laut +selatan|sel|sltn|slatan|kidul|kdl|kdul +tengah|teng|tngh +tenggara|teng|tnggra +timur|tim|tmur|wetan|wtan +timur laut|tim laut|tmur laut +utara|ut|utra|utr|lor diff --git a/resources/dictionaries/id/entrances.txt b/resources/dictionaries/id/entrances.txt new file mode 100644 index 00000000..76402a38 --- /dev/null +++ b/resources/dictionaries/id/entrances.txt @@ -0,0 +1,3 @@ +masuk|msk +gerbang masuk|grbg msk +pintu masuk|pntu msk|pntu msuk diff --git a/resources/dictionaries/id/level_types_numbered.txt b/resources/dictionaries/id/level_types_numbered.txt new file mode 100644 index 00000000..943aad24 --- /dev/null +++ b/resources/dictionaries/id/level_types_numbered.txt @@ -0,0 +1 @@ +lantai|lt|ltai|lt. diff --git a/resources/dictionaries/id/level_types_standalone.txt b/resources/dictionaries/id/level_types_standalone.txt new file mode 100644 index 00000000..0b8e8120 --- /dev/null +++ b/resources/dictionaries/id/level_types_standalone.txt @@ -0,0 +1,3 @@ +lantai atas|lantai ats|lt ats|lt. ats|lt. atas +lantai dasar|lt dsr|lt dsar|lt. dsr|lt. 
dasar +lobi diff --git a/resources/dictionaries/id/near.txt b/resources/dictionaries/id/near.txt new file mode 100644 index 00000000..38c659f9 --- /dev/null +++ b/resources/dictionaries/id/near.txt @@ -0,0 +1,3 @@ +sekitar|sktr +dalam|dalem|dlm|dlam +dekat|dkt|dkat diff --git a/resources/dictionaries/id/number.txt b/resources/dictionaries/id/number.txt new file mode 100644 index 00000000..0c61787b --- /dev/null +++ b/resources/dictionaries/id/number.txt @@ -0,0 +1 @@ +nomor|nomr|nmr|#|no|№|nr diff --git a/resources/dictionaries/id/personal_suffixes.txt b/resources/dictionaries/id/personal_suffixes.txt deleted file mode 100644 index 427730e5..00000000 --- a/resources/dictionaries/id/personal_suffixes.txt +++ /dev/null @@ -1 +0,0 @@ -utama \ No newline at end of file diff --git a/resources/dictionaries/id/personal_titles.txt b/resources/dictionaries/id/personal_titles.txt index 812b53cf..f1695011 100644 --- a/resources/dictionaries/id/personal_titles.txt +++ b/resources/dictionaries/id/personal_titles.txt @@ -1,5 +1,34 @@ -imam -jenderal -pangeran -raja -sultan \ No newline at end of file +abang|bang|bg +brigadir jendral|brig jen|brigjen +bapak|pak +doktor|dr|dok +doktorandus|drs +ibu|bu|ibuk|buk +jenderal|jend|jnd|jen|jendral +kapten|kpt|kapt +kolonel|kol +komandan|kmndn|kmd +kopral|koprl|kpl +komandan letnan|kmndn let|kmd lt +letnan|letn|lt +letnan kolonel|letkol|lt kol|lt kl +letnan jenderal|let jen|letjen|lt jn +mas|ms +mayor|may|myr +mayor jenderal|may jen|mayjen +mbak +menteri|mentri|mntr +pangeran|pngrn +pastor +pendeta|pdt +ustadz|ust +prajurit|prjrt +presiden|pres +profesor|prof +raden|rdn|r +raden mas|rm|r m +raden rara|rdn rara|rr +raja|rja +ratu +sersan|sers|sersn +sultan|sltn diff --git a/resources/dictionaries/id/place_names.txt b/resources/dictionaries/id/place_names.txt index 75a53866..809eead6 100644 --- a/resources/dictionaries/id/place_names.txt +++ b/resources/dictionaries/id/place_names.txt @@ -1,5 +1,145 @@ +air mancur +akademi|akdm 
+akuarium|akrium +amfiteater +apartemen|apartmen|apt +arkade +auditorium +balai kota +bandar udara|bandara +banjar|bjr +bank perkreditan rakyat|bpr +bar +barak +bendungan +benteng +benteng +biara +bioskop +cagar alam +cuci mobil +daerah +danau +departemen|departmen|dep +dermaga +desa|ds +distributor|dstr|distrib|dstrb +dokter hewan +dusun|dsn +farmasi +galeri +galeri seni +gampong|gp|gpg|gmpg +garasi +gedung rekreasi +gerbang +gereja +institut|inst +istana jembatan -mesjid -perkebunan -pondok -puri \ No newline at end of file +kabupaten|kab +kali +kampus +kandang +kantor bupati +kantor|kntr +kantor pos +kantor pusat +kapel +karang taruna +kawasan industri +kebun binatang|bonbin +kecamatan|kec +kedai +kedutaan +kelompok bermain|kb +kelurahan|kel +kepolisian daerah|polda +kepolisian resor|polres +kepolisian sektor|polsek +kios +klinik +kolam renang +komite +kompleks +komunitas +konservatori +kopi +kos|kosan +kota +krematorium +kuburan +laboratorium|lab +lapangan +lapangan golf +lembaga pemasyarakatan +marina +markas besar|mabes +medik|med +mesjid|masjid|msjd +monumen +pabrik|pbrk +pandai besi +pangkalan udara|lanud|lanud +pantai|pante +panti jompo|pnt jmp|pnti jmpo +parkiran +paroki +pasar +paviliun|pav +pelabuhan +pemadam kebakaran +pemakaman +penampungan +penampungan hewan +pengadilan +penginapan +penitipan anak +penjara +perkebunan|kebun|kbn|kebon +perpustakaan +perserikatan +politeknik +pom bensin +pondok|pndok +pos polisi +provinsi|prov +puri +pusat kebudayaan +pusat kesehatan +pusat pemulihan +pusat perawatan +pusat seni +restoran +ruangan +rumah makan|rm +rumah|rmh|rmah|rumh +rumah sakit hewan|rs hewan +rumah sakit|rs|r s +rumah sakit wanita|rs wanita|r s wanita +sekolah dasar negeri|sdn|sd n +sekolah menengah atas negeri|sma n|sma +sekolah menengah pertama negeri|smp n|smpn +sirkuit +sirkus +situ +stasiun +suaka margasatwa nasional +sungai +tahanan +taman +taman kanak|tk +taman nasional +teater +tempat pembuangan umum|tpu +tempat pertunjukan +tepi pantai +terminal|term +toko +toko
buku +unit gawat darurat|ugd +unit geriatrik +universitas|univ|uni +vila|vl|vla +warung +yoga diff --git a/resources/dictionaries/id/qualifiers.txt b/resources/dictionaries/id/qualifiers.txt index b9d14020..6074d89c 100644 --- a/resources/dictionaries/id/qualifiers.txt +++ b/resources/dictionaries/id/qualifiers.txt @@ -1,7 +1,13 @@ -blok -gedung -kampung -kampong -kompleks -kota -pulau \ No newline at end of file +banjar|bjr +blok|blk +desa|ds|dsa +dusun|dsn|dsun +gampong|gmpg|gpg|gp +gedung|gdg|gd +kabupaten|kab|kbptn|kbp +kampung|kmpg|kpg|kp|kampong +kecamatan|kec|kcmntn|kcm +kelurahan|kel|klrhn|klh +kompleks|komp|kmplk +kota|kta +pulau|plau|pl diff --git a/resources/dictionaries/id/stopwords.txt b/resources/dictionaries/id/stopwords.txt new file mode 100644 index 00000000..efa1719b --- /dev/null +++ b/resources/dictionaries/id/stopwords.txt @@ -0,0 +1,9 @@ +berlawanan|lawanan|lwnn +dan|dn|n +dari|dr +dekat|dkt +di +ke +lewat|lwt +seberang|sebrang|sbrg +semua|semoa|smua|smoa diff --git a/resources/dictionaries/id/street_types.txt b/resources/dictionaries/id/street_types.txt index 4bbadef8..98dc1498 100644 --- a/resources/dictionaries/id/street_types.txt +++ b/resources/dictionaries/id/street_types.txt @@ -1,18 +1,7 @@ alun-alun|alun alun|alunalun -gang -jalan|jln|jl -jalan besar|jl besar|jl.besar|jln besar|jln.besar -jalan desa|jl desa|jl.desa|jln desa|jln.desa -jalan tol lingkar|jl tol lingkar|jl.tol lingkar|jln tol lingkar|jln.tol lingkar -jalan lingkar|jl lingkar|jl.lingkar|jln lingkar|jln.lingkar -jalan lintas|jl lintas|jl.lintas|jln lintas|jln.lintas -jalan pedesaan|jl pedesaan|jl.pedesaan|jln pedesaan|jln.pedesaan -jalan pemukiman|jl pemukiman|jl.pemukiman|jln pemukiman|jln.pemukiman -jalan poros|jl poros|jl.poros|jln poros|jln.poros -jalan raya|jl raya|jl.raya|jln raya|jln.raya -jalan tol|jl tol|jl.tol|jln tol|jln.tol -jalan utama|jl utama|jl.utama|jln utama|jln.utama +gang|gg +jalan|jln|jl|jl. 
jalur jembatan -lorong -terowongan \ No newline at end of file +lorong|lrong +terowongan diff --git a/resources/dictionaries/id/synonyms.txt b/resources/dictionaries/id/synonyms.txt new file mode 100644 index 00000000..9490c227 --- /dev/null +++ b/resources/dictionaries/id/synonyms.txt @@ -0,0 +1,22 @@ +banjar|bnjr|bjr +daerah|drh +danau|dano +desa|dsa|ds +dusun|dsun|dsn +flat|flt +gampong|gpg +gunung|gn +internasional|int|int'l +kabupaten|kbptn +kali|kli +kampung|kpg|kmpg +kecamatan|kcmtn +kelurahan|klrhn +mas|ms +mbak|mba|mb +medikal|med +militer|mil +nasional|nasl|nas'l +sungai|sngai +tanjung|tjg +utama|utm diff --git a/resources/dictionaries/id/toponyms.txt b/resources/dictionaries/id/toponyms.txt new file mode 100644 index 00000000..14236e5f --- /dev/null +++ b/resources/dictionaries/id/toponyms.txt @@ -0,0 +1,34 @@ +bali +bangka belitung|babel|ba bel +banten +bengkulu +daerah istimewa yogyakarta|di yogyakarta|d.i. yogyakarta|diy|d i y +dki jakarta|jakarta +gorontalo +jambi +jawa barat|jabar|ja bar +jawa tengah|jateng|ja teng +jawa timur|jatim|ja tim +kalimantan barat|kalbar|kal bar +kalimantan selatan|kalsel|kal sel +kalimantan tengah|kalteng|kal teng +kalimantan timur|kaltim|kal tim +kalimantan utara|kalut|kal ut +kepulauan riau|kepri|kep ri +lampung +maluku +maluku utara|malut|mal ut +nanggroe aceh darussalam|nanggro aceh darussalam|nanggroe aceh darusalam|nad|n a d +nusa tenggara barat|ntb|n t b +nusa tenggara timur|ntt|n t t +papua +papua barat +riau +sulawesi barat|sulbar|sul bar +sulawesi selatan|sulsel|sul sel +sulawesi tengah|sulteng|sul teng +sulawesi tenggara|sultra|sul tra +sulawesi utara|sulut|sul ut +sumatera barat|sumatra barat|sumbar|sum bar +sumatera selatan|sumatra selatan|sumsel|sum sel +sumatera utara|sumatra utara|sumut|sum ut diff --git a/resources/dictionaries/id/unit_directions.txt b/resources/dictionaries/id/unit_directions.txt new file mode 100644 index 00000000..3648c709 --- /dev/null +++ 
b/resources/dictionaries/id/unit_directions.txt @@ -0,0 +1,4 @@ +belakang|blkg|blkang +depan|dpn +kanan|knan|knn +kiri|kri|kr diff --git a/resources/dictionaries/id/unit_types_numbered.txt b/resources/dictionaries/id/unit_types_numbered.txt new file mode 100644 index 00000000..e2f9b444 --- /dev/null +++ b/resources/dictionaries/id/unit_types_numbered.txt @@ -0,0 +1,16 @@ +garasi|grsi +gedung|gdg|gd +gudang|gdng +hangar|hngr +kamar|kmr +kantor +kavling|kav|kv +kebun|kebon|kbn +kios +pabrik|pabrk|pbrk +parkiran|park +rukun tetangga|rt +rukun warga|rw +stasiun +toko|tko +vila|vla|vl diff --git a/resources/dictionaries/id/unit_types_standalone.txt b/resources/dictionaries/id/unit_types_standalone.txt new file mode 100644 index 00000000..1b40e282 --- /dev/null +++ b/resources/dictionaries/id/unit_types_standalone.txt @@ -0,0 +1,9 @@ +belakang bawah|belakang bwh|blkg bwh +belakang bawah|belakang bwh|blkg bwh +belakang|blkg +depan|dpn +garasi|grsi +kanan bawah|kanan bwh|knan bwh +kiri bawah|kiri bwh|kri bwh +lobi|lbi|lbbi +sisi|ssi