diff --git a/resources/dictionaries/id/academic_degrees.txt b/resources/dictionaries/id/academic_degrees.txt new file mode 100644 index 00000000..a6db8c35 --- /dev/null +++ b/resources/dictionaries/id/academic_degrees.txt @@ -0,0 +1,82 @@ +doktor|dr +magister administrasi bisnis|mab|m ab +magister administrasi publik|map|m ap +magister administrasi rumah sakit|mars|m a r s +magister agama|mag|m ag +magister akuntansi|mak|m ak +magister epidemiologi|mepid|m epid +magister farmasi|mfarm|m farmasi +magister hukum|m h| +magister humaniora|mhum|m hum +magister ilmu biomedik|mbiomedik|m biomedik +magister ilmu komputer|mkom|m kom +magister kehutanan|mhut|m hut +magister kenotariatan|mkn|m kn +magister keperawatan|mkep|m kep +magister kesehatan|mkes|m kes +magister manajemen|mm|m m +magister manajemen pendidikan|mmpd|m mpd +magister manajemen sistem informasi|mmsi|m msi +magister marine|mmar|m mar +magister pendidikan|mpd|m pd +magister psikologi|mpsi|m psi +magister sains bidang ilmu pertahanan|msihan|m si han +magister seni|msn|m sn +magister statistik|mstat|m stat +magister teknik|mt|m t +magister teknologi informasi|mti|m ti +magister veteriner|mvet|m vet +master of arts|ma|m a +master of public health|mph|m p h +master of science|msc|m sc +sarjana administrasi bisnis|sab|s ab +sarjana administrasi publik|sap|s ap +sarjana agama|s ag|sag|sa g|s a g +sarjana agroteknologi|sagr|s agr +sarjana antropologi|sant|s ant +sarjana arsitektur|sars|s ars +sarjana desain|sds|s ds +sarjana ekonomi islam|sei|s e i +sarjana ekonomi|se|s e +sarjana farmasi|sfarm|s farm +sarjana hubungan internasional|shint|s h int|sh int +sarjana hukum islam|shi|s h i|sh i|s hi +sarjana hukum|sh|s h +sarjana humaniora|shum|s hum +sarjana ilmu gizi|sgz|s gz +sarjana ilmu kelautan|skel|s kel +sarjana ilmu kepolisian|s ik|sik|si k +sarjana ilmu perpustakaan|sip|s ip +sarjana ilmu politik|sarjana ilmu pemerintahan|sip|s i p +sarjana ilmu komunikasi|sikom|s i kom|si kom +sarjana intelijen|sin|s in +sarjana 
kedokteran gigi|skg|s kg +sarjana kedokteran hewan|skh|s kh +sarjana kedokteran|s ked|sked +sarjana kehutanan|shut|s hut +sarjana komputer|skom|s kom +sarjana komunikasi dan pengembangan masyarakat|skpm|s kpm +sarjana manajemen bisnis|smb|s mb +sarjana pariwisata|spar|s par +sarjana pendidikan islam|spdi|s pd i|s pdi +sarjana pendidikan sekolah dasar|spdsd|s pd sd +sarjana pendidikan|spd|s pd|sp d|s p d +sarjana pertahanan|shan|s han +sarjana pertanian|sp|s p +sarjana peternakan|spt|s pt +sarjana psikologi|s psi|spsi +sarjana sains|s si|ssi +sarjana sains|ssi|s si +sarjana sains terapan pemerintahan|sstp|s stp +sarjana sastra|ss|s s +sarjana seni|s sn|ssn +sarjana sistem informasi|ssi|s si +sarjana sosial|ssos|s sos +sarjana syari ah|s sy|ssy +sarjana teknik|st|s t +sarjana teknologi informasi|sti|s ti|s t i +sarjana teknologi pertanian|stp|s tp|st p|s t p +sarjana teologi islam|s thi|s th i|sthi +sarjana teologi kristen|sthk|s th k|s thk +sarjana teologi|s teol|steol +sarjana terapan kepolisian|strk|s trk diff --git a/resources/dictionaries/id/building_types.txt b/resources/dictionaries/id/building_types.txt new file mode 100644 index 00000000..0aab3bcb --- /dev/null +++ b/resources/dictionaries/id/building_types.txt @@ -0,0 +1,8 @@ +apartemen|apart|aprtmn +garasi|grs +gedung|gd|gdg +gudang|gdang +kebun|kebon|kbn +rumah|rmah|rmh +tower|twr +villa|vlla|vl diff --git a/resources/dictionaries/id/chains.txt b/resources/dictionaries/id/chains.txt new file mode 100644 index 00000000..59781fe0 --- /dev/null +++ b/resources/dictionaries/id/chains.txt @@ -0,0 +1,9 @@ +alfamart|alfamar +indomart|indomaret +circle k|circlek +seven eleven|sevel +carrefour +superindo +lottemart +bonjour +bright diff --git a/resources/dictionaries/id/company_types.txt b/resources/dictionaries/id/company_types.txt new file mode 100644 index 00000000..48b8364f --- /dev/null +++ b/resources/dictionaries/id/company_types.txt @@ -0,0 +1,29 @@ +bank +company|co +cooperative|coop|co op 
+corporation|corp +dan rekan|& rekan +enterprise|ent +firma|fa|f a +foundation +general partnership|gp|g p +incorporated|inc +intermediary|nt|n t +international business company|ibc|i b c +koperasi|kop|kprs +koperasi usaha dagang|kud|k ud +limited company|lc|l c|ltd co +limited liability company|llc|l l c|ltd liability company|ltd liability co +limited|ltd +national association|na|n a +nonprofit|non profit +perseroan komanditer|commanditaire vennootschap|cv|c v +perseroan terbatas|pt|p t +perusahaan daerah|pd +perusahaan dagang|pd|p d +perusahaan jawatan|pj|pjaw|p jaw +perusahaan umum|perum|p u +trust +unlimited|ultd|unltd +usaha dagang|ud|u d +yayasan diff --git a/resources/dictionaries/id/cross_streets.txt b/resources/dictionaries/id/cross_streets.txt new file mode 100644 index 00000000..023bd4a1 --- /dev/null +++ b/resources/dictionaries/id/cross_streets.txt @@ -0,0 +1,6 @@ +& +dan +di +di pojok|d pjk +pojok|pjk +di antara|d antr diff --git a/resources/dictionaries/id/directionals.txt b/resources/dictionaries/id/directionals.txt index f704bd7f..6ccb8652 100644 --- a/resources/dictionaries/id/directionals.txt +++ b/resources/dictionaries/id/directionals.txt @@ -1,9 +1,9 @@ -barat -barat daya -barat laut -selatan -tengah -tenggara -timur -timur laut -utara \ No newline at end of file +barat|bar|brt|brat|kulon|kln|klon +barat daya|brt dy|bar day|brat dya +barat laut|brt lt|bar laut +selatan|sel|sltn|slatan|kidul|kdl|kdul +tengah|teng|tngh +tenggara|teng|tnggra +timur|tim|tmur|wetan|wtan +timur laut|tim laut|tmur laut +utara|ut|utra|utr|lor diff --git a/resources/dictionaries/id/entrances.txt b/resources/dictionaries/id/entrances.txt new file mode 100644 index 00000000..76402a38 --- /dev/null +++ b/resources/dictionaries/id/entrances.txt @@ -0,0 +1,3 @@ +masuk|msk +gerbang masuk|grbg msk +pintu masuk|pntu msk|pntu msuk diff --git a/resources/dictionaries/id/level_types_basement.txt b/resources/dictionaries/id/level_types_basement.txt new file mode 100644 index 
00000000..34d55d03 --- /dev/null +++ b/resources/dictionaries/id/level_types_basement.txt @@ -0,0 +1 @@ +basement|bsm|bsmt|bsmnt|basement|bsment \ No newline at end of file diff --git a/resources/dictionaries/id/level_types_mezzanine.txt b/resources/dictionaries/id/level_types_mezzanine.txt new file mode 100644 index 00000000..aa8cbaa1 --- /dev/null +++ b/resources/dictionaries/id/level_types_mezzanine.txt @@ -0,0 +1,5 @@ +mezzanine|mezz +mezzanine floor|mezz fl|mezz floor +mezzanine level|mezz lvl|mezz level +lower mezzanine|lower mezz|lwr mezz +upper mezzanine|upper mezz|uppr mezz|upr mezz \ No newline at end of file diff --git a/resources/dictionaries/id/level_types_numbered.txt b/resources/dictionaries/id/level_types_numbered.txt new file mode 100644 index 00000000..8ec779f4 --- /dev/null +++ b/resources/dictionaries/id/level_types_numbered.txt @@ -0,0 +1,3 @@ +lantai|lt|ltai|lt. +level|lev|levl|lvel|lvl|l|/ l +platform|pf diff --git a/resources/dictionaries/id/level_types_standalone.txt b/resources/dictionaries/id/level_types_standalone.txt new file mode 100644 index 00000000..a044b9c8 --- /dev/null +++ b/resources/dictionaries/id/level_types_standalone.txt @@ -0,0 +1,14 @@ +ground|g|gd +ground floor|gdfl|gd fl|gd/fl|gd / fl|gf|g / f +ground level|gd lvl|g lvl|g level|gd level|ground lvl|gd / lvl|gl|g / l +lantai atas|lantai ats|lt ats|lt. ats|lt. atas +lantai dasar|lt dsr|lt dsar|lt. dsr|lt. 
dasar +lobby +lower ground floor|lg|lgf|lgfl|l / g|l / gf|l / g / f|l / g / fl +lower level|lwr level|lower lvl|lwr lvl +podium|pd +podium level|podium lev|podium levl|podium lvel|podium lvl|podium l|pd level|pd lev|pd levl|pd lvel|pd lvl|pd l +rooftop|rt|rf|r / t +top floor|top fl|top / f|tf|t.f|t f|t / f +upper ground floor|ug|ugf|ugfl|ug / f|ug / fl +upper|uppr|upr diff --git a/resources/dictionaries/id/level_types_sub_basement.txt b/resources/dictionaries/id/level_types_sub_basement.txt new file mode 100644 index 00000000..db2c66d2 --- /dev/null +++ b/resources/dictionaries/id/level_types_sub_basement.txt @@ -0,0 +1 @@ +sub basement|sub-basement|subbasement|sb|s.b \ No newline at end of file diff --git a/resources/dictionaries/id/near.txt b/resources/dictionaries/id/near.txt new file mode 100644 index 00000000..003ec4e1 --- /dev/null +++ b/resources/dictionaries/id/near.txt @@ -0,0 +1,3 @@ +sekitar +dalam|dalem|dlm|dlam +dekat|dkt|dkat diff --git a/resources/dictionaries/id/no_number.txt b/resources/dictionaries/id/no_number.txt new file mode 100644 index 00000000..8ef9c661 --- /dev/null +++ b/resources/dictionaries/id/no_number.txt @@ -0,0 +1 @@ +no fixed address|nfa|n f a|n / f / a \ No newline at end of file diff --git a/resources/dictionaries/id/nulls.txt b/resources/dictionaries/id/nulls.txt new file mode 100644 index 00000000..661b09b0 --- /dev/null +++ b/resources/dictionaries/id/nulls.txt @@ -0,0 +1,2 @@ +not applicable|n / a|na|n a +null \ No newline at end of file diff --git a/resources/dictionaries/id/number.txt b/resources/dictionaries/id/number.txt new file mode 100644 index 00000000..657d45e2 --- /dev/null +++ b/resources/dictionaries/id/number.txt @@ -0,0 +1 @@ +nomor|nomr|nmr|#|no|№|nr| diff --git a/resources/dictionaries/id/personal_titles.txt b/resources/dictionaries/id/personal_titles.txt index 812b53cf..1a9b9460 100644 --- a/resources/dictionaries/id/personal_titles.txt +++ b/resources/dictionaries/id/personal_titles.txt @@ -1,5 +1,34 @@ 
-imam -jenderal -pangeran -raja -sultan \ No newline at end of file +brigadir jendral|brig jen|brigjen +bapak|pak +doktor|dr|dok +doktorandus|drs +ibu|bu|ibuk|buk +jenderal|jend|jnd|jen|jendral +kapten|kpt|kapt +kolonel|kol +komandan|kmndn|kmd +kopral|koprl|kpl +komandan letnan|kmndn let|kmd lt +letnan|letn|lt +letnan kolonel|letkol|lt kol|lt kl +letnan jenderal|let jen|letjen|lt jn +mas|bang|ms|bg +mayor|may|myr +mayor jenderal|may jen|mayjen +mbak +menteri|mentri|mntr +pangeran|pngrn +pastor +pendeta|pdt +ustadz|ust +prajurit|prjrt +presiden|pres +profesor|prof +raden|rdn|r +raden rara|rdn rara|rr +raja|rja +ratu +reverend|rev +saint|st +sersan|sers|sersn +sultan|sltn diff --git a/resources/dictionaries/id/place_names.txt b/resources/dictionaries/id/place_names.txt index 75a53866..cc21778f 100644 --- a/resources/dictionaries/id/place_names.txt +++ b/resources/dictionaries/id/place_names.txt @@ -1,5 +1,222 @@ +air mancur +akademi|akdm +akuarium|akrium +amfiteater +apartmen|apt +arkade +auditorium +bakery +balai kota +ballroom +bandar udara|bandara +banjar|bjr +bank +bank perkreditan rakyat|bpr +bar +barak +bazar +bendungan +benteng +benteng +biara +bioskop +bistro +business park|bus pk|biz pk|bus prk|biz prk +cafe|café +cagar alam +child care|childcare +cinema +cineplex +club|clb +clubhouse|club house +clubrooms +cottage|cott|cottg +cuci mobil +daerah +danau +dance studio +day care +dental +dentist +departmen|dep +dermaga +desa|ds +developmen +distributor|dstr|distrib|dstrb +dojo +dokter hewan +dormitory|dorm +dusun|dsn +embassy +farmasi +farm|frm +fitness center|fitness centre +flat|flt +galeri +galeri seni +gampong|gp +garasi +gedung rekreasi +gerbang +gereja +golf club +gym|gymnasium +hall +headquarter +health center|health centre +hostel +hostel|host|hostl|hstel|hstl +hotel|hot|hotl|htel +hub +ice cream|icecream +institut|inst +istana jembatan -mesjid -perkebunan -pondok -puri \ No newline at end of file +kabupaten|kab +kafe|kafé +kali +kampung|kp|kpg|kmpg 
+kampus +kandang +kantor bupati +kantor|kntr +kantor pos +kantor pusat +kapel +karang taruna +kawasan industri +kebun binatang|bonbin +kecamatan|kec +kedai +kedutaan +kelompok bermain|kb +kelurahan|kel +kennel +kepolisian daerah|polda +kepolisian resor|polres +kepolisian sektor|polsek +kindergarten +kios +kitchen +klinik +kolam renang +komite +kompleks +komunitas +konservatori +kopi +kos|kosan +kota +krematorium +kuburan +lab +laboratorium +lapangan +lapangan golf +lembaga pemasyarakatan +lounge +mall|mll +mansion +marina +markas besar|mabes +market|mkt|mrkt +medik|med +memorial|mem +mesjid|masjid|msjd +monumen +motel|mot|motl|mtel +museum|mus +night club|nightclub +office|ofc +office tower|ofc twr|office twr|ofc tower +pabrik|pbrk +pandai besi +pangkalan udara|lanud|lanud +pantai|pante +panti jompo|pnt jmp|pnti jmpo +parking +parking lot +parkiran +park|pk|prk +paroki +pasar +pavilion +paviliun|pav +pediatric +pelabuhan +pemadam kebakaran +pemakaman +penampungan +penampungan hewan +pengadilan +penginapan +penitipan anak +penjara +perkebunan|kebun|kbn|kebon +perpustakaan +perserikatan +playgroup +plaza +politeknik +pom bensin +pondok|pndok +pos polisi +provinsi|prov +puri +pusat kebudayaan +pusat kesehatan +pusat pemulihan +pusat perawatan +pusat seni +reservation|res|resrv|resv|rsrv|rserv|rs +resort +restoran +ruangan +rumah makan|rm +rumah|rmh|rmah|rumh +rumah sakit hewan|rs hewan +rumah sakit|rs|r s +rumah sakit wanita|rs wanita|r s wanita +salon +saloon +sauna +sekolah dasar negeri|sdn|sd n +sekolah menengah atas negeri|sma n|sma +sekolah menengah pertama negeri|smp n|smpn +shopping center|shoppingcenter|shoppingcentre|shopping|shoppingtown|shopping town|shopping centre|shctr|sh ctr|s / centre|shp / centre +shop|shp +showground +sirkuit +sirkus +situ +social club +spa +stadium +stasiun +steakhouse +studio +suaka margasatwa nasional +sungai +supermarket|super market +tahanan +taman +taman kanak|tk +taman nasional +teater +tempat pembuangan umum|tpu +tempat 
pertunjukan +tepi pantai +terminal|term +toko +toko +toko buku +tower|twr +townhouse|town house +unit +unit gawat darurat|ugd +unit geriatrik +universitas|univ|uni +velodrome +vila|vl|vla +warung +yoga diff --git a/resources/dictionaries/id/qualifiers.txt b/resources/dictionaries/id/qualifiers.txt index b9d14020..37fc2612 100644 --- a/resources/dictionaries/id/qualifiers.txt +++ b/resources/dictionaries/id/qualifiers.txt @@ -1,7 +1,14 @@ -blok -gedung -kampung -kampong -kompleks +blok|blk +gedung|gdg|gd +kampung|kmpg|kpg|kp +kampong|kmpg|kpg|kp +gampong|gmpg|gpg +banjar|bjr +kabupaten|kab|kbptn|kbp +kecamatan|kec|kcmntn|kcm +kelurahan|kel|klrhn|klh +desa|ds +dusun|dsn +kompleks|komp|kmplk kota -pulau \ No newline at end of file +pulau diff --git a/resources/dictionaries/id/stopwords.txt b/resources/dictionaries/id/stopwords.txt new file mode 100644 index 00000000..1703f14d --- /dev/null +++ b/resources/dictionaries/id/stopwords.txt @@ -0,0 +1,9 @@ +berlawanan|lawanan +dan|dn|n +dari|dr +dekat|dkt +di +ke +lewat|lwt +seberang|sebrang|sbrg +semua|semoa|smua|smoa diff --git a/resources/dictionaries/id/street_types.txt b/resources/dictionaries/id/street_types.txt index 4bbadef8..a3ef2a02 100644 --- a/resources/dictionaries/id/street_types.txt +++ b/resources/dictionaries/id/street_types.txt @@ -1,6 +1,6 @@ alun-alun|alun alun|alunalun -gang -jalan|jln|jl +gang|gg +jalan|jln|jl|jl. jalan besar|jl besar|jl.besar|jln besar|jln.besar jalan desa|jl desa|jl.desa|jln desa|jln.desa jalan tol lingkar|jl tol lingkar|jl.tol lingkar|jln tol lingkar|jln.tol lingkar @@ -12,7 +12,8 @@ jalan poros|jl poros|jl.poros|jln poros|jln.poros jalan raya|jl raya|jl.raya|jln raya|jln.raya jalan tol|jl tol|jl.tol|jln tol|jln.tol jalan utama|jl utama|jl.utama|jln utama|jln.utama +jalan raya|jl raya|jl. raya|jln raya|jln. 
raya jalur jembatan -lorong -terowongan \ No newline at end of file +lorong|lrong +terowongan diff --git a/resources/dictionaries/id/synonyms.txt b/resources/dictionaries/id/synonyms.txt new file mode 100644 index 00000000..557e679f --- /dev/null +++ b/resources/dictionaries/id/synonyms.txt @@ -0,0 +1,23 @@ +banjar|bnjr|bjr +daerah|drh +danau|dano +desa|dsa|ds +dusun|dsun|dsn +flat|flt +gampong|gpg +gunung|gn +internasional|int|int'l +kabupaten|kbptn +kali|kli +kampung|kpg|kmpg +kecamatan|kcmtn +kelurahan|klrhn +mas|ms +mbak|mba|mb +medikal|med +memorial|mem +militer|mil +national|nasl|nas'l +sungai|sngai +tanjung|tjg +utama|utm diff --git a/resources/dictionaries/id/toponyms.txt b/resources/dictionaries/id/toponyms.txt new file mode 100644 index 00000000..ddba1aef --- /dev/null +++ b/resources/dictionaries/id/toponyms.txt @@ -0,0 +1,34 @@ +bali +bangka belitung|babel +banten +bengkulu +daerah istimewa yogyakarta|di yogyakarta|d.i. yogyakarta|diy +dki jakarta|jakarta +gorontalo +jambi +jawa barat|jabar +jawa tengah|jateng +jawa timur|jatim +kalimantan barat|kalbar +kalimantan selatan|kalsel +kalimantan tengah|kalteng +kalimantan timur|kaltim +kalimantan utara|kalut +kepulauan riau|kepri +lampung +maluku +maluku utara|malut +nanggroe aceh darussalam|nanggro aceh darussalam|nanggroe aceh darusalam|nad|n a d +nusa tenggara barat|ntb +nusa tenggara timur|ntt +papua +papua barat +riau +sulawesi barat|sulbar +sulawesi selatan|sulsel +sulawesi tengah|sulteng +sulawesi tenggara|sultra +sulawesi utara|sulut +sumatera barat|sumatra barat|sumbar +sumatera selatan|sumatra selatan|sumsel +sumatera utara|sumatra utara|sumut diff --git a/resources/dictionaries/id/unit_directions.txt b/resources/dictionaries/id/unit_directions.txt new file mode 100644 index 00000000..3648c709 --- /dev/null +++ b/resources/dictionaries/id/unit_directions.txt @@ -0,0 +1,4 @@ +belakang|blkg|blkang +depan|dpn +kanan|knan|knn +kiri|kri|kr diff --git a/resources/dictionaries/id/unit_types_numbered.txt 
b/resources/dictionaries/id/unit_types_numbered.txt new file mode 100644 index 00000000..a430fae7 --- /dev/null +++ b/resources/dictionaries/id/unit_types_numbered.txt @@ -0,0 +1,33 @@ +flat|flt +garasi|grsi +gedung|gdg|gd +gudang|gdng +hangar|hngr +kamar|kmr +kantor +kavling|kav|kv +kebun|kebon|kbn +kios +pabrik|pabrk|pbrk +parcel +parkiran|park +penthouse|pths|ph|pent house +reserve|rsve|rsrv|rsv|resv +room|rm +rukun tetangga|rt +rukun warga|rw +shed|shd +shop|shp +showroom|shrm +stasiun +stop +studio|stu +suite|ste|se +tenancy|tncy +toko|tko +tower|twr +townhouse|tnhs|twnhs|tnhse|twnhse +unit|un|unt|u +vila|vla|vl +warehouse|we|whs|wrhs +workshop|wshp|wkshp|wksp diff --git a/resources/dictionaries/id/unit_types_standalone.txt b/resources/dictionaries/id/unit_types_standalone.txt new file mode 100644 index 00000000..b012acde --- /dev/null +++ b/resources/dictionaries/id/unit_types_standalone.txt @@ -0,0 +1,11 @@ +depan|dpn +garasi|grsi +lobby|lby|lbby +kiri bawah|kiri bwh|kri bwh +kanan bawah|kanan bwh|knan bwh +penthouse|pths|ph|pent house +belakang|blkg +rear lower|rear lwr +belakang bawah|belakang bwh|blkg bwh +sisi +