[merge] merging in the Ohio expansion numex changes from master
This commit is contained in:
@@ -1,8 +1,8 @@
|
|||||||
# libpostal: international street address NLP
|
# libpostal: international street address NLP
|
||||||
|
|
||||||
[](https://travis-ci.org/openvenues/libpostal) [](https://github.com/openvenues/libpostal/blob/master/LICENSE)
|
[](https://travis-ci.org/openvenues/libpostal) [](https://github.com/openvenues/libpostal/blob/master/LICENSE)
|
||||||
[](#sponsors)
|
[](#sponsors)
|
||||||
[](#backers)
|
[](#backers)
|
||||||
|
|
||||||
libpostal is a C library for parsing/normalizing street addresses around the world using statistical NLP and open data. The goal of this project is to understand location-based strings in every language, everywhere. For a more comprehensive overview of the research behind libpostal, be sure to check out the (lengthy) introductory blog posts:
|
libpostal is a C library for parsing/normalizing street addresses around the world using statistical NLP and open data. The goal of this project is to understand location-based strings in every language, everywhere. For a more comprehensive overview of the research behind libpostal, be sure to check out the (lengthy) introductory blog posts:
|
||||||
|
|
||||||
|
|||||||
@@ -1,9 +1,10 @@
|
|||||||
apartment|aparment|apartmen|apt
|
apartment|aparment|apartmen|apt
|
||||||
bangunan|bgn
|
bangunan|bgn
|
||||||
|
dewan
|
||||||
kondominium|kondo
|
kondominium|kondo
|
||||||
pangsapuri|p/puri
|
pangsapuri|p/puri
|
||||||
dewan
|
|
||||||
rumah|rmh
|
|
||||||
rumah pangsa
|
|
||||||
residensi|residen
|
residensi|residen
|
||||||
menara
|
rumah|rmh
|
||||||
|
rumah kediaman
|
||||||
|
rumah pangsa
|
||||||
|
menara
|
||||||
@@ -1,6 +1,8 @@
|
|||||||
bank
|
bank
|
||||||
syarikat
|
|
||||||
kelab bola sepak
|
|
||||||
berhad|bhd
|
berhad|bhd
|
||||||
sendirian berhad|sdn bhd
|
kelab
|
||||||
|
kelab bola sepak
|
||||||
persatuan
|
persatuan
|
||||||
|
sendirian berhad|sdn bhd
|
||||||
|
syarikat
|
||||||
|
yayasan
|
||||||
@@ -1 +1,3 @@
|
|||||||
mohammad|muhammad|mohd
|
mohammad|muhammad|mohd
|
||||||
|
suleiman|sulaiman
|
||||||
|
yusuf|yusof
|
||||||
|
|||||||
@@ -1 +1,92 @@
|
|||||||
puteri
|
ahli dewan undangan negeri|adun
|
||||||
|
bintara kanan
|
||||||
|
bintara muda
|
||||||
|
brigedier jeneral
|
||||||
|
cik
|
||||||
|
datin
|
||||||
|
datin
|
||||||
|
datin paduka
|
||||||
|
datin paduka patinggi
|
||||||
|
datin paduka seri
|
||||||
|
datin paduka seri panglima
|
||||||
|
datin paduka seri utama
|
||||||
|
dato
|
||||||
|
dato paduka
|
||||||
|
dato sri|dato seri
|
||||||
|
dato wira
|
||||||
|
datuk
|
||||||
|
datuk seri
|
||||||
|
doktor|dr
|
||||||
|
encik|en
|
||||||
|
fil marsyal
|
||||||
|
flait sarjan
|
||||||
|
hakim
|
||||||
|
jeneral
|
||||||
|
kadet kanan
|
||||||
|
kapten
|
||||||
|
kolonel
|
||||||
|
komander
|
||||||
|
korporal
|
||||||
|
korporal udara
|
||||||
|
laksamana
|
||||||
|
laksamana armada
|
||||||
|
laksamana madya
|
||||||
|
laksamana muda
|
||||||
|
laksamana pertama
|
||||||
|
lans korporal
|
||||||
|
laskar kanan
|
||||||
|
laskar kelas kedua
|
||||||
|
laskar kelas pertama
|
||||||
|
laskar muda
|
||||||
|
laskar udara kanan
|
||||||
|
laskar udara kelas kedua
|
||||||
|
laskar udara kelas pertama
|
||||||
|
leftenan
|
||||||
|
leftenan jeneral
|
||||||
|
leftenan kolonel
|
||||||
|
leftenan komander
|
||||||
|
leftenan madya
|
||||||
|
leftenan muda
|
||||||
|
marsyal tentera udara
|
||||||
|
mejar
|
||||||
|
mejar jeneral
|
||||||
|
menteri
|
||||||
|
naib pengerusi
|
||||||
|
parajurit muda
|
||||||
|
pegawai
|
||||||
|
pegawai kadet
|
||||||
|
pegawai waran kelas kedua
|
||||||
|
pegawai waran kelas pertama
|
||||||
|
pengerusi
|
||||||
|
perdana menteri
|
||||||
|
prebet
|
||||||
|
presiden
|
||||||
|
puan|pn
|
||||||
|
puan sri
|
||||||
|
putera|putra
|
||||||
|
putera|putra
|
||||||
|
puteri|putri
|
||||||
|
puteri|putri
|
||||||
|
raja
|
||||||
|
rekrut
|
||||||
|
sarjan
|
||||||
|
sarjan udara
|
||||||
|
sharifah
|
||||||
|
staff sarjan
|
||||||
|
sultan
|
||||||
|
syed
|
||||||
|
tan sri
|
||||||
|
tengku
|
||||||
|
timbalan perdana menteri
|
||||||
|
timbalan perdana menteri
|
||||||
|
timbalan presiden
|
||||||
|
toh puan
|
||||||
|
to puan
|
||||||
|
tuan
|
||||||
|
tun
|
||||||
|
wakil
|
||||||
|
wakil-wakil
|
||||||
|
yang berhormat
|
||||||
|
yang di-pertuan agong|agong
|
||||||
|
yang di-pertuan besar
|
||||||
|
yang di-pertua negeri
|
||||||
|
|||||||
@@ -1,4 +1,145 @@
|
|||||||
|
aiskrim
|
||||||
|
akademi
|
||||||
|
akuarium
|
||||||
|
arked
|
||||||
|
asrama
|
||||||
|
auditorium
|
||||||
|
balai bomba
|
||||||
|
balai polis
|
||||||
|
bangunan perbandaran
|
||||||
|
bank
|
||||||
|
berek
|
||||||
|
biara
|
||||||
|
bilik|blk
|
||||||
|
bilik kebal
|
||||||
|
dapur
|
||||||
|
dermaga
|
||||||
|
dewan
|
||||||
|
dewan bandar
|
||||||
|
dewan bandar
|
||||||
|
dewan konsert
|
||||||
|
dewan muzik
|
||||||
|
doktor-doktor
|
||||||
|
doktor haiwan|dr haiwan
|
||||||
|
empangan
|
||||||
|
farmasi
|
||||||
|
galeri
|
||||||
|
galeri seni
|
||||||
|
garaj
|
||||||
|
gereja
|
||||||
|
gim|gimnasium
|
||||||
|
hospital
|
||||||
|
hospital haiwan
|
||||||
|
hostel
|
||||||
|
ibu pejabat|ibu pej
|
||||||
|
institut
|
||||||
|
istana
|
||||||
|
jabatan bomba
|
||||||
|
jabatan polis
|
||||||
|
jawatankuasa|jawatan kuasa
|
||||||
jelapang
|
jelapang
|
||||||
|
jeti
|
||||||
|
kafe
|
||||||
|
kampung|kampong|kg
|
||||||
|
kampus
|
||||||
|
kasino
|
||||||
|
kawasan perindustrian|kaw perindustri|kawasan industri
|
||||||
|
kedai
|
||||||
|
kedai buku
|
||||||
|
kedutaan
|
||||||
|
kejururawatan
|
||||||
|
kelab
|
||||||
|
kelab golf
|
||||||
|
kelab malam
|
||||||
|
kelab sosial
|
||||||
|
kilang
|
||||||
|
kiropraktik
|
||||||
|
kitar semula
|
||||||
|
klinik
|
||||||
|
kolam renang
|
||||||
|
kolam renang awam
|
||||||
|
kolej
|
||||||
|
kompleks
|
||||||
|
kompleks
|
||||||
|
komuniti
|
||||||
|
kuarters|kuarter
|
||||||
|
ladang pertanian|ladang
|
||||||
|
lapangan terbang|lpg terbang
|
||||||
|
liga
|
||||||
|
mahkamah
|
||||||
|
makmal
|
||||||
masjid
|
masjid
|
||||||
|
mata air
|
||||||
|
menara
|
||||||
|
menara pejabat
|
||||||
|
misi
|
||||||
|
monumen
|
||||||
|
muzium
|
||||||
|
nurseri
|
||||||
|
padang golf
|
||||||
|
pagar
|
||||||
|
panggung
|
||||||
|
panggung wayang gambar|pawagam
|
||||||
|
pangkalan tentera udara
|
||||||
|
pantai
|
||||||
|
parkir
|
||||||
pasar
|
pasar
|
||||||
sekolah
|
pasaran
|
||||||
|
pasar raya|pasaraya
|
||||||
|
pasar tani|pasar petani
|
||||||
|
pawagam
|
||||||
|
pediatrik
|
||||||
|
pejabat
|
||||||
|
pejabat pos
|
||||||
|
pelabuhan
|
||||||
|
pelabuhan
|
||||||
|
pembangunan
|
||||||
|
pengedar
|
||||||
|
penjara
|
||||||
|
penjara
|
||||||
|
perpustakaan
|
||||||
|
perubatan
|
||||||
|
pintu gerbang
|
||||||
|
pintu pagar
|
||||||
|
politeknik
|
||||||
|
pra sekolah
|
||||||
|
presint
|
||||||
|
pusat
|
||||||
|
pusat bandar|pusat bandaraya
|
||||||
|
pusat belia
|
||||||
|
pusat kebudayaan
|
||||||
|
pusat kecergasan
|
||||||
|
pusat kejururawatan
|
||||||
|
pusat kesihatan
|
||||||
|
pusat komuniti
|
||||||
|
pusat membeli-belah|pusat beli-belah
|
||||||
|
pusat penjagaan
|
||||||
|
pusat seni
|
||||||
|
pusat seni persembahan
|
||||||
|
residen
|
||||||
|
restoran
|
||||||
|
rumah kediaman
|
||||||
|
rumah kelab
|
||||||
|
rumah orang tua
|
||||||
|
rumah pangsa|pangsapuri|p/puri
|
||||||
|
rumah|rmh
|
||||||
|
rumah sakit
|
||||||
|
salon kecantikan
|
||||||
|
sekolah menengah|sekolah men|sek menengah|sek men
|
||||||
|
sekolah rendah|sekolah ren|sek rendah|sek ren
|
||||||
|
sekolah|sek
|
||||||
|
stesen minyak
|
||||||
|
stor
|
||||||
|
studio tarian
|
||||||
|
surau
|
||||||
|
taman
|
||||||
|
taman didikan kanak-kanak|tadika
|
||||||
|
taman negara
|
||||||
|
taman perindustrian|taman industri|tmn ind|tmn perindustrian
|
||||||
|
tanah perkuburan
|
||||||
|
tapak letak kereta
|
||||||
|
teh
|
||||||
|
tempat letak kereta
|
||||||
|
teres
|
||||||
|
universiti
|
||||||
|
veterinar
|
||||||
@@ -1,3 +1,6 @@
|
|||||||
|
blok
|
||||||
|
kampung|kampong|kg
|
||||||
nusa
|
nusa
|
||||||
penampang
|
penampang
|
||||||
pulau
|
pulau
|
||||||
|
seksyen
|
||||||
|
|||||||
@@ -1,17 +1,35 @@
|
|||||||
awang|awg
|
awang|awg
|
||||||
bulatan
|
bulatan
|
||||||
|
cerunan|crn
|
||||||
|
changkat|ckt
|
||||||
|
dalaman|dlm
|
||||||
|
dataran|dtr
|
||||||
dayang|dyg
|
dayang|dyg
|
||||||
denai
|
denai
|
||||||
haji|hj
|
haji|hj
|
||||||
|
halaman|hlm
|
||||||
|
hilir|hlr
|
||||||
jalan|jln|jl
|
jalan|jln|jl
|
||||||
lapangan
|
kampung|kampong|kg
|
||||||
laluan
|
laluan
|
||||||
lebuhraya
|
laman|lmn
|
||||||
lengkok
|
langgak|lgk
|
||||||
linkaran
|
lapangan
|
||||||
lorong
|
lebuh|lbh|luh
|
||||||
kampong|kg
|
lebuhraya|lebuh raya|lbh raya|l/raya|lbr
|
||||||
|
lengkok|lkk
|
||||||
|
lingkaran|lkr
|
||||||
|
lintang|ltg
|
||||||
|
lorong|lrg
|
||||||
|
medan|mdn
|
||||||
pengiran|pg
|
pengiran|pg
|
||||||
|
persiaran|psn|psrn
|
||||||
rapat
|
rapat
|
||||||
|
simpangan|spn
|
||||||
simpang|spg
|
simpang|spg
|
||||||
sngai|sg
|
sisiran|ssr
|
||||||
|
solok|slk
|
||||||
|
sungai|sngai|sg
|
||||||
|
taman|tmn
|
||||||
|
tepian|tpn
|
||||||
|
tingkat|tkt
|
||||||
|
|||||||
@@ -1,23 +1,27 @@
|
|||||||
timur|timor
|
|
||||||
air|ayer
|
air|ayer
|
||||||
kampung|kampong|kg
|
bandar|bdr
|
||||||
itam|hitam
|
|
||||||
tanjung|tanjong
|
|
||||||
sri|seri
|
|
||||||
tasik|tasek
|
|
||||||
dwitasik|dwitasek
|
|
||||||
fasa|phasa
|
|
||||||
selasih|selaseh
|
|
||||||
putera|putra
|
|
||||||
puteri|putri
|
|
||||||
kecil|kechil
|
|
||||||
sungai|sungei|sg
|
|
||||||
teluk|telok
|
|
||||||
yusuf|yusof
|
|
||||||
batu|bt
|
batu|bt
|
||||||
bukit|bkt
|
bukit|bkt
|
||||||
buluh|buloh
|
buluh|buloh
|
||||||
jelutung|jelutong
|
|
||||||
tanduk|tandok
|
|
||||||
chempaka|cempaka
|
chempaka|cempaka
|
||||||
|
dwitasik|dwitasek
|
||||||
|
fasa|phasa
|
||||||
|
glugor|gelugor
|
||||||
|
itam|hitam
|
||||||
|
jelutung|jelutong
|
||||||
|
kampung|kampong|kg
|
||||||
|
kawasan|kaw
|
||||||
|
kecil|kechil
|
||||||
|
manjalara|menjalara
|
||||||
|
putera|putra
|
||||||
|
puteri|putri
|
||||||
|
selasih|selaseh
|
||||||
|
sri|seri
|
||||||
suleiman|sulaiman
|
suleiman|sulaiman
|
||||||
|
sungai|sungei|sg
|
||||||
|
tanduk|tandok
|
||||||
|
tanjung|tanjong
|
||||||
|
tasik|tasek
|
||||||
|
teluk|telok
|
||||||
|
timur|timor
|
||||||
|
yusuf|yusof
|
||||||
|
|||||||
@@ -9,6 +9,7 @@
|
|||||||
name: "oh"
|
name: "oh"
|
||||||
value: 0
|
value: 0
|
||||||
type: "cardinal"
|
type: "cardinal"
|
||||||
|
left: "concat_only_if_number"
|
||||||
-
|
-
|
||||||
name: "one"
|
name: "one"
|
||||||
value: 1
|
value: 1
|
||||||
|
|||||||
@@ -47,11 +47,13 @@ category_map = {
|
|||||||
|
|
||||||
LEFT_CONTEXT_MULTIPLY = 'NUMEX_LEFT_CONTEXT_MULTIPLY'
|
LEFT_CONTEXT_MULTIPLY = 'NUMEX_LEFT_CONTEXT_MULTIPLY'
|
||||||
LEFT_CONTEXT_ADD = 'NUMEX_LEFT_CONTEXT_ADD'
|
LEFT_CONTEXT_ADD = 'NUMEX_LEFT_CONTEXT_ADD'
|
||||||
|
LEFT_CONTEXT_CONCAT_ONLY_IF_NUMBER = 'NUMEX_LEFT_CONTEXT_CONCAT_ONLY_IF_NUMBER'
|
||||||
LEFT_CONTEXT_NONE = 'NUMEX_LEFT_CONTEXT_NONE'
|
LEFT_CONTEXT_NONE = 'NUMEX_LEFT_CONTEXT_NONE'
|
||||||
|
|
||||||
left_context_map = {
|
left_context_map = {
|
||||||
'add': LEFT_CONTEXT_ADD,
|
'add': LEFT_CONTEXT_ADD,
|
||||||
'multiply': LEFT_CONTEXT_MULTIPLY,
|
'multiply': LEFT_CONTEXT_MULTIPLY,
|
||||||
|
'concat_only_if_number': LEFT_CONTEXT_CONCAT_ONLY_IF_NUMBER,
|
||||||
None: LEFT_CONTEXT_NONE,
|
None: LEFT_CONTEXT_NONE,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -133,7 +133,7 @@ download_file() {
|
|||||||
for subdir in $subdirs; do
|
for subdir in $subdirs; do
|
||||||
rm -rf $data_dir/$subdir;
|
rm -rf $data_dir/$subdir;
|
||||||
done
|
done
|
||||||
tar -xvzf $local_path -C $data_dir;
|
tar -xvzf $local_path --no-same-owner -C $data_dir;
|
||||||
rm $local_path;
|
rm $local_path;
|
||||||
else
|
else
|
||||||
echo "libpostal $name up to date"
|
echo "libpostal $name up to date"
|
||||||
|
|||||||
56
src/numex.c
56
src/numex.c
@@ -709,6 +709,8 @@ numex_result_array *convert_numeric_expressions(char *str, char *lang) {
|
|||||||
bool possible_complete_token = false;
|
bool possible_complete_token = false;
|
||||||
bool complete_token = false;
|
bool complete_token = false;
|
||||||
|
|
||||||
|
bool prev_rule_was_number = false;
|
||||||
|
|
||||||
log_debug("Converting numex for str=%s, lang=%s\n", str, lang);
|
log_debug("Converting numex for str=%s, lang=%s\n", str, lang);
|
||||||
|
|
||||||
while (idx < len) {
|
while (idx < len) {
|
||||||
@@ -844,22 +846,41 @@ numex_result_array *convert_numeric_expressions(char *str, char *lang) {
|
|||||||
FLOOR_LOG_BASE(rule.value, prev_rule.radix) < FLOOR_LOG_BASE(prev_rule.value, prev_rule.radix)) {
|
FLOOR_LOG_BASE(rule.value, prev_rule.radix) < FLOOR_LOG_BASE(prev_rule.value, prev_rule.radix)) {
|
||||||
result.value += rule.value;
|
result.value += rule.value;
|
||||||
log_debug("Last token was RIGHT_CONTEXT_ADD, value=%" PRId64 "\n", result.value);
|
log_debug("Last token was RIGHT_CONTEXT_ADD, value=%" PRId64 "\n", result.value);
|
||||||
} else if (prev_rule.rule_type != NUMEX_NULL && rule.rule_type != NUMEX_STOPWORD) {
|
} else if (prev_rule.rule_type != NUMEX_NULL && rule.rule_type != NUMEX_STOPWORD && (!whole_tokens_only || complete_token)) {
|
||||||
log_debug("Had previous token with no context, finishing previous rule before returning\n");
|
log_debug("Had previous token with no context, finishing previous rule before returning\n");
|
||||||
if (!whole_tokens_only || complete_token) {
|
result.len = prev_result_len;
|
||||||
result.len = prev_result_len;
|
number_finished = true;
|
||||||
number_finished = true;
|
complete_token = false;
|
||||||
complete_token = false;
|
advance_index = false;
|
||||||
advance_index = false;
|
state = start_state;
|
||||||
state = start_state;
|
prev_rule_was_number = true;
|
||||||
rule = prev_rule = NUMEX_NULL_RULE;
|
rule = prev_rule = NUMEX_NULL_RULE;
|
||||||
prev_result_len = 0;
|
prev_result_len = 0;
|
||||||
} else {
|
} else if (prev_rule.rule_type != NUMEX_NULL && rule.rule_type != NUMEX_STOPWORD && whole_tokens_only && !complete_token) {
|
||||||
rule = NUMEX_NULL_RULE;
|
log_debug("whole_tokens_only = %d, complete_token = %d\n", whole_tokens_only, complete_token);
|
||||||
last_was_separator = false;
|
rule = NUMEX_NULL_RULE;
|
||||||
state.state = NUMEX_SEARCH_STATE_SKIP_TOKEN;
|
last_was_separator = false;
|
||||||
continue;
|
prev_rule_was_number = false;
|
||||||
}
|
state.state = NUMEX_SEARCH_STATE_SKIP_TOKEN;
|
||||||
|
continue;
|
||||||
|
} else if (rule.left_context_type == NUMEX_LEFT_CONTEXT_CONCAT_ONLY_IF_NUMBER && !prev_rule_was_number) {
|
||||||
|
log_debug("LEFT_CONTEXT_CONCAT_ONLY_IF_NUMBER, no context\n");
|
||||||
|
prev_rule = rule;
|
||||||
|
last_was_separator = false;
|
||||||
|
rule = NUMEX_NULL_RULE;
|
||||||
|
prev_result_len = result.len;
|
||||||
|
result = NULL_NUMEX_RESULT;
|
||||||
|
stopword_phrase = NULL_PHRASE;
|
||||||
|
state.state = NUMEX_SEARCH_STATE_SKIP_TOKEN;
|
||||||
|
last_was_stopword = false;
|
||||||
|
continue;
|
||||||
|
} else if (rule.left_context_type == NUMEX_LEFT_CONTEXT_CONCAT_ONLY_IF_NUMBER && prev_rule_was_number) {
|
||||||
|
last_was_separator = false;
|
||||||
|
number_finished = true;
|
||||||
|
state = start_state;
|
||||||
|
last_was_stopword = false;
|
||||||
|
prev_rule_was_number = true;
|
||||||
|
log_debug("LEFT_CONTEXT_CONCAT_ONLY_IF_NUMBER, value = %" PRId64 "\n", result.value);
|
||||||
} else if (rule.rule_type != NUMEX_STOPWORD) {
|
} else if (rule.rule_type != NUMEX_STOPWORD) {
|
||||||
result.value = rule.value;
|
result.value = rule.value;
|
||||||
log_debug("Got number, result.value=%" PRId64 "\n", result.value);
|
log_debug("Got number, result.value=%" PRId64 "\n", result.value);
|
||||||
@@ -871,6 +892,8 @@ numex_result_array *convert_numeric_expressions(char *str, char *lang) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
prev_rule_was_number = prev_rule_was_number || prev_rule.rule_type != NUMEX_NULL;
|
||||||
|
|
||||||
if (rule.rule_type != NUMEX_STOPWORD) {
|
if (rule.rule_type != NUMEX_STOPWORD) {
|
||||||
prev_rule = rule;
|
prev_rule = rule;
|
||||||
prev_result_len = result.len;
|
prev_result_len = result.len;
|
||||||
@@ -903,7 +926,6 @@ numex_result_array *convert_numeric_expressions(char *str, char *lang) {
|
|||||||
if (prev_rule.rule_type != NUMEX_NULL) {
|
if (prev_rule.rule_type != NUMEX_NULL) {
|
||||||
number_finished = true;
|
number_finished = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!set_rule) {
|
if (!set_rule) {
|
||||||
@@ -926,6 +948,7 @@ numex_result_array *convert_numeric_expressions(char *str, char *lang) {
|
|||||||
log_debug("Adding phrase, value=%" PRId64 "\n", result.value);
|
log_debug("Adding phrase, value=%" PRId64 "\n", result.value);
|
||||||
result = NULL_NUMEX_RESULT;
|
result = NULL_NUMEX_RESULT;
|
||||||
number_finished = false;
|
number_finished = false;
|
||||||
|
rule = prev_rule = NUMEX_NULL_RULE;
|
||||||
}
|
}
|
||||||
|
|
||||||
prev_state = state;
|
prev_state = state;
|
||||||
@@ -1150,7 +1173,6 @@ char *replace_numeric_expressions(char *str, char *lang) {
|
|||||||
char_array_append(replacement, ordinal_suffix);
|
char_array_append(replacement, ordinal_suffix);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
start = result.start + result.len;
|
start = result.start + result.len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -50,7 +50,8 @@ typedef enum {
|
|||||||
typedef enum {
|
typedef enum {
|
||||||
NUMEX_LEFT_CONTEXT_NONE,
|
NUMEX_LEFT_CONTEXT_NONE,
|
||||||
NUMEX_LEFT_CONTEXT_ADD,
|
NUMEX_LEFT_CONTEXT_ADD,
|
||||||
NUMEX_LEFT_CONTEXT_MULTIPLY
|
NUMEX_LEFT_CONTEXT_MULTIPLY,
|
||||||
|
NUMEX_LEFT_CONTEXT_CONCAT_ONLY_IF_NUMBER
|
||||||
} numex_left_context;
|
} numex_left_context;
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
|
|||||||
@@ -82,6 +82,8 @@ TEST test_expansions(void) {
|
|||||||
CHECK_CALL(test_expansion_contains_with_languages("123 Main St. #2f", "123 main street number 2f", options, 1, "en"));
|
CHECK_CALL(test_expansion_contains_with_languages("123 Main St. #2f", "123 main street number 2f", options, 1, "en"));
|
||||||
CHECK_CALL(test_expansion_contains_with_languages("120 E 96th St", "120 east 96 street", options, 1, "en"));
|
CHECK_CALL(test_expansion_contains_with_languages("120 E 96th St", "120 east 96 street", options, 1, "en"));
|
||||||
CHECK_CALL(test_expansion_contains_with_languages("120 E Ninety-sixth St", "120 east 96 street", options, 1, "en"));
|
CHECK_CALL(test_expansion_contains_with_languages("120 E Ninety-sixth St", "120 east 96 street", options, 1, "en"));
|
||||||
|
CHECK_CALL(test_expansion_contains_with_languages("4998 Vanderbilt Dr, Columbus, OH 43213", "4998 vanderbilt drive columbus ohio 43213", options, 1, "en"));
|
||||||
|
CHECK_CALL(test_expansion_contains_with_languages("Nineteen oh one W El Segundo Blvd", "1901 west el segundo boulevard", options, 1, "en"));
|
||||||
CHECK_CALL(test_expansion_contains_with_languages("S St. NW", "s street northwest", options, 1, "en"));
|
CHECK_CALL(test_expansion_contains_with_languages("S St. NW", "s street northwest", options, 1, "en"));
|
||||||
CHECK_CALL(test_expansion_contains_with_languages("Marktstrasse", "markt strasse", options, 1, "de"));
|
CHECK_CALL(test_expansion_contains_with_languages("Marktstrasse", "markt strasse", options, 1, "de"));
|
||||||
CHECK_CALL(test_expansion_contains_with_languages("Hoofdstraat", "hoofdstraat", options, 1, "nl"));
|
CHECK_CALL(test_expansion_contains_with_languages("Hoofdstraat", "hoofdstraat", options, 1, "nl"));
|
||||||
|
|||||||
Reference in New Issue
Block a user