150 lines
4.8 KiB
Go
150 lines
4.8 KiB
Go
package postal
|
|
|
|
/*
|
|
#cgo pkg-config: libpostal
|
|
#include <libpostal/libpostal.h>
|
|
#include <stdlib.h>
|
|
|
|
*/
|
|
import "C"
|
|
|
|
import (
|
|
"log"
|
|
"unicode/utf8"
|
|
"unsafe"
|
|
)
|
|
|
|
func init() {
|
|
if !bool(C.libpostal_setup()) || !bool(C.libpostal_setup_language_classifier()) {
|
|
log.Fatal("Could not load libpostal")
|
|
}
|
|
}
|
|
|
|
// ExpandOptions is the Go-side description of the options handed to
// libpostal_expand_address. ExpandAddressOptions copies each field into the
// like-named snake_case field of the C options value before making the call.
type ExpandOptions struct {
	// Languages is forwarded as the C languages array. When it is empty,
	// libpostal receives num_languages == 0.
	Languages []string
	// AddressComponents is forwarded as address_components (uint16_t).
	AddressComponents uint16

	// Boolean switches; each one is forwarded unchanged to its C counterpart.
	LatinAscii             bool
	Transliterate          bool
	StripAccents           bool
	Decompose              bool
	Lowercase              bool
	TrimString             bool
	ReplaceWordHyphens     bool
	DeleteWordHyphens      bool
	ReplaceNumericHyphens  bool
	DeleteNumericHyphens   bool
	SplitAlphaFromNumeric  bool
	DeleteFinalPeriods     bool
	DeleteAcronymPeriods   bool
	DropEnglishPossessives bool
	DeleteApostrophes      bool
	ExpandNumex            bool
	RomanNumerals          bool
}
|
|
|
|
// cDefaultOptions caches the value returned by libpostal_get_default_options.
// Being a package-level variable, it is initialized before init runs.
// GetDefaultExpansionOptions reads its fields.
var cDefaultOptions = C.libpostal_get_default_options()
|
|
|
|
func GetDefaultExpansionOptions() ExpandOptions {
|
|
return ExpandOptions{
|
|
Languages: nil,
|
|
AddressComponents: uint16(cDefaultOptions.address_components),
|
|
LatinAscii: bool(cDefaultOptions.latin_ascii),
|
|
Transliterate: bool(cDefaultOptions.transliterate),
|
|
StripAccents: bool(cDefaultOptions.strip_accents),
|
|
Decompose: bool(cDefaultOptions.decompose),
|
|
Lowercase: bool(cDefaultOptions.lowercase),
|
|
TrimString: bool(cDefaultOptions.trim_string),
|
|
ReplaceWordHyphens: bool(cDefaultOptions.replace_word_hyphens),
|
|
DeleteWordHyphens: bool(cDefaultOptions.delete_word_hyphens),
|
|
ReplaceNumericHyphens: bool(cDefaultOptions.replace_numeric_hyphens),
|
|
DeleteNumericHyphens: bool(cDefaultOptions.delete_numeric_hyphens),
|
|
SplitAlphaFromNumeric: bool(cDefaultOptions.split_alpha_from_numeric),
|
|
DeleteFinalPeriods: bool(cDefaultOptions.delete_final_periods),
|
|
DeleteAcronymPeriods: bool(cDefaultOptions.delete_acronym_periods),
|
|
DropEnglishPossessives: bool(cDefaultOptions.drop_english_possessives),
|
|
DeleteApostrophes: bool(cDefaultOptions.delete_apostrophes),
|
|
ExpandNumex: bool(cDefaultOptions.expand_numex),
|
|
RomanNumerals: bool(cDefaultOptions.roman_numerals),
|
|
}
|
|
}
|
|
|
|
// libpostalDefaultOptions is the Go-side copy of the default options, computed
// once at package initialization. ExpandAddress passes it to
// ExpandAddressOptions.
var libpostalDefaultOptions = GetDefaultExpansionOptions()
|
|
|
|
func ExpandAddressOptions(address string, options ExpandOptions) []string {
|
|
if !utf8.ValidString(address) {
|
|
return nil
|
|
}
|
|
|
|
mu.Lock()
|
|
defer mu.Unlock()
|
|
|
|
cAddress := C.CString(address)
|
|
defer C.free(unsafe.Pointer(cAddress))
|
|
|
|
var charPtr *C.char
|
|
ptrSize := unsafe.Sizeof(charPtr)
|
|
|
|
cOptions := C.libpostal_get_default_options()
|
|
if options.Languages != nil {
|
|
cLanguages := C.calloc(C.size_t(len(options.Languages)), C.size_t(ptrSize))
|
|
cLanguagesPtr := (*[1 << 30]*C.char)(unsafe.Pointer(cLanguages))
|
|
|
|
var cLang C.String
|
|
|
|
defer C.free(unsafe.Pointer(cLang))
|
|
defer C.free(unsafe.Pointer(cLanguages))
|
|
|
|
for i := 0; i < len(options.Languages); i++ {
|
|
cLang = C.CString(options.Languages[i])
|
|
cLanguagesPtr[i] = cLang
|
|
}
|
|
|
|
cOptions.languages = (**C.char)(cLanguages)
|
|
cOptions.num_languages = C.size_t(len(options.Languages))
|
|
} else {
|
|
cOptions.num_languages = 0
|
|
}
|
|
|
|
cOptions.address_components = C.uint16_t(options.AddressComponents)
|
|
cOptions.latin_ascii = C.bool(options.LatinAscii)
|
|
cOptions.transliterate = C.bool(options.Transliterate)
|
|
cOptions.strip_accents = C.bool(options.StripAccents)
|
|
cOptions.decompose = C.bool(options.Decompose)
|
|
cOptions.lowercase = C.bool(options.Lowercase)
|
|
cOptions.trim_string = C.bool(options.TrimString)
|
|
cOptions.replace_word_hyphens = C.bool(options.ReplaceWordHyphens)
|
|
cOptions.delete_word_hyphens = C.bool(options.DeleteWordHyphens)
|
|
cOptions.replace_numeric_hyphens = C.bool(options.ReplaceNumericHyphens)
|
|
cOptions.delete_numeric_hyphens = C.bool(options.DeleteNumericHyphens)
|
|
cOptions.split_alpha_from_numeric = C.bool(options.SplitAlphaFromNumeric)
|
|
cOptions.delete_final_periods = C.bool(options.DeleteFinalPeriods)
|
|
cOptions.delete_acronym_periods = C.bool(options.DeleteAcronymPeriods)
|
|
cOptions.drop_english_possessives = C.bool(options.DropEnglishPossessives)
|
|
cOptions.delete_apostrophes = C.bool(options.DeleteApostrophes)
|
|
cOptions.expand_numex = C.bool(options.ExpandNumex)
|
|
cOptions.roman_numerals = C.bool(options.RomanNumerals)
|
|
|
|
var cNumExpansions = C.size_t(0)
|
|
|
|
cExpansions := C.libpostal_expand_address(cAddress, cOptions, &cNumExpansions)
|
|
|
|
numExpansions := uint64(cNumExpansions)
|
|
|
|
var expansions = make([]string, numExpansions)
|
|
|
|
// Accessing a C array
|
|
cExpansionsPtr := (*[1 << 30]*C.char)(unsafe.Pointer(cExpansions))
|
|
|
|
var i uint64
|
|
for i = 0; i < numExpansions; i++ {
|
|
expansions[i] = C.GoString(cExpansionsPtr[i])
|
|
}
|
|
|
|
C.libpostal_expansion_array_destroy(cExpansions, cNumExpansions)
|
|
return expansions
|
|
}
|
|
|
|
func ExpandAddress(address string) []string {
|
|
return ExpandAddressOptions(address, libpostalDefaultOptions)
|
|
}
|