Forked libpostal integration

This commit is contained in:
2025-09-07 20:36:34 -04:00
parent df1630adb5
commit 99117dd319
8 changed files with 265 additions and 60 deletions

149
pkg/postal/expand.go Normal file
View File

@@ -0,0 +1,149 @@
package postal
/*
#cgo pkg-config: libpostal
#include <libpostal/libpostal.h>
#include <stdlib.h>
*/
import "C"
import (
"log"
"unicode/utf8"
"unsafe"
)
// init loads libpostal and its language classifier when the package is
// imported. If either setup call reports failure the process is terminated
// through log.Fatal.
func init() {
	// && short-circuits, so the classifier is only set up after the core
	// library has loaded successfully.
	ready := bool(C.libpostal_setup()) && bool(C.libpostal_setup_language_classifier())
	if ready {
		return
	}
	log.Fatal("Could not load libpostal")
}
// ExpandOptions is the Go-side description of how an address should be
// normalized and expanded. ExpandAddressOptions copies every field onto the
// same-named (snake_case) member of the libpostal options struct before
// calling into C.
type ExpandOptions struct {
	// Languages is handed to libpostal as an array of C strings; when it is
	// nil the language count passed to libpostal is zero.
	Languages []string

	// AddressComponents is forwarded to libpostal as a 16-bit value.
	AddressComponents uint16

	// Character-level normalization switches.
	LatinAscii, Transliterate, StripAccents, Decompose, Lowercase, TrimString bool

	// Hyphen handling switches.
	ReplaceWordHyphens, DeleteWordHyphens, ReplaceNumericHyphens, DeleteNumericHyphens bool

	// Token splitting and punctuation switches.
	SplitAlphaFromNumeric, DeleteFinalPeriods, DeleteAcronymPeriods, DropEnglishPossessives, DeleteApostrophes bool

	// Numeric expression switches.
	ExpandNumex, RomanNumerals bool
}
// cDefaultOptions holds the option set returned by
// libpostal_get_default_options, captured once when the package's variables
// are initialized.
var cDefaultOptions = C.libpostal_get_default_options()
// GetDefaultExpansionOptions converts the cached libpostal default options
// into an ExpandOptions value. Languages is left nil; every other field is a
// direct conversion of the same-named C field.
func GetDefaultExpansionOptions() ExpandOptions {
	defaults := cDefaultOptions

	// Start from the zero value so Languages stays nil.
	var opts ExpandOptions
	opts.AddressComponents = uint16(defaults.address_components)
	opts.LatinAscii = bool(defaults.latin_ascii)
	opts.Transliterate = bool(defaults.transliterate)
	opts.StripAccents = bool(defaults.strip_accents)
	opts.Decompose = bool(defaults.decompose)
	opts.Lowercase = bool(defaults.lowercase)
	opts.TrimString = bool(defaults.trim_string)
	opts.ReplaceWordHyphens = bool(defaults.replace_word_hyphens)
	opts.DeleteWordHyphens = bool(defaults.delete_word_hyphens)
	opts.ReplaceNumericHyphens = bool(defaults.replace_numeric_hyphens)
	opts.DeleteNumericHyphens = bool(defaults.delete_numeric_hyphens)
	opts.SplitAlphaFromNumeric = bool(defaults.split_alpha_from_numeric)
	opts.DeleteFinalPeriods = bool(defaults.delete_final_periods)
	opts.DeleteAcronymPeriods = bool(defaults.delete_acronym_periods)
	opts.DropEnglishPossessives = bool(defaults.drop_english_possessives)
	opts.DeleteApostrophes = bool(defaults.delete_apostrophes)
	opts.ExpandNumex = bool(defaults.expand_numex)
	opts.RomanNumerals = bool(defaults.roman_numerals)
	return opts
}
// libpostalDefaultOptions is the Go-side copy of libpostal's default options,
// computed once at package initialization and used by ExpandAddress.
var libpostalDefaultOptions = GetDefaultExpansionOptions()
// ExpandAddressOptions runs libpostal's address expansion on address with the
// given options and returns each expansion as a Go string.
//
// It returns nil when address is not valid UTF-8 or when the temporary C
// array for the language list cannot be allocated. When libpostal produces no
// expansions the result is an empty, non-nil slice.
//
// The whole call runs while holding mu (declared elsewhere in the package),
// and every piece of C memory allocated here is released before returning.
func ExpandAddressOptions(address string, options ExpandOptions) []string {
	if !utf8.ValidString(address) {
		return nil
	}

	mu.Lock()
	defer mu.Unlock()

	cAddress := C.CString(address)
	defer C.free(unsafe.Pointer(cAddress))

	cOptions := C.libpostal_get_default_options()

	cOptions.num_languages = 0
	if n := len(options.Languages); n > 0 {
		var charPtr *C.char
		// calloc zero-fills the array, so slots that were never assigned
		// are NULL and safe to pass to free in the cleanup below.
		cLanguages := C.calloc(C.size_t(n), C.size_t(unsafe.Sizeof(charPtr)))
		if cLanguages == nil {
			return nil
		}
		// View the C array as a Go array so it can be indexed; only the
		// first n entries are ever touched.
		cLanguagesPtr := (*[1 << 30]*C.char)(cLanguages)
		// Release every language string and then the array itself once
		// libpostal is done with them. Deferring free on a single shared
		// variable would capture its value at defer time and leak all of
		// the strings.
		defer func() {
			for i := 0; i < n; i++ {
				C.free(unsafe.Pointer(cLanguagesPtr[i]))
			}
			C.free(cLanguages)
		}()
		for i, lang := range options.Languages {
			cLanguagesPtr[i] = C.CString(lang)
		}
		cOptions.languages = (**C.char)(cLanguages)
		cOptions.num_languages = C.size_t(n)
	}

	cOptions.address_components = C.uint16_t(options.AddressComponents)
	cOptions.latin_ascii = C.bool(options.LatinAscii)
	cOptions.transliterate = C.bool(options.Transliterate)
	cOptions.strip_accents = C.bool(options.StripAccents)
	cOptions.decompose = C.bool(options.Decompose)
	cOptions.lowercase = C.bool(options.Lowercase)
	cOptions.trim_string = C.bool(options.TrimString)
	cOptions.replace_word_hyphens = C.bool(options.ReplaceWordHyphens)
	cOptions.delete_word_hyphens = C.bool(options.DeleteWordHyphens)
	cOptions.replace_numeric_hyphens = C.bool(options.ReplaceNumericHyphens)
	cOptions.delete_numeric_hyphens = C.bool(options.DeleteNumericHyphens)
	cOptions.split_alpha_from_numeric = C.bool(options.SplitAlphaFromNumeric)
	cOptions.delete_final_periods = C.bool(options.DeleteFinalPeriods)
	cOptions.delete_acronym_periods = C.bool(options.DeleteAcronymPeriods)
	cOptions.drop_english_possessives = C.bool(options.DropEnglishPossessives)
	cOptions.delete_apostrophes = C.bool(options.DeleteApostrophes)
	cOptions.expand_numex = C.bool(options.ExpandNumex)
	cOptions.roman_numerals = C.bool(options.RomanNumerals)

	var cNumExpansions C.size_t
	cExpansions := C.libpostal_expand_address(cAddress, cOptions, &cNumExpansions)
	if cExpansions == nil {
		// Nothing to read or destroy; never index through a NULL array.
		return []string{}
	}
	defer C.libpostal_expansion_array_destroy(cExpansions, cNumExpansions)

	expansions := make([]string, int(cNumExpansions))
	// Same array-view trick as above to index the returned char** from Go.
	cExpansionsPtr := (*[1 << 30]*C.char)(unsafe.Pointer(cExpansions))
	for i := range expansions {
		// GoString copies the bytes, so the result outlives the C array.
		expansions[i] = C.GoString(cExpansionsPtr[i])
	}
	return expansions
}
func ExpandAddress(address string) []string {
return ExpandAddressOptions(address, libpostalDefaultOptions)
}