From 99117dd319c61c0d27a6d13c38538f9e983c2428 Mon Sep 17 00:00:00 2001
From: Tommy
Date: Sun, 7 Sep 2025 20:36:34 -0400
Subject: [PATCH] Forked libpostal integration

---
 .idea/vcs.xml               |  10 +--
 deploy.sh                   |   9 ---
 go.mod                      |   1 -
 internal/controllers/api.go |  10 +--
 old-dockerfile              |  39 ---------
 pkg/postal/expand.go        | 157 ++++++++++++++++++++++++++++++++++++
 pkg/postal/parser.go        | 103 +++++++++++++++++++++++
 pkg/postal/postal.go        |   9 +++
 8 files changed, 278 insertions(+), 60 deletions(-)
 delete mode 100644 deploy.sh
 delete mode 100644 old-dockerfile
 create mode 100644 pkg/postal/expand.go
 create mode 100644 pkg/postal/parser.go
 create mode 100644 pkg/postal/postal.go

diff --git a/.idea/vcs.xml b/.idea/vcs.xml
index 9661ac7..35eb1dd 100644
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@@ -1,6 +1,6 @@
-
-
-
-
-
+
+
+
+
+
\ No newline at end of file
diff --git a/deploy.sh b/deploy.sh
deleted file mode 100644
index ee651df..0000000
--- a/deploy.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-git reset --hard
-git checkout master
-git pull
-
-docker stop addrss
-docker rm addrss
-docker build --tag addrss .
-docker run --name addrss -d -p 1337:1337 --restart always --env-file /home/tommy/addrss.env addrss
\ No newline at end of file
diff --git a/go.mod b/go.mod
index a9a540d..8c45082 100644
--- a/go.mod
+++ b/go.mod
@@ -5,7 +5,6 @@ go 1.23.0
 require (
 	github.com/go-sql-driver/mysql v1.9.3
 	github.com/google/uuid v1.6.0
-	github.com/openvenues/gopostal v0.0.0-20240426055609-4fe3a773f519
 	golang.org/x/crypto v0.41.0
 )
 
diff --git a/internal/controllers/api.go b/internal/controllers/api.go
index 6a15b58..5d4a34d 100644
--- a/internal/controllers/api.go
+++ b/internal/controllers/api.go
@@ -1,11 +1,9 @@
 package controllers
 
 import (
+	"addrss/pkg/postal"
 	"addrss/pkg/router"
 	"fmt"
-
-	expand "github.com/openvenues/gopostal/expand"
-	parser "github.com/openvenues/gopostal/parser"
 )
 
 type Api struct{}
@@ -20,7 +18,7 @@ func (a Api) AddRoutes() {
 }
 
 func expandAddress(ctx *router.Context) {
-	expansions := expand.ExpandAddress("1080 Brayden Ct. Hebron KY 41048")
+	expansions := postal.ExpandAddress("1080 Brayden Ct. Hebron KY 41048")
 	for i := 0; i < len(expansions); i++ {
 		fmt.Println(expansions[i])
 	}
@@ -34,9 +32,9 @@ func parseAddress(ctx *router.Context) {
 		ctx.Response.BadRequest(err)
 	}
 
-	options := parser.ParserOptions{}
+	options := postal.ParserOptions{}
 
-	pa := parser.ParseAddressOptions(pr.Address, options)
+	pa := postal.ParseAddressOptions(pr.Address, options)
 
 	addr := map[string]any{}
 	for i := 0; i < len(pa); i++ {
diff --git a/old-dockerfile b/old-dockerfile
deleted file mode 100644
index 9063545..0000000
--- a/old-dockerfile
+++ /dev/null
@@ -1,39 +0,0 @@
-# syntax=docker/dockerfile:1
-
-FROM debian:trixie
-WORKDIR /app
-RUN <
diff --git a/pkg/postal/expand.go b/pkg/postal/expand.go
new file mode 100644
--- /dev/null
+++ b/pkg/postal/expand.go
@@ -0,0 +1,157 @@
+package postal
+
+/*
+#cgo pkg-config: libpostal
+#include <libpostal/libpostal.h>
+#include <stdlib.h>
+
+*/
+import "C"
+
+import (
+	"log"
+	"unicode/utf8"
+	"unsafe"
+)
+
+// init loads libpostal's base data and its language classifier, and exits
+// the process through log.Fatal when either setup call reports failure.
+func init() {
+	if !bool(C.libpostal_setup()) || !bool(C.libpostal_setup_language_classifier()) {
+		log.Fatal("Could not load libpostal")
+	}
+}
+
+// ExpandOptions is the Go-side form of libpostal's normalization options.
+// ExpandAddressOptions copies every field into the matching C struct field.
+type ExpandOptions struct {
+	Languages              []string
+	AddressComponents      uint16
+	LatinAscii             bool
+	Transliterate          bool
+	StripAccents           bool
+	Decompose              bool
+	Lowercase              bool
+	TrimString             bool
+	ReplaceWordHyphens     bool
+	DeleteWordHyphens      bool
+	ReplaceNumericHyphens  bool
+	DeleteNumericHyphens   bool
+	SplitAlphaFromNumeric  bool
+	DeleteFinalPeriods     bool
+	DeleteAcronymPeriods   bool
+	DropEnglishPossessives bool
+	DeleteApostrophes      bool
+	ExpandNumex            bool
+	RomanNumerals          bool
+}
+
+// cDefaultOptions holds the defaults reported by libpostal at package load.
+var cDefaultOptions = C.libpostal_get_default_options()
+
+// GetDefaultExpansionOptions returns libpostal's default normalization
+// options converted to Go types, with Languages left nil.
+func GetDefaultExpansionOptions() ExpandOptions {
+	return ExpandOptions{
+		Languages:              nil,
+		AddressComponents:      uint16(cDefaultOptions.address_components),
+		LatinAscii:             bool(cDefaultOptions.latin_ascii),
+		Transliterate:          bool(cDefaultOptions.transliterate),
+		StripAccents:           bool(cDefaultOptions.strip_accents),
+		Decompose:              bool(cDefaultOptions.decompose),
+		Lowercase:              bool(cDefaultOptions.lowercase),
+		TrimString:             bool(cDefaultOptions.trim_string),
+		ReplaceWordHyphens:     bool(cDefaultOptions.replace_word_hyphens),
+		DeleteWordHyphens:      bool(cDefaultOptions.delete_word_hyphens),
+		ReplaceNumericHyphens:  bool(cDefaultOptions.replace_numeric_hyphens),
+		DeleteNumericHyphens:   bool(cDefaultOptions.delete_numeric_hyphens),
+		SplitAlphaFromNumeric:  bool(cDefaultOptions.split_alpha_from_numeric),
+		DeleteFinalPeriods:     bool(cDefaultOptions.delete_final_periods),
+		DeleteAcronymPeriods:   bool(cDefaultOptions.delete_acronym_periods),
+		DropEnglishPossessives: bool(cDefaultOptions.drop_english_possessives),
+		DeleteApostrophes:      bool(cDefaultOptions.delete_apostrophes),
+		ExpandNumex:            bool(cDefaultOptions.expand_numex),
+		RomanNumerals:          bool(cDefaultOptions.roman_numerals),
+	}
+}
+
+// libpostalDefaultOptions is the option set ExpandAddress passes along.
+var libpostalDefaultOptions = GetDefaultExpansionOptions()
+
+// ExpandAddressOptions returns the expansions libpostal produces for address
+// under options. It returns nil when address is not valid UTF-8 or when the
+// C array for options.Languages cannot be allocated. The package mutex mu is
+// held for the duration of the call.
+func ExpandAddressOptions(address string, options ExpandOptions) []string {
+	if !utf8.ValidString(address) {
+		return nil
+	}
+
+	mu.Lock()
+	defer mu.Unlock()
+
+	cAddress := C.CString(address)
+	defer C.free(unsafe.Pointer(cAddress))
+
+	cOptions := C.libpostal_get_default_options()
+	cOptions.num_languages = 0
+	if n := len(options.Languages); n > 0 {
+		var charPtr *C.char
+		cLanguages := C.calloc(C.size_t(n), C.size_t(unsafe.Sizeof(charPtr)))
+		if cLanguages == nil {
+			return nil
+		}
+		// Deferred calls run last-in-first-out, so the array registered here
+		// is released only after every string registered in the loop below.
+		defer C.free(cLanguages)
+
+		cLangs := unsafe.Slice((**C.char)(cLanguages), n)
+		for i, lang := range options.Languages {
+			// Each C string has to outlive the libpostal call, so it is freed
+			// when the function returns rather than at the end of the iteration.
+			cLang := C.CString(lang)
+			defer C.free(unsafe.Pointer(cLang))
+			cLangs[i] = cLang
+		}
+
+		cOptions.languages = (**C.char)(cLanguages)
+		cOptions.num_languages = C.size_t(n)
+	}
+
+	cOptions.address_components = C.uint16_t(options.AddressComponents)
+	cOptions.latin_ascii = C.bool(options.LatinAscii)
+	cOptions.transliterate = C.bool(options.Transliterate)
+	cOptions.strip_accents = C.bool(options.StripAccents)
+	cOptions.decompose = C.bool(options.Decompose)
+	cOptions.lowercase = C.bool(options.Lowercase)
+	cOptions.trim_string = C.bool(options.TrimString)
+	cOptions.replace_word_hyphens = C.bool(options.ReplaceWordHyphens)
+	cOptions.delete_word_hyphens = C.bool(options.DeleteWordHyphens)
+	cOptions.replace_numeric_hyphens = C.bool(options.ReplaceNumericHyphens)
+	cOptions.delete_numeric_hyphens = C.bool(options.DeleteNumericHyphens)
+	cOptions.split_alpha_from_numeric = C.bool(options.SplitAlphaFromNumeric)
+	cOptions.delete_final_periods = C.bool(options.DeleteFinalPeriods)
+	cOptions.delete_acronym_periods = C.bool(options.DeleteAcronymPeriods)
+	cOptions.drop_english_possessives = C.bool(options.DropEnglishPossessives)
+	cOptions.delete_apostrophes = C.bool(options.DeleteApostrophes)
+	cOptions.expand_numex = C.bool(options.ExpandNumex)
+	cOptions.roman_numerals = C.bool(options.RomanNumerals)
+
+	var cNumExpansions C.size_t
+	cExpansions := C.libpostal_expand_address(cAddress, cOptions, &cNumExpansions)
+	// The Go strings below are copies, so the C array can be destroyed on return.
+	defer C.libpostal_expansion_array_destroy(cExpansions, cNumExpansions)
+
+	cResults := unsafe.Slice(cExpansions, int(cNumExpansions))
+	expansions := make([]string, len(cResults))
+	for i, cResult := range cResults {
+		expansions[i] = C.GoString(cResult)
+	}
+
+	return expansions
+}
+
+// ExpandAddress expands address using libpostal's default options.
+func ExpandAddress(address string) []string {
+	return ExpandAddressOptions(address, libpostalDefaultOptions)
+}
diff --git a/pkg/postal/parser.go b/pkg/postal/parser.go
new file mode 100644
index 0000000..ed1d6dd
--- /dev/null
+++ b/pkg/postal/parser.go
@@ -0,0 +1,103 @@
+package postal
+
+/*
+#cgo pkg-config: libpostal
+#include <libpostal/libpostal.h>
+#include <stdlib.h>
+*/
+import "C"
+
+import (
+	"log"
+	"unicode/utf8"
+	"unsafe"
+)
+
+// init loads libpostal's base data and its address parser, and exits the
+// process through log.Fatal when either setup call reports failure.
+func init() {
+	if !bool(C.libpostal_setup()) || !bool(C.libpostal_setup_parser()) {
+		log.Fatal("Could not load libpostal")
+	}
+}
+
+// ParserOptions carries optional language and country hints for the parser.
+// An empty field leaves the corresponding libpostal default untouched.
+type ParserOptions struct {
+	Language string
+	Country  string
+}
+
+// getDefaultParserOptions returns ParserOptions with both hints empty.
+func getDefaultParserOptions() ParserOptions {
+	return ParserOptions{
+		Language: "",
+		Country:  "",
+	}
+}
+
+// parserDefaultOptions is the option set ParseAddress passes along.
+var parserDefaultOptions = getDefaultParserOptions()
+
+// ParsedComponent pairs one parsed address value with its libpostal label.
+type ParsedComponent struct {
+	Label string `json:"label"`
+	Value string `json:"value"`
+}
+
+// ParseAddressOptions parses address with libpostal and returns its labeled
+// components in the order libpostal reports them. It returns nil when address
+// is not valid UTF-8 or when libpostal returns no response. The package mutex
+// mu is held for the duration of the call.
+func ParseAddressOptions(address string, options ParserOptions) []ParsedComponent {
+	if !utf8.ValidString(address) {
+		return nil
+	}
+
+	mu.Lock()
+	defer mu.Unlock()
+
+	cAddress := C.CString(address)
+	defer C.free(unsafe.Pointer(cAddress))
+
+	cOptions := C.libpostal_get_address_parser_default_options()
+	if options.Language != "" {
+		cLanguage := C.CString(options.Language)
+		defer C.free(unsafe.Pointer(cLanguage))
+
+		cOptions.language = cLanguage
+	}
+
+	if options.Country != "" {
+		cCountry := C.CString(options.Country)
+		defer C.free(unsafe.Pointer(cCountry))
+
+		cOptions.country = cCountry
+	}
+
+	cResponse := C.libpostal_parse_address(cAddress, cOptions)
+	if cResponse == nil {
+		return nil
+	}
+	// The Go strings below are copies, so the response can be destroyed on return.
+	defer C.libpostal_address_parser_response_destroy(cResponse)
+
+	n := int(cResponse.num_components)
+	cComponents := unsafe.Slice(cResponse.components, n)
+	cLabels := unsafe.Slice(cResponse.labels, n)
+
+	parsedComponents := make([]ParsedComponent, n)
+	for i := range parsedComponents {
+		parsedComponents[i] = ParsedComponent{
+			Label: C.GoString(cLabels[i]),
+			Value: C.GoString(cComponents[i]),
+		}
+	}
+
+	return parsedComponents
+}
+
+// ParseAddress parses address using the default parser options.
+func ParseAddress(address string) []ParsedComponent {
+	return ParseAddressOptions(address, parserDefaultOptions)
+}
diff --git a/pkg/postal/postal.go b/pkg/postal/postal.go
new file mode 100644
index 0000000..4845706
--- /dev/null
+++ b/pkg/postal/postal.go
@@ -0,0 +1,9 @@
+// Package postal wraps the libpostal C library through cgo and exposes its
+// address expansion and address parsing functions.
+package postal
+
+import "sync"
+
+// mu is held by ExpandAddressOptions and ParseAddressOptions while they call
+// into libpostal.
+var mu sync.Mutex