Forked libpostal integration

This commit is contained in:
2025-09-07 20:36:34 -04:00
parent df1630adb5
commit 99117dd319
8 changed files with 265 additions and 60 deletions

10
.idea/vcs.xml generated
View File

@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>

View File

@@ -1,9 +0,0 @@
#!/bin/bash
# Redeploys the addrss container from the current tip of master.
#
# Any failing step aborts the script, and the image is built before the old
# container is touched, so a broken pull or build never takes the running
# service down.
set -euo pipefail

# Discard local changes and sync the working tree with master.
git reset --hard
git checkout master
git pull

# Build first: if this fails, the existing container keeps running.
docker build --tag addrss .

# On a first deploy there is no container to stop/remove; that is not an error.
docker stop addrss || true
docker rm addrss || true

docker run --name addrss -d -p 1337:1337 --restart always --env-file /home/tommy/addrss.env addrss

1
go.mod
View File

@@ -5,7 +5,6 @@ go 1.23.0
require (
github.com/go-sql-driver/mysql v1.9.3
github.com/google/uuid v1.6.0
github.com/openvenues/gopostal v0.0.0-20240426055609-4fe3a773f519
golang.org/x/crypto v0.41.0
)

View File

@@ -1,11 +1,9 @@
package controllers
import (
"addrss/pkg/postal"
"addrss/pkg/router"
"fmt"
expand "github.com/openvenues/gopostal/expand"
parser "github.com/openvenues/gopostal/parser"
)
// Api is a field-less receiver type; its AddRoutes method is declared later in
// this file.
type Api struct{}
@@ -20,7 +18,7 @@ func (a Api) AddRoutes() {
}
func expandAddress(ctx *router.Context) {
expansions := expand.ExpandAddress("1080 Brayden Ct. Hebron KY 41048")
expansions := postal.ExpandAddress("1080 Brayden Ct. Hebron KY 41048")
for i := 0; i < len(expansions); i++ {
fmt.Println(expansions[i])
}
@@ -34,9 +32,9 @@ func parseAddress(ctx *router.Context) {
ctx.Response.BadRequest(err)
}
options := parser.ParserOptions{}
options := postal.ParserOptions{}
pa := parser.ParseAddressOptions(pr.Address, options)
pa := postal.ParseAddressOptions(pr.Address, options)
addr := map[string]any{}
for i := 0; i < len(pa); i++ {

View File

@@ -1,39 +0,0 @@
# syntax=docker/dockerfile:1

# Builds the forked libpostal (senzing model) and the addrss Go binary on top
# of Debian trixie.
FROM debian:trixie

WORKDIR /app

# Toolchain + libpostal build. `set -e` makes the heredoc stop at the first
# failing command; without it only the exit status of the last line counts.
RUN <<EOF
set -e
apt-get -y update
apt-get -y upgrade
apt-get -y install curl build-essential autoconf automake libtool pkg-config wget
wget https://go.dev/dl/go1.25.1.linux-amd64.tar.gz
rm -rf /usr/local/go && tar -C /usr/local -xzf go1.25.1.linux-amd64.tar.gz
wget https://git.ratermania.net/tommy/libpostal-addrss/archive/libpostal.tar.gz
tar xzf libpostal.tar.gz
cd libpostal-addrss
./bootstrap.sh
./configure MODEL=senzing
make -j8
make install
ldconfig
EOF

# An `export PATH=...` inside a RUN only lives for that shell; ENV is what
# makes the Go toolchain visible to the RUN steps below.
ENV PATH="${PATH}:/usr/local/go/bin"

# Copy the module files first so the dependency download layer is cached
# independently of source changes.
COPY go.mod ./
COPY go.sum ./

RUN go mod download

COPY . ./

RUN go build

EXPOSE 1337

CMD [ "./addrss" ]

149
pkg/postal/expand.go Normal file
View File

@@ -0,0 +1,149 @@
package postal
/*
#cgo pkg-config: libpostal
#include <libpostal/libpostal.h>
#include <stdlib.h>
*/
import "C"
import (
"log"
"unicode/utf8"
"unsafe"
)
// init loads libpostal's core data and then its language classifier, aborting
// the process with log.Fatal if either setup call reports failure. The
// classifier setup is only attempted once the core setup has succeeded.
func init() {
	if !bool(C.libpostal_setup()) {
		log.Fatal("Could not load libpostal")
	}
	if !bool(C.libpostal_setup_language_classifier()) {
		log.Fatal("Could not load libpostal")
	}
}
// ExpandOptions is the Go-side view of the options struct returned by
// libpostal_get_default_options. ExpandAddressOptions copies each field onto
// the same-named (snake_case) field of that C struct before calling
// libpostal_expand_address.
type ExpandOptions struct {
	// Languages is handed to libpostal as the languages array; when it is nil
	// num_languages is set to 0 instead.
	Languages []string
	// AddressComponents is passed through as the C address_components value.
	AddressComponents uint16
	LatinAscii bool
	Transliterate bool
	StripAccents bool
	Decompose bool
	Lowercase bool
	TrimString bool
	ReplaceWordHyphens bool
	DeleteWordHyphens bool
	ReplaceNumericHyphens bool
	DeleteNumericHyphens bool
	SplitAlphaFromNumeric bool
	DeleteFinalPeriods bool
	DeleteAcronymPeriods bool
	DropEnglishPossessives bool
	DeleteApostrophes bool
	ExpandNumex bool
	RomanNumerals bool
}
// cDefaultOptions caches the option struct libpostal reports as its defaults.
var cDefaultOptions = C.libpostal_get_default_options()

// GetDefaultExpansionOptions converts the cached libpostal default options
// into an ExpandOptions value. Languages is left nil.
func GetDefaultExpansionOptions() ExpandOptions {
	src := cDefaultOptions

	var opts ExpandOptions
	opts.Languages = nil
	opts.AddressComponents = uint16(src.address_components)
	opts.LatinAscii = bool(src.latin_ascii)
	opts.Transliterate = bool(src.transliterate)
	opts.StripAccents = bool(src.strip_accents)
	opts.Decompose = bool(src.decompose)
	opts.Lowercase = bool(src.lowercase)
	opts.TrimString = bool(src.trim_string)
	opts.ReplaceWordHyphens = bool(src.replace_word_hyphens)
	opts.DeleteWordHyphens = bool(src.delete_word_hyphens)
	opts.ReplaceNumericHyphens = bool(src.replace_numeric_hyphens)
	opts.DeleteNumericHyphens = bool(src.delete_numeric_hyphens)
	opts.SplitAlphaFromNumeric = bool(src.split_alpha_from_numeric)
	opts.DeleteFinalPeriods = bool(src.delete_final_periods)
	opts.DeleteAcronymPeriods = bool(src.delete_acronym_periods)
	opts.DropEnglishPossessives = bool(src.drop_english_possessives)
	opts.DeleteApostrophes = bool(src.delete_apostrophes)
	opts.ExpandNumex = bool(src.expand_numex)
	opts.RomanNumerals = bool(src.roman_numerals)
	return opts
}

// libpostalDefaultOptions is the ExpandOptions value ExpandAddress uses.
var libpostalDefaultOptions = GetDefaultExpansionOptions()
// ExpandAddressOptions runs libpostal_expand_address on address with the given
// options and returns the expansions as Go strings.
//
// It returns nil when address is not valid UTF-8 or when the temporary C
// language array cannot be allocated. Calls are serialized through the
// package mutex. Every C allocation made here (the address, the language
// array and each language string) is released before returning, as is the
// expansion array returned by libpostal.
func ExpandAddressOptions(address string, options ExpandOptions) []string {
	if !utf8.ValidString(address) {
		return nil
	}

	mu.Lock()
	defer mu.Unlock()

	cAddress := C.CString(address)
	defer C.free(unsafe.Pointer(cAddress))

	cOptions := C.libpostal_get_default_options()

	if numLangs := len(options.Languages); numLangs > 0 {
		var charPtr *C.char
		cLanguages := C.calloc(C.size_t(numLangs), C.size_t(unsafe.Sizeof(charPtr)))
		if cLanguages == nil {
			return nil
		}
		defer C.free(cLanguages)

		// View the C array as a Go slice so it can be filled by index.
		cLangs := unsafe.Slice((**C.char)(cLanguages), numLangs)
		for i, lang := range options.Languages {
			cLangs[i] = C.CString(lang)
		}
		// Deferred calls run last-in-first-out, so the strings are freed
		// while the array that holds their pointers is still alive.
		defer func() {
			for _, cLang := range cLangs {
				C.free(unsafe.Pointer(cLang))
			}
		}()

		cOptions.languages = (**C.char)(cLanguages)
		cOptions.num_languages = C.size_t(numLangs)
	} else {
		cOptions.num_languages = 0
	}

	cOptions.address_components = C.uint16_t(options.AddressComponents)
	cOptions.latin_ascii = C.bool(options.LatinAscii)
	cOptions.transliterate = C.bool(options.Transliterate)
	cOptions.strip_accents = C.bool(options.StripAccents)
	cOptions.decompose = C.bool(options.Decompose)
	cOptions.lowercase = C.bool(options.Lowercase)
	cOptions.trim_string = C.bool(options.TrimString)
	cOptions.replace_word_hyphens = C.bool(options.ReplaceWordHyphens)
	cOptions.delete_word_hyphens = C.bool(options.DeleteWordHyphens)
	cOptions.replace_numeric_hyphens = C.bool(options.ReplaceNumericHyphens)
	cOptions.delete_numeric_hyphens = C.bool(options.DeleteNumericHyphens)
	cOptions.split_alpha_from_numeric = C.bool(options.SplitAlphaFromNumeric)
	cOptions.delete_final_periods = C.bool(options.DeleteFinalPeriods)
	cOptions.delete_acronym_periods = C.bool(options.DeleteAcronymPeriods)
	cOptions.drop_english_possessives = C.bool(options.DropEnglishPossessives)
	cOptions.delete_apostrophes = C.bool(options.DeleteApostrophes)
	cOptions.expand_numex = C.bool(options.ExpandNumex)
	cOptions.roman_numerals = C.bool(options.RomanNumerals)

	var cNumExpansions C.size_t
	cExpansions := C.libpostal_expand_address(cAddress, cOptions, &cNumExpansions)
	defer C.libpostal_expansion_array_destroy(cExpansions, cNumExpansions)

	numExpansions := int(cNumExpansions)
	expansions := make([]string, numExpansions)
	// unsafe.Slice yields a nil slice for a nil pointer with length 0, so an
	// empty result needs no special case.
	for i, cExpansion := range unsafe.Slice(cExpansions, numExpansions) {
		expansions[i] = C.GoString(cExpansion)
	}
	return expansions
}
// ExpandAddress expands address using the package's default expansion options.
func ExpandAddress(address string) []string {
	expansions := ExpandAddressOptions(address, libpostalDefaultOptions)
	return expansions
}

102
pkg/postal/parser.go Normal file
View File

@@ -0,0 +1,102 @@
package postal
/*
#cgo pkg-config: libpostal
#include <libpostal/libpostal.h>
#include <stdlib.h>
*/
import "C"
import (
"log"
"unicode/utf8"
"unsafe"
)
// init loads libpostal's core data and then its address parser, aborting the
// process with log.Fatal if either setup call reports failure. The parser
// setup is only attempted once the core setup has succeeded.
func init() {
	if !bool(C.libpostal_setup()) {
		log.Fatal("Could not load libpostal")
	}
	if !bool(C.libpostal_setup_parser()) {
		log.Fatal("Could not load libpostal")
	}
}
// ParserOptions carries the optional language and country passed to
// libpostal's address parser. ParseAddressOptions only overrides the
// corresponding libpostal default when a field is non-empty.
type ParserOptions struct {
	Language string
	Country string
}
// getDefaultParserOptions returns a ParserOptions with both Language and
// Country empty.
func getDefaultParserOptions() ParserOptions {
	var defaults ParserOptions
	return defaults
}

// parserDefaultOptions is the ParserOptions value ParseAddress uses.
var parserDefaultOptions = getDefaultParserOptions()
// ParsedComponent is one label/value pair produced by the address parser. It
// serializes to JSON with the keys "label" and "value".
type ParsedComponent struct {
	Label string `json:"label"`
	Value string `json:"value"`
}
// ParseAddressOptions runs libpostal_parse_address on address and returns the
// resulting label/value pairs in the order libpostal reports them.
//
// A non-empty options.Language or options.Country overrides the matching
// libpostal default; empty fields leave the default untouched. The function
// returns nil when address is not valid UTF-8 or when libpostal returns no
// response. Calls are serialized through the package mutex, and all C memory
// allocated here, plus the libpostal response, is released before returning.
func ParseAddressOptions(address string, options ParserOptions) []ParsedComponent {
	if !utf8.ValidString(address) {
		return nil
	}

	mu.Lock()
	defer mu.Unlock()

	cAddress := C.CString(address)
	defer C.free(unsafe.Pointer(cAddress))

	cOptions := C.libpostal_get_address_parser_default_options()
	if options.Language != "" {
		cLanguage := C.CString(options.Language)
		defer C.free(unsafe.Pointer(cLanguage))

		cOptions.language = cLanguage
	}

	if options.Country != "" {
		cCountry := C.CString(options.Country)
		defer C.free(unsafe.Pointer(cCountry))

		cOptions.country = cCountry
	}

	cResponse := C.libpostal_parse_address(cAddress, cOptions)
	if cResponse == nil {
		return nil
	}
	// Deferred so the response is released on every exit path below.
	defer C.libpostal_address_parser_response_destroy(cResponse)

	numComponents := int(cResponse.num_components)

	// unsafe.Slice builds exactly-sized views over the C arrays; unlike a cast
	// to a huge fixed-size array type it compiles on 32-bit targets and
	// tolerates a nil pointer when the length is 0.
	cLabels := unsafe.Slice(cResponse.labels, numComponents)
	cValues := unsafe.Slice(cResponse.components, numComponents)

	parsedComponents := make([]ParsedComponent, numComponents)
	for i := range parsedComponents {
		parsedComponents[i] = ParsedComponent{
			Label: C.GoString(cLabels[i]),
			Value: C.GoString(cValues[i]),
		}
	}

	return parsedComponents
}
// ParseAddress parses address using the package's default parser options.
func ParseAddress(address string) []ParsedComponent {
	components := ParseAddressOptions(address, parserDefaultOptions)
	return components
}

5
pkg/postal/postal.go Normal file
View File

@@ -0,0 +1,5 @@
package postal
import "sync"
// mu is held by ExpandAddressOptions and ParseAddressOptions for the duration
// of their libpostal calls, so only one of them runs at a time.
var mu sync.Mutex