Forked libpostal integration
This commit is contained in:
10
.idea/vcs.xml
generated
10
.idea/vcs.xml
generated
@@ -1,6 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||
</component>
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
||||
@@ -1,9 +0,0 @@
|
||||
#!/bin/bash
# Redeploys the addrss container from the tip of master.
#
# Abort on the first failing command so a broken pull or build never
# reaches the deployment steps below.
set -e

# Discard local modifications and sync the checkout to master.
git reset --hard
git checkout master
git pull

# Build BEFORE touching the running container: if the build fails, the
# currently deployed container keeps serving.
docker build --tag addrss .

# The container may not exist (e.g. first deploy); that must not abort the script.
docker stop addrss || true
docker rm addrss || true

docker run --name addrss -d -p 1337:1337 --restart always --env-file /home/tommy/addrss.env addrss
|
||||
1
go.mod
1
go.mod
@@ -5,7 +5,6 @@ go 1.23.0
|
||||
require (
|
||||
github.com/go-sql-driver/mysql v1.9.3
|
||||
github.com/google/uuid v1.6.0
|
||||
github.com/openvenues/gopostal v0.0.0-20240426055609-4fe3a773f519
|
||||
golang.org/x/crypto v0.41.0
|
||||
)
|
||||
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
package controllers
|
||||
|
||||
import (
|
||||
"addrss/pkg/postal"
|
||||
"addrss/pkg/router"
|
||||
"fmt"
|
||||
|
||||
expand "github.com/openvenues/gopostal/expand"
|
||||
parser "github.com/openvenues/gopostal/parser"
|
||||
)
|
||||
|
||||
type Api struct{}
|
||||
@@ -20,7 +18,7 @@ func (a Api) AddRoutes() {
|
||||
}
|
||||
|
||||
func expandAddress(ctx *router.Context) {
|
||||
expansions := expand.ExpandAddress("1080 Brayden Ct. Hebron KY 41048")
|
||||
expansions := postal.ExpandAddress("1080 Brayden Ct. Hebron KY 41048")
|
||||
for i := 0; i < len(expansions); i++ {
|
||||
fmt.Println(expansions[i])
|
||||
}
|
||||
@@ -34,9 +32,9 @@ func parseAddress(ctx *router.Context) {
|
||||
ctx.Response.BadRequest(err)
|
||||
}
|
||||
|
||||
options := parser.ParserOptions{}
|
||||
options := postal.ParserOptions{}
|
||||
|
||||
pa := parser.ParseAddressOptions(pr.Address, options)
|
||||
pa := postal.ParseAddressOptions(pr.Address, options)
|
||||
addr := map[string]any{}
|
||||
|
||||
for i := 0; i < len(pa); i++ {
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
# syntax=docker/dockerfile:1

# Image for the addrss service: installs the Go toolchain, builds the forked
# libpostal from source, then compiles the Go binary against it.
FROM debian:trixie

# Put the Go toolchain on PATH for every later instruction. An `export`
# inside a RUN only lasts for that RUN's shell, so the later
# `RUN go mod download` / `RUN go build` steps would not find `go`.
ENV PATH=$PATH:/usr/local/go/bin

WORKDIR /app

RUN <<EOF
# Stop at the first failing command; without this only the exit status of
# the last command (ldconfig) decides whether the layer succeeds.
set -e

apt-get -y update
apt-get -y upgrade
apt-get -y install curl build-essential autoconf automake libtool pkg-config wget

wget https://go.dev/dl/go1.25.1.linux-amd64.tar.gz
rm -rf /usr/local/go && tar -C /usr/local -xzf go1.25.1.linux-amd64.tar.gz

wget https://git.ratermania.net/tommy/libpostal-addrss/archive/libpostal.tar.gz
tar xzf libpostal.tar.gz
cd libpostal-addrss

./bootstrap.sh
./configure MODEL=senzing

make -j8
make install

# Refresh the dynamic linker cache so the freshly installed libpostal is found.
ldconfig
EOF

WORKDIR /app
# Copy the module files first so the dependency download layer is cached
# independently of source changes.
COPY go.mod ./
COPY go.sum ./
RUN go mod download
COPY . ./
RUN go build

EXPOSE 1337

RUN ls -la

CMD [ "./addrss" ]
|
||||
|
||||
149
pkg/postal/expand.go
Normal file
149
pkg/postal/expand.go
Normal file
@@ -0,0 +1,149 @@
|
||||
package postal
|
||||
|
||||
/*
|
||||
#cgo pkg-config: libpostal
|
||||
#include <libpostal/libpostal.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
*/
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"log"
|
||||
"unicode/utf8"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
func init() {
|
||||
if !bool(C.libpostal_setup()) || !bool(C.libpostal_setup_language_classifier()) {
|
||||
log.Fatal("Could not load libpostal")
|
||||
}
|
||||
}
|
||||
|
||||
// ExpandOptions is the Go-side mirror of libpostal's expansion options.
// ExpandAddressOptions copies every field into the libpostal option of the
// same (snake_case) name before expanding an address.
type ExpandOptions struct {
	// Languages is handed to libpostal as its language list; when nil the
	// library is told that zero languages were supplied.
	Languages []string

	// AddressComponents is copied verbatim into address_components.
	// NOTE(review): presumably a bit set of libpostal component flags — confirm.
	AddressComponents uint16

	// Script, accent and case switches.
	LatinAscii, Transliterate, StripAccents, Decompose, Lowercase, TrimString bool

	// Hyphen switches.
	ReplaceWordHyphens, DeleteWordHyphens, ReplaceNumericHyphens, DeleteNumericHyphens bool

	// Token and punctuation switches.
	SplitAlphaFromNumeric, DeleteFinalPeriods, DeleteAcronymPeriods bool

	// Possessive, apostrophe and numeric-expression switches.
	DropEnglishPossessives, DeleteApostrophes, ExpandNumex, RomanNumerals bool
}
|
||||
|
||||
var cDefaultOptions = C.libpostal_get_default_options()
|
||||
|
||||
func GetDefaultExpansionOptions() ExpandOptions {
|
||||
return ExpandOptions{
|
||||
Languages: nil,
|
||||
AddressComponents: uint16(cDefaultOptions.address_components),
|
||||
LatinAscii: bool(cDefaultOptions.latin_ascii),
|
||||
Transliterate: bool(cDefaultOptions.transliterate),
|
||||
StripAccents: bool(cDefaultOptions.strip_accents),
|
||||
Decompose: bool(cDefaultOptions.decompose),
|
||||
Lowercase: bool(cDefaultOptions.lowercase),
|
||||
TrimString: bool(cDefaultOptions.trim_string),
|
||||
ReplaceWordHyphens: bool(cDefaultOptions.replace_word_hyphens),
|
||||
DeleteWordHyphens: bool(cDefaultOptions.delete_word_hyphens),
|
||||
ReplaceNumericHyphens: bool(cDefaultOptions.replace_numeric_hyphens),
|
||||
DeleteNumericHyphens: bool(cDefaultOptions.delete_numeric_hyphens),
|
||||
SplitAlphaFromNumeric: bool(cDefaultOptions.split_alpha_from_numeric),
|
||||
DeleteFinalPeriods: bool(cDefaultOptions.delete_final_periods),
|
||||
DeleteAcronymPeriods: bool(cDefaultOptions.delete_acronym_periods),
|
||||
DropEnglishPossessives: bool(cDefaultOptions.drop_english_possessives),
|
||||
DeleteApostrophes: bool(cDefaultOptions.delete_apostrophes),
|
||||
ExpandNumex: bool(cDefaultOptions.expand_numex),
|
||||
RomanNumerals: bool(cDefaultOptions.roman_numerals),
|
||||
}
|
||||
}
|
||||
|
||||
var libpostalDefaultOptions = GetDefaultExpansionOptions()
|
||||
|
||||
func ExpandAddressOptions(address string, options ExpandOptions) []string {
|
||||
if !utf8.ValidString(address) {
|
||||
return nil
|
||||
}
|
||||
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
|
||||
cAddress := C.CString(address)
|
||||
defer C.free(unsafe.Pointer(cAddress))
|
||||
|
||||
var charPtr *C.char
|
||||
ptrSize := unsafe.Sizeof(charPtr)
|
||||
|
||||
cOptions := C.libpostal_get_default_options()
|
||||
if options.Languages != nil {
|
||||
cLanguages := C.calloc(C.size_t(len(options.Languages)), C.size_t(ptrSize))
|
||||
cLanguagesPtr := (*[1 << 30]*C.char)(unsafe.Pointer(cLanguages))
|
||||
|
||||
var cLang C.String
|
||||
|
||||
defer C.free(unsafe.Pointer(cLang))
|
||||
defer C.free(unsafe.Pointer(cLanguages))
|
||||
|
||||
for i := 0; i < len(options.Languages); i++ {
|
||||
cLang = C.CString(options.Languages[i])
|
||||
cLanguagesPtr[i] = cLang
|
||||
}
|
||||
|
||||
cOptions.languages = (**C.char)(cLanguages)
|
||||
cOptions.num_languages = C.size_t(len(options.Languages))
|
||||
} else {
|
||||
cOptions.num_languages = 0
|
||||
}
|
||||
|
||||
cOptions.address_components = C.uint16_t(options.AddressComponents)
|
||||
cOptions.latin_ascii = C.bool(options.LatinAscii)
|
||||
cOptions.transliterate = C.bool(options.Transliterate)
|
||||
cOptions.strip_accents = C.bool(options.StripAccents)
|
||||
cOptions.decompose = C.bool(options.Decompose)
|
||||
cOptions.lowercase = C.bool(options.Lowercase)
|
||||
cOptions.trim_string = C.bool(options.TrimString)
|
||||
cOptions.replace_word_hyphens = C.bool(options.ReplaceWordHyphens)
|
||||
cOptions.delete_word_hyphens = C.bool(options.DeleteWordHyphens)
|
||||
cOptions.replace_numeric_hyphens = C.bool(options.ReplaceNumericHyphens)
|
||||
cOptions.delete_numeric_hyphens = C.bool(options.DeleteNumericHyphens)
|
||||
cOptions.split_alpha_from_numeric = C.bool(options.SplitAlphaFromNumeric)
|
||||
cOptions.delete_final_periods = C.bool(options.DeleteFinalPeriods)
|
||||
cOptions.delete_acronym_periods = C.bool(options.DeleteAcronymPeriods)
|
||||
cOptions.drop_english_possessives = C.bool(options.DropEnglishPossessives)
|
||||
cOptions.delete_apostrophes = C.bool(options.DeleteApostrophes)
|
||||
cOptions.expand_numex = C.bool(options.ExpandNumex)
|
||||
cOptions.roman_numerals = C.bool(options.RomanNumerals)
|
||||
|
||||
var cNumExpansions = C.size_t(0)
|
||||
|
||||
cExpansions := C.libpostal_expand_address(cAddress, cOptions, &cNumExpansions)
|
||||
|
||||
numExpansions := uint64(cNumExpansions)
|
||||
|
||||
var expansions = make([]string, numExpansions)
|
||||
|
||||
// Accessing a C array
|
||||
cExpansionsPtr := (*[1 << 30]*C.char)(unsafe.Pointer(cExpansions))
|
||||
|
||||
var i uint64
|
||||
for i = 0; i < numExpansions; i++ {
|
||||
expansions[i] = C.GoString(cExpansionsPtr[i])
|
||||
}
|
||||
|
||||
C.libpostal_expansion_array_destroy(cExpansions, cNumExpansions)
|
||||
return expansions
|
||||
}
|
||||
|
||||
func ExpandAddress(address string) []string {
|
||||
return ExpandAddressOptions(address, libpostalDefaultOptions)
|
||||
}
|
||||
102
pkg/postal/parser.go
Normal file
102
pkg/postal/parser.go
Normal file
@@ -0,0 +1,102 @@
|
||||
package postal
|
||||
|
||||
/*
|
||||
#cgo pkg-config: libpostal
|
||||
#include <libpostal/libpostal.h>
|
||||
#include <stdlib.h>
|
||||
*/
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"log"
|
||||
"unicode/utf8"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
func init() {
|
||||
if !bool(C.libpostal_setup()) || !bool(C.libpostal_setup_parser()) {
|
||||
log.Fatal("Could not load libpostal")
|
||||
}
|
||||
}
|
||||
|
||||
// ParserOptions carries the optional hints passed to the libpostal address
// parser. ParseAddressOptions forwards a field only when it is non-empty;
// empty fields leave the library's default parser options untouched.
type ParserOptions struct {
	Language, Country string
}
|
||||
|
||||
func getDefaultParserOptions() ParserOptions {
|
||||
return ParserOptions{
|
||||
Language: "",
|
||||
Country: "",
|
||||
}
|
||||
}
|
||||
|
||||
var parserDefaultOptions = getDefaultParserOptions()
|
||||
|
||||
// ParsedComponent is one labelled piece of a parsed address, as produced by
// ParseAddressOptions from the parser's parallel label and component arrays.
type ParsedComponent struct {
	// Label is the parser's label for this component.
	Label string `json:"label"`
	// Value is the component text the label applies to.
	Value string `json:"value"`
}
|
||||
|
||||
func ParseAddressOptions(address string, options ParserOptions) []ParsedComponent {
|
||||
if !utf8.ValidString(address) {
|
||||
return nil
|
||||
}
|
||||
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
|
||||
cAddress := C.CString(address)
|
||||
defer C.free(unsafe.Pointer(cAddress))
|
||||
|
||||
cOptions := C.libpostal_get_address_parser_default_options()
|
||||
if options.Language != "" {
|
||||
cLanguage := C.CString(options.Language)
|
||||
defer C.free(unsafe.Pointer(cLanguage))
|
||||
|
||||
cOptions.language = cLanguage
|
||||
}
|
||||
|
||||
if options.Country != "" {
|
||||
cCountry := C.CString(options.Country)
|
||||
defer C.free(unsafe.Pointer(cCountry))
|
||||
|
||||
cOptions.country = cCountry
|
||||
}
|
||||
|
||||
cAddressParserResponsePtr := C.libpostal_parse_address(cAddress, cOptions)
|
||||
|
||||
if cAddressParserResponsePtr == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
cAddressParserResponse := *cAddressParserResponsePtr
|
||||
|
||||
cNumComponents := cAddressParserResponse.num_components
|
||||
cComponents := cAddressParserResponse.components
|
||||
cLabels := cAddressParserResponse.labels
|
||||
|
||||
numComponents := uint64(cNumComponents)
|
||||
|
||||
parsedComponents := make([]ParsedComponent, numComponents)
|
||||
|
||||
// Accessing a C array
|
||||
cComponentsPtr := (*[1 << 30]*C.char)(unsafe.Pointer(cComponents))[:numComponents:numComponents]
|
||||
cLabelsPtr := (*[1 << 30]*C.char)(unsafe.Pointer(cLabels))[:numComponents:numComponents]
|
||||
|
||||
var i uint64
|
||||
for i = 0; i < numComponents; i++ {
|
||||
parsedComponents[i] = ParsedComponent{
|
||||
Label: C.GoString(cLabelsPtr[i]),
|
||||
Value: C.GoString(cComponentsPtr[i]),
|
||||
}
|
||||
}
|
||||
|
||||
C.libpostal_address_parser_response_destroy(cAddressParserResponsePtr)
|
||||
|
||||
return parsedComponents
|
||||
}
|
||||
|
||||
func ParseAddress(address string) []ParsedComponent {
|
||||
return ParseAddressOptions(address, parserDefaultOptions)
|
||||
}
|
||||
5
pkg/postal/postal.go
Normal file
5
pkg/postal/postal.go
Normal file
@@ -0,0 +1,5 @@
|
||||
package postal
|
||||
|
||||
import "sync"
|
||||
|
||||
// mu is the package-wide lock held by ExpandAddressOptions and
// ParseAddressOptions for the duration of each call into libpostal.
var mu sync.Mutex
|
||||
Reference in New Issue
Block a user