Forked libpostal integration
This commit is contained in:
2
.idea/vcs.xml
generated
2
.idea/vcs.xml
generated
@@ -1,6 +1,6 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<project version="4">
|
<project version="4">
|
||||||
<component name="VcsDirectoryMappings">
|
<component name="VcsDirectoryMappings">
|
||||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
<mapping directory="" vcs="Git" />
|
||||||
</component>
|
</component>
|
||||||
</project>
|
</project>
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
#!/bin/bash
# Redeploys the addrss container from the tip of master.
#
# NOTE: `git reset --hard` throws away any uncommitted changes in this checkout.
# Must be run from the repository root (the docker build context is `.`).

# Stop at the first failing step instead of carrying on with stale code or
# tearing down a working container.
set -euo pipefail

git reset --hard
git checkout master
git pull

# Build before touching the running container so a broken build does not
# leave the service down.
docker build --tag addrss .

# The container may be absent (first deploy) or already stopped; neither
# case should abort the redeploy.
docker stop addrss || true
docker rm addrss || true

docker run --name addrss -d -p 1337:1337 --restart always --env-file /home/tommy/addrss.env addrss
|
|
||||||
1
go.mod
1
go.mod
@@ -5,7 +5,6 @@ go 1.23.0
|
|||||||
require (
|
require (
|
||||||
github.com/go-sql-driver/mysql v1.9.3
|
github.com/go-sql-driver/mysql v1.9.3
|
||||||
github.com/google/uuid v1.6.0
|
github.com/google/uuid v1.6.0
|
||||||
github.com/openvenues/gopostal v0.0.0-20240426055609-4fe3a773f519
|
|
||||||
golang.org/x/crypto v0.41.0
|
golang.org/x/crypto v0.41.0
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +1,9 @@
|
|||||||
package controllers
|
package controllers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"addrss/pkg/postal"
|
||||||
"addrss/pkg/router"
|
"addrss/pkg/router"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
expand "github.com/openvenues/gopostal/expand"
|
|
||||||
parser "github.com/openvenues/gopostal/parser"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type Api struct{}
|
type Api struct{}
|
||||||
@@ -20,7 +18,7 @@ func (a Api) AddRoutes() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func expandAddress(ctx *router.Context) {
|
func expandAddress(ctx *router.Context) {
|
||||||
expansions := expand.ExpandAddress("1080 Brayden Ct. Hebron KY 41048")
|
expansions := postal.ExpandAddress("1080 Brayden Ct. Hebron KY 41048")
|
||||||
for i := 0; i < len(expansions); i++ {
|
for i := 0; i < len(expansions); i++ {
|
||||||
fmt.Println(expansions[i])
|
fmt.Println(expansions[i])
|
||||||
}
|
}
|
||||||
@@ -34,9 +32,9 @@ func parseAddress(ctx *router.Context) {
|
|||||||
ctx.Response.BadRequest(err)
|
ctx.Response.BadRequest(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
options := parser.ParserOptions{}
|
options := postal.ParserOptions{}
|
||||||
|
|
||||||
pa := parser.ParseAddressOptions(pr.Address, options)
|
pa := postal.ParseAddressOptions(pr.Address, options)
|
||||||
addr := map[string]any{}
|
addr := map[string]any{}
|
||||||
|
|
||||||
for i := 0; i < len(pa); i++ {
|
for i := 0; i < len(pa); i++ {
|
||||||
|
|||||||
@@ -1,39 +0,0 @@
|
|||||||
# syntax=docker/dockerfile:1

# Image for the addrss service: installs a Go toolchain, builds the forked
# libpostal from source, then compiles the Go binary against it.
FROM debian:trixie
WORKDIR /app

# Put the Go toolchain on PATH for every later instruction. An `export` inside
# a RUN only lives for that single shell, so the `go` commands further down
# would otherwise not be found.
ENV PATH="${PATH}:/usr/local/go/bin"

# `set -e` makes the layer fail on the first failing command; without it only
# the exit status of the last command (ldconfig) decides the outcome.
RUN <<EOF
set -ex

apt-get -y update
apt-get -y upgrade
apt-get -y install curl build-essential autoconf automake libtool pkg-config wget

wget https://go.dev/dl/go1.25.1.linux-amd64.tar.gz
rm -rf /usr/local/go && tar -C /usr/local -xzf go1.25.1.linux-amd64.tar.gz

wget https://git.ratermania.net/tommy/libpostal-addrss/archive/libpostal.tar.gz
tar xzf libpostal.tar.gz
cd libpostal-addrss

./bootstrap.sh
./configure MODEL=senzing

make -j8
make install

ldconfig
EOF

WORKDIR /app
# Copy the module files first so the dependency download layer is cached
# independently of source changes.
COPY go.mod ./
COPY go.sum ./
RUN go mod download
COPY . ./
RUN go build

EXPOSE 1337

RUN ls -la

CMD [ "./addrss" ]
|
|
||||||
|
|
||||||
149
pkg/postal/expand.go
Normal file
149
pkg/postal/expand.go
Normal file
@@ -0,0 +1,149 @@
|
|||||||
|
package postal
|
||||||
|
|
||||||
|
/*
|
||||||
|
#cgo pkg-config: libpostal
|
||||||
|
#include <libpostal/libpostal.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
*/
|
||||||
|
import "C"
|
||||||
|
|
||||||
|
import (
|
||||||
|
"log"
|
||||||
|
"unicode/utf8"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
if !bool(C.libpostal_setup()) || !bool(C.libpostal_setup_language_classifier()) {
|
||||||
|
log.Fatal("Could not load libpostal")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ExpandOptions is the Go-side set of switches handed to
// ExpandAddressOptions. Every field is copied onto the libpostal options
// field with the matching snake_case name before the address is expanded.
type ExpandOptions struct {
	// Languages, when nil, makes ExpandAddressOptions set num_languages to 0;
	// otherwise each entry is passed to libpostal as a C string.
	Languages []string
	// AddressComponents is copied to address_components as a 16-bit value.
	AddressComponents uint16

	// Fields copied to latin_ascii … trim_string.
	LatinAscii, Transliterate, StripAccents, Decompose, Lowercase, TrimString bool

	// Hyphen-related fields.
	ReplaceWordHyphens, DeleteWordHyphens, ReplaceNumericHyphens, DeleteNumericHyphens bool

	// Fields copied to split_alpha_from_numeric … delete_apostrophes.
	SplitAlphaFromNumeric, DeleteFinalPeriods, DeleteAcronymPeriods, DropEnglishPossessives, DeleteApostrophes bool

	// Fields copied to expand_numex and roman_numerals.
	ExpandNumex, RomanNumerals bool
}
|
||||||
|
|
||||||
|
var cDefaultOptions = C.libpostal_get_default_options()
|
||||||
|
|
||||||
|
func GetDefaultExpansionOptions() ExpandOptions {
|
||||||
|
return ExpandOptions{
|
||||||
|
Languages: nil,
|
||||||
|
AddressComponents: uint16(cDefaultOptions.address_components),
|
||||||
|
LatinAscii: bool(cDefaultOptions.latin_ascii),
|
||||||
|
Transliterate: bool(cDefaultOptions.transliterate),
|
||||||
|
StripAccents: bool(cDefaultOptions.strip_accents),
|
||||||
|
Decompose: bool(cDefaultOptions.decompose),
|
||||||
|
Lowercase: bool(cDefaultOptions.lowercase),
|
||||||
|
TrimString: bool(cDefaultOptions.trim_string),
|
||||||
|
ReplaceWordHyphens: bool(cDefaultOptions.replace_word_hyphens),
|
||||||
|
DeleteWordHyphens: bool(cDefaultOptions.delete_word_hyphens),
|
||||||
|
ReplaceNumericHyphens: bool(cDefaultOptions.replace_numeric_hyphens),
|
||||||
|
DeleteNumericHyphens: bool(cDefaultOptions.delete_numeric_hyphens),
|
||||||
|
SplitAlphaFromNumeric: bool(cDefaultOptions.split_alpha_from_numeric),
|
||||||
|
DeleteFinalPeriods: bool(cDefaultOptions.delete_final_periods),
|
||||||
|
DeleteAcronymPeriods: bool(cDefaultOptions.delete_acronym_periods),
|
||||||
|
DropEnglishPossessives: bool(cDefaultOptions.drop_english_possessives),
|
||||||
|
DeleteApostrophes: bool(cDefaultOptions.delete_apostrophes),
|
||||||
|
ExpandNumex: bool(cDefaultOptions.expand_numex),
|
||||||
|
RomanNumerals: bool(cDefaultOptions.roman_numerals),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var libpostalDefaultOptions = GetDefaultExpansionOptions()
|
||||||
|
|
||||||
|
func ExpandAddressOptions(address string, options ExpandOptions) []string {
|
||||||
|
if !utf8.ValidString(address) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
mu.Lock()
|
||||||
|
defer mu.Unlock()
|
||||||
|
|
||||||
|
cAddress := C.CString(address)
|
||||||
|
defer C.free(unsafe.Pointer(cAddress))
|
||||||
|
|
||||||
|
var charPtr *C.char
|
||||||
|
ptrSize := unsafe.Sizeof(charPtr)
|
||||||
|
|
||||||
|
cOptions := C.libpostal_get_default_options()
|
||||||
|
if options.Languages != nil {
|
||||||
|
cLanguages := C.calloc(C.size_t(len(options.Languages)), C.size_t(ptrSize))
|
||||||
|
cLanguagesPtr := (*[1 << 30]*C.char)(unsafe.Pointer(cLanguages))
|
||||||
|
|
||||||
|
var cLang C.String
|
||||||
|
|
||||||
|
defer C.free(unsafe.Pointer(cLang))
|
||||||
|
defer C.free(unsafe.Pointer(cLanguages))
|
||||||
|
|
||||||
|
for i := 0; i < len(options.Languages); i++ {
|
||||||
|
cLang = C.CString(options.Languages[i])
|
||||||
|
cLanguagesPtr[i] = cLang
|
||||||
|
}
|
||||||
|
|
||||||
|
cOptions.languages = (**C.char)(cLanguages)
|
||||||
|
cOptions.num_languages = C.size_t(len(options.Languages))
|
||||||
|
} else {
|
||||||
|
cOptions.num_languages = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
cOptions.address_components = C.uint16_t(options.AddressComponents)
|
||||||
|
cOptions.latin_ascii = C.bool(options.LatinAscii)
|
||||||
|
cOptions.transliterate = C.bool(options.Transliterate)
|
||||||
|
cOptions.strip_accents = C.bool(options.StripAccents)
|
||||||
|
cOptions.decompose = C.bool(options.Decompose)
|
||||||
|
cOptions.lowercase = C.bool(options.Lowercase)
|
||||||
|
cOptions.trim_string = C.bool(options.TrimString)
|
||||||
|
cOptions.replace_word_hyphens = C.bool(options.ReplaceWordHyphens)
|
||||||
|
cOptions.delete_word_hyphens = C.bool(options.DeleteWordHyphens)
|
||||||
|
cOptions.replace_numeric_hyphens = C.bool(options.ReplaceNumericHyphens)
|
||||||
|
cOptions.delete_numeric_hyphens = C.bool(options.DeleteNumericHyphens)
|
||||||
|
cOptions.split_alpha_from_numeric = C.bool(options.SplitAlphaFromNumeric)
|
||||||
|
cOptions.delete_final_periods = C.bool(options.DeleteFinalPeriods)
|
||||||
|
cOptions.delete_acronym_periods = C.bool(options.DeleteAcronymPeriods)
|
||||||
|
cOptions.drop_english_possessives = C.bool(options.DropEnglishPossessives)
|
||||||
|
cOptions.delete_apostrophes = C.bool(options.DeleteApostrophes)
|
||||||
|
cOptions.expand_numex = C.bool(options.ExpandNumex)
|
||||||
|
cOptions.roman_numerals = C.bool(options.RomanNumerals)
|
||||||
|
|
||||||
|
var cNumExpansions = C.size_t(0)
|
||||||
|
|
||||||
|
cExpansions := C.libpostal_expand_address(cAddress, cOptions, &cNumExpansions)
|
||||||
|
|
||||||
|
numExpansions := uint64(cNumExpansions)
|
||||||
|
|
||||||
|
var expansions = make([]string, numExpansions)
|
||||||
|
|
||||||
|
// Accessing a C array
|
||||||
|
cExpansionsPtr := (*[1 << 30]*C.char)(unsafe.Pointer(cExpansions))
|
||||||
|
|
||||||
|
var i uint64
|
||||||
|
for i = 0; i < numExpansions; i++ {
|
||||||
|
expansions[i] = C.GoString(cExpansionsPtr[i])
|
||||||
|
}
|
||||||
|
|
||||||
|
C.libpostal_expansion_array_destroy(cExpansions, cNumExpansions)
|
||||||
|
return expansions
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExpandAddress(address string) []string {
|
||||||
|
return ExpandAddressOptions(address, libpostalDefaultOptions)
|
||||||
|
}
|
||||||
102
pkg/postal/parser.go
Normal file
102
pkg/postal/parser.go
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
package postal
|
||||||
|
|
||||||
|
/*
|
||||||
|
#cgo pkg-config: libpostal
|
||||||
|
#include <libpostal/libpostal.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
*/
|
||||||
|
import "C"
|
||||||
|
|
||||||
|
import (
|
||||||
|
"log"
|
||||||
|
"unicode/utf8"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
if !bool(C.libpostal_setup()) || !bool(C.libpostal_setup_parser()) {
|
||||||
|
log.Fatal("Could not load libpostal")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParserOptions carries the optional hints accepted by ParseAddressOptions.
// An empty string means "not set": ParseAddressOptions only overrides the
// corresponding libpostal parser option when the field is non-empty.
type ParserOptions struct {
	Language, Country string
}

// getDefaultParserOptions returns parser options with both Language and
// Country empty.
func getDefaultParserOptions() ParserOptions {
	var defaults ParserOptions
	return defaults
}

// parserDefaultOptions is the value ParseAddress passes to
// ParseAddressOptions.
var parserDefaultOptions = getDefaultParserOptions()
|
||||||
|
|
||||||
|
// ParsedComponent is one label/value pair produced by ParseAddressOptions.
// It serializes to JSON with the keys "label" and "value".
type ParsedComponent struct {
	// Label is taken from the parser response's labels array.
	Label string `json:"label"`
	// Value is taken from the parser response's components array at the
	// same index as Label.
	Value string `json:"value"`
}
|
||||||
|
|
||||||
|
func ParseAddressOptions(address string, options ParserOptions) []ParsedComponent {
|
||||||
|
if !utf8.ValidString(address) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
mu.Lock()
|
||||||
|
defer mu.Unlock()
|
||||||
|
|
||||||
|
cAddress := C.CString(address)
|
||||||
|
defer C.free(unsafe.Pointer(cAddress))
|
||||||
|
|
||||||
|
cOptions := C.libpostal_get_address_parser_default_options()
|
||||||
|
if options.Language != "" {
|
||||||
|
cLanguage := C.CString(options.Language)
|
||||||
|
defer C.free(unsafe.Pointer(cLanguage))
|
||||||
|
|
||||||
|
cOptions.language = cLanguage
|
||||||
|
}
|
||||||
|
|
||||||
|
if options.Country != "" {
|
||||||
|
cCountry := C.CString(options.Country)
|
||||||
|
defer C.free(unsafe.Pointer(cCountry))
|
||||||
|
|
||||||
|
cOptions.country = cCountry
|
||||||
|
}
|
||||||
|
|
||||||
|
cAddressParserResponsePtr := C.libpostal_parse_address(cAddress, cOptions)
|
||||||
|
|
||||||
|
if cAddressParserResponsePtr == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
cAddressParserResponse := *cAddressParserResponsePtr
|
||||||
|
|
||||||
|
cNumComponents := cAddressParserResponse.num_components
|
||||||
|
cComponents := cAddressParserResponse.components
|
||||||
|
cLabels := cAddressParserResponse.labels
|
||||||
|
|
||||||
|
numComponents := uint64(cNumComponents)
|
||||||
|
|
||||||
|
parsedComponents := make([]ParsedComponent, numComponents)
|
||||||
|
|
||||||
|
// Accessing a C array
|
||||||
|
cComponentsPtr := (*[1 << 30]*C.char)(unsafe.Pointer(cComponents))[:numComponents:numComponents]
|
||||||
|
cLabelsPtr := (*[1 << 30]*C.char)(unsafe.Pointer(cLabels))[:numComponents:numComponents]
|
||||||
|
|
||||||
|
var i uint64
|
||||||
|
for i = 0; i < numComponents; i++ {
|
||||||
|
parsedComponents[i] = ParsedComponent{
|
||||||
|
Label: C.GoString(cLabelsPtr[i]),
|
||||||
|
Value: C.GoString(cComponentsPtr[i]),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
C.libpostal_address_parser_response_destroy(cAddressParserResponsePtr)
|
||||||
|
|
||||||
|
return parsedComponents
|
||||||
|
}
|
||||||
|
|
||||||
|
func ParseAddress(address string) []ParsedComponent {
|
||||||
|
return ParseAddressOptions(address, parserDefaultOptions)
|
||||||
|
}
|
||||||
5
pkg/postal/postal.go
Normal file
5
pkg/postal/postal.go
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
package postal
|
||||||
|
|
||||||
|
import "sync"
|
||||||
|
|
||||||
|
// mu is shared by the whole package: ExpandAddressOptions and
// ParseAddressOptions each lock it before calling into libpostal and release
// it on return, so those calls never run concurrently.
var mu sync.Mutex
|
||||||
Reference in New Issue
Block a user