Merging changes from AeroXuk/libpostal_windows.
This commit is contained in:
26
.appveyor.yml
Normal file
26
.appveyor.yml
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
# AppVeyor CI configuration: builds the project on Windows using the
# MSYS2 / MINGW64 environment selected in the matrix below.
version: 1.0.{build}

branches:
  only:
    - master

image: Visual Studio 2015
platform: x64

environment:
  matrix:
    - COMPILER: msys2
      PLATFORM: x64
      MSYS2_ARCH: x86_64
      MSYS2_DIR: msys64
      MSYSTEM: MINGW64
      BIT: 64

# Each step is a list of commands; note the space after the dash, without
# which YAML would read the entry as a plain string instead of a list item.
install:
  - '%APPVEYOR_BUILD_FOLDER%\win_install.bat'

build_script:
  - '%APPVEYOR_BUILD_FOLDER%\win_build.bat'

test_script:
  - 'echo No tests yet'
|
||||||
16
libpostal.def
Normal file
16
libpostal.def
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
EXPORTS
|
||||||
|
libpostal_get_default_options
|
||||||
|
libpostal_expand_address
|
||||||
|
libpostal_expansion_array_destroy
|
||||||
|
libpostal_address_parser_response_destroy
|
||||||
|
libpostal_get_address_parser_default_options
|
||||||
|
libpostal_parse_address
|
||||||
|
libpostal_setup
|
||||||
|
libpostal_setup_datadir
|
||||||
|
libpostal_teardown
|
||||||
|
libpostal_setup_parser
|
||||||
|
libpostal_setup_parser_datadir
|
||||||
|
libpostal_teardown_parser
|
||||||
|
libpostal_setup_language_classifier
|
||||||
|
libpostal_setup_language_classifier_datadir
|
||||||
|
libpostal_teardown_language_classifier
|
||||||
16
src/export.h
Normal file
16
src/export.h
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#ifndef EXPORT_H
#define EXPORT_H

/*
 * LIBPOSTAL_EXPORT decorates declarations that belong to the public API.
 *
 *   - _WIN32 with LIBPOSTAL_EXPORTS defined: __declspec(dllexport)
 *   - _WIN32 without LIBPOSTAL_EXPORTS:      __declspec(dllimport)
 *   - GCC-compatible compilers, major >= 4:  default symbol visibility
 *   - anything else:                         expands to nothing
 */
#if defined(_WIN32) && defined(LIBPOSTAL_EXPORTS)
#  define LIBPOSTAL_EXPORT __declspec(dllexport)
#elif defined(_WIN32)
#  define LIBPOSTAL_EXPORT __declspec(dllimport)
#elif defined(__GNUC__) && __GNUC__ >= 4
#  define LIBPOSTAL_EXPORT __attribute__ ((visibility("default")))
#else
#  define LIBPOSTAL_EXPORT
#endif

#endif /* EXPORT_H */
|
||||||
70
src/klib/drand48.c
Normal file
70
src/klib/drand48.c
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 1993 Martin Birgmeier
|
||||||
|
* All rights reserved.
|
||||||
|
|
||||||
|
* You may redistribute unmodified or modified versions of this source
|
||||||
|
* code provided that the above copyright notice and this and the
|
||||||
|
* following conditions are retained.
|
||||||
|
|
||||||
|
* This software is provided ``as is'', and comes with no warranties
|
||||||
|
* of any kind. I shall in no event be liable for anything that happens
|
||||||
|
* to anyone/anything when using this software.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//I've rearranged the source into a header-only implementation for drand48() -Benjamin Kusin
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
#include "drand48.h"
|
||||||
|
|
||||||
|
/*
 * Default 48-bit seed, multiplier and additive constant, each 48-bit value
 * split into 16-bit words with the least significant word first.
 */
#define RAND48_SEED_0 (0x330e)
#define RAND48_SEED_1 (0xabcd)
#define RAND48_SEED_2 (0x1234)
#define RAND48_MULT_0 (0xe66d)
#define RAND48_MULT_1 (0xdeec)
#define RAND48_MULT_2 (0x0005)
#define RAND48_ADD (0x000b)

/* State used by drand48(). */
unsigned short _rand48_seed[3] = {
    RAND48_SEED_0,
    RAND48_SEED_1,
    RAND48_SEED_2
};

/* Multiplier applied to the state on every step. */
unsigned short _rand48_mult[3] = {
    RAND48_MULT_0,
    RAND48_MULT_1,
    RAND48_MULT_2
};

/* Constant added to the state on every step. */
unsigned short _rand48_add = RAND48_ADD;

/*
 * Advance a 48-bit state in place: xseed = xseed * _rand48_mult + _rand48_add,
 * keeping only the low 48 bits.
 *
 * xseed holds the state as three 16-bit words, least significant first.
 * The product is built one 16-bit word at a time, carrying the upper part of
 * the accumulator into the next word.
 */
void _dorand48(unsigned short xseed[3])
{
    unsigned long accu;
    unsigned short temp[2];

    accu = (unsigned long) _rand48_mult[0] * (unsigned long) xseed[0]
         + (unsigned long) _rand48_add;
    temp[0] = (unsigned short) accu; /* lower 16 bits */
    accu >>= sizeof(unsigned short) * 8;

    accu += (unsigned long) _rand48_mult[0] * (unsigned long) xseed[1]
          + (unsigned long) _rand48_mult[1] * (unsigned long) xseed[0];
    temp[1] = (unsigned short) accu; /* middle 16 bits */
    accu >>= sizeof(unsigned short) * 8;

    /*
     * The operands are cast before multiplying: unsigned short promotes to
     * int, and products such as 0xdeec * 0xabcd do not fit in a 32-bit int,
     * which would be signed overflow (undefined behaviour). Only the low
     * 16 bits of this sum are kept, so unsigned wrap-around is harmless.
     */
    accu += (unsigned long) _rand48_mult[0] * (unsigned long) xseed[2]
          + (unsigned long) _rand48_mult[1] * (unsigned long) xseed[1]
          + (unsigned long) _rand48_mult[2] * (unsigned long) xseed[0];

    xseed[0] = temp[0];
    xseed[1] = temp[1];
    xseed[2] = (unsigned short) accu; /* upper 16 bits */
}

/*
 * Advance the caller-supplied state and return it scaled to a double:
 * the three 16-bit words are weighted by 2^-48, 2^-32 and 2^-16.
 */
double erand48(unsigned short xseed[3])
{
    _dorand48(xseed);
    return ldexp((double) xseed[0], -48) +
           ldexp((double) xseed[1], -32) +
           ldexp((double) xseed[2], -16);
}

/* Same as erand48(), but operating on the file-scope state _rand48_seed. */
double drand48(void)
{
    return erand48(_rand48_seed);
}
|
||||||
41
src/klib/drand48.h
Normal file
41
src/klib/drand48.h
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 1993 Martin Birgmeier
|
||||||
|
* All rights reserved.
|
||||||
|
|
||||||
|
* You may redistribute unmodified or modified versions of this source
|
||||||
|
* code provided that the above copyright notice and this and the
|
||||||
|
* following conditions are retained.
|
||||||
|
|
||||||
|
* This software is provided ``as is'', and comes with no warranties
|
||||||
|
* of any kind. I shall in no event be liable for anything that happens
|
||||||
|
* to anyone/anything when using this software.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//I've rearranged the source into a header-only implementation for drand48() -Benjamin Kusin
|
||||||
|
|
||||||
|
#ifndef _DRAND48_H
#define _DRAND48_H

/*
 * Default 48-bit seed, multiplier and additive constant, each 48-bit value
 * split into 16-bit words.
 */
#define RAND48_SEED_0 (0x330e)
#define RAND48_SEED_1 (0xabcd)
#define RAND48_SEED_2 (0x1234)
#define RAND48_MULT_0 (0xe66d)
#define RAND48_MULT_1 (0xdeec)
#define RAND48_MULT_2 (0x0005)
#define RAND48_ADD (0x000b)

/*
 * Generator state and parameters. These are declarations only (extern):
 * without it every file including this header would carry its own tentative
 * definition, which fails to link when common symbols are disabled.
 */
extern unsigned short _rand48_seed[3];

extern unsigned short _rand48_mult[3];

extern unsigned short _rand48_add;

/* Advance the 48-bit state held in xseed by one step, in place. */
void _dorand48(unsigned short xseed[3]);

/* Advance the caller-supplied state and return it as a double. */
double erand48(unsigned short xseed[3]);

/* Like erand48(), using the shared state _rand48_seed. */
double drand48(void);


#endif // _DRAND48_H
|
||||||
@@ -45,6 +45,7 @@
|
|||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include "drand48.h"
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
void *left, *right;
|
void *left, *right;
|
||||||
|
|||||||
@@ -57,7 +57,7 @@ static libpostal_normalize_options_t LIBPOSTAL_DEFAULT_OPTIONS = {
|
|||||||
.roman_numerals = true
|
.roman_numerals = true
|
||||||
};
|
};
|
||||||
|
|
||||||
libpostal_normalize_options_t libpostal_get_default_options(void) {
|
LIBPOSTAL_EXPORT libpostal_normalize_options_t libpostal_get_default_options(void) {
|
||||||
return LIBPOSTAL_DEFAULT_OPTIONS;
|
return LIBPOSTAL_DEFAULT_OPTIONS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -942,7 +942,7 @@ static void expand_alternative(cstring_array *strings, khash_t(str_set) *unique_
|
|||||||
char_array_destroy(temp_string);
|
char_array_destroy(temp_string);
|
||||||
}
|
}
|
||||||
|
|
||||||
char **libpostal_expand_address(char *input, libpostal_normalize_options_t options, size_t *n) {
|
LIBPOSTAL_EXPORT char **libpostal_expand_address(char *input, libpostal_normalize_options_t options, size_t *n) {
|
||||||
options.address_components |= LIBPOSTAL_ADDRESS_ANY;
|
options.address_components |= LIBPOSTAL_ADDRESS_ANY;
|
||||||
|
|
||||||
uint64_t normalize_string_options = get_normalize_string_options(options);
|
uint64_t normalize_string_options = get_normalize_string_options(options);
|
||||||
@@ -1021,14 +1021,14 @@ char **libpostal_expand_address(char *input, libpostal_normalize_options_t optio
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void libpostal_expansion_array_destroy(char **expansions, size_t n) {
|
LIBPOSTAL_EXPORT void libpostal_expansion_array_destroy(char **expansions, size_t n) {
|
||||||
for (size_t i = 0; i < n; i++) {
|
for (size_t i = 0; i < n; i++) {
|
||||||
free(expansions[i]);
|
free(expansions[i]);
|
||||||
}
|
}
|
||||||
free(expansions);
|
free(expansions);
|
||||||
}
|
}
|
||||||
|
|
||||||
void libpostal_address_parser_response_destroy(libpostal_address_parser_response_t *self) {
|
LIBPOSTAL_EXPORT void libpostal_address_parser_response_destroy(libpostal_address_parser_response_t *self) {
|
||||||
if (self == NULL) return;
|
if (self == NULL) return;
|
||||||
|
|
||||||
for (size_t i = 0; i < self->num_components; i++) {
|
for (size_t i = 0; i < self->num_components; i++) {
|
||||||
@@ -1057,11 +1057,11 @@ static libpostal_address_parser_options_t LIBPOSTAL_ADDRESS_PARSER_DEFAULT_OPTIO
|
|||||||
.country = NULL
|
.country = NULL
|
||||||
};
|
};
|
||||||
|
|
||||||
inline libpostal_address_parser_options_t libpostal_get_address_parser_default_options(void) {
|
LIBPOSTAL_EXPORT inline libpostal_address_parser_options_t libpostal_get_address_parser_default_options(void) {
|
||||||
return LIBPOSTAL_ADDRESS_PARSER_DEFAULT_OPTIONS;
|
return LIBPOSTAL_ADDRESS_PARSER_DEFAULT_OPTIONS;
|
||||||
}
|
}
|
||||||
|
|
||||||
libpostal_address_parser_response_t *libpostal_parse_address(char *address, libpostal_address_parser_options_t options) {
|
LIBPOSTAL_EXPORT libpostal_address_parser_response_t *libpostal_parse_address(char *address, libpostal_address_parser_options_t options) {
|
||||||
libpostal_address_parser_response_t *parsed = address_parser_parse(address, options.language, options.country);
|
libpostal_address_parser_response_t *parsed = address_parser_parse(address, options.language, options.country);
|
||||||
|
|
||||||
if (parsed == NULL) {
|
if (parsed == NULL) {
|
||||||
@@ -1073,7 +1073,7 @@ libpostal_address_parser_response_t *libpostal_parse_address(char *address, libp
|
|||||||
return parsed;
|
return parsed;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool libpostal_setup_datadir(char *datadir) {
|
LIBPOSTAL_EXPORT bool libpostal_setup_datadir(char *datadir) {
|
||||||
char *transliteration_path = NULL;
|
char *transliteration_path = NULL;
|
||||||
char *numex_path = NULL;
|
char *numex_path = NULL;
|
||||||
char *address_dictionary_path = NULL;
|
char *address_dictionary_path = NULL;
|
||||||
@@ -1114,11 +1114,11 @@ bool libpostal_setup_datadir(char *datadir) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool libpostal_setup(void) {
|
LIBPOSTAL_EXPORT bool libpostal_setup(void) {
|
||||||
return libpostal_setup_datadir(NULL);
|
return libpostal_setup_datadir(NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool libpostal_setup_language_classifier_datadir(char *datadir) {
|
LIBPOSTAL_EXPORT bool libpostal_setup_language_classifier_datadir(char *datadir) {
|
||||||
char *language_classifier_dir = NULL;
|
char *language_classifier_dir = NULL;
|
||||||
|
|
||||||
if (datadir != NULL) {
|
if (datadir != NULL) {
|
||||||
@@ -1137,11 +1137,11 @@ bool libpostal_setup_language_classifier_datadir(char *datadir) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool libpostal_setup_language_classifier(void) {
|
LIBPOSTAL_EXPORT bool libpostal_setup_language_classifier(void) {
|
||||||
return libpostal_setup_language_classifier_datadir(NULL);
|
return libpostal_setup_language_classifier_datadir(NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool libpostal_setup_parser_datadir(char *datadir) {
|
LIBPOSTAL_EXPORT bool libpostal_setup_parser_datadir(char *datadir) {
|
||||||
char *parser_dir = NULL;
|
char *parser_dir = NULL;
|
||||||
|
|
||||||
if (datadir != NULL) {
|
if (datadir != NULL) {
|
||||||
@@ -1160,11 +1160,11 @@ bool libpostal_setup_parser_datadir(char *datadir) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool libpostal_setup_parser(void) {
|
LIBPOSTAL_EXPORT bool libpostal_setup_parser(void) {
|
||||||
return libpostal_setup_parser_datadir(NULL);
|
return libpostal_setup_parser_datadir(NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
void libpostal_teardown(void) {
|
LIBPOSTAL_EXPORT void libpostal_teardown(void) {
|
||||||
transliteration_module_teardown();
|
transliteration_module_teardown();
|
||||||
|
|
||||||
numex_module_teardown();
|
numex_module_teardown();
|
||||||
@@ -1172,10 +1172,10 @@ void libpostal_teardown(void) {
|
|||||||
address_dictionary_module_teardown();
|
address_dictionary_module_teardown();
|
||||||
}
|
}
|
||||||
|
|
||||||
void libpostal_teardown_language_classifier(void) {
|
LIBPOSTAL_EXPORT void libpostal_teardown_language_classifier(void) {
|
||||||
language_classifier_module_teardown();
|
language_classifier_module_teardown();
|
||||||
}
|
}
|
||||||
|
|
||||||
void libpostal_teardown_parser(void) {
|
LIBPOSTAL_EXPORT void libpostal_teardown_parser(void) {
|
||||||
address_parser_module_teardown();
|
address_parser_module_teardown();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ extern "C" {
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
|
#include "export.h"
|
||||||
|
|
||||||
#define LIBPOSTAL_MAX_LANGUAGE_LEN 4
|
#define LIBPOSTAL_MAX_LANGUAGE_LEN 4
|
||||||
|
|
||||||
@@ -62,11 +63,11 @@ typedef struct libpostal_normalize_options {
|
|||||||
|
|
||||||
} libpostal_normalize_options_t;
|
} libpostal_normalize_options_t;
|
||||||
|
|
||||||
libpostal_normalize_options_t libpostal_get_default_options(void);
|
LIBPOSTAL_EXPORT libpostal_normalize_options_t libpostal_get_default_options(void);
|
||||||
|
|
||||||
char **libpostal_expand_address(char *input, libpostal_normalize_options_t options, size_t *n);
|
LIBPOSTAL_EXPORT char **libpostal_expand_address(char *input, libpostal_normalize_options_t options, size_t *n);
|
||||||
|
|
||||||
void libpostal_expansion_array_destroy(char **expansions, size_t n);
|
LIBPOSTAL_EXPORT void libpostal_expansion_array_destroy(char **expansions, size_t n);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Address parser
|
Address parser
|
||||||
@@ -83,25 +84,25 @@ typedef struct libpostal_address_parser_options {
|
|||||||
char *country;
|
char *country;
|
||||||
} libpostal_address_parser_options_t;
|
} libpostal_address_parser_options_t;
|
||||||
|
|
||||||
void libpostal_address_parser_response_destroy(libpostal_address_parser_response_t *self);
|
LIBPOSTAL_EXPORT void libpostal_address_parser_response_destroy(libpostal_address_parser_response_t *self);
|
||||||
|
|
||||||
libpostal_address_parser_options_t libpostal_get_address_parser_default_options(void);
|
LIBPOSTAL_EXPORT libpostal_address_parser_options_t libpostal_get_address_parser_default_options(void);
|
||||||
|
|
||||||
libpostal_address_parser_response_t *libpostal_parse_address(char *address, libpostal_address_parser_options_t options);
|
LIBPOSTAL_EXPORT libpostal_address_parser_response_t *libpostal_parse_address(char *address, libpostal_address_parser_options_t options);
|
||||||
|
|
||||||
// Setup/teardown methods
|
// Setup/teardown methods
|
||||||
|
|
||||||
bool libpostal_setup(void);
|
LIBPOSTAL_EXPORT bool libpostal_setup(void);
|
||||||
bool libpostal_setup_datadir(char *datadir);
|
LIBPOSTAL_EXPORT bool libpostal_setup_datadir(char *datadir);
|
||||||
void libpostal_teardown(void);
|
LIBPOSTAL_EXPORT void libpostal_teardown(void);
|
||||||
|
|
||||||
bool libpostal_setup_parser(void);
|
LIBPOSTAL_EXPORT bool libpostal_setup_parser(void);
|
||||||
bool libpostal_setup_parser_datadir(char *datadir);
|
LIBPOSTAL_EXPORT bool libpostal_setup_parser_datadir(char *datadir);
|
||||||
void libpostal_teardown_parser(void);
|
LIBPOSTAL_EXPORT void libpostal_teardown_parser(void);
|
||||||
|
|
||||||
bool libpostal_setup_language_classifier(void);
|
LIBPOSTAL_EXPORT bool libpostal_setup_language_classifier(void);
|
||||||
bool libpostal_setup_language_classifier_datadir(char *datadir);
|
LIBPOSTAL_EXPORT bool libpostal_setup_language_classifier_datadir(char *datadir);
|
||||||
void libpostal_teardown_language_classifier(void);
|
LIBPOSTAL_EXPORT void libpostal_teardown_language_classifier(void);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -38,6 +38,7 @@ As well as normalizations for individual string tokens:
|
|||||||
#include "trie.h"
|
#include "trie.h"
|
||||||
#include "tokens.h"
|
#include "tokens.h"
|
||||||
#include "vector.h"
|
#include "vector.h"
|
||||||
|
#include "strndup.h"
|
||||||
|
|
||||||
#define NORMALIZE_STRING_LATIN_ASCII 1 << 0
|
#define NORMALIZE_STRING_LATIN_ASCII 1 << 0
|
||||||
#define NORMALIZE_STRING_TRANSLITERATE 1 << 1
|
#define NORMALIZE_STRING_TRANSLITERATE 1 << 1
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ Utilities for manipulating strings in C.
|
|||||||
#include "collections.h"
|
#include "collections.h"
|
||||||
#include "utf8proc/utf8proc.h"
|
#include "utf8proc/utf8proc.h"
|
||||||
#include "vector.h"
|
#include "vector.h"
|
||||||
|
#include "strndup.h"
|
||||||
|
|
||||||
#define MAX_UTF8_CHAR_SIZE 4
|
#define MAX_UTF8_CHAR_SIZE 4
|
||||||
|
|
||||||
|
|||||||
16
src/strndup.c
Normal file
16
src/strndup.c
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#ifndef HAVE_STRNDUP

#include <stdlib.h>
#include <string.h>

/*
 * Duplicate at most n bytes of the string s.
 *
 * Returns a newly malloc'ed, NUL-terminated copy of the first
 * min(strlen(s), n) bytes of s, or NULL if the allocation fails.
 * The caller releases the result with free().
 */
char *strndup(const char *s, size_t n)
{
    /*
     * Measure the prefix first so the allocation is sized by the data, not
     * by n: a huge n (e.g. SIZE_MAX, where n + 1 would wrap to 0) must not
     * cause an undersized buffer or a needless n-byte zero fill.
     */
    size_t len = 0;
    while (len < n && s[len] != '\0') {
        len++;
    }

    char *copy = malloc(len + 1);
    if (copy == NULL) {
        return NULL;
    }

    memcpy(copy, s, len);
    copy[len] = '\0';
    return copy;
}

#endif /* HAVE_STRNDUP */
|
||||||
6
src/strndup.h
Normal file
6
src/strndup.h
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
#ifndef HAVE_STRNDUP
#define HAVE_STRNDUP

#include <stddef.h> /* size_t */

/*
 * Declared only when HAVE_STRNDUP is not already defined; defining it here
 * also acts as this header's include guard.
 *
 * Returns a newly allocated, NUL-terminated copy of at most n bytes of s,
 * or NULL if the allocation fails. The caller frees the result.
 */
char *strndup(const char *s, size_t n);

#endif /* HAVE_STRNDUP */
|
||||||
@@ -11,6 +11,7 @@
|
|||||||
#include "string_utils.h"
|
#include "string_utils.h"
|
||||||
#include "token_types.h"
|
#include "token_types.h"
|
||||||
#include "vector.h"
|
#include "vector.h"
|
||||||
|
#include "strndup.h"
|
||||||
|
|
||||||
typedef struct token {
|
typedef struct token {
|
||||||
size_t offset;
|
size_t offset;
|
||||||
|
|||||||
@@ -12,6 +12,7 @@
|
|||||||
#include "trie.h"
|
#include "trie.h"
|
||||||
#include "trie_search.h"
|
#include "trie_search.h"
|
||||||
#include "unicode_scripts.h"
|
#include "unicode_scripts.h"
|
||||||
|
#include "strndup.h"
|
||||||
|
|
||||||
#define LATIN_ASCII "latin-ascii"
|
#define LATIN_ASCII "latin-ascii"
|
||||||
#define LATIN_ASCII_SIMPLE "latin-ascii-simple"
|
#define LATIN_ASCII_SIMPLE "latin-ascii-simple"
|
||||||
|
|||||||
21
win_build.bat
Normal file
21
win_build.bat
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
@echo off
rem Windows build driver: prints the build environment, then (for the msys2
rem compiler) runs bootstrap, configure, make and make install through bash.

rem /d also switches drive; quoting keeps paths with spaces intact.
cd /d "%APPVEYOR_BUILD_FOLDER%" || exit /b 1

echo Compiler: %COMPILER%
echo Architecture: %MSYS2_ARCH%
echo Platform: %PLATFORM%
echo MSYS2 directory: %MSYS2_DIR%
echo MSYS2 system: %MSYSTEM%
echo Configuration: %CONFIGURATION%
echo Bits: %BIT%

rem Quoted comparison so an unset COMPILER is not a syntax error.
IF "%COMPILER%"=="msys2" (
  @echo on
  SET "PATH=C:\%MSYS2_DIR%\%MSYSTEM%\bin;C:\%MSYS2_DIR%\usr\bin;%PATH%"

  rem Stop at the first failing step so a broken build is reported as failed.
  bash -lc "cd $APPVEYOR_BUILD_FOLDER && . bootstrap.sh" || exit /b 1
  bash -lc "cd $APPVEYOR_BUILD_FOLDER && . configure --datadir=$APPVEYOR_BUILD_FOLDER/data" || exit /b 1
  bash -lc "cd $APPVEYOR_BUILD_FOLDER && make" || exit /b 1
  bash -lc "cd $APPVEYOR_BUILD_FOLDER && make install" || exit /b 1
)
|
||||||
105
windows/configure.ac
Normal file
105
windows/configure.ac
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
# -*- Autoconf -*-
|
||||||
|
# Process this file with autoconf to produce a configure script.
|
||||||
|
|
||||||
|
m4_define(LIBPOSTAL_MAJOR_VERSION, [1])
|
||||||
|
m4_define(LIBPOSTAL_MINOR_VERSION, [0])
|
||||||
|
m4_define(LIBPOSTAL_PATCH_VERSION, [0])
|
||||||
|
|
||||||
|
AC_INIT([libpostal], LIBPOSTAL_MAJOR_VERSION.LIBPOSTAL_MINOR_VERSION.LIBPOSTAL_PATCH_VERSION)
|
||||||
|
|
||||||
|
AC_CONFIG_MACRO_DIR([m4])
|
||||||
|
|
||||||
|
AM_INIT_AUTOMAKE([foreign subdir-objects])
|
||||||
|
AC_CONFIG_SRCDIR([src])
|
||||||
|
LT_INIT([win32-dll])
|
||||||
|
|
||||||
|
AC_CONFIG_HEADERS([config.h])
|
||||||
|
|
||||||
|
# Checks for programs.
|
||||||
|
AC_PROG_CC_C99
|
||||||
|
AC_PROG_INSTALL
|
||||||
|
|
||||||
|
LDFLAGS="$LDFLAGS -L/usr/local/lib"
|
||||||
|
|
||||||
|
# Checks for libraries.
|
||||||
|
AC_SEARCH_LIBS([log],
|
||||||
|
[m],,[AC_MSG_ERROR([Could not find math library])])
|
||||||
|
|
||||||
|
# Checks for header files.
|
||||||
|
AC_HEADER_STDC
|
||||||
|
AC_HEADER_TIME
|
||||||
|
AC_HEADER_DIRENT
|
||||||
|
AC_HEADER_STDBOOL
|
||||||
|
AC_CHECK_HEADERS([fcntl.h float.h inttypes.h limits.h locale.h malloc.h memory.h stddef.h stdint.h stdlib.h string.h unistd.h])
|
||||||
|
|
||||||
|
# Checks for typedefs, structures, and compiler characteristics.
|
||||||
|
AC_C_INLINE
|
||||||
|
AC_TYPE_INT16_T
|
||||||
|
AC_TYPE_INT32_T
|
||||||
|
AC_TYPE_INT64_T
|
||||||
|
AC_TYPE_INT8_T
|
||||||
|
AC_TYPE_OFF_T
|
||||||
|
AC_TYPE_SIZE_T
|
||||||
|
AC_TYPE_SSIZE_T
|
||||||
|
AC_TYPE_UINT16_T
|
||||||
|
AC_TYPE_UINT32_T
|
||||||
|
AC_TYPE_UINT64_T
|
||||||
|
AC_TYPE_UINT8_T
|
||||||
|
AC_CHECK_TYPES([ptrdiff_t])
|
||||||
|
|
||||||
|
# Checks for library functions.
|
||||||
|
AC_CHECK_FUNCS([malloc realloc getcwd gettimeofday memmove memset regcomp setlocale sqrt strdup strndup])
|
||||||
|
|
||||||
|
AC_CONFIG_FILES([Makefile
|
||||||
|
libpostal.pc
|
||||||
|
src/Makefile
|
||||||
|
test/Makefile])
|
||||||
|
|
||||||
|
AC_CHECK_PROG([FOUND_SHUF], [shuf], [yes])
|
||||||
|
AC_CHECK_PROG([FOUND_GSHUF], [gshuf], [yes])
|
||||||
|
|
||||||
|
AS_IF([test "x$FOUND_SHUF" = xyes], [AC_DEFINE([HAVE_SHUF], [1], [shuf available])])
|
||||||
|
AS_IF([test "x$FOUND_GSHUF" = xyes], [AC_DEFINE([HAVE_GSHUF], [1], [gshuf available])])
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Checks for SSE2 build
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
AC_ARG_ENABLE([sse2],
|
||||||
|
AS_HELP_STRING(
|
||||||
|
[--disable-sse2],
|
||||||
|
[disable SSE2 optimization routines]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
AS_IF([test "x$enable_sse2" != "xno"], [
|
||||||
|
CFLAGS="-mfpmath=sse -msse2 -DUSE_SSE ${CFLAGS}"
|
||||||
|
])
|
||||||
|
|
||||||
|
AC_CHECK_HEADER(cblas.h, [AX_CBLAS])
|
||||||
|
|
||||||
|
AC_ARG_ENABLE([data-download],
|
||||||
|
[ --disable-data-download Disable downloading data],
|
||||||
|
[case "${enableval}" in
|
||||||
|
yes) DOWNLOAD_DATA=true ;;
|
||||||
|
no) DOWNLOAD_DATA=false ;;
|
||||||
|
*) AC_MSG_ERROR([bad value ${enableval} for --disable-data-download]) ;;
|
||||||
|
esac], [DOWNLOAD_DATA=true])
|
||||||
|
|
||||||
|
AM_CONDITIONAL([DOWNLOAD_DATA], [test "x$DOWNLOAD_DATA" = "xtrue"])
|
||||||
|
|
||||||
|
AC_ARG_WITH(cflags-scanner-extra, [AS_HELP_STRING([--with-cflags-scanner-extra@<:@=VALUE@:>@], [Extra compilation options for scanner.c])],
|
||||||
|
[
|
||||||
|
if test "x$withval" = "xno"; then
|
||||||
|
CFLAGS_SCANNER_EXTRA=""
|
||||||
|
else
|
||||||
|
CFLAGS_SCANNER_EXTRA="$withval"
|
||||||
|
fi
|
||||||
|
],
|
||||||
|
[ CFLAGS_SCANNER_EXTRA="" ]
|
||||||
|
)
|
||||||
|
|
||||||
|
AC_MSG_NOTICE([extra cflags for scanner.c: $CFLAGS_SCANNER_EXTRA])
|
||||||
|
AC_SUBST(CFLAGS_SCANNER_EXTRA)
|
||||||
|
AC_SUBST(LIBPOSTAL_SO_VERSION, LIBPOSTAL_MAJOR_VERSION:LIBPOSTAL_MINOR_VERSION:LIBPOSTAL_PATCH_VERSION)
|
||||||
|
|
||||||
|
AC_OUTPUT
|
||||||
45
windows/src/Makefile.am
Normal file
45
windows/src/Makefile.am
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
# this version of the makefile skips building the programs. It only builds the libraries and downloads data so you can use the API.
|
||||||
|
|
||||||
|
# Inherited from autoconf / user-specified
|
||||||
|
CFLAGS_CONF = @CFLAGS@
|
||||||
|
CFLAGS_BASE = -Wall -Wextra -Wno-unused-function -Wformat -Werror=format-security -Winit-self -Wno-sign-compare -DLIBPOSTAL_DATA_DIR='"$(datadir)/libpostal"' -g $(CFLAGS_CONF)
|
||||||
|
CFLAGS_O0 = $(CFLAGS_BASE) -O0
|
||||||
|
CFLAGS_O1 = $(CFLAGS_BASE) -O1
|
||||||
|
CFLAGS_O2 = $(CFLAGS_BASE) -O2
|
||||||
|
CFLAGS_O3 = $(CFLAGS_BASE) -O3
|
||||||
|
DEFAULT_INCLUDES = -I.. -I/usr/local/include
|
||||||
|
|
||||||
|
# Wonky but have to be able to override the user's optimization level to compile the scanner
|
||||||
|
# as it takes an unreasonably long time to compile with the optimizer on.
|
||||||
|
# EDIT: define UTF8PROC_EXPORTS and LIBPOSTAL_EXPORTS so the library builds on Windows.
|
||||||
|
CFLAGS = -D UTF8PROC_EXPORTS -D LIBPOSTAL_EXPORTS
|
||||||
|
|
||||||
|
lib_LTLIBRARIES = libpostal.la
|
||||||
|
libpostal_la_SOURCES = strndup.c libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c trie_utils.c string_utils.c file_utils.c utf8proc/utf8proc.c cmp/cmp.c normalize.c numex.c features.c unicode_scripts.c address_parser.c address_parser_io.c averaged_perceptron.c crf.c crf_context.c sparse_matrix.c averaged_perceptron_tagger.c graph.c graph_builder.c language_classifier.c language_features.c logistic_regression.c logistic.c minibatch.c float_utils.c ngrams.c
|
||||||
|
libpostal_la_LIBADD = libscanner.la $(CBLAS_LIBS)
|
||||||
|
libpostal_la_CFLAGS = $(CFLAGS_O2)
|
||||||
|
libpostal_la_LDFLAGS = -version-info @LIBPOSTAL_SO_VERSION@ -no-undefined
|
||||||
|
|
||||||
|
dist_bin_SCRIPTS = libpostal_data
|
||||||
|
|
||||||
|
# Scanner can take a very long time to compile with higher optimization levels, so always use -O0, scanner is fast enough
|
||||||
|
# On cross-compilation for ARM using gcc-4.7, there are "out of range" errors during compilation that can be fixed by adding
|
||||||
|
# -marm option. For that, CFLAGS_SCANNER_EXTRA is provided that can be filled during configuration stage (see ./configure --help).
|
||||||
|
noinst_LTLIBRARIES = libscanner.la
|
||||||
|
libscanner_la_SOURCES = klib/drand48.c scanner.c
|
||||||
|
libscanner_la_CFLAGS = $(CFLAGS_O0) $(CFLAGS_SCANNER_EXTRA)
|
||||||
|
|
||||||
|
|
||||||
|
# program building skipped here
|
||||||
|
|
||||||
|
pkginclude_HEADERS = libpostal.h
|
||||||
|
|
||||||
|
if DOWNLOAD_DATA
|
||||||
|
all-local:
|
||||||
|
${srcdir}/libpostal_data download all $(datadir)/libpostal
|
||||||
|
endif
|
||||||
|
|
||||||
|
lexer: scanner.re
|
||||||
|
re2c -F -s -b -8 -o scanner.c scanner.re
|
||||||
|
|
||||||
|
.PHONY: lexer
|
||||||
Reference in New Issue
Block a user