Merging changes from AeroXuk/libpostal_windows.
This commit is contained in:
16
src/export.h
Normal file
16
src/export.h
Normal file
@@ -0,0 +1,16 @@
|
||||
#ifndef EXPORT_H
#define EXPORT_H

/*
 * LIBPOSTAL_EXPORT is placed in front of public declarations/definitions.
 *
 *  - Windows (_WIN32): expands to __declspec(dllexport) when
 *    LIBPOSTAL_EXPORTS is defined, and to __declspec(dllimport) otherwise.
 *  - GCC-compatible compilers, version 4 or later: expands to the
 *    "default" symbol-visibility attribute.
 *  - Anything else: expands to nothing.
 */
#if defined(_WIN32)
#  if defined(LIBPOSTAL_EXPORTS)
#    define LIBPOSTAL_EXPORT __declspec(dllexport)
#  else
#    define LIBPOSTAL_EXPORT __declspec(dllimport)
#  endif
/* Test defined() first so compilers that do not set __GNUC__ stay quiet
 * under -Wundef instead of silently evaluating an undefined macro as 0. */
#elif defined(__GNUC__) && __GNUC__ >= 4
#  define LIBPOSTAL_EXPORT __attribute__ ((visibility("default")))
#else
#  define LIBPOSTAL_EXPORT
#endif

#endif /* EXPORT_H */
|
||||
70
src/klib/drand48.c
Normal file
70
src/klib/drand48.c
Normal file
@@ -0,0 +1,70 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 1993 Martin Birgmeier
|
||||
* All rights reserved.
|
||||
|
||||
* You may redistribute unmodified or modified versions of this source
|
||||
* code provided that the above copyright notice and this and the
|
||||
* following conditions are retained.
|
||||
|
||||
* This software is provided ``as is'', and comes with no warranties
|
||||
* of any kind. I shall in no event be liable for anything that happens
|
||||
* to anyone/anything when using this software.
|
||||
*/
|
||||
|
||||
//I've rearranged the source into a header-only implementation for drand48() -Benjamin Kusin
|
||||
|
||||
#include <math.h>
|
||||
#include "drand48.h"
|
||||
|
||||
/*
 * Parameters of the 48-bit linear congruential generator
 *     X' = (a * X + c) mod 2^48
 * Every 48-bit quantity is stored as three 16-bit limbs, least
 * significant limb first (index 0).
 */
#define RAND48_SEED_0 (0x330e)
#define RAND48_SEED_1 (0xabcd)
#define RAND48_SEED_2 (0x1234)
#define RAND48_MULT_0 (0xe66d)
#define RAND48_MULT_1 (0xdeec)
#define RAND48_MULT_2 (0x0005)
#define RAND48_ADD (0x000b)

/* Generator state used by drand48(); starts at the default seed. */
unsigned short _rand48_seed[3] = {
    RAND48_SEED_0,
    RAND48_SEED_1,
    RAND48_SEED_2
};

/* Multiplier a, as three 16-bit limbs. */
unsigned short _rand48_mult[3] = {
    RAND48_MULT_0,
    RAND48_MULT_1,
    RAND48_MULT_2
};

/* Additive constant c. */
unsigned short _rand48_add = RAND48_ADD;

/*
 * Advance the 48-bit state held in xseed by one generator step, in place.
 *
 * The product is built limb by limb (schoolbook multiplication); only the
 * low 48 bits of the result are kept, so carries out of the top limb are
 * dropped on purpose.
 *
 * xseed: three 16-bit limbs, least significant first; overwritten with the
 *        next state.
 */
void _dorand48(unsigned short xseed[3])
{
    unsigned long accu;
    unsigned short temp[2];

    accu = (unsigned long) _rand48_mult[0] * (unsigned long) xseed[0] + (unsigned long) _rand48_add;
    temp[0] = (unsigned short) accu; /* lower 16 bits */
    accu >>= sizeof(unsigned short) * 8;

    accu += (unsigned long) _rand48_mult[0] * (unsigned long) xseed[1] + (unsigned long) _rand48_mult[1] * (unsigned long) xseed[0];
    temp[1] = (unsigned short) accu; /* middle 16 bits */
    accu >>= sizeof(unsigned short) * 8;

    /*
     * Cast every operand before multiplying: unsigned short promotes to
     * (signed) int, and e.g. 0xdeec * 0xabcd does not fit in a 32-bit int,
     * which would be signed-overflow undefined behaviour.
     */
    accu += (unsigned long) _rand48_mult[0] * (unsigned long) xseed[2]
          + (unsigned long) _rand48_mult[1] * (unsigned long) xseed[1]
          + (unsigned long) _rand48_mult[2] * (unsigned long) xseed[0];

    xseed[0] = temp[0];
    xseed[1] = temp[1];
    xseed[2] = (unsigned short) accu; /* upper 16 bits */
}
|
||||
|
||||
/*
 * Step the caller-supplied generator state once and return it as a double.
 *
 * xseed: three 16-bit limbs, least significant first; updated in place by
 *        _dorand48().
 * Returns the new 48-bit state scaled by 2^-48.
 */
double erand48(unsigned short xseed[3])
{
    double low_part;
    double mid_part;
    double high_part;

    _dorand48(xseed);

    /* Each limb is shifted to its position below the binary point. */
    low_part = ldexp((double) xseed[0], -48);
    mid_part = ldexp((double) xseed[1], -32);
    high_part = ldexp((double) xseed[2], -16);

    return low_part + mid_part + high_part;
}
|
||||
|
||||
double drand48(void)
|
||||
{
|
||||
return erand48(_rand48_seed);
|
||||
}
|
||||
41
src/klib/drand48.h
Normal file
41
src/klib/drand48.h
Normal file
@@ -0,0 +1,41 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 1993 Martin Birgmeier
|
||||
* All rights reserved.
|
||||
|
||||
* You may redistribute unmodified or modified versions of this source
|
||||
* code provided that the above copyright notice and this and the
|
||||
* following conditions are retained.
|
||||
|
||||
* This software is provided ``as is'', and comes with no warranties
|
||||
* of any kind. I shall in no event be liable for anything that happens
|
||||
* to anyone/anything when using this software.
|
||||
*/
|
||||
|
||||
//I've rearranged the source into a header-only implementation for drand48() -Benjamin Kusin
|
||||
|
||||
#ifndef _DRAND48_H
|
||||
#define _DRAND48_H
|
||||
|
||||
#define RAND48_SEED_0 (0x330e)
|
||||
#define RAND48_SEED_1 (0xabcd)
|
||||
#define RAND48_SEED_2 (0x1234)
|
||||
#define RAND48_MULT_0 (0xe66d)
|
||||
#define RAND48_MULT_1 (0xdeec)
|
||||
#define RAND48_MULT_2 (0x0005)
|
||||
#define RAND48_ADD (0x000b)
|
||||
|
||||
/*
 * Generator state and parameters. These are declarations only: the objects
 * are defined (with their initial values) in drand48.c. Without `extern`
 * every translation unit including this header would carry its own
 * tentative definition, which fails to link when common symbols are
 * disabled (-fno-common).
 */
extern unsigned short _rand48_seed[3];

extern unsigned short _rand48_mult[3];

extern unsigned short _rand48_add;
|
||||
|
||||
/* Advance the 48-bit state in xseed (three 16-bit limbs, least significant
 * first) by one generator step; xseed is overwritten with the new state. */
void _dorand48(unsigned short xseed[3]);
|
||||
|
||||
/* Step the caller-supplied state xseed once (it is modified in place) and
 * return the new 48-bit state scaled by 2^-48. */
double erand48(unsigned short xseed[3]);
|
||||
|
||||
/* Same as erand48(), but operating on the shared _rand48_seed state. */
double drand48(void);
|
||||
|
||||
|
||||
#endif // _DRAND48_H
|
||||
@@ -45,6 +45,7 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "drand48.h"
|
||||
|
||||
typedef struct {
|
||||
void *left, *right;
|
||||
|
||||
@@ -57,7 +57,7 @@ static libpostal_normalize_options_t LIBPOSTAL_DEFAULT_OPTIONS = {
|
||||
.roman_numerals = true
|
||||
};
|
||||
|
||||
libpostal_normalize_options_t libpostal_get_default_options(void) {
|
||||
LIBPOSTAL_EXPORT libpostal_normalize_options_t libpostal_get_default_options(void) {
|
||||
return LIBPOSTAL_DEFAULT_OPTIONS;
|
||||
}
|
||||
|
||||
@@ -942,7 +942,7 @@ static void expand_alternative(cstring_array *strings, khash_t(str_set) *unique_
|
||||
char_array_destroy(temp_string);
|
||||
}
|
||||
|
||||
char **libpostal_expand_address(char *input, libpostal_normalize_options_t options, size_t *n) {
|
||||
LIBPOSTAL_EXPORT char **libpostal_expand_address(char *input, libpostal_normalize_options_t options, size_t *n) {
|
||||
options.address_components |= LIBPOSTAL_ADDRESS_ANY;
|
||||
|
||||
uint64_t normalize_string_options = get_normalize_string_options(options);
|
||||
@@ -1021,14 +1021,14 @@ char **libpostal_expand_address(char *input, libpostal_normalize_options_t optio
|
||||
|
||||
}
|
||||
|
||||
void libpostal_expansion_array_destroy(char **expansions, size_t n) {
|
||||
LIBPOSTAL_EXPORT void libpostal_expansion_array_destroy(char **expansions, size_t n) {
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
free(expansions[i]);
|
||||
}
|
||||
free(expansions);
|
||||
}
|
||||
|
||||
void libpostal_address_parser_response_destroy(libpostal_address_parser_response_t *self) {
|
||||
LIBPOSTAL_EXPORT void libpostal_address_parser_response_destroy(libpostal_address_parser_response_t *self) {
|
||||
if (self == NULL) return;
|
||||
|
||||
for (size_t i = 0; i < self->num_components; i++) {
|
||||
@@ -1057,11 +1057,11 @@ static libpostal_address_parser_options_t LIBPOSTAL_ADDRESS_PARSER_DEFAULT_OPTIO
|
||||
.country = NULL
|
||||
};
|
||||
|
||||
inline libpostal_address_parser_options_t libpostal_get_address_parser_default_options(void) {
|
||||
LIBPOSTAL_EXPORT inline libpostal_address_parser_options_t libpostal_get_address_parser_default_options(void) {
|
||||
return LIBPOSTAL_ADDRESS_PARSER_DEFAULT_OPTIONS;
|
||||
}
|
||||
|
||||
libpostal_address_parser_response_t *libpostal_parse_address(char *address, libpostal_address_parser_options_t options) {
|
||||
LIBPOSTAL_EXPORT libpostal_address_parser_response_t *libpostal_parse_address(char *address, libpostal_address_parser_options_t options) {
|
||||
libpostal_address_parser_response_t *parsed = address_parser_parse(address, options.language, options.country);
|
||||
|
||||
if (parsed == NULL) {
|
||||
@@ -1073,7 +1073,7 @@ libpostal_address_parser_response_t *libpostal_parse_address(char *address, libp
|
||||
return parsed;
|
||||
}
|
||||
|
||||
bool libpostal_setup_datadir(char *datadir) {
|
||||
LIBPOSTAL_EXPORT bool libpostal_setup_datadir(char *datadir) {
|
||||
char *transliteration_path = NULL;
|
||||
char *numex_path = NULL;
|
||||
char *address_dictionary_path = NULL;
|
||||
@@ -1114,11 +1114,11 @@ bool libpostal_setup_datadir(char *datadir) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool libpostal_setup(void) {
|
||||
LIBPOSTAL_EXPORT bool libpostal_setup(void) {
|
||||
return libpostal_setup_datadir(NULL);
|
||||
}
|
||||
|
||||
bool libpostal_setup_language_classifier_datadir(char *datadir) {
|
||||
LIBPOSTAL_EXPORT bool libpostal_setup_language_classifier_datadir(char *datadir) {
|
||||
char *language_classifier_dir = NULL;
|
||||
|
||||
if (datadir != NULL) {
|
||||
@@ -1137,11 +1137,11 @@ bool libpostal_setup_language_classifier_datadir(char *datadir) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool libpostal_setup_language_classifier(void) {
|
||||
LIBPOSTAL_EXPORT bool libpostal_setup_language_classifier(void) {
|
||||
return libpostal_setup_language_classifier_datadir(NULL);
|
||||
}
|
||||
|
||||
bool libpostal_setup_parser_datadir(char *datadir) {
|
||||
LIBPOSTAL_EXPORT bool libpostal_setup_parser_datadir(char *datadir) {
|
||||
char *parser_dir = NULL;
|
||||
|
||||
if (datadir != NULL) {
|
||||
@@ -1160,11 +1160,11 @@ bool libpostal_setup_parser_datadir(char *datadir) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool libpostal_setup_parser(void) {
|
||||
LIBPOSTAL_EXPORT bool libpostal_setup_parser(void) {
|
||||
return libpostal_setup_parser_datadir(NULL);
|
||||
}
|
||||
|
||||
void libpostal_teardown(void) {
|
||||
LIBPOSTAL_EXPORT void libpostal_teardown(void) {
|
||||
transliteration_module_teardown();
|
||||
|
||||
numex_module_teardown();
|
||||
@@ -1172,10 +1172,10 @@ void libpostal_teardown(void) {
|
||||
address_dictionary_module_teardown();
|
||||
}
|
||||
|
||||
void libpostal_teardown_language_classifier(void) {
|
||||
LIBPOSTAL_EXPORT void libpostal_teardown_language_classifier(void) {
|
||||
language_classifier_module_teardown();
|
||||
}
|
||||
|
||||
void libpostal_teardown_parser(void) {
|
||||
LIBPOSTAL_EXPORT void libpostal_teardown_parser(void) {
|
||||
address_parser_module_teardown();
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ extern "C" {
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include "export.h"
|
||||
|
||||
#define LIBPOSTAL_MAX_LANGUAGE_LEN 4
|
||||
|
||||
@@ -62,11 +63,11 @@ typedef struct libpostal_normalize_options {
|
||||
|
||||
} libpostal_normalize_options_t;
|
||||
|
||||
libpostal_normalize_options_t libpostal_get_default_options(void);
|
||||
LIBPOSTAL_EXPORT libpostal_normalize_options_t libpostal_get_default_options(void);
|
||||
|
||||
char **libpostal_expand_address(char *input, libpostal_normalize_options_t options, size_t *n);
|
||||
LIBPOSTAL_EXPORT char **libpostal_expand_address(char *input, libpostal_normalize_options_t options, size_t *n);
|
||||
|
||||
void libpostal_expansion_array_destroy(char **expansions, size_t n);
|
||||
LIBPOSTAL_EXPORT void libpostal_expansion_array_destroy(char **expansions, size_t n);
|
||||
|
||||
/*
|
||||
Address parser
|
||||
@@ -83,25 +84,25 @@ typedef struct libpostal_address_parser_options {
|
||||
char *country;
|
||||
} libpostal_address_parser_options_t;
|
||||
|
||||
void libpostal_address_parser_response_destroy(libpostal_address_parser_response_t *self);
|
||||
LIBPOSTAL_EXPORT void libpostal_address_parser_response_destroy(libpostal_address_parser_response_t *self);
|
||||
|
||||
libpostal_address_parser_options_t libpostal_get_address_parser_default_options(void);
|
||||
LIBPOSTAL_EXPORT libpostal_address_parser_options_t libpostal_get_address_parser_default_options(void);
|
||||
|
||||
libpostal_address_parser_response_t *libpostal_parse_address(char *address, libpostal_address_parser_options_t options);
|
||||
LIBPOSTAL_EXPORT libpostal_address_parser_response_t *libpostal_parse_address(char *address, libpostal_address_parser_options_t options);
|
||||
|
||||
// Setup/teardown methods
|
||||
|
||||
bool libpostal_setup(void);
|
||||
bool libpostal_setup_datadir(char *datadir);
|
||||
void libpostal_teardown(void);
|
||||
LIBPOSTAL_EXPORT bool libpostal_setup(void);
|
||||
LIBPOSTAL_EXPORT bool libpostal_setup_datadir(char *datadir);
|
||||
LIBPOSTAL_EXPORT void libpostal_teardown(void);
|
||||
|
||||
bool libpostal_setup_parser(void);
|
||||
bool libpostal_setup_parser_datadir(char *datadir);
|
||||
void libpostal_teardown_parser(void);
|
||||
LIBPOSTAL_EXPORT bool libpostal_setup_parser(void);
|
||||
LIBPOSTAL_EXPORT bool libpostal_setup_parser_datadir(char *datadir);
|
||||
LIBPOSTAL_EXPORT void libpostal_teardown_parser(void);
|
||||
|
||||
bool libpostal_setup_language_classifier(void);
|
||||
bool libpostal_setup_language_classifier_datadir(char *datadir);
|
||||
void libpostal_teardown_language_classifier(void);
|
||||
LIBPOSTAL_EXPORT bool libpostal_setup_language_classifier(void);
|
||||
LIBPOSTAL_EXPORT bool libpostal_setup_language_classifier_datadir(char *datadir);
|
||||
LIBPOSTAL_EXPORT void libpostal_teardown_language_classifier(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -38,6 +38,7 @@ As well as normalizations for individual string tokens:
|
||||
#include "trie.h"
|
||||
#include "tokens.h"
|
||||
#include "vector.h"
|
||||
#include "strndup.h"
|
||||
|
||||
#define NORMALIZE_STRING_LATIN_ASCII 1 << 0
|
||||
#define NORMALIZE_STRING_TRANSLITERATE 1 << 1
|
||||
|
||||
@@ -16,6 +16,7 @@ Utilities for manipulating strings in C.
|
||||
#include "collections.h"
|
||||
#include "utf8proc/utf8proc.h"
|
||||
#include "vector.h"
|
||||
#include "strndup.h"
|
||||
|
||||
#define MAX_UTF8_CHAR_SIZE 4
|
||||
|
||||
|
||||
16
src/strndup.c
Normal file
16
src/strndup.c
Normal file
@@ -0,0 +1,16 @@
|
||||
#ifndef HAVE_STRNDUP
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Fallback strndup(): duplicate at most n bytes of the string s.
 *
 * s: NUL-terminated source string (must not be NULL).
 * n: maximum number of bytes to copy, not counting the terminator.
 *
 * Returns a newly malloc()ed, NUL-terminated copy holding
 * min(strlen(s), n) bytes, or NULL if the allocation fails. The caller
 * owns the result and releases it with free().
 */
char *strndup(const char *s, size_t n)
{
    size_t len = 0;
    char *copy;

    /* Measure first, never reading past the terminator or past n bytes.
     * len stays <= strlen(s), so len + 1 below cannot wrap around even
     * when the caller passes n == SIZE_MAX. */
    while (len < n && s[len] != '\0') {
        len++;
    }

    copy = malloc(len + 1);
    if (copy != NULL) {
        memcpy(copy, s, len);
        copy[len] = '\0';
    }
    return copy;
}
|
||||
|
||||
#endif /* HAVE_STRNDUP */
|
||||
6
src/strndup.h
Normal file
6
src/strndup.h
Normal file
@@ -0,0 +1,6 @@
|
||||
/*
 * Declaration of the fallback strndup().
 *
 * HAVE_STRNDUP serves both as the include guard and as an opt-out: when it
 * is already defined before this header is included, nothing is declared.
 * NOTE(review): presumably the build configuration defines it on platforms
 * whose C library already provides strndup() - confirm against the build
 * scripts.
 */
#ifndef HAVE_STRNDUP
#define HAVE_STRNDUP

#include <stddef.h> /* size_t, so this header compiles on its own */

/* Return a malloc()ed copy of at most n bytes of s, always NUL-terminated,
 * or NULL if allocation fails. The caller frees the result. */
char *strndup(const char *s, size_t n);

#endif /* HAVE_STRNDUP */
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "string_utils.h"
|
||||
#include "token_types.h"
|
||||
#include "vector.h"
|
||||
#include "strndup.h"
|
||||
|
||||
typedef struct token {
|
||||
size_t offset;
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "trie.h"
|
||||
#include "trie_search.h"
|
||||
#include "unicode_scripts.h"
|
||||
#include "strndup.h"
|
||||
|
||||
#define LATIN_ASCII "latin-ascii"
|
||||
#define LATIN_ASCII_SIMPLE "latin-ascii-simple"
|
||||
|
||||
Reference in New Issue
Block a user