# Patch summary (commentary only; everything from "diff --git" onward is unchanged).
#
# Unified diff in git format touching two files:
#
# src/Makefile.am
#   * libpostal_la_SOURCES gains address_parser.c, address_parser_io.c,
#     averaged_perceptron.c, sparse_matrix.c, averaged_perceptron_trainer.c,
#     averaged_perceptron_tagger.c, graph.c and graph_builder.c.
#   * address_parser_SOURCES shrinks to address_parser_cli.c and
#     linenoise/linenoise.c, and address_parser_LDADD gains libpostal.la, so the
#     sources dropped from the program's own list are the ones now listed in the
#     library.
#   * The all-local recipe calls "./libpostal_data download all" instead of
#     "download base", and the download_geodb target is removed.
#
# src/libpostal_data
#   * Adds LIBPOSTAL_PARSER_FILE ("parser.tar.gz"), LIBPOSTAL_PARSER_UPDATED_PATH
#     (last_updated_parser under the data dir) and PARSER_MODULE_DIR
#     (address_parser).
#   * In both the "download" and "upload" branches the former if/elif chain on
#     $FILE becomes separate if-blocks, each also accepting "all", so "all" runs
#     the base, geodb and parser steps in turn; a new "parser" step is added to
#     each branch.
#
# NOTE(review): this copy looks whitespace-collapsed -- each physical line below
#   holds many logical diff lines, and blank context lines / tab indentation are
#   not recoverable from it. The hunk headers (e.g. "@@ -56,24 +57,33 @@") declare
#   more lines than are visibly distinguishable, and the last hunk may continue
#   past the end of this text. Regenerate the patch from the repository before
#   trying to apply it.
# NOTE(review): the added tests use unquoted $FILE (e.g. [ $FILE = "base" ]); if
#   FILE were empty the shell would see [ = "base" ], which is a malformed test.
#   Whether argument validation earlier in the script rules that out is not
#   visible here -- TODO confirm, or quote as "$FILE".
# NOTE(review): the all-local recipe runs ./libpostal_data relative to the
#   current directory; presumably this assumes an in-tree build -- verify for
#   VPATH builds.
# NOTE(review): removing download_geodb drops a target that outside scripts may
#   still invoke -- confirm nothing depends on it.
# NOTE(review): address_parser_LDADD lists sparkey/libsparkey.la and
#   libscanner.la as well as libpostal.la, whose LIBADD (visible in the first
#   hunk's context) already names both -- confirm the duplication is intended.
#
diff --git a/src/Makefile.am b/src/Makefile.am index 4624216e..894a3e2e 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -10,7 +10,7 @@ DEFAULT_INCLUDES=-I.. CFLAGS = $(CFLAGS_BASE) lib_LTLIBRARIES = libpostal.la -libpostal_la_SOURCES = libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c string_utils.c file_utils.c numex.c utf8proc/utf8proc.c cmp/cmp.c geodb.c geo_disambiguation.c normalize.c bloom.c features.c geonames.c geohash/geohash.c unicode_scripts.c msgpack_utils.c +libpostal_la_SOURCES = libpostal.c address_dictionary.c transliterate.c tokens.c trie.c trie_search.c string_utils.c file_utils.c numex.c utf8proc/utf8proc.c cmp/cmp.c geodb.c geo_disambiguation.c normalize.c bloom.c features.c geonames.c geohash/geohash.c unicode_scripts.c msgpack_utils.c address_parser.c address_parser_io.c averaged_perceptron.c sparse_matrix.c averaged_perceptron_trainer.c averaged_perceptron_tagger.c graph.c graph_builder.c libpostal_la_LIBADD = libscanner.la sparkey/libsparkey.la libpostal_la_CFLAGS = $(CFLAGS_O2) @@ -43,17 +43,14 @@ address_parser_train_CFLAGS = $(CFLAGS_O3) address_parser_test_SOURCES = address_parser_test.c address_parser.c address_parser_io.c averaged_perceptron.c sparse_matrix.c averaged_perceptron_trainer.c averaged_perceptron_tagger.c address_dictionary.c geodb.c geo_disambiguation.c graph.c graph_builder.c normalize.c features.c geonames.c geohash/geohash.c unicode_scripts.c transliterate.c trie.c trie_search.c string_utils.c tokens.c msgpack_utils.c file_utils.c utf8proc/utf8proc.c cmp/cmp.c address_parser_test_LDADD = sparkey/libsparkey.la libscanner.la address_parser_test_CFLAGS = $(CFLAGS_O3) -address_parser_SOURCES = address_parser_cli.c address_parser.c address_parser_io.c averaged_perceptron.c sparse_matrix.c averaged_perceptron_trainer.c averaged_perceptron_tagger.c address_dictionary.c geodb.c geo_disambiguation.c graph.c graph_builder.c normalize.c features.c geonames.c geohash/geohash.c unicode_scripts.c 
transliterate.c trie.c trie_search.c string_utils.c tokens.c msgpack_utils.c file_utils.c utf8proc/utf8proc.c cmp/cmp.c linenoise/linenoise.c -address_parser_LDADD = sparkey/libsparkey.la libscanner.la +address_parser_SOURCES = address_parser_cli.c linenoise/linenoise.c +address_parser_LDADD = sparkey/libsparkey.la libscanner.la libpostal.la address_parser_CFLAGS = $(CFLAGS_O3) pkginclude_HEADERS = libpostal.h all-local: - ./libpostal_data download base @LIBPOSTAL_DATA_DIR@ - -download_geodb: - ./libpostal_data download geodb @LIBPOSTAL_DATA_DIR@ + ./libpostal_data download all @LIBPOSTAL_DATA_DIR@ lexer: scanner.re re2c -F -s -b -8 -o scanner.c scanner.re diff --git a/src/libpostal_data b/src/libpostal_data index 0db2203d..3b922ef8 100755 --- a/src/libpostal_data +++ b/src/libpostal_data @@ -10,7 +10,7 @@ LIBPOSTAL_S3_KEY="s3://$LIBPOSTAL_S3_BUCKET_NAME" LIBPOSTAL_S3_BUCKET_URL="http://$LIBPOSTAL_S3_BUCKET_NAME.s3.amazonaws.com" LIBPOSTAL_DATA_FILE="libpostal_data.tar.gz" LIBPOSTAL_GEODB_FILE="geodb.tar.gz" - +LIBPOSTAL_PARSER_FILE="parser.tar.gz" COMMAND=$1 FILE=$2 @@ -18,10 +18,11 @@ LIBPOSTAL_DATA_DIR=$3 LIBPOSTAL_DATA_UPDATED_PATH=$LIBPOSTAL_DATA_DIR/last_updated LIBPOSTAL_GEO_UPDATED_PATH=$LIBPOSTAL_DATA_DIR/last_updated_geo - +LIBPOSTAL_PARSER_UPDATED_PATH=$LIBPOSTAL_DATA_DIR/last_updated_parser BASIC_MODULE_DIRS=(address_expansions numex transliteration) GEODB_MODULE_DIR=geodb +PARSER_MODULE_DIR=address_parser EPOCH_DATE="Jan 1 00:00:00 1970" @@ -56,24 +57,33 @@ download_file() { if [ $COMMAND = "download" ]; then mkdir -p $LIBPOSTAL_DATA_DIR - if [ $FILE = "base" ]; then + if [ $FILE = "base" ] || [ $FILE = "all" ]; then download_file $LIBPOSTAL_DATA_UPDATED_PATH $LIBPOSTAL_DATA_DIR $LIBPOSTAL_DATA_FILE "data file" - elif [ $FILE = "geodb" ]; then + fi + if [ $FILE = "geodb" ] || [ $FILE = "all" ]; then download_file $LIBPOSTAL_GEO_UPDATED_PATH $LIBPOSTAL_DATA_DIR $LIBPOSTAL_GEODB_FILE "geodb data file" fi - + if [ $FILE = "parser" ] || [ $FILE = "all" ]; 
then + download_file $LIBPOSTAL_PARSER_UPDATED_PATH $LIBPOSTAL_DATA_DIR $LIBPOSTAL_PARSER_FILE "parser data file" + fi elif [ $COMMAND = "upload" ]; then - if [ $FILE = "base" ]; then + if [ $FILE = "base" ] || [ $FILE = "all" ]; then tar -C $LIBPOSTAL_DATA_DIR -cvzf $LIBPOSTAL_DATA_DIR/$LIBPOSTAL_DATA_FILE ${BASIC_MODULE_DIRS[*]} aws s3 cp --acl=public-read $LIBPOSTAL_DATA_DIR/$LIBPOSTAL_DATA_FILE $LIBPOSTAL_S3_KEY - elif [ $FILE = "geodb" ]; then + fi + + if [ $FILE = "geodb" ] || [ $FILE = "all" ]; then tar -C $LIBPOSTAL_DATA_DIR -cvzf $LIBPOSTAL_DATA_DIR/$LIBPOSTAL_GEODB_FILE $GEODB_MODULE_DIR aws s3 cp --acl=public-read $LIBPOSTAL_DATA_DIR/$LIBPOSTAL_GEODB_FILE $LIBPOSTAL_S3_KEY fi + if [ $FILE = "parser" ] || [ $FILE = "all" ]; then + tar -C $LIBPOSTAL_DATA_DIR -cvzf $LIBPOSTAL_DATA_DIR/$LIBPOSTAL_PARSER_FILE $PARSER_MODULE_DIR + aws s3 cp --acl=public-read $LIBPOSTAL_DATA_DIR/$LIBPOSTAL_PARSER_FILE $LIBPOSTAL_S3_KEY + fi else echo "Invalid command: $COMMAND"