From 2ee412c98ed0cac86c58080a0b119f0a0bef4799 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Ber=C4=99sewicz?= Date: Tue, 27 Mar 2018 15:55:34 +0200 Subject: [PATCH 01/62] Update street_types.txt --- resources/dictionaries/pl/street_types.txt | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/resources/dictionaries/pl/street_types.txt b/resources/dictionaries/pl/street_types.txt index a3241ff5..f83ebba3 100644 --- a/resources/dictionaries/pl/street_types.txt +++ b/resources/dictionaries/pl/street_types.txt @@ -1,12 +1,18 @@ aleja|al autostrada boczna -bulwar +bulwar|bulw droga obwodnica +ogród +osiedle|os +park plac|pl rondo rynek +skwer szosa ulica|ul -zaulek \ No newline at end of file +wybrzeże|wyb +wyspa +zaulek From b86c03ddcffbedee219dbff2c01bb2499723f3d2 Mon Sep 17 00:00:00 2001 From: Catalin Marius Ciochina Date: Tue, 10 Apr 2018 08:53:26 +0300 Subject: [PATCH 02/62] Add to Romanian dictionary --- resources/dictionaries/ro/building_types.txt | 6 ++++++ resources/dictionaries/ro/company_types.txt | 5 +++++ resources/dictionaries/ro/cross_streets.txt | 4 ++-- resources/dictionaries/ro/entrances.txt | 2 +- resources/dictionaries/ro/near.txt | 2 +- resources/dictionaries/ro/number.txt | 2 +- resources/dictionaries/ro/personal_titles.txt | 3 ++- resources/dictionaries/ro/place_names.txt | 3 +++ resources/dictionaries/ro/qualifiers.txt | 8 +++++++- resources/dictionaries/ro/stopwords.txt | 3 ++- resources/dictionaries/ro/street_types.txt | 8 ++++---- resources/dictionaries/ro/synonyms.txt | 1 + resources/dictionaries/ro/unit_types_numbered.txt | 8 ++++++-- 13 files changed, 41 insertions(+), 14 deletions(-) create mode 100644 resources/dictionaries/ro/building_types.txt create mode 100644 resources/dictionaries/ro/company_types.txt create mode 100644 resources/dictionaries/ro/place_names.txt create mode 100644 resources/dictionaries/ro/synonyms.txt diff --git a/resources/dictionaries/ro/building_types.txt 
b/resources/dictionaries/ro/building_types.txt new file mode 100644 index 00000000..28ab10eb --- /dev/null +++ b/resources/dictionaries/ro/building_types.txt @@ -0,0 +1,6 @@ +anexa +bloc|blc|bl +casa +cladirea|cladire +complex +garaj diff --git a/resources/dictionaries/ro/company_types.txt b/resources/dictionaries/ro/company_types.txt new file mode 100644 index 00000000..09dc7797 --- /dev/null +++ b/resources/dictionaries/ro/company_types.txt @@ -0,0 +1,5 @@ +banca +organizatie neguvernamentala|ong +societate comerciala|sc +societate cu raspundere limitata|srl +societate pe actiuni|sa diff --git a/resources/dictionaries/ro/cross_streets.txt b/resources/dictionaries/ro/cross_streets.txt index d141a2ba..386b33fc 100644 --- a/resources/dictionaries/ro/cross_streets.txt +++ b/resources/dictionaries/ro/cross_streets.txt @@ -1,5 +1,5 @@ & -colț|colt +colț|colt|colț cu|colt cu între|intre la colțul de pe|la coltul de pe -și|si \ No newline at end of file +și|si diff --git a/resources/dictionaries/ro/entrances.txt b/resources/dictionaries/ro/entrances.txt index 8703e027..b7acf9bd 100644 --- a/resources/dictionaries/ro/entrances.txt +++ b/resources/dictionaries/ro/entrances.txt @@ -1 +1 @@ -intrare \ No newline at end of file +intrare|intrarea diff --git a/resources/dictionaries/ro/near.txt b/resources/dictionaries/ro/near.txt index 00eac643..c8962fba 100644 --- a/resources/dictionaries/ro/near.txt +++ b/resources/dictionaries/ro/near.txt @@ -4,4 +4,4 @@ din in apropiere de în apropiere|in apropiere în jurul aici|in jurul aici -lângă mine|langa mine \ No newline at end of file +lângă mine|langa mine|lângă|langa diff --git a/resources/dictionaries/ro/number.txt b/resources/dictionaries/ro/number.txt index 1e3fad48..33bc26e4 100644 --- a/resources/dictionaries/ro/number.txt +++ b/resources/dictionaries/ro/number.txt @@ -1 +1 @@ -număr|numar|nr|nº|n°|#|№|no \ No newline at end of file +număr|numar|nr|nº|n°|#|№|no|numarul|numărul diff --git 
a/resources/dictionaries/ro/personal_titles.txt b/resources/dictionaries/ro/personal_titles.txt index 2634183b..a78988f6 100644 --- a/resources/dictionaries/ro/personal_titles.txt +++ b/resources/dictionaries/ro/personal_titles.txt @@ -8,7 +8,8 @@ general|gen major|maj locotenent locotenent colonel +pictor profesor|prof sergent sublocotenent -vice amiral \ No newline at end of file +vice amiral diff --git a/resources/dictionaries/ro/place_names.txt b/resources/dictionaries/ro/place_names.txt new file mode 100644 index 00000000..f51987d7 --- /dev/null +++ b/resources/dictionaries/ro/place_names.txt @@ -0,0 +1,3 @@ +cinema +cafenea +fabrica diff --git a/resources/dictionaries/ro/qualifiers.txt b/resources/dictionaries/ro/qualifiers.txt index 6d021f70..016a6114 100644 --- a/resources/dictionaries/ro/qualifiers.txt +++ b/resources/dictionaries/ro/qualifiers.txt @@ -1 +1,7 @@ -bloc|bl \ No newline at end of file +bloc|bl +cartier|cartierul +comuna|comunā +kilometrul|kilometru|km +sat|satul +sector|sectorul|sect +zona diff --git a/resources/dictionaries/ro/stopwords.txt b/resources/dictionaries/ro/stopwords.txt index 80195e69..406760a0 100644 --- a/resources/dictionaries/ro/stopwords.txt +++ b/resources/dictionaries/ro/stopwords.txt @@ -1,2 +1,3 @@ și|si|& -cel \ No newline at end of file +cel +intre diff --git a/resources/dictionaries/ro/street_types.txt b/resources/dictionaries/ro/street_types.txt index 46758ffa..ca6985d1 100644 --- a/resources/dictionaries/ro/street_types.txt +++ b/resources/dictionaries/ro/street_types.txt @@ -1,13 +1,13 @@ aleea|ale|alea|al bulevardul|bd|bul|bdul|blv|blvd|b-dul|b.dul|bulev|bulevardu|bulevard -calea|cal -drumul +calea|cale|cal +drumul|drum fundătura|fundatura|fnd fundacul|fdc intrarea|int|intr piaţa|piata|piață|pta|pţa|p-ta|p-ţa -strada|str +strada|str|st stradela|str-la|sdla șoseaua|soseaua|sos|șos splaiul|sp|spl -vârful|varful|virful|vîrful|varf|vf \ No newline at end of file +vârful|varful|virful|vîrful|varf|vf diff --git 
a/resources/dictionaries/ro/synonyms.txt b/resources/dictionaries/ro/synonyms.txt new file mode 100644 index 00000000..62880a91 --- /dev/null +++ b/resources/dictionaries/ro/synonyms.txt @@ -0,0 +1 @@ +decembrie|dec diff --git a/resources/dictionaries/ro/unit_types_numbered.txt b/resources/dictionaries/ro/unit_types_numbered.txt index d0c72db1..1727c3c6 100644 --- a/resources/dictionaries/ro/unit_types_numbered.txt +++ b/resources/dictionaries/ro/unit_types_numbered.txt @@ -1,4 +1,8 @@ -apartament|ap|apt|apart +apartamentul|apartament|ap|apt|apart birou +cladire|cladirea|clădire|clădirea +corp|corpul +complex +interior|int lotul -sală|sala \ No newline at end of file +sală|sala From 04a77d2e59f37577aa937ccedf714b296966d920 Mon Sep 17 00:00:00 2001 From: missinglink Date: Thu, 12 Apr 2018 16:10:18 +0200 Subject: [PATCH 03/62] [docs] fix minor typo --- ISSUE_TEMPLATE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ISSUE_TEMPLATE.md b/ISSUE_TEMPLATE.md index 09423a1b..25a01097 100644 --- a/ISSUE_TEMPLATE.md +++ b/ISSUE_TEMPLATE.md @@ -8,7 +8,7 @@ I was checking out the awesome libpostal, and saw something that could be improv --- #### Here's how I'm using libpostal - + --- #### Here's what I did From 87b92f381e73fd13406410fe2ce8b8b404733ee9 Mon Sep 17 00:00:00 2001 From: Antoine D Date: Fri, 27 Apr 2018 12:37:45 +0200 Subject: [PATCH 04/62] add missing "relation" tag --- resources/boundaries/osm/ar.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/resources/boundaries/osm/ar.yaml b/resources/boundaries/osm/ar.yaml index 88f0002b..3f7181d7 100644 --- a/resources/boundaries/osm/ar.yaml +++ b/resources/boundaries/osm/ar.yaml @@ -11,8 +11,9 @@ overrides: id: - # Buenos Aires (state boundary coterminous with city) - "3082668": null + relation: + # Buenos Aires (state boundary coterminous with city) + "3082668": null contained_by: relation: # Buenos Aires From 2890f739a3dc38e4631f5626aeb011847ce30fc6 Mon Sep 17 00:00:00 2001 
From: Antoine D Date: Fri, 27 Apr 2018 12:38:27 +0200 Subject: [PATCH 05/62] overrides shouldn't be nested in admin_level --- resources/boundaries/osm/tw.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/resources/boundaries/osm/tw.yaml b/resources/boundaries/osm/tw.yaml index 65e5efd5..99f1b868 100644 --- a/resources/boundaries/osm/tw.yaml +++ b/resources/boundaries/osm/tw.yaml @@ -10,10 +10,10 @@ "8": "city" "9": "suburb" - overrides: - id: - relation: - # Taiwan Province - "3777248": "state" - # Fujian Province - "3777250": "state" \ No newline at end of file + overrides: + id: + relation: + # Taiwan Province + "3777248": "state" + # Fujian Province + "3777250": "state" From 1c4d4001de89a563a8c67649dfc8754e3e0a3acf Mon Sep 17 00:00:00 2001 From: Ricardo Alanis Date: Mon, 28 May 2018 13:04:36 -0500 Subject: [PATCH 06/62] [docs] Update Readme.md on free text extraction Updates Readme to reflect the position of the project on address extraction from free text. I believe it could be a commonly asked question and It could be a good idea to reflect that it is not on the scope. Source: https://github.com/openvenues/libpostal/issues/22 --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 570ee0e1..669d7e39 100644 --- a/README.md +++ b/README.md @@ -518,6 +518,7 @@ Non-goals - Verifying that a location is a valid address - Actually geocoding addresses to a lat/lon (that requires a database/search index) +- Extracting addresses from free text Raison d'être ------------- From 168132145241f763db6dbd64660f5aa2620e1d2b Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Sun, 12 Aug 2018 19:22:21 +0200 Subject: [PATCH 07/62] minor typo minor typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 570ee0e1..a7f3a7b8 100644 --- a/README.md +++ b/README.md @@ -494,7 +494,7 @@ language (IX => 9) which occur in the names of many monarchs, popes, etc. 
- **Fast, accurate tokenization/lexing**: clocked at > 1M tokens / sec, implements the TR-29 spec for UTF8 word segmentation, tokenizes East Asian -languages chracter by character instead of on whitespace. +languages character by character instead of on whitespace. - **UTF8 normalization**: optionally decompose UTF8 to NFD normalization form, strips accent marks e.g. à => a and/or applies Latin-ASCII transliteration. From e8569f9e26935f9f1f443791280bdeca108d1f7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bi=C3=A1n=20Tam=C3=A1s=20L=C3=A1szl=C3=B3?= Date: Tue, 14 Aug 2018 12:19:59 +0200 Subject: [PATCH 08/62] Add haskell-postal to unofficial language bindings --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 570ee0e1..646611f0 100644 --- a/README.md +++ b/README.md @@ -382,6 +382,7 @@ Libpostal is designed to be used by higher-level languages. If you don't see yo - LuaJIT: [lua-resty-postal](https://github.com/bungle/lua-resty-postal) - Perl: [Geo::libpostal](https://metacpan.org/pod/Geo::libpostal) - Elixir: [Expostal](https://github.com/SweetIQ/expostal) +- Haskell: [haskell-postal](http://github.com/netom/haskell-postal) **Database extensions** From 363e83304a6facb733b0cc5a505bd23930d54116 Mon Sep 17 00:00:00 2001 From: Edward Ding Date: Fri, 26 Oct 2018 15:08:56 -0700 Subject: [PATCH 09/62] fix memory leak in setup when datadir is invalid or setup failed --- src/libpostal.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/libpostal.c b/src/libpostal.c index d56e7000..b6348697 100644 --- a/src/libpostal.c +++ b/src/libpostal.c @@ -261,20 +261,22 @@ bool libpostal_setup_datadir(char *datadir) { numex_path = path_join(3, datadir, LIBPOSTAL_NUMEX_SUBDIR, NUMEX_DATA_FILE); address_dictionary_path = path_join(3, datadir, LIBPOSTAL_ADDRESS_EXPANSIONS_SUBDIR, ADDRESS_DICTIONARY_DATA_FILE); } + + bool setup_succeed = true; if (!transliteration_module_setup(transliteration_path)) { 
log_error("Error loading transliteration module, dir=%s\n", transliteration_path); - return false; + setup_succeed = false; } - if (!numex_module_setup(numex_path)) { + if (setup_succeed && !numex_module_setup(numex_path)) { log_error("Error loading numex module, dir=%s\n", numex_path); - return false; + setup_succeed = false; } - if (!address_dictionary_module_setup(address_dictionary_path)) { + if (setup_succeed && !address_dictionary_module_setup(address_dictionary_path)) { log_error("Error loading dictionary module, dir=%s\n", address_dictionary_path); - return false; + setup_succeed = false; } if (transliteration_path != NULL) { @@ -289,7 +291,7 @@ bool libpostal_setup_datadir(char *datadir) { free(address_dictionary_path); } - return true; + return setup_succeed; } bool libpostal_setup(void) { From 714d69a7927c2fdb29d0a4bd14af49953cec3de8 Mon Sep 17 00:00:00 2001 From: Yevgen Antymyrov Date: Thu, 1 Nov 2018 00:30:57 +0100 Subject: [PATCH 10/62] Fixing a small typo in Ukrainian transliterated word 'budynok' (house), was: budnyok. 
--- resources/addresses/uk.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/resources/addresses/uk.yaml b/resources/addresses/uk.yaml index 35cca157..a00bb526 100644 --- a/resources/addresses/uk.yaml +++ b/resources/addresses/uk.yaml @@ -49,7 +49,7 @@ numbers: house_numbers: - budnyok: &budnyok + budynok: &budynok canonical: будинок abbreviated: буд sample: true @@ -58,8 +58,8 @@ house_numbers: sample_probability: 0.1 numeric: direction: left - budnyok_latin: &budnyok_latin - canonical: budnyok + budynok_latin: &budynok_latin + canonical: budynok abbreviated: bud sample: true canonical_probability: 0.6 @@ -88,10 +88,10 @@ house_numbers: direction: left alphanumeric: - default: *budnyok + default: *budynok probability: 0.65 alternatives: - - alternative: *budnyok_latin + - alternative: *budynok_latin probability: 0.05 - alternative: *dom probability: 0.25 From 75d6dab4af2126079327c944eade939813d960ff Mon Sep 17 00:00:00 2001 From: Yevgen Antymyrov Date: Thu, 1 Nov 2018 00:58:12 +0100 Subject: [PATCH 11/62] =?UTF-8?q?Adde=20support=20for=20German=20office/ho?= =?UTF-8?q?use=20numbers=20with=20geb=C3=A4ude/geb=20prefix=20word.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- resources/addresses/de.yaml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/resources/addresses/de.yaml b/resources/addresses/de.yaml index 40078b51..143e49a5 100644 --- a/resources/addresses/de.yaml +++ b/resources/addresses/de.yaml @@ -63,10 +63,23 @@ numbers: house_numbers: + gebaude: &gebaude + canonical: gebäude + abbreviated: geb + sample: true + canonical_probability: 0.5 + abbreviated_probability: 0.5 + sample_probability: 0.05 + numeric: + direction: left alphanumeric: default: *nummer + probability: 0.95 + alternatives: + - alternative: *gebaude + probability: 0.05 - alphanumeric_phrase_probability: 0.0001 + alphanumeric_phrase_probability: 0.05 conscription_numbers: 
alphanumeric: From 64f274b8348da5987e0676490367b5b4d672382d Mon Sep 17 00:00:00 2001 From: Matt W Date: Fri, 23 Nov 2018 14:28:49 -0800 Subject: [PATCH 12/62] Update README.md Clarify what will be done in "datadir" at configure/install time --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 570ee0e1..dbb657ea 100644 --- a/README.md +++ b/README.md @@ -117,7 +117,7 @@ Then to install the C library: git clone https://github.com/openvenues/libpostal cd libpostal ./bootstrap.sh -./configure --datadir=[...some dir with a few GB of space...] +./configure --datadir=[...some dir with a few GB of space where a "libpostal" directory exists or can be created/modified...] make -j4 sudo make install From 52f6beba353c5f98b67bbaaa74b76e678f7999bf Mon Sep 17 00:00:00 2001 From: 0xflotus <0xflotus@gmail.com> Date: Sun, 30 Dec 2018 03:17:50 +0100 Subject: [PATCH 13/62] fixed structures --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 570ee0e1..f2ad27b3 100644 --- a/README.md +++ b/README.md @@ -472,7 +472,7 @@ optionally be separated so Rosenstraße and Rosen Straße are equivalent. for a wide variety of countries and languages, not just US/English. The model is trained on over 1 billion addresses and address-like strings, using the templates in the [OpenCage address formatting repo](https://github.com/OpenCageData/address-formatting) to construct formatted, -tagged traning examples for every inhabited country in the world. Many types of [normalizations](https://github.com/openvenues/libpostal/blob/master/scripts/geodata/addresses/components.py) +tagged training examples for every inhabited country in the world. Many types of [normalizations](https://github.com/openvenues/libpostal/blob/master/scripts/geodata/addresses/components.py) are performed to make the training data resemble real messy geocoder input as closely as possible. 
- **Language classification**: multinomial logistic regression @@ -623,7 +623,7 @@ libpostal is written in modern, legible, C99 and uses the following conventions: - Confines almost all mallocs to *name*_new and all frees to *name*_destroy - Efficient existing implementations for simple things like hashtables - Generic containers (via [klib](https://github.com/attractivechaos/klib)) whenever possible -- Data structrues take advantage of sparsity as much as possible +- Data structures take advantage of sparsity as much as possible - Efficient double-array trie implementation for most string dictionaries - Cross-platform as much as possible, particularly for *nix From 495f8f612e7f55ea70a74f952bc00560106317f0 Mon Sep 17 00:00:00 2001 From: Sandeep Date: Thu, 11 Jul 2019 23:08:00 -0500 Subject: [PATCH 14/62] Adding farm to market street type --- resources/dictionaries/en/street_types.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/resources/dictionaries/en/street_types.txt b/resources/dictionaries/en/street_types.txt index 8d75c5a1..e8359347 100644 --- a/resources/dictionaries/en/street_types.txt +++ b/resources/dictionaries/en/street_types.txt @@ -132,6 +132,7 @@ falls|fls fare farm|frm farms|frms +farm to market|fm|farm-to-market fern ferry|fry|fy field|fld|fd @@ -407,4 +408,4 @@ well|wl wells|wls wharf|whrf|whf wynd|wyn -yard|yd|yrd \ No newline at end of file +yard|yd|yrd From aba8131a6d7c700b8f751edf4dc6950d7a8ac53a Mon Sep 17 00:00:00 2001 From: Patrick Nordahl Date: Fri, 12 Jul 2019 15:48:30 -0500 Subject: [PATCH 15/62] mention rust-postal as an available bindings library --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 570ee0e1..a40df26f 100644 --- a/README.md +++ b/README.md @@ -382,6 +382,7 @@ Libpostal is designed to be used by higher-level languages. 
If you don't see yo - LuaJIT: [lua-resty-postal](https://github.com/bungle/lua-resty-postal) - Perl: [Geo::libpostal](https://metacpan.org/pod/Geo::libpostal) - Elixir: [Expostal](https://github.com/SweetIQ/expostal) +- Rust: [rust-postal](https://github.com/pnordahl/rust-postal) **Database extensions** From b3dcfe610b3646679f911ff3bd292d9421338a85 Mon Sep 17 00:00:00 2001 From: Yuyang Shu Date: Tue, 5 May 2020 11:50:30 +1000 Subject: [PATCH 16/62] whosonfirst neighbourhood reverse geocoder --- .../geodata/neighborhoods/reverse_geocode.py | 116 ++++++++++++------ scripts/geodata/polygons/index.py | 1 - .../whosonfirst/download_wof_admin_polygon.py | 27 ++++ 3 files changed, 105 insertions(+), 39 deletions(-) create mode 100644 scripts/geodata/whosonfirst/download_wof_admin_polygon.py diff --git a/scripts/geodata/neighborhoods/reverse_geocode.py b/scripts/geodata/neighborhoods/reverse_geocode.py index 4c699813..ec8083a1 100644 --- a/scripts/geodata/neighborhoods/reverse_geocode.py +++ b/scripts/geodata/neighborhoods/reverse_geocode.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- import argparse +import fnmatch import logging import operator import os @@ -24,7 +25,7 @@ from geodata.osm.components import osm_address_components from geodata.osm.definitions import osm_definitions from geodata.osm.extract import parse_osm, osm_type_and_id, NODE, WAY, RELATION, OSM_NAME_TAGS from geodata.polygons.index import * -from geodata.polygons.reverse_geocode import QuattroshapesReverseGeocoder, OSMCountryReverseGeocoder, OSMReverseGeocoder +from geodata.polygons.reverse_geocode import OSMCountryReverseGeocoder, OSMReverseGeocoder from geodata.statistics.tf_idf import IDFIndex @@ -212,6 +213,9 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex): (ClickThatHood > OSM > Quattroshapes) to provide unified point-in-polygon tests for neighborhoods. The properties vary by source but each has source has least a "name" key which in practice is what we care about. 
+ + Quattroshapes data is no longer accessible and has been replaced by + WhosOnFirst. ''' PRIORITIES_FILENAME = 'priorities.json' @@ -224,9 +228,9 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex): source_priorities = { 'osm': 0, # Best names/polygons, same coordinate system 'osm_cth': 1, # Prefer the OSM names if possible - 'clickthathood': 2, # Better names/polygons than Quattroshapes - 'osm_quattro': 3, # Prefer OSM names matched with Quattroshapes polygon - 'quattroshapes': 4, # Good results in some countries/areas + 'clickthathood': 2, # Better names/polygons than WhosOnFirst + 'osm_wof': 3, # Prefer OSM names matched with WhosOnFirst polygon + 'wof': 4, # Replacement of Quattroshapes } level_priorities = { @@ -235,7 +239,7 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex): } regex_replacements = [ - # Paris arrondissements, listed like "PARIS-1ER-ARRONDISSEMENT" in Quqttroshapes + # Paris arrondissements, listed like "PARIS-1ER-ARRONDISSEMENT" in Quattroshapes (re.compile('^paris-(?=[\d])', re.I), ''), (re.compile('^prague(?= [\d]+$)', re.I), 'Praha'), ] @@ -254,7 +258,7 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex): return doc @classmethod - def create_from_osm_and_quattroshapes(cls, filename, quattroshapes_dir, country_rtree_dir, osm_rtree_dir, osm_neighborhood_borders_file, output_dir): + def create_from_osm_and_wof(cls, filename, wof_dir, country_rtree_dir, osm_rtree_dir, osm_neighborhood_borders_file, output_dir): ''' Given an OSM file (planet or some other bounds) containing neighborhoods as points (some suburbs have boundaries) @@ -270,17 +274,14 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex): logger = logging.getLogger('neighborhoods') - qs_scratch_dir = os.path.join(quattroshapes_dir, 'qs_neighborhoods') - ensure_dir(qs_scratch_dir) - logger.info('Creating ClickThatHood neighborhoods') cth = ClickThatHoodReverseGeocoder.create_neighborhoods_index() logger.info('Creating OSM neighborhoods') osmn = 
OSMNeighborhoodReverseGeocoder.create_neighborhoods_index(osm_neighborhood_borders_file) - logger.info('Creating Quattroshapes neighborhoods') - qs = QuattroshapesNeighborhoodsReverseGeocoder.create_neighborhoods_index(quattroshapes_dir, qs_scratch_dir) + logger.info('Creating WhosOnFirst neighborhoods') + wof = WhosOnFirstNeighborhoodsReverseGeocoder.create_neighborhoods_index(wof_dir, os.path.join(wof_dir, "wof_neighbourhoods")) country_rtree = OSMCountryReverseGeocoder.load(country_rtree_dir) @@ -292,7 +293,7 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex): char_scripts = get_chars_by_script() - for idx in (cth, qs, osmn): + for idx in (cth, wof, osmn): for i in xrange(idx.i): props = idx.get_properties(i) name = props.get('name') @@ -317,11 +318,11 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex): index.index_polygon(poly.context) index.add_polygon(poly.context, props) - qs.matched = [False] * qs.i + wof.matched = [False] * wof.i cth.matched = [False] * cth.i logger.info('Matching OSM points to neighborhood polygons') - # Parse OSM and match neighborhood/suburb points to Quattroshapes/ClickThatHood polygons + # Parse OSM and match neighborhood/suburb points to ClickThatHood/WhosOnFirst polygons num_polys = 0 for element_id, attrs, deps in parse_osm(filename): try: @@ -359,14 +360,14 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex): for name_key in OSM_NAME_TAGS: osm_names.extend([v for k, v in six.iteritems(attrs) if k.startswith('{}:'.format(name_key))]) - for idx in (cth, qs): + for idx in (cth, wof): candidates = idx.get_candidate_polygons(lat, lon, return_all=True) if candidates: max_sim = 0.0 arg_max = None - normalized_qs_names = {} + normalized_wof_names = {} for osm_name in osm_names: @@ -375,16 +376,16 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex): for i in candidates: props = idx.get_properties(i) - name = normalized_qs_names.get(i) + name = normalized_wof_names.get(i) if not name: name = props.get('name') if not name: 
continue for pattern, repl in cls.regex_replacements: name = pattern.sub(repl, name) - normalized_qs_names[i] = name + normalized_wof_names[i] = name - if is_neighborhood and idx is qs and props.get(QuattroshapesReverseGeocoder.LEVEL) != 'neighborhood': + if is_neighborhood and idx is wof and props.get(WhosOnFirstNeighborhoodsReverseGeocoder.LEVEL) != 'neighborhood': continue if not contains_ideographs: @@ -446,7 +447,7 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex): continue source = 'osm_cth' else: - level = props.get(QuattroshapesReverseGeocoder.LEVEL, None) + level = props.get(WhosOnFirstNeighborhoodsReverseGeocoder.LEVEL, None) source = 'osm_quattro' if level == 'neighborhood': @@ -467,7 +468,7 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex): if num_polys % 1000 == 0 and num_polys > 0: logger.info('did {} neighborhoods'.format(num_polys)) - for idx, source in ((cth, 'clickthathood'), (qs, 'quattroshapes')): + for idx, source in ((cth, 'clickthathood'), (wof, 'whosonfirst')): for i in xrange(idx.i): props = idx.get_properties(i) poly = idx.get_polygon(i) @@ -482,7 +483,7 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex): props['polygon_type'] = 'local_admin' else: continue - elif props.get(QuattroshapesReverseGeocoder.LEVEL, None) == 'neighborhood': + elif props.get(WhosOnFirstNeighborhoodsReverseGeocoder.LEVEL, None) == 'neighborhood': component = AddressFormatter.SUBURB name = props.get('name') if not name: @@ -525,28 +526,67 @@ class NeighborhoodReverseGeocoder(RTreePolygonIndex): return sorted(candidates, key=self.priority) -class QuattroshapesNeighborhoodsReverseGeocoder(GeohashPolygonIndex, QuattroshapesReverseGeocoder): +class WhosOnFirstNeighborhoodsReverseGeocoder(GeohashPolygonIndex): persistent_polygons = False cache_size = None + NAME = "wof:name" + ASCII_NAME = "gn:asciiname" + LEVEL = "wof:placetype" + GEONAMES_ID = "gn:geonameid" + SUPERSEDED = "wof:superseded_by" + + NEIGHBOURHOOD_TYPES = {"localadmin", "locality", 
"neighbourhood"} + POLYGON_TYPES = {"Polygon", "MultiPolygon"} + @classmethod - def create_neighborhoods_index(cls, quattroshapes_dir, - output_dir, - index_filename=None, - polys_filename=DEFAULT_POLYS_FILENAME): - local_admin_filename = os.path.join(quattroshapes_dir, cls.LOCAL_ADMIN_FILENAME) - neighborhoods_filename = os.path.join(quattroshapes_dir, cls.NEIGHBORHOODS_FILENAME) - return cls.create_from_shapefiles([local_admin_filename, neighborhoods_filename], - output_dir, index_filename=index_filename, - polys_filename=polys_filename) + def is_valid_neighbourhood(cls, geojson): + validity = not geojson["properties"].get(cls.SUPERSEDED) + for field in {cls.NAME, cls.ASCII_NAME, cls.GEONAMES_ID}: + validity &= geojson["properties"].get(field) + return validity and geojson["properties"].get(cls.LEVEL) in cls.NEIGHBOURHOOD_TYPES and geojson["geometry"]["type"] in cls.POLYGON_TYPES + + @classmethod + def create_neighborhoods_index(cls, wof_dir, output_dir, index_filename=None): + index = cls(save_dir=output_dir, index_filename=index_filename) + + for root, dirnames, filenames in os.walk(wof_dir): + for fname in fnmatch.filter(filenames, "*.geojson"): + with open(os.path.join(root, fname)) as f: + geojson = json.load(f) + if cls.is_valid_neighbourhood(geojson): + properties = { + "name": safe_decode(geojson["properties"].get(cls.NAME)), + "name_en": safe_decode(geojson["properties"].get(cls.ASCII_NAME)), + "qs_level": safe_decode(geojson["properties"].get(cls.LEVEL)), + "gn_id": safe_decode(geojson["properties"].get(cls.GEONAMES_ID)) + } + + poly_type = geojson['geometry']['type'] + if poly_type == 'Polygon': + poly = cls.to_polygon(geojson['geometry']['coordinates'][0]) + index.index_polygon(poly) + poly = index.simplify_polygon(poly) + index.add_polygon(poly, dict(geojson['properties']), include_only_properties=include_props) + elif poly_type == 'MultiPolygon': + polys = [] + for coords in geojson['geometry']['coordinates']: + poly = cls.to_polygon(coords[0]) + 
polys.append(poly) + index.index_polygon(poly) + + multi_poly = index.simplify_polygon(MultiPolygon(polys)) + index.add_polygon(multi_poly, dict(geojson['properties'])) + + return index if __name__ == '__main__': # Handle argument parsing here parser = argparse.ArgumentParser() - parser.add_argument('-q', '--quattroshapes-dir', - help='Path to quattroshapes dir') + parser.add_argument('-w', '--wof-dir', + help='Path to WhosOnFirst dir') parser.add_argument('-a', '--osm-admin-rtree-dir', help='Path to OSM admin rtree dir') @@ -567,16 +607,16 @@ if __name__ == '__main__': logging.basicConfig(level=logging.INFO) args = parser.parse_args() - if args.osm_neighborhoods_file and args.quattroshapes_dir and args.osm_admin_rtree_dir and args.country_rtree_dir and args.osm_neighborhood_borders_file: - index = NeighborhoodReverseGeocoder.create_from_osm_and_quattroshapes( + if args.osm_neighborhoods_file and args.wof_dir and args.osm_admin_rtree_dir and args.country_rtree_dir and args.osm_neighborhood_borders_file: + index = NeighborhoodReverseGeocoder.create_from_osm_and_wof( args.osm_neighborhoods_file, - args.quattroshapes_dir, + args.wof_dir, args.country_rtree_dir, args.osm_admin_rtree_dir, args.osm_neighborhood_borders_file, args.out_dir ) else: - parser.error('Must specify quattroshapes dir or osm admin borders file') + parser.error('Must specify whosonfirst dir, osm-admin, country rtrees, and osm-neighbourhood-border file') index.save() diff --git a/scripts/geodata/polygons/index.py b/scripts/geodata/polygons/index.py index 59010800..41521f81 100644 --- a/scripts/geodata/polygons/index.py +++ b/scripts/geodata/polygons/index.py @@ -226,7 +226,6 @@ class PolygonIndex(object): @classmethod def create_from_geojson_files(cls, inputs, output_dir, index_filename=None, - polys_filename=DEFAULT_POLYS_FILENAME, include_only_properties=None): index = cls(save_dir=output_dir, index_filename=index_filename or cls.INDEX_FILENAME) for input_file in inputs: diff --git 
a/scripts/geodata/whosonfirst/download_wof_admin_polygon.py b/scripts/geodata/whosonfirst/download_wof_admin_polygon.py new file mode 100644 index 00000000..51d029d1 --- /dev/null +++ b/scripts/geodata/whosonfirst/download_wof_admin_polygon.py @@ -0,0 +1,27 @@ +import os +import pycountry +import subprocess +import sys + + +this_dir = os.path.realpath(os.path.dirname(__file__)) +sys.path.append(os.path.realpath(os.path.join(os.pardir, os.pardir))) + + +WOF_DATA_ADMIN_REPO_URL_PREFIX = "https://github.com/whosonfirst-data/whosonfirst-data/" +WOF_DATA_ADMIN_REPO_PREFIX = "whosonfirst-data-admin-" + + +def download_wof_data_admin(wof_dir): + for country_object in pycountry.countries: + repo_name = WOF_DATA_ADMIN_REPO_PREFIX + country_object.alpha2.lower() + repo_location = os.path.join(wof_dir, repo_name) + if not os.path.exists(repo_location): + subprocess.call(["git", "clone", WOF_DATA_ADMIN_REPO_URL_PREFIX + repo_name]) + + +if __name__ == '__main__': + if len(sys.argv) < 2: + sys.exit('Usage: python download_whosonfirst_data.py wof_dir') + + download_wof_data_admin(sys.argv[1]) From c2f89ba2f741cafcff9da34ec128f9158f115a9c Mon Sep 17 00:00:00 2001 From: naukmari <32044394+naukmari@users.noreply.github.com> Date: Wed, 8 Jul 2020 11:06:55 +0300 Subject: [PATCH 17/62] Update qualifiers.txt --- resources/dictionaries/uk/qualifiers.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/resources/dictionaries/uk/qualifiers.txt b/resources/dictionaries/uk/qualifiers.txt index 6bf2d5df..595aa9df 100644 --- a/resources/dictionaries/uk/qualifiers.txt +++ b/resources/dictionaries/uk/qualifiers.txt @@ -6,3 +6,5 @@ kvartal|kvart|kv|kv-l oblast|obl район|р-н raion|r-n +місто|міс|м +misto|mis|m From 92ab27840f1aa5d224d03ec7f3c57be5cb480a5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A1lm=C3=A1n=20=E2=80=9EKAMI=E2=80=9D=20Szalai?= Date: Mon, 28 Sep 2020 21:42:53 +0200 Subject: [PATCH 18/62] Add more words and synonyms for Hungarian language --- 
.../dictionaries/hu/ambiguous_expansions.txt | 2 +- .../dictionaries/hu/level_types_mezzanine.txt | 4 +-- resources/dictionaries/hu/personal_titles.txt | 3 +- resources/dictionaries/hu/street_types.txt | 29 ++++++++++++++----- 4 files changed, 26 insertions(+), 12 deletions(-) diff --git a/resources/dictionaries/hu/ambiguous_expansions.txt b/resources/dictionaries/hu/ambiguous_expansions.txt index 88190f16..13eecc6a 100644 --- a/resources/dictionaries/hu/ambiguous_expansions.txt +++ b/resources/dictionaries/hu/ambiguous_expansions.txt @@ -3,4 +3,4 @@ d e k n -u \ No newline at end of file +u diff --git a/resources/dictionaries/hu/level_types_mezzanine.txt b/resources/dictionaries/hu/level_types_mezzanine.txt index e436dcd6..2fcd8250 100644 --- a/resources/dictionaries/hu/level_types_mezzanine.txt +++ b/resources/dictionaries/hu/level_types_mezzanine.txt @@ -1,2 +1,2 @@ -félemelet|felemelet -magasföldszint|magasfoldszint \ No newline at end of file +félemelet|felemelet|félem|1/2 em|1/2em +magasföldszint|magasfoldszint|mgfszt|mgfsz|mfszt|mfsz \ No newline at end of file diff --git a/resources/dictionaries/hu/personal_titles.txt b/resources/dictionaries/hu/personal_titles.txt index e8af420c..5186864a 100644 --- a/resources/dictionaries/hu/personal_titles.txt +++ b/resources/dictionaries/hu/personal_titles.txt @@ -1 +1,2 @@ -szent|szt \ No newline at end of file +szent|szt +idősebb|id diff --git a/resources/dictionaries/hu/street_types.txt b/resources/dictionaries/hu/street_types.txt index 84863afe..c0548be6 100644 --- a/resources/dictionaries/hu/street_types.txt +++ b/resources/dictionaries/hu/street_types.txt @@ -1,21 +1,34 @@ árok|arok -dűlő|dulo +dűlő|dulo|d.|d fasor +fasora +főközlekedési út|főút|fout +határút|hatarut +kapu kert körönd|korond|krnd +körvasútsor|korvasutsor körút|korut|krt köz|koz +lakótelep|lakotelep|ltp.|ltp lejtő|lejto lépcső|lepcso liget mező|mezo +országút|orszagut park -rakpart|rpt -sétány|setany -sor -sugárút|sugarut +parkja 
+rakpart|rkpt|rkp|rpt +sétány|setany|stny.|stny +sor|s.|s +sétány|setany|sét +sugárút|sugarut|sgrt.|sgrt|srt.|srt|sgt.|sgt +sziget +telep tér|ter tere -utca|u -út|ut -útja|utja \ No newline at end of file +tanya|t.|t +udvar +utca|u.|u +út|ut|u.|u +útja|utja From 826c836a77c7fb86b67891362158d6ded3b40884 Mon Sep 17 00:00:00 2001 From: ebogit <69436904+ebogit@users.noreply.github.com> Date: Fri, 27 Nov 2020 17:46:31 +0000 Subject: [PATCH 19/62] Update street_types.txt Correction to misspelling and addition of abbreviation --- resources/dictionaries/pt/street_types.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/resources/dictionaries/pt/street_types.txt b/resources/dictionaries/pt/street_types.txt index b661dd05..6b2756e5 100644 --- a/resources/dictionaries/pt/street_types.txt +++ b/resources/dictionaries/pt/street_types.txt @@ -10,10 +10,10 @@ calçada|calcada|cc calçadinha|caclcadinha|ccnh câmara municipal|camara municipal|cm|c.m.|c. m. caminho|cam|camno -direito|dto +direito|dto|dt esquerdo|esq estrada|estr -astrada marginal|estr marg +estrada marginal|estr marg estrada municipal|em|e m|estr m estrada nacional|en|e n|estr n estrada regional|er|e r|estr r @@ -50,4 +50,4 @@ viaduto|vd|vdto viela|ve vila|vl volta -zona|zn \ No newline at end of file +zona|zn From d09ffebebd960d40e94a2894454481510953a741 Mon Sep 17 00:00:00 2001 From: Aaron Madlon-Kay Date: Wed, 27 Jan 2021 15:37:13 +0900 Subject: [PATCH 20/62] Mention install with MacPorts in readme --- README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 570ee0e1..4630a1bd 100644 --- a/README.md +++ b/README.md @@ -106,7 +106,15 @@ sudo apt-get install curl autoconf automake libtool pkg-config sudo yum install curl autoconf automake libtool pkgconfig ``` -**On Mac OSX** +**On macOS** + +Install with one command via [MacPorts](https://www.macports.org/): +``` +port install libpostal +``` + +Or as follows with
[Homebrew](https://brew.sh/): + ``` brew install curl autoconf automake libtool pkg-config ``` From 221e830e6358b4b4209cf74879d94a57cc0cff1c Mon Sep 17 00:00:00 2001 From: dixstonz3 Date: Mon, 5 Jul 2021 15:24:43 +0200 Subject: [PATCH 21/62] Update entrances.txt --- resources/dictionaries/ru/entrances.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/resources/dictionaries/ru/entrances.txt b/resources/dictionaries/ru/entrances.txt index 7a864ade..8cdf7834 100644 --- a/resources/dictionaries/ru/entrances.txt +++ b/resources/dictionaries/ru/entrances.txt @@ -1,2 +1,4 @@ вход -vkhod \ No newline at end of file +vkhod +подъезд +pod'ezd From 339252c3a19ff1eda1141b308ed2cc9980525860 Mon Sep 17 00:00:00 2001 From: James Gates Date: Wed, 8 Sep 2021 13:47:23 -0500 Subject: [PATCH 22/62] Modified install steps with notes that worked for me. Thanks for the project, everyone. I just went through the install process and thought maybe the mac directions could use a tiny bit of clarification. I'm by no means familiar enough with the project to know if this is the best way to convey my experience but I figured I'd give it a shot and maybe it will help someone in the future. --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 365aef70..0d0cfa7e 100644 --- a/README.md +++ b/README.md @@ -116,9 +116,15 @@ Then to install the C library: ``` git clone https://github.com/openvenues/libpostal cd libpostal + +# skip if installing for the first time make distclean + ./bootstrap.sh + +# omit --datadir flag to install data in current directory ./configure --datadir=[...some dir with a few GB of space...]
+ make -j4 sudo make install From 3e8cfc2f807113a59cf68d3c02e38347cb28f0d6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 21 Jun 2022 21:16:56 +0000 Subject: [PATCH 23/62] Bump numpy from 1.10.4 to 1.22.0 in /scripts Bumps [numpy](https://github.com/numpy/numpy) from 1.10.4 to 1.22.0. - [Release notes](https://github.com/numpy/numpy/releases) - [Changelog](https://github.com/numpy/numpy/blob/main/doc/HOWTO_RELEASE.rst) - [Commits](https://github.com/numpy/numpy/compare/v1.10.4...v1.22.0) --- updated-dependencies: - dependency-name: numpy dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- scripts/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/requirements.txt b/scripts/requirements.txt index 9c2cd513..08b5f6f6 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -20,7 +20,7 @@ leveldb==0.193 lxml==4.6.3 lru-dict==1.1.3 marisa-trie==0.7.2 -numpy==1.10.4 +numpy==1.22.0 pycountry==1.20 git+https://github.com/kmike/pymorphy2 pymorphy2-dicts-ru==2.4.394633.4298366 From 2cf12869eab0f3a663467e77c094d0198b3309a4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 5 Jul 2022 21:09:24 +0000 Subject: [PATCH 24/62] Bump ujson from 1.33 to 5.4.0 in /scripts Bumps [ujson](https://github.com/ultrajson/ultrajson) from 1.33 to 5.4.0. - [Release notes](https://github.com/ultrajson/ultrajson/releases) - [Commits](https://github.com/ultrajson/ultrajson/commits/5.4.0) --- updated-dependencies: - dependency-name: ujson dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- scripts/requirements-simple.txt | 2 +- scripts/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/requirements-simple.txt b/scripts/requirements-simple.txt index 2c307904..cc56d015 100644 --- a/scripts/requirements-simple.txt +++ b/scripts/requirements-simple.txt @@ -1,4 +1,4 @@ requests==2.20.0 six==1.10.0 PyYAML==5.4 -ujson==1.33 \ No newline at end of file +ujson==5.4.0 \ No newline at end of file diff --git a/scripts/requirements.txt b/scripts/requirements.txt index 9c2cd513..d216049f 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -32,6 +32,6 @@ python-geohash==0.8.5 requests==2.20.0 s3transfer==0.1.3 six==1.10.0 -ujson==1.35 +ujson==5.4.0 urlnorm==1.1.3 wsgiref==0.1.2 From b74decadd8be80354c26a1f493517fb977cd9280 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 6 Jul 2022 19:45:49 +0000 Subject: [PATCH 25/62] Bump lxml from 4.6.3 to 4.9.1 in /scripts Bumps [lxml](https://github.com/lxml/lxml) from 4.6.3 to 4.9.1. - [Release notes](https://github.com/lxml/lxml/releases) - [Changelog](https://github.com/lxml/lxml/blob/master/CHANGES.txt) - [Commits](https://github.com/lxml/lxml/compare/lxml-4.6.3...lxml-4.9.1) --- updated-dependencies: - dependency-name: lxml dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- scripts/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/requirements.txt b/scripts/requirements.txt index 9c2cd513..872944d0 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -17,7 +17,7 @@ gevent==1.1.2 greenlet==0.4.10 jmespath==0.9.0 leveldb==0.193 -lxml==4.6.3 +lxml==4.9.1 lru-dict==1.1.3 marisa-trie==0.7.2 numpy==1.10.4 From a157aeee777edc85b01ef18cbe7971963fd4c9be Mon Sep 17 00:00:00 2001 From: Zijian Date: Thu, 9 Feb 2023 14:28:30 +0800 Subject: [PATCH 26/62] add malaysia federal territory dict --- resources/dictionaries/ms/chains.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 resources/dictionaries/ms/chains.txt diff --git a/resources/dictionaries/ms/chains.txt b/resources/dictionaries/ms/chains.txt new file mode 100644 index 00000000..c20ba458 --- /dev/null +++ b/resources/dictionaries/ms/chains.txt @@ -0,0 +1,3 @@ +kuala lumpur|federal territory kuala lumpur|federal territory of kuala lumpur|wilayah persekutuan kuala lumpur|kl +labuan|federal territory labuan|federal territory of labuan|wilayah persekutuan labuan +putrajaya|federal territory putrajaya|federal territory of putrajaya|wilayah persekutuan putrajaya \ No newline at end of file From eafc75326044265f9bc57a3a0705f027d69e2c97 Mon Sep 17 00:00:00 2001 From: Zijian Date: Thu, 9 Feb 2023 14:56:35 +0800 Subject: [PATCH 27/62] add malaysia toponyms --- resources/dictionaries/ms/toponyms.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 resources/dictionaries/ms/toponyms.txt diff --git a/resources/dictionaries/ms/toponyms.txt b/resources/dictionaries/ms/toponyms.txt new file mode 100644 index 00000000..046f27bf --- /dev/null +++ b/resources/dictionaries/ms/toponyms.txt @@ -0,0 +1,3 @@ +kuala lumpur|federal territory kuala lumpur|federal territory of kuala lumpur|wilayah persekutuan kuala lumpur|kl +labuan|federal territory labuan|federal territory of labuan|wilayah persekutuan 
labuan +putrajaya|federal territory putrajaya|federal territory of putrajaya|wilayah persekutuan putrajaya From 60e626882f3545d5b51569088edf719b4afc97cd Mon Sep 17 00:00:00 2001 From: Zijian Date: Thu, 9 Feb 2023 16:41:52 +0800 Subject: [PATCH 28/62] add malaysia state abbreviation --- resources/states/my.yaml | 93 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 resources/states/my.yaml diff --git a/resources/states/my.yaml b/resources/states/my.yaml new file mode 100644 index 00000000..436a0bcb --- /dev/null +++ b/resources/states/my.yaml @@ -0,0 +1,93 @@ +"KL": + en: Kuala Lumpur + ms: Kuala Lumpur +"federal territory kuala lumpur": + en: Kuala Lumpur + ms: Kuala Lumpur +"federal territory of kuala lumpur": + en: Kuala Lumpur + ms: Kuala Lumpur +"wilayah persekutuan kuala lumpur": + en: Kuala Lumpur + ms: Kuala Lumpur +"federal territory labuan": + en: Labuan + ms: Labuan +"federal territory of labuan": + en: Labuan + ms: Labuan +"wilayah persekutuan labuan": + en: Labuan + ms: Labuan +"federal territory putrajaya": + en: Putrajaya + ms: Putrajaya +"federal territory of putrajaya": + en: Putrajaya + ms: Putrajaya +"wilayah persekutuan putrajaya": + en: Putrajaya + ms: Putrajaya +"pulau pinang": + en: Penang + ms: Pulau Pinang +"penang": + en: Penang + ms: Pulau Pinang +JHR: + en: Johor + ms: Johor +KDH: + en: Kedah + ms: Kedah +KTN: + en: Kelantan + ms: Kelantan +MLK: + en: Melaka + ms: Melaka +NSN: + en: Negeri Sembilan + ms: Negeri Sembilan +PHG: + en: Pahang + ms: Pahang +PRK: + en: Perak + ms: Perak +PLS: + en: Perlis + ms: Perlis +PNG: + en: Penang + ms: Pulau Pinang +SBH: + en: Sabah + ms: Sabah +SWK: + en: Sarawak + ms: Sarawak +SGR: + en: Selangor + ms: Selangor +TRG: + en: Terengganu + ms: Terengganu +KUL: + en: Kuala Lumpur + ms: Kuala Lumpur +LBN: + en: Labuan + ms: Labuan +PJY: + en: Putrajaya + ms: Putrajaya +KL: + en: Kuala Lumpur + ms: Kuala Lumpur +LB: + en: Labuan + ms: Labuan +PY: + en: Putrajaya + 
ms: Putrajaya \ No newline at end of file From 1ff4cafed580f5ffb34cc5f27e17d6f6ac633ab5 Mon Sep 17 00:00:00 2001 From: Zijian Date: Thu, 9 Feb 2023 17:21:17 +0800 Subject: [PATCH 29/62] move states abbreviation to toponyms.txt --- resources/dictionaries/ms/chains.txt | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 resources/dictionaries/ms/chains.txt diff --git a/resources/dictionaries/ms/chains.txt b/resources/dictionaries/ms/chains.txt deleted file mode 100644 index c20ba458..00000000 --- a/resources/dictionaries/ms/chains.txt +++ /dev/null @@ -1,3 +0,0 @@ -kuala lumpur|federal territory kuala lumpur|federal territory of kuala lumpur|wilayah persekutuan kuala lumpur|kl -labuan|federal territory labuan|federal territory of labuan|wilayah persekutuan labuan -putrajaya|federal territory putrajaya|federal territory of putrajaya|wilayah persekutuan putrajaya \ No newline at end of file From 18b298668b54eaba0bce75e4be8501751e7efe4a Mon Sep 17 00:00:00 2001 From: karanj Date: Thu, 16 Feb 2023 21:45:06 +1100 Subject: [PATCH 30/62] [dictionaries] adding common Hindi tokens Reviewed the Hindi ngrams list, identified a number of common items which could be added to the model --- resources/dictionaries/hi/building_types.txt | 5 ++++ resources/dictionaries/hi/directionals.txt | 4 ++++ resources/dictionaries/hi/people.txt | 1 + resources/dictionaries/hi/personal_titles.txt | 24 +++++++++++++++++++ resources/dictionaries/hi/qualifiers.txt | 3 +++ resources/dictionaries/hi/street_types.txt | 3 ++- 6 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 resources/dictionaries/hi/building_types.txt create mode 100644 resources/dictionaries/hi/directionals.txt create mode 100644 resources/dictionaries/hi/people.txt create mode 100644 resources/dictionaries/hi/personal_titles.txt create mode 100644 resources/dictionaries/hi/qualifiers.txt diff --git a/resources/dictionaries/hi/building_types.txt b/resources/dictionaries/hi/building_types.txt new file mode 
100644 index 00000000..180e3606 --- /dev/null +++ b/resources/dictionaries/hi/building_types.txt @@ -0,0 +1,5 @@ +mandir|मन्दिर|मंदिर +station +police station +post office +office \ No newline at end of file diff --git a/resources/dictionaries/hi/directionals.txt b/resources/dictionaries/hi/directionals.txt new file mode 100644 index 00000000..4dfff200 --- /dev/null +++ b/resources/dictionaries/hi/directionals.txt @@ -0,0 +1,4 @@ +dakshin|दक्षिण +uttar|उत्तर +poorva|poorav|पूर्व +paschim|पश्चिम \ No newline at end of file diff --git a/resources/dictionaries/hi/people.txt b/resources/dictionaries/hi/people.txt new file mode 100644 index 00000000..b79d681f --- /dev/null +++ b/resources/dictionaries/hi/people.txt @@ -0,0 +1 @@ +mahatma gandhi|mg|m g \ No newline at end of file diff --git a/resources/dictionaries/hi/personal_titles.txt b/resources/dictionaries/hi/personal_titles.txt new file mode 100644 index 00000000..6f60d0cd --- /dev/null +++ b/resources/dictionaries/hi/personal_titles.txt @@ -0,0 +1,24 @@ +baba +babu +bhagat +guru +jagirdar +maharaja|maharaj +mahatma|महात्मा +pandit +raja +rajarshi +rajkumar +rajkumari +rani +rishi +sahib +sant +sardar +senapati +shah +shrimati|smt|srimathi|श्रीमती +shri|shree|sri|श्री +sushri +swami +ustad \ No newline at end of file diff --git a/resources/dictionaries/hi/qualifiers.txt b/resources/dictionaries/hi/qualifiers.txt new file mode 100644 index 00000000..ba5b38f7 --- /dev/null +++ b/resources/dictionaries/hi/qualifiers.txt @@ -0,0 +1,3 @@ +nagar|नगर +colony +cantonment|cantt \ No newline at end of file diff --git a/resources/dictionaries/hi/street_types.txt b/resources/dictionaries/hi/street_types.txt index fde3a96b..b5cb5528 100644 --- a/resources/dictionaries/hi/street_types.txt +++ b/resources/dictionaries/hi/street_types.txt @@ -1,3 +1,4 @@ bazaar|bazar marg -nagar \ No newline at end of file +flyover +रोड \ No newline at end of file From e2590bca9764e34cb916470ad6e8e2c0759bd244 Mon Sep 17 00:00:00 2001 From: 
PIT-Development Date: Thu, 13 Apr 2023 08:38:52 +0200 Subject: [PATCH 31/62] docs: fix typos in contributing.md (#622) * Respect typo Repeect should be respect * Update CONTRIBUTING.md Also include guildelines to guidelines --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 26bd9bdb..7e5d2804 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,6 +1,6 @@ ## Submitting Issues -When submitting issues to libpostal, please repeect these guildelines: +When submitting issues to libpostal, please respect these guidelines: - Be constructive. Try to help solve the problem. - Always search for existing issues before submitting one. From 0ad268f991f6f3191e9fdeeb65d9731b7c542a80 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Tue, 18 Apr 2023 10:57:53 +0200 Subject: [PATCH 32/62] Add support for OpenBLAS --- m4/ax_cblas.m4 | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/m4/ax_cblas.m4 b/m4/ax_cblas.m4 index 0c87c29f..4acda990 100644 --- a/m4/ax_cblas.m4 +++ b/m4/ax_cblas.m4 @@ -157,6 +157,11 @@ if test $ax_cblas_ok = no; then AC_CHECK_LIB(cblas, cblas_dgemm, [ax_cblas_ok=yes; CBLAS_LIBS="-lcblas"]) fi +# Generic BLAS library? (for instance OpenBLAS) +if test $ax_cblas_ok = no; then + AC_CHECK_LIB(cblas, cblas_dgemm, [ax_cblas_ok=yes; CBLAS_LIBS="-lblas"]) +fi + AC_SUBST(CBLAS_LIBS) LIBS="$ax_cblas_save_LIBS" From 8eb721f6a1bfc358e2ad7763921756af866ebb91 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Tue, 18 Apr 2023 11:19:31 +0200 Subject: [PATCH 33/62] Fix typo --- m4/ax_cblas.m4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m4/ax_cblas.m4 b/m4/ax_cblas.m4 index 4acda990..0e0d68d3 100644 --- a/m4/ax_cblas.m4 +++ b/m4/ax_cblas.m4 @@ -159,7 +159,7 @@ fi # Generic BLAS library? 
(for instance OpenBLAS) if test $ax_cblas_ok = no; then - AC_CHECK_LIB(cblas, cblas_dgemm, [ax_cblas_ok=yes; CBLAS_LIBS="-lblas"]) + AC_CHECK_LIB(blas, cblas_dgemm, [ax_cblas_ok=yes; CBLAS_LIBS="-lblas"]) fi AC_SUBST(CBLAS_LIBS) From 6f9567742711dbd929ab6df7eb2caf01ab6325e0 Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Tue, 18 Apr 2023 12:00:10 +0200 Subject: [PATCH 34/62] Explicit -lopenblas --- m4/ax_cblas.m4 | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/m4/ax_cblas.m4 b/m4/ax_cblas.m4 index 0e0d68d3..da89cab9 100644 --- a/m4/ax_cblas.m4 +++ b/m4/ax_cblas.m4 @@ -152,12 +152,17 @@ if test $ax_cblas_ok = no; then [], [-lblas])]) fi +# BLAS in OpenBLAS library? +if test $ax_cblas_ok = no; then + AC_CHECK_LIB(openblas, cblas_dgemm, [ax_cblas_ok=yes; CBLAS_LIBS="-lopenblas"]) +fi + # Generic CBLAS library? if test $ax_cblas_ok = no; then AC_CHECK_LIB(cblas, cblas_dgemm, [ax_cblas_ok=yes; CBLAS_LIBS="-lcblas"]) fi -# Generic BLAS library? (for instance OpenBLAS) +# Generic BLAS library? if test $ax_cblas_ok = no; then AC_CHECK_LIB(blas, cblas_dgemm, [ax_cblas_ok=yes; CBLAS_LIBS="-lblas"]) fi From ef215786f11c0a1ce950802a3a78279863b1df0b Mon Sep 17 00:00:00 2001 From: ddelange <14880945+ddelange@users.noreply.github.com> Date: Sat, 29 Apr 2023 22:21:18 +0200 Subject: [PATCH 35/62] Unify and clean the unofficial project references --- README.md | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index d8e2cb9c..104d7123 100644 --- a/README.md +++ b/README.md @@ -402,21 +402,15 @@ Libpostal is designed to be used by higher-level languages. 
If you don't see yo - Elixir: [Expostal](https://github.com/SweetIQ/expostal) - Rust: [rustpostal](https://crates.io/crates/rustpostal) -**Database extensions** +**Unofficial database extensions** - PostgreSQL: [pgsql-postal](https://github.com/pramsey/pgsql-postal) -**Unofficial REST API** +**Unofficial servers** -- Libpostal REST: [libpostal REST](https://github.com/johnlonganecker/libpostal-rest) - -**Libpostal REST Docker** - -- Libpostal REST Docker [Libpostal REST Docker](https://github.com/johnlonganecker/libpostal-rest-docker) - -**Libpostal ZeroMQ Docker** - -- Libpostal ZeroMQ Docker image: [pasupulaphani/libpostal-zeromq](https://hub.docker.com/r/pasupulaphani/libpostal-zeromq/) , Source: [Github](https://github.com/pasupulaphani/libpostal-docker) +- Libpostal REST Go Docker: [libpostal-rest-docker](https://github.com/johnlonganecker/libpostal-rest-docker) +- Libpostal REST FastAPI Docker: [libpostal-fastapi](https://github.com/alpha-affinity/libpostal-fastapi) +- Libpostal ZeroMQ Docker: [libpostal-zeromq](https://github.com/pasupulaphani/libpostal-docker) Tests From 5d77298e88ebdd0ba5c20be2191cc67eff444c0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Fri, 23 Jun 2023 12:16:35 +0300 Subject: [PATCH 36/62] avoid UB in bit shifts unsigned char* gets promoted to `int`, which cannot always be shifted by 24 bits. 
Justine Tunney blogs about it here: https://justine.lol/endian.html Example: ```deserialize.c #include #include #include uint32_t file_deserialize_uint32_ok(unsigned char *buf) { return ((uint32_t)buf[0] << 24) | ((uint32_t)buf[1] << 16) | ((uint32_t)buf[2] << 8) | (uint32_t)buf[3]; } uint32_t file_deserialize_uint32(unsigned char *buf) { return (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3]; } int main() { unsigned char arr[4] = {0xaa, 0xaa, 0xaa, 0xaa}; printf("%d\n", file_deserialize_uint32_ok((unsigned char*)arr)); printf("%d\n", file_deserialize_uint32((unsigned char*)arr)); } ``` Output: ``` $ clang-16 -fsanitize=undefined ./deserialize.c -o deserialize && ./deserialize -1431655766 deserialize.c:10:20: runtime error: left shift of 170 by 24 places cannot be represented in type 'int' SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior deserialize.c:10:20 in -1431655766 ``` --- src/file_utils.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/file_utils.c b/src/file_utils.c index f25e5ee6..5fc2dfbe 100644 --- a/src/file_utils.c +++ b/src/file_utils.c @@ -198,7 +198,7 @@ bool file_write_float(FILE *file, float value) { } inline uint32_t file_deserialize_uint32(unsigned char *buf) { - return (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3]; + return ((uint32_t)buf[0] << 24) | ((uint32_t)buf[1] << 16) | ((uint32_t)buf[2] << 8) | (uint32_t)buf[3]; } bool file_read_uint32(FILE *file, uint32_t *value) { @@ -243,7 +243,7 @@ bool file_write_uint32(FILE *file, uint32_t value) { inline uint16_t file_deserialize_uint16(unsigned char *buf) { - return (buf[0] << 8) | buf[1]; + return ((uint16_t)buf[0] << 8) | buf[1]; } From dcb63d8768a5e187025de338fa99cc10f44469ba Mon Sep 17 00:00:00 2001 From: Davide Madrisan Date: Thu, 29 Jun 2023 14:36:10 +0200 Subject: [PATCH 37/62] Fix dash syntax error in libpostal_data Fix the syntax error reported by dash: ./src/libpostal_data: 39: [: ==: unexpected operator when the variable DATAMODEL
is empty. Signed-off-by: Davide Madrisan --- src/libpostal_data.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libpostal_data.in b/src/libpostal_data.in index 8c18270f..a749a623 100755 --- a/src/libpostal_data.in +++ b/src/libpostal_data.in @@ -36,7 +36,7 @@ LIBPOSTAL_LANG_CLASS_FILE="language_classifier.tar.gz" LIBPOSTAL_BASE_URL="https://github.com/$LIBPOSTAL_REPO_NAME/releases/download" -if [ $DATAMODEL == "senzing" ]; then +if [ "$DATAMODEL" = "senzing" ]; then LIBPOSTAL_DATA_FILE_CHUNKS=1 LIBPOSTAL_PARSER_MODEL_CHUNKS=1 LIBPOSTAL_LANG_CLASS_MODEL_CHUNKS=1 From 2f20c9359e17a65ed847a395f16c5f5b21b15b0c Mon Sep 17 00:00:00 2001 From: Al Date: Wed, 5 Jul 2023 20:38:48 -0400 Subject: [PATCH 38/62] [github] adding Github action to run tests on mac and ubuntu initially --- .github/workflows/test.yml | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 .github/workflows/test.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..36799bc7 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,36 @@ +name: Test + +on: + push: + branches: [master] + pull_request: + branches: [master] + workflow_dispatch: + +jobs: + build_and_test: + strategy: + matrix: + os: [ubuntu-latest, macos-latest] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v3 + - name: Install Dependencies Linux + if: matrix.os == 'ubuntu-latest' + run: | + apt-get update -y + apt-get install curl autoconf automake libtool pkg-config + - name: Install Dependencies MacOS + if: runner.os == 'macOS' + run: | + brew update + brew install curl autoconf automake libtool pkg-config + - name: Build + env: + LIBPOSTAL_DATA_DIR: ${GITHUB_WORKSPACE}/data + run: | + ./bootstrap.sh + ./configure --datadir=$LIBPOSTAL_DATA_DIR + make + - name: Test + run: make check From 5669372a90113fa949c79ce3d45d083780396968 Mon Sep 17 00:00:00 2001 From: Al Date: Wed, 5 Jul 2023 20:42:50 -0400 
Subject: [PATCH 39/62] [fix] sudo in github actions for build tool installs --- .github/workflows/test.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 36799bc7..ff2c99c9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -18,13 +18,13 @@ jobs: - name: Install Dependencies Linux if: matrix.os == 'ubuntu-latest' run: | - apt-get update -y - apt-get install curl autoconf automake libtool pkg-config + sudo apt-get update -y + sudo apt-get install curl autoconf automake libtool pkg-config - name: Install Dependencies MacOS if: runner.os == 'macOS' run: | - brew update - brew install curl autoconf automake libtool pkg-config + sudo brew update + sudo brew install curl autoconf automake libtool pkg-config - name: Build env: LIBPOSTAL_DATA_DIR: ${GITHUB_WORKSPACE}/data From 2b93af09d959c132f2e97f92dcf0278a433073b2 Mon Sep 17 00:00:00 2001 From: Al Date: Wed, 5 Jul 2023 20:43:18 -0400 Subject: [PATCH 40/62] [build] removing travis build --- .travis.yml | 83 ----------------------------------------------------- 1 file changed, 83 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 1f3dbacf..00000000 --- a/.travis.yml +++ /dev/null @@ -1,83 +0,0 @@ -language: c -branches: - only: - - master -env: - global: - - secure: "bHrAu46oecEj3gjamT+XWXtf2J0ZJCFa8tUdgM4evscaJiiwv1TtsGXyhIj/ai7DlRIPVJUtBUy6uoGGjr6GT43zTrzSxYAOMdVXZYsnTDcdL1/0dbwcIK6/u0EI377s1buGIxG1fHveWKXuXwJWDAw4KS+5HU88a42+zMbhKe4=" - - secure: "SkvNYucKVns9qDjOEW2WIhDlOMKBOwhzVcwY++HWTRtn04ErrqR4k01Mmho0jGBQD9JrPLhDgnX1BNy5s+Kmq/bxn9OZm7K1z24qBKb0mBBiNEnf2jvT0AvF5xxM+cJf4KKNL+CC0MwNf5y7HVPq1xibOV4/CNIrc1ZZc9aqdkE=" - - secure: "am/rRca5akv7gSSMeNQfHnWiTHhk8fQhOZvZ0Ut+PezkQlLgKp7bzmMFkkuQ4L5hpJU40kFzuWmIPgO33dacgq69Vx/Xct1bEnxGBGjriI5qOhMizmzLYPs5uWiRjtJnBqb4JOUh5K7JBlwrgvD72fY5ZK2lwtzTksfWo8N+ahU=" - - secure: 
"mh/WDQapGJb6MAFvgCjiMAAv1aa8gUaIs2Ohtx7yPrDBwsD8UqlyEM7ktGLZGQ1q/7OJ/Z6QfDMfJQwDKzxyUSY1yHZTNkP3QzkTt2D1Qyvi++O6EkGqSdSS6Lb3aID3IsEaye/yasJ+rxiRSp05O9+OYvhJlqRZnzaimiAv5KI=" - - secure: "OGNJ6Cj3trq4nASgm4BK331aij+FZ11St7/YF9rfxeQBwg4MCPH2+D0jvAULBHvJR7K2RmepX/FG5d4S+rtwKNGngg3ovPdd1MbwFltHpn5/KM+hxe7kCZx2+V9/FN+4YSyO0zSUDra6AXHOs72mfyrZoB3a36SS4lg2sAp33gU=" - - GH_REF=github.com/openvenues/libpostal - - DICTIONARIES_CHANGED=$(git diff --name-only $TRAVIS_COMMIT_RANGE | grep "resources/dictionaries/.*/.*.txt\|src/gazetteer_data.c" | wc -l) - - NUMEX_CHANGED=$(git diff --name-only $TRAVIS_COMMIT_RANGE | grep "resources/numex\|src/numex_table_builder.c" | wc -l) - - TRANSLIT_CHANGED=$(git diff --name-only $TRAVIS_COMMIT_RANGE | grep "src/transliteration_data.c" | wc -l) - - TAG_VERSION=$(cat ./versions/base).$TRAVIS_BUILD_NUMBER - - SRC_TARBALL_FILENAME=libpostal-$(cat ./versions/base).tar.gz - - LIBPOSTAL_DATA_DIR=$(pwd)/data - - LIBPOSTAL_DATA_FILENAME=libpostal_data.tar.gz -compiler: - - clang - - gcc -addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - gcc-4.8 - - pkg-config -before_script: - - ./bootstrap.sh - - if [[ $DICTIONARIES_CHANGED -ne 0 || $NUMEX_CHANGED -ne 0 ]]; then git clone https://github.com/pypa/virtualenv; cd virtualenv; git checkout master; python virtualenv.py ../env; cd ..; env/bin/pip install -r scripts/requirements-simple.txt; fi; - - if [ $NUMEX_CHANGED -ne 0 ]; then env/bin/python scripts/geodata/numbers/numex.py; fi; - - if [ $DICTIONARIES_CHANGED -ne 0 ]; then env/bin/python scripts/geodata/address_expansions/address_dictionaries.py; fi; -install: - - if [ "$CC" = "gcc" ]; then export CC="gcc-4.8"; fi -script: - - ./configure --datadir=$LIBPOSTAL_DATA_DIR - - make -j4 - - if [[ $DICTIONARIES_CHANGED -ne 0 ]]; then ./src/build_address_dictionary; fi; - - if [[ $NUMEX_CHANGED -ne 0 ]]; then ./src/build_numex_table; fi; - - if [[ $TRANSLIT_CHANGED -ne 0 ]]; then ./src/build_trans_table; fi; - - make check - 
-after_success: - - | - if [[ "$CC" == "gcc" && "$TRAVIS_PULL_REQUEST" = "false" && "$TRAVIS_BRANCH" = "master" ]]; then - if [[ ( $DICTIONARIES_CHANGED -ne 0 || $NUMEX_CHANGED -ne 0 || $TRANSLIT_CHANGED -ne 0 ) ]]; then - export PATH=$PATH:env/bin/; - git clone -b master "https://${GH_TOKEN}@${GH_REF}" _travis > /dev/null 2>&1 || exit 1 - cp src/*_data.c _travis/src - echo "$TAG_VERSION" > _travis/versions/base_data - cd _travis - git config user.name "$GIT_COMMITTER_NAME"; - git config user.email "$GIT_COMMITTER_EMAIL"; - git commit -a -m "[auto][ci skip] Adding data files from Travis build #$TRAVIS_BUILD_NUMBER"; - git push --quiet origin master; - - tar -C $LIBPOSTAL_DATA_DIR -cvzf $LIBPOSTAL_DATA_DIR/$LIBPOSTAL_DATA_FILENAME $BASIC_MODULE_DIRS - fi - git tag $TAG_VERSION -a -m "[auto][ci skip] Generating tag for Travis build #$TRAVIS_BUILD_NUMBER"; - git push --tags --quiet origin master; - fi; - -before_deploy: - - make dist - -deploy: - - provider: releases - file: - - "$SRC_TARBALL_FILENAME" - on: - tags: true - branch: master - skip_cleanup: true - - provider: releases - file: - - "$LIBPOSTAL_DATA_DIR/$LIBPOSTAL_DATA_FILENAME" - on: - tags: true - branch: master - condition: "$CC = gcc && ( $DICTIONARIES_CHANGED -ne 0 || $NUMEX_CHANGED -ne 0 || $TRANSLIT_CHANGED -ne 0 )" - skip_cleanup: true From b65e7d5bce82a30dec221ce144ee1a19c5fc617c Mon Sep 17 00:00:00 2001 From: Al Date: Wed, 5 Jul 2023 20:47:14 -0400 Subject: [PATCH 41/62] [fix] no sudo on brew on Mac in github actions, just like on a regular machine/in the docs --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ff2c99c9..ea9dca07 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -23,8 +23,8 @@ jobs: - name: Install Dependencies MacOS if: runner.os == 'macOS' run: | - sudo brew update - sudo brew install curl autoconf automake libtool pkg-config + brew update + 
brew install curl autoconf automake libtool pkg-config - name: Build env: LIBPOSTAL_DATA_DIR: ${GITHUB_WORKSPACE}/data From 7a448b718d58cece0384d540067d48bbe4a73774 Mon Sep 17 00:00:00 2001 From: Al Date: Wed, 5 Jul 2023 21:02:41 -0400 Subject: [PATCH 42/62] [crf] using 32 bytes for posix_memalign to align blocks of 4 doubles for remez algorithm to fix test which uses an odd-sized context --- src/crf_context.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/crf_context.c b/src/crf_context.c index 0f399a1a..e8635e28 100644 --- a/src/crf_context.c +++ b/src/crf_context.c @@ -41,7 +41,7 @@ crf_context_t *crf_context_new(int flag, size_t L, size_t T) { if (context->flag & CRF_CONTEXT_MARGINALS) { #if defined(INTEL_SSE) || defined(ARM_NEON) - context->exp_state = double_matrix_new_aligned(T, L, 16); + context->exp_state = double_matrix_new_aligned(T, L, 32); if (context->exp_state == NULL) goto exit_context_created; double_matrix_zero(context->exp_state); #else @@ -53,7 +53,7 @@ crf_context_t *crf_context_new(int flag, size_t L, size_t T) { if (context->mexp_state == NULL) goto exit_context_created; #if defined(INTEL_SSE) || defined(ARM_NEON) - context->exp_state_trans = double_matrix_new_aligned(T, L * L, 16); + context->exp_state_trans = double_matrix_new_aligned(T, L * L, 32); if (context->exp_state_trans == NULL) goto exit_context_created; double_matrix_zero(context->exp_state_trans); #else @@ -65,7 +65,7 @@ crf_context_t *crf_context_new(int flag, size_t L, size_t T) { if (context->mexp_state_trans == NULL) goto exit_context_created; #if defined(INTEL_SSE) || defined(ARM_NEON) - context->exp_trans = double_matrix_new_aligned(L, L, 16); + context->exp_trans = double_matrix_new_aligned(L, L, 32); if (context->exp_trans == NULL) goto exit_context_created; double_matrix_zero(context->exp_trans); #else @@ -131,13 +131,13 @@ bool crf_context_set_num_items(crf_context_t *self, size_t T) { if (self->flag & CRF_CONTEXT_MARGINALS && ( #if 
defined(INTEL_SSE) || defined(ARM_NEON) - !double_matrix_resize_aligned(self->exp_state, T, L, 16) || + !double_matrix_resize_aligned(self->exp_state, T, L, 32) || #else !double_matrix_resize(self->exp_state, T, L) || #endif !double_matrix_resize(self->mexp_state, T, L) || #if defined(INTEL_SSE) || defined(ARM_NEON) - !double_matrix_resize_aligned(self->exp_state_trans, T, L * L, 16) || + !double_matrix_resize_aligned(self->exp_state_trans, T, L * L, 32) || #else !double_matrix_resize(self->exp_state_trans, T, L * L) || #endif From 59325c3b13cb941aa0938af22a02294093de728d Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 6 Jul 2023 01:16:22 -0400 Subject: [PATCH 43/62] [test] testing with sse2 disabled to see if the build is working generally --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ea9dca07..b9b0b0c1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -30,7 +30,7 @@ jobs: LIBPOSTAL_DATA_DIR: ${GITHUB_WORKSPACE}/data run: | ./bootstrap.sh - ./configure --datadir=$LIBPOSTAL_DATA_DIR + ./configure --datadir=$LIBPOSTAL_DATA_DIR --disable-sse2 make - name: Test run: make check From d979fbb779bfa3236999107d91afb2aa8c5a10c5 Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 6 Jul 2023 01:28:49 -0400 Subject: [PATCH 44/62] [test] trying make check in the same step, to see if that makes a difference --- .github/workflows/test.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b9b0b0c1..141f37da 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -25,12 +25,11 @@ jobs: run: | brew update brew install curl autoconf automake libtool pkg-config - - name: Build + - name: Build and Test env: LIBPOSTAL_DATA_DIR: ${GITHUB_WORKSPACE}/data run: | ./bootstrap.sh ./configure --datadir=$LIBPOSTAL_DATA_DIR --disable-sse2 make - - name: Test - run: 
make check + make check From c76d020c18ce03b0e35f0dc281038a1e8abdcd86 Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 6 Jul 2023 01:36:23 -0400 Subject: [PATCH 45/62] [fix] same result running test as a separate step --- .github/workflows/test.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 141f37da..b9b0b0c1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -25,11 +25,12 @@ jobs: run: | brew update brew install curl autoconf automake libtool pkg-config - - name: Build and Test + - name: Build env: LIBPOSTAL_DATA_DIR: ${GITHUB_WORKSPACE}/data run: | ./bootstrap.sh ./configure --datadir=$LIBPOSTAL_DATA_DIR --disable-sse2 make - make check + - name: Test + run: make check From 57eaa414ceadb48d5922099eeaa446b02894a2e4 Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 6 Jul 2023 01:49:02 -0400 Subject: [PATCH 46/62] [revert] reverting the commits from #578, leaving header file in repo for the moment --- README.md | 2 ++ configure.ac | 53 +++++++------------------------------------- src/crf_context.c | 16 ++++++------- src/vector_math.h | 6 ++--- test/Makefile.am | 2 +- windows/configure.ac | 53 +++++++------------------------------------- 6 files changed, 29 insertions(+), 103 deletions(-) diff --git a/README.md b/README.md index d8e2cb9c..3d641717 100644 --- a/README.md +++ b/README.md @@ -113,6 +113,8 @@ brew install curl autoconf automake libtool pkg-config Then to install the C library: +If you're using an M1 Mac, add `--disable-sse2` to the `./configure` command. This will result in poorer performance but the build will succeed. 
+ ``` git clone https://github.com/openvenues/libpostal cd libpostal diff --git a/configure.ac b/configure.ac index ed997e32..b7339215 100644 --- a/configure.ac +++ b/configure.ac @@ -84,57 +84,20 @@ AS_IF([test "x$FOUND_SHUF" = xyes], [AC_DEFINE([HAVE_SHUF], [1], [shuf availabl AS_IF([test "x$FOUND_GSHUF" = xyes], [AC_DEFINE([HAVE_GSHUF], [1], [gshuf available])]) # ------------------------------------------------------------------ -# Architecture-specific options +# Checks for SSE2 build # ------------------------------------------------------------------ -# allow enabling hardware optimization on any system: -case "$host_cpu" in - arm*|aarch64*) - enable_arm_neon=yes - enable_intel_sse=no - AC_DEFINE([ARM_NEON], [1], - [Enable ARM_NEON optimizations]) - ;; - i?86|x86_64) - enable_intel_sse=yes - enable_arm_neon=no - AC_DEFINE([INTEL_SSE], [1], - [Enable Intel SSE optimizations]) - ;; -esac - -AC_ARG_ENABLE([neon], - AS_HELP_STRING([[[--disable-neon]]], - [Disable ARM NEON hardware optimizations]), - [ - enable_arm_neon=no - AC_DEFINE([ARM_NEON], [0], - [Disable ARM_NEON optimizations]) - ]) - AC_ARG_ENABLE([sse2], - AS_HELP_STRING([[[--disable-sse2]]], - [Disable Intel SSE2 hardware optimizations]), - [ - enable_intel_sse=no - AC_DEFINE([INTEL_SSE], [0], - [Disable INTEL_SSE optimizations]) - ]) + AS_HELP_STRING( + [--disable-sse2], + [disable SSE2 optimization routines] + ) + ) -SIMDFLAGS="" - -AS_IF([test "x$enable_intel_sse" != "xno"], [ - SIMDFLAGS="-mfpmath=sse -msse2 -DINTEL_SSE" +AS_IF([test "x$enable_sse2" != "xno"], [ + CFLAGS="-mfpmath=sse -msse2 -DUSE_SSE ${CFLAGS}" ]) -AS_IF([test "x$enable_arm_neon" != "xno"], [ - SIMDFLAGS="-march=armv8-a+fp+simd+crypto+crc -DARM_NEON" -]) - -CFLAGS="${SIMDFLAGS} ${CFLAGS}" - -AC_SUBST([SIMDFLAGS], [$SIMDFLAGS]) - AC_CHECK_HEADER(cblas.h, [AX_CBLAS]) AC_ARG_ENABLE([data-download], diff --git a/src/crf_context.c b/src/crf_context.c index e8635e28..8e1a759e 100644 --- a/src/crf_context.c +++ b/src/crf_context.c @@ 
-40,7 +40,7 @@ crf_context_t *crf_context_new(int flag, size_t L, size_t T) { } if (context->flag & CRF_CONTEXT_MARGINALS) { -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) context->exp_state = double_matrix_new_aligned(T, L, 32); if (context->exp_state == NULL) goto exit_context_created; double_matrix_zero(context->exp_state); @@ -52,7 +52,7 @@ crf_context_t *crf_context_new(int flag, size_t L, size_t T) { context->mexp_state = double_matrix_new_zeros(T, L); if (context->mexp_state == NULL) goto exit_context_created; -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) context->exp_state_trans = double_matrix_new_aligned(T, L * L, 32); if (context->exp_state_trans == NULL) goto exit_context_created; double_matrix_zero(context->exp_state_trans); @@ -64,7 +64,7 @@ crf_context_t *crf_context_new(int flag, size_t L, size_t T) { context->mexp_state_trans = double_matrix_new_zeros(T, L * L); if (context->mexp_state_trans == NULL) goto exit_context_created; -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) context->exp_trans = double_matrix_new_aligned(L, L, 32); if (context->exp_trans == NULL) goto exit_context_created; double_matrix_zero(context->exp_trans); @@ -130,13 +130,13 @@ bool crf_context_set_num_items(crf_context_t *self, size_t T) { if (self->flag & CRF_CONTEXT_MARGINALS && ( -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) !double_matrix_resize_aligned(self->exp_state, T, L, 32) || #else !double_matrix_resize(self->exp_state, T, L) || #endif !double_matrix_resize(self->mexp_state, T, L) || -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) !double_matrix_resize_aligned(self->exp_state_trans, T, L * L, 32) || #else !double_matrix_resize(self->exp_state_trans, T, L * L) || @@ -184,7 +184,7 @@ void crf_context_destroy(crf_context_t *self) { } if (self->exp_state != NULL) { -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) 
double_matrix_destroy_aligned(self->exp_state); #else double_matrix_destroy(self->exp_state); @@ -200,7 +200,7 @@ void crf_context_destroy(crf_context_t *self) { } if (self->exp_state_trans != NULL) { -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) double_matrix_destroy_aligned(self->exp_state_trans); #else double_matrix_destroy(self->exp_state_trans); @@ -216,7 +216,7 @@ void crf_context_destroy(crf_context_t *self) { } if (self->exp_trans != NULL) { -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) double_matrix_destroy_aligned(self->exp_trans); #else double_matrix_destroy(self->exp_trans); diff --git a/src/vector_math.h b/src/vector_math.h index 7dbdb049..eff90466 100644 --- a/src/vector_math.h +++ b/src/vector_math.h @@ -8,10 +8,8 @@ #define ks_lt_index(a, b) ((a).value < (b).value) -#if defined(INTEL_SSE) +#if defined(USE_SSE) #include -#elif defined(ARM_NEON) -#include "sse2neon.h" #endif /* @@ -340,7 +338,7 @@ -#if defined(INTEL_SSE) || defined(ARM_NEON) +#if defined(USE_SSE) /* From https://github.com/herumi/fmath/blob/master/fastexp.cpp diff --git a/test/Makefile.am b/test/Makefile.am index 5289e3c2..f2e911f2 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -5,7 +5,7 @@ CFLAGS_O2 = $(CFLAGS_BASE) -O2 CFLAGS_O3 = $(CFLAGS_BASE) -O3 DEFAULT_INCLUDES = -I.. 
-I/usr/local/include -CFLAGS = $(SIMDFLAGS) $(CFLAGS_BASE) +CFLAGS = $(CFLAGS_BASE) TESTS = test_libpostal noinst_PROGRAMS = test_libpostal diff --git a/windows/configure.ac b/windows/configure.ac index d19cd967..24e73fec 100644 --- a/windows/configure.ac +++ b/windows/configure.ac @@ -73,57 +73,20 @@ AS_IF([test "x$FOUND_SHUF" = xyes], [AC_DEFINE([HAVE_SHUF], [1], [shuf availabl AS_IF([test "x$FOUND_GSHUF" = xyes], [AC_DEFINE([HAVE_GSHUF], [1], [gshuf available])]) # ------------------------------------------------------------------ -# Architecture-specific options +# Checks for SSE2 build # ------------------------------------------------------------------ -# allow enabling hardware optimization on any system: -case "$host_cpu" in - arm*|aarch64*) - enable_arm_neon=yes - enable_intel_sse=no - AC_DEFINE([ARM_NEON], [1], - [Enable ARM_NEON optimizations]) - ;; - i?86|x86_64) - enable_intel_sse=yes - enable_arm_neon=no - AC_DEFINE([INTEL_SSE], [1], - [Enable Intel SSE optimizations]) - ;; -esac - -AC_ARG_ENABLE([neon], - AS_HELP_STRING([[[--disable-neon]]], - [Disable ARM NEON hardware optimizations]), - [ - enable_arm_neon=no - AC_DEFINE([ARM_NEON], [0], - [Disable ARM_NEON optimizations]) - ]) - AC_ARG_ENABLE([sse2], - AS_HELP_STRING([[[--disable-sse2]]], - [Disable Intel SSE2 hardware optimizations]), - [ - enable_intel_sse=no - AC_DEFINE([INTEL_SSE], [0], - [Disable INTEL_SSE optimizations]) - ]) + AS_HELP_STRING( + [--disable-sse2], + [disable SSE2 optimization routines] + ) + ) -SIMDFLAGS="" - -AS_IF([test "x$enable_intel_sse" != "xno"], [ - SIMDFLAGS="-mfpmath=sse -msse2 -DINTEL_SSE" +AS_IF([test "x$enable_sse2" != "xno"], [ + CFLAGS="-mfpmath=sse -msse2 -DUSE_SSE ${CFLAGS}" ]) -AS_IF([test "x$enable_arm_neon" != "xno"], [ - SIMDFLAGS="-march=armv8-a+fp+simd+crypto+crc -DARM_NEON" -]) - -CFLAGS="${SIMDFLAGS} ${CFLAGS}" - -AC_SUBST([SIMDFLAGS], [$SIMDFLAGS]) - AC_CHECK_HEADER(cblas.h, [AX_CBLAS]) AC_ARG_ENABLE([data-download], From 
5a1f6df5a90aaad0da81fec20a1f0c11d869a438 Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 6 Jul 2023 02:05:17 -0400 Subject: [PATCH 47/62] [sse] ok something about that PR breaks either way. Now trying it with SSE --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b9b0b0c1..ea9dca07 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -30,7 +30,7 @@ jobs: LIBPOSTAL_DATA_DIR: ${GITHUB_WORKSPACE}/data run: | ./bootstrap.sh - ./configure --datadir=$LIBPOSTAL_DATA_DIR --disable-sse2 + ./configure --datadir=$LIBPOSTAL_DATA_DIR make - name: Test run: make check From 7bdcf96c9d9c61811ffd4570ba9fbbac5ffd237f Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 6 Jul 2023 16:00:55 -0400 Subject: [PATCH 48/62] [memalign] no more realloc on aligned pointers, just do an aligned malloc and copy to it. Slower but safe and this is not called that often in practice, usually to resize larger matrices. --- src/vector.h | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/src/vector.h b/src/vector.h index 78a0fad4..462a8baf 100644 --- a/src/vector.h +++ b/src/vector.h @@ -21,27 +21,19 @@ static inline void *_aligned_realloc(void *p, size_t size, size_t alignment) return NULL; } - if (size == 0) { + if (p == NULL) { return NULL; } - void *rp = realloc(p, size); - - /* If realloc result is not already at an aligned boundary, - _aligned_malloc a new block and copy the contents of the realloc'd - pointer to the aligned block, free the realloc'd pointer and return - the aligned pointer. 
- */ - if ( ((size_t)rp & (alignment - 1)) != 0) { - void *p1 = _aligned_malloc(size, alignment); - if (p1 != NULL) { - memcpy(p1, rp, size); - } - free(rp); - rp = p1; + void *p1 = _aligned_malloc(size, alignment); + if (p1 == NULL) { + free(p); + return NULL; } - return rp; + memcpy(p1, p, size); + free(p); + return p1; } static inline void _aligned_free(void *p) { From e4982b733fc864ac940918c437c7fa83223edc77 Mon Sep 17 00:00:00 2001 From: Al Date: Fri, 18 Aug 2023 13:39:36 -0400 Subject: [PATCH 49/62] [fix] memcpy in aligned vector/matrix resize needs to know the original size. Since this is an internal static function and does not affect client APIs, changing it to aligned_malloc, aligned_free, and aligned_resize, which takes the original pointer size as well as the new. On Windows it simply passes the pointer, new size, and alignment through _aligned_realloc, whereas on other platforms only the aligned_malloc is for new_size bytes and uses old_size bytes for memcpy --- src/matrix.h | 8 ++++---- src/vector.h | 27 ++++++++++++++++++--------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/matrix.h b/src/matrix.h index f6a31f1a..05b43db2 100644 --- a/src/matrix.h +++ b/src/matrix.h @@ -33,7 +33,7 @@ typedef enum { } name##_t; \ \ static name##_t *name##_new(size_t m, size_t n) { \ - name##_t *matrix = malloc(sizeof(name##_t)); \ + name##_t *matrix = malloc(sizeof(name##_t)); \ \ if (matrix == NULL) { \ return NULL; \ @@ -62,7 +62,7 @@ typedef enum { matrix->m = m; \ matrix->n = n; \ \ - matrix->values = _aligned_malloc(sizeof(type) * m * n, alignment); \ + matrix->values = aligned_malloc(sizeof(type) * m * n, alignment); \ if (matrix->values == NULL) { \ free(matrix); \ return NULL; \ @@ -86,7 +86,7 @@ typedef enum { if (self == NULL) return; \ \ if (self->values != NULL) { \ - _aligned_free(self->values); \ + aligned_free(self->values); \ } \ \ free(self); \ @@ -118,7 +118,7 @@ typedef enum { if (self == NULL) return false; \ \ if (m * n > 
(self->m * self->n)) { \ - type *ptr = _aligned_realloc(self->values, sizeof(type) * m * n, alignment); \ + type *ptr = aligned_resize(self->values, sizeof(type) * self->m * self->n, sizeof(type) * m * n, alignment); \ if (ptr == NULL) { \ return false; \ } \ diff --git a/src/vector.h b/src/vector.h index 462a8baf..562ed6cb 100644 --- a/src/vector.h +++ b/src/vector.h @@ -7,15 +7,24 @@ #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__MINGW64__) #include +static inline void *aligned_alloc(size_t size, size_t alignment) { + return _aligned_malloc(size, alignment); +} +static inline void *aligned_resize(void *p, size_t old_size, size_t new_size, size_t alignment) { + return _aligned_realloc(p, new_size, alignment); +} +static inline void aligned_free(void *p) { + _aligned_free(p); +} #else #include -static inline void *_aligned_malloc(size_t size, size_t alignment) +static inline void *aligned_malloc(size_t size, size_t alignment) { void *p; int ret = posix_memalign(&p, alignment, size); return (ret == 0) ? 
p : NULL; } -static inline void *_aligned_realloc(void *p, size_t size, size_t alignment) +static inline void *aligned_resize(void *p, size_t old_size, size_t new_size, size_t alignment) { if ((alignment == 0) || ((alignment & (alignment - 1)) != 0) || (alignment < sizeof(void *))) { return NULL; @@ -25,17 +34,17 @@ static inline void *_aligned_realloc(void *p, size_t size, size_t alignment) return NULL; } - void *p1 = _aligned_malloc(size, alignment); + void *p1 = aligned_malloc(new_size, alignment); if (p1 == NULL) { free(p); return NULL; } - memcpy(p1, p, size); + memcpy(p1, p, old_size); free(p); return p1; } -static inline void _aligned_free(void *p) +static inline void aligned_free(void *p) { free(p); } @@ -71,7 +80,7 @@ static inline void _aligned_free(void *p) name *array = malloc(sizeof(name)); \ if (array == NULL) return NULL; \ array->n = array->m = 0; \ - array->a = _aligned_malloc(size * sizeof(type), alignment); \ + array->a = aligned_malloc(size * sizeof(type), alignment); \ if (array->a == NULL) return NULL; \ array->m = size; \ return array; \ @@ -86,7 +95,7 @@ static inline void _aligned_free(void *p) } \ static inline bool name##_resize_aligned(name *array, size_t size, size_t alignment) { \ if (size <= array->m) return true; \ - type *ptr = _aligned_realloc(array->a, sizeof(type) * size, alignment); \ + type *ptr = aligned_resize(array->a, sizeof(type) * array->m, sizeof(type) * size, alignment); \ if (ptr == NULL) return false; \ array->a = ptr; \ array->m = size; \ @@ -152,7 +161,7 @@ static inline void _aligned_free(void *p) } \ static inline void name##_destroy_aligned(name *array) { \ if (array == NULL) return; \ - if (array->a != NULL) _aligned_free(array->a); \ + if (array->a != NULL) aligned_free(array->a); \ free(array); \ } @@ -174,7 +183,7 @@ static inline void _aligned_free(void *p) free_func(array->a[i]); \ } \ } \ - _aligned_free(array->a); \ + aligned_free(array->a); \ free(array); \ } From 330bd2e158846a52d04ed040503cba4e1087d82e 
Mon Sep 17 00:00:00 2001 From: Al Date: Fri, 18 Aug 2023 22:37:50 -0400 Subject: [PATCH 50/62] [fix] Windows name --- src/vector.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vector.h b/src/vector.h index 562ed6cb..52b8b8d0 100644 --- a/src/vector.h +++ b/src/vector.h @@ -7,7 +7,7 @@ #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__MINGW64__) #include -static inline void *aligned_alloc(size_t size, size_t alignment) { +static inline void *aligned_malloc(size_t size, size_t alignment) { return _aligned_malloc(size, alignment); } static inline void *aligned_resize(void *p, size_t old_size, size_t new_size, size_t alignment) { From e36f8599ba652eddd1d178ca230a20569d9d0ebf Mon Sep 17 00:00:00 2001 From: Siarhei Fedartsou Date: Tue, 3 Dec 2024 18:33:47 +0100 Subject: [PATCH 51/62] Do not use SSE flags on arm64 platform --- .github/workflows/test.yml | 2 +- configure.ac | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ea9dca07..3e4d16f0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,7 +11,7 @@ jobs: build_and_test: strategy: matrix: - os: [ubuntu-latest, macos-latest] + os: [ubuntu-latest, macos-latest, macos-15] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 diff --git a/configure.ac b/configure.ac index b7339215..dc77930e 100644 --- a/configure.ac +++ b/configure.ac @@ -94,7 +94,7 @@ AC_ARG_ENABLE([sse2], ) ) -AS_IF([test "x$enable_sse2" != "xno"], [ +AS_IF([test "x$enable_sse2" != "xno" && test "x$(uname -m)" != "xarm64"], [ CFLAGS="-mfpmath=sse -msse2 -DUSE_SSE ${CFLAGS}" ]) From ff623ae2dc337761f4bee17b4e77e55f2e8d8813 Mon Sep 17 00:00:00 2001 From: Siarhei Fedartsou Date: Tue, 3 Dec 2024 18:35:47 +0100 Subject: [PATCH 52/62] Do not use SSE flags on arm64 platform --- .github/workflows/test.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml 
b/.github/workflows/test.yml index 3e4d16f0..044508db 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,7 +11,10 @@ jobs: build_and_test: strategy: matrix: - os: [ubuntu-latest, macos-latest, macos-15] + os: + - ubuntu-latest + - macos-15-large # x86_64 + - macos-15 # arm64 runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 From ff72880e621405ce877091552a09f9054549d55f Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Mon, 23 Dec 2024 17:35:51 -0500 Subject: [PATCH 53/62] Update README.md to include build-essential in apt list and 3 options for configuration --- README.md | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3d641717..de6575f7 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,7 @@ Before you install, make sure you have the following prerequisites: **On Ubuntu/Debian** ``` -sudo apt-get install curl autoconf automake libtool pkg-config +sudo apt-get install -y curl build-essential autoconf automake libtool pkg-config ``` **On CentOS/RHEL** @@ -118,9 +118,19 @@ If you're using an M1 Mac, add `--disable-sse2` to the `./configure` command. Th ``` git clone https://github.com/openvenues/libpostal cd libpostal + ./bootstrap.sh + +# For Intel/AMD processors and the default model ./configure --datadir=[...some dir with a few GB of space...] -make -j4 + +# For Apple / ARM cpus and the default model +./configure --datadir=/tmp --disable-sse2 + +# For the improved Senzing model: +./configure --datadir=[...some dir with a few GB of space...] MODEL=senzing + +make -j8 sudo make install # On Linux it's probably a good idea to run From 3caca83f048e5a87321f8336da7a6202ef298af5 Mon Sep 17 00:00:00 2001 From: Russell Jurney Date: Mon, 23 Dec 2024 17:39:43 -0500 Subject: [PATCH 54/62] Update README.md to fix /tmp reference to [...some dir with a few GB of space...] 
--- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index de6575f7..ab0c207b 100644 --- a/README.md +++ b/README.md @@ -125,7 +125,7 @@ cd libpostal ./configure --datadir=[...some dir with a few GB of space...] # For Apple / ARM cpus and the default model -./configure --datadir=/tmp --disable-sse2 +./configure --datadir=[...some dir with a few GB of space...] --disable-sse2 # For the improved Senzing model: ./configure --datadir=[...some dir with a few GB of space...] MODEL=senzing From 4f51945402184431ff9052de2b2c97250dcacb1d Mon Sep 17 00:00:00 2001 From: Al Date: Mon, 23 Dec 2024 19:06:08 -0500 Subject: [PATCH 55/62] [github] no need to use macos-15-latest which requires upgrading, just doing macos-latest which is arm64 --- .github/workflows/test.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 044508db..ea9dca07 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,10 +11,7 @@ jobs: build_and_test: strategy: matrix: - os: - - ubuntu-latest - - macos-15-large # x86_64 - - macos-15 # arm64 + os: [ubuntu-latest, macos-latest] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 From 0540d7c7e3f5366686ff110ae1073ff129d7240a Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 30 Jan 2025 01:45:18 -0500 Subject: [PATCH 56/62] [api/compat] PR #465 redefined the language classifier response struct in the API and was casting between incompatible pointer types. Using the exported struct throughout. 
--- src/expand.c | 4 ++-- src/language_classifier.c | 8 ++++---- src/language_classifier.h | 15 +++++---------- src/language_classifier_cli.c | 2 +- src/language_classifier_test.c | 2 +- src/libpostal.c | 2 +- src/near_dupe.c | 2 +- src/place.c | 4 ++-- src/place.h | 4 ++-- 9 files changed, 19 insertions(+), 24 deletions(-) diff --git a/src/expand.c b/src/expand.c index 898c17d1..2e1dc977 100644 --- a/src/expand.c +++ b/src/expand.c @@ -1567,7 +1567,7 @@ cstring_array *expand_address_phrase_option(char *input, libpostal_normalize_opt size_t len = strlen(input); - language_classifier_response_t *lang_response = NULL; + libpostal_language_classifier_response_t *lang_response = NULL; if (options.num_languages == 0) { lang_response = classify_languages(input); @@ -1627,7 +1627,7 @@ cstring_array *expand_address_phrase_option(char *input, libpostal_normalize_opt kh_destroy(str_set, unique_strings); if (lang_response != NULL) { - language_classifier_response_destroy(lang_response); + libpostal_language_classifier_response_destroy(lang_response); } char_array_destroy(temp_string); diff --git a/src/language_classifier.c b/src/language_classifier.c index 25273d92..d107d558 100644 --- a/src/language_classifier.c +++ b/src/language_classifier.c @@ -46,7 +46,7 @@ language_classifier_t *get_language_classifier(void) { return language_classifier; } -void language_classifier_response_destroy(language_classifier_response_t *self) { +void language_classifier_response_destroy(libpostal_language_classifier_response_t *self) { if (self == NULL) return; if (self->languages != NULL) { free(self->languages); @@ -59,7 +59,7 @@ void language_classifier_response_destroy(language_classifier_response_t *self) free(self); } -language_classifier_response_t *classify_languages(char *address) { +libpostal_language_classifier_response_t *classify_languages(char *address) { language_classifier_t *classifier = get_language_classifier(); if (classifier == NULL) { @@ -88,7 +88,7 @@ 
language_classifier_response_t *classify_languages(char *address) { size_t n = classifier->num_labels; double_matrix_t *p_y = double_matrix_new_zeros(1, n); - language_classifier_response_t *response = NULL; + libpostal_language_classifier_response_t *response = NULL; bool model_exp = false; if (classifier->weights_type == MATRIX_DENSE) { model_exp = logistic_regression_model_expectation(classifier->weights.dense, x, p_y); @@ -129,7 +129,7 @@ language_classifier_response_t *classify_languages(char *address) { free(indices); - response = malloc(sizeof(language_classifier_response_t)); + response = malloc(sizeof(libpostal_language_classifier_response_t)); response->num_languages = num_languages; response->languages = languages; response->probs = probs; diff --git a/src/language_classifier.h b/src/language_classifier.h index c5402b39..2a638e9c 100644 --- a/src/language_classifier.h +++ b/src/language_classifier.h @@ -6,6 +6,8 @@ #include #include +#include "libpostal.h" + #include "collections.h" #include "language_features.h" #include "logistic_regression.h" @@ -29,21 +31,14 @@ typedef struct language_classifier { } weights; } language_classifier_t; - -typedef struct language_classifier_response { - size_t num_languages; - char **languages; - double *probs; -} language_classifier_response_t; - // General usage language_classifier_t *language_classifier_new(void); language_classifier_t *get_language_classifier(void); language_classifier_t *get_language_classifier_country(void); -language_classifier_response_t *classify_languages(char *address); -void language_classifier_response_destroy(language_classifier_response_t *self); +libpostal_language_classifier_response_t *classify_languages(char *address); +void language_classifier_response_destroy(libpostal_language_classifier_response_t *self); void language_classifier_destroy(language_classifier_t *self); @@ -58,4 +53,4 @@ bool language_classifier_module_setup(char *dir); void language_classifier_module_teardown(void); 
-#endif \ No newline at end of file +#endif diff --git a/src/language_classifier_cli.c b/src/language_classifier_cli.c index e67be84a..d612132b 100644 --- a/src/language_classifier_cli.c +++ b/src/language_classifier_cli.c @@ -29,7 +29,7 @@ int main(int argc, char **argv) { } - language_classifier_response_t *response = classify_languages(address); + libpostal_language_classifier_response_t *response = classify_languages(address); if (response == NULL) { printf("Could not classify language\n"); exit(EXIT_FAILURE); diff --git a/src/language_classifier_test.c b/src/language_classifier_test.c index b795be99..262020ee 100644 --- a/src/language_classifier_test.c +++ b/src/language_classifier_test.c @@ -34,7 +34,7 @@ double test_accuracy(char *filename) { continue; } - language_classifier_response_t *response = classify_languages(address); + libpostal_language_classifier_response_t *response = classify_languages(address); if (response == NULL || response->num_languages == 0) { printf("%s\tNULL\t%s\n", language, address); continue; diff --git a/src/libpostal.c b/src/libpostal.c index 066a3015..a2fa9e0a 100644 --- a/src/libpostal.c +++ b/src/libpostal.c @@ -119,7 +119,7 @@ char **libpostal_near_dupe_hashes_languages(size_t num_components, char **labels char **libpostal_place_languages(size_t num_components, char **labels, char **values, size_t *num_languages) { - language_classifier_response_t *lang_response = place_languages(num_components, labels, values); + libpostal_language_classifier_response_t *lang_response = place_languages(num_components, labels, values); if (lang_response == NULL) { *num_languages = 0; return NULL; diff --git a/src/near_dupe.c b/src/near_dupe.c index 06a89ac4..f28b2140 100644 --- a/src/near_dupe.c +++ b/src/near_dupe.c @@ -670,7 +670,7 @@ cstring_array *near_dupe_hashes_languages(size_t num_components, char **labels, libpostal_normalize_options_t normalize_options = libpostal_get_default_options(); - language_classifier_response_t *lang_response 
= NULL; + libpostal_language_classifier_response_t *lang_response = NULL; if (num_languages == 0) { lang_response = place_languages(num_components, labels, values); diff --git a/src/place.c b/src/place.c index 549f1f48..f5f05037 100644 --- a/src/place.c +++ b/src/place.c @@ -17,10 +17,10 @@ static inline bool is_address_text_component(char *label) { ); } -language_classifier_response_t *place_languages(size_t num_components, char **labels, char **values) { +libpostal_language_classifier_response_t *place_languages(size_t num_components, char **labels, char **values) { if (num_components == 0 || values == NULL || labels == NULL) return NULL; - language_classifier_response_t *lang_response = NULL; + libpostal_language_classifier_response_t *lang_response = NULL; char *label; char *value; diff --git a/src/place.h b/src/place.h index 88920582..22d28b62 100644 --- a/src/place.h +++ b/src/place.h @@ -32,7 +32,7 @@ typedef struct place { char *website; } place_t; -language_classifier_response_t *place_languages(size_t num_components, char **labels, char **values); +libpostal_language_classifier_response_t *place_languages(size_t num_components, char **labels, char **values); place_t *place_new(void); @@ -40,4 +40,4 @@ place_t *place_from_components(size_t num_components, char **labels, char **valu void place_destroy(place_t *place); -#endif \ No newline at end of file +#endif From d491739b21089dc24f01dbaef6bba73e86130039 Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 30 Jan 2025 13:13:29 -0500 Subject: [PATCH 57/62] [fix] sparse_matrix_utils also had an incompatible pointer --- src/sparse_matrix_utils.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/sparse_matrix_utils.c b/src/sparse_matrix_utils.c index 53fcaf97..70be9d14 100644 --- a/src/sparse_matrix_utils.c +++ b/src/sparse_matrix_utils.c @@ -94,15 +94,15 @@ inline bool sparse_matrix_add_unique_columns_alias(sparse_matrix_t *matrix, khas } uint32_array 
*sparse_matrix_unique_columns(sparse_matrix_t *matrix) { - khash_t(int_set) *unique_columns = kh_init(int_set); + khash_t(int_uint32) *unique_columns = kh_init(int_uint32); uint32_array *ret = uint32_array_new(); if (sparse_matrix_add_unique_columns(matrix, unique_columns, ret)) { - kh_destroy(int_set, unique_columns); + kh_destroy(int_uint32, unique_columns); return ret; } - kh_destroy(int_set, unique_columns); + kh_destroy(int_uint32, unique_columns); uint32_array_destroy(ret); return NULL; } From 5d0b6dece5c5b306c4f5fc3ade114ecacbe18fbf Mon Sep 17 00:00:00 2001 From: Al Date: Thu, 30 Jan 2025 16:10:01 -0500 Subject: [PATCH 58/62] [readme] Github Actions status instead of Travis build --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ab0c207b..26d98ed2 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # libpostal: international street address NLP -[![Build Status](https://travis-ci.org/openvenues/libpostal.svg?branch=master)](https://travis-ci.org/openvenues/libpostal) +[![Build Status](https://github.com/openvenues/libpostal/actions/workflows/test.yml/badge.svg)](https://github.com/openvenues/libpostal/actions) [![Build Status](https://ci.appveyor.com/api/projects/status/github/openvenues/libpostal?branch=master&svg=true)](https://ci.appveyor.com/project/albarrentine/libpostal/branch/master) [![License](https://img.shields.io/github/license/openvenues/libpostal.svg)](https://github.com/openvenues/libpostal/blob/master/LICENSE) [![OpenCollective Sponsors](https://opencollective.com/libpostal/sponsors/badge.svg)](#sponsors) From 052d99de74371236216ac50227ec7ec90a684e43 Mon Sep 17 00:00:00 2001 From: Al Date: Fri, 31 Jan 2025 16:14:33 -0500 Subject: [PATCH 59/62] [fix] ifdef includes around stndup --- src/expand.c | 8 ++++++++ src/numex.c | 9 +++++++++ 2 files changed, 17 insertions(+) diff --git a/src/expand.c b/src/expand.c index 2e1dc977..90ade598 100644 --- a/src/expand.c +++ b/src/expand.c @@ -15,6 
+15,14 @@ #include "token_types.h" #include "transliterate.h" +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#ifndef HAVE_STRNDUP +#include "strndup.h" +#endif + #define DEFAULT_KEY_LEN 32 diff --git a/src/numex.c b/src/numex.c index 6edaca1a..c20efd34 100644 --- a/src/numex.c +++ b/src/numex.c @@ -5,6 +5,15 @@ #include "log/log.h" +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#ifndef HAVE_STRNDUP +#include "strndup.h" +#endif + + #define NUMEX_TABLE_SIGNATURE 0xBBBBBBBB #define NUMEX_SETUP_ERROR "numex module not setup, call libpostal_setup() or numex_module_setup()\n" From 68acd36411a4ab71ea4189e6dc370e77d0b11dd9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 8 Feb 2025 17:29:35 +0000 Subject: [PATCH 60/62] Bump requests from 2.20.0 to 2.32.2 in /scripts Bumps [requests](https://github.com/psf/requests) from 2.20.0 to 2.32.2. - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.20.0...v2.32.2) --- updated-dependencies: - dependency-name: requests dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- scripts/requirements-simple.txt | 2 +- scripts/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/requirements-simple.txt b/scripts/requirements-simple.txt index cc56d015..0d9a3764 100644 --- a/scripts/requirements-simple.txt +++ b/scripts/requirements-simple.txt @@ -1,4 +1,4 @@ -requests==2.20.0 +requests==2.32.2 six==1.10.0 PyYAML==5.4 ujson==5.4.0 \ No newline at end of file diff --git a/scripts/requirements.txt b/scripts/requirements.txt index 23a41a47..8a123fbd 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -29,7 +29,7 @@ pyproj==1.9.5.1 pystache==0.5.4 python-Levenshtein==0.12.0 python-geohash==0.8.5 -requests==2.20.0 +requests==2.32.2 s3transfer==0.1.3 six==1.10.0 ujson==5.4.0 From 4cbedc94357066cf9e409d8e34e23f1d193584d1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 8 Feb 2025 17:29:39 +0000 Subject: [PATCH 61/62] Bump fiona from 1.6.3.post1 to 1.10.0 in /scripts Bumps [fiona](https://github.com/Toblerity/Fiona) from 1.6.3.post1 to 1.10.0. - [Release notes](https://github.com/Toblerity/Fiona/releases) - [Changelog](https://github.com/Toblerity/Fiona/blob/main/CHANGES.txt) - [Commits](https://github.com/Toblerity/Fiona/compare/1.6.3-post1...1.10.0) --- updated-dependencies: - dependency-name: fiona dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- scripts/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/requirements.txt b/scripts/requirements.txt index 23a41a47..ad2e8394 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -1,6 +1,6 @@ boto3==1.4.0 botocore==1.4.53 -Fiona==1.6.3.post1 +Fiona==1.10.0 PyYAML==5.4 Rtree==0.8.2 Shapely==1.5.14 From 59a6c550f0b532300ad8898f1744cfe9e0e2f2a9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 8 Feb 2025 17:29:41 +0000 Subject: [PATCH 62/62] Bump gevent from 1.1.2 to 23.9.0 in /scripts Bumps [gevent](https://github.com/gevent/gevent) from 1.1.2 to 23.9.0. - [Release notes](https://github.com/gevent/gevent/releases) - [Changelog](https://github.com/gevent/gevent/blob/master/docs/changelog_1_1.rst) - [Commits](https://github.com/gevent/gevent/compare/v1.1.2...23.9.0) --- updated-dependencies: - dependency-name: gevent dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- scripts/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/requirements.txt b/scripts/requirements.txt index 23a41a47..e0046358 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -13,7 +13,7 @@ distribute==0.7.3 future==0.15.2 futures==3.0.5 ftfy==4.2.0 -gevent==1.1.2 +gevent==23.9.0 greenlet==0.4.10 jmespath==0.9.0 leveldb==0.193