From 771a360a8510e8171547152ce3b1bb89f3c23953 Mon Sep 17 00:00:00 2001 From: Al Date: Mon, 2 May 2016 15:45:39 -0400 Subject: [PATCH] [phrases] Using safe_encode/safe_decode as default trie serializer/deserializer --- scripts/geodata/address_expansions/gazetteers.py | 3 +++ scripts/geodata/text/phrases.py | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/scripts/geodata/address_expansions/gazetteers.py b/scripts/geodata/address_expansions/gazetteers.py index 3da015b0..7932d116 100644 --- a/scripts/geodata/address_expansions/gazetteers.py +++ b/scripts/geodata/address_expansions/gazetteers.py @@ -1,5 +1,6 @@ import os import sys +import ujson as json from collections import defaultdict, OrderedDict @@ -27,6 +28,8 @@ POSSIBLE_ROMAN_NUMERALS = set(['i', 'ii', 'iii', 'iv', 'v', 'vi', 'vii', 'viii', class DictionaryPhraseFilter(PhraseFilter): + serialize = safe_encode + deserialize = safe_decode def __init__(self, *dictionaries): self.dictionaries = dictionaries diff --git a/scripts/geodata/text/phrases.py b/scripts/geodata/text/phrases.py index 9d6992df..bd4864f9 100644 --- a/scripts/geodata/text/phrases.py +++ b/scripts/geodata/text/phrases.py @@ -1,8 +1,8 @@ import six -import ujson as json from collections import * from marisa_trie import BytesTrie +from geodata.encoding import safe_encode, safe_decode SENTINEL = None @@ -11,8 +11,8 @@ class PhraseFilter(object): def __init__(self, phrases): self.trie = [(key, self.serialize(val)) for key, val in six.iteritems(phrases)] - serialize = json.dumps - deserialize = json.loads + serialize = safe_encode + deserialize = safe_decode def filter(self, tokens): def return_item(item):