[tokenization] Adding an acronym token type for things like U.N. so we can delete internal periods on those tokens

This commit is contained in:
Al
2015-06-29 03:00:46 -04:00
parent 47efce4b7e
commit 3279b31b09
3 changed files with 6 additions and 4 deletions

View File

@@ -163,7 +163,7 @@ email = ([a-zA-Z0-9\._%+\-]+"@"([a-zA-Z0-9]+[\.])+[a-zA-Z0-9]{2,3});
{us_phone_number} { return US_PHONE; }
{international_phone_number} { return INTL_PHONE; }
-{multi_punct_abbreviation} { return ABBREVIATION; }
+{multi_punct_abbreviation} { return ACRONYM; }
{hyphen_plus_abbreviation} { return ABBREVIATION; }
{abbreviation} { return ABBREVIATION; }