From d4d8fa81d1a271503f4e4e084a2cb7bc59dc7d59 Mon Sep 17 00:00:00 2001 From: Al Date: Fri, 27 May 2016 13:37:43 -0400 Subject: [PATCH] [addresses] Increasing null_phrase_probability for plain numerics in Spain so things like 2o B make it into the training data --- resources/addresses/es.yaml | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/resources/addresses/es.yaml b/resources/addresses/es.yaml index ae7d8275..e157291c 100644 --- a/resources/addresses/es.yaml +++ b/resources/addresses/es.yaml @@ -360,6 +360,7 @@ cross_streets: sample: true canonical_probability: 0.8 sample_probability: 0.2 + parentheses_probabililty: 0.5 po_boxes: apartado: &apartado @@ -728,16 +729,6 @@ units: canonical: salas numeric: direction: left - directions: - alternatives: - - alternative: *derecha - probability: 0.45 - - alternative: *izquierda - probability: 0.45 - - alternative: *trasera - probability: 0.05 - - alternative: *frente - probability: 0.05 alphanumeric: &unit_alphanumeric default: *departamento @@ -1023,13 +1014,13 @@ countries: <<: *puerta numeric: direction: left - null_phrase_probability: 0.05 - # Unrelated to others. If it's just puerta B, most of the time don't include puerta + # If it's just puerta B, many times it's just e.g. 3o B for "tercer piso puerta B" + null_phrase_probability: 0.15 ordinal: direction: right gender: f direction_probability: 0.95 - null_phrase_probability: 0.8 + null_phrase_probability: 0.8 # Let e.g. 5a be the entire unit string # These sum to 1 numeric_probability: 0.25 ordinal_probability: 0.75