# en.yaml
# -------
# Supplement to the per-country address formats for English around the world.
# Note: by default, we use the UK conventions as they cover more countries.
# US/Canada-specific conventions and any others (e.g. Hong Kong, Australia)
# go in country overrides.
#
# Remember, these rules only get applied sometimes with random probabilities
# unless specified using "frequency: always" (which should almost never be
# used).

# Number
# ======
# Number, No., #, etc. can be used in both floor and apartment numbers,
# so it is defined separately.

number:
  # Canonical word in the libpostal dictionary
  canonical: number
  # Abbreviated form. A bare no is a boolean in YAML, so it must be quoted
  abbreviated: "no"
  # Use the abbreviated form more often than the canonical form
  prefer_abbreviated: true
  # Randomly sample other variations (e.g. num, nr)
  sample: true
  numeric_abbreviated:
    # e.g. #3, #2F, etc.
    affix: "#"
    # The affix goes on the number's left
    direction: left

# Floor/level
# ===========
# OSM doesn't usually concern itself with the address beyond the front door,
# yet many real-world addresses will have qualifying strings like "6th floor"
# and we'd like the parser to handle those.
#
# When we do get floor numbers in OSM addresses, it's usually in the form of
# the addr:floor or level tag, where the value is typically an integer or a
# half-floor (to indicate mezzanines). Those tags are relatively scarce in
# OSM, but many OSM addresses do have a building:levels tag. If we know there
# are 20 floors in the building, we can randomly sample numbers <= the # of
# floors and come up with plausible sounding addresses (i.e. a Floor 20
# address is not as likely outside major cities).
#
# We're not done yet, because the integer value by itself isn't what people
# use when writing addresses. This part of the config helps us rewrite the
# raw integer floor numbers as the sort of natural language text used in
# addresses like "Fl #1". The config is designed to be cross-lingual, so we
# can use the same structure with different words and do this for addresses
# in pretty much any language.

level:
  # Numbered floors
  # NOTE(review): "fl" is also the abbreviation of unit_types.flat - confirm
  # the overlap is intended.
  floor:
    canonical: floor
    abbreviated: fl
    sample: true
    # e.g. Floor 1
    numeric:
      # Floor/Fl goes to the left of the number
      direction: left
      # Occasionally add a variation of "number", e.g. Floor #1, Floor No. 1
      use_number_phrase: true
    # e.g. 2/F, 3/F
    numeric_abbreviated:
      affix: /f
      # The affix goes to the number's right
      direction: right
    # e.g. 1st Floor
    ordinal:
      direction: right

  # Special instructions for ground floor
  ground_floor:
    # The 0th floor is typically the ground level in the UK/Commonwealth
    number: 0
    canonical: ground floor
    abbreviated: g/f
    sample: true

  # Special instructions for lower ground floor
  # (added randomly, not an alias for a floor number)
  lower_ground_floor:
    canonical: lower ground floor
    abbreviated: lg
    sample: true

  # Special instructions for upper ground floor
  # (added randomly, not an alias for a floor number)
  upper_ground_floor:
    canonical: upper ground floor
    abbreviated: ug
    sample: true

  # Special instructions for podium level (added randomly)
  podium_level:
    canonical: podium level
    abbreviated: pd lvl
    sample: true
    alternates:
      - canonical: podium
        abbreviated: pd
        sample: true

  # Used when floor number is < 0 (starts at -1 in all countries)
  basement:
    canonical: basement
    abbreviated: bsmt
    sample: true
    # e.g. Basement 1
    numeric:
      direction: left
    # e.g. B1
    numeric_abbreviated:
      affix: b
      direction: left
    # e.g. 2nd Basement
    ordinal:
      direction: right

  # Floor number of <= -2 can be basement 2, sub-basement, sub-basement 1,
  # etc.
  sub_basement:
    canonical: sub basement
    abbreviated: sb
    sample: true
    # e.g. Sub-basement 1
    numeric:
      direction: left
    # e.g. SB1
    numeric_abbreviated:
      affix: sb
      direction: left
    # e.g. 2nd Sub-basement
    ordinal:
      direction: right

  # Mezzanine level (floor number {0.5, 1.5, ...}; may also be added at
  # random)
  mezzanine:
    # Floor 0.5 is just plain mezzanine, no number
    canonical: mezzanine
    abbreviated: mezz
    half_floors: true
    sample: true
    # Mezzanine/Mezz 2
    numeric:
      direction: left
    # M2
    numeric_abbreviated:
      affix: m
      direction: left
    # 2nd Mezzanine
    ordinal:
      direction: right

# Intersections
# =============
# For constructing intersections like 5th Avenue & Broadway.
# In OSM, a node that's part of two ways is an intersection.
#
# These simple rules make it possible to create training examples
# like: 26th/road Street/road and/intersection 6th/road Avenue/road

intersections:
  # 26th & 6th Avenue
  and:
    canonical: and
    abbreviated: "&"
    sample: true
  # 26th @ Broadway
  at:
    canonical: at
    abbreviated: "@"
    sample: true
  # 26th betw 5th Ave and 6th Ave
  between:
    canonical: between
    abbreviated: betw
    sample: true

# PO Box addresses
# ================
# For PO box addresses, there's almost no data in OSM, so we'll need to
# generate them somewhat randomly.
#
# The strategy is: for every amenity=post_office, generate a number of PO box
# addresses using random numbers (and some alpha-numerics so we capture
# patterns like PO Box 1Q, etc.). It doesn't matter if the post boxes
# themselves actually exist, as long as they cover the patterns of digits we
# expect in real addresses. The parser cares more about how many digits a
# number has and the surrounding words/phrases than the specific number,
# i.e. numbers in the range 1000-9999 can simply be normalized to DDDD.

po_box:
  numeric:
    # Don't sample all the forms in post_office.txt as many of the PO box
    # phrases appear only in Australia
    sample: false
    default:
      canonical: post office box
      abbreviated: po box
      sample: true
      prefer_abbreviated: true
    # Alternative phrases to use
    alternates:
      - canonical: box
        sample: true
      - canonical: private mail box
        abbreviated: pmb
        prefer_abbreviated: true
        sample: true

# Categories
# ==========
# Use the operators "in" and "near" for building category queries
# such as "restaurants in Hackney, London"

categories:
  near:
    canonical: near
    alternates:
      - around
  nearby:
    canonical: nearby
    alternates:
      - near here
      - around here
  near_me:
    canonical: near me
  in:
    canonical: in

# Directions
# ==========
# Unit types, stairways, etc. may have a direction associated
# with them, whether it's right/left or a cardinal direction
# like "East Entrance".
#
# NOTE(review): `right` and `rear` both use the affix "r" - confirm that the
# collision is intended.

directions:
  right:
    canonical: right
    numeric_abbreviated:
      affix: r
      direction: right
  left:
    canonical: left
    numeric_abbreviated:
      affix: l
      direction: right
  rear:
    canonical: rear
    numeric_abbreviated:
      affix: r
      direction: right
  front:
    canonical: front
    numeric_abbreviated:
      affix: f
      direction: right
  east:
    canonical: east
    numeric_abbreviated:
      affix: e
      direction: right
  west:
    canonical: west
    numeric_abbreviated:
      affix: w
      direction: right
  north:
    canonical: north
    numeric_abbreviated:
      affix: n
      direction: right
  south:
    canonical: south
    numeric_abbreviated:
      affix: s
      direction: right

# Entrance
# ========
# For deriving strings like "North Entrance"

entrance:
  canonical: entrance
  abbreviated: ent
  sample: true

# Staircase
# =========
# For deriving strings like "Staircase A" in apartment buildings

staircase:
  canonical: stair
  sample: true
  alternates:
    - canonical: stairway
      sample: true
    - canonical: staircase
      sample: true

# Unit types
# ==========
# Unit information is common in residential addresses, offices, business
# parks, etc. Just like thoroughfare types (Street, Avenue, etc.), there are
# many common ways to refer to a unit (e.g. Flat, Apartment, Suite, Room).

unit_types:
  # Units are not part of the global address formats (and are not always
  # standard). This is a list of places in the address where the unit line
  # might go.
  order:
    # e.g. Flat 18, Da Vinci House, 44 Saffron Hill, London
    - before: house
    # e.g. Da Vinci House, Flat 18, 44 Saffron Hill, London
    - before: road
    # e.g. 123 East 45th St, Apt 6, NYC
    - after: road

  # Special terms. Each one is anchored so it can be reused by alias in the
  # numeric defaults/alternates below and in the country overrides.
  # NOTE(review): &suite is anchored but never aliased in the visible file -
  # confirm whether it should appear in numeric.alternates.
  suite: &suite
    canonical: suite
    abbreviated: ste
    sample: true

  penthouse: &penthouse
    canonical: penthouse
    abbreviated: ph
    sample: true

  office: &office
    canonical: office
    abbreviated: ofc
    sample: true

  door: &door
    canonical: door
    sample: true

  room: &room
    canonical: room
    abbreviated: rm
    sample: true

  apartment: &apartment
    canonical: apartment
    abbreviated: apt
    prefer_abbreviated: true
    sample: true

  flat: &flat
    canonical: flat
    abbreviated: fl
    sample: true

  lot: &lot
    canonical: lot
    sample: true

  unit: &unit
    canonical: unit
    abbreviated: u
    use_number_phrase: true
    sample: true

  numeric:
    # Many unit types apply only in Australia.
    # For most English-speaking countries, only use the terms defined above.
    sample: false
    use_number_phrase: true
    use_direction_phrase: true
    default: *flat
    alternates:
      - *apartment
      - *unit
      - *door
      - *room
      - *office
      - *penthouse
      - *lot

# Country-specific overrides
# ==========================
# For each country, we allow a copy of the structures listed above
# in order to override the default values.

countries:
  # United States
  us:
    level:
      ground_floor:
        number: 1
    unit_types:
      numeric:
        default: *apartment
        country_alternates:
          - *flat

  # Canada
  # Note: this is Canadian English only. If the address is in French it will
  # use the French config.
  ca:
    level:
      ground_floor:
        number: 1
    unit_types:
      numeric:
        default: *apartment
        country_alternates:
          - *flat

  # Australia
  au:
    po_box:
      numeric:
        # Australia has many strings for this, e.g. Roadside Mail Bag
        sample: true
    unit_types:
      numeric:
        # Australia has all kinds of unit types (e.g. Marine Berth) not used
        # elsewhere
        sample: true