Skip to content

Commit a498d0e

Browse files
committed
format refinery code
1 parent b871176 commit a498d0e

File tree

1 file changed

+10
-5
lines changed

1 file changed

+10
-5
lines changed

extractors/personal_identifiers/zipcode_extraction/code_snippet_refinery.md

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,14 @@
22
import re
33
from typing import List
44

5-
ATTRIBUTE: str = "text" # only text attributes.
6-
COUNTRY_IDS: List[str] = ["US"] # see list below for more countries
5+
ATTRIBUTE: str = "text" # only text attributes.
6+
COUNTRY_IDS: List[str] = ["US"] # see list below for more countries
77
LABEL: str = "zip code"
88

9+
910
def zipcode_extraction(record):
10-
text = record[ATTRIBUTE].text # SpaCy doc, hence we need to use .text to get the string.
11+
# SpaCy doc, hence we need to use .text to get the string.
12+
text = record[ATTRIBUTE].text
1113

1214
for country_id in COUNTRY_IDS:
1315
for match in zip_codes[country_id].finditer(text):
@@ -16,6 +18,7 @@ def zipcode_extraction(record):
1618

1719
yield LABEL, span.start, span.end
1820

21+
1922
zip_regex_lookup = {
2023
"GB": r"GIR[ ]?0AA|((AB|AL|B|BA|BB|BD|BH|BL|BN|BR|BS|BT|CA|CB|CF|CH|CM|CO|CR|CT|CV|CW|DA|DD|DE|DG|DH|DL|DN|DT|DY|E|EC|EH|EN|EX|FK|FY|G|GL|GY|GU|HA|HD|HG|HP|HR|HS|HU|HX|IG|IM|IP|IV|JE|KA|KT|KW|KY|L|LA|LD|LE|LL|LN|LS|LU|M|ME|MK|ML|N|NE|NG|NN|NP|NR|NW|OL|OX|PA|PE|PH|PL|PO|PR|RG|RH|RM|S|SA|SE|SG|SK|SL|SM|SN|SO|SP|SR|SS|ST|SW|SY|TA|TD|TF|TN|TQ|TR|TS|TW|UB|W|WA|WC|WD|WF|WN|WR|WS|WV|YO|ZE)(\d[\dA-Z]?[ ]?\d[ABD-HJLN-UW-Z]{2}))|BFPO[ ]?\d{1,4}",
2124
"JE": r"JE\d[\dA-Z]?[ ]?\d[ABD-HJLN-UW-Z]{2}",
@@ -174,8 +177,10 @@ zip_regex_lookup = {
174177
"TC": r"TKCA 1ZZ",
175178
"WF": r"986\d{2}",
176179
"XK": r"\d{5}",
177-
"YT": r"976\d{2}"
180+
"YT": r"976\d{2}",
178181
}
179182
zip_codes = {
180-
country_id:re.compile(zip_regex_lookup[country_id]) for country_id in COUNTRY_IDS}
183+
country_id: re.compile(zip_regex_lookup[country_id]) for country_id in COUNTRY_IDS
184+
}
185+
181186
```

0 commit comments

Comments
 (0)