Bases: RegexPredictor
Email address regex pattern matcher.
Source code in src/nemo_safe_synthesizer/pii_replacer/ner/regexes/email.py
def __init__(self):
    """Set up the email-address predictor.

    Builds one ``Pattern`` for ``Entity.EMAIL_ADDRESS`` (both
    ``context_score`` and ``raw_score`` are ``Score.HIGH``), stores a
    ``tldextract.TLDExtract`` instance on ``self.tld_extract``, and hands
    the entity and the pattern to the parent initializer.
    """
    target_entity = Entity.EMAIL_ADDRESS

    email_pattern = Pattern(
        pattern=(
            # Local part, option 1: exactly one allowed character
            # (a word character or one of the listed specials; no dot).
            r"\b((([!#$%&'*+\-/=?^_`{|}~\w])"
            # Local part, option 2: two or more characters; dots may
            # appear only in the interior, never first or last.
            r"|([!#$%&'*+\-/=?^_`{|}~\w][!#$%&'*+\-/=?^_`{|}~\.\w]{0,}[!#$%&'*+\-/=?^_`{|}~\w]))"
            # "@" followed by word-character labels joined by "-" or ".",
            # with at least one literal "." required in the domain.
            r"[@]\w+([-.]\w+)*\.\w+([-.]\w+)*)\b"
        ),
        context_score=Score.HIGH,
        raw_score=Score.HIGH,
    )

    # NOTE(review): suffix_list_urls=None presumably stops tldextract from
    # fetching the public suffix list over the network -- confirm against
    # the installed tldextract version.
    self.tld_extract = tldextract.TLDExtract(suffix_list_urls=None)

    super().__init__(entity=target_entity, patterns=[email_pattern])