Skip to content

Instantly share code, notes, and snippets.

@ltakens
Created January 11, 2023 21:22
Show Gist options
  • Select an option

  • Save ltakens/caf01cd3018b8187b35687c84a245725 to your computer and use it in GitHub Desktop.

Select an option

Save ltakens/caf01cd3018b8187b35687c84a245725 to your computer and use it in GitHub Desktop.
Permissive Python email regex allowing some common obfuscations. E.g. "(at)" and "(dot)".
"""Here are some keywords one might use to find this email regex:
- Python extract email addresses using regular expression
- Python extract email from text
- How to extract email addresses from a block of text using Python
"""
import doctest
import re
# Permissive pattern for plain and lightly obfuscated e-mail addresses
# ("(at)" / "[at]" for "@", "(dot)" / "[dot]" for ".").
#
# Capture group 1 is always the full address exactly as written in the text.
# The inner capture groups are incidental; they are kept capturing so that
# ``findall`` keeps returning tuples whose first item is the full address.
#
# In the character classes the hyphen is placed last so that it is a literal
# '-'. Written as ``\+-_`` it forms a range from '+' to '_' which also admits
# characters such as ':', '<', '@' and '['.
EMAIL_REGEX = re.compile(
    r'''(                                   # start capture full address
    ([a-z0-9.+_-]|\(dot\)|\[dot\])+         # e.g. 'john_91[dot]goodman'
    (@|\(at\)|\[at\])                       # '@' or '(at)' or '[at]'
    [\w-]+                                  # e.g. 'a-subdomain'
    ((\(dot\)|\[dot\]|\.)([a-z_-]+)){1,4}   # e.g. 'domain.com.au'
    )''',                                   # end capture full address
    re.VERBOSE | re.IGNORECASE
)
def find_all_email_addresses(text: str) -> list:
    """Return all email addresses found in ``text``, de-obfuscated.

    Every match of ``EMAIL_REGEX`` is normalised before it is returned:
    ``(at)`` / ``[at]`` become ``@`` and ``(dot)`` / ``[dot]`` become ``.``
    (matched case-insensitively).

    Args:
        text: The text to scan.

    Returns:
        A list of address strings, one per match, in the order the matches
        occur in ``text``. The list is empty when nothing matches.

    >>> find_all_email_addresses(
    ... '''Hello john@example.com,
    ... Are you still at john+goodman.is.awesome(at)his-domain.com.au?
    ... Or have you changed your email format to
    ... john[at]his(dot)domain.com[dot]au?'''
    ... )
    ['john@example.com', 'john+goodman.is.awesome@his-domain.com.au', 'john@his.domain.com.au']
    """
    results = []
    for match in EMAIL_REGEX.finditer(text):
        # Group 1 is the outermost capture: the full address as written.
        # Reading it by index does not depend on how many inner groups exist.
        addr = match.group(1)
        # Raw strings: '\(' and '\[' are regex escapes, not string escapes.
        addr = re.sub(r'\(at\)|\[at\]', '@', addr, flags=re.IGNORECASE)
        addr = re.sub(r'\(dot\)|\[dot\]', '.', addr, flags=re.IGNORECASE)
        results.append(addr)
    return results
# Script entry point: when the file is executed directly (not imported),
# run the doctests embedded in this module's docstrings.
if __name__ == "__main__":
    doctest.testmod()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment