Select Git revision
FindICU.cmake
email.py 1.06 KiB
"""Module for the email detector."""
from typing import Any, Dict, List, Tuple
import regex as re
from src.detections import EmailDetection
from src.detectors.interface import Detector
# Pattern used by EmailDetector to locate email addresses. It is compiled once
# at import time; note that ``re`` here is the third-party ``regex`` package
# (see the imports above), not the standard-library module.
#
# The pattern has no anchors, so it can match anywhere inside a longer string.
# It exposes three named groups:
#   local_part - dot-separated runs of letters, digits and the listed
#                punctuation; the trailing "@" is captured inside this group.
#   domain     - one or more labels, each followed by "."; a label starts and
#                ends with a letter or digit and may contain "-" in between.
#                The trailing "." is captured inside this group.
#   tld        - the final label, same shape as a domain label, without a dot.
EMAIL_REGEX = re.compile(
# First run of local-part characters.
r"(?P<local_part>[a-z0-9!#$%&\'*+/=?^_`{|}~-]+"
# Optional further runs, each introduced by a single ".", then the "@".
r"(?:\.[a-z0-9!#$%&\'*+/=?^_`{|}~-]+)*@)"
# One or more "label." segments.
r"(?P<domain>(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+)"
# Final label.
r"(?P<tld>[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)",
# Case-insensitive, so the a-z ranges also cover upper-case letters.
re.I,
)
class EmailDetector(Detector):
    """Detector that reports the spans matched by ``EMAIL_REGEX``."""

    def detect(
        self, text: str, annotations: Dict[str, List[Tuple[int, int, Any]]]
    ) -> List[Tuple[int, int, EmailDetection]]:
        """Find every email address in the text.

        Args:
            text (str): the text to be searched
            annotations (Dict[str, List[Tuple[int, int, Any]]]): accepted as
                part of the method signature; this detector does not read it

        Returns:
            List[Tuple[int, int, EmailDetection]]: one tuple per regex match,
                in the order the matches are found, holding the match's start
                offset, end offset and a new EmailDetection instance
        """
        # finditer yields matches lazily; a fresh EmailDetection is created
        # for each one.
        return [
            (found.start(), found.end(), EmailDetection())
            for found in EMAIL_REGEX.finditer(text)
        ]