Select Git revision
learning_rates.py
email.py 1.06 KiB
"""Module for the email detector."""
from typing import Any, Dict, List, Tuple
import regex as re
from src.detections import EmailDetection
from src.detectors.interface import Detector
# Case-insensitive pattern for e-mail addresses. It is split into three named
# groups whose concatenation is exactly the full match:
#   local_part - dot-separated runs of allowed characters, including the
#                trailing "@"
#   domain     - one or more labels, each followed by its "."
#   tld        - the final label, without a trailing dot
# A domain/tld label starts and ends with a letter or digit; hyphens are only
# allowed in between.
EMAIL_REGEX = re.compile(
    r"(?P<local_part>[a-z0-9!#$%&\'*+/=?^_`{|}~-]+"
    r"(?:\.[a-z0-9!#$%&\'*+/=?^_`{|}~-]+)*@)"
    r"(?P<domain>(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+)"
    r"(?P<tld>[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)",
    flags=re.IGNORECASE,
)
class EmailDetector(Detector):
    """Detector that locates e-mail addresses with ``EMAIL_REGEX``."""

    def detect(
        self, text: str, annotations: Dict[str, List[Tuple[int, int, Any]]]
    ) -> List[Tuple[int, int, EmailDetection]]:
        """Find every e-mail address in ``text``.

        Args:
            text (str): the text to be searched
            annotations (Dict[str, List[Tuple[int, int, Any]]]): accepted as
                part of the ``detect`` signature; not read by this detector

        Returns:
            List[Tuple[int, int, EmailDetection]]: one
                (start, end, EmailDetection) tuple per regex match, in the
                order the matches occur in the text; ``start`` and ``end``
                are the match's start and end offsets in ``text``
        """
        # A fresh EmailDetection is created for every match.
        return [
            (found.start(), found.end(), EmailDetection())
            for found in EMAIL_REGEX.finditer(text)
        ]