Skip to content
Snippets Groups Projects
Select Git revision
  • a11d8a8c16acf37a41b8426f184a538022c37c85
  • master default protected
  • vertical_relations
  • lu_without_semantic_frames
  • hierarchy
  • additional-unification-filters
  • v0.1.1
  • v0.1.0
  • v0.0.9
  • v0.0.8
  • v0.0.7
  • v0.0.6
  • v0.0.5
  • v0.0.4
  • v0.0.3
  • v0.0.2
  • v0.0.1
17 results

admin.py

Blame
  • email.py 1.06 KiB
    """Module for the email detector."""
    
    from typing import Any, Dict, List, Tuple
    
    import regex as re
    
    from src.detections import EmailDetection
    from src.detectors.interface import Detector
    
    
    # Case-insensitive pattern for e-mail addresses, split into three named
    # groups. Note that the groups keep their trailing separators: "local_part"
    # ends with the "@" and "domain" ends with the final "." before the TLD.
    EMAIL_REGEX = re.compile(
        "".join(
            (
                # local_part: first run of allowed characters ...
                r"(?P<local_part>[a-z0-9!#$%&\'*+/=?^_`{|}~-]+",
                # ... followed by any number of dot-separated runs, then "@".
                r"(?:\.[a-z0-9!#$%&\'*+/=?^_`{|}~-]+)*@)",
                # domain: one or more labels, each terminated by a dot; a label
                # starts and ends with an alphanumeric and may contain hyphens.
                r"(?P<domain>(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+)",
                # tld: the last label, same shape as a domain label, no dot.
                r"(?P<tld>[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)",
            )
        ),
        flags=re.IGNORECASE,
    )
    
    
    class EmailDetector(Detector):
        """Detector that locates e-mail addresses in text."""

        def detect(
            self, text: str, annotations: Dict[str, List[Tuple[int, int, Any]]]
        ) -> List[Tuple[int, int, EmailDetection]]:
            """Find every e-mail address occurring in the text.

            Args:
                text (str): the text to scan for e-mail addresses
                annotations (Dict[str, List[Tuple[int, int, Any]]]): not used
                    by this detector

            Returns:
                List[Tuple[int, int, EmailDetection]]: one
                    (start, end, detection) tuple per EMAIL_REGEX match, where
                    start and end are the match offsets within the text

            """
            # Each match gets its own fresh EmailDetection instance.
            return [
                (found.start(), found.end(), EmailDetection())
                for found in EMAIL_REGEX.finditer(text)
            ]