Skip to content
Snippets Groups Projects
Select Git revision
  • master
  • develop
  • python2.7
3 results

FindICU.cmake

Blame
  • email.py 1.06 KiB
    """Module for the email detector."""
    
    from typing import Any, Dict, List, Tuple
    
    import regex as re
    
    from src.detections import EmailDetection
    from src.detectors.interface import Detector
    
    
    # Compiled pattern used to locate email addresses in free text.
    # It exposes three named groups: ``local_part``, ``domain`` and ``tld``.
    EMAIL_REGEX = re.compile(
        # local_part: a run of letters, digits or the listed punctuation ...
        r"(?P<local_part>[a-z0-9!#$%&\'*+/=?^_`{|}~-]+"
        # ... optionally followed by more dot-separated runs, then "@".
        # NOTE: the group closes after the "@", so the capture includes it.
        r"(?:\.[a-z0-9!#$%&\'*+/=?^_`{|}~-]+)*@)"
        # domain: one or more labels, each followed by a dot. A label starts
        # and ends with a letter or digit and may contain hyphens in between.
        # NOTE: the capture includes the trailing dot of the last label.
        r"(?P<domain>(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+)"
        # tld: the final label, same shape as a domain label, no trailing dot.
        r"(?P<tld>[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)",
        # Case-insensitive, so the a-z ranges also match uppercase letters.
        re.I,
    )
    
    
    class EmailDetector(Detector):
        """Detector that finds email addresses by matching ``EMAIL_REGEX``."""

        def detect(
            self, text: str, annotations: Dict[str, List[Tuple[int, int, Any]]]
        ) -> List[Tuple[int, int, EmailDetection]]:
            """Find every email address occurring in the text.

            Args:
                text (str): the text to scan for email addresses
                annotations (Dict[str, List[Tuple[int, int, Any]]]): annotations
                    supplied by the caller; this detector does not read them

            Returns:
                List[Tuple[int, int, EmailDetection]]: one tuple per regex
                    match, holding the match's start offset, its end offset
                    and a new ``EmailDetection`` instance

            """
            # finditer yields matches left to right, so the result keeps the
            # order in which the addresses appear in the text.
            return [
                (found.start(), found.end(), EmailDetection())
                for found in EMAIL_REGEX.finditer(text)
            ]