# anonymizer.py -- committed by Bartłomiej Koptyra (metadata recovered from the repository web view).
"""Implementation of anonymizer functionality."""
import re
from string import punctuation, ascii_lowercase, ascii_uppercase
import random
# Commit author: Bartłomiej Koptyra.


class Anonymizer:
    """Edit tokens so that e-mail addresses and user mentions are hidden.

    The ``method`` entry of ``task_options`` selects what happens to every
    e-mail address or ``@mention`` found in a token:

    * ``'delete'`` (default) -- the match is removed,
    * ``'tag'`` -- the match is replaced with a fixed placeholder
      (``[MAIL]`` or ``@[USER]``),
    * ``'pseudo'`` -- the match is replaced with random ASCII letters while
      punctuation, letter case and overall length are kept.

    Any other ``method`` value leaves tokens unchanged.
    """

    # Patterns used to locate the fragments that have to be anonymized.
    _EMAIL_REGEX = r'[\w\.-]+@[\w\.-]+\.\w{2,4}'
    _MENTION_REGEX = r'\B\@([\w\-]+)'

    def __init__(self, task_options):
        """Store the anonymization method and the placeholder tokens.

        Args:
            task_options: Mapping with an optional ``'method'`` key
                (``'delete'``, ``'tag'`` or ``'pseudo'``).
        """
        self._method = task_options.get('method', 'delete')
        self._mail_token = '[MAIL]'
        self._user_token = '@[USER]'

    def process(self, ctag=None):
        """Placeholder for tag-driven processing; currently does nothing.

        ``ctag`` is optional so that a bare ``process()`` call is a harmless
        no-op instead of failing on an undefined name.

        Args:
            ctag: Morphosyntactic tag of the token being processed, if any.
        """
        if ctag == 'ign':
            # TODO: check whether the token is a nick and then an e-mail.
            # TODO: check whether it is a proper name such as "mBank"
            #       (not sure yet).
            pass

    @staticmethod
    def _get_random_chatacter(upper=False):
        """Return one random ASCII letter, upper-case when ``upper`` is true."""
        return random.choice(ascii_uppercase) \
            if upper else random.choice(ascii_lowercase)

    def _pseudonymize(self, text):
        """Replace every non-punctuation character of ``text`` randomly.

        Punctuation (which includes ``@``, ``.``, ``-`` and ``_``) is kept in
        place; every other character becomes a random ASCII letter with the
        same case, so the length and layout of ``text`` are preserved.
        """
        return ''.join(
            char if char in punctuation
            else self._get_random_chatacter(char.isupper())
            for char in text
        )

    def _generate_pseudo_email(self, email):
        """Return a random look-alike of ``email``.

        The local part and the domain are randomised character by character
        (punctuation kept), while the top-level domain -- everything from
        the last dot after the ``@`` -- is copied verbatim. When there is no
        ``@`` or no dot after it, the whole string is randomised.

        Args:
            email: E-mail address to pseudonymize.

        Returns:
            A string of the same length as ``email``.
        """
        at_pos = email.find('@')
        last_dot = email.rfind('.')
        # Only a dot located after the '@' starts a top-level domain.
        if at_pos != -1 and last_dot > at_pos:
            cut = last_dot
        else:
            cut = len(email)
        return self._pseudonymize(email[:cut]) + email[cut:]

    def _generate_pseudo_user(self, user):
        """Return a random look-alike of the mention ``user``.

        The first character (normally the leading ``@``) is kept as is and
        the remainder is randomised with punctuation preserved. An empty
        string is returned unchanged.
        """
        if not user:
            return ''
        return user[0] + self._pseudonymize(user[1:])

    def _anonoymize_email(self, token):
        """Handle removal/changing of e-mail addresses found in ``token``.

        Args:
            token: Text that may contain e-mail addresses.

        Returns:
            ``token`` with every e-mail address deleted, tagged or
            pseudonymized according to the configured method; unchanged
            for an unknown method.
        """
        if self._method == 'delete':
            token = re.sub(self._EMAIL_REGEX, r'', token)
        elif self._method == 'tag':
            token = re.sub(self._EMAIL_REGEX, self._mail_token, token)
        elif self._method == 'pseudo':
            # Only the matched address is rewritten, exactly like the
            # 'delete' and 'tag' branches; surrounding text is untouched.
            token = re.sub(
                self._EMAIL_REGEX,
                lambda match: self._generate_pseudo_email(match.group(0)),
                token,
            )
        return token

    def _anonoymize_user(self, token):
        """Handle removal/changing of user mentions found in ``token``.

        Args:
            token: Text that may contain ``@mentions``.

        Returns:
            ``token`` with every mention deleted, tagged or pseudonymized
            according to the configured method; unchanged for an unknown
            method.
        """
        if self._method == 'delete':
            token = re.sub(self._MENTION_REGEX, r'', token)
        elif self._method == 'tag':
            token = re.sub(self._MENTION_REGEX, self._user_token, token)
        elif self._method == 'pseudo':
            # Only the matched mention is rewritten, exactly like the
            # 'delete' and 'tag' branches; surrounding text is untouched.
            token = re.sub(
                self._MENTION_REGEX,
                lambda match: self._generate_pseudo_user(match.group(0)),
                token,
            )
        return token