From 773f801141c81f7ec8ef62c9af4699744c21d40c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bart=C5=82omiej=20Koptyra?= <rikain@e-science.pl>
Date: Fri, 7 Aug 2020 16:32:40 +0200
Subject: [PATCH] Added some emails and users handling.

---
 src/anonymizer.py | 72 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 71 insertions(+), 1 deletion(-)

diff --git a/src/anonymizer.py b/src/anonymizer.py
index 577638f..06c80dc 100644
--- a/src/anonymizer.py
+++ b/src/anonymizer.py
@@ -1,14 +1,84 @@
 """Implementation of anonymizer functionality."""
 import re
+from string import punctuation, ascii_lowercase, ascii_uppercase
+import random
 
 
 class Anonymizer:
     """Class used to edit sentences based on options."""
 
     def __init__(self, task_options):
-        self.method = task_options.get('method', 'delete')
+        self._method = task_options.get('method', 'delete')  # mode; default 'delete'
+        self._mail_token = '[MAIL]'  # replaces e-mails when method is 'tag'
+        self._user_token = '@[USER]'  # replaces mentions when method is 'tag'
 
     def process(self):
         if ctag == 'ign':
             # sprawddz czy to nick a potem email
             # sprawdz czy to nazwa własna jak mBank? nie wiem
+            print()  # placeholder body for the if; NOTE(review): ctag undefined here
+
+    @staticmethod
+    def _get_random_chatacter(upper=False):
+        """Return one random ASCII letter; uppercase when upper is truthy."""
+        return random.choice(ascii_uppercase if upper else ascii_lowercase)
+
+    def _generate_pseudo_email(self, email):
+        """Return ``email`` with its letters replaced by random ones.
+
+        Punctuation, the ``@`` separator and the top-level domain (the
+        part from the last dot onwards) are kept verbatim; every other
+        character of the local part and of the domain is swapped for a
+        random ASCII letter of matching case. Text without a dot after
+        the ``@`` has no top-level domain, so all of it is randomized.
+
+        Slicing is used instead of a shared iterator: measuring the
+        remaining text with ``len(list(it))`` drains the iterator and
+        loses everything after the first dot of the domain.
+        """
+        def scramble(text):
+            # Keep punctuation, randomize anything else (case-preserving).
+            return ''.join(
+                char if char in punctuation
+                else self._get_random_chatacter(char.isupper())
+                for char in text)
+
+        local, at, domain = email.partition('@')
+        dot = domain.rfind('.')
+        if dot == -1:
+            dot = len(domain)
+        return scramble(local) + at + scramble(domain[:dot]) + domain[dot:]
+
+    def _generate_pseudo_user(self, user):
+        """Return ``user`` with everything after its first char randomized.
+
+        The first character and any punctuation are kept as they are; an
+        empty string yields '' instead of raising StopIteration.
+        """
+        return user[:1] + ''.join(
+            char if char in punctuation
+            else self._get_random_chatacter(char.isupper())
+            for char in user[1:])
+
+    def _anonoymize_email(self, token):
+        """Delete, tag or pseudonymize the e-mail addresses in token.
+
+        All methods touch only the regex matches, so in 'pseudo' mode the
+        text around an address is kept. Unknown methods change nothing.
+        """
+        repl = {'delete': '', 'tag': self._mail_token,
+                'pseudo': lambda m: self._generate_pseudo_email(m.group())}
+        return re.sub(r'[\w\.-]+@[\w\.-]+\.\w{2,4}',
+                      repl.get(self._method, lambda m: m.group()), token)
+
+    def _anonoymize_user(self, token):
+        """Delete, tag or pseudonymize the @mentions found in token.
+
+        All methods touch only the regex matches, so in 'pseudo' mode the
+        text around a mention is kept. Unknown methods change nothing.
+        """
+        repl = {'delete': '', 'tag': self._user_token,
+                'pseudo': lambda m: self._generate_pseudo_user(m.group())}
+        return re.sub(r'\B\@([\w\-]+)',
+                      repl.get(self._method, lambda m: m.group()), token)
+
-- 
GitLab