From 34c45ba651e67e427ebce3d376506570473b3d68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pogoda?= <mipo57@e-science.pl> Date: Thu, 19 Jan 2023 08:41:20 +0100 Subject: [PATCH] Remove legacy code --- src/anonymizers/english_anonymizer.py | 192 ------------------------- src/anonymizers/polish_anonymizer.py | 200 -------------------------- src/anonymizers/russian_anonymizer.py | 191 ------------------------ src/utils/__init__.py | 1 - src/utils/utils.py | 33 ----- 5 files changed, 617 deletions(-) delete mode 100644 src/anonymizers/english_anonymizer.py delete mode 100644 src/anonymizers/polish_anonymizer.py delete mode 100644 src/anonymizers/russian_anonymizer.py delete mode 100644 src/utils/__init__.py delete mode 100644 src/utils/utils.py diff --git a/src/anonymizers/english_anonymizer.py b/src/anonymizers/english_anonymizer.py deleted file mode 100644 index 0961c24..0000000 --- a/src/anonymizers/english_anonymizer.py +++ /dev/null @@ -1,192 +0,0 @@ -"""Implementation of anonymizer functionality for English language.""" -import math -import random - -import regex - - -from src.utils.utils import consume -from src.ccl_handler import CCLHandler -from src.base_anonymizer import BaseAnonymizer -from src.generators import (generate_pseudo_email, generate_pseudo_phone_number, - generate_pseudo_user, generate_pseudo_website, - generate_pseudo_date) - - -class EnglishAnonymizer(BaseAnonymizer): - """Class with an anonymization implementation for the English language.""" - - skip_ann = ['CARDINAL', 'LAW', 'DATE', 'QUANTITY', 'TIME', 'EVENT'] - date_regex = regex.compile( - r'\b(?P<day_or_month_year>' - r'(?P<day_month1>[0-3]?\d)(?P<punct1>[ \t\-\./,]{1,2})' - r'(?P<day_month2>[0-3]?\d)(?P<punct2>[ \t\-\./,]{1,2})' - r'(?P<year1>\d{4}|\d{2}))\b|' - - r'\b(?P<year_month_or_day>(?P<year2>\d{4}|\d{2})' - r'(?P<punct3>[ \t\-\./,]{1,2})(?P<day_month3>[0-3]?\d)' - r'(?P<punct4>[ \t\-\./,]{1,2})(?P<day_month4>[0-3]?\d))\b|' - - r'(?P<month_in_words>' - r'(?:(?P<day1>[0-3]?\d)(?P<punct5>[ \t\-\./,]{0,2}))?' - r'\b(?P<month>Jan(?:|uary)|Feb(?:|ruary)|Mar(?:|ch)|' - r'Apr(?:|il)|May|Jun(?:|e)|Jul(?:|y)|Aug(?:|ust)|Sep(?:|tember)' - r'|Oct(?:|ober)|Nov(?:|ember)|Dec(?:|ember))\b' - r'(?:(?P<punct7>[ \t\-\./,]{0,2})(?P<day2>[0-3]?\d))?' - r'(?:(?P<punct6>[ \t\-\./,]{1,2})(?P<year3>\d{4}|\d{2}))?' - r'(?<!\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\b))', regex.I - ) - - spacy_tag_map = { - 'PERSON': '[PERSON]', - 'GPE': '[LOCATION]', - 'FAC': '[LOCATION]', - } - - def __init__(self, task_options): - """Initialize anonymizer with base regexes.""" - super().__init__(task_options) - self.lang = task_options.get('language', 'en') - # Order is important, first more specific - self._category_anonymisation = { - 'user': (self.user_regex, self._user_token, - generate_pseudo_user, {}), - 'email': (self.email_regex, self._mail_token, - generate_pseudo_email, {}), - 'website': (self.website_regex, self._website_token, - generate_pseudo_website, {}), - 'date': (self.date_regex, self._date_token, - generate_pseudo_date, {'lang': self.lang}), - 'phone_number': (self.phone_number_regex, self._digits_token, - generate_pseudo_phone_number, {}), - } - self.unmarshallers = { - 'chunk': lambda *args: '\n', - 'sentence': lambda *args: self._process_sent_tree(*args), - } - self._load_dict_file() - - def _load_dict_file(self, filename='en_dict.txt'): - with open(filename, 'r', encoding='utf-8') as f: - for line in f.readlines(): - l_list = line.strip('\n').split() - cat, subtype = l_list[0], l_list[1] - length = len(l_list[2:]) - text = ' '.join(l_list[2:]) - if cat not in self._form_dict: - self._form_dict[cat] = {} - if subtype not in self._form_dict[cat]: - self._form_dict[cat][subtype] = [] - self._form_dict[cat][subtype].append((text, length)) - - def _handle_annotated(self, id, text, tag, ann): - if self._method == 'delete': - return '' - elif self._method == 'tag': - if ann in self.spacy_tag_map: - return self.spacy_tag_map[ann] - elif self._method == 'pseudo': - if ann in self.spacy_tag_map: - self._pseudo_ann_list.append((id, text, tag, ann)) - return text - - def _process_sentence(self, string_builder): - string_builder = self._handle_pseudo_ann(string_builder) - sentence = ''.join(string_builder) - return self._anonymize(sentence) - - def _get_pseudo_ann(self, ann, text, length): - new_text = [] - if ann == 'PERSON': - gen = random.choice(['FIRST_M', 'FIRST_F']) - name_length = length - 1 if length > 1 else 1 - while name_length > 0: - names = [p for p in self._form_dict['PERSON'][gen] - if p[1] <= name_length] - random_name = random.choice(names) - name_length -= random_name[1] - new_text.append(random_name[0]) - if length > 1: - last_name = random.choice(self._form_dict['PERSON']['LAST']) - new_text.append(last_name[0]) - elif ann == 'GPE': - found = False - for _, values in self._form_dict['GPE'].items(): - if ' '.join(text) in values: - new_text = [random.choice(values)[0]] - found = True - if not found: - new_text = [random.choice(self._form_dict['GPE']['CITY'])[0]] - else: - new_text = ' '.join(text) - return ' '.join(new_text) - - def _handle_pseudo_ann(self, string_builder): - if not self._pseudo_ann_list: - return string_builder - shifted_id = 0 - pseudo_ann_iter = enumerate(iter(self._pseudo_ann_list)) - for i, (id_, text, _, ann) in pseudo_ann_iter: - if ann in self.skip_ann: - continue - j = i + 1 - id_ += shifted_id - start_id = id_ - ann_len = 1 - phrase = [text] - skip_tokens = 1 - while j < len(self._pseudo_ann_list): - next_id, next_text, _, next_ann = self._pseudo_ann_list[j] - next_id += shifted_id - if ann != next_ann: - break - if next_id == id_ + 1 and string_builder[next_id] == '-': - skip_tokens += 1 - elif next_id == id_ + 1 and string_builder[id_] == '-': - ann_len += 1 - skip_tokens += 1 - phrase.append(next_text) - elif next_id == id_ + 2 and string_builder[id_ + 1] == ' ': - ann_len += 1 - skip_tokens += 2 - phrase.append(next_text) - else: - break - id_ = next_id - j += 1 - new_text = self._get_pseudo_ann( - ann=ann, - text=phrase, - length=ann_len - ) - new_text = regex.split('( )', new_text) - string_builder = string_builder[:start_id] + new_text + \ - string_builder[start_id + skip_tokens:] - if ann_len > 1: - consume(pseudo_ann_iter, ann_len - 1) - if math.ceil(len(new_text) / 2) != ann_len: - shifted_id += len(new_text) - (ann_len * 2) + 1 - self._pseudo_ann_list.clear() - return string_builder - - def _anonymize(self, sentence): - if self._method == 'delete': - for pattern, _, _, _ in self._category_anonymisation.values(): - sentence = regex.sub(pattern, '', sentence) - elif self._method == 'tag': - sentence = self._tagging(sentence) - elif self._method == 'pseudo': - sentence = self._pseudonymization(sentence) - return sentence - - def process(self, input_filename, output_filename): - """Anonymize the file in CCL format to the resulting file in plain text. - - Args: - input_filename (str): Input filename in CCL format. \ - Text tagged and processed with spacy NER. - output_filename (str): Output filename. - - """ - ccl_handler = CCLHandler(input_filename) - ccl_handler.process(output_filename, self.unmarshallers) diff --git a/src/anonymizers/polish_anonymizer.py b/src/anonymizers/polish_anonymizer.py deleted file mode 100644 index f725254..0000000 --- a/src/anonymizers/polish_anonymizer.py +++ /dev/null @@ -1,200 +0,0 @@ -"""Implementation of anonymizer functionality for Polish language.""" -import math -import regex -import random - - -from src.utils.utils import consume -from src.base_anonymizer import BaseAnonymizer -from src.ccl_handler import CCLHandler -from src.generators import (generate_pseudo_email, generate_pseudo_phone_number, - generate_pseudo_user, generate_pseudo_website, - generate_pseudo_date) - - -class PolishAnonymizer(BaseAnonymizer): - """Class with an anonymization implementation for the Polish language.""" - - date_regex = regex.compile( - r'\b(?P<day_or_month_year>' - r'(?P<day_month1>[0-3]?\d)(?P<punct1>[ \t\-\./,]{1,2})' - r'(?P<day_month2>[0-3]?\d)(?P<punct2>[ \t\-\./,]{1,2})' - r'(?P<year1>\d{4}|\d{2}))\b|' - - r'\b(?P<year_month_or_day>(?P<year2>\d{4}|\d{2})' - r'(?P<punct3>[ \t\-\./,]{1,2})(?P<day_month3>[0-3]?\d)' - r'(?P<punct4>[ \t\-\./,]{1,2})(?P<day_month4>[0-3]?\d))\b|' - - r'(?P<month_in_words>' - r'(?!\b(sty|lut|mar|kwi|maj|cze|lip|sie|wrz|paz|lis|gru)\b)' - r'(?:(?P<day1>[0-3]?\d)(?P<punct5>[ \t\-\./,]{0,2}))?' - r'\b(?P<month>Sty(?:|cze[nÅ„]|cznia)|Lut(?:|y|ego)|Mar(?:|zec|ca)|' - r'Kwi(?:|ecie[nÅ„]|etnia)|Maj(?:|a)|Cze(?:|rwiec|rwca)|Lip(?:|iec|ca)' - r'|Sie(?:|rpie[nÅ„]|rpnia)|Wrz(?:|esie[nÅ„]|e[Å›s]nia)' - r'|Pa[zź](?:|dziernik|dziernika)|Lis(?:|topad|topada)' - r'|Gru(?:|dzie[nÅ„]|dnia))\b' - r'((?:(?P<punct7>[ \t\-\./,]{0,2})(?P<day2>[0-3]?\d))' - r'(?:(?P<punct8>[ \t\-\./,]{1,2})(?P<year4>\d{4}|\d{2}))|' - r'(?:(?P<punct6>[ \t\-\./,]{0,2})(?P<year3>\d{4}|\d{2})))?)', regex.I - ) - - _file_to_liner_dispatch = { - 'nam_liv_person': 'person_first_nam', - 'nam_liv_person_last': 'person_last_nam', - 'nam_fac_road': 'road_nam', - 'nam_loc_gpe_city': 'city_nam', - 'nam_org_group_team': 'country_nam' - } - - _liner_to_tag_dispatch = { - 'person_first_nam': '[OSOBA]', - 'person_last_nam': '[OSOBA]', - 'road_nam': '[MIEJSCE]', - 'city_nam': '[MIEJSCE]', - 'country_nam': '[MIEJSCE]' - } - - def __init__(self, task_options): - """Initialize anonymizer with base regexes.""" - super().__init__(task_options) - self.lang = task_options.get('language', 'pl') - # Order is important, first more specific - self._category_anonymisation = { - 'user': (self.user_regex, self._user_token, - generate_pseudo_user, {}), - 'email': (self.email_regex, self._mail_token, - generate_pseudo_email, {}), - 'website': (self.website_regex, self._website_token, - generate_pseudo_website, {}), - 'date': (self.date_regex, self._date_token, - generate_pseudo_date, {'lang': self.lang}), - 'phone_number': (self.phone_number_regex, self._digits_token, - generate_pseudo_phone_number, {}), - } - self.unmarshallers = { - 'chunk': lambda *args: '\n', - 'sentence': lambda *args: self._process_sent_tree(*args), - } - self._form_dict = dict() - self._pseudo_ann_list = list() - self._load_file() - - def _load_file(self, filename='dictionaries/pl_dict.txt'): - with open(filename, 'r', encoding='utf-8') as f: - for line in f.readlines(): - l_list = line.split() - cat = l_list[0] - if cat in self._file_to_liner_dispatch: - cat_name = self._file_to_liner_dispatch[cat] - length = int((len(l_list) - 2) / 2) - gen_name = ' '.join(l_list[(1 + length):(1 + 2 * length)]) - flx_name = ' '.join(l_list[1:(1 + length)]) - flex = l_list[-1] - if cat_name not in self._form_dict: - self._form_dict[cat_name] = dict() - if length not in self._form_dict[cat_name]: - self._form_dict[cat_name][length] = dict() - if gen_name not in self._form_dict[cat_name][length]: - self._form_dict[cat_name][length][gen_name] = dict() - self._form_dict[cat_name][length][gen_name][flex] = flx_name - for cat in self._form_dict: - for length in self._form_dict[cat]: - self._form_dict[cat][length] = list( - self._form_dict[cat][length].items() - ) - - def _handle_annotated(self, id, text, tag, ann): - if self._method == 'delete': - return '' - elif self._method == 'tag': - if ann in self._liner_to_tag_dispatch: - return self._liner_to_tag_dispatch[ann] - elif self._method == 'pseudo': - if ann in self._form_dict: - self._pseudo_ann_list.append((id, text, tag, ann)) - return text - - def _process_sentence(self, string_builder): - string_builder = self._handle_pseudo_ann(string_builder) - sentence = ''.join(string_builder) - return self._anonymize(sentence) - - def _get_pseudo_ann(self, ann, tag, length): - while length not in self._form_dict[ann] and length > 0: - length -= 1 - if length == 0: - return '' - new_tag = ':'.join(tag.split(':')[1:4]) - for _ in range(0, 10): - random_entry = random.choice(self._form_dict[ann][length]) - if new_tag in random_entry[1]: - return random_entry[1][new_tag] - if new_tag == 'ign': - return random_entry[0] - random_entry = random.choice(self._form_dict[ann][length]) - return random_entry[0] - - def _handle_pseudo_ann(self, string_builder): - if not self._pseudo_ann_list: - return string_builder - shifted_id = 0 - pseudo_ann_iter = enumerate(iter(self._pseudo_ann_list)) - for i, (id_, _, tag, ann) in pseudo_ann_iter: - j = i + 1 - start_id = id_ + shifted_id - ann_len = 1 - skip_tokens = 1 - while j < len(self._pseudo_ann_list): - next_id, _, _, next_ann = self._pseudo_ann_list[j] - next_id += shifted_id - if ann != next_ann or (ann == 'person_first_nam' or - ann == 'person_last_nam'): - break - if next_id == id_ + 1 and string_builder[next_id] == '-': - skip_tokens += 1 - elif next_id == id_ + 1 and string_builder[id_] == '-': - ann_len += 1 - skip_tokens += 1 - elif next_id == id_ + 2 and string_builder[id_ + 1] == ' ': - ann_len += 1 - skip_tokens += 2 - else: - break - id_ = next_id - j += 1 - new_text = self._get_pseudo_ann( - ann=ann, - tag=tag, - length=ann_len - ) - new_text = regex.split('( )', new_text) - string_builder = string_builder[:start_id] + new_text + \ - string_builder[start_id + skip_tokens:] - if ann_len > 1: - consume(pseudo_ann_iter, ann_len - 1) - if math.ceil(len(new_text) / 2) != ann_len: - shifted_id += len(new_text) - (ann_len * 2) + 1 - self._pseudo_ann_list.clear() - return string_builder - - def _anonymize(self, sentence): - if self._method == 'delete': - for pattern, _, _, _ in self._category_anonymisation.values(): - sentence = regex.sub(pattern, '', sentence) - elif self._method == 'tag': - sentence = self._tagging(sentence) - elif self._method == 'pseudo': - sentence = self._pseudonymization(sentence) - return sentence - - def process(self, input_filename, output_filename): - """Anonymize the file in CCL format to the resulting file in plain text. - - Args: - input_filename (str): Input filename in CCL format. \ - Text tagged and processed with LINER. - output_filename (str): Output filename. - - """ - ccl_handler = CCLHandler(input_filename) - ccl_handler.process(output_filename, self.unmarshallers) diff --git a/src/anonymizers/russian_anonymizer.py b/src/anonymizers/russian_anonymizer.py deleted file mode 100644 index d9e6c07..0000000 --- a/src/anonymizers/russian_anonymizer.py +++ /dev/null @@ -1,191 +0,0 @@ -"""Implementation of anonymizer functionality for Russian language.""" -import math -import random - -import regex - - -from src.utils.utils import consume -from src.ccl_handler import CCLHandler -from src.base_anonymizer import BaseAnonymizer -from src.generators import (generate_pseudo_email, generate_pseudo_phone_number, - generate_pseudo_user, generate_pseudo_website, - generate_pseudo_date) - - -class RussianAnonymizer(BaseAnonymizer): - """Class with an anonymization implementation for the Russian language.""" - - date_regex = regex.compile( - r'\b(?P<day_or_month_year>' - r'(?P<day_month1>[0-3]?\d)(?P<punct1>[ \t\-\./,]{1,2})' - r'(?P<day_month2>[0-3]?\d)(?P<punct2>[ \t\-\./,]{1,2})' - r'(?P<year1>\d{4}|\d{2}))\b|' - - r'\b(?P<year_month_or_day>(?P<year2>\d{4}|\d{2})' - r'(?P<punct3>[ \t\-\./,]{1,2})(?P<day_month3>[0-3]?\d)' - r'(?P<punct4>[ \t\-\./,]{1,2})(?P<day_month4>[0-3]?\d))\b|' - - r'(?P<month_in_words>' - r'(?!\b(Янв|Фев|Мар|Ðпр|Май|Июн|Июл|Ðвг|Сен|Окт|ÐоÑ|Дек)\b)' - r'(?:(?P<day1>[0-3]?\d)(?P<punct5>[ \t\-\./,]{0,2}))?' - r'\b(?P<month>Янв(?:|ар[ьеÑ])|Фев(?:|рал[ьеÑ])|Мар(?:|Ñ‚|те|та)|' - r'Ðпр(?:|ел[ьеÑ])|Ма[йеÑ]|Июн(?:|[ьеÑ])|Июл(?:|[ьеÑ])|' - r'Ðвг(?:|уÑÑ‚|уÑÑ‚[еа])|Сен(?:|Ñ‚Ñбр[ьеÑ])|Окт(?:|Ñбр[ьеÑ])|' - r'ÐоÑ(?:|бр[ьеÑ])|Дек(?:|абр[ьеÑ]))\b' - r'((?:(?P<punct7>[ \t\-\./,]{0,2})(?P<day2>[0-3]?\d))' - r'(?:(?P<punct8>[ \t\-\./,]{1,2})(?P<year4>\d{4}|\d{2}))|' - r'(?:(?P<punct6>[ \t\-\./,]{0,2})(?P<year3>\d{4}|\d{2})))?' - r'(?<!\b(Янв|Фев|Мар|Ðпр|Май|Июн|Июл|Ðвг|Сен|Окт|ÐоÑ|Дек)\b))', regex.I - ) - - spacy_tag_map = { - 'PER': '[PERSON]', - 'LOC': '[LOCATION]', - } - - def __init__(self, task_options): - """Initialize anonymizer with base regexes.""" - super().__init__(task_options) - self.lang = task_options.get('language', 'ru') - # Order is important, first more specific - self._category_anonymisation = { - 'user': (self.user_regex, self._user_token, - generate_pseudo_user, {}), - 'email': (self.email_regex, self._mail_token, - generate_pseudo_email, {}), - 'website': (self.website_regex, self._website_token, - generate_pseudo_website, {}), - 'date': (self.date_regex, self._date_token, - generate_pseudo_date, {'lang': self.lang}), - 'phone_number': (self.phone_number_regex, self._digits_token, - generate_pseudo_phone_number, {}), - } - self.unmarshallers = { - 'chunk': lambda *args: '\n', - 'sentence': lambda *args: self._process_sent_tree(*args), - } - self._load_dict_file() - - def _load_dict_file(self, filename='ru_dict.txt'): - with open(filename, 'r', encoding='utf-8') as f: - for line in f.readlines(): - l_list = line.strip('\n').split() - cat, subtype = l_list[0], l_list[1] - length = len(l_list[2:]) - text = ' '.join(l_list[2:]) - if cat not in self._form_dict: - self._form_dict[cat] = {} - if subtype not in self._form_dict[cat]: - self._form_dict[cat][subtype] = [] - self._form_dict[cat][subtype].append((text, length)) - - def _handle_annotated(self, id, text, tag, ann): - if self._method == 'delete': - return '' - elif self._method == 'tag': - if ann in self.spacy_tag_map: - return self.spacy_tag_map[ann] - elif self._method == 'pseudo': - if ann in self.spacy_tag_map: - self._pseudo_ann_list.append((id, text, tag, ann)) - return text - - def _process_sentence(self, string_builder): - string_builder = self._handle_pseudo_ann(string_builder) - sentence = ''.join(string_builder) - return self._anonymize(sentence) - - def _get_pseudo_ann(self, ann, text, length): - new_text = [] - if ann == 'PER': - gen = random.choice(['FIRST_M', 'FIRST_F']) - name_length = length - 1 if length > 1 else 1 - while name_length > 0: - names = [p for p in self._form_dict['PERSON'][gen] - if p[1] <= name_length] - random_name = random.choice(names) - name_length -= random_name[1] - new_text.append(random_name[0]) - if length > 1: - last_name = random.choice(self._form_dict['PERSON']['LAST']) - new_text.append(last_name[0]) - elif ann == 'LOC': - found = False - for _, values in self._form_dict['LOCATION'].items(): - if ' '.join(text) in values: - new_text = [random.choice(values)[0]] - found = True - if not found: - default_loc = self._form_dict['LOCATION']['CITY'] - new_text = [random.choice(default_loc)[0]] - else: - new_text = ' '.join(text) - return ' '.join(new_text) - - def _handle_pseudo_ann(self, string_builder): - if not self._pseudo_ann_list: - return string_builder - shifted_id = 0 - pseudo_ann_iter = enumerate(iter(self._pseudo_ann_list)) - for i, (id_, text, _, ann) in pseudo_ann_iter: - j = i + 1 - start_id = id_ + shifted_id - ann_len = 1 - phrase = [text] - skip_tokens = 1 - while j < len(self._pseudo_ann_list): - next_id, next_text, _, next_ann = self._pseudo_ann_list[j] - next_id += shifted_id - if ann != next_ann: - break - if next_id == id_ + 1 and string_builder[next_id] == '-': - skip_tokens += 1 - elif next_id == id_ + 1 and string_builder[id_] == '-': - ann_len += 1 - skip_tokens += 1 - phrase.append(next_text) - elif next_id == id_ + 2 and string_builder[id_ + 1] == ' ': - ann_len += 1 - skip_tokens += 2 - phrase.append(next_text) - else: - break - id_ = next_id - j += 1 - new_text = self._get_pseudo_ann( - ann=ann, - text=phrase, - length=ann_len - ) - new_text = regex.split('( )', new_text) - string_builder = string_builder[:start_id] + new_text + \ - string_builder[start_id + skip_tokens:] - if ann_len > 1: - consume(pseudo_ann_iter, ann_len - 1) - if math.ceil(len(new_text) / 2) != ann_len: - shifted_id += len(new_text) - (ann_len * 2) + 1 - self._pseudo_ann_list.clear() - return string_builder - - def _anonymize(self, sentence): - if self._method == 'delete': - for pattern, _, _, _ in self._category_anonymisation.values(): - sentence = regex.sub(pattern, '', sentence) - elif self._method == 'tag': - sentence = self._tagging(sentence) - elif self._method == 'pseudo': - sentence = self._pseudonymization(sentence) - return sentence - - def process(self, input_filename, output_filename): - """Anonymize the file in CCL format to the resulting file in plain text. - - Args: - input_filename (str): Input filename in CCL format. \ - Text tagged and processed with spacy NER. - output_filename (str): Output filename. - - """ - ccl_handler = CCLHandler(input_filename) - ccl_handler.process(output_filename, self.unmarshallers) diff --git a/src/utils/__init__.py b/src/utils/__init__.py deleted file mode 100644 index a8b0bd1..0000000 --- a/src/utils/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from src.utils.utils import * \ No newline at end of file diff --git a/src/utils/utils.py b/src/utils/utils.py deleted file mode 100644 index c0035e6..0000000 --- a/src/utils/utils.py +++ /dev/null @@ -1,33 +0,0 @@ -"""Module for useful functions.""" - -import itertools - - -def consume(iterative, n): - """Consume n elements from iterative object. - - Args: - iterative (iter): Python iterative object. - n (int): Number of elements to consume. - - """ - next(itertools.islice(iterative, n - 1, n), None) - - -def subdict(dictionary, keys, all_must_be_present=True): - """Return a subdictionary of dictionary containing only keys. - - Args: - dictionary (dict): Dictionary to take a subdictionary from. - keys (list): List of keys to take from dictionary. - all_must_be_present (bool): If True, all keys must be present in - dictionary. If False, only keys that are present are returned. - - Returns: - dict: Subdictionary of dictionary containing only keys. - - """ - if all_must_be_present: - return {key: dictionary[key] for key in keys} - else: - return {key: dictionary[key] for key in keys if key in dictionary} \ No newline at end of file -- GitLab