diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5d7cd87773a46c98797d378a195ef8d818df1728..60459e3413a835a1885fdbe6471271c8aa5b7a68 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 # PolDeepNer2 Changelog
 
+## 0.6.7
+### Fixed
+- Fixed handling of tokens with empty subtokens.
+
 ## 0.6.6
 ### Added
 - Script for batch training.
diff --git a/Dockerfiles/base/Dockerfile b/Dockerfiles/base/Dockerfile
index 10f145e7ecd551a17a88a0ddab8b734f5e797b39..5a90bf1b8b9c6dabc778296fb801eeae0d8ef013 100644
--- a/Dockerfiles/base/Dockerfile
+++ b/Dockerfiles/base/Dockerfile
@@ -10,7 +10,7 @@ ENV LANG en_US.UTF-8
 ENV LANGUAGE en_US:en
 ENV LC_ALL en_US.UTF-8
 
-# Python 3.6
+# Python 3.8
 #RUN apt-get install -y software-properties-common vim
 RUN apt-get install -y python3.8 python3-pip
 RUN python3.8 --version
diff --git a/Dockerfiles/kpwr_n82_base/Dockerfile b/Dockerfiles/kpwr_n82_base/Dockerfile
index 509714a39b1681b2b4069db24e86754ce14c4b25..905a9c74fe3e7e101fbc40fbee9f9ece40f81f02 100644
--- a/Dockerfiles/kpwr_n82_base/Dockerfile
+++ b/Dockerfiles/kpwr_n82_base/Dockerfile
@@ -8,4 +8,4 @@ RUN rm kpwr_n82_base.zip
 
 EXPOSE 8000
 
-CMD python3.6 server.py --model models/kpwr_n82_base/kpwr_n82_base --pretrained_path xlmr:models/roberta_base_fairseq
+CMD python3.8 server.py --model models/kpwr_n82_base/kpwr_n82_base --pretrained_path xlmr:models/roberta_base_fairseq
diff --git a/Dockerfiles/kpwr_n82_large/Dockerfile b/Dockerfiles/kpwr_n82_large/Dockerfile
index e7759a7010478dfa8deb8131158361cae6fba07b..63bf6f62981ca3bbf0dde9c8ceb15022833258af 100644
--- a/Dockerfiles/kpwr_n82_large/Dockerfile
+++ b/Dockerfiles/kpwr_n82_large/Dockerfile
@@ -8,4 +8,4 @@ RUN rm roberta_large_fairseq.zip
 
 EXPOSE 8000
 
-CMD python3.6 server.py --model models/kpwr_n82_large/kpwr_n82_large --pretrained_path xlmr:models/roberta_base_fairseq
+CMD python3.8 server.py --model models/kpwr_n82_large/kpwr_n82_large --pretrained_path xlmr:models/roberta_base_fairseq
diff --git a/Dockerfiles/merged-base/Dockerfile b/Dockerfiles/merged-base/Dockerfile
index f7590f453d92af4760be8d3de85172e76b73fe9d..7ac3c86fad02f4cfbf75c0843add55cb368becb0 100644
--- a/Dockerfiles/merged-base/Dockerfile
+++ b/Dockerfiles/merged-base/Dockerfile
@@ -10,9 +10,9 @@ ENV LANG en_US.UTF-8
 ENV LANGUAGE en_US:en
 ENV LC_ALL en_US.UTF-8
 
-# Python 3.6
+# Python 3.8
 RUN apt-get install -y software-properties-common vim
-RUN apt-get install -y python3.6 python3-pip
+RUN apt-get install -y python3.8 python3-pip
 
 # update pip
 RUN pip3 install pip --upgrade
@@ -22,7 +22,7 @@ RUN pip3 install wheel
 WORKDIR "/poldeepner2"
 ADD ./requirements.txt /poldeepner2/requirements.txt
 RUN pip3 install -r requirements.txt
-RUN python3.6 -m spacy download pl_core_news_sm
+RUN python3.8 -m spacy download pl_core_news_sm
 
 RUN apt-get install -y wget
 RUN apt-get install -y unzip
@@ -43,4 +43,4 @@ COPY . .
 
 EXPOSE 8000
 
-CMD python3.6 server.py --model models/kpwr_n82_base/kpwr_n82_base --pretrained_path xlmr:models/roberta_base_fairseq
+CMD python3.8 server.py --model models/kpwr_n82_base/kpwr_n82_base --pretrained_path xlmr:models/roberta_base_fairseq
diff --git a/Dockerfiles/merged-large/Dockerfile b/Dockerfiles/merged-large/Dockerfile
index 425df77af810526ecfa8bc8ca5d6c904104db03f..1592e7d6ec1425c7aea725541309bd541801dafa 100644
--- a/Dockerfiles/merged-large/Dockerfile
+++ b/Dockerfiles/merged-large/Dockerfile
@@ -10,9 +10,9 @@ ENV LANG en_US.UTF-8
 ENV LANGUAGE en_US:en
 ENV LC_ALL en_US.UTF-8
 
-# Python 3.6
+# Python 3.8
 RUN apt-get install -y software-properties-common vim
-RUN apt-get install -y python3.6 python3-pip
+RUN apt-get install -y python3.8 python3-pip
 
 # update pip
 RUN pip3 install pip --upgrade
@@ -22,7 +22,7 @@ RUN pip3 install wheel
 WORKDIR "/poldeepner2"
 ADD ./requirements.txt /poldeepner2/requirements.txt
 RUN pip3 install -r requirements.txt
-RUN python3.6 -m spacy download pl_core_news_sm
+RUN python3.8 -m spacy download pl_core_news_sm
 
 RUN apt-get install -y wget
 RUN apt-get install -y unzip
@@ -43,4 +43,4 @@ COPY . .
 
 EXPOSE 8000
 
-CMD python3.6 server.py --model models/kpwr_n82_large/kpwr_n82_large --pretrained_path xlmr:models/roberta_base_fairseq
+CMD python3.8 server.py --model models/kpwr_n82_large/kpwr_n82_large --pretrained_path xlmr:models/roberta_base_fairseq
diff --git a/Dockerfiles/nkjp_base/Dockerfile b/Dockerfiles/nkjp_base/Dockerfile
index c44168190ccb10b0e03c0d5f711b899685d3c144..6950e343cfdee53d8a521d1615fd397153c506f3 100644
--- a/Dockerfiles/nkjp_base/Dockerfile
+++ b/Dockerfiles/nkjp_base/Dockerfile
@@ -8,4 +8,4 @@ RUN rm nkjp_base.zip
 
 EXPOSE 8000
 
-CMD python3.6 server.py --model models/nkjp_base/nkjp_base --pretrained_path xlmr:models/roberta_base_fairseq
+CMD python3.8 server.py --model models/nkjp_base/nkjp_base --pretrained_path xlmr:models/roberta_base_fairseq
diff --git a/README.md b/README.md
index e1fef629dbdfcd915882eecba958b242b5220283..0fc14621ebb22e7febfdb031195a961411a22313 100644
--- a/README.md
+++ b/README.md
@@ -18,9 +18,9 @@ It offers a set of pretrained models for Polish. The main features are:
 
 ### Requirements
 
-* Python 3.6
+* Python 3.8
 * CUDA 10.0+
-* PyTorch 1.7
+* PyTorch 1.9
 
 ### Virtual environment 
 
@@ -29,7 +29,7 @@ It offers a set of pretrained models for Polish. The main features are:
 ```
 sudo apt-get install python3-pip python3-dev python-virtualenv
 sudo pip install -U pip
-virtualenv -p python3.6 venv
+virtualenv -p python3.8 venv
 source venv/bin/activate
 pip install -U pip
 pip install -r requirements.txt
@@ -38,9 +38,9 @@ pip install -r requirements.txt
 #### Conda
 
 ```
-conda create -n pdn2 python=3.6
+conda create -n pdn2 python=3.8
 conda activate pdn2
-conda install -c anaconda cudatoolkit=10.1
+conda install -c anaconda cudatoolkit=10.2
 conda install -c anaconda cudnn
 pip install -r requirements.txt
 ```
diff --git a/augment_dataset.py b/augment_dataset.py
index 29c4fd3c6e10126de40f45715a37f8c094589a92..53e06f0a8c9c3d9303b380cebb7225936f88ec5e 100644
--- a/augment_dataset.py
+++ b/augment_dataset.py
@@ -1,3 +1,5 @@
+"""A message of shame -- documentation must be completed."""
+
 from __future__ import absolute_import, division, print_function
 
 import argparse
@@ -9,12 +11,26 @@ from poldeepner2.utils.data_utils import read_tsv
 
 
 def write_sentence(fout: str, tokens: List[str], labels: List[str]):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        fout: str
+        tokens: List[str]
+        labels: List[str]
+
+    """
     for token, label in zip(tokens, labels):
         fout.write("%s\t%s\n" % (token, label))
     fout.write("\n")
 
 
 def main(args):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        args:A message of shame -- documentation must be completed.
+
+    """
     sentences_labels = read_tsv(args.input, True)
     with codecs.open(args.output, "w", "utf8") as fout:
         for sentence, labels in sentences_labels:
@@ -23,22 +39,33 @@ def main(args):
         if args.upper:
             logging.info("Augment data — upper case")
             for sentence, labels in sentences_labels:
-                write_sentence(fout, [token.upper() for token in sentence], labels)
+                write_sentence(fout, [token.upper() for token in sentence],
+                               labels)
 
         if args.lower:
             logging.info("Augment data — lower case")
             for sentence, labels in sentences_labels:
-                write_sentence(fout, [token.lower() for token in sentence], labels)
+                write_sentence(fout, [token.lower() for token in sentence],
+                               labels)
 
 
 def parse_args():
+    """A message of shame -- documentation must be completed.
+
+    Returns: parser.parse_args()
+
+    """
     parser = argparse.ArgumentParser(
         description='Process a single TSV with a NER model')
-    parser.add_argument('--input', required=True, metavar='PATH', help='path to a TSV file')
-    parser.add_argument('--output', required=True, metavar='PATH', help='path to save the augmented dataset')
-    parser.add_argument('--lower', required=False, default=False, action="store_true",
+    parser.add_argument('--input', required=True, metavar='PATH',
+                        help='path to a TSV file')
+    parser.add_argument('--output', required=True, metavar='PATH',
+                        help='path to save the augmented dataset')
+    parser.add_argument('--lower', required=False, default=False,
+                        action="store_true",
                         help='augment lower-case data')
-    parser.add_argument('--upper', required=False, default=False, action="store_true",
+    parser.add_argument('--upper', required=False, default=False,
+                        action="store_true",
                         help='augment upper-case data')
     return parser.parse_args()
 
diff --git a/config.cfg b/config.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..78e00ba3dcaaeab11e65a7a1921e7e424b3f91ef
--- /dev/null
+++ b/config.cfg
@@ -0,0 +1,55 @@
+[model]
+device = cpu
+gpu_num = 0
+path = /mnt/sda/pdn2scripts/nkjp_base
+pretrained_path = /mnt/sda/pdn2scripts/roberta_base
+
+[predict]
+device = cpu
+save_to_file = true
+path = /mnt/sda/pdn2scripts/roberta_base
+max_seq_len = 100
+path_to_save = predict_res.txt
+
+[evaluate]
+device = cpu
+gpu_num = 0
+path = E:/ClarinProjects/nkjp_base
+pretrained_path = ./roberta_base
+squeeze = false
+max_seq_len = 100
+hidden_size = 32
+dropout = 0.05
+
+[data]
+tag_column_index = 3
+eval_path = data/coNLL-2003/test.txt
+pred_path = tests/resources/text_krakow.txt
+
+[train]
+adam_epsilon = 0.1
+data_test = data/coNLL-2003/test.txt
+data_train = data/coNLL-2003/train.txt
+data_tune = data/coNLL-2003/valid.txt
+device = cuda
+dropout = 0.05
+epoch_save_model = True
+eval_batch_size = 16
+fp16 = false
+fp16_opt_level = ''
+freeze_model = True
+gradient_accumulation_steps = 5
+hidden_size = 32
+learning_rate = 0.001
+max_grad_norm = 5
+max_seq_length = 32
+num_train_epochs = 100
+output_dir = test_res
+pretrained_path = /mnt/sda/pdn2scripts/roberta_base
+seed = 42
+squeeze = true
+train_batch_size = 16
+training_mix = False
+transfer = None
+warmup_proportion = 0.3
+weight_decay = 0.1
diff --git a/core/poldeepner.py b/core/poldeepner.py
new file mode 100644
index 0000000000000000000000000000000000000000..317c5f2b53791afd26a38550d32920190e75bfe2
--- /dev/null
+++ b/core/poldeepner.py
@@ -0,0 +1,161 @@
+"""A message of shame -- documentation must be completed."""
+
+import codecs
+import os
+import torch
+# "import tqdm" removed: not used
+from torch.utils.data.dataloader import DataLoader
+
+from core.model.xlmr_for_token_classification import XLMRForTokenClassification
+from core.utils.data_utils import InputExample, convert_examples_to_features, \
+    create_dataset, read_params, wrap_annotations, align_tokens_with_text
+from core.utils.tokenization import TokenizerSpaces
+
+
+class PolDeepNer2:
+    """A message of shame -- documentation must be completed."""
+
+    def __init__(self, model_path, pretrained_path,
+                 device="cpu", squeeze=False, max_seq_length=256,
+                 tokenizer=TokenizerSpaces()):
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            model_path:A message of shame -- documentation must be completed.
+            pretrained_path:A message of shame -- documentation must be
+            completed.
+            device:A message of shame -- documentation must be completed.
+            squeeze:A message of shame -- documentation must be completed.
+            max_seq_length:A message of shame -- documentation must be
+            completed.
+            tokenizer:A message of shame -- documentation must be completed.
+
+        """
+        if not os.path.exists(model_path):
+            raise ValueError("Model not found on path '%s'" % model_path)
+
+        if not os.path.exists(pretrained_path):
+            raise ValueError("RoBERTa language model not found on path '%s'"
+                             % pretrained_path)
+
+        dropout, num_labels, label_list = read_params(model_path)
+        self.label_list = label_list
+        model = XLMRForTokenClassification(pretrained_path=pretrained_path,
+                                           n_labels=len(self.label_list) + 1,
+                                           dropout_p=dropout,
+                                           device=device,
+                                           hidden_size=768
+                                           if 'base' in pretrained_path
+                                           else 1024)
+        state_dict = torch.load(
+            open(os.path.join(model_path, 'model.pt'), 'rb'))
+        model.load_state_dict(state_dict)
+        model.eval()
+        model.to(device)
+        self.model = model
+        self.device = device
+        self.squeeze = squeeze
+        self.max_seq_length = max_seq_length
+        self.tokenizer = tokenizer
+
+    @staticmethod
+    def load_labels(path):
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            path:A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
+        return [line.strip() for line in codecs.open(
+            path, "r", "utf8").readlines() if len(line.strip()) > 0]
+
+    def process(self, sentences):
+        """A message of shame -- documentation must be completed.
+
+        @param sentences -- array of array of words,
+        [['Jan', 'z', 'Warszawy'], ['IBM', 'i', 'Apple']]
+        @param max_seq_length -- the maximum total input sequence length after
+        WordPiece tokenization
+        @param squeeze -- boolean enabling squeezing multiple sentences into
+        one Input Feature
+        """
+        examples = []
+        for idx, tokens in enumerate(sentences):
+            guid = str(idx)
+            text_a = ' '.join(tokens)
+            label = ["O"] * len(tokens)
+            examples.append(InputExample(guid=guid, text_a=text_a,
+                                         text_b=None, label=label))
+
+        eval_features = convert_examples_to_features(examples,
+                                                     self.label_list,
+                                                     self.max_seq_length,
+                                                     self.model.encode_word,
+                                                     self.squeeze)
+        eval_dataset = create_dataset(eval_features)
+        eval_dataloader = DataLoader(eval_dataset, batch_size=1)
+
+        y_pred = []
+        sum_pred = []
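+        # label ids start at 1; index 0 is reserved as the ignore index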
+        label_map = {i: label for i, label in enumerate(self.label_list, 1)}
+
+        for input_ids, label_ids, l_mask, valid_ids in eval_dataloader:
+            input_ids = input_ids.to(self.device)
+            label_ids = label_ids.to(self.device)
+            valid_ids = valid_ids.to(self.device)
+
+            with torch.no_grad():
+                logits = self.model(input_ids, labels=None,
+                                    labels_mask=None, valid_mask=valid_ids)
+
+            logits = torch.argmax(logits, dim=2)
+            logits = logits.detach().cpu().numpy()
+            label_ids = label_ids.cpu().numpy()
+            for i, cur_label in enumerate(label_ids):
+                temp_1 = []
+                temp_2 = []
+                for j, m in enumerate(cur_label):
+                    if valid_ids[i][j]:
+                        temp_1.append(label_map[m])
+                        temp_2.append(label_map[logits[i][j]])
+                assert len(temp_1) == len(temp_2)
+                if self.squeeze:
+                    sum_pred.extend(temp_2)
+                else:
+                    y_pred.append(temp_2)
+        # when sentences were squeezed into shared features, split the
+        # flat prediction list back into per-sentence lists
+        if self.squeeze:
+            pointer = 0
+            for sentence in sentences:
+                y_pred.append(sum_pred[pointer:pointer + len(sentence)])
+                pointer += len(sentence)
+        return y_pred
+
+    def process_text(self, text: str):
+        """A message of shame -- documentation must be completed.
+
+        @texts: Array of sentences. Each sentence is a string.
+                "John lives in New York. Mary lives in Chicago"
+
+        return:[(PER, 0, 4, "John"), (LOC, 14, 22, "New York"),
+                (PER, 24, 28, "Mary"), (LOC, 38, 45, "Chicago")]]
+        """
+        sentences = self.tokenizer.tokenize([text])
+        predictions = self.process(sentences)
+        annotations = wrap_annotations(predictions)
+        return align_tokens_with_text(text, sentences, annotations)
+
+    def process_tokenized(self, tokens: [[str]], text: str):
+        """A message of shame -- documentation must be completed.
+
+        @tokens: Array of sentences. Each sentence is an array of words.
+                 [["John", "lives", "in", "New", "York"],
+                  ["Mary", "lives", "in", "Chicago"]]
+
+        return: [["B-PER", "O", "O", "B-LOC", "I-LOC"],
+                 ["B-PER", "O", "O", "B-LOC"]]
+        """
+        predictions = self.process(tokens)
+        annotations = wrap_annotations(predictions)
+        return align_tokens_with_text(text, tokens, annotations)
diff --git a/evaluate_tsv.py b/evaluate_tsv.py
index c346cb762b845490e16c809088c9a7c2b7791729..54eac10016c88007f18d65b0eae75ab35cf82b26 100644
--- a/evaluate_tsv.py
+++ b/evaluate_tsv.py
@@ -1,10 +1,12 @@
+"""A message of shame -- documentation must be completed."""
+
 from __future__ import absolute_import, division, print_function
 
 import argparse
 import os
-from time import time
 
 import time
+# "from time import time" removed: F811 redefinition of unused 'time'
 
 import poldeepner2
 from poldeepner2.utils.data_utils import read_tsv
@@ -13,6 +15,12 @@ from poldeepner2.utils.sequence_labeling import classification_report
 
 
 def main(args):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        args:A message of shame -- documentation must be completed.
+
+    """
     print("Loading the NER model ...")
 
     ner = poldeepner2.load(args.model, device=args.device)
@@ -51,6 +59,11 @@ def main(args):
 
 
 def parse_args():
+    """A message of shame -- documentation must be completed.
+
+    Returns: parser.parse_args()
+
+    """
     parser = argparse.ArgumentParser(
         description='Process a single TSV with a NER model')
     parser.add_argument('--input', required=True, metavar='PATH', help='path to a file with a list of files')
diff --git a/evaluator.py b/evaluator.py
new file mode 100644
index 0000000000000000000000000000000000000000..006204ddfe844d3787b3c91fa179895e862ad37c
--- /dev/null
+++ b/evaluator.py
@@ -0,0 +1,91 @@
+"""Script for evaluating models on a pre-defined set of data."""
+
+import configparser
+import os
+import time
+
+from poldeepner2.utils.data_utils import NerProcessor, create_dataset, \
+    convert_examples_to_features
+from poldeepner2.utils.train_utils import evaluate_model
+
+
+def main():
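+    """Evaluate the model defined in config.cfg and print the results."""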
+    config_file = "config.cfg"
+    config = configparser.ConfigParser()
+    config.read(config_file)
+
+    pretrained_model = config['evaluate']['pretrained_path']
+
+    device = config['evaluate']['device']
+    squeeze = config.getboolean('evaluate', 'squeeze')
+    tag_column_index = config.getint('data', 'tag_column_index')
+    processor = NerProcessor()
+
+    data_path = config['data']['eval_path']
+    datasets = [data_path]
+
+    labels_list = processor.get_labels(datasets)
+
+    num_labels = len(labels_list) + 1
+    hidden_size = config.getint('evaluate', 'hidden_size')
+    dropout = config.getfloat('evaluate', 'dropout')
+
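+    # Infer the hidden size from the pretrained model name; fall back
+    # to the configured value when the variant cannot be recognized.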
+    hidden_size = 1024 if 'large' in pretrained_model \
+        else (768 if 'base' in pretrained_model else hidden_size)
+
+    pretrained_path = config['model']['pretrained_path']
+
+    if pretrained_path.startswith("hf:"):
+        from poldeepner2.model.hf_for_token_calssification \
+            import HfModelForTokenClassification
+        pretrained_dir = pretrained_path.split(':')[1]
+        model = HfModelForTokenClassification(
+            pretrained_path=pretrained_dir, n_labels=num_labels,
+            hidden_size=hidden_size, dropout_p=dropout,
+            device=device)
+    elif pretrained_path.startswith("mt5:"):
+        from poldeepner2.model.mt5_for_token_calssification \
+            import Mt5ModelForTokenClassification
+        variant = pretrained_path.split(':')[1]
+        model = Mt5ModelForTokenClassification(
+            variant=variant, n_labels=num_labels,
+            hidden_size=hidden_size, dropout_p=dropout, device=device)
+    else:
+        from poldeepner2.model.xlmr_for_token_classification \
+            import XLMRForTokenClassification
+        pretrained_dir = pretrained_path
+        if ":" in pretrained_dir:
+            pretrained_dir = pretrained_dir.split(':')[1]
+        if not os.path.exists(pretrained_dir):
+            raise ValueError("RoBERTa language model not found on path '%s'"
+                             % pretrained_dir)
+
+        model = XLMRForTokenClassification(
+            pretrained_path=pretrained_dir, n_labels=num_labels,
+            hidden_size=hidden_size, dropout_p=dropout,
+            device=device)
+
+    max_seq_len = config.getint('evaluate', 'max_seq_len')
+
+    eval_examples = processor.get_examples(datasets[0], tag_column_index,
+                                           'eval')
+
+    eval_features = convert_examples_to_features(
+        eval_examples, labels_list, max_seq_len, model.encode_word,
+        squeeze=squeeze)
+
+    eval_data = create_dataset(eval_features)
+
+    time_start = time.time()
+    f1, report = evaluate_model(model, eval_data, labels_list, 16, device)
+    time_end = time.time()
+    print(f' f1: {f1}')
+    print(f' report {report}')
+    print(f'time {time_end - time_start}')
+
+
+if __name__ == "__main__":
+    main()
diff --git a/poldeepner2/data/document.py b/poldeepner2/data/document.py
index 750a743f9438a74eb6506a237542942d3ef8bc7f..2b0400df34ea086ef98cb6f9de2592389afef2de 100644
--- a/poldeepner2/data/document.py
+++ b/poldeepner2/data/document.py
@@ -1,11 +1,25 @@
+"""A message of shame -- documentation must be completed."""
+
 from poldeepner2.data.span import Span
 from poldeepner2.data.token import Token
 from poldeepner2.utils.annotation import Annotation
 
 
 class Document:
+    """A message of shame -- documentation must be completed."""
+
+    def __init__(self, content: str,
+                 tokens: [Token] = [], sentences: [Span] = [],
+                 annotations: [Annotation] = []):
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            content:A message of shame -- documentation must be completed.
+            tokens:A message of shame -- documentation must be completed.
+            sentences:A message of shame -- documentation must be completed.
+            annotations:A message of shame -- documentation must be completed.
 
-    def __init__(self, content: str, tokens: [Token] = [], sentences: [Span] = [], annotations: [Annotation] = []):
+        """
         self.content = content
         self.tokens = tokens
         self.annotations = annotations
diff --git a/poldeepner2/data/span.py b/poldeepner2/data/span.py
index 36c1307a0c9bbdac0ec042678cf43efed2541e5c..d13e043d90705db299fd21a1e8e931d77f88a581 100644
--- a/poldeepner2/data/span.py
+++ b/poldeepner2/data/span.py
@@ -1,16 +1,26 @@
+"""A message of shame -- documentation must be completed."""
+
 from dataclasses import dataclass
 
 
 @dataclass
 class Span:
-    """
+    """A message of shame -- documentation must be completed.
+
     Args:
-        orth (str):
         start (int): Index of the first token.
         end (int): Index of the last token +1.
+
     """
+
     start: int
     end: int
 
     def __str__(self):
+        """A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
         return f"Span(begin={self.begin},end={self.end})"
diff --git a/poldeepner2/data/token.py b/poldeepner2/data/token.py
index a4733b925601bdcbb1ebaba9ca0ec509806cf132..c8120a6aea0dd119c54c4b6985b4718cb96e2219 100644
--- a/poldeepner2/data/token.py
+++ b/poldeepner2/data/token.py
@@ -1,9 +1,12 @@
+"""A message of shame -- documentation must be completed."""
+
 from dataclasses import dataclass
 
 
 @dataclass
 class Token:
-    """
+    """A message of shame -- documentation must be completed.
+
     Args:
         orth (str):
         start (int): Index of the first orth character in the original text.
@@ -12,7 +15,9 @@ class Token:
         ws (str): White spaces after the token in the original text.
         morph (str):
         eos (str): True if the token ends a sentence.
+
     """
+
     orth: str
     start: int
     end: int
@@ -22,4 +27,9 @@ class Token:
     eos: bool = False
 
     def __str__(self):
+        """A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
         return f"Token(orth={self.orth},lemma={self.lemma},morph={self.morph})"
diff --git a/poldeepner2/io/debug.py b/poldeepner2/io/debug.py
index 3b4c9aa7c6c7fe86aeeb81f03dd34906f664843e..269c9ff7327e167fccd9d711cf141d99e798ca85 100644
--- a/poldeepner2/io/debug.py
+++ b/poldeepner2/io/debug.py
@@ -1,7 +1,17 @@
+"""A message of shame -- documentation must be completed."""
+
 import logging
 
 
 def debug_tokens_and_labels(tokenized_sentences, predictions):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        tokenized_sentences:A message of shame -- documentation must be
+        completed.
+        predictions:A message of shame -- documentation must be completed.
+
+    """
     for tokens, labels in zip(tokenized_sentences, predictions):
         for token, label in zip(tokens, labels):
             logging.debug(f"TOKENIZATION: {token}\t{label}")
diff --git a/poldeepner2/model/hf_for_token_calssification.py b/poldeepner2/model/hf_for_token_calssification.py
index fab1dcf3cdca323a840c304bd97efb36750b4f22..504a0c864cbfcd12530b2d32bfb11d4c6d82efb7 100644
--- a/poldeepner2/model/hf_for_token_calssification.py
+++ b/poldeepner2/model/hf_for_token_calssification.py
@@ -1,3 +1,5 @@
+"""A message of shame -- documentation must be completed."""
+
 from pathlib import Path
 import yaml
 from typing import List
@@ -63,22 +65,24 @@ class Pdn2TokenClassification(nn.Module):
             self.model = AutoModel.from_pretrained(path)
 
     def forward(self, inputs_ids, labels, labels_mask, valid_mask):
-        '''
-        Computes a forward pass through the sequence tagging model.
+        """Computes a forward pass through the sequence tagging model.
+
         Args:
             inputs_ids: tensor of size (bsz, max_seq_len). padding idx = 1
             labels: tensor of size (bsz, max_seq_len)
-            labels_mask and valid_mask: indicate where loss gradients should be propagated and where 
+            labels_mask: indicates where loss gradients should be
+             propagated and where
             labels should be ignored
+            valid_mask: indicates which sub-token positions are valid
 
         Returns :
             logits: unnormalized model outputs.
             loss: Cross Entropy loss between labels and logits
 
-        '''
+        """
         self.model.train()
 
-        transformer_out  = self.model(inputs_ids, return_dict=True)[0]
+        transformer_out = self.model(inputs_ids, return_dict=True)[0]
         out_1 = F.relu(self.linear_1(transformer_out))
         out_1 = self.dropout(out_1)
         logits = self.classification_head(out_1)
@@ -100,8 +104,14 @@ class Pdn2TokenClassification(nn.Module):
             return logits
 
     def encode_word(self, s):
-        """
-        takes a string and returns a list of token ids
+        """Takes a string and returns a list of token ids.
+
+        Args:
+            self:A message of shame -- documentation must be completed.
+            s:A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
         """
         tensor_ids = self.tokenizer.encode(s)
         # remove <s> and </s> ids
diff --git a/poldeepner2/model/mt5_for_token_calssification.py b/poldeepner2/model/mt5_for_token_calssification.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a2e4aa8a7308e510d0238dc49a4030c5aa6af3e
--- /dev/null
+++ b/poldeepner2/model/mt5_for_token_calssification.py
@@ -0,0 +1,104 @@
+"""A message of shame -- documentation must be completed."""
+
+import torch.nn as nn
+import torch.nn.functional as F
+from transformers import T5Tokenizer, T5ForConditionalGeneration
+
+
+class Mt5ModelForTokenClassification(nn.Module):
+    """A message of shame -- documentation must be completed."""
+
+    def __init__(self, variant, n_labels, hidden_size=768, dropout_p=0.2,
+                 label_ignore_idx=0,
+                 head_init_range=0.04, device='cuda'):
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            variant: A message of shame -- documentation must be completed.
+            n_labels: A message of shame -- documentation must be completed.
+            hidden_size: A message of shame -- documentation must be completed.
+            dropout_p: A message of shame -- documentation must be completed.
+            label_ignore_idx: A message of shame -- documentation must be
+            completed.
+            head_init_range: A message of shame -- documentation must be
+            completed.
+            device: A message of shame -- documentation must be completed.
+
+        """
+        super().__init__()
+
+        self.n_labels = n_labels
+
+        self.linear_1 = nn.Linear(hidden_size, hidden_size)
+        self.classification_head = nn.Linear(hidden_size, n_labels)
+
+        self.label_ignore_idx = label_ignore_idx
+
+        # self.tokenizer = AutoTokenizer.from_pretrained(pretrained_path)
+        # self.model = AutoModel.from_pretrained(pretrained_path)
+        self.tokenizer = T5Tokenizer.from_pretrained(
+            f"google/mt5-{variant}")
+        self.model = T5ForConditionalGeneration.from_pretrained(
+            f'google/mt5-{variant}')
+
+        self.dropout = nn.Dropout(dropout_p)
+        self.device = device
+
+        # initializing classification head
+        self.classification_head.weight.data.normal_(mean=0.0,
+                                                     std=head_init_range)
+
+    def forward(self, inputs_ids, labels,
+                labels_mask, valid_mask):
+        """Computes a forward pass through the sequence tagging model.
+
+        Args:
+            inputs_ids: tensor of size (bsz, max_seq_len). padding idx = 1
+            labels: tensor of size (bsz, max_seq_len)
+            labels_mask: indicates where loss gradients should be
+             propagated and where
+            labels should be ignored
+            valid_mask: indicates which sub-token positions are valid
+
+        Returns:
+            logits: unnormalized model outputs.
+            loss: Cross Entropy loss between labels and logits
+
+        """
+        self.model.train()
+
+        transformer_out = self.model.encoder(input_ids=inputs_ids,
+                                             return_dict=True)[0]
+        out_1 = F.relu(self.linear_1(transformer_out))
+        out_1 = self.dropout(out_1)
+        logits = self.classification_head(out_1)
+
+        if labels is not None:
+            loss_fct = nn.CrossEntropyLoss(ignore_index=self.label_ignore_idx)
+            # Only keep active parts of the loss
+            if labels_mask is not None:
+                active_loss = valid_mask.view(-1) == 1
+
+                active_logits = logits.view(-1, self.n_labels)[active_loss]
+                active_labels = labels.view(-1)[active_loss]
+                loss = loss_fct(active_logits, active_labels)
+            else:
+                loss = loss_fct(
+                    logits.view(-1, self.n_labels), labels.view(-1))
+            return loss
+        else:
+            return logits
+
+    def encode_word(self, s):
+        """Takes a string and returns a list of token ids.
+
+        Args:
+            self:A message of shame -- documentation must be completed.
+            s:A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
+        tensor_ids = self.tokenizer.encode(s)
+        # remove last special
+        return tensor_ids[0:-1]
diff --git a/poldeepner2/model/xlmr_for_token_classification.py b/poldeepner2/model/xlmr_for_token_classification.py
new file mode 100644
index 0000000000000000000000000000000000000000..dce3481934501400cfc681aa452d58d5a9b31ca4
--- /dev/null
+++ b/poldeepner2/model/xlmr_for_token_classification.py
@@ -0,0 +1,93 @@
+"""A message of shame -- documentation must be completed."""
+
+from fairseq.models.roberta import XLMRModel
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class XLMRForTokenClassification(nn.Module):
+    """A message of shame -- documentation must be completed."""
+
+    def __init__(self, pretrained_path, n_labels,
+                 hidden_size, dropout_p=0.2, label_ignore_idx=0,
+                 head_init_range=0.04, device='cuda'):
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            pretrained_path:A message of shame -- documentation must be
+            completed.
+            n_labels:A message of shame -- documentation must be completed.
+            hidden_size:A message of shame -- documentation must be completed.
+            dropout_p:A message of shame -- documentation must be completed.
+            label_ignore_idx:A message of shame -- documentation must be
+            completed.
+            head_init_range:A message of shame -- documentation must be
+            completed.
+            device:A message of shame -- documentation must be completed.
+
+        """
+        super().__init__()
+
+        self.n_labels = n_labels
+        self.linear_1 = nn.Linear(hidden_size, hidden_size)
+        self.classification_head = nn.Linear(hidden_size, n_labels)
+        self.label_ignore_idx = label_ignore_idx
+
+        self.xlmr = XLMRModel.from_pretrained(pretrained_path)
+        self.model = self.xlmr.model
+        self.dropout = nn.Dropout(dropout_p)
+        self.device = device
+
+        # initializing classification head
+        self.classification_head.weight.data.normal_(mean=0.0,
+                                                     std=head_init_range)
+
+    def forward(self, inputs_ids, labels, labels_mask, valid_mask):
+        """Computes a forward pass through the sequence tagging model.
+
+        Args:
+            inputs_ids: tensor of size (bsz, max_seq_len). padding idx = 1
+            labels: tensor of size (bsz, max_seq_len)
+            labels_mask: indicates where loss gradients should be
+             propagated and where
+            labels should be ignored
+            valid_mask: indicates which sub-token positions are valid
+
+        Returns:
+            logits: unnormalized model outputs.
+            loss: Cross Entropy loss between labels and logits
+
+        """
+        transformer_out, _ = self.model(inputs_ids, features_only=True)
+
+        out_1 = F.relu(self.linear_1(transformer_out))
+        out_1 = self.dropout(out_1)
+        logits = self.classification_head(out_1)
+        if labels is not None:
+            loss_fct = nn.CrossEntropyLoss(ignore_index=self.label_ignore_idx)
+            # Only keep active parts of the loss
+            if labels_mask is not None:
+                active_loss = valid_mask.view(-1) == 1
+                active_logits = logits.view(-1, self.n_labels)[active_loss]
+                active_labels = labels.view(-1)[active_loss]
+                loss = loss_fct(active_logits, active_labels)
+            else:
+                loss = loss_fct(
+                    logits.view(-1, self.n_labels), labels.view(-1))
+            return loss
+        else:
+            return logits
+
+    def encode_word(self, s):
+        """Takes a string and returns a list of token ids.
+
+        Args:
+            self:A message of shame -- documentation must be completed.
+            s:A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
+        tensor_ids = self.xlmr.encode(s)
+        # remove <s> and </s> ids
+        return tensor_ids.cpu().numpy().tolist()[1:-1]
diff --git a/poldeepner2/models.py b/poldeepner2/models.py
index 81e84a8636cde37f4972936a809eba5ece29d05f..368257bde4eaccfa55102137283ec146a56207d1 100644
--- a/poldeepner2/models.py
+++ b/poldeepner2/models.py
@@ -1,3 +1,6 @@
+"""A message of shame -- documentation must be completed."""
+
+import logging
 import os
 from typing import List
 
@@ -18,6 +21,7 @@ from poldeepner2.utils.sequences import convert_examples_to_features
 
 
 class PolDeepNer2:
+    """A message of shame -- documentation must be completed."""
 
     def __init__(self, path: str, tokenizer: Tokenizer = None,
                  processor_annotations: List[ProcessorAnnotations] = None, device: str = None):
@@ -39,7 +43,8 @@ class PolDeepNer2:
             text_a = ' '.join(tokens)
             text_b = None
             label = ["O"] * len(tokens)
-            examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
+            examples.append(InputExample(guid=guid, text_a=text_a,
+                                         text_b=text_b, label=label))
 
         eval_features = convert_examples_to_features(examples, self.model.config.labels,
                                                      self.model.config.max_seq_length,
@@ -51,7 +56,8 @@ class PolDeepNer2:
         label_map = {i: label for i, label in enumerate(self.model.config.labels, 1)}
 
         if show_progress:
-            outer = tqdm.tqdm(total=len(eval_dataloader), desc='Processing', position=0)
+            outer = tqdm.tqdm(total=len(eval_dataloader), desc='Processing',
+                              position=0)
         for input_ids, label_ids, l_mask, valid_ids in eval_dataloader:
             if show_progress:
                 outer.update(1)
@@ -60,7 +66,8 @@ class PolDeepNer2:
             valid_ids = valid_ids.to(self.model.config.device)
 
             with torch.no_grad():
-                logits = self.model(input_ids, labels=None, labels_mask=None, valid_mask=valid_ids)
+                logits = self.model(input_ids, labels=None, labels_mask=None,
+                                    valid_mask=valid_ids)
 
             logits = torch.argmax(logits, dim=2)
             logits = logits.detach().cpu().numpy()
@@ -73,7 +80,8 @@ class PolDeepNer2:
 
         token_count = sum([len(s) for s in sentences])
         assert token_count == len(y_pred), \
-            f"The number of returned labels differ from the number of tokens. Number of tokens: {token_count}, " \
+            f"The number of returned labels differ from the number of " \
+            f"tokens. Number of tokens: {token_count}, " \
             f"number of labels: {len(y_pred)}"
 
         sentences_y_pred = []
@@ -84,7 +92,8 @@ class PolDeepNer2:
         return sentences_y_pred
 
     def process_text(self, text: str) -> [AnnotationText]:
-        """
+        """A message of shame -- documentation must be completed.
+
         @texts: Array of sentences. Each sentence is a string.
                 "John lives in New York. Mary lives in Chicago"
 
@@ -92,6 +101,7 @@ class PolDeepNer2:
                 AnnotationText(14, 22, "LOC", "New York"),
                 AnnotationText(24, 28, "PER", "Mary"),
                 AnnotationText(38, 45, "LOC", "Chicago")]
+
         """
         sentences = self.tokenizer.tokenize([text])
         predictions = self.process(sentences)
@@ -99,8 +109,11 @@ class PolDeepNer2:
         return align_tokens_with_text(text, sentences, annotations)
 
     def process_document(self, text: str) -> Document:
-        """
-        Process given texts and return Document structure representing the result of processing.
+        """A message of shame -- documentation must be completed.
+
+        Process given texts and return Document structure representing the
+        result of processing.
+
         """
         polem = AnnotationLemmatizerPolem()
 
@@ -129,10 +142,14 @@ class PolDeepNer2:
         return document
 
     def process_tokenized(self, tokens: [[str]]) -> [[str]]:
-        """
+        """A message of shame -- documentation must be completed.
+
         @tokens: Array of sentences. Each sentence is an array of words.
-                 [["John", "lives", "in", "New", "York"], ["Mary", "lives", "in", "Chicago"]]
+                 [["John", "lives", "in", "New", "York"],
+                 ["Mary", "lives", "in", "Chicago"]]
+
+        return: [["B-PER", "O", "O", "B-LOC", "I-LOC"],
+        ["B-PER", "O", "O", "B-LOC"]]
 
-        return: [["B-PER", "O", "O", "B-LOC", "I-LOC"], ["B-PER", "O", "O", "B-LOC"]]
         """
         return self.process(tokens)
diff --git a/poldeepner2/pipeline/__init__.py b/poldeepner2/pipeline/__init__.py
index 8b137891791fe96927ad78e64b0aad7bded08bdc..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644
--- a/poldeepner2/pipeline/__init__.py
+++ b/poldeepner2/pipeline/__init__.py
@@ -1 +0,0 @@
-
diff --git a/poldeepner2/pipeline/lemmatization.py b/poldeepner2/pipeline/lemmatization.py
index 2883abf5ef18ac4f08c0b9773843b831f3947ac3..af27e7c08276731255388d95210c471d8fd3b2ad 100644
--- a/poldeepner2/pipeline/lemmatization.py
+++ b/poldeepner2/pipeline/lemmatization.py
@@ -1,3 +1,5 @@
+"""A message of shame -- documentation must be completed."""
+
 import logging
 
 import requests
@@ -6,22 +8,45 @@ from poldeepner2.utils.annotation import Annotation
 
 
 class ProcessorAnnotations:
+    """A message of shame -- documentation must be completed."""
 
     def process(self, annotations: [Annotation]):
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            annotations:A message of shame -- documentation must be completed.
+
+        """
         pass
 
 
 class AnnotationLemmatizerPolem (ProcessorAnnotations):
+    """A message of shame -- documentation must be completed."""
 
     def __init__(self):
+        """A message of shame -- documentation must be completed."""
         self.url = 'http://localhost:8000'
         pass
 
     def process(self, annotations: [Annotation]):
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            annotations:A message of shame -- documentation must be completed.
+
+        """
         for an in annotations:
             an.lemma = self.lemmatize(an)
 
     def lemmatize(self, annotation: Annotation):
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            annotation:A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
         orths = [an.orth for an in annotation.tokens]
         lemmas = [an.lemma for an in annotation.tokens]
         spaces = [str(len(an.ws) > 0) for an in annotation.tokens]
@@ -41,4 +66,3 @@ class AnnotationLemmatizerPolem (ProcessorAnnotations):
         except Exception as ex:
             logging.error(ex)
             return None
-
diff --git a/poldeepner2/pipeline/tokenization.py b/poldeepner2/pipeline/tokenization.py
index cab50694da28232940e2f0112f04ed5535b3cb1d..3cc7bce7fcd0fa186d67f925deb967d31b4531ee 100644
--- a/poldeepner2/pipeline/tokenization.py
+++ b/poldeepner2/pipeline/tokenization.py
@@ -1,18 +1,37 @@
+"""A message of shame -- documentation must be completed."""
+
 import re
 
 import requests
 
 from poldeepner2.data.token import Token
-from poldeepner2.utils.preprocess import split_hashtags, split_leading_name, split_underscore
+from poldeepner2.utils.preprocess import split_hashtags, split_leading_name, \
+    split_underscore
 
 
 class Tokenizer:
+    """A message of shame -- documentation must be completed."""
 
     def tokenize(self, texts: [str]) -> [[Token]]:
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            texts:A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
         return []
 
     @staticmethod
     def align_tokens_with_text(text: str, sentences: [[Token]]):
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            text: A message of shame -- documentation must be completed.
+            sentences: A message of shame -- documentation must be completed.
+
+        """
         idx = 0
         for sentence in sentences:
             for token in sentence:
@@ -23,19 +42,30 @@ class Tokenizer:
 
 
 class TokenizerFast(Tokenizer):
+    """A message of shame -- documentation must be completed."""
 
     def __init__(self):
+        """A message of shame -- documentation must be completed."""
         self.pattern_tokens = re.compile(r"(\W)")
         self.abbrev_no_eos = set(["tzw", "np", "m.in", "tj"])
 
     def tokenize(self, texts: [str]) -> [[str]]:
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            texts:A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
         out = []
         for text in texts:
             text_out = []
             tokens = self.pattern_tokens.split(text.strip())
             tokens = [w for w in tokens if len(w.strip()) > 0]
             interp_ends = set(".?!")
-            ends = [idx + 1 for idx, w in enumerate(tokens) if w in interp_ends or idx == len(tokens) - 1]
+            ends = [idx + 1 for idx, w in enumerate(tokens)
+                    if w in interp_ends or idx == len(tokens) - 1]
             for sent_start, sent_end in zip([0] + ends[:-1], ends):
                 text_out.append(tokens[sent_start:sent_end])
 
@@ -58,22 +88,58 @@ class TokenizerFast(Tokenizer):
         return out
 
     def is_ended_with_abbrev(self, sequence: [str]) -> bool:
-        return len(sequence) > 1 and sequence[-1] == "." and sequence[-2] in self.abbrev_no_eos
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            sequence:A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
+        return len(sequence) > 1 and sequence[-1] == "." \
+            and sequence[-2] in self.abbrev_no_eos
 
     def is_ended_with_name_initial(self, sequence: [str]) -> bool:
-        return len(sequence) > 1 and sequence[-1] == "." and len(sequence[-2]) == 1 \
-               and sequence[-2].isupper() and sequence[-2].isalpha()
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            sequence: A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
+        return len(sequence) > 1 and sequence[-1] == "." \
+            and len(sequence[-2]) == 1 \
+            and sequence[-2].isupper() and sequence[-2].isalpha()
 
 
 class TokenizerSpaces(Tokenizer):
+    """A message of shame -- documentation must be completed."""
 
     def tokenize(self, texts: [str]) -> [[str]]:
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            texts:A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
         return [re.sub(r"\s+", " ", text.strip()).split(" ") for text in texts]
 
 
 class TokenizerKrnnt(Tokenizer):
+    """A message of shame -- documentation must be completed."""
 
     def tokenize(self, texts: [str]) -> [[str]]:
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            texts:A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
         out = []
         for text in texts:
             sentences = TokenizerKrnnt.request(text)
@@ -85,6 +151,14 @@ class TokenizerKrnnt(Tokenizer):
         return out
 
     def tokenize_tokens(self, texts: [str]) -> [[Token]]:
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            texts:A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
         out = []
         for text in texts:
             sentences = TokenizerKrnnt.request(text)
@@ -93,6 +167,14 @@ class TokenizerKrnnt(Tokenizer):
 
     @staticmethod
     def request(text: str):
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            text:A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
         url = 'http://localhost:9003'
         x = requests.post(url, data=text.encode('utf-8'))
         tokens = TokenizerKrnnt.parse_krnnt_output(x.text)
@@ -101,6 +183,14 @@ class TokenizerKrnnt(Tokenizer):
 
     @staticmethod
     def parse_krnnt_output(output):
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            output:A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
         sentences = []
         tokens = []
         orth = None
@@ -117,7 +207,9 @@ class TokenizerKrnnt(Tokenizer):
                     if len(tokens) > 0:
                         tokens[-1].ws = "" if parts[1] == "none" else " "
                 elif parts[0] == "" and orth is not None:
-                    tokens.append(Token(orth, 0, 0, lemma=parts[1], morph=parts[2], ws=""))
+                    tokens.append(Token(orth, 0,
+                                        0, lemma=parts[1],
+                                        morph=parts[2], ws=""))
                     orth = None
         if len(tokens) > 0:
             tokens[-1].eos = True
@@ -126,6 +218,13 @@ class TokenizerKrnnt(Tokenizer):
 
 
 def load(tokenizer_type: str) -> Tokenizer:
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        tokenizer_type: strA message of shame -- documentation must be
+        completed.
+
+    """
     if tokenizer_type == "space":
         return TokenizerSpaces()
     elif tokenizer_type == "krnnt":
diff --git a/poldeepner2/utils/annotation.py b/poldeepner2/utils/annotation.py
index 8e8c23b016a936731cb066a4f96cd7c658bd28f8..a3685f7ac1880a8c0685a7c5ffd756f279aabe9f 100644
--- a/poldeepner2/utils/annotation.py
+++ b/poldeepner2/utils/annotation.py
@@ -1,11 +1,23 @@
+"""A message of shame -- documentation must be completed."""
 from dataclasses import dataclass
 
 from poldeepner2.data.token import Token
 
 
 class Annotation:
+    """A message of shame -- documentation must be completed."""
 
-    def __init__(self, label, sid: int = None, token_id: int = None, tokens: [Token] = []):
+    def __init__(self, label, sid: int = None, token_id: int = None,
+                 tokens: [Token] = []):
+        """A message of shame -- documentation must be completed.
+
+        Args:
+           label: A message of shame -- documentation must be completed.
+           sid: A message of shame -- documentation must be completed.
+           token_id: A message of shame -- documentation must be completed.
+           tokens: A message of shame -- documentation must be completed.
+
+        """
         self.sentence_id = sid
         self.token_ids = [token_id] if token_id is not None else []
         self.tokens = tokens
@@ -13,37 +25,87 @@ class Annotation:
         self.lemma = ""
 
     def add_id(self, id):
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            id: A message of shame -- documentation must be completed.
+
+        """
         self.token_ids.append(id)
 
     def add_token(self, token: Token):
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            token:A message of shame -- documentation must be completed.
+
+        """
         self.tokens.append(token)
 
     def get_text(self):
+        """A message of shame -- documentation must be completed.
+
+        Returns: A message of shame -- documentation must be completed.
+
+        """
         return "".join([t.orth + t.ws for t in self.tokens]).strip()
 
     def __str__(self):
+        """A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
         return self.annotation
 
     def __eq__(self, other):
-        return self.annotation == other.annotation and self.token_ids[0] == other.token_ids[0] and \
-               self.token_ids[-1] == other.token_ids[-1] and self.sentence_id == other.sentence_id
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            other:A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
+        return self.annotation == other.annotation \
+            and self.token_ids[0] == other.token_ids[0] \
+            and self.token_ids[-1] == other.token_ids[-1] \
+            and self.sentence_id == other.sentence_id
 
     def __hash__(self):
-        return hash(self.annotation + str(self.sentence_id) + str(self.token_ids[0]) + str(self.token_ids[-1]))
+        """A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
+        return hash(self.annotation + str(self.sentence_id) +
+                    str(self.token_ids[0]) + str(self.token_ids[-1]))
 
     @property
     def annotation_length(self):
+        """A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
         return self.token_ids[-1] - self.token_ids[0]
 
 
 @dataclass
 class AnnotationText:
+    """A message of shame -- documentation must be completed."""
+
     start: int
     end: int
     label: str
     text: str
 
     def dict(self):
+        """A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
         return {
             'begin': self.start,
             'end': self.end,
diff --git a/poldeepner2/utils/data_utils.py b/poldeepner2/utils/data_utils.py
index fba29064064e46bf145a329d3b303e6165516a75..08520b99bf6762f36bfb820fdbf13e0635df1d66 100644
--- a/poldeepner2/utils/data_utils.py
+++ b/poldeepner2/utils/data_utils.py
@@ -1,3 +1,4 @@
+"""A message of shame -- documentation must be completed."""
 import codecs
 import json
 from typing import List
@@ -7,7 +8,6 @@ from torch.utils.data import TensorDataset
 
 from poldeepner2.utils.annotation import Annotation, AnnotationText
 
-
 LABEL_IGNORE_ID = 0
 
 
@@ -17,14 +17,13 @@ class InputExample(object):
     def __init__(self, guid, text_a, text_b=None, label=None):
         """Constructs a InputExample.
 
-        Args:
-            guid: Unique id for the example.
-            text_a: string. The untokenized text of the first sequence. For single
-            sequence tasks, only this sequence must be specified.
-            text_b: (Optional) string. The untokenized text of the second sequence.
-            Only must be specified for sequence pair tasks.
-            label: (Optional) string. The label of the example. This should be
-            specified for train and dev examples, but not for test examples.
+        Args:
+            guid: Unique id for the example.
+            text_a: string. The untokenized text of the first sequence.
+            For single sequence tasks, only this sequence must be
+            specified.
+            text_b: (Optional) string. The untokenized text of the second
+            sequence. Only must be specified for sequence pair tasks.
+            label: (Optional) string. The label of the example. This
+            should be specified for train and dev examples, but not for
+            test examples.
         """
         self.guid = guid
         self.text_a = text_a
@@ -43,6 +42,14 @@ class NerProcessor:
         return examples
 
     def get_labels(self, paths):
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            paths: A message of shame -- documentation must be completed.
+
+        Returns: A message of shame -- documentation must be completed.
+
+        """
         label_set = set([])
         for path in paths:
             examples = self.get_examples(path)
@@ -50,13 +57,22 @@ class NerProcessor:
         return sorted(list(label_set))
 
     def _read_file(self, filename):
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            filename: A message of shame -- documentation must be completed.
+
+        Returns: A message of shame -- documentation must be completed.
+
+        """
         f = open(filename)
         data = []
         sentence = []
         label = []
-        
+
         for i, line in enumerate(f, 1):
-            if not line.strip() or len(line) == 0 or line.startswith('-DOCSTART') or line[0] == "\n":
+            if not line.strip() or len(line) == 0 or \
+                    line.startswith('-DOCSTART') or line[0] == "\n":
                 if len(sentence) > 0:
                     data.append((sentence, label))
                     sentence = []
@@ -64,7 +80,8 @@ class NerProcessor:
                 continue
 
             splits = line.split()
-            assert len(splits) >= 2, "error on line {}. Found {} splits".format(i, len(splits))
+            assert len(splits) >= 2, "error on line {}. Found {} " \
+                                     "splits".format(i, len(splits))
             word, tag = splits[0], splits[-1]
             sentence.append(word.strip())
             label.append(tag.strip())
@@ -72,7 +89,44 @@ class NerProcessor:
             data.append((sentence, label))
         return data
 
+    def _read_iob(self, filename, column_index):
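+        """Read an IOB file and group tokens and labels by sentence.
+
+        Args:
+            filename: path to the IOB file.
+            column_index: index of the column holding the label.
+
+        Returns: A list of (tokens, labels) tuples, one per sentence.
+
+        """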
+
+        data = []
+        sentence = []
+        label = []
+        with open(filename, encoding='utf-8') as f:
+            for i, line in enumerate(f, 1):
+                line = line.strip('\n')
+
+                # check if beginning of the file or empty line
+                if line.startswith('-DOCSTART') or len(line) == 0:
+                    if len(sentence) > 0:
+                        data.append((sentence, label))
+                        sentence = []
+                        label = []
+                    continue
+
+                splits = line.split()
+                assert len(splits) >= 2, "error on line {}. Found {} splits".format(
+                    i, len(splits))
+
+                word, tag = splits[0], splits[column_index]
+                sentence.append(word)
+                label.append(tag)
+        if len(sentence) > 0:
+            data.append((sentence, label))
+        return data
+
     def _create_examples(self, lines, set_type):
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            lines: A message of shame -- documentation must be completed.
+            set_type: A message of shame -- documentation must be completed.
+
+        Returns: A message of shame -- documentation must be completed.
+
+        """
         examples = []
         for i, (sentence, label) in enumerate(lines):
             guid = "%s-%s" % (set_type, i)
@@ -85,6 +139,14 @@ class NerProcessor:
 
     @staticmethod
     def _get_labels(sentences):
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            sentences: A message of shame -- documentation must be completed.
+
+        Returns: A message of shame -- documentation must be completed.
+
+        """
         label_set = set([])
         for t in sentences:
             label_set.update(t.label)
@@ -92,14 +154,34 @@ class NerProcessor:
 
 
 def create_dataset(features) -> TensorDataset:
-    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
-    all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.long)
-    all_valid_ids = torch.tensor([f.valid_ids for f in features], dtype=torch.long)
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        features: A message of shame -- documentation must be completed.
+
+    Returns: A message of shame -- documentation must be completed.
+
+    """
+    all_input_ids = torch.tensor([f.input_ids for f in features],
+                                 dtype=torch.long)
+    all_label_ids = torch.tensor([f.label_id for f in features],
+                                 dtype=torch.long)
+    all_valid_ids = torch.tensor([f.valid_ids for f in features],
+                                 dtype=torch.long)
     # ToDo: at some point the TensorDataset should be reduced to three items.
-    return TensorDataset(all_input_ids, all_label_ids, all_valid_ids, all_valid_ids)
+    return TensorDataset(all_input_ids, all_label_ids, all_valid_ids,
+                         all_valid_ids)
 
 
 def wrap_annotations(sentences) -> [Annotation]:
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        sentences: A message of shame -- documentation must be completed.
+
+    Returns: A message of shame -- documentation must be completed.
+
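+    Example (illustrative, assuming IOB2-style labels):
+        >>> anns = wrap_annotations([["B-PER", "I-PER", "O"]])
+        >>> len(anns)
+        1
+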
+    """
     annotations = []
     tid = 0
     for sid, labels in enumerate(sentences):
@@ -108,7 +190,8 @@ def wrap_annotations(sentences) -> [Annotation]:
             current_token_annotations = {}
             for ann in label.split('#'):
                 type = ann[2:]
-                if 'B-' in ann or ('I-' in ann and type not in last_token_annotations):
+                if 'B-' in ann or ('I-' in ann and
+                                   type not in last_token_annotations):
                     an = Annotation(type, sid, tid)
                     current_token_annotations[type] = an
                     annotations.append(an)
@@ -122,6 +205,15 @@ def wrap_annotations(sentences) -> [Annotation]:
 
 
 def align_tokens_to_text(sentences: [[str]], text):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        sentences: A message of shame -- documentation must be completed.
+        text: A message of shame -- documentation must be completed.
+
+    Returns: A message of shame -- documentation must be completed.
+
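+    Example (illustrative):
+        >>> align_tokens_to_text([["Ala", "ma", "kota"]], "Ala ma kota")
+        [(0, 3), (4, 6), (7, 11)]
+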
+    """
     offsets = []
     tid = 0
     text = text.lower()
@@ -131,7 +223,8 @@ def align_tokens_to_text(sentences: [[str]], text):
         for t in s:
             start = text.find(t.lower(), tid)
             if start == -1:
-                raise Exception(f"Could not align tokens to text: {t} in '{text}")
+                raise Exception(f"Could not align tokens to text: {t} in '"
+                                f"{text}")
             end = start + len(t)
             offsets.append((start, end))
             tid = end
@@ -139,32 +232,49 @@ def align_tokens_to_text(sentences: [[str]], text):
 
 
 def align_tokens_with_text(text, sentences, annotations) -> [AnnotationText]:
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        text: A message of shame -- documentation must be completed.
+        sentences: A message of shame -- documentation must be completed.
+        annotations: A message of shame -- documentation must be completed.
+
+    Returns: A message of shame -- documentation must be completed.
+
+    """
     offsets = align_tokens_to_text(sentences, text)
     output = []
     for an in annotations:
         begin = offsets[an.token_ids[0]][0]
         end = offsets[an.token_ids[-1]][1]
         orth = text[begin:end]
-        output.append(AnnotationText(begin, end, an.annotation.replace("-", "_"), orth))
+        output.append(AnnotationText(begin, end, an.annotation.replace("-",
+                                                                       "_"),
+                                     orth))
     return output
 
 
 def get_poleval_dict(doc_id, text, sentences, annotations):
-    ''' Returns PolEval dict
+    """A message of shame -- documentation must be completed.
+
+    Returns PolEval dict
     {
-        text:
-        id:
-        answers:
+        text: A message of shame -- documentation must be completed.
+        id: A message of shame -- documentation must be completed.
+        answers: A message of shame -- documentation must be completed.
     }
-    Note that arguments it takes is FILE, PATH, FILE as utils.load_data_and_labels opens file itself
-    '''
+    Note that arguments it takes is FILE, PATH, FILE as
+    utils.load_data_and_labels opens file itself
+
+    """
     offsets = align_tokens_to_text(sentences, text)
     answers = []
     for an in annotations:
         begin = offsets[an.token_ids[0]][0]
         end = offsets[an.token_ids[-1]][1]
         orth = text[begin:end]
-        answers.append("%s %d %d\t%s" % (an.annotation.replace("-", "_"), begin, end, orth))
+        answers.append("%s %d %d\t%s" % (an.annotation.replace("-", "_"),
+                                         begin, end, orth))
     return ({
         'text': text,
         'id': doc_id,
@@ -173,17 +283,41 @@ def get_poleval_dict(doc_id, text, sentences, annotations):
 
 
 def read_params(path):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        path: A message of shame -- documentation must be completed.
+
+    Returns: A message of shame -- documentation must be completed.
+
+    """
     data = read_params_json(path)
     return data['dropout'], data['num_labels'], data['label_list']
 
 
 def read_params_json(path):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        path: A message of shame -- documentation must be completed.
+
+    Returns: A message of shame -- documentation must be completed.
+
+    """
     with open(path + '/params.json') as json_file:
         data = json.load(json_file)
         return data
 
 
 def read_json(path):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        path: A message of shame -- documentation must be completed.
+
+    Returns: A message of shame -- documentation must be completed.
+
+    """
     json_out = {}
     with open(path, encoding='utf-8') as f:
         data = json.load(f)
@@ -193,11 +327,20 @@ def read_json(path):
 
 
 def read_tsv(filename, with_labels=False):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        filename: A message of shame -- documentation must be completed.
+        with_labels: A message of shame -- documentation must be completed.
+
+    Returns: A message of shame -- documentation must be completed.
+
+    """
     f = open(filename, encoding="utf-8")
     data = []
     sentence = []
     label = []
-    
+
     for i, line in enumerate(f, 1):
         if not line.strip() or len(line) == 0 \
                 or line.startswith('-DOCSTART') or line[0] == "\n":
@@ -219,6 +362,14 @@ def read_tsv(filename, with_labels=False):
 
 
 def save_tsv(output_path, sentences, predictions):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        output_path: A message of shame -- documentation must be completed.
+        sentences: A message of shame -- documentation must be completed.
+        predictions: A message of shame -- documentation must be completed.
+
+    """
     with codecs.open(output_path, "w", "utf8") as fout:
         assert len(sentences) == len(predictions)
         for tokens, labels in zip(sentences, predictions):
@@ -228,6 +379,14 @@ def save_tsv(output_path, sentences, predictions):
 
 
 def get_dict_for_record(json_ann):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        json_ann: A message of shame -- documentation must be completed.
+
+    Returns: A message of shame -- documentation must be completed.
+
+    """
     token_dict = {}
     derives = 0
     for ann in json_ann['data']['brat'].split('\n'):
@@ -237,21 +396,32 @@ def get_dict_for_record(json_ann):
                 token = ann.split('\t')[-1]
 
                 if token in token_dict.keys():
-                    token_dict[token] = ''.join([token_dict[token],'#',annotation])
+                    token_dict[token] = ''.join([token_dict[token], '#',
+                                                 annotation])
                 else:
-                    token_dict[token] = annotation 
+                    token_dict[token] = annotation
         else:
             derives += 1
     return token_dict, derives
 
 
 def map_json_to_iob(json_ann, iob):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        json_ann: A message of shame -- documentation must be completed.
+        iob: A message of shame -- documentation must be completed.
+
+    Returns: A message of shame -- documentation must be completed.
+
+    """
     token_dict, derives = get_dict_for_record(json_ann)
     successfully_added = 0
     out_iob = ''
     with open(iob, encoding='utf-8') as f:
         for line in f:
-            if not line.strip() or len(line) == 0 or line.startswith('-DOCSTART') or line[0] == "\n":
+            if not line.strip() or len(line) == 0 or \
+                    line.startswith('-DOCSTART') or line[0] == "\n":
                 out_iob += line
             else:
                 splitline = line.split('\t')
@@ -266,6 +436,15 @@ def map_json_to_iob(json_ann, iob):
 
 
 def has_same_neighbour(annotation, next_annotations):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        annotation: A message of shame -- documentation must be completed.
+        next_annotations: A message of shame -- documentation must be completed.
+
+    Returns: A message of shame -- documentation must be completed.
+
+    """
     if next_annotations == ['O']:
         return False
     searched_ann = 'B-{0}'.format(annotation)
@@ -273,6 +452,12 @@ def has_same_neighbour(annotation, next_annotations):
 
 
 def iob2_to_iob(iob2_text):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        iob2_text:A message of shame -- documentation must be completed.
+
+    """
     iob2_list = []
     iob1_list = []
     
@@ -291,11 +476,12 @@ def iob2_to_iob(iob2_text):
                 else:
                     current_ann.append(ann)
             iob1_list.append((line[0], '#'.join(current_ann)))
-        elif i == len(iob2_list)-1:
+        elif i == len(iob2_list) - 1:
             for ann in line[1]:
                 split = ann.split('-')
                 if split[0] == 'B':
-                    if len(tags_to_separate) > 0 and split[1] in tags_to_separate:
+                    if len(tags_to_separate) > 0 and split[1] \
+                            in tags_to_separate:
                         current_ann.append(ann)
                         tags_to_separate.remove(split[1])
                     else:
@@ -307,18 +493,19 @@ def iob2_to_iob(iob2_text):
             for ann in line[1]:
                 split = ann.split('-')
                 if split[0] == 'B':
-                    if len(tags_to_separate) > 0 and split[1] in tags_to_separate:
+                    if len(tags_to_separate) > 0 and split[1] \
+                            in tags_to_separate:
                         current_ann.append(ann)
                         tags_to_separate.remove(split[1])
                     else:
                         current_ann.append('I-{0}'.format(split[1]))
-                        if has_same_neighbour(split[1], iob2_list[i+1][1]):
+                        if has_same_neighbour(split[1], iob2_list[i + 1][1]):
                             tags_to_separate.append(split[1])
                 elif split[0] == 'I':
                     current_ann.append(ann)
-                    if has_same_neighbour(split[1], iob2_list[i+1][1]):
+                    if has_same_neighbour(split[1], iob2_list[i + 1][1]):
                         tags_to_separate.append(split[1])
                 else:
                     current_ann.append(ann)
             iob1_list.append((line[0], '#'.join(current_ann)))
-    return '\n'.join(map(lambda x: '{} {}'.format(x[0], x[1]) , iob1_list))
+    return '\n'.join(map(lambda x: '{} {}'.format(x[0], x[1]), iob1_list))
diff --git a/poldeepner2/utils/file_utils.py b/poldeepner2/utils/file_utils.py
index 45c6d5d1f5bc12216ded68fcb38c5697b9e340fa..ad08a6e5932092dc3240a3b09ae09ad53d162ed6 100644
--- a/poldeepner2/utils/file_utils.py
+++ b/poldeepner2/utils/file_utils.py
@@ -1,3 +1,4 @@
+"""A message of shame -- documentation must be completed."""
 from urllib.request import urlopen
 import requests
 import os
@@ -7,7 +8,8 @@ from tqdm import tqdm
 
 
 def download_from_url(url, dst):
-    """
+    """A message of shame -- documentation must be completed.
+
     @param: url to download file
     @param: dst place to put the file
     """
@@ -36,24 +38,51 @@ def download_from_url(url, dst):
 
 
 def unpack_gz(path, output):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        path: A message of shame -- documentation must be completed.
+        output: A message of shame -- documentation must be completed.
+
+    """
     with tarfile.open(path, 'r') as tar:
-        for member in tqdm(iterable=tar.getmembers(), total=len(tar.getmembers())):
+        for member in tqdm(iterable=tar.getmembers(),
+                           total=len(tar.getmembers())):
             tar.extract(path=output, member=member)
 
 
 def unpack_zip(path, output):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        path: A message of shame -- documentation must be completed.
+        output: A message of shame -- documentation must be completed.
+
+    """
     with ZipFile(path, 'r') as zipObj:
         zipObj.extractall(output)
 
 
 def download_file(url, path, compression, extract_to_subfolder=False):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        url: A message of shame -- documentation must be completed.
+        path: A message of shame -- documentation must be completed.
+        compression: A message of shame -- documentation must be completed.
+        extract_to_subfolder: A message of shame -- documentation must be
+        completed.
+
+    """
     ext = "" if compression is None else '.' + compression
     download_from_url(url, path + ext)
     if compression == 'zip':
-        unpack_zip(f'{path}.zip', path if extract_to_subfolder else os.path.dirname(path))
+        unpack_zip(f'{path}.zip',
+                   path if extract_to_subfolder else os.path.dirname(path))
         os.remove(f"{path}{ext}")
     elif compression == 'tar.gz':
-        unpack_gz(f'path.tar.gz', path if extract_to_subfolder else os.path.dirname(path))
+        unpack_gz(f"{path}.tar.gz",
+                  path if extract_to_subfolder else os.path.dirname(path))
         os.remove(f"{path}{ext}")
     elif compression is None:
         pass
diff --git a/poldeepner2/utils/preprocess.py b/poldeepner2/utils/preprocess.py
index 020aaa848a005f2999e42a070e9bac737b73d5a5..d8613c32cdc7ff39cd95041f171ba2788110efeb 100644
--- a/poldeepner2/utils/preprocess.py
+++ b/poldeepner2/utils/preprocess.py
@@ -1,13 +1,23 @@
+"""A message of shame -- documentation must be completed."""
 import re
 
 
 def split_hashtags(tokens):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        tokens: A message of shame -- documentation must be completed.
+
+    Returns: A message of shame -- documentation must be completed.
+
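+    Example (illustrative):
+        >>> split_hashtags(["#", "AlaMaKota"])
+        ['#', 'Ala', 'Ma', 'Kota']
+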
+    """
     output = []
     i = 0
     while i < len(tokens):
-        if tokens[i] == "#" and i+1 < len(tokens) and re.fullmatch(r"([A-Z][a-z]+)([A-Z][a-z]+)+", tokens[i+1]):
+        if tokens[i] == "#" and i + 1 < len(tokens) and \
+                re.fullmatch(r"([A-Z][a-z]+)([A-Z][a-z]+)+", tokens[i + 1]):
             output.append("#")
-            for m in re.findall(r"([A-Z][a-z]+)", tokens[i+1]):
+            for m in re.findall(r"([A-Z][a-z]+)", tokens[i + 1]):
                 output.append(str(m))
             i += 2
         else:
@@ -17,7 +27,16 @@ def split_hashtags(tokens):
 
 
 def split_leading_name(tokens):
-    if len(tokens) > 1 and re.fullmatch(r"([A-Z][a-z]+)([A-Z][a-z]+)+", tokens[0]) and tokens[1] == ":":
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        tokens: A message of shame -- documentation must be completed.
+
+    Returns: A message of shame -- documentation must be completed.
+
+    """
+    if len(tokens) > 1 and re.fullmatch(r"([A-Z][a-z]+)([A-Z][a-z]+)+",
+                                        tokens[0]) and tokens[1] == ":":
         output = []
         for m in re.findall(r"([A-Z][a-z]+)", tokens[0]):
             output.append(str(m))
@@ -28,6 +47,14 @@ def split_leading_name(tokens):
 
 
 def split_underscore(tokens):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        tokens: A message of shame -- documentation must be completed.
+
+    Returns: A message of shame -- documentation must be completed.
+
+    """
     output = []
     for token in tokens:
         if "_" in token:
diff --git a/poldeepner2/utils/seed.py b/poldeepner2/utils/seed.py
index 7a2b4614246b9681c6ea1dddd70eeedd960b5c1d..dd022c54dd7e694f756e3a38f5e0ea129683896a 100644
--- a/poldeepner2/utils/seed.py
+++ b/poldeepner2/utils/seed.py
@@ -1,9 +1,18 @@
+"""A message of shame -- documentation must be completed."""
 import numpy as np
 import torch
 import random
 
 
 def setup_seed(n=101):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        n: A message of shame -- documentation must be completed.
+
+    Returns: A message of shame -- documentation must be completed.
+
+    """
     random.seed(n)
     np.random.seed(n)
     torch.manual_seed(n)
diff --git a/poldeepner2/utils/sequence_labeling.py b/poldeepner2/utils/sequence_labeling.py
index 8f6410c373606231a80a288510d252e10f35c27c..9aa83f4d59c6bf5bc8e98312475a0be7ece79aec 100644
--- a/poldeepner2/utils/sequence_labeling.py
+++ b/poldeepner2/utils/sequence_labeling.py
@@ -1,6 +1,8 @@
-"""Metrics to assess performance on sequence labeling task given prediction
-Functions named as ``*_score`` return a scalar value to maximize: the higher
-the better
+"""Metrics to assess performance on sequence labeling task given prediction.
+
+Functions named as ``*_score`` return a scalar value to maximize: the higher
+the better.
+
 """
 
 from __future__ import absolute_import
@@ -16,6 +18,7 @@ def get_entities(seq, suffix=False):
     """Gets entities from sequence.
 
     Args:
+        suffix: if True, labels have the tag as a suffix (e.g. ``PER-B``).
         seq (list): sequence of labels.
 
     Returns:
@@ -27,6 +30,7 @@ def get_entities(seq, suffix=False):
         >>> seq = ['B-PER', 'I-PER', 'O', 'B-LOC']
         >>> get_entities(seq)
         [('PER', 0, 1), ('LOC', 3, 3)]
+
     """
     # for nested list
     if any(isinstance(s, list) for s in seq):
@@ -37,32 +41,43 @@ def get_entities(seq, suffix=False):
     for i, chunk in enumerate(seq + ['O']):
         for et in existing_tags:
             et['continued'] = False
-        active_types = map(lambda x: x['type'] ,existing_tags)
+        active_types = map(lambda x: x['type'], existing_tags)
         i_chunk = []
         if '#' in chunk:
             for ann in chunk.split('#'):
-               i_chunk.append(ann) 
+                i_chunk.append(ann)
         else:
-            i_chunk.append(chunk)  
+            i_chunk.append(chunk)
         for subchunk in i_chunk:
             tag, type_ = get_tag_type(suffix, subchunk)
-            if start_of_chunk(tag,type_) and (tag == 'B' or type_ not in active_types):
-                existing_tags.append( {'begin': i, 'continued': True, 'type': type_} )
+            if start_of_chunk(tag, type_) and (tag == 'B' or type_ not in
+                                               active_types):
+                existing_tags.append({'begin': i, 'continued': True,
+                                      'type': type_})
             if tag == 'I':
                 for et in existing_tags:
                     if et['type'] == type_:
                         et['continued'] = True
         notFinished = []
         for et in existing_tags:
-            if et['continued'] :
+            if et['continued']:
                 notFinished.append(et)
             else:
-                chunks.append((et['type'], et['begin'], i-1))
+                chunks.append((et['type'], et['begin'], i - 1))
         existing_tags = notFinished
     return chunks
 
 
 def get_tag_type(suffix, chunk):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        suffix: A message of shame -- documentation must be completed.
+        chunk: A message of shame -- documentation must be completed.
+
+    Returns: A message of shame -- documentation must be completed.
+
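+    Example (illustrative):
+        >>> get_tag_type(True, "PER-B")
+        ('B', 'PER')
+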
+    """
     if suffix:
         tag = chunk[-1]
         type_ = chunk.split('-')[0]
@@ -83,18 +98,27 @@ def end_of_chunk(tag, prev_type, type_, prev_tag):
 
     Returns:
         chunk_end: boolean.
+
     """
     chunk_end = False
 
-    if prev_tag == 'E': chunk_end = True
-    if prev_tag == 'S': chunk_end = True
+    if prev_tag == 'E':
+        chunk_end = True
+    if prev_tag == 'S':
+        chunk_end = True
 
-    if prev_tag == 'B' and tag == 'B': chunk_end = True
-    if prev_tag == 'B' and tag == 'S': chunk_end = True
-    if prev_tag == 'B' and tag == 'O': chunk_end = True
-    if prev_tag == 'I' and tag == 'B': chunk_end = True
-    if prev_tag == 'I' and tag == 'S': chunk_end = True
-    if prev_tag == 'I' and tag == 'O': chunk_end = True
+    if prev_tag == 'B' and tag == 'B':
+        chunk_end = True
+    if prev_tag == 'B' and tag == 'S':
+        chunk_end = True
+    if prev_tag == 'B' and tag == 'O':
+        chunk_end = True
+    if prev_tag == 'I' and tag == 'B':
+        chunk_end = True
+    if prev_tag == 'I' and tag == 'S':
+        chunk_end = True
+    if prev_tag == 'I' and tag == 'O':
+        chunk_end = True
 
     if prev_tag != 'O' and prev_tag != '.' and prev_type != type_:
         chunk_end = True
@@ -113,18 +137,27 @@ def start_of_chunk(tag, type_, prev_type=None, prev_tag='O'):
 
     Returns:
         chunk_start: boolean.
+
     """
     chunk_start = False
 
-    if tag == 'B': chunk_start = True
-    if tag == 'S': chunk_start = True
+    if tag == 'B':
+        chunk_start = True
+    if tag == 'S':
+        chunk_start = True
 
-    if prev_tag == 'E' and tag == 'E': chunk_start = True
-    if prev_tag == 'E' and tag == 'I': chunk_start = True
-    if prev_tag == 'S' and tag == 'E': chunk_start = True
-    if prev_tag == 'S' and tag == 'I': chunk_start = True
-    if prev_tag == 'O' and tag == 'E': chunk_start = True
-    if prev_tag == 'O' and tag == 'I': chunk_start = True
+    if prev_tag == 'E' and tag == 'E':
+        chunk_start = True
+    if prev_tag == 'E' and tag == 'I':
+        chunk_start = True
+    if prev_tag == 'S' and tag == 'E':
+        chunk_start = True
+    if prev_tag == 'S' and tag == 'I':
+        chunk_start = True
+    if prev_tag == 'O' and tag == 'E':
+        chunk_start = True
+    if prev_tag == 'O' and tag == 'I':
+        chunk_start = True
 
     if tag != 'O' and tag != '.' and prev_type != type_:
         chunk_start = True
@@ -143,6 +176,7 @@ def f1_score(y_true, y_pred, average='micro', suffix=False):
         F1 = 2 * (precision * recall) / (precision + recall)
 
     Args:
+        average: the averaging strategy (default 'micro').
         y_true : 2d array. Ground truth (correct) target values.
         y_pred : 2d array. Estimated targets as returned by a tagger.
 
@@ -151,10 +185,13 @@ def f1_score(y_true, y_pred, average='micro', suffix=False):
 
     Example:
         >>> from seqeval.metrics import f1_score
-        >>> y_true = [['O', 'O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']]
-        >>> y_pred = [['O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']]
+        >>> y_true = [['O', 'O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'O'],
+        ...           ['B-PER', 'I-PER', 'O']]
+        >>> y_pred = [['O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'O'],
+        ...           ['B-PER', 'I-PER', 'O']]
         >>> f1_score(y_true, y_pred)
         0.50
+
     """
     true_entities = set(get_entities(y_true, suffix))
     pred_entities = set(get_entities(y_pred, suffix))
@@ -186,16 +223,19 @@ def accuracy_score(y_true, y_pred):
 
     Example:
         >>> from seqeval.metrics import accuracy_score
-        >>> y_true = [['O', 'O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']]
-        >>> y_pred = [['O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']]
+        >>> y_true = [['O', 'O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'O'],
+        ...           ['B-PER', 'I-PER', 'O']]
+        >>> y_pred = [['O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'O'],
+        ...           ['B-PER', 'I-PER', 'O']]
         >>> accuracy_score(y_true, y_pred)
         0.80
+
     """
     if any(isinstance(s, list) for s in y_true):
         y_true = [item for sublist in y_true for item in sublist]
         y_pred = [item for sublist in y_pred for item in sublist]
 
-    nb_correct = sum(y_t==y_p for y_t, y_p in zip(y_true, y_pred))
+    nb_correct = sum(y_t == y_p for y_t, y_p in zip(y_true, y_pred))
     nb_true = len(y_true)
 
     score = nb_correct / nb_true
@@ -208,7 +248,8 @@ def precision_score(y_true, y_pred, average='micro', suffix=False):
 
     The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of
     true positives and ``fp`` the number of false positives. The precision is
-    intuitively the ability of the classifier not to label as positive a sample.
+    intuitively the ability of the classifier not to label as positive a
+    sample.
 
     The best value is 1 and the worst value is 0.
 
@@ -221,10 +262,13 @@ def precision_score(y_true, y_pred, average='micro', suffix=False):
 
     Example:
         >>> from seqeval.metrics import precision_score
-        >>> y_true = [['O', 'O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']]
-        >>> y_pred = [['O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']]
+        >>> y_true = [['O', 'O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'O'],
+        ...           ['B-PER', 'I-PER', 'O']]
+        >>> y_pred = [['O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'O'],
+        ...           ['B-PER', 'I-PER', 'O']]
         >>> precision_score(y_true, y_pred)
         0.50
+
     """
     true_entities = set(get_entities(y_true, suffix))
     pred_entities = set(get_entities(y_pred, suffix))
@@ -255,10 +299,13 @@ def recall_score(y_true, y_pred, average='micro', suffix=False):
 
     Example:
         >>> from seqeval.metrics import recall_score
-        >>> y_true = [['O', 'O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']]
-        >>> y_pred = [['O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']]
+        >>> y_true = [['O', 'O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'O'],
+        ...           ['B-PER', 'I-PER', 'O']]
+        >>> y_pred = [['O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'O'],
+        ...           ['B-PER', 'I-PER', 'O']]
         >>> recall_score(y_true, y_pred)
         0.50
+
     """
     true_entities = set(get_entities(y_true, suffix))
     pred_entities = set(get_entities(y_pred, suffix))
@@ -272,8 +319,7 @@ def recall_score(y_true, y_pred, average='micro', suffix=False):
 
 
 def performance_measure(y_true, y_pred):
-    """
-    Compute the performance metrics: TP, FP, FN, TN
+    """Compute the performance metrics: TP, FP, FN, TN.
 
     Args:
         y_true : 2d array. Ground truth (correct) target values.
@@ -284,10 +330,13 @@ def performance_measure(y_true, y_pred):
 
     Example:
         >>> from seqeval.metrics import performance_measure
-        >>> y_true = [['O', 'O', 'O', 'B-MISC', 'I-MISC', 'O', 'B-ORG'], ['B-PER', 'I-PER', 'O']]
-        >>> y_pred = [['O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'O', 'O'], ['B-PER', 'I-PER', 'O']]
+        >>> y_true = [['O', 'O', 'O', 'B-MISC', 'I-MISC', 'O', 'B-ORG'],
+        ...           ['B-PER', 'I-PER', 'O']]
+        >>> y_pred = [['O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'O', 'O'],
+        ...           ['B-PER', 'I-PER', 'O']]
         >>> performance_measure(y_true, y_pred)
         (3, 3, 1, 4)
+
     """
     performace_dict = dict()
     if any(isinstance(s, list) for s in y_true):
@@ -307,15 +356,19 @@ def classification_report(y_true, y_pred, digits=2, suffix=False):
     Args:
         y_true : 2d array. Ground truth (correct) target values.
         y_pred : 2d array. Estimated targets as returned by a classifier.
-        digits : int. Number of digits for formatting output floating point values.
+        digits : int. Number of digits for formatting output floating point
+            values.
 
     Returns:
-        report : string. Text summary of the precision, recall, F1 score for each class.
+        report : string. Text summary of the precision, recall, F1 score for
+            each class.
 
     Examples:
         >>> from seqeval.metrics import classification_report
-        >>> y_true = [['O', 'O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']]
-        >>> y_pred = [['O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']]
+        >>> y_true = [['O', 'O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'O'],
+        ...           ['B-PER', 'I-PER', 'O']]
+        >>> y_pred = [['O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'O'],
+        ...           ['B-PER', 'I-PER', 'O']]
         >>> print(classification_report(y_true, y_pred))
                      precision    recall  f1-score   support
         <BLANKLINE>
@@ -325,6 +378,7 @@ def classification_report(y_true, y_pred, digits=2, suffix=False):
           micro avg       0.50      0.50      0.50         2
           macro avg       0.50      0.50      0.50         2
         <BLANKLINE>
+
     """
     true_entities = set(get_entities(y_true, suffix))
     pred_entities = set(get_entities(y_pred, suffix))
@@ -359,7 +413,8 @@ def classification_report(y_true, y_pred, digits=2, suffix=False):
         r = nb_correct / nb_true if nb_true > 0 else 0
         f1 = 2 * p * r / (p + r) if p + r > 0 else 0
 
-        report += row_fmt.format(*[type_name, p, r, f1, nb_true], width=width, digits=digits)
+        report += row_fmt.format(*[type_name, p, r, f1, nb_true], width=width,
+                                 digits=digits)
 
         ps.append(p)
         rs.append(r)
diff --git a/poldeepner2/utils/sequences.py b/poldeepner2/utils/sequences.py
index 884e1cee8ab65e96c6ad460372080ecb3a981dcb..25bfd2d0548727ebb18b23b7e14f45d220a6a7ed 100644
--- a/poldeepner2/utils/sequences.py
+++ b/poldeepner2/utils/sequences.py
@@ -80,8 +80,10 @@ def tokens_and_labels_into_token_features(tokens: List[str], labels: List[str],
     for word, label_1 in zip(tokens, labels):
         subtokens = encode_method(word.strip())
         if len(subtokens) == 0:
-            logging.warning(f"Token '{word}' has no subwords")
-            continue
+            replacement = "x" * len(word.strip())
+            logging.warning(f"Token '{word}' has no subwords. "
+                            f"It was replaced with '{replacement}'")
+            subtokens = encode_method(replacement)
         tfs.append(TokenFeatures(subtokens, label_map[label_1]))
     return tfs
 
diff --git a/poldeepner2/utils/train_utils.py b/poldeepner2/utils/train_utils.py
index d8063b9649a4aab75db4266db2ea45f2962abd9b..facd3535904cd1b015a1a5650b0e9d08d919df95 100644
--- a/poldeepner2/utils/train_utils.py
+++ b/poldeepner2/utils/train_utils.py
@@ -1,3 +1,4 @@
+"""A message of shame -- documentation must be completed."""
 import torch
 from torch.utils.data import SequentialSampler, DataLoader
 from poldeepner2.utils.sequence_labeling import classification_report, f1_score, precision_score, recall_score
diff --git a/poleval_ner_test.py b/poleval_ner_test.py
index 1c45cf1528af99bc573d05c2c4ff21f69e203abb..0bba6d5c64c8e6e76ee24be6ad7b3662e19efa2f 100644
--- a/poleval_ner_test.py
+++ b/poleval_ner_test.py
@@ -1,7 +1,22 @@
-import sys, json, getopt
+"""A message of shame -- documentation must be completed."""
+
+import getopt
+import json
+import sys
+
 from dateutil import parser
 
+
 def overlap(offsetsa, offsetsb):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        offsetsa:A message of shame -- documentation must be completed.
+        offsetsb:A message of shame -- documentation must be completed.
+
+    Returns: not (int(end1) < int(start2) or int(end2) < int(start1))
+
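+    Example (illustrative):
+        >>> overlap("0_5", "3_8")
+        True
+        >>> overlap("0_2", "3_8")
+        False
+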
+    """
     try:
         start1, end1 = offsetsa.split('_')
         start2, end2 = offsetsb.split('_')
@@ -10,7 +25,17 @@ def overlap(offsetsa, offsetsb):
         print(offsetsb)
     return not (int(end1) < int(start2) or int(end2) < int(start1))
 
+
 def exact(offsetsa, offsetsb):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        offsetsa:A message of shame -- documentation must be completed.
+        offsetsb:A message of shame -- documentation must be completed.
+
+    Returns: (int(start1) == int(start2)) and (int(end1) == int(end2))
+
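+    Example (illustrative):
+        >>> exact("0_5", "0_5")
+        True
+        >>> exact("0_5", "0_6")
+        False
+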
+    """
     try:
         start1, end1 = offsetsa.split('_')
         start2, end2 = offsetsb.split('_')
@@ -19,11 +44,31 @@ def exact(offsetsa, offsetsb):
         print(offsetsb)
     return (int(start1) == int(start2)) and (int(end1) == int(end2))
 
-# this to ensure we get rid of derived types when loading entities (redundant otherwise)
+
+# This ensures we get rid of derived types when loading entities
+# (redundant otherwise).
 def removeDerivs(annots):
-    return { (a,c) for a,c in annots if c.find('derivType') < 0 }
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        annots:A message of shame -- documentation must be completed.
+
+    Returns:A message of shame -- documentation must be completed.
+
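+    Example (illustrative):
+        >>> removeDerivs({("0_5", "persName"), ("0_5", "derivType")})
+        {('0_5', 'persName')}
+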
+    """
+    return {(a, c) for a, c in annots if c.find('derivType') < 0}
+
 
 def compareTextsOverlap(eGold, eModel):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        eGold:A message of shame -- documentation must be completed.
+        eModel:A message of shame -- documentation must be completed.
+
+    Returns: [tp, fp, fn]
+
+    """
     eGold = removeDerivs(eGold)
     eModel = removeDerivs(eModel)
     tp, fp, fn = 0, 0, 0
@@ -36,7 +81,17 @@ def compareTextsOverlap(eGold, eModel):
     fn = len(eGold) - tp
     return [tp, fp, fn]
 
+
 def compareTextsExact(eGold, eModel):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        eGold:A message of shame -- documentation must be completed.
+        eModel:A message of shame -- documentation must be completed.
+
+    Returns: [tp, fp, fn]
+
+    """
     eGold = removeDerivs(eGold)
     eModel = removeDerivs(eModel)
     tp, fp, fn = 0, 0, 0
@@ -49,35 +104,59 @@ def compareTextsExact(eGold, eModel):
     fn = len(eGold) - tp
     return [tp, fp, fn]
 
+
 def makeAnnsFormat(inputDoc, cols, htype):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        inputDoc:A message of shame -- documentation must be completed.
+        cols:A message of shame -- documentation must be completed.
+        htype:A message of shame -- documentation must be completed.
+
+    Returns: z_anns
+
+    """
     z_anns = []
     for ben in inputDoc.split('\n'):
         pcs = ben.split('\t')
         try:
-            if len(pcs)==cols:
+            if len(pcs) == cols:
                 cat, ofrom, oto = pcs[-2].split(' ')
-                z_anns.append( [ofrom+"_"+oto,  cat] )
+                z_anns.append([ofrom + "_" + oto, cat])
         except ValueError:
             # handling fragmented entity, two strategies:
-            if htype=='merge':
+            if htype == 'merge':
                 # take start and end, use as a single big entity
                 cat, ofrom, ignored, oto = pcs[-2].split(' ')
-                z_anns.append( [ofrom+"_"+oto,  cat] )
-            if htype=='split':
+                z_anns.append([ofrom + "_" + oto, cat])
+            if htype == 'split':
                 # split into two entities
                 catAndOffsets1, offsets2 = pcs[-2].split(';')
                 cat, ofrom, oto = catAndOffsets1.split(' ')
-                z_anns.append( [ofrom+"_"+oto,  cat] )
+                z_anns.append([ofrom + "_" + oto, cat])
                 ofrom, oto = offsets2.split(' ')
-                z_anns.append( [ofrom+"_"+oto,  cat] )            
+                z_anns.append([ofrom + "_" + oto, cat])
     return z_anns
 
+
 # compute micro F1 scores for exact and overlap matches
-# htype parameter reflects two possible strategies for handling fragmented entities ("split" or "merge")
+# htype parameter reflects two possible strategies for handling fragmented
+# entities ("split" or "merge")
 def computeScores(goldfile, userfile, htype="split"):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        goldfile:A message of shame -- documentation must be completed.
+        userfile:A message of shame -- documentation must be completed.
+        htype:A message of shame -- documentation must be completed.
 
-    global_tp_ov = 0 ; global_fp_ov = 0 ; global_fn_ov = 0
-    global_tp_ex = 0 ; global_fp_ex = 0 ; global_fn_ex = 0
+    """
+    global_tp_ov = 0
+    global_fp_ov = 0
+    global_fn_ov = 0
+    global_tp_ex = 0
+    global_fp_ex = 0
+    global_fn_ex = 0
 
     idsToAnnsUser = {}
     with open(userfile) as json_data:
@@ -89,7 +168,7 @@ def computeScores(goldfile, userfile, htype="split"):
             else:
                 idsToAnnsUser[userjson[nr]['id']] = ''
 
-    found = 0;
+    found = 0
     nonfound = 0
 
     idsToAnnsGold = {}
@@ -97,33 +176,40 @@ def computeScores(goldfile, userfile, htype="split"):
         goldjson = json.load(json_data)
 
     for nr in range(len(goldjson['questions'])):
-        idGold = '/'.join(goldjson['questions'][nr]['input']['fname'].split('/')[4:])
+        idGold = '/'.join(
+            goldjson['questions'][nr]['input']['fname'].split('/')[4:])
         # print(idGold)
         if idGold in idsToAnnsUser:
             found += 1
             # find the most recent answer:
             if len(goldjson['questions'][nr]['answers']) > 1:
-                maximum = parser.parse('1900-01-02T14:22:41.439308+00:00');
+                maximum = parser.parse('1900-01-02T14:22:41.439308+00:00')
                 index = 0
-                for i, value in enumerate(goldjson['questions'][nr]['answers']):
-                    value = parser.parse(goldjson['questions'][nr]['answers'][i]['created'])
+                for i, value in enumerate(
+                        goldjson['questions'][nr]['answers']):
+                    value = parser.parse(
+                        goldjson['questions'][nr]['answers'][i]['created'])
                     if value > maximum:
                         maximum = value
                         index = i
-                idsToAnnsGold[idGold] = goldjson['questions'][nr]['answers'][index]['data']['brat']
+                idsToAnnsGold[idGold] = \
+                    goldjson['questions'][nr]['answers'][index]['data']['brat']
             else:
-                idsToAnnsGold[idGold] = goldjson['questions'][nr]['answers'][0]['data']['brat']
+                idsToAnnsGold[idGold] = \
+                    goldjson['questions'][nr]['answers'][0]['data']['brat']
 
                 # overlap scores:
-                ovtp = compareTextsOverlap(makeAnnsFormat(idsToAnnsGold[idGold], 3, htype),
-                                           makeAnnsFormat(idsToAnnsUser[idGold], 2, htype))
+                ovtp = compareTextsOverlap(
+                    makeAnnsFormat(idsToAnnsGold[idGold], 3, htype),
+                    makeAnnsFormat(idsToAnnsUser[idGold], 2, htype))
                 global_tp_ov += ovtp[0]
                 global_fp_ov += ovtp[1]
                 global_fn_ov += ovtp[2]
 
                 # exact match scores:
-                extp = compareTextsExact(makeAnnsFormat(idsToAnnsGold[idGold], 3, htype),
-                                         makeAnnsFormat(idsToAnnsUser[idGold], 2, htype))
+                extp = compareTextsExact(
+                    makeAnnsFormat(idsToAnnsGold[idGold], 3, htype),
+                    makeAnnsFormat(idsToAnnsUser[idGold], 2, htype))
                 global_tp_ex += extp[0]
                 global_fp_ex += extp[1]
                 global_fn_ex += extp[2]
@@ -133,26 +219,34 @@ def computeScores(goldfile, userfile, htype="split"):
             nonfound += 1
 
     print(userfile)
-    print("Nr of documents identified by ID in both data sets: "+str(found)+", not identified (left out): "+str(nonfound))
+    print("Nr of documents identified by ID in both data sets: " + str(
+        found) + ", not identified (left out): " + str(nonfound))
 
     prec = float(global_tp_ov) / float(global_fp_ov + global_tp_ov)
     recall = float(global_tp_ov) / float(global_fn_ov + global_tp_ov)
     f1o = float(2 * prec * recall) / float(prec + recall)
-    print("OVERLAP precision: %0.3f recall: %0.3f F1: %0.3f " % (prec, recall, f1o))
+    print("OVERLAP precision: %0.3f recall: %0.3f F1: %0.3f " % (
+        prec, recall, f1o))
 
     prec = float(global_tp_ex) / float(global_fp_ex + global_tp_ex)
     recall = float(global_tp_ex) / float(global_fn_ex + global_tp_ex)
     f1e = float(2 * prec * recall) / float(prec + recall)
-    print("EXACT precision: %0.3f recall: %0.3f F1: %0.3f " % (prec, recall, f1e))
+    print("EXACT precision: %0.3f recall: %0.3f F1: %0.3f " % (
+        prec, recall, f1e))
 
-    print("Final score: %0.3f" % (f1o*0.8 + f1e*0.2))
-
-    print("Exact TP=%d ; FP=%d; FN=%d" % (global_tp_ex, global_fp_ex, global_fn_ex))
+    print("Final score: %0.3f" % (f1o * 0.8 + f1e * 0.2))
 
+    print("Exact TP=%d ; FP=%d; FN=%d" % (
+        global_tp_ex, global_fp_ex, global_fn_ex))
 
 
 def main(argv):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        argv:A message of shame -- documentation must be completed.
 
+    """
     goldfile = 'POLEVAL-NER_GOLD.json'
     userfile = ''
     try:
@@ -162,18 +256,19 @@ def main(argv):
         sys.exit(2)
 
     for opt, arg in opts:
-      if opt == '-h':
-        print('poleval_ner_test.py -g <goldfile> -u <userfile>')
-        sys.exit()
-      elif opt in ("-u", "--userfile"):
-        userfile = arg
-      elif opt in ("-g", "--goldfile"):
-        goldfile = arg
+        if opt == '-h':
+            print('poleval_ner_test.py -g <goldfile> -u <userfile>')
+            sys.exit()
+        elif opt in ("-u", "--userfile"):
+            userfile = arg
+        elif opt in ("-g", "--goldfile"):
+            goldfile = arg
 
     print('gold file is: ' + goldfile)
-    print('user file is: '+ userfile)
+    print('user file is: ' + userfile)
 
     computeScores(goldfile, userfile, htype="split")
 
+
 if __name__ == "__main__":
     main(sys.argv[1:])
diff --git a/poleval_ner_test_v2.py b/poleval_ner_test_v2.py
index 83f46207346aa118d8d32ab0f9aadcf2a760c4c0..d86cadc216a23c9f6d8ed5de4a5963b6fc592cff 100644
--- a/poleval_ner_test_v2.py
+++ b/poleval_ner_test_v2.py
@@ -1,8 +1,12 @@
-import sys, json, getopt
+"""A message of shame -- documentation must be completed."""
+
+import getopt
+import json
+import sys
 
-from tqdm import tqdm
 from attr import dataclass
 from dateutil import parser
+from tqdm import tqdm
 
 """
 Source: http://poleval.pl/tasks/
@@ -11,11 +15,20 @@ Source: http://poleval.pl/tasks/
 
 @dataclass
 class CategoryNormalizer:
+    """A message of shame -- documentation must be completed."""
 
     lower: bool = False
     only_main: bool = False
 
     def normalize(self, name):
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            name:A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
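+        Example (illustrative):
+            >>> CategoryNormalizer(lower=True).normalize("PER")
+            'per'
+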
+        """
         if self.lower:
             name = name.lower()
         if self.only_main:
@@ -24,6 +37,15 @@ class CategoryNormalizer:
 
 
 def overlap(offsetsa, offsetsb):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        offsetsa:A message of shame -- documentation must be completed.
+        offsetsb:A message of shame -- documentation must be completed.
+
+    Returns:A message of shame -- documentation must be completed.
+
+    """
     try:
         start1, end1 = offsetsa.split('_')
         start2, end2 = offsetsb.split('_')
@@ -34,6 +56,15 @@ def overlap(offsetsa, offsetsb):
 
 
 def exact(offsetsa, offsetsb):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        offsetsa:A message of shame -- documentation must be completed.
+        offsetsb:A message of shame -- documentation must be completed.
+
+    Returns:A message of shame -- documentation must be completed.
+
+    """
     try:
         start1, end1 = offsetsa.split('_')
         start2, end2 = offsetsb.split('_')
@@ -43,12 +74,30 @@ def exact(offsetsa, offsetsb):
     return (int(start1) == int(start2)) and (int(end1) == int(end2))
 
 
-# this to ensure we get rid of derived types when loading entities (redundant otherwise)
+# This ensures we get rid of derived types when loading entities
+# (redundant otherwise).
 def removeDerivs(annots):
-    return { (a,c) for a,c in annots if c.find('derivType') < 0 }
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        annots:A message of shame -- documentation must be completed.
+
+    Returns:A message of shame -- documentation must be completed.
+
+    """
+    return {(a, c) for a, c in annots if c.find('derivType') < 0}
 
 
 def getAnnotatonText(content, spans):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        content:A message of shame -- documentation must be completed.
+        spans:A message of shame -- documentation must be completed.
+
+    Returns:A message of shame -- documentation must be completed.
+
+    """
     texts = []
     for span in spans.split(";"):
         range = span.split("_")
@@ -57,12 +106,24 @@ def getAnnotatonText(content, spans):
 
 
 def compareTextsOverlap(eGold, eModel, content, cn: CategoryNormalizer):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        eGold:A message of shame -- documentation must be completed.
+        eModel:A message of shame -- documentation must be completed.
+        content:A message of shame -- documentation must be completed.
+        cn:A message of shame -- documentation must be completed.
+
+    Returns:A message of shame -- documentation must be completed.
+
+    """
     eGold = removeDerivs(eGold)
     eModel = removeDerivs(eModel)
     tp, fp, fn = 0, 0, 0
     for (offsets_gold, cat_gold) in eGold:
         for (offsets_model, cat_model) in eModel:
-            if overlap(offsets_gold, offsets_model) and cn.normalize(cat_gold) == cn.normalize(cat_model):
+            if overlap(offsets_gold, offsets_model) and cn.normalize(
+                    cat_gold) == cn.normalize(cat_model):
                 tp += 1
                 break
     fp = len(eModel) - tp
@@ -71,12 +132,23 @@ def compareTextsOverlap(eGold, eModel, content, cn: CategoryNormalizer):
 
 
 def compareTextsExact(eGold, eModel, cn: CategoryNormalizer):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        eGold:A message of shame -- documentation must be completed.
+        eModel:A message of shame -- documentation must be completed.
+        cn:A message of shame -- documentation must be completed.
+
+    Returns:A message of shame -- documentation must be completed.
+
+    """
     eGold = removeDerivs(eGold)
     eModel = removeDerivs(eModel)
     tp, fp, fn = 0, 0, 0
     for (offsets_gold, cat_gold) in eGold:
         for (offsets_model, cat_model) in eModel:
-            if exact(offsets_gold, offsets_model) and cn.normalize(cat_gold) == cn.normalize(cat_model):
+            if exact(offsets_gold, offsets_model) and cn.normalize(
+                    cat_gold) == cn.normalize(cat_model):
                 tp += 1
                 break
     fp = len(eModel) - tp
@@ -85,34 +157,60 @@ def compareTextsExact(eGold, eModel, cn: CategoryNormalizer):
 
 
 def makeAnnsFormat(inputDoc, cols, htype):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        inputDoc:A message of shame -- documentation must be completed.
+        cols:A message of shame -- documentation must be completed.
+        htype:A message of shame -- documentation must be completed.
+
+    Returns:A message of shame -- documentation must be completed.
+
+    """
     z_anns = []
     for ben in inputDoc.split('\n'):
         pcs = ben.split('\t')
         try:
-            if len(pcs)==cols:
+            if len(pcs) == cols:
                 cat, ofrom, oto = pcs[-2].split(' ')
-                z_anns.append( [ofrom+"_"+oto,  cat] )
+                z_anns.append([ofrom + "_" + oto, cat])
         except ValueError:
             # handling fragmented entity, two strategies:
-            if htype=='merge':
+            if htype == 'merge':
                 # take start and end, use as a single big entity
                 cat, ofrom, ignored, oto = pcs[-2].split(' ')
-                z_anns.append( [ofrom+"_"+oto,  cat] )
-            if htype=='split':
+                z_anns.append([ofrom + "_" + oto, cat])
+            if htype == 'split':
                 # split into two entities
                 catAndOffsets1, offsets2 = pcs[-2].split(';')
                 cat, ofrom, oto = catAndOffsets1.split(' ')
-                z_anns.append( [ofrom+"_"+oto,  cat] )
+                z_anns.append([ofrom + "_" + oto, cat])
                 ofrom, oto = offsets2.split(' ')
-                z_anns.append( [ofrom+"_"+oto,  cat] )
+                z_anns.append([ofrom + "_" + oto, cat])
     return z_anns
 
-# compute micro F1 scores for exact and overlap matches
-# htype parameter reflects two possible strategies for handling fragmented entities ("split" or "merge")
-def computeScores(goldfile, userfile, cn: CategoryNormalizer, htype="split", types=None):
 
-    global_tp_ov = 0 ; global_fp_ov = 0 ; global_fn_ov = 0
-    global_tp_ex = 0 ; global_fp_ex = 0 ; global_fn_ex = 0
+# compute micro F1 scores for exact and overlap matches; the htype parameter
+# reflects two possible strategies for handling fragmented entities ("split"
+# or "merge")
+def computeScores(goldfile, userfile, cn: CategoryNormalizer, htype="split",
+                  types=None):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        goldfile:A message of shame -- documentation must be completed.
+        userfile:A message of shame -- documentation must be completed.
+        cn:A message of shame -- documentation must be completed.
+        htype:A message of shame -- documentation must be completed.
+        types:A message of shame -- documentation must be completed.
+
+    """
+    global_tp_ov = 0
+    global_fp_ov = 0
+    global_fn_ov = 0
+    global_tp_ex = 0
+    global_fp_ex = 0
+    global_fn_ex = 0
 
     idsToAnnsUser = {}
     with open(userfile) as json_data:
@@ -132,29 +230,36 @@ def computeScores(goldfile, userfile, cn: CategoryNormalizer, htype="split", typ
         goldjson = json.load(json_data)
 
     for nr in tqdm(range(len(goldjson['questions']))):
-        idGold = '/'.join(goldjson['questions'][nr]['input']['fname'].split('/')[4:])
+        idGold = '/'.join(
+            goldjson['questions'][nr]['input']['fname'].split('/')[4:])
         if idGold in idsToAnnsUser:
             found += 1
             # find the most recent answer:
             if len(goldjson['questions'][nr]['answers']) > 1:
-                maximum = parser.parse('1900-01-02T14:22:41.439308+00:00');
+                maximum = parser.parse('1900-01-02T14:22:41.439308+00:00')
                 index = 0
-                for i, value in enumerate(goldjson['questions'][nr]['answers']):
-                    value = parser.parse(goldjson['questions'][nr]['answers'][i]['created'])
+                for i, value in enumerate(
+                        goldjson['questions'][nr]['answers']):
+                    value = parser.parse(
+                        goldjson['questions'][nr]['answers'][i]['created'])
                     if value > maximum:
                         maximum = value
                         index = i
-                idsToAnnsGold[idGold] = goldjson['questions'][nr]['answers'][index]['data']['brat']
+                idsToAnnsGold[idGold] = \
+                    goldjson['questions'][nr]['answers'][index]['data']['brat']
             else:
                 content = goldjson['questions'][nr]['input']['fileContent']
-                idsToAnnsGold[idGold] = goldjson['questions'][nr]['answers'][0]['data']['brat']
+                idsToAnnsGold[idGold] = \
+                    goldjson['questions'][nr]['answers'][0]['data']['brat']
 
                 gold = makeAnnsFormat(idsToAnnsGold[idGold], 3, htype)
                 user = makeAnnsFormat(idsToAnnsUser[idGold], 2, htype)
 
                 if types is not None:
-                    gold = [(span, type) for span, type in gold if type in types]
-                    user = [(span, type) for span, type in user if type in types]
+                    gold = [(span, type) for span, type in gold if
+                            type in types]
+                    user = [(span, type) for span, type in user if
+                            type in types]
 
                 # overlap scores:
                 ovtp = compareTextsOverlap(gold, user, content, cn)
@@ -173,55 +278,66 @@ def computeScores(goldfile, userfile, cn: CategoryNormalizer, htype="split", typ
             nonfound += 1
 
     print(userfile)
-    print("Nr of documents identified by ID in both data sets: "+str(found)+", not identified (left out): "+str(nonfound))
+    print("Nr of documents identified by ID in both data sets: " + str(
+        found) + ", not identified (left out): " + str(nonfound))
     if types is not None:
         print("NE types to evaluate: " + ", ".join(types))
 
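+    # micro-averaged precision, recall and F1 over all documents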
     prec = float(global_tp_ov) / float(global_fp_ov + global_tp_ov)
     recall = float(global_tp_ov) / float(global_fn_ov + global_tp_ov)
     f1o = float(2 * prec * recall) / float(prec + recall)
-    print("OVERLAP precision: %0.3f recall: %0.3f F1: %0.3f " % (prec, recall, f1o))
+    print("OVERLAP precision: %0.3f recall: %0.3f F1: %0.3f " % (
+        prec, recall, f1o))
 
     prec = float(global_tp_ex) / float(global_fp_ex + global_tp_ex)
     recall = float(global_tp_ex) / float(global_fn_ex + global_tp_ex)
     f1e = float(2 * prec * recall) / float(prec + recall)
-    print("EXACT precision: %0.3f recall: %0.3f F1: %0.3f " % (prec, recall, f1e))
+    print("EXACT precision: %0.3f recall: %0.3f F1: %0.3f " % (
+        prec, recall, f1e))
 
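+    # final PolEval score: 0.8 * overlap F1 + 0.2 * exact F1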
-    print("Final score: %0.3f" % (f1o*0.8 + f1e*0.2))
+    print("Final score: %0.3f" % (f1o * 0.8 + f1e * 0.2))
 
-    print("Exact TP=%d ; FP=%d; FN=%d" % (global_tp_ex, global_fp_ex, global_fn_ex))
+    print("Exact TP=%d ; FP=%d; FN=%d" % (
+        global_tp_ex, global_fp_ex, global_fn_ex))
 
 
 def main(argv):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        argv:A message of shame -- documentation must be completed.
 
+    """
     goldfile = 'POLEVAL-NER_GOLD.json'
     userfile = ''
     lower = False
     main_categories = False
     try:
-        opts, args = getopt.getopt(argv, "g:u:m:h:l", ["goldfile=", "userfile=", "categories-main", "categories-lower"])
+        opts, args = getopt.getopt(argv, "g:u:m:h:l",
+                                   ["goldfile=", "userfile=",
+                                    "categories-main", "categories-lower"])
     except getopt.GetoptError:
         print('poleval_ner_test.py -g <inputfile> -u <userfile>')
         sys.exit(2)
 
     for opt, arg in opts:
-      if opt == '-h':
-        print('poleval_ner_test.py -g <goldfile> -u <userfile>')
-        sys.exit()
-      elif opt in ("-u", "--userfile"):
-        userfile = arg
-      elif opt in ("-g", "--goldfile"):
-        goldfile = arg
-      elif opt in ("-m", "--categories-main"):
-        main_categories = True
-      elif opt in ("-l", "--categories-lower"):
-        lower = True
+        if opt == '-h':
+            print('poleval_ner_test.py -g <goldfile> -u <userfile>')
+            sys.exit()
+        elif opt in ("-u", "--userfile"):
+            userfile = arg
+        elif opt in ("-g", "--goldfile"):
+            goldfile = arg
+        elif opt in ("-m", "--categories-main"):
+            main_categories = True
+        elif opt in ("-l", "--categories-lower"):
+            lower = True
 
     print('gold file is: ' + goldfile)
     print('user file is: ' + userfile)
 
-    types = None
-    #types = set(["date"])
+    # types = None  # not used
+    # types = set(["date"])
 
     category_normalizer = CategoryNormalizer(lower, main_categories)
 
diff --git a/poleval_to_iob.py b/poleval_to_iob.py
index 106fb27e3d9a53acb3ebb90fd3e7411fa9ed76a8..0f0f19a5b1109adc83f2919473ed37493f5649ed 100644
--- a/poleval_to_iob.py
+++ b/poleval_to_iob.py
@@ -1,3 +1,5 @@
+"""A message of shame -- documentation must be completed."""
+
 from __future__ import absolute_import, division, print_function
 
 import argparse
@@ -8,19 +10,35 @@ from poldeepner2.utils.data_utils import read_json, map_json_to_iob
 
 
 def get_id(ini_file):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        ini_file:A message of shame -- documentation must be completed.
+
+    Returns:A message of shame -- documentation must be completed.
+
+    """
     for line in codecs.open(ini_file, "r", "utf8"):
         if 'id = ' in line:
             return line.replace('id = ', '')
 
+
 def main(args):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        args:A message of shame -- documentation must be completed.
 
+    """
     print("Loading the NER model ...")
     json_ann = read_json(args.json)
     parent = os.path.dirname(args.input)
     paths = codecs.open(args.input, "r", "utf8").readlines()
     if not os.path.exists(args.output):
         os.makedirs(args.output)
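+    # derive the output directory from the index file path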
-    output_dir = args.output.replace('index.list','') 
+    output_dir = args.output.replace('index.list', '')
     paths_count = len(paths)
     global_success = 0
     global_failed = 0
@@ -32,24 +50,39 @@ def main(args):
         path = os.path.dirname(abs_path)
         doc_id = get_id(os.path.join(path, name + ".ini")).split('/')[-1]
         print("%d from %d: %s" % (n, paths_count, doc_id))
-        mapped_iob, success, failed, derives = map_json_to_iob(json_ann[doc_id], os.path.join(path, name + '.iob'))
+        mapped_iob, success, failed, derives = map_json_to_iob(
+            json_ann[doc_id], os.path.join(path, name + '.iob'))
         global_success += success
         global_failed += failed
         global_derives += derives
         codecs.open(output_dir + name + '.iob', "w", "utf8").write(mapped_iob)
-    print("Successed:{}, Failed: {}, Derives skipped {}".format(global_success, global_failed, global_derives))
+    print("Successed:{}, Failed: {}, Derives skipped {}".format(global_success,
+                                                                global_failed,
+                                                                global_derives)
+          )
+
 
 def parse_args():
+    """A message of shame -- documentation must be completed.
+
+    Returns:A message of shame -- documentation must be completed.
+
+    """
     parser = argparse.ArgumentParser(
-        description='Convert set of IOB files into a single json file in PolEval 2018 NER format')
-    parser.add_argument('--input', required=True, metavar='PATH', help='path to input')
-    parser.add_argument('--output', required=True, metavar='PATH', help='path to output directory')
-    parser.add_argument('--json', required=True, metavar='PATH', help='path to json')
+        description='Convert set of IOB files into a single json file in '
+                    'PolEval 2018 NER format')
+    parser.add_argument('--input', required=True, metavar='PATH',
+                        help='path to input')
+    parser.add_argument('--output', required=True, metavar='PATH',
+                        help='path to output directory')
+    parser.add_argument('--json', required=True, metavar='PATH',
+                        help='path to json')
     return parser.parse_args()
 
+
 if __name__ == "__main__":
     args = parse_args()
     try:
         main(args)
     except ValueError as er:
-        print("[ERROR] %s" % er)
\ No newline at end of file
+        print("[ERROR] %s" % er)
diff --git a/predictor.py b/predictor.py
new file mode 100644
index 0000000000000000000000000000000000000000..3d7151ad64d70cab1ba9beb6d59fc5f2a476ea4e
--- /dev/null
+++ b/predictor.py
@@ -0,0 +1,44 @@
+"""Script for tagging raw data."""
+
+import configparser
+
+from poldeepner2.models import PolDeepNer2
+
+
+def main():
+    """Tag raw input data with a PolDeepNer2 model."""
+    # note: the config should be serialized together with the model (JSON)
+    config_file = "config.cfg"
+    config = configparser.ConfigParser()
+    config.read(config_file)
+
+    model = config['model']['path']
+    pretrained_model = config['model']['pretrained_path']
+
+    ner = PolDeepNer2.load(model=model, pretrained_path=pretrained_model)
+
+    data_path = config['data']['pred_path']
+    with open(data_path) as f:
+        data = f.readlines()
+
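+    # either print predictions to stdout or write them to the configured file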
+    if not config.getboolean('predict', 'save_to_file'):
+        for sentence in data:
+            if sentence != '\n':
+                print(sentence)
+                text_prediction = ner.process_text(sentence)
+                for pred in text_prediction:
+                    print(f'{pred.text}, {pred.label}')
+
+    else:
+        with open(config['predict']['path_to_save'], 'w+') as f_res:
+            for sentence in data:
+                if sentence != '\n':
+                    text_prediction = ner.process_text(sentence)
+                    for pred in text_prediction:
+                        f_res.write(f'{pred.text}, {pred.label}\n')
+                else:
+                    f_res.write('\n')
+
+
+if __name__ == "__main__":
+    main()
diff --git a/process_poleval.py b/process_poleval.py
index a4c221992b899e3ef609d1f77be3ad7ef27b1f97..6d327b378c8d9433fb2cb11b9a6f7abe725d729d 100644
--- a/process_poleval.py
+++ b/process_poleval.py
@@ -1,3 +1,5 @@
+"""A message of shame -- documentation must be completed."""
+
 from __future__ import absolute_import, division, print_function
 
 import logging
@@ -10,13 +12,20 @@ import time
 
 from tqdm import tqdm
 
-import poldeepner2
 from poldeepner2.models import PolDeepNer2
 from poldeepner2.pipeline import tokenization
 from poldeepner2.utils.data_utils import get_poleval_dict, wrap_annotations
 
 
 def merge_sentences(sentences: [[str]]):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        sentences:A message of shame -- documentation must be completed.
+
+    Returns:A message of shame -- documentation must be completed.
+
+    """
     flat_list = []
     for lit in sentences:
         flat_list.extend(lit)
@@ -24,16 +33,26 @@ def merge_sentences(sentences: [[str]]):
 
 
 def main(args):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        args:A message of shame -- documentation must be completed.
+
+    Returns:A message of shame -- documentation must be completed.
+
+    """
     print("Loading the NER model ...")
     t0 = time.time()
-    # if args.pretrained_path:
-    #     #ner = PolDeepNer2(args.model, args.pretrained_path, device=args.device, max_seq_length=args.max_seq_length,
-    #     #                  squeeze=args.squeeze, seed=args.seed, tokenizer=tokenizer)
-    # else:
-    #     # ner = poldeepner2.models.load(args.model, device=args.device, resources_path=".models")
-    #     # ner.max_seq_length = args.max_seq_length
-
-    tokenizer = tokenization.load(args.tokenization) if args.tokenization else None
+    # if args.pretrained_path:
+    #     ner = PolDeepNer2(args.model, args.pretrained_path,
+    #                       device=args.device,
+    #                       max_seq_length=args.max_seq_length,
+    #                       squeeze=args.squeeze, seed=args.seed,
+    #                       tokenizer=tokenizer)
+    # else:
+    #     ner = poldeepner2.models.load(args.model, device=args.device,
+    #                                   resources_path=".models")
+    #     ner.max_seq_length = args.max_seq_length
+
+    tokenizer = tokenization.load(args.tokenization) \
+        if args.tokenization \
+        else None
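+    # load the serialized model once; the tokenization backend is optional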
     ner = PolDeepNer2.load(
         model=args.model,
         pretrained_path=args.pretrained_path,
@@ -44,7 +63,6 @@ def main(args):
         tokenizer=tokenizer
     )
 
-
     time_load = time.time() - t0
 
     time_preprocess = 0
@@ -53,10 +71,13 @@ def main(args):
 
     dict_list = []
 
-    with open(os.path.join(pathlib.Path(__file__).parent.absolute(), args.input), encoding='UTF-8') as f:
+    with open(
+            os.path.join(pathlib.Path(__file__).parent.absolute(), args.input),
+            encoding='UTF-8') as f:
         sentences = json.load(f)['questions']
         for i, sentence in tqdm(enumerate(sentences), total=len(sentences)):
-            id = sentence['input']['fname'].replace("/home/a.wawer/poleval/", "")
+            id = sentence['input']['fname'].replace("/home/a.wawer/poleval/",
+                                                    "")
             file_content = sentence['input']['fileContent']
             data_size += len(file_content)
             texts = file_content.split('\n')
@@ -71,37 +92,61 @@ def main(args):
                 predictions = merge_sentences(predictions)
                 tokenized_sentences = merge_sentences(tokenized_sentences)
             annotations = wrap_annotations(predictions)
-            dict_list.append(get_poleval_dict(id, file_content, tokenized_sentences, annotations))
+            dict_list.append(
+                get_poleval_dict(id, file_content, tokenized_sentences,
+                                 annotations))
             time_ner += (time.time() - t0)
 
-    codecs.open(args.output, "w", "utf8").write(json.dumps(dict_list, indent=4))
+    codecs.open(args.output, "w", "utf8").write(
+        json.dumps(dict_list, indent=4))
 
     print(f"Model loading time          : {time_load:8.4} second(s)")
     print(f"Data preprocessing time     : {time_preprocess:8.4} second(s)")
     print(f"Data NE recognition time    : {time_ner:8.4} second(s)")
-    print(f"Total time                  : {time_load+time_preprocess+time_ner:8.4} second(s)")
-    print(f"Data size:                  : {data_size/1000000:8.4}M characters")
+    print(f'Total time                  : '
+          f'{time_load + time_preprocess + time_ner:8.4} second(s)')
+    print(f"Data size:                  : "
+          f"{data_size / 1000000:8.4}M characters")
 
 
 def parse_args():
+    """A message of shame -- documentation must be completed.
+
+    Returns:A message of shame -- documentation must be completed.
+
+    """
     parser = argparse.ArgumentParser(
-        description='Convert set of IOB files into a single json file in PolEval 2018 NER format')
-    parser.add_argument('--input', required=True, metavar='PATH', help='path to a file with a list of files')
-    parser.add_argument('--output', required=True, metavar='PATH', help='path to a json output file')
-    parser.add_argument('--model', required=True, metavar='PATH', help='model name or path to a model name')
-    parser.add_argument('--pretrained_path', required=False, metavar='PATH', help='pretrained XLM-Roberta model path')
-    parser.add_argument('--max_seq_length', required=False, default=256, metavar='N', type=int,
-                        help='the maximum total input sequence length after WordPiece tokenization.')
-    parser.add_argument('--device', required=False, default="cpu", metavar='cpu|cuda',
+        description='Convert set of IOB files into a single json file in '
+                    'PolEval 2018 NER format')
+    parser.add_argument('--input', required=True, metavar='PATH',
+                        help='path to a file with a list of files')
+    parser.add_argument('--output', required=True, metavar='PATH',
+                        help='path to a json output file')
+    parser.add_argument('--model', required=True, metavar='PATH',
+                        help='model name or path to a model name')
+    parser.add_argument('--pretrained_path', required=False, metavar='PATH',
+                        help='pretrained XLM-Roberta model path')
+    parser.add_argument('--max_seq_length', required=False, default=256,
+                        metavar='N', type=int,
+                        help='the maximum total input sequence length after '
+                             'WordPiece tokenization.')
+    parser.add_argument('--device', required=False, default="cpu",
+                        metavar='cpu|cuda',
                         help='device type used for processing')
-    parser.add_argument('--tokenization', required=False, default=None, choices=tokenization.names,
+    parser.add_argument('--tokenization', required=False, default=None,
+                        choices=tokenization.names,
                         help='Tokenization method')
-    parser.add_argument('--squeeze', required=False, default=False, action="store_true",
-                        help='try to squeeze multiple examples into one Input Feature')
-    parser.add_argument('--seed', required=False, default=377, metavar='N', type=int,
+    parser.add_argument('--squeeze', required=False, default=False,
+                        action="store_true",
+                        help='try to squeeze multiple examples into one '
+                             'Input Feature')
+    parser.add_argument('--seed', required=False, default=377, metavar='N',
+                        type=int,
                         help='a seed used to initialize a number generator')
-    parser.add_argument('--merge', required=False, default=False, action="store_true",
-                        help='merge sentences into a single sentence before wrapping labels into annotations')
+    parser.add_argument('--merge', required=False, default=False,
+                        action="store_true",
+                        help='merge sentences into a single sentence before '
+                             'wrapping labels into annotations')
     return parser.parse_args()
 
 
diff --git a/process_poleval_pretokenized.py b/process_poleval_pretokenized.py
index 175fd0d6f90881d3873dc750f8e5225af990c4df..63ac26be9a9b77416f8057fe7f8777226e5e3f9a 100644
--- a/process_poleval_pretokenized.py
+++ b/process_poleval_pretokenized.py
@@ -1,3 +1,5 @@
+"""A message of shame -- documentation must be completed."""
+
 from __future__ import absolute_import, division, print_function
 
 import argparse
@@ -7,17 +9,34 @@ import json
 import logging
 
 from poldeepner2.models import PolDeepNer2
-from poldeepner2.utils.data_utils import get_poleval_dict, read_tsv, wrap_annotations
+from poldeepner2.utils.data_utils import get_poleval_dict, read_tsv, \
+    wrap_annotations
 from poldeepner2.utils.preprocess import split_hashtags, split_leading_name
 
 
 def get_id(ini_file):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        ini_file:A message of shame -- documentation must be completed.
+
+    Returns:A message of shame -- documentation must be completed.
+
+    """
     for line in codecs.open(ini_file, "r", "utf8"):
         if 'id = ' in line:
             return line.replace('id = ', '')
 
 
 def load_document(abs_path):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        abs_path:A message of shame -- documentation must be completed.
+
+    Returns:A message of shame -- documentation must be completed.
+
+    """
     namext = os.path.basename(abs_path)
     name = os.path.splitext(namext)[0]
     path = os.path.dirname(abs_path)
@@ -29,12 +48,20 @@ def load_document(abs_path):
 
 
 def main(args):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        args:A message of shame -- documentation must be completed.
+
+    """
     print("Loading the NER model ...")
-    ner = PolDeepNer2(args.model, args.pretrained_path, args.device, max_seq_length=args.max_seq_length,
+    ner = PolDeepNer2(args.model, args.pretrained_path,
+                      args.device, max_seq_length=args.max_seq_length,
                       squeeze=args.squeeze)
     parent = os.path.dirname(args.input)
     dict_list = []
-    for n, rel_path in enumerate(codecs.open(args.input, "r", "utf8").readlines()):
+    for n, rel_path in enumerate(
+            codecs.open(args.input, "r", "utf8").readlines()):
         abs_path = os.path.abspath(os.path.join(parent, rel_path.strip()))
         doc_id, text, sentences_raw = load_document(abs_path)
         print("Processing %d: %s" % (n, doc_id))
@@ -42,24 +69,43 @@ def main(args):
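+        # preprocessing: split hashtags and leading names before NER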
         sentences = [split_leading_name(sentence) for sentence in sentences]
         predictions = ner.process(sentences)
         annotations = wrap_annotations(predictions)
-        dict_list.append(get_poleval_dict(doc_id, text, sentences, annotations))
+        dict_list.append(
+            get_poleval_dict(doc_id, text, sentences, annotations))
         # debug_tokens_and_labels(sentences_raw, predictions)
-    codecs.open(args.output, "w", "utf8").write(json.dumps(dict_list, indent=4))
+    codecs.open(
+        args.output, "w", "utf8").write(json.dumps(dict_list, indent=4))
 
 
 def parse_args():
+    """A message of shame -- documentation must be completed.
+
+    Returns:A message of shame -- documentation must be completed.
+
+    """
     parser = argparse.ArgumentParser(
-        description='Convert set of IOB files into a single json file in PolEval 2018 NER format')
-    parser.add_argument('--input', required=True, metavar='PATH', help='path to a file with a list of files')
-    parser.add_argument('--output', required=True, metavar='PATH', help='path to a json output file')
-    parser.add_argument('--model', required=True, metavar='PATH', help='path to NER model')
-    parser.add_argument('--pretrained_path', required=True, metavar='PATH', help='pretrained XLM-Roberta model path')
-    parser.add_argument('--max_seq_length', required=False, default=256, metavar='N', type=int,
-                        help='the maximum total input sequence length after WordPiece tokenization.')
-    parser.add_argument('--device', required=False, default="cpu", metavar='cpu|cuda',
+        description='Convert set of IOB files into a single json file in '
+                    'PolEval 2018 NER format')
+    parser.add_argument('--input', required=True,
+                        metavar='PATH', help='path to a file with a list of '
+                                             'files')
+    parser.add_argument('--output', required=True,
+                        metavar='PATH', help='path to a json output file')
+    parser.add_argument('--model', required=True,
+                        metavar='PATH', help='path to NER model')
+    parser.add_argument('--pretrained_path', required=True,
+                        metavar='PATH', help='pretrained XLM-Roberta model '
+                                             'path')
+    parser.add_argument('--max_seq_length', required=False,
+                        default=256, metavar='N',
+                        type=int, help='the maximum total input sequence '
+                                       'length after WordPiece tokenization.')
+    parser.add_argument('--device', required=False,
+                        default="cpu", metavar='cpu|cuda',
                         help='device type used for processing')
-    parser.add_argument('--squeeze', required=False, default=False, action="store_true",
-                        help='try to squeeze multiple examples into one Input Feature')
+    parser.add_argument('--squeeze', required=False,
+                        default=False, action="store_true",
+                        help='try to squeeze multiple examples into one '
+                             'Input Feature')
     return parser.parse_args()
 
 
diff --git a/process_texts.py b/process_texts.py
index c0dc34738a82b5df226a82783c31e30ad6f7d142..f2a69b55fb6bc3803aa012ca51bc93ddc5d1c71f 100644
--- a/process_texts.py
+++ b/process_texts.py
@@ -1,3 +1,5 @@
+"""A message of shame -- documentation must be completed."""
+
 from __future__ import absolute_import, division, print_function
 
 import codecs
@@ -8,7 +10,7 @@ import glob
 import os
 from pathlib import Path
 
-import tqdm
+# "import tqdm" removed (F811: redefinition of unused 'tqdm')
 
 from tqdm import tqdm
 
@@ -18,6 +20,14 @@ from poldeepner2.utils.data_utils import wrap_annotations
 
 
 def flatten(list_of_lists):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        list_of_lists:A message of shame -- documentation must be completed.
+
+    Returns:A message of shame -- documentation must be completed.
+
+    """
     flat_list = []
     for lit in list_of_lists:
         flat_list.extend(lit)
@@ -25,6 +35,14 @@ def flatten(list_of_lists):
 
 
 def read_content_autobom(path: str) -> str:
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        path:A message of shame -- documentation must be completed.
+
+    Returns:A message of shame -- documentation must be completed.
+
+    """
     bytes = min(32, os.path.getsize(path))
     content = open(path, 'rb').read(bytes)
     if content.startswith(codecs.BOM_UTF8):
@@ -35,14 +53,26 @@ def read_content_autobom(path: str) -> str:
 
 
 def main(args):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        args:A message of shame -- documentation must be completed.
+
+    Returns:A message of shame -- documentation must be completed.
+
+    """
     print("Loading the NER model ...")
     t0 = time.time()
     if args.pretrained_path:
         tokenizer = tokenization.load(args.tokenization)
-        ner = PolDeepNer2(args.model, args.pretrained_path, device=args.device, max_seq_length=args.max_seq_length,
-                          squeeze=args.squeeze, seed=args.seed, tokenizer=tokenizer)
+        ner = PolDeepNer2(args.model, args.pretrained_path,
+                          device=args.device,
+                          max_seq_length=args.max_seq_length,
+                          squeeze=args.squeeze, seed=args.seed,
+                          tokenizer=tokenizer)
     else:
-        ner = PolDeepNer2.load(args.model, device=args.device, resources_path=".models")
+        ner = PolDeepNer2.load(args.model, device=args.device,
+                               resources_path=".models")
         if args.max_seq_length:
             ner.max_seq_length = args.max_seq_length
         if tokenization:
@@ -72,36 +102,56 @@ def main(args):
         output = Path(args.output) / Path(path).name
         with open(output, "w") as fout:
             for an in annotations:
-                text = " ".join([tokenized_sentences[0][n] for n in an.token_ids])
+                text = " ".join(
+                    [tokenized_sentences[0][n] for n in an.token_ids])
                 token_start = min(an.token_ids)
                 token_end = max(an.token_ids)
-                fout.write(f"{an.annotation}\t{token_start}\t{token_end}\t{text}\n")
+                fout.write(
+                    f"{an.annotation}\t{token_start}\t{token_end}\t{text}\n")
 
     print(f"Model loading time          : {time_load:8.4} second(s)")
     print(f"Data preprocessing time     : {time_preprocess:8.4} second(s)")
     print(f"Data NE recognition time    : {time_ner:8.4} second(s)")
-    print(f"Total time                  : {time_load+time_preprocess+time_ner:8.4} second(s)")
+    print(f"Total time                  : "
+          f"{time_load+time_preprocess+time_ner:8.4} second(s)")
     print(f"Data size:                  : {data_size/1000000:8.4}M characters")
 
 
 def parse_args():
+    """A message of shame -- documentation must be completed.
+
+    Returns:A message of shame -- documentation must be completed.
+
+    """
     parser = argparse.ArgumentParser(
-        description='Process a set of plain text files from given folder. The output is save to another folder.')
-    parser.add_argument('--input', required=True, metavar='PATH', help='path to an input  folder with texts')
-    parser.add_argument('--output', required=True, metavar='PATH', help='path to an output folder')
-    parser.add_argument('--model', required=True, metavar='PATH', help='model name or path to a model name')
+        description='Process a set of plain text files from a given folder. '
+                    'The output is saved to another folder.')
+    parser.add_argument('--input', required=True, metavar='PATH',
+                        help='path to an input folder with texts')
+    parser.add_argument('--output', required=True, metavar='PATH',
+                        help='path to an output folder')
+    parser.add_argument('--model', required=True, metavar='PATH',
+                        help='model name or path to a model name')
 
     # Required if the pretrained_path is given
-    parser.add_argument('--pretrained_path', required=False, metavar='PATH', help='pretrained XLM-Roberta model path')
-    parser.add_argument('--max_seq_length', required=False, default=None, metavar='N', type=int,
-                        help='the maximum total input sequence length after WordPiece tokenization.')
-    parser.add_argument('--device', required=False, default="cpu", metavar='cpu|cuda',
+    parser.add_argument('--pretrained_path', required=False, metavar='PATH',
+                        help='pretrained XLM-Roberta model path')
+    parser.add_argument('--max_seq_length', required=False, default=None,
+                        metavar='N', type=int,
+                        help='the maximum total input sequence length after '
+                             'WordPiece tokenization.')
+    parser.add_argument('--device', required=False, default="cpu",
+                        metavar='cpu|cuda',
                         help='device type used for processing')
-    parser.add_argument('--tokenization', required=False, default=None, choices=tokenization.names,
+    parser.add_argument('--tokenization', required=False, default=None,
+                        choices=tokenization.names,
                         help='Tokenization method')
-    parser.add_argument('--squeeze', required=False, default=False, action="store_true",
-                        help='try to squeeze multiple examples into one Input Feature')
-    parser.add_argument('--seed', required=False, default=377, metavar='N', type=int,
+    parser.add_argument('--squeeze', required=False, default=False,
+                        action="store_true",
+                        help='try to squeeze multiple examples into one '
+                             'Input Feature')
+    parser.add_argument('--seed', required=False, default=377,
+                        metavar='N', type=int,
                         help='a seed used to initialize a number generator')
     return parser.parse_args()
 
diff --git a/process_tsv.py b/process_tsv.py
index b3218c71127bba12a8777993a3463486e53f6770..4fe0d70787a3e939f6f6f574c9bfc19b55529d09 100644
--- a/process_tsv.py
+++ b/process_tsv.py
@@ -1,3 +1,5 @@
+"""A message of shame -- documentation must be completed."""
+
 from __future__ import absolute_import, division, print_function
 
 import argparse
@@ -9,17 +11,24 @@ from poldeepner2.utils.data_utils import read_tsv, save_tsv
 
 
 def main(args):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        args:A message of shame -- documentation must be completed.
+
+    """
     logging.info("Loading the NER model ...")
-    #ner = PolDeepNer2(args.model, args.pretrained_path, args.device, args.squeeze, args.max_seq_length)
+    # ner = PolDeepNer2(args.model, args.pretrained_path, args.device,
+    # args.squeeze, args.max_seq_length)
     ner = PolDeepNer2.load(
         model=args.model,
         pretrained_path=args.pretrained_path,
         device=args.device,
         max_seq_length=args.max_seq_length,
         squeeze=args.squeeze,
-        #seed=args.seed
+        # seed=args.seed
     )
-    
+
     logging.info("Processing ...")
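+    # read sentences with labels; only the token sequences are tagged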
     sentences_labels = read_tsv(os.path.join(args.input))
     sentences = [sentence[0] for sentence in sentences_labels]
@@ -32,18 +41,31 @@ def main(args):
 
 
 def parse_args():
+    """A message of shame -- documentation must be completed.
+
+    Returns: A message of shame -- documentation must be completed.
+
+    """
     parser = argparse.ArgumentParser(
         description='Process a single TSV with a NER model')
-    parser.add_argument('--input', required=True, metavar='PATH', help='path to a file with a list of files')
-    parser.add_argument('--output', required=True, metavar='PATH', help='path to a json output file')
-    parser.add_argument('--model', required=True, metavar='PATH', help='path to NER model')
-    parser.add_argument('--pretrained_path', required=False, metavar='PATH', help='pretrained XLM-Roberta model path')
-    parser.add_argument('--max_seq_length', required=False, default=256, metavar='N', type=int,
-                        help='the maximum total input sequence length after WordPiece tokenization.')
-    parser.add_argument('--device', required=False, default="cpu", metavar='cpu|cuda',
+    parser.add_argument('--input', required=True, metavar='PATH',
+                        help='path to a file with a list of files')
+    parser.add_argument('--output', required=True, metavar='PATH',
+                        help='path to a json output file')
+    parser.add_argument('--model', required=True, metavar='PATH',
+                        help='path to NER model')
+    parser.add_argument('--pretrained_path', required=False, metavar='PATH',
+                        help='pretrained XLM-Roberta model path')
+    parser.add_argument('--max_seq_length', required=False, default=256,
+                        metavar='N', type=int,
+                        help='the maximum total input sequence length after '
+                             'WordPiece tokenization.')
+    parser.add_argument('--device', required=False, default="cpu",
+                        metavar='cpu|cuda',
                         help='device type used for processing')
     parser.add_argument('--squeeze', required=False, default=False,
-                        help='try to squeeze multiple examples into one Input Feature')
+                        help='try to squeeze multiple examples into one '
+                             'Input Feature')
     return parser.parse_args()
 
 
diff --git a/requirements.txt b/requirements.txt
index d48e2ff8692d575721b8daaa517cb0f65bdb65cd..47053e257f8c6f5d6c799c64b209c16dead15662 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,7 +5,7 @@ tqdm
 fastapi==0.61.1
 uvicorn==0.12.2
 pandas==1.1.1
-wandb==0.10.7
-transformers==4.0.1
-torch==1.7.1
+wandb==0.10.7
+transformers==4.16.2
+torch==1.9.0
 torchsummary==1.5.1
\ No newline at end of file
diff --git a/sample.py b/sample.py
index 5314ab3885f6ee68362634c3ff13abecf8374ba6..59958407b02ac42cea8274e74ad8122d17cb4208 100644
--- a/sample.py
+++ b/sample.py
@@ -3,11 +3,12 @@ import poldeepner2
 path_or_name = "pdn2-v07-cen-n82-base-01"
 ner = poldeepner2.load(path_or_name)
 
-sentences = ["Marek Nowak z Politechniki Wrocławskiej mieszka przy ul. Sądeckiej.",
+sentences = ["Marek Nowak z Politechniki Wrocławskiej mieszka przy ul. "
+             "Sądeckiej.",
              "#PoselAdamNowak Co Pan myśli na temat fuzji Orlenu i Lotosu?"]
 
 for sentence in sentences:
-    print("-"*20)
+    print("-" * 20)
     print(sentence)
     for name in ner.process_text(sentence):
         name_range = "%d:%d" % (name.start, name.end)
diff --git a/sample_conll.py b/sample_conll.py
index ab19a053817bbfcfe9ffb01ce2888a729a9f7711..2cc31aee32cd3fa38b5a5aeed96c7d9339d9fce4 100644
--- a/sample_conll.py
+++ b/sample_conll.py
@@ -1,11 +1,17 @@
+"""A message of shame -- documentation must be completed."""
+
 import poldeepner2.models
 
-ner = poldeepner2.models.load("conll-english-large-sq", device="cuda:0", resources_path="/tmp")
+ner = poldeepner2.models.load("conll-english-large-sq", device="cuda:0",
+                              resources_path="/tmp")
 
-sentences = ["""(CNN)In a new criminal court case against a woman alleged to have entered the US Capitol on January 6, the FBI noted that a tipster raised the possibility of a laptop being stolen from House Speaker Nancy Pelosi's office to potentially sell to Russia."""]
+sentences = ["""(CNN)In a new criminal court case against a woman alleged to
+have entered the US Capitol on January 6, the FBI noted that a tipster
+raised the possibility of a laptop being stolen from House Speaker Nancy
+Pelosi's office to potentially sell to Russia."""]
 
 for sentence in sentences:
-    print("-"*20)
+    print("-" * 20)
     print(sentence)
     for name in ner.process_text(sentence):
         name_range = "%d:%d" % (name.start, name.end)
diff --git a/sample_polem.py b/sample_polem.py
index 02f101bc4977825bc0419587a40011ec50c88ec5..11a4cedd6b68cd3429a49d997029bc63d0584336 100644
--- a/sample_polem.py
+++ b/sample_polem.py
@@ -1,29 +1,36 @@
+"""A message of shame -- documentation must be completed."""
+
 import time
 from poldeepner2.models import PolDeepNer2, ModelFactory
 
 resources_path = "../poldeepner2_models"
 t0 = time.time()
-model = ModelFactory.get_resource("pdn2_cen_n82_roberta_large_sq_krnnt_cuda.pdn2", resources_path)
+model = ModelFactory.get_resource("pdn2_cen_n82_roberta_large_sq_krnnt_cuda"
+                                  ".pdn2", resources_path)
 ner = PolDeepNer2.load(model)
-time_model = time.time()-t0
+time_model = time.time() - t0
 
-sentences = ["Spotkałem Marka Nowaka na Politechnice Wrocławskiej, który pracuje w Intelu.",
+sentences = ["Spotkałem Marka Nowaka na Politechnice Wrocławskiej, który "
+             "pracuje w Intelu.",
              "Wczoraj mieliśmy kontrolę Naczelnej Izby Skarbowej.",
-             open("tests/resources/text_krakow.txt", "r", encoding="utf-8").read()]
+             open("tests/resources/text_krakow.txt", "r",
+                  encoding="utf-8").read()]
 
 token_count = 0
 
 t0 = time.time()
 for sentence in sentences:
-    print("-"*20)
+    print("-" * 20)
     print(sentence.strip())
     doc = ner.process_document(sentence)
     token_count += len(doc.tokens)
 
     for name in doc.annotations:
         name_range = "%d:%d" % (name.start, name.end)
-        char_range = "%d:%d" % (doc.tokens[name.start].start, doc.tokens[name.end - 1].end)
-        print(f"{name_range:<8} {char_range:<12} {name.label:<25} {name.get_text():<25}  {name.lemma}")
+        char_range = "%d:%d" % (doc.tokens[name.start].start,
+                                doc.tokens[name.end - 1].end)
+        print(f"{name_range:<8} {char_range:<12} {name.label:<25} "
+              f"{name.get_text():<25}  {name.lemma}")
     print()
 
 print()
diff --git a/scripts/config.cfg b/scripts/config.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..9ac6aabd6e4ac56bb40aa8c4f59f4e170ced81cf
--- /dev/null
+++ b/scripts/config.cfg
@@ -0,0 +1,36 @@
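+# configuration used by the prediction, evaluation and training scripts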
+[model]
+path =
+cpu_or_gpu = cpu
+gpu_num = 0
+
+[predict]
+data_path =
+save_to_file = yes
+
+[train]
+adam_epsilon =
+data_test =
+data_train =
+data_tune =
+device = gpu
+dropout = 0.05
+epoch_save_model = 5
+eval_batch_size = 16
+fp16 = false
+fp16_opt_level =
+freeze_model =
+gradient_accumulation_steps =
+hidden_size = 32
+learning_rate = 0.001
+max_grad_norm =
+max_seq_length = 32
+num_train_epochs = 100
+output_dir =
+pretrained_path =
+seed = 42
+squeeze =
+train_batch_size = 16
+training_mix = 0.5
+transfer =
+warmup_proportion =
+weight_decay = 0.1
diff --git a/scripts/evaluator.py b/scripts/evaluator.py
new file mode 100644
index 0000000000000000000000000000000000000000..983f1127af2685a7e73f81ca2ba2b5168b02948a
--- /dev/null
+++ b/scripts/evaluator.py
@@ -0,0 +1,48 @@
+"""Script for evaluating models on a pre-defined set of data."""
+
+import configparser
+from sklearn.metrics import accuracy_score
+from poldeepner2.models import PolDeepNer2
+from poldeepner2.utils.data_utils import NerProcessor
+
+
+def main():
+    """Evaluate a model on the data set defined in the config file."""
+    # note: the config should be serialized together with the model (JSON)
+    config_file = "config.cfg"
+    config = configparser.ConfigParser()
+    config.read(config_file)
+
+    model = config['model']['path']
+
+    ner = PolDeepNer2.load(model=model)
+
+    data_path = config['data']['path']
+    processor = NerProcessor()
+
+    # Prediction: run the model on every example
+    data = processor.get_examples(data_path)
+    prediction_labels = []
+    for sentence in data:
+        print(sentence)
+        prediction = ner.process_text(sentence)
+        print(prediction)
+
+        # predicted label
+        predict_label = prediction[2][2]
+        prediction_labels.append(predict_label)
+
+    # Comparing: accuracy of predicted vs. true labels
+    true_labels = processor.get_labels(data_path)
+
+    eval_res = accuracy_score(true_labels, prediction_labels)
+    print(eval_res)
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    except ValueError as er:
+        print("[ERROR] %s" % er)
diff --git a/server.py b/server.py
index 90c130db99ac830ef2ea309eff7a0ef9c73efdbd..644d7f8ab6aa629182ae7b062680d7ad403515ba 100644
--- a/server.py
+++ b/server.py
@@ -1,8 +1,10 @@
+"""A message of shame -- documentation must be completed."""
+
 from __future__ import absolute_import, division, print_function
 import uvicorn
 import argparse
 from fastapi import FastAPI
-from typing import Dict, List, Optional
+from typing import List, Optional
 
 from poldeepner2.data.token import Token
 from poldeepner2.models import PolDeepNer2
@@ -14,16 +16,22 @@ from poldeepner2.utils.annotation import Annotation
 
 
 class PredictionReq(BaseModel):
+    """A message of shame -- documentation must be completed."""
+
     text: str
     tokenization: Optional[str] = 'spacy'
 
 
 class Prediction(BaseModel):
+    """A message of shame -- documentation must be completed."""
+
     text: str
     doc: List[List[str]]
 
 
 class ResponseToken(BaseModel):
+    """A message of shame -- documentation must be completed."""
+
     orth: str
     lemma: str
     start: int
@@ -32,10 +40,21 @@ class ResponseToken(BaseModel):
 
     @staticmethod
     def generate(token: Token):
-        return {"orth": token.orth, "lemma": token.lemma, "start": token.start, "end": token.end, "eos": token.eos}
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            token: A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
+        return {"orth": token.orth, "lemma": token.lemma, "start": token.start,
+                "end": token.end, "eos": token.eos}
 
 
 class ResponseAnnotation(BaseModel):
+    """A message of shame -- documentation must be completed."""
+
     text: str
     label: str
     lemma: str
@@ -44,29 +63,47 @@ class ResponseAnnotation(BaseModel):
 
     @staticmethod
     def generate(an: Annotation):
-        return {"text": an.get_text(), "label": an.annotation, "lemma": an.lemma, "start": an.start, "end": an.end}
+        """A message of shame -- documentation must be completed.
+
+        Args:
+            an: A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
+        return {"text": an.get_text(), "label": an.annotation,
+                "lemma": an.lemma, "start": an.start, "end": an.end}
 
 
 class ResponsePolem(BaseModel):
+    """A message of shame -- documentation must be completed."""
+
     text: str
     tokens: List[ResponseToken]
     annotations: List[ResponseAnnotation]
 
 
 class Server:
+    """A message of shame -- documentation must be completed."""
+
     app = FastAPI()
-    
+
     app.add_middleware(CORSMiddleware,
                        allow_origins=['*'],
                        allow_credentials=True,
                        allow_methods=['*'],
                        allow_headers=['*'])
-    
+
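+    # a single spaCy tokenizer instance shared by all requests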
     global spacyTokenizer
     spacyTokenizer = load('spacy')
 
     @app.post('/predict', response_model=Prediction)
     async def predict(pred_req: PredictionReq):
+        """A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
         text = pred_req.text
         sentences = text.split('\n')
         tokens = spacyTokenizer.tokenize(sentences)
@@ -75,28 +112,45 @@ class Server:
 
     @app.post('/polem', response_model=ResponsePolem)
     async def polem(pred_req: PredictionReq):
+        """A message of shame -- documentation must be completed.
+
+        Returns:A message of shame -- documentation must be completed.
+
+        """
         text = pred_req.text
         doc = ner.process_document(text)
         return {"text": text,
                 "tokens": [ResponseToken.generate(t) for t in doc.tokens],
-                "annotations": [ResponseAnnotation.generate(t) for t in doc.annotations]}
+                "annotations": [ResponseAnnotation.generate(t) for t
+                                in doc.annotations]}
 
 
 def parse_args():
+    """A message of shame -- documentation must be completed."""
     parser = argparse.ArgumentParser(
         description='Process a single TSV with a NER model')
-    parser.add_argument('--model', required=True, metavar='PATH', help='path to NER model')
-    parser.add_argument('--device', required=False, default="cpu", metavar='cpu|cuda',
+    parser.add_argument('--model', required=True, metavar='PATH',
+                        help='path to NER model')
+    parser.add_argument('--device', required=False, default="cpu",
+                        metavar='cpu|cuda',
                         help='device type used for processing')
-    parser.add_argument('--max_seq_length', required=False, default=256, metavar='N', type=int,
-                        help='the maximum total input sequence length after WordPiece tokenization.')
-    parser.add_argument('--pretrained_path', required=False, metavar='PATH', default=None,
-                        help='pretrained XLM-Roberta model path with model name as prefix, a.e automodel:allegro/herbert-large-cased')
+    parser.add_argument('--max_seq_length', required=False, default=256,
+                        metavar='N', type=int,
+                        help='the maximum total input sequence length after '
+                             'WordPiece tokenization.')
+    parser.add_argument('--pretrained_path', required=False, metavar='PATH',
+                        default=None,
+                        help='pretrained XLM-Roberta model path with model '
+                             'name as prefix, '
+                             'e.g. automodel:allegro/herbert-large-cased')
     parser.add_argument('--processes', help='number of processes', default=1)
-    parser.add_argument('--tokenization', required=False, default="spacy-ext", choices=names,
+    parser.add_argument('--tokenization', required=False, default="spacy-ext",
+                        choices=names,
                         help='Tokenization method')
-    parser.add_argument('--squeeze', required=False, default=False, action="store_true",
-                        help='try to squeeze multiple examples into one Input Feature')
+    parser.add_argument('--squeeze', required=False, default=False,
+                        action="store_true",
+                        help='try to squeeze multiple examples into one '
+                             'Input Feature')
     parser.add_argument('--host', required=False, default="0.0.0.0")
     parser.add_argument('--port', required=False, default=8001, type=int)
     return parser.parse_args()
@@ -108,11 +162,15 @@ if __name__ == "__main__":
     cliargs = parse_args()
     try:
         global ner
-        ner = PolDeepNer2.load(cliargs.model, pretrained_path=cliargs.pretrained_path, device=cliargs.device,
-                               max_seq_length=cliargs.max_seq_length, squeeze=cliargs.squeeze,
+        ner = PolDeepNer2.load(cliargs.model,
+                               pretrained_path=cliargs.pretrained_path,
+                               device=cliargs.device,
+                               max_seq_length=cliargs.max_seq_length,
+                               squeeze=cliargs.squeeze,
                                tokenizer=TokenizerSpaces())
-        
+
         # threaded=True, processes=cliargs.processes
-        uvicorn.run(server.app, host=cliargs.host, port=cliargs.port, log_level="info")
+        uvicorn.run(server.app, host=cliargs.host, port=cliargs.port,
+                    log_level="info")
     except ValueError as er:
         print("[ERROR] %s" % er)
diff --git a/setup.py b/setup.py
index 5762a74b3b0f102b561f66db5bb69576b4ba06f6..b32d86af9ba5fcdec6ba6d753a6fa060a20032c3 100644
--- a/setup.py
+++ b/setup.py
@@ -20,7 +20,8 @@ setuptools.setup(
     version="0.7.0",
     author="Michał Marcińczuk",
     author_email="michal.marcinczuk@pwr.edu.pl",
-    description="PolDeepNer2 is a tool for sequence labeling tasks based on transformer language models.",
+    description="PolDeepNer2 is a tool for sequence labeling tasks based on "
+                "transformer language models.",
     long_description=long_description,
     long_description_content_type="text/markdown",
     url="https://github.com/CLARIN-PL/PolDeepNer2",
diff --git a/tests/pipeline/test_lemmatization.py b/tests/pipeline/test_lemmatization.py
index 4d0afea3296ac5a10cc09f2b5520169eba358cba..d5191ab45bf16f0fac90ee08e9934cc358d05dea 100644
--- a/tests/pipeline/test_lemmatization.py
+++ b/tests/pipeline/test_lemmatization.py
@@ -1,3 +1,4 @@
+"""A message of shame -- documentation must be completed."""
 import pytest
 
 from poldeepner2.data.token import Token
@@ -7,17 +8,31 @@ from poldeepner2.utils.annotation import Annotation
 
 @pytest.mark.external
 @pytest.mark.parametrize("annotation, lemma", [
-    (Annotation("nam_liv_person", tokens=[Token("Tomka", 0, 5, "Tomek", "", "subst:sg:gen:m1")]), "Tomek"),
-    (Annotation("nam_liv_person", tokens=[Token("Mickiewicza", 0, 11, "Mickiewicz", "", "subst:sg:gen:m1")]),
+    (Annotation("nam_liv_person",
+                tokens=[Token("Tomka", 0, 5, "Tomek", "", "subst:sg:gen:m1")]),
+     "Tomek"),
+    (Annotation("nam_liv_person",
+                tokens=[Token("Mickiewicza", 0, 11, "Mickiewicz", "",
+                              "subst:sg:gen:m1")]),
      "Mickiewicz"),
     (Annotation("nam_org_institution", tokens=[
         Token("Lidze", 0, 5, "liga", " ", "subst:sg:loc:f"),
-        Token("światowej", 7, 16, "światowy", " ", "adj:sg:loc:f:pos")]), "Liga światowa"),
+        Token("światowej", 7, 16, "światowy", " ", "adj:sg:loc:f:pos")]),
+     "Liga światowa"),
     (Annotation("", tokens=[
         Token("Lidze", 0, 5, "liga", " ", "subst:sg:loc:f"),
-        Token("światowej", 7, 16, "światowy", " ", "adj:sg:loc:f:pos")]), "liga światowa")
+        Token("światowej", 7, 16, "światowy", " ", "adj:sg:loc:f:pos")]),
+     "liga światowa")
 ])
-def test_annotation_lemmatizer_polem_single(annotation: Annotation, lemma: str):
+def test_annotation_lemmatizer_polem_single(annotation: Annotation,
+                                            lemma: str):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        annotation: A message of shame -- documentation must be completed.
+        lemma: A message of shame -- documentation must be completed.
+
+    """
     annotations = [annotation]
     polem = AnnotationLemmatizerPolem()
     polem.process(annotations)
diff --git a/tests/pipeline/test_tokenization.py b/tests/pipeline/test_tokenization.py
index 8cf7ede01825b0168bd9a0324e0584aba9cfb5ae..7ebd484d8bf7ea7e87f9f06dc00addfb579571cb 100644
--- a/tests/pipeline/test_tokenization.py
+++ b/tests/pipeline/test_tokenization.py
@@ -1,3 +1,4 @@
+"""A message of shame -- documentation must be completed."""
 import pytest
 
 from poldeepner2.pipeline.tokenization import TokenizerKrnnt, TokenizerSpaces
@@ -5,16 +6,37 @@ from poldeepner2.pipeline.tokenization import TokenizerKrnnt, TokenizerSpaces
 
 @pytest.fixture(scope='session', autouse=True)
 def tokenizer_krnnt():
+    """A message of shame -- documentation must be completed.
+
+    Returns: TokenizerKrnnt()
+
+    """
     return TokenizerKrnnt()
 
 
 @pytest.mark.external
 @pytest.mark.parametrize("text, orths, lemmas, ws, morphs, starts, ends", [
-    ("Ala ma kota.", ["Ala", "ma", "kota", "."], ["Ala", "mieć", "kot", "."], [" ", " ", "", ""],
-     ["subst:sg:nom:f", "fin:sg:ter:imperf", "subst:sg:acc:m2", "interp"], [0, 4, 7, 11], [3, 6, 11, 12])
+    ("Ala ma kota.", ["Ala", "ma", "kota", "."], ["Ala", "mieć", "kot", "."],
+     [" ", " ", "", ""],
+     ["subst:sg:nom:f", "fin:sg:ter:imperf", "subst:sg:acc:m2", "interp"],
+     [0, 4, 7, 11], [3, 6, 11, 12])
 ])
-def test_tokenizer_krrnt_text(text: str, orths: [str], lemmas: [str], ws: [str], morphs: [str],
+def test_tokenizer_krrnt_text(text: str, orths: [str], lemmas: [str],
+                              ws: [str], morphs: [str],
                               starts: [int], ends: [int], tokenizer_krnnt):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        text: A message of shame -- documentation must be completed.
+        orths: A message of shame -- documentation must be completed.
+        lemmas: A message of shame -- documentation must be completed.
+        ws: A message of shame -- documentation must be completed.
+        morphs: A message of shame -- documentation must be completed.
+        starts: A message of shame -- documentation must be completed.
+        ends: A message of shame -- documentation must be completed.
+        tokenizer_krnnt: A message of shame -- documentation must be completed.
+
+    """
     sentence = tokenizer_krnnt.tokenize_tokens([text])[0]
 
     assert len(sentence) == len(orths)
@@ -31,10 +53,18 @@ def test_tokenizer_krrnt_text(text: str, orths: [str], lemmas: [str], ws: [str],
         (["Ala ma kota"], [["Ala", "ma", "kota"]]),
         ([" Ala ma kota"], [["Ala", "ma", "kota"]]),
         (["Ala  ma kota"], [["Ala", "ma", "kota"]]),
-        (["Ala ma kota", "Kot jest łaciaty"], [["Ala", "ma", "kota"], ["Kot", "jest", "łaciaty"]])
+        (["Ala ma kota", "Kot jest łaciaty"], [["Ala", "ma", "kota"],
+                                               ["Kot", "jest", "łaciaty"]])
     ]
 )
 def test_tokenizer_spaces(texts, tokens):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        texts: A message of shame -- documentation must be completed.
+        tokens: A message of shame -- documentation must be completed.
+
+    """
     tokenizer = TokenizerSpaces()
     output = tokenizer.tokenize(texts)
-    output == tokens
+    assert output == tokens
diff --git a/tests/unit/utils/test_align_tokens_to_text.py b/tests/unit/utils/test_align_tokens_to_text.py
index 1a402d43e22969814b8368f219455dc7cd1afda5..5ceff1a52c075c7042d388c67f719cdb196df850 100644
--- a/tests/unit/utils/test_align_tokens_to_text.py
+++ b/tests/unit/utils/test_align_tokens_to_text.py
@@ -1,4 +1,6 @@
+"""A message of shame -- documentation must be completed."""
 import pytest
+
 from poldeepner2.utils.data_utils import align_tokens_to_text
 
 
@@ -6,8 +8,15 @@ from poldeepner2.utils.data_utils import align_tokens_to_text
     ("Ala ma kota", [["Ala", "ma", "kota"]], [(0, 3), (4, 6), (7, 11)]),
     ("Ala ma  kota", [["Ala", "ma", "kota"]], [(0, 3), (4, 6), (8, 12)]),
     (" Ala ma kota", [["Ala", "ma", "kota"]], [(1, 4), (5, 7), (8, 12)])
-    ]
+]
 )
 def test_align_tokens_to_text(text, tokens, expected_offsets):
+    """A message of shame -- documentation must be completed.
+
+    Args: text: A message of shame -- documentation must be completed.
+    tokens: A message of shame -- documentation must be completed.
+    expected_offsets: A message of shame -- documentation must be completed.
+
+    """
     offsets = align_tokens_to_text(tokens, text)
     assert offsets == expected_offsets
diff --git a/tests/unit/utils/test_iob2_to_iob.py b/tests/unit/utils/test_iob2_to_iob.py
index e2f0ee50a384dd654028cb315a1a7d3f26276d19..1cecd0d4baa4021cfea4ec364e463627e9e6253d 100644
--- a/tests/unit/utils/test_iob2_to_iob.py
+++ b/tests/unit/utils/test_iob2_to_iob.py
@@ -1,9 +1,9 @@
+"""A message of shame -- documentation must be completed."""
 import pytest
 import sys
 import pathlib
-
 sys.path.append(str(pathlib.Path(__file__).absolute().parents[3].resolve()))
-from poldeepner2.utils.data_utils import iob2_to_iob
+from poldeepner2.utils.data_utils import iob2_to_iob  # noqa: E402
 
 
 @pytest.mark.parametrize(
@@ -12,25 +12,48 @@ from poldeepner2.utils.data_utils import iob2_to_iob
          'Alex I-PER\nis O\ngoing O\nto O\nLos I-LOC\nAngeles I-LOC'),
         ('Alex B-PER',
          'Alex I-PER'),
-        ('Alex B-PER\nAngeles I-PER\nis O\ngoing O\nto O\nLos B-LOC\nAngeles I-LOC',
-         'Alex I-PER\nAngeles I-PER\nis O\ngoing O\nto O\nLos I-LOC\nAngeles I-LOC'),
-        ('is O\ngoing O\nAlex B-PER\nAngeles I-PER\nis O\ngoing O\nto O\nLos B-LOC\nAngeles I-LOC',
-         'is O\ngoing O\nAlex I-PER\nAngeles I-PER\nis O\ngoing O\nto O\nLos I-LOC\nAngeles I-LOC'),
-        ('Alex B-PER\nis O\ngoing O\nAlex B-PER\nto O\nLos B-LOC\nAngeles I-LOC',
-         'Alex I-PER\nis O\ngoing O\nAlex I-PER\nto O\nLos I-LOC\nAngeles I-LOC'),
-        ('Alex B-PER\nis O\ngoing O\nAlex B-PER\nto O\nLos B-LOC\nAngeles I-LOC\nAlex B-PER',
-         'Alex I-PER\nis O\ngoing O\nAlex I-PER\nto O\nLos I-LOC\nAngeles I-LOC\nAlex I-PER'),
+        ('Alex B-PER\nAngeles I-PER\nis O\ngoing O\nto O\nLos B-LOC\nAngeles '
+         'I-LOC',
+         'Alex I-PER\nAngeles I-PER\nis O\ngoing O\nto O\nLos I-LOC\nAngeles '
+         'I-LOC'),
+        ('is O\ngoing O\nAlex B-PER\nAngeles I-PER\nis O\ngoing O\nto O\nLos '
+         'B-LOC\nAngeles I-LOC',
+         'is O\ngoing O\nAlex I-PER\nAngeles I-PER\nis O\ngoing O\nto O\nLos '
+         'I-LOC\nAngeles I-LOC'),
+        ('Alex B-PER\nis O\ngoing O\nAlex B-PER\nto O\nLos B-LOC\nAngeles '
+         'I-LOC',
+         'Alex I-PER\nis O\ngoing O\nAlex I-PER\nto O\nLos I-LOC\nAngeles '
+         'I-LOC'),
+        ('Alex B-PER\nis O\ngoing O\nAlex B-PER\nto O\nLos B-LOC\nAngeles '
+         'I-LOC\nAlex B-PER',
+         'Alex I-PER\nis O\ngoing O\nAlex I-PER\nto O\nLos I-LOC\nAngeles '
+         'I-LOC\nAlex I-PER'),
         # nested
-        ('Alex B-PER#B-ORG\nis I-ORG\ngoing O\nAlex B-PER\nto O\nLos B-LOC\nAngeles I-LOC\nAlex B-PER#B-LOC',
-         'Alex I-PER#I-ORG\nis I-ORG\ngoing O\nAlex I-PER\nto O\nLos I-LOC\nAngeles I-LOC\nAlex I-PER#B-LOC'),
-        ('Alex B-PER#B-ORG\nis I-ORG\ngoing O\nAlex B-PER\nto O\nLos B-LOC\nAngeles I-LOC\nAlex B-PER#B-LOC\nAlex B-PER#B-LOC',
-         'Alex I-PER#I-ORG\nis I-ORG\ngoing O\nAlex I-PER\nto O\nLos I-LOC\nAngeles I-LOC\nAlex I-PER#B-LOC\nAlex B-PER#I-LOC'),
-        ('Alex B-PER#B-ORG\nis I-ORG\ngoing O\nAlex B-PER\nto O\nLos B-LOC#B-PER\nAngeles B-LOC#B-PER',
-         'Alex I-PER#I-ORG\nis I-ORG\ngoing O\nAlex I-PER\nto O\nLos I-LOC#I-PER\nAngeles B-LOC#B-PER'),
-        ('Alex B-PER#B-ORG\nis I-ORG\ngoing O\nAlex B-PER#B-NAV\nto I-NAV\nLos B-LOC#B-PER\nAngeles I-LOC#B-PER',
-         'Alex I-PER#I-ORG\nis I-ORG\ngoing O\nAlex I-PER#I-NAV\nto I-NAV\nLos I-LOC#I-PER\nAngeles I-LOC#B-PER')
+        ('Alex B-PER#B-ORG\nis I-ORG\ngoing O\nAlex B-PER\nto O\nLos '
+         'B-LOC\nAngeles I-LOC\nAlex B-PER#B-LOC',
+         'Alex I-PER#I-ORG\nis I-ORG\ngoing O\nAlex I-PER\nto O\nLos '
+         'I-LOC\nAngeles I-LOC\nAlex I-PER#B-LOC'),
+        ('Alex B-PER#B-ORG\nis I-ORG\ngoing O\nAlex B-PER\nto O\nLos '
+         'B-LOC\nAngeles I-LOC\nAlex B-PER#B-LOC\nAlex B-PER#B-LOC',
+         'Alex I-PER#I-ORG\nis I-ORG\ngoing O\nAlex I-PER\nto O\nLos '
+         'I-LOC\nAngeles I-LOC\nAlex I-PER#B-LOC\nAlex B-PER#I-LOC'),
+        ('Alex B-PER#B-ORG\nis I-ORG\ngoing O\nAlex B-PER\nto O\nLos '
+         'B-LOC#B-PER\nAngeles B-LOC#B-PER',
+         'Alex I-PER#I-ORG\nis I-ORG\ngoing O\nAlex I-PER\nto O\nLos '
+         'I-LOC#I-PER\nAngeles B-LOC#B-PER'),
+        ('Alex B-PER#B-ORG\nis I-ORG\ngoing O\nAlex B-PER#B-NAV\nto '
+         'I-NAV\nLos B-LOC#B-PER\nAngeles I-LOC#B-PER',
+         'Alex I-PER#I-ORG\nis I-ORG\ngoing O\nAlex I-PER#I-NAV\nto '
+         'I-NAV\nLos I-LOC#I-PER\nAngeles I-LOC#B-PER')
     ]
 )
 def test_iob2_to_iob(iob2_input, expected_output):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        iob2_input: A message of shame -- documentation must be completed.
+        expected_output: A message of shame -- documentation must be completed.
+
+    """
     iob1 = iob2_to_iob(iob2_input)
     assert iob1.split('\n') == expected_output.split('\n')
diff --git a/tests/unit/utils/test_poleval_dict.py b/tests/unit/utils/test_poleval_dict.py
index 94a7d147bb32ef212abc5e911c45f2abafbeceee..4aa9a65b8ab528c7020c4099cd517085d735276b 100644
--- a/tests/unit/utils/test_poleval_dict.py
+++ b/tests/unit/utils/test_poleval_dict.py
@@ -1,12 +1,14 @@
+"""A message of shame -- documentation must be completed."""
 import pytest
-from poldeepner2.utils.data_utils import get_poleval_dict, read_tsv, wrap_annotations
+from poldeepner2.utils.data_utils import get_poleval_dict, wrap_annotations
 
 
 @pytest.mark.parametrize(
     "id, text, tokens, labels, answers", [
         ('9 from 1828: PCCwR-1.1-TXT/short/Literatura piękna/2955.txt',
          'Pojutrze - wyszeptał niepewnie, a Beny kaszlnął. Raz Benjamin trzy',
-         [['Pojutrze', '-', 'wyszeptał', 'niepewnie', ',', 'a', 'Beny', 'kaszlnął', '.'],
+         [['Pojutrze', '-', 'wyszeptał', 'niepewnie', ',', 'a', 'Beny',
+           'kaszlnął', '.'],
           ['Raz', 'Benjamin', 'trzy']],
          [['O', 'O', 'O', 'O', 'O', 'O', 'B-persName', 'O', 'O'],
           ['O', 'B-persName', 'O']],
@@ -14,7 +16,8 @@ from poldeepner2.utils.data_utils import get_poleval_dict, read_tsv, wrap_annota
 
         ('9 from 1828: PCCwR-1.1-TXT/short/Literatura piękna/2955.txt',
          'Pojutrze - wyszeptał niepewnie, a Beny kaszlnął. Raz Benjamin trzy',
-         [['Pojutrze', '-', 'wyszeptał', 'niepewnie', ',', 'a', 'Beny', 'kaszlnął', '.'],
+         [['Pojutrze', '-', 'wyszeptał', 'niepewnie', ',', 'a', 'Beny',
+           'kaszlnął', '.'],
           ['Raz', 'Benjamin', 'trzy']],
          [['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'],
           ['O', 'O', 'O']],
@@ -22,30 +25,48 @@ from poldeepner2.utils.data_utils import get_poleval_dict, read_tsv, wrap_annota
 
         ('9 from 1828: PCCwR-1.1-TXT/short/Literatura piękna/2955.txt',
          'Pojutrze - wyszeptał niepewnie, a Beny kaszlnął. Raz Benjamin trzy',
-         [['Pojutrze', '-', 'wyszeptał', 'niepewnie', ',', 'a', 'Beny', 'kaszlnął', '.'],
+         [['Pojutrze', '-', 'wyszeptał', 'niepewnie', ',', 'a', 'Beny',
+           'kaszlnął', '.'],
           ['Raz', 'Benjamin', 'trzy']],
-         [['O', 'O', 'O', 'O', 'O', 'O', 'B-persName#B-persName-forename', 'O', 'O'],
+         [['O', 'O', 'O', 'O', 'O', 'O', 'B-persName#B-persName-forename',
+           'O', 'O'],
           ['O', 'B-persName#B-persName-forename', 'O']],
-         'persName 34 38\tBeny\npersName_forename 34 38\tBeny\npersName 53 61\tBenjamin\npersName_forename 53 61\tBenjamin'),
+         'persName 34 38\tBeny\npersName_forename 34 38\tBeny\npersName 53 '
+         '61\tBenjamin\npersName_forename 53 61\tBenjamin'),
 
         ('9 from 1828: PCCwR-1.1-TXT/short/Literatura piękna/2955.txt',
          'Pojutrze - wyszeptał niepewnie, a Londyn kaszlnął. Raz Londyn trzy',
-         [['Pojutrze', '-', 'wyszeptał', 'niepewnie', ',', 'a', 'Londyn', 'kaszlnął', '.'],
+         [['Pojutrze', '-', 'wyszeptał', 'niepewnie', ',', 'a', 'Londyn',
+           'kaszlnął', '.'],
           ['Raz', 'Londyn', 'trzy']],
          [['O', 'O', 'O', 'O', 'O', 'O', 'B-LOC#B-PER', 'O', 'O'],
           ['O', 'B-LOC#B-PER', 'O']],
-         'LOC 34 40\tLondyn\nPER 34 40\tLondyn\nLOC 55 61\tLondyn\nPER 55 61\tLondyn'),
+         'LOC 34 40\tLondyn\nPER 34 40\tLondyn\nLOC 55 61\tLondyn\nPER 55 '
+         '61\tLondyn'),
 
         ('9 from  1828: PCCwR-1.1-TXT/short/Literatura piękna/2955.txt',
-         'Pojutrze -   wyszeptał niepewnie, a  Londyn kaszlnął.   Raz Londyn  trzy',
-         [['Pojutrze', '-', 'wyszeptał', 'niepewnie', ',', 'a', 'Londyn', 'kaszlnął', '.'],
+         'Pojutrze -   wyszeptał niepewnie, a  Londyn kaszlnął.   Raz Londyn '
+         ' trzy',
+         [['Pojutrze', '-', 'wyszeptał', 'niepewnie', ',', 'a', 'Londyn',
+           'kaszlnął', '.'],
           ['Raz', 'Londyn', 'trzy']],
          [['O', 'O', 'O', 'O', 'O', 'O', 'B-LOC#B-PER', 'O', 'O'],
           ['O', 'B-LOC#B-PER', 'O']],
-         'LOC 37 43\tLondyn\nPER 37 43\tLondyn\nLOC 60 66\tLondyn\nPER 60 66\tLondyn')
+         'LOC 37 43\tLondyn\nPER 37 43\tLondyn\nLOC 60 66\tLondyn\nPER 60 '
+         '66\tLondyn')
     ]
 )
 def test_get_poleval_dict(id, text, tokens, labels, answers):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        id: A message of shame -- documentation must be completed.
+        text: A message of shame -- documentation must be completed.
+        tokens: A message of shame -- documentation must be completed.
+        labels: A message of shame -- documentation must be completed.
+        answers: A message of shame -- documentation must be completed.
+
+    """
     annotations = wrap_annotations(labels)
     poleval_dict = get_poleval_dict(id, text, tokens, annotations)
     assert poleval_dict == {'text': text, 'id': id, 'answers': answers}
diff --git a/tests/unit/utils/test_read_tsv.py b/tests/unit/utils/test_read_tsv.py
index 51be8728ea248d12e051ec4d83b0bfa61dc74b4e..7ebb214717d17c0d7f78b2d062cae7b5437be369 100644
--- a/tests/unit/utils/test_read_tsv.py
+++ b/tests/unit/utils/test_read_tsv.py
@@ -1,3 +1,4 @@
+"""A message of shame -- documentation must be completed."""
 from pathlib import Path
 
 import pytest
@@ -10,7 +11,8 @@ from poldeepner2.utils.data_utils import read_tsv
     "path, expected_output", [
         ('tsv_test.tsv',
          [('Ala z Krakowa jeździ Audi'.split(" "), ['O'] * 5),
-          ('Marek Nowak z Politechniki Wrocławskiej mieszka przy ul . Sądeckiej'.split(" "), ['O'] * 10)])
+          ('Marek Nowak z Politechniki Wrocławskiej mieszka przy ul . '
+           'Sądeckiej'.split(" "), ['O'] * 10)])
     ]
 )
 def test_get_read_tsv(path, expected_output):
diff --git a/tests/unit/utils/test_sequence_labeling.py b/tests/unit/utils/test_sequence_labeling.py
index 8db73de4f717252e69dc57cdb3f3f62c45d9f497..97db2b68b92c11b61feaf9b2a2f261072ec720b8 100644
--- a/tests/unit/utils/test_sequence_labeling.py
+++ b/tests/unit/utils/test_sequence_labeling.py
@@ -1,3 +1,4 @@
+"""A message of shame -- documentation must be completed."""
 import pytest
 
 from poldeepner2.utils.sequence_labeling import get_entities
@@ -9,12 +10,19 @@ from poldeepner2.utils.sequence_labeling import get_entities
         (["O", "B-PER", "I-PER"], [("PER", 1, 2)]),
         (["B-PER", "B-PER"], [("PER", 0, 0), ("PER", 1, 1)]),
         (["B-LOC", "I-PER", "I-PER"], [("LOC", 0, 0), ("PER", 1, 2)]),
-        (["B-LOC#B-PER", "I-LOC", "I-LOC#B-PER"], [("LOC", 0, 2), ("PER", 0, 0), ("PER", 2, 2)]),
-        (["B-LOC#I-PER", "I-LOC", "I-LOC#I-PER"], [("LOC", 0, 2), ("PER", 0, 0), ("PER", 2, 2)]),
-        (["B-nam_liv_person#B-nam_liv_person_first", "B-nam_liv_person#B-nam_liv_person_last"],
-         [('nam_liv_person', 0, 0), ('nam_liv_person_first', 0, 0), ('nam_liv_person', 1, 1), ('nam_liv_person_last', 1, 1)]),
+        (["B-LOC#B-PER", "I-LOC", "I-LOC#B-PER"], [("LOC", 0, 2),
+                                                   ("PER", 0, 0),
+                                                   ("PER", 2, 2)]),
+        (["B-LOC#I-PER", "I-LOC", "I-LOC#I-PER"], [("LOC", 0, 2),
+                                                   ("PER", 0, 0),
+                                                   ("PER", 2, 2)]),
+        (["B-nam_liv_person#B-nam_liv_person_first",
+          "B-nam_liv_person#B-nam_liv_person_last"],
+         [('nam_liv_person', 0, 0), ('nam_liv_person_first', 0, 0),
+          ('nam_liv_person', 1, 1), ('nam_liv_person_last', 1, 1)]),
         (["B-persName#B-persName-forename", "B-persName"],
-         [("persName", 0, 0), ("persName-forename", 0, 0), ("persName", 1, 1)]),
+         [("persName", 0, 0), ("persName-forename", 0, 0),
+          ("persName", 1, 1)]),
         (["I-PER", "I-PER"], [("PER", 0, 1)]),
         (["B-PER", "B-PER"], [("PER", 0, 0), ("PER", 1, 1)]),
         # (["B-PER", "S-PER"], [("PER", 0, 0), ("PER", 1, 1)]),
@@ -23,5 +31,12 @@ from poldeepner2.utils.sequence_labeling import get_entities
     ]
 )
 def test_get_entities(labels, expected):
+    """A message of shame -- documentation must be completed.
+
+    Args:
+        labels: A message of shame -- documentation must be completed.
+        expected: A message of shame -- documentation must be completed.
+
+    """
     entities = get_entities(labels)
     assert set(entities) == set(expected)
diff --git a/tests/unit/utils/test_wrap_annotations.py b/tests/unit/utils/test_wrap_annotations.py
index 6836cba37e3d47653a707f694bb57287bf04eb2c..d0452712c2d3adad9d06c5a437612dde0a226092 100644
--- a/tests/unit/utils/test_wrap_annotations.py
+++ b/tests/unit/utils/test_wrap_annotations.py
@@ -1,10 +1,13 @@
+"""A message of shame -- documentation must be completed."""
 import codecs
 from pathlib import Path
 
-from poldeepner2.utils.data_utils import read_tsv, wrap_annotations, align_tokens_to_text
+from poldeepner2.utils.data_utils import read_tsv, wrap_annotations, \
+    align_tokens_to_text
 
 
 def test_wrap_and_align_tokens_to_text():
+    """A message of shame -- documentation must be completed."""
     root = Path(__file__).parents[2].absolute() / "resources"
 
     path_iob = str(root / "poleval_0337_iob.tsv")
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000000000000000000000000000000000000..8f84e117ac1320bd94573cc6ceef3a383c9129a2
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,44 @@
+[tox]
+envlist = pep8,docstyle
+skipsdist = True
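+
+# Run all environments with `tox`, or a single one with e.g. `tox -e pep8`.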
+
+[testenv:pep8]
+deps =
+    flake8
+basepython = python3.8
+commands =
+    flake8 {posargs}
+
+[testenv:docstyle]
+deps =
+    pydocstyle
+basepython = python3.8
+commands =
+    pydocstyle --verbose {posargs}
+
+[flake8]
+# W504 skipped because it is overeager and unnecessary
+ignore = W504
+show-source = True
+exclude = .git,.venv,.tox,dist,doc,*egg,build,venv
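+# import-order-style is provided by the flake8-import-order plugin.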
+import-order-style = pep8
+max-line-length = 80
+
+
+[pydocstyle]
+# D104 Missing docstring in public package
+# D203 1 blank line required before class docstring
+# D213 Multi-line docstring summary should start at the second line
+# D214 Section is over-indented
+# D215 Section underline is over-indented
+# D401 First line should be in imperative mood; try rephrasing
+# D405 Section name should be properly capitalized
+# D406 Section name should end with a newline
+# D407 Missing dashed underline after section
+# D408 Section underline should be in the line following the section’s name
+# D409 Section underline should match the length of its name
+# D410 Missing blank line after section
+# D411 Missing blank line before section
+ignore = D104,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411
+match-dir = ^(?!\.tox|venv).*
+match = ^(?!setup).*\.py
\ No newline at end of file
diff --git a/train.py b/train.py
index acbc67c28fc498500b703f7d8ebd7f61cf5d30e3..382309cc51d15d46b758eac8e86475e5f1df752b 100644
--- a/train.py
+++ b/train.py
@@ -1,3 +1,5 @@
+"""A message of shame -- documentation must be completed."""
+
 from __future__ import absolute_import, division, print_function
 
 import argparse
@@ -39,19 +41,19 @@ def train_model(args: Namespace):
     args.output_dir += suffix
 
     config = {
-       "epochs": args.num_train_epochs,
-       "language_model": args.pretrained_path,
-       "batch_size": args.train_batch_size,
-       "data_train": args.data_train,
-       "data_tune": args.data_tune,
-       "data_test": args.data_test,
-       "max_seq_length": args.max_seq_length,
-       "warmup_proportion": args.warmup_proportion,
-       "learning_rate": args.learning_rate,
-       "gradient_accumulation_steps": args.gradient_accumulation_steps,
-       "squeeze": args.squeeze,
-       "dropout": args.dropout,
-       "output_dir": args.output_dir
+        "epochs": args.num_train_epochs,
+        "language_model": args.pretrained_path,
+        "batch_size": args.train_batch_size,
+        "data_train": args.data_train,
+        "data_tune": args.data_tune,
+        "data_test": args.data_test,
+        "max_seq_length": args.max_seq_length,
+        "warmup_proportion": args.warmup_proportion,
+        "learning_rate": args.learning_rate,
+        "gradient_accumulation_steps": args.gradient_accumulation_steps,
+        "squeeze": args.squeeze,
+        "dropout": args.dropout,
+        "output_dir": args.output_dir
     }
 
     if args.wandb:
@@ -61,7 +63,8 @@ def train_model(args: Namespace):
         wandb.run.save()
 
     if os.path.exists(args.output_dir) and os.listdir(args.output_dir):
-        raise ValueError("Output directory (%s) already exists and is not empty." % args.output_dir)
+        raise ValueError("Output directory (%s) already exists and is not "
+                         "empty." % args.output_dir)
 
     Path(args.output_dir).mkdir(parents=True, exist_ok=True)
 
@@ -70,7 +73,8 @@ def train_model(args: Namespace):
         logger.info(item)
 
     if args.gradient_accumulation_steps < 1:
-        raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
+        raise ValueError("Invalid gradient_accumulation_steps parameter: {}, "
+                         "should be >= 1 "
                          % args.gradient_accumulation_steps)
 
     args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps
@@ -92,7 +96,7 @@ def train_model(args: Namespace):
     logger.info("Loading training data...")
     t0 = time.time()
     train_examples = processor.get_examples(args.data_train, "train")
-    logger.info(f"Training data was loaded in {time.time()-t0} second(s)")
+    logger.info(f"Training data was loaded in {time.time() - t0} second(s)")
 
     # preparing model configs
     hidden_size = 1024 if 'large' in args.pretrained_path else \
@@ -116,14 +120,19 @@ def train_model(args: Namespace):
     logger.info(f"Pretrained model was loaded in {time.time()-t0} second(s)")
 
     train_features = convert_examples_to_features(
-        train_examples, label_list, args.max_seq_length, model.encode_word, args.squeeze)
+        train_examples, label_list,
+        args.max_seq_length, model.encode_word,
+        args.squeeze)
 
     if args.training_mix:
         train_features.extend(convert_examples_to_features(
-            train_examples, label_list, args.max_seq_length, model.encode_word, not args.squeeze))
+            train_examples, label_list,
+            args.max_seq_length, model.encode_word,
+            not args.squeeze))
 
     num_train_optimization_steps = int(
-        len(train_features) / args.train_batch_size / args.gradient_accumulation_steps) * args.num_train_epochs
+        len(train_features) / args.train_batch_size /
+        args.gradient_accumulation_steps) * args.num_train_epochs
 
     no_decay = ['bias', 'final_layer_norm.weight']
     params = list(model.named_parameters())
@@ -135,8 +144,10 @@ def train_model(args: Namespace):
     ]
 
     warmup_steps = int(args.warmup_proportion * num_train_optimization_steps)
-    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
-    scheduler = WarmupLinearSchedule(optimizer, warmup_steps=warmup_steps, t_total=num_train_optimization_steps)
+    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate,
+                      eps=args.adam_epsilon)
+    scheduler = WarmupLinearSchedule(optimizer, warmup_steps=warmup_steps,
+                                     t_total=num_train_optimization_steps)
 
     # freeze model if necessary
     if args.freeze_model:
@@ -153,7 +164,8 @@ def train_model(args: Namespace):
             from apex import amp
         except ImportError:
             raise ImportError(
-                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
+                "Please install apex from https://www.github.com/nvidia/apex "
+                "to use fp16 training.")
         model, optimizer = amp.initialize(
             model, optimizer, opt_level=args.fp16_opt_level)
 
@@ -172,16 +184,19 @@ def train_model(args: Namespace):
     if args.data_tune:
         val_examples = processor.get_examples(args.data_tune, "tune")
         val_features = convert_examples_to_features(
-            val_examples, label_list, args.max_seq_length, model.encode_word, args.squeeze)
+            val_examples, label_list,
+            args.max_seq_length, model.encode_word,
+            args.squeeze)
         val_data = create_dataset(val_features)
 
     if args.data_test:
         eval_examples = processor.get_examples(args.data_test, "test")
         eval_features = convert_examples_to_features(
-            eval_examples, label_list, args.max_seq_length, model.encode_word, args.squeeze)
+            eval_examples, label_list, args.max_seq_length,
+            model.encode_word, args.squeeze)
         eval_data = create_dataset(eval_features)
 
-    for epoch_no in range(1, args.num_train_epochs+1):
+    for epoch_no in range(1, args.num_train_epochs + 1):
         epoch_stats = {"epoch": epoch_no}
         logger.info("Epoch %d" % epoch_no)
         tr_loss = 0
@@ -202,10 +217,12 @@ def train_model(args: Namespace):
             if args.fp16:
                 with amp.scale_loss(loss, optimizer) as scaled_loss:
                     scaled_loss.backward()
-                torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args.max_grad_norm)
+                torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer),
+                                               args.max_grad_norm)
             else:
                 loss.backward()
-                torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
+                torch.nn.utils.clip_grad_norm_(model.parameters(),
+                                               args.max_grad_norm)
 
             tr_loss += loss.item()
             nb_tr_examples += input_ids.size(0)
@@ -238,7 +255,8 @@ def train_model(args: Namespace):
 
             if f1 > best_val_f1:
                 best_val_f1 = f1
-                logger.info("\nFound better f1=%.4f on validation set. Saving model\n" % f1)
+                logger.info("\nFound better f1=%.4f on validation set. "
+                            "Saving model\n" % f1)
                 logger.info("%s\n" % report)
                 model.save(args.output_dir)
             else:
@@ -256,7 +274,8 @@ def train_model(args: Namespace):
             logger.info("%s\n" % report)
 
         if args.epoch_save_model:
-            epoch_output_dir = os.path.join(args.output_dir, "e%03d" % epoch_no)
+            epoch_output_dir = os.path.join(args.output_dir,
+                                            "e%03d" % epoch_no)
             os.makedirs(epoch_output_dir)
             model.save(epoch_output_dir)
 
diff --git a/trainer.py b/trainer.py
new file mode 100644
index 0000000000000000000000000000000000000000..12cbeb43b8b91be55f80b201629bd36f0766e528
--- /dev/null
+++ b/trainer.py
@@ -0,0 +1,365 @@
+"""Script to teach new models compatible with the library."""
+import configparser
+import logging
+import os
+import random
+import sys
+import time
+from pathlib import Path
+
+import numpy as np
+import torch
+from pytorch_transformers import AdamW, WarmupLinearSchedule
+from torch.utils.data import DataLoader, RandomSampler
+from tqdm import tqdm
+
+from poldeepner2.utils.data_utils import NerProcessor
+from poldeepner2.utils.data_utils import create_dataset, \
+    convert_examples_to_features, save_params
+from poldeepner2.utils.train_utils import evaluate_model
+
+
+def main():
+    """Train a model using the hyperparameters read from config.cfg."""
+    config_file = "config.cfg"
+    config = configparser.ConfigParser()
+    config.read(config_file)
+
+    # HYPERPARAMETERS
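+    # Read from the [train] section of config.cfg.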
+    adam_epsilon = config.getfloat('train', 'adam_epsilon')
+    data_test = config['train']['data_test']
+    data_train = config['train']['data_train']
+    data_tune = config['train']['data_tune']
+    device = config['train']['device']
+    dropout = config.getfloat('train', 'dropout')
+    epoch_save_model = config.getboolean('train', 'epoch_save_model')
+    eval_batch_size = config.getint('train', 'eval_batch_size')
+    fp16 = config.getboolean('train', 'fp16')
+    fp16_opt_level = config['train']['fp16_opt_level']
+    freeze_model = config.getboolean('train', 'freeze_model')
+    gradient_accumulation_steps = \
+        config.getint('train', 'gradient_accumulation_steps')
+    hidden_size = config.getint('train', 'hidden_size')
+    learning_rate = config.getfloat('train', 'learning_rate')
+    max_grad_norm = config.getfloat('train', 'max_grad_norm')
+    max_seq_length = config.getint('train', 'max_seq_length')
+    num_train_epochs = config.getint('train', 'num_train_epochs')
+    output_dir = config['train']['output_dir']
+    pretrained_path = config['train']['pretrained_path']
+    seed = config.getint('train', 'seed')
+    squeeze = config.getboolean('train', 'squeeze')
+    train_batch_size = config.getint('train', 'train_batch_size')
+    training_mix = config.getboolean('train', 'training_mix')
+    use_transfer = 'transfer' in config['train'] and \
+                   config['train']['transfer'] != 'None'
+    if use_transfer:
+        transfer = config['train']['transfer']
+    else:
+        transfer = None
+    warmup_proportion = config.getfloat('train', 'warmup_proportion')
+    weight_decay = config.getfloat('train', 'weight_decay')
+
+    # if wandb:
+    #     import wandb
+    #     wandb.init(project=wandb, config=config)
+
+    if os.path.exists(output_dir) and os.listdir(output_dir):
+        raise ValueError(
+            "Output directory (%s) already exists and is not empty."
+            % output_dir)
+
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+
+    logging.basicConfig(
+        format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
+        datefmt='%m/%d/%Y %H:%M:%S', level=logging.INFO,
+        filename=Path(output_dir) / "log.txt")
+    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
+    logger = logging.getLogger(__name__)
+    for item in sorted(config.items()):
+        logger.info(item)
+
+    if gradient_accumulation_steps < 1:
+        raise ValueError(
+            "Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
+            % gradient_accumulation_steps)
+
+    train_batch_size = train_batch_size // gradient_accumulation_steps
+
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+
+    # Determine set of labels
+    processor = NerProcessor()
+    datasets = [data_train]
+    if data_tune:
+        datasets.append(data_tune)
+    if data_test:
+        datasets.append(data_test)
+    label_list = processor.get_labels(
+        datasets, config.getint('data', 'tag_column_index'))
+    logger.info(f"Labels: {label_list}")
+    num_labels = len(label_list) + 1  # add one for IGNORE label
+    logger.info(f"Number of labels: {num_labels}")
+
+    # Load training data
+    logger.info("Loading training data...")
+    t0 = time.time()
+    train_examples = processor.get_examples(
+        data_train, config.getint('data', 'tag_column_index'), "train")
+    logger.info(f"Training data was loaded in {time.time() - t0} second(s)")
+
+    # preparing model configs
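+    # Infer the encoder hidden size from the model name: 'large' variants
+    # use 1024, 'base' variants 768, otherwise the configured value.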
+    hidden_size = 1024 if 'large' in pretrained_path \
+        else (768 if 'base' in pretrained_path else hidden_size)
+
+    logger.info("Loading pretrained model...")
+    t0 = time.time()
+    if pretrained_path.startswith("hf:"):
+        from poldeepner2.model.hf_for_token_calssification import \
+            HfModelForTokenClassification
+        pretrained_dir = pretrained_path.split(':')[1]
+        model = HfModelForTokenClassification(
+            pretrained_path=pretrained_dir, n_labels=num_labels,
+            hidden_size=hidden_size, dropout_p=dropout,
+            device=device)
+    elif pretrained_path.startswith("mt5:"):
+        from poldeepner2.model.mt5_for_token_calssification import \
+            Mt5ModelForTokenClassification
+        variant = pretrained_path.split(':')[1]
+        model = Mt5ModelForTokenClassification(
+            variant=variant, n_labels=num_labels, hidden_size=hidden_size,
+            dropout_p=dropout, device=device)
+    else:
+        from poldeepner2.model.xlmr_for_token_classification \
+            import XLMRForTokenClassification
+        pretrained_dir = pretrained_path
+        if ":" in pretrained_dir:
+            pretrained_dir = pretrained_dir.split(':')[1]
+        if not os.path.exists(pretrained_dir):
+            raise ValueError(
+                "RoBERTa language model not found on path '%s'"
+                % pretrained_dir)
+        model = XLMRForTokenClassification(
+            pretrained_path=pretrained_dir, n_labels=num_labels,
+            hidden_size=hidden_size, dropout_p=dropout,
+            device=device)
+    logger.info(f"Pretrained model was loaded in {time.time() - t0} second(s)")
+
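+    # Optionally warm-start from the weights of a previously trained model.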
+    if use_transfer:
+        if device == "cpu":
+            state_dict = torch.load(
+                open(os.path.join(transfer, 'model.pt'), 'rb'),
+                map_location='cpu')
+        else:
+            state_dict = torch.load(
+                open(os.path.join(transfer, 'model.pt'), 'rb'))
+        model.load_state_dict(state_dict)
+
+    model.to(device)
+    # if wandb:
+    #     wandb.watch(model)
+
+    train_features = convert_examples_to_features(
+        train_examples, label_list, max_seq_length, model.encode_word,
+        squeeze)
+
+    if training_mix:
+        train_features.extend(convert_examples_to_features(
+            train_examples, label_list, max_seq_length, model.encode_word,
+            not squeeze))
+
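+    # Total number of optimizer updates: one update is made every
+    # gradient_accumulation_steps batches, in each epoch.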
+    num_train_optimization_steps = int(
+        len(train_features) / train_batch_size /
+        gradient_accumulation_steps) * num_train_epochs
+
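+    # Exclude biases and layer-norm weights from weight decay.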
+    no_decay = ['bias', 'final_layer_norm.weight']
+
+    params = list(model.named_parameters())
+
+    optimizer_grouped_parameters = [
+        {'params': [p for n, p in params if not any(
+            nd in n for nd in no_decay)], 'weight_decay': weight_decay},
+        {'params': [p for n, p in params if any(
+            nd in n for nd in no_decay)], 'weight_decay': 0.0}
+    ]
+
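+    # Warm the learning rate up linearly over the first warmup_steps
+    # updates, then decay it linearly to zero.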
+    warmup_steps = int(warmup_proportion * num_train_optimization_steps)
+    optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate,
+                      eps=adam_epsilon)
+    scheduler = WarmupLinearSchedule(optimizer, warmup_steps=warmup_steps,
+                                     t_total=num_train_optimization_steps)
+
+    # freeze model if necessary
+    if freeze_model:
+        logger.info("Freezing XLM-R model...")
+        for n, p in model.named_parameters():
+            if 'xlmr' in n and p.requires_grad:
+                logging.info("Parameter %s - freezed" % n)
+                p.requires_grad = False
+            else:
+                logging.info("Parameter %s - unchanged" % n)
+
+    if fp16:
+        try:
+            from apex import amp
+        except ImportError:
+            raise ImportError(
+                "Please install apex from https://www.github.com/nvidia/apex "
+                "to use fp16 training.")
+        model, optimizer = amp.initialize(
+            model, optimizer, opt_level=fp16_opt_level)
+
+    # Train the model
+    logger.info("***** Running training *****")
+    logger.info("  Num examples = %d", len(train_examples))
+    logger.info("  Batch size = %d", train_batch_size)
+    logger.info("  Num steps = %d", num_train_optimization_steps)
+
+    train_data = create_dataset(train_features)
+
+    train_sampler = RandomSampler(train_data)
+
+    train_dataloader = DataLoader(train_data, sampler=train_sampler,
+                                  batch_size=train_batch_size)
+
+    # getting validation samples
+    best_val_f1 = 0.0
+    if data_tune:
+        val_examples = processor.get_examples(
+            data_tune, config.getint('data', 'tag_column_index'), "tune")
+        val_features = convert_examples_to_features(
+            val_examples, label_list, max_seq_length, model.encode_word,
+            squeeze)
+        val_data = create_dataset(val_features)
+
+    if data_test:
+        eval_examples = processor.get_examples(
+            data_test, config.getint('data', 'tag_column_index'), "test")
+        eval_features = convert_examples_to_features(
+            eval_examples, label_list, max_seq_length, model.encode_word,
+            squeeze)
+        eval_data = create_dataset(eval_features)
+
+    for epoch_no in range(1, num_train_epochs + 1):
+        epoch_stats = {"epoch": epoch_no}
+        logger.info("Epoch %d" % epoch_no)
+        tr_loss = 0
+        nb_tr_examples, nb_tr_steps = 0, 0
+
+        model.train()
+        steps = len(train_dataloader)
+
+        time_start = time.time()
+        # ToDo: add parameter for this feature
+        # for g in optimizer.param_groups:
+        #     g['lr'] = learning_rate - (learning_rate/100 * epoch_no)
+        # epoch_stats['lr'] = learning_rate - (learning_rate/100 * epoch_no)
+
+        for step, batch in tqdm(enumerate(train_dataloader), total=steps):
+            batch = tuple(t.to(device) for t in batch)
+            input_ids, label_ids, l_mask, valid_ids = batch
+            loss = model(input_ids, label_ids, l_mask, valid_ids)
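+            # Average the loss over the accumulation window so that the
+            # accumulated gradients match those of one larger batch.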
+            if gradient_accumulation_steps > 1:
+                loss = loss / gradient_accumulation_steps
+
+            if fp16:
+                with amp.scale_loss(loss, optimizer) as scaled_loss:
+                    scaled_loss.backward()
+                torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer),
+                                               max_grad_norm)
+            else:
+                loss.backward()
+                torch.nn.utils.clip_grad_norm_(model.parameters(),
+                                               max_grad_norm)
+
+            tr_loss += loss.item()
+            nb_tr_examples += input_ids.size(0)
+            nb_tr_steps += 1
+
+            epoch_stats["loss"] = loss
+            epoch_stats["learning_rate"] = scheduler.get_last_lr()[0]
+
+            if (step + 1) % gradient_accumulation_steps == 0:
+                optimizer.step()
+                scheduler.step()
+                model.zero_grad()
+
+            epoch_stats["step"] = step
+            # if wandb:
+            #     wandb.log(epoch_stats)
+
+        # if wandb:
+        #     epoch_stats["epoch_training_time"] = time.time() - time_start
+
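+        # Evaluate on the tune set and keep the model with the best F1.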
+        if data_tune:
+            logger.info("\nTesting on validation set...")
+            time_start = time.time()
+            f1, report = evaluate_model(model, val_data, label_list,
+                                        eval_batch_size, device)
+            time_end = time.time()
+            epoch_stats["validation_F1"] = f1
+            epoch_stats["epoch_validation_time"] = time_end - time_start
+
+            if f1 > best_val_f1:
+                best_val_f1 = f1
+                logger.info(
+                    "\nFound better f1=%.4f on validation set. Saving model\n"
+                    % f1)
+                logger.info("%s\n" % report)
+                torch.save(model.state_dict(),
+                           open(os.path.join(output_dir, 'model.pt'),
+                                'wb'))
+                save_params(output_dir, dropout, num_labels,
+                            label_list)
+
+        if data_test:
+            logger.info("\nTesting on test set...")
+            time_start = time.time()
+
+            f1_score, report = evaluate_model(model, eval_data, label_list,
+                                              eval_batch_size, device)
+            time_end = time.time()
+            epoch_stats["test_F1"] = f1_score
+            epoch_stats["epoch_testing_time"] = time_end - time_start
+            logger.info("%s\n" % report)
+
+        if epoch_save_model:
+            epoch_output_dir = os.path.join(output_dir, "e%03d" % epoch_no)
+            os.makedirs(epoch_output_dir)
+            torch.save(model.state_dict(),
+                       open(os.path.join(epoch_output_dir, 'model.pt'), 'wb'))
+            save_params(epoch_output_dir, dropout, num_labels, label_list)
+
+        # if wandb:
+        #     wandb.log(epoch_stats)
+
+    model.to(device)
+
+    if data_test:
+        eval_data = create_dataset(eval_features)
+        f1_score, report = evaluate_model(model, eval_data, label_list,
+                                          eval_batch_size, device)
+        logger.info("\n%s", report)
+        output_eval_file = os.path.join(output_dir, "test_results.txt")
+        with open(output_eval_file, "w") as writer:
+            logger.info("***** Writing results to file *****")
+            writer.write(report)
+            logger.info("Done.")
+
+
+if __name__ == "__main__":
+    main()