Skip to content
Snippets Groups Projects
Commit 3d9dd875 authored by Mateusz Gniewkowski's avatar Mateusz Gniewkowski
Browse files

Merge branch 'develop' into 'master'

Develop

See merge request !2
parents 4da073b8 83bc71de
Branches
No related tags found
1 merge request!2Develop
Pipeline #2546 passed
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
.vscode
\ No newline at end of file
image: "clarinpl/python:3.6"
cache:
paths:
- .tox
stages:
- check_style
- test
- build
pep8:
stage: check_style
before_script:
- pip install tox==2.9.1
script:
- tox -v -e pep8
docstyle:
stage: check_style
before_script:
- pip install tox==2.9.1
script:
- tox -v -e docstyle
test:
stage: test
image: "docker:18.09.7"
services:
- "docker:18.09.7-dind"
script:
- docker build -t clarinpl/wordifier .
- docker run --rm
-v "$(pwd)/requirements-dev.txt:/home/worker/requirements-dev.txt"
-v "$(pwd)/tests:/home/worker/tests"
clarinpl/wordifier
sh -c 'pip3 install -r requirements-dev.txt ; nose2 -v tests'
build:
stage: build
image: "docker:18.09.7"
only:
- master
services:
- "docker:18.09.7-dind"
script:
- docker build -t clarinpl/wordifier .
- echo $DOCKER_PASSWORD > pass.txt
- cat pass.txt | docker login --username $DOCKER_USERNAME --password-stdin
- rm pass.txt
- docker push clarinpl/wordifier
FROM clarinpl/python:3.6
WORKDIR /home/worker

# Make 'python3' point explicitly at Python 3.6.
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.6 1
# NOTE(review): this sets the 'python' alternative although only 'python3'
# was registered above -- confirm the base image registers 'python', or the
# command may fail.
RUN update-alternatives --set python /usr/bin/python3.6

# Morfeusz 2 morphological analyser: system package plus the prebuilt
# CPython 3.6 wheel (not available on PyPI).
RUN apt-get update && apt-get install -y morfeusz2
RUN wget -O morfeusz2-1.9.16-cp36-cp36m-linux_x86_64.whl http://download.sgjp.pl/morfeusz/20200913/Linux/18.04/64/morfeusz2-1.9.16-cp36-cp36m-linux_x86_64.whl
RUN python3.6 -m pip install morfeusz2-1.9.16-cp36-cp36m-linux_x86_64.whl

COPY ./src ./src
COPY ./main.py .
COPY ./requirements.txt .
COPY ./data ./data
RUN python3.6 -m pip install -r requirements.txt

# Run the worker as a service by default.
CMD ["python3.6", "main.py", "service"]
\ No newline at end of file
# Wordifier
A service that expands abbreviations into full text. The following modules are implemented at this time:
- verbal notation of digits, numbers, decimal and ordinary fractions (with separators '.' and '/')
- verbal notation of simple equations with addition, subtraction, multiplication and division
- verbal notation of dates
- recognizing different ways to write dates.
- 25.12.2010 or 25,12,12 (day/month, day/month, year)
- 2009-08-30 or 20 08 30 (year, day/month, day/month)
- 12 Jan 2010 or 31 Jan 1998 (day, month, year)
- Mar 12 (month, year)
- Dec 15 (day, month)
- April 30 2000 (month, day, year)
- replace currency symbols with words
- write special characters (%, &, #, ^, =, +, -, /) in words
\ No newline at end of file
[service]
tool = wordifier
root = /samba/requests/
rabbit_host = rabbitmq
rabbit_user = test
rabbit_password = test
queue_prefix = nlp_
[tool]
workers_number = 5
processed_lines = 1000
[logging]
port = 9998
local_log_level = INFO
[logging_levels]
__main__ = INFO
This diff is collapsed.
{
"number_words": {
"0": "zero",
"1": "jeden",
"2": "dwa",
"3": "trzy",
"4": "cztery",
"5": "pięć",
"6": "sześć",
"7": "siedem",
"8": "osiem",
"9": "dziewięć",
"10": "dziesięć",
"11": "jedenaście",
"12": "dwanaście",
"13": "trzynaście",
"14": "czternaście",
"15": "piętnaście",
"16": "szesnaście",
"17": "siedemnaście",
"18": "osiemnaście",
"19": "dziewiętnaście",
"20": "dwadzieścia",
"30": "trzydzieści",
"40": "czterdzieści",
"50": "pięćdziesiąt",
"60": "sześćdziesiąt",
"70": "siedemdziesiąt",
"80": "osiemdziesiąt",
"90": "dziewięćdziesiąt",
"100": "sto",
"200": "dwieście",
"300": "trzysta",
"400": "czterysta",
"500": "pięćset",
"600": "sześćset",
"700": "siedemset",
"800": "osiemset",
"900": "dziewięćset"
},
"ordinal_number_words": {
"0": "zerowy",
"1": "pierwszy",
"2": "drugi",
"3": "trzeci",
"4": "czwarty",
"5": "piąty",
"6": "szósty",
"7": "siódmy",
"8": "ósmy",
"9": "dziewiąty",
"10": "dziesiąty",
"11": "jedenasty",
"12": "dwunasty",
"13": "trzynasty",
"14": "czternasty",
"15": "piętnasty",
"16": "szesnasty",
"17": "siedemnasty",
"18": "osiemnasty",
"19": "dziewiętnasty",
"20": "dwudziesty",
"30": "trzydziesty",
"40": "czterdziesty",
"50": "pięćdziesiąty",
"60": "sześćdziesiąty",
"70": "siedemdziesiąty",
"80": "osiemdziesiąty",
"90": "dziewięćdziesiąty",
"100": "setny",
"200": "dwusetny",
"300": "trzechsetny",
"400": "czterechsetny",
"500": "pięćsetny",
"600": "sześćsetny",
"700": "siedemsetny",
"800": "osiemsetny",
"900": "dziewięćsetny"
},
"large_numbers": {
"3": "tysiąc",
"6": "milion",
"9": "miliard",
"12": "bilion",
"15": "biliard",
"18": "trylion",
"21": "tryliard",
"24": "kwadrylion",
"27": "kwadryliard",
"30": "kwintylion",
"33": "kwintyliard",
"36": "sekstylion",
"39": "sekstyliard",
"42": "septylion",
"45": "septyliard",
"48": "oktylion",
"51": "oktyliard",
"54": "nonilion",
"57": "noniliard",
"60": "decylion",
"63": "decyliard",
"66": "undecylion",
"69": "undecyliard",
"72": "duodecylion",
"75": "duodecyliard",
"100": "googol",
"600": "centylion",
"603": "centyliard"
},
"ordinal_large_numbers": {
"3": "tysięczny",
"6": "milionowy",
"9": "miliardowy",
"12": "bilionowy"
}
}
\ No newline at end of file
version: '3'
services:
wordifier:
container_name: clarin_wordifier
build: ./
working_dir: /home/worker
command:
- python3.6 main.py service
environment:
- PYTHONUNBUFFERED=0
volumes:
- '/samba:/samba'
- './config.ini:/home/worker/config.ini'
- './src:/home/worker/src'
- './tests:/home/worker/tests'
- './main.py:/home/worker/main.py'
main.py 0 → 100755
"""Implementation of wordifier service."""
import argparse
import nlp_ws
from src.worker import Worker
def get_args():
    """Parse and return the command line arguments."""
    arg_parser = argparse.ArgumentParser(description="wordifier")
    mode_parsers = arg_parser.add_subparsers(dest="mode")
    # Force the user to pick a mode explicitly.
    mode_parsers.required = True
    mode_parsers.add_parser("service", help="Run as a service")
    return arg_parser.parse_args()
def main():
    """Run the program in the mode selected on the command line."""
    args = get_args()
    runners = {
        "service": lambda: nlp_ws.NLPService.main(Worker),
    }
    # Unknown modes are a no-op (argparse already rejects them anyway).
    runners.get(args.mode, lambda: None)()


if __name__ == "__main__":
    main()
parameterized==0.8.1
nose2==0.10.0
\ No newline at end of file
nlp-ws
Babel==2.8.0
\ No newline at end of file
"""Implementation of ccl reading functionality."""
from xml.etree.ElementTree import iterparse
class CCLHandler:
    """Streams a CCL (XML) document through tag-specific unmarshallers."""

    def __init__(self, ccl_file_name):
        """Remember the path of the CCL file to be processed."""
        self._file_name = ccl_file_name

    def process(self, output_file, unmarshallers):
        """Parse the input XML and write unmarshalled elements to a file.

        Args:
            output_file (str): Path the converted text is written to.
            unmarshallers (dict): Maps an XML tag name to a callable that
                turns the finished element into a string.
        """
        with open(self._file_name, 'r', encoding='utf-8') as source, \
                open(output_file, 'w', encoding='utf-8') as sink:
            for _, element in iterparse(source):
                handler = unmarshallers.get(element.tag)
                if handler:
                    sink.write(handler(element))
                # Free the element tree as we go to keep memory flat.
                element.clear()
"""Module for converting dates to words."""
from babel import Locale
from src.num2words import num2words
# Tag suffixes (number:case:gender) for which the matched word's tag is
# applied when inflecting date words; see date2words().
date_tags = ['sg:gen:m3']
def check_none(token):
    """Return ``token`` unchanged when truthy, otherwise an empty list."""
    return token if token else []
def month_name_expansion(month):
    """Expand a month abbreviation or normalize a full month name.

    Args:
        month (str): Month abbreviation or full name.

    Returns:
        str: Full month name in genitive case.
    """
    # Polish month abbreviations are exactly three characters long.
    is_abbreviation = len(month) == 3
    locale = Locale('pl')
    month = month.lower()
    if is_abbreviation:
        abbreviated = locale.months['format']['abbreviated']
        # Babel keys months 1-12, so the list position + 1 is the key.
        position = list(abbreviated.values()).index(month) + 1
        return locale.months['format']['wide'][position]
    for month_format in ['format', 'stand-alone']:
        wide = locale.months[month_format]['wide']
        if month in list(wide.values()):
            position = list(wide.values()).index(month) + 1
            # Always map to the 'format' (genitive) wide form.
            month = locale.months['format']['wide'][position]
    return month
def date2words(date_match, tags=None):
    """Convert a matched date to a list of words.

    Args:
        date_match (re.Match): Match of the date regex with named groups
            for day/month/year parts and their separators.
        tags (list of str, optional): Morphological tags of the matched
            words; only the first one is considered. Defaults to None.

    Returns:
        list of str: List of words representing date.
    """
    # Apply the caller's tag only when its number:case:gender part is one
    # of the supported date tags (singular genitive masculine).
    if tags and ":".join(tags[0].split(":")[1:4]) in date_tags:
        corrected_tag = tags[0]
    else:
        corrected_tag = None
    if date_match['day_or_month_year']:
        # Digits only: day/month, day/month, year.
        day_month1 = num2words(date_match['day_month1'], corrected_tag,
                               ordinal=True)
        day_month2 = num2words(date_match['day_month2'], corrected_tag,
                               ordinal=True)
        year = num2words(date_match['year1'], corrected_tag, ordinal=True)
        # split punctuation into single characters and remove if None
        date_order = [day_month1, *check_none(date_match['punct1']),
                      day_month2, *check_none(date_match['punct2']), year]
    elif date_match['year_month_or_day']:
        # Digits only: year first, then day/month, day/month.
        day_month3 = num2words(date_match['day_month3'], ordinal=True)
        day_month4 = num2words(date_match['day_month4'], ordinal=True)
        year = num2words(date_match['year2'], ordinal=True)
        # split punctuation into single characters and remove if None
        date_order = [year, *check_none(date_match['punct3']), day_month3,
                      *check_none(date_match['punct4']), day_month4]
    elif date_match['month_in_words']:
        # Month written as a (possibly abbreviated) word; day and year
        # may appear on either side of it.
        day = date_match['day1']
        if date_match['day2']:
            day = date_match['day2']
        if day:
            day = num2words(day, corrected_tag, ordinal=True)
        year = ''
        if date_match['year3']:
            year = num2words(date_match['year3'], corrected_tag, ordinal=True)
        if date_match['year4']:
            year = num2words(date_match['year4'], corrected_tag, ordinal=True)
        if not day and not year:
            # A bare month name is left untouched.
            return [date_match['month']]
        else:
            month = month_name_expansion(date_match['month'])
            # split punctuation into single characters and remove if None
            if date_match['day2']:
                date_order = [month, *check_none(date_match['punct7']),
                              day, *check_none(date_match['punct8'])]
            elif date_match['day1']:
                date_order = [day, *check_none(date_match['punct5']),
                              month, *check_none(date_match['punct6'])]
            else:
                date_order = [month]
            if year:
                date_order = date_order + [year]
            # Normalize any falsy entries to empty strings.
            date_order = list(map(lambda x: x if x else '', date_order))
    else:
        date_order = ['']
    return date_order
"""Module for converting numbers to words."""
import math
import json
from src.utils import get_word_form, trailing_zeros
# The number-word tables are loaded once at import time from the bundled
# JSON file; its keys are strings, so they are converted to int for lookups.
with open('data/numbers.json', 'r') as numbers_file:
    numbers_dict = json.load(numbers_file)
# Cardinal words: 0-20, round tens and round hundreds.
number_words = {int(k): v for k, v in numbers_dict['number_words'].items()}
# Ordinal counterparts of ``number_words``.
ordinal_number_words = {int(k): v for k, v
                        in numbers_dict['ordinal_number_words'].items()}
# Scale words keyed by power of ten (3 -> 'tysiąc', 6 -> 'milion', ...).
large_numbers = {int(k): v for k, v
                 in numbers_dict['large_numbers'].items()}
# Ordinal scale words (3 -> 'tysięczny', ...).
ordinal_large_numbers = {int(k): v for k, v
                         in numbers_dict['ordinal_large_numbers'].items()}
def three_digit_to_words(text, tag='', ordinal=False):
    """Convert a number of up to three digits to words with a given tag."""
    lookup = ordinal_number_words if ordinal else number_words
    value = int(text)
    if value == 0:
        return get_word_form(lookup[value], tag)
    ones = value % 10
    tens = value % 100 - ones
    hundreds_digit = value // 100
    parts = []
    if hundreds_digit != 0:
        if tens == 0 and ones == 0:
            # An exact hundred carries the requested (possibly ordinal) form.
            parts.append(get_word_form(lookup[hundreds_digit * 100], tag))
        else:
            # Otherwise the hundreds word stays in its base cardinal form.
            parts.append(get_word_form(number_words[hundreds_digit * 100], ''))
    if 0 < tens + ones <= 20:
        # 1-20 are single dictionary words (including the teens).
        parts.append(get_word_form(lookup[tens + ones], tag))
    else:
        if tens != 0:
            parts.append(get_word_form(lookup[tens], tag))
        if ones != 0:
            parts.append(get_word_form(lookup[ones], tag))
    return ' '.join(parts)
def num2words(text, tag='', ordinal=False):
    """Convert a number written as digits to Polish words.

    Args:
        text (str): Number as a string of digits (any length).
        tag (str, optional): Morphological tag for the resulting words.
            Defaults to ''.
        ordinal (bool, optional): If word should be derived from ordinal
            number. Defaults to False.

    Returns:
        str: Returns number as words with given tag.
    """
    i = 0
    words = []
    number = int(text)
    if ordinal:
        # Round values such as 2000 are read as one compound ordinal
        # ('dwutysięczny'): a prefix glued to an ordinal scale word.
        zeros = trailing_zeros(number)
        zeros = 3 * math.floor(zeros / 3)
        if zeros > 2 and 0 < len(text) - zeros <= 3:
            number = number // 10 ** zeros
            if number == 1:
                words = ''
            else:
                # 'numcomp' yields the compound prefix form (e.g. 'dwu').
                words = three_digit_to_words(str(number), 'numcomp')
            words += get_word_form(ordinal_large_numbers[zeros], tag)
            return words
    if len(text) <= 3 or number == 0:
        return three_digit_to_words(text, tag, ordinal)
    while number > 0:
        remainder = number % 1000
        if i == 0:
            # Only the lowest group carries the caller's tag/ordinal form.
            triple = three_digit_to_words(remainder, tag, ordinal)
        else:
            triple = three_digit_to_words(remainder)
        number = number // 1000
        if remainder == 0 and number != 0:
            # Skip empty groups, e.g. the thousands group of 1000642.
            i += 3
            continue
        if i == 0:
            words.append(triple)
        else:
            # Grammatical form of the scale word depends on the group value:
            # 1 -> singular ('tysiąc'), 2-4 -> nominative plural
            # ('tysiące'), everything else -> genitive plural ('tysięcy').
            # BUGFIX: the teens 12-14 take the genitive plural as well
            # ('dwanaście tysięcy', not 'dwanaście tysiące'), so they must
            # be excluded from the 2-4 branch.
            if remainder == 1:
                tag = 'subst:sg:nom:m3'
            elif (remainder % 10 in [2, 3, 4]
                    and remainder % 100 not in [12, 13, 14]):
                tag = 'subst:pl:nom:m3'
            else:
                tag = 'subst:pl:gen:m3'
            form = get_word_form(large_numbers[i], tag)
            if remainder == 1:
                # A bare scale word: 'tysiąc', not 'jeden tysiąc'.
                words.append(form)
            else:
                words.append(triple + ' ' + form)
        i += 3
    return ' '.join(list(reversed(words)))
"""Module for useful functions."""
from enum import Enum
import morfeusz2
class TokenType(Enum):
    """Type of token recognized for wordification."""

    NUMBER = 1             # token made of digits only
    SPECIAL_CHARACTER = 2  # symbol such as %, &, #, ^, =, +, -, /
    PUNCTUATION = 3        # separator (space/dot/comma) between number parts
    CURRENCY = 4           # currency symbol found in the currencies data file
class NumberPlural(Enum):
    """Plural category of a number, selecting the word suffix.

    E.g:
    SINGULAR 1$ - jeden dolar
    SEVERAL (2-4) 2$ - dwa dolary
    MANY (5+) 7$ - siedem dolarów
    """

    SINGULAR = 0
    SEVERAL = 1
    MANY = 2


def to_number_plural(number):
    """Map a number onto its :class:`NumberPlural` category.

    Args:
        number (int or string): Number to be classified.

    Returns:
        NumberPlural: Category deciding what the end of the word will be.
    """
    value = int(number)
    if value == 1:
        return NumberPlural.SINGULAR
    if 2 <= value <= 4:
        return NumberPlural.SEVERAL
    return NumberPlural.MANY
def is_simple_number(tokens, special_types):
    """Check if a list of tokens forms a simple number.

    A simple number contains only digits, optionally with spaces between
    groups of three.

    Args:
        tokens (list): List of tokens.
        special_types (list): Types of tokens.

    Returns:
        bool: True if the joined tokens are a simple number, else False.
    """
    number_tokens = [tok for tok, tok_type in zip(tokens, special_types)
                     if tok_type == TokenType.NUMBER]
    # Every group after the first must have exactly three digits.
    grouped_by_three = all(len(tok) == 3 for tok in number_tokens[1:])
    digits_or_spaces = all(tok.isdigit() or tok == ' ' for tok in tokens)
    return grouped_by_three and digits_or_spaces
def is_fraction(tokens, decimal=False):
    """Check whether the tokens are two numbers split by a slash or a dot.

    Args:
        tokens (list): List of tokens.
        decimal (bool, optional): If True the delimiter is a dot, otherwise
            a slash '/'. Defaults to False.

    Returns:
        bool: True if the tokens form a fraction, otherwise False.
    """
    if len(tokens) < 3:
        return False
    separator = '.' if decimal else '/'
    halves = ''.join(tokens).split(separator)
    if len(halves) != 2:
        return False
    if tokens.count(separator) != 1:
        return False
    return all(tok.isdigit() or tok in ' /.' for tok in tokens)
def trailing_zeros(number):
    """Count the trailing zeros of a number.

    Returns:
        int: Number of trailing zeros in the decimal representation.
    """
    digits = str(number)
    return len(digits) - len(digits.rstrip('0'))
def search_form(forms, tag):
    """Search for the correct word form among those returned by Morfeusz.

    Args:
        forms (list of tuples): Tags and variations of words returned
            by Morfeusz; the third element is a colon-separated tag whose
            parts may hold dot-separated alternatives.
        tag (list of str): Grammatical categories of the wanted form.

    Returns:
        str: Word properly conjugated with the given tag or None if not
        found.
    """
    for candidate in forms:
        categories = [part.split('.') for part in candidate[2].split(':')]
        if all(wanted in categories[position]
               for position, wanted in enumerate(tag)):
            return candidate[0]
    return None
def get_word_form(text, tag):
    """Change the word into the form matching a morphological tag.

    Args:
        text (str): Word to be changed.
        tag (str): Morphological tag (colon-separated categories).

    Returns:
        str: Word inflected according to the tag, or the unchanged ``text``
        when the tag is empty or no matching form is found.
    """
    if not tag:
        return text
    morf = morfeusz2.Morfeusz()
    all_forms = morf.generate(text)
    tag = tag.split(':')
    # Keep only the forms whose part of speech matches the requested one.
    forms = [x for x in all_forms if x[2].split(':')[0] == tag[0]]
    form = search_form(forms, tag)
    if form is None and len(tag) > 4:
        # Retry with the first four categories only; trailing categories
        # often prevent an exact match.
        form = search_form(forms, tag[:4])
    # BUGFIX: previously the function fell off the end and returned None
    # when a tag longer than four categories had no match; always fall
    # back to the unchanged text instead.
    return form if form is not None else text
def subtract_from_first(list_of_tuples, offset):
    """Return the tuple with ``offset`` subtracted from its first element."""
    first, *rest = list_of_tuples
    return (first - offset, *rest)
def check_and_replace(string_builder, find, replace, filtered_tokens):
    """Check for matches in list and replace them with given tokens.

    Remove replaced tokens from `filtered_tokens` to avoid double
    processing.

    Note: ``find`` and ``replace`` are mutated in place (matched entries
    are removed as they are consumed).

    Args:
        string_builder (list of str): List of all words.
        find (list of str): Tokens to be replaced.
        replace (list of str): Words that will replace `find` tokens in
            `string_builder`.
        filtered_tokens (list of tuples): List of tokens and their features.

    Returns:
        (list of str, list of tuples): Pair: list of words with replaced
            matched tokens and filtered list of tokens and their features
            with deleted items that have been replaced.
    """
    if not find or not replace:
        return string_builder, filtered_tokens
    new_builder = string_builder.copy()
    # Longest match to look for; bounds the multi-token accumulation below.
    max_lenght = max(map(len, find))
    for i, token in enumerate(string_builder):
        if not find:
            break
        to_remove = [i]
        check = token
        j = i + 1
        if check in find:
            # Single-token match: replace in place.
            new_builder[i] = ''.join(replace[find.index(check)])
            filtered_tokens = list(filter(lambda x: x[0] != i, filtered_tokens))
            del find[0], replace[0]
            continue
        # NOTE(review): this only compares the FIRST character of the token
        # against the first pattern -- possibly intended to be a prefix test
        # of the whole token against every pattern; confirm.
        if check[0] != find[0][:len(check[0])]:
            continue
        # Accumulate following tokens until a multi-token match is found.
        while len(check) < max_lenght and j < len(string_builder):
            check += string_builder[j]
            to_remove.append(j)
            if check in find:
                index = find.index(check)
                # Splice the replacement words over the matched span.
                new_builder = new_builder[:i] + replace[index]
                if j + 1 < len(string_builder):
                    new_builder += string_builder[j + 1:]
                filtered_tokens = list(filter(lambda x: x[0] not in to_remove,
                                              filtered_tokens))
                find.pop(index)
                replace.pop(index)
                if not find:
                    return new_builder, filtered_tokens
            j += 1
    return new_builder, filtered_tokens
"""Implementation of wordifier functionality."""
import re
import json
from itertools import islice
from src.utils import is_simple_number, subtract_from_first, trailing_zeros, \
check_and_replace, TokenType, NumberPlural, to_number_plural, is_fraction
from src.num2words import num2words
from src.date2words import date2words
class Wordifier:
    """Class for generating words from special characters or numbers."""

    # Matches dates written as digits (day-month-year or year-month-day)
    # or with a Polish month name/abbreviation; the named groups are
    # consumed by src.date2words.date2words().
    date_regex = re.compile(
        r'\b(?P<day_or_month_year>'
        r'(?P<day_month1>[0-3]?\d)(?P<punct1>[ \t\-\./,]{1,2})'
        r'(?P<day_month2>[0-3]?\d)(?P<punct2>[ \t\-\./,]{1,2})'
        r'(?P<year1>\d{4}|\d{2}))\b|'
        r'\b(?P<year_month_or_day>(?P<year2>\d{4}|\d{2})'
        r'(?P<punct3>[ \t\-\./,]{1,2})(?P<day_month3>[0-3]?\d)'
        r'(?P<punct4>[ \t\-\./,]{1,2})(?P<day_month4>[0-3]?\d))\b|'
        r'(?P<month_in_words>'
        r'(?:(?P<day1>[0-3]?\d)(?P<punct5>[ \t\-\./,]{0,2}))?'
        r'\b(?P<month>Sty(?:|cze[nń]|cznia)|Lut(?:|y|ego)|Mar(?:|zec|ca)|'
        r'Kwi(?:|ecie[nń]|etnia)|Maj(?:|a)|Cze(?:|rwiec|rwca)|Lip(?:|iec|ca)'
        r'|Sie(?:|rpie[nń]|rpnia)|Wrz(?:|esie[nń]|e[śs]nia)'
        # NOTE(review): 'Lis(?:|topad|stopada)' matches 'lisstopada' but
        # never the genitive 'listopada' -- the second alternative should
        # likely be 'topada'; confirm and fix.
        r'|Pa[zź](?:|dziernik|dziernika)|Lis(?:|topad|stopada)'
        r'|Gru(?:|dzie[nń]|dnia))\b'
        r'((?:(?P<punct7>[ \t\-\./,]{0,2})(?P<day2>[0-3]?\d))'
        r'(?:(?P<punct8>[ \t\-\./,]{1,2})(?P<year4>\d{4}|\d{2}))|'
        r'(?:(?P<punct6>[ \t\-\./,]{0,2})(?P<year3>\d{4}|\d{2})))?)', re.I
    )
    # Two digit runs separated by an (optionally spaced) dot.
    decimal_fraction_regex = re.compile(r'\d+[ ]?(\.)[ ]?\d+')
    # Characters that may appear between parts of a multi-token number.
    number_punctuation = ' .,'
    # Which token types may directly follow a given type when joining a
    # multi-part expression (e.g. a number may be followed by a currency).
    following_type = {
        TokenType.NUMBER: [TokenType.NUMBER, TokenType.SPECIAL_CHARACTER,
                           TokenType.CURRENCY],
        TokenType.SPECIAL_CHARACTER: [TokenType.SPECIAL_CHARACTER,
                                      TokenType.NUMBER],
        TokenType.CURRENCY: []
    }
    # Tag of the denominator word of a fraction, chosen by the numerator's
    # plural class and optionally its case/gender.
    _denominator_tag = {
        NumberPlural.SINGULAR: {
            'default': 'adj:sg:nom:f',
            ('acc', 'dat', 'gen', 'loc'): {
                ('f'): 'adj:sg:acc:f'
            }
        },
        NumberPlural.SEVERAL: {
            'default': 'adj:pl:acc:f',
            ('dat'): {
                ('m1', 'm2', 'm3', 'f', 'n'): 'adj:sg:dat:f'
            },
            ('gen', 'loc'): {
                ('m1', 'm2', 'm3', 'f', 'n'): 'adj:pl:acc:m1'
            },
            ('nom', 'voc'): {
                ('m1'): 'adj:pl:acc:m1'
            }
        },
        NumberPlural.MANY: {
            'default': 'adj:pl:acc:m1',
            ('acc', 'nom', 'voc'): {
                ('m1'): 'adj:sg:dat:f'
            },
            ('gen', 'dat', 'inst', 'loc'): {
                ('m1', 'm2', 'm3', 'f', 'n'): 'adj:sg:dat:f'
            }
        }
    }
    # Words used for symbols that appear next to numbers (arithmetic
    # reading, e.g. '/' -> 'przez').
    special_character_numbers_map = {
        '+': 'plus',
        '-': 'minus',
        '/': 'przez',
        '*': 'razy',
        '%': 'procent',
        '&': 'ampersand',
        '=': 'równa się',
        '^': 'do potęgi',
        '#': 'numer'
    }
    # Words used for symbols that appear without numbers.
    special_character_map = {
        '+': 'plus',
        '-': '-',
        '/': 'ukośnik',
        '%': 'procent',
        '&': 'i',
        '=': 'równa się',
        '^': 'kareta',
        '#': 'kratka'
    }

    def __init__(self):
        """Class initialization."""
        # Callbacks used by CCLHandler: sentences are rewritten, chunk
        # boundaries become newlines.
        self.unmarshallers = {
            'chunk': lambda *args: '\n',
            'sentence': lambda *args: self._process_sent_tree(*args),
        }
        with open('data/currencies.json', 'r') as currency_file:
            self._currencies = json.load(currency_file)
        # Tokens of the current sentence to be wordified:
        # (token_id, text, tag, TokenType) tuples.
        self._wordify_tokens = []

    def _process_sent_tree(self, sentence_subtree):
        """Collect words and tags of one sentence and wordify it."""
        string_builder = []
        tags = []
        tok_id = 0
        for elem in sentence_subtree:
            if elem.tag == 'tok':
                token, tag = self._process_single_tok(tok_id, elem)
                string_builder.append(token)
                string_builder.append(' ')
                tags.append(tag)
                # Each token occupies two builder slots (word + space).
                tok_id += 2
            elif elem.tag == 'ns':
                # 'ns' means "no space" before the next token: drop it.
                tok_id -= 1
                string_builder.pop()
            else:
                raise Exception('Unrecognized tag inside sentence: ' + elem.tag)
        return self._process_sentence(string_builder, tags)

    def _get_denominator_tag(self, nominator_plural, nom_case, nom_gender=None):
        """Pick the denominator tag of a fraction.

        Falls back to the plural class' default when the numerator's case
        or gender is not listed in ``_denominator_tag``.
        """
        if nom_case == 'default' or nom_gender is None:
            return self._denominator_tag[nominator_plural]['default']
        for cases, value in self._denominator_tag[nominator_plural].items():
            if cases == 'default':
                continue
            if nom_case in cases:
                for genders, tag in value.items():
                    if nom_gender in genders:
                        return tag
        return self._denominator_tag[nominator_plural]['default']

    def _special_type(self, text):
        """Classify a token, or return None for ordinary words."""
        if text in self.special_character_map:
            return TokenType.SPECIAL_CHARACTER
        elif text in self._currencies:
            return TokenType.CURRENCY
        elif text.isdigit():
            return TokenType.NUMBER
        return None

    def _process_single_tok(self, tok_id, tok_subtree):
        """Extract the orth text and ctag of a single <tok> element."""
        text = ''
        tag = ''
        for elem in tok_subtree:
            if elem.tag == 'orth':
                text = elem.text
            elif elem.tag == 'lex':
                tag = self._process_lex(elem)
        word = self._process_word(tok_id, text, tag)
        return word, tag

    def _process_word(self, tok_id, text, tag):
        """Register the token for wordification; return it unchanged."""
        self._add_special(tok_id, text, tag)
        return text

    def _add_special(self, tok_id, text, tag):
        """Remember the token if it is a number, symbol or currency."""
        s_type = self._special_type(text)
        if s_type:
            self._wordify_tokens.append((tok_id, text, tag, s_type))
        return text

    def _process_lex(self, lex_subtree):
        """Read the morphological ctag out of a <lex> element."""
        tag = ''
        for elem in lex_subtree:
            if elem.tag == 'ctag':
                tag = elem.text
            elif elem.tag != 'base':
                raise Exception('Unrecognized tag inside lex: ' + elem.tag)
        if tag == '':
            raise Exception('Lex tag had no ctag inside!')
        return tag

    def _handle_fraction(self, tokens, tags):
        """Generate words from fraction splitted by slash '/'.

        Args:
            tokens (list of str): List that contains numbers separated by
                slash '/'.
            tags (list of str): Morphological tags of the tokens; the
                first one describes the numerator.

        Returns:
            str: Fraction as words.
        """
        text = ''.join(tokens)
        numerator, denominator = text.split('/')
        tag_num = tags[0]
        remainder = to_number_plural(int(numerator) % 10)
        tag_case, tag_gender = tag_num.split(':')[2:4]
        tag_den = self._get_denominator_tag(remainder, tag_case, tag_gender)
        zeros = trailing_zeros(denominator)
        # Ordinal denominators are only readable for short or round values
        # (e.g. 3/1000); otherwise read the fraction as 'X przez Y'.
        if len(denominator) < 4 or \
                (zeros > 2 and 0 < len(denominator) - zeros <= 3):
            return num2words(numerator, tag_num) + ' ' + \
                num2words(denominator, tag_den, True)
        else:
            return num2words(numerator) + ' przez ' + \
                num2words(denominator)

    def _handle_decimal_fraction(self, tokens):
        """Generate words from decimal fraction splitted by dot.

        Args:
            tokens (list of str): List that contains numbers separated by
                dot.

        Returns:
            str: Decimal fraction as words.
        """
        text = ''.join(tokens)
        number, numerator = text.split('.')
        number = number.replace(' ', '')
        tag_num = 'adj:sg:nom:f' if int(numerator) == 1 else 'num:pl:nom:f'
        # Denominator is the power of ten matching the fraction's length.
        denominator = str(10 ** len(numerator))
        remainder = to_number_plural(int(numerator) % 10)
        tag_den = self._get_denominator_tag(remainder, 'default')
        if int(number) == 0:
            return num2words(numerator, tag_num) + ' ' + \
                num2words(denominator, tag_den, True)
        else:
            return num2words(number) + ' i ' + \
                num2words(numerator, tag_num) + ' ' + \
                num2words(denominator, tag_den, True)

    def _check_decimal_fraction(self, tokens):
        """Checks whether given list of tokens starts with decimal fraction.

        If it contains a fraction, return the whole fraction text,
        otherwise return the first number.

        Args:
            tokens (list of str): List of tokens with number at the
                beginning.

        Returns:
            str: Tokens that form a fraction or number.
            int: The number of extra tokens that make up the fraction.
        """
        match = self.decimal_fraction_regex.search(''.join(tokens[:5]))
        if match and match.start() == 0:
            tokens_match = tokens[0]
            i = 1
            # Count how many tokens the matched fraction consumes.
            while tokens_match != match.group(0):
                tokens_match += tokens[i]
                i += 1
            return match.group(0), i - 1
        else:
            return tokens[0], 0

    def _handle_mixed_types(self, tokens, special_types, tags):
        """Convert a mixed run of numbers/symbols/currencies to words."""
        last_number_plural = NumberPlural.SINGULAR
        # Symbols are read arithmetically when numbers are present.
        if TokenType.NUMBER in special_types:
            special_character_map = self.special_character_numbers_map
        else:
            special_character_map = self.special_character_map
        i = 0
        iter_special_types = iter(special_types)
        for token_type in iter_special_types:
            if token_type == TokenType.SPECIAL_CHARACTER:
                if tokens[i] in special_character_map:
                    tokens[i] = special_character_map[tokens[i]]
                else:
                    tokens[i] = ''
            elif token_type == TokenType.PUNCTUATION:
                if tokens[i] == ' ':
                    tokens[i] = ''
            elif token_type == TokenType.NUMBER:
                number, skip = self._check_decimal_fraction(tokens[i:])
                if skip > 0:
                    words = self._handle_decimal_fraction(number)
                    if int(''.join(number).split('.')[0]) == 0:
                        # NOTE(review): NumberPlural has no FRACTION member
                        # in src.utils -- this line raises AttributeError
                        # when reached; confirm the intended plural class
                        # (and the matching currencies.json entry).
                        last_number_plural = NumberPlural.FRACTION
                    else:
                        last_number_plural = NumberPlural.MANY
                else:
                    words = num2words(number)
                    last_number_plural = to_number_plural(number)
                tokens = tokens[:i] + [words] + tokens[i + skip + 1:]
                if skip != 0:
                    # Advance the iterator past the tokens merged above.
                    next(islice(iter_special_types, skip - 1, skip), '')
            elif token_type == TokenType.CURRENCY:
                # Currency entries hold one word per NumberPlural value.
                suffix = last_number_plural.value
                tokens[i] = self._currencies[tokens[i]][suffix]
            i += 1
        text = ' '.join([w for w in tokens if w != ''])
        return text

    def _get_as_words(self, tokens, tags, special_types):
        """Convert special tokens and numbers to words.

        Args:
            tokens (list of str): List of tokens.
            tags (list of str): Morphological tags of the tokens.
            special_types (list of TokenType): Types of tokens.

        Returns:
            str : Joined tokens converted to words.
        """
        if is_simple_number(tokens, special_types):
            numbers = ''.join([n for i, n in enumerate(tokens)
                               if special_types[i] == TokenType.NUMBER])
            return num2words(''.join(numbers), tags[-1])
        elif is_fraction(tokens):
            return self._handle_fraction(tokens, tags)
        elif is_fraction(tokens, decimal=True):
            return self._handle_decimal_fraction(tokens)
        else:
            return self._handle_mixed_types(tokens, special_types, tags)

    def _check_number_multipart(self, index, next_id, string_builder):
        """Check if the next token is continuation of number with actual token.

        Args:
            index (int): Actual token id.
            next_id (int): Next token id.
            string_builder (list of str): List of all words.

        Returns:
            bool: Is next token continuation of a number.
        """
        return next_id == index + 1 or \
            (index + 2 == next_id and
             string_builder[index + 1] in self.number_punctuation)

    def _join_tokens(self, token, string_builder):
        """Combine tokens that form multi-part formulas.

        Args:
            token: Unused; the method iterates ``self._wordify_tokens``
                directly (the caller passes that same list in).
            string_builder (list of str): List of all words.

        Returns:
            list of tuple: List of joined tokens and their features.
                Every element contains start index, words, morphological
                tags and token types.
        """
        joined_tokens = []
        iter_wordify_tokens = enumerate(iter(self._wordify_tokens))
        for i, (index, token, tag, token_type) in iter_wordify_tokens:
            j = i + 1
            tokens = [token]
            tags = [tag]
            special_types = [token_type]
            start_id = index
            while j < len(self._wordify_tokens):
                next_id, next_token, next_tag, \
                    next_special_type = self._wordify_tokens[j]
                if not self._check_number_multipart(index, next_id,
                                                    string_builder):
                    break
                if next_special_type in self.following_type[token_type]:
                    if index + 2 == next_id:
                        # Keep the separator (space/dot/comma) in between.
                        tokens.append(string_builder[index + 1])
                        special_types.append(TokenType.PUNCTUATION)
                        tags.append('')
                    tokens.append(next_token)
                    tags.append(next_tag)
                    special_types.append(next_special_type)
                else:
                    break
                # Consume the joined token from the outer iteration too.
                next(iter_wordify_tokens)
                index = next_id
                token_type = next_special_type
                j += 1
            joined_tokens.append((start_id, tokens, tags, special_types))
        return joined_tokens

    def _handle_special_types(self, string_builder):
        """Convert special tokens to words and replace them in string builder.

        Args:
            string_builder (list of str): List of all words.

        Returns:
            list of str: Return updated string builder with special tokens
                replaced by words.
        """
        wordify_tokens = self._join_tokens(self._wordify_tokens, string_builder)
        enum_special = enumerate(wordify_tokens)
        for i, special_token in enum_special:
            index, tokens, tags, token_type = special_token
            words = self._get_as_words(tokens, tags, token_type)
            no_tokens = len(tokens)
            string_builder = string_builder[:index] + [words] + \
                string_builder[index + no_tokens:]
            # Replacing several tokens with one word shifts the start
            # indices of all the remaining entries.
            offset = no_tokens - 1
            wordify_tokens[i + 1:] = [subtract_from_first(x, offset)
                                      for x in wordify_tokens[i + 1:]]
        self._wordify_tokens.clear()
        return string_builder

    def _get_match_tag(self, match, string_builder, tags):
        """Collect the tags of the builder words covered by a regex match."""
        match = match.group(0)
        j = 0  # index into ``tags``; spaces in the builder carry no tag
        for i, word in enumerate(string_builder):
            if match.startswith(word):
                acc = word
                match_tags = [tags[j]]
                tmp = j
                # Extend the candidate span until it covers the whole match.
                while i < len(string_builder) - 1 and len(acc) < len(match):
                    i += 1
                    acc += string_builder[i]
                    if acc != match[:len(acc)]:
                        break
                    if string_builder[i] != ' ':
                        j += 1
                        match_tags.append(tags[j])
                j = tmp
                if acc == match:
                    return match_tags
            if word != ' ':
                j += 1
        return []

    def _handle_regexes(self, string_builder, tags):
        """Check for regexes in the given builder and replace them with words.

        Args:
            string_builder (list of str): List of all words.
            tags (list of str): Morphological tags of the words.

        Returns:
            list of str: Updated string builder with matches replaced by
                words.
        """
        sentence = ''.join(string_builder)
        matches = list(self.date_regex.finditer(sentence))
        if not matches:
            return string_builder
        replace = []
        for match in matches:
            date_tags = self._get_match_tag(match, string_builder, tags)
            replace.append(date2words(match, date_tags))
        matches = list(map(lambda m: m.group(0), matches))
        # Tokens consumed by a date must not be wordified again later.
        builder, self._wordify_tokens = check_and_replace(string_builder,
                                                          matches, replace,
                                                          self._wordify_tokens)
        return builder

    def _process_sentence(self, string_builder, tags):
        """Process a sentence and replace special tokens (eg. numbers).

        Args:
            string_builder (list of str): List of all words.
            tags (list of str): Morphological tags of the words.

        Returns:
            str: Sentence with replaced special tokens.
        """
        string_builder = self._handle_regexes(string_builder, tags)
        string_builder = self._handle_special_types(string_builder)
        # Keep the sentence capitalized after replacements at its start.
        if string_builder[0] and not string_builder[0][0].isupper():
            string_builder[0] = string_builder[0].capitalize()
        return ''.join(string_builder)
"""Implementation of nlp_worker."""
import logging
import nlp_ws
from src.wordifier import Wordifier
from src.ccl_handler import CCLHandler
_log = logging.getLogger(__name__)
class Worker(nlp_ws.NLPWorker):
    """NLP worker that converts special tokens in a CCL file into words."""

    @classmethod
    def static_init(cls, config):
        """Perform one-time static initialisation (nothing to set up)."""

    def process(self, input_file, task_options, output_file):
        """Read ``input_file``, wordify its tokens and write ``output_file``."""
        handler = CCLHandler(input_file)
        handler.process(output_file, Wordifier().unmarshallers)
import unittest
from parameterized import parameterized, param
from src.num2words import num2words
class TestNum2Words(unittest.TestCase):
    """Unit tests for the Polish ``num2words`` conversion."""

    # Morphological tags used to request agreement of denominators.
    single_tag = 'adj:sg:nom:f'
    several_tag = 'adj:pl:acc:f'
    many_tag = 'adj:pl:acc:m1'

    @parameterized.expand([
        ('0', 'zero'),
        ('08', 'osiem'),
        ('12', 'dwanaście'),
        ('23', 'dwadzieścia trzy'),
        ('48', 'czterdzieści osiem'),
        ('187', 'sto osiemdziesiąt siedem'),
        ('249', 'dwieście czterdzieści dziewięć'),
        ('600', 'sześćset'),
        ('720', 'siedemset dwadzieścia'),
        ('304', 'trzysta cztery'),
        ('1000', 'tysiąc'),
        ('425000', 'czterysta dwadzieścia pięć tysięcy'),
        ('102000', 'sto dwa tysiące'),
        ('390000', 'trzysta dziewięćdziesiąt tysięcy'),
        ('701000', 'siedemset jeden tysięcy'),
        ('993999', 'dziewięćset dziewięćdziesiąt trzy tysiące '
                   'dziewięćset dziewięćdziesiąt dziewięć'),
        ('1000642', 'milion sześćset czterdzieści dwa'),
        ('2001003', 'dwa miliony tysiąc trzy'),
        ('18456000', 'osiemnaście milionów '
                     'czterysta pięćdziesiąt sześć tysięcy'),
        ('1000000000', 'miliard'),
    ])
    def test_numbers(self, number, words):
        """Cardinal numbers are spelled out correctly."""
        self.assertEqual(num2words(number), words)

    @parameterized.expand([
        param('0', 'zerowy', ordinal=True),
        param('1', 'pierwszy', ordinal=True),
        param('10', 'dziesiąty', ordinal=True),
        param('15', 'piętnasty', ordinal=True),
        param('31', 'trzydziesty pierwszy', ordinal=True),
        param('70', 'siedemdziesiąty', ordinal=True),
        param('099', 'dziewięćdziesiąty dziewiąty', ordinal=True),
        param('100', 'setny', ordinal=True),
        param('102', 'sto drugi', ordinal=True),
        param('183', 'sto osiemdziesiąty trzeci', ordinal=True),
        param('201', 'dwieście pierwszy', ordinal=True),
        param('1000', 'tysięczny', ordinal=True),
        param('1005', 'tysiąc piąty', ordinal=True),
        param('2000', 'dwutysięczny', ordinal=True),
        param('2020', 'dwa tysiące dwudziesty', ordinal=True),
        param('10000', 'dziesięciotysięczny', ordinal=True),
        param('100856', 'sto tysięcy osiemset pięćdziesiąty szósty',
              ordinal=True),
        param('1000000', 'milionowy', ordinal=True),
        param('1002003', 'milion dwa tysiące trzeci', ordinal=True),
        param('1948052296', 'miliard dziewięćset czterdzieści osiem milionów '
              'pięćdziesiąt dwa tysiące '
              'dwieście dziewięćdziesiąty szósty', ordinal=True),
    ])
    def test_ordinal_numbers(self, number, words, ordinal):
        """Ordinal numbers are spelled out correctly."""
        self.assertEqual(num2words(number, ordinal=ordinal), words)

    @parameterized.expand([
        ('1', 'adj:sg:nom:f', 'jedna'),
        ('2', 'num:pl:nom:f', 'dwie'),
    ])
    def test_numbers_numerator(self, number, tag, words):
        """Numerators agree with the supplied morphological tag."""
        self.assertEqual(num2words(number, tag), words)

    @parameterized.expand([
        ('1', 'pierwsza'),
        ('2', 'druga'),
        ('5', 'piąta'),
        ('10', 'dziesiąta'),
        ('31', 'trzydziesta pierwsza'),
        ('100', 'setna'),
        ('102', 'sto druga'),
        ('512', 'pięćset dwunasta'),
        ('600', 'sześćsetna'),
        ('1000', 'tysięczna'),
        ('2002', 'dwa tysiące druga'),
        ('3000', 'trzytysięczna'),
        ('1000000000', 'miliardowa'),
        ('1473022977', 'miliard czterysta siedemdziesiąt trzy miliony '
         'dwadzieścia dwa tysiące dziewięćset siedemdziesiąta siódma'),
    ])
    def test_single_numbers_denominator(self, number, words, ordinal=True):
        """Denominators in singular feminine form."""
        self.assertEqual(num2words(number, self.single_tag, ordinal), words)

    @parameterized.expand([
        ('3', 'trzecie'),
        ('6', 'szóste'),
        ('10', 'dziesiąte'),
        ('47', 'czterdzieste siódme'),
        ('100', 'setne'),
        ('101', 'sto pierwsze'),
        ('300', 'trzechsetne'),
        ('981', 'dziewięćset osiemdziesiąte pierwsze'),
        ('1000', 'tysięczne'),
        ('8000', 'ośmiotysięczne'),
        ('10000', 'dziesięciotysięczne'),
        ('100000', 'stutysięczne'),
        ('1000115376708', 'bilion sto piętnaście milionów '
         'trzysta siedemdziesiąt sześć tysięcy siedemset ósme'),
    ])
    def test_several_numbers_denominator(self, number, words, ordinal=True):
        """Denominators in plural (paucal) form."""
        self.assertEqual(num2words(number, self.several_tag, ordinal), words)

    @parameterized.expand([
        ('4', 'czwartych'),
        ('8', 'ósmych'),
        ('10', 'dziesiątych'),
        ('69', 'sześćdziesiątych dziewiątych'),
        ('100', 'setnych'),
        ('212', 'dwieście dwunastych'),
        ('700', 'siedemsetnych'),
        ('901', 'dziewięćset pierwszych'),
        ('1000', 'tysięcznych'),
        ('6000', 'sześciotysięcznych'),
        ('10000', 'dziesięciotysięcznych'),
        ('1000000', 'milionowych'),
        ('238055017238', 'dwieście trzydzieści osiem miliardów '
         'pięćdziesiąt pięć milionów siedemnaście tysięcy '
         'dwieście trzydziestych ósmych'),
    ])
    def test_many_numbers_denominator(self, number, words, ordinal=True):
        """Denominators in plural genitive form."""
        self.assertEqual(num2words(number, self.many_tag, ordinal), words)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment