Commit 684bee0d authored by Mateusz Gniewkowski

Merge branch 'code_refactor' into 'develop'

Wordifier - first project outline

See merge request !1
parents 2eaf26fc 36baae94
Pipeline #2545 passed with stages in 1 minute and 32 seconds
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
.vscode
image: "clarinpl/python:3.6"
cache:
paths:
- .tox
stages:
- check_style
- test
- build
before_script:
- pip install tox==2.9.1
pep8:
stage: check_style
before_script:
- pip install tox==2.9.1
script:
- tox -v -e pep8
docstyle:
stage: check_style
before_script:
- pip install tox==2.9.1
script:
- tox -v -e docstyle
test:
stage: test
image: "docker:18.09.7"
services:
- "docker:18.09.7-dind"
script:
- docker build -t clarinpl/wordifier .
- docker run --rm
-v "$(pwd)/requirements-dev.txt:/home/worker/requirements-dev.txt"
-v "$(pwd)/tests:/home/worker/tests"
clarinpl/wordifier
sh -c 'pip3 install -r requirements-dev.txt ; nose2 -v tests'
build:
stage: build
  image: "docker:18.09.7"
only:
- master
services:
    - "docker:18.09.7-dind"
script:
- docker build -t clarinpl/wordifier .
- echo $DOCKER_PASSWORD > pass.txt
......
FROM clarinpl/python:3.6
WORKDIR /home/worker
RUN wget -O - http://download.sgjp.pl/apt/sgjp.gpg.key|apt-key add - && \
apt-add-repository http://download.sgjp.pl/apt/ubuntu && \
apt update && \
apt install morfeusz2 -y
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.6 1
RUN update-alternatives --set python /usr/bin/python3.6
RUN apt-get update && apt-get install -y morfeusz2
RUN wget -O morfeusz2-1.9.16-cp36-cp36m-linux_x86_64.whl http://download.sgjp.pl/morfeusz/20200913/Linux/18.04/64/morfeusz2-1.9.16-cp36-cp36m-linux_x86_64.whl
RUN python3.6 -m pip install morfeusz2-1.9.16-cp36-cp36m-linux_x86_64.whl
COPY ./src ./src
COPY ./main.py .
COPY ./requirements.txt .
COPY ./data ./data
RUN python3.6 -m pip install -r requirements.txt
CMD ["python3.6", "main.py", "service"]
# Wordifier
A service that expands abbreviations and numeric expressions into full words. The following modules are implemented at this time (a short usage sketch follows the list):
- verbal notation of digits, numbers, decimal and common fractions (with the separators '.' and '/')
- verbal notation of simple equations with addition, subtraction, multiplication and division
- verbal notation of dates, recognizing different ways of writing them:
  - 25.12.2010 or 25,12,12 (day/month, day/month, year)
  - 2009-08-30 or 20 08 30 (year, day/month, day/month)
  - 12 Jan 2010 or 31 Jan 1998 (day, month, year)
  - Mar 12 (month, year)
  - Dec 15 (day, month)
  - April 30 2000 (month, day, year)
- replacing currency symbols with words
- writing special characters (%, &, #, ^, =, +, -, /) in words
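
The number-to-words converter can also be exercised directly from Python. A minimal sketch (it assumes the `morfeusz2` package is available, as installed in the Dockerfile; the expected outputs are taken from the unit tests in `tests`):

```python
from src.num2words import num2words

# Cardinal numbers
print(num2words('23'))                  # dwadzieścia trzy
print(num2words('1000'))                # tysiąc

# Ordinal forms, as used for dates
print(num2words('31', ordinal=True))    # trzydziesty pierwszy
print(num2words('2020', ordinal=True))  # dwa tysiące dwudziesty
```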
This diff is collapsed.
{
"number_words": {
"0": "zero",
"1": "jeden",
"2": "dwa",
"3": "trzy",
"4": "cztery",
"5": "pięć",
"6": "sześć",
"7": "siedem",
"8": "osiem",
"9": "dziewięć",
"10": "dziesięć",
"11": "jedenaście",
"12": "dwanaście",
"13": "trzynaście",
"14": "czternaście",
"15": "piętnaście",
"16": "szesnaście",
"17": "siedemnaście",
"18": "osiemnaście",
"19": "dziewiętnaście",
"20": "dwadzieścia",
"30": "trzydzieści",
"40": "czterdzieści",
"50": "pięćdziesiąt",
"60": "sześćdziesiąt",
"70": "siedemdziesiąt",
"80": "osiemdziesiąt",
"90": "dziewięćdziesiąt",
"100": "sto",
"200": "dwieście",
"300": "trzysta",
"400": "czterysta",
"500": "pięćset",
"600": "sześćset",
"700": "siedemset",
"800": "osiemset",
"900": "dziewięćset"
},
"ordinal_number_words": {
"0": "zerowy",
"1": "pierwszy",
"2": "drugi",
"3": "trzeci",
"4": "czwarty",
"5": "piąty",
"6": "szósty",
"7": "siódmy",
"8": "ósmy",
"9": "dziewiąty",
"10": "dziesiąty",
"11": "jedenasty",
"12": "dwunasty",
"13": "trzynasty",
"14": "czternasty",
"15": "piętnasty",
"16": "szesnasty",
"17": "siedemnasty",
"18": "osiemnasty",
"19": "dziewiętnasty",
"20": "dwudziesty",
"30": "trzydziesty",
"40": "czterdziesty",
"50": "pięćdziesiąty",
"60": "sześćdziesiąty",
"70": "siedemdziesiąty",
"80": "osiemdziesiąty",
"90": "dziewięćdziesiąty",
"100": "setny",
"200": "dwusetny",
"300": "trzechsetny",
"400": "czterechsetny",
"500": "pięćsetny",
"600": "sześćsetny",
"700": "siedemsetny",
"800": "osiemsetny",
"900": "dziewięćsetny"
},
"large_numbers": {
"3": "tysiąc",
"6": "milion",
"9": "miliard",
"12": "bilion",
"15": "biliard",
"18": "trylion",
"21": "tryliard",
"24": "kwadrylion",
"27": "kwadryliard",
"30": "kwintylion",
"33": "kwintyliard",
"36": "sekstylion",
"39": "sekstyliard",
"42": "septylion",
"45": "septyliard",
"48": "oktylion",
"51": "oktyliard",
"54": "nonilion",
"57": "noniliard",
"60": "decylion",
"63": "decyliard",
"66": "undecylion",
"69": "undecyliard",
"72": "duodecylion",
"75": "duodecyliard",
"100": "googol",
"600": "centylion",
"603": "centyliard"
},
"ordinal_large_numbers": {
"3": "tysięczny",
"6": "milionowy",
"9": "miliardowy",
"12": "bilionowy"
}
}
version: '3'
services:
  wordifier:
container_name: clarin_wordifier
build: ./
working_dir: /home/worker
    command:
      - python3.6 main.py service
environment:
- PYTHONUNBUFFERED=0
volumes:
- '/samba:/samba'
- './config.ini:/home/worker/config.ini'
- './src:/home/worker/src'
- './tests:/home/worker/tests'
- './main.py:/home/worker/main.py'
parameterized==0.8.1
nose2==0.10.0
nlp-ws
python-morfeusz
Babel==2.8.0
@@ -2,19 +2,19 @@
from xml.etree.ElementTree import iterparse
class CCLHandler:
"""Implements reading ccl for anonymizer service."""
def __init__(self, ccl_file_name):
"""Initialize ccl_handler with a filename."""
"""Initialize CCLHandler with a filename."""
self._file_name = ccl_file_name
def process(self, output_file, unmarshallers):
"""Process xml tags using unmarshallers and save in output_file."""
        with open(self._file_name, 'r', encoding='utf-8') as input_file, \
                open(output_file, 'w', encoding='utf-8') as output_file:
            for event, elem in iterparse(input_file):
                unmarshal = unmarshallers.get(elem.tag, None)
                if unmarshal:
                    output_file.write(unmarshal(elem))
                elem.clear()
"""Module for converting dates to words."""
from babel import Locale
from src.num2words import num2words
date_tags = ['sg:gen:m3']
def check_none(token):
"""If token is none then convert to empty list otherwise return token."""
if not token:
return []
return token
def month_name_expansion(month):
"""Expand month abbreviation or change form.
Args:
        month (str): Month abbreviation or full name.
Returns:
str: Full month name in genitive case.
"""
abbr = len(month) == 3
locale = Locale('pl')
month = month.lower()
if abbr:
months = locale.months['format']['abbreviated']
index = list(months.values()).index(month) + 1
month = locale.months['format']['wide'][index]
else:
for format in ['format', 'stand-alone']:
if month in list(locale.months[format]['wide'].values()):
months = locale.months[format]['wide']
index = list(months.values()).index(month) + 1
month = locale.months['format']['wide'][index]
return month
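# Illustrative sketch of the expansion above (the values assume Babel's Polish
# CLDR data for Locale('pl'); they are not taken from this repository's tests):
#   month_name_expansion('sty')     -> 'stycznia'
#   month_name_expansion('styczeń') -> 'stycznia'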
def date2words(date_match, tags=None):
"""Convert a date to list of words.
Args:
date_match (re.Match): Date match.
        tags (list of str, optional): Morphological tags. Defaults to None.
Returns:
list of str: List of words representing date.
"""
if tags and ":".join(tags[0].split(":")[1:4]) in date_tags:
corrected_tag = tags[0]
else:
corrected_tag = None
if date_match['day_or_month_year']:
day_month1 = num2words(date_match['day_month1'], corrected_tag,
ordinal=True)
day_month2 = num2words(date_match['day_month2'], corrected_tag,
ordinal=True)
year = num2words(date_match['year1'], corrected_tag, ordinal=True)
# split punctuation into single characters and remove if None
date_order = [day_month1, *check_none(date_match['punct1']),
day_month2, *check_none(date_match['punct2']), year]
elif date_match['year_month_or_day']:
day_month3 = num2words(date_match['day_month3'], ordinal=True)
day_month4 = num2words(date_match['day_month4'], ordinal=True)
year = num2words(date_match['year2'], ordinal=True)
# split punctuation into single characters and remove if None
date_order = [year, *check_none(date_match['punct3']), day_month3,
*check_none(date_match['punct4']), day_month4]
elif date_match['month_in_words']:
day = date_match['day1']
if date_match['day2']:
day = date_match['day2']
if day:
day = num2words(day, corrected_tag, ordinal=True)
year = ''
if date_match['year3']:
year = num2words(date_match['year3'], corrected_tag, ordinal=True)
if date_match['year4']:
year = num2words(date_match['year4'], corrected_tag, ordinal=True)
if not day and not year:
return [date_match['month']]
else:
month = month_name_expansion(date_match['month'])
# split punctuation into single characters and remove if None
if date_match['day2']:
date_order = [month, *check_none(date_match['punct7']),
day, *check_none(date_match['punct8'])]
elif date_match['day1']:
date_order = [day, *check_none(date_match['punct5']),
month, *check_none(date_match['punct6'])]
else:
date_order = [month]
if year:
date_order = date_order + [year]
date_order = list(map(lambda x: x if x else '', date_order))
else:
date_order = ['']
return date_order
"""Module for converting numbers to words."""
import math
import json
from src.utils import get_word_form, trailing_zeros
with open('data/numbers.json', 'r') as numbers_file:
numbers_dict = json.load(numbers_file)
number_words = {int(k): v for k, v in numbers_dict['number_words'].items()}
ordinal_number_words = {int(k): v for k, v
in numbers_dict['ordinal_number_words'].items()}
large_numbers = {int(k): v for k, v
in numbers_dict['large_numbers'].items()}
ordinal_large_numbers = {int(k): v for k, v
in numbers_dict['ordinal_large_numbers'].items()}
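# For illustration, a few of the loaded entries (values copied from
# data/numbers.json above):
#   number_words[5] -> 'pięć', number_words[40] -> 'czterdzieści'
#   ordinal_number_words[7] -> 'siódmy', large_numbers[6] -> 'milion'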
def three_digit_to_words(text, tag='', ordinal=False):
"""Convert three digits numbers to words with given tag. Util function."""
map_to_words = ordinal_number_words if ordinal else number_words
number = int(text)
if number == 0:
return get_word_form(map_to_words[number], tag)
words = []
units = number % 10
tens = number % 100 - units
hundredths = number // 100
if 0 < tens + units <= 20:
word = get_word_form(map_to_words[tens + units], tag)
words.append(word)
else:
if units != 0:
words.append(get_word_form(map_to_words[units], tag))
if tens != 0:
words.append(get_word_form(map_to_words[tens], tag))
if hundredths != 0:
if tens == 0 and units == 0:
words.append(get_word_form(map_to_words[hundredths * 100], tag))
else:
words.append(get_word_form(number_words[hundredths * 100], ''))
return ' '.join(reversed(words))
def num2words(text, tag='', ordinal=False):
"""Converts a number to words.
Args:
        text (str): Number written as a string of digits.
        tag (str, optional): Morphological tag. Defaults to ''.
        ordinal (bool, optional): Whether to produce the ordinal form.
            Defaults to False.
Returns:
str: Returns number as words with given tag.
"""
i = 0
words = []
number = int(text)
if ordinal:
zeros = trailing_zeros(number)
zeros = 3 * math.floor(zeros / 3)
if zeros > 2 and 0 < len(text) - zeros <= 3:
number = number // 10 ** zeros
if number == 1:
words = ''
else:
words = three_digit_to_words(str(number), 'numcomp')
words += get_word_form(ordinal_large_numbers[zeros], tag)
return words
if len(text) <= 3 or number == 0:
return three_digit_to_words(text, tag, ordinal)
while number > 0:
remainder = number % 1000
if i == 0:
triple = three_digit_to_words(remainder, tag, ordinal)
else:
triple = three_digit_to_words(remainder)
number = number // 1000
if remainder == 0 and number != 0:
i += 3
continue
if i == 0:
words.append(triple)
else:
if remainder == 1:
tag = 'subst:sg:nom:m3'
elif remainder % 10 in [2, 3, 4]:
tag = 'subst:pl:nom:m3'
else:
tag = 'subst:pl:gen:m3'
form = get_word_form(large_numbers[i], tag)
if remainder == 1:
words.append(form)
else:
words.append(triple + ' ' + form)
i += 3
return ' '.join(list(reversed(words)))
"""Module for useful functions."""
from enum import Enum
import morfeusz2
class TokenType(Enum):
"""Type of token."""
NUMBER = 1
SPECIAL_CHARACTER = 2
PUNCTUATION = 3
CURRENCY = 4
class NumberPlural(Enum):
"""Type of number indicating what the word suffix will be.
E.g:
SINGULAR 1$ - jeden dolar
SEVERAL (2-4) 2$ - dwa dolary
MANY (5+) 7$ - siedem dolarów
"""
SINGULAR = 0
SEVERAL = 1
MANY = 2
def to_number_plural(number):
"""Convert a number to enumerate type, that indicates word suffix.
Args:
number (int or string): Number to be converted.
Returns:
        NumberPlural: Enum value indicating what the ending of the word
            will be.
"""
number = int(number)
if number == 1:
return NumberPlural.SINGULAR
elif 2 <= number <= 4:
return NumberPlural.SEVERAL
else:
return NumberPlural.MANY
def is_simple_number(tokens, special_types):
"""Checks if list of tokens creates a simple number.
Simple number contains only digits and spaces between groups of three.
Args:
tokens (list): List of tokens.
special_types (list): Types of tokens.
Returns:
bool: Return True if joined tokens are simple number otherwise False.
"""
numbers = [n for i, n in enumerate(tokens)
if special_types[i] == TokenType.NUMBER]
return (all([len(t) == 3 for t in numbers[1:]]) and
all([(s.isdigit() or s == ' ') for s in tokens]))
def is_fraction(tokens, decimal=False):
"""Check is list of tokens are 2 numbers splitted by slash or dot.
Args:
tokens (list): List of tokens.
        decimal (bool, optional): If True, the delimiter is '.'; otherwise '/'.
Defaults to False.
Returns:
bool: Return True if tokens are fraction otherwise False.
"""
if len(tokens) < 3:
return False
delimiter = '.' if decimal else '/'
splitted = ''.join(tokens).split(delimiter)
return ((len(splitted) == 2) and
tokens.count(delimiter) == 1 and
all([(s.isdigit() or s in ' /.') for s in tokens]))
def trailing_zeros(number):
"""Count trailing zeros in number.
Returns:
int: Return number of trailing zeros.
"""
manipulandum = str(number)
return len(manipulandum) - len(manipulandum.rstrip('0'))
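# A quick sketch of the helper above: trailing_zeros(42000) -> 3.
# num2words relies on this in its ordinal branch to spot round numbers
# such as 2000 ('dwutysięczny') or 1000000 ('milionowy').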
def search_form(forms, tag):
"""Search for the correct form of word from all those returned by Morfeusz.
Args:
forms (list of tuples): Tags and variations of words returned
by Morfeusz.
tag (str): The tag of the word whose form is being searched for.
Returns:
str: Word properly conjugated with the given tag or None if not found.
"""
for form in forms:
form_categories = [x.split('.') for x in form[2].split(':')]
gramm_categ_enum = enumerate(tag)
if all((c in form_categories[i] for i, c in gramm_categ_enum)):
return form[0]
return None
def get_word_form(text, tag):
"""Change the word in the appropriate form with given morphological tag.
Args:
text (str): Word to be changed.
tag (str): Morphological tag.
Returns:
str: Word changed with given morphological tag.
"""
if not tag:
return text
morf = morfeusz2.Morfeusz()
all_forms = morf.generate(text)
tag = tag.split(':')
forms = [x for x in all_forms if x[2].split(':')[0] == tag[0]]
form = search_form(forms, tag)
if form:
return form
    if len(tag) > 4:
        tag = tag[:4]
        form = search_form(forms, tag)
        if form:
            return form
    return text
def subtract_from_first(list_of_tuples, offset):
"""Subtract from every first element in tuples that make up list."""
list_of_tuples = (list_of_tuples[0] - offset, *list_of_tuples[1:])
return list_of_tuples
def check_and_replace(string_builder, find, replace, filtered_tokens):
"""Check for matches in list and replace them with given tokens.
    Remove replaced tokens from `filtered_tokens` to avoid double processing.
Args:
string_builder (list of str): List of all words.
find (list of str): Tokens to be replaced.
replace (list of str): Words that will replace `find` tokens in
`string_builder`.
filtered_tokens (list of tuples): List of tokens and their features.
Returns:
        (list of str, list of tuples): Pair: the list of words with matched
            tokens replaced, and the filtered list of tokens and their
            features with the replaced items removed.
"""
if not find or not replace:
return string_builder, filtered_tokens
new_builder = string_builder.copy()
    max_length = max(map(len, find))
for i, token in enumerate(string_builder):
if not find:
break
to_remove = [i]
check = token
j = i + 1
if check in find:
new_builder[i] = ''.join(replace[find.index(check)])
filtered_tokens = list(filter(lambda x: x[0] != i, filtered_tokens))
del find[0], replace[0]
continue
if check[0] != find[0][:len(check[0])]:
continue
        while len(check) < max_length and j < len(string_builder):
check += string_builder[j]
to_remove.append(j)
if check in find:
index = find.index(check)
new_builder = new_builder[:i] + replace[index]
if j + 1 < len(string_builder):
new_builder += string_builder[j + 1:]
filtered_tokens = list(filter(lambda x: x[0] not in to_remove,
filtered_tokens))
find.pop(index)
replace.pop(index)
if not find:
return new_builder, filtered_tokens
j += 1
return new_builder, filtered_tokens
This diff is collapsed.
@@ -4,7 +4,7 @@ import logging
import nlp_ws
from src.wordifier import Wordifier
from src.ccl_handler import CCLHandler
_log = logging.getLogger(__name__)
@@ -18,7 +18,7 @@ class Worker(nlp_ws.NLPWorker):
"""One time static initialisation."""
def process(self, input_file, task_options, output_file):
"""A."""
"""Processing an input file and generating tokens converted to words."""
wordifier = Wordifier()
        ccl_handler = CCLHandler(input_file)
ccl_handler.process(output_file, wordifier.unmarshallers)
import unittest
from parameterized import parameterized, param
from src.num2words import num2words
class TestNum2Words(unittest.TestCase):
single_tag = 'adj:sg:nom:f'
several_tag = 'adj:pl:acc:f'
many_tag = 'adj:pl:acc:m1'
@parameterized.expand([
param('0', 'zero'),
param('08', 'osiem'),
param('12', 'dwanaście'),
param('23', 'dwadzieścia trzy'),
param('48', 'czterdzieści osiem'),
param('187', 'sto osiemdziesiąt siedem'),
param('249', 'dwieście czterdzieści dziewięć'),
param('600', 'sześćset'),
param('720', 'siedemset dwadzieścia'),
param('304', 'trzysta cztery'),
param('1000', 'tysiąc'),
param('425000', 'czterysta dwadzieścia pięć tysięcy'),
param('102000', 'sto dwa tysiące'),
param('390000', 'trzysta dziewięćdziesiąt tysięcy'),
param('701000', 'siedemset jeden tysięcy'),
param('993999', 'dziewięćset dziewięćdziesiąt trzy tysiące '
'dziewięćset dziewięćdziesiąt dziewięć'),
param('1000642', 'milion sześćset czterdzieści dwa'),
param('2001003', 'dwa miliony tysiąc trzy'),
param('18456000', 'osiemnaście milionów '
'czterysta pięćdziesiąt sześć tysięcy'),
param('1000000000', 'miliard')
])
def test_numbers(self, number, words):
self.assertEqual(num2words(number), words)
@parameterized.expand([
param('0', 'zerowy', ordinal=True),
param('1', 'pierwszy', ordinal=True),
param('10', 'dziesiąty', ordinal=True),
param('15', 'piętnasty', ordinal=True),
param('31', 'trzydziesty pierwszy', ordinal=True),
param('70', 'siedemdziesiąty', ordinal=True),
param('099', 'dziewięćdziesiąty dziewiąty', ordinal=True),
param('100', 'setny', ordinal=True),
param('102', 'sto drugi', ordinal=True),
param('183', 'sto osiemdziesiąty trzeci', ordinal=True),
param('201', 'dwieście pierwszy', ordinal=True),
param('1000', 'tysięczny', ordinal=True),
param('1005', 'tysiąc piąty', ordinal=True),
param('2000', 'dwutysięczny', ordinal=True),
param('2020', 'dwa tysiące dwudziesty', ordinal=True),
param('10000', 'dziesięciotysięczny', ordinal=True),
param('100856', 'sto tysięcy osiemset pięćdziesiąty szósty',
ordinal=True),
param('1000000', 'milionowy', ordinal=True),
param('1002003', 'milion dwa tysiące trzeci', ordinal=True),
param('1948052296', 'miliard dziewięćset czterdzieści osiem milionów '
'pięćdziesiąt dwa tysiące '
'dwieście dziewięćdziesiąty szósty', ordinal=True),
])
def test_ordinal_numbers(self, number, words, ordinal):
self.assertEqual(num2words(number, ordinal=ordinal), words)
@parameterized.expand([
('1', 'adj:sg:nom:f', 'jedna'),
('2', 'num:pl:nom:f', 'dwie')
])
def test_numbers_numerator(self, number, tag, words):
self.assertEqual(num2words(number, tag), words)
@parameterized.expand([
param('1', 'pierwsza'),
param('2', 'druga'),
param('5', 'piąta'),
param('10', 'dziesiąta'),
param('31', 'trzydziesta pierwsza'),
param('100', 'setna'),
param('102', 'sto druga'),
param('512', 'pięćset dwunasta'),
param('600', 'sześćsetna'),
param('1000', 'tysięczna'),
param('2002', 'dwa tysiące druga'),
param('3000', 'trzytysięczna'),
param('1000000000', 'miliardowa'),
param('1473022977', 'miliard czterysta siedemdziesiąt trzy miliony '
'dwadzieścia dwa tysiące dziewięćset siedemdziesiąta siódma'),
])
def test_single_numbers_denominator(self, number, words, ordinal=True):
self.assertEqual(num2words(number, self.single_tag, ordinal), words)
@parameterized.expand([
param('3', 'trzecie'),
param('6', 'szóste'),
param('10', 'dziesiąte'),
param('47', 'czterdzieste siódme'),
param('100', 'setne'),
param('101', 'sto pierwsze'),
param('300', 'trzechsetne'),
param('981', 'dziewięćset osiemdziesiąte pierwsze'),
param('1000', 'tysięczne'),
param('8000', 'ośmiotysięczne'),
param('10000', 'dziesięciotysięczne'),
param('100000', 'stutysięczne'),
param('1000115376708', 'bilion sto piętnaście milionów '
'trzysta siedemdziesiąt sześć tysięcy siedemset ósme'),
])
def test_several_numbers_denominator(self, number, words, ordinal=True):
self.assertEqual(num2words(number, self.several_tag, ordinal), words)
@parameterized.expand([
param('4', 'czwartych'),
param('8', 'ósmych'),
param('10', 'dziesiątych'),
param('69', 'sześćdziesiątych dziewiątych'),
param('100', 'setnych'),
param('212', 'dwieście dwunastych'),
param('700', 'siedemsetnych'),
param('901', 'dziewięćset pierwszych'),
param('1000', 'tysięcznych'),
param('6000', 'sześciotysięcznych'),
param('10000', 'dziesięciotysięcznych'),
param('1000000', 'milionowych'),
param('238055017238', 'dwieście trzydzieści osiem miliardów '
'pięćdziesiąt pięć milionów siedemnaście tysięcy '
'dwieście trzydziestych ósmych'),
])
def test_many_numbers_denominator(self, number, words, ordinal=True):
self.assertEqual(num2words(number, self.many_tag, ordinal), words)
@@ -40,5 +40,5 @@ max-line-length = 80
# D410 Missing blank line after section
# D411 Missing blank line before section
ignore = D104,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411
match-dir = ^(?!\.tox|venv|tests).*
match = ^(?!setup).*\.py