diff --git a/.gitignore b/.gitignore index f622468222a6d36fc394b19a7a0de058fa0f40ee..34a6b1b89022690378c2112e9b45851ffc0e9708 100644 --- a/.gitignore +++ b/.gitignore @@ -108,6 +108,7 @@ celerybeat.pid .env .venv env/ +.idea venv/ ENV/ env.bak/ diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 07f7794939aa9f9505ad32e4521b0da73e8b1316..031fc2ac5c4951df02440a2583518e30d472afe6 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,38 +1,43 @@ -image: "clarinpl/python:3.6" +image: clarinpl/python:3.8 cache: paths: - .tox + stages: - check_style - - test + - tests - build + - build_develop + +.check_style_template: + before_script: + - pip install tox==3.18.1 pep8: + extends: .check_style_template stage: check_style - before_script: - - pip install tox==2.9.1 script: - tox -v -e pep8 docstyle: + extends: .check_style_template stage: check_style - before_script: - - pip install tox==2.9.1 script: - tox -v -e docstyle -test: - stage: test - image: "docker:18.09.7" - services: - - "docker:18.09.7-dind" +tests: + stage: tests + before_script: + - pip install tox==3.18.1 script: - - docker build -t clarinpl/wordifier . - - docker run --rm - -v "$(pwd)/requirements-dev.txt:/home/worker/requirements-dev.txt" - -v "$(pwd)/tests:/home/worker/tests" - clarinpl/wordifier - sh -c 'pip3 install -r requirements-dev.txt ; nose2 -v tests' + - tox -v -e pytest + artifacts: + paths: + - htmlcov + expire_in: 1 week + reports: + junit: + - report.xml build_image: stage: build @@ -41,18 +46,19 @@ build_image: - master services: - 'docker:18.09.7-dind' - variables: - DOCKERHUB_NAME: clarinpl/$CI_PROJECT_NAME - before_script: - - '' script: - - docker build -t $DOCKERHUB_NAME . - - echo $DOCKER_PASSWORD > pass.txt - - cat pass.txt | docker login --username $DOCKER_USERNAME --password-stdin - - rm pass.txt - - docker push $DOCKERHUB_NAME + - docker build -t $CI_REGISTRY_IMAGE:latest . - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY - - docker image tag $DOCKERHUB_NAME $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG - - docker image tag $DOCKERHUB_NAME $CI_REGISTRY_IMAGE:latest - - docker push $CI_REGISTRY_IMAGE + - docker push $CI_REGISTRY_IMAGE:latest +build_develop: + except: + - master + stage: build_develop + image: docker:18.09.7 + services: + - 'docker:18.09.7-dind' + script: + - docker build -t $CI_REGISTRY_IMAGE:develop . 
+ - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY + - docker push $CI_REGISTRY_IMAGE:develop diff --git a/Dockerfile b/Dockerfile index 2dfcce57a1c900a55566eb90d6797c6bc40d4001..73a9150d5b2f0addddea5b0425ea650737e4ad9b 100755 --- a/Dockerfile +++ b/Dockerfile @@ -18,4 +18,4 @@ COPY ./data ./data RUN python3.6 -m pip install -r requirements.txt -CMD ["python3.6", "main.py", "service"] \ No newline at end of file +CMD ["python3.6", "main.py"] \ No newline at end of file diff --git a/main.py b/main.py index ccb9f30badc8e2d2ae1b746a19260c4e7009471e..691fa9a40a406b0c55155a737027d9225b4ddd69 100755 --- a/main.py +++ b/main.py @@ -1,34 +1,8 @@ """Implementation of wordifier service.""" -import argparse import nlp_ws -from src.worker import Worker - - -def get_args(): - """Gets command line arguments.""" - parser = argparse.ArgumentParser(description="wordifier") - - subparsers = parser.add_subparsers(dest="mode") - subparsers.required = True - - subparsers.add_parser( - "service", - help="Run as a service") - return parser.parse_args() - - -def main(): - """Runs the program.""" - args = get_args() - - generators = { - "service": lambda: nlp_ws.NLPService.main(Worker), - } - - gen_fn = generators.get(args.mode, lambda: None) - gen_fn() +from src.worker import Worker if __name__ == "__main__": - main() + nlp_ws.NLPService.main(Worker) diff --git a/requirements.txt b/requirements.txt index e8340049c13ee23f0def228066991df9b0abf234..75491fe70dc2c264d394c7b4d202f2ad7470511b 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ -nlp-ws -Babel==2.8.0 \ No newline at end of file +nlp-ws<=0.8 +Babel==2.8.0 +morfeusz2 \ No newline at end of file diff --git a/src/ccl_handler.py b/src/ccl_handler.py index 02e08ac29c6fc7c51715362b600cee42593064e7..58c3abc1da67d75de8a285de8aa82619689c750c 100755 --- a/src/ccl_handler.py +++ b/src/ccl_handler.py @@ -6,11 +6,20 @@ class CCLHandler: """Implements reading ccl for anonymizer service.""" def __init__(self, ccl_file_name): - """Initialize CCLHandler with a filename.""" + """Initialize CCLHandler with a filename. + + :param ccl_file_name: The name of the ccl file to read. + :type ccl_file_name: str + """ self._file_name = ccl_file_name def process(self, output_file, unmarshallers): - """Process xml tags using unmarshallers and save in output_file.""" + """Process xml tags using unmarshallers and save in output_file. + + :param output_file: The name of the file to save program output in. + :type output_file: str + # TODO: unmarshallers param + """ with open(self._file_name, 'r', encoding='utf-8') as input_file, \ open(output_file, 'w', encoding='utf-8') as output_file: for event, elem in iterparse(input_file): diff --git a/src/date2words.py b/src/date2words.py index 068f2d78fc8678ee8f4890d9df8fe6b55a0fae3d..af5c563ba0fdbdeda23fa036b3ded9111c3b64e3 100644 --- a/src/date2words.py +++ b/src/date2words.py @@ -16,11 +16,11 @@ def check_none(token): def month_name_expansion(month): """Expand month abbreviation or change form. - Args: - month (str): Month abbrevation or full name. + :param month: Month abbrevation or full name. + :type month: str - Returns: - str: Full month name in genitive case. + :returns: Full month name in genitive case. + :rtype: str """ abbr = len(month) == 3 @@ -43,12 +43,13 @@ def month_name_expansion(month): def date2words(date_match, tags=None): """Convert a date to list of words. - Args: - date_match (re.Match): Date match. - tag (str, optional): Morphological tag. Defaults to None. 
+ :param date_match: Date match. + :type date_match: re.Match + :param tags: Morphological tags. (Default value = None) + :type tags: str - Returns: - list of str: List of words representing date. + :returns: List of words representing date. + :rtype: list[str] """ if tags and ":".join(tags[0].split(":")[1:4]) in date_tags: diff --git a/src/num2words.py b/src/num2words.py index fdae1196ab64932e5ded12cef1af9aef52feffdf..8f2ac2cd5799ac6023b60168485b247e3e4dcc90 100644 --- a/src/num2words.py +++ b/src/num2words.py @@ -1,10 +1,14 @@ """Module for converting numbers to words.""" -import math import json +import math +import os +from pathlib import Path from src.utils import get_word_form, trailing_zeros -with open('data/numbers.json', 'r') as numbers_file: + +filename = os.path.join(Path(__file__).parent.parent, 'data', 'numbers.json') +with open(filename, 'r') as numbers_file: numbers_dict = json.load(numbers_file) number_words = {int(k): v for k, v in numbers_dict['number_words'].items()} ordinal_number_words = {int(k): v for k, v @@ -16,7 +20,21 @@ with open('data/numbers.json', 'r') as numbers_file: def three_digit_to_words(text, tag='', ordinal=False): - """Convert three digits numbers to words with given tag. Util function.""" + """Convert three digits numbers to words with given tag. Util function. + + :param text: A three-digit number to be converted to words. + :type text: str or int + :param tag: Morphological tag. (Default value = '') + :type tag: str + :param ordinal: True if the number is in the ordinal form + ("first", "fifth", etc.), False when the number is in its basic form + ("one", "five", etc.). (Default value = False) + :type ordinal: bool + + :returns: The provided 'text' parameter value in words. + :rtype: str + + """ map_to_words = ordinal_number_words if ordinal else number_words number = int(text) @@ -47,14 +65,17 @@ def three_digit_to_words(text, tag='', ordinal=False): def num2words(text, tag='', ordinal=False): """Converts a number to words. - Args: - text (str): Three digits number. - tag (str, optional): Morphological tag. Defaults to ''. - ordinal (bool, optional): If word should be derived from ordinal number. - Defaults to False. - - Returns: - str: Returns number as words with given tag. + :param text: Three digits number. + :type text: str + :param tag: Morphological tag. (Default value = '') + :type tag: str + :param ordinal: True if the number is in the ordinal form + ("first", "fifth", etc.), False when the number is in its basic form + ("one", "five", etc.). (Default value = False) + :type ordinal: bool + + :returns: Number as words with given tag. + :rtype: str """ i = 0 diff --git a/src/utils.py b/src/utils.py index a21467591223c961f8cd6ec0ce92c273640e05aa..7bb136b5df843a33cc000dd69e608e6769fc1a4d 100644 --- a/src/utils.py +++ b/src/utils.py @@ -16,7 +16,7 @@ class TokenType(Enum): class NumberPlural(Enum): """Type of number indicating what the word suffix will be. - E.g: + E.g.: SINGULAR 1$ - jeden dolar SEVERAL (2-4) 2$ - dwa dolary MANY (5+) 7$ - siedem dolarów @@ -30,12 +30,11 @@ class NumberPlural(Enum): def to_number_plural(number): """Convert a number to enumerate type, that indicates word suffix. - Args: - number (int or string): Number to be converted. + :param number: Number to be converted. + :type number: str or int - Returns: - NumberPlural: Enumerate, which indicates what the end of the word - will be. + :returns: Enumerate, which indicates what the end of the word will be. 
+ :rtype: NumberPlural """ number = int(number) @@ -52,12 +51,13 @@ def is_simple_number(tokens, special_types): Simple number contains only digits and spaces between groups of three. - Args: - tokens (list): List of tokens. - special_types (list): Types of tokens. + :param tokens: List of tokens. + :type tokens: list + :param special_types: Types of tokens. + :type special_types: list - Returns: - bool: Return True if joined tokens are simple number otherwise False. + :returns: Return True if joined tokens are simple number otherwise False. + :rtype: bool """ numbers = [n for i, n in enumerate(tokens) @@ -67,31 +67,35 @@ def is_simple_number(tokens, special_types): def is_fraction(tokens, decimal=False): - """Check is list of tokens are 2 numbers splitted by slash or dot. + """Checks if list of tokens are 2 numbers split by slash or dot. - Args: - tokens (list): List of tokens. - decimal (bool, optional): If True delimiter is dot otherwise slash '/'. - Defaults to False. + :param tokens: List of tokens. + :type tokens: list + :param decimal: If True the delimiter is a dot, otherwise a slash ('/'). + (Default value = False) + :type decimal: bool - Returns: - bool: Return True if tokens are fraction otherwise False. + :returns: True if tokens are a fraction, False otherwise. + :rtype: bool """ if len(tokens) < 3: return False delimiter = '.' if decimal else '/' - splitted = ''.join(tokens).split(delimiter) - return ((len(splitted) == 2) and + split = ''.join(tokens).split(delimiter) + return ((len(split) == 2) and tokens.count(delimiter) == 1 and all([(s.isdigit() or s in ' /.') for s in tokens])) def trailing_zeros(number): - """Count trailing zeros in number. + """Count trailing zeros in a number. - Returns: - int: Return number of trailing zeros. + :param number: The number to count trailing zeros in (written in digits). + :type number: str or int + + :returns: Return number of trailing zeros. + :rtype: int """ manipulandum = str(number) @@ -101,13 +105,13 @@ def trailing_zeros(number): def search_form(forms, tag): """Search for the correct form of word from all those returned by Morfeusz. - Args: - forms (list of tuples): Tags and variations of words returned - by Morfeusz. - tag (str): The tag of the word whose form is being searched for. + :param forms: Tags and variations of words returned by Morfeusz. + :type forms: list[tuple] + :param tag: The tag of the word whose form is being sought. + :type tag: str - Returns: - str: Word properly conjugated with the given tag or None if not found. + :returns: Word properly conjugated with the given tag or None if not found. + :rtype: str or None """ for form in forms: @@ -121,12 +125,13 @@ def search_form(forms, tag): def get_word_form(text, tag): """Change the word in the appropriate form with given morphological tag. - Args: - text (str): Word to be changed. - tag (str): Morphological tag. + :param text: Word to be changed. + :type text: str + :param tag: Morphological tag. + :type tag: str - Returns: - str: Word changed with given morphological tag. + :returns: Word changed with given morphological tag. + :rtype: str """ if not tag: @@ -160,19 +165,22 @@ def subtract_from_first(list_of_tuples, offset): def check_and_replace(string_builder, find, replace, filtered_tokens): """Check for matches in list and replace them with given tokens. - Remove replaced tokens from `filtered_tokens` to to avoid double processing. - - Args: - string_builder (list of str): List of all words. - find (list of str): Tokens to be replaced. 
- replace (list of str): Words that will replace `find` tokens in - `string_builder`. - filtered_tokens (list of tuples): List of tokens and their features. - - Returns: - (list of str, list of tuples): Pair: list of words with replaced matched - tokens and filtered list of tokens and their feature with deleted - items that have been replaced. + Remove replaced tokens from `filtered_tokens` to avoid double processing. + + :param string_builder: List of all words. + :type string_builder: list[str] + :param find: Tokens to be replaced. + :type find: list[str] + :param replace: Words that will replace tokens provided in the + `find` parameter in `string_builder`. + :type replace: list[str] + :param filtered_tokens: List of tokens and their features. + :type filtered_tokens: list[tuple] + + :returns: Pair: list of words with replaced matched tokens and filtered + list of tokens and their feature with deleted items + that have been replaced. + :rtype: list[list[str], list[tuple]] """ if not find or not replace: diff --git a/src/wordifier.py b/src/wordifier.py index cdb84625781818d2c5e9b9bf832aeeca3f1203b3..0fada7f21d00343e7d9d73fd8c66d4b3ddc23971 100644 --- a/src/wordifier.py +++ b/src/wordifier.py @@ -1,12 +1,13 @@ """Implementation of wordifier functionality.""" -import re import json +import re from itertools import islice -from src.utils import is_simple_number, subtract_from_first, trailing_zeros, \ - check_and_replace, TokenType, NumberPlural, to_number_plural, is_fraction -from src.num2words import num2words from src.date2words import date2words +from src.num2words import num2words +from src.utils import (NumberPlural, TokenType, check_and_replace, is_fraction, + is_simple_number, subtract_from_first, to_number_plural, + trailing_zeros) class Wordifier: @@ -180,14 +181,15 @@ class Wordifier: return tag def _handle_fraction(self, tokens, tags): - """Generate words from fraction splitted by slash '/'. + """Generate words from fraction split by slash '/'. - Args: - tokens (list of str): List that contains numbers separated by - slash '/'. + :param tokens: List that contains numbers separated by slash '/'. + :type tokens: list[str] + :param tags: Morphological tags. + :type tags: list[str] - Returns: - str: Fraction as words. + :returns: Fraction as words. + :rtype: str """ text = ''.join(tokens) @@ -210,11 +212,11 @@ class Wordifier: def _handle_decimal_fraction(self, tokens): """Generate words from decimal fraction splitted by dot. - Args: - tokens (list of str): List that contains numbers separated by dot. + :param tokens: List that contains numbers separated by a dot. + :type tokens: list[str] - Returns: - str: Decimal fraction as words. + :returns: Decimal fraction as words. + :rtype: str """ text = ''.join(tokens) @@ -238,12 +240,14 @@ class Wordifier: If contains fraction generate words from whole fraction otherwise generate words from first number. - Args: - tokens (list of str): List of tokens with number at the beginning. + # TODO: Check the spelling above + + :param tokens: List of tokens with number at the beginning. + :type tokens: list[str] - Returns: - str: Tokens that form a fraction or number. - int: The number of tokens that make up the fraction. + :returns: Pair: (Tokens that form a fraction or number; + The number of tokens that make up the fraction). 
+ :rtype: list[str, int] """ match = self.decimal_fraction_regex.search(''.join(tokens[:5])) @@ -298,12 +302,15 @@ class Wordifier: def _get_as_words(self, tokens, tags, special_types): """Convert special tokens and numbers to words. - Args: - tokens (list of str): List of tokens. - special_types (list of TokenType): Types of tokens. + :param tokens: List of tokens. + :type tokens: list[str] + :param tags: Morphological tags. + :type tags: list[str] + :param special_types: Types of tokens. + :type special_types: list[TokenType] - Returns: - str : Joined tokens converted to words. + :returns: Joined tokens converted to words. + :rtype: str """ if is_simple_number(tokens, special_types): @@ -320,30 +327,36 @@ class Wordifier: def _check_number_multipart(self, index, next_id, string_builder): """Check if the next token is continuation of number with actual token. - Args: - index (int): Actual token id. - next_id (int): Next token id. - string_builder (list of str): List of all words. + :param index: Actual token id. + :type index: int + # TODO: Change actual to current or valid depending on the intention. + :param next_id: Next token id. + :type next_id: int + :param string_builder: List of all words. + :type string_builder: list[str] - Returns: - bool: Is next token continuation of a number. + :returns: True if the next token is the continuation of a number, + False otherwise. + # TODO: Rephrase? + :rtype: bool """ return next_id == index + 1 or \ (index + 2 == next_id and string_builder[index + 1] in self.number_punctuation) - def _join_tokens(self, token, string_builder): - """Combine tokens that form multi-part formulas. + def _join_tokens(self, tokens, string_builder): + """Combine tokens that form multipart formulas. - Args: - tokens (list of tuple): List of tokens and their features. - Every element contains index, word, morphological tag and - token type. - string_builder (list of str): List of all words. + :param tokens: List of tokens and their features. Every element + contains index, word, morphological tag and token type. + :type tokens: list[tuple] + :param string_builder: List of all words. + :type string_builder: list[str] - Returns: - list of tuple: List of joined tokens and their features. + :returns: List of joined tokens and their features. + :rtype: list[tuple] + # TODO: Check the unused tokens parameter """ joined_tokens = [] @@ -382,12 +395,12 @@ class Wordifier: def _handle_special_types(self, string_builder): """Convert special tokens to words and replace them in string builder. - Args: - string_builder (list of str]): List of all words. + :param string_builder: List of all words. + :type string_builder: list[str] - Returns: - list of str: Return updated string builder with special tokens - replaced by words. + :returns: Updated string builder where special tokens have been + replaced by words. + :rtype: list[str] """ wordify_tokens = self._join_tokens(self._wordify_tokens, string_builder) @@ -430,11 +443,13 @@ class Wordifier: def _handle_regexes(self, string_builder, tags): """Check for regexes in the given builder and replace them with words. - Args: - string_builder (list of str): List of all words. + :param string_builder: List of all words. + :type string_builder: list[str] + :param tags: Morphological tags. + :type tags: #TODO: Check this and other occurrences. - Returns: - list of str: Updated string builder with matches replaced by words. + :returns: Updated string builder with matches replaced by words. 
+ :rtype: list[str] """ sentence = ''.join(string_builder) @@ -454,11 +469,13 @@ class Wordifier: def _process_sentence(self, string_builder, tags): """Process a sentence and replace special tokens (eg. numbers) words. - Args: - string_builder (list of str): List of all words. + :param string_builder: List of all words. + :type string_builder: list[str] + :param tags: Morphological tags. + :type tags: # TODO: Just a flag for tags - Returns: - str: Sentece with replaced special tokens. + :returns: Sentence where special tokens have been replaced. + :rtype: str """ string_builder = self._handle_regexes(string_builder, tags) @@ -466,3 +483,5 @@ class Wordifier: if string_builder[0] and not string_builder[0][0].isupper(): string_builder[0] = string_builder[0].capitalize() return ''.join(string_builder) + + # TODO: Finished here. Check type of tags. diff --git a/src/worker.py b/src/worker.py index 8dfe2f8c957b0a0072886f4e6138a944ce9fe9f6..141eb554ea0d35744da4a431c26c1ba881ab8574 100755 --- a/src/worker.py +++ b/src/worker.py @@ -3,9 +3,8 @@ import logging import nlp_ws -from src.wordifier import Wordifier from src.ccl_handler import CCLHandler - +from src.wordifier import Wordifier _log = logging.getLogger(__name__) @@ -17,8 +16,9 @@ class Worker(nlp_ws.NLPWorker): def static_init(cls, config): """One time static initialisation.""" - def process(self, input_file, task_options, output_file): - """Processing an input file and generating tokens converted to words.""" + def process(self, input_file: str, task_options: dict, + output_file: str) -> None: + """Generating tokens converted to words from input file.""" wordifier = Wordifier() ccl_handler = CCLHandler(input_file) ccl_handler.process(output_file, wordifier.unmarshallers) diff --git a/tests/test_num2words.py b/tests/test_num2words.py index 23b06e6ff93dc325dbe96083d3f84ec20c845907..76e1abad641823d7a53bccac39db93d034807af3 100644 --- a/tests/test_num2words.py +++ b/tests/test_num2words.py @@ -1,136 +1,66 @@ -import unittest -from parameterized import parameterized, param +import json +import os +from pathlib import Path +import pytest + +from src.worker import Worker from src.num2words import num2words -class TestNum2Words(unittest.TestCase): - single_tag = 'adj:sg:nom:f' - several_tag = 'adj:pl:acc:f' - many_tag = 'adj:pl:acc:m1' - - @parameterized.expand([ - param('0', 'zero'), - param('08', 'osiem'), - param('12', 'dwanaście'), - param('23', 'dwadzieścia trzy'), - param('48', 'czterdzieści osiem'), - param('187', 'sto osiemdziesiąt siedem'), - param('249', 'dwieście czterdzieści dziewięć'), - param('600', 'sześćset'), - param('720', 'siedemset dwadzieścia'), - param('304', 'trzysta cztery'), - - param('1000', 'tysiąc'), - param('425000', 'czterysta dwadzieścia pięć tysięcy'), - param('102000', 'sto dwa tysiące'), - param('390000', 'trzysta dziewięćdziesiąt tysięcy'), - param('701000', 'siedemset jeden tysięcy'), - param('993999', 'dziewięćset dziewięćdziesiąt trzy tysiące ' - 'dziewięćset dziewięćdziesiąt dziewięć'), - param('1000642', 'milion sześćset czterdzieści dwa'), - param('2001003', 'dwa miliony tysiąc trzy'), - param('18456000', 'osiemnaście milionów ' - 'czterysta pięćdziesiąt sześć tysięcy'), - param('1000000000', 'miliard') - ]) - def test_numbers(self, number, words): - self.assertEqual(num2words(number), words) - - @parameterized.expand([ - param('0', 'zerowy', ordinal=True), - param('1', 'pierwszy', ordinal=True), - param('10', 'dziesiąty', ordinal=True), - param('15', 'piętnasty', ordinal=True), - param('31', 'trzydziesty 
pierwszy', ordinal=True), - param('70', 'siedemdziesiąty', ordinal=True), - param('099', 'dziewięćdziesiąty dziewiąty', ordinal=True), - param('100', 'setny', ordinal=True), - param('102', 'sto drugi', ordinal=True), - param('183', 'sto osiemdziesiąty trzeci', ordinal=True), - param('201', 'dwieście pierwszy', ordinal=True), - - param('1000', 'tysięczny', ordinal=True), - param('1005', 'tysiąc piąty', ordinal=True), - param('2000', 'dwutysięczny', ordinal=True), - param('2020', 'dwa tysiące dwudziesty', ordinal=True), - param('10000', 'dziesięciotysięczny', ordinal=True), - param('100856', 'sto tysięcy osiemset pięćdziesiąty szósty', - ordinal=True), - param('1000000', 'milionowy', ordinal=True), - param('1002003', 'milion dwa tysiące trzeci', ordinal=True), - param('1948052296', 'miliard dziewięćset czterdzieści osiem milionów ' - 'pięćdziesiąt dwa tysiące ' - 'dwieście dziewięćdziesiąty szósty', ordinal=True), - ]) - def test_ordinal_numbers(self, number, words, ordinal): - self.assertEqual(num2words(number, ordinal=ordinal), words) - - @parameterized.expand([ - ('1', 'adj:sg:nom:f', 'jedna'), - ('2', 'num:pl:nom:f', 'dwie') - ]) - def test_numbers_numerator(self, number, tag, words): - self.assertEqual(num2words(number, tag), words) - - @parameterized.expand([ - param('1', 'pierwsza'), - param('2', 'druga'), - param('5', 'piąta'), - param('10', 'dziesiąta'), - param('31', 'trzydziesta pierwsza'), - param('100', 'setna'), - param('102', 'sto druga'), - param('512', 'pięćset dwunasta'), - param('600', 'sześćsetna'), - - param('1000', 'tysięczna'), - param('2002', 'dwa tysiące druga'), - param('3000', 'trzytysięczna'), - param('1000000000', 'miliardowa'), - param('1473022977', 'miliard czterysta siedemdziesiąt trzy miliony ' - 'dwadzieścia dwa tysiące dziewięćset siedemdziesiąta siódma'), - ]) - def test_single_numbers_denominator(self, number, words, ordinal=True): - self.assertEqual(num2words(number, self.single_tag, ordinal), words) - - @parameterized.expand([ - param('3', 'trzecie'), - param('6', 'szóste'), - param('10', 'dziesiąte'), - param('47', 'czterdzieste siódme'), - param('100', 'setne'), - param('101', 'sto pierwsze'), - param('300', 'trzechsetne'), - param('981', 'dziewięćset osiemdziesiąte pierwsze'), - - param('1000', 'tysięczne'), - param('8000', 'ośmiotysięczne'), - param('10000', 'dziesięciotysięczne'), - param('100000', 'stutysięczne'), - param('1000115376708', 'bilion sto piętnaście milionów ' - 'trzysta siedemdziesiąt sześć tysięcy siedemset ósme'), - ]) - def test_several_numbers_denominator(self, number, words, ordinal=True): - self.assertEqual(num2words(number, self.several_tag, ordinal), words) - - @parameterized.expand([ - param('4', 'czwartych'), - param('8', 'ósmych'), - param('10', 'dziesiątych'), - param('69', 'sześćdziesiątych dziewiątych'), - param('100', 'setnych'), - param('212', 'dwieście dwunastych'), - param('700', 'siedemsetnych'), - param('901', 'dziewięćset pierwszych'), - - param('1000', 'tysięcznych'), - param('6000', 'sześciotysięcznych'), - param('10000', 'dziesięciotysięcznych'), - param('1000000', 'milionowych'), - param('238055017238', 'dwieście trzydzieści osiem miliardów ' - 'pięćdziesiąt pięć milionów siedemnaście tysięcy ' - 'dwieście trzydziestych ósmych'), - ]) - def test_many_numbers_denominator(self, number, words, ordinal=True): - self.assertEqual(num2words(number, self.many_tag, ordinal), words) +data_path = os.path.join(Path(__file__).parent, 'testdata', 'input', + 'num2words_data.json') +with open(data_path) as f: + TESTDATA = 
json.load(f) + +SINGLE_TAG = 'adj:sg:nom:f' +SEVERAL_TAG = 'adj:pl:acc:f' +MANY_TAG = 'adj:pl:acc:m1' + + +@pytest.mark.anyio +def test_init(): + worker = Worker() + assert type(worker).__name__ == 'Worker' + + +@pytest.mark.anyio +@pytest.mark.parametrize('number, word', TESTDATA["nominative_numbers"]) +def test_nominative_numbers(number, word): + assert num2words(number) == word + + +@pytest.mark.anyio +@pytest.mark.parametrize('number, word', TESTDATA["ordinal_numbers"]) +def test_ordinal_numbers(number, word): + assert num2words(number, ordinal=True) == word + + +@pytest.mark.anyio +@pytest.mark.parametrize('number, tag, words', [ + ('1', 'adj:sg:nom:f', 'jedna'), + ('2', 'num:pl:nom:f', 'dwie') +]) +def test_numbers_numerator(number, tag, words): + assert num2words(number, tag) == words + + +@pytest.mark.anyio +@pytest.mark.parametrize('number, words', + TESTDATA["single_numbers_denominator"]) +def test_single_numbers_denominator(number, words): + assert num2words(number, SINGLE_TAG, ordinal=True) == words + + +@pytest.mark.anyio +@pytest.mark.parametrize('number, words', + TESTDATA["several_numbers_denominator"]) +def test_several_numbers_denominator(number, words): + assert num2words(number, SEVERAL_TAG, ordinal=True) == words + + +@pytest.mark.anyio +@pytest.mark.parametrize('number, words', + TESTDATA["many_numbers_denominator"]) +def test_many_numbers_denominator(number, words): + assert num2words(number, MANY_TAG, ordinal=True) == words diff --git a/tests/testdata/input/num2words_data.json b/tests/testdata/input/num2words_data.json new file mode 100644 index 0000000000000000000000000000000000000000..a5b62321098f23069dbd11e7c8597582c8779452 --- /dev/null +++ b/tests/testdata/input/num2words_data.json @@ -0,0 +1,92 @@ +{ + "nominative_numbers": [ + ["0", "zero"], + ["08", "osiem"], + ["12", "dwanaście"], + ["23", "dwadzieścia trzy"], + ["48", "czterdzieści osiem"], + ["187", "sto osiemdziesiąt siedem"], + ["249", "dwieście czterdzieści dziewięć"], + ["600", "sześćset"], + ["720", "siedemset dwadzieścia"], + ["304", "trzysta cztery"], + ["1000", "tysiąc"], + ["425000", "czterysta dwadzieścia pięć tysięcy"], + ["102000", "sto dwa tysiące"], + ["390000", "trzysta dziewięćdziesiąt tysięcy"], + ["701000", "siedemset jeden tysięcy"], + ["993999", "dziewięćset dziewięćdziesiąt trzy tysiące dziewięćset dziewięćdziesiąt dziewięć"], + ["1000642", "milion sześćset czterdzieści dwa"], + ["2001003", "dwa miliony tysiąc trzy"], + ["18456000", "osiemnaście milionów czterysta pięćdziesiąt sześć tysięcy"], + ["1000000000", "miliard"] + ], + "ordinal_numbers": [ + ["0", "zerowy"], + ["1", "pierwszy"], + ["10", "dziesiąty"], + ["15", "piętnasty"], + ["31", "trzydziesty pierwszy"], + ["70", "siedemdziesiąty"], + ["099", "dziewięćdziesiąty dziewiąty"], + ["100", "setny"], + ["102", "sto drugi"], + ["183", "sto osiemdziesiąty trzeci"], + ["201", "dwieście pierwszy"], + ["1000", "tysięczny"], + ["1005", "tysiąc piąty"], + ["2000", "dwutysięczny"], + ["2020", "dwa tysiące dwudziesty"], + ["10000", "dziesięciotysięczny"], + ["100856", "sto tysięcy osiemset pięćdziesiąty szósty"], + ["1000000", "milionowy"], + ["1002003", "milion dwa tysiące trzeci"], + ["1948052296", "miliard dziewięćset czterdzieści osiem milionów pięćdziesiąt dwa tysiące dwieście dziewięćdziesiąty szósty"] + ], + "single_numbers_denominator": [ + ["1", "pierwsza"], + ["2", "druga"], + ["5", "piąta"], + ["10", "dziesiąta"], + ["31", "trzydziesta pierwsza"], + ["100", "setna"], + ["102", "sto druga"], + ["512", "pięćset dwunasta"], + 
["600", "sześćsetna"], + ["1000", "tysięczna"], + ["2002", "dwa tysiące druga"], + ["3000", "trzytysięczna"], + ["1000000000", "miliardowa"], + ["1473022977", "miliard czterysta siedemdziesiąt trzy miliony dwadzieścia dwa tysiące dziewięćset siedemdziesiąta siódma"] + ], + "several_numbers_denominator": [ + ["3", "trzecie"], + ["6", "szóste"], + ["10", "dziesiąte"], + ["47", "czterdzieste siódme"], + ["100", "setne"], + ["101", "sto pierwsze"], + ["300", "trzechsetne"], + ["981", "dziewięćset osiemdziesiąte pierwsze"], + ["1000", "tysięczne"], + ["8000", "ośmiotysięczne"], + ["10000", "dziesięciotysięczne"], + ["100000", "stutysięczne"], + ["1000115376708", "bilion sto piętnaście milionów trzysta siedemdziesiąt sześć tysięcy siedemset ósme"] + ], + "many_numbers_denominator": [ + ["4", "czwartych"], + ["8", "ósmych"], + ["10", "dziesiątych"], + ["69", "sześćdziesiątych dziewiątych"], + ["100", "setnych"], + ["212", "dwieście dwunastych"], + ["700", "siedemsetnych"], + ["901", "dziewięćset pierwszych"], + ["1000", "tysięcznych"], + ["6000", "sześciotysięcznych"], + ["10000", "dziesięciotysięcznych"], + ["1000000", "milionowych"], + ["238055017238", "dwieście trzydzieści osiem miliardów pięćdziesiąt pięć milionów siedemnaście tysięcy dwieście trzydziestych ósmych"] + ] +} \ No newline at end of file diff --git a/tox.ini b/tox.ini index 67d5403ab50027aa81fac8d52de8b1d10379e086..963c030a54e9671cb141d3cd8c8328172076ccc0 100755 --- a/tox.ini +++ b/tox.ini @@ -16,6 +16,20 @@ basepython = python3 commands = pydocstyle --verbose {posargs} +[testenv:pytest] +deps = + pytest + nlp_ws + Babel==2.8.0 + morfeusz2 + coverage +filterwarnings = + error + ignore::UserWarning +commands = + coverage run -m pytest --junitxml=report.xml tests/ + coverage html + [flake8] # W504 skipped because it is overeager and unnecessary ignore = W504 @@ -41,4 +55,8 @@ max-line-length = 80 # D411 Missing blank line before section ignore = D104,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411 match-dir = ^(?!\.tox|venv|tests).* -match = ^(?!setup).*\.py \ No newline at end of file +match = ^(?!setup).*\.py + +[run] +relative_files = True +branch = True