Skip to content
Snippets Groups Projects
Select Git revision
  • master
  • develop
2 results

wordifier.py

Blame
  • num2words.py 4.03 KiB
    """Module for converting numbers to words."""
    import json
    import math
    import os
    from pathlib import Path
    
    from src.utils import get_word_form, trailing_zeros
    
    
    # The lookup tables are loaded once, at import time, from
    # <parent of this module's directory>/data/numbers.json.
    filename = os.path.join(Path(__file__).parent.parent, 'data', 'numbers.json')
    # The encoding is given explicitly: without it open() falls back to the
    # locale's preferred encoding, which is not UTF-8 on every platform and
    # would corrupt any non-ASCII words stored in the file.
    with open(filename, 'r', encoding='utf-8') as numbers_file:
        numbers_dict = json.load(numbers_file)

    # JSON object keys are always strings; the tables are re-keyed by int so
    # they can be indexed directly with numeric values.
    number_words = {
        int(k): v for k, v in numbers_dict['number_words'].items()
    }
    ordinal_number_words = {
        int(k): v for k, v in numbers_dict['ordinal_number_words'].items()
    }
    large_numbers = {
        int(k): v for k, v in numbers_dict['large_numbers'].items()
    }
    ordinal_large_numbers = {
        int(k): v for k, v in numbers_dict['ordinal_large_numbers'].items()
    }
    
    
    def three_digit_to_words(text, tag='', ordinal=False):
        """Spell out a number of at most three digits. Util function.

        The number is split into hundreds, tens and units; each non-zero
        component is looked up in the word table and inflected with
        ``get_word_form``. Values from 1 to 20 in the last two digits are
        looked up as a single entry.

        :param text: The number to spell out (anything ``int()`` accepts).
        :type text: str or int
        :param tag: Morphological tag passed to ``get_word_form``.
        (Default value = '')
        :type tag: str
        :param ordinal: True to use the ordinal word table ("first", "fifth",
        etc.), False to use the basic one ("one", "five", etc.).
        (Default value = False)
        :type ordinal: bool

        :returns: The number in words, most significant part first.
        :rtype: str

        """
        vocabulary = ordinal_number_words if ordinal else number_words

        value = int(text)
        if not value:
            return get_word_form(vocabulary[value], tag)

        hundreds, last_two = divmod(value, 100)
        units = last_two % 10
        tens = last_two - units

        # Parts are collected least-significant first and reversed on output.
        parts = []
        if 1 <= last_two <= 20:
            # 1..20 have their own single-word entries in the table.
            parts.append(get_word_form(vocabulary[last_two], tag))
        else:
            parts.extend(
                get_word_form(vocabulary[component], tag)
                for component in (units, tens)
                if component != 0
            )

        if hundreds:
            if last_two == 0:
                # A round hundred is the only word, so it carries the
                # requested table and tag itself.
                parts.append(get_word_form(vocabulary[hundreds * 100], tag))
            else:
                # Followed by tens/units: the hundreds word always comes from
                # the basic table with an empty tag.
                parts.append(get_word_form(number_words[hundreds * 100], ''))

        return ' '.join(parts[::-1])
    
    
    def num2words(text, tag='', ordinal=False):
        """Convert a number of any length to words.

        The number is processed in groups of three digits. The lowest group
        is spelled with the requested ``tag``/``ordinal`` form; every higher
        group is spelled in its basic form and followed by the matching
        large-number word (looked up in ``large_numbers`` by power of ten).

        For ordinals consisting of at most three significant digits followed
        by a whole number of zero triples (at least one), a single compound
        is returned instead: the leading digits in their 'numcomp' form
        (omitted when they equal 1) directly followed by the ordinal
        large-number word.

        :param text: The number to convert, as a string of digits.
        :type text: str
        :param tag: Morphological tag. (Default value = '')
        :type tag: str
        :param ordinal: True if the number is in the ordinal form
        ("first", "fifth", etc.), False when the number is in its basic form
        ("one", "five", etc.). (Default value = False)
        :type ordinal: bool

        :returns: Number as words with given tag.
        :rtype: str

        """
        number = int(text)

        if ordinal:
            # Round the trailing-zero count down to a multiple of three so it
            # matches a key of ordinal_large_numbers.
            zeros = trailing_zeros(number)
            zeros = 3 * math.floor(zeros / 3)
            if zeros > 2 and 0 < len(text) - zeros <= 3:
                multiplier = number // 10 ** zeros
                prefix = ''
                if multiplier != 1:
                    prefix = three_digit_to_words(str(multiplier), 'numcomp')
                return prefix + get_word_form(ordinal_large_numbers[zeros], tag)

        if len(text) <= 3 or number == 0:
            return three_digit_to_words(text, tag, ordinal)

        # Groups are collected least-significant first and reversed on output.
        words = []
        exponent = 0  # power of ten of the group currently being processed
        while number > 0:
            number, remainder = divmod(number, 1000)
            if remainder == 0:
                # An all-zero group contributes no words.
                exponent += 3
                continue

            if exponent == 0:
                # Only the lowest group carries the caller's tag/ordinal form.
                words.append(three_digit_to_words(remainder, tag, ordinal))
            else:
                # Pick the form of the large-number noun that agrees with the
                # group's value. Values ending in 2-4 take the nominative
                # plural, EXCEPT those ending in 12-14, which (like all other
                # values) take the genitive plural.
                if remainder == 1:
                    noun_tag = 'subst:sg:nom:m3'
                elif (remainder % 10 in (2, 3, 4)
                        and remainder % 100 not in (12, 13, 14)):
                    noun_tag = 'subst:pl:nom:m3'
                else:
                    noun_tag = 'subst:pl:gen:m3'
                form = get_word_form(large_numbers[exponent], noun_tag)
                if remainder == 1:
                    # A single unit is expressed by the noun alone.
                    words.append(form)
                else:
                    words.append(three_digit_to_words(remainder) + ' ' + form)
            exponent += 3

        return ' '.join(reversed(words))