Skip to content
Snippets Groups Projects
Commit f5183e01 authored by Wiktor Walentynowicz's avatar Wiktor Walentynowicz 👷🏻
Browse files

Merge branch 'pawel.tometczak-master-patch-73954' into 'master'

Add new file sort_alphabetically.py

See merge request !1
parents b84b3911 022acf3a
1 merge request!1Add new file sort_alphabetically.py
def compare_tokens(token1, token2):
    """Three-way compare two tokens using their natural ``<`` ordering.

    Args:
        token1: First token (for example a single letter or a digraph).
        token2: Second token.

    Returns:
        0 if the tokens are equal, -1 if ``token1`` orders before
        ``token2``, otherwise 1.
    """
    # Equality is checked first for tokens of every length. Previously only
    # two-character tokens could compare equal, so ('a', 'a') returned 1.
    if token1 == token2:
        return 0
    # For unequal tokens sharing a first character, ``<`` already decides on
    # the following character, so no digraph-specific branch is required.
    return -1 if token1 < token2 else 1
def sort_words(file_path, detect_digraphs=True, output_path='test_sorting.txt'):
    """Sort the words of a text file by a custom alphabet and write them out.

    The input is read as UTF-8, every character that is neither alphabetic
    nor whitespace is dropped, and the remainder is split on whitespace.
    Words are then ordered by the rank of their tokens in ``letter_order``
    (case-insensitively) and written one per line. Duplicates are kept.

    Args:
        file_path: Path of the text file to read.
        detect_digraphs: When true, two-letter sequences that are keys of
            ``letter_order`` (e.g. ``ch``, ``cz``, ``sz``) are treated as a
            single token; otherwise every character is its own token.
        output_path: Path of the file the sorted words are written to.
            Defaults to ``'test_sorting.txt'``, the previously fixed name.

    Returns:
        None. The result is written to ``output_path``.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeDecodeError: If the input file is not valid UTF-8.
    """
    # Rank of every letter/digraph in the desired alphabet.
    # NOTE(review): the '' key (rank 10) can never match a token; it looks
    # like a letter or digraph that was lost at some point. Confirm the
    # intended entry and restore it.
    letter_order = {'a': 1, 'ą': 2, 'ã': 3, 'b': 4, 'c': 5, 'ch': 6, 'cz': 7,
                    'd': 8, 'dz': 9, '': 10, 'e': 11, 'é': 12, 'ë': 13, 'f': 14,
                    'g': 15, 'h': 16, 'i': 17, 'j': 18, 'k': 19, 'l': 20, 'ł': 21,
                    'm': 22, 'n': 23, 'ń': 24, 'ò': 25, 'o': 26, 'ó': 27, 'ô': 28,
                    'p': 29, 'r': 30, 'rz': 31, 's': 32, 'sz': 33, 't': 34, 'ù': 35,
                    'u': 36, 'v': 37, 'w': 38, 'y': 39, 'z': 40, 'ż': 41}

    # An explicit encoding keeps the accented letters intact regardless of
    # the platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        contents = file.read()

    # Drop punctuation, digits and other non-letters, then split into words.
    contents = ''.join(c for c in contents if c.isalpha() or c.isspace())
    words = contents.split()

    def get_tokens(word):
        """Split *word* into sort tokens (letters and, optionally, digraphs)."""
        if not detect_digraphs:
            return list(word)
        tokens = []
        i = 0
        while i < len(word):
            pair = word[i:i + 2]
            # Lower-case before the lookup so that "Ch"/"CZ" are recognised
            # as digraphs just like "ch"/"cz"; the sort key is
            # case-insensitive, so tokenisation has to be as well.
            if len(pair) == 2 and pair.lower() in letter_order:
                tokens.append(pair)
                i += 2
            else:
                tokens.append(word[i])
                i += 1
        return tokens

    def sort_key(word):
        # Tokens missing from the alphabet rank -1, i.e. before every
        # known letter.
        return [letter_order.get(token.lower(), -1) for token in get_tokens(word)]

    words.sort(key=sort_key)

    with open(output_path, 'w', encoding='utf-8') as file:
        file.write('\n'.join(words))
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment