Merge branch 'pawel.tometczak-master-patch-73954' into 'master'

Add new file sort_alphabetically.py. See merge request !1

Merge branch 'pawel.tometczak-master-patch-73954' into 'master'
Add new file sort_alphabetically.py. See merge request !1
f5183e01 · Wiktor Walentynowicz · b84b3911 · 022acf3a · f5183e01
Commit f5183e01 authored 2 years ago by Wiktor Walentynowicz 👷🏻
--- a/sort_alphabetically.py
+++ b/sort_alphabetically.py
+def compare_tokens(token1, token2):
+    """Three-way comparison of two tokens (single letters or digraphs).
+
+    Args:
+        token1: First token; any value supporting ``==`` and ``<``
+            (typically a one- or two-character string).
+        token2: Second token, comparable with ``token1``.
+
+    Returns:
+        0 if the tokens are equal, -1 if ``token1 < token2``, otherwise 1.
+    """
+    # Equal tokens must compare as 0 regardless of their length; reporting 1
+    # for equal inputs makes the comparator inconsistent.
+    if token1 == token2:
+        return 0
+    # Sequence comparison already moves on to the second element when the
+    # first elements match, so two-character tokens need no separate branch.
+    return -1 if token1 < token2 else 1
+
+def sort_words(file_path, detect_digraphs=True, output_path='test_sorting.txt'):
+    """Sort the words of a text file by a custom letter order and save them.
+
+    The file is read as UTF-8, every character that is neither a letter nor
+    whitespace is dropped, and the remaining text is split on whitespace.
+    Words are ordered by the rank of their tokens in ``letter_order``,
+    ignoring case. Tokens missing from that table rank before every listed
+    letter. The result is written to ``output_path``, one word per line.
+
+    Args:
+        file_path: Path of the text file to read.
+        detect_digraphs: When true, two-letter keys of the order table
+            (such as ``ch`` or ``sz``) are treated as a single token.
+        output_path: Path of the file to write; defaults to
+            ``test_sorting.txt`` in the current working directory.
+
+    Returns:
+        The sorted list of words (duplicates are kept).
+    """
+    # Desired order of letters and digraphs; a lower rank sorts first.
+    letter_order = {'a': 1, 'ą': 2, 'ã': 3, 'b': 4, 'c': 5, 'ch': 6, 'cz': 7,
+                    'd': 8, 'dz': 9, 'dż': 10, 'e': 11, 'é': 12, 'ë': 13, 'f': 14,
+                    'g': 15, 'h': 16, 'i': 17, 'j': 18, 'k': 19, 'l': 20, 'ł': 21,
+                    'm': 22, 'n': 23, 'ń': 24, 'ò': 25, 'o': 26, 'ó': 27, 'ô': 28,
+                    'p': 29, 'r': 30, 'rz': 31, 's': 32, 'sz': 33, 't': 34, 'ù': 35,
+                    'u': 36, 'v': 37, 'w': 38, 'y': 39, 'z': 40, 'ż': 41}
+    unknown_rank = -1  # tokens absent from the table sort before 'a'
+
+    # The table contains non-ASCII letters, so do not depend on the
+    # platform's default encoding.
+    with open(file_path, 'r', encoding='utf-8') as file:
+        contents = file.read()
+
+    # Remove any punctuation and numbers from the contents
+    contents = ''.join(c for c in contents if c.isalpha() or c.isspace())
+
+    # Split the contents into a list of words
+    words = contents.split()
+
+    def get_key(word):
+        """Return the list of letter ranks used as the sort key for a word."""
+        if not detect_digraphs:
+            return [letter_order.get(char.lower(), unknown_rank) for char in word]
+
+        ranks = []
+        i = 0
+        while i < len(word):
+            # Lowercase the candidate pair before the lookup so that "Ch" and
+            # "CH" are recognised as the digraph "ch", matching the
+            # case-insensitive ranking of single letters.
+            pair = word[i:i + 2].lower()
+            if i < len(word) - 1 and pair in letter_order:
+                ranks.append(letter_order[pair])
+                i += 2
+            else:
+                ranks.append(letter_order.get(word[i].lower(), unknown_rank))
+                i += 1
+        return ranks
+
+    # list.sort is stable, so words with identical keys keep their file order.
+    words.sort(key=get_key)
+
+    # Write the sorted words to the output file, one per line
+    with open(output_path, 'w', encoding='utf-8') as file:
+        file.write('\n'.join(words))
+
+    return words