Skip to content
Snippets Groups Projects
Commit 3a7c234d authored by Michał Pogoda's avatar Michał Pogoda
Browse files

Fixed styling with black

parent de260011
Branches master
No related merge requests found
Pipeline #8235 passed with stage
in 26 seconds
......@@ -5,27 +5,19 @@ import numpy as np
class WordSplit(object):
def __init__(self):
    """Set up the SymSpell dictionary and cache the vocabulary contents.

    Loads ``vocab.txt`` into a SymSpell instance (term in column 0, count
    in column 1), then reads the same file to collect the raw lines, the
    per-term frequency counts, and the terms themselves.
    """
    self.sym_spell = SymSpell(
        max_dictionary_edit_distance=0, prefix_length=7
    )
    self.sym_spell.load_dictionary(
        "vocab.txt", term_index=0, count_index=1
    )
    with open("vocab.txt", "r", encoding="utf8") as f:
        self.vocab_data = f.readlines()
    self.frequeancy = []
    self.key_data = []
    for line in self.vocab_data:
        # Split once and reuse both fields: "<term> <count>".
        fields = line.split(" ")
        self.frequeancy.append(int(fields[1]))
        # Store the term itself; indexing the separator (" "[0]) instead of
        # the split result would append the whole list of fields.
        self.key_data.append(fields[0])
def load_data(self, path):
with open(path, "r", encoding="utf8") as f:
......@@ -38,8 +30,8 @@ class WordSplit(object):
def quantile_vocab(self, data, freq_vocab, path):
    """Save every key in ``data`` paired with ``freq_vocab``.

    Each output line is the key and ``freq_vocab`` separated by a space,
    with any newline characters removed and one trailing newline added.
    The lines are passed to ``self.save_data`` under ``path + "_joined"``.
    """
    joined_lines = [
        f"{key} {freq_vocab}".replace("\n", "") + "\n" for key in data
    ]
    target = path + "_joined"
    self.save_data(target, joined_lines)
def check_if_exist(self, vocab, path):
filter_data = []
......@@ -48,7 +40,7 @@ class WordSplit(object):
continue
else:
filter_data.append(key)
self.save_data(path+'_joined', filter_data)
self.save_data(path + "_joined", filter_data)
return filter_data
def handle_word_transfer(self, text):
......@@ -73,14 +65,14 @@ class WordSplit(object):
if quantile is not None:
freq_vocab = np.quantile(self.frequeancy, quantile)
self.quantile_vocab(filter_data, freq_vocab, vocab)
self.sym_spell.load_dictionary(vocab+'_v1',
term_index=0,
count_index=1)
if language and language != 'pl':
if os.path.isfile(f'{language}_vocab.txt'):
self.sym_spell.load_dictionary(f'{language}_vocab.txt',
term_index=0,
count_index=1)
self.sym_spell.load_dictionary(
vocab + "_v1", term_index=0, count_index=1
)
if language and language != "pl":
if os.path.isfile(f"{language}_vocab.txt"):
self.sym_spell.load_dictionary(
f"{language}_vocab.txt", term_index=0, count_index=1
)
if os.path.isdir(input_file):
folder = os.listdir(input_file)
if not os.path.exists(output_file):
......@@ -106,9 +98,9 @@ class WordSplit(object):
text.append(result.corrected_string + "\n")
self.save_data(output_file, text)
if vocab is not None:
with open(vocab + '_joined,', 'r') as f:
with open(vocab + "_joined,", "r") as f:
vocab_data = f.readlines()
for line in vocab_data:
key = line.split(" ")[0]
self.sym_spell.delete_dictionary_entry(key)
os.remove(vocab + '_joined,')
os.remove(vocab + "_joined,")
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment