Commit 5b277ebf authored by Paweł Walkowiak

Add log file

parent 8f911880
@@ -12,6 +12,7 @@ from textfooler import Attack, TextFooler, Similarity, BaseLine, \
     process, run_queue, filter_similarity_queue, spoil_queue, \
     AttackMethod, get_xai_importance_diff
 from time import sleep, time
+from datetime import datetime
 from multiprocessing import Process
 from multiprocessing import Queue, Manager
 from threading import Thread
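The new `datetime` import exists only to stamp each run's log file, which the last hunk builds in `main`. A minimal sketch of the naming scheme, with a hypothetical `attack_type` value:

    from datetime import datetime

    attack_type = "textfooler"  # hypothetical example value
    # One log file per run, named by attack type and start time, as in main() below.
    log_file = f"{attack_type}_{datetime.now().strftime('%Y-%m-%d_%H-%M')}.log"
    print(log_file)  # e.g. textfooler_2024-01-31_14-05.log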
@@ -67,7 +68,8 @@ def join_punct(words):
     return "".join(w if set(w) <= punc else " " + w for w in words).lstrip()


-def data_producer(queue_out, dataset_df, queue_recurse, queue_log):
+def data_producer(queue_out, dataset_df, queue_recurse, queue_log, log_file):
+    try:
         for i, cols in tqdm(
             dataset_df[[TEXT, ID, LEMMAS, TAGS, ORTHS, PRED, NER]].iterrows(), total=len(dataset_df)
         ):
@@ -92,10 +94,16 @@ def data_producer(queue_out, dataset_df, queue_recurse, queue_log):
                 for n in ner:
                     ners.extend(n[TEXT].split(" "))
             queue_out.put([sentence, orths, ners, lemmas, tags, sent_id, y_pred, sub])
+    except Exception as e:
+        queue_log.put(f"Error in data producer: {e}")
+        with open(log_file, "a") as f:
+            f.write(f"Producer failed with {e}\n")
     queue_out.put(None)


 def data_saver(queue_in, queue_log, queue_recurse, output_file,
-               output_dir, cases_nbr, queues_kill, to_kill_nbr, max_sub):
+               output_dir, cases_nbr, queues_kill, to_kill_nbr, max_sub, log_file):
+    try:
         processed_nbr, start = 0, time()
         item = 1
         test_y, pred_y = [], []
@@ -180,9 +188,15 @@ def data_saver(queue_in, queue_log, queue_recurse, output_file,
             }
             with open(f"{output_dir}/metrics.json", mode="w") as fd:
                 json.dump(metrics, fd)
+    except Exception as e:
+        queue_log.put(f"Error in data saver: {e}")
+        with open(log_file, "a") as f:
+            f.write(f"Saver failed with {e}\n")
     queue_in.put(None)


-def classify_queue(queue_in, queue_out, queue_log, dataset_name):
+def classify_queue(queue_in, queue_out, queue_log, dataset_name, log_file):
+    try:
         fun = getattr(
             importlib.import_module(f"text_attacks.models.{dataset_name}"),
             "get_classify_function",
@@ -207,6 +221,11 @@ def classify_queue(queue_in, queue_out, queue_log, dataset_name):
         classified = classify_fun(sentences) if sentences else []
         queue_out.put((sent_id, org_sentence, changed, y_pred, classified, synonyms_nbr, sent_words, subs))
         queue_log.put(f"Classified sentences {sent_id}")
+    except Exception as e:
+        queue_log.put(f"Error in classifier: {e}")
+        with open(log_file, "a") as f:
+            f.write(f"Classifier failed with {e}\n")
     queue_in.put(None)


 def log_queues(queues):
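Producer, saver, and classifier now share one failure-handling shape: do the queue work inside `try`, report any exception both to the shared log queue and to the per-run log file, and still emit the `None` sentinel so downstream stages shut down. A minimal, self-contained sketch of that pattern, with a hypothetical `worker` standing in for the real stages:

    from multiprocessing import Queue

    def worker(queue_in, queue_out, queue_log, log_file):
        try:
            while (item := queue_in.get()) is not None:
                queue_out.put(item)  # stand-in for the stage's real work
        except Exception as e:
            # Report twice: the log queue feeds the shared logger (cf. log_queues),
            # the file keeps a persistent record of the failure.
            queue_log.put(f"Error in worker: {e}")
            with open(log_file, "a") as f:  # append: all stages of a run share one file
                f.write(f"Worker failed with {e}\n")
        queue_out.put(None)  # sentinel still propagates after a failure

    if __name__ == "__main__":
        q_in, q_out, q_log = Queue(), Queue(), Queue()
        q_in.put("example"); q_in.put(None)
        worker(q_in, q_out, q_log, "run.log")  # inline here; the real code wraps it in Process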
@@ -347,22 +366,23 @@ def main(dataset_name: str, attack_type: str):
     else:
         ft_model_name = None

+    log_file = f"{attack_type}_{datetime.now().strftime('%Y-%m-%d_%H-%M')}.log"
     processes_nbr = 12
     sim = Similarity(queues[5], similarity_bound, sent_model, menli_model, lang)
-    processes = [Process(target=data_producer, args=(queues[0], dataset_df, queues[4], queues[5]))]  # loading data file_in -> 0
+    processes = [Process(target=data_producer, args=(queues[0], dataset_df, queues[4], queues[5], log_file))]  # loading data file_in -> 0
     processes.extend([Process(target=spoil_queue, args=(queues[0], queues[1], queues[5],
-                                                        attack_type, params, ft_model_name))
+                                                        attack_type, params, ft_model_name, log_file))
                       for _ in range(processes_nbr)])  # spoiling 0 -> 1
-    processes.extend([Process(target=filter_similarity_queue, args=(queues[1], queues[2], queues[5], sim)),
-                      Process(target=filter_similarity_queue, args=(queues[1], queues[2], queues[5], sim)),  # cosim 1 -> 2
-                      Process(target=classify_queue, args=(queues[2], queues[3], queues[5], dataset_name)),
-                      Process(target=classify_queue, args=(queues[2], queues[3], queues[5], dataset_name)),
+    processes.extend([Process(target=filter_similarity_queue, args=(queues[1], queues[2], queues[5], sim, log_file)),
+                      Process(target=filter_similarity_queue, args=(queues[1], queues[2], queues[5], sim, log_file)),  # cosim 1 -> 2
+                      Process(target=classify_queue, args=(queues[2], queues[3], queues[5], dataset_name, log_file)),
+                      Process(target=classify_queue, args=(queues[2], queues[3], queues[5], dataset_name, log_file)),
                       # classify changed 2 -> 3
-                      Process(target=data_saver, args=(queues[3], queues[5], queues[4], output_path,
-                                                       output_dir, len(dataset_df), queues, processes_nbr+6, max_sub))  # saving 3 -> file_out
+                      Process(target=data_saver, args=(queues[3], queues[5], queues[4], output_path,
+                                                       output_dir, len(dataset_df), queues, processes_nbr+6, max_sub,
+                                                       log_file))  # saving 3 -> file_out
                       ])
     [p.start() for p in processes]
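Read together with the inline comments, the wiring is a linear pipeline over the numbered queues, with `queues[4]` as the recursion channel and `queues[5]` as the log channel (per the argument positions):

    file_in -> data_producer -> queues[0]
            -> spoil_queue x12 -> queues[1]
            -> filter_similarity_queue x2 -> queues[2]
            -> classify_queue x2 -> queues[3]
            -> data_saver -> file_out, metrics.json, and now the per-run log file

The `to_kill_nbr` argument, `processes_nbr + 6`, matches the 18 processes started here: 1 producer + 12 spoilers + 2 filters + 2 classifiers + 1 saver.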