From 1a42bf81c9cbe93ee397c95e2b11f7af02c8a279 Mon Sep 17 00:00:00 2001 From: pwalkow <pwalkow@gpu-server.ws.clarin> Date: Wed, 29 Mar 2023 17:13:12 +0200 Subject: [PATCH] Add new xai way attacks --- dvc.lock | 40 +++++++++++++++++------------------ experiments/scripts/attack.py | 23 +++++++++++--------- 2 files changed, 33 insertions(+), 30 deletions(-) diff --git a/dvc.lock b/dvc.lock index bbd98c8..7098398 100644 --- a/dvc.lock +++ b/dvc.lock @@ -393,12 +393,12 @@ stages: size: 18505614 nfiles: 6 - path: experiments/scripts/attack.py - md5: 87f54ee4e2a08f1259d9d8b2d01fe1b9 - size: 12061 + md5: 7899aad1184479703a62862c44f158bc + size: 11660 outs: - path: data/results/attack_xai/enron_spam/ - md5: ad19831866da140de113e64862da0bce.dir - size: 2860109 + md5: 5e63fa3fe181aaeba9fcb079c192f218.dir + size: 3244997 nfiles: 2 attack_xai@20_news: cmd: PYTHONPATH=. python experiments/scripts/attack.py --dataset_name 20_news @@ -473,12 +473,12 @@ stages: size: 439008808 nfiles: 6 - path: experiments/scripts/attack.py - md5: 6e7aa3d45f4726ada7d8271c03c6dcb9 - size: 12245 + md5: 705c81050f98eedf23e0e2c57a433013 + size: 11537 outs: - path: data/results/attack_xai_local/20_news/ - md5: eafd38e0be43782c2d44a1f1d03dc4fa.dir - size: 8782702 + md5: 28d7746ec17515c5cb42c2a38c2f836d.dir + size: 117908496 nfiles: 2 attack_xai_local@enron_spam: cmd: PYTHONPATH=. python experiments/scripts/attack.py --dataset_name enron_spam @@ -493,12 +493,12 @@ stages: size: 18505614 nfiles: 6 - path: experiments/scripts/attack.py - md5: 6e7aa3d45f4726ada7d8271c03c6dcb9 - size: 12245 + md5: 7899aad1184479703a62862c44f158bc + size: 11660 outs: - path: data/results/attack_xai_local/enron_spam/ - md5: 817eb5deaab1174c354753888d5d4fcf.dir - size: 2849176 + md5: a7a271f04ce0b1f377d1a8ecfd560610.dir + size: 3244995 nfiles: 2 attack_xai_discard_local@enron_spam: cmd: PYTHONPATH=. python experiments/scripts/attack.py --dataset_name enron_spam @@ -709,12 +709,12 @@ stages: size: 501609312 nfiles: 7 - path: experiments/scripts/attack.py - md5: 9518ec9af275d6a12fede47dff6767e1 - size: 11530 + md5: 9210980aa88333434ef74508ea758177 + size: 11533 outs: - path: data/results/attack_xai_local/poleval/ - md5: 7597e90d1ddfa82615e79f6821d90e1b.dir - size: 188754 + md5: 3d64a90e30b90893e50aeae785f78c01.dir + size: 275048 nfiles: 2 attack_xai_discard_local@poleval: cmd: PYTHONPATH=. python experiments/scripts/attack.py --dataset_name poleval @@ -749,12 +749,12 @@ stages: size: 501609312 nfiles: 7 - path: experiments/scripts/attack.py - md5: 9518ec9af275d6a12fede47dff6767e1 - size: 11530 + md5: 8f7b0ce20c27a8ef5ab2272be1220582 + size: 11535 outs: - path: data/results/attack_xai/poleval/ - md5: d368af0f7069a5f43b9cf6f3a0422522.dir - size: 189001 + md5: 4f857083b0174e342703246fa40c4fc7.dir + size: 275071 nfiles: 2 attack_xai_discard@poleval: cmd: PYTHONPATH=. python experiments/scripts/attack.py --dataset_name poleval diff --git a/experiments/scripts/attack.py b/experiments/scripts/attack.py index b660adf..c5e0645 100644 --- a/experiments/scripts/attack.py +++ b/experiments/scripts/attack.py @@ -87,8 +87,10 @@ def data_saver(queue_in, queue_log, output_file, output_dir, cases_nbr, queues_k [que_kill.put(None) for _ in range(to_kill_nbr)] if processed_nbr == cases_nbr - 10: end_time = time() + queue_log.put(f"End time set to { end_time }, { end_time + 1000 }") if processed_nbr >= cases_nbr - 10: - if sum([q.qsize() for q in queues_kill]) == 0 and (time() - end_time) > 3600: + queue_log.put(time() - end_time) + if sum([q.qsize() for q in queues_kill]) == 0 and (time() - end_time) > 1000: for que_kill in queues_kill: [que_kill.put(None) for _ in range(to_kill_nbr)] with open(output_file, 'wt') as fd: @@ -186,7 +188,7 @@ def main(dataset_name: str, attack_type: str): if "attack_xai" in attack_type: importance = load_xai_importance(f"data/explanations/{dataset_name}") xai_global, xai_local = importance[0], importance[1] - xai_sub = 5 + xai_sub = 'all' params = { "attack_textfooler": [lang, SYNONYM], "attack_textfooler_discard": [lang, DISCARD], @@ -208,6 +210,14 @@ def main(dataset_name: str, attack_type: str): m = Manager() queues = [m.Queue(maxsize=QUEUE_SIZE) for _ in range(6)] sim = Similarity(queues[5], 0.95, "distiluse-base-multilingual-cased-v1") + + log_que = Thread(target=log_queues, args=(queues[:5],)) + log_que.daemon = True + log_que.start() + info_que = Thread(target=log_info_queue, args=(queues[5],)) + info_que.daemon = True + info_que.start() + processes = [ Process(target=data_producer, args=(queues[0], dataset_df,)), # loading data file_in -> 0 Process(target=spoil_queue, args=(queues[0], queues[1], queues[5], max_sub, attack_type, params)), @@ -241,7 +251,7 @@ def main(dataset_name: str, attack_type: str): # spoiling 0 -> 1 Process(target=filter_similarity_queue, args=(queues[1], queues[2], queues[5], sim)), Process(target=filter_similarity_queue, args=(queues[1], queues[2], queues[5], sim)), # cosim 1 -> 2 - Process(target=classify_queue, args=(queues[2], queues[3], queues[5], dataset_name, "6")), + Process(target=classify_queue, args=(queues[2], queues[3], queues[5], dataset_name, "3")), Process(target=classify_queue, args=(queues[2], queues[3], queues[5], dataset_name, "4")), # classify changed 2 -> 3 Process(target=run_queue, args=(queues[3], queues[4], queues[5], process,)), # process 3 -> 4 @@ -249,13 +259,6 @@ def main(dataset_name: str, attack_type: str): # saving 4 -> file_out ] [p.start() for p in processes] - - log_que = Thread(target=log_queues, args=(queues[:5],)) - log_que.daemon = True - log_que.start() - info_que = Thread(target=log_info_queue, args=(queues[5],)) - info_que.daemon = True - info_que.start() # wait for all processes to finish [p.join() for p in processes] log_que.join(timeout=0.5) -- GitLab