From 1a42bf81c9cbe93ee397c95e2b11f7af02c8a279 Mon Sep 17 00:00:00 2001
From: pwalkow <pwalkow@gpu-server.ws.clarin>
Date: Wed, 29 Mar 2023 17:13:12 +0200
Subject: [PATCH] Add new xai way attacks

---
 dvc.lock                      | 40 +++++++++++++++++------------------
 experiments/scripts/attack.py | 23 +++++++++++---------
 2 files changed, 33 insertions(+), 30 deletions(-)

diff --git a/dvc.lock b/dvc.lock
index bbd98c8..7098398 100644
--- a/dvc.lock
+++ b/dvc.lock
@@ -393,12 +393,12 @@ stages:
       size: 18505614
       nfiles: 6
     - path: experiments/scripts/attack.py
-      md5: 87f54ee4e2a08f1259d9d8b2d01fe1b9
-      size: 12061
+      md5: 7899aad1184479703a62862c44f158bc
+      size: 11660
     outs:
     - path: data/results/attack_xai/enron_spam/
-      md5: ad19831866da140de113e64862da0bce.dir
-      size: 2860109
+      md5: 5e63fa3fe181aaeba9fcb079c192f218.dir
+      size: 3244997
       nfiles: 2
   attack_xai@20_news:
     cmd: PYTHONPATH=. python experiments/scripts/attack.py --dataset_name 20_news
@@ -473,12 +473,12 @@ stages:
       size: 439008808
       nfiles: 6
     - path: experiments/scripts/attack.py
-      md5: 6e7aa3d45f4726ada7d8271c03c6dcb9
-      size: 12245
+      md5: 705c81050f98eedf23e0e2c57a433013
+      size: 11537
     outs:
     - path: data/results/attack_xai_local/20_news/
-      md5: eafd38e0be43782c2d44a1f1d03dc4fa.dir
-      size: 8782702
+      md5: 28d7746ec17515c5cb42c2a38c2f836d.dir
+      size: 117908496
       nfiles: 2
   attack_xai_local@enron_spam:
     cmd: PYTHONPATH=. python experiments/scripts/attack.py --dataset_name enron_spam
@@ -493,12 +493,12 @@ stages:
       size: 18505614
       nfiles: 6
     - path: experiments/scripts/attack.py
-      md5: 6e7aa3d45f4726ada7d8271c03c6dcb9
-      size: 12245
+      md5: 7899aad1184479703a62862c44f158bc
+      size: 11660
     outs:
     - path: data/results/attack_xai_local/enron_spam/
-      md5: 817eb5deaab1174c354753888d5d4fcf.dir
-      size: 2849176
+      md5: a7a271f04ce0b1f377d1a8ecfd560610.dir
+      size: 3244995
       nfiles: 2
   attack_xai_discard_local@enron_spam:
     cmd: PYTHONPATH=. python experiments/scripts/attack.py --dataset_name enron_spam
@@ -709,12 +709,12 @@ stages:
       size: 501609312
       nfiles: 7
     - path: experiments/scripts/attack.py
-      md5: 9518ec9af275d6a12fede47dff6767e1
-      size: 11530
+      md5: 9210980aa88333434ef74508ea758177
+      size: 11533
     outs:
     - path: data/results/attack_xai_local/poleval/
-      md5: 7597e90d1ddfa82615e79f6821d90e1b.dir
-      size: 188754
+      md5: 3d64a90e30b90893e50aeae785f78c01.dir
+      size: 275048
       nfiles: 2
   attack_xai_discard_local@poleval:
     cmd: PYTHONPATH=. python experiments/scripts/attack.py --dataset_name poleval
@@ -749,12 +749,12 @@ stages:
       size: 501609312
       nfiles: 7
     - path: experiments/scripts/attack.py
-      md5: 9518ec9af275d6a12fede47dff6767e1
-      size: 11530
+      md5: 8f7b0ce20c27a8ef5ab2272be1220582
+      size: 11535
     outs:
     - path: data/results/attack_xai/poleval/
-      md5: d368af0f7069a5f43b9cf6f3a0422522.dir
-      size: 189001
+      md5: 4f857083b0174e342703246fa40c4fc7.dir
+      size: 275071
       nfiles: 2
   attack_xai_discard@poleval:
     cmd: PYTHONPATH=. python experiments/scripts/attack.py --dataset_name poleval
diff --git a/experiments/scripts/attack.py b/experiments/scripts/attack.py
index b660adf..c5e0645 100644
--- a/experiments/scripts/attack.py
+++ b/experiments/scripts/attack.py
@@ -87,8 +87,10 @@ def data_saver(queue_in, queue_log, output_file, output_dir, cases_nbr, queues_k
                 [que_kill.put(None) for _ in range(to_kill_nbr)]
         if processed_nbr == cases_nbr - 10:
             end_time = time()
+            queue_log.put(f"End time set to { end_time }, { end_time + 1000 }")
         if processed_nbr >= cases_nbr - 10:
-            if sum([q.qsize() for q in queues_kill]) == 0 and (time() - end_time) > 3600:
+            queue_log.put(time() - end_time)
+            if sum([q.qsize() for q in queues_kill]) == 0 and (time() - end_time) > 1000:
                 for que_kill in queues_kill:
                     [que_kill.put(None) for _ in range(to_kill_nbr)]
     with open(output_file, 'wt') as fd:
@@ -186,7 +188,7 @@ def main(dataset_name: str, attack_type: str):
     if "attack_xai" in attack_type:
         importance = load_xai_importance(f"data/explanations/{dataset_name}")
         xai_global, xai_local = importance[0], importance[1]
-    xai_sub = 5
+    xai_sub = 'all'
     params = {
         "attack_textfooler": [lang, SYNONYM],
         "attack_textfooler_discard": [lang, DISCARD],
@@ -208,6 +210,14 @@ def main(dataset_name: str, attack_type: str):
     m = Manager()
     queues = [m.Queue(maxsize=QUEUE_SIZE) for _ in range(6)]
     sim = Similarity(queues[5], 0.95, "distiluse-base-multilingual-cased-v1")
+
+    log_que = Thread(target=log_queues, args=(queues[:5],))
+    log_que.daemon = True
+    log_que.start()
+    info_que = Thread(target=log_info_queue, args=(queues[5],))
+    info_que.daemon = True
+    info_que.start()
+
     processes = [
         Process(target=data_producer, args=(queues[0], dataset_df,)),  # loading data file_in -> 0
         Process(target=spoil_queue, args=(queues[0], queues[1], queues[5], max_sub, attack_type, params)),
@@ -241,7 +251,7 @@ def main(dataset_name: str, attack_type: str):
         # spoiling 0 -> 1
         Process(target=filter_similarity_queue, args=(queues[1], queues[2], queues[5], sim)),
         Process(target=filter_similarity_queue, args=(queues[1], queues[2], queues[5], sim)),  # cosim 1 -> 2
-        Process(target=classify_queue, args=(queues[2], queues[3], queues[5], dataset_name, "6")),
+        Process(target=classify_queue, args=(queues[2], queues[3], queues[5], dataset_name, "3")),
         Process(target=classify_queue, args=(queues[2], queues[3], queues[5], dataset_name, "4")),
         # classify changed 2 -> 3
         Process(target=run_queue, args=(queues[3], queues[4], queues[5], process,)),  # process 3 -> 4
@@ -249,13 +259,6 @@ def main(dataset_name: str, attack_type: str):
         # saving 4 -> file_out
     ]
     [p.start() for p in processes]
-
-    log_que = Thread(target=log_queues, args=(queues[:5],))
-    log_que.daemon = True
-    log_que.start()
-    info_que = Thread(target=log_info_queue, args=(queues[5],))
-    info_que.daemon = True
-    info_que.start()
     # wait for all processes to finish
     [p.join() for p in processes]
     log_que.join(timeout=0.5)
-- 
GitLab