From 77a966408b76977297e64c6601b3ff4c39190449 Mon Sep 17 00:00:00 2001
From: pwalkow <pwalkow@gpu-server.ws.clarin>
Date: Thu, 23 Mar 2023 07:41:47 +0100
Subject: [PATCH] Add discard all files

---
 .../attack_textfooler_discard/.gitignore      |  1 +
 dvc.lock                                      | 20 +++++++++++++++++++
 experiments/scripts/attack.py                 | 17 +++++++++++-----
 3 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/data/results/attack_textfooler_discard/.gitignore b/data/results/attack_textfooler_discard/.gitignore
index 70a2cd6..9199641 100644
--- a/data/results/attack_textfooler_discard/.gitignore
+++ b/data/results/attack_textfooler_discard/.gitignore
@@ -1,2 +1,3 @@
 /wiki_pl
 /enron_spam
+/20_news
diff --git a/dvc.lock b/dvc.lock
index 93146df..7ccf085 100644
--- a/dvc.lock
+++ b/dvc.lock
@@ -359,3 +359,23 @@ stages:
       md5: 8a78484bd77916f82021a72338342a44.dir
       size: 2816160
       nfiles: 2
+  attack_textfooler_discard@20_news:
+    cmd: PYTHONPATH=. python experiments/scripts/attack.py --dataset_name 20_news
+      --attack_type attack_textfooler_discard
+    deps:
+    - path: data/classification/20_news
+      md5: b73611443c4189af91b827c083f37e0b.dir
+      size: 42897496
+      nfiles: 2
+    - path: data/models/20_news
+      md5: 43d68a67ecb8149bd6bf50db9767cb64.dir
+      size: 439008808
+      nfiles: 6
+    - path: experiments/scripts/attack.py
+      md5: 9e913b341cb0993625a41c401d64a30b
+      size: 12017
+    outs:
+    - path: data/results/attack_textfooler_discard/20_news/
+      md5: 82d89b00a710e9de0a2157357fed5894.dir
+      size: 24977923
+      nfiles: 2
diff --git a/experiments/scripts/attack.py b/experiments/scripts/attack.py
index 78f97ca..b545015 100644
--- a/experiments/scripts/attack.py
+++ b/experiments/scripts/attack.py
@@ -41,6 +41,9 @@ FEATURES = "features"
 IMPORTANCE = "importance"
 SYNONYM = "synonym"
 DISCARD = "discard"
+GLOBAL = "global"
+LOCAL = "local"
+
 
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
@@ -178,10 +181,18 @@ def main(dataset_name: str, attack_type: str):
         "20_news": "en",
         "wiki_pl": "pl",
     }[dataset_name]
+    xai_global, xai_local = load_xai_importance(  # needed by every "attack_xai*" variant in params below
+        f"data/explanations/{dataset_name}"
+    ) if attack_type.startswith("attack_xai") else ({}, {})  # parenthesized: a bare `{}, {}` binds outside the conditional
+    xai_sub = 10
     params = {
         "attack_textfooler": [lang, SYNONYM],
         "attack_textfooler_discard": [lang, DISCARD],
-        "attack_basic": [lang, 0.5, 0.4, 0.3]  # prawopodobieństwa spacji  > usunięcia znaku > usunięcia słowa
+        "attack_basic": [lang, 0.5, 0.4, 0.3],  # probabilities: space > character deletion > word deletion
+        "attack_xai": [lang, xai_global, xai_local, GLOBAL, SYNONYM, xai_sub],
+        "attack_xai_discard": [lang, xai_global, xai_local, GLOBAL, DISCARD, xai_sub],
+        "attack_xai_local": [lang, xai_global, xai_local, LOCAL, SYNONYM, xai_sub],
+        "attack_xai_discard_local": [lang, xai_global, xai_local, LOCAL, DISCARD, xai_sub]
     }[attack_type]
 
     output_dir = f"data/results/{attack_type}/{dataset_name}/"
@@ -190,10 +201,6 @@ def main(dataset_name: str, attack_type: str):
     output_path = os.path.join(output_dir, "test.jsonl")
     dataset_df = pd.read_json(input_file, lines=True)
 
-    # xai_global, xai_local = load_xai_importance(
-    #     f"data/explanations/{dataset_name}"
-    # ) if attack_type == "attack_xai" else {}, {}
-
     max_sub = 1
 
     m = Manager()
-- 
GitLab