Add parameters

f4ca3f8f · Paweł Walkowiak · cdd255c7 · f4ca3f8f · f4ca3f8f
Commit f4ca3f8f authored Aug 10, 2023 by Paweł Walkowiak
--- a/experiments/parameters.json
+++ b/experiments/parameters.json
+{
+  "max_sub": 12,
+  "word_change_size": 0.4,
+  "similarity_bound": 0.95,
+  "word_synonym_threshold": 0.65,
+  "lang": {
+      "enron_spam": "en",
+      "poleval": "pl",
+      "20_news": "en",
+      "wiki_pl": "pl",
+      "ag_news": "en",
+      "multi_emo": "pl",
+      "imdb": "en"
+    },
+  "class_mapping": {
+      "enron_spam": {
+            "ham": "spam",
+            "spam": "ham"
+        },
+      "wiki_pl": {
+            "Albania":  "Arabowie",
+            "Amerykanscy-prozaicy": "Albania",
+            "Arabowie":  "Albania",
+            "Astronautyka": "Albania",
+            "Choroby": "Albania",
+            "Egipt": "Albania",
+            "Ekologia-roslin": "Albania",
+            "Filmy-animowane": "Albania",
+            "Galezie-prawa": "Albania",
+            "Gry-komputerowe": "Albania",
+            "Katolicyzm": "Albania",
+            "Karkonosze": "Albania",
+            "Komiksy": "Albania",
+            "Komputery": "Albania",
+            "Kotowate": "Albania",
+            "Kultura-Chin": "Albania",
+            "Monety": "Albania",
+            "Muzyka-powazna": "Albania",
+            "Narciarstwo": "Albania",
+            "Narkomania": "Albania",
+            "Niemieccy-wojskowi": "Albania"
+      },
+      "ag_news": {
+            "Business": "Sci_Tech",
+            "Sci_Tech": "Business",
+            "Sports": "Business",
+            "World": "Business"
+      },
+      "multi_emo": {
+            "negative": "positive",
+            "positive": "negative"
+      },
+      "imdb": {
+            "negative": "positive",
+            "positive": "negative"
+      }
+  }
+}
\ No newline at end of file
--- a/experiments/scripts/attack.py
+++ b/experiments/scripts/attack.py
@@ -9,7 +9,8 @@ import os
 import torch
 from tqdm import tqdm
 from textfooler import Attack, TextFooler, Similarity, BaseLine, \
-    process, run_queue, filter_similarity_queue, spoil_queue, AttackMethod
+    process, run_queue, filter_similarity_queue, spoil_queue, \
+    AttackMethod, get_xai_importance_diff
 from time import sleep, time
 from multiprocessing import Process
 from multiprocessing import Queue, Manager
@@ -259,24 +260,28 @@ def load_xai_importance(input_dir):
 )
 def main(dataset_name: str, attack_type: str):
    """Downloads the dataset to the output directory."""
-    lang = {
-        "enron_spam": "en",
-        "poleval": "pl",
-        "20_news": "en",
-        "wiki_pl": "pl",
-        "ag_news": "en",
-        "multi_emo": "pl",
-        "imdb": "en",
-    }[dataset_name]
+    params = {}
+    with open('../parameters.json', 'r') as fin:
+        params = json.load(fin)
+
+    lang = params.get("lang", {})[dataset_name]
+
    xai_global, xai_local = {}, {}
+    xai_global_directed = {}
    if "attack_xai" in attack_type:
        importance = load_xai_importance(f"data/explanations/{dataset_name}")
        xai_global, xai_local = importance[0], importance[1]

-    max_sub = 12
-    word_change_size = 0.4
-    similarity_bound = 0.95
-    word_synonym_threshold = 0.65
+        if "attack_xai_directed" in attack_type:
+            class_mapping = params.get("class_mapping", {})[dataset_name]
+            xai_global_directed = {get_xai_importance_diff(xai_global[source], xai_global[target])
+                                   for source, target in class_mapping.items()
+                                   }
+
+    max_sub = params.get("max_sub", 10)
+    word_change_size = params.get("word_change_size", 0.5)
+    similarity_bound = params.get("similarity_bound", 0.8)
+    word_synonym_threshold = params.get("word_synonym_threshold", 0.65)

    params = {
        "attack_textfooler": [lang, AttackMethod.SYNONYM, word_synonym_threshold],
@@ -291,7 +296,10 @@ def main(dataset_name: str, attack_type: str):
        "attack_xai_char_insert": [lang, xai_global, xai_local, GLOBAL, AttackMethod.LETTER_INSERT, 0.0, word_change_size],
        "attack_xai_char_substitute": [lang, xai_global, xai_local, GLOBAL, AttackMethod.LETTER_SUBSTITUTE, 0.0, word_change_size],
        "attack_xai_char_mixin": [lang, xai_global, xai_local, GLOBAL, AttackMethod.LETTER_MIX, 0.0, word_change_size],
-        "attack_xai_char_discard_local": [lang, xai_global, xai_local, LOCAL, AttackMethod.LETTER_DISCARD, 0.0, word_change_size]
+        "attack_xai_char_discard_local": [lang, xai_global, xai_local, LOCAL, AttackMethod.LETTER_DISCARD, 0.0, word_change_size],
+        "attack_xai_directed_char_mixin": [lang, xai_global_directed, xai_local, GLOBAL, AttackMethod.LETTER_MIX, 0.0, word_change_size],
+        "attack_xai_directed": [lang, xai_global_directed, xai_local, GLOBAL, AttackMethod.SYNONYM, similarity_bound],
+        "attack_xai_directed_discard": [lang, xai_global_directed, xai_local, GLOBAL, AttackMethod.DISCARD, similarity_bound],
    }[attack_type]
    output_dir = f"data/results/{attack_type}/{dataset_name}/"
    input_file = f"data/classification/{dataset_name}/test.jsonl"