diff --git a/data/results/attack_textfooler_discard/.gitignore b/data/results/attack_textfooler_discard/.gitignore
index 70a2cd627e76cfd8acff2853e1e4706c56373b8c..9199641fdf84d1b97fdc9832e4d9039b49bc12e2 100644
--- a/data/results/attack_textfooler_discard/.gitignore
+++ b/data/results/attack_textfooler_discard/.gitignore
@@ -1,2 +1,3 @@
 /wiki_pl
 /enron_spam
+/20_news
diff --git a/dvc.lock b/dvc.lock
index 93146df62808b9fffe01589b24079b85e22f3e7a..7ccf085fdad8a01c27f8101a0a1b6ad29f75ef7f 100644
--- a/dvc.lock
+++ b/dvc.lock
@@ -359,3 +359,23 @@ stages:
       md5: 8a78484bd77916f82021a72338342a44.dir
       size: 2816160
       nfiles: 2
+  attack_textfooler_discard@20_news:
+    cmd: PYTHONPATH=. python experiments/scripts/attack.py --dataset_name 20_news
+      --attack_type attack_textfooler_discard
+    deps:
+    - path: data/classification/20_news
+      md5: b73611443c4189af91b827c083f37e0b.dir
+      size: 42897496
+      nfiles: 2
+    - path: data/models/20_news
+      md5: 43d68a67ecb8149bd6bf50db9767cb64.dir
+      size: 439008808
+      nfiles: 6
+    - path: experiments/scripts/attack.py
+      md5: 9e913b341cb0993625a41c401d64a30b
+      size: 12017
+    outs:
+    - path: data/results/attack_textfooler_discard/20_news/
+      md5: 82d89b00a710e9de0a2157357fed5894.dir
+      size: 24977923
+      nfiles: 2
diff --git a/experiments/scripts/attack.py b/experiments/scripts/attack.py
index 78f97ca5604985d42b61d1b8246299a7a0798fa3..b545015f2812080f20850626144907ba3c1ae735 100644
--- a/experiments/scripts/attack.py
+++ b/experiments/scripts/attack.py
@@ -41,6 +41,9 @@
 FEATURES = "features"
 IMPORTANCE = "importance"
 SYNONYM = "synonym"
 DISCARD = "discard"
+GLOBAL = "global"
+LOCAL = "local"
+
 
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
@@ -178,10 +181,19 @@
         "20_news": "en",
         "wiki_pl": "pl",
     }[dataset_name]
+    # every attack_xai* variant needs the global/local importance maps
+    xai_global, xai_local = load_xai_importance(
+        f"data/explanations/{dataset_name}"
+    ) if attack_type.startswith("attack_xai") else ({}, {})
+    xai_sub = 10
     params = {
         "attack_textfooler": [lang, SYNONYM],
         "attack_textfooler_discard": [lang, DISCARD],
-        "attack_basic": [lang, 0.5, 0.4, 0.3]  # prawopodobieństwa spacji > usunięcia znaku > usunięcia słowa
+        "attack_basic": [lang, 0.5, 0.4, 0.3],  # probabilities: insert space > delete character > delete word
+        "attack_xai": [lang, xai_global, xai_local, GLOBAL, SYNONYM, xai_sub],
+        "attack_xai_discard": [lang, xai_global, xai_local, GLOBAL, DISCARD, xai_sub],
+        "attack_xai_local": [lang, xai_global, xai_local, LOCAL, SYNONYM, xai_sub],
+        "attack_xai_discard_local": [lang, xai_global, xai_local, LOCAL, DISCARD, xai_sub]
     }[attack_type]
     output_dir = f"data/results/{attack_type}/{dataset_name}/"
 
@@ -190,10 +202,6 @@
     output_path = os.path.join(output_dir, "test.jsonl")
     dataset_df = pd.read_json(input_file, lines=True)
 
-    # xai_global, xai_local = load_xai_importance(
-    #     f"data/explanations/{dataset_name}"
-    # ) if attack_type == "attack_xai" else {}, {}
-    max_sub = 1
 
 
     m = Manager()
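
Note on the xai_global/xai_local assignment in attack.py: the parenthesized else
branch matters. A bare "else {}, {}" parses as a two-tuple whose first element is
the whole conditional expression, so the local map would silently always be empty.
A minimal, self-contained Python sketch of the pitfall (load_maps is a hypothetical
stand-in for load_xai_importance, which per the script returns a (global, local)
pair of importance maps):

    def load_maps():
        # stand-in for load_xai_importance(); returns (global, local) maps
        return {"word": 0.9}, {0: {"word": 0.7}}

    use_xai = True

    # buggy form: parses as "g, l = (load_maps() if use_xai else {}), {}"
    g, l = load_maps() if use_xai else {}, {}
    assert g == ({"word": 0.9}, {0: {"word": 0.7}})  # g got the whole pair
    assert l == {}                                   # l is always empty

    # fixed form: the else branch is an explicit pair, so both branches
    # produce two values for the unpacking assignment
    g, l = load_maps() if use_xai else ({}, {})
    assert g == {"word": 0.9} and l == {0: {"word": 0.7}}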