From fd3987626ae8ea121268d9b1e132d72df898b70e Mon Sep 17 00:00:00 2001
From: MGniew <m.f.gniewkowski@gmail.com>
Date: Fri, 10 Mar 2023 16:00:05 +0100
Subject: [PATCH] Dependency issues

---
 README.md                          |  5 +++++
 dvc.yaml                           |  7 ++++---
 experiments/scripts/tag_dataset.py |  9 +++++++--
 requirements.txt                   | 11 +++++++----
 4 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index e69de29..e5bd7a2 100644
--- a/README.md
+++ b/README.md
@@ -0,0 +1,5 @@
+```
+pip install morfeusz2
+pip install -r requirements.txt
+pip install --no-deps git+ssh://git@gitlab.clarin-pl.eu/adversarial-attacks/textfooling.git@develop
+```
diff --git a/dvc.yaml b/dvc.yaml
index 533298e..92afbaa 100644
--- a/dvc.yaml
+++ b/dvc.yaml
@@ -17,6 +17,8 @@ stages:
     foreach:
       - enron_spam
       - poleval
+      - 20_news
+      - wiki_pl
     do:
        wdir: .
        cmd: >-
@@ -30,7 +32,6 @@ stages:
   get_model:
     foreach:
       - enron_spam
-        # - poleval
     do:
       wdir: .
       cmd: >-
@@ -45,7 +46,8 @@ stages:
   classify:
     foreach:
       - enron_spam
-        #- poleval
+      - 20_news
+      - wiki_pl
     do:
       wdir: .
       cmd: >-
@@ -61,7 +63,6 @@ stages:
   explain:
     foreach:
       - enron_spam
-        #- poleval
     do:
       wdir: .
       cmd: >-
diff --git a/experiments/scripts/tag_dataset.py b/experiments/scripts/tag_dataset.py
index e1b0671..4b24429 100644
--- a/experiments/scripts/tag_dataset.py
+++ b/experiments/scripts/tag_dataset.py
@@ -74,7 +74,12 @@ def process_file(dataset_df, lang, output_path):
 )
 def main(dataset_name: str):
     """Downloads the dataset to the output directory."""
-    lang = 'en' if dataset_name == 'enron_spam' else 'pl'
+    lang = {
+        "enron_spam": "en",
+        "poleval": "pl",
+        "20_news": "en",
+        "wiki_pl": "pl",
+    }[dataset_name]
     output_dir = f"data/preprocessed/{dataset_name}"
     os.makedirs(output_dir, exist_ok=True)
 
@@ -93,4 +98,4 @@ def main(dataset_name: str):
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/requirements.txt b/requirements.txt
index 66b509a..7825594 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,13 +2,16 @@ datasets
 transformers
 click
 scikit-learn
-dvc[s3]==2.46.0
-shap==0.41.0
-lpmn_client_biz
+dvc[s3]
+shap
+tqdm
+transformers
+tokenizers
+sentence-transformers
 
 --find-links https://download.pytorch.org/whl/torch_stable.html
 torch==1.12.0+cu116
 
 --index-url https://pypi.clarin-pl.eu/simple/
 plwn-api
-git+ssh://git@gitlab.clarin-pl.eu/adversarial-attacks/textfooling.git@develop
+lpmn_client_biz
-- 
GitLab