From fd3987626ae8ea121268d9b1e132d72df898b70e Mon Sep 17 00:00:00 2001 From: MGniew <m.f.gniewkowski@gmail.com> Date: Fri, 10 Mar 2023 16:00:05 +0100 Subject: [PATCH] Dependencies issues --- README.md | 5 +++++ dvc.yaml | 7 ++++--- experiments/scripts/tag_dataset.py | 9 +++++++-- requirements.txt | 11 +++++++---- 4 files changed, 23 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index e69de29..e5bd7a2 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,5 @@ +``` +pip install morfeusz2 +pip install -r requirements.txt +pip install --no-deps git+ssh://git@gitlab.clarin-pl.eu/adversarial-attacks/textfooling.git@develop +``` diff --git a/dvc.yaml b/dvc.yaml index 533298e..92afbaa 100644 --- a/dvc.yaml +++ b/dvc.yaml @@ -17,6 +17,8 @@ stages: foreach: - enron_spam - poleval + - 20_news + - wiki_pl do: wdir: . cmd: >- @@ -30,7 +32,6 @@ stages: get_model: foreach: - enron_spam - # - poleval do: wdir: . cmd: >- @@ -45,7 +46,8 @@ stages: classify: foreach: - enron_spam - #- poleval + - 20_news + - wiki_pl do: wdir: . cmd: >- @@ -61,7 +63,6 @@ stages: explain: foreach: - enron_spam - #- poleval do: wdir: . cmd: >- diff --git a/experiments/scripts/tag_dataset.py b/experiments/scripts/tag_dataset.py index e1b0671..4b24429 100644 --- a/experiments/scripts/tag_dataset.py +++ b/experiments/scripts/tag_dataset.py @@ -74,7 +74,12 @@ def process_file(dataset_df, lang, output_path): ) def main(dataset_name: str): """Downloads the dataset to the output directory.""" - lang = 'en' if dataset_name == 'enron_spam' else 'pl' + lang = { + "enron_spam": "en", + "poleval": "pl", + "20_news": "en", + "wiki_pl": "pl", + }[dataset_name] output_dir = f"data/preprocessed/{dataset_name}" os.makedirs(output_dir, exist_ok=True) @@ -93,4 +98,4 @@ def main(dataset_name: str): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/requirements.txt b/requirements.txt index 66b509a..7825594 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,13 +2,16 @@ datasets transformers click scikit-learn -dvc[s3]==2.46.0 -shap==0.41.0 -lpmn_client_biz +dvc[s3] +shap +tqdm +transformers +tokenizers +sentence-transformers --find-links https://download.pytorch.org/whl/torch_stable.html torch==1.12.0+cu116 --index-url https://pypi.clarin-pl.eu/simple/ plwn-api -git+ssh://git@gitlab.clarin-pl.eu/adversarial-attacks/textfooling.git@develop +lpmn_client_biz -- GitLab