diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7eb89881d647b068bebd5995351f11b9f47a2532..9dedc36a6754b1addc20b174207665758aff982e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,4 +1,4 @@ -image: "clarinpl/python:3.8" +image: "python:3.10" cache: paths: diff --git a/Dockerfile b/Dockerfile index 446cfc11d3db76d11cb6450e7b9f94fb3c3504c0..4e8a2616e6855fd228438c807cad98177d8ee1d5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM clarinpl/python:3.8 +FROM python:3.10 COPY requirements.txt requirements.txt RUN python3 -m pip install -r requirements.txt && rm requirements.txt diff --git a/Dockerfile.gpu b/Dockerfile.gpu index 2a530046af413effae3ac6f136e5804bbd3f7c52..ff4cde9da04813c5c33ef1469de69b1d1b171be3 100644 --- a/Dockerfile.gpu +++ b/Dockerfile.gpu @@ -1,4 +1,4 @@ -FROM nvidia/cuda:11.7.0-cudnn8-runtime-ubuntu20.04 +FROM nvidia/cuda:11.7.0-cudnn8-runtime-ubuntu22.04 RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y gcc python3-dev python3-venv python3-pip diff --git a/requirements-gpu.txt b/requirements-gpu.txt index cca044f9f9a3d5316255b66dd3fde95903fea1af..3508634c394ab85065b2193ccb822187a688b89b 100644 --- a/requirements-gpu.txt +++ b/requirements-gpu.txt @@ -1,4 +1,4 @@ --index-url https://pypi.clarin-pl.eu/simple/ nlp_ws -winer==0.2.0 +winer==0.3.0 awscli==1.22.57 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 02e500a4ea74a1822faeb2dd38a4de12645b8cc0..3508634c394ab85065b2193ccb822187a688b89b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ --index-url https://pypi.clarin-pl.eu/simple/ nlp_ws +winer==0.3.0 awscli==1.22.57 \ No newline at end of file diff --git a/src/winer_worker.py b/src/winer_worker.py index 7dae252ab3ce5d8ae4bbaf9a01d39530c55036b0..f676237c92b46258becaa54612a89a4347a4b0c1 100644 --- a/src/winer_worker.py +++ b/src/winer_worker.py @@ -26,7 +26,7 @@ class WinerWorker: ) -> None: documents = [create_document_from_clarin_json(read_clarin_json(input_path))] outputs = self.active_model.predict( - [document.get_pretokenized_text() for document in documents] + [str(document) for document in documents] ) for idx in range(len(documents)):