Unverified commit e800454d authored by Marcin Wątroba

Change model and update pipeline

parent e7a1f7ac
Part of merge request !13 "Change data model"
Showing changed files with 343 additions and 2 deletions
@@ -4,7 +4,7 @@
   <content url="file://$MODULE_DIR$">
     <excludeFolder url="file://$MODULE_DIR$/venv" />
   </content>
-  <orderEntry type="inheritedJdk" />
+  <orderEntry type="jdk" jdkName="Poetry (asr-benchmarks) (2)" jdkType="Python SDK" />
   <orderEntry type="sourceFolder" forTests="false" />
 </component>
 <component name="PyDocumentationSettings">
...
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (asr-benchmarks)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Poetry (asr-benchmarks) (2)" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
FROM danijel3/clarin-pl-speechtools:pkf
ENV PATH="/root/miniconda3/bin:${PATH}"
ARG PATH="/root/miniconda3/bin:${PATH}"
# RUN python3 --version
RUN mkdir /data
RUN mkdir /data/processing_flask
ADD requirements.txt .
RUN apt-get update && apt-get install -y curl wget
RUN rm -rf /var/lib/apt/lists/*
RUN wget \
https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
&& mkdir /root/.conda \
&& bash Miniconda3-latest-Linux-x86_64.sh -b \
&& rm -f Miniconda3-latest-Linux-x86_64.sh
RUN conda --version
# RUN bash Miniconda3-py39_4.9.2-Linux-x86_64.sh
RUN pip install -i https://pypi.clarin-pl.eu/simple -r requirements.txt
RUN rm requirements.txt
RUN ls -l
ADD main.py .
# With an exec-form ENTRYPOINT, a duplicate CMD would be appended as extra
# arguments to main.py, so only the ENTRYPOINT is kept.
ENTRYPOINT ["python3", "-u", "main.py"]
import os
import uuid

from sziszapangma.integration.service_core.asr.asr_base_processor import AsrBaseProcessor
from sziszapangma.integration.service_core.asr.asr_result import AsrResult


class SpeechbrainAsrProcessor(AsrBaseProcessor):

    def process_asr(self, audio_file_path: str) -> AsrResult:
        # prepare paths
        file_tag = str(uuid.uuid4())
        file_extension = audio_file_path.split('.')[-1]
        file_name = f'{file_tag}.{file_extension}'
        result_file_path = f'processing_flask/{file_tag}.txt'
        file_path = f'processing_flask/{file_name}'
        # create file in /data/uuid.ext
        os.system(f"cp {audio_file_path} /data/{file_path}")
        command = f'/tools/Recognize/run.sh {file_path} {result_file_path}'
        print(f'run {command}')
        os.system(command)
        with open(f'/data/{result_file_path}', 'r') as f:
            transcription = f.read()
        transcription = transcription.replace('\n', ' ')
        # remove temp files
        os.remove(f'/data/{file_path}')
        os.remove(f'/data/{result_file_path}')
        return AsrResult(
            words=transcription.split(' '),
            full_text=transcription,
            words_time_alignment=None
        )


if __name__ == '__main__':
    SpeechbrainAsrProcessor().start_processor()
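For context, a minimal local smoke test of the wrapper above could look like the sketch below; the audio path is hypothetical, and it assumes the container layout from the Dockerfile (/data/processing_flask plus /tools/Recognize/run.sh) and that AsrResult exposes its constructor fields as attributes.

# Hypothetical smoke test, not part of the service; run inside the container
# where /tools/Recognize/run.sh and /data/processing_flask exist.
processor = SpeechbrainAsrProcessor()
result = processor.process_asr('/data/example.wav')  # example path
print(result.full_text)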
#!/bin/bash
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
docker build -t asr-clarin-pl-service "$SCRIPT_DIR"
docker tag asr-clarin-pl-service docker-registry.theliver.pl/asr-clarin-pl-service:1.4
docker push docker-registry.theliver.pl/asr-clarin-pl-service:1.4
asr-benchmarks==0.0.1-alpha.48
#!/bin/bash
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
"$SCRIPT_DIR"/ajn_asr/prepare_docker.sh
"$SCRIPT_DIR"/fasttext_embedding/prepare_docker.sh
"$SCRIPT_DIR"/polish_asr_hf/prepare_docker.sh
"$SCRIPT_DIR"/speechbrain_asr/prepare_docker.sh
"$SCRIPT_DIR"/techmo_asr/prepare_docker.sh
version: "3.8"

services:
  techmo_asr:
    image: docker-registry.theliver.pl/techmo-asr:1.1
    container_name: techmo_asr
    restart: always
    ports:
      - 5001:5000
    volumes:
      - /etc/localtime:/etc/localtime:ro
      - /home/marcinwatroba/.ssh/keys/techmo_asr_server:/keys/techmo_rsa_key:ro
    environment:
      - TECHMO_SSH_SERVER_USERNAME=mwatroba
      - TECHMO_SSH_SERVER_URL=jankocon.clarin-pl.eu
      - TECHMO_SERVER_SSH_PORT=9222
      - TECHMO_REMOTE_SERVICE_PORT=12321
      - TECHMO_SERVER_URL=156.17.135.34
      - AUTH_TOKEN=t8sv-9bwd-6rps-rs9u

  transformers-wav2vec2for_ctc:
    image: docker-registry.theliver.pl/transformers-wav2vec2for_ctc:1.0
    container_name: transformers-wav2vec2for_ctc
    restart: always
    volumes:
      - /etc/localtime:/etc/localtime:ro
      - ./wav2vec2for_ctc_models:/models
    ports:
      - 5002:5000
    environment:
      - AUTH_TOKEN=x42s-qz8u-baa4-d354
      - MODEL_NAME=jonatasgrosman/wav2vec2-large-xlsr-53-polish
      - SAMPLING_RATE=16000

  embedding_service:
    image: docker-registry.theliver.pl/embedding_docker:1.0
    container_name: embeddings_service
    restart: always
    ports:
      - 5003:5000
    environment:
      - AUTH_TOKEN=fjsd-mkwe-oius-m9h2
    volumes:
      - /etc/localtime:/etc/localtime:ro
      - ./embedding_models:/models

  ajn_asr:
    image: docker-registry.theliver.pl/asr-clarin-pl-service:1.4
    container_name: ajn_asr
    restart: always
    ports:
      - 5004:5000
    environment:
      - AUTH_TOKEN=am43-649g-gwa3-b9wj
    volumes:
      - /etc/localtime:/etc/localtime:ro

  speechbrain_asr:
    image: docker-registry.theliver.pl/speechbrain-asr:1.5
    container_name: speechbrain_asr
    restart: always
    ports:
      - 5005:5000
    volumes:
      - /etc/localtime:/etc/localtime:ro
      - ./speechbrain_asr_models:/models
    environment:
      - AUTH_TOKEN=gwa3-b9wj-am43-649g
FROM python:3.9
WORKDIR /app
COPY requirements.txt requirements.txt
RUN pip install -i https://pypi.clarin-pl.eu/simple -r requirements.txt && rm requirements.txt
COPY main.py main.py
CMD ["python3", "-u", "main.py"]
import os
import shutil
from typing import Dict

import fasttext
import fasttext.util
import numpy as np
from fasttext.FastText import _FastText
from numpy import typing as npt
from sziszapangma.integration.service_core.embedding.embedding_base_processor \
    import EmbeddingBaseProcessor


class FasttextWebEmbeddingTransformer(EmbeddingBaseProcessor):
    _models: Dict[str, _FastText]

    def __init__(self):
        super().__init__()
        self._models = dict()

    def get_embedding(self, phrase: str, language: str) -> npt.NDArray[np.float64]:
        return self.get_model(language).get_word_vector(phrase)

    def get_model(self, language: str) -> _FastText:
        if language not in self._models:
            print(f'load model {language}')
            model_filename = f'cc.{language}.300.bin'
            model_path = f'/models/{model_filename}'
            print(f'{model_filename} {model_path}')
            if not os.path.exists(model_path):
                # download_model returns the local filename, e.g. cc.pl.300.bin
                full_model_name = fasttext.util.download_model(language,
                                                               if_exists='ignore')
                shutil.move(full_model_name, f'/models/{full_model_name}')
                print(f'downloaded {full_model_name}')
            self._models[language] = fasttext.load_model(model_path)
        return self._models[language]


if __name__ == '__main__':
    transformer = FasttextWebEmbeddingTransformer()
    transformer.get_model('pl')
    transformer.get_model('en')
    transformer.start_processor()
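As an illustration, the embedding lookup above could be exercised directly as sketched below; the word is an arbitrary example and a writable /models directory (as mounted in docker-compose.yml) is assumed.

# Hypothetical direct call; 'kot' is just an example Polish word.
transformer = FasttextWebEmbeddingTransformer()
vector = transformer.get_embedding('kot', 'pl')
print(vector.shape)  # cc.*.300.bin models yield 300-dimensional vectors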
#!/bin/bash
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
docker build -t embedding_docker "$SCRIPT_DIR"
docker tag embedding_docker docker-registry.theliver.pl/embedding_docker:1.0
docker push docker-registry.theliver.pl/embedding_docker:1.0
fasttext>=0.9.2
asr-benchmarks==0.0.1-alpha.48
FROM python:3.9
WORKDIR /app
RUN apt-get update && apt-get install -y libsndfile1 && apt-get clean
COPY requirements.txt requirements.txt
RUN pip install -i https://pypi.clarin-pl.eu/simple -r requirements.txt && rm requirements.txt
COPY main.py main.py
CMD ["python3", "-u", "main.py"]
version: "3.8"

services:
  transformers-wav2vec2for_ctc:
    image: docker-registry.theliver.pl/transformers-wav2vec2for_ctc:1.0
    container_name: transformers-wav2vec2for_ctc
    volumes:
      - ./models:/models
    ports:
      - 5003:5000
    environment:
      - AUTH_TOKEN=test1234
      # main.py reads MODEL_NAME (not MODEL_ID), matching the production compose file
      - MODEL_NAME=jonatasgrosman/wav2vec2-large-xlsr-53-polish
      - SAMPLING_RATE=16000
\ No newline at end of file
import os
import warnings

import librosa
import torch
from sziszapangma.integration.service_core.asr.asr_base_processor import AsrBaseProcessor
from sziszapangma.integration.service_core.asr.asr_result import AsrResult
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

warnings.filterwarnings("ignore")

# MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-polish"
# SAMPLING_RATE = 16_000


class SpeechbrainAsrProcessor(AsrBaseProcessor):
    _model_id: str
    _processor: Wav2Vec2Processor
    _model: Wav2Vec2ForCTC
    _sampling_rate: int

    def __init__(self, model_id: str, sampling_rate: int):
        super().__init__()
        self._model_id = model_id
        self._sampling_rate = sampling_rate
        self._processor = Wav2Vec2Processor.from_pretrained(model_id, cache_dir='/models')
        self._model = Wav2Vec2ForCTC.from_pretrained(model_id, cache_dir='/models')

    def process_asr(self, audio_file_path: str) -> AsrResult:
        speech_array, sampling_rate = librosa.load(audio_file_path, sr=self._sampling_rate)
        inputs = self._processor([speech_array], sampling_rate=sampling_rate, return_tensors="pt",
                                 padding=True)
        with torch.no_grad():
            logits = self._model(inputs.input_values, attention_mask=inputs.attention_mask).logits
        predicted_ids = torch.argmax(logits, dim=-1)
        predicted_sentences = self._processor.batch_decode(predicted_ids)
        transcription = predicted_sentences[0]
        return AsrResult(words=transcription.split(' '), full_text=transcription,
                         words_time_alignment=None)


if __name__ == '__main__':
    SpeechbrainAsrProcessor(
        os.environ['MODEL_NAME'],
        int(os.environ['SAMPLING_RATE'])
    ).start_processor()
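For a quick check outside the service loop, the wav2vec2 wrapper above can be constructed directly as sketched below; the file path is hypothetical and the model is cached under /models on first use.

# Hypothetical direct invocation with the model name used in docker-compose.yml.
asr = SpeechbrainAsrProcessor('jonatasgrosman/wav2vec2-large-xlsr-53-polish', 16000)
result = asr.process_asr('/tmp/sample.wav')  # example path
print(result.full_text)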
#!/bin/bash
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
docker build -t transformers-wav2vec2for_ctc "$SCRIPT_DIR"
docker tag transformers-wav2vec2for_ctc docker-registry.theliver.pl/transformers-wav2vec2for_ctc:1.0
docker push docker-registry.theliver.pl/transformers-wav2vec2for_ctc:1.0
torchaudio
datasets
transformers
asrp
asr-benchmarks==0.0.1-alpha.48
librosa
\ No newline at end of file
FROM python:3.9
WORKDIR /app
ADD requirements.txt .
RUN pip install -i https://pypi.clarin-pl.eu/simple -r requirements.txt
RUN rm requirements.txt
ADD main.py .
RUN mkdir asr_processing
CMD ["python3", "-u", "main.py"]
import os

from speechbrain.pretrained import EncoderDecoderASR
from sziszapangma.integration.service_core.asr.asr_base_processor import AsrBaseProcessor
from sziszapangma.integration.service_core.asr.asr_result import AsrResult


class SpeechbrainAsrProcessor(AsrBaseProcessor):
    asr_model: EncoderDecoderASR

    def __init__(self):
        super().__init__()
        self.asr_model = EncoderDecoderASR.from_hparams(
            source="speechbrain/asr-transformer-transformerlm-librispeech"
        )

    def process_asr(self, audio_file_path: str) -> AsrResult:
        transcription = self.asr_model.transcribe_file(audio_file_path)
        os.remove(audio_file_path)
        words = [it.lower() for it in transcription.split(' ')]
        final_transcription = transcription.lower()
        return AsrResult(words=words, full_text=final_transcription, words_time_alignment=None)


if __name__ == '__main__':
    SpeechbrainAsrProcessor().start_processor()
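A standalone check of the SpeechBrain wrapper above might look like the sketch below; the path is hypothetical, and note that process_asr removes the input file, so pass a throwaway copy.

# Hypothetical standalone check; the input file is deleted by process_asr.
asr = SpeechbrainAsrProcessor()
result = asr.process_asr('/tmp/sample_copy.wav')  # example path
print(result.full_text)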
#!/bin/bash
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
docker build --no-cache -t speechbrain-asr "$SCRIPT_DIR"
docker tag speechbrain-asr docker-registry.theliver.pl/speechbrain-asr:1.5
docker push docker-registry.theliver.pl/speechbrain-asr:1.5