Skip to content
Snippets Groups Projects

Support for Russian and English models

Merged Michał Pogoda requested to merge en-ru-support into master
Viewing commit f75b15f1
Show latest version
1 file
+ 5
7
Compare changes
  • Side-by-side
  • Inline
+ 5
7
@@ -3,7 +3,6 @@
import configparser
import json
import string
import requests
import os
import nlp_ws
@@ -26,8 +25,11 @@ def _preprocess_input(text: str):
return text
def is_punctuation_rule(rule):
    """Return True for a match that is not punctuation and has replacements.

    NOTE(review): the name reads as the opposite of what is computed. The
    result is True when ``rule.category`` is *not* ``'PUNCTUATION'`` and
    ``rule.replacements`` is non-empty. The polarity is kept as-is because
    callers negate the result to decide which matches to keep.

    The previous body only built a ``lambda`` and discarded it, so the
    function always returned ``None``.

    Args:
        rule: Object exposing ``category`` and ``replacements`` attributes
            (presumably a LanguageTool match; confirm against the caller).

    Returns:
        bool: ``True`` if the category differs from ``'PUNCTUATION'`` and at
        least one replacement is present, ``False`` otherwise.
    """
    # Short-circuit: replacements are only inspected for non-punctuation
    # categories, matching the order of the original expression.
    return rule.category != 'PUNCTUATION' and len(rule.replacements) > 0
def _post_process(text: str, tool):
    """Apply a filtered subset of the checker's matches to ``text``.

    Every match returned by ``tool.check(text)`` is examined; a match is kept
    when its category is ``'PUNCTUATION'`` or when it has no replacements.
    The kept matches are then handed to
    ``language_tool_python.utils.correct`` together with the original text.

    Args:
        text: The string to check and correct.
        tool: Object providing a ``check(text)`` method that yields matches
            with ``category`` and ``replacements`` attributes.

    Returns:
        Whatever ``language_tool_python.utils.correct`` returns for ``text``
        and the kept matches.
    """
    kept = []
    for match in tool.check(text):
        # Equivalent (by De Morgan) to dropping matches that are both
        # non-punctuation and carry at least one replacement.
        if match.category == 'PUNCTUATION' or not len(match.replacements):
            kept.append(match)
    return language_tool_python.utils.correct(text, kept)
@@ -47,11 +49,7 @@ class Worker(nlp_ws.NLPWorker):
self.languagetool_path = self.config["languagetool_path"]
os.environ["LTP_PATH"] = self.languagetool_path
self.tool = language_tool_python.LanguageTool('pl-PL')
#
print(_post_process('Ile dałbym osiem dziewięc korzyk, dwa razy, kamera, dwa', self.tool))
#
model_path = self.config["model_path"]
self.model = AutoModelForTokenClassification.from_pretrained(
model_path