There will be a small downtime on Friday 16.04 between 15:00 and 18:00 (Gitlab upgrade).

If necessary, contact me at mateusz.gniewkowski@pwr.edu.pl

Commit dfe7e982 authored by Bartłomiej Koptyra's avatar Bartłomiej Koptyra

Merge branch 'develop' into 'master'

Develop

See merge request !5
parents aee4f377 8a40d3eb
Pipeline #1406 passed with stages
in 2 minutes and 32 seconds
# GitLab CI pipeline with two stages: a Checkstyle run, then a Docker image build and push.
# Default image for jobs that do not declare their own.
image: clarinpl/openjdk:8
stages:
- check_style
- build
# Runs the Maven Checkstyle plugin inside module/ using google_checks.xml as the rule set,
# with the violation severity threshold set to "warning".
google_checks:
stage: check_style
script:
- cd module
- mvn checkstyle:check -Dcheckstyle.config.location=google_checks.xml -Dcheckstyle.violationSeverity=warning -Dcheckstyle.checkstyle.consoleOutput=true
# Builds the clarinpl/speller image and pushes it; restricted to the master branch.
build_image:
stage: build
image: 'docker:18.09.7'
only:
- master
services:
- 'docker:18.09.7-dind'
# A single empty command; presumably overrides an inherited before_script - TODO confirm.
before_script:
- ''
script:
- docker build -t clarinpl/speller .
# The password is written to pass.txt, piped to `docker login` on stdin, then the file is removed.
# DOCKER_USERNAME / DOCKER_PASSWORD are presumably CI/CD variables - verify in project settings.
# NOTE(review): piping the variable straight into `docker login --password-stdin` would avoid
# the temporary file on disk.
- echo $DOCKER_PASSWORD > pass.txt
- cat pass.txt | docker login --username $DOCKER_USERNAME --password-stdin
- rm pass.txt
- docker push clarinpl/speller
# GitLab CI pipeline with two stages: a Checkstyle run, then a Docker image build and push.
# Default image for jobs that do not declare their own.
image: clarinpl/openjdk:8
stages:
- check_style
- build
# Runs the Maven Checkstyle plugin inside module/ using google_checks.xml as the rule set,
# with the violation severity threshold set to "warning".
google_checks:
stage: check_style
script:
- cd module
- mvn checkstyle:check -Dcheckstyle.config.location=google_checks.xml -Dcheckstyle.violationSeverity=warning -Dcheckstyle.checkstyle.consoleOutput=true
# Builds the clarinpl/speller image and pushes it; restricted to the master branch.
build_image:
stage: build
image: 'docker:18.09.7'
only:
- master
services:
- 'docker:18.09.7-dind'
# A single empty command; presumably overrides an inherited before_script - TODO confirm.
before_script:
- ''
script:
- docker build -t clarinpl/speller .
# The password is written to pass.txt, piped to `docker login` on stdin, then the file is removed.
# DOCKER_USERNAME / DOCKER_PASSWORD are presumably CI/CD variables - verify in project settings.
# NOTE(review): piping the variable straight into `docker login --password-stdin` would avoid
# the temporary file on disk.
- echo $DOCKER_PASSWORD > pass.txt
- cat pass.txt | docker login --username $DOCKER_USERNAME --password-stdin
- rm pass.txt
- docker push clarinpl/speller
# Stage 1 ("builder"): builds nlp.worker and the speller module with Maven.
FROM clarinpl/openjdk:8 as builder
# Image metadata labels.
# NOTE(review): "sentances" and "organiztation" look like typos; left unchanged because
# label keys/values are runtime metadata that other tooling may match on.
LABEL application="Speller"
LABEL description="Client - Workers - correcting mistakes in sentances in txt files"
LABEL organiztation="NLP Tools for Polish from G4.19 Group - Wroclaw University of Science and Technology"
WORKDIR /home/install
# Builds and installs nlp.worker. This Dockerfile never copies that directory in, so it is
# presumably already present under /home/install in the base image - TODO confirm.
RUN cd nlp.worker && \
mvn clean && \
mvn install
WORKDIR /home/install
# Copies the speller sources from the build context and builds them.
COPY ./module ./module
RUN cd module && \
mvn clean && \
mvn install
# Stage 2: final image based on clarinpl/openjdk-jre:8, containing the entrypoint script and
# the jar produced by the builder stage.
FROM clarinpl/openjdk-jre:8
WORKDIR /home/worker
COPY ./entrypoint.sh ./entrypoint.sh
RUN ["chmod", "+x", "./entrypoint.sh"]
# This stage sets no CMD or ENTRYPOINT of its own.
COPY --from=builder /home/install/module/target/nlp.worker.speller-1.0-SNAPSHOT.jar .
# Stage 1 ("builder"): builds nlp.worker and the speller module with Maven.
FROM clarinpl/openjdk:8 as builder
# Image metadata labels.
# NOTE(review): "sentances" and "organiztation" look like typos; left unchanged because
# label keys/values are runtime metadata that other tooling may match on.
LABEL application="Speller"
LABEL description="Client - Workers - correcting mistakes in sentances in txt files"
LABEL organiztation="NLP Tools for Polish from G4.19 Group - Wroclaw University of Science and Technology"
WORKDIR /home/install
# Builds and installs nlp.worker. This Dockerfile never copies that directory in, so it is
# presumably already present under /home/install in the base image - TODO confirm.
RUN cd nlp.worker && \
mvn clean && \
mvn install
WORKDIR /home/install
# Copies the speller sources from the build context and builds them.
COPY ./module ./module
RUN cd module && \
mvn clean && \
mvn install
# Stage 2: final image based on clarinpl/openjdk-jre:8, containing the entrypoint script and
# the jar produced by the builder stage.
FROM clarinpl/openjdk-jre:8
WORKDIR /home/worker
COPY ./entrypoint.sh ./entrypoint.sh
RUN ["chmod", "+x", "./entrypoint.sh"]
COPY --from=builder /home/install/module/target/nlp.worker.speller-1.0-SNAPSHOT.jar .
# Default command: run the entrypoint script from the working directory.
CMD ["./entrypoint.sh"]
\ No newline at end of file
; Worker configuration file.
; Presumably read by the pl.clarin.ws.worker framework (message broker connection, queue
; naming, request root directory) - verify key semantics against that library.
[service]
tool = speller
root = /samba/requests/
rabbit_host = rabbitmq
; NOTE(review): user and password are both "test" - confirm these are not used in production.
rabbit_user = test
rabbit_password = test
queue_prefix = nlp_
[tool]
workers_number = 1
; Worker configuration file.
; Presumably read by the pl.clarin.ws.worker framework (message broker connection, queue
; naming, request root directory) - verify key semantics against that library.
[service]
tool = speller
root = /samba/requests/
rabbit_host = rabbitmq
; NOTE(review): user and password are both "test" - confirm these are not used in production.
rabbit_user = test
rabbit_password = test
queue_prefix = nlp_
[tool]
workers_number = 1
# Compose definition that runs the speller worker as a single service.
version: '3'
services:
speller:
container_name: clarin_speller
# The image is built with the current directory as build context.
build: ./
volumes:
# Host directory /samba is mounted at the same path inside the container.
- '/samba:/samba'
# The local config.ini is mounted into the container's working directory.
- './config.ini:/home/worker/config.ini'
working_dir: /home/worker/
# Starts the container through entrypoint.sh, resolved relative to working_dir.
entrypoint:
- ./entrypoint.sh
restart: always
# Compose definition that runs the speller worker as a single service.
version: '3'
services:
speller:
container_name: clarin_speller
# The image is built with the current directory as build context.
build: ./
volumes:
# Host directory /samba is mounted at the same path inside the container.
- '/samba:/samba'
# The local config.ini is mounted into the container's working directory.
- './config.ini:/home/worker/config.ini'
working_dir: /home/worker/
# Starts the container through entrypoint.sh, resolved relative to working_dir.
entrypoint:
- ./entrypoint.sh
restart: always
package pl.clarin.speller;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
/**
 * Holds the character ranges read from a spaCy result archive and answers whether a match
 * position lies inside a proper noun or a foreign-language sentence.
 *
 * <p>While nothing is loaded, {@link #isProperNoun(String, int)} falls back to a capitalisation
 * heuristic and {@link #isForeignSentence(String, int)} always returns {@code false}.
 */
public class SpaCy {
  boolean loaded = false;
  ArrayList<ArrayList<Integer>> properNounArray = null;
  ArrayList<ArrayList<Integer>> foreignSentenceArray = null;

  /** Returns {@code true} after a successful {@link #load(ZipFile)} until {@link #unload()}. */
  public boolean isLoaded() {
    return loaded;
  }

  /** Drops the loaded ranges and switches back to the not-loaded state. */
  public void unload() {
    properNounArray = null;
    foreignSentenceArray = null;
    loaded = false;
  }

  /**
   * Reads the range files from a spaCy archive and marks this object as loaded.
   *
   * <p>Recognised entries are {@code text.txt}, {@code proper_nouns.txt} and
   * {@code foreign_sentences.txt}; anything else is reported on standard output and ignored.
   * A range file that is absent from the archive is treated as an empty list of ranges.
   *
   * @param zipFile open archive; it must stay open while the returned stream is being read
   * @return a stream over {@code text.txt}, or {@code null} when the archive has no such entry
   * @throws IOException if an entry cannot be opened
   */
  public InputStream load(ZipFile zipFile) throws IOException {
    InputStream inputText = null;
    // Collected locally and published only at the end, so a failed load leaves the previous
    // state untouched and a missing range file never leaves a null list behind.
    ArrayList<ArrayList<Integer>> properNouns = new ArrayList<>();
    ArrayList<ArrayList<Integer>> foreignSentences = new ArrayList<>();
    Enumeration<? extends ZipEntry> entries = zipFile.entries();
    while (entries.hasMoreElements()) {
      ZipEntry entry = entries.nextElement();
      if (entry.isDirectory()) {
        System.out.println("Zip from spaCy contains unexpected directories!");
        continue;
      }
      String name = entry.getName();
      if (name.equals("text.txt")) {
        inputText = zipFile.getInputStream(entry);
      } else if (name.equals("proper_nouns.txt")) {
        try (InputStream in = zipFile.getInputStream(entry)) {
          properNouns = processSpacyFiles(in);
        }
      } else if (name.equals("foreign_sentences.txt")) {
        try (InputStream in = zipFile.getInputStream(entry)) {
          foreignSentences = processSpacyFiles(in);
        }
      } else {
        System.out.println("Zip from spaCy contains unexpected files!");
      }
    }
    properNounArray = properNouns;
    foreignSentenceArray = foreignSentences;
    loaded = true;
    return inputText;
  }

  /**
   * Parses one range file: every line holds two space-separated integers (start and end).
   * Blank lines are skipped; lines that do not start with two integers are reported and
   * skipped instead of aborting the whole load.
   */
  private static ArrayList<ArrayList<Integer>> processSpacyFiles(InputStream in) {
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    ArrayList<ArrayList<Integer>> array = new ArrayList<>();
    try {
      String line;
      while ((line = reader.readLine()) != null) {
        if (line.trim().isEmpty()) {
          continue;
        }
        String[] str = line.split(" ");
        if (str.length < 2) {
          System.out.println("The text file contains incorrect data." + line);
          continue;
        }
        try {
          ArrayList<Integer> list = new ArrayList<>();
          list.add(Integer.parseInt(str[0]));
          list.add(Integer.parseInt(str[1]));
          array.add(list);
        } catch (NumberFormatException e) {
          System.out.println("The text file contains incorrect data." + e.getMessage());
        }
      }
    } catch (IOException e) {
      System.out.println("The text file contains incorrect data." + e.getMessage());
    }
    return array;
  }

  /**
   * Checks whether the match starts inside a loaded foreign-sentence range.
   *
   * @param inputString text being checked
   * @param matchFrom index in {@code inputString} at which the match starts
   * @return {@code true} only when ranges are loaded, {@code matchFrom} lies inside one of them
   *     and the character at {@code matchFrom} is upper case
   */
  public boolean isForeignSentence(String inputString, int matchFrom) {
    if (!loaded) {
      return false;
    }
    return startsUpperCaseInsideRange(foreignSentenceArray, inputString, matchFrom);
  }

  /**
   * Checks whether the match starts a proper noun.
   *
   * <p>With loaded ranges the answer comes from the proper-noun ranges. Otherwise a heuristic
   * is used: the character at {@code matchFrom} must be upper case and, scanning backwards, a
   * letter or digit must be found before any of {@code . ! ?}, newline or tab.
   *
   * @param inputString text being checked
   * @param matchFrom index in {@code inputString} at which the match starts
   * @return {@code true} when the match is considered a proper noun
   */
  public boolean isProperNoun(String inputString, int matchFrom) {
    if (loaded) {
      return startsUpperCaseInsideRange(properNounArray, inputString, matchFrom);
    }
    if (!Character.isUpperCase(inputString.charAt(matchFrom))) {
      return false;
    }
    // Scan down to index 0 inclusive so that a one-letter first word ("W Polsce") counts.
    for (int i = matchFrom - 1; i >= 0; i--) {
      char previous = inputString.charAt(i);
      if (Character.isLetterOrDigit(previous)) {
        return true;
      }
      if (".!?\n\t".indexOf(previous) != -1) {
        return false;
      }
    }
    return false;
  }

  /**
   * Looks up {@code matchFrom} in ranges that are scanned in order with an early exit, which
   * relies on the ranges being sorted by position.
   */
  private static boolean startsUpperCaseInsideRange(
      List<? extends List<Integer>> ranges, String inputString, int matchFrom) {
    if (ranges == null) {
      return false;
    }
    for (List<Integer> tuple : ranges) {
      if (matchFrom >= tuple.get(0) && matchFrom < tuple.get(1)) {
        return Character.isUpperCase(inputString.charAt(matchFrom));
      }
      if (matchFrom > tuple.get(1)) {
        return false;
      }
    }
    return false;
  }
}
package pl.clarin.speller;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
/**
 * Keeps the character ranges delivered in a spaCy result archive and tells whether a match
 * position lies inside a proper noun or a foreign-language sentence.
 *
 * <p>While nothing is loaded, {@link #isProperNoun(String, int)} uses a capitalisation
 * heuristic and {@link #isForeignSentence(String, int)} always returns {@code false}.
 */
public class SpaCy {
  private boolean loaded = false;
  private List<List<Integer>> properNounArray = null;
  private List<List<Integer>> foreignSentenceArray = null;

  /** Returns {@code true} after a successful {@link #load(ZipFile)} until {@link #unload()}. */
  public boolean isLoaded() {
    return loaded;
  }

  /** Forgets the loaded ranges and returns to the not-loaded state. */
  public void unload() {
    properNounArray = null;
    foreignSentenceArray = null;
    loaded = false;
  }

  /**
   * Reads the range files from a spaCy archive and marks this object as loaded.
   *
   * <p>Recognised entries are {@code text.txt}, {@code proper_nouns.txt} and
   * {@code foreign_sentences.txt}; anything else is reported on standard output and ignored.
   * A range file that is absent from the archive is treated as an empty list of ranges.
   *
   * @param zipFile open archive; it must stay open while the returned stream is being read
   * @return a stream over {@code text.txt}, or {@code null} when the archive has no such entry
   * @throws IOException if an entry cannot be opened
   */
  public InputStream load(ZipFile zipFile) throws IOException {
    InputStream inputText = null;
    // Built locally and published at the end: a failed load keeps the previous state, and a
    // missing range file yields an empty list rather than a null field.
    List<List<Integer>> properNouns = new ArrayList<>();
    List<List<Integer>> foreignSentences = new ArrayList<>();
    Enumeration<? extends ZipEntry> entries = zipFile.entries();
    while (entries.hasMoreElements()) {
      ZipEntry entry = entries.nextElement();
      if (entry.isDirectory()) {
        System.out.println("Zip from spaCy contains unexpected directories!");
        continue;
      }
      String name = entry.getName();
      if (name.equals("text.txt")) {
        inputText = zipFile.getInputStream(entry);
      } else if (name.equals("proper_nouns.txt")) {
        try (InputStream in = zipFile.getInputStream(entry)) {
          properNouns = processSpacyFiles(in);
        }
      } else if (name.equals("foreign_sentences.txt")) {
        try (InputStream in = zipFile.getInputStream(entry)) {
          foreignSentences = processSpacyFiles(in);
        }
      } else {
        System.out.println("Zip from spaCy contains unexpected files!");
      }
    }
    properNounArray = properNouns;
    foreignSentenceArray = foreignSentences;
    loaded = true;
    return inputText;
  }

  /**
   * Parses one range file: every line holds two space-separated integers (start and end).
   * Blank lines are skipped; lines that do not start with two integers are reported and
   * skipped instead of aborting the whole load.
   */
  private List<List<Integer>> processSpacyFiles(InputStream in) {
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    List<List<Integer>> array = new ArrayList<>();
    try {
      String line;
      while ((line = reader.readLine()) != null) {
        if (line.trim().isEmpty()) {
          continue;
        }
        String[] str = line.split(" ");
        if (str.length < 2) {
          System.out.println("The text file contains incorrect data." + line);
          continue;
        }
        try {
          List<Integer> list = new ArrayList<>();
          list.add(Integer.parseInt(str[0]));
          list.add(Integer.parseInt(str[1]));
          array.add(list);
        } catch (NumberFormatException e) {
          System.out.println("The text file contains incorrect data." + e.getMessage());
        }
      }
    } catch (IOException e) {
      System.out.println("The text file contains incorrect data." + e.getMessage());
    }
    return array;
  }

  /**
   * Checks whether the match starts inside a loaded foreign-sentence range.
   *
   * @param inputString text being checked
   * @param matchFrom index in {@code inputString} at which the match starts
   * @return {@code true} only when ranges are loaded, {@code matchFrom} lies inside one of them
   *     and the character at {@code matchFrom} is upper case
   */
  public boolean isForeignSentence(String inputString, int matchFrom) {
    if (!loaded) {
      return false;
    }
    return startsUpperCaseInsideRange(foreignSentenceArray, inputString, matchFrom);
  }

  /**
   * Checks whether the match starts a proper noun.
   *
   * <p>With loaded ranges the answer comes from the proper-noun ranges. Otherwise a heuristic
   * is used: the character at {@code matchFrom} must be upper case and, scanning backwards, a
   * letter or digit must be found before any of {@code . ! ?}, newline or tab.
   *
   * @param inputString text being checked
   * @param matchFrom index in {@code inputString} at which the match starts
   * @return {@code true} when the match is considered a proper noun
   */
  public boolean isProperNoun(String inputString, int matchFrom) {
    if (loaded) {
      return startsUpperCaseInsideRange(properNounArray, inputString, matchFrom);
    }
    if (!Character.isUpperCase(inputString.charAt(matchFrom))) {
      return false;
    }
    // Index 0 is included so that a one-letter first word ("W Polsce") is taken into account.
    for (int i = matchFrom - 1; i >= 0; i--) {
      char previous = inputString.charAt(i);
      if (Character.isLetterOrDigit(previous)) {
        return true;
      }
      if (".!?\n\t".indexOf(previous) != -1) {
        return false;
      }
    }
    return false;
  }

  /**
   * Looks up {@code matchFrom} in ranges that are scanned in order with an early exit, which
   * relies on the ranges being sorted by position.
   */
  private static boolean startsUpperCaseInsideRange(
      List<List<Integer>> ranges, String inputString, int matchFrom) {
    if (ranges == null) {
      return false;
    }
    for (List<Integer> tuple : ranges) {
      if (matchFrom >= tuple.get(0) && matchFrom < tuple.get(1)) {
        return Character.isUpperCase(inputString.charAt(matchFrom));
      }
      if (matchFrom > tuple.get(1)) {
        return false;
      }
    }
    return false;
  }
}
package pl.clarin.speller;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.ZipFile;
import org.json.JSONObject;
import org.languagetool.JLanguageTool;
import org.languagetool.language.Polish;
import pl.clarin.ws.worker.IniFile;
import pl.clarin.ws.worker.Service;
import pl.clarin.ws.worker.Worker;
/**
 * Worker that corrects a text line by line with {@link TextEdit} and LanguageTool.
 *
 * <p>The input is either a plain UTF-8 text file or, when the {@code format} option equals
 * {@code "spacy"}, a zip archive that is handed to {@code textEditor.spacy} for loading.
 */
public class Speller extends Worker {
  private static final Logger LOGGER = Logger.getLogger(Speller.class.getName());

  static Polish lang = null;
  JLanguageTool langTool = null;
  TextEdit textEditor = null;

  public static void main(String[] args) {
    new Service<>(Speller.class);
  }

  // init object for each thread
  @Override
  public void init() throws Exception {
    langTool = new JLanguageTool(lang);
    textEditor = new TextEdit();
  }

  // init objects shared by threads
  @Override
  public void static_init(IniFile init) throws Exception {
    lang = new Polish();
  }

  /**
   * Reads {@code fileIn}, corrects every line and writes the result to {@code fileOut} as
   * UTF-8, one corrected line per input line terminated by {@code '\n'}.
   *
   * <p>A line whose correction throws is logged and left out of the output. I/O failures are
   * logged and not rethrown. The zip archive (if any) is always closed and the spaCy data is
   * always unloaded, even when processing fails.
   *
   * @param fileIn path of the input text file or spaCy zip archive
   * @param fileOut path of the output file
   * @param options request options; only the optional {@code format} key is read here
   */
  @Override
  public void process(String fileIn, String fileOut, JSONObject options) {
    String inputformat = "text";
    if (options.has("format")) {
      inputformat = options.getString("format");
    }
    final boolean spacyInput = inputformat.equals("spacy");
    ZipFile zipFile = null;
    try {
      InputStreamReader reader;
      if (spacyInput) {
        zipFile = new ZipFile(fileIn);
        reader = new InputStreamReader(textEditor.spacy.load(zipFile), StandardCharsets.UTF_8);
      } else {
        reader = new InputStreamReader(new FileInputStream(fileIn), StandardCharsets.UTF_8);
      }
      // The writer is opened before reading, as before, so the output file exists (possibly
      // empty) even if reading fails; try-with-resources closes both ends on every path.
      try (BufferedReader br = new BufferedReader(reader);
          Writer out = new BufferedWriter(new OutputStreamWriter(
              new FileOutputStream(fileOut), StandardCharsets.UTF_8))) {
        StringBuilder sb = new StringBuilder();
        String line;
        while ((line = br.readLine()) != null) {
          try {
            sb.append(textEditor.edit(line, langTool)).append('\n');
          } catch (Exception exception) {
            LOGGER.log(Level.SEVERE, "Problems with TextEdit class: " + fileOut, exception);
          }
        }
        out.write(sb.toString());
      }
    } catch (IOException exception) {
      // Logging through the logger keeps the full stack trace; concatenating
      // getStackTrace() into a string would only print the array's identity.
      LOGGER.log(Level.SEVERE, "Problems with reading or writing: " + fileOut, exception);
    } finally {
      if (zipFile != null) {
        try {
          zipFile.close();
        } catch (IOException exception) {
          LOGGER.log(Level.WARNING, "Problems closing zip file: " + fileIn, exception);
        }
      }
      if (spacyInput) {
        textEditor.spacy.unload();
      }
    }
  }
}
package pl.clarin.speller;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.ZipFile;
import org.json.JSONObject;
import org.languagetool.JLanguageTool;
import org.languagetool.language.Polish;
import pl.clarin.ws.worker.IniFile;
import pl.clarin.ws.worker.Service;
import pl.clarin.ws.worker.Worker;
/**
 * Worker that corrects a text line by line with {@link TextEdit} and LanguageTool.
 *
 * <p>The input is either a plain UTF-8 text file or, when the {@code format} option equals
 * {@code "spacy"}, a zip archive that is handed to {@code textEditor.spacy} for loading.
 */
public class Speller extends Worker {
  private static final Logger LOGGER = Logger.getLogger(Speller.class.getName());

  static Polish lang = null;
  private JLanguageTool langTool = null;
  private TextEdit textEditor = null;

  public static void main(String[] args) {
    new Service<>(Speller.class);
  }

  // init object for each thread
  @Override
  public void init() throws Exception {
    langTool = new JLanguageTool(lang);
    textEditor = new TextEdit();
  }

  // init objects shared by threads
  @Override
  public void static_init(IniFile init) throws Exception {
    lang = new Polish();
  }

  /**
   * Reads {@code fileIn}, corrects every line and writes the result to {@code fileOut} as
   * UTF-8, one corrected line per input line terminated by {@code '\n'}.
   *
   * <p>A line whose correction throws is logged and left out of the output. I/O failures are
   * logged and not rethrown. The zip archive (if any) is always closed and the spaCy data is
   * always unloaded, even when processing fails.
   *
   * @param fileIn path of the input text file or spaCy zip archive
   * @param fileOut path of the output file
   * @param options request options; only the optional {@code format} key is read here
   */
  @Override
  public void process(String fileIn, String fileOut, JSONObject options) {
    String inputformat = "text";
    if (options.has("format")) {
      inputformat = options.getString("format");
    }
    final boolean spacyInput = inputformat.equals("spacy");
    // Declared outside the try so the finally block can release the archive handle.
    ZipFile zipFile = null;
    try {
      InputStreamReader reader;
      if (spacyInput) {
        zipFile = new ZipFile(fileIn);
        reader = new InputStreamReader(textEditor.spacy.load(zipFile), StandardCharsets.UTF_8);
      } else {
        reader = new InputStreamReader(new FileInputStream(fileIn), StandardCharsets.UTF_8);
      }
      // The writer is opened before reading, as before, so the output file exists (possibly
      // empty) even if reading fails; try-with-resources closes both ends on every path.
      try (BufferedReader br = new BufferedReader(reader);
          Writer out = new BufferedWriter(new OutputStreamWriter(
              new FileOutputStream(fileOut), StandardCharsets.UTF_8))) {
        StringBuilder sb = new StringBuilder();
        String line;
        while ((line = br.readLine()) != null) {
          try {
            sb.append(textEditor.edit(line, langTool)).append('\n');
          } catch (Exception exception) {
            LOGGER.log(Level.SEVERE, "Problems with TextEdit class: " + fileOut, exception);
          }
        }
        out.write(sb.toString());
      }
    } catch (IOException exception) {
      // Logging through the logger keeps the full stack trace; concatenating
      // getStackTrace() into a string would only print the array's identity.
      LOGGER.log(Level.SEVERE, "Problems with reading or writing: " + fileOut, exception);
    } finally {
      if (zipFile != null) {
        try {
          zipFile.close();
        } catch (IOException exception) {
          LOGGER.log(Level.WARNING, "Problems closing zip file: " + fileIn, exception);
        }
      }
      if (spacyInput) {
        textEditor.spacy.unload();
      }
    }
  }
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment