There will be a short downtime on Friday 16.04 between 15:00 and 18:00 (GitLab upgrade).

If necessary, contact me at mateusz.gniewkowski@pwr.edu.pl

Commit 68d03909 authored by Bartłomiej Koptyra

Merge branch 'develop' into 'master'

Develop

See merge request !6
parents dfe7e982 acccbb6a
Pipeline #1524 passed with stages
in 3 minutes and 16 seconds
......@@ -8,4 +8,4 @@ rabbit_password = test
queue_prefix = nlp_
[tool]
workers_number = 1
workers_number = 12
#!/bin/sh
cd /home/worker
java -jar nlp.worker.speller-1.0-SNAPSHOT.jar
\ No newline at end of file
java -jar nlp.worker.speller-1.0-SNAPSHOT.jar
......@@ -61,11 +61,13 @@ public class SpaCy {
ArrayList<ArrayList<Integer>> array = new ArrayList<>();
try {
while ((line = reader.readLine()) != null) {
String[] str = line.split(" ");
ArrayList<Integer> list = new ArrayList<>();
list.add(Integer.parseInt(str[0]));
list.add(Integer.parseInt(str[1]));
array.add(list);
if (!line.equals("")) {
String[] str = line.split(" ");
ArrayList<Integer> list = new ArrayList<>();
list.add(Integer.parseInt(str[0]));
list.add(Integer.parseInt(str[1]));
array.add(list);
}
}
} catch (IOException e) {
System.out.println("The text file contains incorrect data." + e.getMessage());
......@@ -74,14 +76,14 @@ public class SpaCy {
}
/**Checks if input sentence is from a different language.*/
public boolean isForeignSentence(String inputString, int matchFrom) {
public boolean isForeignSentence(String inputString, int idx, int matchFrom) {
boolean isForeginSent = false;
if (loaded) {
for (List<Integer> tuple : foreignSentenceArray) {
if (matchFrom >= tuple.get(0) && matchFrom < tuple.get(1)) {
return Character.isUpperCase(inputString.charAt(matchFrom));
if (matchFrom >= (tuple.get(0) - idx) && matchFrom < (tuple.get(1) - idx)) {
return true;
}
if (matchFrom > tuple.get(1)) {
if ((tuple.get(1) - idx) > matchFrom) {
return false;
}
}
......@@ -90,7 +92,7 @@ public class SpaCy {
}
/**Checks if input sentence is from a proper noun.*/
public boolean isProperNoun(String inputString, int matchFrom) {
public boolean isProperNoun(String inputString, int idx, int matchFrom) {
boolean isProperNoun = false;
if (!loaded) {
if (Character.isUpperCase(inputString.charAt(matchFrom))) {
......@@ -107,10 +109,10 @@ public class SpaCy {
}
} else {
for (List<Integer> tuple : properNounArray) {
if (matchFrom >= tuple.get(0) && matchFrom < tuple.get(1)) {
if (matchFrom >= (tuple.get(0) - idx) && matchFrom < (tuple.get(1) - idx)) {
return Character.isUpperCase(inputString.charAt(matchFrom));
}
if (matchFrom > tuple.get(1)) {
if ((tuple.get(1) - idx) > matchFrom) {
return false;
}
}
......
......@@ -63,7 +63,6 @@ public class Speller extends Worker {
reader = new InputStreamReader(fstream, StandardCharsets.UTF_8);
}
Writer out = new BufferedWriter(new OutputStreamWriter(
new FileOutputStream(fileOut), StandardCharsets.UTF_8));
......@@ -71,9 +70,11 @@ public class Speller extends Worker {
try (BufferedReader br = new BufferedReader(reader)) {
String line = null;
int idx = 0;
while ((line = br.readLine()) != null) {
try {
String correctedLine = textEditor.edit(line, langTool);
String correctedLine = textEditor.edit(line, idx, langTool);
idx = idx + line.length() + "\n".length();
sb.append(correctedLine).append('\n');
} catch (Exception exception) {
Logger.getLogger(Speller.class.getName())
......
......@@ -9,7 +9,7 @@ public class TextEdit {
public SpaCy spacy = new SpaCy();
/**Class that corrects input text.*/
public String edit(String inputString, JLanguageTool langTool) throws Exception {
public String edit(String inputString, int idx, JLanguageTool langTool) throws Exception {
char[] buffer = inputString.toCharArray();
StringBuilder sb = new StringBuilder();
List<RuleMatch> matches = langTool.check(inputString);
......@@ -27,7 +27,7 @@ public class TextEdit {
if (match.getSuggestedReplacements().isEmpty()) {
matchingWord = inputString.substring(matchFrom, matchTo);
} else {
boolean change = toChange(inputString,matchFrom,matchTo);
boolean change = toChange(inputString, idx, matchFrom, matchTo);
if (change) {
matchingWord = match.getSuggestedReplacements().get(0);
} else {
......@@ -41,9 +41,9 @@ public class TextEdit {
return sb.toString();
}
private boolean toChange(String inputString, int matchFrom, int matchTo) {
return !isForeignSentence(inputString, matchFrom)
&& !isProperNoun(inputString, matchFrom)
private boolean toChange(String inputString, int idx, int matchFrom, int matchTo) {
return !isForeignSentence(inputString, idx, matchFrom)
&& !isProperNoun(inputString, idx, matchFrom)
&& !isAcronym(inputString, matchFrom, matchTo)
&& !isFileOrExtension(inputString, matchFrom, matchTo)
&& !checkFirstLetter(inputString, matchFrom)
......@@ -77,12 +77,12 @@ public class TextEdit {
return isSurname;
}
private boolean isForeignSentence(String inputString, int matchFrom) {
return spacy.isForeignSentence(inputString, matchFrom);
private boolean isForeignSentence(String inputString, int idx, int matchFrom) {
return spacy.isForeignSentence(inputString, idx, matchFrom);
}
private boolean isProperNoun(String inputString, int matchFrom) {
return spacy.isProperNoun(inputString, matchFrom);
private boolean isProperNoun(String inputString, int idx, int matchFrom) {
return spacy.isProperNoun(inputString, idx, matchFrom);
}
private boolean isAcronym(String inputString, int matchFrom, int matchTo) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment