diff --git a/modules/termopl-core/src/main/java/pl/ipipan/termopl/core/readers/CorpusReader.java b/modules/termopl-core/src/main/java/pl/ipipan/termopl/core/readers/CorpusReader.java index 2ee377d942526c5c3433522004871426a4298199..0f67f3d4f0ec773efc7562cfd6d09b071b5ac4f6 100755 --- a/modules/termopl-core/src/main/java/pl/ipipan/termopl/core/readers/CorpusReader.java +++ b/modules/termopl-core/src/main/java/pl/ipipan/termopl/core/readers/CorpusReader.java @@ -157,6 +157,7 @@ public class CorpusReader { xmlReader = inputFactory.createXMLEventReader(xmlPosReader); } + //TODO Extract format checking to a separate file public void checkFormat() { try { if (currentFile.getName().toLowerCase().endsWith(".tgt")) format = INTERNAL_FORMAT; @@ -166,6 +167,7 @@ public class CorpusReader { else if (checkFormatXCES()) format = XCES_FORMAT; else if (checkFormatTEI()) format = TEI_FORMAT; else if (checkFormatCCL()) format = XCES_FORMAT; + else if (checkFormatJson()) format = JSON_FORMAT; else format = UNKNOWN_FORMAT; } catch (IOException e) { e.printStackTrace(); @@ -173,6 +175,11 @@ public class CorpusReader { } } + //TODO Implement correct check + private boolean checkFormatJson() { + return true; + } + public boolean checkFormatTXT() throws IOException { String line; boolean ok = true; @@ -272,12 +279,20 @@ public class CorpusReader { case CONLLU_FORMAT: current = getTokenFromCONLLUFile(); break; + case JSON_FORMAT: + current = getTokenFromJsonFile(); + break; default: current = null; } return current; } + //TODO Implement token reader + private Token getTokenFromJsonFile() { + return new Token(); + } + public Token getTokenFromTGTFile() { String line; diff --git a/modules/termopl-core/src/main/java/pl/ipipan/termopl/core/readers/FileFormat.java b/modules/termopl-core/src/main/java/pl/ipipan/termopl/core/readers/FileFormat.java index 36d70f3babaf263c08b93d934060343f5cf5d177..5e7aca6f7950f64d62f88bc4bd8e291736722124 100644 --- 
a/modules/termopl-core/src/main/java/pl/ipipan/termopl/core/readers/FileFormat.java +++ b/modules/termopl-core/src/main/java/pl/ipipan/termopl/core/readers/FileFormat.java @@ -6,5 +6,6 @@ public enum FileFormat { XCES_FORMAT, TEI_FORMAT, CONLLU_FORMAT, + JSON_FORMAT, INTERNAL_FORMAT }