From 9fb51e9a0a3f6489cbd591ff66f2cd99fdeb1b24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Naskr=C4=99t?= <tomasz.naskret@pwr.edu.pl> Date: Wed, 24 May 2023 22:28:11 +0200 Subject: [PATCH] skeleton for impl --- .../ipipan/termopl/core/readers/CorpusReader.java | 15 +++++++++++++++ .../ipipan/termopl/core/readers/FileFormat.java | 1 + 2 files changed, 16 insertions(+) diff --git a/modules/termopl-core/src/main/java/pl/ipipan/termopl/core/readers/CorpusReader.java b/modules/termopl-core/src/main/java/pl/ipipan/termopl/core/readers/CorpusReader.java index 2ee377d..0f67f3d 100755 --- a/modules/termopl-core/src/main/java/pl/ipipan/termopl/core/readers/CorpusReader.java +++ b/modules/termopl-core/src/main/java/pl/ipipan/termopl/core/readers/CorpusReader.java @@ -157,6 +157,7 @@ public class CorpusReader { xmlReader = inputFactory.createXMLEventReader(xmlPosReader); } + //TODO Extract format checking to a separate file public void checkFormat() { try { if (currentFile.getName().toLowerCase().endsWith(".tgt")) format = INTERNAL_FORMAT; @@ -166,6 +167,7 @@ public class CorpusReader { else if (checkFormatXCES()) format = XCES_FORMAT; else if (checkFormatTEI()) format = TEI_FORMAT; else if (checkFormatCCL()) format = XCES_FORMAT; + else if (checkFormatJson()) format = JSON_FORMAT; else format = UNKNOWN_FORMAT; } catch (IOException e) { e.printStackTrace(); @@ -173,6 +175,11 @@ public class CorpusReader { } } + //TODO Implement correct check + private boolean checkFormatJson() { + return true; + } + public boolean checkFormatTXT() throws IOException { String line; boolean ok = true; @@ -272,12 +279,20 @@ public class CorpusReader { case CONLLU_FORMAT: current = getTokenFromCONLLUFile(); break; + case JSON_FORMAT: + current = getTokenFromJsonFile(); + break; default: current = null; } return current; } + //TODO Implement token reader + private Token getTokenFromJsonFile() { + return new Token(); + } + public Token getTokenFromTGTFile() { String line; diff --git 
a/modules/termopl-core/src/main/java/pl/ipipan/termopl/core/readers/FileFormat.java b/modules/termopl-core/src/main/java/pl/ipipan/termopl/core/readers/FileFormat.java index 36d70f3..5e7aca6 100644 --- a/modules/termopl-core/src/main/java/pl/ipipan/termopl/core/readers/FileFormat.java +++ b/modules/termopl-core/src/main/java/pl/ipipan/termopl/core/readers/FileFormat.java @@ -6,5 +6,6 @@ public enum FileFormat { XCES_FORMAT, TEI_FORMAT, CONLLU_FORMAT, + JSON_FORMAT, INTERNAL_FORMAT } -- GitLab