From 879398ed370aaa90fd587cc3644f692b7608418c Mon Sep 17 00:00:00 2001 From: pwalkow <pawel.walkowiak@hotmail.com> Date: Tue, 30 May 2023 11:38:07 +0200 Subject: [PATCH] Create doc builder --- module/src/main/java/pl/clarin/any2txt/Converter.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/module/src/main/java/pl/clarin/any2txt/Converter.java b/module/src/main/java/pl/clarin/any2txt/Converter.java index 41aa9d9..3134acf 100644 --- a/module/src/main/java/pl/clarin/any2txt/Converter.java +++ b/module/src/main/java/pl/clarin/any2txt/Converter.java @@ -75,6 +75,10 @@ public class Converter extends Worker { Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Input file: " + fileIn); File file = new File(fileIn); TikaInputStream inS; + + DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); + Document doc = builder.parse(file); + try { Metadata metadata = new Metadata(); metadata.set("Content-Encoding", "CP1250"); @@ -114,8 +118,8 @@ public class Converter extends Worker { Logger.getLogger(Converter.class.getName()).log(Level.WARNING, "Problems in Tika processing " + ex.getMessage() , ex.getMessage()); if (ex.getMessage().contains("XML parse error")) { try { - DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); - Document doc = builder.parse(file); +// DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); +// Document doc = builder.parse(file); doc.getDocumentElement().normalize(); Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Root element :" + doc.getDocumentElement().getNodeName() + " " + doc.getDocumentElement().getTextContent()); Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Child element :" + doc.getDocumentElement().getChildNodes().item(0).getTextContent()); -- GitLab