From 879398ed370aaa90fd587cc3644f692b7608418c Mon Sep 17 00:00:00 2001
From: pwalkow <pawel.walkowiak@hotmail.com>
Date: Tue, 30 May 2023 11:38:07 +0200
Subject: [PATCH] Create DocumentBuilder and parse input before the Tika try block

---
 module/src/main/java/pl/clarin/any2txt/Converter.java | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/module/src/main/java/pl/clarin/any2txt/Converter.java b/module/src/main/java/pl/clarin/any2txt/Converter.java
index 41aa9d9..3134acf 100644
--- a/module/src/main/java/pl/clarin/any2txt/Converter.java
+++ b/module/src/main/java/pl/clarin/any2txt/Converter.java
@@ -75,6 +75,10 @@ public class Converter extends Worker {
         Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Input file: " + fileIn);
         File file = new File(fileIn);
         TikaInputStream inS;
+
+        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
+        Document doc = builder.parse(file);
+
         try {
             Metadata metadata = new Metadata();
             metadata.set("Content-Encoding", "CP1250");
@@ -114,8 +118,8 @@ public class Converter extends Worker {
             Logger.getLogger(Converter.class.getName()).log(Level.WARNING, "Problems in Tika processing " + ex.getMessage() , ex.getMessage());
             if (ex.getMessage().contains("XML parse error")) {
                 try {
-                    DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
-                    Document doc = builder.parse(file);
+                    // The input file is parsed into `doc` before the enclosing try block,
+                    // so this handler reuses that Document instead of parsing the file again.
                     doc.getDocumentElement().normalize();
                     Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Root element :" + doc.getDocumentElement().getNodeName() + " " + doc.getDocumentElement().getTextContent());
                     Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Child element :" + doc.getDocumentElement().getChildNodes().item(0).getTextContent());
-- 
GitLab