diff --git a/module/src/main/java/pl/clarin/any2txt/Converter.java b/module/src/main/java/pl/clarin/any2txt/Converter.java index 41aa9d9d363f09661aac73090ecc97fea6fccaf0..3134acf536c0973fe3340c35f47f49eaedf1e690 100644 --- a/module/src/main/java/pl/clarin/any2txt/Converter.java +++ b/module/src/main/java/pl/clarin/any2txt/Converter.java @@ -75,6 +75,10 @@ public class Converter extends Worker { Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Input file: " + fileIn); File file = new File(fileIn); TikaInputStream inS; + + DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); + Document doc = builder.parse(file); + try { Metadata metadata = new Metadata(); metadata.set("Content-Encoding", "CP1250"); @@ -114,8 +118,8 @@ public class Converter extends Worker { Logger.getLogger(Converter.class.getName()).log(Level.WARNING, "Problems in Tika processing " + ex.getMessage() , ex.getMessage()); if (ex.getMessage().contains("XML parse error")) { try { - DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); - Document doc = builder.parse(file); +// DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); +// Document doc = builder.parse(file); doc.getDocumentElement().normalize(); Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Root element :" + doc.getDocumentElement().getNodeName() + " " + doc.getDocumentElement().getTextContent()); Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Child element :" + doc.getDocumentElement().getChildNodes().item(0).getTextContent());