diff --git a/module/src/main/java/pl/clarin/any2txt/Converter.java b/module/src/main/java/pl/clarin/any2txt/Converter.java index 5b0e3f38dd0d109ab3cdb439867f1674e2861753..ae91d30eda3b9367022c8a8699731a1139b22971 100644 --- a/module/src/main/java/pl/clarin/any2txt/Converter.java +++ b/module/src/main/java/pl/clarin/any2txt/Converter.java @@ -76,19 +76,22 @@ public class Converter extends Worker { File file = new File(fileIn); TikaInputStream inS; - try { - DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); - Document doc = builder.parse(file); - } - catch (IOException | SAXException | ParserConfigurationException e) { - Logger.getLogger(Converter.class.getName()).log(Level.WARNING, "Problems with XML parsing " + e.getMessage() , e.getMessage()); - } +// try { +// DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); +// Document doc = builder.parse(file); +// } +// catch (IOException | SAXException | ParserConfigurationException e) { +// Logger.getLogger(Converter.class.getName()).log(Level.WARNING, "Problems with XML parsing " + e.getMessage() , e.getMessage()); +// } try { Metadata metadata = new Metadata(); metadata.set("Content-Encoding", "CP1250"); inS = TikaInputStream.get(file, metadata); + DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); + Document doc = builder.parse(file); + Tika tika = new Tika(); Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Detected file type: " + tika.detect(inS)); // || "text/plain".equals(tika.detect(inS)) @@ -119,7 +122,7 @@ public class Converter extends Worker { } catch (FileNotFoundException ex) { Logger.getLogger(Converter.class.getName()).log(Level.WARNING, "File not found", ex.getMessage()); saveEmptyFile(fileOut); - } catch (IOException | TikaException ex) { + } catch (IOException | TikaException | SAXException | ParserConfigurationException ex) { Logger.getLogger(Converter.class.getName()).log(Level.WARNING, "Problems in Tika processing " + ex.getMessage() , ex.getMessage()); if (ex.getMessage().contains("XML parse error")) { // try {