Skip to content
Snippets Groups Projects
Commit a952d497 authored by Paweł Walkowiak
Browse files

Use inner sax parser

parent 2a5bdd73
Branches
1 merge request: !1 Support xml
Pipeline #10795 failed with stage
in 1 minute and 12 seconds
......@@ -75,28 +75,28 @@ public class Converter extends Worker {
Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Input file: " + fileIn);
File file = new File(fileIn);
TikaInputStream inS = null;
// Diagnostic pass: parse the input file with a DOM parser and log its root element
// and first child. The parsed document is used for logging only.
try {
    // No stream to close here: inS was assigned null immediately before this block,
    // so the former "close inS if non-null" guard could never execute.
    Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Filename " + fileIn + " " + new File(".").getAbsolutePath());
    Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Filename exists? " + file.exists() + " Can read? " + file.canRead());

    // Harden the parser before reading the input file: keep secure processing on and
    // never resolve external entities or external DTDs (XXE).
    DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
    domFactory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
    domFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
    domFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    domFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
    domFactory.setExpandEntityReferences(false);

    DocumentBuilder builder = domFactory.newDocumentBuilder();
    Document doc = builder.parse(file);
    doc.getDocumentElement().normalize();
    Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Root element :" + doc.getDocumentElement().getNodeName() + " " + doc.getDocumentElement().getTextContent());

    // An empty root element has no children; guard so that a diagnostic log line
    // cannot raise a NullPointerException that the catch below would not handle.
    org.w3c.dom.Node firstChild = doc.getDocumentElement().getFirstChild();
    String firstChildText = (firstChild == null) ? null : firstChild.getTextContent();
    Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Child element :" + firstChildText);
}
catch (IOException | SAXException | ParserConfigurationException e) {
    // Pass the exception itself (not its message as a format parameter) so the
    // log record keeps the stack trace.
    Logger.getLogger(Converter.class.getName()).log(Level.WARNING, "Problems with XML parsing " + e.getMessage(), e);
}
// try {
// if (inS != null){
// try {
// inS.close();
// Logger.getLogger(Converter.class.getName()).log(Level.INFO, "TikaInputStream closed");
// }
// catch (IOException exp) {
// Logger.getLogger(Converter.class.getName()).log(Level.WARNING, "Problems with closing TikaInputStream " + exp.getMessage() , exp.getMessage());
// }
// }
// Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Filename " + fileIn + " " + new File(".").getAbsolutePath());
//// File f = new File(fileIn);
// Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Filename exists? " + file.exists() + " Can read? " + file.canRead());
// DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
// Document doc = builder.parse(file);
// doc.getDocumentElement().normalize();
// Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Root element :" + doc.getDocumentElement().getNodeName() + " " + doc.getDocumentElement().getTextContent());
// Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Child element :" + doc.getDocumentElement().getChildNodes().item(0).getTextContent());
// }
// catch (IOException | SAXException | ParserConfigurationException e) {
// Logger.getLogger(Converter.class.getName()).log(Level.WARNING, "Problems with XML parsing " + e.getMessage() , e.getMessage());
// }
try {
Metadata metadata = new Metadata();
......@@ -113,6 +113,15 @@ public class Converter extends Worker {
Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Using TextDetector for type: " + tika.detect(inS));
tika = new Tika(new TextDetector());
}
// Run Tika's XMLParser over the input stream and log the text collected by the
// body content handler.
// NOTE(review): this parse reads from inS; confirm that the Tika calls which follow
// still see the full content of the same stream.
try {
    BodyContentHandler handler = new BodyContentHandler();
    ParseContext pcontext = new ParseContext();
    XMLParser xmlparser = new XMLParser();
    xmlparser.parse(inS, handler, metadata, pcontext);
    // Report through the class logger, like the surrounding statements, instead of stdout.
    Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Contents of the document:" + handler.toString());
} catch (SAXException ex) {
    // Pass the exception itself (not its message as a format parameter) so the
    // log record keeps the stack trace.
    Logger.getLogger(Converter.class.getName()).log(Level.WARNING, "Problems in Tika processing " + ex.getMessage(), ex);
}
tika.setMaxStringLength(maxLength);
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment