Skip to content
Snippets Groups Projects
Commit a952d497 authored by Paweł Walkowiak
Browse files

Use inner sax parser

parent 2a5bdd73
No related branches found
No related tags found
1 merge request: !1 Support xml
Pipeline #10795 failed
......@@ -75,28 +75,28 @@ public class Converter extends Worker {
Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Input file: " + fileIn);
File file = new File(fileIn);
TikaInputStream inS = null;
try {
if (inS != null){
try {
inS.close();
Logger.getLogger(Converter.class.getName()).log(Level.INFO, "TikaInputStream closed");
}
catch (IOException exp) {
Logger.getLogger(Converter.class.getName()).log(Level.WARNING, "Problems with closing TikaInputStream " + exp.getMessage() , exp.getMessage());
}
}
Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Filename " + fileIn + " " + new File(".").getAbsolutePath());
// File f = new File(fileIn);
Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Filename exists? " + file.exists() + " Can read? " + file.canRead());
DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
Document doc = builder.parse(file);
doc.getDocumentElement().normalize();
Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Root element :" + doc.getDocumentElement().getNodeName() + " " + doc.getDocumentElement().getTextContent());
Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Child element :" + doc.getDocumentElement().getChildNodes().item(0).getTextContent());
}
catch (IOException | SAXException | ParserConfigurationException e) {
Logger.getLogger(Converter.class.getName()).log(Level.WARNING, "Problems with XML parsing " + e.getMessage() , e.getMessage());
}
// try {
// if (inS != null){
// try {
// inS.close();
// Logger.getLogger(Converter.class.getName()).log(Level.INFO, "TikaInputStream closed");
// }
// catch (IOException exp) {
// Logger.getLogger(Converter.class.getName()).log(Level.WARNING, "Problems with closing TikaInputStream " + exp.getMessage() , exp.getMessage());
// }
// }
// Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Filename " + fileIn + " " + new File(".").getAbsolutePath());
//// File f = new File(fileIn);
// Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Filename exists? " + file.exists() + " Can read? " + file.canRead());
// DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
// Document doc = builder.parse(file);
// doc.getDocumentElement().normalize();
// Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Root element :" + doc.getDocumentElement().getNodeName() + " " + doc.getDocumentElement().getTextContent());
// Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Child element :" + doc.getDocumentElement().getChildNodes().item(0).getTextContent());
// }
// catch (IOException | SAXException | ParserConfigurationException e) {
// Logger.getLogger(Converter.class.getName()).log(Level.WARNING, "Problems with XML parsing " + e.getMessage() , e.getMessage());
// }
try {
Metadata metadata = new Metadata();
......@@ -113,6 +113,15 @@ public class Converter extends Worker {
Logger.getLogger(Converter.class.getName()).log(Level.INFO, "Using TextDetector for type: " + tika.detect(inS));
tika = new Tika(new TextDetector());
}
try {
BodyContentHandler handler = new BodyContentHandler();
ParseContext pcontext = new ParseContext();
XMLParser xmlparser = new XMLParser();
xmlparser.parse(inS, handler, metadata, pcontext);
System.out.println("Contents of the document:" + handler.toString());
} catch (SAXException ex) {
Logger.getLogger(Converter.class.getName()).log(Level.WARNING, "Problems in Tika processing " + ex.getMessage(), ex.getMessage());
}
tika.setMaxStringLength(maxLength);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment