Skip to content
Snippets Groups Projects
Commit 898b0a7e authored by Paweł Walkowiak
Browse files

Restore version

parent 30426ab1
Branches
No related tags found
Loading
Pipeline #11509 passed
...@@ -89,42 +89,41 @@ public class Converter extends Worker { ...@@ -89,42 +89,41 @@ public class Converter extends Worker {
if("application/x-msdownload".equals(tika.detect(inS))){ if("application/x-msdownload".equals(tika.detect(inS))){
tika = new Tika(new TextDetector()); tika = new Tika(new TextDetector());
} }
if("text/plain".equals(tika.detect(inS))){ // if("text/plain".equals(tika.detect(inS))){
try { // try {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); // DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder loader = factory.newDocumentBuilder(); // DocumentBuilder loader = factory.newDocumentBuilder();
inS.setEncoding("UTF-8"); // Document document = loader.parse(inS);
Document document = loader.parse(inS); //
// DocumentTraversal trav = (DocumentTraversal) document;
DocumentTraversal trav = (DocumentTraversal) document; // NodeIterator iterator = trav.createNodeIterator(document.getDocumentElement(),
NodeIterator iterator = trav.createNodeIterator(document.getDocumentElement(), // NodeFilter.SHOW_ELEMENT, null, true);
NodeFilter.SHOW_ELEMENT, null, true); //
// for (Node n = iterator.nextNode(); n != null; n = iterator.nextNode()) {
for (Node n = iterator.nextNode(); n != null; n = iterator.nextNode()) { //
// String text = n.getTextContent().trim();
String text = n.getTextContent().trim(); //
// if (!text.isEmpty()) {
if (!text.isEmpty()) { // System.out.println(text);
System.out.println(text); // }
} // }
}
// BodyContentHandler handler = new BodyContentHandler(); // BodyContentHandler handler = new BodyContentHandler();
// ParseContext pcontext = new ParseContext(); // ParseContext pcontext = new ParseContext();
// XMLParser xmlparser = new XMLParser(); // XMLParser xmlparser = new XMLParser();
// xmlparser.parse(inS, handler, metadata, pcontext); // xmlparser.parse(inS, handler, metadata, pcontext);
// System.out.println("Contents of the document:" + handler.toString()); // System.out.println("Contents of the document:" + handler.toString());
} catch (SAXException ex) { // } catch (SAXException ex) {
Logger.getLogger(Converter.class.getName()).log(Level.WARNING, "Problems in xml processing " + ex.getMessage(), ex.getMessage()); // Logger.getLogger(Converter.class.getName()).log(Level.WARNING, "Problems in xml processing " + ex.getMessage(), ex.getMessage());
tika = new Tika(new TextDetector()); // tika = new Tika(new TextDetector());
} catch (ParserConfigurationException ex) { // } catch (ParserConfigurationException ex) {
Logger.getLogger(Converter.class.getName()).log(Level.WARNING, "Problems in xml processing " + ex.getMessage(), ex.getMessage()); // Logger.getLogger(Converter.class.getName()).log(Level.WARNING, "Problems in xml processing " + ex.getMessage(), ex.getMessage());
tika = new Tika(new TextDetector()); // tika = new Tika(new TextDetector());
} catch (IOException ex) { // } catch (IOException ex) {
Logger.getLogger(Converter.class.getName()).log(Level.WARNING, "Problems in xml processing " + ex.getMessage(), ex.getMessage()); // Logger.getLogger(Converter.class.getName()).log(Level.WARNING, "Problems in xml processing " + ex.getMessage(), ex.getMessage());
tika = new Tika(new TextDetector()); // tika = new Tika(new TextDetector());
} // }
} // }
tika.setMaxStringLength(maxLength); tika.setMaxStringLength(maxLength);
String txt = tika.parseToString(inS, metadata); String txt = tika.parseToString(inS, metadata);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment