/* * Copyright 2001-2004 (C) MetaStuff, Ltd. All Rights Reserved. * * This software is open source. * See the bottom of this file for the licence. * */ package org.dom4j.io; import org.dom4j.*; import org.jivesoftware.openfire.net.MXParser; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xmlpull.v1.XmlPullParser; import org.xmlpull.v1.XmlPullParserException; import org.xmlpull.v1.XmlPullParserFactory; import javax.annotation.Nonnull; import java.io.*; import java.net.URL; import java.util.Arrays; import java.util.Collection; import java.util.HashSet; import java.util.Set; /** *
XMPPPacketReader is a Reader of DOM4J documents that
* uses the fast
* XML Pull Parser 3.x.
* It is very fast for use in SOAP style environments.
DocumentFactory used to create new document objects
*/
// May be null until first requested; getDocumentFactory() then falls back to DocumentFactory.getInstance().
private DocumentFactory factory;
/**
* Pull parser used to parse XML. Held as the concrete {@link MXParser} type.
*/
private MXParser xppParser;
/**
* XmlPullParserFactory held by this reader.
* NOTE(review): presumably used to create/configure the pull parser - its use is not visible in this section; confirm.
*/
private XmlPullParserFactory xppFactory;
/**
* DispatchHandler to call when each Element is encountered
*/
private DispatchHandler dispatchHandler;
/**
* Last time a full Document was read or a heartbeat was received. Heartbeats
* are represented as whitespaces received while a Document is not being parsed.
* Starts out as the wall-clock time at which this field is initialised.
*/
private long lastActive = System.currentTimeMillis();
/**
* Streams of various endpoints (e.g. s2s, c2s) use different default namespaces. To be able to use a stanza that's
* parsed on one type of endpoint in the context of another endpoint, we explicitly ignore these namespaces. This
* allows us to forward, for instance, a stanza received via C2S (which has the "jabber:client" default namespace)
* on a S2S stream (which has the "jabber:server" default namespace).
*
* @see RFC 6120, 4.8.3. XMPP Content Namespaces
*/
public static final CollectionReads a Document from the given File
File to read from.
* @return the newly created Document instance
* @throws DocumentException if an error occurs during parsing.
* @throws IOException if an error occurs during reading
* @throws XmlPullParserException if an error occurs creating a pull parser
*/
public Document read(File file) throws DocumentException, IOException, XmlPullParserException {
    // The absolute path doubles as the system ID handed to the Reader-based overload.
    String systemID = file.getAbsolutePath();
    // The reader is opened by this method, so it is closed here once parsing has
    // finished (or failed) rather than leaking the underlying file handle.
    // NOTE(review): FileReader decodes using the default charset - confirm that is intended.
    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
        return read(reader, systemID);
    }
}
/**
* Reads a Document from the given URL
*
* @param url URL to read from.
* @return the newly created Document instance
* @throws DocumentException if an error occurs during parsing.
* @throws IOException if an error occurs during reading
* @throws XmlPullParserException if an error occurs creating a pull parser
*/
public Document read(URL url) throws DocumentException, IOException, XmlPullParserException {
    // The external form of the URL is used as the system ID of the resulting document.
    String systemID = url.toExternalForm();
    // The stream is opened by this method, so it is closed here once parsing has
    // finished (or failed) rather than leaking the underlying connection.
    try (InputStream in = url.openStream()) {
        return read(createReader(in), systemID);
    }
}
/**
* Reads a Document from the given URL or filename.
*
* If the systemID contains a ':' character then it is
* assumed to be a URL, otherwise it is assumed to be a file name.
* If you want finer grained control over this mechanism then please
* explicitly pass in either a {@link URL} or a {@link File} instance
* instead of a {@link String} to denote the source of the document.
*
* Reads a Document from the given stream
*
* @param in InputStream to read from.
* @return the newly created Document instance
* @throws DocumentException if an error occurs during parsing.
* @throws IOException if an error occurs during reading
* @throws XmlPullParserException if an error occurs creating a pull parser
*/
public Document read(InputStream in) throws DocumentException, IOException, XmlPullParserException {
    // Wrap the raw stream in a character reader, then hand over to the Reader-based overload.
    Reader source = createReader(in);
    return read(source);
}
/**
* Reads a Document from the given stream
*
* @param charSet the charSet that the input is encoded in
* @param in InputStream to read from.
* @return the newly created Document instance
* @throws DocumentException if an error occurs during parsing.
* @throws IOException if an error occurs during reading
* @throws XmlPullParserException if an error occurs creating a pull parser
*/
public Document read(String charSet, InputStream in)
        throws DocumentException, IOException, XmlPullParserException {
    // Decode the stream with the caller-supplied charset before parsing.
    Reader decoded = createReader(in, charSet);
    return read(decoded);
}
/**
* Reads a Document from the given array of characters
*
* @param text is the text to parse
* @return the newly created Document instance
* @throws DocumentException if an error occurs during parsing.
* @throws IOException if an error occurs during reading
* @throws XmlPullParserException if an error occurs creating a pull parser
*/
public Document read(char[] text) throws DocumentException, IOException, XmlPullParserException {
    // Point the pull parser at the in-memory characters, then parse from it.
    XmlPullParser parser = getXPPParser();
    parser.setInput(new CharArrayReader(text));
    return parseDocument();
}
/**
* Reads a Document from the given stream
*
* @param in InputStream to read from.
* @param systemID is the URI for the input
* @return the newly created Document instance
* @throws DocumentException if an error occurs during parsing.
* @throws IOException if an error occurs during reading
* @throws XmlPullParserException if an error occurs creating a pull parser
*/
public Document read(InputStream in, String systemID) throws DocumentException, IOException, XmlPullParserException {
    // Wrap the raw stream in a character reader and pass the system ID along unchanged.
    Reader source = createReader(in);
    return read(source, systemID);
}
/**
* Reads a Document from the given Reader
*
* @return the DocumentFactory used to create document objects
*/
public DocumentFactory getDocumentFactory() {
    // A factory that was set explicitly (or resolved earlier) is returned as-is.
    if (factory != null) {
        return factory;
    }
    // Nothing configured yet: fall back to the shared dom4j instance and remember it.
    factory = DocumentFactory.getInstance();
    return factory;
}
/**
* This sets the DocumentFactory used to create new documents.
* This method allows the building of custom DOM4J tree objects to be implemented
* easily using a custom derivation of {@link DocumentFactory}.
*
* @param factory DocumentFactory used to create DOM4J objects
*/
public void setDocumentFactory(DocumentFactory factory) {
// Stored as-is; a null value makes getDocumentFactory() fall back to DocumentFactory.getInstance() on its next call.
this.factory = factory;
}
/**
* Adds the ElementHandler to be called when the
* specified path is encountered.
*
* @param path is the path to be handled
* @param handler is the ElementHandler to be called
* by the event based processor.
*/
public void addHandler(String path, ElementHandler handler) {
    // Registration is delegated to the dispatch handler of this reader.
    DispatchHandler dispatcher = getDispatchHandler();
    dispatcher.addHandler(path, handler);
}
/**
* Removes the ElementHandler from the event based
* processor, for the specified path.
*
* @param path is the path to remove the ElementHandler for.
*/
public void removeHandler(String path) {
    // Removal is delegated to the dispatch handler of this reader.
    DispatchHandler dispatcher = getDispatchHandler();
    dispatcher.removeHandler(path);
}
/**
* When multiple ElementHandler instances have been
* registered, this will set a default ElementHandler
* to be called for any path which does NOT have a handler
* registered.
*
* @param handler is the ElementHandler to be called
* by the event based processor.
*/
public void setDefaultHandler(ElementHandler handler) {
    // The fallback handler is kept by the dispatch handler of this reader.
    DispatchHandler dispatcher = getDispatchHandler();
    dispatcher.setDefaultHandler(handler);
}
/**
* Returns the last time a full Document was read or a heartbeat was received. Heartbeats
* are represented as whitespaces or \n received while a Document is not being parsed.
*
* @return the time in milliseconds when the last document or heartbeat was received.
*/
public long getLastActive() {
    long parserHeartbeat;
    try {
        // The parser records the time of the last heartbeat it saw.
        parserHeartbeat = getXPPParser().getLastHeartbeat();
    }
    catch (XmlPullParserException e) {
        Log.trace("An exception occurred while trying to get the last hartbeat.", e);
        // Without a parser there is no heartbeat; 0 lets lastActive win in the comparison below.
        parserHeartbeat = 0;
    }
    // Whichever happened most recently counts as the last activity.
    return Math.max(lastActive, parserHeartbeat);
}
/*
* DANIELE: Add parse document by string
*/
public Document parseDocument(String xml) throws DocumentException {
    // Parses the complete XML text via DocumentHelper instead of this reader's pull parser.
    // A longer variant that reuses getDocumentFactory() through a SAXReader was sketched
    // here earlier but never enabled.
    // NOTE(review): confirm that the dom4j version in use parses untrusted text safely
    // (external entities disabled) before feeding network input to this method.
    // TODO Optimize. Do not create a sax reader for each parsing
    return DocumentHelper.parseText(xml);
}
// Implementation methods
//-------------------------------------------------------------------------
public Document parseDocument() throws DocumentException, IOException, XmlPullParserException {
DocumentFactory df = getDocumentFactory();
Document document = df.createDocument();
Element parent = null;
XmlPullParser pp = getXPPParser();
int count = 0;
while (true) {
int type = -1;
type = pp.nextToken();
switch (type) {
case XmlPullParser.PROCESSING_INSTRUCTION: {
String text = pp.getText();
int loc = text.indexOf(" ");
if (loc >= 0) {
document.addProcessingInstruction(text.substring(0, loc),
text.substring(loc + 1));
}
else {
document.addProcessingInstruction(text, "");
}
break;
}
case XmlPullParser.COMMENT: {
if (parent != null) {
parent.addComment(pp.getText());
}
else {
document.addComment(pp.getText());
}
break;
}
case XmlPullParser.CDSECT: {
String text = pp.getText();
if (parent != null) {
parent.addCDATA(text);
}
else {
if (!text.trim().isEmpty()) {
throw new DocumentException("Cannot have text content outside of the root document");
}
}
break;
}
case XmlPullParser.ENTITY_REF: {
String text = pp.getText();
if (parent != null) {
parent.addText(text);
}
else {
if (!text.trim().isEmpty()) {
throw new DocumentException("Cannot have an entityref outside of the root document");
}
}
break;
}
case XmlPullParser.END_DOCUMENT: {
return document;
}
case XmlPullParser.START_TAG: {
QName qname = (pp.getPrefix() == null) ? df.createQName(pp.getName(), pp.getNamespace()) : df.createQName(pp.getName(), pp.getPrefix(), pp.getNamespace());
Element newElement;
// Strip namespace from all default-namespaced elements if
// all ancestors have the same namespace and it's a content
// namespace.
boolean dropNamespace = false;
if (pp.getPrefix() == null && IGNORED_NAMESPACE_ON_STANZA.contains(qname.getNamespaceURI())) {
// Default namespaced element which is in a content namespace,
// so we'll drop. Example, stanzas,