/*
* Copyright (c) 2003, 2004 Henri Sivonen and Taavi Hupponen
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package fi.iki.hsivonen.xml;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.Arrays;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
/**
 * Serializes a sequence of SAX events representing an XHTML 1.0 Strict
 * document to an <code>OutputStream</code> as a UTF-8-encoded HTML 4.01
 * Strict document. The SAX events must represent a valid XHTML 1.0 document,
 * except the namespace prefixes don't matter and there may be
 * <code>startElement</code> and <code>endElement</code> calls for
 * elements from other namespaces. The <code>startElement</code> and
 * <code>endElement</code> calls for non-XHTML elements are ignored. No
 * validity checking is performed. Hence, the emitter of the SAX events
 * is responsible for making sure the events represent a document that
 * meets the above requirements. The <code>OutputStream</code> is closed
 * when the end of the document is seen.
 *
 * @version $Id: HtmlSerializer.java,v 1.12 2004/08/29 08:19:23 hsivonen Exp $
 * @author hsivonen
 * @author taavi
 */
public class HtmlSerializer implements ContentHandler {

    /**
     * The XHTML namespace URI
     */
    private static final String XHTML_NS = "http://www.w3.org/1999/xhtml";

    /**
     * The XML namespace URI (the namespace of <code>xml:lang</code>)
     */
    private static final String XML_NS = "http://www.w3.org/XML/1998/namespace";

    /**
     * The document type declaration written at the start of the output
     */
    private static final String DOCTYPE =
            "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" "
            + "\"http://www.w3.org/TR/html4/strict.dtd\">\n";

    /**
     * HTML 4.01 Strict elements which don't have an end tag.
     * Must stay sorted: it is searched with <code>Arrays.binarySearch</code>.
     */
    private static final String[] emptyElements = {
        "area", "base", "br", "col", "hr",
        "img", "input", "link", "meta", "param"
    };

    /**
     * Minimized "boolean" HTML 4.01 Strict attributes.
     * Must stay sorted: it is searched with <code>Arrays.binarySearch</code>.
     */
    private static final String[] booleanAttributes = {
        "checked", "declare", "defer", "disabled", "ismap",
        "multiple", "nohref", "readonly", "selected"
    };

    /**
     * The writer used for output
     */
    private final Writer writer;

    /**
     * Indicates whether the root element has been seen
     */
    private boolean rootSeen;

    /**
     * Creates a new instance of HtmlSerializer that encodes its output
     * as UTF-8.
     *
     * @param out the stream to which the output is written
     */
    public HtmlSerializer(OutputStream out) {
        rootSeen = false;
        try {
            this.writer = new OutputStreamWriter(out, "UTF-8");
        } catch (UnsupportedEncodingException uee) {
            throw new RuntimeException("UTF-8 not supported", uee);
        }
    }

    /**
     * Tells whether a sorted array contains the given string.
     *
     * @param sorted an array in ascending order
     * @param key the string to look for
     * @return <code>true</code> if <code>key</code> is in the array
     */
    private static boolean contains(String[] sorted, String key) {
        return Arrays.binarySearch(sorted, key) >= 0;
    }

    /**
     * Writes one character, replacing the markup-significant characters
     * <code>&lt;</code>, <code>&gt;</code> and <code>&amp;</code> with
     * character entity references.
     *
     * @param c the character to write
     * @param inAttribute if <code>true</code>, the double quote is escaped
     *        as well, because attribute values are written inside double quotes
     *
     * @throws IOException if writing fails
     */
    private void writeEscaped(char c, boolean inAttribute) throws IOException {
        switch (c) {
            case '<':
                this.writer.write("&lt;");
                break;
            case '>':
                this.writer.write("&gt;");
                break;
            case '&':
                this.writer.write("&amp;");
                break;
            case '"':
                if (inAttribute) {
                    this.writer.write("&quot;");
                } else {
                    this.writer.write(c);
                }
                break;
            default:
                this.writer.write(c);
        }
    }

    /**
     * Writes out characters, escaping <code>&lt;</code>, <code>&gt;</code>
     * and <code>&amp;</code>.
     *
     * @param ch the source array
     * @param start the index of the first character to be written
     * @param length the number of characters to write
     *
     * @throws SAXException if there are IO problems
     */
    public void characters(char[] ch, int start, int length) throws SAXException {
        try {
            int end = start + length;
            for (int i = start; i < end; i++) {
                writeEscaped(ch[i], false);
            }
        } catch (IOException ioe) {
            throw new SAXException(ioe);
        }
    }

    /**
     * Must be called in the end. Closes the writer, and with it the
     * underlying stream.
     *
     * @throws SAXException if there are IO problems
     */
    public void endDocument() throws SAXException {
        try {
            this.writer.close();
        } catch (IOException ioe) {
            throw new SAXException(ioe);
        }
    }

    /**
     * Writes an end tag if the element is an XHTML element and is not an
     * empty element in HTML 4.01 Strict.
     *
     * @param namespaceURI the XML namespace
     * @param localName the element name in the namespace
     * @param qName ignored
     *
     * @throws SAXException if there are IO problems
     */
    public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
        if (!XHTML_NS.equals(namespaceURI) || contains(emptyElements, localName)) {
            return;
        }
        try {
            this.writer.write("</");
            this.writer.write(localName);
            this.writer.write('>');
        } catch (IOException ioe) {
            throw new SAXException(ioe);
        }
    }

    /**
     * Must be called first. Resets the root-element tracking and writes
     * the HTML 4.01 Strict document type declaration.
     *
     * @throws SAXException if there are IO problems
     */
    public void startDocument() throws SAXException {
        this.rootSeen = false;
        try {
            this.writer.write(DOCTYPE);
        } catch (IOException ioe) {
            throw new SAXException(ioe);
        }
    }

    /**
     * Writes a start tag if the element is an XHTML element.
     * <p>
     * Only attributes in no namespace are written, plus
     * <code>xml:lang</code>, which is written as <code>lang</code>. At most
     * one <code>lang</code> attribute is written per element: the first one
     * encountered wins. Attributes listed in <code>booleanAttributes</code>
     * are written in minimized form (name only); all other values are
     * double-quoted and escaped.
     *
     * @param namespaceURI the XML namespace
     * @param localName the element name in the namespace
     * @param qName ignored
     * @param atts the attribute list
     *
     * @throws SAXException if there are IO problems, or if the first
     *         element seen is not in the XHTML namespace
     */
    public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException {
        if (!XHTML_NS.equals(namespaceURI)) {
            // Foreign elements are skipped, but not as the document root.
            if (!this.rootSeen) {
                throw new SAXException("The root element was not in the xhtml namespace");
            }
            return;
        }
        this.rootSeen = true;
        try {
            this.writer.write('<');
            this.writer.write(localName);
            writeAttributes(atts);
            this.writer.write('>');
        } catch (IOException ioe) {
            throw new SAXException(ioe);
        }
    }

    /**
     * Writes the attributes of a start tag, each preceded by a space.
     *
     * @param atts the attribute list
     *
     * @throws IOException if writing fails
     */
    private void writeAttributes(Attributes atts) throws IOException {
        boolean langPrinted = false;
        int length = atts.getLength();
        for (int i = 0; i < length; i++) {
            String ns = atts.getURI(i);
            String name = null;
            if ("".equals(ns)) {
                name = atts.getLocalName(i);
            } else if (XML_NS.equals(ns) && "lang".equals(atts.getLocalName(i))) {
                // xml:lang is serialized as the HTML lang attribute
                name = "lang";
            }
            if (name == null) {
                // attribute from a foreign namespace: dropped
                continue;
            }
            if ("lang".equals(name)) {
                if (langPrinted) {
                    // lang and xml:lang both present: keep only the first
                    continue;
                }
                langPrinted = true;
            }
            this.writer.write(' ');
            this.writer.write(name);
            if (!contains(booleanAttributes, name)) {
                this.writer.write("=\"");
                String value = atts.getValue(i);
                for (int j = 0; j < value.length(); j++) {
                    writeEscaped(value.charAt(j), true);
                }
                this.writer.write('"');
            }
        }
    }

    /**
     * Used for testing. Pass a file:// URL as the command line argument.
     * The serialized document is written to standard output.
     */
    public static void main(String[] args) {
        try {
            javax.xml.parsers.SAXParserFactory fac = javax.xml.parsers.SAXParserFactory.newInstance();
            fac.setNamespaceAware(true);
            fac.setValidating(false);
            XMLReader parser = fac.newSAXParser().getXMLReader();
            parser.setContentHandler(new HtmlSerializer(System.out));
            parser.parse(args[0]);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /** Does nothing. */
    public void endPrefixMapping(String str) throws SAXException {
    }

    /** Does nothing. */
    public void ignorableWhitespace(char[] values, int param, int param2) throws SAXException {
    }

    /** Does nothing. */
    public void processingInstruction(String str, String str1) throws SAXException {
    }

    /** Does nothing. */
    public void setDocumentLocator(Locator locator) {
    }

    /** Does nothing. */
    public void skippedEntity(String str) throws SAXException {
    }

    /** Does nothing. */
    public void startPrefixMapping(String str, String str1) throws SAXException {
    }
}