|
|||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
org.cyberneko.html.filters.DefaultFilter
org.cyberneko.html.filters.Purifier
This filter purifies the HTML input to ensure XML well-formedness. The purification process includes:
Illegal characters in XML names are converted to the character sequence "_u####_", where "####" is the value of the Unicode character represented in hexadecimal. Illegal characters appearing in document content, by contrast, are converted to the character sequence "\\u####".
In comments, the character '-' is replaced by the character sequence "- " to prevent "--" from ever appearing in the comment content. For CDATA sections, the character ']' is replaced by the character sequence "] " to prevent "]]" from appearing.
The URI used for synthesized namespace bindings is "http://cyberneko.org/html/ns/synthesized/number" where number is generated to ensure uniqueness.
Field Summary | |
protected static String |
AUGMENTATIONS
Include infoset augmentations. |
protected boolean |
fAugmentations
Augmentations. |
protected boolean |
fInCDATASection
True if inside a CDATA section. |
protected NamespaceContext |
fNamespaceContext
Namespace information. |
protected boolean |
fNamespaces
Namespaces. |
protected String |
fPublicId
Public identifier of doctype declaration. |
protected boolean |
fSeenDoctype
True if the doctype declaration was seen. |
protected boolean |
fSeenRootElement
True if root element was seen. |
protected int |
fSynthesizedNamespaceCount
Synthesized namespace binding count. |
protected String |
fSystemId
System identifier of doctype declaration. |
protected static String |
NAMESPACES
Namespaces. |
protected static HTMLEventInfo |
SYNTHESIZED_ITEM
Synthesized event info item. |
static String |
SYNTHESIZED_NAMESPACE_PREFX
Synthesized namespace binding prefix. |
Fields inherited from class org.cyberneko.html.filters.DefaultFilter |
fDocumentHandler, fDocumentSource |
Constructor Summary | |
Purifier()
|
Method Summary | |
void |
characters(XMLString text,
Augmentations augs)
Characters. |
void |
comment(XMLString text,
Augmentations augs)
Comment. |
void |
doctypeDecl(String root,
String pubid,
String sysid,
Augmentations augs)
Doctype declaration. |
void |
emptyElement(QName element,
XMLAttributes attrs,
Augmentations augs)
Empty element. |
void |
endCDATA(Augmentations augs)
End CDATA section. |
void |
endElement(QName element,
Augmentations augs)
End element. |
protected void |
handleStartDocument()
Handle start document. |
protected void |
handleStartElement(QName element,
XMLAttributes attrs)
Handle start element. |
void |
processingInstruction(String target,
XMLString data,
Augmentations augs)
Processing instruction. |
protected String |
purifyName(String name,
boolean localpart)
Purify name. |
protected QName |
purifyQName(QName qname)
Purify qualified name. |
protected XMLString |
purifyText(XMLString text)
Purify content. |
void |
reset(XMLComponentManager manager)
Resets the component. |
void |
startCDATA(Augmentations augs)
Start CDATA section. |
void |
startDocument(XMLLocator locator,
String encoding,
Augmentations augs)
Start document. |
void |
startDocument(XMLLocator locator,
String encoding,
NamespaceContext nscontext,
Augmentations augs)
Start document. |
void |
startElement(QName element,
XMLAttributes attrs,
Augmentations augs)
Start element. |
protected void |
synthesizeBinding(XMLAttributes attrs,
String ns)
Synthesize namespace binding. |
protected Augmentations |
synthesizedAugs()
Returns an augmentations object with a synthesized item added. |
protected static String |
toHexString(int c,
int padlen)
Returns a padded hexadecimal string for the given value. |
void |
xmlDecl(String version,
String encoding,
String standalone,
Augmentations augs)
XML declaration. |
Methods inherited from class org.cyberneko.html.filters.DefaultFilter |
endDocument, endGeneralEntity, endPrefixMapping, getDocumentHandler, getDocumentSource, getFeatureDefault, getPropertyDefault, getRecognizedFeatures, getRecognizedProperties, ignorableWhitespace, merge, setDocumentHandler, setDocumentSource, setFeature, setProperty, startGeneralEntity, startPrefixMapping, textDecl |
Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
public static final String SYNTHESIZED_NAMESPACE_PREFX
protected static final String NAMESPACES
protected static final String AUGMENTATIONS
protected static final HTMLEventInfo SYNTHESIZED_ITEM
protected boolean fNamespaces
protected boolean fAugmentations
protected boolean fSeenDoctype
protected boolean fSeenRootElement
protected boolean fInCDATASection
protected String fPublicId
protected String fSystemId
protected NamespaceContext fNamespaceContext
protected int fSynthesizedNamespaceCount
Constructor Detail |
public Purifier()
Method Detail |
public void reset(XMLComponentManager manager) throws XMLConfigurationException
DefaultFilter
reset
in interface XMLComponent
reset
in class DefaultFilter
manager
- The component manager.
XMLConfigurationException
public void startDocument(XMLLocator locator, String encoding, Augmentations augs) throws XNIException
startDocument
in class DefaultFilter
XNIException
public void startDocument(XMLLocator locator, String encoding, NamespaceContext nscontext, Augmentations augs) throws XNIException
startDocument
in interface XMLDocumentHandler
startDocument
in class DefaultFilter
XNIException
public void xmlDecl(String version, String encoding, String standalone, Augmentations augs) throws XNIException
xmlDecl
in interface XMLDocumentHandler
xmlDecl
in class DefaultFilter
XNIException
public void comment(XMLString text, Augmentations augs) throws XNIException
comment
in interface XMLDocumentHandler
comment
in class DefaultFilter
XNIException
public void processingInstruction(String target, XMLString data, Augmentations augs) throws XNIException
processingInstruction
in interface XMLDocumentHandler
processingInstruction
in class DefaultFilter
XNIException
public void doctypeDecl(String root, String pubid, String sysid, Augmentations augs) throws XNIException
doctypeDecl
in interface XMLDocumentHandler
doctypeDecl
in class DefaultFilter
XNIException
public void startElement(QName element, XMLAttributes attrs, Augmentations augs) throws XNIException
startElement
in interface XMLDocumentHandler
startElement
in class DefaultFilter
XNIException
public void emptyElement(QName element, XMLAttributes attrs, Augmentations augs) throws XNIException
emptyElement
in interface XMLDocumentHandler
emptyElement
in class DefaultFilter
XNIException
public void startCDATA(Augmentations augs) throws XNIException
startCDATA
in interface XMLDocumentHandler
startCDATA
in class DefaultFilter
XNIException
public void endCDATA(Augmentations augs) throws XNIException
endCDATA
in interface XMLDocumentHandler
endCDATA
in class DefaultFilter
XNIException
public void characters(XMLString text, Augmentations augs) throws XNIException
characters
in interface XMLDocumentHandler
characters
in class DefaultFilter
XNIException
public void endElement(QName element, Augmentations augs) throws XNIException
endElement
in interface XMLDocumentHandler
endElement
in class DefaultFilter
XNIException
protected void handleStartDocument()
protected void handleStartElement(QName element, XMLAttributes attrs)
protected void synthesizeBinding(XMLAttributes attrs, String ns)
protected final Augmentations synthesizedAugs()
protected QName purifyQName(QName qname)
protected String purifyName(String name, boolean localpart)
protected XMLString purifyText(XMLString text)
protected static String toHexString(int c, int padlen)
|
|||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |