org.cyberneko.html
Class HTMLScanner

java.lang.Object
  extended by org.cyberneko.html.HTMLScanner
All Implemented Interfaces:
HTMLComponent, XMLComponent, XMLDocumentScanner, XMLDocumentSource, XMLLocator

public class HTMLScanner
extends Object
implements XMLDocumentScanner, XMLLocator, HTMLComponent

A simple HTML scanner. This scanner makes no attempt to balance tags or fix other problems in the source document — it just scans what it can and generates XNI document "events", ignoring errors of all kinds.

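This component recognizes the following features (identified by the constants of the same name documented under Field Detail): AUGMENTATIONS, REPORT_ERRORS, NOTIFY_CHAR_REFS, NOTIFY_XML_BUILTIN_REFS, NOTIFY_HTML_BUILTIN_REFS, SCRIPT_STRIP_COMMENT_DELIMS, STYLE_STRIP_COMMENT_DELIMS, IGNORE_SPECIFIED_CHARSET, CDATA_SECTIONS, OVERRIDE_DOCTYPE, and INSERT_DOCTYPE.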

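This component recognizes the following properties (identified by the constants of the same name documented under Field Detail): NAMES_ELEMS, NAMES_ATTRS, DEFAULT_ENCODING, ERROR_REPORTER, DOCTYPE_PUBID, and DOCTYPE_SYSID.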

Version:
$Id$
Author:
Andy Clark
See Also:
HTMLElements, HTMLEntities

Nested Class Summary
 class HTMLScanner.ContentScanner
          The primary HTML document scanner.
static class HTMLScanner.CurrentEntity
          Current entity.
protected static class HTMLScanner.LocationItem
          Location infoset item.
static class HTMLScanner.PlaybackInputStream
          A playback input stream.
static interface HTMLScanner.Scanner
          Basic scanner interface.
 class HTMLScanner.SpecialScanner
          Special scanner used for elements whose content needs to be scanned as plain text, ignoring markup such as elements and entity references.
 
Field Summary
protected static String AUGMENTATIONS
          Include infoset augmentations.
static String CDATA_SECTIONS
          Scan CDATA sections.
protected static boolean DEBUG_CALLBACKS
          Set to true to debug callbacks.
protected static int DEFAULT_BUFFER_SIZE
          Default buffer size.
protected static String DEFAULT_ENCODING
          Default encoding.
protected static String DOCTYPE_PUBID
          Doctype declaration public identifier.
protected static String DOCTYPE_SYSID
          Doctype declaration system identifier.
protected static String ERROR_REPORTER
          Error reporter.
protected  boolean fAugmentations
          Augmentations.
protected  int fBeginColumnNumber
          Beginning column number.
protected  int fBeginLineNumber
          Beginning line number.
protected  HTMLScanner.PlaybackInputStream fByteStream
          The playback byte stream.
protected  boolean fCDATASections
          CDATA sections.
protected  HTMLScanner.Scanner fContentScanner
          Content scanner.
protected  HTMLScanner.CurrentEntity fCurrentEntity
          Current entity.
protected  Stack fCurrentEntityStack
          The current entity stack.
protected  String fDefaultIANAEncoding
          Default encoding.
protected  String fDoctypePubid
          Doctype declaration public identifier.
protected  String fDoctypeSysid
          Doctype declaration system identifier.
protected  XMLDocumentHandler fDocumentHandler
          The document handler.
protected  int fElementCount
          Element count.
protected  int fElementDepth
          Element depth.
protected  int fEndColumnNumber
          Ending column number.
protected  int fEndLineNumber
          Ending line number.
protected  HTMLErrorReporter fErrorReporter
          Error reporter.
protected  String fIANAEncoding
          Auto-detected IANA encoding.
protected  boolean fIgnoreSpecifiedCharset
          Ignore specified character set.
protected  boolean fInsertDoctype
          Insert document type declaration.
protected  String fJavaEncoding
          Auto-detected Java encoding.
protected  short fNamesAttrs
          Modify HTML attribute names.
protected  short fNamesElems
          Modify HTML element names.
protected  boolean fNotifyCharRefs
          Notify character entity references.
protected  boolean fNotifyHtmlBuiltinRefs
          Notify HTML built-in general entity references.
protected  boolean fNotifyXmlBuiltinRefs
          Notify XML built-in general entity references.
protected  boolean fOverrideDoctype
          Override doctype declaration public and system identifiers.
protected  boolean fReportErrors
          Report errors.
protected  HTMLScanner.Scanner fScanner
          The current scanner.
protected  short fScannerState
          The current scanner state.
protected  boolean fScriptStripCommentDelims
          Strip comment delimiters from SCRIPT tags.
protected  HTMLScanner.SpecialScanner fSpecialScanner
          Special scanner used for elements whose content needs to be scanned as plain text, ignoring markup such as elements and entity references.
protected  XMLString fString
          String.
protected  XMLStringBuffer fStringBuffer
          String buffer.
protected  boolean fStyleStripCommentDelims
          Strip comment delimiters from STYLE tags.
static String HTML_4_01_FRAMESET_PUBID
          HTML 4.01 frameset public identifier ("-//W3C//DTD HTML 4.01 Frameset//EN").
static String HTML_4_01_FRAMESET_SYSID
          HTML 4.01 frameset system identifier ("http://www.w3.org/TR/html4/frameset.dtd").
static String HTML_4_01_STRICT_PUBID
          HTML 4.01 strict public identifier ("-//W3C//DTD HTML 4.01//EN").
static String HTML_4_01_STRICT_SYSID
          HTML 4.01 strict system identifier ("http://www.w3.org/TR/html4/strict.dtd").
static String HTML_4_01_TRANSITIONAL_PUBID
          HTML 4.01 transitional public identifier ("-//W3C//DTD HTML 4.01 Transitional//EN").
static String HTML_4_01_TRANSITIONAL_SYSID
          HTML 4.01 transitional system identifier ("http://www.w3.org/TR/html4/loose.dtd").
static String IGNORE_SPECIFIED_CHARSET
          Ignore specified charset found in the <meta http-equiv='Content-Type' content='text/html;charset=…'> tag.
static String INSERT_DOCTYPE
          Insert document type declaration.
protected static String NAMES_ATTRS
          Modify HTML attribute names: { "upper", "lower", "default" }.
protected static String NAMES_ELEMS
          Modify HTML element names: { "upper", "lower", "default" }.
protected static short NAMES_LOWERCASE
          Lowercase HTML names.
protected static short NAMES_NO_CHANGE
          Don't modify HTML names.
protected static short NAMES_UPPERCASE
          Uppercase HTML names.
static String NOTIFY_CHAR_REFS
          Notify character entity references (e.g. &#32;, &#x20;, etc).
static String NOTIFY_HTML_BUILTIN_REFS
          Notify handler of built-in HTML entity references (e.g. &nbsp;, &copy;, etc).
static String NOTIFY_XML_BUILTIN_REFS
          Notify handler of built-in XML entity references (e.g. &amp;, &lt;, etc).
static String OVERRIDE_DOCTYPE
          Override doctype declaration public and system identifiers.
protected static String REPORT_ERRORS
          Report errors.
static String SCRIPT_STRIP_COMMENT_DELIMS
          Strip HTML comment delimiters ("<!--" and "-->") from SCRIPT tag contents.
protected static short STATE_CONTENT
          State: content.
protected static short STATE_END_DOCUMENT
          State: end document.
protected static short STATE_MARKUP_BRACKET
          State: markup bracket.
protected static short STATE_START_DOCUMENT
          State: start document.
static String STYLE_STRIP_COMMENT_DELIMS
          Strip HTML comment delimiters ("<!--" and "-->") from STYLE tag contents.
protected static HTMLEventInfo SYNTHESIZED_ITEM
          Synthesized event info item.
 
Constructor Summary
HTMLScanner()
           
 
Method Summary
protected static boolean builtinXmlRef(String name)
          Returns true if the name is a built-in XML general entity reference.
 void cleanup(boolean closeall)
          Cleans up used resources.
static String expandSystemId(String systemId, String baseSystemId)
          Expands a system id and returns the system id as a URI, if it can be expanded.
protected static String fixURI(String str)
          Fixes a platform dependent filename to standard URI form.
 String getBaseSystemId()
          Returns the base system identifier.
 int getColumnNumber()
          Returns the current column number.
 XMLDocumentHandler getDocumentHandler()
          Returns the document handler.
 String getEncoding()
          Returns the encoding.
 String getExpandedSystemId()
          Returns the expanded system identifier.
 Boolean getFeatureDefault(String featureId)
          Returns the default state for a feature.
 int getLineNumber()
          Returns the current line number.
 String getLiteralSystemId()
          Returns the literal system identifier.
protected static short getNamesValue(String value)
          Converts HTML names string value to constant value.
 Object getPropertyDefault(String propertyId)
          Returns the default state for a property.
 String getPublicId()
          Returns the public identifier.
 String[] getRecognizedFeatures()
          Returns recognized features.
 String[] getRecognizedProperties()
          Returns recognized properties.
protected static String getValue(XMLAttributes attrs, String aname)
          Returns the value of the specified attribute, ignoring case.
protected  int load(int offset)
          Loads a new chunk of data into the buffer and returns the number of characters loaded or -1 if no additional characters were loaded.
protected  Augmentations locationAugs()
          Returns an augmentations object with a location item added.
protected static String modifyName(String name, short mode)
          Modifies the given name based on the specified mode.
 void pushInputSource(XMLInputSource inputSource)
          Pushes an input source onto the current entity stack.
protected  int read()
          Reads a single character.
 void reset(XMLComponentManager manager)
          Resets the component.
protected  XMLResourceIdentifier resourceId()
          Returns an empty resource identifier.
protected  void scanDoctype()
          Scans a DOCTYPE line.
 boolean scanDocument(boolean complete)
          Scans the document.
protected  int scanEntityRef(XMLStringBuffer str, boolean content)
          Scans an entity reference.
protected  String scanLiteral()
          Scans a quoted literal.
protected  String scanName()
          Scans a name.
 void setDocumentHandler(XMLDocumentHandler handler)
          Sets the document handler.
 void setFeature(String featureId, boolean state)
          Sets a feature.
 void setInputSource(XMLInputSource source)
          Sets the input source.
 void setProperty(String propertyId, Object value)
          Sets a property.
protected  void setScanner(HTMLScanner.Scanner scanner)
          Sets the scanner.
protected  void setScannerState(short state)
          Sets the scanner state.
protected  boolean skip(String s, boolean caseSensitive)
          Returns true if the specified text is present and is skipped.
protected  boolean skipMarkup(boolean balance)
          Skips markup.
protected  int skipNewlines()
          Skips newlines and returns the number of newlines skipped.
protected  int skipNewlines(int maxlines)
          Skips newlines and returns the number of newlines skipped.
protected  boolean skipSpaces()
          Skips whitespace.
protected  Augmentations synthesizedAugs()
          Returns an augmentations object with a synthesized item added.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

HTML_4_01_STRICT_PUBID

public static final String HTML_4_01_STRICT_PUBID
HTML 4.01 strict public identifier ("-//W3C//DTD HTML 4.01//EN").

See Also:
Constant Field Values

HTML_4_01_STRICT_SYSID

public static final String HTML_4_01_STRICT_SYSID
HTML 4.01 strict system identifier ("http://www.w3.org/TR/html4/strict.dtd").

See Also:
Constant Field Values

HTML_4_01_TRANSITIONAL_PUBID

public static final String HTML_4_01_TRANSITIONAL_PUBID
HTML 4.01 transitional public identifier ("-//W3C//DTD HTML 4.01 Transitional//EN").

See Also:
Constant Field Values

HTML_4_01_TRANSITIONAL_SYSID

public static final String HTML_4_01_TRANSITIONAL_SYSID
HTML 4.01 transitional system identifier ("http://www.w3.org/TR/html4/loose.dtd").

See Also:
Constant Field Values

HTML_4_01_FRAMESET_PUBID

public static final String HTML_4_01_FRAMESET_PUBID
HTML 4.01 frameset public identifier ("-//W3C//DTD HTML 4.01 Frameset//EN").

See Also:
Constant Field Values

HTML_4_01_FRAMESET_SYSID

public static final String HTML_4_01_FRAMESET_SYSID
HTML 4.01 frameset system identifier ("http://www.w3.org/TR/html4/frameset.dtd").

See Also:
Constant Field Values

AUGMENTATIONS

protected static final String AUGMENTATIONS
Include infoset augmentations.

See Also:
Constant Field Values

REPORT_ERRORS

protected static final String REPORT_ERRORS
Report errors.

See Also:
Constant Field Values

NOTIFY_CHAR_REFS

public static final String NOTIFY_CHAR_REFS
Notify character entity references (e.g. &#32;, &#x20;, etc).

See Also:
Constant Field Values

NOTIFY_XML_BUILTIN_REFS

public static final String NOTIFY_XML_BUILTIN_REFS
Notify handler of built-in entity references (e.g. &amp;, &lt;, etc).

Note: This only applies to the five pre-defined XML general entities, namely "amp", "lt", "gt", "quot", and "apos". This is done for compatibility with the Xerces feature.

To be notified of the built-in entity references in HTML, set the http://cyberneko.org/html/features/scanner/notify-builtin-refs feature to true.

See Also:
Constant Field Values

NOTIFY_HTML_BUILTIN_REFS

public static final String NOTIFY_HTML_BUILTIN_REFS
Notify handler of built-in entity references (e.g. &nbsp;, &copy;, etc).

Note: This includes the five pre-defined XML general entities.

See Also:
Constant Field Values

SCRIPT_STRIP_COMMENT_DELIMS

public static final String SCRIPT_STRIP_COMMENT_DELIMS
Strip HTML comment delimiters ("<!--" and "-->") from SCRIPT tag contents.

See Also:
Constant Field Values

STYLE_STRIP_COMMENT_DELIMS

public static final String STYLE_STRIP_COMMENT_DELIMS
Strip HTML comment delimiters ("<!--" and "-->") from STYLE tag contents.

See Also:
Constant Field Values

IGNORE_SPECIFIED_CHARSET

public static final String IGNORE_SPECIFIED_CHARSET
Ignore specified charset found in the <meta http-equiv='Content-Type' content='text/html;charset=…'> tag.

See Also:
Constant Field Values

CDATA_SECTIONS

public static final String CDATA_SECTIONS
Scan CDATA sections.

See Also:
Constant Field Values

OVERRIDE_DOCTYPE

public static final String OVERRIDE_DOCTYPE
Override doctype declaration public and system identifiers.

See Also:
Constant Field Values

INSERT_DOCTYPE

public static final String INSERT_DOCTYPE
Insert document type declaration.

See Also:
Constant Field Values

NAMES_ELEMS

protected static final String NAMES_ELEMS
Modify HTML element names: { "upper", "lower", "default" }.

See Also:
Constant Field Values

NAMES_ATTRS

protected static final String NAMES_ATTRS
Modify HTML attribute names: { "upper", "lower", "default" }.

See Also:
Constant Field Values

DEFAULT_ENCODING

protected static final String DEFAULT_ENCODING
Default encoding.

See Also:
Constant Field Values

ERROR_REPORTER

protected static final String ERROR_REPORTER
Error reporter.

See Also:
Constant Field Values

DOCTYPE_PUBID

protected static final String DOCTYPE_PUBID
Doctype declaration public identifier.

See Also:
Constant Field Values

DOCTYPE_SYSID

protected static final String DOCTYPE_SYSID
Doctype declaration system identifier.

See Also:
Constant Field Values

STATE_CONTENT

protected static final short STATE_CONTENT
State: content.

See Also:
Constant Field Values

STATE_MARKUP_BRACKET

protected static final short STATE_MARKUP_BRACKET
State: markup bracket.

See Also:
Constant Field Values

STATE_START_DOCUMENT

protected static final short STATE_START_DOCUMENT
State: start document.

See Also:
Constant Field Values

STATE_END_DOCUMENT

protected static final short STATE_END_DOCUMENT
State: end document.

See Also:
Constant Field Values

NAMES_NO_CHANGE

protected static final short NAMES_NO_CHANGE
Don't modify HTML names.

See Also:
Constant Field Values

NAMES_UPPERCASE

protected static final short NAMES_UPPERCASE
Uppercase HTML names.

See Also:
Constant Field Values

NAMES_LOWERCASE

protected static final short NAMES_LOWERCASE
Lowercase HTML names.

See Also:
Constant Field Values

DEFAULT_BUFFER_SIZE

protected static final int DEFAULT_BUFFER_SIZE
Default buffer size.

See Also:
Constant Field Values

DEBUG_CALLBACKS

protected static final boolean DEBUG_CALLBACKS
Set to true to debug callbacks.

See Also:
Constant Field Values

SYNTHESIZED_ITEM

protected static final HTMLEventInfo SYNTHESIZED_ITEM
Synthesized event info item.


fAugmentations

protected boolean fAugmentations
Augmentations.


fReportErrors

protected boolean fReportErrors
Report errors.


fNotifyCharRefs

protected boolean fNotifyCharRefs
Notify character entity references.


fNotifyXmlBuiltinRefs

protected boolean fNotifyXmlBuiltinRefs
Notify XML built-in general entity references.


fNotifyHtmlBuiltinRefs

protected boolean fNotifyHtmlBuiltinRefs
Notify HTML built-in general entity references.


fScriptStripCommentDelims

protected boolean fScriptStripCommentDelims
Strip comment delimiters from SCRIPT tags.


fStyleStripCommentDelims

protected boolean fStyleStripCommentDelims
Strip comment delimiters from STYLE tags.


fIgnoreSpecifiedCharset

protected boolean fIgnoreSpecifiedCharset
Ignore specified character set.


fCDATASections

protected boolean fCDATASections
CDATA sections.


fOverrideDoctype

protected boolean fOverrideDoctype
Override doctype declaration public and system identifiers.


fInsertDoctype

protected boolean fInsertDoctype
Insert document type declaration.


fNamesElems

protected short fNamesElems
Modify HTML element names.


fNamesAttrs

protected short fNamesAttrs
Modify HTML attribute names.


fDefaultIANAEncoding

protected String fDefaultIANAEncoding
Default encoding.


fErrorReporter

protected HTMLErrorReporter fErrorReporter
Error reporter.


fDoctypePubid

protected String fDoctypePubid
Doctype declaration public identifier.


fDoctypeSysid

protected String fDoctypeSysid
Doctype declaration system identifier.


fBeginLineNumber

protected int fBeginLineNumber
Beginning line number.


fBeginColumnNumber

protected int fBeginColumnNumber
Beginning column number.


fEndLineNumber

protected int fEndLineNumber
Ending line number.


fEndColumnNumber

protected int fEndColumnNumber
Ending column number.


fByteStream

protected HTMLScanner.PlaybackInputStream fByteStream
The playback byte stream.


fCurrentEntity

protected HTMLScanner.CurrentEntity fCurrentEntity
Current entity.


fCurrentEntityStack

protected final Stack fCurrentEntityStack
The current entity stack.


fScanner

protected HTMLScanner.Scanner fScanner
The current scanner.


fScannerState

protected short fScannerState
The current scanner state.


fDocumentHandler

protected XMLDocumentHandler fDocumentHandler
The document handler.


fIANAEncoding

protected String fIANAEncoding
Auto-detected IANA encoding.


fJavaEncoding

protected String fJavaEncoding
Auto-detected Java encoding.


fElementCount

protected int fElementCount
Element count.


fElementDepth

protected int fElementDepth
Element depth.


fContentScanner

protected HTMLScanner.Scanner fContentScanner
Content scanner.


fSpecialScanner

protected HTMLScanner.SpecialScanner fSpecialScanner
Special scanner used for elements whose content needs to be scanned as plain text, ignoring markup such as elements and entity references. For example: <SCRIPT> and <COMMENT>.


fString

protected final XMLString fString
String.


fStringBuffer

protected final XMLStringBuffer fStringBuffer
String buffer.

Constructor Detail

HTMLScanner

public HTMLScanner()
Method Detail

pushInputSource

public void pushInputSource(XMLInputSource inputSource)
Pushes an input source onto the current entity stack. This enables the scanner to transparently scan new content (e.g. the output written by an embedded script). At the end of the current entity, the scanner returns to where it left off at the time this entity source was pushed.

Note: This functionality is experimental at this time and is subject to change in future releases of NekoHTML.

Parameters:
inputSource - The new input source to start scanning.

cleanup

public void cleanup(boolean closeall)
Cleans up used resources. For example, if scanning is terminated early, then this method ensures all remaining open streams are closed.

Parameters:
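closeall - If true, close all streams, including the original. This flag exists for cases when the application has opened the original document stream and should be responsible for closing it; in such cases, presumably pass false so that the original stream is left open for the application to close.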

getEncoding

public String getEncoding()
Returns the encoding.

Specified by:
getEncoding in interface XMLLocator

getPublicId

public String getPublicId()
Returns the public identifier.

Specified by:
getPublicId in interface XMLLocator

getBaseSystemId

public String getBaseSystemId()
Returns the base system identifier.

Specified by:
getBaseSystemId in interface XMLLocator

getLiteralSystemId

public String getLiteralSystemId()
Returns the literal system identifier.

Specified by:
getLiteralSystemId in interface XMLLocator

getExpandedSystemId

public String getExpandedSystemId()
Returns the expanded system identifier.

Specified by:
getExpandedSystemId in interface XMLLocator

getLineNumber

public int getLineNumber()
Returns the current line number.

Specified by:
getLineNumber in interface XMLLocator

getColumnNumber

public int getColumnNumber()
Returns the current column number.

Specified by:
getColumnNumber in interface XMLLocator

getFeatureDefault

public Boolean getFeatureDefault(String featureId)
Returns the default state for a feature.

Specified by:
getFeatureDefault in interface HTMLComponent

getPropertyDefault

public Object getPropertyDefault(String propertyId)
Returns the default state for a property.

Specified by:
getPropertyDefault in interface HTMLComponent

getRecognizedFeatures

public String[] getRecognizedFeatures()
Returns recognized features.

Specified by:
getRecognizedFeatures in interface XMLComponent

getRecognizedProperties

public String[] getRecognizedProperties()
Returns recognized properties.

Specified by:
getRecognizedProperties in interface XMLComponent

reset

public void reset(XMLComponentManager manager)
           throws XMLConfigurationException
Resets the component.

Specified by:
reset in interface XMLComponent
Throws:
XMLConfigurationException

setFeature

public void setFeature(String featureId,
                       boolean state)
                throws XMLConfigurationException
Sets a feature.

Specified by:
setFeature in interface XMLComponent
Throws:
XMLConfigurationException

setProperty

public void setProperty(String propertyId,
                        Object value)
                 throws XMLConfigurationException
Sets a property.

Specified by:
setProperty in interface XMLComponent
Throws:
XMLConfigurationException

setInputSource

public void setInputSource(XMLInputSource source)
                    throws IOException
Sets the input source.

Specified by:
setInputSource in interface XMLDocumentScanner
Throws:
IOException

scanDocument

public boolean scanDocument(boolean complete)
                     throws XNIException,
                            IOException
Scans the document.

Specified by:
scanDocument in interface XMLDocumentScanner
Throws:
XNIException
IOException

setDocumentHandler

public void setDocumentHandler(XMLDocumentHandler handler)
Sets the document handler.

Specified by:
setDocumentHandler in interface XMLDocumentSource

getDocumentHandler

public XMLDocumentHandler getDocumentHandler()
Returns the document handler.

Specified by:
getDocumentHandler in interface XMLDocumentSource

getValue

protected static String getValue(XMLAttributes attrs,
                                 String aname)
Returns the value of the specified attribute, ignoring case.


expandSystemId

public static String expandSystemId(String systemId,
                                    String baseSystemId)
Expands a system id and returns the system id as a URI, if it can be expanded. A return value of null means that the identifier is already expanded. An exception thrown indicates a failure to expand the id.

Parameters:
systemId - The systemId to be expanded.
Returns:
Returns the URI string representing the expanded system identifier. A null value indicates that the given system identifier is already expanded.

fixURI

protected static String fixURI(String str)
Fixes a platform dependent filename to standard URI form.

Parameters:
str - The string to fix.
Returns:
Returns the fixed URI string.

modifyName

protected static final String modifyName(String name,
                                         short mode)
Modifies the given name based on the specified mode.


getNamesValue

protected static final short getNamesValue(String value)
Converts HTML names string value to constant value.

See Also:
NAMES_NO_CHANGE, NAMES_LOWERCASE, NAMES_UPPERCASE

read

protected int read()
            throws IOException
Reads a single character.

Throws:
IOException

load

protected int load(int offset)
            throws IOException
Loads a new chunk of data into the buffer and returns the number of characters loaded or -1 if no additional characters were loaded.

Parameters:
offset - The offset at which new characters should be loaded.
Throws:
IOException

setScanner

protected void setScanner(HTMLScanner.Scanner scanner)
Sets the scanner.


setScannerState

protected void setScannerState(short state)
Sets the scanner state.


scanDoctype

protected void scanDoctype()
                    throws IOException
Scans a DOCTYPE line.

Throws:
IOException

scanLiteral

protected String scanLiteral()
                      throws IOException
Scans a quoted literal.

Throws:
IOException

scanName

protected String scanName()
                   throws IOException
Scans a name.

Throws:
IOException

scanEntityRef

protected int scanEntityRef(XMLStringBuffer str,
                            boolean content)
                     throws IOException
Scans an entity reference.

Throws:
IOException

skip

protected boolean skip(String s,
                       boolean caseSensitive)
                throws IOException
Returns true if the specified text is present and is skipped.

Throws:
IOException

skipMarkup

protected boolean skipMarkup(boolean balance)
                      throws IOException
Skips markup.

Throws:
IOException

skipSpaces

protected boolean skipSpaces()
                      throws IOException
Skips whitespace.

Throws:
IOException

skipNewlines

protected int skipNewlines()
                    throws IOException
Skips newlines and returns the number of newlines skipped.

Throws:
IOException

skipNewlines

protected int skipNewlines(int maxlines)
                    throws IOException
Skips newlines and returns the number of newlines skipped.

Throws:
IOException

locationAugs

protected final Augmentations locationAugs()
Returns an augmentations object with a location item added.


synthesizedAugs

protected final Augmentations synthesizedAugs()
Returns an augmentations object with a synthesized item added.


resourceId

protected final XMLResourceIdentifier resourceId()
Returns an empty resource identifier.


builtinXmlRef

protected static boolean builtinXmlRef(String name)
Returns true if the name is a built-in XML general entity reference.



(C) Copyright 2002-2004, Andy Clark. All rights reserved.