http://xml.apache.org/ http://www.apache.org/ http://www.w3.org/

Home

Readme
Download
Installation
Build

API Docs
Samples
Schema

FAQs
Programming
Migration

Releases
Bug-Reporting
Feedback

Y2K Compliance
PDF Document

CVS Repository
Mail Archive

API Docs for SAX and DOM
 

Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

DOMParser.hpp

Go to the documentation of this file.
00001 /*
00002  * The Apache Software License, Version 1.1
00003  *
00004  * Copyright (c) 1999-2001 The Apache Software Foundation.  All rights
00005  * reserved.
00006  *
00007  * Redistribution and use in source and binary forms, with or without
00008  * modification, are permitted provided that the following conditions
00009  * are met:
00010  *
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer.
00013  *
00014  * 2. Redistributions in binary form must reproduce the above copyright
00015  *    notice, this list of conditions and the following disclaimer in
00016  *    the documentation and/or other materials provided with the
00017  *    distribution.
00018  *
00019  * 3. The end-user documentation included with the redistribution,
00020  *    if any, must include the following acknowledgment:
00021  *       "This product includes software developed by the
00022  *        Apache Software Foundation (http://www.apache.org/)."
00023  *    Alternately, this acknowledgment may appear in the software itself,
00024  *    if and wherever such third-party acknowledgments normally appear.
00025  *
00026  * 4. The names "Xerces" and "Apache Software Foundation" must
00027  *    not be used to endorse or promote products derived from this
00028  *    software without prior written permission. For written
00029  *    permission, please contact apache\@apache.org.
00030  *
00031  * 5. Products derived from this software may not be called "Apache",
00032  *    nor may "Apache" appear in their name, without prior written
00033  *    permission of the Apache Software Foundation.
00034  *
00035  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
00036  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00037  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00038  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
00039  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00040  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00041  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
00042  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00043  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
00044  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00045  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00046  * SUCH DAMAGE.
00047  * ====================================================================
00048  *
00049  * This software consists of voluntary contributions made by many
00050  * individuals on behalf of the Apache Software Foundation, and was
00051  * originally based on software copyright (c) 1999, International
00052  * Business Machines, Inc., http://www.ibm.com .  For more information
00053  * on the Apache Software Foundation, please see
00054  * <http://www.apache.org/>.
00055  */
00056 
00057 /*
00058  * $Id: DOMParser.hpp,v 1.26 2001/07/16 13:15:07 tng Exp $
00059  *
00060  */
00061 
00062 #if !defined(DOMPARSER_HPP)
00063 #define DOMPARSER_HPP
00064 
00065 
00066 #include <dom/DOM_Document.hpp>
00067 #include <framework/XMLDocumentHandler.hpp>
00068 #include <framework/XMLErrorReporter.hpp>
00069 #include <framework/XMLEntityHandler.hpp>
00070 #include <util/ValueStackOf.hpp>
00071 
00072 #include <validators/DTD/DocTypeHandler.hpp>
00073 #include <dom/DOM_DocumentType.hpp>
00074 #include <validators/DTD/DTDElementDecl.hpp>
00075 
00076 class EntityResolver;
00077 class ErrorHandler;
00078 class XMLPScanToken;
00079 class XMLScanner;
00080 class XMLValidator;
00081 
00082 
00093 class  DOMParser :
00094 // DOM-building parser: implements the four handler interfaces listed below and assembles fDocument from their callbacks.
00095     public XMLDocumentHandler
00096     , public XMLErrorReporter
00097     , public XMLEntityHandler
00098     , public DocTypeHandler
00099 {
00100 public :
00101     // -----------------------------------------------------------------------
00102     //  Class types
00103     // -----------------------------------------------------------------------
00104     enum ValSchemes
00105     {
00106         Val_Never
00107         , Val_Always
00108         , Val_Auto
00109     };
00110 
00111 
00112     // -----------------------------------------------------------------------
00113     //  Constructors and Destructor
00114     // -----------------------------------------------------------------------
00115 
00118 
00127     DOMParser(XMLValidator* const valToAdopt = 0);
00128 
00132     ~DOMParser();
00133 
00135 
00141     void reset();
00142 
00143 
00144     // -----------------------------------------------------------------------
00145     //  Getter methods
00146     // -----------------------------------------------------------------------
00147 
00150 
00160     DOM_Document getDocument();
00161 
00169     ErrorHandler* getErrorHandler();
00170 
00178     const ErrorHandler* getErrorHandler() const;
00179 
00187     EntityResolver* getEntityResolver();
00188 
00196     const EntityResolver* getEntityResolver() const;
00197 
00205     const XMLValidator& getValidator() const;
00206 
00214     ValSchemes getValidationScheme() const;
00215 
00226     bool getDoSchema() const;
00227 
00239     int getErrorCount() const;
00240 
00251     bool getDoNamespaces() const;
00252 
00265     bool getExitOnFirstFatalError() const;
00266 
00277     bool getValidationConstraintFatal() const;
00278 
00290     bool getExpandEntityReferences() const;
00291 
00309     bool  getCreateEntityReferenceNodes()const;
00310 
00321     bool getIncludeIgnorableWhitespace() const;
00322 
00332     bool getToCreateXMLDeclTypeNode() const;
00333 
00335 
00336 
00337     // -----------------------------------------------------------------------
00338     //  Setter methods
00339     // -----------------------------------------------------------------------
00340 
00343 
00357     void setErrorHandler(ErrorHandler* const handler);
00358 
00374     void setEntityResolver(EntityResolver* const handler);
00375 
00394     void setDoNamespaces(const bool newState);
00395 
00412     void setExitOnFirstFatalError(const bool newState);
00413 
00429     void setValidationConstraintFatal(const bool newState);
00430 
00445     void setExpandEntityReferences(const bool expand);
00446 
00463     void setCreateEntityReferenceNodes(const bool create);
00464 
00486     void setIncludeIgnorableWhitespace(const bool include);
00487 
00504     void setValidationScheme(const ValSchemes newScheme);
00505 
00519     void setDoSchema(const bool newState);
00520 
00531     void setToCreateXMLDeclTypeNode(const bool create);
00532 
00534 
00535 
00536     // -----------------------------------------------------------------------
00537     //  Parsing methods
00538     // -----------------------------------------------------------------------
00539 
00542 
00563     void parse(const InputSource& source, const bool reuseGrammar = false);
00564 
00584     void parse(const XMLCh* const systemId, const bool reuseGrammar = false);
00585 
00603     void parse(const char* const systemId, const bool reuseGrammar = false);
00604 
00634     bool parseFirst
00635     (
00636         const   XMLCh* const    systemId
00637         ,       XMLPScanToken&  toFill
00638         , const bool            reuseGrammar = false
00639     );
00640 
00671     bool parseFirst
00672     (
00673         const   char* const     systemId
00674         ,       XMLPScanToken&  toFill
00675         , const bool            reuseGrammar = false
00676     );
00677 
00708     bool parseFirst
00709     (
00710         const   InputSource&    source
00711         ,       XMLPScanToken&  toFill
00712         , const bool            reuseGrammar = false
00713     );
00714 
00737     bool parseNext(XMLPScanToken& token);
00738 
00764     void parseReset(XMLPScanToken& token);
00765 
00767 
00768 
00769 
00770     // -----------------------------------------------------------------------
00771     //  Implementation of the XMLErrorReporter interface.
00772     // -----------------------------------------------------------------------
00773 
00776 
00801     virtual void error
00802     (
00803         const   unsigned int                errCode
00804         , const XMLCh* const                msgDomain
00805         , const XMLErrorReporter::ErrTypes  errType
00806         , const XMLCh* const                errorText
00807         , const XMLCh* const                systemId
00808         , const XMLCh* const                publicId
00809         , const unsigned int                lineNum
00810         , const unsigned int                colNum
00811     );
00812 
00821     virtual void resetErrors();
00823 
00824 
00825     // -----------------------------------------------------------------------
00826     //  Implementation of the XMLEntityHandler interface.
00827     // -----------------------------------------------------------------------
00828 
00831 
00844     virtual void endInputSource(const InputSource& inputSource);
00845 
00861     virtual bool expandSystemId
00862     (
00863         const   XMLCh* const    systemId
00864         ,       XMLBuffer&      toFill
00865     );
00866 
00875     virtual void resetEntities();
00876 
00892     virtual InputSource* resolveEntity
00893     (
00894         const   XMLCh* const    publicId
00895         , const XMLCh* const    systemId
00896     );
00897 
00910     virtual void startInputSource(const InputSource& inputSource);
00911 
00913 
00914 
00915 
00916     // -----------------------------------------------------------------------
00917     //  Implementation of the XMLDocumentHandler interface.
00918     // -----------------------------------------------------------------------
00919 
00922 
00935     virtual void docCharacters
00936     (
00937         const   XMLCh* const    chars
00938         , const unsigned int    length
00939         , const bool            cdataSection
00940     );
00941 
00950     virtual void docComment
00951     (
00952         const   XMLCh* const    comment
00953     );
00954 
00967     virtual void docPI
00968     (
00969         const   XMLCh* const    target
00970         , const XMLCh* const    data
00971     );
00972 
00977     virtual void endDocument();
00978 
00992     virtual void endElement
00993     (
00994         const   XMLElementDecl& elemDecl
00995         , const unsigned int    urlId
00996         , const bool            isRoot
00997     );
00998 
01007     virtual void endEntityReference
01008     (
01009         const   XMLEntityDecl&  entDecl
01010     );
01011 
01030     virtual void ignorableWhitespace
01031     (
01032         const   XMLCh* const    chars
01033         , const unsigned int    length
01034         , const bool            cdataSection
01035     );
01036 
01043     virtual void resetDocument();
01044 
01049     virtual void startDocument();
01050 
01078     virtual void startElement
01079     (
01080         const   XMLElementDecl&         elemDecl
01081         , const unsigned int            urlId
01082         , const XMLCh* const            elemPrefix
01083         , const RefVectorOf<XMLAttr>&   attrList
01084         , const unsigned int            attrCount
01085         , const bool                    isEmpty
01086         , const bool                    isRoot
01087     );
01088 
01098     virtual void startEntityReference
01099     (
01100         const   XMLEntityDecl&  entDecl
01101     );
01102 
01121     virtual void XMLDecl
01122     (
01123         const   XMLCh* const    versionStr
01124         , const XMLCh* const    encodingStr
01125         , const XMLCh* const    standaloneStr
01126         , const XMLCh* const    actualEncStr
01127     );
01129 
01130 
01133 
01143     bool getDoValidation() const;
01144 
01158     void setDoValidation(const bool newState);
01159 // NOTE(review): no section banner survives in this listing; attDef() through TextDecl() take DTD types and presumably implement the DocTypeHandler base -- confirm.
01163     virtual void attDef
01164     (
01165         const   DTDElementDecl&     elemDecl
01166         , const DTDAttDef&          attDef
01167         , const bool                ignoring
01168     );
01169 
01170     virtual void doctypeComment
01171     (
01172         const   XMLCh* const    comment
01173     );
01174 
01175     virtual void doctypeDecl
01176     (
01177         const   DTDElementDecl& elemDecl
01178         , const XMLCh* const    publicId
01179         , const XMLCh* const    systemId
01180         , const bool            hasIntSubset
01181     );
01182 
01183     virtual void doctypePI
01184     (
01185         const   XMLCh* const    target
01186         , const XMLCh* const    data
01187     );
01188 
01189     virtual void doctypeWhitespace
01190     (
01191         const   XMLCh* const    chars
01192         , const unsigned int    length
01193     );
01194 
01195     virtual void elementDecl
01196     (
01197         const   DTDElementDecl& decl
01198         , const bool            isIgnored
01199     );
01200 
01201     virtual void endAttList
01202     (
01203         const   DTDElementDecl& elemDecl
01204     );
01205 
01206     virtual void endIntSubset();
01207 
01208     virtual void endExtSubset();
01209 
01210     virtual void entityDecl
01211     (
01212         const   DTDEntityDecl&  entityDecl
01213         , const bool            isPEDecl
01214         , const bool            isIgnored
01215     );
01216 
01217     virtual void resetDocType();
01218 
01219     virtual void notationDecl
01220     (
01221         const   XMLNotationDecl&    notDecl
01222         , const bool                isIgnored
01223     );
01224 
01225     virtual void startAttList
01226     (
01227         const   DTDElementDecl& elemDecl
01228     );
01229 
01230     virtual void startIntSubset();
01231 
01232     virtual void startExtSubset();
01233 
01234     virtual void TextDecl
01235     (
01236         const   XMLCh* const    versionStr
01237         , const XMLCh* const    encodingStr
01238     );
01239 
01240 
01242 
01243 
01244 protected :
01245     // -----------------------------------------------------------------------
01246     //  Protected getter methods
01247     // -----------------------------------------------------------------------
01248 
01251 
01256     DOM_Node getCurrentNode();
01257 
01259 
01260 
01261     // -----------------------------------------------------------------------
01262     //  Protected setter methods
01263     // -----------------------------------------------------------------------
01264 
01267 
01275     void setCurrentNode(DOM_Node toSet);
01276 
01283     void setDocument(DOM_Document toSet);
01285 
01286 
01287 private :
01288     // -----------------------------------------------------------------------
01289     //  Private data members
01290     //
01291     //  fCurrentNode
01292     //  fCurrentParent
01293     //      Used to track the current node during nested element events. Since
01294     //      the tree must be built from a set of disjoint callbacks, we need
01295     //      these to keep up with where we currently are.
01296     //
01297     //  fDocument
01298     //      The root document object, filled with the document contents.
01299     //
01300     //  fEntityResolver
01301     //      The installed SAX entity resolver, if any. Null if none.
01302     //
01303     //  fErrorHandler
01304     //      The installed SAX error handler, if any. Null if none.
01305     //
01306     //  fCreateEntityReferenceNodes
01307     //      Indicates whether entity reference nodes should be created.
01308     //
01309     //  fIncludeIgnorableWhitespace
01310     //      Indicates whether ignorable whitespace should be added to
01311     //      the DOM tree for validating parsers.
01312     //
01313     //  fNodeStack
01314     //      Used to track previous parent nodes during nested element events.
01315     //
01316     //  fParseInProgress
01317     //      Used to prevent multiple entrance to the parser while it's doing
01318     //      a parse.
01319     //
01320     //  fScanner
01321     //      The scanner used for this parser. This is created during the
01322     //      constructor.
01323     //
01324     //  fWithinElement
01325     //      A flag to indicate that the parser is within at least one level
01326     //      of element processing.
01327     //
01328     //  fDocumentType
01329     //      Used to store and update the documentType variable information
01330     //      in fDocument
01331     //
01332     //  fToCreateXMLDeclTypeNode
01333     //      A flag to create a DOM_XMLDecl node in the DOM tree if it exists
01334     //      This is an extension in the Xerces implementation
01335     //
01336     // -----------------------------------------------------------------------
01337     DOM_Node                fCurrentParent;
01338     DOM_Node                fCurrentNode;
01339     DOM_Document            fDocument;
01340     EntityResolver*         fEntityResolver;
01341     ErrorHandler*           fErrorHandler;
01342     bool                    fCreateEntityReferenceNodes;
01343     bool                    fIncludeIgnorableWhitespace;
01344     ValueStackOf<DOM_Node>* fNodeStack;
01345     bool                    fParseInProgress;
01346     XMLScanner*             fScanner;
01347     bool                    fWithinElement;
01348     DocumentTypeImpl*       fDocumentType;
01349     bool                    fToCreateXMLDeclTypeNode;
01350 };
01351 
01352 
01353 
01354 // ---------------------------------------------------------------------------
01355 //  DOMParser: Handlers for the XMLEntityHandler interface
01356 // ---------------------------------------------------------------------------
01357 inline void DOMParser::endInputSource(const InputSource&)
01358 {
01359     // Deliberate no-op: the InputSource argument is unnamed and ignored, because the DOM entity resolver doesn't handle this
01360 }
01361 
01362 inline bool DOMParser::expandSystemId(const XMLCh* const, XMLBuffer&)
01363 {
01364     // No expansion is attempted: both arguments are unnamed and ignored (the XMLBuffer is never written), because the DOM entity resolver doesn't handle this
01365     return false;  // unconditional result
01366 }
01367 
01368 inline void DOMParser::resetEntities()
01369 {
01370     // Deliberate no-op: there is nothing to do on this one
01371 }
01372 
01373 inline void DOMParser::startInputSource(const InputSource&)
01374 {
01375     // Deliberate no-op: the InputSource argument is unnamed and ignored, because the DOM entity resolver doesn't handle this
01376 }
01377 
01378 
01379 // ---------------------------------------------------------------------------
01380 //  DOMParser: Getter methods
01381 // ---------------------------------------------------------------------------
01382 inline DOM_Document DOMParser::getDocument()
01383 {
01384     return fDocument;  // the fDocument member, returned by value
01385 }
01386 
01387 inline ErrorHandler* DOMParser::getErrorHandler()
01388 {
01389     return fErrorHandler;  // stored handler pointer, returned as-is (no null check here)
01390 }
01391 
01392 inline const ErrorHandler* DOMParser::getErrorHandler() const
01393 {
01394     return fErrorHandler;  // same member as the non-const overload, exposed as pointer-to-const
01395 }
01396 
01397 inline EntityResolver* DOMParser::getEntityResolver()
01398 {
01399     return fEntityResolver;  // stored resolver pointer, returned as-is (no null check here)
01400 }
01401 
01402 inline const EntityResolver* DOMParser::getEntityResolver() const
01403 {
01404     return fEntityResolver;  // same member as the non-const overload, exposed as pointer-to-const
01405 }
01406 
01407 inline bool DOMParser::getExpandEntityReferences() const
01408 {
01409     return fCreateEntityReferenceNodes;  // NOTE(review): same flag that getCreateEntityReferenceNodes() returns, not inverted; there is no separate "expand" member -- confirm intended polarity
01410 }
01411 inline bool DOMParser::getCreateEntityReferenceNodes() const
01412 {
01413     return fCreateEntityReferenceNodes;  // current value of the create-entity-reference-nodes flag
01414 }
01415 
01416 inline bool DOMParser::getIncludeIgnorableWhitespace() const
01417 {
01418     return fIncludeIgnorableWhitespace;  // current value of the include-ignorable-whitespace flag
01419 }
01420 
01421 inline bool DOMParser::getToCreateXMLDeclTypeNode() const
01422 {
01423     return fToCreateXMLDeclTypeNode;  // current value of the create-XMLDecl-node flag
01424 }
01425 
01426 
01427 // ---------------------------------------------------------------------------
01428 //  DOMParser: Setter methods
01429 // ---------------------------------------------------------------------------
01430 inline void DOMParser::setExpandEntityReferences(const bool expand)
01431 {
01432     fCreateEntityReferenceNodes = expand;  // NOTE(review): writes the same flag as setCreateEntityReferenceNodes(), not inverted -- confirm intended polarity
01433 }
01434 
01435 inline void DOMParser::setCreateEntityReferenceNodes(const bool create)
01436 {
01437     fCreateEntityReferenceNodes = create;  // plain store; the flag is also read by getExpandEntityReferences()
01438 }
01439 
01440 inline void DOMParser::setIncludeIgnorableWhitespace(const bool include)
01441 {
01442     fIncludeIgnorableWhitespace = include;  // plain store of the flag; nothing else is updated here
01443 }
01444 
01445 inline void DOMParser::setToCreateXMLDeclTypeNode(const bool create)
01446 {
01447     fToCreateXMLDeclTypeNode = create;  // plain store of the flag; nothing else is updated here
01448 }
01449 
01450 
01451 // ---------------------------------------------------------------------------
01452 //  DOMParser: Protected getter methods
01453 // ---------------------------------------------------------------------------
01454 inline DOM_Node DOMParser::getCurrentNode()
01455 {
01456     return fCurrentNode;  // the fCurrentNode member, returned by value
01457 }
01458 
01459 
01460 // ---------------------------------------------------------------------------
01461 //  DOMParser: Protected setter methods
01462 // ---------------------------------------------------------------------------
01463 inline void DOMParser::setCurrentNode(DOM_Node toSet)
01464 {
01465     fCurrentNode = toSet;  // overwrites fCurrentNode only; fCurrentParent is not touched here
01466 }
01467 
01468 inline void DOMParser::setDocument(DOM_Document toSet)
01469 {
01470     fDocument = toSet;  // overwrites fDocument only; no other member is updated here
01471 }
01472 
01473 #endif


Copyright © 2000 The Apache Software Foundation. All Rights Reserved.