MSPUBParser.h
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /* libmspub
00003  * Version: MPL 1.1 / GPLv2+ / LGPLv2+
00004  *
00005  * The contents of this file are subject to the Mozilla Public License Version
00006  * 1.1 (the "License"); you may not use this file except in compliance with
00007  * the License or as specified alternatively below. You may obtain a copy of
00008  * the License at http://www.mozilla.org/MPL/
00009  *
00010  * Software distributed under the License is distributed on an "AS IS" basis,
00011  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
00012  * for the specific language governing rights and limitations under the
00013  * License.
00014  *
00015  * Major Contributor(s):
00016  * Copyright (C) 2012 Brennan Vincent <brennanv@email.arizona.edu>
00017  * Copyright (C) 2012 Fridrich Strba <fridrich.strba@bluewin.ch>
00018  *
00019  * All Rights Reserved.
00020  *
00021  * For minor contributions see the git repository.
00022  *
00023  * Alternatively, the contents of this file may be used under the terms of
00024  * either the GNU General Public License Version 2 or later (the "GPLv2+"), or
00025  * the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"),
00026  * in which case the provisions of the GPLv2+ or the LGPLv2+ are applicable
00027  * instead of those above.
00028  */
00029 
00030 #ifndef __MSPUBPARSER_H__
00031 #define __MSPUBPARSER_H__
00032 
00033 #include <map>
00034 #include <set>
00035 #include <vector>
00036 #include <memory>
00037 
00038 #include <boost/shared_ptr.hpp>
00039 #include <boost/optional.hpp>
00040 
00041 #include <libwpd/libwpd.h>
00042 #include <libwpg/libwpg.h>
00043 
00044 #include "MSPUBTypes.h"
00045 #include "Fill.h"
00046 #include "Coordinate.h"
00047 #include "PolygonUtils.h"
00048 
00049 class WPXInputStream;
00050 
00051 namespace libmspub
00052 {
00053 class MSPUBCollector;
00054 class FindBySeqNum
00055 {
00056   unsigned seqNum;
00057 public:
00058   FindBySeqNum(unsigned sn) : seqNum(sn) { }
00059   bool operator()(const libmspub::ContentChunkReference &ref)
00060   {
00061     return ref.seqNum == seqNum;
00062   }
00063 };
00064 
00065 class FindByParentSeqNum
00066 {
00067   unsigned seqNum;
00068 public:
00069   FindByParentSeqNum(unsigned sn) : seqNum(sn) { }
00070   bool operator()(const libmspub::ContentChunkReference &ref)
00071   {
00072     return ref.parentSeqNum == seqNum;
00073   }
00074 };
00075 
/** Two lookup tables keyed by unsigned short.
 *
 * m_scalarValues maps a key to a single unsigned value; m_complexValues maps
 * a key to a byte buffer. Both tables are empty after construction.
 * NOTE(review): presumably the keys are Escher FOPT property ids -- confirm
 * against the implementation of MSPUBParser::extractFOPTValues().
 */
struct FOPTValues
{
  std::map<unsigned short, unsigned> m_scalarValues;
  std::map<unsigned short, std::vector<unsigned char> > m_complexValues;

  /** Starts with both tables explicitly value-initialised (empty). */
  FOPTValues()
    : m_scalarValues()
    , m_complexValues()
  {
  }
};
00084 
00085 class MSPUBParser
00086 {
00087 public:
00088   explicit MSPUBParser(WPXInputStream *input, MSPUBCollector *collector);
00089   virtual ~MSPUBParser();
00090   virtual bool parse();
00091 protected:
00092   virtual unsigned getColorIndexByQuillEntry(unsigned entry);
00093 
00094   struct TextSpanReference
00095   {
00096     TextSpanReference(unsigned short f, unsigned short l, const CharacterStyle &cs) : first(f), last(l), charStyle(cs) { }
00097     unsigned short first;
00098     unsigned short last;
00099     CharacterStyle charStyle;
00100   };
00101 
00102   struct TextParagraphReference
00103   {
00104     TextParagraphReference(unsigned short f, unsigned short l, const ParagraphStyle &ps) : first(f), last(l), paraStyle(ps) { }
00105     unsigned short first;
00106     unsigned short last;
00107     ParagraphStyle paraStyle;
00108   };
00109 
00110   typedef std::vector<ContentChunkReference>::const_iterator ccr_iterator_t;
00111 
00112   MSPUBParser();
00113   MSPUBParser(const MSPUBParser &);
00114   MSPUBParser &operator=(const MSPUBParser &);
00115   virtual bool parseContents(WPXInputStream *input);
00116   bool parseQuill(WPXInputStream *input);
00117   bool parseEscher(WPXInputStream *input);
00118   bool parseEscherDelay(WPXInputStream *input);
00119 
00120   MSPUBBlockInfo parseBlock(WPXInputStream *input, bool skipHierarchicalData = false);
00121   EscherContainerInfo parseEscherContainer(WPXInputStream *input);
00122 
00123   bool parseContentChunkReference(WPXInputStream *input, MSPUBBlockInfo block);
00124   QuillChunkReference parseQuillChunkReference(WPXInputStream *input);
00125   bool parseDocumentChunk(WPXInputStream *input, const ContentChunkReference &chunk);
00126   bool parsePageChunk(WPXInputStream *input, const ContentChunkReference &chunk);
00127   bool parsePaletteChunk(WPXInputStream *input, const ContentChunkReference &chunk);
00128   bool parsePageShapeList(WPXInputStream *input, MSPUBBlockInfo block, unsigned pageSeqNum);
00129   bool parseShape(WPXInputStream *input, const ContentChunkReference &chunk);
00130   bool parseBorderArtChunk(WPXInputStream *input,
00131                            const ContentChunkReference &chunk);
00132   bool parseFontChunk(WPXInputStream *input,
00133                       const ContentChunkReference &chunk);
00134   void parsePaletteEntry(WPXInputStream *input, MSPUBBlockInfo block);
00135   void parseColors(WPXInputStream *input, const QuillChunkReference &chunk);
00136   void parseFonts(WPXInputStream *input, const QuillChunkReference &chunk);
00137   void parseDefaultStyle(WPXInputStream *input, const QuillChunkReference &chunk);
00138   void parseShapeGroup(WPXInputStream *input, const EscherContainerInfo &spgr, Coordinate parentCoordinateSystem, Coordinate parentGroupAbsoluteCoord);
00139   void skipBlock(WPXInputStream *input, MSPUBBlockInfo block);
00140   void parseEscherShape(WPXInputStream *input, const EscherContainerInfo &sp, Coordinate &parentCoordinateSystem, Coordinate &parentGroupAbsoluteCoord);
00141   bool findEscherContainer(WPXInputStream *input, const EscherContainerInfo &parent, EscherContainerInfo &out, unsigned short type);
00142   bool findEscherContainerWithTypeInSet(WPXInputStream *input, const EscherContainerInfo &parent, EscherContainerInfo &out, std::set<unsigned short> types);
00143   std::map<unsigned short, unsigned> extractEscherValues(WPXInputStream *input, const EscherContainerInfo &record);
00144   FOPTValues extractFOPTValues(WPXInputStream *input,
00145                                const libmspub::EscherContainerInfo &record);
00146   std::vector<TextSpanReference> parseCharacterStyles(WPXInputStream *input, const QuillChunkReference &chunk);
00147   std::vector<TextParagraphReference> parseParagraphStyles(WPXInputStream *input, const QuillChunkReference &chunk);
00148   std::vector<Calculation> parseGuides(const std::vector<unsigned char>
00149                                        &guideData);
00150   std::vector<Vertex> parseVertices(const std::vector<unsigned char>
00151                                     &vertexData);
00152   std::vector<unsigned> parseTableCellDefinitions(WPXInputStream *input,
00153       const QuillChunkReference &chunk);
00154   std::vector<unsigned short> parseSegments(
00155     const std::vector<unsigned char> &segmentData);
00156   DynamicCustomShape getDynamicCustomShape(
00157     const std::vector<unsigned char> &vertexData,
00158     const std::vector<unsigned char> &segmentData,
00159     const std::vector<unsigned char> &guideData,
00160     unsigned geoWidth, unsigned geoHeight);
00161   int getColorIndex(WPXInputStream *input, const MSPUBBlockInfo &info);
00162   unsigned getFontIndex(WPXInputStream *input, const MSPUBBlockInfo &info);
00163   CharacterStyle getCharacterStyle(WPXInputStream *input);
00164   ParagraphStyle getParagraphStyle(WPXInputStream *input);
00165   boost::shared_ptr<Fill> getNewFill(const std::map<unsigned short, unsigned> &foptValues, bool &skipIfNotBg, std::map<unsigned short, std::vector<unsigned char> > &foptVal);
00166 
00167   WPXInputStream *m_input;
00168   MSPUBCollector *m_collector;
00169   std::vector<MSPUBBlockInfo> m_blockInfo;
00170   std::vector<ContentChunkReference> m_contentChunks;
00171   std::vector<unsigned> m_cellsChunkIndices;
00172   std::vector<unsigned> m_pageChunkIndices;
00173   std::vector<unsigned> m_shapeChunkIndices;
00174   std::vector<unsigned> m_paletteChunkIndices;
00175   std::vector<unsigned> m_borderArtChunkIndices;
00176   std::vector<unsigned> m_fontChunkIndices;
00177   std::vector<unsigned> m_unknownChunkIndices;
00178   boost::optional<unsigned> m_documentChunkIndex;
00179   int m_lastSeenSeqNum;
00180   unsigned m_lastAddedImage;
00181   std::vector<int> m_alternateShapeSeqNums;
00182   std::vector<int> m_escherDelayIndices;
00183 
00184   static short getBlockDataLength(unsigned type);
00185   static bool isBlockDataString(unsigned type);
00186   static PageType getPageTypeBySeqNum(unsigned seqNum);
00187   static unsigned getEscherElementTailLength(unsigned short type);
00188   static unsigned getEscherElementAdditionalHeaderLength(unsigned short type);
00189   static ImgType imgTypeByBlipType(unsigned short type);
00190   static int getStartOffset(ImgType type, unsigned short initial);
00191   static bool lineExistsByFlagPointer(unsigned *flags,
00192                                       unsigned *geomFlags = NULL);
00193 };
00194 
00195 } // namespace libmspub
00196 
00197 #endif //  __MSPUBPARSER_H__
00198 /* vim:set shiftwidth=2 softtabstop=2 expandtab: */