The class which parses text zones in a pc MS Works document v1-4. More...
#include <WPS4Text.h>
Public Member Functions | |
WPS4Text (WPS4Parser &parser, RVNGInputStreamPtr &input) | |
constructor | |
~WPS4Text () override | |
destructor | |
void | setListener (WPSContentListenerPtr &listen) |
sets the listener | |
int | numPages () const |
returns the number of pages | |
void | flushExtra () |
sends the data which have not yet been sent to the listener | |
void | sendObjects (int page) |
sends all the objects with a page anchor corresponding to the given page | |
Public Member Functions inherited from WPSTextParser | |
virtual | ~WPSTextParser () |
virtual destructor | |
int | version () const |
returns the file version | |
RVNGInputStreamPtr & | getInput () |
returns the actual input |
Protected Types | |
typedef bool(WPS4Text::* | DataParser) (long bot, long eot, int id, long endPos, std::string &mess) |
definition of the plc data parser (low level) | |
Protected Types inherited from WPSTextParser | |
typedef bool(WPSTextParser::* | FDPParser) (long endPos, int &id, std::string &mess) |
callback when a new attribute is found in an FDPP/FDPC entry |
Protected Member Functions | |
WPS4Parser & | mainParser () |
return the main parser | |
WPS4Parser const & | mainParser () const |
return the main parser | |
WPS4TextInternal::Font | getDefaultFont () const |
returns the default font to use for the document | |
WPSEntry | getHeaderEntry () const |
returns the header entry (if such entry exists, if not returns an invalid entry) | |
WPSEntry | getFooterEntry () const |
returns the footer entry (if such entry exists, if not returns an invalid entry) | |
WPSEntry | getMainTextEntry () const |
returns the main text entry (if such entry exists, if not returns an invalid entry) | |
bool | readText (WPSEntry const &entry) |
reads a text section and sends it to a listener | |
bool | readEntries () |
finds all text entries (TEXT, SHdr, SFtr, BTEC, BTEP, FTNd, FTNp, BKMK, FONT, CHRT) | |
bool | readStructures () |
parses all the text entries | |
bool | findFDPStructures (int which) |
bool | findFDPStructuresByHand (int which) |
bool | readPLC (WPSEntry const &zone, std::vector< long > &textPtrs, std::vector< long > &listValues, DataParser parser=nullptr) |
reads a PLC (Pointer List Composant ?) in zone entry | |
bool | defDataParser (long bot, long eot, int id, long endPos, std::string &mess) |
default plc reader | |
bool | readFontNames (WPSEntry const &entry) |
reads the font names | |
bool | readFont (long endPos, int &id, std::string &mess) |
reads font properties | |
bool | readParagraph (long endPos, int &id, std::string &mess) |
reads paragraph properties | |
bool | readDosLink (WPSEntry const &entry) |
reads the ZZDLink ( a list of filename ) | |
bool | objectDataParser (long bot, long eot, int id, long endPos, std::string &mess) |
reads an object's properties (position in text, size and definition in file) | |
bool | readFootNotes (WPSEntry const &ftnD, WPSEntry const &ftnP) |
reads the footnotes positions and definitions ( zones FTNd and FTNp) | |
bool | footNotesDataParser (long bot, long eot, int id, long endPos, std::string &mess) |
reads a footnote property | |
bool | bkmkDataParser (long bot, long eot, int id, long endPos, std::string &mess) |
reads a book mark property ( string) | |
bool | dttmDataParser (long bot, long eot, int id, long endPos, std::string &mess) |
reads a date time property | |
Protected Member Functions inherited from WPSTextParser | |
WPSTextParser (WPSParser &parser, RVNGInputStreamPtr &input) | |
constructor | |
std::multimap< std::string, WPSEntry > & | getNameEntryMap () |
returns the map type->entry | |
std::multimap< std::string, WPSEntry > const & | getNameEntryMap () const |
returns the map type->entry | |
std::vector< DataFOD > | mergeSortedFODLists (std::vector< DataFOD > const &lst1, std::vector< DataFOD > const &lst2) const |
function which takes two sorted lists of attributes (sorted by text position). | |
bool | readFDP (WPSEntry const &entry, std::vector< DataFOD > &fods, FDPParser parser) |
parses a FDPP or a FDPC entry (which contains a list of ATTR_TEXT/ATTR_PARAG with their definition ) and adds found data in listFODs | |
libwps::DebugFile & | ascii () |
a DebugFile used to write what we recognize when we parse the document |
Protected Attributes | |
WPSContentListenerPtr | m_listener |
the listener | |
std::shared_ptr< WPS4TextInternal::State > | m_state |
the internal state | |
Protected Attributes inherited from WPSTextParser | |
int | m_version |
the file version | |
RVNGInputStreamPtr | m_input |
the main input | |
WPSParser & | m_mainParser |
reference to the main zone parser | |
WPSEntry | m_textPositions |
an entry which corresponds to the complete text zone | |
std::vector< DataFOD > | m_FODList |
the list of FODs | |
libwps::DebugFile & | m_asciiFile |
the ascii file |
Friends | |
class | WPS4Parser |
The class which parses text zones in a pc MS Works document v1-4.
This class must be associated with a WPS4Parser. It finds and reads:
|
protected |
definition of the plc data parser (low level)
endPos | the end of the properties' definition, |
bot | defines the begin of the text's zone |
eot | defines the end of the text's zone |
id | the number of this properties |
mess | a string which can be filled to indicate unparsed data |
WPS4Text::WPS4Text | ( | WPS4Parser & | parser, |
RVNGInputStreamPtr & | input ) |
constructor
|
override |
destructor
|
protected |
reads a book mark property ( string)
Referenced by readStructures().
|
protected |
default plc reader
Referenced by readPLC().
|
protected |
reads a date time property
Referenced by readStructures().
|
protected |
finds the FDPC/FDPP structure using the BTEC/BTEP entries
which | == 0 means FDPP |
Referenced by readStructures().
|
protected |
finds the FDPC/FDPP structure by searching after the text zone
which | == 0 means FDPP |
Referenced by readStructures().
void WPS4Text::flushExtra | ( | ) |
sends the data which have not yet been sent to the listener
|
protected |
reads a footnote property
Referenced by readFootNotes().
|
protected |
returns the default font to use for the document
Referenced by flushExtra(), readFont(), and readText().
|
protected |
returns the footer entry (if such entry exists, if not returns an invalid entry)
|
protected |
returns the header entry (if such entry exists, if not returns an invalid entry)
|
protected |
returns the main text entry (if such entry exists, if not returns an invalid entry)
|
inlineprotected |
return the main parser
Referenced by footNotesDataParser(), getDefaultFont(), objectDataParser(), readEntries(), readFont(), readFontNames(), readParagraph(), readText(), and sendObjects().
|
inlineprotected |
return the main parser
int WPS4Text::numPages | ( | ) | const |
returns the number of pages
|
protected |
reads an object's properties (position in text, size and definition in file)
Referenced by readStructures().
|
protected |
reads the ZZDLink ( a list of filename )
Referenced by readStructures().
|
protected |
finds all text entries (TEXT, SHdr, SFtr, BTEC, BTEP, FTNd, FTNp, BKMK, FONT, CHRT)
|
protected |
reads font properties
Referenced by readStructures().
|
protected |
reads the font names
Referenced by readStructures().
reads the footnotes positions and definitions ( zones FTNd and FTNp)
Referenced by readStructures().
|
protected |
reads paragraph properties
Referenced by readStructures().
|
protected |
reads a PLC (Pointer List Composant ?) in zone entry
zone | the zone of the data in the file, |
textPtrs | lists of offset in text zones where properties changes |
listValues | lists of properties values (filled only if values are simple types: int, ..) |
parser | the parser to use to read the values |
Referenced by findFDPStructures(), readFootNotes(), and readStructures().
|
protected |
parses all the text entries
|
protected |
reads a text section and sends it to a listener
Referenced by flushExtra().
void WPS4Text::sendObjects | ( | int | page | ) |
sends all the objects with a page anchor corresponding to the given page
page | if page < 0, sends all the pictures which have a page anchor, |
|
inline |
sets the listener
|
friend |
Referenced by mainParser(), mainParser(), WPS4Parser, and WPS4Text().
|
protected |
the listener
Referenced by flushExtra(), readText(), setListener(), and WPS4Text().
|
mutableprotected |
the internal state
Referenced by bkmkDataParser(), dttmDataParser(), findFDPStructures(), findFDPStructuresByHand(), flushExtra(), footNotesDataParser(), getFooterEntry(), getHeaderEntry(), getMainTextEntry(), numPages(), objectDataParser(), readDosLink(), readEntries(), readFont(), readFontNames(), readFootNotes(), readParagraph(), readPLC(), readStructures(), readText(), sendObjects(), and WPS4Text().