#include <WPS8Text.h>
Public Member Functions | |
WPS8Text (WPS8Parser &parser) | |
~WPS8Text () override | |
void | setListener (WPSContentListenerPtr &listen) |
sets the listener | |
int | numPages () const |
returns the number of pages | |
void | flushExtra () |
sends the data which have not yet been sent to the listener | |
bool | readStructures () |
finds all entries which correspond to the text data, parses them and stores data | |
int | getNumTextZones () const |
returns the number of different text zones | |
int | getTextZoneType (int typeId) const |
returns the type of a text zone | |
WPSEntry | getHeaderEntry () const |
returns the header entry (if such entry exists, if not returns an invalid entry) | |
WPSEntry | getFooterEntry () const |
returns the footer entry (if such entry exists, if not returns an invalid entry) | |
WPSEntry | getTextEntry () const |
returns the main zone entry (if such entry exists, if not returns an invalid entry) | |
WPSEntry | getEntry (int strsId) const |
returns ith zone entry (if such entry exists, if not returns an invalid entry) | |
void | readText (WPSEntry const &entry) |
reads a text section and sends it to a listener | |
void | readTextInCell (int strsId, int cellId) |
reads a cell section and sends it to a listener | |
Public Member Functions inherited from WPSTextParser | |
virtual | ~WPSTextParser () |
virtual destructor | |
int | version () const |
returns the file version | |
RVNGInputStreamPtr & | getInput () |
returns the actual input |
Protected Types | |
typedef bool(WPS8Text::* | DataParser) (long bot, long eot, int id, WPS8Struct::FileData const &data, std::string &mess) |
definition of the plc data parser (low level) | |
typedef bool(WPS8Text::* | EndDataParser) (long endPos, std::vector< long > const &textPtrs) |
definition of the last part of plc data parser (low level) | |
Protected Types inherited from WPSTextParser | |
typedef bool(WPSTextParser::* | FDPParser) (long endPos, int &id, std::string &mess) |
callback when a new attribute is found in an FDPP/FDPC entry |
Protected Member Functions | |
WPS8Parser & | mainParser () |
return the main parser | |
WPS8Parser const & | mainParser () const |
return the main parser | |
bool | readFont (long endPos, int &id, std::string &mess) |
reads the font properties | |
bool | readParagraph (long endPos, int &id, std::string &mess) |
reads the paragraph properties | |
bool | readNotes (WPSEntry const &entry) |
reads a notes zone: footnote (FTN) or endnote (EDN) | |
void | createNotesCorrespondance () |
creates the notes association : text and notes positions | |
bool | readPLC (WPSEntry const &entry, std::vector< long > &textPtrs, std::vector< long > &listValues, DataParser parser=&WPS8Text::defDataParser, EndDataParser endParser=nullptr) |
reads a PLC (Pointer List Component?) in the zone entry | |
bool | defDataParser (long, long, int, WPS8Struct::FileData const &data, std::string &mess) |
default parser | |
bool | textZonesDataParser (long bot, long eot, int nId, WPS8Struct::FileData const &data, std::string &mess) |
the text zones parser: STRS | |
bool | objectDataParser (long bot, long eot, int id, WPS8Struct::FileData const &data, std::string &mess) |
reads the EOBJ properties: an object id and its size, ... | |
bool | tokenEndDataParser (long endPage, std::vector< long > const &textPtrs) |
reads a field type : TOKN zone | |
bool | bmktEndDataParser (long endPage, std::vector< long > const &textPtrs) |
reads a field type : BMKT zone | |
Protected Member Functions inherited from WPSTextParser | |
WPSTextParser (WPSParser &parser, RVNGInputStreamPtr &input) | |
constructor | |
std::multimap< std::string, WPSEntry > & | getNameEntryMap () |
returns the map type->entry | |
std::multimap< std::string, WPSEntry > const & | getNameEntryMap () const |
returns the map type->entry | |
std::vector< DataFOD > | mergeSortedFODLists (std::vector< DataFOD > const &lst1, std::vector< DataFOD > const &lst2) const |
function which takes two lists of attributes, each sorted by text position, and merges them into one sorted list. | |
bool | readFDP (WPSEntry const &entry, std::vector< DataFOD > &fods, FDPParser parser) |
parses a FDPP or a FDPC entry (which contains a list of ATTR_TEXT/ATTR_PARAG with their definition ) and adds found data in listFODs | |
libwps::DebugFile & | ascii () |
a DebugFile used to write what we recognize when we parse the document |
Static Protected Member Functions | |
static bool | readString (RVNGInputStreamPtr input, long page_size, librevenge::RVNGString &res) |
reads a string | |
static long | readUTF16LE (RVNGInputStreamPtr input, long endPos, uint16_t firstC) |
reads a UTF-16 (little-endian) character |
Protected Attributes | |
WPSContentListenerPtr | m_listener |
the listener | |
std::shared_ptr< WPS8TextStyle > | m_styleParser |
the style parser | |
std::shared_ptr< WPS8TextInternal::State > | m_state |
the internal state | |
Protected Attributes inherited from WPSTextParser | |
int | m_version |
the file version | |
RVNGInputStreamPtr | m_input |
the main input | |
WPSParser & | m_mainParser |
reference to the main zone parser | |
WPSEntry | m_textPositions |
an entry which corresponds to the complete text zone | |
std::vector< DataFOD > | m_FODList |
the list of FODs | |
libwps::DebugFile & | m_asciiFile |
the ascii file |
Friends | |
class | WPS8TextInternal::SubDocument |
class | WPS8Parser |
class | WPS8TextStyle |
|
protected |
definition of the plc data parser (low level)
bot | defines the begin of the text zone corresponding to these properties |
eot | defines the end of the text zone corresponding to these properties |
id | the number of these properties |
data | a reference to store the parsed data |
mess | a string which can be filled to indicate unparsed data |
|
protected |
definition of the last part of plc data parser (low level)
endPos | the end of the properties' definition, |
textPtrs | the list of text positions |
|
explicit |
Referenced by getFooterEntry(), and getHeaderEntry().
|
override |
|
protected |
|
protected |
creates the notes association: text and notes positions
we can now create the link position -> notes
Referenced by readStructures().
|
protected |
default parser
Referenced by readStructures().
void WPS8Text::flushExtra | ( | ) |
sends the data which have not yet been sent to the listener
WPSEntry WPS8Text::getEntry | ( | int | strsId | ) | const |
returns ith zone entry (if such entry exists, if not returns an invalid entry)
Referenced by createNotesCorrespondance().
WPSEntry WPS8Text::getFooterEntry | ( | ) | const |
returns the footer entry (if such entry exists, if not returns an invalid entry)
WPSEntry WPS8Text::getHeaderEntry | ( | ) | const |
returns the header entry (if such entry exists, if not returns an invalid entry)
int WPS8Text::getNumTextZones | ( | ) | const |
returns the number of different text zones
WPSEntry WPS8Text::getTextEntry | ( | ) | const |
returns the main zone entry (if such entry exists, if not returns an invalid entry)
int WPS8Text::getTextZoneType | ( | int | typeId | ) | const |
returns the type of a text zone
1: mainzone, 2: footnote, 3: endnote, 4: ???, 5: text in table/textbox 6: header, 7: footer
|
inlineprotected |
return the main parser
Referenced by readText().
|
inlineprotected |
return the main parser
int WPS8Text::numPages | ( | ) | const |
returns the number of pages
|
protected |
reads the EOBJ properties: an object id and its size, ...
Referenced by readStructures().
|
protected |
reads the font properties
Referenced by WPS8TextStyle::readStructures().
|
protected |
reads a notes zone: footnote (FTN) or endnote (EDN)
Referenced by readStructures().
|
protected |
reads the paragraph properties
Referenced by WPS8TextStyle::readStructures().
|
protected |
reads a PLC (Pointer List Component?) in the zone entry
entry | the file zone |
textPtrs | list of offsets in the text zones where the properties change |
listValues | list of property values (filled only if the values are simple types: int, ..) |
parser | the parser to use to read the values |
endParser | the parser to use to read remaining data |
Referenced by readStructures().
|
staticprotected |
reads a string
Referenced by bmktEndDataParser(), WPS8TextStyle::readSTSH(), and tokenEndDataParser().
bool WPS8Text::readStructures | ( | ) |
finds all entries which correspond to the text data, parses them and stores data
create the main structures
void WPS8Text::readText | ( | WPSEntry const & | entry | ) |
reads a text section and sends it to a listener
Read the range of the document text using previously-read formatting information, up to but excluding entry.end().
Referenced by flushExtra(), and readTextInCell().
void WPS8Text::readTextInCell | ( | int | strsId, |
int | cellId ) |
reads a cell section and sends it to a listener
|
staticprotected |
reads a UTF-16 (little-endian) character
CHECKME: for the Symbol font, we must probably convert 0xF0xx and 0x00xx into a 0x03yy symbol :-~
Referenced by readString(), and readText().
void WPS8Text::setListener | ( | WPSContentListenerPtr & | listen | ) |
sets the listener
|
protected |
the text zones parser: STRS
Referenced by readStructures().
|
protected |
reads a field type : TOKN zone
Referenced by readStructures().
|
friend |
Referenced by mainParser(), mainParser(), WPS8Parser, WPS8Text(), and WPS8TextStyle.
|
friend |
|
friend |
Referenced by WPS8Text(), and WPS8TextStyle.
|
protected |
the listener
Referenced by flushExtra(), readText(), readTextInCell(), setListener(), and WPS8Text().
|
mutableprotected |
the internal state
Referenced by bmktEndDataParser(), createNotesCorrespondance(), flushExtra(), getEntry(), getFooterEntry(), getHeaderEntry(), getNumTextZones(), getTextEntry(), getTextZoneType(), objectDataParser(), readNotes(), readPLC(), readStructures(), readText(), readTextInCell(), textZonesDataParser(), tokenEndDataParser(), and WPS8Text().
|
protected |
the style parser
Referenced by readFont(), readParagraph(), readStructures(), readText(), setListener(), and WPS8Text().