xapian-core
1.4.24
|
This class is used to access a database, or a group of databases. More...
#include <database.h>
Public Member Functions | |
void | add_database (const Database &database) |
Add an existing database (or group of databases) to those accessed by this object. | |
size_t | size () const |
Return number of shards in this Database object. | |
Database () | |
Create a Database with no databases in. | |
Database (const std::string &path, int flags=0) | |
Open a Database, automatically determining the database backend to use. | |
Database (int fd, int flags=0) | |
Open a single-file Database. | |
virtual | ~Database () |
Destroy this handle on the database. | |
Database (const Database &other) | |
Copying is allowed. | |
void | operator= (const Database &other) |
Assignment is allowed. | |
bool | reopen () |
Re-open the database. | |
virtual void | close () |
Close the database. | |
virtual std::string | get_description () const |
Return a string describing this object. | |
PostingIterator | postlist_begin (const std::string &tname) const |
An iterator pointing to the start of the postlist for a given term. | |
PostingIterator | postlist_end (const std::string &) const |
Corresponding end iterator to postlist_begin(). | |
TermIterator | termlist_begin (Xapian::docid did) const |
An iterator pointing to the start of the termlist for a given document. | |
TermIterator | termlist_end (Xapian::docid) const |
Corresponding end iterator to termlist_begin(). | |
bool | has_positions () const |
Does this database have any positional information? | |
PositionIterator | positionlist_begin (Xapian::docid did, const std::string &tname) const |
An iterator pointing to the start of the position list for a given term in a given document. | |
PositionIterator | positionlist_end (Xapian::docid, const std::string &) const |
Corresponding end iterator to positionlist_begin(). | |
TermIterator | allterms_begin (const std::string &prefix=std::string()) const |
An iterator which runs across all terms with a given prefix. | |
TermIterator | allterms_end (const std::string &=std::string()) const |
Corresponding end iterator to allterms_begin(prefix). | |
Xapian::doccount | get_doccount () const |
Get the number of documents in the database. | |
Xapian::docid | get_lastdocid () const |
Get the highest document id which has been used in the database. | |
Xapian::doclength | get_avlength () const |
Get the average length of the documents in the database. | |
double | get_average_length () const |
New name for get_avlength(). | |
Xapian::totallength | get_total_length () const |
Get the total length of all the documents in the database. | |
Xapian::doccount | get_termfreq (const std::string &tname) const |
Get the number of documents in the database indexed by a given term. | |
bool | term_exists (const std::string &tname) const |
Check if a given term exists in the database. | |
Xapian::termcount | get_collection_freq (const std::string &tname) const |
Return the total number of occurrences of the given term. | |
Xapian::doccount | get_value_freq (Xapian::valueno slot) const |
Return the frequency of a given value slot. | |
std::string | get_value_lower_bound (Xapian::valueno slot) const |
Get a lower bound on the values stored in the given value slot. | |
std::string | get_value_upper_bound (Xapian::valueno slot) const |
Get an upper bound on the values stored in the given value slot. | |
Xapian::termcount | get_doclength_lower_bound () const |
Get a lower bound on the length of a document in this DB. | |
Xapian::termcount | get_doclength_upper_bound () const |
Get an upper bound on the length of a document in this DB. | |
Xapian::termcount | get_wdf_upper_bound (const std::string &term) const |
Get an upper bound on the wdf of term term. | |
ValueIterator | valuestream_begin (Xapian::valueno slot) const |
Return an iterator over the value in slot slot for each document. | |
ValueIterator | valuestream_end (Xapian::valueno) const |
Return end iterator corresponding to valuestream_begin(). | |
Xapian::termcount | get_doclength (Xapian::docid did) const |
Get the length of a document. | |
Xapian::termcount | get_unique_terms (Xapian::docid did) const |
Get the number of unique terms in document. | |
void | keep_alive () |
Send a "keep-alive" to remote databases to stop them timing out. | |
Xapian::Document | get_document (Xapian::docid did) const |
Get a document from the database, given its document id. | |
Xapian::Document | get_document (Xapian::docid did, unsigned flags) const |
Get a document from the database, given its document id. | |
std::string | get_spelling_suggestion (const std::string &word, unsigned max_edit_distance=2) const |
Suggest a spelling correction. | |
Xapian::TermIterator | spellings_begin () const |
An iterator which returns all the spelling correction targets. | |
Xapian::TermIterator | spellings_end () const |
Corresponding end iterator to spellings_begin(). | |
Xapian::TermIterator | synonyms_begin (const std::string &term) const |
An iterator which returns all the synonyms for a given term. | |
Xapian::TermIterator | synonyms_end (const std::string &) const |
Corresponding end iterator to synonyms_begin(term). | |
Xapian::TermIterator | synonym_keys_begin (const std::string &prefix=std::string()) const |
An iterator which returns all terms which have synonyms. | |
Xapian::TermIterator | synonym_keys_end (const std::string &=std::string()) const |
Corresponding end iterator to synonym_keys_begin(prefix). | |
std::string | get_metadata (const std::string &key) const |
Get the user-specified metadata associated with a given key. | |
Xapian::TermIterator | metadata_keys_begin (const std::string &prefix=std::string()) const |
An iterator which returns all user-specified metadata keys. | |
Xapian::TermIterator | metadata_keys_end (const std::string &=std::string()) const |
Corresponding end iterator to metadata_keys_begin(). | |
std::string | get_uuid () const |
Get a UUID for the database. | |
bool | locked () const |
Test if this database is currently locked for writing. | |
Xapian::rev | get_revision () const |
Get the revision of the database. | |
void | compact (const std::string &output, unsigned flags=0, int block_size=0) |
Produce a compact version of this database. | |
void | compact (int fd, unsigned flags=0, int block_size=0) |
Produce a compact version of this database. | |
void | compact (const std::string &output, unsigned flags, int block_size, Xapian::Compactor &compactor) |
Produce a compact version of this database. | |
void | compact (int fd, unsigned flags, int block_size, Xapian::Compactor &compactor) |
Produce a compact version of this database. | |
Static Public Member Functions | |
static size_t | check (const std::string &path, int opts=0, std::ostream *out=NULL) |
Check the integrity of a database or database table. | |
static size_t | check (int fd, int opts=0, std::ostream *out=NULL) |
Check the integrity of a single file database. | |
This class is used to access a database, or a group of databases.
For searching, this class is used in conjunction with an Enquire object.
InvalidArgumentError | will be thrown if an invalid argument is supplied, for example, an unknown database type. |
DatabaseOpeningError | may be thrown if the database cannot be opened (for example, a required file cannot be found). |
DatabaseVersionError | may be thrown if the database is in an unsupported format (for example, created by a newer version of Xapian which uses an incompatible format). |
Xapian::Database::Database | ( | const std::string & | path, | int | flags = 0 | ) |
explicit |
Open a Database, automatically determining the database backend to use.
path | directory that the database is stored in. |
flags | Bitwise-or of Xapian::DB_* constants. |
Xapian::Database::Database | ( | int | fd, | int | flags = 0 | ) |
explicit |
Open a single-file Database.
This method opens a single-file Database given a file descriptor open on it. Xapian starts reading at the current file offset, which allows a single-file database to be easily embedded within another file.
fd | file descriptor for the file. Xapian takes ownership of this and will close it when the database is closed. |
flags | Bitwise-or of Xapian::DB_* constants. |
Xapian::Database::~Database | ( | ) |
virtual |
Destroy this handle on the database.
If there are no copies of this object remaining, the database(s) will be closed.
Xapian::Database::Database | ( | const Database & | other | ) |
Copying is allowed.
The internals are reference counted, so copying is cheap.
other | The object to copy. |
void Xapian::Database::add_database | ( | const Database & | database | ) |
Add an existing database (or group of databases) to those accessed by this object.
database | the database(s) to add. |
TermIterator Xapian::Database::allterms_begin | ( | const std::string & | prefix = std::string() | ) | const |
An iterator which runs across all terms with a given prefix.
prefix | The prefix to restrict the returned terms to (default: iterate all terms) |
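static size_t Xapian::Database::check | ( | const std::string & | path, | int | opts = 0, | std::ostream * | out = NULL | ) |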
inlinestatic |
Check the integrity of a database or database table.
path | Path to database or table |
opts | Options to use for check |
out | std::ostream to write output to (NULL for no output) |
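static size_t Xapian::Database::check | ( | int | fd, | int | opts = 0, | std::ostream * | out = NULL | ) |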
inlinestatic |
Check the integrity of a single file database.
fd | file descriptor for the database. The current file offset is used, allowing checking a single file database which is embedded within another file. Xapian takes ownership of the file descriptor and will close it before returning. |
opts | Options to use for check |
out | std::ostream to write output to (NULL for no output) |
void Xapian::Database::close | ( | ) |
virtual |
Close the database.
This closes the database and closes all its file handles.
For a WritableDatabase, if a transaction is active it will be aborted, while if no transaction is active commit() will be implicitly called. Also the write lock is released.
Closing a database cannot be undone - in particular, calling reopen() after close() will not reopen it, but will instead throw a Xapian::DatabaseError exception.
Calling close() again on a database which has already been closed has no effect (and doesn't raise an exception).
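After close() has been called, calls to other methods of the database, and to methods of other objects associated with the database, will either:
- behave exactly as they would have done if the database had not been closed (this can only happen if all the required data is already cached), or
- raise a Xapian::DatabaseError exception indicating that the database is closed.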
The reason for this behaviour is that otherwise we'd have to check that the database is still open on every method call on every object associated with a Database, when in many cases they are working on data which has already been loaded and so they are able to just behave correctly.
This method was added in Xapian 1.1.0.
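void Xapian::Database::compact | ( | const std::string & | output, | unsigned | flags, | int | block_size, | Xapian::Compactor & | compactor | ) |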
inline |
Produce a compact version of this database.
New 1.3.4. Various methods of the Compactor class were deprecated in 1.3.4.
The compactor functor allows handling progress output and specifying how user metadata is merged.
output | Path to write the compact version to. This can be the same as an input if that input is a stub database (in which case the database(s) listed in the stub will be compacted to a new database and then the stub will be atomically updated to point to this new database). |
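flags | Any of the following combined using bitwise-or (| in C++): Xapian::DBCOMPACT_NO_RENUMBER (keep the existing document ids instead of renumbering them), Xapian::DBCOMPACT_MULTIPASS (merge the postlists in multiple passes), Xapian::DBCOMPACT_SINGLE_FILE (produce a single-file database), and at most one compaction level out of Xapian::Compactor::STANDARD, Xapian::Compactor::FULL (the default) and Xapian::Compactor::FULLER.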
|
block_size | This specifies the block size (in bytes) to use for the output. For glass, the block size must be a power of 2 between 2048 and 65536 (inclusive), and the default (also used if an invalid value is passed) is 8192 bytes. |
compactor | Functor |
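void Xapian::Database::compact | ( | const std::string & | output, | unsigned | flags = 0, | int | block_size = 0 | ) |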
inline |
Produce a compact version of this database.
New 1.3.4. Various methods of the Compactor class were deprecated in 1.3.4.
output | Path to write the compact version to. This can be the same as an input if that input is a stub database (in which case the database(s) listed in the stub will be compacted to a new database and then the stub will be atomically updated to point to this new database). |
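flags | Any of the following combined using bitwise-or (| in C++): Xapian::DBCOMPACT_NO_RENUMBER (keep the existing document ids instead of renumbering them), Xapian::DBCOMPACT_MULTIPASS (merge the postlists in multiple passes), Xapian::DBCOMPACT_SINGLE_FILE (produce a single-file database), and at most one compaction level out of Xapian::Compactor::STANDARD, Xapian::Compactor::FULL (the default) and Xapian::Compactor::FULLER.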
|
block_size | This specifies the block size (in bytes) to use for the output. For glass, the block size must be a power of 2 between 2048 and 65536 (inclusive), and the default (also used if an invalid value is passed) is 8192 bytes. |
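void Xapian::Database::compact | ( | int | fd, | unsigned | flags, | int | block_size, | Xapian::Compactor & | compactor | ) |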
inline |
Produce a compact version of this database.
New 1.3.4. Various methods of the Compactor class were deprecated in 1.3.4.
The compactor functor allows handling progress output and specifying how user metadata is merged.
This variant writes a single-file database to the specified file descriptor. Only the glass backend supports such databases, so this form is only supported for this backend.
fd | File descriptor to write the compact version to. The descriptor needs to be readable and writable (open with O_RDWR) and seekable. The current file offset is used, allowing compacting to a single file database embedded within another file. Xapian takes ownership of the file descriptor and will close it before returning. |
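flags | Any of the following combined using bitwise-or (| in C++): Xapian::DBCOMPACT_NO_RENUMBER (keep the existing document ids instead of renumbering them), Xapian::DBCOMPACT_MULTIPASS (merge the postlists in multiple passes), Xapian::DBCOMPACT_SINGLE_FILE (produce a single-file database), and at most one compaction level out of Xapian::Compactor::STANDARD, Xapian::Compactor::FULL (the default) and Xapian::Compactor::FULLER.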
|
block_size | This specifies the block size (in bytes) to use for the output. For glass, the block size must be a power of 2 between 2048 and 65536 (inclusive), and the default (also used if an invalid value is passed) is 8192 bytes. |
compactor | Functor |
void Xapian::Database::compact | ( | int | fd, | unsigned | flags = 0, | int | block_size = 0 | ) |
inline |
Produce a compact version of this database.
New 1.3.4. Various methods of the Compactor class were deprecated in 1.3.4.
This variant writes a single-file database to the specified file descriptor. Only the glass backend supports such databases, so this form is only supported for this backend.
fd | File descriptor to write the compact version to. The descriptor needs to be readable and writable (open with O_RDWR) and seekable. The current file offset is used, allowing compacting to a single file database embedded within another file. Xapian takes ownership of the file descriptor and will close it before returning. |
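flags | Any of the following combined using bitwise-or (| in C++): Xapian::DBCOMPACT_NO_RENUMBER (keep the existing document ids instead of renumbering them), Xapian::DBCOMPACT_MULTIPASS (merge the postlists in multiple passes), Xapian::DBCOMPACT_SINGLE_FILE (produce a single-file database), and at most one compaction level out of Xapian::Compactor::STANDARD, Xapian::Compactor::FULL (the default) and Xapian::Compactor::FULLER.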
|
block_size | This specifies the block size (in bytes) to use for the output. For glass, the block size must be a power of 2 between 2048 and 65536 (inclusive), and the default (also used if an invalid value is passed) is 8192 bytes. |
double Xapian::Database::get_average_length | ( | ) | const |
inline |
New name for get_avlength().
Added for forward compatibility with the next release series.
Xapian::termcount Xapian::Database::get_collection_freq | ( | const std::string & | tname | ) | const |
Return the total number of occurrences of the given term.
This is the sum of the number of occurrences of the term in each document it indexes: i.e., the sum of the within document frequencies of the term.
tname | The term whose collection frequency is being requested. |
std::string Xapian::Database::get_description | ( | ) | const |
virtual |
Return a string describing this object.
Reimplemented in Xapian::WritableDatabase.
Xapian::termcount Xapian::Database::get_doclength_lower_bound | ( | ) | const |
Get a lower bound on the length of a document in this DB.
This bound does not include any zero-length documents.
Xapian::Document Xapian::Database::get_document | ( | Xapian::docid | did | ) | const |
Get a document from the database, given its document id.
This method returns a Xapian::Document object which provides the information about a document.
did | The document id of the document to retrieve. |
Xapian::DocNotFoundError | The document specified could not be found in the database. |
Xapian::InvalidArgumentError | did was 0, which is not a valid document id. |
Xapian::Document Xapian::Database::get_document | ( | Xapian::docid | did, |
unsigned | flags | ||
) | const |
Get a document from the database, given its document id.
This method returns a Xapian::Document object which provides the information about a document.
did | The document id of the document to retrieve. |
flags | Zero or more flags bitwise-or-ed together (currently only Xapian::DOC_ASSUME_VALID is supported). |
Xapian::DocNotFoundError | The document specified could not be found in the database. |
Xapian::InvalidArgumentError | did was 0, which is not a valid document id. |
std::string Xapian::Database::get_metadata | ( | const std::string & | key | ) | const |
Get the user-specified metadata associated with a given key.
User-specified metadata allows you to store arbitrary information in the form of (key, value) pairs. See WritableDatabase::set_metadata() for more information.
When invoked on a Xapian::Database object representing multiple databases, currently only the metadata for the first is considered but this behaviour may change in the future.
If there is no piece of metadata associated with the specified key, an empty string is returned (this applies even for backends which don't support metadata).
Empty keys are not valid, and specifying one will cause an exception.
key | The key of the metadata item to access. |
Xapian::InvalidArgumentError | will be thrown if the key supplied is empty. |
Xapian::rev Xapian::Database::get_revision | ( | ) | const |
Get the revision of the database.
The revision is an unsigned integer which increases with each commit.
The database must have exactly one sub-database, which must be of type chert or glass. Otherwise an exception will be thrown.
Experimental - see https://xapian.org/docs/deprecation#experimental-features
std::string Xapian::Database::get_spelling_suggestion | ( | const std::string & | word, |
unsigned | max_edit_distance = 2 |
||
) | const |
Suggest a spelling correction.
word | The potentially misspelled word. |
max_edit_distance | Only consider words which are at most max_edit_distance edits from word. An edit is a character insertion, deletion, or the transposition of two adjacent characters (default is 2). |
Xapian::totallength Xapian::Database::get_total_length | ( | ) | const |
Get the total length of all the documents in the database.
Added in Xapian 1.4.5.
std::string Xapian::Database::get_uuid | ( | ) | const |
Get a UUID for the database.
The UUID will persist for the lifetime of the database.
Replicas (eg, made with the replication protocol, or by copying all the database files) will have the same UUID. However, copies (made with copydatabase, or xapian-compact) will have different UUIDs.
If the backend does not support UUIDs or this database has no subdatabases, the UUID will be empty.
If this database has multiple sub-databases, the UUID string will contain the UUIDs of all the sub-databases.
Xapian::doccount Xapian::Database::get_value_freq | ( | Xapian::valueno | slot | ) | const |
Return the frequency of a given value slot.
This is the number of documents which have a (non-empty) value stored in the slot.
slot | The value slot to examine. |
std::string Xapian::Database::get_value_lower_bound | ( | Xapian::valueno | slot | ) | const |
Get a lower bound on the values stored in the given value slot.
If there are no values stored in the given value slot, this will return an empty string.
slot | The value slot to examine. |
std::string Xapian::Database::get_value_upper_bound | ( | Xapian::valueno | slot | ) | const |
Get an upper bound on the values stored in the given value slot.
If there are no values stored in the given value slot, this will return an empty string.
slot | The value slot to examine. |
void Xapian::Database::keep_alive | ( | ) |
Send a "keep-alive" to remote databases to stop them timing out.
Has no effect on non-remote databases.
bool Xapian::Database::locked | ( | ) | const |
Test if this database is currently locked for writing.
If the underlying object is actually a WritableDatabase, always returns true.
Otherwise tests if there's a writer holding the lock (or if we can't test for a lock without taking it on the current platform, throw Xapian::UnimplementedError). If there's an error while trying to test the lock, throws Xapian::DatabaseLockError.
For multi-databases, this tests each sub-database and returns true if any of them are locked.
Xapian::TermIterator Xapian::Database::metadata_keys_begin | ( | const std::string & | prefix = std::string() | ) | const |
An iterator which returns all user-specified metadata keys.
When invoked on a Xapian::Database object representing multiple databases, currently only the metadata for the first is considered but this behaviour may change in the future.
If the backend doesn't support metadata, then this method returns an iterator which compares equal to that returned by metadata_keys_end().
prefix | If non-empty, only keys with this prefix are returned. |
Xapian::UnimplementedError | will be thrown if the backend implements user-specified metadata, but doesn't implement iterating its keys (currently this happens for the InMemory backend). |
void Xapian::Database::operator= | ( | const Database & | other | ) |
Assignment is allowed.
The internals are reference counted, so assignment is cheap.
other | The object to copy. |
PostingIterator Xapian::Database::postlist_begin | ( | const std::string & | tname | ) | const |
An iterator pointing to the start of the postlist for a given term.
tname | The termname to iterate postings for. If the term name is the empty string, the iterator returned will list all the documents in the database. Such an iterator will always return a WDF value of 1, since there is no obvious meaning for this quantity in this case. |
bool Xapian::Database::reopen | ( | ) |
Re-open the database.
This re-opens the database(s) to the latest available version(s). It can be used either to make sure the latest results are returned, or to recover from a Xapian::DatabaseModifiedError.
Calling reopen() on a database which has been closed (with close()) will always raise a Xapian::DatabaseError.
Xapian::TermIterator Xapian::Database::spellings_begin | ( | ) | const |
An iterator which returns all the spelling correction targets.
This returns all the words which are considered as targets for the spelling correction algorithm. The frequency of each word is available as the term frequency of each entry in the returned iterator.
Xapian::TermIterator Xapian::Database::synonym_keys_begin | ( | const std::string & | prefix = std::string() | ) | const |
An iterator which returns all terms which have synonyms.
prefix | If non-empty, only terms with this prefix are returned. |
Xapian::TermIterator Xapian::Database::synonyms_begin | ( | const std::string & | term | ) | const |
An iterator which returns all the synonyms for a given term.
term | The term to return synonyms for. |
bool Xapian::Database::term_exists | ( | const std::string & | tname | ) | const |
Check if a given term exists in the database.
tname | The term to test the existence of. |
TermIterator Xapian::Database::termlist_begin | ( | Xapian::docid | did | ) | const |
An iterator pointing to the start of the termlist for a given document.
did | The document id of the document to iterate terms for. |