forked from lijiext/lammps
start documenting tokenizer classes
This commit is contained in:
parent
31c91a8928
commit
96ee132e85
|
@ -422,6 +422,8 @@ INPUT = @LAMMPS_SOURCE_DIR@/utils.cpp \
|
|||
@LAMMPS_SOURCE_DIR@/atom.h \
|
||||
@LAMMPS_SOURCE_DIR@/input.cpp \
|
||||
@LAMMPS_SOURCE_DIR@/input.h \
|
||||
@LAMMPS_SOURCE_DIR@/tokenizer.cpp \
|
||||
@LAMMPS_SOURCE_DIR@/tokenizer.h \
|
||||
|
||||
# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
|
||||
# directories that are symbolic links (a Unix file system feature) are excluded
|
||||
|
|
|
@ -880,3 +880,32 @@ Convenience functions
|
|||
|
||||
.. doxygenfunction:: timespec2seconds
|
||||
:project: progguide
|
||||
|
||||
---------------------------
|
||||
|
||||
Tokenizer classes
|
||||
=================
|
||||
|
||||
The purpose of the tokenizer classes is to simplify the recurring task
|
||||
of breaking lines of text down into words and/or numbers.
|
||||
Traditionally, LAMMPS code has used the ``strtok()`` function from
|
||||
the C library for that purpose, but that function has two significant
|
||||
disadvantages: 1) it cannot be used concurrently from different LAMMPS
|
||||
instances since it stores its state in a global variable and 2) it
|
||||
modifies the string that it is processing. These classes were
|
||||
implemented to avoid both of these issues and also to reduce the amount
|
||||
of code that needs to be written.
|
||||
|
||||
The basic procedure is to create an instance of the class with the
|
||||
string to be processed as an argument and then do a loop until all
|
||||
available tokens are read. The constructor has a default set of
|
||||
separator characters, but that can be overridden. The default separators
|
||||
are all "whitespace" characters, i.e. the space character, the tab
|
||||
character, the carriage return character, the linefeed character, and
|
||||
the form feed character.
|
||||
|
||||
.. doxygenclass:: LAMMPS_NS::Tokenizer
|
||||
:project: progguide
|
||||
|
||||
.. doxygenclass:: LAMMPS_NS::ValueTokenizer
|
||||
:project: progguide
|
||||
|
|
|
@ -27,6 +27,11 @@ namespace LAMMPS_NS {
|
|||
|
||||
#define TOKENIZER_DEFAULT_SEPARATORS " \t\r\n\f"
|
||||
|
||||
/*! Class for splitting text into words
|
||||
*
|
||||
* \sa ValueTokenizer
|
||||
*/
|
||||
|
||||
class Tokenizer {
|
||||
std::string text;
|
||||
std::string separators;
|
||||
|
@ -39,13 +44,46 @@ public:
|
|||
Tokenizer& operator=(const Tokenizer&) = default;
|
||||
Tokenizer& operator=(Tokenizer&&) = default;
|
||||
|
||||
/*! Reposition the tokenizer state to the first word,
|
||||
* i.e. the first non-separator character
|
||||
*/
|
||||
void reset();
|
||||
|
||||
/*! Skip over a given number of tokens
|
||||
*
|
||||
* \param n number of tokens to skip over
|
||||
*/
|
||||
void skip(int n);
|
||||
|
||||
/*! Indicate whether more tokens are available
|
||||
*
|
||||
* \return true if there are more tokens, false if not
|
||||
*/
|
||||
bool has_next() const;
|
||||
|
||||
/*! Search the text to be processed for a sub-string.
|
||||
*
|
||||
* \param str string to be searched for
|
||||
* \return true if string was found, false if not
|
||||
*/
|
||||
bool contains(const std::string & str) const;
|
||||
|
||||
/*! Retrieve next token.
|
||||
*
|
||||
* \return string with the next token
|
||||
*/
|
||||
std::string next();
|
||||
|
||||
/*! Count number of tokens in text.
|
||||
*
|
||||
* \return number of counted tokens
|
||||
*/
|
||||
size_t count();
|
||||
|
||||
/*! Retrieve the entire text converted to an STL vector of tokens.
|
||||
*
|
||||
* \return STL vector containing the tokens
|
||||
*/
|
||||
std::vector<std::string> as_vector();
|
||||
};
|
||||
|
||||
|
@ -74,6 +112,11 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
/*! Class for reading text with numbers
|
||||
*
|
||||
* \sa Tokenizer
|
||||
*/
|
||||
|
||||
class ValueTokenizer {
|
||||
Tokenizer tokens;
|
||||
public:
|
||||
|
|
Loading…
Reference in New Issue