mirror of https://github.com/lammps/lammps.git
start documenting tokenizer classes
This commit is contained in:
parent
31c91a8928
commit
96ee132e85
|
@ -422,6 +422,8 @@ INPUT = @LAMMPS_SOURCE_DIR@/utils.cpp \
|
||||||
@LAMMPS_SOURCE_DIR@/atom.h \
|
@LAMMPS_SOURCE_DIR@/atom.h \
|
||||||
@LAMMPS_SOURCE_DIR@/input.cpp \
|
@LAMMPS_SOURCE_DIR@/input.cpp \
|
||||||
@LAMMPS_SOURCE_DIR@/input.h \
|
@LAMMPS_SOURCE_DIR@/input.h \
|
||||||
|
@LAMMPS_SOURCE_DIR@/tokenizer.cpp \
|
||||||
|
@LAMMPS_SOURCE_DIR@/tokenizer.h \
|
||||||
|
|
||||||
# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
|
# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
|
||||||
# directories that are symbolic links (a Unix file system feature) are excluded
|
# directories that are symbolic links (a Unix file system feature) are excluded
|
||||||
|
|
|
@ -880,3 +880,32 @@ Convenience functions
|
||||||
|
|
||||||
.. doxygenfunction:: timespec2seconds
|
.. doxygenfunction:: timespec2seconds
|
||||||
:project: progguide
|
:project: progguide
|
||||||
|
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
Tokenizer classes
|
||||||
|
=================
|
||||||
|
|
||||||
|
The purpose of the tokenizer classes is to simplify the recurring task
|
||||||
|
of breaking lines of text down into words and/or numbers.
|
||||||
|
Traditionally, LAMMPS code has used the ``strtok()`` function from
|
||||||
|
the C library for that purpose, but that function has two significant
|
||||||
|
disadvantages: 1) it cannot be used concurrently from different LAMMPS
|
||||||
|
instances since it stores its state in a global variable and 2) it
|
||||||
|
modifies the string that it is processing. These classes were
|
||||||
|
implemented to avoid both of these issues and also to reduce the amount
|
||||||
|
of code that needs to be written.
|
||||||
|
|
||||||
|
The basic procedure is to create an instance of the class with the
|
||||||
|
string to be processed as an argument and then loop until all
|
||||||
|
available tokens are read. The constructor has a default set of
|
||||||
|
separator characters, but that can be overridden. The default separators
|
||||||
|
are all "whitespace" characters, i.e. the space character, the tab
|
||||||
|
character, the carriage return character, the linefeed character, and
|
||||||
|
the form feed character.
|
||||||
|
|
||||||
|
.. doxygenclass:: LAMMPS_NS::Tokenizer
|
||||||
|
:project: progguide
|
||||||
|
|
||||||
|
.. doxygenclass:: LAMMPS_NS::ValueTokenizer
|
||||||
|
:project: progguide
|
||||||
|
|
|
@ -27,6 +27,11 @@ namespace LAMMPS_NS {
|
||||||
|
|
||||||
#define TOKENIZER_DEFAULT_SEPARATORS " \t\r\n\f"
|
#define TOKENIZER_DEFAULT_SEPARATORS " \t\r\n\f"
|
||||||
|
|
||||||
|
/*! Class for splitting text into words
|
||||||
|
*
|
||||||
|
* \sa ValueTokenizer
|
||||||
|
*/
|
||||||
|
|
||||||
class Tokenizer {
|
class Tokenizer {
|
||||||
std::string text;
|
std::string text;
|
||||||
std::string separators;
|
std::string separators;
|
||||||
|
@ -39,13 +44,46 @@ public:
|
||||||
Tokenizer& operator=(const Tokenizer&) = default;
|
Tokenizer& operator=(const Tokenizer&) = default;
|
||||||
Tokenizer& operator=(Tokenizer&&) = default;
|
Tokenizer& operator=(Tokenizer&&) = default;
|
||||||
|
|
||||||
|
/*! Reposition the tokenizer state to the first word,
|
||||||
|
* i.e. the first non-separator character
|
||||||
|
*/
|
||||||
void reset();
|
void reset();
|
||||||
|
|
||||||
|
/*! Skip over a given number of tokens
|
||||||
|
*
|
||||||
|
* \param n number of tokens to skip over
|
||||||
|
*/
|
||||||
void skip(int n);
|
void skip(int n);
|
||||||
|
|
||||||
|
/*! Indicate whether more tokens are available
|
||||||
|
*
|
||||||
|
* \return true if there are more tokens, false if not
|
||||||
|
*/
|
||||||
bool has_next() const;
|
bool has_next() const;
|
||||||
|
|
||||||
|
/*! Search the text to be processed for a sub-string.
|
||||||
|
*
|
||||||
|
* \param str string to be searched for
|
||||||
|
* \return true if string was found, false if not
|
||||||
|
*/
|
||||||
bool contains(const std::string & str) const;
|
bool contains(const std::string & str) const;
|
||||||
|
|
||||||
|
/*! Retrieve next token.
|
||||||
|
*
|
||||||
|
* \return string with the next token
|
||||||
|
*/
|
||||||
std::string next();
|
std::string next();
|
||||||
|
|
||||||
|
/*! Count number of tokens in text.
|
||||||
|
*
|
||||||
|
* \return number of counted tokens
|
||||||
|
*/
|
||||||
size_t count();
|
size_t count();
|
||||||
|
|
||||||
|
/*! Retrieve the entire text converted to an STL vector of tokens.
|
||||||
|
*
|
||||||
|
* \return STL vector containing the tokens of the entire text
|
||||||
|
*/
|
||||||
std::vector<std::string> as_vector();
|
std::vector<std::string> as_vector();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -74,6 +112,11 @@ public:
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*! Class for reading text with numbers
|
||||||
|
*
|
||||||
|
* \sa Tokenizer
|
||||||
|
*/
|
||||||
|
|
||||||
class ValueTokenizer {
|
class ValueTokenizer {
|
||||||
Tokenizer tokens;
|
Tokenizer tokens;
|
||||||
public:
|
public:
|
||||||
|
|
Loading…
Reference in New Issue