complete documentation of tokenizer classes

commit 4b0999e167 (parent 96ee132e85)
@ -902,10 +902,58 @@ available tokens are read. The constructor has a default set of
separator characters, but that can be overridden. The default separators
are all "whitespace" characters, i.e. the space character, the tabulator
character, the carriage return character, the linefeed character, and
the form feed character.
the form feed character. Below is a small example using the
tokenizer class to print the individual entries of the PATH environment
variable.

.. code-block:: C++

   #include "tokenizer.h"
   #include <cstdlib>
   #include <string>
   #include <iostream>

   using namespace LAMMPS_NS;

   int main(int, char **)
   {
       const char *path = getenv("PATH");

       if (path != nullptr) {
           Tokenizer p(path,":");
           while (p.has_next())
               std::cout << "Entry: " << p.next() << "\n";
       }
       return 0;
   }

Most tokenizer operations cannot fail except for
:cpp:func:`LAMMPS_NS::Tokenizer::next` (when used without first
checking with :cpp:func:`LAMMPS_NS::Tokenizer::has_next`) and
:cpp:func:`LAMMPS_NS::Tokenizer::skip`. In case of failure, the class
will throw an exception, so you may need to wrap the code using the
tokenizer into a ``try`` / ``catch`` block to handle errors. The
:cpp:class:`LAMMPS_NS::ValueTokenizer` class may also throw an exception
when a (type of) number is requested as the next token that is not
compatible with the string representing the next word.
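
As a further illustration (not part of the original example), the sketch below
shows how such a ``try`` / ``catch`` block might look when parsing numbers with
the :cpp:class:`LAMMPS_NS::ValueTokenizer` class; the input string is made up
for this example:

.. code-block:: C++

   #include "tokenizer.h"
   #include <iostream>
   #include <string>

   using namespace LAMMPS_NS;

   int main(int, char **)
   {
       // hypothetical input: the third word is not a valid integer
       std::string line("1 2 three 4");

       try {
           ValueTokenizer values(line, " ");
           while (values.has_next())
               std::cout << "Value: " << values.next_int() << "\n";
       } catch (TokenizerException &e) {
           // InvalidIntegerException is derived from TokenizerException
           std::cout << "Parse error: " << e.what() << "\n";
       }
       return 0;
   }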

.. doxygenclass:: LAMMPS_NS::Tokenizer
   :project: progguide
   :members:

.. doxygenclass:: LAMMPS_NS::TokenizerException
   :project: progguide
   :members:

.. doxygenclass:: LAMMPS_NS::ValueTokenizer
   :project: progguide
   :members:

.. doxygenclass:: LAMMPS_NS::InvalidIntegerException
   :project: progguide
   :members: what

.. doxygenclass:: LAMMPS_NS::InvalidFloatException
   :project: progguide
   :members: what

@ -30,6 +30,13 @@ TokenizerException::TokenizerException(const std::string & msg, const std::strin
    }
}

/** Class for splitting text into words
 *
 * This tokenizer will break down a string into sub-strings (i.e. words)
 * separated by the given separator characters.
 *
 * \sa LAMMPS_NS::ValueTokenizer TokenizerException */

Tokenizer::Tokenizer(const std::string & str, const std::string & separators) :
    text(str), separators(separators), start(0), ntokens(std::string::npos)
{
@ -48,14 +55,23 @@ Tokenizer::Tokenizer(Tokenizer && rhs) :
    reset();
}

/*! Reposition the tokenizer state to the first word,
 * i.e. the first non-separator character */
void Tokenizer::reset() {
    start = text.find_first_not_of(separators);
}

/*! Search the text to be processed for a sub-string.
 *
 * \param str string to be searched for
 * \return true if string was found, false if not */
bool Tokenizer::contains(const std::string & str) const {
    return text.find(str) != std::string::npos;
}

/*! Skip over a given number of tokens
 *
 * \param n number of tokens to skip over */
void Tokenizer::skip(int n) {
    for(int i = 0; i < n; ++i) {
        if(!has_next()) throw TokenizerException("No more tokens", "");
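
As an aside, a minimal made-up sketch of how ``contains()`` and ``skip()`` can be
combined when scanning a line of input (the input line is hypothetical):

.. code-block:: C++

   #include "tokenizer.h"
   #include <iostream>
   #include <string>

   using namespace LAMMPS_NS;

   int main(int, char **)
   {
       // hypothetical input line in the style of a LAMMPS command
       std::string line("velocity all create 300.0 4928459");
       Tokenizer words(line, " ");

       if (words.contains("create")) {
           words.skip(3);   // skip "velocity", "all", and "create"
           std::cout << "temperature: " << words.next() << "\n";
       }
       return 0;
   }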
@ -70,10 +86,16 @@ void Tokenizer::skip(int n) {
    }
}

/*! Indicate whether more tokens are available
 *
 * \return true if there are more tokens, false if not */
bool Tokenizer::has_next() const {
    return start != std::string::npos;
}

/*! Retrieve next token.
 *
 * \return string with the next token */
std::string Tokenizer::next() {
    if(!has_next()) throw TokenizerException("No more tokens", "");
@ -90,6 +112,9 @@ std::string Tokenizer::next() {
    return token;
}

/*! Count number of tokens in text.
 *
 * \return number of counted tokens */
size_t Tokenizer::count() {
    // lazy evaluation
    if (ntokens == std::string::npos) {
@ -98,6 +123,9 @@ size_t Tokenizer::count() {
    return ntokens;
}

/*! Retrieve the entire text converted to an STL vector of tokens.
 *
 * \return The STL vector */
std::vector<std::string> Tokenizer::as_vector() {
    // store current state
    size_t current = start;
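
Again as an illustration only, ``count()`` and ``as_vector()`` could be used as
sketched below; separate tokenizer instances are used here so no assumption is
made about how the internal position is affected:

.. code-block:: C++

   #include "tokenizer.h"
   #include <iostream>
   #include <string>
   #include <vector>

   using namespace LAMMPS_NS;

   int main(int, char **)
   {
       std::cout << "tokens: " << Tokenizer("one two three", " ").count() << "\n";

       // collect all words into an STL vector in one call
       std::vector<std::string> words = Tokenizer("one two three", " ").as_vector();
       for (const auto &w : words)
           std::cout << w << "\n";
       return 0;
   }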
@ -117,6 +145,9 @@ std::vector<std::string> Tokenizer::as_vector() {
    return tokens;
}

/*! Class for reading text with numbers
 *
 * \sa LAMMPS_NS::Tokenizer InvalidIntegerException InvalidFloatException */

ValueTokenizer::ValueTokenizer(const std::string & str, const std::string & separators) : tokens(str, separators) {
}
@ -127,14 +158,24 @@ ValueTokenizer::ValueTokenizer(const ValueTokenizer & rhs) : tokens(rhs.tokens)
ValueTokenizer::ValueTokenizer(ValueTokenizer && rhs) : tokens(std::move(rhs.tokens)) {
}

/*! Indicate whether more tokens are available
 *
 * \return true if there are more tokens, false if not */
bool ValueTokenizer::has_next() const {
    return tokens.has_next();
}

/*! Search the text to be processed for a sub-string.
 *
 * \param value string with value to be searched for
 * \return true if string was found, false if not */
bool ValueTokenizer::contains(const std::string & value) const {
    return tokens.contains(value);
}

/*! Retrieve next token
 *
 * \return string with next token */
std::string ValueTokenizer::next_string() {
    if (has_next()) {
        std::string value = tokens.next();
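
For illustration, a hypothetical keyword/value line could be split with
``next_string()`` followed by one of the typed conversion methods documented
below (the input line is made up):

.. code-block:: C++

   #include "tokenizer.h"
   #include <iostream>
   #include <string>

   using namespace LAMMPS_NS;

   int main(int, char **)
   {
       // made-up header line in the style of a data file
       ValueTokenizer t("atoms 2048", " ");
       std::string keyword = t.next_string();
       int count = t.next_int();
       std::cout << keyword << " = " << count << "\n";
       return 0;
   }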
@ -143,6 +184,9 @@ std::string ValueTokenizer::next_string() {
    return "";
}

/*! Retrieve next token and convert to int
 *
 * \return value of next token */
int ValueTokenizer::next_int() {
    if (has_next()) {
        std::string current = tokens.next();
@ -155,6 +199,9 @@ int ValueTokenizer::next_int() {
    return 0;
}

/*! Retrieve next token and convert to bigint
 *
 * \return value of next token */
bigint ValueTokenizer::next_bigint() {
    if (has_next()) {
        std::string current = tokens.next();
@ -167,6 +214,9 @@ bigint ValueTokenizer::next_bigint() {
    return 0;
}

/*! Retrieve next token and convert to tagint
 *
 * \return value of next token */
tagint ValueTokenizer::next_tagint() {
    if (has_next()) {
        std::string current = tokens.next();
@ -179,6 +229,9 @@ tagint ValueTokenizer::next_tagint() {
    return 0;
}

/*! Retrieve next token and convert to double
 *
 * \return value of next token */
double ValueTokenizer::next_double() {
    if (has_next()) {
        std::string current = tokens.next();
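
A small made-up sketch of reading a list of numbers with the typed accessors;
``next_bigint()`` and ``next_tagint()`` are used the same way for LAMMPS' wide
integer types:

.. code-block:: C++

   #include "tokenizer.h"
   #include <iostream>

   using namespace LAMMPS_NS;

   int main(int, char **)
   {
       // hypothetical whitespace-separated list of numbers
       ValueTokenizer values("0.5 1.25 2.75", " ");
       double sum = 0.0;

       while (values.has_next())
           sum += values.next_double();
       std::cout << "sum = " << sum << "\n";   // prints: sum = 4.5
       return 0;
   }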
@ -191,10 +244,16 @@ double ValueTokenizer::next_double() {
    return 0.0;
}

/*! Skip over a given number of tokens
 *
 * \param n number of tokens to skip over */
void ValueTokenizer::skip(int n) {
    tokens.skip(n);
}

/*! Count number of tokens in text.
 *
 * \return number of counted tokens */
size_t ValueTokenizer::count() {
    return tokens.count();
}

@ -27,11 +27,6 @@ namespace LAMMPS_NS {

#define TOKENIZER_DEFAULT_SEPARATORS " \t\r\n\f"

/*! Class for splitting text into words
 *
 * \sa ValueTokenizer
 */

class Tokenizer {
    std::string text;
    std::string separators;
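
Since the documentation text above states that the constructor has a default set
of separators (the ``TOKENIZER_DEFAULT_SEPARATORS`` string defined here), a
sketch constructing a tokenizer without an explicit separator argument could
look like this:

.. code-block:: C++

   #include "tokenizer.h"
   #include <iostream>

   using namespace LAMMPS_NS;

   int main(int, char **)
   {
       // no separator argument: the default " \t\r\n\f" set is assumed
       Tokenizer t("one\ttwo  three\n");
       while (t.has_next())
           std::cout << t.next() << "\n";   // prints one, two, three on separate lines
       return 0;
   }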
@ -44,49 +39,17 @@ public:
    Tokenizer& operator=(const Tokenizer&) = default;
    Tokenizer& operator=(Tokenizer&&) = default;

    /*! Reposition the tokenizer state to the first word,
     * i.e. the first non-separator character
     */
    void reset();

    /*! Skip over a given number of tokens
     *
     * \param n number of tokens to skip over
     */
    void skip(int n);

    /*! Indicate whether more tokens are available
     *
     * \return true if there are more tokens, false if not
     */
    bool has_next() const;

    /*! Search the text to be processed for a sub-string.
     *
     * \param str string to be searched for
     * \return true if string was found, false if not
     */
    bool contains(const std::string & str) const;

    /*! Retrieve next token.
     *
     * \return string with the next token
     */
    std::string next();

    /*! Count number of tokens in text.
     *
     * \return number of counted tokens
     */
    size_t count();

    /*! Retrieve the entire text converted to an STL vector of tokens.
     *
     * \return The STL vector
     */
    std::vector<std::string> as_vector();
};

/** \exception TokenizerException. Contains an error message string. */
class TokenizerException : public std::exception {
    std::string message;
public:
@ -95,28 +58,27 @@ public:
    ~TokenizerException() throw() {
    }

    /** Retrieve message describing the thrown exception
     * \return string with error message */
    virtual const char * what() const throw() {
        return message.c_str();
    }
};

/** \exception InvalidIntegerException. Contains an error message string. */
class InvalidIntegerException : public TokenizerException {
public:
    InvalidIntegerException(const std::string & token) : TokenizerException("Not a valid integer number", token) {
    }
};

/** \exception InvalidFloatException. Contains an error message string. */
class InvalidFloatException : public TokenizerException {
public:
    InvalidFloatException(const std::string & token) : TokenizerException("Not a valid floating-point number", token) {
    }
};

/*! Class for reading text with numbers
 *
 * \sa Tokenizer
 */

class ValueTokenizer {
    Tokenizer tokens;
public: