complete documentation of tokenizer classes

This commit is contained in:
Axel Kohlmeyer 2020-08-30 14:11:14 -04:00
parent 96ee132e85
commit 4b0999e167
No known key found for this signature in database
GPG Key ID: D9B44E93BF0C375A
3 changed files with 113 additions and 44 deletions

View File

@ -902,10 +902,58 @@ available tokens are read. The constructor has a default set of
separator characters, but that can be overridden. The default separators
are all "whitespace" characters, i.e. the space character, the tabulator
character, the carriage return character, the linefeed character, and
the form feed character.
the form feed character. Below is a small code example that uses the
tokenizer class to print the individual entries of the PATH environment
variable.
.. code-block:: C++
#include "tokenizer.h"
#include <cstdlib>
#include <string>
#include <iostream>
using namespace LAMMPS_NS;
int main(int, char **)
{
    // <cstdlib> only guarantees the declaration in namespace std
    const char *path = std::getenv("PATH");
    if (path != nullptr) {
        // split the PATH string at every ':' character
        Tokenizer p(path,":");
        while (p.has_next())
            std::cout << "Entry: " << p.next() << "\n";
    }
    return 0;
}
Most tokenizer operations cannot fail except for
:cpp:func:`LAMMPS_NS::Tokenizer::next` (when used without first
checking with :cpp:func:`LAMMPS_NS::Tokenizer::has_next`) and
:cpp:func:`LAMMPS_NS::Tokenizer::skip`. In case of failure, the class
will throw an exception, so you may need to wrap the code using the
tokenizer into a ``try`` / ``catch`` block to handle errors. The
:cpp:class:`LAMMPS_NS::ValueTokenizer` class may also throw an exception
when the next token is requested as a number of a type that is not
compatible with the string representing that token.
.. doxygenclass:: LAMMPS_NS::Tokenizer
:project: progguide
:members:
.. doxygenclass:: LAMMPS_NS::TokenizerException
:project: progguide
:members:
.. doxygenclass:: LAMMPS_NS::ValueTokenizer
:project: progguide
:members:
.. doxygenclass:: LAMMPS_NS::InvalidIntegerException
:project: progguide
:members: what
.. doxygenclass:: LAMMPS_NS::InvalidFloatException
:project: progguide
:members: what

View File

@ -30,6 +30,13 @@ TokenizerException::TokenizerException(const std::string & msg, const std::strin
}
}
/** Class for splitting text into words
*
* This tokenizer will break down a string into sub-strings (i.e. words)
* separated by the given separator characters.
*
* \sa LAMMPS_NS::ValueTokenizer TokenizerException */
Tokenizer::Tokenizer(const std::string & str, const std::string & separators) :
text(str), separators(separators), start(0), ntokens(std::string::npos)
{
@ -48,14 +55,23 @@ Tokenizer::Tokenizer(Tokenizer && rhs) :
reset();
}
/*! Move the tokenizer position back to the first word.
 *
 * The position is set to the index of the first character of the text
 * that is not a separator character. If the text holds no such
 * character, the position becomes std::string::npos. */
void Tokenizer::reset()
{
    const auto first_word = text.find_first_not_of(separators);
    start = first_word;
}
/*! Check whether the text to be processed contains a sub-string.
 *
 * The complete text is searched from its beginning, independent of the
 * current tokenizer position.
 *
 * \param  str  string to be searched for
 * \return true if string was found, false if not */
bool Tokenizer::contains(const std::string & str) const
{
    const bool missing = (text.find(str) == std::string::npos);
    return !missing;
}
/*! Skip over a given number of tokens
*
* \param n number of tokens to skip over */
void Tokenizer::skip(int n) {
for(int i = 0; i < n; ++i) {
if(!has_next()) throw TokenizerException("No more tokens", "");
@ -70,10 +86,16 @@ void Tokenizer::skip(int n) {
}
}
/*! Report whether there are tokens left to be read.
 *
 * The tokenizer is exhausted once its position equals std::string::npos.
 *
 * \return true if there are more tokens, false if not */
bool Tokenizer::has_next() const
{
    if (start == std::string::npos) return false;
    return true;
}
/*! Retrieve next token.
*
* \return string with the next token */
std::string Tokenizer::next() {
if(!has_next()) throw TokenizerException("No more tokens", "");
@ -90,6 +112,9 @@ std::string Tokenizer::next() {
return token;
}
/*! Count number of tokens in text.
*
* \return number of counted tokens */
size_t Tokenizer::count() {
// lazy evaluation
if (ntokens == std::string::npos) {
@ -98,6 +123,9 @@ size_t Tokenizer::count() {
return ntokens;
}
/*! Retrieve the entire text converted to an STL vector of tokens.
*
* \return The STL vector */
std::vector<std::string> Tokenizer::as_vector() {
// store current state
size_t current = start;
@ -117,6 +145,9 @@ std::vector<std::string> Tokenizer::as_vector() {
return tokens;
}
/*! Class for reading text with numbers
 *
 * The text is split into words by an embedded Tokenizer instance that is
 * constructed from the same arguments.
 *
 * \sa LAMMPS_NS::Tokenizer InvalidIntegerException InvalidFloatException */
ValueTokenizer::ValueTokenizer(const std::string & str, const std::string & separators)
    : tokens(str, separators)
{
}
@ -127,14 +158,24 @@ ValueTokenizer::ValueTokenizer(const ValueTokenizer & rhs) : tokens(rhs.tokens)
ValueTokenizer::ValueTokenizer(ValueTokenizer && rhs)
    : tokens(std::move(rhs.tokens))
{
    // the embedded Tokenizer is move-constructed from rhs; no further work is done here
}
/*! Report whether there are tokens left to be read.
 *
 * The query is forwarded to the embedded Tokenizer.
 *
 * \return true if there are more tokens, false if not */
bool ValueTokenizer::has_next() const
{
    const bool more_available = tokens.has_next();
    return more_available;
}
/*! Check whether the text to be processed contains a sub-string.
 *
 * The search is forwarded to the embedded Tokenizer.
 *
 * \param  value  string with value to be searched for
 * \return true if string was found, false if not */
bool ValueTokenizer::contains(const std::string & value) const
{
    const bool found = tokens.contains(value);
    return found;
}
/*! Retrieve next token
*
* \return string with next token */
std::string ValueTokenizer::next_string() {
if (has_next()) {
std::string value = tokens.next();
@ -143,6 +184,9 @@ std::string ValueTokenizer::next_string() {
return "";
}
/*! Retrieve next token and convert to int
*
* \return value of next token */
int ValueTokenizer::next_int() {
if (has_next()) {
std::string current = tokens.next();
@ -155,6 +199,9 @@ int ValueTokenizer::next_int() {
return 0;
}
/*! Retrieve next token and convert to bigint
*
* \return value of next token */
bigint ValueTokenizer::next_bigint() {
if (has_next()) {
std::string current = tokens.next();
@ -167,6 +214,9 @@ bigint ValueTokenizer::next_bigint() {
return 0;
}
/*! Retrieve next token and convert to tagint
*
* \return value of next token */
tagint ValueTokenizer::next_tagint() {
if (has_next()) {
std::string current = tokens.next();
@ -179,6 +229,9 @@ tagint ValueTokenizer::next_tagint() {
return 0;
}
/*! Retrieve next token and convert to double
*
* \return value of next token */
double ValueTokenizer::next_double() {
if (has_next()) {
std::string current = tokens.next();
@ -191,10 +244,16 @@ double ValueTokenizer::next_double() {
return 0.0;
}
/*! Advance past a given number of tokens without converting them.
 *
 * The request is forwarded to the embedded Tokenizer.
 *
 * \param  n  number of tokens to skip over */
void ValueTokenizer::skip(int n)
{
    tokens.skip(n);
}
/*! Determine how many tokens the text contains.
 *
 * The counting is done by the embedded Tokenizer.
 *
 * \return number of counted tokens */
size_t ValueTokenizer::count()
{
    const size_t total = tokens.count();
    return total;
}

View File

@ -27,11 +27,6 @@ namespace LAMMPS_NS {
#define TOKENIZER_DEFAULT_SEPARATORS " \t\r\n\f"
/*! Class for splitting text into words
*
* \sa ValueTokenizer
*/
class Tokenizer {
std::string text;
std::string separators;
@ -44,49 +39,17 @@ public:
Tokenizer& operator=(const Tokenizer&) = default;
Tokenizer& operator=(Tokenizer&&) = default;
/*! Reposition the tokenizer state to the first word,
* i.e. the first non-separator character
*/
void reset();
/*! Skip over a given number of tokens
*
* \param n number of tokens to skip over
*/
void skip(int n);
/*! Indicate whether more tokens are available
*
* \return true if there are more tokens, false if not
*/
bool has_next() const;
/*! Search the text to be processed for a sub-string.
*
* \param str string to be searched for
* \return true if string was found, false if not
*/
bool contains(const std::string & str) const;
/*! Retrieve next token.
*
* \return string with the next token
*/
std::string next();
/*! Count number of tokens in text.
*
* \return number of counted tokens
*/
size_t count();
/*! Retrieve the entire text converted to an STL vector of tokens.
*
* \return The STL vector
*/
std::vector<std::string> as_vector();
};
/** \exception TokenizerException. Contains an error message string. */
class TokenizerException : public std::exception {
std::string message;
public:
@ -95,28 +58,27 @@ public:
// noexcept replaces the dynamic exception specification throw(),
// which is deprecated since C++11 and removed in C++20
~TokenizerException() noexcept {
}
/** Retrieve message describing the thrown exception
* \return string with error message */
virtual const char * what() const throw() {
return message.c_str();
}
};
/** \exception InvalidIntegerException. Contains an error message string. */
class InvalidIntegerException : public TokenizerException {
public:
    // pass a fixed error text and the offending token on to the base class
    InvalidIntegerException(const std::string & token)
        : TokenizerException("Not a valid integer number", token)
    {
    }
};
/** \exception InvalidFloatException. Contains an error message string. */
class InvalidFloatException : public TokenizerException {
public:
    // pass a fixed error text and the offending token on to the base class
    InvalidFloatException(const std::string & token)
        : TokenizerException("Not a valid floating-point number", token)
    {
    }
};
/*! Class for reading text with numbers
*
* \sa Tokenizer
*/
class ValueTokenizer {
Tokenizer tokens;
public: