complete documentation of tokenizer classes

2020-08-30 14:11:14 -04:00 · 2020-08-30 14:11:14 -04:00 · 4b0999e167
parent 96ee132e85
commit 4b0999e167
3 changed files with 113 additions and 44 deletions
--- a/doc/src/pg_developer.rst
+++ b/doc/src/pg_developer.rst
@ -902,10 +902,58 @@ available tokens are read.  The constructor has a default set of
 separator characters, but that can be overridden. The default separators
 are all "whitespace" characters, i.e. the space character, the tabulator
 character, the carriage return character, the linefeed character, and
-the form feed character.
+the form feed character.  Below is a small code example that uses the
+tokenizer class to print the individual entries of the PATH environment
+variable.
+
+.. code-block:: C++
+
+   #include "tokenizer.h"
+   #include <cstdlib>
+   #include <string>
+   #include <iostream>
+
+   using namespace LAMMPS_NS;
+
+   int main(int, char **)
+   {
+       const char *path = getenv("PATH");
+
+       if (path != nullptr) {
+           Tokenizer p(path,":");
+           while (p.has_next())
+               std::cout << "Entry: " << p.next() << "\n";
+       }
+       return 0;
+   }
+
+Most tokenizer operations cannot fail except for
+:cpp:func:`LAMMPS_NS::Tokenizer::next` (when used without first
+checking with :cpp:func:`LAMMPS_NS::Tokenizer::has_next`) and
+:cpp:func:`LAMMPS_NS::Tokenizer::skip`.  In case of failure, the class
+will throw an exception, so you may need to wrap the code using the
+tokenizer into a ``try`` / ``catch`` block to handle errors.  The
+:cpp:class:`LAMMPS_NS::ValueTokenizer` class may also throw an exception
+when the next token is requested as a number of a given type, but the
+string representing that word is not compatible with this type.

 .. doxygenclass:: LAMMPS_NS::Tokenizer
   :project: progguide
+   :members:
+
+.. doxygenclass:: LAMMPS_NS::TokenizerException
+   :project: progguide
+   :members:

 .. doxygenclass:: LAMMPS_NS::ValueTokenizer
   :project: progguide
+   :members:
+
+.. doxygenclass:: LAMMPS_NS::InvalidIntegerException
+   :project: progguide
+   :members: what
+
+.. doxygenclass:: LAMMPS_NS::InvalidFloatException
+   :project: progguide
+   :members: what
+
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@ -30,6 +30,13 @@ TokenizerException::TokenizerException(const std::string & msg, const std::strin
    }
 }

+/** Class for splitting text into words
+ *
+ * This tokenizer will break down a string into sub-strings (i.e. words)
+ * separated by the given separator characters.
+ *
+ * \sa LAMMPS_NS::ValueTokenizer TokenizerException */
+
 Tokenizer::Tokenizer(const std::string & str, const std::string & separators) :
    text(str), separators(separators), start(0), ntokens(std::string::npos)
 {
@ -48,14 +55,23 @@ Tokenizer::Tokenizer(Tokenizer && rhs) :
    reset();
 }

+/*! Reposition the tokenizer state to the first word,
+ * i.e. the first non-separator character */
 void Tokenizer::reset() {
    start = text.find_first_not_of(separators);
 }

+/*! Search the text to be processed for a sub-string.
+ *
+ * \param  str  string to be searched for
+ * \return      true if string was found, false if not */
 bool Tokenizer::contains(const std::string & str) const {
    return text.find(str) != std::string::npos;
 }

+/*! Skip over a given number of tokens
+ *
+ * \param  n  number of tokens to skip over */
 void Tokenizer::skip(int n) {
    for(int i = 0; i < n; ++i) {
        if(!has_next()) throw TokenizerException("No more tokens", "");
@ -70,10 +86,16 @@ void Tokenizer::skip(int n) {
    }
 }

+/*! Indicate whether more tokens are available
+ *
+ * \return   true if there are more tokens, false if not */
 bool Tokenizer::has_next() const {
    return start != std::string::npos;
 }

+/*! Retrieve next token.
+ *
+ * \return   string with the next token */
 std::string Tokenizer::next() {
    if(!has_next()) throw TokenizerException("No more tokens", "");

@ -90,6 +112,9 @@ std::string Tokenizer::next() {
    return token;
 }

+/*! Count number of tokens in text.
+ *
+ * \return   number of counted tokens */
 size_t Tokenizer::count() {
    // lazy evaluation
    if (ntokens == std::string::npos) {
@ -98,6 +123,9 @@ size_t Tokenizer::count() {
    return ntokens;
 }

+/*! Retrieve the entire text converted to an STL vector of tokens.
+ *
+ * \return   STL vector containing the tokens of the text */
 std::vector<std::string> Tokenizer::as_vector() {
  // store current state
  size_t current = start;
@ -117,6 +145,9 @@ std::vector<std::string> Tokenizer::as_vector() {
  return tokens;
 }

+/*! Class for reading text with numbers
+ *
+ * \sa LAMMPS_NS::Tokenizer InvalidIntegerException InvalidFloatException */

 ValueTokenizer::ValueTokenizer(const std::string & str, const std::string & separators) : tokens(str, separators) {
 }
@ -127,14 +158,24 @@ ValueTokenizer::ValueTokenizer(const ValueTokenizer & rhs) : tokens(rhs.tokens)
 ValueTokenizer::ValueTokenizer(ValueTokenizer && rhs) : tokens(std::move(rhs.tokens)) {
 }

+/*! Indicate whether more tokens are available
+ *
+ * \return   true if there are more tokens, false if not */
 bool ValueTokenizer::has_next() const {
    return tokens.has_next();
 }

+/*! Search the text to be processed for a sub-string.
+ *
+ * \param  value  string with value to be searched for
+ * \return        true if string was found, false if not */
 bool ValueTokenizer::contains(const std::string & value) const {
    return tokens.contains(value);
 }

+/*! Retrieve next token
+ *
+ * \return   string with next token */
 std::string ValueTokenizer::next_string() {
    if (has_next()) {
        std::string value = tokens.next();
@ -143,6 +184,9 @@ std::string ValueTokenizer::next_string() {
    return "";
 }

+/*! Retrieve next token and convert to int
+ *
+ * \return   value of next token */
 int ValueTokenizer::next_int() {
    if (has_next()) {
        std::string current = tokens.next();
@ -155,6 +199,9 @@ int ValueTokenizer::next_int() {
    return 0;
 }

+/*! Retrieve next token and convert to bigint
+ *
+ * \return   value of next token */
 bigint ValueTokenizer::next_bigint() {
    if (has_next()) {
        std::string current = tokens.next();
@ -167,6 +214,9 @@ bigint ValueTokenizer::next_bigint() {
    return 0;
 }

+/*! Retrieve next token and convert to tagint
+ *
+ * \return   value of next token */
 tagint ValueTokenizer::next_tagint() {
    if (has_next()) {
        std::string current = tokens.next();
@ -179,6 +229,9 @@ tagint ValueTokenizer::next_tagint() {
    return 0;
 }

+/*! Retrieve next token and convert to double
+ *
+ * \return   value of next token */
 double ValueTokenizer::next_double() {
    if (has_next()) {
        std::string current = tokens.next();
@ -191,10 +244,16 @@ double ValueTokenizer::next_double() {
    return 0.0;
 }

+/*! Skip over a given number of tokens
+ *
+ * \param  n  number of tokens to skip over */
 void ValueTokenizer::skip(int n) {
    tokens.skip(n);
 }

+/*! Count number of tokens in text.
+ *
+ * \return   number of counted tokens */
 size_t ValueTokenizer::count() {
    return tokens.count();
 }
--- a/src/tokenizer.h
+++ b/src/tokenizer.h
@ -27,11 +27,6 @@ namespace LAMMPS_NS {

 #define TOKENIZER_DEFAULT_SEPARATORS " \t\r\n\f"

-/*! Class for splitting text into words
- *
- * \sa ValueTokenizer
- */
-
 class Tokenizer {
    std::string text;
    std::string separators;
@ -44,49 +39,17 @@ public:
    Tokenizer& operator=(const Tokenizer&) = default;
    Tokenizer& operator=(Tokenizer&&) = default;

-    /*! Reposition the tokenizer state to the first word,
-     * i.e. the first non-separator character
-     */
    void reset();
-
-    /*! Skip over a given number of tokens
-     *
-     * \param  n  number of tokens to skip over
-     */
    void skip(int n);
-
-    /*! Indicate whether more tokens are available
-     *
-     * \return   true if there are more tokens, false if not
-     */
    bool has_next() const;
-
-    /*! Search the text to be processed for a sub-string.
-     *
-     * \param  str  string to be searched for
-     * \return      true if string was found, false if not
-     */
    bool contains(const std::string & str) const;
-
-    /*! Retrieve next token.
-     *
-     * \return   string with the next token
-     */
    std::string next();

-    /*! Count number of tokens in text.
-     *
-     * \return   number of counted tokens
-     */
    size_t count();
-
-    /*! Retrieve the entire text converted to an STL vector of tokens.
-     *
-     * \return   The STL vector
-     */
    std::vector<std::string> as_vector();
 };

+/** \exception TokenizerException. Contains an error message string. */
 class TokenizerException : public std::exception {
  std::string message;
 public:
@ -95,28 +58,27 @@ public:
  ~TokenizerException() throw() {
  }

+  /** Retrieve message describing the thrown exception
+   * \return string with error message */
  virtual const char * what() const throw() {
    return message.c_str();
  }
 };

+/** \exception InvalidIntegerException. Contains an error message string. */
 class InvalidIntegerException : public TokenizerException {
 public:
    InvalidIntegerException(const std::string & token) : TokenizerException("Not a valid integer number", token) {
    }
 };

+/** \exception InvalidFloatException. Contains an error message string. */
 class InvalidFloatException : public TokenizerException {
 public:
    InvalidFloatException(const std::string & token) : TokenizerException("Not a valid floating-point number", token) {
    }
 };

-/*! Class for reading text with numbers
- *
- * \sa Tokenizer
- */
-
 class ValueTokenizer {
    Tokenizer tokens;
 public: