start documenting tokenizer classes

2020-08-30 01:50:37 -04:00 · 2020-08-30 01:50:37 -04:00 · 96ee132e85
parent 31c91a8928
commit 96ee132e85
3 changed files with 74 additions and 0 deletions
--- a/doc/doxygen/Doxyfile.in
+++ b/doc/doxygen/Doxyfile.in
@ -422,6 +422,8 @@ INPUT                  = @LAMMPS_SOURCE_DIR@/utils.cpp      \
                         @LAMMPS_SOURCE_DIR@/atom.h         \
                         @LAMMPS_SOURCE_DIR@/input.cpp      \
                         @LAMMPS_SOURCE_DIR@/input.h        \
                         @LAMMPS_SOURCE_DIR@/tokenizer.cpp  \
                         @LAMMPS_SOURCE_DIR@/tokenizer.h    \
 # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
 # directories that are symbolic links (a Unix file system feature) are excluded
--- a/doc/src/pg_developer.rst
+++ b/doc/src/pg_developer.rst
@ -880,3 +880,32 @@ Convenience functions
 .. doxygenfunction:: timespec2seconds
   :project: progguide
 ---------------------------
 Tokenizer classes
 =================
 The purpose of the tokenizer classes is to simplify the recurring task
 of breaking lines of text down into words and/or numbers.
 Traditionally, LAMMPS code has used the ``strtok()`` function from
 the C library for that purpose, but that function has two significant
 disadvantages: 1) it cannot be used concurrently from different LAMMPS
 instances since it stores its state in a global variable and 2) it
 modifies the string that it is processing.  These classes were
 implemented to avoid both of these issues and also to reduce the amount
 of code that needs to be written.
 The basic procedure is to create an instance of the class with the
 string to be processed as an argument and then loop until all
 available tokens are read.  The constructor has a default set of
 separator characters, but these can be overridden. The default separators
 are all "whitespace" characters, i.e. the space character, the tab
 character, the carriage return character, the line feed character, and
 the form feed character.
 .. doxygenclass:: LAMMPS_NS::Tokenizer
   :project: progguide
 .. doxygenclass:: LAMMPS_NS::ValueTokenizer
   :project: progguide
--- a/src/tokenizer.h
+++ b/src/tokenizer.h
@ -27,6 +27,11 @@ namespace LAMMPS_NS {
 #define TOKENIZER_DEFAULT_SEPARATORS " \t\r\n\f"
 /*! Class for splitting text into words
 *
 * \sa ValueTokenizer
 */
 class Tokenizer {
    std::string text;
    std::string separators;
@ -39,13 +44,46 @@ public:
    Tokenizer& operator=(const Tokenizer&) = default;
    Tokenizer& operator=(Tokenizer&&) = default;
    /*! Reposition the tokenizer state to the first word,
     * i.e. the first non-separator character
     */
    void reset();
    /*! Skip over a given number of tokens
     *
     * \param  n  number of tokens to skip over
     */
    void skip(int n);
    /*! Indicate whether more tokens are available
     *
     * \return   true if there are more tokens, false if not
     */
    bool has_next() const;
    /*! Search the text to be processed for a sub-string.
     *
     * \param  str  string to be searched for
     * \return      true if string was found, false if not
     */
    bool contains(const std::string & str) const;
    /*! Retrieve next token.
     *
     * \return   string with the next token
     */
    std::string next();
    /*! Count number of tokens in text.
     *
     * \return   number of counted tokens
     */
    size_t count();
    /*! Retrieve the entire text converted to an STL vector of tokens.
     *
     * \return   STL vector containing all tokens of the text
     */
    std::vector<std::string> as_vector();
 };
@ -74,6 +112,11 @@ public:
    }
 };
 /*! Class for reading text with numbers
 *
 * \sa Tokenizer
 */
 class ValueTokenizer {
    Tokenizer tokens;
 public: