From 81d937ee97ba27ee22dd10965950bbd909224aa3 Mon Sep 17 00:00:00 2001 From: Richard Berger Date: Thu, 11 Jun 2020 00:16:46 -0400 Subject: [PATCH 1/8] Performance optimization of Tokenizer Reduces string allocations and removes std::vector from Tokenizer Most processing now happens on-demand. --- src/atom_vec.cpp | 8 +- src/atom_vec_hybrid.cpp | 4 +- src/potential_file_reader.h | 2 +- src/text_file_reader.h | 2 +- src/tokenizer.cpp | 154 +++++++++++++++++++----------- src/tokenizer.h | 34 ++++--- src/utils.cpp | 16 +++- unittest/utils/test_tokenizer.cpp | 16 ++-- unittest/utils/test_utils.cpp | 4 + 9 files changed, 145 insertions(+), 95 deletions(-) diff --git a/src/atom_vec.cpp b/src/atom_vec.cpp index 8cd7db4fd4..353ab27c99 100644 --- a/src/atom_vec.cpp +++ b/src/atom_vec.cpp @@ -2448,11 +2448,11 @@ int AtomVec::process_fields(char *str, const char *default_str, Method *method) } // tokenize words in both strings - Tokenizer words(str, " "); - Tokenizer def_words(default_str, " "); + std::vector words = Tokenizer(str, " ").as_vector(); + std::vector def_words = Tokenizer(default_str, " ").as_vector(); - int nfield = words.count(); - int ndef = def_words.count(); + int nfield = words.size(); + int ndef = def_words.size(); // process fields one by one, add to index vector diff --git a/src/atom_vec_hybrid.cpp b/src/atom_vec_hybrid.cpp index 7e599863c0..9ba2b6f468 100644 --- a/src/atom_vec_hybrid.cpp +++ b/src/atom_vec_hybrid.cpp @@ -514,8 +514,8 @@ char *AtomVecHybrid::merge_fields(int inum, char *root, // identify unique words in concatenated string - Tokenizer words(concat, " "); - int nwords = words.count(); + std::vector words = Tokenizer(concat, " ").as_vector(); + int nwords = words.size(); int *unique = new int[nwords]; diff --git a/src/potential_file_reader.h b/src/potential_file_reader.h index c512e7886d..a73f5fdbaa 100644 --- a/src/potential_file_reader.h +++ b/src/potential_file_reader.h @@ -43,7 +43,7 @@ namespace LAMMPS_NS void skip_line(); char * next_line(int nparams = 0); void next_dvector(double * list, int n); - ValueTokenizer next_values(int nparams, const std::string & seperators = TOKENIZER_DEFAULT_SEPERATORS); + ValueTokenizer next_values(int nparams, const std::string & separators = TOKENIZER_DEFAULT_SEPARATORS); // convenience functions double next_double(); diff --git a/src/text_file_reader.h b/src/text_file_reader.h index b162bfb23c..80a5d756ea 100644 --- a/src/text_file_reader.h +++ b/src/text_file_reader.h @@ -42,7 +42,7 @@ namespace LAMMPS_NS char * next_line(int nparams = 0); void next_dvector(double * list, int n); - ValueTokenizer next_values(int nparams, const std::string & seperators = TOKENIZER_DEFAULT_SEPERATORS); + ValueTokenizer next_values(int nparams, const std::string & separators = TOKENIZER_DEFAULT_SEPARATORS); }; class FileReaderException : public std::exception { diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index 7d048d9e02..f041c79baa 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -17,77 +17,118 @@ #include "tokenizer.h" #include "utils.h" +#include "fmt/format.h" using namespace LAMMPS_NS; -Tokenizer::Tokenizer(const std::string & str, const std::string & seperators) { - size_t end = -1; +TokenizerException::TokenizerException(const std::string & msg, const std::string & token){ + if(token.empty()) { + message = msg; + } else { + message = fmt::format("{}: '{}'", msg, token); + } +} - do { - size_t start = str.find_first_not_of(seperators, end + 1); - if(start == std::string::npos) break; +Tokenizer::Tokenizer(const std::string & str, const std::string & separators) : + text(str), separators(separators), start(0), ntokens(std::string::npos) +{ + reset(); +} - end = str.find_first_of(seperators, start); +Tokenizer::Tokenizer(const Tokenizer & rhs) : + text(rhs.text), separators(rhs.separators), ntokens(rhs.ntokens) +{ + reset(); +} + +Tokenizer::Tokenizer(Tokenizer && rhs) : + text(std::move(rhs.text)), separators(std::move(rhs.separators)), ntokens(rhs.ntokens) +{ + reset(); +} + +void Tokenizer::reset() { + start = text.find_first_not_of(separators); +} + +void Tokenizer::skip(int n) { + for(int i = 0; i < n; ++i) { + if(!has_next()) throw TokenizerException("No more tokens", ""); + + size_t end = text.find_first_of(separators, start); if(end == std::string::npos) { - tokens.push_back(str.substr(start)); + start = end; } else { - tokens.push_back(str.substr(start, end-start)); - } - } while(end != std::string::npos); + start = text.find_first_not_of(separators, end+1); + } + } } -Tokenizer::Tokenizer(const Tokenizer & rhs) : tokens(rhs.tokens) { +bool Tokenizer::has_next() const { + return start != std::string::npos; } -Tokenizer::Tokenizer(Tokenizer && rhs) : tokens(std::move(rhs.tokens)) { +std::string Tokenizer::next() { + if(!has_next()) throw TokenizerException("No more tokens", ""); + + size_t end = text.find_first_of(separators, start); + + if(end == std::string::npos) { + std::string token = text.substr(start); + start = end; + return token; + } + + std::string token = text.substr(start, end-start); + start = text.find_first_not_of(separators, end+1); + return token; } -Tokenizer::iterator Tokenizer::begin() { - return tokens.begin(); +size_t Tokenizer::count() { + // lazy evaluation + if (ntokens == std::string::npos) { + ntokens = utils::count_words(text, separators); + } + return ntokens; } -Tokenizer::iterator Tokenizer::end() { - return tokens.end(); -} +std::vector Tokenizer::as_vector() { + // store current state + size_t current = start; -Tokenizer::const_iterator Tokenizer::cbegin() const { - return tokens.cbegin(); -} + reset(); -Tokenizer::const_iterator Tokenizer::cend() const { - return tokens.cend(); -} + // generate vector + std::vector tokens; -std::string & Tokenizer::operator[](size_t index) { - return tokens[index]; -} + while(has_next()) { + tokens.emplace_back(next()); + } -size_t Tokenizer::count() const { - return tokens.size(); + // restore state + start = current; + + return tokens; } -ValueTokenizer::ValueTokenizer(const std::string & str, const std::string & seperators) : tokens(str, seperators) { - current = tokens.begin(); +ValueTokenizer::ValueTokenizer(const std::string & str, const std::string & separators) : tokens(str, separators) { } ValueTokenizer::ValueTokenizer(const ValueTokenizer & rhs) : tokens(rhs.tokens) { - current = tokens.begin(); } ValueTokenizer::ValueTokenizer(ValueTokenizer && rhs) : tokens(std::move(rhs.tokens)) { - current = tokens.begin(); } bool ValueTokenizer::has_next() const { - return current != tokens.cend(); + return tokens.has_next(); } std::string ValueTokenizer::next_string() { if (has_next()) { - std::string value = *current; - ++current; + std::string value = tokens.next(); return value; } return ""; @@ -95,11 +136,11 @@ std::string ValueTokenizer::next_string() { int ValueTokenizer::next_int() { if (has_next()) { - if(!utils::is_integer(*current)) { - throw InvalidIntegerException(*current); + std::string current = tokens.next(); + if(!utils::is_integer(current)) { + throw InvalidIntegerException(current); } - int value = atoi(current->c_str()); - ++current; + int value = atoi(current.c_str()); return value; } return 0; @@ -107,45 +148,44 @@ int ValueTokenizer::next_int() { bigint ValueTokenizer::next_bigint() { if (has_next()) { - if(!utils::is_integer(*current)) { - throw InvalidIntegerException(*current); + std::string current = tokens.next(); + if(!utils::is_integer(current)) { + throw InvalidIntegerException(current); } - bigint value = ATOBIGINT(current->c_str()); - ++current; + bigint value = ATOBIGINT(current.c_str()); return value; } return 0; } tagint ValueTokenizer::next_tagint() { - if (current != tokens.end()) { - if(!utils::is_integer(*current)) { - throw InvalidIntegerException(*current); + if (has_next()) { + std::string current = tokens.next(); + if(!utils::is_integer(current)) { + throw InvalidIntegerException(current); } - tagint value = ATOTAGINT(current->c_str()); - ++current; + tagint value = ATOTAGINT(current.c_str()); return value; } return 0; } double ValueTokenizer::next_double() { - if (current != tokens.end()) { - if(!utils::is_double(*current)) { - throw InvalidFloatException(*current); + if (has_next()) { + std::string current = tokens.next(); + if(!utils::is_double(current)) { + throw InvalidFloatException(current); } - - double value = atof(current->c_str()); - ++current; + double value = atof(current.c_str()); return value; } return 0.0; } -void ValueTokenizer::skip(int ntokens) { - current = std::next(current, ntokens); +void ValueTokenizer::skip(int n) { + tokens.skip(n); } -size_t ValueTokenizer::count() const { +size_t ValueTokenizer::count() { return tokens.count(); } diff --git a/src/tokenizer.h b/src/tokenizer.h index 89cb57b301..8ad19ce960 100644 --- a/src/tokenizer.h +++ b/src/tokenizer.h @@ -25,34 +25,33 @@ namespace LAMMPS_NS { -#define TOKENIZER_DEFAULT_SEPERATORS " \t\r\n\f" +#define TOKENIZER_DEFAULT_SEPARATORS " \t\r\n\f" class Tokenizer { - std::vector tokens; + std::string text; + std::string separators; + size_t start; + size_t ntokens; public: - typedef std::vector::iterator iterator; - typedef std::vector::const_iterator const_iterator; - - Tokenizer(const std::string & str, const std::string & seperators = TOKENIZER_DEFAULT_SEPERATORS); + Tokenizer(const std::string & str, const std::string & separators = TOKENIZER_DEFAULT_SEPARATORS); Tokenizer(Tokenizer &&); Tokenizer(const Tokenizer &); Tokenizer& operator=(const Tokenizer&) = default; Tokenizer& operator=(Tokenizer&&) = default; - iterator begin(); - iterator end(); - const_iterator cbegin() const; - const_iterator cend() const; + void reset(); + void skip(int n); + bool has_next() const; + std::string next(); - std::string & operator[](size_t index); - size_t count() const; + size_t count(); + std::vector as_vector(); }; class TokenizerException : public std::exception { std::string message; public: - TokenizerException(const std::string & msg, const std::string & token) : message(msg + ": '" + token + "'") { - } + TokenizerException(const std::string & msg, const std::string & token); ~TokenizerException() throw() { } @@ -76,9 +75,8 @@ public: class ValueTokenizer { Tokenizer tokens; - Tokenizer::const_iterator current; public: - ValueTokenizer(const std::string & str, const std::string & seperators = TOKENIZER_DEFAULT_SEPERATORS); + ValueTokenizer(const std::string & str, const std::string & separators = TOKENIZER_DEFAULT_SEPARATORS); ValueTokenizer(const ValueTokenizer &); ValueTokenizer(ValueTokenizer &&); ValueTokenizer& operator=(const ValueTokenizer&) = default; @@ -91,9 +89,9 @@ public: double next_double(); bool has_next() const; - void skip(int ntokens); + void skip(int n); - size_t count() const; + size_t count(); }; diff --git a/src/utils.cpp b/src/utils.cpp index a8dc4e308e..72193bb2c8 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -369,8 +369,20 @@ std::string utils::trim_comment(const std::string & line) { ------------------------------------------------------------------------- */ size_t utils::count_words(const std::string & text, const std::string & seperators) { - ValueTokenizer words(text, seperators); - return words.count(); + size_t count = 0; + size_t start = text.find_first_not_of(seperators); + + while (start != std::string::npos) { + size_t end = text.find_first_of(seperators, start); + ++count; + + if(end == std::string::npos) { + return count; + } else { + start = text.find_first_not_of(seperators, end + 1); + } + } + return count; } /* ---------------------------------------------------------------------- diff --git a/unittest/utils/test_tokenizer.cpp b/unittest/utils/test_tokenizer.cpp index 08c71338be..09487aabff 100644 --- a/unittest/utils/test_tokenizer.cpp +++ b/unittest/utils/test_tokenizer.cpp @@ -50,25 +50,21 @@ TEST(Tokenizer, postfix_seperators) { TEST(Tokenizer, iterate_words) { Tokenizer t(" test word ", " "); - ASSERT_THAT(t[0], Eq("test")); - ASSERT_THAT(t[1], Eq("word")); + ASSERT_THAT(t.next(), Eq("test")); + ASSERT_THAT(t.next(), Eq("word")); ASSERT_EQ(t.count(), 2); } TEST(Tokenizer, default_seperators) { Tokenizer t(" \r\n test \t word \f"); - ASSERT_THAT(t[0], Eq("test")); - ASSERT_THAT(t[1], Eq("word")); + ASSERT_THAT(t.next(), Eq("test")); + ASSERT_THAT(t.next(), Eq("word")); ASSERT_EQ(t.count(), 2); } -TEST(Tokenizer, for_loop) { +TEST(Tokenizer, as_vector) { Tokenizer t(" \r\n test \t word \f"); - std::vector list; - - for(auto word : t) { - list.push_back(word); - } + std::vector list = t.as_vector(); ASSERT_THAT(list[0], Eq("test")); ASSERT_THAT(list[1], Eq("word")); } diff --git a/unittest/utils/test_utils.cpp b/unittest/utils/test_utils.cpp index e1c458a173..9830207c3e 100644 --- a/unittest/utils/test_utils.cpp +++ b/unittest/utils/test_utils.cpp @@ -32,6 +32,10 @@ TEST(Utils, trim_and_count_words) { ASSERT_EQ(utils::trim_and_count_words("some text # comment"), 2); } +TEST(Utils, count_words_with_extra_spaces) { + ASSERT_EQ(utils::count_words(" some text # comment "), 4); +} + TEST(Utils, valid_integer1) { ASSERT_TRUE(utils::is_integer("10")); } From 9945f737438288eca9564cd4650e43dc23c27476 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 11 Jun 2020 01:05:58 -0400 Subject: [PATCH 2/8] fix spelling in a few more files --- src/potential_file_reader.cpp | 4 ++-- src/text_file_reader.cpp | 4 ++-- src/utils.cpp | 12 ++++++------ src/utils.h | 8 ++++---- unittest/utils/test_tokenizer.cpp | 6 +++--- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/potential_file_reader.cpp b/src/potential_file_reader.cpp index 2ec7908ae3..5fd361eff5 100644 --- a/src/potential_file_reader.cpp +++ b/src/potential_file_reader.cpp @@ -83,9 +83,9 @@ void PotentialFileReader::next_dvector(double * list, int n) { } } -ValueTokenizer PotentialFileReader::next_values(int nparams, const std::string & seperators) { +ValueTokenizer PotentialFileReader::next_values(int nparams, const std::string & separators) { try { - return reader->next_values(nparams, seperators); + return reader->next_values(nparams, separators); } catch (FileReaderException & e) { error->one(FLERR, e.what()); } diff --git a/src/text_file_reader.cpp b/src/text_file_reader.cpp index 8063bba87f..9015ddecee 100644 --- a/src/text_file_reader.cpp +++ b/src/text_file_reader.cpp @@ -116,6 +116,6 @@ void TextFileReader::next_dvector(double * list, int n) { } } -ValueTokenizer TextFileReader::next_values(int nparams, const std::string & seperators) { - return ValueTokenizer(next_line(nparams), seperators); +ValueTokenizer TextFileReader::next_values(int nparams, const std::string & separators) { + return ValueTokenizer(next_line(nparams), separators); } diff --git a/src/utils.cpp b/src/utils.cpp index 72193bb2c8..928a84883c 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -368,18 +368,18 @@ std::string utils::trim_comment(const std::string & line) { Return number of words ------------------------------------------------------------------------- */ -size_t utils::count_words(const std::string & text, const std::string & seperators) { +size_t utils::count_words(const std::string & text, const std::string & separators) { size_t count = 0; - size_t start = text.find_first_not_of(seperators); + size_t start = text.find_first_not_of(separators); while (start != std::string::npos) { - size_t end = text.find_first_of(seperators, start); + size_t end = text.find_first_of(separators, start); ++count; if(end == std::string::npos) { return count; } else { - start = text.find_first_not_of(seperators, end + 1); + start = text.find_first_not_of(separators, end + 1); } } return count; @@ -389,8 +389,8 @@ size_t utils::count_words(const std::string & text, const std::string & seperato Trim comment from string and return number of words ------------------------------------------------------------------------- */ -size_t utils::trim_and_count_words(const std::string & text, const std::string & seperators) { - return utils::count_words(utils::trim_comment(text), seperators); +size_t utils::trim_and_count_words(const std::string & text, const std::string & separators) { + return utils::count_words(utils::trim_comment(text), separators); } /* ---------------------------------------------------------------------- diff --git a/src/utils.h b/src/utils.h index 79fb2349d3..562293f2f3 100644 --- a/src/utils.h +++ b/src/utils.h @@ -153,18 +153,18 @@ namespace LAMMPS_NS { /** * \brief Count words in string * \param text string that should be searched - * \param seperators string containing characters that will be treated as whitespace + * \param separators string containing characters that will be treated as whitespace * \return number of words found */ - size_t count_words(const std::string & text, const std::string & seperators = " \t\r\n\f"); + size_t count_words(const std::string & text, const std::string & separators = " \t\r\n\f"); /** * \brief Count words in a single line, trim anything from '#' onward * \param text string that should be trimmed and searched - * \param seperators string containing characters that will be treated as whitespace + * \param separators string containing characters that will be treated as whitespace * \return number of words found */ - size_t trim_and_count_words(const std::string & text, const std::string & seperators = " \t\r\n\f"); + size_t trim_and_count_words(const std::string & text, const std::string & separators = " \t\r\n\f"); /** * \brief Check if string can be converted to valid integer diff --git a/unittest/utils/test_tokenizer.cpp b/unittest/utils/test_tokenizer.cpp index 09487aabff..903f660959 100644 --- a/unittest/utils/test_tokenizer.cpp +++ b/unittest/utils/test_tokenizer.cpp @@ -38,12 +38,12 @@ TEST(Tokenizer, two_words) { ASSERT_EQ(t.count(), 2); } -TEST(Tokenizer, prefix_seperators) { +TEST(Tokenizer, prefix_separators) { Tokenizer t(" test word", " "); ASSERT_EQ(t.count(), 2); } -TEST(Tokenizer, postfix_seperators) { +TEST(Tokenizer, postfix_separators) { Tokenizer t("test word ", " "); ASSERT_EQ(t.count(), 2); } @@ -55,7 +55,7 @@ TEST(Tokenizer, iterate_words) { ASSERT_EQ(t.count(), 2); } -TEST(Tokenizer, default_seperators) { +TEST(Tokenizer, default_separators) { Tokenizer t(" \r\n test \t word \f"); ASSERT_THAT(t.next(), Eq("test")); ASSERT_THAT(t.next(), Eq("word")); From 6cb5345cd086a29a975b7fe99490e5fd1cdb1251 Mon Sep 17 00:00:00 2001 From: Richard Berger Date: Thu, 11 Jun 2020 13:37:22 -0400 Subject: [PATCH 3/8] Add optimized version of count_words for default whitespace chars --- src/utils.cpp | 29 +++++++++++++++++++++++++++++ src/utils.h | 10 +++++++++- unittest/utils/test_utils.cpp | 4 ++++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/src/utils.cpp b/src/utils.cpp index 928a84883c..86f56e7b2a 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -368,6 +368,35 @@ std::string utils::trim_comment(const std::string & line) { Return number of words ------------------------------------------------------------------------- */ +size_t utils::count_words(const std::string & text) { + size_t count = 0; + const char * buf = text.c_str(); + char c = *buf; + + while (c) { + if (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f') { + c = *++buf; + continue; + }; + + ++count; + c = *++buf; + + while (c) { + if (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f') { + break; + } + c = *++buf; + } + } + + return count; +} + +/* ---------------------------------------------------------------------- + Return number of words +------------------------------------------------------------------------- */ + size_t utils::count_words(const std::string & text, const std::string & separators) { size_t count = 0; size_t start = text.find_first_not_of(separators); diff --git a/src/utils.h b/src/utils.h index 562293f2f3..bce9ff3e66 100644 --- a/src/utils.h +++ b/src/utils.h @@ -156,7 +156,15 @@ namespace LAMMPS_NS { * \param separators string containing characters that will be treated as whitespace * \return number of words found */ - size_t count_words(const std::string & text, const std::string & separators = " \t\r\n\f"); + size_t count_words(const std::string & text, const std::string & separators); + + /** + * \brief Count words in string, ignore any whitespace matching " \t\r\n\f" + * \param text string that should be searched + * \param separators string containing characters that will be treated as whitespace + * \return number of words found + */ + size_t count_words(const std::string & text); /** * \brief Count words in a single line, trim anything from '#' onward diff --git a/unittest/utils/test_utils.cpp b/unittest/utils/test_utils.cpp index 9830207c3e..5660c097f1 100644 --- a/unittest/utils/test_utils.cpp +++ b/unittest/utils/test_utils.cpp @@ -28,6 +28,10 @@ TEST(Utils, count_words) { ASSERT_EQ(utils::count_words("some text # comment"), 4); } +TEST(Utils, count_words_non_default) { + ASSERT_EQ(utils::count_words("some text # comment", " #"), 3); +} + TEST(Utils, trim_and_count_words) { ASSERT_EQ(utils::trim_and_count_words("some text # comment"), 2); } From 645d3b61baf0e36e605012f291521215aa65a64f Mon Sep 17 00:00:00 2001 From: Richard Berger Date: Thu, 11 Jun 2020 13:49:58 -0400 Subject: [PATCH 4/8] Only count new words --- src/text_file_reader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/text_file_reader.cpp b/src/text_file_reader.cpp index 9015ddecee..8abe1b001f 100644 --- a/src/text_file_reader.cpp +++ b/src/text_file_reader.cpp @@ -86,7 +86,7 @@ char *TextFileReader::next_line(int nparams) { // strip comment if (ignore_comments && (ptr = strchr(line, '#'))) *ptr = '\0'; - nwords = utils::count_words(line); + nwords += utils::count_words(&line[n]); // skip line if blank if (nwords > 0) { From 6a9073a0cbcbb5b8519988f693d9387d5c9febeb Mon Sep 17 00:00:00 2001 From: Richard Berger Date: Thu, 11 Jun 2020 13:50:26 -0400 Subject: [PATCH 5/8] Add count_words for C-Strings --- src/utils.cpp | 14 +++++++++++--- src/utils.h | 8 ++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/utils.cpp b/src/utils.cpp index 86f56e7b2a..88677bf542 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -365,12 +365,12 @@ std::string utils::trim_comment(const std::string & line) { } /* ---------------------------------------------------------------------- - Return number of words + return number of words ------------------------------------------------------------------------- */ -size_t utils::count_words(const std::string & text) { +size_t utils::count_words(const char * text) { size_t count = 0; - const char * buf = text.c_str(); + const char * buf = text; char c = *buf; while (c) { @@ -393,6 +393,14 @@ size_t utils::count_words(const std::string & text) { return count; } +/* ---------------------------------------------------------------------- + return number of words +------------------------------------------------------------------------- */ + +size_t utils::count_words(const std::string & text) { + return utils::count_words(text.c_str()); +} + /* ---------------------------------------------------------------------- Return number of words ------------------------------------------------------------------------- */ diff --git a/src/utils.h b/src/utils.h index bce9ff3e66..ef272087b7 100644 --- a/src/utils.h +++ b/src/utils.h @@ -166,6 +166,14 @@ namespace LAMMPS_NS { */ size_t count_words(const std::string & text); + /** + * \brief Count words in C-string, ignore any whitespace matching " \t\r\n\f" + * \param text string that should be searched + * \param separators string containing characters that will be treated as whitespace + * \return number of words found + */ + size_t count_words(const char * text); + /** * \brief Count words in a single line, trim anything from '#' onward * \param text string that should be trimmed and searched From f25f7fee8d60a511ee01ac5d42ed18b24e518f83 Mon Sep 17 00:00:00 2001 From: Richard Berger Date: Thu, 11 Jun 2020 13:56:47 -0400 Subject: [PATCH 6/8] Only count new words --- src/MANYBODY/pair_eim.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/MANYBODY/pair_eim.cpp b/src/MANYBODY/pair_eim.cpp index 0d552caebc..f3f965a231 100644 --- a/src/MANYBODY/pair_eim.cpp +++ b/src/MANYBODY/pair_eim.cpp @@ -1123,7 +1123,7 @@ char * EIMPotentialFileReader::next_line(FILE * fp) { concat = false; } - nwords = utils::count_words(line); + nwords += utils::count_words(&line[n]); // skip line if blank if (nwords > 0) { From 43d3133583fa39c285e0ef3896315b086bb7f1ae Mon Sep 17 00:00:00 2001 From: Richard Berger Date: Thu, 11 Jun 2020 14:00:13 -0400 Subject: [PATCH 7/8] Whitespace --- src/tokenizer.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index f041c79baa..7ea7c88e5e 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -35,13 +35,13 @@ Tokenizer::Tokenizer(const std::string & str, const std::string & separators) : reset(); } -Tokenizer::Tokenizer(const Tokenizer & rhs) : +Tokenizer::Tokenizer(const Tokenizer & rhs) : text(rhs.text), separators(rhs.separators), ntokens(rhs.ntokens) { reset(); } -Tokenizer::Tokenizer(Tokenizer && rhs) : +Tokenizer::Tokenizer(Tokenizer && rhs) : text(std::move(rhs.text)), separators(std::move(rhs.separators)), ntokens(rhs.ntokens) { reset(); @@ -55,13 +55,13 @@ void Tokenizer::skip(int n) { for(int i = 0; i < n; ++i) { if(!has_next()) throw TokenizerException("No more tokens", ""); - size_t end = text.find_first_of(separators, start); + size_t end = text.find_first_of(separators, start); if(end == std::string::npos) { start = end; } else { start = text.find_first_not_of(separators, end+1); - } + } } } @@ -78,7 +78,7 @@ std::string Tokenizer::next() { std::string token = text.substr(start); start = end; return token; - } + } std::string token = text.substr(start, end-start); start = text.find_first_not_of(separators, end+1); From 3c99471df8da67dc34056485e7e7a18128fc269a Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 12 Jun 2020 01:29:42 -0400 Subject: [PATCH 8/8] add a few more unit tests for functions in utils --- unittest/utils/test_utils.cpp | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/unittest/utils/test_utils.cpp b/unittest/utils/test_utils.cpp index 5660c097f1..317c77cf3f 100644 --- a/unittest/utils/test_utils.cpp +++ b/unittest/utils/test_utils.cpp @@ -15,6 +15,9 @@ #include "gmock/gmock.h" #include "utils.h" #include +#include +#include +#include using namespace LAMMPS_NS; using ::testing::Eq; @@ -223,3 +226,35 @@ TEST(Utils, path_basename) { ASSERT_THAT(utils::path_basename("/parent/folder/filename"), Eq("filename")); #endif } + +TEST(Utils, getsyserror) { +#if defined(__linux__) + errno = ENOENT; + std::string errmesg = utils::getsyserror(); + ASSERT_THAT(errmesg, Eq("No such file or directory")); +#else + GTEST_SKIP(); +#endif +} + +TEST(Utils, potential_file) { + FILE *fp; + fp = fopen("ctest.txt","w"); + ASSERT_NE(fp,nullptr); + fputs("# DATE: 2020-02-20 CONTRIBUTOR: Nessuno\n",fp); + fclose(fp); + + EXPECT_TRUE(utils::file_is_readable("ctest.txt")); + EXPECT_FALSE(utils::file_is_readable("no_such_file.txt")); + + EXPECT_THAT(utils::get_potential_file_path("ctest.txt"),Eq("ctest.txt")); + const char *folder = getenv("LAMMPS_POTENTIALS"); + if (folder != nullptr) { + std::string path=utils::path_join(folder,"Cu_u3.eam"); + EXPECT_THAT(utils::get_potential_file_path("Cu_u3.eam"),Eq(path)); + } + + EXPECT_THAT(utils::get_potential_date("ctest.txt","Test"),Eq("2020-02-20")); + + remove("ctest.txt"); +}