Add Tokenizer class

Richard Berger 2020-05-15 15:36:13 -04:00
parent 8691579def
commit d41927b056
5 changed files with 161 additions and 0 deletions

src/tokenizer.cpp (new file, 53 lines)

@@ -0,0 +1,53 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Richard Berger (Temple U)
------------------------------------------------------------------------- */
#include "tokenizer.h"
using namespace LAMMPS_NS;
Tokenizer::Tokenizer(const std::string & str, const std::string & seperators) {
size_t end = -1;
do {
size_t start = str.find_first_not_of(seperators, end + 1);
if(start == std::string::npos) break;
end = str.find_first_of(seperators, start);
if(end == std::string::npos) {
tokens.push_back(str.substr(start));
} else {
tokens.push_back(str.substr(start, end-start));
}
} while(end != std::string::npos);
}
Tokenizer::iterator Tokenizer::begin() {
return tokens.begin();
}
Tokenizer::iterator Tokenizer::end() {
return tokens.end();
}
const std::string & Tokenizer::operator[](size_t index) {
return tokens[index];
}
const size_t Tokenizer::count() const {
return tokens.size();
}
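
The constructor alternates find_first_not_of (skip separators, locate a token's start) and find_first_of (locate its end). A minimal standalone sketch of that same scan, hypothetical and not part of this commit (the sample string and separator set are made up):

#include <iostream>
#include <string>
#include <vector>

int main() {
    // the Tokenizer constructor's loop, restated as a free-standing scan
    std::string str = "  test \t word ";
    std::string separators = " \t";
    std::vector<std::string> tokens;

    size_t end = std::string::npos;              // npos + 1 wraps to 0
    do {
        size_t start = str.find_first_not_of(separators, end + 1);
        if (start == std::string::npos) break;   // only separators remain
        end = str.find_first_of(separators, start);
        // substr clamps the count, so end == npos takes the rest of the string
        tokens.push_back(str.substr(start, end - start));
    } while (end != std::string::npos);

    for (const auto & t : tokens) std::cout << t << "\n";  // prints "test", "word"
}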

src/tokenizer.h (new file, 42 lines)

@@ -0,0 +1,42 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Richard Berger (Temple U)
------------------------------------------------------------------------- */
#ifndef LMP_TOKENIZER_H
#define LMP_TOKENIZER_H

#include <cstddef>
#include <string>
#include <vector>

namespace LAMMPS_NS {

class Tokenizer {
    std::vector<std::string> tokens;
public:
    typedef std::vector<std::string>::iterator iterator;

    Tokenizer(const std::string & str, const std::string & separators = " \t\r\n\f");

    iterator begin();
    iterator end();

    const std::string & operator[](size_t index);
    size_t count() const;
};

}

#endif
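
The header exposes count(), operator[], and begin()/end(), so a tokenizer can be consumed by index or by range-for. A sketch of a hypothetical caller, assuming tokenizer.h is on the include path (the input string is made up):

#include "tokenizer.h"
#include <cstdio>

using namespace LAMMPS_NS;

int main() {
    Tokenizer words("fix 1 all nve");        // default whitespace separators
    printf("%zu tokens\n", words.count());   // 4 tokens
    printf("first: %s\n", words[0].c_str()); // first: fix
    for (auto & w : words) printf("%s\n", w.c_str());
    return 0;
}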

CMakeLists.txt (test tree; full path not shown in this view)

@@ -1,3 +1,5 @@
include(GTest)
add_subdirectory(force-styles)
add_subdirectory(utils)

CMakeLists.txt (utils test directory; full path not shown in this view)

@@ -0,0 +1,3 @@
add_executable(test_tokenizer test_tokenizer.cpp)
target_link_libraries(test_tokenizer PRIVATE lammps GTest::GMockMain GTest::GMock GTest::GTest)
add_test(Tokenizer test_tokenizer)

test_tokenizer.cpp (new file, 61 lines)

@@ -0,0 +1,61 @@
#include <gtest/gtest.h>
#include <gmock/gmock.h>
#include "tokenizer.h"
using namespace LAMMPS_NS;
using ::testing::Eq;
TEST(Tokenizer, empty_string) {
Tokenizer t("", " ");
ASSERT_EQ(t.count(), 0);
}
TEST(Tokenizer, whitespace_only) {
Tokenizer t(" ", " ");
ASSERT_EQ(t.count(), 0);
}
TEST(Tokenizer, single_word) {
Tokenizer t("test", " ");
ASSERT_EQ(t.count(), 1);
}
TEST(Tokenizer, two_words) {
Tokenizer t("test word", " ");
ASSERT_EQ(t.count(), 2);
}
TEST(Tokenizer, prefix_seperators) {
Tokenizer t(" test word", " ");
ASSERT_EQ(t.count(), 2);
}
TEST(Tokenizer, postfix_seperators) {
Tokenizer t("test word ", " ");
ASSERT_EQ(t.count(), 2);
}
TEST(Tokenizer, iterate_words) {
Tokenizer t(" test word ", " ");
ASSERT_THAT(t[0], Eq("test"));
ASSERT_THAT(t[1], Eq("word"));
ASSERT_EQ(t.count(), 2);
}
TEST(Tokenizer, default_seperators) {
Tokenizer t(" \r\n test \t word \f");
ASSERT_THAT(t[0], Eq("test"));
ASSERT_THAT(t[1], Eq("word"));
ASSERT_EQ(t.count(), 2);
}
TEST(Tokenizer, for_loop) {
Tokenizer t(" \r\n test \t word \f");
std::vector<std::string> list;
for(auto word : t) {
list.push_back(word);
}
ASSERT_THAT(list[0], Eq("test"));
ASSERT_THAT(list[1], Eq("word"));
}
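
The cases above use a single-character separator set apart from the default one; every character in the separator string acts independently. A sketch of one more case along those lines, hypothetical and not part of this commit:

// Illustrative extra case: both ',' and ' ' split tokens, and runs of either are skipped.
TEST(Tokenizer, multiple_separator_characters) {
    Tokenizer t("a, b,c", ", ");
    ASSERT_EQ(t.count(), 3);
    ASSERT_THAT(t[0], Eq("a"));
    ASSERT_THAT(t[1], Eq("b"));
    ASSERT_THAT(t[2], Eq("c"));
}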