From 2fca7aeb09329409d2a3dba0e72fae4cb6707df6 Mon Sep 17 00:00:00 2001
From: Marshall Clow
Date: Thu, 12 Oct 2017 14:48:09 +0000
Subject: [PATCH] More fuzzing infrastructure - regex

llvm-svn: 315582
---
 libcxx/fuzzing/fuzzing.cpp                    | 58 ++++++++++++++++++-
 libcxx/fuzzing/fuzzing.h                      | 10 +++-
 .../test/libcxx/fuzzing/regex_ECMAScript.cpp  | 37 ++++++++++++
 libcxx/test/libcxx/fuzzing/regex_POSIX.cpp    | 37 ++++++++++++
 libcxx/test/libcxx/fuzzing/regex_awk.cpp      | 37 ++++++++++++
 libcxx/test/libcxx/fuzzing/regex_egrep.cpp    | 37 ++++++++++++
 libcxx/test/libcxx/fuzzing/regex_extended.cpp | 37 ++++++++++++
 libcxx/test/libcxx/fuzzing/regex_grep.cpp     | 37 ++++++++++++
 8 files changed, 287 insertions(+), 3 deletions(-)
 create mode 100644 libcxx/test/libcxx/fuzzing/regex_ECMAScript.cpp
 create mode 100644 libcxx/test/libcxx/fuzzing/regex_POSIX.cpp
 create mode 100644 libcxx/test/libcxx/fuzzing/regex_awk.cpp
 create mode 100644 libcxx/test/libcxx/fuzzing/regex_egrep.cpp
 create mode 100644 libcxx/test/libcxx/fuzzing/regex_extended.cpp
 create mode 100644 libcxx/test/libcxx/fuzzing/regex_grep.cpp

diff --git a/libcxx/fuzzing/fuzzing.cpp b/libcxx/fuzzing/fuzzing.cpp
index cc349cdee69e..d4d0e602cc4a 100644
--- a/libcxx/fuzzing/fuzzing.cpp
+++ b/libcxx/fuzzing/fuzzing.cpp
@@ -26,8 +26,7 @@
 #include "fuzzing.h"
 #include <vector>
 #include <algorithm>
-
-#include <iostream>
+#include <regex>
 
 // If we had C++14, we could use the four iterator version of is_permutation
 
@@ -219,4 +218,59 @@ int partial_sort (const uint8_t *data, size_t size)
     return 0;
 }
 
+
+// -- regex fuzzers: each input is compiled as a pattern and then matched against itself
+
+static int regex_helper(const uint8_t *data, size_t size, std::regex::flag_type flag)
+{
+    if (size > 0) // an empty input is never compiled; it falls through to return 0
+    {
+        try
+        {
+            std::string s((const char *)data, size);
+            std::regex re(s, flag);
+            return std::regex_match(s, re) ? 1 : 0;
+        }
+        catch (const std::regex_error &) {} // rejected pattern: falls through to return 0
+    }
+    return 0;
+}
+
+
+int regex_ECMAScript (const uint8_t *data, size_t size)
+{
+    (void) regex_helper(data, size, std::regex_constants::ECMAScript);
+    return 0;
+}
+
+int regex_POSIX (const uint8_t *data, size_t size)
+{
+    (void) regex_helper(data, size, std::regex_constants::basic);
+    return 0;
+}
+
+int regex_extended (const uint8_t *data, size_t size)
+{
+    (void) regex_helper(data, size, std::regex_constants::extended);
+    return 0;
+}
+
+int regex_awk (const uint8_t *data, size_t size)
+{
+    (void) regex_helper(data, size, std::regex_constants::awk);
+    return 0;
+}
+
+int regex_grep (const uint8_t *data, size_t size)
+{
+    (void) regex_helper(data, size, std::regex_constants::grep);
+    return 0;
+}
+
+int regex_egrep (const uint8_t *data, size_t size)
+{
+    (void) regex_helper(data, size, std::regex_constants::egrep);
+    return 0;
+}
+
 } // namespace fuzzing
diff --git a/libcxx/fuzzing/fuzzing.h b/libcxx/fuzzing/fuzzing.h
index b8116fbc11dd..6624955f8ed4 100644
--- a/libcxx/fuzzing/fuzzing.h
+++ b/libcxx/fuzzing/fuzzing.h
@@ -27,7 +27,15 @@ namespace fuzzing {
 
     int nth_element (const uint8_t *data, size_t size);
     int partial_sort (const uint8_t *data, size_t size);
-
+
+// Various flavors of regex
+    int regex_ECMAScript (const uint8_t *data, size_t size);
+    int regex_POSIX (const uint8_t *data, size_t size);
+    int regex_extended (const uint8_t *data, size_t size);
+    int regex_awk (const uint8_t *data, size_t size);
+    int regex_grep (const uint8_t *data, size_t size);
+    int regex_egrep (const uint8_t *data, size_t size);
+
 } // namespace fuzzing
 
 #endif // _LIBCPP_FUZZING
diff --git a/libcxx/test/libcxx/fuzzing/regex_ECMAScript.cpp b/libcxx/test/libcxx/fuzzing/regex_ECMAScript.cpp
new file mode 100644
index 000000000000..2e57126022f3
--- /dev/null
+++ b/libcxx/test/libcxx/fuzzing/regex_ECMAScript.cpp
@@ -0,0 +1,37 @@
+// -*- C++ -*-
+//===--------------------- regex_ECMAScript.cpp ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// XFAIL
+
+#include "fuzzing.h"
+#include <cassert>
+#include <cstring> // for strlen
+
+const char * test_cases[] = { // patterns passed to the fuzz entry point as raw bytes
+    "",
+    "s",
+    "b*c",
+    "ba?sf",
+    "lka*ea",
+    "adsf*kas;lnc441[0-9]1r34525234"
+    };
+
+const size_t k_num_tests = sizeof(test_cases)/sizeof(test_cases[0]);
+
+int main ()
+{
+    for (size_t i = 0; i < k_num_tests; ++i)
+    {
+        const size_t size = std::strlen(test_cases[i]);
+        const uint8_t *data = (const uint8_t *) test_cases[i];
+        assert(0 == fuzzing::regex_ECMAScript(data, size));
+    }
+    return 0;
+}
diff --git a/libcxx/test/libcxx/fuzzing/regex_POSIX.cpp b/libcxx/test/libcxx/fuzzing/regex_POSIX.cpp
new file mode 100644
index 000000000000..f0bd28919fe5
--- /dev/null
+++ b/libcxx/test/libcxx/fuzzing/regex_POSIX.cpp
@@ -0,0 +1,37 @@
+// -*- C++ -*-
+//===----------------------- regex_POSIX.cpp ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// XFAIL
+
+#include "fuzzing.h"
+#include <cassert>
+#include <cstring> // for strlen
+
+const char * test_cases[] = { // patterns passed to the fuzz entry point as raw bytes
+    "",
+    "s",
+    "b*c",
+    "ba?sf",
+    "lka*ea",
+    "adsf*kas;lnc441[0-9]1r34525234"
+    };
+
+const size_t k_num_tests = sizeof(test_cases)/sizeof(test_cases[0]);
+
+int main ()
+{
+    for (size_t i = 0; i < k_num_tests; ++i)
+    {
+        const size_t size = std::strlen(test_cases[i]);
+        const uint8_t *data = (const uint8_t *) test_cases[i];
+        assert(0 == fuzzing::regex_POSIX(data, size));
+    }
+    return 0;
+}
diff --git a/libcxx/test/libcxx/fuzzing/regex_awk.cpp b/libcxx/test/libcxx/fuzzing/regex_awk.cpp
new file mode 100644
index 000000000000..2e57126022f3
--- /dev/null
+++ b/libcxx/test/libcxx/fuzzing/regex_awk.cpp
@@ -0,0 +1,37 @@
+// -*- C++ -*-
+//===------------------------- regex_awk.cpp ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// XFAIL
+
+#include "fuzzing.h"
+#include <cassert>
+#include <cstring> // for strlen
+
+const char * test_cases[] = { // patterns passed to the fuzz entry point as raw bytes
+    "",
+    "s",
+    "b*c",
+    "ba?sf",
+    "lka*ea",
+    "adsf*kas;lnc441[0-9]1r34525234"
+    };
+
+const size_t k_num_tests = sizeof(test_cases)/sizeof(test_cases[0]);
+
+int main ()
+{
+    for (size_t i = 0; i < k_num_tests; ++i)
+    {
+        const size_t size = std::strlen(test_cases[i]);
+        const uint8_t *data = (const uint8_t *) test_cases[i];
+        assert(0 == fuzzing::regex_awk(data, size)); // exercise the awk grammar entry point
+    }
+    return 0;
+}
diff --git a/libcxx/test/libcxx/fuzzing/regex_egrep.cpp b/libcxx/test/libcxx/fuzzing/regex_egrep.cpp
new file mode 100644
index 000000000000..056869f5222c
--- /dev/null
+++ b/libcxx/test/libcxx/fuzzing/regex_egrep.cpp
@@ -0,0 +1,37 @@
+// -*- C++ -*-
+//===------------------------ regex_egrep.cpp -----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// XFAIL
+
+#include "fuzzing.h"
+#include <cassert>
+#include <cstring> // for strlen
+
+const char * test_cases[] = { // patterns passed to the fuzz entry point as raw bytes
+    "",
+    "s",
+    "b*c",
+    "ba?sf",
+    "lka*ea",
+    "adsf*kas;lnc441[0-9]1r34525234"
+    };
+
+const size_t k_num_tests = sizeof(test_cases)/sizeof(test_cases[0]);
+
+int main ()
+{
+    for (size_t i = 0; i < k_num_tests; ++i)
+    {
+        const size_t size = std::strlen(test_cases[i]);
+        const uint8_t *data = (const uint8_t *) test_cases[i];
+        assert(0 == fuzzing::regex_egrep(data, size));
+    }
+    return 0;
+}
diff --git a/libcxx/test/libcxx/fuzzing/regex_extended.cpp b/libcxx/test/libcxx/fuzzing/regex_extended.cpp
new file mode 100644
index 000000000000..ac850eb5cbb4
--- /dev/null
+++ b/libcxx/test/libcxx/fuzzing/regex_extended.cpp
@@ -0,0 +1,37 @@
+// -*- C++ -*-
+//===---------------------- regex_extended.cpp ----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// XFAIL
+
+#include "fuzzing.h"
+#include <cassert>
+#include <cstring> // for strlen
+
+const char * test_cases[] = { // patterns passed to the fuzz entry point as raw bytes
+    "",
+    "s",
+    "b*c",
+    "ba?sf",
+    "lka*ea",
+    "adsf*kas;lnc441[0-9]1r34525234"
+    };
+
+const size_t k_num_tests = sizeof(test_cases)/sizeof(test_cases[0]);
+
+int main ()
+{
+    for (size_t i = 0; i < k_num_tests; ++i)
+    {
+        const size_t size = std::strlen(test_cases[i]);
+        const uint8_t *data = (const uint8_t *) test_cases[i];
+        assert(0 == fuzzing::regex_extended(data, size));
+    }
+    return 0;
+}
diff --git a/libcxx/test/libcxx/fuzzing/regex_grep.cpp b/libcxx/test/libcxx/fuzzing/regex_grep.cpp
new file mode 100644
index 000000000000..5b1dda293dfa
--- /dev/null
+++ b/libcxx/test/libcxx/fuzzing/regex_grep.cpp
@@ -0,0 +1,37 @@
+// -*- C++ -*-
+//===------------------------ regex_grep.cpp ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// XFAIL
+
+#include "fuzzing.h"
+#include <cassert>
+#include <cstring> // for strlen
+
+const char * test_cases[] = { // patterns passed to the fuzz entry point as raw bytes
+    "",
+    "s",
+    "b*c",
+    "ba?sf",
+    "lka*ea",
+    "adsf*kas;lnc441[0-9]1r34525234"
+    };
+
+const size_t k_num_tests = sizeof(test_cases)/sizeof(test_cases[0]);
+
+int main ()
+{
+    for (size_t i = 0; i < k_num_tests; ++i)
+    {
+        const size_t size = std::strlen(test_cases[i]);
+        const uint8_t *data = (const uint8_t *) test_cases[i];
+        assert(0 == fuzzing::regex_grep(data, size));
+    }
+    return 0;
+}