2009-08-30 16:24:09 +08:00
|
|
|
//===- llvm/unittest/Support/RegexTest.cpp - Regex tests --===//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2009-08-30 16:24:09 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "llvm/Support/Regex.h"
|
2009-09-25 05:47:32 +08:00
|
|
|
#include "llvm/ADT/SmallVector.h"
|
2012-12-04 18:23:08 +08:00
|
|
|
#include "gtest/gtest.h"
|
2009-08-30 16:24:09 +08:00
|
|
|
#include <cstring>
|
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
/// Empty fixture; it only provides the suite name used by the TEST_F cases.
class RegexTest : public ::testing::Test {};
|
|
|
|
|
|
|
|
// Covers anchored and unanchored matching, capture-group extraction, and NUL
// bytes embedded in both the subject string and the pattern.
TEST_F(RegexTest, Basics) {
  // Anchored at both ends: the entire input has to be digits.
  Regex AllDigits("^[0-9]+$");
  EXPECT_TRUE(AllDigits.match("916"));
  EXPECT_TRUE(AllDigits.match("9"));
  EXPECT_FALSE(AllDigits.match("9a"));

  SmallVector<StringRef, 1> Matches;

  // Unanchored: the digit run is found inside the input and reported as the
  // single (whole-match) entry.
  Regex DigitRun("[0-9]+");
  EXPECT_TRUE(DigitRun.match("aa216b", &Matches));
  EXPECT_EQ(1u, Matches.size());
  EXPECT_EQ("216", Matches[0].str());

  // Two capture groups, the first of which is optional.
  Regex WithGroups("[0-9]+([a-f])?:([0-9]+)");
  EXPECT_TRUE(WithGroups.match("9a:513b", &Matches));
  EXPECT_EQ(3u, Matches.size());
  EXPECT_EQ("9a:513", Matches[0].str());
  EXPECT_EQ("a", Matches[1].str());
  EXPECT_EQ("513", Matches[2].str());

  // When the optional group does not participate, its entry is empty.
  EXPECT_TRUE(WithGroups.match("9:513b", &Matches));
  EXPECT_EQ(3u, Matches.size());
  EXPECT_EQ("9:513", Matches[0].str());
  EXPECT_EQ("", Matches[1].str());
  EXPECT_EQ("513", Matches[2].str());

  // A NUL byte in the subject is matched like any other non-'b' character.
  Regex NotB("a[^b]+b");
  std::string SubjectWithNul = "axxb";
  SubjectWithNul[2] = '\0';
  EXPECT_FALSE(NotB.match("abb"));
  EXPECT_TRUE(NotB.match(SubjectWithNul, &Matches));
  EXPECT_EQ(1u, Matches.size());
  EXPECT_EQ(SubjectWithNul, Matches[0].str());

  // A NUL byte in the pattern (replacing the second 'X') is a literal that
  // only matches a NUL byte in the subject.
  std::string PatternWithNul = "X[0-9]+X([a-f])?:([0-9]+)";
  std::string Subject = "YX99a:513b";
  PatternWithNul[7] = '\0';
  Regex NulRegex(PatternWithNul);
  EXPECT_FALSE(NulRegex.match(Subject));
  EXPECT_FALSE(NulRegex.match("X9"));
  Subject[3] = '\0';
  EXPECT_TRUE(NulRegex.match(Subject));
}
|
|
|
|
|
Add backreference matching capabilities to Support/Regex, with
appropriate unit tests. This change in itself is not expected to
affect any functionality at this point, but it will serve as a
stepping stone to improve FileCheck's variable matching capabilities.
Luckily, our regex implementation already supports backreferences,
although a bit of hacking is required to enable it. It supports both
Basic Regular Expressions (BREs) and Extended Regular Expressions
(EREs), without supporting backrefs for EREs, following POSIX strictly
in this respect. EREs are what we actually use (rightly). This is
contrary to many implementations (including the default on Linux) of
POSIX regexes, which do allow backrefs in EREs.
Adding backref support to our EREs is a very simple change in the
regcomp parsing code. I fail to think of significant cases where it
would clash with existing things, and it can bring more versatility to
the regexes we write. There's always the danger of a backref in a
specially crafted regex causing exponential matching times, but since
we mainly use them for testing purposes I don't think it's a big
problem. [it can also be placed behind a flag specific to FileCheck,
if needed].
For more details, see:
* http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-November/055840.html
* http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20121126/156878.html
llvm-svn: 168802
2012-11-29 03:00:02 +08:00
|
|
|
// Checks that \N inside a pattern must repeat the text captured by group N.
TEST_F(RegexTest, Backreferences) {
  SmallVector<StringRef, 4> Matches;

  // One group, referenced once.
  Regex SameWordTwice("([a-z]+)_\\1");
  EXPECT_TRUE(SameWordTwice.match("abc_abc", &Matches));
  EXPECT_EQ(2u, Matches.size());
  EXPECT_FALSE(SameWordTwice.match("abc_ab", &Matches));

  // One group, referenced twice.
  Regex SameDigitThrice("a([0-9])b\\1c\\1");
  EXPECT_TRUE(SameDigitThrice.match("a4b4c4", &Matches));
  EXPECT_EQ(2u, Matches.size());
  EXPECT_EQ("4", Matches[1].str());
  EXPECT_FALSE(SameDigitThrice.match("a2b2c3"));

  // Two groups, each referenced once; a mismatch in either one fails.
  Regex TwoGroups("a([0-9])([a-z])b\\1\\2");
  EXPECT_TRUE(TwoGroups.match("a6zb6z", &Matches));
  EXPECT_EQ(3u, Matches.size());
  EXPECT_EQ("6", Matches[1].str());
  EXPECT_EQ("z", Matches[2].str());
  EXPECT_FALSE(TwoGroups.match("a6zb6y"));
  EXPECT_FALSE(TwoGroups.match("a6zb7z"));
}
|
|
|
|
|
2010-02-18 04:08:42 +08:00
|
|
|
// Exercises Regex::sub: plain replacement, backslash escapes in the
// replacement string, backreferences to the match, and the error messages
// reported through the optional Error out-parameter.
TEST_F(RegexTest, Substitution) {
  std::string Error;
  // Every case below substitutes into this same input.
  const StringRef Input = "a1234ber";

  // Plain replacement, without requesting error reporting.
  EXPECT_EQ("aNUMber", Regex("[0-9]+").sub("NUM", Input));

  // Standard Escapes
  EXPECT_EQ("a\\ber", Regex("[0-9]+").sub("\\\\", Input, &Error));
  EXPECT_EQ("", Error);
  EXPECT_EQ("a\nber", Regex("[0-9]+").sub("\\n", Input, &Error));
  EXPECT_EQ("", Error);
  EXPECT_EQ("a\tber", Regex("[0-9]+").sub("\\t", Input, &Error));
  EXPECT_EQ("", Error);
  // An escape with no special meaning produces the escaped character itself.
  EXPECT_EQ("ajber", Regex("[0-9]+").sub("\\j", Input, &Error));
  EXPECT_EQ("", Error);

  // A lone trailing backslash contributes nothing and is reported as an error.
  EXPECT_EQ("aber", Regex("[0-9]+").sub("\\", Input, &Error));
  EXPECT_EQ("replacement string contained trailing backslash", Error);

  // Backreferences
  // \0 refers to the entire match.
  EXPECT_EQ("aa1234bber", Regex("a[0-9]+b").sub("a\\0b", Input, &Error));
  EXPECT_EQ("", Error);

  // \1 refers to the first capture group.
  EXPECT_EQ("a1234ber", Regex("a([0-9]+)b").sub("a\\1b", Input, &Error));
  EXPECT_EQ("", Error);

  // A reference to a group that does not exist is dropped and reported.
  EXPECT_EQ("aber", Regex("a[0-9]+b").sub("a\\100b", Input, &Error));
  EXPECT_EQ("invalid backreference string '100'", Error);
}
|
|
|
|
|
2013-08-06 01:47:59 +08:00
|
|
|
// Regex::isLiteralERE must accept a plain string and reject anything that
// contains an ERE metacharacter or a backslash escape.
TEST_F(RegexTest, IsLiteralERE) {
  EXPECT_TRUE(Regex::isLiteralERE("abc"));

  // Each entry contains a construct that makes it a non-literal pattern.
  static const char *const NonLiteralPatterns[] = {
      "a(bc)", "^abc", "abc$",  "a|bc",   "abc*",    "abc+",
      "abc?",  "abc.", "a[bc]", "abc\\1", "abc{1,2}"};
  for (const char *Pattern : NonLiteralPatterns)
    EXPECT_FALSE(Regex::isLiteralERE(Pattern)) << "pattern: " << Pattern;
}
|
|
|
|
|
2013-12-12 10:51:58 +08:00
|
|
|
// Regex::escape must backslash-escape metacharacters, including an existing
// backslash in the input.
TEST_F(RegexTest, Escape) {
  const std::string EscapedBracket = Regex::escape("a[bc]");
  EXPECT_EQ("a\\[bc\\]", EscapedBracket);

  const std::string EscapedBraces = Regex::escape("abc{1\\,2}");
  EXPECT_EQ("abc\\{1\\\\,2\\}", EscapedBraces);
}
|
|
|
|
|
2013-08-09 01:32:45 +08:00
|
|
|
// Malformed patterns must be reported as invalid with a descriptive message.
TEST_F(RegexTest, IsValid) {
  std::string Error;

  // Unterminated group.
  Regex UnbalancedParen("(foo");
  EXPECT_FALSE(UnbalancedParen.isValid(Error));
  EXPECT_EQ("parentheses not balanced", Error);

  // Unterminated range inside a bracket expression.
  Regex BrokenRange("a[b-");
  EXPECT_FALSE(BrokenRange.isValid(Error));
  EXPECT_EQ("invalid character range", Error);
}
|
|
|
|
|
2014-01-03 03:04:59 +08:00
|
|
|
// A Regex built by move construction must match like the original did.
TEST_F(RegexTest, MoveConstruct) {
  Regex Source("^[0-9]+$");
  Regex Moved(std::move(Source));
  EXPECT_TRUE(Moved.match("916"));
}
|
|
|
|
|
|
|
|
// Move assignment must transfer the compiled pattern to the destination and
// leave the source reporting itself as invalid.
TEST_F(RegexTest, MoveAssign) {
  std::string Error;
  Regex Source("^[0-9]+$");
  Regex Target("abc");

  Target = std::move(Source);
  EXPECT_TRUE(Target.match("916"));

  // Deliberate use of the moved-from object: it is expected to be invalid.
  EXPECT_FALSE(Source.isValid(Error));
}
|
|
|
|
|
2016-09-01 16:00:28 +08:00
|
|
|
// A default-constructed Regex is invalid until a real pattern is assigned.
TEST_F(RegexTest, NoArgConstructor) {
  Regex Pattern;
  std::string Error;

  EXPECT_FALSE(Pattern.isValid(Error));
  EXPECT_EQ("invalid regular expression", Error);

  // Assigning a compiled pattern makes the object valid.
  Pattern = Regex("abc");
  EXPECT_TRUE(Pattern.isValid(Error));
}
|
|
|
|
|
2016-09-02 16:44:46 +08:00
|
|
|
// Matching against an invalid (default-constructed) Regex must simply fail.
TEST_F(RegexTest, MatchInvalid) {
  std::string Error;
  Regex Invalid;
  EXPECT_FALSE(Invalid.isValid(Error));
  EXPECT_FALSE(Invalid.match("X"));
}
|
|
|
|
|
2017-10-28 03:15:13 +08:00
|
|
|
// https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=3727
|
|
|
|
TEST_F(RegexTest, OssFuzz3727Regression) {
|
|
|
|
// Wrap in a StringRef so the NUL byte doesn't terminate the string
|
|
|
|
Regex r(StringRef("[[[=GS\x00[=][", 10));
|
|
|
|
std::string Error;
|
|
|
|
EXPECT_FALSE(r.isValid(Error));
|
|
|
|
}
|
|
|
|
|
2009-08-30 16:24:09 +08:00
|
|
|
}
|