add and document regex support for FileCheck. You can now do stuff like:

; CHECK: movl {{%e[a-z][xi]}}, %eax

or whatever.

llvm-svn: 82717
This commit is contained in:
Chris Lattner 2009-09-24 21:47:32 +00:00
parent abab11abb2
commit f08d2db928
5 changed files with 162 additions and 16 deletions

View File

@ -624,6 +624,40 @@ define i8 @coerce_offset0(i32 %V, i32* %P) {
</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"><a
name="FileCheck-Matching">FileCheck Pattern Matching Syntax</a></div>
<div class="doc_text">
<p>The CHECK: and CHECK-NOT: directives both take a pattern to match. For most
uses of FileCheck, fixed string matching is perfectly sufficient. For some
things, a more flexible form of matching is desired. To support this, FileCheck
allows you to specify regular expressions in matching strings, surrounded by
double braces: <b>{{yourregex}}</b>. Because we want to use fixed string
matching for a majority of what we do, FileCheck has been designed to support
mixing and matching fixed string matching with regular expressions. This allows
you to write things like this:</p>
<div class="doc_code">
<pre>
; CHECK: movhpd <b>{{[0-9]+}}</b>(%esp), <b>{{%xmm[0-7]}}</b>
</pre>
</div>
<p>In this case, any offset from the ESP register will be allowed, and any xmm
register will be allowed.</p>
<p>Because regular expressions are enclosed with double braces, they are
visually distinct, and you don't need to use escape characters within the double
braces like you would in C. In the rare case that you want to match double
braces explicitly from the input, you can use something ugly like
<b>{{[{][{]}}</b> as your pattern.</p>
</div>
<!-- _______________________________________________________________________ -->
<div class="doc_subsection"><a name="dgvars">Variables and
substitutions</a></div>

View File

@ -11,11 +11,14 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include <string>
struct llvm_regex;
namespace llvm {
class StringRef;
template<typename T> class SmallVectorImpl;
class Regex {
public:
enum {
@ -54,6 +57,8 @@ namespace llvm {
/// Matches.
/// For this feature to be enabled you must construct the regex using
/// Regex("...", Regex::Sub) constructor.
///
/// This returns true on a successful match.
bool match(const StringRef &String, SmallVectorImpl<StringRef> *Matches=0);
private:
struct llvm_regex *preg;

View File

@ -14,13 +14,14 @@
#include "llvm/Support/Regex.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallVector.h"
#include "regex_impl.h"
#include <string>
using namespace llvm;
Regex::Regex(const StringRef &regex, unsigned Flags) {
unsigned flags = 0;
preg = new struct llvm_regex;
preg = new llvm_regex();
preg->re_endp = regex.end();
if (Flags & IgnoreCase)
flags |= REG_ICASE;
@ -60,7 +61,7 @@ bool Regex::match(const StringRef &String, SmallVectorImpl<StringRef> *Matches){
}
// pmatch needs to have at least one element.
SmallVector<llvm_regmatch_t, 2> pm;
SmallVector<llvm_regmatch_t, 8> pm;
pm.resize(nmatch > 0 ? nmatch : 1);
pm[0].rm_so = 0;
pm[0].rm_eo = String.size();

View File

@ -9,6 +9,7 @@
#include "gtest/gtest.h"
#include "llvm/Support/Regex.h"
#include "llvm/ADT/SmallVector.h"
#include <cstring>
using namespace llvm;

View File

@ -19,6 +19,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Signals.h"
@ -44,8 +45,9 @@ NoCanonicalizeWhiteSpace("strict-whitespace",
//===----------------------------------------------------------------------===//
class Pattern {
/// Str - The string to match.
StringRef Str;
/// Chunks - The pattern chunks to match. If the bool is false, it is a fixed
/// string match, if it is true, it is a regex match.
SmallVector<std::pair<StringRef, bool>, 4> Chunks;
public:
Pattern() { }
@ -55,10 +57,7 @@ public:
/// Match - Match the pattern string against the input buffer Buffer. This
/// returns the position that is matched or npos if there is no match. If
/// there is a match, the size of the matched string is returned in MatchLen.
size_t Match(StringRef Buffer, size_t &MatchLen) const {
MatchLen = Str.size();
return Buffer.find(Str);
}
size_t Match(StringRef Buffer, size_t &MatchLen) const;
};
bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
@ -74,13 +73,119 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
"error");
return true;
}
// Scan the pattern to break it into regex and non-regex pieces.
while (!PatternStr.empty()) {
// Handle fixed string matches.
if (PatternStr.size() < 2 ||
PatternStr[0] != '{' || PatternStr[1] != '{') {
// Find the end, which is the start of the next regex.
size_t FixedMatchEnd = PatternStr.find("{{");
Chunks.push_back(std::make_pair(PatternStr.substr(0, FixedMatchEnd),
false));
PatternStr = PatternStr.substr(FixedMatchEnd);
continue;
}
// Otherwise, this is the start of a regex match. Scan for the }}.
size_t End = PatternStr.find("}}");
if (End == StringRef::npos) {
SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
"found start of regex string with no end '}}'", "error");
return true;
}
Regex R(PatternStr.substr(2, End-2));
std::string Error;
if (!R.isValid(Error)) {
SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()+2),
"invalid regex: " + Error, "error");
return true;
}
Chunks.push_back(std::make_pair(PatternStr.substr(2, End-2), true));
PatternStr = PatternStr.substr(End+2);
}
Str = PatternStr;
return false;
}
/// Match - Match the pattern string against the input buffer Buffer.  This
/// returns the position that is matched or npos if there is no match.  If
/// there is a match, the size of the matched string is returned in MatchLen.
///
/// The returned position is an offset from the start of Buffer as passed in
/// by the caller.  MatchLen is 0 whenever npos is returned.
///
/// The first chunk may match anywhere in the remaining buffer; every later
/// chunk has to match immediately after the text matched by the previous one.
size_t Pattern::Match(StringRef Buffer, size_t &MatchLen) const {
  MatchLen = 0;

  // A pattern with no chunks never matches.
  if (Chunks.empty())
    return StringRef::npos;

  SmallVector<StringRef, 4> MatchInfo;

  // Number of characters already dropped from the front of the caller's buffer
  // by earlier failed attempts.  Positions found in the trimmed Buffer must be
  // rebased by this amount before they are returned.
  size_t Consumed = 0;

  while (!Buffer.empty()) {
    // State of this attempt only; a failed attempt must not leak its partial
    // match position or length into the next one.
    size_t FirstMatch = StringRef::npos;
    size_t AttemptLen = 0;
    StringRef MatchAttempt = Buffer;

    unsigned ChunkNo = 0, e = Chunks.size();
    for (; ChunkNo != e; ++ChunkNo) {
      const std::pair<StringRef, bool> &Chunk = Chunks[ChunkNo];

      size_t ThisMatch = StringRef::npos;
      size_t ThisLength = StringRef::npos;
      if (!Chunk.second) {
        // Fixed string match.
        ThisMatch = MatchAttempt.find(Chunk.first);
        ThisLength = Chunk.first.size();
      } else if (Regex(Chunk.first, Regex::Sub).match(MatchAttempt,
                                                      &MatchInfo)) {
        // Successful regex match.  Only the overall match (element 0) is
        // used to compute the position and length of this chunk.
        assert(!MatchInfo.empty() && "Didn't get any match");
        StringRef FullMatch = MatchInfo[0];
        MatchInfo.clear();

        ThisMatch = FullMatch.data()-MatchAttempt.data();
        ThisLength = FullMatch.size();
      }

      // What we do next depends on whether this is the first chunk.  The
      // first chunk doesn't need to match at the start of MatchAttempt: it
      // anchors the attempt wherever it is found.
      if (ChunkNo == 0) {
        // If the first chunk fails then this pattern will never match in
        // Buffer.
        if (ThisMatch == StringRef::npos)
          return StringRef::npos;

        FirstMatch = ThisMatch;
        MatchAttempt = MatchAttempt.substr(FirstMatch);
        ThisMatch = 0;
      }

      // If this chunk didn't match, then the entire pattern didn't match from
      // FirstMatch, try later in the buffer.
      if (ThisMatch == StringRef::npos)
        break;

      // Ok, if the match didn't match at the beginning of MatchAttempt, then
      // we have something like "ABC{{DEF}}" and something was in-between.
      // Reject the match.
      if (ThisMatch != 0)
        break;

      // Otherwise, consume the matched text and move to the next chunk.
      AttemptLen += ThisLength;
      MatchAttempt = MatchAttempt.substr(ThisLength);
    }

    // If the whole thing matched, we win.  Report the position relative to
    // the buffer the caller handed us, and the length of this attempt only.
    if (ChunkNo == e) {
      MatchLen = AttemptLen;
      return Consumed + FirstMatch;
    }

    // Otherwise, try matching again after FirstMatch to see if this pattern
    // matches later in the buffer.
    Buffer = Buffer.substr(FirstMatch+1);
    Consumed += FirstMatch+1;
  }

  // If we ran out of stuff to scan, then we didn't match.
  return StringRef::npos;
}
//===----------------------------------------------------------------------===//
// Check Strings.
@ -367,14 +472,14 @@ int main(int argc, char **argv) {
// If this match had "not strings", verify that they don't exist in the
// skipped region.
for (unsigned i = 0, e = CheckStr.NotStrings.size(); i != e; ++i) {
for (unsigned ChunkNo = 0, e = CheckStr.NotStrings.size(); ChunkNo != e; ++ChunkNo) {
size_t MatchLen = 0;
size_t Pos = CheckStr.NotStrings[i].second.Match(SkippedRegion, MatchLen);
size_t Pos = CheckStr.NotStrings[ChunkNo].second.Match(SkippedRegion, MatchLen);
if (Pos == StringRef::npos) continue;
SM.PrintMessage(SMLoc::getFromPointer(LastMatch+Pos),
CheckPrefix+"-NOT: string occurred!", "error");
SM.PrintMessage(CheckStr.NotStrings[i].first,
SM.PrintMessage(CheckStr.NotStrings[ChunkNo].first,
CheckPrefix+"-NOT: pattern specified here", "note");
return 1;
}