Enhance llvm::SourceMgr to support diagnostic ranges, the same way clang does.

Also enhance the X86 asmparser to produce ranges in the one case that was annoying me, for example:

test.s:10:15: error: invalid operand for instruction
movl 0(%rax), 0(%edx)
              ^~~~~~~

It should be straightforward to enhance FileCheck, tblgen, and/or the .ll parser to use
ranges where appropriate if someone is interested.

llvm-svn: 142106
This commit is contained in:
Chris Lattner 2011-10-16 04:47:35 +00:00
parent 25ea4e5ad3
commit a3a0681083
20 changed files with 188 additions and 79 deletions

View File

@ -11,6 +11,7 @@
#define LLVM_MC_MCASMPARSER_H
#include "llvm/Support/DataTypes.h"
#include "llvm/ADT/ArrayRef.h"
namespace llvm {
class AsmToken;
@ -22,6 +23,7 @@ class MCExpr;
class MCStreamer;
class MCTargetAsmParser;
class SMLoc;
class SMRange;
class SourceMgr;
class StringRef;
class Twine;
@ -72,14 +74,16 @@ public:
/// Msg.
///
/// \return The return value is true, if warnings are fatal.
virtual bool Warning(SMLoc L, const Twine &Msg) = 0;
virtual bool Warning(SMLoc L, const Twine &Msg,
ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) = 0;
/// Error - Emit an error at the location \arg L, with the message \arg
/// Msg.
///
/// \return The return value is always true, as an idiomatic convenience to
/// clients.
virtual bool Error(SMLoc L, const Twine &Msg) = 0;
virtual bool Error(SMLoc L, const Twine &Msg,
ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) = 0;
/// Lex - Get the next AsmToken in the stream, possibly handling file
/// inclusion first.
@ -89,7 +93,8 @@ public:
const AsmToken &getTok();
/// \brief Report an error at the current lexer location.
bool TokError(const Twine &Msg);
bool TokError(const Twine &Msg,
ArrayRef<SMRange> Ranges = ArrayRef<SMRange>());
/// ParseIdentifier - Parse an identifier or string (as a quoted identifier)
/// and set \arg Res to the identifier contents.

View File

@ -15,9 +15,11 @@
#ifndef SUPPORT_SMLOC_H
#define SUPPORT_SMLOC_H
#include <cassert>
namespace llvm {
// SMLoc - Represents a location in source code.
/// SMLoc - Represents a location in source code.
class SMLoc {
const char *Ptr;
public:
@ -38,7 +40,23 @@ public:
}
};
}
/// SMRange - Represents a range in source code. Note that unlike standard STL
/// ranges, the locations specified are considered to be *inclusive*. For
/// example, [X,X] *does* include X, it isn't an empty range.
class SMRange {
public:
SMLoc Start, End;
SMRange() {}
SMRange(SMLoc Start, SMLoc End) : Start(Start), End(End) {
assert(Start.isValid() == End.isValid() &&
"Start and end should either both be valid or both be invalid!");
}
bool isValid() const { return Start.isValid(); }
};
} // end namespace llvm
#endif

View File

@ -17,10 +17,8 @@
#define SUPPORT_SOURCEMGR_H
#include "llvm/Support/SMLoc.h"
#include "llvm/ADT/ArrayRef.h"
#include <string>
#include <vector>
#include <cassert>
namespace llvm {
class MemoryBuffer;
@ -125,6 +123,7 @@ public:
/// prefixed to the message.
/// @param ShowLine - Should the diagnostic show the source line.
void PrintMessage(SMLoc Loc, const Twine &Msg, const char *Type,
ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
bool ShowLine = true) const;
@ -136,6 +135,7 @@ public:
/// @param ShowLine - Should the diagnostic show the source line.
SMDiagnostic GetMessage(SMLoc Loc,
const Twine &Msg, const char *Type,
ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
bool ShowLine = true) const;
/// PrintIncludeStack - Prints the names of included files and the line of the
@ -157,6 +157,7 @@ class SMDiagnostic {
int LineNo, ColumnNo;
std::string Message, LineContents;
unsigned ShowLine : 1;
std::vector<std::pair<unsigned, unsigned> > Ranges;
public:
// Null diagnostic.
@ -170,9 +171,7 @@ public:
SMDiagnostic(const SourceMgr &sm, SMLoc L, const std::string &FN,
int Line, int Col,
const std::string &Msg, const std::string &LineStr,
bool showline = true)
: SM(&sm), Loc(L), Filename(FN), LineNo(Line), ColumnNo(Col), Message(Msg),
LineContents(LineStr), ShowLine(showline) {}
ArrayRef<std::pair<unsigned,unsigned> > Ranges, bool showline);
const SourceMgr *getSourceMgr() const { return SM; }
SMLoc getLoc() const { return Loc; }
@ -182,8 +181,10 @@ public:
const std::string &getMessage() const { return Message; }
const std::string &getLineContents() const { return LineContents; }
bool getShowLine() const { return ShowLine; }
void Print(const char *ProgName, raw_ostream &S) const;
const std::vector<std::pair<unsigned, unsigned> > &getRanges() const {
return Ranges;
}
void print(const char *ProgName, raw_ostream &S) const;
};
} // end llvm namespace

View File

@ -337,13 +337,9 @@ int EDDisassembler::printInst(std::string &str, MCInst &inst) {
return 0;
}
static void diag_handler(const SMDiagnostic &diag,
void *context)
{
if (context) {
EDDisassembler *disassembler = static_cast<EDDisassembler*>(context);
diag.Print("", disassembler->ErrorStream);
}
static void diag_handler(const SMDiagnostic &diag, void *context) {
if (context)
diag.print("", static_cast<EDDisassembler*>(context)->ErrorStream);
}
int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,

View File

@ -142,8 +142,10 @@ public:
virtual MCContext &getContext() { return Ctx; }
virtual MCStreamer &getStreamer() { return Out; }
virtual bool Warning(SMLoc L, const Twine &Msg);
virtual bool Error(SMLoc L, const Twine &Msg);
virtual bool Warning(SMLoc L, const Twine &Msg,
ArrayRef<SMRange> Ranges = ArrayRef<SMRange>());
virtual bool Error(SMLoc L, const Twine &Msg,
ArrayRef<SMRange> Ranges = ArrayRef<SMRange>());
const AsmToken &Lex();
@ -170,8 +172,9 @@ private:
void PrintMacroInstantiations();
void PrintMessage(SMLoc Loc, const Twine &Msg, const char *Type,
ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
bool ShowLine = true) const {
SrcMgr.PrintMessage(Loc, Msg, Type, ShowLine);
SrcMgr.PrintMessage(Loc, Msg, Type, Ranges, ShowLine);
}
static void DiagHandler(const SMDiagnostic &Diag, void *Context);
@ -393,17 +396,17 @@ void AsmParser::PrintMacroInstantiations() {
"note");
}
bool AsmParser::Warning(SMLoc L, const Twine &Msg) {
// Warning - Emit a "warning" diagnostic at L, passing Ranges through to
// PrintMessage, and then call PrintMacroInstantiations(). When
// FatalAssemblerWarnings is set the message is handed to Error() instead and
// its result is returned; otherwise the return value is false.
bool AsmParser::Warning(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) {
if (FatalAssemblerWarnings)
return Error(L, Msg);
PrintMessage(L, Msg, "warning");
return Error(L, Msg, Ranges);
PrintMessage(L, Msg, "warning", Ranges);
PrintMacroInstantiations();
return false;
}
bool AsmParser::Error(SMLoc L, const Twine &Msg) {
// Error - Set HadError, emit an "error" diagnostic at L (passing Ranges
// through to PrintMessage), and then call PrintMacroInstantiations().
// Always returns true.
bool AsmParser::Error(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) {
HadError = true;
PrintMessage(L, Msg, "error");
PrintMessage(L, Msg, "error", Ranges);
PrintMacroInstantiations();
return true;
}
@ -496,7 +499,8 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// first referenced for a source location. We need to add something
// to track that. Currently, we just point to the end of the file.
PrintMessage(getLexer().getLoc(), "assembler local symbol '" +
Sym->getName() + "' not defined", "error", false);
Sym->getName() + "' not defined", "error",
ArrayRef<SMRange>(), false);
}
}
@ -1284,7 +1288,7 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
if (!Parser->CppHashLineNumber ||
&DiagSrcMgr != &Parser->SrcMgr ||
DiagBuf != CppHashBuf) {
Diag.Print(0, OS);
Diag.print(0, OS);
return;
}
@ -1299,16 +1303,12 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
int LineNo = Parser->CppHashLineNumber - 1 +
(DiagLocLineNo - CppHashLocLineNo);
SMDiagnostic NewDiag(*Diag.getSourceMgr(),
Diag.getLoc(),
Filename,
LineNo,
Diag.getColumnNo(),
Diag.getMessage(),
Diag.getLineContents(),
Diag.getShowLine());
SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(),
Filename, LineNo, Diag.getColumnNo(),
Diag.getMessage(), Diag.getLineContents(),
Diag.getRanges(), Diag.getShowLine());
NewDiag.Print(0, OS);
NewDiag.print(0, OS);
}
bool AsmParser::expandMacro(SmallString<256> &Buf, StringRef Body,

View File

@ -33,8 +33,8 @@ const AsmToken &MCAsmParser::getTok() {
return getLexer().getTok();
}
bool MCAsmParser::TokError(const Twine &Msg) {
Error(getLexer().getLoc(), Msg);
bool MCAsmParser::TokError(const Twine &Msg, ArrayRef<SMRange> Ranges) {
Error(getLexer().getLoc(), Msg, Ranges);
return true;
}

View File

@ -141,7 +141,8 @@ void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
/// @param Type - If non-null, the kind of message (e.g., "error") which is
/// prefixed to the message.
SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const Twine &Msg,
const char *Type, bool ShowLine) const {
const char *Type, ArrayRef<SMRange> Ranges,
bool ShowLine) const {
// First thing to do: find the current buffer containing the specified
// location.
@ -156,15 +157,12 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const Twine &Msg,
LineStart[-1] != '\n' && LineStart[-1] != '\r')
--LineStart;
std::string LineStr;
if (ShowLine) {
// Get the end of the line.
const char *LineEnd = Loc.getPointer();
while (LineEnd != CurMB->getBufferEnd() &&
LineEnd[0] != '\n' && LineEnd[0] != '\r')
++LineEnd;
LineStr = std::string(LineStart, LineEnd);
}
// Get the end of the line.
const char *LineEnd = Loc.getPointer();
while (LineEnd != CurMB->getBufferEnd() &&
LineEnd[0] != '\n' && LineEnd[0] != '\r')
++LineEnd;
std::string LineStr(LineStart, LineEnd);
std::string PrintedMsg;
raw_string_ostream OS(PrintedMsg);
@ -172,17 +170,40 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const Twine &Msg,
OS << Type << ": ";
OS << Msg;
// Convert any ranges to column ranges that only intersect the line of the
// location.
SmallVector<std::pair<unsigned, unsigned>, 4> ColRanges;
for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
SMRange R = Ranges[i];
if (!R.isValid()) continue;
// If the line doesn't contain any part of the range, then ignore it.
if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
continue;
// Ignore pieces of the range that go onto other lines.
if (R.Start.getPointer() < LineStart)
R.Start = SMLoc::getFromPointer(LineStart);
if (R.End.getPointer() > LineEnd)
R.End = SMLoc::getFromPointer(LineEnd);
// Translate from SMLoc ranges to column ranges.
ColRanges.push_back(std::make_pair(R.Start.getPointer()-LineStart,
R.End.getPointer()-LineStart));
}
return SMDiagnostic(*this, Loc,
CurMB->getBufferIdentifier(), FindLineNumber(Loc, CurBuf),
Loc.getPointer()-LineStart, OS.str(),
LineStr, ShowLine);
LineStr, ColRanges, ShowLine);
}
void SourceMgr::PrintMessage(SMLoc Loc, const Twine &Msg,
const char *Type, bool ShowLine) const {
const char *Type, ArrayRef<SMRange> Ranges,
bool ShowLine) const {
// Report the message with the diagnostic handler if present.
if (DiagHandler) {
DiagHandler(GetMessage(Loc, Msg, Type, ShowLine), DiagContext);
DiagHandler(GetMessage(Loc, Msg, Type, Ranges, ShowLine), DiagContext);
return;
}
@ -192,14 +213,23 @@ void SourceMgr::PrintMessage(SMLoc Loc, const Twine &Msg,
assert(CurBuf != -1 && "Invalid or unspecified location!");
PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
GetMessage(Loc, Msg, Type, ShowLine).Print(0, OS);
GetMessage(Loc, Msg, Type, Ranges, ShowLine).print(0, OS);
}
//===----------------------------------------------------------------------===//
// SMDiagnostic Implementation
//===----------------------------------------------------------------------===//
void SMDiagnostic::Print(const char *ProgName, raw_ostream &S) const {
/// Construct a fully specified diagnostic. The column ranges are copied into
/// the diagnostic's own vector, so the storage behind \p ColRanges does not
/// need to outlive the constructed object.
SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, const std::string &FN,
                           int Line, int Col, const std::string &Msg,
                           const std::string &LineStr,
                           ArrayRef<std::pair<unsigned,unsigned> > ColRanges,
                           bool showline)
  : SM(&sm),
    Loc(L),
    Filename(FN),
    LineNo(Line),
    ColumnNo(Col),
    Message(Msg),
    LineContents(LineStr),
    ShowLine(showline),
    Ranges(ColRanges.vec()) {
}
void SMDiagnostic::print(const char *ProgName, raw_ostream &S) const {
if (ProgName && ProgName[0])
S << ProgName << ": ";
@ -219,14 +249,63 @@ void SMDiagnostic::Print(const char *ProgName, raw_ostream &S) const {
S << Message << '\n';
if (LineNo != -1 && ColumnNo != -1 && ShowLine) {
S << LineContents << '\n';
if (LineNo == -1 || ColumnNo == -1 || !ShowLine)
return;
// Print out spaces/tabs before the caret.
for (unsigned i = 0; i != unsigned(ColumnNo); ++i)
S << (LineContents[i] == '\t' ? '\t' : ' ');
S << "^\n";
// Build the line with the caret and ranges.
std::string CaretLine(LineContents.size()+1, ' ');
// Expand any ranges.
for (unsigned r = 0, e = Ranges.size(); r != e; ++r) {
std::pair<unsigned, unsigned> R = Ranges[r];
for (unsigned i = R.first,
e = std::min(R.second, (unsigned)LineContents.size())+1; i != e; ++i)
CaretLine[i] = '~';
}
// Finally, plop on the caret.
if (unsigned(ColumnNo) <= LineContents.size())
CaretLine[ColumnNo] = '^';
else
CaretLine[LineContents.size()] = '^';
// ... and remove trailing whitespace so the output doesn't wrap for it. We
// know that the line isn't completely empty because it has the caret in it at
// least.
CaretLine.erase(CaretLine.find_last_not_of(' ')+1);
// Print out the source line one character at a time, so we can expand tabs.
for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) {
if (LineContents[i] != '\t') {
S << LineContents[i];
++OutCol;
continue;
}
// If we have a tab, emit at least one space, then round up to 8 columns.
do {
S << ' ';
++OutCol;
} while (OutCol & 7);
}
S << '\n';
// Print out the caret line, matching tabs in the source line.
for (unsigned i = 0, e = CaretLine.size(), OutCol = 0; i != e; ++i) {
if (i >= LineContents.size() || LineContents[i] != '\t') {
S << CaretLine[i];
++OutCol;
continue;
}
// Okay, we have a tab. Insert the appropriate number of characters.
do {
S << CaretLine[i];
++OutCol;
} while (OutCol & 7);
}
S << '\n';
}

View File

@ -41,7 +41,10 @@ private:
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
/// Error - Report an error at \arg L by forwarding to Parser.Error, passing
/// along any source ranges supplied by the caller.
///
/// \return Whatever Parser.Error returns.
bool Error(SMLoc L, const Twine &Msg,
ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
return Parser.Error(L, Msg, Ranges);
}
X86Operand *ParseOperand();
X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
@ -145,6 +148,8 @@ struct X86Operand : public MCParsedAsmOperand {
SMLoc getStartLoc() const { return StartLoc; }
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const { return EndLoc; }
/// getLocRange - Get the source range running from StartLoc to EndLoc.
SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
virtual void print(raw_ostream &OS) const {}
@ -1083,16 +1088,19 @@ MatchAndEmitInstruction(SMLoc IDLoc,
}
// Recover location info for the operand if we know which was the problem.
SMLoc ErrorLoc = IDLoc;
if (OrigErrorInfo != ~0U) {
if (OrigErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
ErrorLoc = ((X86Operand*)Operands[OrigErrorInfo])->getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
X86Operand *Operand = (X86Operand*)Operands[OrigErrorInfo];
if (Operand->getStartLoc().isValid()) {
SMRange OperandRange = Operand->getLocRange();
return Error(Operand->getStartLoc(), "invalid operand for instruction",
OperandRange);
}
}
return Error(ErrorLoc, "invalid operand for instruction");
return Error(IDLoc, "invalid operand for instruction");
}
// If one instruction matched with a missing feature, report this as a
@ -1112,7 +1120,6 @@ MatchAndEmitInstruction(SMLoc IDLoc,
}
// If all of these were an outright failure, report it in a useless way.
// FIXME: We should give nicer diagnostics about the exact failure.
Error(IDLoc, "unknown use of instruction mnemonic without a size suffix");
return true;
}

View File

@ -15,3 +15,6 @@ addl $0, 0(%rax)
# 8 "test.s"
movi $8,%eax
movl 0(%rax), 0(%edx) // error: invalid operand for instruction

View File

@ -87,7 +87,7 @@ Module *llvm::ParseInputFile(const std::string &Filename,
SMDiagnostic Err;
Module *Result = ParseIRFile(Filename, Err, Ctxt);
if (!Result)
Err.Print("bugpoint", errs());
Err.print("bugpoint", errs());
// If we don't have an override triple, use the first one to configure
// bugpoint, or use the host triple if none provided.

View File

@ -247,7 +247,7 @@ int main(int argc, char **argv) {
M.reset(ParseIRFile(InputFilename, Err, Context));
if (M.get() == 0) {
Err.Print(argv[0], errs());
Err.print(argv[0], errs());
return 1;
}
Module &mod = *M.get();

View File

@ -178,7 +178,7 @@ int main(int argc, char **argv, char * const *envp) {
SMDiagnostic Err;
Module *Mod = ParseIRFile(InputFile, Err, Context);
if (!Mod) {
Err.Print(argv[0], errs());
Err.print(argv[0], errs());
return 1;
}

View File

@ -96,7 +96,7 @@ int main(int argc, char **argv) {
SMDiagnostic Err;
std::auto_ptr<Module> M(ParseAssemblyFile(InputFilename, Err, Context));
if (M.get() == 0) {
Err.Print(argv[0], errs());
Err.print(argv[0], errs());
return 1;
}

View File

@ -38,7 +38,7 @@ static Module *ReadModule(LLVMContext &Context, StringRef Name) {
SMDiagnostic Diag;
Module *M = ParseIRFile(Name, Diag, Context);
if (!M)
Diag.Print("llvmdiff", errs());
Diag.print("llvm-diff", errs());
return M;
}

View File

@ -90,7 +90,7 @@ int main(int argc, char **argv) {
M.reset(getLazyIRFileModule(InputFilename, Err, Context));
if (M.get() == 0) {
Err.Print(argv[0], errs());
Err.print(argv[0], errs());
return 1;
}

View File

@ -69,7 +69,7 @@ static inline std::auto_ptr<Module> LoadFile(const char *argv0,
Result = ParseIRFile(FNStr, Err, Context);
if (Result) return std::auto_ptr<Module>(Result); // Load successful!
Err.Print(argv0, errs());
Err.print(argv0, errs());
return std::auto_ptr<Module>();
}

View File

@ -505,7 +505,7 @@ int main(int argc, char **argv) {
M.reset(ParseIRFile(InputFilename, Err, Context));
if (M.get() == 0) {
Err.Print(argv[0], errs());
Err.print(argv[0], errs());
return 1;
}

View File

@ -184,7 +184,7 @@ bool LoadAssemblyInto(Module *M, const char *assembly) {
NULL != ParseAssemblyString(assembly, M, Error, M->getContext());
std::string errMsg;
raw_string_ostream os(errMsg);
Error.Print("", os);
Error.print("", os);
EXPECT_TRUE(success) << os.str();
return success;
}

View File

@ -26,7 +26,7 @@ bool LoadAssemblyInto(Module *M, const char *assembly) {
NULL != ParseAssemblyString(assembly, M, Error, M->getContext());
std::string errMsg;
raw_string_ostream os(errMsg);
Error.Print("", os);
Error.print("", os);
EXPECT_TRUE(success) << os.str();
return success;
}

View File

@ -384,7 +384,7 @@ void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
}
SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), OS.str(), "note",
/*ShowLine=*/false);
ArrayRef<SMRange>(), /*ShowLine=*/false);
}
}