diff --git a/llvm/include/llvm/Support/SourceMgr.h b/llvm/include/llvm/Support/SourceMgr.h index 1b005519e5d4..3255e143d56b 100644 --- a/llvm/include/llvm/Support/SourceMgr.h +++ b/llvm/include/llvm/Support/SourceMgr.h @@ -61,10 +61,9 @@ private: /// into relatively small files (often smaller than 2^8 or 2^16 bytes), /// we select the offset vector element type dynamically based on the /// size of Buffer. - using VariableSizeOffsets = PointerUnion *, - std::vector *, - std::vector *, - std::vector *>; + using VariableSizeOffsets = + PointerUnion *, std::vector *, + std::vector *, std::vector *>; /// Vector of offsets into Buffer at which there are line-endings /// (lazily populated). Once populated, the '\n' that marks the end of @@ -74,12 +73,17 @@ private: /// offset corresponding to a particular SMLoc). mutable VariableSizeOffsets OffsetCache; - /// Populate \c OffsetCache and look up a given \p Ptr in it, assuming - /// it points somewhere into \c Buffer. The static type parameter \p T - /// must be an unsigned integer type from uint{8,16,32,64}_t large - /// enough to store offsets inside \c Buffer. - template + /// Look up a given \p Ptr in in the buffer, determining which line it came + /// from. unsigned getLineNumber(const char *Ptr) const; + template + unsigned getLineNumberSpecialized(const char *Ptr) const; + + /// Return a pointer to the first character of the specified line number or + /// null if the line number is invalid. + const char *getPointerForLineNumber(unsigned LineNo) const; + template + const char *getPointerForLineNumberSpecialized(unsigned LineNo) const; /// This is the location of the parent include, or null if at the top level. SMLoc IncludeLoc; @@ -134,9 +138,7 @@ public: return Buffers[i - 1].Buffer.get(); } - unsigned getNumBuffers() const { - return Buffers.size(); - } + unsigned getNumBuffers() const { return Buffers.size(); } unsigned getMainFileID() const { assert(getNumBuffers()); @@ -184,12 +186,16 @@ public: std::pair getLineAndColumn(SMLoc Loc, unsigned BufferID = 0) const; + /// Given a line and column number in a mapped buffer, turn it into an SMLoc. + /// This will return a null SMLoc if the line/column location is invalid. + SMLoc FindLocForLineAndColumn(unsigned BufferID, unsigned LineNo, + unsigned ColNo); + /// Emit a message about the specified location with the specified string. /// /// \param ShowColors Display colored messages if output is a terminal and /// the default error handler is used. - void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind, - const Twine &Msg, + void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef Ranges = None, ArrayRef FixIts = None, bool ShowColors = true) const; @@ -234,13 +240,13 @@ class SMFixIt { public: // FIXME: Twine.str() is not very efficient. SMFixIt(SMLoc Loc, const Twine &Insertion) - : Range(Loc, Loc), Text(Insertion.str()) { + : Range(Loc, Loc), Text(Insertion.str()) { assert(Loc.isValid()); } // FIXME: Twine.str() is not very efficient. SMFixIt(SMRange R, const Twine &Replacement) - : Range(R), Text(Replacement.str()) { + : Range(R), Text(Replacement.str()) { assert(R.isValid()); } @@ -274,13 +280,12 @@ public: SMDiagnostic() = default; // Diagnostic with no location (e.g. file not found, command line arg error). SMDiagnostic(StringRef filename, SourceMgr::DiagKind Knd, StringRef Msg) - : Filename(filename), LineNo(-1), ColumnNo(-1), Kind(Knd), Message(Msg) {} + : Filename(filename), LineNo(-1), ColumnNo(-1), Kind(Knd), Message(Msg) {} // Diagnostic with a location. - SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN, - int Line, int Col, SourceMgr::DiagKind Kind, - StringRef Msg, StringRef LineStr, - ArrayRef> Ranges, + SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN, int Line, int Col, + SourceMgr::DiagKind Kind, StringRef Msg, StringRef LineStr, + ArrayRef> Ranges, ArrayRef FixIts = None); const SourceMgr *getSourceMgr() const { return SM; } @@ -293,13 +298,9 @@ public: StringRef getLineContents() const { return LineContents; } ArrayRef> getRanges() const { return Ranges; } - void addFixIt(const SMFixIt &Hint) { - FixIts.push_back(Hint); - } + void addFixIt(const SMFixIt &Hint) { FixIts.push_back(Hint); } - ArrayRef getFixIts() const { - return FixIts; - } + ArrayRef getFixIts() const { return FixIts; } void print(const char *ProgName, raw_ostream &S, bool ShowColors = true, bool ShowKindLabel = true) const; diff --git a/llvm/lib/Support/SourceMgr.cpp b/llvm/lib/Support/SourceMgr.cpp index acb7101ba020..5fe95badd547 100644 --- a/llvm/lib/Support/SourceMgr.cpp +++ b/llvm/lib/Support/SourceMgr.cpp @@ -42,7 +42,7 @@ unsigned SourceMgr::AddIncludeFile(const std::string &Filename, std::string &IncludedFile) { IncludedFile = Filename; ErrorOr> NewBufOrErr = - MemoryBuffer::getFile(IncludedFile); + MemoryBuffer::getFile(IncludedFile); // If the file didn't exist directly, see if it's in an include path. for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBufOrErr; @@ -69,54 +69,110 @@ unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const { } template -unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const { +static std::vector &GetOrCreateOffsetCache( + PointerUnion *, std::vector *, + std::vector *, std::vector *> &OffsetCache, + MemoryBuffer *Buffer) { + if (!OffsetCache.isNull()) + return *OffsetCache.get *>(); - // Ensure OffsetCache is allocated and populated with offsets of all the - // '\n' bytes. - std::vector *Offsets = nullptr; - if (OffsetCache.isNull()) { - Offsets = new std::vector(); - OffsetCache = Offsets; - size_t Sz = Buffer->getBufferSize(); - assert(Sz <= std::numeric_limits::max()); - StringRef S = Buffer->getBuffer(); - for (size_t N = 0; N < Sz; ++N) { - if (S[N] == '\n') { - Offsets->push_back(static_cast(N)); - } - } - } else { - Offsets = OffsetCache.get *>(); + // Lazily fill in the offset cache. + auto *Offsets = new std::vector(); + OffsetCache = Offsets; + size_t Sz = Buffer->getBufferSize(); + assert(Sz <= std::numeric_limits::max()); + StringRef S = Buffer->getBuffer(); + for (size_t N = 0; N < Sz; ++N) { + if (S[N] == '\n') + Offsets->push_back(static_cast(N)); } + return *Offsets; +} + +template +unsigned SourceMgr::SrcBuffer::getLineNumberSpecialized(const char *Ptr) const { + std::vector &Offsets = + GetOrCreateOffsetCache(OffsetCache, Buffer.get()); + const char *BufStart = Buffer->getBufferStart(); assert(Ptr >= BufStart && Ptr <= Buffer->getBufferEnd()); ptrdiff_t PtrDiff = Ptr - BufStart; - assert(PtrDiff >= 0 && static_cast(PtrDiff) <= std::numeric_limits::max()); + assert(PtrDiff >= 0 && + static_cast(PtrDiff) <= std::numeric_limits::max()); T PtrOffset = static_cast(PtrDiff); // llvm::lower_bound gives the number of EOL before PtrOffset. Add 1 to get // the line number. - return llvm::lower_bound(*Offsets, PtrOffset) - Offsets->begin() + 1; + return llvm::lower_bound(Offsets, PtrOffset) - Offsets.begin() + 1; +} + +/// Look up a given \p Ptr in in the buffer, determining which line it came +/// from. +unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const { + size_t Sz = Buffer->getBufferSize(); + if (Sz <= std::numeric_limits::max()) + return getLineNumberSpecialized(Ptr); + else if (Sz <= std::numeric_limits::max()) + return getLineNumberSpecialized(Ptr); + else if (Sz <= std::numeric_limits::max()) + return getLineNumberSpecialized(Ptr); + else + return getLineNumberSpecialized(Ptr); +} + +template +const char *SourceMgr::SrcBuffer::getPointerForLineNumberSpecialized( + unsigned LineNo) const { + std::vector &Offsets = + GetOrCreateOffsetCache(OffsetCache, Buffer.get()); + + // We start counting line and column numbers from 1. + if (LineNo != 0) + --LineNo; + + const char *BufStart = Buffer->getBufferStart(); + + // The offset cache contains the location of the \n for the specified line, + // we want the start of the line. As such, we look for the previous entry. + if (LineNo == 0) + return BufStart; + if (LineNo > Offsets.size()) + return nullptr; + return BufStart + Offsets[LineNo - 1] + 1; +} + +/// Return a pointer to the first character of the specified line number or +/// null if the line number is invalid. +const char * +SourceMgr::SrcBuffer::getPointerForLineNumber(unsigned LineNo) const { + size_t Sz = Buffer->getBufferSize(); + if (Sz <= std::numeric_limits::max()) + return getPointerForLineNumberSpecialized(LineNo); + else if (Sz <= std::numeric_limits::max()) + return getPointerForLineNumberSpecialized(LineNo); + else if (Sz <= std::numeric_limits::max()) + return getPointerForLineNumberSpecialized(LineNo); + else + return getPointerForLineNumberSpecialized(LineNo); } SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&Other) - : Buffer(std::move(Other.Buffer)), - OffsetCache(Other.OffsetCache), - IncludeLoc(Other.IncludeLoc) { + : Buffer(std::move(Other.Buffer)), OffsetCache(Other.OffsetCache), + IncludeLoc(Other.IncludeLoc) { Other.OffsetCache = nullptr; } SourceMgr::SrcBuffer::~SrcBuffer() { if (!OffsetCache.isNull()) { - if (OffsetCache.is*>()) - delete OffsetCache.get*>(); - else if (OffsetCache.is*>()) - delete OffsetCache.get*>(); - else if (OffsetCache.is*>()) - delete OffsetCache.get*>(); + if (OffsetCache.is *>()) + delete OffsetCache.get *>(); + else if (OffsetCache.is *>()) + delete OffsetCache.get *>(); + else if (OffsetCache.is *>()) + delete OffsetCache.get *>(); else - delete OffsetCache.get*>(); + delete OffsetCache.get *>(); OffsetCache = nullptr; } } @@ -130,39 +186,58 @@ SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const { auto &SB = getBufferInfo(BufferID); const char *Ptr = Loc.getPointer(); - size_t Sz = SB.Buffer->getBufferSize(); - unsigned LineNo; - if (Sz <= std::numeric_limits::max()) - LineNo = SB.getLineNumber(Ptr); - else if (Sz <= std::numeric_limits::max()) - LineNo = SB.getLineNumber(Ptr); - else if (Sz <= std::numeric_limits::max()) - LineNo = SB.getLineNumber(Ptr); - else - LineNo = SB.getLineNumber(Ptr); - + unsigned LineNo = SB.getLineNumber(Ptr); const char *BufStart = SB.Buffer->getBufferStart(); - size_t NewlineOffs = StringRef(BufStart, Ptr-BufStart).find_last_of("\n\r"); - if (NewlineOffs == StringRef::npos) NewlineOffs = ~(size_t)0; - return std::make_pair(LineNo, Ptr-BufStart-NewlineOffs); + size_t NewlineOffs = StringRef(BufStart, Ptr - BufStart).find_last_of("\n\r"); + if (NewlineOffs == StringRef::npos) + NewlineOffs = ~(size_t)0; + return std::make_pair(LineNo, Ptr - BufStart - NewlineOffs); +} + +/// Given a line and column number in a mapped buffer, turn it into an SMLoc. +/// This will return a null SMLoc if the line/column location is invalid. +SMLoc SourceMgr::FindLocForLineAndColumn(unsigned BufferID, unsigned LineNo, + unsigned ColNo) { + auto &SB = getBufferInfo(BufferID); + const char *Ptr = SB.getPointerForLineNumber(LineNo); + if (!Ptr) + return SMLoc(); + + // We start counting line and column numbers from 1. + if (ColNo != 0) + --ColNo; + + // If we have a column number, validate it. + if (ColNo) { + // Make sure the location is within the current line. + if (Ptr + ColNo > SB.Buffer->getBufferEnd()) + return SMLoc(); + + // Make sure there is no newline in the way. + if (StringRef(Ptr, ColNo).find_first_of("\n\r") != StringRef::npos) + return SMLoc(); + + Ptr += ColNo; + } + + return SMLoc::getFromPointer(Ptr); } void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const { - if (IncludeLoc == SMLoc()) return; // Top of stack. + if (IncludeLoc == SMLoc()) + return; // Top of stack. unsigned CurBuf = FindBufferContainingLoc(IncludeLoc); assert(CurBuf && "Invalid or unspecified location!"); PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS); - OS << "Included from " - << getBufferInfo(CurBuf).Buffer->getBufferIdentifier() + OS << "Included from " << getBufferInfo(CurBuf).Buffer->getBufferIdentifier() << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n"; } SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, - const Twine &Msg, - ArrayRef Ranges, + const Twine &Msg, ArrayRef Ranges, ArrayRef FixIts) const { // First thing to do: find the current buffer containing the specified // location to pull out the source line. @@ -196,7 +271,8 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, // location. for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { SMRange R = Ranges[i]; - if (!R.isValid()) continue; + if (!R.isValid()) + continue; // If the line doesn't contain any part of the range, then ignore it. if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart) @@ -210,16 +286,16 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, // Translate from SMLoc ranges to column ranges. // FIXME: Handle multibyte characters. - ColRanges.push_back(std::make_pair(R.Start.getPointer()-LineStart, - R.End.getPointer()-LineStart)); + ColRanges.push_back(std::make_pair(R.Start.getPointer() - LineStart, + R.End.getPointer() - LineStart)); } LineAndCol = getLineAndColumn(Loc, CurBuf); } return SMDiagnostic(*this, Loc, BufferID, LineAndCol.first, - LineAndCol.second-1, Kind, Msg.str(), - LineStr, ColRanges, FixIts); + LineAndCol.second - 1, Kind, Msg.str(), LineStr, + ColRanges, FixIts); } void SourceMgr::PrintMessage(raw_ostream &OS, const SMDiagnostic &Diagnostic, @@ -240,9 +316,9 @@ void SourceMgr::PrintMessage(raw_ostream &OS, const SMDiagnostic &Diagnostic, } void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc, - SourceMgr::DiagKind Kind, - const Twine &Msg, ArrayRef Ranges, - ArrayRef FixIts, bool ShowColors) const { + SourceMgr::DiagKind Kind, const Twine &Msg, + ArrayRef Ranges, ArrayRef FixIts, + bool ShowColors) const { PrintMessage(OS, GetMessage(Loc, Kind, Msg, Ranges, FixIts), ShowColors); } @@ -268,7 +344,8 @@ SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN, int Line, } static void buildFixItLine(std::string &CaretLine, std::string &FixItLine, - ArrayRef FixIts, ArrayRef SourceLine){ + ArrayRef FixIts, + ArrayRef SourceLine) { if (FixIts.empty()) return; @@ -277,8 +354,8 @@ static void buildFixItLine(std::string &CaretLine, std::string &FixItLine, size_t PrevHintEndCol = 0; - for (ArrayRef::iterator I = FixIts.begin(), E = FixIts.end(); - I != E; ++I) { + for (ArrayRef::iterator I = FixIts.begin(), E = FixIts.end(); I != E; + ++I) { // If the fixit contains a newline or tab, ignore it. if (I->getText().find_first_of("\n\r\t") != StringRef::npos) continue; @@ -361,12 +438,10 @@ static void printSourceLine(raw_ostream &S, StringRef LineContents) { S << '\n'; } -static bool isNonASCII(char c) { - return c & 0x80; -} +static bool isNonASCII(char c) { return c & 0x80; } -void SMDiagnostic::print(const char *ProgName, raw_ostream &OS, - bool ShowColors, bool ShowKindLabel) const { +void SMDiagnostic::print(const char *ProgName, raw_ostream &OS, bool ShowColors, + bool ShowKindLabel) const { { WithColor S(OS, raw_ostream::SAVEDCOLOR, true, false, !ShowColors); @@ -423,22 +498,21 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &OS, size_t NumColumns = LineContents.size(); // Build the line with the caret and ranges. - std::string CaretLine(NumColumns+1, ' '); + std::string CaretLine(NumColumns + 1, ' '); // Expand any ranges. for (unsigned r = 0, e = Ranges.size(); r != e; ++r) { std::pair R = Ranges[r]; std::fill(&CaretLine[R.first], - &CaretLine[std::min((size_t)R.second, CaretLine.size())], - '~'); + &CaretLine[std::min((size_t)R.second, CaretLine.size())], '~'); } // Add any fix-its. // FIXME: Find the beginning of the line properly for multibyte characters. std::string FixItInsertionLine; - buildFixItLine(CaretLine, FixItInsertionLine, FixIts, - makeArrayRef(Loc.getPointer() - ColumnNo, - LineContents.size())); + buildFixItLine( + CaretLine, FixItInsertionLine, FixIts, + makeArrayRef(Loc.getPointer() - ColumnNo, LineContents.size())); // Finally, plop on the caret. if (unsigned(ColumnNo) <= NumColumns) @@ -449,7 +523,7 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &OS, // ... and remove trailing whitespace so the output doesn't wrap for it. We // know that the line isn't completely empty because it has the caret in it at // least. - CaretLine.erase(CaretLine.find_last_not_of(' ')+1); + CaretLine.erase(CaretLine.find_last_not_of(' ') + 1); printSourceLine(OS, LineContents); diff --git a/mlir/lib/IR/Diagnostics.cpp b/mlir/lib/IR/Diagnostics.cpp index d180b437eefe..ec2ca9363408 100644 --- a/mlir/lib/IR/Diagnostics.cpp +++ b/mlir/lib/IR/Diagnostics.cpp @@ -334,32 +334,32 @@ ScopedDiagnosticHandler::~ScopedDiagnosticHandler() { namespace mlir { namespace detail { struct SourceMgrDiagnosticHandlerImpl { - /// Get a memory buffer for the given file, or nullptr if one is not found. - const llvm::MemoryBuffer *getBufferForFile(llvm::SourceMgr &mgr, - StringRef filename) { + /// Return the SrcManager buffer id for the specified file, or zero if none + /// can be found. + unsigned getSourceMgrBufferIDForFile(llvm::SourceMgr &mgr, + StringRef filename) { // Check for an existing mapping to the buffer id for this file. - auto bufferIt = filenameToBuf.find(filename); - if (bufferIt != filenameToBuf.end()) + auto bufferIt = filenameToBufId.find(filename); + if (bufferIt != filenameToBufId.end()) return bufferIt->second; // Look for a buffer in the manager that has this filename. for (unsigned i = 1, e = mgr.getNumBuffers() + 1; i != e; ++i) { auto *buf = mgr.getMemoryBuffer(i); if (buf->getBufferIdentifier() == filename) - return filenameToBuf[filename] = buf; + return filenameToBufId[filename] = i; } // Otherwise, try to load the source file. - const llvm::MemoryBuffer *newBuf = nullptr; std::string ignored; - if (auto newBufID = - mgr.AddIncludeFile(std::string(filename), llvm::SMLoc(), ignored)) - newBuf = mgr.getMemoryBuffer(newBufID); - return filenameToBuf[filename] = newBuf; + unsigned id = + mgr.AddIncludeFile(std::string(filename), llvm::SMLoc(), ignored); + filenameToBufId[filename] = id; + return id; } - /// Mapping between file name and buffer pointer. - llvm::StringMap filenameToBuf; + /// Mapping between file name and buffer ID's. + llvm::StringMap filenameToBufId; }; } // end namespace detail } // end namespace mlir @@ -501,68 +501,18 @@ void SourceMgrDiagnosticHandler::emitDiagnostic(Diagnostic &diag) { /// Get a memory buffer for the given file, or nullptr if one is not found. const llvm::MemoryBuffer * SourceMgrDiagnosticHandler::getBufferForFile(StringRef filename) { - return impl->getBufferForFile(mgr, filename); + if (unsigned id = impl->getSourceMgrBufferIDForFile(mgr, filename)) + return mgr.getMemoryBuffer(id); + return nullptr; } /// Get a memory buffer for the given file, or the main file of the source /// manager if one doesn't exist. This always returns non-null. llvm::SMLoc SourceMgrDiagnosticHandler::convertLocToSMLoc(FileLineColLoc loc) { - // Get the buffer for this filename. - auto *membuf = getBufferForFile(loc.getFilename()); - if (!membuf) + unsigned bufferId = impl->getSourceMgrBufferIDForFile(mgr, loc.getFilename()); + if (!bufferId) return llvm::SMLoc(); - - // TODO: This should really be upstreamed to be a method on llvm::SourceMgr. - // Doing so would allow it to use the offset cache that is already maintained - // by SrcBuffer, making this more efficient. - unsigned lineNo = loc.getLine(); - unsigned columnNo = loc.getColumn(); - - // Scan for the correct line number. - const char *position = membuf->getBufferStart(); - const char *end = membuf->getBufferEnd(); - - // We start counting line and column numbers from 1. - if (lineNo != 0) - --lineNo; - if (columnNo != 0) - --columnNo; - - while (position < end && lineNo) { - auto curChar = *position++; - - // Scan for newlines. If this isn't one, ignore it. - if (curChar != '\r' && curChar != '\n') - continue; - - // We saw a line break, decrement our counter. - --lineNo; - - // Check for \r\n and \n\r and treat it as a single escape. We know that - // looking past one character is safe because MemoryBuffer's are always nul - // terminated. - if (*position != curChar && (*position == '\r' || *position == '\n')) - ++position; - } - - // If the line/column counter was invalid, return a pointer to the start of - // the buffer. - if (lineNo || position + columnNo > end) - return llvm::SMLoc::getFromPointer(membuf->getBufferStart()); - - // If the column is zero, try to skip to the first non-whitespace character. - if (columnNo == 0) { - auto isNewline = [](char c) { return c == '\n' || c == '\r'; }; - auto isWhitespace = [](char c) { return c == ' ' || c == '\t'; }; - - // Look for a valid non-whitespace character before the next line. - for (auto *newPos = position; newPos < end && !isNewline(*newPos); ++newPos) - if (!isWhitespace(*newPos)) - return llvm::SMLoc::getFromPointer(newPos); - } - - // Otherwise return the right pointer. - return llvm::SMLoc::getFromPointer(position + columnNo); + return mgr.FindLocForLineAndColumn(bufferId, loc.getLine(), loc.getColumn()); } //===----------------------------------------------------------------------===//