forked from OSchip/llvm-project
PR25416: Improve performance of processing inline assembly consisting of many
implicitly-concatenated string literals. When looking for the start of a token in the inline assembly, start from the end of the previous token, not the start of the entire string. Patch by Yunlian Jiang! llvm-svn: 255198
This commit is contained in:
parent
6fd28dfe5d
commit
efb116fba6
|
@ -1631,13 +1631,15 @@ public:
|
|||
/// and can have escape sequences in them in addition to the usual trigraph
|
||||
/// and escaped newline business. This routine handles this complexity.
|
||||
///
|
||||
SourceLocation getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
|
||||
const LangOptions &Features,
|
||||
const TargetInfo &Target) const;
|
||||
SourceLocation
|
||||
getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
|
||||
const LangOptions &Features, const TargetInfo &Target,
|
||||
unsigned *StartToken = nullptr,
|
||||
unsigned *StartTokenByteOffset = nullptr) const;
|
||||
|
||||
typedef const SourceLocation *tokloc_iterator;
|
||||
tokloc_iterator tokloc_begin() const { return TokLocs; }
|
||||
tokloc_iterator tokloc_end() const { return TokLocs+NumConcatenated; }
|
||||
tokloc_iterator tokloc_end() const { return TokLocs + NumConcatenated; }
|
||||
|
||||
SourceLocation getLocStart() const LLVM_READONLY { return TokLocs[0]; }
|
||||
SourceLocation getLocEnd() const LLVM_READONLY {
|
||||
|
|
|
@ -1007,15 +1007,33 @@ void StringLiteral::setString(const ASTContext &C, StringRef Str,
|
|||
/// can have escape sequences in them in addition to the usual trigraph and
|
||||
/// escaped newline business. This routine handles this complexity.
|
||||
///
|
||||
SourceLocation StringLiteral::
|
||||
getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
|
||||
const LangOptions &Features, const TargetInfo &Target) const {
|
||||
/// The *StartToken sets the first token to be searched in this function and
|
||||
/// the *StartTokenByteOffset is the byte offset of the first token. Before
|
||||
/// returning, it updates *StartToken to the TokNo of the token that was found
|
||||
/// and sets *StartTokenByteOffset to the byte offset of the token in the
|
||||
/// string.
|
||||
/// Using these two parameters can reduce the time complexity from O(n^2) to
|
||||
/// O(n) if one wants to get the byte locations for all the tokens in a
|
||||
/// string.
|
||||
///
|
||||
SourceLocation
|
||||
StringLiteral::getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
|
||||
const LangOptions &Features,
|
||||
const TargetInfo &Target, unsigned *StartToken,
|
||||
unsigned *StartTokenByteOffset) const {
|
||||
assert((Kind == StringLiteral::Ascii || Kind == StringLiteral::UTF8) &&
|
||||
"Only narrow string literals are currently supported");
|
||||
|
||||
// Loop over all of the tokens in this string until we find the one that
|
||||
// contains the byte we're looking for.
|
||||
unsigned TokNo = 0;
|
||||
unsigned StringOffset = 0;
|
||||
if (StartToken)
|
||||
TokNo = *StartToken;
|
||||
if (StartTokenByteOffset) {
|
||||
StringOffset = *StartTokenByteOffset;
|
||||
ByteNo -= StringOffset;
|
||||
}
|
||||
while (1) {
|
||||
assert(TokNo < getNumConcatenated() && "Invalid byte number!");
|
||||
SourceLocation StrTokLoc = getStrTokenLoc(TokNo);
|
||||
|
@ -1026,11 +1044,17 @@ getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
|
|||
SourceLocation StrTokSpellingLoc = SM.getSpellingLoc(StrTokLoc);
|
||||
|
||||
// Re-lex the token to get its length and original spelling.
|
||||
std::pair<FileID, unsigned> LocInfo =SM.getDecomposedLoc(StrTokSpellingLoc);
|
||||
std::pair<FileID, unsigned> LocInfo =
|
||||
SM.getDecomposedLoc(StrTokSpellingLoc);
|
||||
bool Invalid = false;
|
||||
StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
|
||||
if (Invalid)
|
||||
if (Invalid) {
|
||||
if (StartTokenByteOffset != nullptr)
|
||||
*StartTokenByteOffset = StringOffset;
|
||||
if (StartToken != nullptr)
|
||||
*StartToken = TokNo;
|
||||
return StrTokSpellingLoc;
|
||||
}
|
||||
|
||||
const char *StrData = Buffer.data()+LocInfo.second;
|
||||
|
||||
|
@ -1051,10 +1075,15 @@ getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
|
|||
|
||||
// Now that we know the offset of the token in the spelling, use the
|
||||
// preprocessor to get the offset in the original source.
|
||||
if (StartTokenByteOffset != nullptr)
|
||||
*StartTokenByteOffset = StringOffset;
|
||||
if (StartToken != nullptr)
|
||||
*StartToken = TokNo;
|
||||
return Lexer::AdvanceToTokenCharacter(StrTokLoc, Offset, SM, Features);
|
||||
}
|
||||
|
||||
// Move to the next string token.
|
||||
StringOffset += TokNumBytes;
|
||||
++TokNo;
|
||||
ByteNo -= TokNumBytes;
|
||||
}
|
||||
|
|
|
@ -1714,13 +1714,15 @@ static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral *Str,
|
|||
if (!StrVal.empty()) {
|
||||
const SourceManager &SM = CGF.CGM.getContext().getSourceManager();
|
||||
const LangOptions &LangOpts = CGF.CGM.getLangOpts();
|
||||
unsigned StartToken = 0;
|
||||
unsigned ByteOffset = 0;
|
||||
|
||||
// Add the location of the start of each subsequent line of the asm to the
|
||||
// MDNode.
|
||||
for (unsigned i = 0, e = StrVal.size()-1; i != e; ++i) {
|
||||
for (unsigned i = 0, e = StrVal.size() - 1; i != e; ++i) {
|
||||
if (StrVal[i] != '\n') continue;
|
||||
SourceLocation LineLoc = Str->getLocationOfByte(i+1, SM, LangOpts,
|
||||
CGF.getTarget());
|
||||
SourceLocation LineLoc = Str->getLocationOfByte(
|
||||
i + 1, SM, LangOpts, CGF.getTarget(), &StartToken, &ByteOffset);
|
||||
Locs.push_back(llvm::ConstantAsMetadata::get(
|
||||
llvm::ConstantInt::get(CGF.Int32Ty, LineLoc.getRawEncoding())));
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue