diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index 9cdd2eb2cc93..d717cadf4e4a 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -108,6 +108,9 @@ struct ParseStatementInfo { /// Was there an error parsing the inline assembly? bool ParseError = false; + /// The value associated with a macro exit. + Optional ExitValue; + SmallVectorImpl *AsmRewrites = nullptr; ParseStatementInfo() = delete; @@ -368,6 +371,7 @@ private: /// This is the current buffer index we're lexing from as managed by the /// SourceMgr object. unsigned CurBuffer; + std::vector EndStatementAtEOFStack; AsmCond TheCondState; std::vector TheCondStack; @@ -539,8 +543,6 @@ private: bool parseCurlyBlockScope(SmallVectorImpl& AsmStrRewrites); bool parseCppHashLineFilenameComment(SMLoc L); - void checkForBadMacro(SMLoc DirectiveLoc, StringRef Name, StringRef Body, - ArrayRef Parameters); bool expandMacro(raw_svector_ostream &OS, StringRef Body, ArrayRef Parameters, ArrayRef A, @@ -553,7 +555,15 @@ private: /// /// \param M The macro. /// \param NameLoc Instantiation location. - bool handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc); + bool handleMacroEntry( + const MCAsmMacro *M, SMLoc NameLoc, + AsmToken::TokenKind ArgumentEndTok = AsmToken::EndOfStatement); + + /// Handle invocation of macro function. + /// + /// \param M The macro. + /// \param NameLoc Invocation location. + bool handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc); /// Handle exit from macro instantiation. void handleMacroExit(); @@ -593,7 +603,8 @@ private: /// /// \param InBuffer If not 0, should be the known buffer id that contains the /// location. - void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0); + void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0, + bool EndStatementAtEOF = true); /// Parse up to a token of kind \p EndTok and return the contents from the /// current token up to (but not including) this token; the current token on @@ -901,7 +912,7 @@ private: // macro directives bool parseDirectivePurgeMacro(SMLoc DirectiveLoc); - bool parseDirectiveExitMacro(StringRef Directive); + bool parseDirectiveExitMacro(StringRef Directive, std::string &Value); bool parseDirectiveEndMacro(StringRef Directive); bool parseDirectiveMacro(StringRef Name, SMLoc NameLoc); @@ -1011,6 +1022,7 @@ MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, // Set our own handler which calls the saved handler. SrcMgr.setDiagHandler(DiagHandler, this); Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer()); + EndStatementAtEOFStack.push_back(true); // Initialize the platform / file format parser. switch (Ctx.getObjectFileInfo()->getObjectFileType()) { @@ -1080,13 +1092,15 @@ bool MasmParser::enterIncludeFile(const std::string &Filename) { CurBuffer = NewBuf; Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer()); + EndStatementAtEOFStack.push_back(true); return false; } -void MasmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer) { +void MasmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer, + bool EndStatementAtEOF) { CurBuffer = InBuffer ? InBuffer : SrcMgr.FindBufferContainingLoc(Loc); Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), - Loc.getPointer()); + Loc.getPointer(), EndStatementAtEOF); } const AsmToken &MasmParser::Lex() { @@ -1115,6 +1129,7 @@ const AsmToken &MasmParser::Lex() { getTok().getEndLoc()); Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr, /*EndStatementAtEOF=*/false); + EndStatementAtEOFStack.push_back(false); tok = &Lexer.Lex(); } else { break; @@ -1141,9 +1156,12 @@ const AsmToken &MasmParser::Lex() { // include stack. SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); if (ParentIncludeLoc != SMLoc()) { - jumpToLoc(ParentIncludeLoc); + EndStatementAtEOFStack.pop_back(); + jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back()); return Lex(); } + EndStatementAtEOFStack.pop_back(); + assert(EndStatementAtEOFStack.empty()); } return *tok; @@ -1345,6 +1363,7 @@ bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) { /// Parse a primary expression and return it. /// primaryexpr ::= (parenexpr +/// primaryexpr ::= macro_function "(" macro_arguments ")" /// primaryexpr ::= symbol /// primaryexpr ::= number /// primaryexpr ::= '.' @@ -1395,6 +1414,12 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc, Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc); return false; } + // Parse macro function invocation. + if (const MCAsmMacro *M = getContext().lookupMacro(Identifier)) { + if (handleMacroInvocation(M, FirstTokenLoc)) + return true; + return parsePrimaryExpr(Res, EndLoc, nullptr); + } // Parse symbol variant. std::pair Split; if (!MAI.useParensForSymbolVariant()) { @@ -2281,8 +2306,10 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, case DK_CFI_WINDOW_SAVE: return parseDirectiveCFIWindowSave(); case DK_EXITM: - return parseDirectiveExitMacro(IDVal); + Info.ExitValue = ""; + return parseDirectiveExitMacro(IDVal, *Info.ExitValue); case DK_ENDM: + Info.ExitValue = ""; return parseDirectiveEndMacro(IDVal); case DK_PURGEM: return parseDirectivePurgeMacro(IDLoc); @@ -2615,13 +2642,10 @@ void MasmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) { NewDiag.print(nullptr, OS); } -// FIXME: This is mostly duplicated from the function in AsmLexer.cpp. The -// difference being that that function accepts '@' as part of identifiers and -// we can't do that. AsmLexer.cpp should probably be changed to handle -// '@' as a special case when needed. -static bool isIdentifierChar(char c) { - return isalnum(static_cast(c)) || c == '_' || c == '$' || - c == '.'; +// This is similar to the IsIdentifierChar function in AsmLexer.cpp, but does +// not accept '.'. +static bool isMacroParameterChar(char C) { + return isAlnum(C) || C == '_' || C == '$' || C == '@' || C == '?'; } bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, @@ -2648,11 +2672,11 @@ bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, std::size_t End = Body.size(), Pos = 0; std::size_t IdentifierPos = End; for (; Pos != End; ++Pos) { - // Find the next possible identifier, including identifiers preceding a - // '&' inside quotes. + // Find the next possible macro parameter, including preceding a '&' + // inside quotes. if (Body[Pos] == '&') break; - if (isIdentifierChar(Body[Pos])) { + if (isMacroParameterChar(Body[Pos])) { if (!CurrentQuote.hasValue()) break; if (IdentifierPos == End) @@ -2695,7 +2719,7 @@ bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, ++I; ++Pos; } - while (isIdentifierChar(Body[I]) && I + 1 != End) + while (isMacroParameterChar(Body[I]) && I + 1 != End) ++I; const char *Begin = Body.data() + Pos; @@ -2805,7 +2829,7 @@ bool MasmParser::parseMacroArgument(const MCAsmMacroParameter *MP, if (Lexer.is(AsmToken::Less) && isAngleBracketString(StrLoc, EndLoc)) { const char *StrChar = StrLoc.getPointer() + 1; const char *EndChar = EndLoc.getPointer() - 1; - jumpToLoc(EndLoc, CurBuffer); + jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back()); /// Eat from '<' to '>'. Lex(); MA.emplace_back(AsmToken::String, StringRef(StrChar, EndChar - StrChar)); @@ -2854,7 +2878,7 @@ bool MasmParser::parseMacroArgument(const MCAsmMacroParameter *MP, // handleMacroEntry relies on not advancing the lexer here // to be able to fill in the remaining default parameter values - if (Lexer.is(EndTok)) + if (Lexer.is(EndTok) && (EndTok != AsmToken::RParen || ParenLevel == 0)) break; // Adjust the current parentheses level. @@ -2998,7 +3022,8 @@ bool MasmParser::parseMacroArguments(const MCAsmMacro *M, return TokError("too many positional arguments"); } -bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) { +bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc, + AsmToken::TokenKind ArgumentEndTok) { // Arbitrarily limit macro nesting depth (default matches 'as'). We can // eliminate this, although we should protect against infinite loops. unsigned MaxNestingDepth = AsmMacroMaxNestingDepth; @@ -3012,7 +3037,7 @@ bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) { } MCAsmMacroArguments A; - if (parseMacroArguments(M, A)) + if (parseMacroArguments(M, A, ArgumentEndTok)) return true; // Macro instantiation is lexical, unfortunately. We construct a new buffer @@ -3042,14 +3067,17 @@ bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) { // Jump to the macro instantiation and prime the lexer. CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc()); Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer()); + EndStatementAtEOFStack.push_back(true); Lex(); return false; } void MasmParser::handleMacroExit() { - // Jump to the EndOfStatement we should return to, and consume it. - jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer); + // Jump to the token we should return to, and consume it. + EndStatementAtEOFStack.pop_back(); + jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer, + EndStatementAtEOFStack.back()); Lex(); // Pop the instantiation entry. @@ -3057,6 +3085,60 @@ void MasmParser::handleMacroExit() { ActiveMacros.pop_back(); } +bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) { + if (parseToken(AsmToken::LParen, "invoking macro function '" + M->Name + + "' requires arguments in parentheses") || + handleMacroEntry(M, NameLoc, AsmToken::RParen)) + return true; + + // Parse all statements in the macro, retrieving the exit value when it ends. + std::string ExitValue; + SmallVector AsmStrRewrites; + while (Lexer.isNot(AsmToken::Eof)) { + ParseStatementInfo Info(&AsmStrRewrites); + bool Parsed = parseStatement(Info, nullptr); + + if (!Parsed && Info.ExitValue.hasValue()) { + ExitValue = std::move(*Info.ExitValue); + break; + } + + // If we have a Lexer Error we are on an Error Token. Load in Lexer Error + // for printing ErrMsg via Lex() only if no (presumably better) parser error + // exists. + if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) { + Lex(); + } + + // parseStatement returned true so may need to emit an error. + printPendingErrors(); + + // Skipping to the next line if needed. + if (Parsed && !getLexer().isAtStartOfStatement()) + eatToEndOfStatement(); + } + + // Consume the right-parenthesis on the other side of the arguments. + if (parseToken(AsmToken::RParen, "invoking macro function '" + M->Name + + "' requires arguments in parentheses")) + return true; + + // Exit values may require lexing, unfortunately. We construct a new buffer to + // hold the exit value. + std::unique_ptr MacroValue = + MemoryBuffer::getMemBufferCopy(ExitValue, ""); + + // Jump from this location to the instantiated exit value, and prime the + // lexer. + CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(MacroValue), Lexer.getLoc()); + Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr, + /*EndStatementAtEOF=*/false); + EndStatementAtEOFStack.push_back(false); + Lex(); + + return false; +} + /// parseIdentifier: /// ::= identifier /// ::= string @@ -3189,7 +3271,7 @@ bool MasmParser::parseAngleBracketString(std::string &Data) { if (isAngleBracketString(StartLoc, EndLoc)) { const char *StartChar = StartLoc.getPointer() + 1; const char *EndChar = EndLoc.getPointer() - 1; - jumpToLoc(EndLoc, CurBuffer); + jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back()); // Eat from '<' to '>'. Lex(); @@ -5412,7 +5494,6 @@ bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) { const char *BodyStart = StartToken.getLoc().getPointer(); const char *BodyEnd = EndToken.getLoc().getPointer(); StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart); - checkForBadMacro(NameLoc, Name, Body, Parameters); MCAsmMacro Macro(Name, Body, std::move(Parameters), std::move(Locals)); DEBUG_WITH_TYPE("asm-macros", dbgs() << "Defining new macro:\n"; Macro.dump()); @@ -5420,114 +5501,14 @@ bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) { return false; } -/// checkForBadMacro -/// -/// With the support added for named parameters there may be code out there that -/// is transitioning from positional parameters. In versions of gas that did -/// not support named parameters they would be ignored on the macro definition. -/// But to support both styles of parameters this is not possible so if a macro -/// definition has named parameters but does not use them and has what appears -/// to be positional parameters, strings like $1, $2, ... and $n, then issue a -/// warning that the positional parameter found in body which have no effect. -/// Hoping the developer will either remove the named parameters from the macro -/// definition so the positional parameters get used if that was what was -/// intended or change the macro to use the named parameters. It is possible -/// this warning will trigger when the none of the named parameters are used -/// and the strings like $1 are infact to simply to be passed trough unchanged. -void MasmParser::checkForBadMacro(SMLoc DirectiveLoc, StringRef Name, - StringRef Body, - ArrayRef Parameters) { - // If this macro is not defined with named parameters the warning we are - // checking for here doesn't apply. - unsigned NParameters = Parameters.size(); - if (NParameters == 0) - return; - - bool NamedParametersFound = false; - bool PositionalParametersFound = false; - - // Look at the body of the macro for use of both the named parameters and what - // are likely to be positional parameters. This is what expandMacro() is - // doing when it finds the parameters in the body. - while (!Body.empty()) { - // Scan for the next possible parameter. - std::size_t End = Body.size(), Pos = 0; - for (; Pos != End; ++Pos) { - // Check for a substitution or escape. - // This macro is defined with parameters, look for \foo, \bar, etc. - if (Body[Pos] == '\\' && Pos + 1 != End) - break; - - // This macro should have parameters, but look for $0, $1, ..., $n too. - if (Body[Pos] != '$' || Pos + 1 == End) - continue; - char Next = Body[Pos + 1]; - if (Next == '$' || Next == 'n' || - isdigit(static_cast(Next))) - break; - } - - // Check if we reached the end. - if (Pos == End) - break; - - if (Body[Pos] == '$') { - switch (Body[Pos + 1]) { - // $$ => $ - case '$': - break; - - // $n => number of arguments - case 'n': - PositionalParametersFound = true; - break; - - // $[0-9] => argument - default: { - PositionalParametersFound = true; - break; - } - } - Pos += 2; - } else { - unsigned I = Pos + 1; - while (isIdentifierChar(Body[I]) && I + 1 != End) - ++I; - - const char *Begin = Body.data() + Pos + 1; - StringRef Argument(Begin, I - (Pos + 1)); - unsigned Index = 0; - for (; Index < NParameters; ++Index) - if (Parameters[Index].Name == Argument) - break; - - if (Index == NParameters) { - if (Body[Pos + 1] == '(' && Body[Pos + 2] == ')') - Pos += 3; - else { - Pos = I; - } - } else { - NamedParametersFound = true; - Pos += 1 + Argument.size(); - } - } - // Update the scan point. - Body = Body.substr(Pos); - } - - if (!NamedParametersFound && PositionalParametersFound) - Warning(DirectiveLoc, "macro defined with named parameters which are not " - "used in macro body, possible positional parameter " - "found in body which will have no effect"); -} - /// parseDirectiveExitMacro -/// ::= exitm -bool MasmParser::parseDirectiveExitMacro(StringRef Directive) { - if (parseToken(AsmToken::EndOfStatement, - "unexpected token in '" + Directive + "' directive")) - return true; +/// ::= "exitm" [textitem] +bool MasmParser::parseDirectiveExitMacro(StringRef Directive, + std::string &Value) { + if (getTok().isNot(AsmToken::EndOfStatement)) { + parseTextItem(Value); + } + eatToEndOfStatement(); if (!isInsideMacroInstantiation()) return TokError("unexpected '" + Directive + "' in file, " @@ -6452,6 +6433,7 @@ void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc, // Jump to the macro instantiation and prime the lexer. CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc()); Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer()); + EndStatementAtEOFStack.push_back(true); Lex(); } diff --git a/llvm/test/tools/llvm-ml/macro_function.test b/llvm/test/tools/llvm-ml/macro_function.test new file mode 100644 index 000000000000..135bdeb1eede --- /dev/null +++ b/llvm/test/tools/llvm-ml/macro_function.test @@ -0,0 +1,97 @@ +; RUN: llvm-ml -filetype=asm %s | FileCheck %s + +.code + +identity MACRO arg + exitm +endm + +argument_test PROC +; CHECK-LABEL: argument_test: + + mov eax, identity(2) +; CHECK: mov eax, 2 + + ret +argument_test ENDP + +argument_with_parens_test PROC +; CHECK-LABEL: argument_with_parens_test: + + mov eax, identity((3)) +; CHECK: mov eax, 3 + mov eax, identity(((4-1)-1)) +; CHECK: mov eax, 2 + + ret +argument_with_parens_test ENDP + +offsetof MACRO structure, field + EXITM +ENDM + +S1 STRUCT + W byte 0 + X byte 0 + Y byte 0 +S1 ENDS + +substitutions_test PROC +; CHECK-LABEL: substitutions_test: + + mov eax, offsetof(S1, X) +; CHECK: mov eax, 1 + mov eax, offsetof(S1, Y) +; CHECK: mov eax, 2 + + ret +substitutions_test ENDP + +repeated_invocations_test PROC +; CHECK-LABEL: repeated_invocations_test: + + mov eax, identity(identity(1)) +; CHECK: mov eax, 1 + + ret +repeated_invocations_test ENDP + +factorial MACRO n + IF n LE 1 + EXITM <(1)> + ELSE + EXITM <(n)*factorial(n-1)> + ENDIF +ENDM + +; NOTE: This version is more sensitive to unintentional end-of-statement tokens. +factorial2 MACRO n + IF n LE 1 + EXITM <(1)> + ELSE + EXITM <(n)*(factorial(n-1))> + ENDIF +ENDM + +fibonacci MACRO n + IF n LE 2 + EXITM <(1)> + ELSE + EXITM <(fibonacci(n-1)+fibonacci(n-2))> + ENDIF +ENDM + +recursive_test PROC +; CHECK-LABEL: recursive_test: + + mov eax, factorial(5) +; CHECK: mov eax, 120 + mov eax, factorial2(4) +; CHECK: mov eax, 24 + mov eax, 11 + fibonacci(7) - 11 +; CHECK: mov eax, 13 + + ret +recursive_test ENDP + +end