[llvm-ml] Improve indirect call parsing

In MASM, if a QWORD symbol is passed to a jmp or call instruction in
64-bit mode or a DWORD or WORD symbol is passed in 32-bit mode, then
MSVC's assembler recognizes that as an indirect call. Additionally, if
the operand is qualified as a ptr, then that should also be an indirect
call.

Furthermore, in 64-bit mode, such operands are implicitly rip-relative
(in fact, MSVC's assembler ml64.exe does not allow explicitly specifying
rip as a base register.)

To keep this patch managable, this patch does not include:
* error messages for wrong operand types (e.g. passing a QWORD in 32-bit
  mode)
* resolving indirect calls if the symbol is declared after it's first
  use (llvm-ml currently only runs a single pass).
* imlementing the extern keyword (required to resolve
  https://crbug.com/762167.)

This patch is likely missing a bunch of edge cases, so please do point
them out in the review.

Reviewed By: epastor, hans, MaskRay

Committed By: epastor (on behalf of ayzhao)

Differential Revision: https://reviews.llvm.org/D124413
This commit is contained in:
Alan Zhao 2022-04-28 13:01:10 -04:00 committed by Eric Astor
parent a9215ed9cc
commit 3333c28fc0
3 changed files with 249 additions and 24 deletions

View File

@ -1107,9 +1107,9 @@ private:
std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
OperandVector &FinalOperands);
bool ParseOperand(OperandVector &Operands);
bool ParseATTOperand(OperandVector &Operands);
bool ParseIntelOperand(OperandVector &Operands);
bool parseOperand(OperandVector &Operands, StringRef Name);
bool parseATTOperand(OperandVector &Operands);
bool parseIntelOperand(OperandVector &Operands, StringRef Name);
bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
InlineAsmIdentifierInfo &Info, SMLoc &End);
bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
@ -1736,11 +1736,11 @@ bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
return false;
}
bool X86AsmParser::ParseOperand(OperandVector &Operands) {
bool X86AsmParser::parseOperand(OperandVector &Operands, StringRef Name) {
if (isParsingIntelSyntax())
return ParseIntelOperand(Operands);
return parseIntelOperand(Operands, Name);
return ParseATTOperand(Operands);
return parseATTOperand(Operands);
}
bool X86AsmParser::CreateMemForMSInlineAsm(
@ -2513,7 +2513,7 @@ bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
return false;
}
bool X86AsmParser::ParseIntelOperand(OperandVector &Operands) {
bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc Start, End;
@ -2635,25 +2635,49 @@ bool X86AsmParser::ParseIntelOperand(OperandVector &Operands) {
// When parsing x64 MS-style assembly, all non-absolute references to a named
// variable default to RIP-relative.
if (Parser.isParsingMasm() && is64BitMode() && SM.getElementSize() > 0) {
Operands.push_back(X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
BaseReg, IndexReg, Scale, Start,
End, Size,
/*DefaultBaseReg=*/X86::RIP));
return false;
unsigned DefaultBaseReg = X86::NoRegister;
bool MaybeDirectBranchDest = true;
if (Parser.isParsingMasm()) {
bool IsUnconditionalBranch =
Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
if (is64BitMode() && SM.getElementSize() > 0) {
DefaultBaseReg = X86::RIP;
}
if (IsUnconditionalBranch) {
if (PtrInOperand) {
MaybeDirectBranchDest = false;
if (is64BitMode())
DefaultBaseReg = X86::RIP;
} else if (!BaseReg && !IndexReg && Disp &&
Disp->getKind() == MCExpr::SymbolRef) {
if (is64BitMode()) {
if (SM.getSize() == 8) {
MaybeDirectBranchDest = false;
DefaultBaseReg = X86::RIP;
}
} else {
if (SM.getSize() == 4 || SM.getSize() == 2)
MaybeDirectBranchDest = false;
}
}
}
}
if ((BaseReg || IndexReg || RegNo))
Operands.push_back(X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
BaseReg, IndexReg, Scale, Start,
End, Size));
if ((BaseReg || IndexReg || RegNo || DefaultBaseReg != X86::NoRegister))
Operands.push_back(X86Operand::CreateMem(
getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End,
Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
/*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest));
else
Operands.push_back(
X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size));
Operands.push_back(X86Operand::CreateMem(
getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(),
/*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
MaybeDirectBranchDest));
return false;
}
bool X86AsmParser::ParseATTOperand(OperandVector &Operands) {
bool X86AsmParser::parseATTOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
switch (getLexer().getKind()) {
case AsmToken::Dollar: {
@ -3409,7 +3433,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Read the operands.
while (true) {
if (ParseOperand(Operands))
if (parseOperand(Operands, Name))
return true;
if (HandleAVX512Operand(Operands))
return true;

View File

@ -72,6 +72,11 @@ struct X86Operand final : public MCParsedAsmOperand {
/// If the memory operand is unsized and there are multiple instruction
/// matches, prefer the one with this size.
unsigned FrontendSize;
/// If false, then this operand must be a memory operand for an indirect
/// branch instruction. Otherwise, this operand may belong to either a
/// direct or indirect branch instruction.
bool MaybeDirectBranchDest;
};
union {
@ -209,6 +214,10 @@ struct X86Operand final : public MCParsedAsmOperand {
assert(Kind == Memory && "Invalid access!");
return Mem.FrontendSize;
}
bool isMaybeDirectBranchDest() const {
assert(Kind == Memory && "Invalid access!");
return Mem.MaybeDirectBranchDest;
}
bool isToken() const override {return Kind == Token; }
@ -374,8 +383,9 @@ struct X86Operand final : public MCParsedAsmOperand {
bool isAbsMem() const {
return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
!getMemIndexReg() && getMemScale() == 1;
!getMemIndexReg() && getMemScale() == 1 && isMaybeDirectBranchDest();
}
bool isAVX512RC() const{
return isImm();
}
@ -672,7 +682,7 @@ struct X86Operand final : public MCParsedAsmOperand {
CreateMem(unsigned ModeSize, const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
unsigned Size = 0, StringRef SymName = StringRef(),
void *OpDecl = nullptr, unsigned FrontendSize = 0,
bool UseUpRegs = false) {
bool UseUpRegs = false, bool MaybeDirectBranchDest = true) {
auto Res = std::make_unique<X86Operand>(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
@ -683,6 +693,7 @@ struct X86Operand final : public MCParsedAsmOperand {
Res->Mem.Size = Size;
Res->Mem.ModeSize = ModeSize;
Res->Mem.FrontendSize = FrontendSize;
Res->Mem.MaybeDirectBranchDest = MaybeDirectBranchDest;
Res->UseUpRegs = UseUpRegs;
Res->SymName = SymName;
Res->OpDecl = OpDecl;
@ -697,7 +708,8 @@ struct X86Operand final : public MCParsedAsmOperand {
SMLoc EndLoc, unsigned Size = 0,
unsigned DefaultBaseReg = X86::NoRegister,
StringRef SymName = StringRef(), void *OpDecl = nullptr,
unsigned FrontendSize = 0, bool UseUpRegs = false) {
unsigned FrontendSize = 0, bool UseUpRegs = false,
bool MaybeDirectBranchDest = true) {
// We should never just have a displacement, that should be parsed as an
// absolute memory operand.
assert((SegReg || BaseReg || IndexReg || DefaultBaseReg) &&
@ -716,6 +728,7 @@ struct X86Operand final : public MCParsedAsmOperand {
Res->Mem.Size = Size;
Res->Mem.ModeSize = ModeSize;
Res->Mem.FrontendSize = FrontendSize;
Res->Mem.MaybeDirectBranchDest = MaybeDirectBranchDest;
Res->UseUpRegs = UseUpRegs;
Res->SymName = SymName;
Res->OpDecl = OpDecl;

View File

@ -0,0 +1,188 @@
; RUN: llvm-ml -m64 -filetype=s %s /Fo - | FileCheck %s --check-prefixes=CHECK-64,CHECK
; RUN: llvm-ml -m32 -filetype=s %s /Fo - | FileCheck %s --check-prefixes=CHECK-32,CHECK
.data
ifdef rax
fn_ref qword 1
else
fn_ref dword 1
endif
fn_ref_word word 2
fn PROC
BranchTargetStruc struc
member0 dword ?
ifdef rax
member1 dword ?
endif
BranchTargetStruc ends
ifdef rax
fn_ref_struc BranchTargetStruc {3, 3}
else
fn_ref_struc BranchTargetStruc {3}
endif
.code
t0:
call fn_ref
jmp fn_ref
; CHECK-LABEL: t0:
; CHECK-64: call qword ptr [rip + fn_ref]
; CHECK-64: jmp qword ptr [rip + fn_ref]
; CHECK-32: call dword ptr [fn_ref]
; CHECK-32: jmp dword ptr [fn_ref]
t1:
call [fn_ref]
jmp [fn_ref]
; CHECK-LABEL: t1:
; CHECK-64: call qword ptr [rip + fn_ref]
; CHECK-64: jmp qword ptr [rip + fn_ref]
; CHECK-32: call dword ptr [fn_ref]
; CHECK-32: jmp dword ptr [fn_ref]
ifdef rax
t2:
call qword ptr [fn_ref]
jmp qword ptr [fn_ref]
; CHECK-64-LABEL: t2:
; CHECK-64: call qword ptr [rip + fn_ref]
; CHECK-64: jmp qword ptr [rip + fn_ref]
else
t2:
call dword ptr [fn_ref]
jmp dword ptr [fn_ref]
; CHECK-32-LABEL: t2:
; CHECK-32: call dword ptr [fn_ref]
; CHECK-32: jmp dword ptr [fn_ref]
t3:
call fn_ref_word
jmp fn_ref_word
; CHECK-32-LABEL: t3:
; CHECK-32: call word ptr [fn_ref_word]
; CHECK-32-NEXT: jmp word ptr [fn_ref_word]
t4:
call [fn_ref_word]
jmp [fn_ref_word]
; CHECK-32-LABEL: t4:
; CHECK-32: call word ptr [fn_ref_word]
; CHECK-32-NEXT: jmp word ptr [fn_ref_word]
t5:
call word ptr [fn_ref_word]
jmp word ptr [fn_ref_word]
; CHECK-32-LABEL: t5:
; CHECK-32: call word ptr [fn_ref_word]
; CHECK-32-NEXT: jmp word ptr [fn_ref_word]
endif
t6:
call t6
jmp t6
; CHECK-LABEL: t6:
; CHECK: call t6
; CHECK-NEXT: jmp t6
t7:
call [t7]
jmp [t7]
; CHECK-LABEL: t7:
; CHECK: call t7
; CHECK-NEXT: jmp t7
ifdef rax
t8:
call qword ptr [t8]
jmp qword ptr [t8]
; CHECK-64-LABEL: t8:
; CHECK-64: call qword ptr [rip + t8]
; CHECK-64-NEXT: jmp qword ptr [rip + t8]
else
t8:
call dword ptr [t8]
jmp dword ptr [t8]
; CHECK-32-LABEL: t8:
; CHECK-32: call dword ptr [t8]
; CHECK-32-NEXT: jmp dword ptr [t8]
endif
t9:
call fn
jmp fn
; CHECK-LABEL: t9:
; CHECK: call fn
; CHECK-NEXT: jmp fn
t10:
call [fn]
jmp [fn]
; CHECK-LABEL: t10:
; CHECK: call fn
; CHECK-NEXT: jmp fn
ifdef rax
t11:
call qword ptr [fn]
jmp qword ptr [fn]
; CHECK-64-LABEL: t11:
; CHECK-64: call qword ptr [rip + fn]
; CHECK-64-NEXT: jmp qword ptr [rip + fn]
else
t11:
call dword ptr [fn]
jmp dword ptr [fn]
; CHECK-32-LABEL: t11:
; CHECK-32: call dword ptr [fn]
; CHECK-32-NEXT: jmp dword ptr [fn]
endif
t12:
call fn_ref_struc
jmp fn_ref_struc
; CHECK-LABEL: t12:
; CHECK-64: call qword ptr [rip + fn_ref_struc]
; CHECK-64: jmp qword ptr [rip + fn_ref_struc]
; CHECK-32: call dword ptr [fn_ref_struc]
; CHECK-32: jmp dword ptr [fn_ref_struc]
t13:
call [fn_ref_struc]
jmp [fn_ref_struc]
; CHECK-LABEL: t13:
; CHECK-64: call qword ptr [rip + fn_ref_struc]
; CHECK-64: jmp qword ptr [rip + fn_ref_struc]
; CHECK-32: call dword ptr [fn_ref_struc]
; CHECK-32: jmp dword ptr [fn_ref_struc]
ifdef rax
t14:
call qword ptr [fn_ref_struc]
jmp qword ptr [fn_ref_struc]
; CHECK-64-LABEL: t14:
; CHECK-64: call qword ptr [rip + fn_ref_struc]
; CHECK-64: jmp qword ptr [rip + fn_ref_struc]
else
t14:
call dword ptr [fn_ref_struc]
jmp dword ptr [fn_ref_struc]
; CHECK-32-LABEL: t14:
; CHECK-32: call dword ptr [fn_ref_struc]
; CHECK-32: jmp dword ptr [fn_ref_struc]
endif
t15:
je t15
; CHECK-LABEL: t15:
; CHECK: je t15
t16:
je [t16];
; CHECK-LABEL: t16:
; CHECK: je t16