[X86] Fix handling of maskmovdqu in X32

The maskmovdqu instruction is an odd one: it has a 32-bit and a 64-bit
variant, the former using EDI, the latter RDI, but the use of the
register is implicit. In 64-bit mode, a 0x67 prefix can be used to get
the version using EDI, but there is no way to express this in assembly
as a single instruction; the only way is to write an explicit addr32
prefix.

This change adds support for the EDI variant of the instruction in
64-bit mode. When generating assembly text, the explicit addr32 will
be added. When not generating assembly text, it will be kept as a
single instruction and will be emitted with the 0x67 prefix. When
parsing assembly text, it will be re-parsed as ADDR32 followed by
MASKMOVDQU64, which still results in the correct bytes when converted
to machine code.

The same applies to vmaskmovdqu as well.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D103427
This commit is contained in:
Harald van Dijk 2021-07-15 22:56:08 +01:00
parent 81ce3aa30c
commit a8ad917054
No known key found for this signature in database
GPG Key ID: 7D71BB318A5BD56D
10 changed files with 1389 additions and 20 deletions

View File

@ -116,6 +116,8 @@ enum attributeBits {
ENUM_ENTRY(IC_VEX_XS, 2, "requires VEX and the XS prefix") \
ENUM_ENTRY(IC_VEX_XD, 2, "requires VEX and the XD prefix") \
ENUM_ENTRY(IC_VEX_OPSIZE, 2, "requires VEX and the OpSize prefix") \
ENUM_ENTRY(IC_64BIT_VEX_OPSIZE, 4, "requires 64-bit mode and VEX") \
ENUM_ENTRY(IC_64BIT_VEX_OPSIZE_ADSIZE, 5, "requires 64-bit mode, VEX, and AdSize")\
ENUM_ENTRY(IC_VEX_W, 3, "requires VEX and the W prefix") \
ENUM_ENTRY(IC_VEX_W_XS, 4, "requires VEX, W, and XS prefix") \
ENUM_ENTRY(IC_VEX_W_XD, 4, "requires VEX, W, and XD prefix") \

View File

@ -1119,6 +1119,8 @@ static int getInstructionID(struct InternalInstruction *insn,
switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
case VEX_PREFIX_66:
attrMask |= ATTR_OPSIZE;
if (insn->hasAdSize)
attrMask |= ATTR_ADSIZE;
break;
case VEX_PREFIX_F3:
attrMask |= ATTR_XS;
@ -1175,6 +1177,8 @@ static int getInstructionID(struct InternalInstruction *insn,
case 0x66:
if (insn->mode != MODE_16BIT)
attrMask |= ATTR_OPSIZE;
if (insn->hasAdSize)
attrMask |= ATTR_ADSIZE;
break;
case 0x67:
attrMask |= ATTR_ADSIZE;

View File

@ -4011,7 +4011,15 @@ def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>,
VEX, VEX_WIG;
VEX, VEX_WIG, AdSize64;
// VEX-encoded maskmovdqu for 64-bit mode with a 32-bit address size: the
// implicit register operand is EDI instead of RDI (see the selection pattern
// below), and AdSize32 marks the instruction as using 32-bit addressing.
let Uses = [EDI], Predicates = [HasAVX,In64BitMode] in
def VMASKMOVDQUX32 : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask), "",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
VEX, VEX_WIG, AdSize32 {
// The asm string is overridden here so the printed text carries an explicit
// addr32 prefix; there is no single-mnemonic spelling for this form.
let AsmString = "addr32 vmaskmovdqu\t{$mask, $src|$src, $mask}";
// NOTE(review): presumably keeps this record out of the assembly matcher, so
// that "addr32 vmaskmovdqu" in source text is parsed as a separate ADDR32
// followed by VMASKMOVDQU64 -- confirm against the asm matcher emitter.
let AsmVariantName = "NonParsable";
}
let Uses = [EDI], Predicates = [UseSSE2,Not64BitMode] in
def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
@ -4020,7 +4028,15 @@ def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
let Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in
def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>,
AdSize64;
// SSE2 maskmovdqu for 64-bit mode with a 32-bit address size: the implicit
// register operand is EDI instead of RDI (see the selection pattern below),
// and AdSize32 marks the instruction as using 32-bit addressing. It is
// printed with an explicit addr32 prefix because there is no single-mnemonic
// spelling for this form.
let Uses = [EDI], Predicates = [UseSSE2,In64BitMode] in
def MASKMOVDQUX32 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
"addr32 maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
AdSize32 {
// NOTE(review): presumably keeps this record out of the assembly matcher, so
// that "addr32 maskmovdqu" in source text is parsed as a separate ADDR32
// followed by MASKMOVDQU64 -- confirm against the asm matcher emitter.
let AsmVariantName = "NonParsable";
}
} // ExeDomain = SSEPackedInt

View File

@ -835,8 +835,8 @@ def JWriteMASKMOVDQU: SchedWriteRes<[JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JAL
let ResourceCycles = [1, 1, 2, 2, 2, 16, 42];
let NumMicroOps = 63;
}
def : InstRW<[JWriteMASKMOVDQU], (instrs MASKMOVDQU, MASKMOVDQU64,
VMASKMOVDQU, VMASKMOVDQU64)>;
def : InstRW<[JWriteMASKMOVDQU], (instrs MASKMOVDQU, MASKMOVDQU64, MASKMOVDQUX32,
VMASKMOVDQU, VMASKMOVDQU64, VMASKMOVDQUX32)>;
///////////////////////////////////////////////////////////////////////////////
// SchedWriteVariant definitions.

View File

@ -1,8 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mattr=+sse2,-avx | FileCheck %s --check-prefix=i686_SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2,-avx | FileCheck %s --check-prefix=x86_64_SSE2
; RUN: llc < %s -mtriple=x86_64--gnux32 -mattr=+sse2,-avx | FileCheck %s --check-prefix=x86_x32_SSE2
; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s --check-prefix=i686_AVX
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefix=x86_64_AVX
; RUN: llc < %s -mtriple=x86_64--gnux32 -mattr=+avx | FileCheck %s --check-prefix=x86_x32_AVX
; rdar://6573467
define void @test(<16 x i8> %a, <16 x i8> %b, i32 %dummy, i8* %c) nounwind {
@ -20,6 +22,13 @@ define void @test(<16 x i8> %a, <16 x i8> %b, i32 %dummy, i8* %c) nounwind {
; x86_64_SSE2-NEXT: maskmovdqu %xmm1, %xmm0
; x86_64_SSE2-NEXT: retq
;
; x86_x32_SSE2-LABEL: test:
; x86_x32_SSE2: # %bb.0: # %entry
; x86_x32_SSE2-NEXT: movq %rsi, %rdi
; x86_x32_SSE2-NEXT: # kill: def $edi killed $edi killed $rdi
; x86_x32_SSE2-NEXT: addr32 maskmovdqu %xmm1, %xmm0
; x86_x32_SSE2-NEXT: retq
;
; i686_AVX-LABEL: test:
; i686_AVX: # %bb.0: # %entry
; i686_AVX-NEXT: pushl %edi
@ -33,6 +42,12 @@ define void @test(<16 x i8> %a, <16 x i8> %b, i32 %dummy, i8* %c) nounwind {
; x86_64_AVX-NEXT: movq %rsi, %rdi
; x86_64_AVX-NEXT: vmaskmovdqu %xmm1, %xmm0
; x86_64_AVX-NEXT: retq
; x86_x32_AVX-LABEL: test:
; x86_x32_AVX: # %bb.0: # %entry
; x86_x32_AVX-NEXT: movq %rsi, %rdi
; x86_x32_AVX-NEXT: # kill: def $edi killed $edi killed $rdi
; x86_x32_AVX-NEXT: addr32 vmaskmovdqu %xmm1, %xmm0
; x86_x32_AVX-NEXT: retq
entry:
tail call void @llvm.x86.sse2.maskmov.dqu( <16 x i8> %a, <16 x i8> %b, i8* %c )
ret void

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,15 @@
// Assembler test for an i386 target: maskmovdqu and vmaskmovdqu must be
// printed and encoded with no address-size prefix. The first RUN line looks
// at the textual output with encodings; the second assembles to an object
// file and inspects the disassembly.

// RUN: llvm-mc -triple i386-- --show-encoding %s |\
// RUN: FileCheck %s --check-prefixes=CHECK,ENCODING
// RUN: llvm-mc -triple i386-- -filetype=obj %s |\
// RUN: llvm-objdump -d - | FileCheck %s
// CHECK-NOT: addr32
// CHECK: maskmovdqu %xmm1, %xmm0
// ENCODING: encoding: [0x66,0x0f,0xf7,0xc1]
maskmovdqu %xmm1, %xmm0
// CHECK-NOT: addr32
// CHECK: vmaskmovdqu %xmm1, %xmm0
// ENCODING: encoding: [0xc5,0xf9,0xf7,0xc1]
vmaskmovdqu %xmm1, %xmm0

View File

@ -0,0 +1,27 @@
// Assembler test for an x86_64 target. The first RUN line looks at the
// textual output with encodings; the second assembles to an object file and
// inspects the disassembly.

// RUN: llvm-mc -triple x86_64-- --show-encoding %s |\
// RUN: FileCheck %s --check-prefixes=CHECK,ENCODING
// RUN: llvm-mc -triple x86_64-- -filetype=obj %s |\
// RUN: llvm-objdump -d - | FileCheck %s

// Unprefixed forms: no address-size prefix may appear in the output.
// CHECK-NOT: addr32
// CHECK: maskmovdqu %xmm1, %xmm0
// ENCODING: encoding: [0x66,0x0f,0xf7,0xc1]
maskmovdqu %xmm1, %xmm0
// CHECK-NOT: addr32
// CHECK: vmaskmovdqu %xmm1, %xmm0
// ENCODING: encoding: [0xc5,0xf9,0xf7,0xc1]
vmaskmovdqu %xmm1, %xmm0

// Explicitly prefixed forms: the text output is expected to show addr32 as
// its own entry (encoding 0x67) followed by the ordinary instruction
// encoding, rather than one combined instruction.
// CHECK: addr32
// ENCODING: encoding: [0x67]
// CHECK: maskmovdqu %xmm1, %xmm0
// ENCODING: encoding: [0x66,0x0f,0xf7,0xc1]
addr32 maskmovdqu %xmm1, %xmm0
// CHECK: addr32
// ENCODING: encoding: [0x67]
// CHECK: vmaskmovdqu %xmm1, %xmm0
// ENCODING: encoding: [0xc5,0xf9,0xf7,0xc1]
addr32 vmaskmovdqu %xmm1, %xmm0

View File

@ -102,7 +102,8 @@ static inline bool inheritsFrom(InstructionContext child,
case IC_64BIT_ADSIZE:
return (noPrefix && inheritsFrom(child, IC_64BIT_OPSIZE_ADSIZE, noPrefix));
case IC_64BIT_OPSIZE_ADSIZE:
return false;
return (noPrefix &&
inheritsFrom(child, IC_64BIT_VEX_OPSIZE_ADSIZE, noPrefix));
case IC_XD:
return inheritsFrom(child, IC_64BIT_XD);
case IC_XS:
@ -123,10 +124,11 @@ static inline bool inheritsFrom(InstructionContext child,
case IC_64BIT_OPSIZE:
return inheritsFrom(child, IC_64BIT_REXW_OPSIZE) ||
(!AdSize64 && inheritsFrom(child, IC_64BIT_OPSIZE_ADSIZE)) ||
(!AdSize64 && inheritsFrom(child, IC_64BIT_REXW_ADSIZE));
(!AdSize64 && inheritsFrom(child, IC_64BIT_REXW_ADSIZE)) ||
(!AdSize64 && inheritsFrom(child, IC_64BIT_VEX_OPSIZE_ADSIZE));
case IC_64BIT_XD:
return(inheritsFrom(child, IC_64BIT_REXW_XD) ||
(!AdSize64 && inheritsFrom(child, IC_64BIT_XD_ADSIZE)));
return (inheritsFrom(child, IC_64BIT_REXW_XD) ||
(!AdSize64 && inheritsFrom(child, IC_64BIT_XD_ADSIZE)));
case IC_64BIT_XS:
return(inheritsFrom(child, IC_64BIT_REXW_XS) ||
(!AdSize64 && inheritsFrom(child, IC_64BIT_XS_ADSIZE)));
@ -156,7 +158,12 @@ static inline bool inheritsFrom(InstructionContext child,
case IC_VEX_OPSIZE:
return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_VEX_L_W_OPSIZE)) ||
(VEX_WIG && inheritsFrom(child, IC_VEX_W_OPSIZE)) ||
(VEX_LIG && inheritsFrom(child, IC_VEX_L_OPSIZE));
(VEX_LIG && inheritsFrom(child, IC_VEX_L_OPSIZE)) ||
inheritsFrom(child, IC_64BIT_VEX_OPSIZE);
case IC_64BIT_VEX_OPSIZE:
return inheritsFrom(child, IC_64BIT_VEX_OPSIZE_ADSIZE);
case IC_64BIT_VEX_OPSIZE_ADSIZE:
return false;
case IC_VEX_W:
return VEX_LIG && inheritsFrom(child, IC_VEX_L_W);
case IC_VEX_W_XS:
@ -881,6 +888,9 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const {
if ((index & ATTR_EVEX) || (index & ATTR_VEX) || (index & ATTR_VEXL)) {
if (index & ATTR_EVEX)
o << "IC_EVEX";
else if ((index & (ATTR_64BIT | ATTR_VEXL | ATTR_REXW | ATTR_OPSIZE)) ==
(ATTR_64BIT | ATTR_OPSIZE))
o << "IC_64BIT_VEX";
else
o << "IC_VEX";
@ -892,9 +902,13 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const {
if (index & ATTR_REXW)
o << "_W";
if (index & ATTR_OPSIZE)
if (index & ATTR_OPSIZE) {
o << "_OPSIZE";
else if (index & ATTR_XD)
if ((index & (ATTR_64BIT | ATTR_EVEX | ATTR_VEX | ATTR_VEXL |
ATTR_REXW | ATTR_ADSIZE)) ==
(ATTR_64BIT | ATTR_VEX | ATTR_ADSIZE))
o << "_ADSIZE";
} else if (index & ATTR_XD)
o << "_XD";
else if (index & ATTR_XS)
o << "_XS";
@ -908,8 +922,7 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const {
if (index & ATTR_EVEXB)
o << "_B";
}
}
else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS))
} else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS))
o << "IC_64BIT_REXW_XS";
else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XD))
o << "IC_64BIT_REXW_XD";

View File

@ -125,13 +125,7 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
return;
}
// Special case since there is no attribute class for 64-bit and VEX
if (Name == "VMASKMOVDQU64") {
ShouldBeEmitted = false;
return;
}
ShouldBeEmitted = true;
ShouldBeEmitted = true;
}
void RecognizableInstr::processInstr(DisassemblerTables &tables,
@ -267,6 +261,11 @@ InstructionContext RecognizableInstr::insnContext() const {
insnContext = IC_VEX_L_OPSIZE;
else if (OpPrefix == X86Local::PD && HasVEX_W)
insnContext = IC_VEX_W_OPSIZE;
else if (OpPrefix == X86Local::PD && Is64Bit &&
AdSize == X86Local::AdSize32)
insnContext = IC_64BIT_VEX_OPSIZE_ADSIZE;
else if (OpPrefix == X86Local::PD && Is64Bit)
insnContext = IC_64BIT_VEX_OPSIZE;
else if (OpPrefix == X86Local::PD)
insnContext = IC_VEX_OPSIZE;
else if (HasVEX_LPrefix && OpPrefix == X86Local::XS)