Reapply r106896:

Add several AVX MOV flavors
Support VEX encoding for MRMDestMem

llvm-svn: 106912
This commit is contained in:
Bruno Cardoso Lopes 2010-06-25 23:33:42 +00:00
parent c3bcc36a0b
commit 83651094ad
5 changed files with 291 additions and 23 deletions

View File

@ -286,12 +286,12 @@ class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
Requires<[HasSSE1]>;
class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS, VEX_4V,
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
Requires<[HasAVX, HasSSE1]>;
class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>,
VEX_4V, Requires<[HasAVX, HasSSE1]>;
Requires<[HasAVX, HasSSE1]>;
// SSE2 Instruction Templates:
//
@ -320,12 +320,12 @@ class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
Requires<[HasSSE2]>;
class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD, VEX_4V,
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD,
Requires<[HasAVX, HasSSE2]>;
class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>,
VEX_4V, OpSize, Requires<[HasAVX, HasSSE2]>;
OpSize, Requires<[HasAVX, HasSSE2]>;
// SSE3 Instruction Templates:
//

View File

@ -566,25 +566,47 @@ multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
string asm, Domain d,
bit IsReMaterializable = 1> {
let neverHasSideEffects = 1 in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), asm, [], d>;
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>;
let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), asm,
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (ld_frag addr:$src))], d>;
}
// AVX (VEX-tagged) counterparts of the SSE packed moves MOVAPS/MOVAPD/
// MOVUPS/MOVUPD: same opcodes (0x28 aligned, 0x10 unaligned), register
// class, memory operand and load fragments, but tagged VEX instead of TB.
// Each defm expands sse12_mov_packed into an rr and an rm record.
// VMOVUPD passes IsReMaterializable = 0, like MOVUPD.
let isAsmParserOnly = 1 in {
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
"movaps", SSEPackedSingle>, VEX;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
"movapd", SSEPackedDouble>, OpSize, VEX;
defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
"movups", SSEPackedSingle>, VEX;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
"movupd", SSEPackedDouble, 0>, OpSize, VEX;
}
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
"movaps\t{$src, $dst|$dst, $src}",
SSEPackedSingle>, TB;
"movaps", SSEPackedSingle>, TB;
defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
"movapd\t{$src, $dst|$dst, $src}",
SSEPackedDouble>, TB, OpSize;
"movapd", SSEPackedDouble>, TB, OpSize;
defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
"movups\t{$src, $dst|$dst, $src}",
SSEPackedSingle>, TB;
"movups", SSEPackedSingle>, TB;
defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
"movupd\t{$src, $dst|$dst, $src}",
SSEPackedDouble, 0>, TB, OpSize;
"movupd", SSEPackedDouble, 0>, TB, OpSize;
// AVX store forms of the packed moves: VR128 register -> 128-bit memory
// (MRMDestMem). Opcode 0x29 is the aligned store (alignedstore pattern),
// opcode 0x11 the unaligned one (plain store pattern).
// The VPSI/VPDI classes prepend "v" to the mnemonic; the VEX flag is
// attached to each definition here rather than coming from the class.
let isAsmParserOnly = 1 in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX;
def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movapd\t{$src, $dst|$dst, $src}",
[(alignedstore (v2f64 VR128:$src), addr:$dst)]>, VEX;
def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movups\t{$src, $dst|$dst, $src}",
[(store (v4f32 VR128:$src), addr:$dst)]>, VEX;
def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v2f64 VR128:$src), addr:$dst)]>, VEX;
}
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
@ -599,6 +621,25 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
[(store (v2f64 VR128:$src), addr:$dst)]>;
// Intrinsic forms of MOVUPS/D load and store
// AVX versions of the intrinsic-selected MOVUPS/MOVUPD load and store.
// Loads (opcode 0x10, MRMSrcMem) match the loadu_ps / loadu_pd intrinsics;
// stores (opcode 0x11, MRMDestMem) match storeu_ps / storeu_pd.
let isAsmParserOnly = 1 in {
// NOTE(review): this brace-less `let` scopes only over the single def that
// follows (VMOVUPSrm_Int); VMOVUPDrm_Int does not receive canFoldAsLoad /
// isReMaterializable. Confirm that asymmetry is intended.
let canFoldAsLoad = 1, isReMaterializable = 1 in
def VMOVUPSrm_Int : VPSI<0x10, MRMSrcMem, (outs VR128:$dst),
(ins f128mem:$src),
"movups\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>, VEX;
def VMOVUPDrm_Int : VPDI<0x10, MRMSrcMem, (outs VR128:$dst),
(ins f128mem:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>, VEX;
def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
"movups\t{$src, $dst|$dst, $src}",
[(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX;
def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX;
}
let canFoldAsLoad = 1, isReMaterializable = 1 in
def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"movups\t{$src, $dst|$dst, $src}",
@ -634,6 +675,12 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
SSEPackedDouble>, TB, OpSize;
}
// AVX low/high packed half moves, instantiated from sse12_mov_hilo_packed
// (opcode 0x12 for the low half, 0x16 for the high half).
// Unlike the SSE MOVL defm, which is wrapped in Constraints = "$src1 = $dst"
// with a two-operand asm string, these use a three-operand asm string
// ($src2, $src1, $dst), carry no tied-operand constraint, and are tagged
// VEX_4V.
let isAsmParserOnly = 1, AddedComplexity = 20 in {
defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
"\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
"\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
defm MOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
"\t{$src2, $dst|$dst, $src2}">;
@ -641,6 +688,16 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
"\t{$src2, $dst|$dst, $src2}">;
}
// AVX stores of the low 64 bits of a VR128 register to f64mem (opcode 0x13,
// MRMDestMem). Both patterns store element 0 of the source viewed as v2f64;
// the PS form first bitcasts the v4f32 source with bc_v2f64.
let isAsmParserOnly = 1 in {
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
(iPTR 0))), addr:$dst)]>, VEX;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlpd\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (v2f64 VR128:$src),
(iPTR 0))), addr:$dst)]>, VEX;
}
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
@ -652,6 +709,20 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
// AVX stores of the high 64 bits of a VR128 register to f64mem (opcode 0x17,
// MRMDestMem). The patterns express "high half" as element 0 of
// unpckh(src, undef), i.e. unpack high to low and then extract element 0.
let isAsmParserOnly = 1 in {
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract
(unpckh (bc_v2f64 (v4f32 VR128:$src)),
(undef)), (iPTR 0))), addr:$dst)]>,
VEX;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhpd\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract
(v2f64 (unpckh VR128:$src, (undef))),
(iPTR 0))), addr:$dst)]>,
VEX;
}
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract
@ -663,6 +734,20 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
(v2f64 (unpckh VR128:$src, (undef))),
(iPTR 0))), addr:$dst)]>;
// AVX register-register MOVLHPS (opcode 0x16) and MOVHLPS (opcode 0x12).
// Both take two VR128 sources and write a separate VR128 destination using
// a three-operand asm string, and are tagged VEX_4V. The SSE MOVLHPSrr, by
// contrast, sits under Constraints = "$src1 = $dst".
let isAsmParserOnly = 1, AddedComplexity = 20 in {
def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v4f32 (movlhps VR128:$src1, VR128:$src2)))]>,
VEX_4V;
def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v4f32 (movhlps VR128:$src1, VR128:$src2)))]>,
VEX_4V;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),

View File

@ -431,7 +431,6 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
switch (TSFlags & X86II::Op0Mask) {
default: assert(0 && "Invalid prefix!");
case 0: break; // No prefix!
case X86II::T8: // 0F 38
VEX_5M = 0x2;
break;
@ -448,21 +447,29 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
case X86II::XD: // F2 0F
VEX_PP = 0x3;
break;
case X86II::TB: // Bypass: Not used by VEX
case 0:
break; // No prefix!
}
unsigned NumOps = MI.getNumOperands();
unsigned i = 0, CurOp = 0;
bool IsSrcMem = false;
unsigned CurOp = 0;
if ((TSFlags & X86II::FormMask) == X86II::MRMDestMem)
NumOps = CurOp = X86AddrNumOperands;
switch (TSFlags & X86II::FormMask) {
case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
case X86II::MRMSrcMem:
IsSrcMem = true;
case X86II::MRMDestMem:
case X86II::MRMSrcReg:
if (MI.getOperand(CurOp).isReg() &&
X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
CurOp++;
// If the memory destination has been checked first,
// go back to the first operand
CurOp = (CurOp == NumOps) ? 0 : CurOp+1;
// On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the
// range 0-7 and the difference between the 2 groups is given by the
@ -486,12 +493,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
CurOp++;
}
i = CurOp;
for (; i != NumOps; ++i) {
const MCOperand &MO = MI.getOperand(i);
for (; CurOp != NumOps; ++CurOp) {
const MCOperand &MO = MI.getOperand(CurOp);
if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
VEX_B = 0x0;
if (!VEX_B && MO.isReg() && IsSrcMem &&
if (!VEX_B && MO.isReg() &&
((TSFlags & X86II::FormMask) == X86II::MRMSrcMem) &&
X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
VEX_X = 0x0;
}

View File

@ -10718,3 +10718,91 @@
// CHECK: encoding: [0xc5,0xf3,0x2a,0x10]
vcvtsi2sd (%eax), %xmm1, %xmm2
// AVX MOV flavors, 32-bit operands: packed aligned/unaligned moves
// (vmovaps, vmovapd, vmovups, vmovupd) followed by the low/high half moves
// (vmovlps, vmovlpd, vmovhps, vmovhpd, vmovlhps, vmovhlps). Every expected
// encoding in this group starts with the 2-byte VEX prefix 0xc5.
// CHECK: vmovaps (%eax), %xmm2
// CHECK: encoding: [0xc5,0xf8,0x28,0x10]
vmovaps (%eax), %xmm2
// CHECK: vmovaps %xmm1, %xmm2
// CHECK: encoding: [0xc5,0xf8,0x28,0xd1]
vmovaps %xmm1, %xmm2
// CHECK: vmovaps %xmm1, (%eax)
// CHECK: encoding: [0xc5,0xf8,0x29,0x08]
vmovaps %xmm1, (%eax)
// CHECK: vmovapd (%eax), %xmm2
// CHECK: encoding: [0xc5,0xf9,0x28,0x10]
vmovapd (%eax), %xmm2
// CHECK: vmovapd %xmm1, %xmm2
// CHECK: encoding: [0xc5,0xf9,0x28,0xd1]
vmovapd %xmm1, %xmm2
// CHECK: vmovapd %xmm1, (%eax)
// CHECK: encoding: [0xc5,0xf9,0x29,0x08]
vmovapd %xmm1, (%eax)
// CHECK: vmovups (%eax), %xmm2
// CHECK: encoding: [0xc5,0xf8,0x10,0x10]
vmovups (%eax), %xmm2
// CHECK: vmovups %xmm1, %xmm2
// CHECK: encoding: [0xc5,0xf8,0x10,0xd1]
vmovups %xmm1, %xmm2
// CHECK: vmovups %xmm1, (%eax)
// CHECK: encoding: [0xc5,0xf8,0x11,0x08]
vmovups %xmm1, (%eax)
// CHECK: vmovupd (%eax), %xmm2
// CHECK: encoding: [0xc5,0xf9,0x10,0x10]
vmovupd (%eax), %xmm2
// CHECK: vmovupd %xmm1, %xmm2
// CHECK: encoding: [0xc5,0xf9,0x10,0xd1]
vmovupd %xmm1, %xmm2
// CHECK: vmovupd %xmm1, (%eax)
// CHECK: encoding: [0xc5,0xf9,0x11,0x08]
vmovupd %xmm1, (%eax)
// CHECK: vmovlps %xmm1, (%eax)
// CHECK: encoding: [0xc5,0xf8,0x13,0x08]
vmovlps %xmm1, (%eax)
// CHECK: vmovlps (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe8,0x12,0x18]
vmovlps (%eax), %xmm2, %xmm3
// CHECK: vmovlpd %xmm1, (%eax)
// CHECK: encoding: [0xc5,0xf9,0x13,0x08]
vmovlpd %xmm1, (%eax)
// CHECK: vmovlpd (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0x12,0x18]
vmovlpd (%eax), %xmm2, %xmm3
// CHECK: vmovhps %xmm1, (%eax)
// CHECK: encoding: [0xc5,0xf8,0x17,0x08]
vmovhps %xmm1, (%eax)
// CHECK: vmovhps (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe8,0x16,0x18]
vmovhps (%eax), %xmm2, %xmm3
// CHECK: vmovhpd %xmm1, (%eax)
// CHECK: encoding: [0xc5,0xf9,0x17,0x08]
vmovhpd %xmm1, (%eax)
// CHECK: vmovhpd (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe9,0x16,0x18]
vmovhpd (%eax), %xmm2, %xmm3
// CHECK: vmovlhps %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe8,0x16,0xd9]
vmovlhps %xmm1, %xmm2, %xmm3
// CHECK: vmovhlps %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xe8,0x12,0xd9]
vmovhlps %xmm1, %xmm2, %xmm3

View File

@ -766,4 +766,92 @@ pshufb CPI1_0(%rip), %xmm1
// CHECK: encoding: [0xc5,0x23,0x2a,0x20]
vcvtsi2sd (%rax), %xmm11, %xmm12
// AVX MOV flavors using extended registers (xmm11-xmm13) with a %rax base.
// Memory forms in this group expect the 2-byte 0xc5 VEX prefix; the
// register-to-register forms expect the 3-byte 0xc4 prefix.
// CHECK: vmovaps (%rax), %xmm12
// CHECK: encoding: [0xc5,0x78,0x28,0x20]
vmovaps (%rax), %xmm12
// CHECK: vmovaps %xmm11, %xmm12
// CHECK: encoding: [0xc4,0x41,0x78,0x28,0xe3]
vmovaps %xmm11, %xmm12
// CHECK: vmovaps %xmm11, (%rax)
// CHECK: encoding: [0xc5,0x78,0x29,0x18]
vmovaps %xmm11, (%rax)
// CHECK: vmovapd (%rax), %xmm12
// CHECK: encoding: [0xc5,0x79,0x28,0x20]
vmovapd (%rax), %xmm12
// CHECK: vmovapd %xmm11, %xmm12
// CHECK: encoding: [0xc4,0x41,0x79,0x28,0xe3]
vmovapd %xmm11, %xmm12
// CHECK: vmovapd %xmm11, (%rax)
// CHECK: encoding: [0xc5,0x79,0x29,0x18]
vmovapd %xmm11, (%rax)
// CHECK: vmovups (%rax), %xmm12
// CHECK: encoding: [0xc5,0x78,0x10,0x20]
vmovups (%rax), %xmm12
// CHECK: vmovups %xmm11, %xmm12
// CHECK: encoding: [0xc4,0x41,0x78,0x10,0xe3]
vmovups %xmm11, %xmm12
// CHECK: vmovups %xmm11, (%rax)
// CHECK: encoding: [0xc5,0x78,0x11,0x18]
vmovups %xmm11, (%rax)
// CHECK: vmovupd (%rax), %xmm12
// CHECK: encoding: [0xc5,0x79,0x10,0x20]
vmovupd (%rax), %xmm12
// CHECK: vmovupd %xmm11, %xmm12
// CHECK: encoding: [0xc4,0x41,0x79,0x10,0xe3]
vmovupd %xmm11, %xmm12
// CHECK: vmovupd %xmm11, (%rax)
// CHECK: encoding: [0xc5,0x79,0x11,0x18]
vmovupd %xmm11, (%rax)
// CHECK: vmovlps %xmm11, (%rax)
// CHECK: encoding: [0xc5,0x78,0x13,0x18]
vmovlps %xmm11, (%rax)
// CHECK: vmovlps (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x18,0x12,0x28]
vmovlps (%rax), %xmm12, %xmm13
// CHECK: vmovlpd %xmm11, (%rax)
// CHECK: encoding: [0xc5,0x79,0x13,0x18]
vmovlpd %xmm11, (%rax)
// CHECK: vmovlpd (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0x12,0x28]
vmovlpd (%rax), %xmm12, %xmm13
// CHECK: vmovhps %xmm11, (%rax)
// CHECK: encoding: [0xc5,0x78,0x17,0x18]
vmovhps %xmm11, (%rax)
// CHECK: vmovhps (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x18,0x16,0x28]
vmovhps (%rax), %xmm12, %xmm13
// CHECK: vmovhpd %xmm11, (%rax)
// CHECK: encoding: [0xc5,0x79,0x17,0x18]
vmovhpd %xmm11, (%rax)
// CHECK: vmovhpd (%rax), %xmm12, %xmm13
// CHECK: encoding: [0xc5,0x19,0x16,0x28]
vmovhpd (%rax), %xmm12, %xmm13
// CHECK: vmovlhps %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x18,0x16,0xeb]
vmovlhps %xmm11, %xmm12, %xmm13
// CHECK: vmovhlps %xmm11, %xmm12, %xmm13
// CHECK: encoding: [0xc4,0x41,0x18,0x12,0xeb]
vmovhlps %xmm11, %xmm12, %xmm13