Merge AVX_SET0PSY/AVX_SET0PDY/AVX2_SET0 into a single post-RA pseudo.

llvm-svn: 162738
This commit is contained in:
Craig Topper 2012-08-28 07:05:28 +00:00
parent 3dd531dbd4
commit bd509eea4a
4 changed files with 19 additions and 32 deletions

View File

@ -3429,6 +3429,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
case X86::FsFLD0SS:
case X86::FsFLD0SD:
return Expand2AddrUndef(MI, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
case X86::AVX_SET0:
assert(HasAVX && "AVX not supported");
return Expand2AddrUndef(MI, get(X86::VXORPSYrr));
case X86::TEST8ri_NOREX:
MI->setDesc(get(X86::TEST8ri));
return true;
@ -3780,10 +3783,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Alignment = (*LoadMI->memoperands_begin())->getAlignment();
else
switch (LoadMI->getOpcode()) {
case X86::AVX_SET0PSY:
case X86::AVX_SET0PDY:
case X86::AVX2_SETALLONES:
case X86::AVX2_SET0:
case X86::AVX_SET0:
Alignment = 32;
break;
case X86::V_SET0:
@ -3824,11 +3825,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
switch (LoadMI->getOpcode()) {
case X86::V_SET0:
case X86::V_SETALLONES:
case X86::AVX_SET0PSY:
case X86::AVX_SET0PDY:
case X86::AVX_SETALLONES:
case X86::AVX2_SETALLONES:
case X86::AVX2_SET0:
case X86::AVX_SET0:
case X86::FsFLD0SD:
case X86::FsFLD0SS: {
// Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
@ -3860,9 +3859,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Ty = Type::getFloatTy(MF.getFunction()->getContext());
else if (Opc == X86::FsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX2_SET0)
else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0)
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8);
else
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);

View File

@ -382,12 +382,11 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, neverHasSideEffects = 1 in {
isPseudo = 1 in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4f32 immAllZerosV))]>;
}
def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
@ -395,30 +394,24 @@ def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
// The same as done above but for AVX. The 256-bit ISA does not support PI,
// The same as done above but for AVX. The 256-bit AVX1 ISA doesn't support PI,
// and doesn't need it because on sandy bridge the register is set to zero
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty
// FIXME: Change encoding to pseudo! This is blocked right now by the x86
// JIT implementation, it does not expand the instructions below like
// X86MCInstLower does.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isCodeGenOnly = 1 in {
let Predicates = [HasAVX] in {
def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
}
let Predicates = [HasAVX2] in
def AVX2_SET0 : PDI<0xef, MRMInitReg, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v4i64 immAllZerosV))]>, VEX_4V;
isPseudo = 1, Predicates = [HasAVX] in {
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v8f32 immAllZerosV))]>;
}
let Predicates = [HasAVX] in
def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;
let Predicates = [HasAVX2] in {
def : Pat<(v8i32 immAllZerosV), (AVX2_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX2_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX2_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
def : Pat<(v8i32 immAllZerosV), (AVX_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
}
// AVX1 has no support for 256-bit integer instructions, but since the 128-bit

View File

@ -378,11 +378,8 @@ ReSimplify:
case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break;
case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break;
case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
case X86::AVX_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDrr); break;
case X86::AVX2_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDYrr);break;
case X86::AVX2_SET0: LowerUnaryToTwoAddr(OutMI, X86::VPXORYrr); break;
case X86::MOV16r0:
LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0

View File

@ -5,7 +5,7 @@
; It's hard to test for the ISEL condition because CodeGen optimizes
; away the bugpointed code. Just ensure the basics are still there.
;CHECK: func:
;CHECK: vpxor
;CHECK: vxorps
;CHECK: vinsertf128
;CHECK: vpshufd
;CHECK: vpshufd