forked from OSchip/llvm-project
[AVX512] Update X86InstrInfo::foldMemoryOperandCustom to handle the EVEX encoded instructions too.
llvm-svn: 276390
This commit is contained in:
parent
522a91181a
commit
ab13b33ded
|
@ -5780,6 +5780,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
|
|||
switch (MI.getOpcode()) {
|
||||
case X86::INSERTPSrr:
|
||||
case X86::VINSERTPSrr:
|
||||
case X86::VINSERTPSZrr:
|
||||
// Attempt to convert the load of the inserted vector into a folded load
|
||||
// of a single float.
|
||||
if (OpNum == 2) {
|
||||
|
@ -5793,8 +5794,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
|
|||
int PtrOffset = SrcIdx * 4;
|
||||
unsigned NewImm = (DstIdx << 4) | ZMask;
|
||||
unsigned NewOpCode =
|
||||
(MI.getOpcode() == X86::VINSERTPSrr ? X86::VINSERTPSrm
|
||||
: X86::INSERTPSrm);
|
||||
(MI.getOpcode() == X86::VINSERTPSZrr) ? X86::VINSERTPSZrm :
|
||||
(MI.getOpcode() == X86::VINSERTPSrr) ? X86::VINSERTPSrm :
|
||||
X86::INSERTPSrm;
|
||||
MachineInstr *NewMI =
|
||||
FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, PtrOffset);
|
||||
NewMI->getOperand(NewMI->getNumOperands() - 1).setImm(NewImm);
|
||||
|
@ -5804,6 +5806,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
|
|||
break;
|
||||
case X86::MOVHLPSrr:
|
||||
case X86::VMOVHLPSrr:
|
||||
case X86::VMOVHLPSZrr:
|
||||
// Move the upper 64 bits of the second operand to the lower 64 bits.
|
||||
// To fold the load, adjust the pointer to the upper 64 bits and use (V)MOVLPS.
|
||||
// TODO: In most cases AVX doesn't have an 8-byte alignment requirement.
|
||||
|
@ -5811,8 +5814,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
|
|||
unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize();
|
||||
if (Size <= RCSize && 8 <= Align) {
|
||||
unsigned NewOpCode =
|
||||
(MI.getOpcode() == X86::VMOVHLPSrr ? X86::VMOVLPSrm
|
||||
: X86::MOVLPSrm);
|
||||
(MI.getOpcode() == X86::VMOVHLPSZrr) ? X86::VMOVLPSZ128rm :
|
||||
(MI.getOpcode() == X86::VMOVHLPSrr) ? X86::VMOVLPSrm :
|
||||
X86::MOVLPSrm;
|
||||
MachineInstr *NewMI =
|
||||
FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, 8);
|
||||
return NewMI;
|
||||
|
|
|
@ -68,6 +68,16 @@ define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) {
|
|||
}
|
||||
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) {
|
||||
;CHECK-LABEL: stack_fold_insertps
|
||||
;CHECK: vinsertps $17, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
;CHECK-NEXT: {{.*#+}} xmm0 = zero,mem[0],xmm0[2,3]
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 209)
|
||||
ret <4 x float> %2
|
||||
}
|
||||
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
|
||||
|
||||
define double @stack_fold_mulsd(double %a0, double %a1) {
|
||||
;CHECK-LABEL: stack_fold_mulsd
|
||||
;CHECK: vmulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
|
||||
|
|
Loading…
Reference in New Issue