[X86] Block UndefRegUpdate

Summary:
Prevent folding of operations with memory loads when one of the sources
has an undefined register update.

Reviewers: craig.topper

Subscribers: llvm-commits, mike.dvoretsky, ashlykov

Differential Revision: https://reviews.llvm.org/D47621

llvm-svn: 334175
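Context for the change (my reading of the motivation, illustrated with lines quoted from the updated tests below): instructions such as VSQRTSD and VCVTSI2SD write only the low element of their destination and pass the remaining lanes through from a source register. When that source is undef, the memory-folded form keeps a false dependency on whatever last wrote the register; leaving the load unfolded lets the load itself fully redefine the register and break the dependency. From the sqrtd2 test:

  # before: load folded, false dependency on the stale contents of %xmm0
  vsqrtsd (%rdi), %xmm0, %xmm0

  # after: the load fully defines %xmm0, then the sqrt is register-register
  vmovsd (%rdi), %xmm0
  vsqrtsd %xmm0, %xmm0, %xmm0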
@@ -8479,6 +8479,19 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
   return nullptr;
 }
 
+static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF,
+                                               MachineInstr &MI) {
+  if (MF.getFunction().optForSize() || !hasUndefRegUpdate(MI.getOpcode()) ||
+      !MI.getOperand(1).isReg())
+    return false;
+
+  MachineRegisterInfo &RegInfo = MF.getRegInfo();
+  MachineInstr *VRegDef = RegInfo.getUniqueVRegDef(MI.getOperand(1).getReg());
+  if (VRegDef == nullptr)
+    return false;
+  return VRegDef->isImplicitDef();
+}
+
 MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
     MachineFunction &MF, MachineInstr &MI, unsigned OpNum,
     ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
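The helper fires only when folding would actually hurt: under optForSize the fold is kept for its size benefit; otherwise it checks whether operand 1 (the pass-through source) of an undef-updating instruction is defined by an IMPLICIT_DEF. A hypothetical MIR shape the check is after (opcode and register classes are illustrative, not quoted from the commit):

  %1:fr64 = IMPLICIT_DEF                    ; pass-through input is undef
  %2:fr64 = VCVTSI642SDrr %1:fr64, %0:gr64  ; operand 1 is %1 -> block the memory fold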
@@ -8497,10 +8510,10 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
        MI.getOpcode() == X86::PUSH64r))
     return nullptr;
 
-  // Avoid partial register update stalls unless optimizing for size.
-  // TODO: we should block undef reg update as well.
+  // Avoid partial and undef register update stalls unless optimizing for size.
   if (!MF.getFunction().optForSize() &&
-      hasPartialRegUpdate(MI.getOpcode(), Subtarget))
+      (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
+       shouldPreventUndefRegUpdateMemFold(MF, MI)))
     return nullptr;
 
   unsigned NumOps = MI.getDesc().getNumOperands();
@@ -8674,11 +8687,10 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
   if (NoFusing)
     return nullptr;
 
-  // Unless optimizing for size, don't fold to avoid partial
-  // register update stalls
-  // TODO: we should block undef reg update as well.
+  // Avoid partial and undef register update stalls unless optimizing for size.
   if (!MF.getFunction().optForSize() &&
-      hasPartialRegUpdate(MI.getOpcode(), Subtarget))
+      (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
+       shouldPreventUndefRegUpdateMemFold(MF, MI)))
     return nullptr;
 
   // Don't fold subreg spills, or reloads that use a high subreg.
@@ -8875,10 +8887,10 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
   // Check switch flag
   if (NoFusing) return nullptr;
 
-  // Avoid partial register update stalls unless optimizing for size.
-  // TODO: we should block undef reg update as well.
+  // Avoid partial and undef register update stalls unless optimizing for size.
   if (!MF.getFunction().optForSize() &&
-      hasPartialRegUpdate(MI.getOpcode(), Subtarget))
+      (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
+       shouldPreventUndefRegUpdateMemFold(MF, MI)))
     return nullptr;
 
   // Determine the alignment of the load.
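The remaining hunks update FileCheck expectations in the affected tests. For orientation, the first test compiles IR of roughly this shape; the entry block's load is quoted from the hunk below, while the sitofp and ret lines are a reconstruction from the function name and return type, not quoted from the test:

  define double @long_to_double_rm(i64* %a) {
  entry:
    %0 = load i64, i64* %a
    %conv = sitofp i64 %0 to double
    ret double %conv
  }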
@@ -27,7 +27,8 @@ define double @long_to_double_rm(i64* %a) {
 ;
 ; AVX-LABEL: long_to_double_rm:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vcvtsi2sdq (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    movq (%rdi), %rax
+; AVX-NEXT:    vcvtsi2sdq %rax, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 entry:
   %0 = load i64, i64* %a
@@ -75,7 +76,8 @@ define float @long_to_float_rm(i64* %a) {
 ;
 ; AVX-LABEL: long_to_float_rm:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vcvtsi2ssq (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    movq (%rdi), %rax
+; AVX-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 entry:
   %0 = load i64, i64* %a
@@ -43,7 +43,8 @@ define double @int_to_double_rr(i32 %a) {
 ; AVX_X86-NEXT:    .cfi_def_cfa_register %ebp
 ; AVX_X86-NEXT:    andl $-8, %esp
 ; AVX_X86-NEXT:    subl $8, %esp
-; AVX_X86-NEXT:    vcvtsi2sdl 8(%ebp), %xmm0, %xmm0
+; AVX_X86-NEXT:    movl 8(%ebp), %eax
+; AVX_X86-NEXT:    vcvtsi2sdl %eax, %xmm0, %xmm0
 ; AVX_X86-NEXT:    vmovsd %xmm0, (%esp)
 ; AVX_X86-NEXT:    fldl (%esp)
 ; AVX_X86-NEXT:    movl %ebp, %esp
@@ -64,7 +65,8 @@ define double @int_to_double_rm(i32* %a) {
 ;
 ; AVX-LABEL: int_to_double_rm:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vcvtsi2sdl (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    movl (%rdi), %eax
+; AVX-NEXT:    vcvtsi2sdl %eax, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; SSE2_X86-LABEL: int_to_double_rm:
@@ -187,7 +189,8 @@ define float @int_to_float_rr(i32 %a) {
 ; AVX_X86:       # %bb.0: # %entry
 ; AVX_X86-NEXT:    pushl %eax
 ; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
-; AVX_X86-NEXT:    vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; AVX_X86-NEXT:    vcvtsi2ssl %eax, %xmm0, %xmm0
 ; AVX_X86-NEXT:    vmovss %xmm0, (%esp)
 ; AVX_X86-NEXT:    flds (%esp)
 ; AVX_X86-NEXT:    popl %eax
@@ -207,7 +210,8 @@ define float @int_to_float_rm(i32* %a) {
 ;
 ; AVX-LABEL: int_to_float_rm:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vcvtsi2ssl (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    movl (%rdi), %eax
+; AVX-NEXT:    vcvtsi2ssl %eax, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; SSE2_X86-LABEL: int_to_float_rm:
@@ -5,8 +5,10 @@
 define <2 x double> @sqrtd2(double* nocapture readonly %v) local_unnamed_addr #0 {
 ; CHECK-LABEL: sqrtd2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsqrtsd (%rdi), %xmm0, %xmm0
-; CHECK-NEXT:    vsqrtsd 8(%rdi), %xmm1, %xmm1
+; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1
 ; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-NEXT:    retq
 entry:
@@ -27,10 +29,14 @@ declare double @sqrt(double) local_unnamed_addr #1
 define <4 x float> @sqrtf4(float* nocapture readonly %v) local_unnamed_addr #0 {
 ; CHECK-LABEL: sqrtf4:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsqrtss (%rdi), %xmm0, %xmm0
-; CHECK-NEXT:    vsqrtss 4(%rdi), %xmm1, %xmm1
-; CHECK-NEXT:    vsqrtss 8(%rdi), %xmm2, %xmm2
-; CHECK-NEXT:    vsqrtss 12(%rdi), %xmm3, %xmm3
+; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vsqrtss %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT:    vsqrtss %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT:    vsqrtss %xmm3, %xmm3, %xmm3
 ; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
 ; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
 ; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]