Ensure all uses of permute instructions feed vector stores

There is a problem in VSXSwapRemoval where it incorrectly removes permute instructions.
When a permute feeds both a vector store and a non-store instruction, the permute cannot be removed.

The fix is to simply look at all the uses of the vector register defined by the permute and ensure that all the uses are vector store instructions.

This problem was reported in PR 27735 (https://llvm.org/bugs/show_bug.cgi?id=27735).

Test case based on the original problem reported.

Phabricator Review: http://reviews.llvm.org/D21802

llvm-svn: 274645
This commit is contained in:
Kit Barton 2016-07-06 18:03:52 +00:00
parent 1c3c0afc53
commit f9d0a40573
2 changed files with 75 additions and 0 deletions

View File

@ -692,6 +692,7 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
unsigned UseReg = MI->getOperand(0).getReg();
MachineInstr *DefMI = MRI->getVRegDef(UseReg);
unsigned DefReg = DefMI->getOperand(0).getReg();
int DefIdx = SwapMap[DefMI];
if (!SwapVector[DefIdx].IsSwap || SwapVector[DefIdx].IsLoad ||
@ -707,6 +708,25 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
DEBUG(MI->dump());
DEBUG(dbgs() << "\n");
}
// Ensure all uses of the register defined by DefMI feed store
// instructions
for (MachineInstr &UseMI : MRI->use_nodbg_instructions(DefReg)) {
int UseIdx = SwapMap[&UseMI];
if (SwapVector[UseIdx].VSEMI->getOpcode() != MI->getOpcode()) {
SwapVector[Repr].WebRejected = 1;
DEBUG(dbgs() <<
format("Web %d rejected for swap not feeding only stores\n",
Repr));
DEBUG(dbgs() << " def " << " : ");
DEBUG(DefMI->dump());
DEBUG(dbgs() << " use " << UseIdx << ": ");
DEBUG(SwapVector[UseIdx].VSEMI->dump());
DEBUG(dbgs() << "\n");
}
}
}
}

View File

@ -0,0 +1,55 @@
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s
;
; This is a regression test based on https://llvm.org/bugs/show_bug.cgi?id=27735
;
; Module-level <2 x double> constants. None of them is referenced by @zg
; below; NOTE(review): presumably carried over from the original bug
; reproducer -- confirm before removing.
@G1 = global <2 x double> <double 2.0, double -10.0>
@G2 = global <2 x double> <double 3.0, double 4.0>
@G3 = global <2 x double> <double 5.0, double 6.0>
@G4 = global <2 x double> <double 7.0, double 8.0>
; CHECK-LABEL: @zg
; CHECK: xxspltd
; CHECK-NEXT: xxspltd
; CHECK-NEXT: xxswapd
; CHECK-NEXT: xvmuldp
; CHECK-NEXT: xvmuldp
; CHECK-NEXT: xvsubdp
; CHECK-NEXT: xvadddp
; CHECK-NEXT: xxpermdi
; CHECK-NEXT: xvsubdp
; CHECK-NEXT: xxswapd
; CHECK-NEXT: stxvd2x
; CHECK-NEXT: blr
; Function Attrs: noinline
; @zg: regression input whose generated code is matched by the FileCheck
; directives above.
;
; Data flow visible in the body:
;   * the vector loaded from %.ka0000_391 is stored unchanged to %JJ and is
;     also consumed by the arithmetic below;
;   * two scalars are read from %.G0011_640.0 (byte offsets 0 and 8) and each
;     is splatted across both lanes;
;   * the combined result is subtracted from the vector at %.G0012_642.0 and
;     written back to that same address.
; The %.unpack and %.unpack66 parameters are not used by the body.
; NOTE(review): the load that feeds both a store and non-store users is
; presumably what produces the swap that must not be removed -- confirm
; against the llc output.
define void @zg(i8* %.G0011_640.0, i8* %.G0012_642.0, <2 x double>* %JJ, <2 x double>* %.ka0000_391, double %.unpack, double %.unpack66) #0 {
L.JA291:
; Load the source vector and immediately store a copy of it to %JJ.
%Z.L.JA291.2 = load <2 x double>, <2 x double>* %.ka0000_391, align 16
store <2 x double> %Z.L.JA291.2, <2 x double>* %JJ, align 8
; Load the vector that is later updated in place (align 1 load).
%Z.L.JA291.3 = bitcast i8* %.G0012_642.0 to <2 x double>*
%Z.L.JA291.4 = load <2 x double>, <2 x double>* %Z.L.JA291.3, align 1
; Read two consecutive doubles from %.G0011_640.0: offset 0 and offset 8.
%.elt136 = bitcast i8* %.G0011_640.0 to double*
%.unpack137 = load double, double* %.elt136, align 1
%.elt138 = getelementptr inbounds i8, i8* %.G0011_640.0, i64 8
%Z.L.JA291.5 = bitcast i8* %.elt138 to double*
%.unpack139 = load double, double* %Z.L.JA291.5, align 1
; Splat the first scalar into both lanes and multiply with the source vector.
%Z.L.JA291.6 = insertelement <2 x double> undef, double %.unpack137, i32 0
%Z.L.JA291.7 = insertelement <2 x double> %Z.L.JA291.6, double %.unpack137, i32 1
%Z.L.JA291.8 = fmul <2 x double> %Z.L.JA291.2, %Z.L.JA291.7
; Swap the two lanes of the source vector (mask <1, 0>), then multiply the
; swapped vector with a splat of the second scalar.
%Z.L.JA291.9 = shufflevector <2 x double> %Z.L.JA291.2, <2 x double> undef, <2 x i32> <i32 1, i32 0>
%Z.L.JA291.10 = insertelement <2 x double> undef, double %.unpack139, i32 0
%Z.L.JA291.11 = insertelement <2 x double> %Z.L.JA291.10, double %.unpack139, i32 1
%Z.L.JA291.12 = fmul <2 x double> %Z.L.JA291.9, %Z.L.JA291.11
; Form both the difference and the sum of the two products, then take lane 0
; from the difference and lane 1 from the sum (mask index 3 selects element 1
; of the second shuffle operand).
%Z.L.JA291.13 = fsub <2 x double> %Z.L.JA291.8, %Z.L.JA291.12
%Z.L.JA291.14 = fadd <2 x double> %Z.L.JA291.8, %Z.L.JA291.12
%Z.L.JA291.15 = shufflevector <2 x double> %Z.L.JA291.13, <2 x double> %Z.L.JA291.14, <2 x i32> <i32 0, i32 3>
; Subtract the combined vector from the value loaded from %.G0012_642.0 and
; store the result back to the same address.
%Z.L.JA291.16 = fsub <2 x double> %Z.L.JA291.4, %Z.L.JA291.15
%Z.L.JA291.17 = bitcast i8* %.G0012_642.0 to <2 x double>*
store <2 x double> %Z.L.JA291.16, <2 x double>* %Z.L.JA291.17, align 8
; Trailing i32 load from %JJ; its result (%.pre) is not used before the return.
%.. = bitcast <2 x double>* %JJ to i32*
%.pre = load i32, i32* %.., align 32
ret void
}
attributes #0 = { noinline }