forked from OSchip/llvm-project
[PowerPC] Fix use check of swap-reduction
This fixes swap reduction in DAGISel for cases where a COPY_TO_REGCLASS node has multiple uses: SkipRCCopy now returns an empty SDValue when any copy in the chain, or the value it finally resolves to, is not single-use, and its callers bail out of the reduction in that case.
This commit is contained in:
parent 030ac786d4
commit 033c9c2552
@@ -6903,19 +6903,22 @@ static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
   // TODO: Can we put this a common method for DAG?
   auto SkipRCCopy = [](SDValue V) {
     while (V->isMachineOpcode() &&
-           V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS)
+           V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS) {
+      // All values in the chain should have single use.
+      if (V->use_empty() || !V->use_begin()->isOnlyUserOf(V.getNode()))
+        return SDValue();
       V = V->getOperand(0);
-    return V;
+    }
+    return V.hasOneUse() ? V : SDValue();
   };
 
   SDValue VecOp = SkipRCCopy(N->getOperand(0));
-  if (!isLaneInsensitive(VecOp) || !VecOp.hasOneUse())
+  if (!VecOp || !isLaneInsensitive(VecOp))
     return;
 
   SDValue LHS = SkipRCCopy(VecOp.getOperand(0)),
           RHS = SkipRCCopy(VecOp.getOperand(1));
-  if (!LHS.hasOneUse() || !RHS.hasOneUse() || !isVSXSwap(LHS) ||
-      !isVSXSwap(RHS))
+  if (!LHS || !RHS || !isVSXSwap(LHS) || !isVSXSwap(RHS))
     return;
 
   // These swaps may still have chain-uses here, count on dead code elimination
@@ -83,5 +83,31 @@ entry:
   ret i16 %2
 }
 
+define signext i32 @vecop_uses2([4 x i32]* %a, [4 x i32]* %b, [4 x i32]* %c) {
+; CHECK-LABEL: vecop_uses2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxvd2x 0, 0, 3
+; CHECK-NEXT:    lxvd2x 1, 0, 4
+; CHECK-NEXT:    xxswapd 34, 0
+; CHECK-NEXT:    xxswapd 35, 1
+; CHECK-NEXT:    xxsldwi 0, 34, 34, 3
+; CHECK-NEXT:    vmuluwm 2, 3, 2
+; CHECK-NEXT:    mffprwz 3, 0
+; CHECK-NEXT:    xxswapd 0, 34
+; CHECK-NEXT:    extsw 3, 3
+; CHECK-NEXT:    stxvd2x 0, 0, 5
+; CHECK-NEXT:    blr
+entry:
+  %0 = bitcast [4 x i32]* %a to <4 x i32>*
+  %1 = load <4 x i32>, <4 x i32>* %0, align 4
+  %2 = bitcast [4 x i32]* %b to <4 x i32>*
+  %3 = load <4 x i32>, <4 x i32>* %2, align 4
+  %4 = mul <4 x i32> %3, %1
+  %5 = bitcast [4 x i32]* %c to <4 x i32>*
+  store <4 x i32> %4, <4 x i32>* %5, align 4
+  %6 = extractelement <4 x i32> %1, i32 3
+  ret i32 %6
+}
+
 declare <16 x i8> @llvm.ppc.altivec.vavgsb(<16 x i8>, <16 x i8>)
 declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>)
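The heart of the change is the stricter use check in SkipRCCopy in the first hunk: every COPY_TO_REGCLASS looked through, and the value the chain resolves to, must be single-use, otherwise the reduction is skipped. As a rough standalone sketch of that walk-and-check shape (not LLVM code: Node, kCopyToRegClass, and skipRCCopy below are hypothetical stand-ins for SelectionDAG nodes and their opcodes):

// Rough standalone model of the patched use check; all names here are
// illustrative stand-ins, not LLVM's SelectionDAG API.
#include <cstdio>

struct Node {
  int Opcode;      // hypothetical opcode tag
  int NumUses;     // number of users of this value
  Node *Operand0;  // the value a copy node forwards
};

constexpr int kCopyToRegClass = 1;  // stands in for TargetOpcode::COPY_TO_REGCLASS
constexpr int kOtherOp = 2;

// Look through a chain of register-class copies, but give up (return nullptr)
// if any copy in the chain, or the value it finally resolves to, is not
// single-use -- mirroring the single-use rule the patched SkipRCCopy enforces.
Node *skipRCCopy(Node *V) {
  while (V && V->Opcode == kCopyToRegClass) {
    if (V->NumUses != 1)
      return nullptr;  // a multi-use copy makes the swap reduction unsafe
    V = V->Operand0;
  }
  return (V && V->NumUses == 1) ? V : nullptr;
}

int main() {
  Node Loaded{kOtherOp, 2, nullptr};       // value with a second user
  Node Copy{kCopyToRegClass, 1, &Loaded};  // single-use copy of it
  std::printf("%s\n", skipRCCopy(&Copy) ? "reducible" : "skip reduction");
  return 0;
}

Run standalone, this prints "skip reduction" because the copied value has a second user. That is the situation exercised by the vecop_uses2 test above: the loaded vector %1 feeds both the multiply and the extractelement, so the swaps are kept, as the xxswapd instructions in the CHECK lines show.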