forked from OSchip/llvm-project
[LegalizeTypes] Properly handle the case when UpdateNodeOperands in PromoteIntOp_MLOAD triggers CSE instead of updating the node in place.
The caller can't handle the node having multiple results like a masked load does. So we need to detect the case and do our own result replacement. Fixes PR46532.
This commit is contained in:
parent
91836fd7f3
commit
361853c96f
|
@ -1723,7 +1723,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,
|
|||
SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
|
||||
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
|
||||
NewOps[OpNo] = Mask;
|
||||
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
|
||||
SDNode *Res = DAG.UpdateNodeOperands(N, NewOps);
|
||||
if (Res == N)
|
||||
return SDValue(Res, 0);
|
||||
|
||||
// Update triggered CSE, do our own replacement since caller can't.
|
||||
ReplaceValueWith(SDValue(N, 0), SDValue(Res, 0));
|
||||
ReplaceValueWith(SDValue(N, 1), SDValue(Res, 1));
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=haswell | FileCheck %s
|
||||
|
||||
; Regression test for PR46532 (see the commit message at the top of this
; page): during integer-operand promotion of a masked load's mask,
; UpdateNodeOperands could trigger CSE and return a DIFFERENT node instead of
; updating the node in place; the caller could not handle that because a
; masked load has multiple results, so PromoteIntOp_MLOAD must do its own
; result replacement.
; NOTE(review): presumably one of the masked loads below is type-legalized
; into a node that CSEs with an existing one, reproducing the crash -- the IR
; looks reduced from the PR46532 reproducer, so do not "simplify" it; confirm
; against the PR before editing. The CHECK lines are autogenerated
; (update_llc_test_checks.py) -- regenerate rather than hand-edit them.
define void @WhileWithLoopInvariantOperation.21() {
|
||||
; CHECK-LABEL: WhileWithLoopInvariantOperation.21:
|
||||
; CHECK: # %bb.0: # %while.1.body.preheader
|
||||
; CHECK-NEXT: movq (%rax), %rax
|
||||
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovaps %xmm0, 32(%rax)
|
||||
; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967295,0,0,0,0,0,0]
|
||||
; CHECK-NEXT: vmaskmovps %ymm0, %ymm0, (%rax)
|
||||
while.1.body.preheader:
|
||||
%0 = load i8*, i8** undef, align 8, !invariant.load !0, !dereferenceable !1, !align !2
|
||||
%1 = getelementptr inbounds i8, i8* %0, i64 32
|
||||
tail call void @llvm.memset.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %1, i8 0, i64 16, i1 false)
|
||||
; Two masked loads of different widths (v8f32 and v16f32) with the same
; leading-two-lanes mask pattern, followed by a masked store.
; NOTE(review): the v16f32 operations are wider than a ymm register on
; -mcpu=haswell (AVX2), so they are split/legalized -- presumably this is
; where the mask-promotion + CSE interaction of PR46532 occurs; confirm.
%2 = tail call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 4, <8 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x float> <float undef, float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>)
|
||||
%3 = tail call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 4, <16 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x float> <float undef, float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>)
|
||||
tail call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* nonnull undef, i32 4, <16 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>)
|
||||
; The test only checks that compilation succeeds with the expected codegen;
; the function never returns normally.
unreachable
|
||||
}
|
||||
|
||||
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)
|
||||
|
||||
declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32 immarg, <8 x i1>, <8 x float>)
|
||||
|
||||
declare <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>*, i32 immarg, <16 x i1>, <16 x float>)
|
||||
|
||||
declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32 immarg, <16 x i1>)
|
||||
|
||||
!0 = !{}
|
||||
!1 = !{i64 65}
|
||||
!2 = !{i64 16}
|
||||
|
Loading…
Reference in New Issue