forked from OSchip/llvm-project
[AArch64] Fix bug in store of vector 0 DAGCombine.
Summary:
Avoid using XZR/WZR directly as operands to split stores of zero vectors. Doing so can lead to XZR/WZR being used by an instruction that doesn't allow it (e.g. add).
Fixes bug 34674.
Reviewers: t.p.northover, efriedma, MatzeB
Subscribers: aemerson, rengolin, javed.absar, mcrosier, eraman, llvm-commits, kristof.beyls
Differential Revision: https://reviews.llvm.org/D38146
llvm-svn: 313916
This commit is contained in:
parent
75d616b13f
commit
bb23df92b5
|
@@ -9448,11 +9448,20 @@ static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
// Use WZR/XZR here to prevent DAGCombiner::MergeConsecutiveStores from
|
||||
// undoing this transformation.
|
||||
SDValue SplatVal = VT.getVectorElementType().getSizeInBits() == 32
|
||||
? DAG.getRegister(AArch64::WZR, MVT::i32)
|
||||
: DAG.getRegister(AArch64::XZR, MVT::i64);
|
||||
// Use a CopyFromReg WZR/XZR here to prevent
|
||||
// DAGCombiner::MergeConsecutiveStores from undoing this transformation.
|
||||
SDLoc DL(&St);
|
||||
unsigned ZeroReg;
|
||||
EVT ZeroVT;
|
||||
if (VT.getVectorElementType().getSizeInBits() == 32) {
|
||||
ZeroReg = AArch64::WZR;
|
||||
ZeroVT = MVT::i32;
|
||||
} else {
|
||||
ZeroReg = AArch64::XZR;
|
||||
ZeroVT = MVT::i64;
|
||||
}
|
||||
SDValue SplatVal =
|
||||
DAG.getCopyFromReg(DAG.getEntryNode(), DL, ZeroReg, ZeroVT);
|
||||
return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
|
||||
}
|
||||
|
||||
|
|
|
@@ -12,9 +12,9 @@ entry:
|
|||
define void @t2() nounwind ssp {
|
||||
entry:
|
||||
; CHECK-LABEL: t2:
|
||||
; CHECK: stp xzr, xzr, [sp, #16]
|
||||
; CHECK: strh wzr, [sp, #32]
|
||||
; CHECK: stp xzr, xzr, [sp, #8]
|
||||
; CHECK: str xzr, [sp, #24]
|
||||
; CHECK: str xzr, [sp, #8]
|
||||
%buf = alloca [26 x i8], align 1
|
||||
%0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0
|
||||
call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false)
|
||||
|
|
|
@@ -21,9 +21,11 @@ define fastcc void @func_stack0() {
|
|||
call fastcc void @func_stack8([8 x i32] undef, i32 42)
|
||||
; CHECK: bl func_stack8
|
||||
; CHECK-NOT: sub sp, sp,
|
||||
; CHECK-NOT: [sp, #{{[-0-9]+}}]!
|
||||
; CHECK-NOT: [sp], #{{[-0-9]+}}
|
||||
|
||||
; CHECK-TAIL: bl func_stack8
|
||||
; CHECK-TAIL: sub sp, sp, #16
|
||||
; CHECK-TAIL: stp xzr, xzr, [sp, #-16]!
|
||||
|
||||
|
||||
call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9)
|
||||
|
@@ -72,10 +74,12 @@ define fastcc void @func_stack8([8 x i32], i32 %stacked) {
|
|||
call fastcc void @func_stack8([8 x i32] undef, i32 42)
|
||||
; CHECK: bl func_stack8
|
||||
; CHECK-NOT: sub sp, sp,
|
||||
; CHECK-NOT: [sp, #{{[-0-9]+}}]!
|
||||
; CHECK-NOT: [sp], #{{[-0-9]+}}
|
||||
|
||||
|
||||
; CHECK-TAIL: bl func_stack8
|
||||
; CHECK-TAIL: sub sp, sp, #16
|
||||
; CHECK-TAIL: stp xzr, xzr, [sp, #-16]!
|
||||
|
||||
|
||||
call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9)
|
||||
|
@@ -116,9 +120,11 @@ define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) {
|
|||
call fastcc void @func_stack8([8 x i32] undef, i32 42)
|
||||
; CHECK: bl func_stack8
|
||||
; CHECK-NOT: sub sp, sp,
|
||||
; CHECK-NOT: [sp, #{{[-0-9]+}}]!
|
||||
; CHECK-NOT: [sp], #{{[-0-9]+}}
|
||||
|
||||
; CHECK-TAIL: bl func_stack8
|
||||
; CHECK-TAIL: sub sp, sp, #16
|
||||
; CHECK-TAIL: stp xzr, xzr, [sp, #-16]!
|
||||
|
||||
|
||||
call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9)
|
||||
|
|
|
@@ -1667,4 +1667,17 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
|
||||
; Check for bug 34674 where invalid add of xzr was being generated.
|
||||
; CHECK-LABEL: bug34674:
|
||||
; CHECK: // %entry
|
||||
; CHECK-NEXT: mov [[ZREG:x[0-9]+]], xzr
|
||||
; CHECK-DAG: stp [[ZREG]], [[ZREG]], [x0]
|
||||
; CHECK-DAG: add x{{[0-9]+}}, [[ZREG]], #1
|
||||
define i64 @bug34674(<2 x i64>* %p) {
|
||||
entry:
|
||||
store <2 x i64> zeroinitializer, <2 x i64>* %p
|
||||
%p2 = bitcast <2 x i64>* %p to i64*
|
||||
%ld = load i64, i64* %p2
|
||||
%add = add i64 %ld, 1
|
||||
ret i64 %add
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue