forked from OSchip/llvm-project
[AArch64] Avoid vector->load dependency cycles when creating LD1*post.
They would break the SelectionDAG. Note that the opposite load->vector dependency is already obvious in: (LD1*post vec, ..) llvm-svn: 235224
This commit is contained in:
parent
dcd89368cb
commit
2448ef5f33
|
@ -8116,6 +8116,13 @@ static SDValue performPostLD1Combine(SDNode *N,
|
|||
Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
|
||||
}
|
||||
|
||||
// Finally, check that the vector doesn't depend on the load.
|
||||
// Again, this would create a cycle.
|
||||
// The load depending on the vector is fine, as that's the case for the
|
||||
// LD1*post we'll eventually generate anyway.
|
||||
if (LoadSDN->isPredecessorOf(Vector.getNode()))
|
||||
continue;
|
||||
|
||||
SmallVector<SDValue, 8> Ops;
|
||||
Ops.push_back(LD->getOperand(0)); // Chain
|
||||
if (IsLaneOp) {
|
||||
|
|
|
@ -6171,4 +6171,25 @@ define <2 x double> @test_v2f64_post_reg_ld1lane(double* %bar, double** %ptr, i6
|
|||
%tmp3 = getelementptr double, double* %bar, i64 %inc
|
||||
store double* %tmp3, double** %ptr
|
||||
ret <2 x double> %tmp2
|
||||
}
|
||||
}
|
||||
|
||||
; Check for dependencies between the vector and the scalar load.
|
||||
define <4 x float> @test_v4f32_post_reg_ld1lane_dep_vec_on_load(float* %bar, float** %ptr, i64 %inc, <4 x float>* %dep_ptr_1, <4 x float>* %dep_ptr_2) {
|
||||
; CHECK-LABEL: test_v4f32_post_reg_ld1lane_dep_vec_on_load:
|
||||
; CHECK: BB#0:
|
||||
; CHECK-NEXT: ldr s[[LD:[0-9]+]], [x0]
|
||||
; CHECK-NEXT: movi.2d v0, #0000000000000000
|
||||
; CHECK-NEXT: str q0, [x3]
|
||||
; CHECK-NEXT: ldr q0, [x4]
|
||||
; CHECK-NEXT: ins.s v0[1], v[[LD]][0]
|
||||
; CHECK-NEXT: add [[POST:x[0-9]]], x0, x2, lsl #2
|
||||
; CHECK-NEXT: str [[POST]], [x1]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp1 = load float, float* %bar
|
||||
store <4 x float> zeroinitializer, <4 x float>* %dep_ptr_1, align 16
|
||||
%A = load <4 x float>, <4 x float>* %dep_ptr_2, align 16
|
||||
%tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
|
||||
%tmp3 = getelementptr float, float* %bar, i64 %inc
|
||||
store float* %tmp3, float** %ptr
|
||||
ret <4 x float> %tmp2
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue