[ARM] Add an extra fold for f32 extract(vdup(i32))
This adds another small fold for an extract of a vdup, between an i32 and an f32, converting to a BITCAST. This allows some extra folding to happen, simplifying the resulting code.

Differential Revision: https://reviews.llvm.org/D104857
parent d25e572421
commit a1c0f09a89
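The fold targets the case where an f32 lane is extracted from a vector that was splatted (vdup'd) from an i32 scalar: instead of going through the vector, the scalar can simply be bitcast to f32. Below is a minimal sketch of IR that produces this shape; it is a hypothetical reduced case, not taken from the commit's tests, and the function name and exact IR are assumptions for illustration.

; Hypothetical reproducer: splat an i32, reinterpret the vector as floats,
; then extract a single f32 lane.
define arm_aapcs_vfpcc float @extract_f32_from_i32_dup(i32 %x) {
entry:
  %ins = insertelement <4 x i32> undef, i32 %x, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %cast = bitcast <4 x i32> %splat to <4 x float>
  ; Every lane holds %x, so the f32 result is just a bitcast of the scalar %x.
  %elt = extractelement <4 x float> %cast, i32 2
  ret float %elt
}

Whether this exact IR reaches the new check depends on how earlier DAG combines canonicalise the bitcast, so treat it as an illustration of the pattern rather than a guaranteed reproducer; the committed tests exercise the fold through the MVE vst4 lowerings shown below.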
@@ -14650,6 +14650,8 @@ static SDValue PerformExtractEltCombine(SDNode *N,
       return DCI.DAG.getNode(ARMISD::VMOVhr, dl, VT, X);
     if (VT == MVT::i32 && X.getValueType() == MVT::f16)
       return DCI.DAG.getNode(ARMISD::VMOVrh, dl, VT, X);
+    if (VT == MVT::f32 && X.getValueType() == MVT::i32)
+      return DCI.DAG.getNode(ISD::BITCAST, dl, VT, X);
 
     while (X.getValueType() != VT && X->getOpcode() == ISD::BITCAST)
       X = X->getOperand(0);
@@ -205,37 +205,34 @@ define void @vst4_v4i32_align1(<4 x i32> *%src, <16 x i32> *%dst) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: .vsave {d8, d9, d10}
; CHECK-NEXT: vpush {d8, d9, d10}
; CHECK-NEXT: vldrw.u32 q0, [r0, #32]
; CHECK-NEXT: vldrw.u32 q3, [r0]
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
; CHECK-NEXT: vldrw.u32 q4, [r0]
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: vmov r12, lr, d0
; CHECK-NEXT: vdup.32 q4, r3
; CHECK-NEXT: vmov.f64 d0, d6
; CHECK-NEXT: vmov.f32 s1, s4
; CHECK-NEXT: vmov.f32 s4, s13
; CHECK-NEXT: vmov.f64 d4, d7
; CHECK-NEXT: vmov.f32 s12, s15
; CHECK-NEXT: vmov.f32 s13, s7
; CHECK-NEXT: vmov.f32 s14, s18
; CHECK-NEXT: vmov.f32 s15, s19
; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
; CHECK-NEXT: vmov.f64 d2, d8
; CHECK-NEXT: vmov.f32 s5, s0
; CHECK-NEXT: vmov s10, r2
; CHECK-NEXT: vmov s14, r3
; CHECK-NEXT: vmov.f32 s8, s18
; CHECK-NEXT: vmov s20, lr
; CHECK-NEXT: vmov.f32 s9, s2
; CHECK-NEXT: vmov s6, r12
; CHECK-NEXT: vmov.f32 s0, s17
; CHECK-NEXT: vmov.f32 s12, s19
; CHECK-NEXT: vmov.f32 s13, s3
; CHECK-NEXT: vmov.f32 s2, s20
; CHECK-NEXT: vmov.f32 s15, s14
; CHECK-NEXT: vmov.f32 s11, s10
; CHECK-NEXT: vstrb.8 q3, [r1, #48]
; CHECK-NEXT: vmov.f32 s9, s6
; CHECK-NEXT: vdup.32 q3, r2
; CHECK-NEXT: vmov.f32 s10, s14
; CHECK-NEXT: vmov.f32 s11, s15
; CHECK-NEXT: vmov.f32 s3, s20
; CHECK-NEXT: vstrb.8 q2, [r1, #32]
; CHECK-NEXT: vdup.32 q2, lr
; CHECK-NEXT: vmov.f32 s6, s10
; CHECK-NEXT: vmov.f32 s7, s11
; CHECK-NEXT: vstrb.8 q1, [r1, #16]
; CHECK-NEXT: vdup.32 q1, r12
; CHECK-NEXT: vmov.f32 s2, s6
; CHECK-NEXT: vmov.f32 s3, s7
; CHECK-NEXT: vstrb.8 q0, [r1]
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: vmov.f32 s7, s6
; CHECK-NEXT: vstrb.8 q0, [r1, #16]
; CHECK-NEXT: vstrb.8 q1, [r1]
; CHECK-NEXT: vpop {d8, d9, d10}
; CHECK-NEXT: pop {r7, pc}
entry:
  %s1 = getelementptr <4 x i32>, <4 x i32>* %src, i32 0
@@ -975,37 +972,34 @@ define void @vst4_v4f32_align1(<4 x float> *%src, <16 x float> *%dst) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vldrw.u32 q4, [r0]
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12}
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12}
; CHECK-NEXT: vldrw.u32 q0, [r0, #32]
; CHECK-NEXT: vldrw.u32 q2, [r0, #16]
; CHECK-NEXT: vmov.f64 d2, d8
; CHECK-NEXT: vldrw.u32 q5, [r0]
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: vmov.f32 s5, s8
; CHECK-NEXT: vdup.32 q5, r3
; CHECK-NEXT: vmov.f32 s8, s17
; CHECK-NEXT: vmov.f64 d6, d9
; CHECK-NEXT: vmov.f32 s16, s19
; CHECK-NEXT: vmov.f32 s17, s11
; CHECK-NEXT: vmov.f32 s18, s22
; CHECK-NEXT: vmov.f32 s19, s23
; CHECK-NEXT: vstrb.8 q4, [r1, #48]
; CHECK-NEXT: vmov.f32 s13, s10
; CHECK-NEXT: vdup.32 q4, r2
; CHECK-NEXT: vmov r12, lr, d0
; CHECK-NEXT: vmov.f32 s14, s18
; CHECK-NEXT: vmov.f64 d4, d10
; CHECK-NEXT: vmov.f32 s9, s4
; CHECK-NEXT: vmov s14, r2
; CHECK-NEXT: vmov s18, r3
; CHECK-NEXT: vmov.f32 s12, s22
; CHECK-NEXT: vmov s24, lr
; CHECK-NEXT: vmov.f32 s13, s6
; CHECK-NEXT: vmov.f32 s4, s21
; CHECK-NEXT: vmov.f32 s16, s23
; CHECK-NEXT: vmov.f32 s17, s7
; CHECK-NEXT: vmov s10, r12
; CHECK-NEXT: vmov.f32 s6, s24
; CHECK-NEXT: vmov.f32 s19, s18
; CHECK-NEXT: vmov.f32 s15, s2
; CHECK-NEXT: vstrb.8 q4, [r1, #48]
; CHECK-NEXT: vmov.f32 s7, s24
; CHECK-NEXT: vstrb.8 q3, [r1, #32]
; CHECK-NEXT: vdup.32 q3, lr
; CHECK-NEXT: vmov.f32 s10, s14
; CHECK-NEXT: vmov.f32 s11, s15
; CHECK-NEXT: vstrb.8 q2, [r1, #16]
; CHECK-NEXT: vdup.32 q2, r12
; CHECK-NEXT: vmov.f32 s6, s10
; CHECK-NEXT: vmov.f32 s7, s0
; CHECK-NEXT: vstrb.8 q1, [r1]
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: vmov.f32 s11, s0
; CHECK-NEXT: vstrb.8 q1, [r1, #16]
; CHECK-NEXT: vstrb.8 q2, [r1]
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12}
; CHECK-NEXT: pop {r7, pc}
entry:
  %s1 = getelementptr <4 x float>, <4 x float>* %src, i32 0