[ARM] Simplify extract of VMOVDRR
Under SoftFP calling conventions, we can be left with extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) patterns that can simplify to a or b, depending on the extract lane.

Differential Revision: https://reviews.llvm.org/D94990
parent 4648098f97
commit 6ab792b68d
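
As a rough illustration of where this pattern comes from (a hypothetical sketch, not taken from this commit's tests): with an integer-only MVE target and a soft-float ABI, a <2 x i64> argument arrives in r0-r3 and is re-packed by argument lowering into a v2f64 BUILD_VECTOR of two VMOVDRR nodes, so an i32 extract of the bitcast value can be folded straight back to one of the incoming GPRs.

; Hypothetical IR, for illustration only; the function name and lane
; index are not from the patch.
define i32 @extract_lane1(<2 x i64> %v) {
entry:
  ; Reinterpret the 128-bit argument as four i32 lanes and take lane 1.
  ; Under a soft-float ABI this lowers to roughly
  ; extract(bitcast(BUILD_VECTOR(VMOVDRR(r0, r1), VMOVDRR(r2, r3))), 1),
  ; which the new combine folds to r1 on a little-endian target.
  %c = bitcast <2 x i64> %v to <4 x i32>
  %e = extractelement <4 x i32> %c, i32 1
  ret i32 %e
}

The ST->isLittle() test in the new code selects which VMOVDRR operand corresponds to a given lane, so the big-endian (CHECK-BE) codegen is updated alongside the little-endian runs.
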
@@ -13949,7 +13949,8 @@ static SDValue PerformInsertEltCombine(SDNode *N,
 }
 
 static SDValue PerformExtractEltCombine(SDNode *N,
-                                        TargetLowering::DAGCombinerInfo &DCI) {
+                                        TargetLowering::DAGCombinerInfo &DCI,
+                                        const ARMSubtarget *ST) {
   SDValue Op0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
   SDLoc dl(N);
@@ -13968,6 +13969,19 @@ static SDValue PerformExtractEltCombine(SDNode *N,
     return X;
   }
 
+  // extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) -> a or b
+  if (Op0.getValueType() == MVT::v4i32 &&
+      isa<ConstantSDNode>(N->getOperand(1)) &&
+      Op0.getOpcode() == ISD::BITCAST &&
+      Op0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+      Op0.getOperand(0).getValueType() == MVT::v2f64) {
+    SDValue BV = Op0.getOperand(0);
+    unsigned Offset = N->getConstantOperandVal(1);
+    SDValue MOV = BV.getOperand(Offset < 2 ? 0 : 1);
+    if (MOV.getOpcode() == ARMISD::VMOVDRR)
+      return MOV.getOperand(ST->isLittle() ? Offset % 2 : 1 - Offset % 2);
+  }
+
   return SDValue();
 }
 
@@ -16340,7 +16354,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget);
   case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
   case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
-  case ISD::EXTRACT_VECTOR_ELT: return PerformExtractEltCombine(N, DCI);
+  case ISD::EXTRACT_VECTOR_ELT:
+    return PerformExtractEltCombine(N, DCI, Subtarget);
   case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
   case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
   case ARMISD::VDUP: return PerformVDUPCombine(N, DCI, Subtarget);

@@ -95,51 +95,38 @@ define <2 x i64> @vector_add_i64(<2 x i64> %lhs, <2 x i64> %rhs) {
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: .save {r7, lr}
; CHECK-MVE-NEXT: push {r7, lr}
; CHECK-MVE-NEXT: vmov d1, r2, r3
; CHECK-MVE-NEXT: add r2, sp, #8
; CHECK-MVE-NEXT: vldrw.u32 q1, [r2]
; CHECK-MVE-NEXT: vmov d0, r0, r1
; CHECK-MVE-NEXT: vmov r1, s2
; CHECK-MVE-NEXT: vmov r3, s6
; CHECK-MVE-NEXT: vmov r0, s3
; CHECK-MVE-NEXT: vmov r2, s7
; CHECK-MVE-NEXT: adds.w lr, r1, r3
; CHECK-MVE-NEXT: add.w r12, sp, #8
; CHECK-MVE-NEXT: vldrw.u32 q0, [r12]
; CHECK-MVE-NEXT: vmov lr, s2
; CHECK-MVE-NEXT: vmov r12, s3
; CHECK-MVE-NEXT: adds.w r2, r2, lr
; CHECK-MVE-NEXT: adc.w r12, r12, r3
; CHECK-MVE-NEXT: vmov r3, s0
; CHECK-MVE-NEXT: vmov r1, s4
; CHECK-MVE-NEXT: adc.w r12, r0, r2
; CHECK-MVE-NEXT: vmov r2, s1
; CHECK-MVE-NEXT: vmov r0, s5
; CHECK-MVE-NEXT: adds r1, r1, r3
; CHECK-MVE-NEXT: vmov q0[2], q0[0], r1, lr
; CHECK-MVE-NEXT: adcs r0, r2
; CHECK-MVE-NEXT: vmov q0[3], q0[1], r0, r12
; CHECK-MVE-NEXT: vmov r0, r1, d0
; CHECK-MVE-NEXT: vmov r2, r3, d1
; CHECK-MVE-NEXT: adds r0, r0, r3
; CHECK-MVE-NEXT: vmov q1[2], q1[0], r0, r2
; CHECK-MVE-NEXT: vmov r0, s1
; CHECK-MVE-NEXT: adcs r0, r1
; CHECK-MVE-NEXT: vmov q1[3], q1[1], r0, r12
; CHECK-MVE-NEXT: vmov r0, r1, d2
; CHECK-MVE-NEXT: vmov r2, r3, d3
; CHECK-MVE-NEXT: pop {r7, pc}
;
; CHECK-BE-LABEL: vector_add_i64:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: .save {r7, lr}
; CHECK-BE-NEXT: push {r7, lr}
; CHECK-BE-NEXT: vmov d1, r3, r2
; CHECK-BE-NEXT: add r2, sp, #8
; CHECK-BE-NEXT: vmov d0, r1, r0
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: vldrw.u32 q0, [r2]
; CHECK-BE-NEXT: vmov r1, s7
; CHECK-BE-NEXT: vmov r3, s3
; CHECK-BE-NEXT: vmov r0, s6
; CHECK-BE-NEXT: vmov r2, s2
; CHECK-BE-NEXT: adds.w r12, r1, r3
; CHECK-BE-NEXT: vmov r3, s5
; CHECK-BE-NEXT: vmov r1, s0
; CHECK-BE-NEXT: adc.w lr, r0, r2
; CHECK-BE-NEXT: vmov r0, s1
; CHECK-BE-NEXT: vmov r2, s4
; CHECK-BE-NEXT: adds r0, r0, r3
; CHECK-BE-NEXT: adcs r1, r2
; CHECK-BE-NEXT: vmov q0[2], q0[0], r1, lr
; CHECK-BE-NEXT: vmov q0[3], q0[1], r0, r12
; CHECK-BE-NEXT: add.w r12, sp, #8
; CHECK-BE-NEXT: vldrw.u32 q0, [r12]
; CHECK-BE-NEXT: vmov lr, s3
; CHECK-BE-NEXT: vmov r12, s2
; CHECK-BE-NEXT: adds.w lr, lr, r3
; CHECK-BE-NEXT: vmov r3, s0
; CHECK-BE-NEXT: adc.w r12, r12, r2
; CHECK-BE-NEXT: vmov r2, s1
; CHECK-BE-NEXT: adds r1, r1, r2
; CHECK-BE-NEXT: adcs r0, r3
; CHECK-BE-NEXT: vmov q0[2], q0[0], r0, r12
; CHECK-BE-NEXT: vmov q0[3], q0[1], r1, lr
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: vmov r1, r0, d2
; CHECK-BE-NEXT: vmov r3, r2, d3

@@ -149,24 +136,18 @@ define <2 x i64> @vector_add_i64(<2 x i64> %lhs, <2 x i64> %rhs) {
; CHECK-FP: @ %bb.0: @ %entry
; CHECK-FP-NEXT: .save {r7, lr}
; CHECK-FP-NEXT: push {r7, lr}
; CHECK-FP-NEXT: vmov d1, r2, r3
; CHECK-FP-NEXT: vmov d0, r0, r1
; CHECK-FP-NEXT: add r0, sp, #8
; CHECK-FP-NEXT: vldrw.u32 q1, [r0]
; CHECK-FP-NEXT: vmov r1, s2
; CHECK-FP-NEXT: vmov r0, s3
; CHECK-FP-NEXT: vmov r3, s6
; CHECK-FP-NEXT: vmov r2, s7
; CHECK-FP-NEXT: adds.w lr, r1, r3
; CHECK-FP-NEXT: vmov r3, s0
; CHECK-FP-NEXT: vmov r1, s4
; CHECK-FP-NEXT: adc.w r12, r0, r2
; CHECK-FP-NEXT: vmov r2, s1
; CHECK-FP-NEXT: vmov r0, s5
; CHECK-FP-NEXT: adds r1, r1, r3
; CHECK-FP-NEXT: vmov q0[2], q0[0], r1, lr
; CHECK-FP-NEXT: adcs r0, r2
; CHECK-FP-NEXT: vmov q0[3], q0[1], r0, r12
; CHECK-FP-NEXT: add.w r12, sp, #8
; CHECK-FP-NEXT: vldrw.u32 q0, [r12]
; CHECK-FP-NEXT: vmov lr, s2
; CHECK-FP-NEXT: vmov r12, s3
; CHECK-FP-NEXT: adds.w lr, lr, r2
; CHECK-FP-NEXT: vmov r2, s0
; CHECK-FP-NEXT: adc.w r12, r12, r3
; CHECK-FP-NEXT: vmov r3, s1
; CHECK-FP-NEXT: adds r0, r0, r2
; CHECK-FP-NEXT: adcs r1, r3
; CHECK-FP-NEXT: vmov q0[2], q0[0], r0, lr
; CHECK-FP-NEXT: vmov q0[3], q0[1], r1, r12
; CHECK-FP-NEXT: vmov r0, r1, d0
; CHECK-FP-NEXT: vmov r2, r3, d1
; CHECK-FP-NEXT: pop {r7, pc}