diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index fbfb472d515c..3111ff598b63 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -326,6 +326,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setTargetDAGCombine(ISD::ZERO_EXTEND);
     setTargetDAGCombine(ISD::ANY_EXTEND);
     setTargetDAGCombine(ISD::SELECT_CC);
+    setTargetDAGCombine(ISD::BUILD_VECTOR);
   }
 
   computeRegisterProperties();
@@ -4342,6 +4343,31 @@ static SDValue PerformORCombine(SDNode *N,
   return SDValue();
 }
 
+/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
+/// ISD::BUILD_VECTOR.
+static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG) {
+  // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
+  // VMOVRRD is introduced when legalizing i64 types.  It forces the i64 value
+  // into a pair of GPRs, which is fine when the value is used as a scalar,
+  // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
+  if (N->getNumOperands() == 2) {
+    SDValue Op0 = N->getOperand(0);
+    SDValue Op1 = N->getOperand(1);
+    if (Op0.getOpcode() == ISD::BIT_CONVERT)
+      Op0 = Op0.getOperand(0);
+    if (Op1.getOpcode() == ISD::BIT_CONVERT)
+      Op1 = Op1.getOperand(0);
+    if (Op0.getOpcode() == ARMISD::VMOVRRD &&
+        Op0.getNode() == Op1.getNode() &&
+        Op0.getResNo() == 0 && Op1.getResNo() == 1) {
+      return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+                         N->getValueType(0), Op0.getOperand(0));
+    }
+  }
+
+  return SDValue();
+}
+
 /// PerformVMOVRRDCombine - Target-specific dag combine xforms for
 /// ARMISD::VMOVRRD.
 static SDValue PerformVMOVRRDCombine(SDNode *N,
@@ -4760,6 +4786,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::SUB:        return PerformSUBCombine(N, DCI);
   case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
   case ISD::OR:         return PerformORCombine(N, DCI, Subtarget);
+  case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI.DAG);
   case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
   case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
   case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
diff --git a/llvm/test/CodeGen/ARM/2010-09-17-vmovrrd-combine.ll b/llvm/test/CodeGen/ARM/2010-09-17-vmovrrd-combine.ll
new file mode 100644
index 000000000000..a210986c0b7f
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/2010-09-17-vmovrrd-combine.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; Radar 8407927: Make sure that VMOVRRD gets optimized away when the result is
+; converted back to be used as a vector type.
+
+; CHECK: test:
+define <4 x i32> @test() nounwind {
+entry:
+  br i1 undef, label %bb1, label %bb2
+
+bb1:
+  %0 = bitcast <2 x i64> zeroinitializer to <2 x double>
+  %1 = extractelement <2 x double> %0, i32 0
+  %2 = bitcast double %1 to i64
+  %3 = insertelement <1 x i64> undef, i64 %2, i32 0
+; CHECK-NOT: vmov s
+; CHECK: vext.8
+  %4 = shufflevector <1 x i64> %3, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
+  %tmp2006.3 = bitcast <2 x i64> %4 to <16 x i8>
+  %5 = shufflevector <16 x i8> %tmp2006.3, <16 x i8> undef, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+  %tmp2004.3 = bitcast <16 x i8> %5 to <4 x i32>
+  br i1 undef, label %bb2, label %bb1
+
+bb2:
+  %result = phi <4 x i32> [ undef, %entry ], [ %tmp2004.3, %bb1 ]
+  ret <4 x i32> %result
+}