[AArch64] Vector FCOPYSIGN supports Custom-lowering: mark it as such.

There's a bunch of code in LowerFCOPYSIGN that does smart lowering, and
is actually already vector-aware; let's use it instead of scalarizing!

The only interesting change is that for v2f32, we previously always
used v4i32 as the integer vector type.
Use v2i32 instead, and mark FCOPYSIGN as Custom.

llvm-svn: 243926
This commit is contained in:
Ahmed Bougacha 2015-08-04 00:42:34 +00:00
parent f65371a235
commit b0ae36f0d1
2 changed files with 17 additions and 46 deletions

View File

@ -632,6 +632,9 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
setOperationAction(ISD::FLOG10, VT.getSimpleVT(), Expand);
setOperationAction(ISD::FEXP, VT.getSimpleVT(), Expand);
setOperationAction(ISD::FEXP2, VT.getSimpleVT(), Expand);
// But we do support custom-lowering for FCOPYSIGN.
setOperationAction(ISD::FCOPYSIGN, VT.getSimpleVT(), Custom);
}
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
@ -3651,7 +3654,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
SDValue VecVal1, VecVal2;
if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
EltVT = MVT::i32;
VecVT = MVT::v4i32;
VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
EltMask = 0x80000000ULL;
if (!VT.isVector()) {

View File

@ -8,12 +8,8 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
define <1 x float> @test_copysign_v1f32_v1f32(<1 x float> %a, <1 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v1f32_v1f32:
; CHECK: ; BB#0:
; CHECK-NEXT: mov s2, v1[1]
; CHECK-NEXT: mov s3, v0[1]
; CHECK-NEXT: movi.4s v4, #0x80, lsl #24
; CHECK-NEXT: bit.16b v3, v2, v4
; CHECK-NEXT: bit.16b v0, v1, v4
; CHECK-NEXT: ins.s v0[1], v3[0]
; CHECK-NEXT: movi.2s v2, #0x80, lsl #24
; CHECK-NEXT: bit.8b v0, v1, v2
; CHECK-NEXT: ret
%r = call <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %b)
ret <1 x float> %r
@ -68,12 +64,8 @@ declare <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b) #0
define <2 x float> @test_copysign_v2f32_v2f32(<2 x float> %a, <2 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v2f32_v2f32:
; CHECK: ; BB#0:
; CHECK-NEXT: mov s2, v1[1]
; CHECK-NEXT: mov s3, v0[1]
; CHECK-NEXT: movi.4s v4, #0x80, lsl #24
; CHECK-NEXT: bit.16b v3, v2, v4
; CHECK-NEXT: bit.16b v0, v1, v4
; CHECK-NEXT: ins.s v0[1], v3[0]
; CHECK-NEXT: movi.2s v2, #0x80, lsl #24
; CHECK-NEXT: bit.8b v0, v1, v2
; CHECK-NEXT: ret
%r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b)
ret <2 x float> %r
@ -103,20 +95,8 @@ declare <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) #0
define <4 x float> @test_copysign_v4f32_v4f32(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v4f32_v4f32:
; CHECK: ; BB#0:
; CHECK-NEXT: mov s2, v1[1]
; CHECK-NEXT: mov s3, v0[1]
; CHECK-NEXT: movi.4s v4, #0x80, lsl #24
; CHECK-NEXT: mov s5, v0[2]
; CHECK-NEXT: bit.16b v3, v2, v4
; CHECK-NEXT: mov s2, v0[3]
; CHECK-NEXT: mov s6, v1[2]
; CHECK-NEXT: bit.16b v0, v1, v4
; CHECK-NEXT: bit.16b v5, v6, v4
; CHECK-NEXT: mov s1, v1[3]
; CHECK-NEXT: ins.s v0[1], v3[0]
; CHECK-NEXT: ins.s v0[2], v5[0]
; CHECK-NEXT: bit.16b v2, v1, v4
; CHECK-NEXT: ins.s v0[3], v2[0]
; CHECK-NEXT: movi.4s v2, #0x80, lsl #24
; CHECK-NEXT: bit.16b v0, v1, v2
; CHECK-NEXT: ret
%r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
ret <4 x float> %r
@ -174,13 +154,9 @@ define <2 x double> @test_copysign_v2f64_v232(<2 x double> %a, <2 x float> %b) #
define <2 x double> @test_copysign_v2f64_v2f64(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v2f64_v2f64:
; CHECK: ; BB#0:
; CHECK-NEXT: mov d2, v1[1]
; CHECK-NEXT: mov d3, v0[1]
; CHECK-NEXT: movi.2d v4, #0000000000000000
; CHECK-NEXT: fneg.2d v4, v4
; CHECK-NEXT: bit.16b v3, v2, v4
; CHECK-NEXT: bit.16b v0, v1, v4
; CHECK-NEXT: ins.d v0[1], v3[0]
; CHECK-NEXT: movi.2d v2, #0000000000000000
; CHECK-NEXT: fneg.2d v2, v2
; CHECK-NEXT: bit.16b v0, v1, v2
; CHECK-NEXT: ret
%r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
ret <2 x double> %r
@ -221,18 +197,10 @@ define <4 x double> @test_copysign_v4f64_v4f32(<4 x double> %a, <4 x float> %b)
define <4 x double> @test_copysign_v4f64_v4f64(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v4f64_v4f64:
; CHECK: ; BB#0:
; CHECK-NEXT: mov d4, v2[1]
; CHECK-NEXT: mov d5, v0[1]
; CHECK-NEXT: movi.2d v6, #0000000000000000
; CHECK-NEXT: fneg.2d v6, v6
; CHECK-NEXT: bit.16b v5, v4, v6
; CHECK-NEXT: mov d4, v3[1]
; CHECK-NEXT: bit.16b v0, v2, v6
; CHECK-NEXT: mov d2, v1[1]
; CHECK-NEXT: bit.16b v2, v4, v6
; CHECK-NEXT: bit.16b v1, v3, v6
; CHECK-NEXT: ins.d v0[1], v5[0]
; CHECK-NEXT: ins.d v1[1], v2[0]
; CHECK-NEXT: movi.2d v4, #0000000000000000
; CHECK-NEXT: fneg.2d v4, v4
; CHECK-NEXT: bit.16b v0, v2, v4
; CHECK-NEXT: bit.16b v1, v3, v4
; CHECK-NEXT: ret
%r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b)
ret <4 x double> %r