forked from OSchip/llvm-project
[ARM NEON] Define vfms_f32 on ARM, and all vfms using vfma.
r259537 added vfma/vfms to armv7, but the builtin was only lowered on the AArch64 side. Instead of supporting it on ARM, get rid of it. The vfms builtin lowered to: %nb = fsub float -0.0, %b %r = @llvm.fma.f32(%a, %nb, %c) Instead, define the operation in terms of vfma, and swap the multiplicands. It now lowers to: %na = fsub float -0.0, %a %r = @llvm.fma.f32(%na, %b, %c) This matches the instruction more closely, and lets current LLVM generate the "natural" operand ordering: fmls.2s v0, v1, v2 instead of the crooked (but equivalent): fmls.2s v0, v2, v1 Except for theses changes, assembly is identical. LLVM accepts both commutations, and the LLVM tests in: test/CodeGen/AArch64/arm64-fmadd.ll test/CodeGen/AArch64/fp-dp3.ll test/CodeGen/AArch64/neon-fma.ll test/CodeGen/ARM/fusedMAC.ll already check either the new one only, or both. Also verified against the test-suite unittests. llvm-svn: 266807
This commit is contained in:
parent
e885d5e4d3
commit
1d9de10130
|
@ -339,6 +339,7 @@ def OP_MLALHi : Op<(call "vmlal", $p0, (call "vget_high", $p1),
|
|||
(call "vget_high", $p2))>;
|
||||
def OP_MLALHi_N : Op<(call "vmlal_n", $p0, (call "vget_high", $p1), $p2)>;
|
||||
def OP_MLS : Op<(op "-", $p0, (op "*", $p1, $p2))>;
|
||||
def OP_FMLS : Op<(call "vfma", $p0, (op "-", $p1), $p2)>;
|
||||
def OP_MLSL : Op<(op "-", $p0, (call "vmull", $p1, $p2))>;
|
||||
def OP_MLSLHi : Op<(call "vmlsl", $p0, (call "vget_high", $p1),
|
||||
(call "vget_high", $p2))>;
|
||||
|
@ -347,7 +348,7 @@ def OP_MUL_N : Op<(op "*", $p0, (dup $p1))>;
|
|||
def OP_MLA_N : Op<(op "+", $p0, (op "*", $p1, (dup $p2)))>;
|
||||
def OP_MLS_N : Op<(op "-", $p0, (op "*", $p1, (dup $p2)))>;
|
||||
def OP_FMLA_N : Op<(call "vfma", $p0, $p1, (dup $p2))>;
|
||||
def OP_FMLS_N : Op<(call "vfms", $p0, $p1, (dup $p2))>;
|
||||
def OP_FMLS_N : Op<(call "vfma", $p0, (op "-", $p1), (dup $p2))>;
|
||||
def OP_MLAL_N : Op<(op "+", $p0, (call "vmull", $p1, (dup $p2)))>;
|
||||
def OP_MLSL_N : Op<(op "-", $p0, (call "vmull", $p1, (dup $p2)))>;
|
||||
def OP_MUL_LN : Op<(op "*", $p0, (splat $p1, $p2))>;
|
||||
|
@ -377,8 +378,8 @@ def OP_QRDMLAH : Op<(call "vqadd", $p0, (call "vqrdmulh", $p1, $p2))>;
|
|||
def OP_QRDMLSH : Op<(call "vqsub", $p0, (call "vqrdmulh", $p1, $p2))>;
|
||||
def OP_QRDMLAH_LN : Op<(call "vqadd", $p0, (call "vqrdmulh", $p1, (splat $p2, $p3)))>;
|
||||
def OP_QRDMLSH_LN : Op<(call "vqsub", $p0, (call "vqrdmulh", $p1, (splat $p2, $p3)))>;
|
||||
def OP_FMS_LN : Op<(call "vfma_lane", $p0, $p1, (op "-", $p2), $p3)>;
|
||||
def OP_FMS_LNQ : Op<(call "vfma_laneq", $p0, $p1, (op "-", $p2), $p3)>;
|
||||
def OP_FMS_LN : Op<(call "vfma_lane", $p0, (op "-", $p1), $p2, $p3)>;
|
||||
def OP_FMS_LNQ : Op<(call "vfma_laneq", $p0, (op "-", $p1), $p2, $p3)>;
|
||||
def OP_TRN1 : Op<(shuffle $p0, $p1, (interleave (decimate mask0, 2),
|
||||
(decimate mask1, 2)))>;
|
||||
def OP_ZIP1 : Op<(shuffle $p0, $p1, (lowhalf (interleave mask0, mask1)))>;
|
||||
|
@ -826,7 +827,7 @@ def VREINTERPRET
|
|||
|
||||
let ArchGuard = "defined(__ARM_FEATURE_FMA)" in {
|
||||
def VFMA : SInst<"vfma", "dddd", "fQf">;
|
||||
def VFMS : SInst<"vfms", "dddd", "fQf">;
|
||||
def VFMS : SOpInst<"vfms", "dddd", "fQf", OP_FMLS>;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -911,7 +912,7 @@ def FDIV : IOpInst<"vdiv", "ddd", "fdQfQd", OP_DIV>;
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Vector fused multiply-add operations
|
||||
def FMLA : SInst<"vfma", "dddd", "dQd">;
|
||||
def FMLS : SInst<"vfms", "dddd", "dQd">;
|
||||
def FMLS : SOpInst<"vfms", "dddd", "dQd", OP_FMLS>;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// MUL, MLA, MLS, FMA, FMS definitions with scalar argument
|
||||
|
|
|
@ -5319,22 +5319,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|||
Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
|
||||
return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
|
||||
}
|
||||
case NEON::BI__builtin_neon_vfms_v:
|
||||
case NEON::BI__builtin_neon_vfmsq_v: { // Only used for FP types
|
||||
// FIXME: probably remove when we no longer support aarch64_simd.h
|
||||
// (arm_neon.h delegates to vfma).
|
||||
|
||||
// The ARM builtins (and instructions) have the addend as the first
|
||||
// operand, but the 'fma' intrinsics have it last. Swap it around here.
|
||||
Value *Subtrahend = Ops[0];
|
||||
Value *Multiplicand = Ops[2];
|
||||
Ops[0] = Multiplicand;
|
||||
Ops[2] = Subtrahend;
|
||||
Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
|
||||
Ops[1] = Builder.CreateFNeg(Ops[1]);
|
||||
Int = Intrinsic::fma;
|
||||
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmls");
|
||||
}
|
||||
case NEON::BI__builtin_neon_vmull_v:
|
||||
// FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
|
||||
Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
|
||||
|
|
|
@ -333,10 +333,10 @@ float32x4_t test_vfmaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 1>
|
||||
// CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
|
||||
|
@ -348,10 +348,10 @@ float32x2_t test_vfms_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
|
||||
// CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
// CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
|
||||
|
@ -363,10 +363,10 @@ float32x4_t test_vfmsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
|
||||
// CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
|
||||
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
|
||||
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
|
||||
|
@ -378,10 +378,10 @@ float32x2_t test_vfms_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
|
||||
// CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
|
||||
// CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
|
||||
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
|
||||
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
|
||||
|
@ -421,10 +421,10 @@ float64x2_t test_vfmaq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) #0 {
|
||||
// CHECK: [[SUB:%.*]] = fsub <1 x double> <double -0.000000e+00>, %v
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <2 x i32> zeroinitializer
|
||||
// CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
|
||||
|
@ -436,10 +436,10 @@ float64x2_t test_vfmsq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) #0 {
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
|
||||
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
|
||||
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
|
||||
|
@ -461,33 +461,33 @@ float32_t test_vfmas_laneq_f32(float32_t a, float32_t b, float32x4_t v) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) #0 {
|
||||
// CHECK: [[SUB:%.*]] = fsub <1 x double> <double -0.000000e+00>, %v
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8>
|
||||
// CHECK: [[SUB:%.*]] = fsub double -0.000000e+00, %b
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %v to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
|
||||
// CHECK: [[EXTRACT:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
|
||||
// CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a)
|
||||
// CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double [[SUB]], double [[EXTRACT]], double %a)
|
||||
// CHECK: ret double [[TMP2]]
|
||||
float64_t test_vfmsd_lane_f64(float64_t a, float64_t b, float64x1_t v) {
|
||||
return vfmsd_lane_f64(a, b, v, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) #0 {
|
||||
// CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8>
|
||||
// CHECK: [[SUB:%.*]] = fsub float -0.000000e+00, %b
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
|
||||
// CHECK: [[EXTRACT:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
|
||||
// CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float %b, float [[EXTRACT]], float %a)
|
||||
// CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float [[SUB]], float [[EXTRACT]], float %a)
|
||||
// CHECK: ret float [[TMP2]]
|
||||
float32_t test_vfmss_laneq_f32(float32_t a, float32_t b, float32x4_t v) {
|
||||
return vfmss_laneq_f32(a, b, v, 3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) #0 {
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8>
|
||||
// CHECK: [[SUB:%.*]] = fsub double -0.000000e+00, %b
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
|
||||
// CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
|
||||
// CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a)
|
||||
// CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double [[SUB]], double [[EXTRACT]], double %a)
|
||||
// CHECK: ret double [[TMP2]]
|
||||
float64_t test_vfmsd_laneq_f64(float64_t a, float64_t b, float64x2_t v) {
|
||||
return vfmsd_laneq_f64(a, b, v, 1);
|
||||
|
@ -1955,10 +1955,10 @@ float32x4_t test_vfmaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v)
|
|||
}
|
||||
|
||||
// CHECK-LABEL: define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 {
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer
|
||||
// CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
|
||||
|
@ -1970,10 +1970,10 @@ float32x2_t test_vfms_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
|
||||
// CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> zeroinitializer
|
||||
// CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
|
||||
|
@ -1985,10 +1985,10 @@ float32x4_t test_vfmsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
|
||||
// CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
|
||||
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
|
||||
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
|
||||
|
@ -2000,10 +2000,10 @@ float32x2_t test_vfms_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
|
||||
// CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
|
||||
// CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
|
||||
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
|
||||
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
|
||||
|
@ -2029,10 +2029,10 @@ float64x2_t test_vfmaq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v)
|
|||
}
|
||||
|
||||
// CHECK-LABEL: define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) #0 {
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
|
||||
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
|
||||
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
|
||||
|
@ -3508,35 +3508,35 @@ float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t n) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) #0 {
|
||||
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
|
||||
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %n, i32 0
|
||||
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %n, i32 1
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> [[VECINIT1_I]] to <8 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
|
||||
// CHECK: [[TMP4:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[TMP3]]
|
||||
// CHECK: [[FMLS_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
|
||||
// CHECK: [[FMLS1_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
|
||||
// CHECK: [[FMLS2_I_I:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLS_I_I]], <2 x float> [[TMP4]], <2 x float> [[FMLS1_I_I]]) #2
|
||||
// CHECK: ret <2 x float> [[FMLS2_I_I]]
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
|
||||
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
|
||||
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
|
||||
// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #2
|
||||
// CHECK: ret <2 x float> [[TMP6]]
|
||||
float32x2_t test_vfms_n_f32(float32x2_t a, float32x2_t b, float32_t n) {
|
||||
return vfms_n_f32(a, b, n);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 {
|
||||
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
|
||||
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0
|
||||
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %n, i32 1
|
||||
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %n, i32 2
|
||||
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %n, i32 3
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <16 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
|
||||
// CHECK: [[TMP4:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[TMP3]]
|
||||
// CHECK: [[FMLS_I_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
|
||||
// CHECK: [[FMLS1_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
|
||||
// CHECK: [[FMLS2_I_I:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLS_I_I]], <4 x float> [[TMP4]], <4 x float> [[FMLS1_I_I]]) #2
|
||||
// CHECK: ret <4 x float> [[FMLS2_I_I]]
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
|
||||
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
|
||||
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
|
||||
// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #2
|
||||
// CHECK: ret <4 x float> [[TMP6]]
|
||||
float32x4_t test_vfmsq_n_f32(float32x4_t a, float32x4_t b, float32_t n) {
|
||||
return vfmsq_n_f32(a, b, n);
|
||||
}
|
||||
|
|
|
@ -227,17 +227,17 @@ float64x2_t test_vfmaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: define <2 x double> @test_vfmsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
|
||||
// CHECK: [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b
|
||||
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
|
||||
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <16 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
|
||||
// CHECK: [[TMP4:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP3]]
|
||||
// CHECK: [[FMLS_I_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
|
||||
// CHECK: [[FMLS1_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
|
||||
// CHECK: [[FMLS2_I_I:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[FMLS_I_I]], <2 x double> [[TMP4]], <2 x double> [[FMLS1_I_I]]) #2
|
||||
// CHECK: ret <2 x double> [[FMLS2_I_I]]
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
|
||||
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
|
||||
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
|
||||
// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #2
|
||||
// CHECK: ret <2 x double> [[TMP6]]
|
||||
float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
|
||||
return vfmsq_n_f64(a, b, c);
|
||||
}
|
||||
|
|
|
@ -674,43 +674,43 @@ float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
|
|||
return vfmaq_f64(v1, v2, v3);
|
||||
}
|
||||
// CHECK-LABEL: define <2 x float> @test_vfms_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 {
|
||||
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v2
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
|
||||
// CHECK: [[TMP4:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[TMP3]]
|
||||
// CHECK: [[FMLS_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
|
||||
// CHECK: [[FMLS1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
|
||||
// CHECK: [[FMLS2_I:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLS_I]], <2 x float> [[TMP4]], <2 x float> [[FMLS1_I]]) #4
|
||||
// CHECK: ret <2 x float> [[FMLS2_I]]
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
|
||||
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
|
||||
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
|
||||
// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4
|
||||
// CHECK: ret <2 x float> [[TMP6]]
|
||||
float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
|
||||
return vfms_f32(v1, v2, v3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define <4 x float> @test_vfmsq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 {
|
||||
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v2
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
|
||||
// CHECK: [[TMP4:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[TMP3]]
|
||||
// CHECK: [[FMLS_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
|
||||
// CHECK: [[FMLS1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
|
||||
// CHECK: [[FMLS2_I:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLS_I]], <4 x float> [[TMP4]], <4 x float> [[FMLS1_I]]) #4
|
||||
// CHECK: ret <4 x float> [[FMLS2_I]]
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
|
||||
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
|
||||
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
|
||||
// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4
|
||||
// CHECK: ret <4 x float> [[TMP6]]
|
||||
float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
|
||||
return vfmsq_f32(v1, v2, v3);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define <2 x double> @test_vfmsq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 {
|
||||
// CHECK: [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v2
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
|
||||
// CHECK: [[TMP4:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP3]]
|
||||
// CHECK: [[FMLS_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
|
||||
// CHECK: [[FMLS1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
|
||||
// CHECK: [[FMLS2_I:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[FMLS_I]], <2 x double> [[TMP4]], <2 x double> [[FMLS1_I]]) #4
|
||||
// CHECK: ret <2 x double> [[FMLS2_I]]
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
|
||||
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
|
||||
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
|
||||
// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #4
|
||||
// CHECK: ret <2 x double> [[TMP6]]
|
||||
float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
|
||||
return vfmsq_f64(v1, v2, v3);
|
||||
}
|
||||
|
@ -22133,15 +22133,15 @@ float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: define <1 x double> @test_vfms_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 {
|
||||
// CHECK: [[SUB_I:%.*]] = fsub <1 x double> <double -0.000000e+00>, %b
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
|
||||
// CHECK: [[TMP4:%.*]] = fsub <1 x double> <double -0.000000e+00>, [[TMP3]]
|
||||
// CHECK: [[FMLS_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
|
||||
// CHECK: [[FMLS1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
|
||||
// CHECK: [[FMLS2_I:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLS_I]], <1 x double> [[TMP4]], <1 x double> [[FMLS1_I]]) #4
|
||||
// CHECK: ret <1 x double> [[FMLS2_I]]
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
|
||||
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
|
||||
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
|
||||
// CHECK: [[TMP6:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[TMP4]], <1 x double> [[TMP5]], <1 x double> [[TMP3]]) #4
|
||||
// CHECK: ret <1 x double> [[TMP6]]
|
||||
float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
|
||||
return vfms_f64(a, b, c);
|
||||
}
|
||||
|
|
|
@ -178,11 +178,11 @@ float64_t test_vfmad_laneq_f64(float64_t a, float64_t b, float64x2_t c) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: define float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %c) #0 {
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %c
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8>
|
||||
// CHECK: [[SUB:%.*]] = fsub float -0.000000e+00, %b
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %c to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
|
||||
// CHECK: [[EXTRACT:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
|
||||
// CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float %b, float [[EXTRACT]], float %a)
|
||||
// CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float [[SUB]], float [[EXTRACT]], float %a)
|
||||
// CHECK: ret float [[TMP2]]
|
||||
float32_t test_vfmss_lane_f32(float32_t a, float32_t b, float32x2_t c) {
|
||||
return vfmss_lane_f32(a, b, c, 1);
|
||||
|
@ -203,10 +203,10 @@ float64x1_t test_vfma_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: define <1 x double> @test_vfms_lane_f64(<1 x double> %a, <1 x double> %b, <1 x double> %v) #0 {
|
||||
// CHECK: [[SUB:%.*]] = fsub <1 x double> <double -0.000000e+00>, %v
|
||||
// CHECK: [[SUB:%.*]] = fsub <1 x double> <double -0.000000e+00>, %b
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
|
||||
// CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
|
||||
// CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
|
||||
|
@ -233,10 +233,10 @@ float64x1_t test_vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
|
|||
}
|
||||
|
||||
// CHECK-LABEL: define <1 x double> @test_vfms_laneq_f64(<1 x double> %a, <1 x double> %b, <2 x double> %v) #0 {
|
||||
// CHECK: [[SUB:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
|
||||
// CHECK: [[SUB:%.*]] = fsub <1 x double> <double -0.000000e+00>, %b
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double
|
||||
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double
|
||||
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
|
||||
|
|
|
@ -3429,6 +3429,34 @@ float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
|
|||
return vfmaq_f32(a, b, c);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define <2 x float> @test_vfms_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 {
|
||||
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
|
||||
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
|
||||
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
|
||||
// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4
|
||||
// CHECK: ret <2 x float> [[TMP6]]
|
||||
float32x2_t test_vfms_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
|
||||
return vfms_f32(a, b, c);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define <4 x float> @test_vfmsq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
|
||||
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
|
||||
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
|
||||
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
|
||||
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
|
||||
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
|
||||
// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
|
||||
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
|
||||
// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4
|
||||
// CHECK: ret <4 x float> [[TMP6]]
|
||||
float32x4_t test_vfmsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
|
||||
return vfmsq_f32(a, b, c);
|
||||
}
|
||||
|
||||
|
||||
// CHECK-LABEL: define <8 x i8> @test_vget_high_s8(<16 x i8> %a) #0 {
|
||||
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
|
|
Loading…
Reference in New Issue