forked from OSchip/llvm-project
[AArch64] Refactor the Neon vector/scalar floating-point convert intrinsics so
that they use float/double rather than the vector equivalents when appropriate.

llvm-svn: 196930
This commit is contained in:
parent
763ec2ba45
commit
7a9bba442f
|
@ -36,11 +36,11 @@ def int_aarch64_neon_xtn :
|
||||||
// Vector floating-point convert
|
// Vector floating-point convert
|
||||||
def int_aarch64_neon_frintn : Neon_1Arg_Intrinsic;
|
def int_aarch64_neon_frintn : Neon_1Arg_Intrinsic;
|
||||||
def int_aarch64_neon_fsqrt : Neon_1Arg_Intrinsic;
|
def int_aarch64_neon_fsqrt : Neon_1Arg_Intrinsic;
|
||||||
def int_aarch64_neon_fcvtxn :
|
def int_aarch64_neon_vcvtxn :
|
||||||
Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
|
Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
|
||||||
def int_aarch64_neon_fcvtzs :
|
def int_aarch64_neon_vcvtzs :
|
||||||
Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
|
Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
|
||||||
def int_aarch64_neon_fcvtzu :
|
def int_aarch64_neon_vcvtzu :
|
||||||
Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
|
Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
|
||||||
|
|
||||||
// Vector maxNum (Floating Point)
|
// Vector maxNum (Floating Point)
|
||||||
|
@ -240,6 +240,30 @@ def int_aarch64_neon_vcvtf32_u32 :
|
||||||
def int_aarch64_neon_vcvtf64_u64 :
|
def int_aarch64_neon_vcvtf64_u64 :
|
||||||
Intrinsic<[llvm_double_ty], [llvm_v1i64_ty], [IntrNoMem]>;
|
Intrinsic<[llvm_double_ty], [llvm_v1i64_ty], [IntrNoMem]>;
|
||||||
|
|
||||||
|
// Scalar Floating-point Convert
|
||||||
|
def int_aarch64_neon_fcvtxn :
|
||||||
|
Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||||
|
def int_aarch64_neon_fcvtns :
|
||||||
|
Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
|
||||||
|
def int_aarch64_neon_fcvtnu :
|
||||||
|
Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
|
||||||
|
def int_aarch64_neon_fcvtps :
|
||||||
|
Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
|
||||||
|
def int_aarch64_neon_fcvtpu :
|
||||||
|
Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
|
||||||
|
def int_aarch64_neon_fcvtms :
|
||||||
|
Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
|
||||||
|
def int_aarch64_neon_fcvtmu :
|
||||||
|
Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
|
||||||
|
def int_aarch64_neon_fcvtas :
|
||||||
|
Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
|
||||||
|
def int_aarch64_neon_fcvtau :
|
||||||
|
Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
|
||||||
|
def int_aarch64_neon_fcvtzs :
|
||||||
|
Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
|
||||||
|
def int_aarch64_neon_fcvtzu :
|
||||||
|
Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
|
||||||
|
|
||||||
// Scalar Floating-point Reciprocal Exponent
|
// Scalar Floating-point Reciprocal Exponent
|
||||||
def int_aarch64_neon_vrecpx : Neon_1Arg_Intrinsic;
|
def int_aarch64_neon_vrecpx : Neon_1Arg_Intrinsic;
|
||||||
|
|
||||||
|
|
|
@ -4307,18 +4307,23 @@ multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
|
||||||
|
|
||||||
class Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<SDPatternOperator opnode,
|
class Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<SDPatternOperator opnode,
|
||||||
Instruction INSTD>
|
Instruction INSTD>
|
||||||
: Pat<(v1f32 (opnode (v1f64 FPR64:$Rn))),
|
: Pat<(f32 (opnode (f64 FPR64:$Rn))),
|
||||||
(INSTD FPR64:$Rn)>;
|
(INSTD FPR64:$Rn)>;
|
||||||
|
|
||||||
multiclass Neon_Scalar2SameMisc_fcvt_SD_size_patterns<SDPatternOperator opnode,
|
multiclass Neon_Scalar2SameMisc_fcvt_SD_size_patterns<SDPatternOperator opnode,
|
||||||
Instruction INSTS,
|
Instruction INSTS,
|
||||||
Instruction INSTD> {
|
Instruction INSTD> {
|
||||||
def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn))),
|
def : Pat<(v1i32 (opnode (f32 FPR32:$Rn))),
|
||||||
(INSTS FPR32:$Rn)>;
|
(INSTS FPR32:$Rn)>;
|
||||||
def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))),
|
def : Pat<(v1i64 (opnode (f64 FPR64:$Rn))),
|
||||||
(INSTD FPR64:$Rn)>;
|
(INSTD FPR64:$Rn)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class Neon_Scalar2SameMisc_vcvt_D_size_patterns<SDPatternOperator opnode,
|
||||||
|
Instruction INSTD>
|
||||||
|
: Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))),
|
||||||
|
(INSTD FPR64:$Rn)>;
|
||||||
|
|
||||||
multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator Sopnode,
|
multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator Sopnode,
|
||||||
SDPatternOperator Dopnode,
|
SDPatternOperator Dopnode,
|
||||||
Instruction INSTS,
|
Instruction INSTS,
|
||||||
|
@ -4982,44 +4987,56 @@ def : Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<int_aarch64_neon_fcvtxn,
|
||||||
FCVTXN>;
|
FCVTXN>;
|
||||||
|
|
||||||
defm FCVTNS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11010, "fcvtns">;
|
defm FCVTNS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11010, "fcvtns">;
|
||||||
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtns,
|
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtns,
|
||||||
FCVTNSss, FCVTNSdd>;
|
FCVTNSss, FCVTNSdd>;
|
||||||
|
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtns, FCVTNSdd>;
|
||||||
|
|
||||||
defm FCVTNU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11010, "fcvtnu">;
|
defm FCVTNU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11010, "fcvtnu">;
|
||||||
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtnu,
|
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtnu,
|
||||||
FCVTNUss, FCVTNUdd>;
|
FCVTNUss, FCVTNUdd>;
|
||||||
|
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtnu, FCVTNUdd>;
|
||||||
|
|
||||||
defm FCVTMS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11011, "fcvtms">;
|
defm FCVTMS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11011, "fcvtms">;
|
||||||
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtms,
|
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtms,
|
||||||
FCVTMSss, FCVTMSdd>;
|
FCVTMSss, FCVTMSdd>;
|
||||||
|
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtms, FCVTMSdd>;
|
||||||
|
|
||||||
defm FCVTMU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11011, "fcvtmu">;
|
defm FCVTMU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11011, "fcvtmu">;
|
||||||
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtmu,
|
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtmu,
|
||||||
FCVTMUss, FCVTMUdd>;
|
FCVTMUss, FCVTMUdd>;
|
||||||
|
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtmu, FCVTMUdd>;
|
||||||
|
|
||||||
defm FCVTAS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11100, "fcvtas">;
|
defm FCVTAS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11100, "fcvtas">;
|
||||||
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtas,
|
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtas,
|
||||||
FCVTASss, FCVTASdd>;
|
FCVTASss, FCVTASdd>;
|
||||||
|
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtas, FCVTASdd>;
|
||||||
|
|
||||||
defm FCVTAU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11100, "fcvtau">;
|
defm FCVTAU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11100, "fcvtau">;
|
||||||
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtau,
|
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtau,
|
||||||
FCVTAUss, FCVTAUdd>;
|
FCVTAUss, FCVTAUdd>;
|
||||||
|
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtau, FCVTAUdd>;
|
||||||
|
|
||||||
defm FCVTPS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11010, "fcvtps">;
|
defm FCVTPS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11010, "fcvtps">;
|
||||||
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtps,
|
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtps,
|
||||||
FCVTPSss, FCVTPSdd>;
|
FCVTPSss, FCVTPSdd>;
|
||||||
|
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtps, FCVTPSdd>;
|
||||||
|
|
||||||
defm FCVTPU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11010, "fcvtpu">;
|
defm FCVTPU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11010, "fcvtpu">;
|
||||||
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_arm_neon_vcvtpu,
|
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtpu,
|
||||||
FCVTPUss, FCVTPUdd>;
|
FCVTPUss, FCVTPUdd>;
|
||||||
|
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtpu, FCVTPUdd>;
|
||||||
|
|
||||||
defm FCVTZS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11011, "fcvtzs">;
|
defm FCVTZS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11011, "fcvtzs">;
|
||||||
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzs,
|
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzs,
|
||||||
FCVTZSss, FCVTZSdd>;
|
FCVTZSss, FCVTZSdd>;
|
||||||
|
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzs,
|
||||||
|
FCVTZSdd>;
|
||||||
|
|
||||||
defm FCVTZU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11011, "fcvtzu">;
|
defm FCVTZU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11011, "fcvtzu">;
|
||||||
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzu,
|
defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzu,
|
||||||
FCVTZUss, FCVTZUdd>;
|
FCVTZUss, FCVTZUdd>;
|
||||||
|
def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzu,
|
||||||
|
FCVTZUdd>;
|
||||||
|
|
||||||
// Patterns For Convert Instructions Between v1f64 and v1i64
|
// Patterns For Convert Instructions Between v1f64 and v1i64
|
||||||
class Neon_Scalar2SameMisc_cvtf_v1f64_pattern<SDPatternOperator opnode,
|
class Neon_Scalar2SameMisc_cvtf_v1f64_pattern<SDPatternOperator opnode,
|
||||||
|
@ -8297,12 +8314,12 @@ multiclass NeonI_2VMisc_D_Narrow<string asmop, string prefix, bit U,
|
||||||
let Constraints = "$src = $Rd";
|
let Constraints = "$src = $Rd";
|
||||||
}
|
}
|
||||||
|
|
||||||
def : Pat<(v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))),
|
def : Pat<(v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))),
|
||||||
(!cast<Instruction>(prefix # "2d2s") VPR128:$Rn)>;
|
(!cast<Instruction>(prefix # "2d2s") VPR128:$Rn)>;
|
||||||
|
|
||||||
def : Pat<(v4f32 (concat_vectors
|
def : Pat<(v4f32 (concat_vectors
|
||||||
(v2f32 VPR64:$src),
|
(v2f32 VPR64:$src),
|
||||||
(v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))))),
|
(v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))))),
|
||||||
(!cast<Instruction>(prefix # "2d4s")
|
(!cast<Instruction>(prefix # "2d4s")
|
||||||
(v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
|
(v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
|
||||||
VPR128:$Rn)>;
|
VPR128:$Rn)>;
|
||||||
|
|
|
@ -894,13 +894,13 @@ define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 {
|
||||||
|
|
||||||
define <2 x float> @test_vcvtx_f32_f64(<2 x double> %a) #0 {
|
define <2 x float> @test_vcvtx_f32_f64(<2 x double> %a) #0 {
|
||||||
; CHECK: fcvtxn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
|
; CHECK: fcvtxn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
|
||||||
%vcvtx_f32_f641.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %a) #4
|
%vcvtx_f32_f641.i = call <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double> %a) #4
|
||||||
ret <2 x float> %vcvtx_f32_f641.i
|
ret <2 x float> %vcvtx_f32_f641.i
|
||||||
}
|
}
|
||||||
|
|
||||||
define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 {
|
define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 {
|
||||||
; CHECK: fcvtxn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
|
; CHECK: fcvtxn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
|
||||||
%vcvtx_f32_f641.i.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %b) #4
|
%vcvtx_f32_f641.i.i = tail call <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double> %b) #4
|
||||||
%shuffle.i = shufflevector <2 x float> %a, <2 x float> %vcvtx_f32_f641.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
%shuffle.i = shufflevector <2 x float> %a, <2 x float> %vcvtx_f32_f641.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||||
ret <4 x float> %shuffle.i
|
ret <4 x float> %shuffle.i
|
||||||
}
|
}
|
||||||
|
@ -1462,7 +1462,7 @@ declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) #2
|
||||||
|
|
||||||
declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) #2
|
declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) #2
|
||||||
|
|
||||||
declare <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double>) #2
|
declare <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double>) #2
|
||||||
|
|
||||||
declare <2 x float> @llvm.aarch64.neon.fcvtn.v2f32.v2f64(<2 x double>) #2
|
declare <2 x float> @llvm.aarch64.neon.fcvtn.v2f32.v2f64(<2 x double>) #2
|
||||||
|
|
||||||
|
|
|
@ -6,250 +6,228 @@ define float @test_vcvtxn(double %a) {
|
||||||
; CHECK: test_vcvtxn
|
; CHECK: test_vcvtxn
|
||||||
; CHECK: fcvtxn {{s[0-9]}}, {{d[0-9]}}
|
; CHECK: fcvtxn {{s[0-9]}}, {{d[0-9]}}
|
||||||
entry:
|
entry:
|
||||||
%vcvtf.i = insertelement <1 x double> undef, double %a, i32 0
|
%vcvtf = call float @llvm.aarch64.neon.fcvtxn(double %a)
|
||||||
%vcvtf1.i = tail call <1 x float> @llvm.aarch64.neon.fcvtxn.v1f32.v1f64(<1 x double> %vcvtf.i)
|
ret float %vcvtf
|
||||||
%0 = extractelement <1 x float> %vcvtf1.i, i32 0
|
|
||||||
ret float %0
|
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <1 x float> @llvm.aarch64.neon.fcvtxn.v1f32.v1f64(<1 x double>)
|
declare float @llvm.aarch64.neon.fcvtxn(double)
|
||||||
|
|
||||||
define i32 @test_vcvtass(float %a) {
|
define i32 @test_vcvtass(float %a) {
|
||||||
; CHECK: test_vcvtass
|
; CHECK: test_vcvtass
|
||||||
; CHECK: fcvtas {{s[0-9]}}, {{s[0-9]}}
|
; CHECK: fcvtas {{s[0-9]}}, {{s[0-9]}}
|
||||||
entry:
|
entry:
|
||||||
%vcvtas.i = insertelement <1 x float> undef, float %a, i32 0
|
%vcvtas1.i = call <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.f32(float %a)
|
||||||
%vcvtas1.i = call <1 x i32> @llvm.arm.neon.vcvtas.v1i32.v1f32(<1 x float> %vcvtas.i)
|
|
||||||
%0 = extractelement <1 x i32> %vcvtas1.i, i32 0
|
%0 = extractelement <1 x i32> %vcvtas1.i, i32 0
|
||||||
ret i32 %0
|
ret i32 %0
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <1 x i32> @llvm.arm.neon.vcvtas.v1i32.v1f32(<1 x float>)
|
declare <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.f32(float)
|
||||||
|
|
||||||
define i64 @test_test_vcvtasd(double %a) {
|
define i64 @test_test_vcvtasd(double %a) {
|
||||||
; CHECK: test_test_vcvtasd
|
; CHECK: test_test_vcvtasd
|
||||||
; CHECK: fcvtas {{d[0-9]}}, {{d[0-9]}}
|
; CHECK: fcvtas {{d[0-9]}}, {{d[0-9]}}
|
||||||
entry:
|
entry:
|
||||||
%vcvtas.i = insertelement <1 x double> undef, double %a, i32 0
|
%vcvtas1.i = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.f64(double %a)
|
||||||
%vcvtas1.i = call <1 x i64> @llvm.arm.neon.vcvtas.v1i64.v1f64(<1 x double> %vcvtas.i)
|
|
||||||
%0 = extractelement <1 x i64> %vcvtas1.i, i32 0
|
%0 = extractelement <1 x i64> %vcvtas1.i, i32 0
|
||||||
ret i64 %0
|
ret i64 %0
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <1 x i64> @llvm.arm.neon.vcvtas.v1i64.v1f64(<1 x double>)
|
declare <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.f64(double)
|
||||||
|
|
||||||
define i32 @test_vcvtaus(float %a) {
|
define i32 @test_vcvtaus(float %a) {
|
||||||
; CHECK: test_vcvtaus
|
; CHECK: test_vcvtaus
|
||||||
; CHECK: fcvtau {{s[0-9]}}, {{s[0-9]}}
|
; CHECK: fcvtau {{s[0-9]}}, {{s[0-9]}}
|
||||||
entry:
|
entry:
|
||||||
%vcvtau.i = insertelement <1 x float> undef, float %a, i32 0
|
%vcvtau1.i = call <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.f32(float %a)
|
||||||
%vcvtau1.i = call <1 x i32> @llvm.arm.neon.vcvtau.v1i32.v1f32(<1 x float> %vcvtau.i)
|
|
||||||
%0 = extractelement <1 x i32> %vcvtau1.i, i32 0
|
%0 = extractelement <1 x i32> %vcvtau1.i, i32 0
|
||||||
ret i32 %0
|
ret i32 %0
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <1 x i32> @llvm.arm.neon.vcvtau.v1i32.v1f32(<1 x float>)
|
declare <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.f32(float)
|
||||||
|
|
||||||
define i64 @test_vcvtaud(double %a) {
|
define i64 @test_vcvtaud(double %a) {
|
||||||
; CHECK: test_vcvtaud
|
; CHECK: test_vcvtaud
|
||||||
; CHECK: fcvtau {{d[0-9]}}, {{d[0-9]}}
|
; CHECK: fcvtau {{d[0-9]}}, {{d[0-9]}}
|
||||||
entry:
|
entry:
|
||||||
%vcvtau.i = insertelement <1 x double> undef, double %a, i32 0
|
%vcvtau1.i = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.f64(double %a)
|
||||||
%vcvtau1.i = call <1 x i64> @llvm.arm.neon.vcvtau.v1i64.v1f64(<1 x double> %vcvtau.i)
|
|
||||||
%0 = extractelement <1 x i64> %vcvtau1.i, i32 0
|
%0 = extractelement <1 x i64> %vcvtau1.i, i32 0
|
||||||
ret i64 %0
|
ret i64 %0
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <1 x i64> @llvm.arm.neon.vcvtau.v1i64.v1f64(<1 x double>)
|
declare <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.f64(double)
|
||||||
|
|
||||||
define i32 @test_vcvtmss(float %a) {
|
define i32 @test_vcvtmss(float %a) {
|
||||||
; CHECK: test_vcvtmss
|
; CHECK: test_vcvtmss
|
||||||
; CHECK: fcvtms {{s[0-9]}}, {{s[0-9]}}
|
; CHECK: fcvtms {{s[0-9]}}, {{s[0-9]}}
|
||||||
entry:
|
entry:
|
||||||
%vcvtms.i = insertelement <1 x float> undef, float %a, i32 0
|
%vcvtms1.i = call <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.f32(float %a)
|
||||||
%vcvtms1.i = call <1 x i32> @llvm.arm.neon.vcvtms.v1i32.v1f32(<1 x float> %vcvtms.i)
|
|
||||||
%0 = extractelement <1 x i32> %vcvtms1.i, i32 0
|
%0 = extractelement <1 x i32> %vcvtms1.i, i32 0
|
||||||
ret i32 %0
|
ret i32 %0
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <1 x i32> @llvm.arm.neon.vcvtms.v1i32.v1f32(<1 x float>)
|
declare <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.f32(float)
|
||||||
|
|
||||||
define i64 @test_vcvtmd_s64_f64(double %a) {
|
define i64 @test_vcvtmd_s64_f64(double %a) {
|
||||||
; CHECK: test_vcvtmd_s64_f64
|
; CHECK: test_vcvtmd_s64_f64
|
||||||
; CHECK: fcvtms {{d[0-9]}}, {{d[0-9]}}
|
; CHECK: fcvtms {{d[0-9]}}, {{d[0-9]}}
|
||||||
entry:
|
entry:
|
||||||
%vcvtms.i = insertelement <1 x double> undef, double %a, i32 0
|
%vcvtms1.i = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.f64(double %a)
|
||||||
%vcvtms1.i = call <1 x i64> @llvm.arm.neon.vcvtms.v1i64.v1f64(<1 x double> %vcvtms.i)
|
|
||||||
%0 = extractelement <1 x i64> %vcvtms1.i, i32 0
|
%0 = extractelement <1 x i64> %vcvtms1.i, i32 0
|
||||||
ret i64 %0
|
ret i64 %0
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <1 x i64> @llvm.arm.neon.vcvtms.v1i64.v1f64(<1 x double>)
|
declare <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.f64(double)
|
||||||
|
|
||||||
define i32 @test_vcvtmus(float %a) {
|
define i32 @test_vcvtmus(float %a) {
|
||||||
; CHECK: test_vcvtmus
|
; CHECK: test_vcvtmus
|
||||||
; CHECK: fcvtmu {{s[0-9]}}, {{s[0-9]}}
|
; CHECK: fcvtmu {{s[0-9]}}, {{s[0-9]}}
|
||||||
entry:
|
entry:
|
||||||
%vcvtmu.i = insertelement <1 x float> undef, float %a, i32 0
|
%vcvtmu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.f32(float %a)
|
||||||
%vcvtmu1.i = call <1 x i32> @llvm.arm.neon.vcvtmu.v1i32.v1f32(<1 x float> %vcvtmu.i)
|
|
||||||
%0 = extractelement <1 x i32> %vcvtmu1.i, i32 0
|
%0 = extractelement <1 x i32> %vcvtmu1.i, i32 0
|
||||||
ret i32 %0
|
ret i32 %0
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <1 x i32> @llvm.arm.neon.vcvtmu.v1i32.v1f32(<1 x float>)
|
declare <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.f32(float)
|
||||||
|
|
||||||
define i64 @test_vcvtmud(double %a) {
|
define i64 @test_vcvtmud(double %a) {
|
||||||
; CHECK: test_vcvtmud
|
; CHECK: test_vcvtmud
|
||||||
; CHECK: fcvtmu {{d[0-9]}}, {{d[0-9]}}
|
; CHECK: fcvtmu {{d[0-9]}}, {{d[0-9]}}
|
||||||
entry:
|
entry:
|
||||||
%vcvtmu.i = insertelement <1 x double> undef, double %a, i32 0
|
%vcvtmu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.f64(double %a)
|
||||||
%vcvtmu1.i = call <1 x i64> @llvm.arm.neon.vcvtmu.v1i64.v1f64(<1 x double> %vcvtmu.i)
|
|
||||||
%0 = extractelement <1 x i64> %vcvtmu1.i, i32 0
|
%0 = extractelement <1 x i64> %vcvtmu1.i, i32 0
|
||||||
ret i64 %0
|
ret i64 %0
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <1 x i64> @llvm.arm.neon.vcvtmu.v1i64.v1f64(<1 x double>)
|
declare <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.f64(double)
|
||||||
|
|
||||||
define i32 @test_vcvtnss(float %a) {
|
define i32 @test_vcvtnss(float %a) {
|
||||||
; CHECK: test_vcvtnss
|
; CHECK: test_vcvtnss
|
||||||
; CHECK: fcvtns {{s[0-9]}}, {{s[0-9]}}
|
; CHECK: fcvtns {{s[0-9]}}, {{s[0-9]}}
|
||||||
entry:
|
entry:
|
||||||
%vcvtns.i = insertelement <1 x float> undef, float %a, i32 0
|
%vcvtns1.i = call <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.f32(float %a)
|
||||||
%vcvtns1.i = call <1 x i32> @llvm.arm.neon.vcvtns.v1i32.v1f32(<1 x float> %vcvtns.i)
|
|
||||||
%0 = extractelement <1 x i32> %vcvtns1.i, i32 0
|
%0 = extractelement <1 x i32> %vcvtns1.i, i32 0
|
||||||
ret i32 %0
|
ret i32 %0
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <1 x i32> @llvm.arm.neon.vcvtns.v1i32.v1f32(<1 x float>)
|
declare <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.f32(float)
|
||||||
|
|
||||||
define i64 @test_vcvtnd_s64_f64(double %a) {
|
define i64 @test_vcvtnd_s64_f64(double %a) {
|
||||||
; CHECK: test_vcvtnd_s64_f64
|
; CHECK: test_vcvtnd_s64_f64
|
||||||
; CHECK: fcvtns {{d[0-9]}}, {{d[0-9]}}
|
; CHECK: fcvtns {{d[0-9]}}, {{d[0-9]}}
|
||||||
entry:
|
entry:
|
||||||
%vcvtns.i = insertelement <1 x double> undef, double %a, i32 0
|
%vcvtns1.i = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.f64(double %a)
|
||||||
%vcvtns1.i = call <1 x i64> @llvm.arm.neon.vcvtns.v1i64.v1f64(<1 x double> %vcvtns.i)
|
|
||||||
%0 = extractelement <1 x i64> %vcvtns1.i, i32 0
|
%0 = extractelement <1 x i64> %vcvtns1.i, i32 0
|
||||||
ret i64 %0
|
ret i64 %0
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <1 x i64> @llvm.arm.neon.vcvtns.v1i64.v1f64(<1 x double>)
|
declare <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.f64(double)
|
||||||
|
|
||||||
define i32 @test_vcvtnus(float %a) {
|
define i32 @test_vcvtnus(float %a) {
|
||||||
; CHECK: test_vcvtnus
|
; CHECK: test_vcvtnus
|
||||||
; CHECK: fcvtnu {{s[0-9]}}, {{s[0-9]}}
|
; CHECK: fcvtnu {{s[0-9]}}, {{s[0-9]}}
|
||||||
entry:
|
entry:
|
||||||
%vcvtnu.i = insertelement <1 x float> undef, float %a, i32 0
|
%vcvtnu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.f32(float %a)
|
||||||
%vcvtnu1.i = call <1 x i32> @llvm.arm.neon.vcvtnu.v1i32.v1f32(<1 x float> %vcvtnu.i)
|
|
||||||
%0 = extractelement <1 x i32> %vcvtnu1.i, i32 0
|
%0 = extractelement <1 x i32> %vcvtnu1.i, i32 0
|
||||||
ret i32 %0
|
ret i32 %0
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <1 x i32> @llvm.arm.neon.vcvtnu.v1i32.v1f32(<1 x float>)
|
declare <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.f32(float)
|
||||||
|
|
||||||
define i64 @test_vcvtnud(double %a) {
|
define i64 @test_vcvtnud(double %a) {
|
||||||
; CHECK: test_vcvtnud
|
; CHECK: test_vcvtnud
|
||||||
; CHECK: fcvtnu {{d[0-9]}}, {{d[0-9]}}
|
; CHECK: fcvtnu {{d[0-9]}}, {{d[0-9]}}
|
||||||
entry:
|
entry:
|
||||||
%vcvtnu.i = insertelement <1 x double> undef, double %a, i32 0
|
%vcvtnu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.f64(double %a)
|
||||||
%vcvtnu1.i = call <1 x i64> @llvm.arm.neon.vcvtnu.v1i64.v1f64(<1 x double> %vcvtnu.i)
|
|
||||||
%0 = extractelement <1 x i64> %vcvtnu1.i, i32 0
|
%0 = extractelement <1 x i64> %vcvtnu1.i, i32 0
|
||||||
ret i64 %0
|
ret i64 %0
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <1 x i64> @llvm.arm.neon.vcvtnu.v1i64.v1f64(<1 x double>)
|
declare <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.f64(double)
|
||||||
|
|
||||||
define i32 @test_vcvtpss(float %a) {
|
define i32 @test_vcvtpss(float %a) {
|
||||||
; CHECK: test_vcvtpss
|
; CHECK: test_vcvtpss
|
||||||
; CHECK: fcvtps {{s[0-9]}}, {{s[0-9]}}
|
; CHECK: fcvtps {{s[0-9]}}, {{s[0-9]}}
|
||||||
entry:
|
entry:
|
||||||
%vcvtps.i = insertelement <1 x float> undef, float %a, i32 0
|
%vcvtps1.i = call <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.f32(float %a)
|
||||||
%vcvtps1.i = call <1 x i32> @llvm.arm.neon.vcvtps.v1i32.v1f32(<1 x float> %vcvtps.i)
|
|
||||||
%0 = extractelement <1 x i32> %vcvtps1.i, i32 0
|
%0 = extractelement <1 x i32> %vcvtps1.i, i32 0
|
||||||
ret i32 %0
|
ret i32 %0
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <1 x i32> @llvm.arm.neon.vcvtps.v1i32.v1f32(<1 x float>)
|
declare <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.f32(float)
|
||||||
|
|
||||||
define i64 @test_vcvtpd_s64_f64(double %a) {
|
define i64 @test_vcvtpd_s64_f64(double %a) {
|
||||||
; CHECK: test_vcvtpd_s64_f64
|
; CHECK: test_vcvtpd_s64_f64
|
||||||
; CHECK: fcvtps {{d[0-9]}}, {{d[0-9]}}
|
; CHECK: fcvtps {{d[0-9]}}, {{d[0-9]}}
|
||||||
entry:
|
entry:
|
||||||
%vcvtps.i = insertelement <1 x double> undef, double %a, i32 0
|
%vcvtps1.i = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.f64(double %a)
|
||||||
%vcvtps1.i = call <1 x i64> @llvm.arm.neon.vcvtps.v1i64.v1f64(<1 x double> %vcvtps.i)
|
|
||||||
%0 = extractelement <1 x i64> %vcvtps1.i, i32 0
|
%0 = extractelement <1 x i64> %vcvtps1.i, i32 0
|
||||||
ret i64 %0
|
ret i64 %0
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <1 x i64> @llvm.arm.neon.vcvtps.v1i64.v1f64(<1 x double>)
|
declare <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.f64(double)
|
||||||
|
|
||||||
define i32 @test_vcvtpus(float %a) {
|
define i32 @test_vcvtpus(float %a) {
|
||||||
; CHECK: test_vcvtpus
|
; CHECK: test_vcvtpus
|
||||||
; CHECK: fcvtpu {{s[0-9]}}, {{s[0-9]}}
|
; CHECK: fcvtpu {{s[0-9]}}, {{s[0-9]}}
|
||||||
entry:
|
entry:
|
||||||
%vcvtpu.i = insertelement <1 x float> undef, float %a, i32 0
|
%vcvtpu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.f32(float %a)
|
||||||
%vcvtpu1.i = call <1 x i32> @llvm.arm.neon.vcvtpu.v1i32.v1f32(<1 x float> %vcvtpu.i)
|
|
||||||
%0 = extractelement <1 x i32> %vcvtpu1.i, i32 0
|
%0 = extractelement <1 x i32> %vcvtpu1.i, i32 0
|
||||||
ret i32 %0
|
ret i32 %0
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <1 x i32> @llvm.arm.neon.vcvtpu.v1i32.v1f32(<1 x float>)
|
declare <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.f32(float)
|
||||||
|
|
||||||
define i64 @test_vcvtpud(double %a) {
|
define i64 @test_vcvtpud(double %a) {
|
||||||
; CHECK: test_vcvtpud
|
; CHECK: test_vcvtpud
|
||||||
; CHECK: fcvtpu {{d[0-9]}}, {{d[0-9]}}
|
; CHECK: fcvtpu {{d[0-9]}}, {{d[0-9]}}
|
||||||
entry:
|
entry:
|
||||||
%vcvtpu.i = insertelement <1 x double> undef, double %a, i32 0
|
%vcvtpu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.f64(double %a)
|
||||||
%vcvtpu1.i = tail call <1 x i64> @llvm.arm.neon.vcvtpu.v1i64.v1f64(<1 x double> %vcvtpu.i)
|
|
||||||
%0 = extractelement <1 x i64> %vcvtpu1.i, i32 0
|
%0 = extractelement <1 x i64> %vcvtpu1.i, i32 0
|
||||||
ret i64 %0
|
ret i64 %0
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <1 x i64> @llvm.arm.neon.vcvtpu.v1i64.v1f64(<1 x double>)
|
declare <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.f64(double)
|
||||||
|
|
||||||
define i32 @test_vcvtss(float %a) {
|
define i32 @test_vcvtss(float %a) {
|
||||||
; CHECK: test_vcvtss
|
; CHECK: test_vcvtss
|
||||||
; CHECK: fcvtzs {{s[0-9]}}, {{s[0-9]}}
|
; CHECK: fcvtzs {{s[0-9]}}, {{s[0-9]}}
|
||||||
entry:
|
entry:
|
||||||
%vcvtzs.i = insertelement <1 x float> undef, float %a, i32 0
|
%vcvtzs1.i = call <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.f32(float %a)
|
||||||
%vcvtzs1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.v1f32(<1 x float> %vcvtzs.i)
|
|
||||||
%0 = extractelement <1 x i32> %vcvtzs1.i, i32 0
|
%0 = extractelement <1 x i32> %vcvtzs1.i, i32 0
|
||||||
ret i32 %0
|
ret i32 %0
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.v1f32(<1 x float>)
|
declare <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.f32(float)
|
||||||
|
|
||||||
define i64 @test_vcvtd_s64_f64(double %a) {
|
define i64 @test_vcvtd_s64_f64(double %a) {
|
||||||
; CHECK: test_vcvtd_s64_f64
|
; CHECK: test_vcvtd_s64_f64
|
||||||
; CHECK: fcvtzs {{d[0-9]}}, {{d[0-9]}}
|
; CHECK: fcvtzs {{d[0-9]}}, {{d[0-9]}}
|
||||||
entry:
|
entry:
|
||||||
%vcvzs.i = insertelement <1 x double> undef, double %a, i32 0
|
%vcvzs1.i = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.f64(double %a)
|
||||||
%vcvzs1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> %vcvzs.i)
|
|
||||||
%0 = extractelement <1 x i64> %vcvzs1.i, i32 0
|
%0 = extractelement <1 x i64> %vcvzs1.i, i32 0
|
||||||
ret i64 %0
|
ret i64 %0
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double>)
|
declare <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.f64(double)
|
||||||
|
|
||||||
define i32 @test_vcvtus(float %a) {
|
define i32 @test_vcvtus(float %a) {
|
||||||
; CHECK: test_vcvtus
|
; CHECK: test_vcvtus
|
||||||
; CHECK: fcvtzu {{s[0-9]}}, {{s[0-9]}}
|
; CHECK: fcvtzu {{s[0-9]}}, {{s[0-9]}}
|
||||||
entry:
|
entry:
|
||||||
%vcvtzu.i = insertelement <1 x float> undef, float %a, i32 0
|
%vcvtzu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.f32(float %a)
|
||||||
%vcvtzu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.v1f32(<1 x float> %vcvtzu.i)
|
|
||||||
%0 = extractelement <1 x i32> %vcvtzu1.i, i32 0
|
%0 = extractelement <1 x i32> %vcvtzu1.i, i32 0
|
||||||
ret i32 %0
|
ret i32 %0
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.v1f32(<1 x float>)
|
declare <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.f32(float)
|
||||||
|
|
||||||
define i64 @test_vcvtud(double %a) {
|
define i64 @test_vcvtud(double %a) {
|
||||||
; CHECK: test_vcvtud
|
; CHECK: test_vcvtud
|
||||||
; CHECK: fcvtzu {{d[0-9]}}, {{d[0-9]}}
|
; CHECK: fcvtzu {{d[0-9]}}, {{d[0-9]}}
|
||||||
entry:
|
entry:
|
||||||
%vcvtzu.i = insertelement <1 x double> undef, double %a, i32 0
|
%vcvtzu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.f64(double %a)
|
||||||
%vcvtzu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> %vcvtzu.i)
|
|
||||||
%0 = extractelement <1 x i64> %vcvtzu1.i, i32 0
|
%0 = extractelement <1 x i64> %vcvtzu1.i, i32 0
|
||||||
ret i64 %0
|
ret i64 %0
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double>)
|
declare <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.f64(double)
|
||||||
|
|
Loading…
Reference in New Issue