forked from OSchip/llvm-project
[AArch64] Remove Duplicate FP16 Patterns with same encoding, match on existing patterns
llvm-svn: 335715
This commit is contained in:
parent
a582419ac7
commit
68cb233c0f
|
@ -7923,27 +7923,6 @@ class BaseSIMDScalarShiftTied<bit U, bits<5> opc, bits<7> fixed_imm,
|
|||
|
||||
multiclass SIMDFPScalarRShift<bit U, bits<5> opc, string asm> {
|
||||
let Predicates = [HasNEON, HasFullFP16] in {
|
||||
def HSr : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
|
||||
FPR16, FPR32, vecshiftR16, asm, []> {
|
||||
let Inst{19-16} = imm{3-0};
|
||||
let Inst{23-22} = 0b11;
|
||||
}
|
||||
def SHr : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
|
||||
FPR32, FPR16, vecshiftR32, asm, []> {
|
||||
let Inst{19-16} = imm{3-0};
|
||||
let Inst{22-21} = 0b01;
|
||||
}
|
||||
def HDr : BaseSIMDScalarShift<U, opc, {?,?,?,?,?,?,?},
|
||||
FPR16, FPR64, vecshiftR32, asm, []> {
|
||||
let Inst{21-16} = imm{5-0};
|
||||
let Inst{23-22} = 0b11;
|
||||
}
|
||||
def DHr : BaseSIMDScalarShift<U, opc, {1,1,1,?,?,?,?},
|
||||
FPR64, FPR16, vecshiftR64, asm, []> {
|
||||
let Inst{21-16} = imm{5-0};
|
||||
let Inst{23-22} = 0b01;
|
||||
let Inst{31} = 1;
|
||||
}
|
||||
def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
|
||||
FPR16, FPR16, vecshiftR16, asm, []> {
|
||||
let Inst{19-16} = imm{3-0};
|
||||
|
|
|
@ -4958,16 +4958,6 @@ def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
|
|||
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
|
||||
vecshiftR64:$imm)),
|
||||
(FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
|
||||
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
|
||||
(FCVTZSHDr (i64 FPR64:$Rn), vecshiftR32:$imm)>;
|
||||
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu FPR16:$Rn, vecshiftR32:$imm)),
|
||||
(FCVTZUSHr FPR16:$Rn, vecshiftR32:$imm)>;
|
||||
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs FPR16:$Rn, vecshiftR32:$imm)),
|
||||
(FCVTZSSHr FPR16:$Rn, vecshiftR32:$imm)>;
|
||||
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
|
||||
(FCVTZSDHr (f16 FPR16:$Rn), vecshiftR64:$imm)>;
|
||||
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
|
||||
(UCVTFHSr FPR32:$Rn, vecshiftR16:$imm)>;
|
||||
def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
|
||||
(UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
|
||||
def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
|
||||
|
@ -4975,10 +4965,6 @@ def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
|
|||
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
|
||||
vecshiftR64:$imm)),
|
||||
(SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
|
||||
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
|
||||
(SCVTFHSr FPR32:$Rn, vecshiftR16:$imm)>;
|
||||
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR16:$imm)),
|
||||
(SCVTFHSr FPR32:$Rn, vecshiftR16:$imm)>;
|
||||
def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
|
||||
(SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
|
||||
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
|
||||
|
@ -4987,6 +4973,43 @@ def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
|
|||
def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
|
||||
(SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
|
||||
|
||||
// Patterns for FP16 intrinsics - these require a register copy to/from, as i16 is not supported.
|
||||
|
||||
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
|
||||
(SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
|
||||
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
|
||||
(SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
|
||||
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
|
||||
(SCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
|
||||
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
|
||||
(and FPR32:$Rn, (i32 65535)),
|
||||
vecshiftR16:$imm)),
|
||||
(UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
|
||||
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
|
||||
(UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
|
||||
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
|
||||
(UCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
|
||||
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
|
||||
(i32 (INSERT_SUBREG
|
||||
(i32 (IMPLICIT_DEF)),
|
||||
(FCVTZSh FPR16:$Rn, vecshiftR32:$imm),
|
||||
hsub))>;
|
||||
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
|
||||
(i64 (INSERT_SUBREG
|
||||
(i64 (IMPLICIT_DEF)),
|
||||
(FCVTZSh FPR16:$Rn, vecshiftR64:$imm),
|
||||
hsub))>;
|
||||
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)),
|
||||
(i32 (INSERT_SUBREG
|
||||
(i32 (IMPLICIT_DEF)),
|
||||
(FCVTZUh FPR16:$Rn, vecshiftR32:$imm),
|
||||
hsub))>;
|
||||
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
|
||||
(i64 (INSERT_SUBREG
|
||||
(i64 (IMPLICIT_DEF)),
|
||||
(FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
|
||||
hsub))>;
|
||||
|
||||
defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>;
|
||||
defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
|
||||
defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
|
||||
|
|
|
@ -72,6 +72,19 @@ const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy(
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
/// AArch64 override of getSubClassWithSubReg.
///
/// Two (RC, Idx) combinations are answered directly here:
///   - GPR32all with hsub -> FPR32
///   - GPR64all with hsub -> FPR64
/// Every other query is delegated unchanged to the TableGen-generated
/// AArch64GenRegisterInfo implementation.
///
/// \param RC  register class being queried.
/// \param Idx subregister index requested on \p RC.
/// \returns the FPR class for the two special cases above, otherwise whatever
///          the generated implementation returns.
const TargetRegisterClass *
|
||||
AArch64RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
|
||||
unsigned Idx) const {
|
||||
// Edge case for GPR/FPR register classes: an hsub request on a general
// purpose class is redirected to the floating-point class of the same width.
// NOTE(review): presumably needed so that hsub can be applied to i32/i64
// values that would otherwise be assigned a GPR class -- confirm against the
// instruction selection patterns that use hsub.
|
||||
if (RC == &AArch64::GPR32allRegClass && Idx == AArch64::hsub)
|
||||
return &AArch64::FPR32RegClass;
|
||||
else if (RC == &AArch64::GPR64allRegClass && Idx == AArch64::hsub)
|
||||
return &AArch64::FPR64RegClass;
|
||||
|
||||
// Not one of the special cases: forward to TableGen's default version.
|
||||
return AArch64GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
|
||||
}
|
||||
|
||||
const uint32_t *
|
||||
AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
|
||||
CallingConv::ID CC) const {
|
||||
|
|
|
@ -46,6 +46,10 @@ public:
|
|||
return 5;
|
||||
}
|
||||
|
||||
const TargetRegisterClass *
|
||||
getSubClassWithSubReg(const TargetRegisterClass *RC,
|
||||
unsigned Idx) const override;
|
||||
|
||||
// Calls involved in thread-local variable lookup save more registers than
|
||||
// normal calls, so they need a different mask to represent this.
|
||||
const uint32_t *getTLSCallPreservedMask() const;
|
||||
|
|
|
@ -136,9 +136,8 @@ declare i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half, i32) #1
|
|||
|
||||
define dso_local half @test_vcvth_n_f16_s16_1(i16 %a) {
|
||||
; CHECK-LABEL: test_vcvth_n_f16_s16_1:
|
||||
; CHECK: sxth w[[wReg:[0-9]+]], w0
|
||||
; CHECK-NEXT: fmov s0, w[[wReg:[0-9]+]]
|
||||
; CHECK-NEXT: scvtf h0, s0, #1
|
||||
; CHECK: fmov s0, w[[wReg:[0-9]+]]
|
||||
; CHECK-NEXT: scvtf h0, h0, #1
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%sext = sext i16 %a to i32
|
||||
|
@ -148,9 +147,8 @@ entry:
|
|||
|
||||
define dso_local half @test_vcvth_n_f16_s16_16(i16 %a) {
|
||||
; CHECK-LABEL: test_vcvth_n_f16_s16_16:
|
||||
; CHECK: sxth w[[wReg:[0-9]+]], w0
|
||||
; CHECK-NEXT: fmov s0, w[[wReg:[0-9]+]]
|
||||
; CHECK-NEXT: scvtf h0, s0, #16
|
||||
; CHECK: fmov s0, w[[wReg:[0-9]+]]
|
||||
; CHECK-NEXT: scvtf h0, h0, #16
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%sext = sext i16 %a to i32
|
||||
|
@ -161,7 +159,7 @@ entry:
|
|||
define dso_local half @test_vcvth_n_f16_s32_1(i32 %a) {
|
||||
; CHECK-LABEL: test_vcvth_n_f16_s32_1:
|
||||
; CHECK: fmov s0, w0
|
||||
; CHECK-NEXT: scvtf h0, s0, #1
|
||||
; CHECK-NEXT: scvtf h0, h0, #1
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%vcvth_n_f16_s32 = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 1)
|
||||
|
@ -171,7 +169,7 @@ entry:
|
|||
define dso_local half @test_vcvth_n_f16_s32_16(i32 %a) {
|
||||
; CHECK-LABEL: test_vcvth_n_f16_s32_16:
|
||||
; CHECK: fmov s0, w0
|
||||
; CHECK-NEXT: scvtf h0, s0, #16
|
||||
; CHECK-NEXT: scvtf h0, h0, #16
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%vcvth_n_f16_s32 = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 16)
|
||||
|
@ -181,7 +179,7 @@ entry:
|
|||
define dso_local half @test_vcvth_n_f16_s64_1(i64 %a) {
|
||||
; CHECK-LABEL: test_vcvth_n_f16_s64_1:
|
||||
; CHECK: fmov d0, x0
|
||||
; CHECK-NEXT: fcvtzs h0, d0, #1
|
||||
; CHECK-NEXT: scvtf h0, h0, #1
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%vcvth_n_f16_s64 = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64 %a, i32 1)
|
||||
|
@ -191,7 +189,7 @@ entry:
|
|||
define dso_local half @test_vcvth_n_f16_s64_16(i64 %a) {
|
||||
; CHECK-LABEL: test_vcvth_n_f16_s64_16:
|
||||
; CHECK: fmov d0, x0
|
||||
; CHECK-NEXT: fcvtzs h0, d0, #16
|
||||
; CHECK-NEXT: scvtf h0, h0, #16
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%vcvth_n_f16_s64 = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64 %a, i32 16)
|
||||
|
@ -200,7 +198,7 @@ entry:
|
|||
|
||||
define dso_local i16 @test_vcvth_n_s16_f16_1(half %a) {
|
||||
; CHECK-LABEL: test_vcvth_n_s16_f16_1:
|
||||
; CHECK: fcvtzs s0, h0, #1
|
||||
; CHECK: fcvtzs h0, h0, #1
|
||||
; CHECK-NEXT: fmov w0, s0
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
|
@ -211,7 +209,7 @@ entry:
|
|||
|
||||
define dso_local i16 @test_vcvth_n_s16_f16_16(half %a) {
|
||||
; CHECK-LABEL: test_vcvth_n_s16_f16_16:
|
||||
; CHECK: fcvtzs s0, h0, #16
|
||||
; CHECK: fcvtzs h0, h0, #16
|
||||
; CHECK-NEXT: fmov w0, s0
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
|
@ -222,7 +220,7 @@ entry:
|
|||
|
||||
define dso_local i32 @test_vcvth_n_s32_f16_1(half %a) {
|
||||
; CHECK-LABEL: test_vcvth_n_s32_f16_1:
|
||||
; CHECK: fcvtzs s0, h0, #1
|
||||
; CHECK: fcvtzs h0, h0, #1
|
||||
; CHECK-NEXT: fmov w0, s0
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
|
@ -232,7 +230,7 @@ entry:
|
|||
|
||||
define dso_local i32 @test_vcvth_n_s32_f16_16(half %a) {
|
||||
; CHECK-LABEL: test_vcvth_n_s32_f16_16:
|
||||
; CHECK: fcvtzs s0, h0, #16
|
||||
; CHECK: fcvtzs h0, h0, #16
|
||||
; CHECK-NEXT: fmov w0, s0
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
|
@ -242,7 +240,7 @@ entry:
|
|||
|
||||
define dso_local i64 @test_vcvth_n_s64_f16_1(half %a) {
|
||||
; CHECK-LABEL: test_vcvth_n_s64_f16_1:
|
||||
; CHECK: fcvtzs d0, h0, #1
|
||||
; CHECK: fcvtzs h0, h0, #1
|
||||
; CHECK-NEXT: fmov x0, d0
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
|
@ -252,7 +250,7 @@ entry:
|
|||
|
||||
define dso_local i64 @test_vcvth_n_s64_f16_32(half %a) {
|
||||
; CHECK-LABEL: test_vcvth_n_s64_f16_32:
|
||||
; CHECK: fcvtzs d0, h0, #32
|
||||
; CHECK: fcvtzs h0, h0, #32
|
||||
; CHECK-NEXT: fmov x0, d0
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
|
@ -262,9 +260,7 @@ entry:
|
|||
|
||||
define dso_local half @test_vcvth_n_f16_u16_1(i16 %a) {
|
||||
; CHECK-LABEL: test_vcvth_n_f16_u16_1:
|
||||
; CHECK: and w[[wReg:[0-9]+]], w0, #0xffff
|
||||
; CHECK-NEXT: fmov s0, w[[wReg:[0-9]+]]
|
||||
; CHECK-NEXT: ucvtf h0, s0, #1
|
||||
; CHECK: ucvtf h0, h0, #1
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = zext i16 %a to i32
|
||||
|
@ -274,9 +270,7 @@ entry:
|
|||
|
||||
define dso_local half @test_vcvth_n_f16_u16_16(i16 %a) {
|
||||
; CHECK-LABEL: test_vcvth_n_f16_u16_16:
|
||||
; CHECK: and w[[wReg:[0-9]+]], w0, #0xffff
|
||||
; CHECK-NEXT: fmov s0, w[[wReg:[0-9]+]]
|
||||
; CHECK-NEXT: ucvtf h0, s0, #16
|
||||
; CHECK: ucvtf h0, h0, #16
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = zext i16 %a to i32
|
||||
|
@ -287,7 +281,7 @@ entry:
|
|||
define dso_local half @test_vcvth_n_f16_u32_1(i32 %a) {
|
||||
; CHECK-LABEL: test_vcvth_n_f16_u32_1:
|
||||
; CHECK: fmov s0, w0
|
||||
; CHECK-NEXT: ucvtf h0, s0, #1
|
||||
; CHECK-NEXT: ucvtf h0, h0, #1
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%vcvth_n_f16_u32 = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %a, i32 1)
|
||||
|
@ -296,8 +290,7 @@ entry:
|
|||
|
||||
define dso_local half @test_vcvth_n_f16_u32_16(i32 %a) {
|
||||
; CHECK-LABEL: test_vcvth_n_f16_u32_16:
|
||||
; CHECK: fmov s0, w0
|
||||
; CHECK-NEXT: ucvtf h0, s0, #16
|
||||
; CHECK: ucvtf h0, h0, #16
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%vcvth_n_f16_u32 = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %a, i32 16)
|
||||
|
@ -306,7 +299,7 @@ entry:
|
|||
|
||||
define dso_local i16 @test_vcvth_n_u16_f16_1(half %a) {
|
||||
; CHECK-LABEL: test_vcvth_n_u16_f16_1:
|
||||
; CHECK: fcvtzu s0, h0, #1
|
||||
; CHECK: fcvtzu h0, h0, #1
|
||||
; CHECK-NEXT: fmov w0, s0
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
|
@ -317,7 +310,7 @@ entry:
|
|||
|
||||
define dso_local i16 @test_vcvth_n_u16_f16_16(half %a) {
|
||||
; CHECK-LABEL: test_vcvth_n_u16_f16_16:
|
||||
; CHECK: fcvtzu s0, h0, #16
|
||||
; CHECK: fcvtzu h0, h0, #16
|
||||
; CHECK-NEXT: fmov w0, s0
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
|
@ -328,7 +321,7 @@ entry:
|
|||
|
||||
define dso_local i32 @test_vcvth_n_u32_f16_1(half %a) {
|
||||
; CHECK-LABEL: test_vcvth_n_u32_f16_1:
|
||||
; CHECK: fcvtzu s0, h0, #1
|
||||
; CHECK: fcvtzu h0, h0, #1
|
||||
; CHECK-NEXT: fmov w0, s0
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
|
@ -338,7 +331,7 @@ entry:
|
|||
|
||||
define dso_local i32 @test_vcvth_n_u32_f16_16(half %a) {
|
||||
; CHECK-LABEL: test_vcvth_n_u32_f16_16:
|
||||
; CHECK: fcvtzu s0, h0, #16
|
||||
; CHECK: fcvtzu h0, h0, #16
|
||||
; CHECK-NEXT: fmov w0, s0
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
|
|
Loading…
Reference in New Issue