From 417dd6782505650081aa1f05057231e0375ee836 Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Fri, 25 Oct 2019 14:45:14 +0200 Subject: [PATCH] [MIPS GlobalISel] Select MSA vector generic and builtin fsqrt selectImpl is able to select G_FSQRT when we set bank for vector operands to fprb. Add detailed tests. Note: G_FSQRT is generated from llvm-ir intrinsics llvm.sqrt.*, and at the moment MIPS is not able to generate this intrinsic for vector type (some targets generate vector llvm.sqrt.* from calls to a builtin function). __builtin_msa_fsqrt_w and __builtin_msa_fsqrt_d will be transformed into G_FSQRT in legalizeIntrinsic and selected in the same way. Differential Revision: https://reviews.llvm.org/D69376 --- llvm/lib/Target/Mips/MipsLegalizerInfo.cpp | 20 +++++-- llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp | 4 +- .../instruction-select/fsqrt_vec.mir | 60 +++++++++++++++++++ .../Mips/GlobalISel/legalizer/fsqrt_vec.mir | 56 +++++++++++++++++ .../legalizer/fsqrt_vec_builtin.mir | 60 +++++++++++++++++++ .../Mips/GlobalISel/llvm-ir/fsqrt_vec.ll | 34 +++++++++++ .../GlobalISel/llvm-ir/fsqrt_vec_builtin.ll | 35 +++++++++++ .../GlobalISel/regbankselect/fsqrt_vec.mir | 58 ++++++++++++++++++ 8 files changed, 320 insertions(+), 7 deletions(-) create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/instruction-select/fsqrt_vec.mir create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/legalizer/fsqrt_vec.mir create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/legalizer/fsqrt_vec_builtin.mir create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fsqrt_vec.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fsqrt_vec_builtin.ll create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/regbankselect/fsqrt_vec.mir diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp index 087a41cca6ec..ab0543b8cc81 100644 --- a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp +++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp @@ -188,10 +188,7 @@ 
MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) { getActionDefinitionsBuilder(G_FCONSTANT) .legalFor({s32, s64}); - getActionDefinitionsBuilder(G_FSQRT) - .legalFor({s32, s64}); - - getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FABS}) + getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FABS, G_FSQRT}) .legalIf([=, &ST](const LegalityQuery &Query) { if (CheckTyN(0, Query, {s32, s64})) return true; @@ -326,6 +323,17 @@ static bool MSA3OpIntrinsicToGeneric(MachineInstr &MI, unsigned Opcode, return true; } +bool MSA2OpIntrinsicToGeneric(MachineInstr &MI, unsigned Opcode, + MachineIRBuilder &MIRBuilder, + const MipsSubtarget &ST) { + assert(ST.hasMSA() && "MSA intrinsic not supported on target without MSA."); + MIRBuilder.buildInstr(Opcode) + .add(MI.getOperand(0)) + .add(MI.getOperand(2)); + MI.eraseFromParent(); + return true; +} + bool MipsLegalizerInfo::legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const { @@ -429,6 +437,10 @@ bool MipsLegalizerInfo::legalizeIntrinsic(MachineInstr &MI, return SelectMSA3OpIntrinsic(MI, Mips::FMAX_A_W, MIRBuilder, ST); case Intrinsic::mips_fmax_a_d: return SelectMSA3OpIntrinsic(MI, Mips::FMAX_A_D, MIRBuilder, ST); + case Intrinsic::mips_fsqrt_w: + return MSA2OpIntrinsicToGeneric(MI, TargetOpcode::G_FSQRT, MIRBuilder, ST); + case Intrinsic::mips_fsqrt_d: + return MSA2OpIntrinsicToGeneric(MI, TargetOpcode::G_FSQRT, MIRBuilder, ST); default: break; } diff --git a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp index ab4be5fa1028..50d144f5234b 100644 --- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp +++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp @@ -539,14 +539,12 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { &Mips::ValueMappings[Mips::GPRIdx]}); MappingID = CustomMappingID; break; - case G_FSQRT: - OperandsMapping = getFprbMapping(Op0Size); - break; case G_FADD: case 
G_FSUB: case G_FMUL: case G_FDIV: case G_FABS: + case G_FSQRT: OperandsMapping = getFprbMapping(Op0Size); if (Op0Size == 128) OperandsMapping = getMSAMapping(MF); diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/fsqrt_vec.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/fsqrt_vec.mir new file mode 100644 index 000000000000..5a81540f2947 --- /dev/null +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/fsqrt_vec.mir @@ -0,0 +1,60 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600 +--- | + + define void @sqrt_v4f32(<4 x float>* %a, <4 x float>* %c) { entry: ret void } + define void @sqrt_v2f64(<2 x double>* %a, <2 x double>* %c) { entry: ret void } + +... +--- +name: sqrt_v4f32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: sqrt_v4f32 + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.a) + ; P5600: [[FSQRT_W:%[0-9]+]]:msa128w = FSQRT_W [[LD_W]] + ; P5600: ST_W [[FSQRT_W]], [[COPY1]], 0 :: (store 16 into %ir.c) + ; P5600: RetRA + %0:gprb(p0) = COPY $a0 + %1:gprb(p0) = COPY $a1 + %2:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %3:fprb(<4 x s32>) = G_FSQRT %2 + G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.c) + RetRA + +... 
+--- +name: sqrt_v2f64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: sqrt_v2f64 + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load 16 from %ir.a) + ; P5600: [[FSQRT_D:%[0-9]+]]:msa128d = FSQRT_D [[LD_D]] + ; P5600: ST_D [[FSQRT_D]], [[COPY1]], 0 :: (store 16 into %ir.c) + ; P5600: RetRA + %0:gprb(p0) = COPY $a0 + %1:gprb(p0) = COPY $a1 + %2:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %3:fprb(<2 x s64>) = G_FSQRT %2 + G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.c) + RetRA + +... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fsqrt_vec.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fsqrt_vec.mir new file mode 100644 index 000000000000..a5994b6e88fd --- /dev/null +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fsqrt_vec.mir @@ -0,0 +1,56 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600 +--- | + + define void @sqrt_v4f32(<4 x float>* %a, <4 x float>* %c) { entry: ret void } + define void @sqrt_v2f64(<2 x double>* %a, <2 x double>* %c) { entry: ret void } + +... 
+--- +name: sqrt_v4f32 +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: sqrt_v4f32 + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[FSQRT:%[0-9]+]]:_(<4 x s32>) = G_FSQRT [[LOAD]] + ; P5600: G_STORE [[FSQRT]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: RetRA + %0:_(p0) = COPY $a0 + %1:_(p0) = COPY $a1 + %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %3:_(<4 x s32>) = G_FSQRT %2 + G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.c) + RetRA + +... +--- +name: sqrt_v2f64 +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: sqrt_v2f64 + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[FSQRT:%[0-9]+]]:_(<2 x s64>) = G_FSQRT [[LOAD]] + ; P5600: G_STORE [[FSQRT]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: RetRA + %0:_(p0) = COPY $a0 + %1:_(p0) = COPY $a1 + %2:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %3:_(<2 x s64>) = G_FSQRT %2 + G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.c) + RetRA + +... 
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fsqrt_vec_builtin.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fsqrt_vec_builtin.mir new file mode 100644 index 000000000000..e6d31789a486 --- /dev/null +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fsqrt_vec_builtin.mir @@ -0,0 +1,60 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600 +--- | + + declare <4 x float> @llvm.mips.fsqrt.w(<4 x float>) + define void @fsqrt_v4f32_builtin(<4 x float>* %a, <4 x float>* %c) { entry: ret void } + + declare <2 x double> @llvm.mips.fsqrt.d(<2 x double>) + define void @fsqrt_v2f64_builtin(<2 x double>* %a, <2 x double>* %c) { entry: ret void } + +... +--- +name: fsqrt_v4f32_builtin +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: fsqrt_v4f32_builtin + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[FSQRT:%[0-9]+]]:_(<4 x s32>) = G_FSQRT [[LOAD]] + ; P5600: G_STORE [[FSQRT]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: RetRA + %0:_(p0) = COPY $a0 + %1:_(p0) = COPY $a1 + %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.fsqrt.w), %2(<4 x s32>) + G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.c) + RetRA + +... 
+--- +name: fsqrt_v2f64_builtin +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: fsqrt_v2f64_builtin + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[FSQRT:%[0-9]+]]:_(<2 x s64>) = G_FSQRT [[LOAD]] + ; P5600: G_STORE [[FSQRT]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: RetRA + %0:_(p0) = COPY $a0 + %1:_(p0) = COPY $a1 + %2:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %3:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.mips.fsqrt.d), %2(<2 x s64>) + G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.c) + RetRA + +... + diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fsqrt_vec.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fsqrt_vec.ll new file mode 100644 index 000000000000..27bbb9aa59c8 --- /dev/null +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fsqrt_vec.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=P5600 + +declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %Val) +define void @sqrt_v4f32(<4 x float>* %a, <4 x float>* %c) { +; P5600-LABEL: sqrt_v4f32: +; P5600: # %bb.0: # %entry +; P5600-NEXT: ld.w $w0, 0($4) +; P5600-NEXT: fsqrt.w $w0, $w0 +; P5600-NEXT: st.w $w0, 0($5) +; P5600-NEXT: jr $ra +; P5600-NEXT: nop +entry: + %0 = load <4 x float>, <4 x float>* %a, align 16 + %sqrt = call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %0) + store <4 x float> %sqrt, <4 x float>* %c, align 16 + ret void +} + +declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %Val) +define void @sqrt_v2f64(<2 x double>* %a, <2 x double>* %c) { +; P5600-LABEL: sqrt_v2f64: +; P5600: # %bb.0: # %entry +; P5600-NEXT: ld.d $w0, 0($4) +; 
P5600-NEXT: fsqrt.d $w0, $w0 +; P5600-NEXT: st.d $w0, 0($5) +; P5600-NEXT: jr $ra +; P5600-NEXT: nop +entry: + %0 = load <2 x double>, <2 x double>* %a, align 16 + %sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %0) + store <2 x double> %sqrt, <2 x double>* %c, align 16 + ret void +} diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fsqrt_vec_builtin.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fsqrt_vec_builtin.ll new file mode 100644 index 000000000000..a765591d42f3 --- /dev/null +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fsqrt_vec_builtin.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=P5600 + +declare <4 x float> @llvm.mips.fsqrt.w(<4 x float>) +define void @fsqrt_v4f32_builtin(<4 x float>* %a, <4 x float>* %c) { +; P5600-LABEL: fsqrt_v4f32_builtin: +; P5600: # %bb.0: # %entry +; P5600-NEXT: ld.w $w0, 0($4) +; P5600-NEXT: fsqrt.w $w0, $w0 +; P5600-NEXT: st.w $w0, 0($5) +; P5600-NEXT: jr $ra +; P5600-NEXT: nop +entry: + %0 = load <4 x float>, <4 x float>* %a, align 16 + %1 = tail call <4 x float> @llvm.mips.fsqrt.w(<4 x float> %0) + store <4 x float> %1, <4 x float>* %c, align 16 + ret void +} + +declare <2 x double> @llvm.mips.fsqrt.d(<2 x double>) +define void @fsqrt_v2f64_builtin(<2 x double>* %a, <2 x double>* %c) { +; P5600-LABEL: fsqrt_v2f64_builtin: +; P5600: # %bb.0: # %entry +; P5600-NEXT: ld.d $w0, 0($4) +; P5600-NEXT: fsqrt.d $w0, $w0 +; P5600-NEXT: st.d $w0, 0($5) +; P5600-NEXT: jr $ra +; P5600-NEXT: nop +entry: + %0 = load <2 x double>, <2 x double>* %a, align 16 + %1 = tail call <2 x double> @llvm.mips.fsqrt.d(<2 x double> %0) + store <2 x double> %1, <2 x double>* %c, align 16 + ret void +} + diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/fsqrt_vec.mir 
b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/fsqrt_vec.mir new file mode 100644 index 000000000000..d36a0e519778 --- /dev/null +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/fsqrt_vec.mir @@ -0,0 +1,58 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600 +--- | + + define void @sqrt_v4f32(<4 x float>* %a, <4 x float>* %c) { entry: ret void } + define void @sqrt_v2f64(<2 x double>* %a, <2 x double>* %c) { entry: ret void } + +... +--- +name: sqrt_v4f32 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: sqrt_v4f32 + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 + ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[FSQRT:%[0-9]+]]:fprb(<4 x s32>) = G_FSQRT [[LOAD]] + ; P5600: G_STORE [[FSQRT]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: RetRA + %0:_(p0) = COPY $a0 + %1:_(p0) = COPY $a1 + %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %3:_(<4 x s32>) = G_FSQRT %2 + G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.c) + RetRA + +... 
+--- +name: sqrt_v2f64 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: sqrt_v2f64 + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 + ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a) + ; P5600: [[FSQRT:%[0-9]+]]:fprb(<2 x s64>) = G_FSQRT [[LOAD]] + ; P5600: G_STORE [[FSQRT]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.c) + ; P5600: RetRA + %0:_(p0) = COPY $a0 + %1:_(p0) = COPY $a1 + %2:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a) + %3:_(<2 x s64>) = G_FSQRT %2 + G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.c) + RetRA + +...