forked from OSchip/llvm-project
[MIPS GlobalISel] Select MSA vector generic and builtin fsqrt
selectImpl is able to select G_FSQRT once the register bank for vector operands is set to fprb. Add detailed tests. Note: G_FSQRT is generated from the LLVM IR intrinsics llvm.sqrt.*, and at the moment MIPS is not able to generate these intrinsics for vector types (some targets generate vector llvm.sqrt.* from calls to a builtin function). __builtin_msa_fsqrt_<format> is transformed into G_FSQRT in legalizeIntrinsic and selected in the same way. Differential Revision: https://reviews.llvm.org/D69376
This commit is contained in:
parent
3d9632a997
commit
417dd67825
|
@ -188,10 +188,7 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
|
|||
getActionDefinitionsBuilder(G_FCONSTANT)
|
||||
.legalFor({s32, s64});
|
||||
|
||||
getActionDefinitionsBuilder(G_FSQRT)
|
||||
.legalFor({s32, s64});
|
||||
|
||||
getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FABS})
|
||||
getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FABS, G_FSQRT})
|
||||
.legalIf([=, &ST](const LegalityQuery &Query) {
|
||||
if (CheckTyN(0, Query, {s32, s64}))
|
||||
return true;
|
||||
|
@ -326,6 +323,17 @@ static bool MSA3OpIntrinsicToGeneric(MachineInstr &MI, unsigned Opcode,
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Replace a one-source MSA intrinsic instruction with the generic
/// instruction \p Opcode.
///
/// A new instruction with opcode \p Opcode is built through \p MIRBuilder
/// from operand 0 and operand 2 of \p MI, after which \p MI is erased.
/// Operand 1 of \p MI is deliberately not forwarded; for the G_INTRINSIC
/// form handled here it is the intrinsic ID, which the generic opcode does
/// not take.
///
/// Declared static, like MSA3OpIntrinsicToGeneric, because this helper is
/// only meant to be used from within this file.
///
/// \param MI          Intrinsic instruction to rewrite; erased on return.
/// \param Opcode      Generic opcode to build in place of \p MI.
/// \param MIRBuilder  Builder used to create the replacement instruction.
/// \param ST          Subtarget; asserted to have MSA.
/// \returns true unconditionally.
static bool MSA2OpIntrinsicToGeneric(MachineInstr &MI, unsigned Opcode,
                                     MachineIRBuilder &MIRBuilder,
                                     const MipsSubtarget &ST) {
  assert(ST.hasMSA() && "MSA intrinsic not supported on target without MSA.");
  MIRBuilder.buildInstr(Opcode)
      .add(MI.getOperand(0))
      .add(MI.getOperand(2));
  MI.eraseFromParent();
  return true;
}
|
||||
|
||||
bool MipsLegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
|
||||
MachineRegisterInfo &MRI,
|
||||
MachineIRBuilder &MIRBuilder) const {
|
||||
|
@ -429,6 +437,10 @@ bool MipsLegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
|
|||
return SelectMSA3OpIntrinsic(MI, Mips::FMAX_A_W, MIRBuilder, ST);
|
||||
case Intrinsic::mips_fmax_a_d:
|
||||
return SelectMSA3OpIntrinsic(MI, Mips::FMAX_A_D, MIRBuilder, ST);
|
||||
case Intrinsic::mips_fsqrt_w:
|
||||
return MSA2OpIntrinsicToGeneric(MI, TargetOpcode::G_FSQRT, MIRBuilder, ST);
|
||||
case Intrinsic::mips_fsqrt_d:
|
||||
return MSA2OpIntrinsicToGeneric(MI, TargetOpcode::G_FSQRT, MIRBuilder, ST);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -539,14 +539,12 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
|||
&Mips::ValueMappings[Mips::GPRIdx]});
|
||||
MappingID = CustomMappingID;
|
||||
break;
|
||||
case G_FSQRT:
|
||||
OperandsMapping = getFprbMapping(Op0Size);
|
||||
break;
|
||||
case G_FADD:
|
||||
case G_FSUB:
|
||||
case G_FMUL:
|
||||
case G_FDIV:
|
||||
case G_FABS:
|
||||
case G_FSQRT:
|
||||
OperandsMapping = getFprbMapping(Op0Size);
|
||||
if (Op0Size == 128)
|
||||
OperandsMapping = getMSAMapping(MF);
|
||||
|
|
|
@ -0,0 +1,60 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600
|
||||
--- |
|
||||
|
||||
define void @sqrt_v4f32(<4 x float>* %a, <4 x float>* %c) { entry: ret void }
|
||||
define void @sqrt_v2f64(<2 x double>* %a, <2 x double>* %c) { entry: ret void }
|
||||
|
||||
...
|
||||
---
|
||||
name: sqrt_v4f32
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $a0, $a1
|
||||
|
||||
; P5600-LABEL: name: sqrt_v4f32
|
||||
; P5600: liveins: $a0, $a1
|
||||
; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
|
||||
; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
|
||||
; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.a)
|
||||
; P5600: [[FSQRT_W:%[0-9]+]]:msa128w = FSQRT_W [[LD_W]]
|
||||
; P5600: ST_W [[FSQRT_W]], [[COPY1]], 0 :: (store 16 into %ir.c)
|
||||
; P5600: RetRA
|
||||
%0:gprb(p0) = COPY $a0
|
||||
%1:gprb(p0) = COPY $a1
|
||||
%2:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
|
||||
%3:fprb(<4 x s32>) = G_FSQRT %2
|
||||
G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.c)
|
||||
RetRA
|
||||
|
||||
...
|
||||
---
|
||||
name: sqrt_v2f64
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $a0, $a1
|
||||
|
||||
; P5600-LABEL: name: sqrt_v2f64
|
||||
; P5600: liveins: $a0, $a1
|
||||
; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
|
||||
; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
|
||||
; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load 16 from %ir.a)
|
||||
; P5600: [[FSQRT_D:%[0-9]+]]:msa128d = FSQRT_D [[LD_D]]
|
||||
; P5600: ST_D [[FSQRT_D]], [[COPY1]], 0 :: (store 16 into %ir.c)
|
||||
; P5600: RetRA
|
||||
%0:gprb(p0) = COPY $a0
|
||||
%1:gprb(p0) = COPY $a1
|
||||
%2:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
|
||||
%3:fprb(<2 x s64>) = G_FSQRT %2
|
||||
G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.c)
|
||||
RetRA
|
||||
|
||||
...
|
|
@ -0,0 +1,56 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600
|
||||
--- |
|
||||
|
||||
define void @sqrt_v4f32(<4 x float>* %a, <4 x float>* %c) { entry: ret void }
|
||||
define void @sqrt_v2f64(<2 x double>* %a, <2 x double>* %c) { entry: ret void }
|
||||
|
||||
...
|
||||
---
|
||||
name: sqrt_v4f32
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $a0, $a1
|
||||
|
||||
; P5600-LABEL: name: sqrt_v4f32
|
||||
; P5600: liveins: $a0, $a1
|
||||
; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
|
||||
; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
|
||||
; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
|
||||
; P5600: [[FSQRT:%[0-9]+]]:_(<4 x s32>) = G_FSQRT [[LOAD]]
|
||||
; P5600: G_STORE [[FSQRT]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.c)
|
||||
; P5600: RetRA
|
||||
%0:_(p0) = COPY $a0
|
||||
%1:_(p0) = COPY $a1
|
||||
%2:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
|
||||
%3:_(<4 x s32>) = G_FSQRT %2
|
||||
G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.c)
|
||||
RetRA
|
||||
|
||||
...
|
||||
---
|
||||
name: sqrt_v2f64
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $a0, $a1
|
||||
|
||||
; P5600-LABEL: name: sqrt_v2f64
|
||||
; P5600: liveins: $a0, $a1
|
||||
; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
|
||||
; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
|
||||
; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
|
||||
; P5600: [[FSQRT:%[0-9]+]]:_(<2 x s64>) = G_FSQRT [[LOAD]]
|
||||
; P5600: G_STORE [[FSQRT]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.c)
|
||||
; P5600: RetRA
|
||||
%0:_(p0) = COPY $a0
|
||||
%1:_(p0) = COPY $a1
|
||||
%2:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
|
||||
%3:_(<2 x s64>) = G_FSQRT %2
|
||||
G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.c)
|
||||
RetRA
|
||||
|
||||
...
|
|
@ -0,0 +1,60 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600
|
||||
--- |
|
||||
|
||||
declare <4 x float> @llvm.mips.fsqrt.w(<4 x float>)
|
||||
define void @fsqrt_v4f32_builtin(<4 x float>* %a, <4 x float>* %c) { entry: ret void }
|
||||
|
||||
declare <2 x double> @llvm.mips.fsqrt.d(<2 x double>)
|
||||
define void @fsqrt_v2f64_builtin(<2 x double>* %a, <2 x double>* %c) { entry: ret void }
|
||||
|
||||
...
|
||||
---
|
||||
name: fsqrt_v4f32_builtin
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $a0, $a1
|
||||
|
||||
; P5600-LABEL: name: fsqrt_v4f32_builtin
|
||||
; P5600: liveins: $a0, $a1
|
||||
; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
|
||||
; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
|
||||
; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
|
||||
; P5600: [[FSQRT:%[0-9]+]]:_(<4 x s32>) = G_FSQRT [[LOAD]]
|
||||
; P5600: G_STORE [[FSQRT]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.c)
|
||||
; P5600: RetRA
|
||||
%0:_(p0) = COPY $a0
|
||||
%1:_(p0) = COPY $a1
|
||||
%2:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.fsqrt.w), %2(<4 x s32>)
|
||||
G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.c)
|
||||
RetRA
|
||||
|
||||
...
|
||||
---
|
||||
name: fsqrt_v2f64_builtin
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $a0, $a1
|
||||
|
||||
; P5600-LABEL: name: fsqrt_v2f64_builtin
|
||||
; P5600: liveins: $a0, $a1
|
||||
; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
|
||||
; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
|
||||
; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
|
||||
; P5600: [[FSQRT:%[0-9]+]]:_(<2 x s64>) = G_FSQRT [[LOAD]]
|
||||
; P5600: G_STORE [[FSQRT]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.c)
|
||||
; P5600: RetRA
|
||||
%0:_(p0) = COPY $a0
|
||||
%1:_(p0) = COPY $a1
|
||||
%2:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
|
||||
%3:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.mips.fsqrt.d), %2(<2 x s64>)
|
||||
G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.c)
|
||||
RetRA
|
||||
|
||||
...
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=P5600
|
||||
|
||||
declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %Val)
|
||||
define void @sqrt_v4f32(<4 x float>* %a, <4 x float>* %c) {
|
||||
; P5600-LABEL: sqrt_v4f32:
|
||||
; P5600: # %bb.0: # %entry
|
||||
; P5600-NEXT: ld.w $w0, 0($4)
|
||||
; P5600-NEXT: fsqrt.w $w0, $w0
|
||||
; P5600-NEXT: st.w $w0, 0($5)
|
||||
; P5600-NEXT: jr $ra
|
||||
; P5600-NEXT: nop
|
||||
entry:
|
||||
%0 = load <4 x float>, <4 x float>* %a, align 16
|
||||
%sqrt = call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %0)
|
||||
store <4 x float> %sqrt, <4 x float>* %c, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %Val)
|
||||
define void @sqrt_v2f64(<2 x double>* %a, <2 x double>* %c) {
|
||||
; P5600-LABEL: sqrt_v2f64:
|
||||
; P5600: # %bb.0: # %entry
|
||||
; P5600-NEXT: ld.d $w0, 0($4)
|
||||
; P5600-NEXT: fsqrt.d $w0, $w0
|
||||
; P5600-NEXT: st.d $w0, 0($5)
|
||||
; P5600-NEXT: jr $ra
|
||||
; P5600-NEXT: nop
|
||||
entry:
|
||||
%0 = load <2 x double>, <2 x double>* %a, align 16
|
||||
%sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %0)
|
||||
store <2 x double> %sqrt, <2 x double>* %c, align 16
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=P5600
|
||||
|
||||
declare <4 x float> @llvm.mips.fsqrt.w(<4 x float>)
|
||||
define void @fsqrt_v4f32_builtin(<4 x float>* %a, <4 x float>* %c) {
|
||||
; P5600-LABEL: fsqrt_v4f32_builtin:
|
||||
; P5600: # %bb.0: # %entry
|
||||
; P5600-NEXT: ld.w $w0, 0($4)
|
||||
; P5600-NEXT: fsqrt.w $w0, $w0
|
||||
; P5600-NEXT: st.w $w0, 0($5)
|
||||
; P5600-NEXT: jr $ra
|
||||
; P5600-NEXT: nop
|
||||
entry:
|
||||
%0 = load <4 x float>, <4 x float>* %a, align 16
|
||||
%1 = tail call <4 x float> @llvm.mips.fsqrt.w(<4 x float> %0)
|
||||
store <4 x float> %1, <4 x float>* %c, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.mips.fsqrt.d(<2 x double>)
|
||||
define void @fsqrt_v2f64_builtin(<2 x double>* %a, <2 x double>* %c) {
|
||||
; P5600-LABEL: fsqrt_v2f64_builtin:
|
||||
; P5600: # %bb.0: # %entry
|
||||
; P5600-NEXT: ld.d $w0, 0($4)
|
||||
; P5600-NEXT: fsqrt.d $w0, $w0
|
||||
; P5600-NEXT: st.d $w0, 0($5)
|
||||
; P5600-NEXT: jr $ra
|
||||
; P5600-NEXT: nop
|
||||
entry:
|
||||
%0 = load <2 x double>, <2 x double>* %a, align 16
|
||||
%1 = tail call <2 x double> @llvm.mips.fsqrt.d(<2 x double> %0)
|
||||
store <2 x double> %1, <2 x double>* %c, align 16
|
||||
ret void
|
||||
}
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600
|
||||
--- |
|
||||
|
||||
define void @sqrt_v4f32(<4 x float>* %a, <4 x float>* %c) { entry: ret void }
|
||||
define void @sqrt_v2f64(<2 x double>* %a, <2 x double>* %c) { entry: ret void }
|
||||
|
||||
...
|
||||
---
|
||||
name: sqrt_v4f32
|
||||
alignment: 4
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $a0, $a1
|
||||
|
||||
; P5600-LABEL: name: sqrt_v4f32
|
||||
; P5600: liveins: $a0, $a1
|
||||
; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0
|
||||
; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1
|
||||
; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
|
||||
; P5600: [[FSQRT:%[0-9]+]]:fprb(<4 x s32>) = G_FSQRT [[LOAD]]
|
||||
; P5600: G_STORE [[FSQRT]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.c)
|
||||
; P5600: RetRA
|
||||
%0:_(p0) = COPY $a0
|
||||
%1:_(p0) = COPY $a1
|
||||
%2:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
|
||||
%3:_(<4 x s32>) = G_FSQRT %2
|
||||
G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.c)
|
||||
RetRA
|
||||
|
||||
...
|
||||
---
|
||||
name: sqrt_v2f64
|
||||
alignment: 4
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $a0, $a1
|
||||
|
||||
; P5600-LABEL: name: sqrt_v2f64
|
||||
; P5600: liveins: $a0, $a1
|
||||
; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0
|
||||
; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1
|
||||
; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
|
||||
; P5600: [[FSQRT:%[0-9]+]]:fprb(<2 x s64>) = G_FSQRT [[LOAD]]
|
||||
; P5600: G_STORE [[FSQRT]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.c)
|
||||
; P5600: RetRA
|
||||
%0:_(p0) = COPY $a0
|
||||
%1:_(p0) = COPY $a1
|
||||
%2:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
|
||||
%3:_(<2 x s64>) = G_FSQRT %2
|
||||
G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.c)
|
||||
RetRA
|
||||
|
||||
...
|
Loading…
Reference in New Issue