# Patch summary (as far as this chunk shows):
#  - AMDGPULegalizerInfo.cpp: when ST.has16BitInsts() is true,
#    G_INTRINSIC_TRUNC / G_FCEIL / G_FRINT are declared legal for S16, S32 and
#    S64, followed by clampScalar(0, S16, S64) and scalarize(0). The previous
#    S32/S64-only rule for SEA_ISLANDS and newer becomes the `else if` branch.
#    The enclosing constructor continues beyond this hunk.
#  - inst-select-fceil.mir: new instruction-select test for 32- and 64-bit
#    G_FCEIL on -mcpu=hawaii, expecting V_CEIL_F32_e64 / V_CEIL_F64_e64.
# Test names end in <result bank><source bank> (v = vgpr, s = sgpr), so the
# 64-bit case with an sgpr source and a vgpr result is named fceil_s64_vs,
# in line with fceil_s32_vs.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index b74bec0198ac..3128dfa5739b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -466,7 +466,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
     .legalFor({S32, S64})
     .scalarize(0);
 
-  if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
+  if (ST.has16BitInsts()) {
+    getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
+      .legalFor({S16, S32, S64})
+      .clampScalar(0, S16, S64)
+      .scalarize(0);
+  } else if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
     getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
       .legalFor({S32, S64})
       .clampScalar(0, S32, S64)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.mir
new file mode 100644
index 000000000000..537464f09401
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.mir
@@ -0,0 +1,82 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: fceil_s32_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: fceil_s32_vv
+    ; CHECK: liveins: $vgpr0
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK: [[V_CEIL_F32_e64_:%[0-9]+]]:vgpr_32 = V_CEIL_F32_e64 0, [[COPY]], 0, 0, implicit $exec
+    ; CHECK: $vgpr0 = COPY [[V_CEIL_F32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = G_FCEIL %0
+    $vgpr0 = COPY %1
+...
+
+---
+name: fceil_s32_vs
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; CHECK-LABEL: name: fceil_s32_vs
+    ; CHECK: liveins: $sgpr0
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; CHECK: [[V_CEIL_F32_e64_:%[0-9]+]]:vgpr_32 = V_CEIL_F32_e64 0, [[COPY]], 0, 0, implicit $exec
+    ; CHECK: $vgpr0 = COPY [[V_CEIL_F32_e64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:vgpr(s32) = G_FCEIL %0
+    $vgpr0 = COPY %1
+...
+
+---
+name: fceil_s64_vs
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+
+    ; CHECK-LABEL: name: fceil_s64_vs
+    ; CHECK: liveins: $sgpr0_sgpr1
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; CHECK: [[V_CEIL_F64_e64_:%[0-9]+]]:vreg_64 = V_CEIL_F64_e64 0, [[COPY]], 0, 0, implicit $exec
+    ; CHECK: $vgpr0_vgpr1 = COPY [[V_CEIL_F64_e64_]]
+    %0:sgpr(s64) = COPY $sgpr0_sgpr1
+    %1:vgpr(s64) = G_FCEIL %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: fceil_s64_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: fceil_s64_vv
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; CHECK: [[V_CEIL_F64_e64_:%[0-9]+]]:vreg_64 = V_CEIL_F64_e64 0, [[COPY]], 0, 0, implicit $exec
+    ; CHECK: $vgpr0_vgpr1 = COPY [[V_CEIL_F64_e64_]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_FCEIL %0
+    $vgpr0_vgpr1 = COPY %1
+...
# inst-select-fceil.s16.mir: new instruction-select test for 16-bit G_FCEIL on
# -mcpu=fiji. Names end in <result bank><source bank> (v = vgpr, s = sgpr).
#  - fceil_s16_ss: all-sgpr case. The expected output still contains G_FCEIL,
#    i.e. no machine instruction is selected for it. The RUN line passes
#    -global-isel-abort=0, presumably so that this selection failure does not
#    abort llc -- confirm.
#  - fceil_s16_vv / fceil_s16_vs: expect V_CEIL_F16_e64 producing a vgpr from a
#    vgpr or an sgpr source.
#  - fceil_fneg_s16_vv: G_FNEG feeding G_FCEIL. The expected V_CEIL_F16_e64 has
#    1 instead of 0 as its first operand and no separate negate remains
#    (presumably the neg source modifier -- confirm against the instruction's
#    operand list).
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir
new file mode 100644
index 000000000000..92b615e8cf6e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir
@@ -0,0 +1,93 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+
+---
+name: fceil_s16_ss
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: fceil_s16_ss
+    ; GCN: liveins: $sgpr0
+    ; GCN: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GCN: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GCN: [[FCEIL:%[0-9]+]]:sreg_32(s16) = G_FCEIL [[TRUNC]]
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FCEIL]](s16)
+    ; GCN: $sgpr0 = COPY [[COPY1]](s32)
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s16) = G_TRUNC %0
+    %2:sgpr(s16) = G_FCEIL %1
+    %3:sgpr(s32) = G_ANYEXT %2
+    $sgpr0 = COPY %3
+...
+
+---
+name: fceil_s16_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: fceil_s16_vv
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_CEIL_F16_e64_:%[0-9]+]]:vgpr_32 = V_CEIL_F16_e64 0, [[COPY]], 0, 0, implicit $exec
+    ; GCN: $vgpr0 = COPY [[V_CEIL_F16_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s16) = G_TRUNC %0
+    %2:vgpr(s16) = G_FCEIL %1
+    %3:vgpr(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: fceil_s16_vs
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: fceil_s16_vs
+    ; GCN: liveins: $sgpr0
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: [[V_CEIL_F16_e64_:%[0-9]+]]:vgpr_32 = V_CEIL_F16_e64 0, [[COPY]], 0, 0, implicit $exec
+    ; GCN: $vgpr0 = COPY [[V_CEIL_F16_e64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s16) = G_TRUNC %0
+    %2:vgpr(s16) = G_FCEIL %1
+    %3:vgpr(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: fceil_fneg_s16_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: fceil_fneg_s16_vv
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_CEIL_F16_e64_:%[0-9]+]]:vgpr_32 = V_CEIL_F16_e64 1, [[COPY]], 0, 0, implicit $exec
+    ; GCN: $vgpr0 = COPY [[V_CEIL_F16_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s16) = G_TRUNC %0
+    %2:vgpr(s16) = G_FNEG %1
+    %3:vgpr(s16) = G_FCEIL %2
+    %4:vgpr(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+...
# inst-select-frint.s16.mir: new instruction-select test for 16-bit G_FRINT on
# -mcpu=fiji. Names end in <result bank><source bank> (v = vgpr, s = sgpr).
#  - frint_s16_ss: all-sgpr case. The expected output still contains G_FRINT,
#    i.e. no machine instruction is selected for it. The RUN line passes
#    -global-isel-abort=0, presumably so that this selection failure does not
#    abort llc -- confirm.
#  - frint_s16_vv / frint_s16_vs: expect V_RNDNE_F16_e64 producing a vgpr from
#    a vgpr or an sgpr source.
#  - frint_fneg_s16_vv: G_FNEG feeding G_FRINT. The expected V_RNDNE_F16_e64
#    has 1 instead of 0 as its first operand and no separate negate remains
#    (presumably the neg source modifier -- confirm against the instruction's
#    operand list).
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.s16.mir
new file mode 100644
index 000000000000..c72ea740a398
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-frint.s16.mir
@@ -0,0 +1,93 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+
+---
+name: frint_s16_ss
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: frint_s16_ss
+    ; GCN: liveins: $sgpr0
+    ; GCN: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GCN: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GCN: [[FRINT:%[0-9]+]]:sreg_32(s16) = G_FRINT [[TRUNC]]
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_32(s32) = COPY [[FRINT]](s16)
+    ; GCN: $sgpr0 = COPY [[COPY1]](s32)
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s16) = G_TRUNC %0
+    %2:sgpr(s16) = G_FRINT %1
+    %3:sgpr(s32) = G_ANYEXT %2
+    $sgpr0 = COPY %3
+...
+
+---
+name: frint_s16_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: frint_s16_vv
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_RNDNE_F16_e64_:%[0-9]+]]:vgpr_32 = V_RNDNE_F16_e64 0, [[COPY]], 0, 0, implicit $exec
+    ; GCN: $vgpr0 = COPY [[V_RNDNE_F16_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s16) = G_TRUNC %0
+    %2:vgpr(s16) = G_FRINT %1
+    %3:vgpr(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: frint_s16_vs
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: frint_s16_vs
+    ; GCN: liveins: $sgpr0
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: [[V_RNDNE_F16_e64_:%[0-9]+]]:vgpr_32 = V_RNDNE_F16_e64 0, [[COPY]], 0, 0, implicit $exec
+    ; GCN: $vgpr0 = COPY [[V_RNDNE_F16_e64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s16) = G_TRUNC %0
+    %2:vgpr(s16) = G_FRINT %1
+    %3:vgpr(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: frint_fneg_s16_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: frint_fneg_s16_vv
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_RNDNE_F16_e64_:%[0-9]+]]:vgpr_32 = V_RNDNE_F16_e64 1, [[COPY]], 0, 0, implicit $exec
+    ; GCN: $vgpr0 = COPY [[V_RNDNE_F16_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s16) = G_TRUNC %0
+    %2:vgpr(s16) = G_FNEG %1
+    %3:vgpr(s16) = G_FRINT %2
+    %4:vgpr(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+...
# inst-select-intrinsic-trunc.s16.mir: new instruction-select test for 16-bit
# G_INTRINSIC_TRUNC on -mcpu=fiji, expecting V_TRUNC_F16_e64. Names end in
# <result bank><source bank> (v = vgpr, s = sgpr). This file has no all-sgpr
# (_ss) case, and its RUN line does not pass -global-isel-abort=0.
#  - intrinsic_trunc_fneg_s16_vv: G_FNEG feeding G_INTRINSIC_TRUNC. The expected
#    V_TRUNC_F16_e64 has 1 instead of 0 as its first operand and no separate
#    negate remains (presumably the neg source modifier -- confirm against the
#    instruction's operand list).
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.s16.mir
new file mode 100644
index 000000000000..1bf97cac9602
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-intrinsic-trunc.s16.mir
@@ -0,0 +1,69 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+
+---
+name: intrinsic_trunc_s16_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: intrinsic_trunc_s16_vv
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_TRUNC_F16_e64_:%[0-9]+]]:vgpr_32 = V_TRUNC_F16_e64 0, [[COPY]], 0, 0, implicit $exec
+    ; GCN: $vgpr0 = COPY [[V_TRUNC_F16_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s16) = G_TRUNC %0
+    %2:vgpr(s16) = G_INTRINSIC_TRUNC %1
+    %3:vgpr(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: intrinsic_trunc_s16_vs
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: intrinsic_trunc_s16_vs
+    ; GCN: liveins: $sgpr0
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: [[V_TRUNC_F16_e64_:%[0-9]+]]:vgpr_32 = V_TRUNC_F16_e64 0, [[COPY]], 0, 0, implicit $exec
+    ; GCN: $vgpr0 = COPY [[V_TRUNC_F16_e64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s16) = G_TRUNC %0
+    %2:vgpr(s16) = G_INTRINSIC_TRUNC %1
+    %3:vgpr(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: intrinsic_trunc_fneg_s16_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: intrinsic_trunc_fneg_s16_vv
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_TRUNC_F16_e64_:%[0-9]+]]:vgpr_32 = V_TRUNC_F16_e64 1, [[COPY]], 0, 0, implicit $exec
+    ; GCN: $vgpr0 = COPY [[V_TRUNC_F16_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s16) = G_TRUNC %0
+    %2:vgpr(s16) = G_FNEG %1
+    %3:vgpr(s16) = G_INTRINSIC_TRUNC %2
+    %4:vgpr(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+...
# legalize-fceil.mir: updated legalizer expectations for test_fceil_s16 under
# the VI and GFX9 check prefixes. The s16 G_FCEIL is now expected to stay s16;
# the former G_FPEXT -> s32 G_FCEIL -> G_FPTRUNC sequence is removed from the
# expected output. Only part of the test function is visible in this hunk.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fceil.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fceil.mir
index 4d39291c41e6..39537fef6471 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fceil.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fceil.mir
@@ -29,18 +29,14 @@ body: |
     ; VI-LABEL: name: test_fceil_s16
     ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; VI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
-    ; VI: [[FCEIL:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT]]
-    ; VI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL]](s32)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
+    ; VI: [[FCEIL:%[0-9]+]]:_(s16) = G_FCEIL [[TRUNC]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCEIL]](s16)
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_fceil_s16
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
-    ; GFX9: [[FCEIL:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT]]
-    ; GFX9: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL]](s32)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
+    ; GFX9: [[FCEIL:%[0-9]+]]:_(s16) = G_FCEIL [[TRUNC]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FCEIL]](s16)
     ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s16) = G_TRUNC %0
# legalize-intrinsic-trunc.mir: updated legalizer expectations for
# test_intrinsic_trunc_s16 under the VI and GFX9 check prefixes. The s16
# G_INTRINSIC_TRUNC is now expected to stay s16; the former
# G_FPEXT -> s32 G_INTRINSIC_TRUNC -> G_FPTRUNC sequence is removed from the
# expected output. Only part of the test function is visible in this hunk.
# NOTE(review): no corresponding legalize-frint.mir change is visible in this
# chunk -- confirm whether an s16 G_FRINT legalizer test exists and needs the
# same update.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-trunc.mir
index d1d9528e1fca..ad3d02eea0ef 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-trunc.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-trunc.mir
@@ -29,18 +29,14 @@ body: |
     ; VI-LABEL: name: test_intrinsic_trunc_s16
     ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; VI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
-    ; VI: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]]
-    ; VI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32)
-    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
+    ; VI: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INTRINSIC_TRUNC]](s16)
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_intrinsic_trunc_s16
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
-    ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]]
-    ; GFX9: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32)
-    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
+    ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INTRINSIC_TRUNC]](s16)
     ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s16) = G_TRUNC %0