From 83a25a101051b404bec1a5ba9cb867705f31262d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 26 Mar 2021 17:29:36 -0400 Subject: [PATCH] GlobalISel: Restrict narrow scalar for fptoui/fptosi results This practically only works for the f16 case AMDGPU uses, not wider types. Fixes bug 49710 by failing legalization. --- .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 1 + .../CodeGen/GlobalISel/LegalizerHelper.cpp | 44 ++++++++++++------- .../AArch64/GlobalISel/legalize-fptoi.mir | 28 ++++++++++++ 3 files changed, 57 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index d276fab8988a..e40bdbca33c0 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -330,6 +330,7 @@ public: LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty); + LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 9eb4c80e803e..99b6ea805d9c 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1216,22 +1216,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Observer.changedInstr(MI); return Legalized; } - case TargetOpcode::G_FPTOUI: { - if (TypeIdx != 0) - return UnableToLegalize; - Observer.changingInstr(MI); - narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT); - Observer.changedInstr(MI); - return Legalized; - } - case TargetOpcode::G_FPTOSI: { - if (TypeIdx != 0) - return UnableToLegalize; - Observer.changingInstr(MI); - narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_SEXT); - Observer.changedInstr(MI); - return Legalized; - } + case TargetOpcode::G_FPTOUI: + case TargetOpcode::G_FPTOSI: + return narrowScalarFPTOI(MI, TypeIdx, NarrowTy); case TargetOpcode::G_FPEXT: if (TypeIdx != 0) return UnableToLegalize; @@ -4846,6 +4833,31 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) { return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI; + + Register Src = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(Src); + + // If all finite floats fit into the narrowed integer type, we can just swap + // out the result type. This is practically only useful for conversions from + // half to at least 16-bits, so just handle the one case. + if (SrcTy.getScalarType() != LLT::scalar(16) || + NarrowTy.getScalarSizeInBits() < (IsSigned ? 17 : 16)) + return UnableToLegalize; + + Observer.changingInstr(MI); + narrowScalarDst(MI, NarrowTy, 0, + IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir index 94390563f6ba..b2ee3a6cc777 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir @@ -238,3 +238,31 @@ body: | %1:_(<4 x s32>) = G_FPTOSI %0 $q0 = COPY %1 ... + +--- +name: test_fptoui_s128_s32 +body: | + bb.0: + liveins: $w0 + ; CHECK-LABEL: name: test_fptoui_s128_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[FPTOUI:%[0-9]+]]:_(s128) = G_FPTOUI [[COPY]](s32) + ; CHECK: $q0 = COPY [[FPTOUI]](s128) + %0:_(s32) = COPY $w0 + %1:_(s128) = G_FPTOUI %0 + $q0 = COPY %1 +... + +--- +name: test_fptosi_s128_s32 +body: | + bb.0: + liveins: $w0 + ; CHECK-LABEL: name: test_fptosi_s128_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[FPTOSI:%[0-9]+]]:_(s128) = G_FPTOSI [[COPY]](s32) + ; CHECK: $q0 = COPY [[FPTOSI]](s128) + %0:_(s32) = COPY $w0 + %1:_(s128) = G_FPTOSI %0 + $q0 = COPY %1 +...