From 25e9938a45e8dfde752a4e93c48ff0184d4784d9 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 10 Jan 2020 10:07:24 -0500
Subject: [PATCH] GlobalISel: Handle more cases of G_SEXT narrowing

This now develops the same problem G_ZEXT/G_ANYEXT have where the
requested type is assumed to be the source type. This will be fixed
separately by creating intermediate merges.
---
 .../CodeGen/GlobalISel/LegalizerHelper.cpp |  39 ++++-------
 .../AMDGPU/GlobalISel/legalize-sext.mir    |  67 ++++++++++++++-----
 2 files changed, 62 insertions(+), 44 deletions(-)
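Note (commentary, not part of the commit): the new G_SEXT path reuses the
existing G_ZEXT/G_ANYEXT merge lowering, but pads with an arithmetic shift
of the source by (source width - 1), so every high part of the merge is a
copy of the sign word. A minimal standalone C++ sketch of the value this
computes for a 32-bit source widened to 128 bits (variable names are
illustrative only, not taken from the patch):

    #include <cstdint>
    #include <cinttypes>
    #include <cstdio>

    int main() {
      int32_t Src = -42;
      // PadReg = G_ASHR Src, 31: every bit becomes a copy of the sign bit,
      // so Pad is 0x00000000 or 0xffffffff. (Signed >> is arithmetic on all
      // mainstream compilers, and guaranteed arithmetic since C++20.)
      uint32_t Pad = (uint32_t)(Src >> 31);
      // G_MERGE_VALUES Src, Pad, Pad, Pad: the low part is the source, the
      // three high parts replicate the sign word.
      uint32_t Parts[4] = {(uint32_t)Src, Pad, Pad, Pad};
      for (int I = 3; I >= 0; --I) // print most-significant part first
        printf("%08" PRIx32, Parts[I]);
      printf("\n"); // ffffffffffffffffffffffffffffffd6 == sext(-42) to s128
      return 0;
    }

The updated CHECK lines below encode exactly this shape: one G_ASHR by 31
(or 63 for s64 sources), then a G_MERGE_VALUES whose first operand is the
source and whose remaining operands all reuse the shifted value.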
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 8a26703c2741..2fdd2fff7df3 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -657,35 +657,14 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     MI.eraseFromParent();
     return Legalized;
   }
-  case TargetOpcode::G_SEXT: {
-    if (TypeIdx != 0)
-      return UnableToLegalize;
-
-    Register SrcReg = MI.getOperand(1).getReg();
-    LLT SrcTy = MRI.getType(SrcReg);
-
-    // FIXME: support the general case where the requested NarrowTy may not be
-    // the same as the source type. E.g. s128 = sext(s32)
-    if ((SrcTy.getSizeInBits() != SizeOp0 / 2) ||
-        SrcTy.getSizeInBits() != NarrowTy.getSizeInBits()) {
-      LLVM_DEBUG(dbgs() << "Can't narrow sext to type " << NarrowTy << "\n");
-      return UnableToLegalize;
-    }
-
-    // Shift the sign bit of the low register through the high register.
-    auto ShiftAmt =
-        MIRBuilder.buildConstant(LLT::scalar(64), NarrowTy.getSizeInBits() - 1);
-    auto Shift = MIRBuilder.buildAShr(NarrowTy, SrcReg, ShiftAmt);
-    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {SrcReg, Shift.getReg(0)});
-    MI.eraseFromParent();
-    return Legalized;
-  }
+  case TargetOpcode::G_SEXT:
   case TargetOpcode::G_ZEXT:
   case TargetOpcode::G_ANYEXT: {
     if (TypeIdx != 0)
       return UnableToLegalize;
 
-    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+    Register SrcReg = MI.getOperand(1).getReg();
+    LLT SrcTy = MRI.getType(SrcReg);
     uint64_t SizeOp1 = SrcTy.getSizeInBits();
     if (SizeOp0 % SizeOp1 != 0)
       return UnableToLegalize;
@@ -693,13 +672,19 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     Register PadReg;
     if (MI.getOpcode() == TargetOpcode::G_ZEXT)
       PadReg = MIRBuilder.buildConstant(SrcTy, 0).getReg(0);
-    else
+    else if (MI.getOpcode() == TargetOpcode::G_ANYEXT)
       PadReg = MIRBuilder.buildUndef(SrcTy).getReg(0);
+    else {
+      // Shift the sign bit of the low register through the high register.
+      auto ShiftAmt =
+          MIRBuilder.buildConstant(LLT::scalar(64), SrcTy.getSizeInBits() - 1);
+      PadReg = MIRBuilder.buildAShr(SrcTy, SrcReg, ShiftAmt).getReg(0);
+    }
 
     // Generate a merge where the bottom bits are taken from the source, and
-    // zero/impdef everything else.
+    // zero/impdef/sign bit everything else.
     unsigned NumParts = SizeOp0 / SizeOp1;
-    SmallVector<Register, 8> Srcs = {MI.getOperand(1).getReg()};
+    SmallVector<Register, 8> Srcs = {SrcReg};
     for (unsigned Part = 1; Part < NumParts; ++Part)
       Srcs.push_back(PadReg);
     MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Srcs);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir
index cbff2b8f109c..1febfef963ad 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s
+# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s
 
 ---
 name: test_sext_s32_to_s64
@@ -336,8 +336,10 @@ body: |
 
     ; CHECK-LABEL: name: test_sext_s32_to_s128
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s128) = G_SEXT [[COPY]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[SEXT]](s128)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[MV]](s128)
     %0:_(s32) = COPY $vgpr0
     %1:_(s128) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -351,8 +353,10 @@ body: |
 
    ; CHECK-LABEL: name: test_sext_s32_to_s256
    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s256) = G_SEXT [[COPY]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[SEXT]](s256)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; CHECK: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[MV]](s256)
     %0:_(s32) = COPY $vgpr0
     %1:_(s256) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -366,8 +370,10 @@ body: |
 
    ; CHECK-LABEL: name: test_sext_s32_to_s512
    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s512) = G_SEXT [[COPY]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[SEXT]](s512)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; CHECK: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[MV]](s512)
     %0:_(s32) = COPY $vgpr0
     %1:_(s512) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -381,8 +387,10 @@ body: |
 
    ; CHECK-LABEL: name: test_sext_s32_to_s1024
    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[COPY]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[SEXT]](s1024)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; CHECK: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[MV]](s1024)
     %0:_(s32) = COPY $vgpr0
     %1:_(s1024) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -413,8 +421,10 @@ body: |
 
    ; CHECK-LABEL: name: test_sext_s64_to_s256
    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s256) = G_SEXT [[COPY]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[SEXT]](s256)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
+    ; CHECK: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64)
+    ; CHECK: S_ENDPGM 0, implicit [[MV]](s256)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s256) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -428,8 +438,10 @@ body: |
 
    ; CHECK-LABEL: name: test_sext_s64_to_s512
    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s512) = G_SEXT [[COPY]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[SEXT]](s512)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
+    ; CHECK: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64)
+    ; CHECK: S_ENDPGM 0, implicit [[MV]](s512)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s512) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -443,8 +455,10 @@ body: |
 
    ; CHECK-LABEL: name: test_sext_s64_to_s1024
    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[COPY]](s64)
-    ; CHECK: S_ENDPGM 0, implicit [[SEXT]](s1024)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
+    ; CHECK: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64)
+    ; CHECK: S_ENDPGM 0, implicit [[MV]](s1024)
     %0:_(s64) = COPY $vgpr0_vgpr1
     %1:_(s1024) = G_SEXT %0
     S_ENDPGM 0, implicit %1
@@ -469,8 +483,27 @@ body: |
 
    ; CHECK-LABEL: name: test_sext_s128_to_s256
    ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s256) = G_SEXT [[COPY]](s128)
-    ; CHECK: S_ENDPGM 0, implicit [[SEXT]](s256)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
+    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
+    ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[C1]]
+    ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[C]]
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[C]](s32), [[C1]]
+    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[C]](s32), [[C2]]
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32)
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32)
+    ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]]
+    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
+    ; CHECK: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C3]](s32)
+    ; CHECK: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[SUB]](s32)
+    ; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]]
+    ; CHECK: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]]
+    ; CHECK: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]]
+    ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64)
+    ; CHECK: [[MV1:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s128), [[MV]](s128)
+    ; CHECK: S_ENDPGM 0, implicit [[MV1]](s256)
     %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s256) = G_SEXT %0
     S_ENDPGM 0, implicit %1