From 0528e2cfb387af854f184697459d2c808a916c73 Mon Sep 17 00:00:00 2001
From: Diana Picus
Date: Mon, 26 Nov 2018 11:07:02 +0000
Subject: [PATCH] [ARM GlobalISel] Support G_CTLZ and G_CTLZ_ZERO_UNDEF

We can now select CLZ via the TableGen'erated code, so support G_CTLZ
and G_CTLZ_ZERO_UNDEF throughout the pipeline for types <= s32.

Legalizer:
If the CLZ instruction is available, use it for both G_CTLZ and
G_CTLZ_ZERO_UNDEF. Otherwise, use a libcall for G_CTLZ_ZERO_UNDEF and
lower G_CTLZ in terms of it.

In order to achieve this we need to add support to the LegalizerHelper
for the legalization of G_CTLZ_ZERO_UNDEF for s32 as a libcall
(__clzsi2).

We also need to allow lowering of G_CTLZ in terms of G_CTLZ_ZERO_UNDEF
if that is supported as a libcall, as opposed to just if it is Legal or
Custom. Due to a minor refactoring of the helper function in charge of
this, we will also allow the same behaviour for G_CTTZ and G_CTPOP.
This is not going to be a problem in practice since we don't yet have
support for treating G_CTTZ and G_CTPOP as libcalls (not even in
DAGISel).

Reg bank select:
Map G_CTLZ to GPR. G_CTLZ_ZERO_UNDEF should not make it to this point.

Instruction select:
Nothing to do.

llvm-svn: 347545 --- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 21 ++- llvm/lib/Target/ARM/ARMLegalizerInfo.cpp | 16 ++ llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp | 1 + .../ARM/GlobalISel/arm-legalize-bitcounts.mir | 177 ++++++++++++++++++ .../ARM/GlobalISel/arm-regbankselect.mir | 22 +++ .../GlobalISel/LegalizerHelperTest.cpp | 27 +++ 6 files changed, 255 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 37069ce3cea5..d3773c619861 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -93,6 +93,9 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { case TargetOpcode::G_UREM: assert(Size == 32 && "Unsupported size"); return RTLIB::UREM_I32; + case TargetOpcode::G_CTLZ_ZERO_UNDEF: + assert(Size == 32 && "Unsupported size"); + return RTLIB::CTLZ_I32; case TargetOpcode::G_FADD: assert((Size == 32 || Size == 64) && "Unsupported size"); return Size == 64 ? 
RTLIB::ADD_F64 : RTLIB::ADD_F32; @@ -189,7 +192,8 @@ LegalizerHelper::libcall(MachineInstr &MI) { case TargetOpcode::G_SDIV: case TargetOpcode::G_UDIV: case TargetOpcode::G_SREM: - case TargetOpcode::G_UREM: { + case TargetOpcode::G_UREM: + case TargetOpcode::G_CTLZ_ZERO_UNDEF: { Type *HLTy = Type::getInt32Ty(Ctx); auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); if (Status != Legalized) @@ -1108,9 +1112,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { unsigned Opc = MI.getOpcode(); auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); - auto isLegalOrCustom = [this](const LegalityQuery &Q) { + auto isSupported = [this](const LegalityQuery &Q) { auto QAction = LI.getAction(Q).Action; - return QAction == Legal || QAction == Custom; + return QAction == Legal || QAction == Libcall || QAction == Custom; }; switch (Opc) { default: @@ -1124,9 +1128,8 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { case TargetOpcode::G_CTLZ: { unsigned SrcReg = MI.getOperand(1).getReg(); unsigned Len = Ty.getSizeInBits(); - if (isLegalOrCustom({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) { - // If CTLZ_ZERO_UNDEF is legal or custom, emit that and a select with - // zero. + if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) { + // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero. auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF, Ty, SrcReg); auto MIBZero = MIRBuilder.buildConstant(Ty, 0); @@ -1173,7 +1176,7 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { case TargetOpcode::G_CTTZ: { unsigned SrcReg = MI.getOperand(1).getReg(); unsigned Len = Ty.getSizeInBits(); - if (isLegalOrCustom({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) { + if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) { // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with // zero. 
auto MIBCttzZU = @@ -1197,8 +1200,8 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { auto MIBTmp = MIRBuilder.buildInstr( TargetOpcode::G_AND, Ty, MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, Ty, SrcReg, MIBCstNeg1)); - if (!isLegalOrCustom({TargetOpcode::G_CTPOP, {Ty}}) && - isLegalOrCustom({TargetOpcode::G_CTLZ, {Ty}})) { + if (!isSupported({TargetOpcode::G_CTPOP, {Ty}}) && + isSupported({TargetOpcode::G_CTLZ, {Ty}})) { auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len); MIRBuilder.buildInstr( TargetOpcode::G_SUB, MI.getOperand(0).getReg(), diff --git a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp index 891418306903..42b164fb8db0 100644 --- a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -109,6 +109,22 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL}).legalFor({s32}); + if (ST.hasV5TOps()) { + getActionDefinitionsBuilder(G_CTLZ) + .legalFor({s32}) + .clampScalar(0, s32, s32); + getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF) + .lowerFor({s32}) + .clampScalar(0, s32, s32); + } else { + getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF) + .libcallFor({s32}) + .clampScalar(0, s32, s32); + getActionDefinitionsBuilder(G_CTLZ) + .lowerFor({s32}) + .clampScalar(0, s32, s32); + } + getActionDefinitionsBuilder(G_GEP).legalFor({{p0, s32}}); getActionDefinitionsBuilder(G_SELECT).legalForCartesianProduct({s32, p0}, diff --git a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp index 0e16d6bcfe2b..4f28f2dafc70 100644 --- a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -234,6 +234,7 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case G_GEP: case G_INTTOPTR: case G_PTRTOINT: + case G_CTLZ: // FIXME: We're abusing the fact that everything lives in a GPR for now; in // the real world we 
would use different mappings. OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx]; diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir new file mode 100644 index 000000000000..c9323eedced4 --- /dev/null +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir @@ -0,0 +1,177 @@ +# RUN: llc -mtriple arm-linux-gnueabi -mattr=+v5t -run-pass=legalizer %s -o - | FileCheck %s -check-prefixes=CHECK,CLZ +# RUN: llc -mtriple arm-linux-gnueabi -mattr=-v5t -run-pass=legalizer %s -o - | FileCheck %s -check-prefixes=CHECK,LIBCALLS +--- | + define void @test_ctlz_s32() { ret void } + define void @test_ctlz_zero_undef_s32() { ret void } + + ; same as above but with extensions + define void @test_ctlz_s16() { ret void } + define void @test_ctlz_zero_undef_s8() { ret void } +... +--- +name: test_ctlz_s32 +# CHECK-LABEL: name: test_ctlz_s32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + liveins: $r0 + + ; CHECK: [[X:%[0-9]+]]:_(s32) = COPY $r0 + %0(s32) = COPY $r0 + + ; CLZ: [[R:%[0-9]+]]:_(s32) = G_CTLZ [[X]] + ; LIBCALLS-NOT: G_CTLZ + ; LIBCALLS: ADJCALLSTACKDOWN + ; LIBCALLS: $r0 = COPY [[X]] + ; LIBCALLS: BL &__clzsi2, {{.*}}, implicit $r0, implicit-def $r0 + ; LIBCALLS: [[COUNT:%[0-9]+]]:_(s32) = COPY $r0 + ; LIBCALLS: ADJCALLSTACKUP + ; LIBCALLS-NOT: G_CTLZ + ; LIBCALLS: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; LIBCALLS: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; LIBCALLS: [[CMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[X]](s32), [[ZERO]] + ; LIBCALLS: [[R:%[0-9]+]]:_(s32) = G_SELECT [[CMP]](s1), [[BITS]], [[COUNT]] + ; LIBCALLS-NOT: G_CTLZ + %1(s32) = G_CTLZ %0 + + ; CHECK: $r0 = COPY [[R]] + $r0 = COPY %1(s32) + BX_RET 14, $noreg, implicit $r0 +... 
+--- +name: test_ctlz_zero_undef_s32 +# CHECK-LABEL: name: test_ctlz_zero_undef_s32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + liveins: $r0 + + ; CHECK: [[X:%[0-9]+]]:_(s32) = COPY $r0 + %0(s32) = COPY $r0 + + ; CLZ: [[R:%[0-9]+]]:_(s32) = G_CTLZ [[X]] + ; LIBCALLS-NOT: G_CTLZ + ; LIBCALLS: ADJCALLSTACKDOWN + ; LIBCALLS: $r0 = COPY [[X]] + ; LIBCALLS: BL &__clzsi2, {{.*}}, implicit $r0, implicit-def $r0 + ; LIBCALLS: [[R:%[0-9]+]]:_(s32) = COPY $r0 + ; LIBCALLS: ADJCALLSTACKUP + ; LIBCALLS-NOT: G_CTLZ + %1(s32) = G_CTLZ_ZERO_UNDEF %0 + + ; CHECK: $r0 = COPY [[R]] + $r0 = COPY %1(s32) + BX_RET 14, $noreg, implicit $r0 +... +--- +name: test_ctlz_s16 +# CHECK-LABEL: name: test_ctlz_s16 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } +body: | + bb.0: + liveins: $r0 + + ; CHECK: [[X:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK: [[BITMASK:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[XAGAIN:%[0-9]+]]:_(s32) = COPY [[X]] + ; CHECK: [[X32:%[0-9]+]]:_(s32) = G_AND [[XAGAIN]], [[BITMASK]] + %0(s32) = COPY $r0 + %1(s16) = G_TRUNC %0(s32) + + ; Check that the operation is performed for 32 bits + ; CLZ: [[COUNT:%[0-9]+]]:_(s32) = G_CTLZ [[X32]] + ; LIBCALLS-NOT: G_CTLZ + ; LIBCALLS: ADJCALLSTACKDOWN + ; LIBCALLS: $r0 = COPY [[X32]] + ; LIBCALLS: BL &__clzsi2, {{.*}}, implicit $r0, implicit-def $r0 + ; LIBCALLS: [[UNDEFCOUNT:%[0-9]+]]:_(s32) = COPY $r0 + ; LIBCALLS: ADJCALLSTACKUP + ; LIBCALLS-NOT: G_CTLZ + ; LIBCALLS: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; LIBCALLS: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; LIBCALLS: [[CMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), {{%[0-9]+}}(s32), [[ZERO]] + ; LIBCALLS: [[COUNT:%[0-9]+]]:_(s32) = G_SELECT [[CMP]](s1), 
[[BITS]], [[UNDEFCOUNT]] + ; LIBCALLS-NOT: G_CTLZ + ; CHECK: [[BITDIFF:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[R32:%[0-9]+]]:_(s32) = G_SUB [[COUNT]], [[BITDIFF]] + %2(s16) = G_CTLZ %1 + + ; CHECK: [[BITDIFF:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[RAGAIN:%[0-9]+]]:_(s32) = COPY [[R32]] + ; CHECK: [[SHIFTEDR:%[0-9]+]]:_(s32) = G_SHL [[RAGAIN]], [[BITDIFF]] + ; CHECK: [[R:%[0-9]+]]:_(s32) = G_ASHR [[SHIFTEDR]], [[BITDIFF]] + ; CHECK: $r0 = COPY [[R]] + %3(s32) = G_SEXT %2(s16) + $r0 = COPY %3(s32) + BX_RET 14, $noreg, implicit $r0 +... +--- +name: test_ctlz_zero_undef_s8 +# CHECK-LABEL: name: test_ctlz_zero_undef_s8 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } +body: | + bb.0: + liveins: $r0 + + ; CHECK: [[X:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK: [[BITMASK:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[XAGAIN:%[0-9]+]]:_(s32) = COPY [[X]] + ; CHECK: [[X32:%[0-9]+]]:_(s32) = G_AND [[XAGAIN]], [[BITMASK]] + %0(s32) = COPY $r0 + %1(s8) = G_TRUNC %0(s32) + + ; Check that the operation is performed for 32 bits + ; CLZ: [[COUNT:%[0-9]+]]:_(s32) = G_CTLZ + ; CLZ-NOT: G_CTLZ_ZERO_UNDEF + ; LIBCALLS-NOT: G_CTLZ + ; LIBCALLS: ADJCALLSTACKDOWN + ; LIBCALLS: $r0 = COPY [[X32]] + ; LIBCALLS: BL &__clzsi2, {{.*}}, implicit $r0, implicit-def $r0 + ; LIBCALLS: [[COUNT:%[0-9]+]]:_(s32) = COPY $r0 + ; LIBCALLS: ADJCALLSTACKUP + ; LIBCALLS-NOT: G_CTLZ + ; CHECK: [[BITDIFF:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[R32:%[0-9]+]]:_(s32) = G_SUB [[COUNT]], [[BITDIFF]] + %2(s8) = G_CTLZ_ZERO_UNDEF %1 + + ; CHECK: [[BITDIFF:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[RAGAIN:%[0-9]+]]:_(s32) = COPY [[R32]] + ; CHECK: [[SHIFTEDR:%[0-9]+]]:_(s32) = G_SHL [[RAGAIN]], [[BITDIFF]] + ; CHECK: [[R:%[0-9]+]]:_(s32) = G_ASHR [[SHIFTEDR]], [[BITDIFF]] + ; CHECK: $r0 = COPY [[R]] + %3(s32) = 
G_SEXT %2(s8) + $r0 = COPY %3(s32) + BX_RET 14, $noreg, implicit $r0 +... diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir index 4634e5a0d9df..281218192a18 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir @@ -27,6 +27,8 @@ define void @test_inttoptr_s32() { ret void } define void @test_ptrtoint_s32() { ret void } + define void @test_ctlz_s32() #3 { ret void } + @a_global = global float 1.0 define void @test_globals() { ret void } @@ -83,6 +85,7 @@ attributes #0 = { "target-features"="+vfp2"} attributes #1 = { "target-features"="+hwdiv-arm" } attributes #2 = { "target-features"="+vfp4"} + attributes #3 = { "target-features"="+v5t"} ... --- name: test_add_s32 @@ -561,6 +564,25 @@ body: | BX_RET 14, $noreg, implicit $r0 ... --- +name: test_ctlz_s32 +# CHECK-LABEL: name: test_ctlz_s32 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + %0(s32) = COPY $r0 + %1(s32) = G_CTLZ %0(s32) + $r0 = COPY %1(s32) + BX_RET 14, $noreg, implicit $r0 +... 
+--- name: test_globals # CHECK-LABEL: name: test_globals legalized: true diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp index 0cc989be3c09..f0527611975c 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -148,6 +148,33 @@ TEST_F(LegalizerHelperTest, LowerBitCountingCTLZ0) { ASSERT_TRUE(CheckMachineFunction(*MF, CheckStr)); } +// CTLZ expansion in terms of CTLZ_ZERO_UNDEF if the latter is a libcall +TEST_F(LegalizerHelperTest, LowerBitCountingCTLZLibcall) { + if (!TM) + return; + + // Declare your legalization info + DefineLegalizerInfo( + A, { getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).libcallFor({s64}); }); + // Build + auto MIBCTLZ = B.buildInstr(TargetOpcode::G_CTLZ, LLT::scalar(64), Copies[0]); + AInfo Info(MF->getSubtarget()); + LegalizerHelper Helper(*MF, Info); + ASSERT_TRUE(Helper.lower(*MIBCTLZ, 0, LLT::scalar(64)) == + LegalizerHelper::LegalizeResult::Legalized); + + auto CheckStr = R"( + CHECK: [[CZU:%[0-9]+]]:_(s64) = G_CTLZ_ZERO_UNDEF %0 + CHECK: [[ZERO:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + CHECK: [[THIRTY2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + CHECK: [[CMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), %0:_(s64), [[ZERO]] + CHECK: [[SEL:%[0-9]+]]:_(s64) = G_SELECT [[CMP]]:_(s1), [[THIRTY2]]:_, [[CZU]] + )"; + + // Check + ASSERT_TRUE(CheckMachineFunction(*MF, CheckStr)); +} + // CTLZ expansion TEST_F(LegalizerHelperTest, LowerBitCountingCTLZ1) { if (!TM)