forked from OSchip/llvm-project
[MIPS GlobalISel] Select count leading zeros
The llvm.ctlz.<type> intrinsic has an additional i1 argument, is_zero_undef, which tells whether zero as the first argument produces a defined result. The MIPS clz instruction returns 32 for zero input. G_CTLZ is generated from llvm.ctlz.<type> (<type> <src>, i1 false) intrinsics; clang generates these intrinsics from __builtin_clz and __builtin_clzll. G_CTLZ_ZERO_UNDEF can also be generated from llvm.ctlz with true as the second argument. It is also the traditional form, and many algorithms are now predicated on avoiding zero-value inputs. Add narrow scalar for G_CTLZ (the algorithm uses G_CTLZ_ZERO_UNDEF). Lower G_CTLZ_ZERO_UNDEF and select G_CTLZ for MIPS32. Differential Revision: https://reviews.llvm.org/D73214
This commit is contained in:
parent
941f20c3bd
commit
2b66d32f3f
|
@ -241,6 +241,7 @@ public:
|
|||
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
|
||||
LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
|
||||
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
|
||||
LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
|
||||
|
||||
LegalizeResult lowerBitcast(MachineInstr &MI);
|
||||
LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
|
||||
|
|
|
@ -977,8 +977,13 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
|
|||
case TargetOpcode::G_CTTZ:
|
||||
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
|
||||
case TargetOpcode::G_CTPOP:
|
||||
if (TypeIdx != 0)
|
||||
return UnableToLegalize; // TODO
|
||||
if (TypeIdx == 1)
|
||||
switch (MI.getOpcode()) {
|
||||
case TargetOpcode::G_CTLZ:
|
||||
return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
|
||||
default:
|
||||
return UnableToLegalize;
|
||||
}
|
||||
|
||||
Observer.changingInstr(MI);
|
||||
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
|
||||
|
@ -3849,6 +3854,37 @@ LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
|
|||
return Legalized;
|
||||
}
|
||||
|
||||
/// Narrow the source operand (type index 1) of a scalar G_CTLZ to \p NarrowTy.
///
/// Only one shape is handled: the source is a scalar exactly twice as wide as
/// \p NarrowTy and the destination has the same type as the source. The source
/// is split into a low and a high half and the count is computed as
///   ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
/// That count becomes the low half of the destination; the high half is zero.
///
/// \param MI       The G_CTLZ instruction to narrow; erased on success.
/// \param TypeIdx  Type index being narrowed; must be 1 (the source).
/// \param NarrowTy The type the source is split into.
/// \returns Legalized after replacing \p MI, or UnableToLegalize (with \p MI
/// left untouched and nothing emitted) for any other type index or type
/// combination.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
                                  LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  unsigned NarrowSize = NarrowTy.getSizeInBits();

  if (!SrcTy.isScalar() || SrcTy.getSizeInBits() != 2 * NarrowSize)
    return UnableToLegalize;

  // The result is re-assembled below by merging two NarrowTy pieces into the
  // destination, which is only well-typed when the destination is as wide as
  // the source. Bail out before emitting anything otherwise, instead of
  // building a G_MERGE_VALUES that does not cover its destination.
  if (MRI.getType(DstReg) != SrcTy)
    return UnableToLegalize;

  MachineIRBuilder &B = MIRBuilder;
  // Piece 0 of the unmerge is used as Lo, piece 1 as Hi.
  auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
  // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
  auto C_0 = B.buildConstant(NarrowTy, 0);
  auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                              UnmergeSrc.getReg(1), C_0);
  auto LoCTLZ = B.buildCTLZ(NarrowTy, UnmergeSrc.getReg(0));
  auto C_NarrowSize = B.buildConstant(NarrowTy, NarrowSize);
  auto HiIsZeroCTLZ = B.buildAdd(NarrowTy, LoCTLZ, C_NarrowSize);
  // Hi is only consulted when it is non-zero, so the zero-undef form suffices.
  auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(NarrowTy, UnmergeSrc.getReg(1));
  auto LoOut = B.buildSelect(NarrowTy, HiIsZero, HiIsZeroCTLZ, HiCTLZ);

  // The high half of the wide result reuses the zero constant.
  B.buildMerge(DstReg, {LoOut.getReg(0), C_0.getReg(0)});

  MI.eraseFromParent();
  return Legalized;
}
|
||||
|
||||
LegalizerHelper::LegalizeResult
|
||||
LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
|
||||
unsigned Opc = MI.getOpcode();
|
||||
|
|
|
@ -202,6 +202,12 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
|
|||
.lowerFor({s32})
|
||||
.maxScalar(0, s32);
|
||||
|
||||
getActionDefinitionsBuilder(G_CTLZ)
|
||||
.legalFor({{s32, s32}})
|
||||
.maxScalar(1, s32);
|
||||
getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
|
||||
.lowerFor({{s32, s32}});
|
||||
|
||||
// FP instructions
|
||||
getActionDefinitionsBuilder(G_FCONSTANT)
|
||||
.legalFor({s32, s64});
|
||||
|
|
|
@ -453,6 +453,7 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
|||
case G_BRINDIRECT:
|
||||
case G_VASTART:
|
||||
case G_BSWAP:
|
||||
case G_CTLZ:
|
||||
OperandsMapping = &Mips::ValueMappings[Mips::GPRIdx];
|
||||
break;
|
||||
case G_ADD:
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32
|
||||
---
|
||||
name: ctlz_i32
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $a0
|
||||
|
||||
; MIPS32-LABEL: name: ctlz_i32
|
||||
; MIPS32: liveins: $a0
|
||||
; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
|
||||
; MIPS32: [[CLZ:%[0-9]+]]:gpr32 = CLZ [[COPY]]
|
||||
; MIPS32: $v0 = COPY [[CLZ]]
|
||||
; MIPS32: RetRA implicit $v0
|
||||
%0:gprb(s32) = COPY $a0
|
||||
%1:gprb(s32) = G_CTLZ %0(s32)
|
||||
$v0 = COPY %1(s32)
|
||||
RetRA implicit $v0
|
||||
|
||||
...
|
|
@ -0,0 +1,57 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32
|
||||
---
|
||||
name: ctlz_i32
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $a0
|
||||
|
||||
; MIPS32-LABEL: name: ctlz_i32
|
||||
; MIPS32: liveins: $a0
|
||||
; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
|
||||
; MIPS32: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s32)
|
||||
; MIPS32: $v0 = COPY [[CTLZ]](s32)
|
||||
; MIPS32: RetRA implicit $v0
|
||||
%0:_(s32) = COPY $a0
|
||||
%1:_(s32) = G_CTLZ %0(s32)
|
||||
$v0 = COPY %1(s32)
|
||||
RetRA implicit $v0
|
||||
|
||||
...
|
||||
---
|
||||
name: ctlz_i64
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $a0, $a1
|
||||
|
||||
; MIPS32-LABEL: name: ctlz_i64
|
||||
; MIPS32: liveins: $a0, $a1
|
||||
; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
|
||||
; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
|
||||
; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
|
||||
; MIPS32: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s32)
|
||||
; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
|
||||
; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[CTLZ]], [[C1]]
|
||||
; MIPS32: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[COPY1]](s32)
|
||||
; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
|
||||
; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
|
||||
; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
|
||||
; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD]], [[CTLZ1]]
|
||||
; MIPS32: $v0 = COPY [[SELECT]](s32)
|
||||
; MIPS32: $v1 = COPY [[C]](s32)
|
||||
; MIPS32: RetRA implicit $v0, implicit $v1
|
||||
%1:_(s32) = COPY $a0
|
||||
%2:_(s32) = COPY $a1
|
||||
%0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32)
|
||||
%3:_(s64) = G_CTLZ %0(s64)
|
||||
%4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %3(s64)
|
||||
$v0 = COPY %4(s32)
|
||||
$v1 = COPY %5(s32)
|
||||
RetRA implicit $v0, implicit $v1
|
||||
|
||||
...
|
|
@ -0,0 +1,34 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=MIPS32
|
||||
|
||||
define i32 @ctlz_i32(i32 %a) {
|
||||
; MIPS32-LABEL: ctlz_i32:
|
||||
; MIPS32: # %bb.0: # %entry
|
||||
; MIPS32-NEXT: clz $2, $4
|
||||
; MIPS32-NEXT: jr $ra
|
||||
; MIPS32-NEXT: nop
|
||||
entry:
|
||||
%0 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
|
||||
ret i32 %0
|
||||
}
|
||||
declare i32 @llvm.ctlz.i32(i32, i1 immarg)
|
||||
|
||||
|
||||
define i64 @ctlz_i64(i64 %a) {
|
||||
; MIPS32-LABEL: ctlz_i64:
|
||||
; MIPS32: # %bb.0: # %entry
|
||||
; MIPS32-NEXT: ori $3, $zero, 0
|
||||
; MIPS32-NEXT: sltiu $1, $5, 1
|
||||
; MIPS32-NEXT: clz $2, $4
|
||||
; MIPS32-NEXT: addiu $2, $2, 32
|
||||
; MIPS32-NEXT: clz $4, $5
|
||||
; MIPS32-NEXT: andi $1, $1, 1
|
||||
; MIPS32-NEXT: movn $4, $2, $1
|
||||
; MIPS32-NEXT: move $2, $4
|
||||
; MIPS32-NEXT: jr $ra
|
||||
; MIPS32-NEXT: nop
|
||||
entry:
|
||||
%0 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
|
||||
ret i64 %0
|
||||
}
|
||||
declare i64 @llvm.ctlz.i64(i64, i1 immarg)
|
|
@ -0,0 +1,23 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32
|
||||
---
|
||||
name: ctlz_i32
|
||||
alignment: 4
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $a0
|
||||
|
||||
; MIPS32-LABEL: name: ctlz_i32
|
||||
; MIPS32: liveins: $a0
|
||||
; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0
|
||||
; MIPS32: [[CTLZ:%[0-9]+]]:gprb(s32) = G_CTLZ [[COPY]](s32)
|
||||
; MIPS32: $v0 = COPY [[CTLZ]](s32)
|
||||
; MIPS32: RetRA implicit $v0
|
||||
%0:_(s32) = COPY $a0
|
||||
%1:_(s32) = G_CTLZ %0(s32)
|
||||
$v0 = COPY %1(s32)
|
||||
RetRA implicit $v0
|
||||
|
||||
...
|
Loading…
Reference in New Issue