forked from OSchip/llvm-project
GlobalISel: Fix narrowing of G_CTPOP
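The result type is separate from the source type, so the narrowed G_CTPOP pieces must be built with the result type rather than the narrowed source type. Tests will be included in a future AMDGPU patch that uses this from RegBankSelect/applyMappingImpl.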
This commit is contained in:
parent
8de2dad9e0
commit
3b198518ad
|
@@ -4040,16 +4040,17 @@ LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
|
|||
if (TypeIdx != 1)
|
||||
return UnableToLegalize;
|
||||
|
||||
Register DstReg = MI.getOperand(0).getReg();
|
||||
LLT DstTy = MRI.getType(DstReg);
|
||||
LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
|
||||
unsigned NarrowSize = NarrowTy.getSizeInBits();
|
||||
|
||||
if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
|
||||
auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
|
||||
|
||||
auto LoCTPOP = MIRBuilder.buildCTPOP(NarrowTy, UnmergeSrc.getReg(0));
|
||||
auto HiCTPOP = MIRBuilder.buildCTPOP(NarrowTy, UnmergeSrc.getReg(1));
|
||||
auto Out = MIRBuilder.buildAdd(NarrowTy, HiCTPOP, LoCTPOP);
|
||||
MIRBuilder.buildZExt(MI.getOperand(0), Out);
|
||||
auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
|
||||
auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
|
||||
MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
|
||||
|
||||
MI.eraseFromParent();
|
||||
return Legalized;
|
||||
|
|
|
@@ -216,6 +216,7 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
|
|||
|
||||
getActionDefinitionsBuilder(G_CTPOP)
|
||||
.lowerFor({{s32, s32}})
|
||||
.clampScalar(0, s32, s32)
|
||||
.clampScalar(1, s32, s32);
|
||||
|
||||
// FP instructions
|
||||
|
|
Loading…
Reference in New Issue