[ARM] GlobalISel: Add support for i32 modulo

Add support for modulo for targets that have hardware division and for
those that don't. When hardware division is not available, we have to
choose the correct libcall to use. This is generally straightforward,
except for AEABI.

The AEABI variant is trickier than the other libcalls because it
returns { quotient, remainder }, instead of just one value like the
other libcalls that we've seen so far. Therefore, we need to use custom
lowering for it. However, we don't want to have too much special code,
so we refactor the target-independent code in the legalizer by adding a
helper for replacing an instruction with a libcall. This helper is used
by the legalizer itself when dealing with simple calls, and also by the
custom ARM legalization for the more complicated AEABI divmod calls.

llvm-svn: 305459
This commit is contained in:
Diana Picus 2017-06-15 10:53:31 +00:00
parent 8fd1601d32
commit 02e11010b2
5 changed files with 186 additions and 17 deletions

View File

@ -21,9 +21,11 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_MACHINELEGALIZEHELPER_H
#define LLVM_CODEGEN_GLOBALISEL_MACHINELEGALIZEHELPER_H
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
namespace llvm {
// Forward declarations.
@ -99,6 +101,12 @@ private:
const LegalizerInfo &LI;
};
/// Helper function that replaces \p MI with a libcall.
///
/// Emits a call to the runtime library routine \p Libcall at the position of
/// \p MI, passing \p Args as the call arguments and \p Result as the
/// destination of the return value, and then erases \p MI.
///
/// \returns LegalizerHelper::Legalized once \p MI has been replaced, or
/// LegalizerHelper::UnableToLegalize (with \p MI left in place) if the
/// target's call lowering fails to lower the call.
LegalizerHelper::LegalizeResult
replaceWithLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result,
ArrayRef<CallLowering::ArgInfo> Args);
} // End namespace llvm.
#endif

View File

@ -82,6 +82,12 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
case TargetOpcode::G_UDIV:
assert(Size == 32 && "Unsupported size");
return RTLIB::UDIV_I32;
case TargetOpcode::G_SREM:
assert(Size == 32 && "Unsupported size");
return RTLIB::SREM_I32;
case TargetOpcode::G_UREM:
assert(Size == 32 && "Unsupported size");
return RTLIB::UREM_I32;
case TargetOpcode::G_FADD:
assert((Size == 32 || Size == 64) && "Unsupported size");
return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
@ -93,43 +99,57 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
llvm_unreachable("Unknown libcall function");
}
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
Type *OpType) {
LegalizerHelper::LegalizeResult llvm::replaceWithLibcall(
MachineInstr &MI, MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
const CallLowering::ArgInfo &Result, ArrayRef<CallLowering::ArgInfo> Args) {
auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
const char *Name = TLI.getLibcallName(Libcall);
MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
MachineOperand::CreateES(Name),
{MI.getOperand(0).getReg(), OpType},
{{MI.getOperand(1).getReg(), OpType},
{MI.getOperand(2).getReg(), OpType}});
MIRBuilder.setInstr(MI);
if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
MachineOperand::CreateES(Name), Result, Args))
return LegalizerHelper::UnableToLegalize;
// We're about to remove MI, so move the insert point after it.
MIRBuilder.setInsertPt(MIRBuilder.getMBB(),
std::next(MIRBuilder.getInsertPt()));
MI.eraseFromParent();
return LegalizerHelper::Legalized;
}
/// Replace \p MI with a call to the runtime library routine that
/// getRTLibDesc selects for its opcode and \p Size. Operand 0 of \p MI
/// receives the call's return value and operands 1 and 2 are passed as the
/// two arguments; all three are described to the call lowering with the IR
/// type \p OpType.
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
              Type *OpType) {
  const RTLIB::Libcall Routine = getRTLibDesc(MI.getOpcode(), Size);

  // Describe the destination and the two sources before handing off, since
  // the helper erases MI once the call has been emitted.
  const CallLowering::ArgInfo Dst = {MI.getOperand(0).getReg(), OpType};
  const CallLowering::ArgInfo Srcs[] = {{MI.getOperand(1).getReg(), OpType},
                                        {MI.getOperand(2).getReg(), OpType}};

  return replaceWithLibcall(MI, MIRBuilder, Routine, Dst, Srcs);
}
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
unsigned Size = Ty.getSizeInBits();
LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
unsigned Size = LLTy.getSizeInBits();
auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
MIRBuilder.setInstr(MI);
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
case TargetOpcode::G_SDIV:
case TargetOpcode::G_UDIV: {
Type *Ty = Type::getInt32Ty(Ctx);
return simpleLibcall(MI, MIRBuilder, Size, Ty);
case TargetOpcode::G_UDIV:
case TargetOpcode::G_SREM:
case TargetOpcode::G_UREM: {
Type *HLTy = Type::getInt32Ty(Ctx);
return simpleLibcall(MI, MIRBuilder, Size, HLTy);
}
case TargetOpcode::G_FADD:
case TargetOpcode::G_FPOW:
case TargetOpcode::G_FREM: {
Type *Ty = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
return simpleLibcall(MI, MIRBuilder, Size, Ty);
Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
return simpleLibcall(MI, MIRBuilder, Size, HLTy);
}
}
}

View File

@ -12,8 +12,10 @@
//===----------------------------------------------------------------------===//
#include "ARMLegalizerInfo.h"
#include "ARMCallLowering.h"
#include "ARMSubtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
@ -63,6 +65,16 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({Op, s32}, Libcall);
}
// 32-bit signed/unsigned remainder: the legalization action depends on the
// subtarget's division support and on the ABI.
// FIXME: Support s8 and s16 as well
for (unsigned Op : {G_SREM, G_UREM})
if (ST.hasDivideInARMMode())
// Hardware divide is available, so let the legalizer lower the remainder.
setAction({Op, s32}, Lower);
else if (ST.isTargetAEABI() || ST.isTargetGNUAEABI() ||
ST.isTargetMuslAEABI())
// The AEABI divmod routines return both the quotient and the remainder,
// which needs the custom handling in legalizeCustom.
setAction({Op, s32}, Custom);
else
// Every other target uses a plain libcall that returns just the remainder.
setAction({Op, s32}, Libcall);
for (unsigned Op : {G_SEXT, G_ZEXT}) {
setAction({Op, s32}, Legal);
for (auto Ty : {s1, s8, s16})
@ -134,5 +146,38 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
}
return true;
}
case G_SREM:
case G_UREM: {
  // Custom legalization of a 32-bit remainder through the divmod libcalls,
  // which compute the quotient and the remainder in a single call.
  //
  // Read the destination register up front: MI is erased by
  // replaceWithLibcall below and must not be touched afterwards.
  unsigned OriginalResult = MI.getOperand(0).getReg();
  auto Size = MRI.getType(OriginalResult).getSizeInBits();
  if (Size != 32)
    return false;

  auto Libcall =
      MI.getOpcode() == G_SREM ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;

  // Our divmod libcalls return a struct containing the quotient and the
  // remainder. We need to create a virtual register for it.
  auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
  Type *ArgTy = Type::getInt32Ty(Ctx);
  StructType *RetTy = StructType::get(Ctx, {ArgTy, ArgTy}, /* Packed */ true);
  auto RetVal = MRI.createGenericVirtualRegister(
      getLLTForType(*RetTy, MIRBuilder.getMF().getDataLayout()));

  auto Status = replaceWithLibcall(MI, MIRBuilder, Libcall, {RetVal, RetTy},
                                   {{MI.getOperand(1).getReg(), ArgTy},
                                    {MI.getOperand(2).getReg(), ArgTy}});
  if (Status != LegalizerHelper::Legalized)
    return false;

  // The remainder is the second result of divmod. Split the return value into
  // a new, unused register for the quotient and the destination of the
  // original instruction for the remainder.
  MIRBuilder.buildUnmerge(
      {MRI.createGenericVirtualRegister(LLT::scalar(32)), OriginalResult},
      RetVal);

  // Report success as a plain bool, like every other return in this
  // function, instead of relying on an implicit conversion of a
  // LegalizerHelper::LegalizeResult enumerator to bool.
  return true;
}
}
}

View File

@ -66,3 +66,24 @@ define arm_aapcscc i8 @test_udiv_i8(i8 %a, i8 %b) {
ret i8 %r
}
; Signed 32-bit remainder. With hardware divide the expected code computes
; x - (x / y) * y using sdiv, mul and sub; without it a call to
; __aeabi_idivmod (AEABI targets) or __modsi3 (other targets) is expected.
define arm_aapcscc i32 @test_srem_i32(i32 %x, i32 %y) {
; CHECK-LABEL: test_srem_i32:
; HWDIV: sdiv [[Q:r[0-9]+]], r0, r1
; HWDIV: mul [[P:r[0-9]+]], [[Q]], r1
; HWDIV: sub r0, r0, [[P]]
; SOFT-AEABI: blx __aeabi_idivmod
; SOFT-DEFAULT: blx __modsi3
%r = srem i32 %x, %y
ret i32 %r
}
; Unsigned 32-bit remainder. With hardware divide the expected code computes
; x - (x / y) * y using udiv, mul and sub; without it a call to
; __aeabi_uidivmod (AEABI targets) or __umodsi3 (other targets) is expected.
define arm_aapcscc i32 @test_urem_i32(i32 %x, i32 %y) {
; CHECK-LABEL: test_urem_i32:
; HWDIV: udiv [[Q:r[0-9]+]], r0, r1
; HWDIV: mul [[P:r[0-9]+]], [[Q]], r1
; HWDIV: sub r0, r0, [[P]]
; SOFT-AEABI: blx __aeabi_uidivmod
; SOFT-DEFAULT: blx __umodsi3
%r = urem i32 %x, %y
ret i32 %r
}

View File

@ -11,6 +11,9 @@
define void @test_sdiv_i8() { ret void }
define void @test_udiv_i8() { ret void }
define void @test_srem_i32() { ret void }
define void @test_urem_i32() { ret void }
...
---
name: test_sdiv_i32
@ -228,3 +231,75 @@ body: |
%r0 = COPY %2(s8)
BX_RET 14, _, implicit %r0
...
---
# Verifies legalization of a 32-bit G_SREM. With hardware divide it is expected
# to become G_SDIV, G_MUL and G_SUB. Otherwise it is expected to become a call
# to __aeabi_idivmod, with the remainder copied out of r1, or a call to
# __modsi3, with the result copied out of r0.
name: test_srem_i32
# CHECK-LABEL: name: test_srem_i32
legalized: false
# CHECK: legalized: true
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
- { id: 2, class: _ }
body: |
bb.0:
liveins: %r0, %r1
; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0
; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1
%0(s32) = COPY %r0
%1(s32) = COPY %r1
; HWDIV: [[Q:%[0-9]+]](s32) = G_SDIV [[X]], [[Y]]
; HWDIV: [[P:%[0-9]+]](s32) = G_MUL [[Q]], [[Y]]
; HWDIV: [[R:%[0-9]+]](s32) = G_SUB [[X]], [[P]]
; SOFT: ADJCALLSTACKDOWN
; SOFT-DAG: %r0 = COPY [[X]]
; SOFT-DAG: %r1 = COPY [[Y]]
; SOFT-AEABI: BLX $__aeabi_idivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0, implicit-def %r1
; SOFT-AEABI: [[R:%[0-9]+]](s32) = COPY %r1
; SOFT-DEFAULT: BLX $__modsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0
; SOFT-DEFAULT: [[R:%[0-9]+]](s32) = COPY %r0
; SOFT: ADJCALLSTACKUP
%2(s32) = G_SREM %0, %1
; CHECK: %r0 = COPY [[R]]
%r0 = COPY %2(s32)
BX_RET 14, _, implicit %r0
...
---
# Verifies legalization of a 32-bit G_UREM. With hardware divide it is expected
# to become G_UDIV, G_MUL and G_SUB. Otherwise it is expected to become a call
# to __aeabi_uidivmod, with the remainder copied out of r1, or a call to
# __umodsi3, with the result copied out of r0.
name: test_urem_i32
# CHECK-LABEL: name: test_urem_i32
legalized: false
# CHECK: legalized: true
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
- { id: 2, class: _ }
body: |
bb.0:
liveins: %r0, %r1
; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0
; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1
%0(s32) = COPY %r0
%1(s32) = COPY %r1
; HWDIV: [[Q:%[0-9]+]](s32) = G_UDIV [[X]], [[Y]]
; HWDIV: [[P:%[0-9]+]](s32) = G_MUL [[Q]], [[Y]]
; HWDIV: [[R:%[0-9]+]](s32) = G_SUB [[X]], [[P]]
; SOFT: ADJCALLSTACKDOWN
; SOFT-DAG: %r0 = COPY [[X]]
; SOFT-DAG: %r1 = COPY [[Y]]
; SOFT-AEABI: BLX $__aeabi_uidivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0, implicit-def %r1
; SOFT-AEABI: [[R:%[0-9]+]](s32) = COPY %r1
; SOFT-DEFAULT: BLX $__umodsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0
; SOFT-DEFAULT: [[R:%[0-9]+]](s32) = COPY %r0
; SOFT: ADJCALLSTACKUP
%2(s32) = G_UREM %0, %1
; CHECK: %r0 = COPY [[R]]
%r0 = COPY %2(s32)
BX_RET 14, _, implicit %r0
...