[ARM] GlobalISel: Load i1, i8 and i16 args from stack

Add support for loading i1, i8 and i16 arguments from the stack, with or without
the ABI extension flags.

When the ABI extension flags are present, we load a 4-byte value; otherwise we
preserve the size of the load and let the instruction selector replace it with
an LDRB/LDRH. This generates the same code as DAGISel.

Differential Revision: https://reviews.llvm.org/D27803

llvm-svn: 293163
This commit is contained in:
Diana Picus 2017-01-26 09:20:47 +00:00
parent 7a7510ea97
commit 278c722e6d
6 changed files with 128 additions and 22 deletions

View File

@ -122,7 +122,7 @@ struct FormalArgHandler : public CallLowering::ValueHandler {
unsigned getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
assert(Size == 4 && "Unsupported size");
assert((Size == 1 || Size == 2 || Size == 4) && "Unsupported size");
auto &MFI = MIRBuilder.getMF().getFrameInfo();
@ -138,7 +138,16 @@ struct FormalArgHandler : public CallLowering::ValueHandler {
void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
MachinePointerInfo &MPO, CCValAssign &VA) override {
assert(Size == 4 && "Unsupported size");
assert((Size == 1 || Size == 2 || Size == 4) && "Unsupported size");
if (VA.getLocInfo() == CCValAssign::SExt ||
VA.getLocInfo() == CCValAssign::ZExt) {
// If the argument is zero- or sign-extended by the caller, its size
// becomes 4 bytes, so that's what we should load.
Size = 4;
assert(MRI.getType(ValVReg).isScalar() && "Only scalars supported atm");
MRI.setType(ValVReg, LLT::scalar(32));
}
auto MMO = MIRBuilder.getMF().getMachineMemOperand(
MPO, MachineMemOperand::MOLoad, Size, /* Alignment */ 0);
@ -177,18 +186,10 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
return false;
auto &Args = F.getArgumentList();
unsigned ArgIdx = 0;
for (auto &Arg : Args) {
ArgIdx++;
for (auto &Arg : Args)
if (!isSupportedType(DL, TLI, Arg.getType()))
return false;
// FIXME: This check as well as ArgIdx are going away as soon as we support
// loading values < 32 bits.
if (ArgIdx > 4 && Arg.getType()->getIntegerBitWidth() != 32)
return false;
}
CCAssignFn *AssignFn =
TLI.CCAssignFnForCall(F.getCallingConv(), F.isVarArg());

View File

@ -85,6 +85,22 @@ static unsigned selectSimpleExtOpc(unsigned Opc, unsigned Size) {
llvm_unreachable("Unsupported opcode");
}
/// Map the bit width of a loaded value to the ARM load instruction used for
/// it. Loads narrower than 32 bits produce a zero extended value.
///
/// \param Size width of the loaded value in bits; must be 1, 8, 16 or 32.
/// \returns the matching ARM load opcode.
static unsigned selectLoadOpCode(unsigned Size) {
  // Single-bit and byte-sized values both use the byte load.
  if (Size == 1 || Size == 8)
    return ARM::LDRBi12;

  if (Size == 16)
    return ARM::LDRH;

  if (Size == 32)
    return ARM::LDRi12;

  llvm_unreachable("Unsupported size");
}
bool ARMInstructionSelector::select(MachineInstr &I) const {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
@ -167,10 +183,22 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
I.setDesc(TII.get(ARM::ADDri));
MIB.addImm(0).add(predOps(ARMCC::AL)).add(condCodeOp());
break;
case G_LOAD:
I.setDesc(TII.get(ARM::LDRi12));
case G_LOAD: {
LLT ValTy = MRI.getType(I.getOperand(0).getReg());
const auto ValSize = ValTy.getSizeInBits();
if (ValSize != 32 && ValSize != 16 && ValSize != 8 && ValSize != 1)
return false;
const auto NewOpc = selectLoadOpCode(ValSize);
I.setDesc(TII.get(NewOpc));
if (NewOpc == ARM::LDRH)
// LDRH has a funny addressing mode (there's already a FIXME for it).
MIB.addReg(0);
MIB.addImm(0).add(predOps(ARMCC::AL));
break;
}
default:
return false;
}

View File

@ -35,7 +35,8 @@ ARMLegalizerInfo::ARMLegalizerInfo() {
setAction({G_FRAME_INDEX, p0}, Legal);
setAction({G_LOAD, s32}, Legal);
for (auto Ty : {s1, s8, s16, s32})
setAction({G_LOAD, Ty}, Legal);
setAction({G_LOAD, 1, p0}, Legal);
for (auto Ty : {s1, s8, s16, s32})

View File

@ -233,19 +233,26 @@ registers:
# CHECK-DAG: id: 2, class: gpr
# CHECK-DAG: id: 3, class: gpr
fixedStack:
- { id: 0, offset: 0, size: 4, alignment: 4, isImmutable: true, isAliased: false }
- { id: 0, offset: 0, size: 1, alignment: 4, isImmutable: true, isAliased: false }
- { id: 1, offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
- { id: 2, offset: 8, size: 4, alignment: 4, isImmutable: true, isAliased: false }
# CHECK: id: [[FRAME_INDEX:[0-9]+]], offset: 8
# CHECK-DAG: id: [[FI1:[0-9]+]], offset: 0
# CHECK-DAG: id: [[FI32:[0-9]+]], offset: 8
body: |
bb.0:
liveins: %r0, %r1, %r2, %r3
%0(p0) = G_FRAME_INDEX %fixed-stack.2
; CHECK: [[FIVREG:%[0-9]+]] = ADDri %fixed-stack.[[FRAME_INDEX]], 0, 14, _, _
; CHECK: [[FI32VREG:%[0-9]+]] = ADDri %fixed-stack.[[FI32]], 0, 14, _, _
%1(s32) = G_LOAD %0(p0)
; CHECK: {{%[0-9]+}} = LDRi12 [[FIVREG]], 0, 14, _
; CHECK: {{%[0-9]+}} = LDRi12 [[FI32VREG]], 0, 14, _
%2(p0) = G_FRAME_INDEX %fixed-stack.0
; CHECK: [[FI1VREG:%[0-9]+]] = ADDri %fixed-stack.[[FI1]], 0, 14, _, _
%3(s1) = G_LOAD %2(p0)
; CHECK: {{%[0-9]+}} = LDRBi12 [[FI1VREG]], 0, 14, _
BX_RET 14, _
; CHECK: BX_RET 14, _

View File

@ -82,8 +82,8 @@ entry:
ret i32 %sum
}
define i32 @test_many_args(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) {
; CHECK-LABEL: name: test_many_args
define i32 @test_stack_args(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) {
; CHECK-LABEL: name: test_stack_args
; CHECK: fixedStack:
; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 4
; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 4
@ -98,3 +98,39 @@ entry:
%sum = add i32 %p2, %p5
ret i32 %sum
}
; Stack-passed arguments marked signext. The checks expect r0-r3 as live-ins,
; and fixed stack objects of 1 byte at offset 0 for %p4 (i8) and 2 bytes at
; offset 4 for %p5 (i16). The sum must combine %p1, copied from r1, with %p5,
; loaded through a G_FRAME_INDEX / G_LOAD pair, and be returned in r0.
define i16 @test_stack_args_signext(i32 %p0, i16 %p1, i8 %p2, i1 %p3,
i8 signext %p4, i16 signext %p5) {
; CHECK-LABEL: name: test_stack_args_signext
; CHECK: fixedStack:
; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 1
; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 2
; CHECK: liveins: %r0, %r1, %r2, %r3
; CHECK: [[VREGP1:%[0-9]+]]{{.*}} = COPY %r1
; CHECK: [[FIP5:%[0-9]+]]{{.*}} = G_FRAME_INDEX %fixed-stack.[[P5]]
; CHECK: [[VREGP5:%[0-9]+]]{{.*}} = G_LOAD [[FIP5]](p0)
; CHECK: [[SUM:%[0-9]+]]{{.*}} = G_ADD [[VREGP1]], [[VREGP5]]
; CHECK: %r0 = COPY [[SUM]]
; CHECK: BX_RET 14, _, implicit %r0
entry:
%sum = add i16 %p1, %p5
ret i16 %sum
}
; Stack-passed arguments marked zeroext. The checks expect r0-r3 as live-ins,
; and fixed stack objects of 1 byte at offset 0 for %p4 (i8) and 2 bytes at
; offset 4 for %p5 (i16). The sum must combine %p2, copied from r2, with %p4,
; loaded through a G_FRAME_INDEX / G_LOAD pair, and be returned in r0.
define i8 @test_stack_args_zeroext(i32 %p0, i16 %p1, i8 %p2, i1 %p3,
i8 zeroext %p4, i16 zeroext %p5) {
; CHECK-LABEL: name: test_stack_args_zeroext
; CHECK: fixedStack:
; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 1
; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 2
; CHECK: liveins: %r0, %r1, %r2, %r3
; CHECK: [[VREGP2:%[0-9]+]]{{.*}} = COPY %r2
; CHECK: [[FIP4:%[0-9]+]]{{.*}} = G_FRAME_INDEX %fixed-stack.[[P4]]
; CHECK: [[VREGP4:%[0-9]+]]{{.*}} = G_LOAD [[FIP4]](p0)
; CHECK: [[SUM:%[0-9]+]]{{.*}} = G_ADD [[VREGP2]], [[VREGP4]]
; CHECK: %r0 = COPY [[SUM]]
; CHECK: BX_RET 14, _, implicit %r0
entry:
%sum = add i8 %p2, %p4
ret i8 %sum
}

View File

@ -67,8 +67,8 @@ entry:
ret i32 %sum
}
define i32 @test_many_args(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) {
; CHECK-LABEL: test_many_args:
define i32 @test_stack_args_i32(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) {
; CHECK-LABEL: test_stack_args_i32:
; CHECK: add [[P5ADDR:r[0-9]+]], sp, #4
; CHECK: ldr [[P5:r[0-9]+]], {{.*}}[[P5ADDR]]
; CHECK: add r0, r2, [[P5]]
@ -77,3 +77,36 @@ entry:
%sum = add i32 %p2, %p5
ret i32 %sum
}
; Narrow stack arguments with no extension attributes. The checks expect the
; address of %p5 (i16) to be formed as sp + 4 and the value to be fetched with
; a halfword load (ldrh), then added to r1 (%p1) with the result in r0.
define i16 @test_stack_args_mixed(i32 %p0, i16 %p1, i8 %p2, i1 %p3, i8 %p4, i16 %p5) {
; CHECK-LABEL: test_stack_args_mixed:
; CHECK: add [[P5ADDR:r[0-9]+]], sp, #4
; CHECK: ldrh [[P5:r[0-9]+]], {{.*}}[[P5ADDR]]
; CHECK: add r0, r1, [[P5]]
; CHECK: bx lr
entry:
%sum = add i16 %p1, %p5
ret i16 %sum
}
; A zeroext i16 argument passed on the stack. The checks expect its address to
; be sp itself (copied with mov) and the value to be fetched with a full-word
; ldr rather than a halfword load, then added to r1 (%p1) with the result in
; r0.
define i16 @test_stack_args_zeroext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, i16 zeroext %p4) {
; CHECK-LABEL: test_stack_args_zeroext:
; CHECK: mov [[P4ADDR:r[0-9]+]], sp
; CHECK: ldr [[P4:r[0-9]+]], {{.*}}[[P4ADDR]]
; CHECK: add r0, r1, [[P4]]
; CHECK: bx lr
entry:
%sum = add i16 %p1, %p4
ret i16 %sum
}
; A signext i8 argument passed on the stack. The checks expect its address to
; be sp itself (copied with mov) and the value to be fetched with a full-word
; ldr rather than a byte load, then added to r2 (%p2) with the result in r0.
define i8 @test_stack_args_signext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, i8 signext %p4) {
; CHECK-LABEL: test_stack_args_signext:
; CHECK: mov [[P4ADDR:r[0-9]+]], sp
; CHECK: ldr [[P4:r[0-9]+]], {{.*}}[[P4ADDR]]
; CHECK: add r0, r2, [[P4]]
; CHECK: bx lr
entry:
%sum = add i8 %p2, %p4
ret i8 %sum
}