forked from OSchip/llvm-project
ARM: convert ORR instructions to ADD where possible on Thumb.
Thumb has more 16-bit encoding space dedicated to ADD than ORR, allowing both a 3-address encoding and a wider range of immediates. So, particularly when optimizing for code size (though it doesn't make things worse elsewhere), it's beneficial to select an OR operation as an ADD when we know overflow won't occur, i.e. when the operands have no set bits in common and the addition therefore cannot carry. This matters all the more because LLVM's canonical form goes the other way, converting such ADDs into ORs. llvm-svn: 335119
This commit is contained in:
parent
70666e7765
commit
644a819534
|
@ -97,6 +97,8 @@ public:
|
|||
return SelectImmShifterOperand(N, A, B, false);
|
||||
}
|
||||
|
||||
bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
|
||||
|
||||
bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
|
||||
bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
|
||||
|
||||
|
@ -569,6 +571,14 @@ bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
|
|||
return true;
|
||||
}
|
||||
|
||||
// Determine whether an ISD::OR's operands are suitable to turn the operation
|
||||
// into an addition, which often has more compact encodings.
|
||||
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
|
||||
assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
|
||||
Out = N;
|
||||
return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
|
||||
}
|
||||
|
||||
|
||||
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
|
||||
SDValue &Base,
|
||||
|
|
|
@ -270,6 +270,14 @@ def t_addrmode_sp : MemOperand,
|
|||
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
|
||||
}
|
||||
|
||||
// Matches an operand of an `or` that can be implemented as an add, i.e. one
|
||||
// where adding the two operands is known to produce no carries. The check is
// done by the C++ hook "SelectAddLikeOr"; SDNPWantParent makes the parent `or`
// node available to that hook so it can inspect the other operand.
|
||||
def AddLikeOrOp : ComplexPattern<i32, 1, "SelectAddLikeOr", [],
|
||||
[SDNPWantParent]>;
|
||||
|
||||
// Pattern to exclude immediates from matching: an i32 GPR leaf that is accepted
// only when the node N is not a ConstantSDNode.
|
||||
def non_imm32 : PatLeaf<(i32 GPR), [{ return !isa<ConstantSDNode>(N); }]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Miscellaneous Instructions.
|
||||
//
|
||||
|
@ -997,6 +1005,15 @@ let isAdd = 1 in {
|
|||
}
|
||||
}
|
||||
|
||||
// Thumb has more flexible short encodings for ADD than ORR, so use those where
|
||||
// possible. Each pattern below rewrites an `or` whose $Rn operand is accepted
// by AddLikeOrOp into the tADD* instruction taking the same second operand.
|
||||
// Second operand is an immediate matched by imm0_7: select tADDi3.
def : T1Pat<(or AddLikeOrOp:$Rn, imm0_7:$imm), (tADDi3 $Rn, imm0_7:$imm)>;
|
||||
|
||||
// Second operand is an immediate matched by imm8_255: select tADDi8.
def : T1Pat<(or AddLikeOrOp:$Rn, imm8_255:$imm), (tADDi8 $Rn, imm8_255:$imm)>;
|
||||
|
||||
// Second operand is a tGPR register: select the register-register tADDrr.
def : T1Pat<(or AddLikeOrOp:$Rn, tGPR:$Rm), (tADDrr $Rn, $Rm)>;
|
||||
|
||||
|
||||
def : tInstAlias <"add${s}${p} $Rdn, $Rm",
|
||||
(tADDrr tGPR:$Rdn,s_cc_out:$s, tGPR:$Rdn, tGPR:$Rm, pred:$p)>;
|
||||
|
||||
|
|
|
@ -2594,6 +2594,18 @@ def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm),
|
|||
def : T2Pat<(t2_so_imm_not:$src),
|
||||
(t2MVNi t2_so_imm_not:$src)>;
|
||||
|
||||
// There are shorter Thumb encodings for ADD than ORR, so to increase
|
||||
// Thumb2SizeReduction's chances later on we select a t2ADD for an or where
|
||||
// possible. In every pattern below $Rn must be accepted by AddLikeOrOp.
|
||||
// Second operand is an immediate matched by t2_so_imm: select t2ADDri.
def : T2Pat<(or AddLikeOrOp:$Rn, t2_so_imm:$imm),
|
||||
(t2ADDri $Rn, t2_so_imm:$imm)>;
|
||||
|
||||
// Second operand is an immediate matched by imm0_4095: select t2ADDri12.
// Note that this operand is bound to the name $Rm although it is an immediate.
def : T2Pat<(or AddLikeOrOp:$Rn, imm0_4095:$Rm),
|
||||
(t2ADDri12 $Rn, imm0_4095:$Rm)>;
|
||||
|
||||
// Second operand is anything accepted by non_imm32 (which rejects constant
// nodes): select the register-register t2ADDrr.
def : T2Pat<(or AddLikeOrOp:$Rn, non_imm32:$Rm),
|
||||
(t2ADDrr $Rn, $Rm)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Multiply Instructions.
|
||||
//
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
; RUN: llc -mtriple=thumbv6m-apple-macho %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T1
|
||||
; RUN: llc -mtriple=thumbv7m-apple-macho %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T2
|
||||
|
||||
; An `or` with the constant 2 (bit 1) where bit 1 of the other operand is zero
; on both select arms, so the expected output is a three-operand `adds`.
define i32 @test_add_i3(i1 %tst, i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: test_add_i3:
|
||||
; CHECK: adds r0, {{r[0-9]+}}, #2
|
||||
; -7 is 0xFFFFFFF9, which clears bits 1 and 2.
%tmp = and i32 %a, -7
|
||||
; -4 is 0xFFFFFFFC, which clears bits 0 and 1.
%tmp1 = and i32 %b, -4
|
||||
; Whichever arm is chosen, bit 1 of %int is zero.
%int = select i1 %tst, i32 %tmp, i32 %tmp1
|
||||
|
||||
; Call to force %int into a register that isn't r0 so using the i3 form is a
|
||||
; good idea.
|
||||
call void @foo(i32 %int)
|
||||
%res = or i32 %int, 2
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; An `or` with the constant 12 where the low eight bits of the other operand
; are zero on both select arms. The T1 run expects the two-operand `adds` and
; the T2 run expects `add.w`.
define i32 @test_add_i8(i32 %a, i32 %b, i1 %tst) {
|
||||
; CHECK-LABEL: test_add_i8:
|
||||
; CHECK-T1: adds r0, #12
|
||||
; CHECK-T2: add.w r0, {{r[0-9]+}}, #12
|
||||
|
||||
; -256 is 0xFFFFFF00, which clears bits 0-7.
%tmp = and i32 %a, -256
|
||||
; -512 is 0xFFFFFE00, which clears bits 0-8.
%tmp1 = and i32 %b, -512
|
||||
%int = select i1 %tst, i32 %tmp, i32 %tmp1
|
||||
; 12 only has bits 2 and 3 set, both of which are zero in %int.
%res = or i32 %int, 12
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; An `or` with the constant 854 where the low twelve bits of the other operand
; are zero on both select arms. Only a T2-prefixed expectation (`addw`) is
; present for this function.
define i32 @test_add_i12(i32 %a, i32 %b, i1 %tst) {
|
||||
; CHECK-LABEL: test_add_i12:
|
||||
; CHECK-T2: addw r0, {{r[0-9]+}}, #854
|
||||
|
||||
; -4096 is 0xFFFFF000, which clears bits 0-11.
%tmp = and i32 %a, -4096
|
||||
; -8192 is 0xFFFFE000, which clears bits 0-12.
%tmp1 = and i32 %b, -8192
|
||||
%int = select i1 %tst, i32 %tmp, i32 %tmp1
|
||||
; 854 is below 4096, so all of its set bits are zero in %int.
%res = or i32 %int, 854
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
declare void @foo(i32)
|
|
@ -130,7 +130,7 @@ entry:
|
|||
; CHECK-V6M: ldrh [[LOW:r[0-9]+]], [r0, #2]
|
||||
; CHECK-V6M: ldr [[HIGH:r[0-9]+]], [r0, #4]
|
||||
; CHECK-V6M-NEXT: lsls [[HIGH]], [[HIGH]], #16
|
||||
; CHECK-V6M-NEXT: orrs r0, r1
|
||||
; CHECK-V6M-NEXT: adds r0, r1, r0
|
||||
; CHECK-ALIGN: ldr [[HIGH:r[0-9]+]], [r0, #4]
|
||||
; CHECK-ALIGN-NEXT: ldrh [[LOW:r[0-9]+]], [r0, #2]
|
||||
; CHECK-ALIGN-NEXT: orr.w r0, [[LOW]], [[HIGH]], lsl #16
|
||||
|
|
|
@ -15,7 +15,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
|
|||
|
||||
; Make sure the cmp is not scheduled before the InlineAsm that clobbers cc.
|
||||
; CHECK: bl _f2
|
||||
; CHECK: cmp r0, #0
|
||||
; CHECK: cmp {{r[0-9]+}}, #0
|
||||
; CHECK-NOT: cmp
|
||||
; CHECK: InlineAsm Start
|
||||
define void @test(%s1* %this, i32 %format, i32 %w, i32 %h, i32 %levels, i32* %s, i8* %data, i32* nocapture %rowbytes, void (i8*, i8*)* %release, i8* %info) nounwind {
|
||||
|
|
|
@ -109,13 +109,13 @@ define i32 @test10(i32 %p0) {
|
|||
; CHECK-DSP: and.w r0, r1, r0, lsr #7
|
||||
; CHECK-DSP: lsrs r1, r0, #5
|
||||
; CHECK-DSP: uxtb16 r1, r1
|
||||
; CHECk-DSP: orrs r0, r1
|
||||
; CHECk-DSP: adds r0, r1
|
||||
|
||||
; CHECK-NO-DSP: mov.w r1, #16253176
|
||||
; CHECK-NO-DSP: and.w r0, r1, r0, lsr #7
|
||||
; CHECK-NO-DSP: mov.w r1, #458759
|
||||
; CHECK-NO-DSP: and.w r1, r1, r0, lsr #5
|
||||
; CHECK-NO-DSP: orrs r0, r1
|
||||
; CHECK-NO-DSP: add r0, r1
|
||||
%tmp1 = lshr i32 %p0, 7 ; <i32> [#uses=1]
|
||||
%tmp2 = and i32 %tmp1, 16253176 ; <i32> [#uses=2]
|
||||
%tmp4 = lshr i32 %tmp2, 5 ; <i32> [#uses=1]
|
||||
|
|
Loading…
Reference in New Issue