From 644a81953417eaf0176c11927c880309dd73c486 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 20 Jun 2018 12:09:44 +0000 Subject: [PATCH] ARM: convert ORR instructions to ADD where possible on Thumb. Thumb has more 16-bit encoding space dedicated to ADD than ORR, allowing both a 3-address encoding and a wider range of immediates. So, particularly when optimizing for code size (but it doesn't make things worse elsewhere) it's beneficial to convert an OR operation to an ADD if we know overflow won't occur. This is made even better by LLVM's penchant for putting operations in canonical form by converting the other way. llvm-svn: 335119 --- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 10 +++++ llvm/lib/Target/ARM/ARMInstrThumb.td | 17 ++++++++ llvm/lib/Target/ARM/ARMInstrThumb2.td | 12 ++++++ llvm/test/CodeGen/ARM/add-like-or.ll | 41 +++++++++++++++++++ llvm/test/CodeGen/ARM/shift-combine.ll | 2 +- .../CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll | 2 +- llvm/test/CodeGen/Thumb2/thumb2-uxtb.ll | 4 +- 7 files changed, 84 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/ARM/add-like-or.ll diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 7d6963c3608b..c3c44d710927 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -97,6 +97,8 @@ public: return SelectImmShifterOperand(N, A, B, false); } + bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out); + bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); @@ -569,6 +571,14 @@ bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N, return true; } +// Determine whether an ISD::OR's operands are suitable to turn the operation +// into an addition, which often has more compact encodings. 
+bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) { + assert(Parent->getOpcode() == ISD::OR && "unexpected parent"); + Out = N; + return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1)); +} + bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, SDValue &Base, diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td index 31f888b16f21..88aab47a79bf 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -270,6 +270,14 @@ def t_addrmode_sp : MemOperand, let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); } +// Inspects parent to determine whether an or instruction can be implemented as +// an add (i.e. whether we know overflow won't occur in the add). +def AddLikeOrOp : ComplexPattern; + +// Pattern to exclude immediates from matching +def non_imm32 : PatLeaf<(i32 GPR), [{ return !isa(N); }]>; + //===----------------------------------------------------------------------===// // Miscellaneous Instructions. // @@ -997,6 +1005,15 @@ let isAdd = 1 in { } } +// Thumb has more flexible short encodings for ADD than ORR, so use those where +// possible. 
+def : T1Pat<(or AddLikeOrOp:$Rn, imm0_7:$imm), (tADDi3 $Rn, imm0_7:$imm)>; + +def : T1Pat<(or AddLikeOrOp:$Rn, imm8_255:$imm), (tADDi8 $Rn, imm8_255:$imm)>; + +def : T1Pat<(or AddLikeOrOp:$Rn, tGPR:$Rm), (tADDrr $Rn, $Rm)>; + + def : tInstAlias <"add${s}${p} $Rdn, $Rm", (tADDrr tGPR:$Rdn,s_cc_out:$s, tGPR:$Rdn, tGPR:$Rm, pred:$p)>; diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index 249445ccccef..c9dd01b58543 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -2594,6 +2594,18 @@ def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm), def : T2Pat<(t2_so_imm_not:$src), (t2MVNi t2_so_imm_not:$src)>; +// There are shorter Thumb encodings for ADD than ORR, so to increase +// Thumb2SizeReduction's chances later on we select a t2ADD for an or where +// possible. +def : T2Pat<(or AddLikeOrOp:$Rn, t2_so_imm:$imm), + (t2ADDri $Rn, t2_so_imm:$imm)>; + +def : T2Pat<(or AddLikeOrOp:$Rn, imm0_4095:$Rm), + (t2ADDri12 $Rn, imm0_4095:$Rm)>; + +def : T2Pat<(or AddLikeOrOp:$Rn, non_imm32:$Rm), + (t2ADDrr $Rn, $Rm)>; + //===----------------------------------------------------------------------===// // Multiply Instructions. // diff --git a/llvm/test/CodeGen/ARM/add-like-or.ll b/llvm/test/CodeGen/ARM/add-like-or.ll new file mode 100644 index 000000000000..d958478e1084 --- /dev/null +++ b/llvm/test/CodeGen/ARM/add-like-or.ll @@ -0,0 +1,41 @@ +; RUN: llc -mtriple=thumbv6m-apple-macho %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T1 +; RUN: llc -mtriple=thumbv7m-apple-macho %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T2 + +define i32 @test_add_i3(i1 %tst, i32 %a, i32 %b) { +; CHECK-LABEL: test_add_i3: +; CHECK: adds r0, {{r[0-9]+}}, #2 + %tmp = and i32 %a, -7 + %tmp1 = and i32 %b, -4 + %int = select i1 %tst, i32 %tmp, i32 %tmp1 + + ; Call to force %int into a register that isn't r0 so using the i3 form is a + ; good idea. 
+ call void @foo(i32 %int) + %res = or i32 %int, 2 + ret i32 %res +} + +define i32 @test_add_i8(i32 %a, i32 %b, i1 %tst) { +; CHECK-LABEL: test_add_i8: +; CHECK-T1: adds r0, #12 +; CHECK-T2: add.w r0, {{r[0-9]+}}, #12 + + %tmp = and i32 %a, -256 + %tmp1 = and i32 %b, -512 + %int = select i1 %tst, i32 %tmp, i32 %tmp1 + %res = or i32 %int, 12 + ret i32 %res +} + +define i32 @test_add_i12(i32 %a, i32 %b, i1 %tst) { +; CHECK-LABEL: test_add_i12: +; CHECK-T2: addw r0, {{r[0-9]+}}, #854 + + %tmp = and i32 %a, -4096 + %tmp1 = and i32 %b, -8192 + %int = select i1 %tst, i32 %tmp, i32 %tmp1 + %res = or i32 %int, 854 + ret i32 %res +} + +declare void @foo(i32) diff --git a/llvm/test/CodeGen/ARM/shift-combine.ll b/llvm/test/CodeGen/ARM/shift-combine.ll index 82aa28b91557..24c392cb9c88 100644 --- a/llvm/test/CodeGen/ARM/shift-combine.ll +++ b/llvm/test/CodeGen/ARM/shift-combine.ll @@ -130,7 +130,7 @@ entry: ; CHECK-V6M: ldrh [[LOW:r[0-9]+]], [r0, #2] ; CHECK-V6M: ldr [[HIGH:r[0-9]+]], [r0, #4] ; CHECK-V6M-NEXT: lsls [[HIGH]], [[HIGH]], #16 -; CHECK-V6M-NEXT: orrs r0, r1 +; CHECK-V6M-NEXT: adds r0, r1, r0 ; CHECK-ALIGN: ldr [[HIGH:r[0-9]+]], [r0, #4] ; CHECK-ALIGN-NEXT: ldrh [[LOW:r[0-9]+]], [r0, #2] ; CHECK-ALIGN-NEXT: orr.w r0, [[LOW]], [[HIGH]], lsl #16 diff --git a/llvm/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll b/llvm/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll index d02947fc3b1c..de45f72c8437 100644 --- a/llvm/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll +++ b/llvm/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll @@ -15,7 +15,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32- ; Make sure the cmp is not scheduled before the InlineAsm that clobbers cc. 
; CHECK: bl _f2 -; CHECK: cmp r0, #0 +; CHECK: cmp {{r[0-9]+}}, #0 ; CHECK-NOT: cmp ; CHECK: InlineAsm Start define void @test(%s1* %this, i32 %format, i32 %w, i32 %h, i32 %levels, i32* %s, i8* %data, i32* nocapture %rowbytes, void (i8*, i8*)* %release, i8* %info) nounwind { diff --git a/llvm/test/CodeGen/Thumb2/thumb2-uxtb.ll b/llvm/test/CodeGen/Thumb2/thumb2-uxtb.ll index af4532cf6f3d..8f6d556f8030 100644 --- a/llvm/test/CodeGen/Thumb2/thumb2-uxtb.ll +++ b/llvm/test/CodeGen/Thumb2/thumb2-uxtb.ll @@ -109,13 +109,13 @@ define i32 @test10(i32 %p0) { ; CHECK-DSP: and.w r0, r1, r0, lsr #7 ; CHECK-DSP: lsrs r1, r0, #5 ; CHECK-DSP: uxtb16 r1, r1 -; CHECk-DSP: orrs r0, r1 +; CHECk-DSP: adds r0, r1 ; CHECK-NO-DSP: mov.w r1, #16253176 ; CHECK-NO-DSP: and.w r0, r1, r0, lsr #7 ; CHECK-NO-DSP: mov.w r1, #458759 ; CHECK-NO-DSP: and.w r1, r1, r0, lsr #5 -; CHECK-NO-DSP: orrs r0, r1 +; CHECK-NO-DSP: add r0, r1 %tmp1 = lshr i32 %p0, 7 ; [#uses=1] %tmp2 = and i32 %tmp1, 16253176 ; [#uses=2] %tmp4 = lshr i32 %tmp2, 5 ; [#uses=1]