From 02f25a95655e9e320c524978e54df8e4a22ef921 Mon Sep 17 00:00:00 2001
From: Chad Rosier
Date: Thu, 19 May 2016 14:19:47 +0000
Subject: [PATCH] [AArch64 ] Generate a BFXIL from 'or (and X, Mask0Imm),(and Y, Mask1Imm)'.

Mask0Imm and ~Mask1Imm must be equivalent and one of the MaskImms is a shifted
mask (e.g., 0x000ffff0). Both 'and's must have a single use.

This changes code like:

  and w8, w0, #0xffff000f
  and w9, w1, #0x0000fff0
  orr w0, w9, w8

into

  lsr w8, w1, #4
  bfi w0, w8, #4, #12

llvm-svn: 270063
---
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp   | 59 ++++++++++++++++
 llvm/test/CodeGen/AArch64/bitfield-insert.ll | 67 +++++++++++++++++++
 2 files changed, 126 insertions(+)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index b8ee03a35585..fe5d6045bde6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -1974,6 +1974,13 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
   return true;
 }
 
+static bool isShiftedMask(uint64_t Mask, EVT VT) {
+  assert(VT == MVT::i32 || VT == MVT::i64);
+  if (VT == MVT::i32)
+    return isShiftedMask_32(Mask);
+  return isShiftedMask_64(Mask);
+}
+
 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
                                       SelectionDAG *CurDAG) {
   assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
@@ -2084,6 +2091,58 @@ static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
     CurDAG->SelectNodeTo(N, Opc, VT, Ops);
     return true;
   }
+
+  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
+  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
+  // mask (e.g., 0x000ffff0).
+  uint64_t Mask0Imm, Mask1Imm;
+  SDValue And0 = N->getOperand(0);
+  SDValue And1 = N->getOperand(1);
+  if (And0.hasOneUse() && And1.hasOneUse() &&
+      isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
+      isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
+      APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
+      (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
+
+    // We should have already caught the case where we extract hi and low parts.
+    // E.g. BFXIL from 'or (and X, 0xffff0000), (and Y, 0x0000ffff)'.
+    assert(!(isShiftedMask(Mask0Imm, VT) && isShiftedMask(Mask1Imm, VT)) &&
+           "BFXIL should have already been optimized.");
+
+    // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
+    // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
+    // bits to be inserted.
+    if (isShiftedMask(Mask0Imm, VT)) {
+      std::swap(And0, And1);
+      std::swap(Mask0Imm, Mask1Imm);
+    }
+
+    SDValue Src = And1->getOperand(0);
+    SDValue Dst = And0->getOperand(0);
+    unsigned LSB = countTrailingZeros(Mask1Imm);
+    int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
+
+    // The BFXIL inserts the low-order bits from a source register, so right
+    // shift the needed bits into place.
+    SDLoc DL(N);
+    unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
+    SDNode *LSR = CurDAG->getMachineNode(
+        ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT),
+        CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
+
+    // BFXIL is an alias of BFM, so translate to BFM operands.
+    unsigned ImmR = (BitWidth - LSB) % BitWidth;
+    unsigned ImmS = Width - 1;
+
+    // Create the BFXIL instruction.
+    SDValue Ops[] = {Dst, SDValue(LSR, 0),
+                     CurDAG->getTargetConstant(ImmR, DL, VT),
+                     CurDAG->getTargetConstant(ImmS, DL, VT)};
+    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
+    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
+    return true;
+  }
+
   return false;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/bitfield-insert.ll b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
index b89d06be84de..8c517f85c38f 100644
--- a/llvm/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
@@ -311,3 +311,70 @@ entry:
   store i16 %trunc, i16* %gep
   ret void
 }
+
+; The next set of tests generate a BFXIL from 'or (and X, Mask0Imm),
+; (and Y, Mask1Imm)' iff Mask0Imm and ~Mask1Imm are equivalent and one of the
+; MaskImms is a shifted mask (e.g., 0x000ffff0).
+
+; CHECK-LABEL: @test_or_and_and1
+; CHECK: lsr w8, w1, #4
+; CHECK: bfi w0, w8, #4, #12
+define i32 @test_or_and_and1(i32 %a, i32 %b) {
+entry:
+  %and = and i32 %a, -65521 ; 0xffff000f
+  %and1 = and i32 %b, 65520 ; 0x0000fff0
+  %or = or i32 %and1, %and
+  ret i32 %or
+}
+
+; CHECK-LABEL: @test_or_and_and2
+; CHECK: lsr w8, w0, #4
+; CHECK: bfi w1, w8, #4, #12
+define i32 @test_or_and_and2(i32 %a, i32 %b) {
+entry:
+  %and = and i32 %a, 65520 ; 0x0000fff0
+  %and1 = and i32 %b, -65521 ; 0xffff000f
+  %or = or i32 %and1, %and
+  ret i32 %or
+}
+
+; CHECK-LABEL: @test_or_and_and3
+; CHECK: lsr x8, x1, #16
+; CHECK: bfi x0, x8, #16, #32
+define i64 @test_or_and_and3(i64 %a, i64 %b) {
+entry:
+  %and = and i64 %a, -281474976645121 ; 0xffff00000000ffff
+  %and1 = and i64 %b, 281474976645120 ; 0x0000ffffffff0000
+  %or = or i64 %and1, %and
+  ret i64 %or
+}
+
+; Don't convert 'and' with multiple uses.
+; CHECK-LABEL: @test_or_and_and4
+; CHECK: and w8, w0, #0xffff000f
+; CHECK: and w9, w1, #0xfff0
+; CHECK: orr w0, w9, w8
+; CHECK: str w8, [x2
+define i32 @test_or_and_and4(i32 %a, i32 %b, i32* %ptr) {
+entry:
+  %and = and i32 %a, -65521
+  store i32 %and, i32* %ptr, align 4
+  %and2 = and i32 %b, 65520
+  %or = or i32 %and2, %and
+  ret i32 %or
+}
+
+; Don't convert 'and' with multiple uses.
+; CHECK-LABEL: @test_or_and_and5
+; CHECK: and w8, w1, #0xfff0
+; CHECK: and w9, w0, #0xffff000f
+; CHECK: orr w0, w8, w9
+; CHECK: str w8, [x2]
+define i32 @test_or_and_and5(i32 %a, i32 %b, i32* %ptr) {
+entry:
+  %and = and i32 %b, 65520
+  store i32 %and, i32* %ptr, align 4
+  %and1 = and i32 %a, -65521
+  %or = or i32 %and, %and1
+  ret i32 %or
+}
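
For readers who want to sanity-check the transform outside of LLVM, the short standalone C++ sketch below (not part of the patch; function names are illustrative) verifies that the or-of-masked-ands pattern from test_or_and_and1 computes the same value as the 'lsr w8, w1, #4 ; bfi w0, w8, #4, #12' sequence the new code selects, and notes how the BFM immediates fall out for that case.

  // Standalone illustration, not LLVM code. Checks that
  //   (a & 0xffff000f) | (b & 0x0000fff0)
  // equals the lsr+bfi sequence selected for test_or_and_and1.
  #include <cassert>
  #include <cstdint>

  static uint32_t orOfMaskedAnds(uint32_t A, uint32_t B) {
    return (A & 0xffff000fU) | (B & 0x0000fff0U);
  }

  static uint32_t lsrThenBfi(uint32_t A, uint32_t B) {
    uint32_t Src = B >> 4;                              // lsr w8, w1, #4
    uint32_t FieldMask = ((1U << 12) - 1) << 4;         // 12-bit field at bit 4
    return (A & ~FieldMask) | ((Src << 4) & FieldMask); // bfi w0, w8, #4, #12
  }

  int main() {
    const uint32_t Vals[] = {0u, 0xffff000fu, 0x0000fff0u, 0xdeadbeefu, ~0u};
    for (uint32_t A : Vals)
      for (uint32_t B : Vals)
        assert(orOfMaskedAnds(A, B) == lsrThenBfi(A, B));

    // BFM operands for this case, mirroring the patch's computation:
    // LSB = countTrailingZeros(0x0000fff0) = 4,
    // Width = 32 - popcount(0xffff000f) = 12,
    // so ImmR = (32 - 4) % 32 = 28 and ImmS = 12 - 1 = 11, which the
    // assembler prints as 'bfi w0, w8, #4, #12' (the test's expected output).
    return 0;
  }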