From 43ec15e57e0ecb9779ffc46772ef8f8f808aae76 Mon Sep 17 00:00:00 2001
From: Geoff Berry
Date: Fri, 18 Sep 2015 17:11:53 +0000
Subject: [PATCH] [AArch64] Improved bitfield instruction selection.

Summary:
For bitfield insert OR matching, check both operands for larger pattern
first before checking for smaller pattern.

Add pattern for unsigned bitfield insert-in-zero done with SHL+AND.

Resolves PR21631.

Reviewers: jmolloy, t.p.northover

Subscribers: aemerson, rengolin, llvm-commits, mcrosier

Differential Revision: http://reviews.llvm.org/D12908

llvm-svn: 248006
---
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp   | 78 ++++++++++++++++---
 llvm/test/CodeGen/AArch64/bitfield-insert.ll | 19 +++++
 llvm/test/CodeGen/AArch64/xbfiz.ll           | 30 +++++++
 3 files changed, 116 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index a4126ab12656..4c9df00c1ece 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -166,6 +166,7 @@ public:
 
   SDNode *SelectBitfieldExtractOp(SDNode *N);
   SDNode *SelectBitfieldInsertOp(SDNode *N);
+  SDNode *SelectBitfieldInsertInZeroOp(SDNode *N);
 
   SDNode *SelectLIBM(SDNode *N);
   SDNode *SelectFPConvertWithRound(SDNode *N);
@@ -1918,6 +1919,7 @@ static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
 /// Does this tree qualify as an attempt to move a bitfield into position,
 /// essentially "(and (shl VAL, N), Mask)".
 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
+                                    bool BiggerPattern,
                                     SDValue &Src, int &ShiftAmount,
                                     int &MaskWidth) {
   EVT VT = Op.getValueType();
@@ -1940,6 +1942,11 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
     Op = Op.getOperand(0);
   }
 
+  // Don't match if the SHL has more than one use, since then we'll end up
+  // generating SHL+UBFIZ instead of just keeping SHL+AND.
+  if (!BiggerPattern && !Op.hasOneUse())
+    return false;
+
   uint64_t ShlImm;
   if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
     return false;
@@ -1953,7 +1960,11 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
 
   // BFI encompasses sufficiently many nodes that it's worth inserting an extra
   // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
-  // amount.
+  // amount. BiggerPattern is true when this pattern is being matched for BFI,
+  // BiggerPattern is false when this pattern is being matched for UBFIZ, in
+  // which case it is not profitable to insert an extra shift.
+  if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
+    return false;
   Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
 
   return true;
@@ -1990,17 +2001,26 @@ static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
   unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
   unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
 
-  // OR is commutative, check both possibilities (does llvm provide a
-  // way to do that directely, e.g., via code matcher?)
-  SDValue OrOpd1Val = N->getOperand(1);
-  SDNode *OrOpd0 = N->getOperand(0).getNode();
-  SDNode *OrOpd1 = N->getOperand(1).getNode();
-  for (int i = 0; i < 2;
-       ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) {
+  // OR is commutative, check all combinations of operand order and values of
+  // BiggerPattern, i.e.
+  //     Opd0, Opd1, BiggerPattern=false
+  //     Opd1, Opd0, BiggerPattern=false
+  //     Opd0, Opd1, BiggerPattern=true
+  //     Opd1, Opd0, BiggerPattern=true
+  // Several of these combinations may match, so check with BiggerPattern=false
+  // first since that will produce better results by matching more instructions
+  // and/or inserting fewer extra instructions.
+  for (int I = 0; I < 4; ++I) {
+
+    bool BiggerPattern = I / 2;
+    SDNode *OrOpd0 = N->getOperand(I % 2).getNode();
+    SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
+    SDNode *OrOpd1 = OrOpd1Val.getNode();
+
     unsigned BFXOpc;
     int DstLSB, Width;
     if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
-                            NumberOfIgnoredLowBits, true)) {
+                            NumberOfIgnoredLowBits, BiggerPattern)) {
       // Check that the returned opcode is compatible with the pattern,
       // i.e., same type and zero extended (U and not S)
       if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
@@ -2018,8 +2038,9 @@ static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
 
       // If the mask on the insertee is correct, we have a BFXIL operation. We
       // can share the ImmR and ImmS values from the already-computed UBFM.
-    } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), Src,
-                                       DstLSB, Width)) {
+    } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0),
+                                       BiggerPattern,
+                                       Src, DstLSB, Width)) {
       ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
       ImmS = Width - 1;
     } else
@@ -2082,6 +2103,39 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
   return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
 }
 
+/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
+/// equivalent of a left shift by a constant amount followed by an and masking
+/// out a contiguous set of bits.
+SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertInZeroOp(SDNode *N) {
+  if (N->getOpcode() != ISD::AND)
+    return nullptr;
+
+  EVT VT = N->getValueType(0);
+  unsigned Opc;
+  if (VT == MVT::i32)
+    Opc = AArch64::UBFMWri;
+  else if (VT == MVT::i64)
+    Opc = AArch64::UBFMXri;
+  else
+    return nullptr;
+
+  SDValue Op0;
+  int DstLSB, Width;
+  if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
+                               Op0, DstLSB, Width))
+    return nullptr;
+
+  // ImmR is the rotate right amount.
+  unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
+  // ImmS is the most significant bit of the source to be moved.
+  unsigned ImmS = Width - 1;
+
+  SDLoc DL(N);
+  SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
+                   CurDAG->getTargetConstant(ImmS, DL, VT)};
+  return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
+}
+
 /// GenerateInexactFlagIfNeeded - Insert FRINTX instruction to generate inexact
 /// signal on round-to-integer operations if needed. C11 leaves it
 /// implementation-defined whether these operations trigger an inexact
@@ -2443,6 +2497,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
   case ISD::SRA:
     if (SDNode *I = SelectBitfieldExtractOp(Node))
       return I;
+    if (SDNode *I = SelectBitfieldInsertInZeroOp(Node))
+      return I;
     break;
 
   case ISD::OR:
diff --git a/llvm/test/CodeGen/AArch64/bitfield-insert.ll b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
index 9b731fa72a47..9df51dcc4478 100644
--- a/llvm/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
@@ -196,3 +196,22 @@ define void @test_32bit_with_shr(i32* %existing, i32* %new) {
 
   ret void
 }
+
+; Bitfield insert where the second or operand is a better match to be folded into the BFM
+define void @test_32bit_opnd1_better(i32* %existing, i32* %new) {
+; CHECK-LABEL: test_32bit_opnd1_better:
+
+  %oldval = load volatile i32, i32* %existing
+  %oldval_keep = and i32 %oldval, 65535 ; 0x0000ffff
+
+  %newval = load i32, i32* %new
+  %newval_shifted = shl i32 %newval, 16
+  %newval_masked = and i32 %newval_shifted, 16711680 ; 0x00ff0000
+
+  %combined = or i32 %oldval_keep, %newval_masked
+  store volatile i32 %combined, i32* %existing
+; CHECK: and [[BIT:w[0-9]+]], {{w[0-9]+}}, #0xffff
+; CHECK: bfi [[BIT]], {{w[0-9]+}}, #16, #8
+
+  ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/xbfiz.ll b/llvm/test/CodeGen/AArch64/xbfiz.ll
index f763400d7f6a..3211cc3f2ced 100644
--- a/llvm/test/CodeGen/AArch64/xbfiz.ll
+++ b/llvm/test/CodeGen/AArch64/xbfiz.ll
@@ -31,3 +31,33 @@ define i32 @ubfiz32(i32 %v) {
   %shr = lshr i32 %shl, 2
   ret i32 %shr
 }
+
+define i64 @ubfiz64and(i64 %v) {
+; CHECK-LABEL: ubfiz64and:
+; CHECK: ubfiz x0, x0, #36, #11
+  %shl = shl i64 %v, 36
+  %and = and i64 %shl, 140668768878592
+  ret i64 %and
+}
+
+define i32 @ubfiz32and(i32 %v) {
+; CHECK-LABEL: ubfiz32and:
+; CHECK: ubfiz w0, w0, #6, #24
+  %shl = shl i32 %v, 6
+  %and = and i32 %shl, 1073741760
+  ret i32 %and
+}
+
+; Check that we don't generate a ubfiz if the lsl has more than one
+; use, since we'd just be replacing an and with a ubfiz.
+define i32 @noubfiz32(i32 %v) {
+; CHECK-LABEL: noubfiz32:
+; CHECK: lsl w[[REG1:[0-9]+]], w0, #6
+; CHECK: and w[[REG2:[0-9]+]], w[[REG1]], #0x3fffffc0
+; CHECK: add w0, w[[REG1]], w[[REG2]]
+; CHECK: ret
+  %shl = shl i32 %v, 6
+  %and = and i32 %shl, 1073741760
+  %add = add i32 %shl, %and
+  ret i32 %add
+}
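
For readers of the change, a minimal C-level sketch of the two patterns this patch improves. It is illustrative only and not part of the patch: the function names and constants are invented here, chosen to mirror the ubfiz32and and test_32bit_opnd1_better tests above.

// Illustrative sketch, mirrors ubfiz32and: a single-use shift followed by a
// mask of contiguous bits. With SelectBitfieldInsertInZeroOp this now selects
// to one UBFIZ (e.g. "ubfiz w0, w0, #6, #24") instead of LSL+AND.
unsigned insertInZero(unsigned v) {
  return (v << 6) & 0x3fffffc0u; // 24 contiguous bits placed at bit 6
}

// Illustrative sketch, mirrors test_32bit_opnd1_better: with both operand
// orders tried for the bigger pattern first, the OR of a masked insertee and
// a shifted-and-masked field selects to AND + BFI.
unsigned insertIntoExisting(unsigned oldval, unsigned newval) {
  return (oldval & 0xffffu) | ((newval << 16) & 0x00ff0000u);
}

Compiled for AArch64 with optimization, these should reduce to the ubfiz and and/bfi sequences that the FileCheck lines above look for.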