[AArch64] Improved bitfield instruction selection.

Summary:
For bitfield insert OR matching, check both operands for larger pattern
first before checking for smaller pattern.

Add pattern for unsigned bitfield insert-in-zero done with SHL+AND.

Resolves PR21631.

Reviewers: jmolloy, t.p.northover

Subscribers: aemerson, rengolin, llvm-commits, mcrosier

Differential Revision: http://reviews.llvm.org/D12908

llvm-svn: 248006
This commit is contained in:
Geoff Berry 2015-09-18 17:11:53 +00:00
parent 0f0e8d624c
commit 43ec15e57e
3 changed files with 116 additions and 11 deletions

View File

@ -166,6 +166,7 @@ public:
SDNode *SelectBitfieldExtractOp(SDNode *N);
SDNode *SelectBitfieldInsertOp(SDNode *N);
SDNode *SelectBitfieldInsertInZeroOp(SDNode *N);
SDNode *SelectLIBM(SDNode *N);
SDNode *SelectFPConvertWithRound(SDNode *N);
@ -1918,6 +1919,7 @@ static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
/// Does this tree qualify as an attempt to move a bitfield into position,
/// essentially "(and (shl VAL, N), Mask)".
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
bool BiggerPattern,
SDValue &Src, int &ShiftAmount,
int &MaskWidth) {
EVT VT = Op.getValueType();
@ -1940,6 +1942,11 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
Op = Op.getOperand(0);
}
// Don't match if the SHL has more than one use, since then we'll end up
// generating SHL+UBFIZ instead of just keeping SHL+AND.
if (!BiggerPattern && !Op.hasOneUse())
return false;
uint64_t ShlImm;
if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
return false;
@ -1953,7 +1960,11 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
// BFI encompasses sufficiently many nodes that it's worth inserting an extra
// LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
// amount.
// amount. BiggerPattern is true when this pattern is being matched for BFI,
// BiggerPattern is false when this pattern is being matched for UBFIZ, in
// which case it is not profitable to insert an extra shift.
if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
return false;
Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
return true;
@ -1990,17 +2001,26 @@ static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
// OR is commutative, check both possibilities (does llvm provide a
// way to do that directely, e.g., via code matcher?)
SDValue OrOpd1Val = N->getOperand(1);
SDNode *OrOpd0 = N->getOperand(0).getNode();
SDNode *OrOpd1 = N->getOperand(1).getNode();
for (int i = 0; i < 2;
++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) {
// OR is commutative, check all combinations of operand order and values of
// BiggerPattern, i.e.
// Opd0, Opd1, BiggerPattern=false
// Opd1, Opd0, BiggerPattern=false
// Opd0, Opd1, BiggerPattern=true
// Opd1, Opd0, BiggerPattern=true
// Several of these combinations may match, so check with BiggerPattern=false
// first since that will produce better results by matching more instructions
// and/or inserting fewer extra instructions.
for (int I = 0; I < 4; ++I) {
bool BiggerPattern = I / 2;
SDNode *OrOpd0 = N->getOperand(I % 2).getNode();
SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
SDNode *OrOpd1 = OrOpd1Val.getNode();
unsigned BFXOpc;
int DstLSB, Width;
if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
NumberOfIgnoredLowBits, true)) {
NumberOfIgnoredLowBits, BiggerPattern)) {
// Check that the returned opcode is compatible with the pattern,
// i.e., same type and zero extended (U and not S)
if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
@ -2018,8 +2038,9 @@ static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
// If the mask on the insertee is correct, we have a BFXIL operation. We
// can share the ImmR and ImmS values from the already-computed UBFM.
} else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), Src,
DstLSB, Width)) {
} else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0),
BiggerPattern,
Src, DstLSB, Width)) {
ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
ImmS = Width - 1;
} else
@ -2082,6 +2103,39 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
}
/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
/// equivalent of a left shift by a constant amount followed by an and masking
/// out a contiguous set of bits.
SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertInZeroOp(SDNode *N) {
if (N->getOpcode() != ISD::AND)
return nullptr;
EVT VT = N->getValueType(0);
unsigned Opc;
if (VT == MVT::i32)
Opc = AArch64::UBFMWri;
else if (VT == MVT::i64)
Opc = AArch64::UBFMXri;
else
return nullptr;
SDValue Op0;
int DstLSB, Width;
if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
Op0, DstLSB, Width))
return nullptr;
// ImmR is the rotate right amount.
unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
// ImmS is the most significant bit of the source to be moved.
unsigned ImmS = Width - 1;
SDLoc DL(N);
SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
CurDAG->getTargetConstant(ImmS, DL, VT)};
return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
}
/// GenerateInexactFlagIfNeeded - Insert FRINTX instruction to generate inexact
/// signal on round-to-integer operations if needed. C11 leaves it
/// implementation-defined whether these operations trigger an inexact
@ -2443,6 +2497,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case ISD::SRA:
if (SDNode *I = SelectBitfieldExtractOp(Node))
return I;
if (SDNode *I = SelectBitfieldInsertInZeroOp(Node))
return I;
break;
case ISD::OR:

View File

@ -196,3 +196,22 @@ define void @test_32bit_with_shr(i32* %existing, i32* %new) {
ret void
}
; Bitfield insert where the second or operand is a better match to be folded into the BFM
define void @test_32bit_opnd1_better(i32* %existing, i32* %new) {
; CHECK-LABEL: test_32bit_opnd1_better:
%oldval = load volatile i32, i32* %existing
%oldval_keep = and i32 %oldval, 65535 ; 0x0000ffff
%newval = load i32, i32* %new
%newval_shifted = shl i32 %newval, 16
%newval_masked = and i32 %newval_shifted, 16711680 ; 0x00ff0000
%combined = or i32 %oldval_keep, %newval_masked
store volatile i32 %combined, i32* %existing
; CHECK: and [[BIT:w[0-9]+]], {{w[0-9]+}}, #0xffff
; CHECK: bfi [[BIT]], {{w[0-9]+}}, #16, #8
ret void
}

View File

@ -31,3 +31,33 @@ define i32 @ubfiz32(i32 %v) {
%shr = lshr i32 %shl, 2
ret i32 %shr
}
define i64 @ubfiz64and(i64 %v) {
; CHECK-LABEL: ubfiz64and:
; CHECK: ubfiz x0, x0, #36, #11
%shl = shl i64 %v, 36
%and = and i64 %shl, 140668768878592
ret i64 %and
}
define i32 @ubfiz32and(i32 %v) {
; CHECK-LABEL: ubfiz32and:
; CHECK: ubfiz w0, w0, #6, #24
%shl = shl i32 %v, 6
%and = and i32 %shl, 1073741760
ret i32 %and
}
; Check that we don't generate a ubfiz if the lsl has more than one
; use, since we'd just be replacing an and with a ubfiz.
define i32 @noubfiz32(i32 %v) {
; CHECK-LABEL: noubfiz32:
; CHECK: lsl w[[REG1:[0-9]+]], w0, #6
; CHECK: and w[[REG2:[0-9]+]], w[[REG1]], #0x3fffffc0
; CHECK: add w0, w[[REG1]], w[[REG2]]
; CHECK: ret
%shl = shl i32 %v, 6
%and = and i32 %shl, 1073741760
%add = add i32 %shl, %and
ret i32 %add
}