forked from OSchip/llvm-project
[AArch64] Improved bitfield instruction selection.
Summary: For bitfield insert OR matching, check both operands for larger pattern first before checking for smaller pattern. Add pattern for unsigned bitfield insert-in-zero done with SHL+AND. Resolves PR21631. Reviewers: jmolloy, t.p.northover Subscribers: aemerson, rengolin, llvm-commits, mcrosier Differential Revision: http://reviews.llvm.org/D12908 llvm-svn: 248006
This commit is contained in:
parent
0f0e8d624c
commit
43ec15e57e
|
@ -166,6 +166,7 @@ public:
|
|||
|
||||
SDNode *SelectBitfieldExtractOp(SDNode *N);
|
||||
SDNode *SelectBitfieldInsertOp(SDNode *N);
|
||||
SDNode *SelectBitfieldInsertInZeroOp(SDNode *N);
|
||||
|
||||
SDNode *SelectLIBM(SDNode *N);
|
||||
SDNode *SelectFPConvertWithRound(SDNode *N);
|
||||
|
@ -1918,6 +1919,7 @@ static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
|
|||
/// Does this tree qualify as an attempt to move a bitfield into position,
|
||||
/// essentially "(and (shl VAL, N), Mask)".
|
||||
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
|
||||
bool BiggerPattern,
|
||||
SDValue &Src, int &ShiftAmount,
|
||||
int &MaskWidth) {
|
||||
EVT VT = Op.getValueType();
|
||||
|
@ -1940,6 +1942,11 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
|
|||
Op = Op.getOperand(0);
|
||||
}
|
||||
|
||||
// Don't match if the SHL has more than one use, since then we'll end up
|
||||
// generating SHL+UBFIZ instead of just keeping SHL+AND.
|
||||
if (!BiggerPattern && !Op.hasOneUse())
|
||||
return false;
|
||||
|
||||
uint64_t ShlImm;
|
||||
if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
|
||||
return false;
|
||||
|
@ -1953,7 +1960,11 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
|
|||
|
||||
// BFI encompasses sufficiently many nodes that it's worth inserting an extra
|
||||
// LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
|
||||
// amount.
|
||||
// amount. BiggerPattern is true when this pattern is being matched for BFI,
|
||||
// BiggerPattern is false when this pattern is being matched for UBFIZ, in
|
||||
// which case it is not profitable to insert an extra shift.
|
||||
if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
|
||||
return false;
|
||||
Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
|
||||
|
||||
return true;
|
||||
|
@ -1990,17 +2001,26 @@ static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
|
|||
unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
|
||||
unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
|
||||
|
||||
// OR is commutative, check both possibilities (does llvm provide a
|
||||
// way to do that directely, e.g., via code matcher?)
|
||||
SDValue OrOpd1Val = N->getOperand(1);
|
||||
SDNode *OrOpd0 = N->getOperand(0).getNode();
|
||||
SDNode *OrOpd1 = N->getOperand(1).getNode();
|
||||
for (int i = 0; i < 2;
|
||||
++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) {
|
||||
// OR is commutative, check all combinations of operand order and values of
|
||||
// BiggerPattern, i.e.
|
||||
// Opd0, Opd1, BiggerPattern=false
|
||||
// Opd1, Opd0, BiggerPattern=false
|
||||
// Opd0, Opd1, BiggerPattern=true
|
||||
// Opd1, Opd0, BiggerPattern=true
|
||||
// Several of these combinations may match, so check with BiggerPattern=false
|
||||
// first since that will produce better results by matching more instructions
|
||||
// and/or inserting fewer extra instructions.
|
||||
for (int I = 0; I < 4; ++I) {
|
||||
|
||||
bool BiggerPattern = I / 2;
|
||||
SDNode *OrOpd0 = N->getOperand(I % 2).getNode();
|
||||
SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
|
||||
SDNode *OrOpd1 = OrOpd1Val.getNode();
|
||||
|
||||
unsigned BFXOpc;
|
||||
int DstLSB, Width;
|
||||
if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
|
||||
NumberOfIgnoredLowBits, true)) {
|
||||
NumberOfIgnoredLowBits, BiggerPattern)) {
|
||||
// Check that the returned opcode is compatible with the pattern,
|
||||
// i.e., same type and zero extended (U and not S)
|
||||
if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
|
||||
|
@ -2018,8 +2038,9 @@ static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
|
|||
|
||||
// If the mask on the insertee is correct, we have a BFXIL operation. We
|
||||
// can share the ImmR and ImmS values from the already-computed UBFM.
|
||||
} else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), Src,
|
||||
DstLSB, Width)) {
|
||||
} else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0),
|
||||
BiggerPattern,
|
||||
Src, DstLSB, Width)) {
|
||||
ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
|
||||
ImmS = Width - 1;
|
||||
} else
|
||||
|
@ -2082,6 +2103,39 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
|
|||
return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
|
||||
}
|
||||
|
||||
/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
|
||||
/// equivalent of a left shift by a constant amount followed by an and masking
|
||||
/// out a contiguous set of bits.
|
||||
SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertInZeroOp(SDNode *N) {
|
||||
if (N->getOpcode() != ISD::AND)
|
||||
return nullptr;
|
||||
|
||||
EVT VT = N->getValueType(0);
|
||||
unsigned Opc;
|
||||
if (VT == MVT::i32)
|
||||
Opc = AArch64::UBFMWri;
|
||||
else if (VT == MVT::i64)
|
||||
Opc = AArch64::UBFMXri;
|
||||
else
|
||||
return nullptr;
|
||||
|
||||
SDValue Op0;
|
||||
int DstLSB, Width;
|
||||
if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
|
||||
Op0, DstLSB, Width))
|
||||
return nullptr;
|
||||
|
||||
// ImmR is the rotate right amount.
|
||||
unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
|
||||
// ImmS is the most significant bit of the source to be moved.
|
||||
unsigned ImmS = Width - 1;
|
||||
|
||||
SDLoc DL(N);
|
||||
SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
|
||||
CurDAG->getTargetConstant(ImmS, DL, VT)};
|
||||
return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
|
||||
}
|
||||
|
||||
/// GenerateInexactFlagIfNeeded - Insert FRINTX instruction to generate inexact
|
||||
/// signal on round-to-integer operations if needed. C11 leaves it
|
||||
/// implementation-defined whether these operations trigger an inexact
|
||||
|
@ -2443,6 +2497,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
|
|||
case ISD::SRA:
|
||||
if (SDNode *I = SelectBitfieldExtractOp(Node))
|
||||
return I;
|
||||
if (SDNode *I = SelectBitfieldInsertInZeroOp(Node))
|
||||
return I;
|
||||
break;
|
||||
|
||||
case ISD::OR:
|
||||
|
|
|
@ -196,3 +196,22 @@ define void @test_32bit_with_shr(i32* %existing, i32* %new) {
|
|||
|
||||
ret void
|
||||
}
|
||||
|
||||
; Bitfield insert where the second or operand is a better match to be folded into the BFM
|
||||
define void @test_32bit_opnd1_better(i32* %existing, i32* %new) {
|
||||
; CHECK-LABEL: test_32bit_opnd1_better:
|
||||
|
||||
%oldval = load volatile i32, i32* %existing
|
||||
%oldval_keep = and i32 %oldval, 65535 ; 0x0000ffff
|
||||
|
||||
%newval = load i32, i32* %new
|
||||
%newval_shifted = shl i32 %newval, 16
|
||||
%newval_masked = and i32 %newval_shifted, 16711680 ; 0x00ff0000
|
||||
|
||||
%combined = or i32 %oldval_keep, %newval_masked
|
||||
store volatile i32 %combined, i32* %existing
|
||||
; CHECK: and [[BIT:w[0-9]+]], {{w[0-9]+}}, #0xffff
|
||||
; CHECK: bfi [[BIT]], {{w[0-9]+}}, #16, #8
|
||||
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -31,3 +31,33 @@ define i32 @ubfiz32(i32 %v) {
|
|||
%shr = lshr i32 %shl, 2
|
||||
ret i32 %shr
|
||||
}
|
||||
|
||||
define i64 @ubfiz64and(i64 %v) {
|
||||
; CHECK-LABEL: ubfiz64and:
|
||||
; CHECK: ubfiz x0, x0, #36, #11
|
||||
%shl = shl i64 %v, 36
|
||||
%and = and i64 %shl, 140668768878592
|
||||
ret i64 %and
|
||||
}
|
||||
|
||||
define i32 @ubfiz32and(i32 %v) {
|
||||
; CHECK-LABEL: ubfiz32and:
|
||||
; CHECK: ubfiz w0, w0, #6, #24
|
||||
%shl = shl i32 %v, 6
|
||||
%and = and i32 %shl, 1073741760
|
||||
ret i32 %and
|
||||
}
|
||||
|
||||
; Check that we don't generate a ubfiz if the lsl has more than one
|
||||
; use, since we'd just be replacing an and with a ubfiz.
|
||||
define i32 @noubfiz32(i32 %v) {
|
||||
; CHECK-LABEL: noubfiz32:
|
||||
; CHECK: lsl w[[REG1:[0-9]+]], w0, #6
|
||||
; CHECK: and w[[REG2:[0-9]+]], w[[REG1]], #0x3fffffc0
|
||||
; CHECK: add w0, w[[REG1]], w[[REG2]]
|
||||
; CHECK: ret
|
||||
%shl = shl i32 %v, 6
|
||||
%and = and i32 %shl, 1073741760
|
||||
%add = add i32 %shl, %and
|
||||
ret i32 %add
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue