forked from OSchip/llvm-project
ARM64: Combine shifts and their uses from different basic blocks into bit-extract instructions
llvm-svn: 206774
This commit is contained in:
parent
36f025e697
commit
d069f6393a
|
@ -182,6 +182,9 @@ public:
|
||||||
return HasMultipleConditionRegisters;
|
return HasMultipleConditionRegisters;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return true if the target has BitExtract instructions.
|
||||||
|
bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
|
||||||
|
|
||||||
/// Return true if a vector of the given type should be split
|
/// Return true if a vector of the given type should be split
|
||||||
/// (TypeSplitVector) instead of promoted (TypePromoteInteger) during type
|
/// (TypeSplitVector) instead of promoted (TypePromoteInteger) during type
|
||||||
/// legalization.
|
/// legalization.
|
||||||
|
@ -1010,6 +1013,14 @@ protected:
|
||||||
HasMultipleConditionRegisters = hasManyRegs;
|
HasMultipleConditionRegisters = hasManyRegs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Tells the code generator that the target has BitExtract instructions.
|
||||||
|
/// The code generator will aggressively sink "shift"s into the blocks of
|
||||||
|
/// their users if the users will generate "and" instructions which can be
|
||||||
|
/// combined with "shift" to BitExtract instructions.
|
||||||
|
void setHasExtractBitsInsn(bool hasExtractInsn = true) {
|
||||||
|
HasExtractBitsInsn = hasExtractInsn;
|
||||||
|
}
|
||||||
|
|
||||||
/// Tells the code generator not to expand sequence of operations into a
|
/// Tells the code generator not to expand sequence of operations into a
|
||||||
/// separate sequences that increases the amount of flow control.
|
/// separate sequences that increases the amount of flow control.
|
||||||
void setJumpIsExpensive(bool isExpensive = true) {
|
void setJumpIsExpensive(bool isExpensive = true) {
|
||||||
|
@ -1436,6 +1447,12 @@ private:
|
||||||
/// the blocks of their users.
|
/// the blocks of their users.
|
||||||
bool HasMultipleConditionRegisters;
|
bool HasMultipleConditionRegisters;
|
||||||
|
|
||||||
|
/// Tells the code generator that the target has BitExtract instructions.
|
||||||
|
/// The code generator will aggressively sink "shift"s into the blocks of
|
||||||
|
/// their users if the users will generate "and" instructions which can be
|
||||||
|
/// combined with "shift" to BitExtract instructions.
|
||||||
|
bool HasExtractBitsInsn;
|
||||||
|
|
||||||
/// Tells the code generator not to expand integer divides by constants into a
|
/// Tells the code generator not to expand integer divides by constants into a
|
||||||
/// sequence of muls, adds, and shifts. This is a hack until a real cost
|
/// sequence of muls, adds, and shifts. This is a hack until a real cost
|
||||||
/// model is in place. If we ever optimize for size, this will be set to true
|
/// model is in place. If we ever optimize for size, this will be set to true
|
||||||
|
|
|
@ -628,6 +628,187 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
|
||||||
return MadeChange;
|
return MadeChange;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// isExtractBitsCandidateUse - Check if the candidates could
|
||||||
|
/// be combined with shift instruction, which includes:
|
||||||
|
/// 1. Truncate instruction
|
||||||
|
/// 2. And instruction and the imm is a mask of the low bits:
|
||||||
|
/// imm & (imm+1) == 0
|
||||||
|
bool isExtractBitsCandidateUse(Instruction *User) {
|
||||||
|
if (!isa<TruncInst>(User)) {
|
||||||
|
if (User->getOpcode() != Instruction::And ||
|
||||||
|
!isa<ConstantInt>(User->getOperand(1)))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
unsigned Cimm = dyn_cast<ConstantInt>(User->getOperand(1))->getZExtValue();
|
||||||
|
|
||||||
|
if (Cimm & (Cimm + 1))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// SinkShiftAndTruncate - sink both shift and truncate instruction
|
||||||
|
/// to the use of truncate's BB.
|
||||||
|
bool
|
||||||
|
SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
|
||||||
|
DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
|
||||||
|
const TargetLowering &TLI) {
|
||||||
|
BasicBlock *UserBB = User->getParent();
|
||||||
|
DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
|
||||||
|
TruncInst *TruncI = dyn_cast<TruncInst>(User);
|
||||||
|
bool MadeChange = false;
|
||||||
|
|
||||||
|
for (Value::user_iterator TruncUI = TruncI->user_begin(),
|
||||||
|
TruncE = TruncI->user_end();
|
||||||
|
TruncUI != TruncE;) {
|
||||||
|
|
||||||
|
Use &TruncTheUse = TruncUI.getUse();
|
||||||
|
Instruction *TruncUser = cast<Instruction>(*TruncUI);
|
||||||
|
// Preincrement use iterator so we don't invalidate it.
|
||||||
|
|
||||||
|
++TruncUI;
|
||||||
|
|
||||||
|
int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
|
||||||
|
if (!ISDOpcode)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// If the use is actually a legal node, there will not be an implicit
|
||||||
|
// truncate.
|
||||||
|
if (TLI.isOperationLegalOrCustom(ISDOpcode,
|
||||||
|
EVT::getEVT(TruncUser->getType())))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// Don't bother for PHI nodes.
|
||||||
|
if (isa<PHINode>(TruncUser))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
BasicBlock *TruncUserBB = TruncUser->getParent();
|
||||||
|
|
||||||
|
if (UserBB == TruncUserBB)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
|
||||||
|
CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
|
||||||
|
|
||||||
|
if (!InsertedShift && !InsertedTrunc) {
|
||||||
|
BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
|
||||||
|
// Sink the shift
|
||||||
|
if (ShiftI->getOpcode() == Instruction::AShr)
|
||||||
|
InsertedShift =
|
||||||
|
BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
|
||||||
|
else
|
||||||
|
InsertedShift =
|
||||||
|
BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
|
||||||
|
|
||||||
|
// Sink the trunc
|
||||||
|
BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
|
||||||
|
TruncInsertPt++;
|
||||||
|
|
||||||
|
InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
|
||||||
|
TruncI->getType(), "", TruncInsertPt);
|
||||||
|
|
||||||
|
MadeChange = true;
|
||||||
|
|
||||||
|
TruncTheUse = InsertedTrunc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return MadeChange;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// OptimizeExtractBits - sink the shift *right* instruction into user blocks if
|
||||||
|
/// the uses could potentially be combined with this shift instruction and
|
||||||
|
/// generate BitExtract instruction. It will only be applied if the architecture
|
||||||
|
/// supports BitExtract instruction. Here is an example:
|
||||||
|
/// BB1:
|
||||||
|
/// %x.extract.shift = lshr i64 %arg1, 32
|
||||||
|
/// BB2:
|
||||||
|
/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
|
||||||
|
/// ==>
|
||||||
|
///
|
||||||
|
/// BB2:
|
||||||
|
/// %x.extract.shift.1 = lshr i64 %arg1, 32
|
||||||
|
/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
|
||||||
|
///
|
||||||
|
/// CodeGen will recoginze the pattern in BB2 and generate BitExtract
|
||||||
|
/// instruction.
|
||||||
|
/// Return true if any changes are made.
|
||||||
|
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
|
||||||
|
const TargetLowering &TLI) {
|
||||||
|
BasicBlock *DefBB = ShiftI->getParent();
|
||||||
|
|
||||||
|
/// Only insert instructions in each block once.
|
||||||
|
DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
|
||||||
|
|
||||||
|
bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType()));
|
||||||
|
|
||||||
|
bool MadeChange = false;
|
||||||
|
for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
|
||||||
|
UI != E;) {
|
||||||
|
Use &TheUse = UI.getUse();
|
||||||
|
Instruction *User = cast<Instruction>(*UI);
|
||||||
|
// Preincrement use iterator so we don't invalidate it.
|
||||||
|
++UI;
|
||||||
|
|
||||||
|
// Don't bother for PHI nodes.
|
||||||
|
if (isa<PHINode>(User))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (!isExtractBitsCandidateUse(User))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
BasicBlock *UserBB = User->getParent();
|
||||||
|
|
||||||
|
if (UserBB == DefBB) {
|
||||||
|
// If the shift and truncate instruction are in the same BB. The use of
|
||||||
|
// the truncate(TruncUse) may still introduce another truncate if not
|
||||||
|
// legal. In this case, we would like to sink both shift and truncate
|
||||||
|
// instruction to the BB of TruncUse.
|
||||||
|
// for example:
|
||||||
|
// BB1:
|
||||||
|
// i64 shift.result = lshr i64 opnd, imm
|
||||||
|
// trunc.result = trunc shift.result to i16
|
||||||
|
//
|
||||||
|
// BB2:
|
||||||
|
// ----> We will have an implicit truncate here if the architecture does
|
||||||
|
// not have i16 compare.
|
||||||
|
// cmp i16 trunc.result, opnd2
|
||||||
|
//
|
||||||
|
if (isa<TruncInst>(User) && shiftIsLegal
|
||||||
|
// If the type of the truncate is legal, no trucate will be
|
||||||
|
// introduced in other basic blocks.
|
||||||
|
&& (!TLI.isTypeLegal(TLI.getValueType(User->getType()))))
|
||||||
|
MadeChange =
|
||||||
|
SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI);
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// If we have already inserted a shift into this block, use it.
|
||||||
|
BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
|
||||||
|
|
||||||
|
if (!InsertedShift) {
|
||||||
|
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
|
||||||
|
|
||||||
|
if (ShiftI->getOpcode() == Instruction::AShr)
|
||||||
|
InsertedShift =
|
||||||
|
BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
|
||||||
|
else
|
||||||
|
InsertedShift =
|
||||||
|
BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
|
||||||
|
|
||||||
|
MadeChange = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Replace a use of the shift with a use of the new shift.
|
||||||
|
TheUse = InsertedShift;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we removed all uses, nuke the shift.
|
||||||
|
if (ShiftI->use_empty())
|
||||||
|
ShiftI->eraseFromParent();
|
||||||
|
|
||||||
|
return MadeChange;
|
||||||
|
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
|
class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
|
||||||
protected:
|
protected:
|
||||||
|
@ -3225,6 +3406,17 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
|
||||||
|
|
||||||
|
if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
|
||||||
|
BinOp->getOpcode() == Instruction::LShr)) {
|
||||||
|
ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
|
||||||
|
if (TLI && CI && TLI->hasExtractBitsInsn())
|
||||||
|
return OptimizeExtractBits(BinOp, CI, *TLI);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
|
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
|
||||||
if (GEPI->hasAllZeroIndices()) {
|
if (GEPI->hasAllZeroIndices()) {
|
||||||
/// The GEP operand must be a pointer, so must its result -> BitCast
|
/// The GEP operand must be a pointer, so must its result -> BitCast
|
||||||
|
|
|
@ -1183,6 +1183,14 @@ static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
|
||||||
// Make sure to clamp the MSB so that we preserve the semantics of the
|
// Make sure to clamp the MSB so that we preserve the semantics of the
|
||||||
// original operations.
|
// original operations.
|
||||||
ClampMSB = true;
|
ClampMSB = true;
|
||||||
|
} else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
|
||||||
|
isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
|
||||||
|
Srl_imm)) {
|
||||||
|
// If the shift result was truncated, we can still combine them.
|
||||||
|
Opd0 = Op0->getOperand(0).getOperand(0);
|
||||||
|
|
||||||
|
// Use the type of SRL node.
|
||||||
|
VT = Opd0->getValueType(0);
|
||||||
} else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) {
|
} else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) {
|
||||||
Opd0 = Op0->getOperand(0);
|
Opd0 = Op0->getOperand(0);
|
||||||
} else if (BiggerPattern) {
|
} else if (BiggerPattern) {
|
||||||
|
@ -1277,8 +1285,19 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
|
||||||
|
|
||||||
// we're looking for a shift of a shift
|
// we're looking for a shift of a shift
|
||||||
uint64_t Shl_imm = 0;
|
uint64_t Shl_imm = 0;
|
||||||
|
uint64_t Trunc_bits = 0;
|
||||||
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
|
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
|
||||||
Opd0 = N->getOperand(0).getOperand(0);
|
Opd0 = N->getOperand(0).getOperand(0);
|
||||||
|
} else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
|
||||||
|
N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
|
||||||
|
// We are looking for a shift of truncate. Truncate from i64 to i32 could
|
||||||
|
// be considered as setting high 32 bits as zero. Our strategy here is to
|
||||||
|
// always generate 64bit UBFM. This consistency will help the CSE pass
|
||||||
|
// later find more redundancy.
|
||||||
|
Opd0 = N->getOperand(0).getOperand(0);
|
||||||
|
Trunc_bits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
|
||||||
|
VT = Opd0->getValueType(0);
|
||||||
|
assert(VT == MVT::i64 && "the promoted type should be i64");
|
||||||
} else if (BiggerPattern) {
|
} else if (BiggerPattern) {
|
||||||
// Let's pretend a 0 shift left has been performed.
|
// Let's pretend a 0 shift left has been performed.
|
||||||
// FIXME: Currently we limit this to the bigger pattern case,
|
// FIXME: Currently we limit this to the bigger pattern case,
|
||||||
|
@ -1295,7 +1314,7 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
|
||||||
assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&
|
assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&
|
||||||
"bad amount in shift node!");
|
"bad amount in shift node!");
|
||||||
// Note: The width operand is encoded as width-1.
|
// Note: The width operand is encoded as width-1.
|
||||||
unsigned Width = VT.getSizeInBits() - Srl_imm - 1;
|
unsigned Width = VT.getSizeInBits() - Trunc_bits - Srl_imm - 1;
|
||||||
int sLSB = Srl_imm - Shl_imm;
|
int sLSB = Srl_imm - Shl_imm;
|
||||||
if (sLSB < 0)
|
if (sLSB < 0)
|
||||||
return false;
|
return false;
|
||||||
|
@ -1354,8 +1373,23 @@ SDNode *ARM64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
EVT VT = N->getValueType(0);
|
EVT VT = N->getValueType(0);
|
||||||
SDValue Ops[] = { Opd0, CurDAG->getTargetConstant(LSB, VT),
|
|
||||||
CurDAG->getTargetConstant(MSB, VT) };
|
// If the bit extract operation is 64bit but the original type is 32bit, we
|
||||||
|
// need to add one EXTRACT_SUBREG.
|
||||||
|
if ((Opc == ARM64::SBFMXri || Opc == ARM64::UBFMXri) && VT == MVT::i32) {
|
||||||
|
SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, MVT::i64),
|
||||||
|
CurDAG->getTargetConstant(MSB, MVT::i64)};
|
||||||
|
|
||||||
|
SDNode *BFM = CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i64, Ops64);
|
||||||
|
SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32);
|
||||||
|
MachineSDNode *Node =
|
||||||
|
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32,
|
||||||
|
SDValue(BFM, 0), SubReg);
|
||||||
|
return Node;
|
||||||
|
}
|
||||||
|
|
||||||
|
SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, VT),
|
||||||
|
CurDAG->getTargetConstant(MSB, VT)};
|
||||||
return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 3);
|
return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -438,6 +438,8 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM)
|
||||||
setDivIsWellDefined(true);
|
setDivIsWellDefined(true);
|
||||||
|
|
||||||
RequireStrictAlign = StrictAlign;
|
RequireStrictAlign = StrictAlign;
|
||||||
|
|
||||||
|
setHasExtractBitsInsn(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARM64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
|
void ARM64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
; RUN: opt -codegenprepare -mtriple=arm64-apple-ios -S -o - %s | FileCheck --check-prefix=OPT %s
|
||||||
; RUN: llc < %s -march=arm64 | FileCheck %s
|
; RUN: llc < %s -march=arm64 | FileCheck %s
|
||||||
%struct.X = type { i8, i8, [2 x i8] }
|
%struct.X = type { i8, i8, [2 x i8] }
|
||||||
%struct.Y = type { i32, i8 }
|
%struct.Y = type { i32, i8 }
|
||||||
|
@ -404,3 +405,75 @@ define i64 @fct18(i32 %xor72) nounwind ssp {
|
||||||
%result = and i64 %conv82, 255
|
%result = and i64 %conv82, 255
|
||||||
ret i64 %result
|
ret i64 %result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Using the access to the global array to keep the instruction and control flow.
|
||||||
|
@first_ones = external global [65536 x i8]
|
||||||
|
|
||||||
|
; Function Attrs: nounwind readonly ssp
|
||||||
|
define i32 @fct19(i64 %arg1) nounwind readonly ssp {
|
||||||
|
; CHECK-LABEL: fct19:
|
||||||
|
entry:
|
||||||
|
%x.sroa.1.0.extract.shift = lshr i64 %arg1, 16
|
||||||
|
%x.sroa.1.0.extract.trunc = trunc i64 %x.sroa.1.0.extract.shift to i16
|
||||||
|
%x.sroa.3.0.extract.shift = lshr i64 %arg1, 32
|
||||||
|
%x.sroa.5.0.extract.shift = lshr i64 %arg1, 48
|
||||||
|
%tobool = icmp eq i64 %x.sroa.5.0.extract.shift, 0
|
||||||
|
br i1 %tobool, label %if.end, label %if.then
|
||||||
|
|
||||||
|
if.then: ; preds = %entry
|
||||||
|
%arrayidx3 = getelementptr inbounds [65536 x i8]* @first_ones, i64 0, i64 %x.sroa.5.0.extract.shift
|
||||||
|
%0 = load i8* %arrayidx3, align 1
|
||||||
|
%conv = zext i8 %0 to i32
|
||||||
|
br label %return
|
||||||
|
|
||||||
|
; OPT-LABEL: if.end
|
||||||
|
if.end: ; preds = %entry
|
||||||
|
; OPT: lshr
|
||||||
|
; CHECK: ubfm [[REG1:x[0-9]+]], [[REG2:x[0-9]+]], #32, #47
|
||||||
|
%x.sroa.3.0.extract.trunc = trunc i64 %x.sroa.3.0.extract.shift to i16
|
||||||
|
%tobool6 = icmp eq i16 %x.sroa.3.0.extract.trunc, 0
|
||||||
|
; CHECK: cbz
|
||||||
|
br i1 %tobool6, label %if.end13, label %if.then7
|
||||||
|
|
||||||
|
; OPT-LABEL: if.then7
|
||||||
|
if.then7: ; preds = %if.end
|
||||||
|
; OPT: lshr
|
||||||
|
; "and" should be combined to "ubfm" while "ubfm" should be removed by cse.
|
||||||
|
; So neither of them should be in the assembly code.
|
||||||
|
; CHECK-NOT: and
|
||||||
|
; CHECK-NOT: ubfm
|
||||||
|
%idxprom10 = and i64 %x.sroa.3.0.extract.shift, 65535
|
||||||
|
%arrayidx11 = getelementptr inbounds [65536 x i8]* @first_ones, i64 0, i64 %idxprom10
|
||||||
|
%1 = load i8* %arrayidx11, align 1
|
||||||
|
%conv12 = zext i8 %1 to i32
|
||||||
|
%add = add nsw i32 %conv12, 16
|
||||||
|
br label %return
|
||||||
|
|
||||||
|
; OPT-LABEL: if.end13
|
||||||
|
if.end13: ; preds = %if.end
|
||||||
|
; OPT: lshr
|
||||||
|
; OPT: trunc
|
||||||
|
; CHECK: ubfm [[REG3:x[0-9]+]], [[REG4:x[0-9]+]], #16, #31
|
||||||
|
%tobool16 = icmp eq i16 %x.sroa.1.0.extract.trunc, 0
|
||||||
|
; CHECK: cbz
|
||||||
|
br i1 %tobool16, label %return, label %if.then17
|
||||||
|
|
||||||
|
; OPT-LABEL: if.then17
|
||||||
|
if.then17: ; preds = %if.end13
|
||||||
|
; OPT: lshr
|
||||||
|
; "and" should be combined to "ubfm" while "ubfm" should be removed by cse.
|
||||||
|
; So neither of them should be in the assembly code.
|
||||||
|
; CHECK-NOT: and
|
||||||
|
; CHECK-NOT: ubfm
|
||||||
|
%idxprom20 = and i64 %x.sroa.1.0.extract.shift, 65535
|
||||||
|
%arrayidx21 = getelementptr inbounds [65536 x i8]* @first_ones, i64 0, i64 %idxprom20
|
||||||
|
%2 = load i8* %arrayidx21, align 1
|
||||||
|
%conv22 = zext i8 %2 to i32
|
||||||
|
%add23 = add nsw i32 %conv22, 32
|
||||||
|
br label %return
|
||||||
|
|
||||||
|
return: ; preds = %if.end13, %if.then17, %if.then7, %if.then
|
||||||
|
; CHECK: ret
|
||||||
|
%retval.0 = phi i32 [ %conv, %if.then ], [ %add, %if.then7 ], [ %add23, %if.then17 ], [ 64, %if.end13 ]
|
||||||
|
ret i32 %retval.0
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue