2010-01-05 15:50:36 +08:00
|
|
|
//===- InstCombineAndOrXor.cpp --------------------------------------------===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This file implements the visitAnd, visitOr, and visitXor functions.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2015-01-22 13:25:13 +08:00
|
|
|
#include "InstCombineInternal.h"
|
2017-08-15 05:39:51 +08:00
|
|
|
#include "llvm/Analysis/CmpInstAnalysis.h"
|
2010-01-05 15:50:36 +08:00
|
|
|
#include "llvm/Analysis/InstructionSimplify.h"
|
2018-06-05 05:23:21 +08:00
|
|
|
#include "llvm/Transforms/Utils/Local.h"
|
2014-03-04 20:24:34 +08:00
|
|
|
#include "llvm/IR/ConstantRange.h"
|
2013-01-02 19:36:10 +08:00
|
|
|
#include "llvm/IR/Intrinsics.h"
|
2014-03-04 19:08:18 +08:00
|
|
|
#include "llvm/IR/PatternMatch.h"
|
2010-01-05 15:50:36 +08:00
|
|
|
using namespace llvm;
|
|
|
|
using namespace PatternMatch;
|
|
|
|
|
2014-04-22 10:55:47 +08:00
|
|
|
#define DEBUG_TYPE "instcombine"
|
|
|
|
|
2015-09-09 02:24:36 +08:00
|
|
|
/// Similar to getICmpCode but for FCmpInst. This encodes a fcmp predicate into
|
[InstCombine] Simplify and correct folding fcmps with the same children
Summary: Take advantage of FCmpInst::Predicate's bit pattern and handle (fcmp *, x, y) | (fcmp *, x, y) and (fcmp *, x, y) & (fcmp *, x, y) more consistently. Also fold more FCmpInst::FCMP_FALSE and FCmpInst::FCMP_TRUE to constants.
Currently InstCombine wrongly folds (fcmp ogt, x, y) | (fcmp ord, x, y) to (fcmp ogt, x, y); this patch also fixes that.
Reviewers: spatel
Subscribers: llvm-commits, iteratee, echristo
Differential Revision: http://reviews.llvm.org/D21775
llvm-svn: 274156
2016-06-30 04:10:17 +08:00
|
|
|
/// a four bit mask.
|
|
|
|
static unsigned getFCmpCode(FCmpInst::Predicate CC) {
|
|
|
|
assert(FCmpInst::FCMP_FALSE <= CC && CC <= FCmpInst::FCMP_TRUE &&
|
|
|
|
"Unexpected FCmp predicate!");
|
|
|
|
// Take advantage of the bit pattern of FCmpInst::Predicate here.
|
|
|
|
// U L G E
|
|
|
|
static_assert(FCmpInst::FCMP_FALSE == 0, ""); // 0 0 0 0
|
|
|
|
static_assert(FCmpInst::FCMP_OEQ == 1, ""); // 0 0 0 1
|
|
|
|
static_assert(FCmpInst::FCMP_OGT == 2, ""); // 0 0 1 0
|
|
|
|
static_assert(FCmpInst::FCMP_OGE == 3, ""); // 0 0 1 1
|
|
|
|
static_assert(FCmpInst::FCMP_OLT == 4, ""); // 0 1 0 0
|
|
|
|
static_assert(FCmpInst::FCMP_OLE == 5, ""); // 0 1 0 1
|
|
|
|
static_assert(FCmpInst::FCMP_ONE == 6, ""); // 0 1 1 0
|
|
|
|
static_assert(FCmpInst::FCMP_ORD == 7, ""); // 0 1 1 1
|
|
|
|
static_assert(FCmpInst::FCMP_UNO == 8, ""); // 1 0 0 0
|
|
|
|
static_assert(FCmpInst::FCMP_UEQ == 9, ""); // 1 0 0 1
|
|
|
|
static_assert(FCmpInst::FCMP_UGT == 10, ""); // 1 0 1 0
|
|
|
|
static_assert(FCmpInst::FCMP_UGE == 11, ""); // 1 0 1 1
|
|
|
|
static_assert(FCmpInst::FCMP_ULT == 12, ""); // 1 1 0 0
|
|
|
|
static_assert(FCmpInst::FCMP_ULE == 13, ""); // 1 1 0 1
|
|
|
|
static_assert(FCmpInst::FCMP_UNE == 14, ""); // 1 1 1 0
|
|
|
|
static_assert(FCmpInst::FCMP_TRUE == 15, ""); // 1 1 1 1
|
|
|
|
return CC;
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
|
|
|
|
2015-09-09 02:24:36 +08:00
|
|
|
/// This is the complement of getICmpCode, which turns an opcode and two
|
|
|
|
/// operands into either a constant true or false, or a brand new ICmp
|
|
|
|
/// instruction. The sign is passed in to determine which kind of predicate to
|
|
|
|
/// use in the new icmp instruction.
|
2012-02-06 19:28:19 +08:00
|
|
|
static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
|
2017-07-08 07:16:26 +08:00
|
|
|
InstCombiner::BuilderTy &Builder) {
|
2011-12-17 09:20:32 +08:00
|
|
|
ICmpInst::Predicate NewPred;
|
|
|
|
if (Value *NewConstant = getICmpValue(Sign, Code, LHS, RHS, NewPred))
|
|
|
|
return NewConstant;
|
2017-07-08 07:16:26 +08:00
|
|
|
return Builder.CreateICmp(NewPred, LHS, RHS);
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
|
|
|
|
2015-09-09 02:24:36 +08:00
|
|
|
/// This is the complement of getFCmpCode, which turns an opcode and two
|
[InstCombine] Simplify and correct folding fcmps with the same children
Summary: Take advantage of FCmpInst::Predicate's bit pattern and handle (fcmp *, x, y) | (fcmp *, x, y) and (fcmp *, x, y) & (fcmp *, x, y) more consistently. Also fold more FCmpInst::FCMP_FALSE and FCmpInst::FCMP_TRUE to constants.
Currently InstCombine wrongly folds (fcmp ogt, x, y) | (fcmp ord, x, y) to (fcmp ogt, x, y); this patch also fixes that.
Reviewers: spatel
Subscribers: llvm-commits, iteratee, echristo
Differential Revision: http://reviews.llvm.org/D21775
llvm-svn: 274156
2016-06-30 04:10:17 +08:00
|
|
|
/// operands into either a FCmp instruction, or a true/false constant.
|
|
|
|
static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS,
|
2017-07-08 07:16:26 +08:00
|
|
|
InstCombiner::BuilderTy &Builder) {
|
[InstCombine] Simplify and correct folding fcmps with the same children
Summary: Take advantage of FCmpInst::Predicate's bit pattern and handle (fcmp *, x, y) | (fcmp *, x, y) and (fcmp *, x, y) & (fcmp *, x, y) more consistently. Also fold more FCmpInst::FCMP_FALSE and FCmpInst::FCMP_TRUE to constants.
Currently InstCombine wrongly folds (fcmp ogt, x, y) | (fcmp ord, x, y) to (fcmp ogt, x, y); this patch also fixes that.
Reviewers: spatel
Subscribers: llvm-commits, iteratee, echristo
Differential Revision: http://reviews.llvm.org/D21775
llvm-svn: 274156
2016-06-30 04:10:17 +08:00
|
|
|
const auto Pred = static_cast<FCmpInst::Predicate>(Code);
|
|
|
|
assert(FCmpInst::FCMP_FALSE <= Pred && Pred <= FCmpInst::FCMP_TRUE &&
|
|
|
|
"Unexpected FCmp predicate!");
|
|
|
|
if (Pred == FCmpInst::FCMP_FALSE)
|
|
|
|
return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
|
|
|
|
if (Pred == FCmpInst::FCMP_TRUE)
|
|
|
|
return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
|
2017-07-08 07:16:26 +08:00
|
|
|
return Builder.CreateFCmp(Pred, LHS, RHS);
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Transform BITWISE_OP(BSWAP(A),BSWAP(B)) or
|
2017-07-07 00:24:22 +08:00
|
|
|
/// BITWISE_OP(BSWAP(A), Constant) to BSWAP(BITWISE_OP(A, B))
|
2014-12-04 17:44:01 +08:00
|
|
|
/// \param I Binary operator to transform.
|
|
|
|
/// \return Pointer to node that must replace the original binary operator, or
|
|
|
|
/// null pointer if no transformation was made.
|
2017-07-07 00:24:23 +08:00
|
|
|
static Value *SimplifyBSwap(BinaryOperator &I,
|
2017-07-08 07:16:26 +08:00
|
|
|
InstCombiner::BuilderTy &Builder) {
|
2017-07-03 13:54:11 +08:00
|
|
|
assert(I.isBitwiseLogicOp() && "Unexpected opcode for bswap simplifying");
|
|
|
|
|
2017-07-07 00:24:21 +08:00
|
|
|
Value *OldLHS = I.getOperand(0);
|
|
|
|
Value *OldRHS = I.getOperand(1);
|
2017-07-03 13:54:16 +08:00
|
|
|
|
2017-07-03 13:54:15 +08:00
|
|
|
Value *NewLHS;
|
2017-07-07 00:24:21 +08:00
|
|
|
if (!match(OldLHS, m_BSwap(m_Value(NewLHS))))
|
2014-12-04 17:44:01 +08:00
|
|
|
return nullptr;
|
|
|
|
|
2017-07-03 13:54:15 +08:00
|
|
|
Value *NewRHS;
|
|
|
|
const APInt *C;
|
2014-12-04 17:44:01 +08:00
|
|
|
|
2017-07-07 00:24:21 +08:00
|
|
|
if (match(OldRHS, m_BSwap(m_Value(NewRHS)))) {
|
2017-07-03 13:54:15 +08:00
|
|
|
// OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) )
|
2017-07-07 00:24:21 +08:00
|
|
|
if (!OldLHS->hasOneUse() && !OldRHS->hasOneUse())
|
|
|
|
return nullptr;
|
2017-07-03 13:54:15 +08:00
|
|
|
// NewRHS initialized by the matcher.
|
2017-07-07 00:24:21 +08:00
|
|
|
} else if (match(OldRHS, m_APInt(C))) {
|
2017-07-03 13:54:15 +08:00
|
|
|
// OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) )
|
2017-07-07 00:24:21 +08:00
|
|
|
if (!OldLHS->hasOneUse())
|
|
|
|
return nullptr;
|
2017-07-03 13:54:15 +08:00
|
|
|
NewRHS = ConstantInt::get(I.getType(), C->byteSwap());
|
|
|
|
} else
|
|
|
|
return nullptr;
|
2014-12-04 17:44:01 +08:00
|
|
|
|
2017-07-08 07:16:26 +08:00
|
|
|
Value *BinOp = Builder.CreateBinOp(I.getOpcode(), NewLHS, NewRHS);
|
2017-07-03 13:54:13 +08:00
|
|
|
Function *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::bswap,
|
|
|
|
I.getType());
|
2017-07-08 07:16:26 +08:00
|
|
|
return Builder.CreateCall(F, BinOp);
|
2014-12-04 17:44:01 +08:00
|
|
|
}
|
|
|
|
|
2015-09-09 02:24:36 +08:00
|
|
|
/// This handles expressions of the form ((val OP C1) & C2). Where
|
2017-04-03 01:57:30 +08:00
|
|
|
/// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'.
|
|
|
|
Instruction *InstCombiner::OptAndOp(BinaryOperator *Op,
|
2010-01-05 15:50:36 +08:00
|
|
|
ConstantInt *OpRHS,
|
|
|
|
ConstantInt *AndRHS,
|
|
|
|
BinaryOperator &TheAnd) {
|
|
|
|
Value *X = Op->getOperand(0);
|
|
|
|
|
|
|
|
switch (Op->getOpcode()) {
|
2017-04-03 01:57:30 +08:00
|
|
|
default: break;
|
2010-01-05 15:50:36 +08:00
|
|
|
case Instruction::Add:
|
|
|
|
if (Op->hasOneUse()) {
|
|
|
|
// Adding a one to a single bit bit-field should be turned into an XOR
|
|
|
|
// of the bit. First thing to check is to see if this AND is with a
|
|
|
|
// single bit constant.
|
2013-06-06 08:49:57 +08:00
|
|
|
const APInt &AndRHSV = AndRHS->getValue();
|
2010-01-05 15:50:36 +08:00
|
|
|
|
|
|
|
// If there is only one bit set.
|
|
|
|
if (AndRHSV.isPowerOf2()) {
|
|
|
|
// Ok, at this point, we know that we are masking the result of the
|
|
|
|
// ADD down to exactly one bit. If the constant we are adding has
|
|
|
|
// no bits set below this bit, then we can eliminate the ADD.
|
2013-06-06 08:49:57 +08:00
|
|
|
const APInt& AddRHS = OpRHS->getValue();
|
2010-01-05 15:50:36 +08:00
|
|
|
|
|
|
|
// Check to see if any bits below the one bit set in AndRHSV are set.
|
2017-06-07 15:40:37 +08:00
|
|
|
if ((AddRHS & (AndRHSV - 1)).isNullValue()) {
|
2010-01-05 15:50:36 +08:00
|
|
|
// If not, the only thing that can effect the output of the AND is
|
|
|
|
// the bit specified by AndRHSV. If that bit is set, the effect of
|
|
|
|
// the XOR is to toggle the bit. If it is clear, then the ADD has
|
|
|
|
// no effect.
|
2017-06-07 15:40:37 +08:00
|
|
|
if ((AddRHS & AndRHSV).isNullValue()) { // Bit is not set, noop
|
2010-01-05 15:50:36 +08:00
|
|
|
TheAnd.setOperand(0, X);
|
|
|
|
return &TheAnd;
|
|
|
|
} else {
|
|
|
|
// Pull the XOR out of the AND.
|
2017-07-08 07:16:26 +08:00
|
|
|
Value *NewAnd = Builder.CreateAnd(X, AndRHS);
|
2010-01-05 15:50:36 +08:00
|
|
|
NewAnd->takeName(Op);
|
|
|
|
return BinaryOperator::CreateXor(NewAnd, AndRHS);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
|
|
|
|
2013-04-06 05:20:12 +08:00
|
|
|
/// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise
|
2016-08-31 08:19:35 +08:00
|
|
|
/// (V < Lo || V >= Hi). This method expects that Lo <= Hi. IsSigned indicates
|
|
|
|
/// whether to treat V, Lo, and Hi as signed or not.
|
2016-09-01 03:49:56 +08:00
|
|
|
Value *InstCombiner::insertRangeTest(Value *V, const APInt &Lo, const APInt &Hi,
|
2010-03-05 16:46:26 +08:00
|
|
|
bool isSigned, bool Inside) {
|
2016-09-01 03:49:56 +08:00
|
|
|
assert((isSigned ? Lo.sle(Hi) : Lo.ule(Hi)) &&
|
2010-01-05 15:50:36 +08:00
|
|
|
"Lo is not <= Hi in range emission code!");
|
2012-12-20 15:09:41 +08:00
|
|
|
|
2016-09-01 03:49:56 +08:00
|
|
|
Type *Ty = V->getType();
|
2016-08-31 08:19:35 +08:00
|
|
|
if (Lo == Hi)
|
2016-09-01 03:49:56 +08:00
|
|
|
return Inside ? ConstantInt::getFalse(Ty) : ConstantInt::getTrue(Ty);
|
2010-01-05 15:50:36 +08:00
|
|
|
|
2016-08-31 08:19:35 +08:00
|
|
|
// V >= Min && V < Hi --> V < Hi
|
|
|
|
// V < Min || V >= Hi --> V >= Hi
|
|
|
|
ICmpInst::Predicate Pred = Inside ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE;
|
2016-09-01 03:49:56 +08:00
|
|
|
if (isSigned ? Lo.isMinSignedValue() : Lo.isMinValue()) {
|
2016-08-31 08:19:35 +08:00
|
|
|
Pred = isSigned ? ICmpInst::getSignedPredicate(Pred) : Pred;
|
2017-07-08 07:16:26 +08:00
|
|
|
return Builder.CreateICmp(Pred, V, ConstantInt::get(Ty, Hi));
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
|
|
|
|
2016-08-31 08:19:35 +08:00
|
|
|
// V >= Lo && V < Hi --> V - Lo u< Hi - Lo
|
|
|
|
// V < Lo || V >= Hi --> V - Lo u>= Hi - Lo
|
2016-09-01 03:49:56 +08:00
|
|
|
Value *VMinusLo =
|
2017-07-08 07:16:26 +08:00
|
|
|
Builder.CreateSub(V, ConstantInt::get(Ty, Lo), V->getName() + ".off");
|
2016-09-01 03:49:56 +08:00
|
|
|
Constant *HiMinusLo = ConstantInt::get(Ty, Hi - Lo);
|
2017-07-08 07:16:26 +08:00
|
|
|
return Builder.CreateICmp(Pred, VMinusLo, HiMinusLo);
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
|
|
|
|
2017-04-04 00:53:12 +08:00
|
|
|
/// Classify (icmp eq (A & B), C) and (icmp ne (A & B), C) as matching patterns
|
|
|
|
/// that can be simplified.
|
|
|
|
/// One of A and B is considered the mask. The other is the value. This is
|
|
|
|
/// described as the "AMask" or "BMask" part of the enum. If the enum contains
|
|
|
|
/// only "Mask", then both A and B can be considered masks. If A is the mask,
|
|
|
|
/// then it was proven that (A & C) == C. This is trivial if C == A or C == 0.
|
|
|
|
/// If both A and C are constants, this proof is also easy.
|
|
|
|
/// For the following explanations, we assume that A is the mask.
|
|
|
|
///
|
|
|
|
/// "AllOnes" declares that the comparison is true only if (A & B) == A or all
|
|
|
|
/// bits of A are set in B.
|
|
|
|
/// Example: (icmp eq (A & 3), 3) -> AMask_AllOnes
|
|
|
|
///
|
|
|
|
/// "AllZeros" declares that the comparison is true only if (A & B) == 0 or all
|
|
|
|
/// bits of A are cleared in B.
|
|
|
|
/// Example: (icmp eq (A & 3), 0) -> Mask_AllZeros
|
|
|
|
///
|
|
|
|
/// "Mixed" declares that (A & B) == C and C might or might not contain any
|
|
|
|
/// number of one bits and zero bits.
|
|
|
|
/// Example: (icmp eq (A & 3), 1) -> AMask_Mixed
|
|
|
|
///
|
|
|
|
/// "Not" means that in above descriptions "==" should be replaced by "!=".
|
|
|
|
/// Example: (icmp ne (A & 3), 3) -> AMask_NotAllOnes
|
|
|
|
///
|
2010-09-09 06:16:17 +08:00
|
|
|
/// If the mask A contains a single bit, then the following is equivalent:
|
|
|
|
/// (icmp eq (A & B), A) equals (icmp ne (A & B), 0)
|
|
|
|
/// (icmp ne (A & B), A) equals (icmp eq (A & B), 0)
|
|
|
|
enum MaskedICmpType {
  // One bit per pattern. Each "Not" flag (recording !=) is the bit directly
  // above the flag for the corresponding == pattern.
  AMask_AllOnes    = 1 << 0, // (A & B) == A
  AMask_NotAllOnes = 1 << 1, // (A & B) != A
  BMask_AllOnes    = 1 << 2, // (A & B) == B
  BMask_NotAllOnes = 1 << 3, // (A & B) != B
  Mask_AllZeros    = 1 << 4, // (A & B) == 0
  Mask_NotAllZeros = 1 << 5, // (A & B) != 0
  AMask_Mixed      = 1 << 6, // (A & B) == C, with A as the mask
  AMask_NotMixed   = 1 << 7, // (A & B) != C, with A as the mask
  BMask_Mixed      = 1 << 8, // (A & B) == C, with B as the mask
  BMask_NotMixed   = 1 << 9  // (A & B) != C, with B as the mask
};
|
|
|
|
|
2017-04-04 00:53:12 +08:00
|
|
|
/// Return the set of patterns (from MaskedICmpType) that (icmp SCC (A & B), C)
|
|
|
|
/// satisfies.
|
|
|
|
static unsigned getMaskedICmpType(Value *A, Value *B, Value *C,
|
|
|
|
ICmpInst::Predicate Pred) {
|
2010-09-09 06:16:17 +08:00
|
|
|
ConstantInt *ACst = dyn_cast<ConstantInt>(A);
|
|
|
|
ConstantInt *BCst = dyn_cast<ConstantInt>(B);
|
|
|
|
ConstantInt *CCst = dyn_cast<ConstantInt>(C);
|
2017-04-04 00:53:12 +08:00
|
|
|
bool IsEq = (Pred == ICmpInst::ICMP_EQ);
|
|
|
|
bool IsAPow2 = (ACst && !ACst->isZero() && ACst->getValue().isPowerOf2());
|
|
|
|
bool IsBPow2 = (BCst && !BCst->isZero() && BCst->getValue().isPowerOf2());
|
|
|
|
unsigned MaskVal = 0;
|
2014-04-25 13:29:35 +08:00
|
|
|
if (CCst && CCst->isZero()) {
|
2010-09-09 06:16:17 +08:00
|
|
|
// if C is zero, then both A and B qualify as mask
|
2017-04-04 00:53:12 +08:00
|
|
|
MaskVal |= (IsEq ? (Mask_AllZeros | AMask_Mixed | BMask_Mixed)
|
|
|
|
: (Mask_NotAllZeros | AMask_NotMixed | BMask_NotMixed));
|
|
|
|
if (IsAPow2)
|
|
|
|
MaskVal |= (IsEq ? (AMask_NotAllOnes | AMask_NotMixed)
|
|
|
|
: (AMask_AllOnes | AMask_Mixed));
|
|
|
|
if (IsBPow2)
|
|
|
|
MaskVal |= (IsEq ? (BMask_NotAllOnes | BMask_NotMixed)
|
|
|
|
: (BMask_AllOnes | BMask_Mixed));
|
|
|
|
return MaskVal;
|
2010-09-09 06:16:17 +08:00
|
|
|
}
|
2017-04-04 00:53:12 +08:00
|
|
|
|
2010-09-09 06:16:17 +08:00
|
|
|
if (A == C) {
|
2017-04-04 00:53:12 +08:00
|
|
|
MaskVal |= (IsEq ? (AMask_AllOnes | AMask_Mixed)
|
|
|
|
: (AMask_NotAllOnes | AMask_NotMixed));
|
|
|
|
if (IsAPow2)
|
|
|
|
MaskVal |= (IsEq ? (Mask_NotAllZeros | AMask_NotMixed)
|
|
|
|
: (Mask_AllZeros | AMask_Mixed));
|
|
|
|
} else if (ACst && CCst && ConstantExpr::getAnd(ACst, CCst) == CCst) {
|
|
|
|
MaskVal |= (IsEq ? AMask_Mixed : AMask_NotMixed);
|
2010-09-09 06:16:17 +08:00
|
|
|
}
|
2017-04-04 00:53:12 +08:00
|
|
|
|
2012-12-20 15:15:54 +08:00
|
|
|
if (B == C) {
|
2017-04-04 00:53:12 +08:00
|
|
|
MaskVal |= (IsEq ? (BMask_AllOnes | BMask_Mixed)
|
|
|
|
: (BMask_NotAllOnes | BMask_NotMixed));
|
|
|
|
if (IsBPow2)
|
|
|
|
MaskVal |= (IsEq ? (Mask_NotAllZeros | BMask_NotMixed)
|
|
|
|
: (Mask_AllZeros | BMask_Mixed));
|
|
|
|
} else if (BCst && CCst && ConstantExpr::getAnd(BCst, CCst) == CCst) {
|
|
|
|
MaskVal |= (IsEq ? BMask_Mixed : BMask_NotMixed);
|
2010-09-09 06:16:17 +08:00
|
|
|
}
|
2017-04-04 00:53:12 +08:00
|
|
|
|
|
|
|
return MaskVal;
|
2010-09-09 06:16:17 +08:00
|
|
|
}
|
|
|
|
|
2013-09-04 19:57:13 +08:00
|
|
|
/// Convert an analysis of a masked ICmp into its equivalent if all boolean
|
|
|
|
/// operations had the opposite sense. Since each "NotXXX" flag (recording !=)
|
|
|
|
/// is adjacent to the corresponding normal flag (recording ==), this just
|
|
|
|
/// involves swapping those bits over.
|
|
|
|
static unsigned conjugateICmpMask(unsigned Mask) {
|
|
|
|
unsigned NewMask;
|
2017-04-04 00:53:12 +08:00
|
|
|
NewMask = (Mask & (AMask_AllOnes | BMask_AllOnes | Mask_AllZeros |
|
|
|
|
AMask_Mixed | BMask_Mixed))
|
2013-09-04 19:57:13 +08:00
|
|
|
<< 1;
|
|
|
|
|
2017-04-04 00:53:12 +08:00
|
|
|
NewMask |= (Mask & (AMask_NotAllOnes | BMask_NotAllOnes | Mask_NotAllZeros |
|
|
|
|
AMask_NotMixed | BMask_NotMixed))
|
|
|
|
>> 1;
|
2013-09-04 19:57:13 +08:00
|
|
|
|
|
|
|
return NewMask;
|
|
|
|
}
|
|
|
|
|
2017-08-15 05:39:51 +08:00
|
|
|
// Adapts the external decomposeBitTestICmp for local use.
|
|
|
|
static bool decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate &Pred,
|
|
|
|
Value *&X, Value *&Y, Value *&Z) {
|
|
|
|
APInt Mask;
|
|
|
|
if (!llvm::decomposeBitTestICmp(LHS, RHS, Pred, X, Mask))
|
|
|
|
return false;
|
|
|
|
|
2017-09-02 05:27:29 +08:00
|
|
|
Y = ConstantInt::get(X->getType(), Mask);
|
|
|
|
Z = ConstantInt::get(X->getType(), 0);
|
2017-08-15 05:39:51 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2017-04-04 00:53:12 +08:00
|
|
|
/// Handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E).
|
2018-03-14 05:13:18 +08:00
|
|
|
/// Return the pattern classes (from MaskedICmpType) for the left hand side and
|
|
|
|
/// the right hand side as a pair.
|
|
|
|
/// LHS and RHS are the left hand side and the right hand side ICmps and PredL
|
|
|
|
/// and PredR are their predicates, respectively.
|
|
|
|
static
|
|
|
|
Optional<std::pair<unsigned, unsigned>>
|
|
|
|
getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
|
|
|
|
Value *&D, Value *&E, ICmpInst *LHS,
|
|
|
|
ICmpInst *RHS,
|
|
|
|
ICmpInst::Predicate &PredL,
|
|
|
|
ICmpInst::Predicate &PredR) {
|
2017-08-22 05:00:45 +08:00
|
|
|
// vectors are not (yet?) supported. Don't support pointers either.
|
2017-09-02 05:27:31 +08:00
|
|
|
if (!LHS->getOperand(0)->getType()->isIntegerTy() ||
|
|
|
|
!RHS->getOperand(0)->getType()->isIntegerTy())
|
2018-03-14 05:13:18 +08:00
|
|
|
return None;
|
2010-09-09 06:16:17 +08:00
|
|
|
|
|
|
|
// Here comes the tricky part:
|
2012-12-20 15:09:41 +08:00
|
|
|
// LHS might be of the form L11 & L12 == X, X == L21 & L22,
|
2010-09-09 06:16:17 +08:00
|
|
|
// and L11 & L12 == L21 & L22. The same goes for RHS.
|
|
|
|
// Now we must find those components L** and R**, that are equal, so
|
2012-12-20 15:09:41 +08:00
|
|
|
// that we can extract the parameters A, B, C, D, and E for the canonical
|
2010-09-09 06:16:17 +08:00
|
|
|
// above.
|
|
|
|
Value *L1 = LHS->getOperand(0);
|
|
|
|
Value *L2 = LHS->getOperand(1);
|
2017-04-04 00:53:12 +08:00
|
|
|
Value *L11, *L12, *L21, *L22;
|
2012-01-10 01:23:27 +08:00
|
|
|
// Check whether the icmp can be decomposed into a bit test.
|
2017-08-15 05:39:51 +08:00
|
|
|
if (decomposeBitTestICmp(L1, L2, PredL, L11, L12, L2)) {
|
2014-04-25 13:29:35 +08:00
|
|
|
L21 = L22 = L1 = nullptr;
|
2012-01-10 01:23:27 +08:00
|
|
|
} else {
|
|
|
|
// Look for ANDs in the LHS icmp.
|
2017-08-22 05:00:45 +08:00
|
|
|
if (!match(L1, m_And(m_Value(L11), m_Value(L12)))) {
|
2013-09-04 19:57:17 +08:00
|
|
|
// Any icmp can be viewed as being trivially masked; if it allows us to
|
|
|
|
// remove one, it's worth it.
|
|
|
|
L11 = L1;
|
|
|
|
L12 = Constant::getAllOnesValue(L1->getType());
|
|
|
|
}
|
|
|
|
|
2017-08-22 05:00:45 +08:00
|
|
|
if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) {
|
2013-09-04 19:57:17 +08:00
|
|
|
L21 = L2;
|
|
|
|
L22 = Constant::getAllOnesValue(L2->getType());
|
2012-01-10 01:23:27 +08:00
|
|
|
}
|
2010-09-09 06:16:17 +08:00
|
|
|
}
|
|
|
|
|
2012-01-10 01:23:27 +08:00
|
|
|
// Bail if LHS was a icmp that can't be decomposed into an equality.
|
2017-04-04 00:53:12 +08:00
|
|
|
if (!ICmpInst::isEquality(PredL))
|
2018-03-14 05:13:18 +08:00
|
|
|
return None;
|
2012-01-10 01:23:27 +08:00
|
|
|
|
2010-09-09 06:16:17 +08:00
|
|
|
Value *R1 = RHS->getOperand(0);
|
|
|
|
Value *R2 = RHS->getOperand(1);
|
2017-04-04 00:53:12 +08:00
|
|
|
Value *R11, *R12;
|
|
|
|
bool Ok = false;
|
2017-08-15 05:39:51 +08:00
|
|
|
if (decomposeBitTestICmp(R1, R2, PredR, R11, R12, R2)) {
|
2012-01-10 01:23:27 +08:00
|
|
|
if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
|
2017-04-04 00:53:12 +08:00
|
|
|
A = R11;
|
|
|
|
D = R12;
|
2012-01-10 01:23:27 +08:00
|
|
|
} else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
|
2017-04-04 00:53:12 +08:00
|
|
|
A = R12;
|
|
|
|
D = R11;
|
2012-01-10 01:23:27 +08:00
|
|
|
} else {
|
2018-03-14 05:13:18 +08:00
|
|
|
return None;
|
2010-09-09 06:16:17 +08:00
|
|
|
}
|
2017-04-04 00:53:12 +08:00
|
|
|
E = R2;
|
|
|
|
R1 = nullptr;
|
|
|
|
Ok = true;
|
2017-08-22 05:00:45 +08:00
|
|
|
} else {
|
2013-09-04 19:57:17 +08:00
|
|
|
if (!match(R1, m_And(m_Value(R11), m_Value(R12)))) {
|
|
|
|
// As before, model no mask as a trivial mask if it'll let us do an
|
2014-08-19 14:41:55 +08:00
|
|
|
// optimization.
|
2013-09-04 19:57:17 +08:00
|
|
|
R11 = R1;
|
|
|
|
R12 = Constant::getAllOnesValue(R1->getType());
|
|
|
|
}
|
|
|
|
|
2012-01-10 01:23:27 +08:00
|
|
|
if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
|
2017-04-04 00:53:12 +08:00
|
|
|
A = R11;
|
|
|
|
D = R12;
|
|
|
|
E = R2;
|
|
|
|
Ok = true;
|
2012-01-10 01:23:27 +08:00
|
|
|
} else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
|
2017-04-04 00:53:12 +08:00
|
|
|
A = R12;
|
|
|
|
D = R11;
|
|
|
|
E = R2;
|
|
|
|
Ok = true;
|
2010-09-09 06:16:17 +08:00
|
|
|
}
|
|
|
|
}
|
2012-01-10 01:23:27 +08:00
|
|
|
|
|
|
|
// Bail if RHS was a icmp that can't be decomposed into an equality.
|
2017-04-04 00:53:12 +08:00
|
|
|
if (!ICmpInst::isEquality(PredR))
|
2018-03-14 05:13:18 +08:00
|
|
|
return None;
|
2012-01-10 01:23:27 +08:00
|
|
|
|
2016-05-10 05:37:43 +08:00
|
|
|
// Look for ANDs on the right side of the RHS icmp.
|
2017-08-22 05:00:45 +08:00
|
|
|
if (!Ok) {
|
2013-09-04 19:57:17 +08:00
|
|
|
if (!match(R2, m_And(m_Value(R11), m_Value(R12)))) {
|
|
|
|
R11 = R2;
|
|
|
|
R12 = Constant::getAllOnesValue(R2->getType());
|
|
|
|
}
|
|
|
|
|
2012-01-10 01:23:27 +08:00
|
|
|
if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
|
2017-04-04 00:53:12 +08:00
|
|
|
A = R11;
|
|
|
|
D = R12;
|
|
|
|
E = R1;
|
|
|
|
Ok = true;
|
2012-01-10 01:23:27 +08:00
|
|
|
} else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
|
2017-04-04 00:53:12 +08:00
|
|
|
A = R12;
|
|
|
|
D = R11;
|
|
|
|
E = R1;
|
|
|
|
Ok = true;
|
2012-01-10 01:23:27 +08:00
|
|
|
} else {
|
2018-03-14 05:13:18 +08:00
|
|
|
return None;
|
2012-01-10 01:23:27 +08:00
|
|
|
}
|
2010-09-09 06:16:17 +08:00
|
|
|
}
|
2017-04-04 00:53:12 +08:00
|
|
|
if (!Ok)
|
2018-03-14 05:13:18 +08:00
|
|
|
return None;
|
2010-09-09 06:16:17 +08:00
|
|
|
|
|
|
|
if (L11 == A) {
|
2017-04-04 00:53:12 +08:00
|
|
|
B = L12;
|
|
|
|
C = L2;
|
2012-12-20 15:15:54 +08:00
|
|
|
} else if (L12 == A) {
|
2017-04-04 00:53:12 +08:00
|
|
|
B = L11;
|
|
|
|
C = L2;
|
2012-12-20 15:15:54 +08:00
|
|
|
} else if (L21 == A) {
|
2017-04-04 00:53:12 +08:00
|
|
|
B = L22;
|
|
|
|
C = L1;
|
2012-12-20 15:15:54 +08:00
|
|
|
} else if (L22 == A) {
|
2017-04-04 00:53:12 +08:00
|
|
|
B = L21;
|
|
|
|
C = L1;
|
2010-09-09 06:16:17 +08:00
|
|
|
}
|
|
|
|
|
2017-04-04 00:53:12 +08:00
|
|
|
unsigned LeftType = getMaskedICmpType(A, B, C, PredL);
|
|
|
|
unsigned RightType = getMaskedICmpType(A, D, E, PredR);
|
2018-03-14 05:13:18 +08:00
|
|
|
return Optional<std::pair<unsigned, unsigned>>(std::make_pair(LeftType, RightType));
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) into a single
|
|
|
|
/// (icmp(A & X) ==/!= Y), where the left-hand side is of type Mask_NotAllZeros
|
|
|
|
/// and the right hand side is of type BMask_Mixed. For example,
|
|
|
|
/// (icmp (A & 12) != 0) & (icmp (A & 15) == 8) -> (icmp (A & 15) == 8).
|
|
|
|
static Value * foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
|
|
|
|
ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
|
|
|
|
Value *A, Value *B, Value *C, Value *D, Value *E,
|
|
|
|
ICmpInst::Predicate PredL, ICmpInst::Predicate PredR,
|
|
|
|
llvm::InstCombiner::BuilderTy &Builder) {
|
|
|
|
// We are given the canonical form:
|
|
|
|
// (icmp ne (A & B), 0) & (icmp eq (A & D), E).
|
|
|
|
// where D & E == E.
|
|
|
|
//
|
|
|
|
// If IsAnd is false, we get it in negated form:
|
|
|
|
// (icmp eq (A & B), 0) | (icmp ne (A & D), E) ->
|
|
|
|
// !((icmp ne (A & B), 0) & (icmp eq (A & D), E)).
|
|
|
|
//
|
|
|
|
// We currently handle the case of B, C, D, E are constant.
|
|
|
|
//
|
|
|
|
ConstantInt *BCst = dyn_cast<ConstantInt>(B);
|
|
|
|
if (!BCst)
|
|
|
|
return nullptr;
|
|
|
|
ConstantInt *CCst = dyn_cast<ConstantInt>(C);
|
|
|
|
if (!CCst)
|
|
|
|
return nullptr;
|
|
|
|
ConstantInt *DCst = dyn_cast<ConstantInt>(D);
|
|
|
|
if (!DCst)
|
|
|
|
return nullptr;
|
|
|
|
ConstantInt *ECst = dyn_cast<ConstantInt>(E);
|
|
|
|
if (!ECst)
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
|
|
|
|
|
|
|
|
// Update E to the canonical form when D is a power of two and RHS is
|
|
|
|
// canonicalized as,
|
|
|
|
// (icmp ne (A & D), 0) -> (icmp eq (A & D), D) or
|
|
|
|
// (icmp ne (A & D), D) -> (icmp eq (A & D), 0).
|
|
|
|
if (PredR != NewCC)
|
|
|
|
ECst = cast<ConstantInt>(ConstantExpr::getXor(DCst, ECst));
|
|
|
|
|
|
|
|
// If B or D is zero, skip because if LHS or RHS can be trivially folded by
|
|
|
|
// other folding rules and this pattern won't apply any more.
|
|
|
|
if (BCst->getValue() == 0 || DCst->getValue() == 0)
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
// If B and D don't intersect, ie. (B & D) == 0, no folding because we can't
|
|
|
|
// deduce anything from it.
|
|
|
|
// For example,
|
|
|
|
// (icmp ne (A & 12), 0) & (icmp eq (A & 3), 1) -> no folding.
|
|
|
|
if ((BCst->getValue() & DCst->getValue()) == 0)
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
// If the following two conditions are met:
|
|
|
|
//
|
|
|
|
// 1. mask B covers only a single bit that's not covered by mask D, that is,
|
|
|
|
// (B & (B ^ D)) is a power of 2 (in other words, B minus the intersection of
|
|
|
|
// B and D has only one bit set) and,
|
|
|
|
//
|
|
|
|
// 2. RHS (and E) indicates that the rest of B's bits are zero (in other
|
|
|
|
// words, the intersection of B and D is zero), that is, ((B & D) & E) == 0
|
|
|
|
//
|
|
|
|
// then that single bit in B must be one and thus the whole expression can be
|
|
|
|
// folded to
|
|
|
|
// (A & (B | D)) == (B & (B ^ D)) | E.
|
|
|
|
//
|
|
|
|
// For example,
|
|
|
|
// (icmp ne (A & 12), 0) & (icmp eq (A & 7), 1) -> (icmp eq (A & 15), 9)
|
|
|
|
// (icmp ne (A & 15), 0) & (icmp eq (A & 7), 0) -> (icmp eq (A & 15), 8)
|
|
|
|
if ((((BCst->getValue() & DCst->getValue()) & ECst->getValue()) == 0) &&
|
|
|
|
(BCst->getValue() & (BCst->getValue() ^ DCst->getValue())).isPowerOf2()) {
|
|
|
|
APInt BorD = BCst->getValue() | DCst->getValue();
|
|
|
|
APInt BandBxorDorE = (BCst->getValue() & (BCst->getValue() ^ DCst->getValue())) |
|
|
|
|
ECst->getValue();
|
|
|
|
Value *NewMask = ConstantInt::get(BCst->getType(), BorD);
|
|
|
|
Value *NewMaskedValue = ConstantInt::get(BCst->getType(), BandBxorDorE);
|
|
|
|
Value *NewAnd = Builder.CreateAnd(A, NewMask);
|
|
|
|
return Builder.CreateICmp(NewCC, NewAnd, NewMaskedValue);
|
|
|
|
}
|
|
|
|
|
|
|
|
auto IsSubSetOrEqual = [](ConstantInt *C1, ConstantInt *C2) {
|
|
|
|
return (C1->getValue() & C2->getValue()) == C1->getValue();
|
|
|
|
};
|
|
|
|
auto IsSuperSetOrEqual = [](ConstantInt *C1, ConstantInt *C2) {
|
|
|
|
return (C1->getValue() & C2->getValue()) == C2->getValue();
|
|
|
|
};
|
|
|
|
|
|
|
|
// In the following, we consider only the cases where B is a superset of D, B
|
|
|
|
// is a subset of D, or B == D because otherwise there's at least one bit
|
|
|
|
// covered by B but not D, in which case we can't deduce much from it, so
|
|
|
|
// no folding (aside from the single must-be-one bit case right above.)
|
|
|
|
// For example,
|
|
|
|
// (icmp ne (A & 14), 0) & (icmp eq (A & 3), 1) -> no folding.
|
|
|
|
if (!IsSubSetOrEqual(BCst, DCst) && !IsSuperSetOrEqual(BCst, DCst))
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
// At this point, either B is a superset of D, B is a subset of D or B == D.
|
|
|
|
|
|
|
|
// If E is zero, if B is a subset of (or equal to) D, LHS and RHS contradict
|
|
|
|
// and the whole expression becomes false (or true if negated), otherwise, no
|
|
|
|
// folding.
|
|
|
|
// For example,
|
|
|
|
// (icmp ne (A & 3), 0) & (icmp eq (A & 7), 0) -> false.
|
|
|
|
// (icmp ne (A & 15), 0) & (icmp eq (A & 3), 0) -> no folding.
|
|
|
|
if (ECst->isZero()) {
|
|
|
|
if (IsSubSetOrEqual(BCst, DCst))
|
|
|
|
return ConstantInt::get(LHS->getType(), !IsAnd);
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
// At this point, B, D, E aren't zero and (B & D) == B, (B & D) == D or B ==
|
|
|
|
// D. If B is a superset of (or equal to) D, since E is not zero, LHS is
|
|
|
|
// subsumed by RHS (RHS implies LHS.) So the whole expression becomes
|
|
|
|
// RHS. For example,
|
|
|
|
// (icmp ne (A & 255), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8).
|
|
|
|
// (icmp ne (A & 15), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8).
|
|
|
|
if (IsSuperSetOrEqual(BCst, DCst))
|
|
|
|
return RHS;
|
|
|
|
// Otherwise, B is a subset of D. If B and E have a common bit set,
|
|
|
|
// ie. (B & E) != 0, then LHS is subsumed by RHS. For example.
|
|
|
|
// (icmp ne (A & 12), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8).
|
|
|
|
assert(IsSubSetOrEqual(BCst, DCst) && "Precondition due to above code");
|
|
|
|
if ((BCst->getValue() & ECst->getValue()) != 0)
|
|
|
|
return RHS;
|
|
|
|
// Otherwise, LHS and RHS contradict and the whole expression becomes false
|
|
|
|
// (or true if negated.) For example,
|
|
|
|
// (icmp ne (A & 7), 0) & (icmp eq (A & 15), 8) -> false.
|
|
|
|
// (icmp ne (A & 6), 0) & (icmp eq (A & 15), 8) -> false.
|
|
|
|
return ConstantInt::get(LHS->getType(), !IsAnd);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Try to fold (icmp(A & B) ==/!= 0) &/| (icmp(A & D) ==/!= E) into a single
|
|
|
|
/// (icmp(A & X) ==/!= Y), where the left-hand side and the right hand side
|
|
|
|
/// aren't of the common mask pattern type.
|
|
|
|
static Value *foldLogOpOfMaskedICmpsAsymmetric(
|
|
|
|
ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
|
|
|
|
Value *A, Value *B, Value *C, Value *D, Value *E,
|
|
|
|
ICmpInst::Predicate PredL, ICmpInst::Predicate PredR,
|
|
|
|
unsigned LHSMask, unsigned RHSMask,
|
|
|
|
llvm::InstCombiner::BuilderTy &Builder) {
|
|
|
|
assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) &&
|
|
|
|
"Expected equality predicates for masked type of icmps.");
|
|
|
|
// Handle Mask_NotAllZeros-BMask_Mixed cases.
|
|
|
|
// (icmp ne/eq (A & B), C) &/| (icmp eq/ne (A & D), E), or
|
|
|
|
// (icmp eq/ne (A & B), C) &/| (icmp ne/eq (A & D), E)
|
|
|
|
// which gets swapped to
|
|
|
|
// (icmp ne/eq (A & D), E) &/| (icmp eq/ne (A & B), C).
|
|
|
|
if (!IsAnd) {
|
|
|
|
LHSMask = conjugateICmpMask(LHSMask);
|
|
|
|
RHSMask = conjugateICmpMask(RHSMask);
|
|
|
|
}
|
|
|
|
if ((LHSMask & Mask_NotAllZeros) && (RHSMask & BMask_Mixed)) {
|
|
|
|
if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
|
|
|
|
LHS, RHS, IsAnd, A, B, C, D, E,
|
|
|
|
PredL, PredR, Builder)) {
|
|
|
|
return V;
|
|
|
|
}
|
|
|
|
} else if ((LHSMask & BMask_Mixed) && (RHSMask & Mask_NotAllZeros)) {
|
|
|
|
if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
|
|
|
|
RHS, LHS, IsAnd, A, D, E, B, C,
|
|
|
|
PredR, PredL, Builder)) {
|
|
|
|
return V;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nullptr;
|
2010-09-09 06:16:17 +08:00
|
|
|
}
|
2015-09-09 02:24:36 +08:00
|
|
|
|
|
|
|
/// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
|
|
|
|
/// into a single (icmp(A & X) ==/!= Y).
|
2014-11-18 17:31:36 +08:00
|
|
|
static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
|
2017-07-08 07:16:26 +08:00
|
|
|
llvm::InstCombiner::BuilderTy &Builder) {
|
2014-04-25 13:29:35 +08:00
|
|
|
Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
|
2017-04-04 00:53:12 +08:00
|
|
|
ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
|
2018-03-14 05:13:18 +08:00
|
|
|
Optional<std::pair<unsigned, unsigned>> MaskPair =
|
2017-04-04 00:53:12 +08:00
|
|
|
getMaskedTypeForICmpPair(A, B, C, D, E, LHS, RHS, PredL, PredR);
|
2018-03-14 05:13:18 +08:00
|
|
|
if (!MaskPair)
|
2017-04-04 00:53:12 +08:00
|
|
|
return nullptr;
|
|
|
|
assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) &&
|
|
|
|
"Expected equality predicates for masked type of icmps.");
|
2018-03-14 05:13:18 +08:00
|
|
|
unsigned LHSMask = MaskPair->first;
|
|
|
|
unsigned RHSMask = MaskPair->second;
|
|
|
|
unsigned Mask = LHSMask & RHSMask;
|
|
|
|
if (Mask == 0) {
|
|
|
|
// Even if the two sides don't share a common pattern, check if folding can
|
|
|
|
// still happen.
|
|
|
|
if (Value *V = foldLogOpOfMaskedICmpsAsymmetric(
|
|
|
|
LHS, RHS, IsAnd, A, B, C, D, E, PredL, PredR, LHSMask, RHSMask,
|
|
|
|
Builder))
|
|
|
|
return V;
|
|
|
|
return nullptr;
|
|
|
|
}
|
2010-09-09 06:16:17 +08:00
|
|
|
|
2013-09-04 19:57:13 +08:00
|
|
|
// In full generality:
|
|
|
|
// (icmp (A & B) Op C) | (icmp (A & D) Op E)
|
|
|
|
// == ![ (icmp (A & B) !Op C) & (icmp (A & D) !Op E) ]
|
|
|
|
//
|
|
|
|
// If the latter can be converted into (icmp (A & X) Op Y) then the former is
|
|
|
|
// equivalent to (icmp (A & X) !Op Y).
|
|
|
|
//
|
|
|
|
// Therefore, we can pretend for the rest of this function that we're dealing
|
|
|
|
// with the conjunction, provided we flip the sense of any comparisons (both
|
|
|
|
// input and output).
|
|
|
|
|
|
|
|
// In most cases we're going to produce an EQ for the "&&" case.
|
2016-01-19 02:28:09 +08:00
|
|
|
ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
|
2013-09-04 19:57:13 +08:00
|
|
|
if (!IsAnd) {
|
|
|
|
// Convert the masking analysis into its equivalent with negated
|
|
|
|
// comparisons.
|
2016-01-19 02:28:09 +08:00
|
|
|
Mask = conjugateICmpMask(Mask);
|
2013-09-04 19:57:13 +08:00
|
|
|
}
|
2010-09-09 06:16:17 +08:00
|
|
|
|
2017-04-04 00:53:12 +08:00
|
|
|
if (Mask & Mask_AllZeros) {
|
2012-12-20 15:09:41 +08:00
|
|
|
// (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
|
2010-09-09 06:16:17 +08:00
|
|
|
// -> (icmp eq (A & (B|D)), 0)
|
2017-07-08 07:16:26 +08:00
|
|
|
Value *NewOr = Builder.CreateOr(B, D);
|
|
|
|
Value *NewAnd = Builder.CreateAnd(A, NewOr);
|
2016-01-19 02:28:09 +08:00
|
|
|
// We can't use C as zero because we might actually handle
|
2012-12-20 15:09:41 +08:00
|
|
|
// (icmp ne (A & B), B) & (icmp ne (A & D), D)
|
2016-01-19 02:28:09 +08:00
|
|
|
// with B and D, having a single bit set.
|
|
|
|
Value *Zero = Constant::getNullValue(A->getType());
|
2017-07-08 07:16:26 +08:00
|
|
|
return Builder.CreateICmp(NewCC, NewAnd, Zero);
|
2010-09-09 06:16:17 +08:00
|
|
|
}
|
2017-04-04 00:53:12 +08:00
|
|
|
if (Mask & BMask_AllOnes) {
|
2012-12-20 15:09:41 +08:00
|
|
|
// (icmp eq (A & B), B) & (icmp eq (A & D), D)
|
2010-09-09 06:16:17 +08:00
|
|
|
// -> (icmp eq (A & (B|D)), (B|D))
|
2017-07-08 07:16:26 +08:00
|
|
|
Value *NewOr = Builder.CreateOr(B, D);
|
|
|
|
Value *NewAnd = Builder.CreateAnd(A, NewOr);
|
|
|
|
return Builder.CreateICmp(NewCC, NewAnd, NewOr);
|
2012-12-20 15:09:41 +08:00
|
|
|
}
|
2017-04-04 00:53:12 +08:00
|
|
|
if (Mask & AMask_AllOnes) {
|
2012-12-20 15:09:41 +08:00
|
|
|
// (icmp eq (A & B), A) & (icmp eq (A & D), A)
|
2010-09-09 06:16:17 +08:00
|
|
|
// -> (icmp eq (A & (B&D)), A)
|
2017-07-08 07:16:26 +08:00
|
|
|
Value *NewAnd1 = Builder.CreateAnd(B, D);
|
|
|
|
Value *NewAnd2 = Builder.CreateAnd(A, NewAnd1);
|
|
|
|
return Builder.CreateICmp(NewCC, NewAnd2, A);
|
2010-09-09 06:16:17 +08:00
|
|
|
}
|
2013-09-04 19:57:13 +08:00
|
|
|
|
|
|
|
// Remaining cases assume at least that B and D are constant, and depend on
|
2016-01-19 02:28:09 +08:00
|
|
|
// their actual values. This isn't strictly necessary, just a "handle the
|
2013-09-04 19:57:13 +08:00
|
|
|
// easy cases for now" decision.
|
|
|
|
ConstantInt *BCst = dyn_cast<ConstantInt>(B);
|
2017-04-04 00:53:12 +08:00
|
|
|
if (!BCst)
|
|
|
|
return nullptr;
|
2013-09-04 19:57:13 +08:00
|
|
|
ConstantInt *DCst = dyn_cast<ConstantInt>(D);
|
2017-04-04 00:53:12 +08:00
|
|
|
if (!DCst)
|
|
|
|
return nullptr;
|
2013-09-04 19:57:13 +08:00
|
|
|
|
2017-04-04 00:53:12 +08:00
|
|
|
if (Mask & (Mask_NotAllZeros | BMask_NotAllOnes)) {
|
2013-09-04 19:57:13 +08:00
|
|
|
// (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and
|
|
|
|
// (icmp ne (A & B), B) & (icmp ne (A & D), D)
|
|
|
|
// -> (icmp ne (A & B), 0) or (icmp ne (A & D), 0)
|
|
|
|
// Only valid if one of the masks is a superset of the other (check "B&D" is
|
|
|
|
// the same as either B or D).
|
|
|
|
APInt NewMask = BCst->getValue() & DCst->getValue();
|
|
|
|
|
|
|
|
if (NewMask == BCst->getValue())
|
|
|
|
return LHS;
|
|
|
|
else if (NewMask == DCst->getValue())
|
|
|
|
return RHS;
|
|
|
|
}
|
2017-04-04 00:53:12 +08:00
|
|
|
|
|
|
|
if (Mask & AMask_NotAllOnes) {
|
2013-09-04 19:57:13 +08:00
|
|
|
// (icmp ne (A & B), B) & (icmp ne (A & D), D)
|
|
|
|
// -> (icmp ne (A & B), A) or (icmp ne (A & D), A)
|
|
|
|
// Only valid if one of the masks is a superset of the other (check "B|D" is
|
|
|
|
// the same as either B or D).
|
|
|
|
APInt NewMask = BCst->getValue() | DCst->getValue();
|
|
|
|
|
|
|
|
if (NewMask == BCst->getValue())
|
|
|
|
return LHS;
|
|
|
|
else if (NewMask == DCst->getValue())
|
|
|
|
return RHS;
|
|
|
|
}
|
2017-04-04 00:53:12 +08:00
|
|
|
|
|
|
|
if (Mask & BMask_Mixed) {
|
2012-12-20 15:09:41 +08:00
|
|
|
// (icmp eq (A & B), C) & (icmp eq (A & D), E)
|
2010-09-09 06:16:17 +08:00
|
|
|
// We already know that B & C == C && D & E == E.
|
|
|
|
// If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of
|
|
|
|
// C and E, which are shared by both the mask B and the mask D, don't
|
|
|
|
// contradict, then we can transform to
|
|
|
|
// -> (icmp eq (A & (B|D)), (C|E))
|
|
|
|
// Currently, we only handle the case of B, C, D, and E being constant.
|
2016-01-19 02:28:09 +08:00
|
|
|
// We can't simply use C and E because we might actually handle
|
2012-12-20 15:09:41 +08:00
|
|
|
// (icmp ne (A & B), B) & (icmp eq (A & D), D)
|
2016-01-19 02:28:09 +08:00
|
|
|
// with B and D, having a single bit set.
|
2010-09-09 06:16:17 +08:00
|
|
|
ConstantInt *CCst = dyn_cast<ConstantInt>(C);
|
2017-04-04 00:53:12 +08:00
|
|
|
if (!CCst)
|
|
|
|
return nullptr;
|
2010-09-09 06:16:17 +08:00
|
|
|
ConstantInt *ECst = dyn_cast<ConstantInt>(E);
|
2017-04-04 00:53:12 +08:00
|
|
|
if (!ECst)
|
|
|
|
return nullptr;
|
|
|
|
if (PredL != NewCC)
|
2014-11-18 17:31:36 +08:00
|
|
|
CCst = cast<ConstantInt>(ConstantExpr::getXor(BCst, CCst));
|
2017-04-04 00:53:12 +08:00
|
|
|
if (PredR != NewCC)
|
2014-11-18 17:31:36 +08:00
|
|
|
ECst = cast<ConstantInt>(ConstantExpr::getXor(DCst, ECst));
|
2017-04-04 00:53:12 +08:00
|
|
|
|
2016-01-19 02:28:09 +08:00
|
|
|
// If there is a conflict, we should actually return a false for the
|
|
|
|
// whole construct.
|
2014-11-18 17:31:36 +08:00
|
|
|
if (((BCst->getValue() & DCst->getValue()) &
|
2017-06-07 15:40:37 +08:00
|
|
|
(CCst->getValue() ^ ECst->getValue())).getBoolValue())
|
2014-11-18 17:31:41 +08:00
|
|
|
return ConstantInt::get(LHS->getType(), !IsAnd);
|
2017-04-04 00:53:12 +08:00
|
|
|
|
2017-07-08 07:16:26 +08:00
|
|
|
Value *NewOr1 = Builder.CreateOr(B, D);
|
2016-01-19 02:28:09 +08:00
|
|
|
Value *NewOr2 = ConstantExpr::getOr(CCst, ECst);
|
2017-07-08 07:16:26 +08:00
|
|
|
Value *NewAnd = Builder.CreateAnd(A, NewOr1);
|
|
|
|
return Builder.CreateICmp(NewCC, NewAnd, NewOr2);
|
2010-09-09 06:16:17 +08:00
|
|
|
}
|
2017-04-04 00:53:12 +08:00
|
|
|
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2010-09-09 06:16:17 +08:00
|
|
|
}
|
|
|
|
|
2014-12-03 18:39:15 +08:00
|
|
|
/// Try to fold a signed range checked with lower bound 0 to an unsigned icmp.
|
|
|
|
/// Example: (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n
|
|
|
|
/// If \p Inverted is true then the check is for the inverted range, e.g.
|
|
|
|
/// (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n
|
|
|
|
Value *InstCombiner::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1,
|
|
|
|
bool Inverted) {
|
|
|
|
// Check the lower range comparison, e.g. x >= 0
|
|
|
|
// InstCombine already ensured that if there is a constant it's on the RHS.
|
|
|
|
ConstantInt *RangeStart = dyn_cast<ConstantInt>(Cmp0->getOperand(1));
|
|
|
|
if (!RangeStart)
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
ICmpInst::Predicate Pred0 = (Inverted ? Cmp0->getInversePredicate() :
|
|
|
|
Cmp0->getPredicate());
|
|
|
|
|
|
|
|
// Accept x > -1 or x >= 0 (after potentially inverting the predicate).
|
|
|
|
if (!((Pred0 == ICmpInst::ICMP_SGT && RangeStart->isMinusOne()) ||
|
|
|
|
(Pred0 == ICmpInst::ICMP_SGE && RangeStart->isZero())))
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
ICmpInst::Predicate Pred1 = (Inverted ? Cmp1->getInversePredicate() :
|
|
|
|
Cmp1->getPredicate());
|
|
|
|
|
|
|
|
Value *Input = Cmp0->getOperand(0);
|
|
|
|
Value *RangeEnd;
|
|
|
|
if (Cmp1->getOperand(0) == Input) {
|
|
|
|
// For the upper range compare we have: icmp x, n
|
|
|
|
RangeEnd = Cmp1->getOperand(1);
|
|
|
|
} else if (Cmp1->getOperand(1) == Input) {
|
|
|
|
// For the upper range compare we have: icmp n, x
|
|
|
|
RangeEnd = Cmp1->getOperand(0);
|
|
|
|
Pred1 = ICmpInst::getSwappedPredicate(Pred1);
|
|
|
|
} else {
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check the upper range comparison, e.g. x < n
|
|
|
|
ICmpInst::Predicate NewPred;
|
|
|
|
switch (Pred1) {
|
|
|
|
case ICmpInst::ICMP_SLT: NewPred = ICmpInst::ICMP_ULT; break;
|
|
|
|
case ICmpInst::ICMP_SLE: NewPred = ICmpInst::ICMP_ULE; break;
|
|
|
|
default: return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
// This simplification is only valid if the upper range is not negative.
|
2017-05-15 14:39:41 +08:00
|
|
|
KnownBits Known = computeKnownBits(RangeEnd, /*Depth=*/0, Cmp1);
|
|
|
|
if (!Known.isNonNegative())
|
2014-12-03 18:39:15 +08:00
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
if (Inverted)
|
|
|
|
NewPred = ICmpInst::getInversePredicate(NewPred);
|
|
|
|
|
2017-07-08 07:16:26 +08:00
|
|
|
return Builder.CreateICmp(NewPred, Input, RangeEnd);
|
2014-12-03 18:39:15 +08:00
|
|
|
}
|
|
|
|
|
2017-04-15 03:23:50 +08:00
|
|
|
/// Fold a pair of equality compares of the same value against two constants:
///   (X == C1 || X == C2)   and   (X != C1 && X != C2)
/// when the constants differ in exactly one bit or are adjacent values.
///
/// \p JoinedByAnd is true for the 'and' form (which requires 'ne' compares)
/// and false for the 'or' form (which requires 'eq' compares). Returns the
/// replacement compare, or nullptr when the pattern does not match.
static Value *
foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS,
                                     bool JoinedByAnd,
                                     InstCombiner::BuilderTy &Builder) {
  // Both compares must test the same value.
  Value *X = LHS->getOperand(0);
  if (X != RHS->getOperand(0))
    return nullptr;

  const APInt *Lo, *Hi;
  if (!match(LHS->getOperand(1), m_APInt(Lo)) ||
      !match(RHS->getOperand(1), m_APInt(Hi)))
    return nullptr;

  // We only handle (X != C1 && X != C2) and (X == C1 || X == C2).
  const ICmpInst::Predicate Pred =
      JoinedByAnd ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
  if (LHS->getPredicate() != Pred || RHS->getPredicate() != Pred)
    return nullptr;

  // Order the constants so that the larger unsigned one is Hi.
  if (Lo->ugt(*Hi))
    std::swap(Lo, Hi);

  Type *Ty = X->getType();

  APInt DiffBits = *Lo ^ *Hi;
  if (DiffBits.isPowerOf2()) {
    // If the constants differ by only one bit, then set that bit in X and
    // compare against the larger constant:
    // (X == C1 || X == C2) --> (X | (C1 ^ C2)) == C2
    // (X != C1 && X != C2) --> (X | (C1 ^ C2)) != C2
    // We choose an 'or' with a Pow2 constant rather than the inverse mask with
    // 'and' because that may lead to smaller codegen from a smaller constant.
    Value *Or = Builder.CreateOr(X, ConstantInt::get(Ty, DiffBits));
    return Builder.CreateICmp(Pred, Or, ConstantInt::get(Ty, *Hi));
  }

  // Special case: get the ordering right when the values wrap around zero.
  // I.e., we assumed the constants were unsigned when ordering them above, so
  // the pair {0, -1} must be flipped for the adjacency test below.
  if (Lo->isNullValue() && Hi->isAllOnesValue())
    std::swap(Lo, Hi);

  // Adjacent constants (Hi == Lo + 1, modulo the bit width).
  if (*Lo + 1 == *Hi) {
    // (X == 13 || X == 14) --> X - 13 <=u 1
    // (X != 13 && X != 14) --> X - 13 >u 1
    // An 'add' is the canonical IR form, so favor that over a 'sub'.
    Value *Add = Builder.CreateAdd(X, ConstantInt::get(Ty, -(*Lo)));
    ICmpInst::Predicate RangePred =
        JoinedByAnd ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULE;
    return Builder.CreateICmp(RangePred, Add, ConstantInt::get(Ty, 1));
  }

  return nullptr;
}
|
|
|
|
|
2017-06-16 13:10:37 +08:00
|
|
|
// Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
|
|
|
|
// Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2)
|
|
|
|
Value *InstCombiner::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, ICmpInst *RHS,
|
|
|
|
bool JoinedByAnd,
|
|
|
|
Instruction &CxtI) {
|
|
|
|
ICmpInst::Predicate Pred = LHS->getPredicate();
|
|
|
|
if (Pred != RHS->getPredicate())
|
|
|
|
return nullptr;
|
|
|
|
if (JoinedByAnd && Pred != ICmpInst::ICMP_NE)
|
|
|
|
return nullptr;
|
|
|
|
if (!JoinedByAnd && Pred != ICmpInst::ICMP_EQ)
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
// TODO support vector splats
|
|
|
|
ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS->getOperand(1));
|
|
|
|
ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS->getOperand(1));
|
|
|
|
if (!LHSC || !RHSC || !LHSC->isZero() || !RHSC->isZero())
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
Value *A, *B, *C, *D;
|
|
|
|
if (match(LHS->getOperand(0), m_And(m_Value(A), m_Value(B))) &&
|
|
|
|
match(RHS->getOperand(0), m_And(m_Value(C), m_Value(D)))) {
|
|
|
|
if (A == D || B == D)
|
|
|
|
std::swap(C, D);
|
|
|
|
if (B == C)
|
|
|
|
std::swap(A, B);
|
|
|
|
|
|
|
|
if (A == C &&
|
|
|
|
isKnownToBeAPowerOfTwo(B, false, 0, &CxtI) &&
|
|
|
|
isKnownToBeAPowerOfTwo(D, false, 0, &CxtI)) {
|
2017-07-08 07:16:26 +08:00
|
|
|
Value *Mask = Builder.CreateOr(B, D);
|
|
|
|
Value *Masked = Builder.CreateAnd(A, Mask);
|
2017-06-16 13:10:37 +08:00
|
|
|
auto NewPred = JoinedByAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
|
2017-07-08 07:16:26 +08:00
|
|
|
return Builder.CreateICmp(NewPred, Masked, Mask);
|
2017-06-16 13:10:37 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
[InstCombine] Re-land: Optimize redundant 'signed truncation check pattern'.
Summary:
This comes with `Implicit Conversion Sanitizer - integer sign change` (D50250):
```
signed char test(unsigned int x) { return x; }
```
`clang++ -fsanitize=implicit-conversion -S -emit-llvm -o - /tmp/test.cpp -O3`
* Old: {F6904292}
* With this patch: {F6904294}
General pattern:
X & Y
Where `Y` is checking that all the high bits (covered by a mask `4294967168`)
are uniform, i.e. `%arg & 4294967168` can be either `4294967168` or `0`
Pattern can be one of:
%t = add i32 %arg, 128
%r = icmp ult i32 %t, 256
Or
%t0 = shl i32 %arg, 24
%t1 = ashr i32 %t0, 24
%r = icmp eq i32 %t1, %arg
Or
%t0 = trunc i32 %arg to i8
%t1 = sext i8 %t0 to i32
%r = icmp eq i32 %t1, %arg
This pattern is a signed truncation check.
And `X` is checking that some bit in that same mask is zero.
I.e. can be one of:
%r = icmp sgt i32 %arg, -1
Or
%t = and i32 %arg, 2147483648
%r = icmp eq i32 %t, 0
Since we are checking that all the bits in that mask are the same,
and a particular bit is zero, what we are really checking is that all the
masked bits are zero.
So this should be transformed to:
%r = icmp ult i32 %arg, 128
The transform itself ended up being rather horrible, even though I omitted some cases.
Surely there is some infrastructure that can help clean this up that I missed?
https://rise4fun.com/Alive/3Ou
The initial commit (rL339610)
was reverted, since the first assert was being triggered.
The @positive_with_extra_and test now has coverage for that case.
Reviewers: spatel, craig.topper
Reviewed By: spatel
Subscribers: RKSimon, erichkeane, vsk, llvm-commits
Differential Revision: https://reviews.llvm.org/D50465
llvm-svn: 339621
2018-08-14 05:54:37 +08:00
|
|
|
/// General pattern:
|
|
|
|
/// X & Y
|
|
|
|
///
|
|
|
|
/// Where Y is checking that all the high bits (covered by a mask 4294967168)
|
|
|
|
/// are uniform, i.e. %arg & 4294967168 can be either 4294967168 or 0
|
|
|
|
/// Pattern can be one of:
|
|
|
|
/// %t = add i32 %arg, 128
|
|
|
|
/// %r = icmp ult i32 %t, 256
|
|
|
|
/// Or
|
|
|
|
/// %t0 = shl i32 %arg, 24
|
|
|
|
/// %t1 = ashr i32 %t0, 24
|
|
|
|
/// %r = icmp eq i32 %t1, %arg
|
|
|
|
/// Or
|
|
|
|
/// %t0 = trunc i32 %arg to i8
|
|
|
|
/// %t1 = sext i8 %t0 to i32
|
|
|
|
/// %r = icmp eq i32 %t1, %arg
|
|
|
|
/// This pattern is a signed truncation check.
|
|
|
|
///
|
|
|
|
/// And X is checking that some bit in that same mask is zero.
|
|
|
|
/// I.e. can be one of:
|
|
|
|
/// %r = icmp sgt i32 %arg, -1
|
|
|
|
/// Or
|
|
|
|
/// %t = and i32 %arg, 2147483648
|
|
|
|
/// %r = icmp eq i32 %t, 0
|
|
|
|
///
|
|
|
|
/// Since we are checking that all the bits in that mask are the same,
|
|
|
|
/// and a particular bit is zero, what we are really checking is that all the
|
|
|
|
/// masked bits are zero.
|
|
|
|
/// So this should be transformed to:
|
|
|
|
/// %r = icmp ult i32 %arg, 128
|
|
|
|
static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1,
|
|
|
|
Instruction &CxtI,
|
|
|
|
InstCombiner::BuilderTy &Builder) {
|
|
|
|
assert(CxtI.getOpcode() == Instruction::And);
|
|
|
|
|
|
|
|
// Match icmp ult (add %arg, C01), C1 (C1 == C01 << 1; powers of two)
|
|
|
|
auto tryToMatchSignedTruncationCheck = [](ICmpInst *ICmp, Value *&X,
|
|
|
|
APInt &SignBitMask) -> bool {
|
|
|
|
CmpInst::Predicate Pred;
|
|
|
|
const APInt *I01, *I1; // powers of two; I1 == I01 << 1
|
|
|
|
if (!(match(ICmp,
|
|
|
|
m_ICmp(Pred, m_Add(m_Value(X), m_Power2(I01)), m_Power2(I1))) &&
|
|
|
|
Pred == ICmpInst::ICMP_ULT && I1->ugt(*I01) && I01->shl(1) == *I1))
|
|
|
|
return false;
|
|
|
|
// Which bit is the new sign bit as per the 'signed truncation' pattern?
|
|
|
|
SignBitMask = *I01;
|
|
|
|
return true;
|
|
|
|
};
|
|
|
|
|
|
|
|
// One icmp needs to be 'signed truncation check'.
|
|
|
|
// We need to match this first, else we will mismatch commutative cases.
|
|
|
|
Value *X1;
|
|
|
|
APInt HighestBit;
|
|
|
|
ICmpInst *OtherICmp;
|
|
|
|
if (tryToMatchSignedTruncationCheck(ICmp1, X1, HighestBit))
|
|
|
|
OtherICmp = ICmp0;
|
|
|
|
else if (tryToMatchSignedTruncationCheck(ICmp0, X1, HighestBit))
|
|
|
|
OtherICmp = ICmp1;
|
|
|
|
else
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
assert(HighestBit.isPowerOf2() && "expected to be power of two (non-zero)");
|
|
|
|
|
|
|
|
// Try to match/decompose into: icmp eq (X & Mask), 0
|
|
|
|
auto tryToDecompose = [](ICmpInst *ICmp, Value *&X,
|
|
|
|
APInt &UnsetBitsMask) -> bool {
|
|
|
|
CmpInst::Predicate Pred = ICmp->getPredicate();
|
|
|
|
// Can it be decomposed into icmp eq (X & Mask), 0 ?
|
|
|
|
if (llvm::decomposeBitTestICmp(ICmp->getOperand(0), ICmp->getOperand(1),
|
|
|
|
Pred, X, UnsetBitsMask,
|
|
|
|
/*LookThruTrunc=*/false) &&
|
|
|
|
Pred == ICmpInst::ICMP_EQ)
|
|
|
|
return true;
|
|
|
|
// Is it icmp eq (X & Mask), 0 already?
|
|
|
|
const APInt *Mask;
|
|
|
|
if (match(ICmp, m_ICmp(Pred, m_And(m_Value(X), m_APInt(Mask)), m_Zero())) &&
|
|
|
|
Pred == ICmpInst::ICMP_EQ) {
|
|
|
|
UnsetBitsMask = *Mask;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
};
|
|
|
|
|
|
|
|
// And the other icmp needs to be decomposable into a bit test.
|
|
|
|
Value *X0;
|
|
|
|
APInt UnsetBitsMask;
|
|
|
|
if (!tryToDecompose(OtherICmp, X0, UnsetBitsMask))
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
assert(!UnsetBitsMask.isNullValue() && "empty mask makes no sense.");
|
|
|
|
|
|
|
|
// Are they working on the same value?
|
|
|
|
Value *X;
|
|
|
|
if (X1 == X0) {
|
|
|
|
// Ok as is.
|
|
|
|
X = X1;
|
|
|
|
} else if (match(X0, m_Trunc(m_Specific(X1)))) {
|
|
|
|
UnsetBitsMask = UnsetBitsMask.zext(X1->getType()->getScalarSizeInBits());
|
|
|
|
X = X1;
|
|
|
|
} else
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
// So which bits should be uniform as per the 'signed truncation check'?
|
|
|
|
// (all the bits starting with (i.e. including) HighestBit)
|
|
|
|
APInt SignBitsMask = ~(HighestBit - 1U);
|
|
|
|
|
|
|
|
// UnsetBitsMask must have some common bits with SignBitsMask,
|
|
|
|
if (!UnsetBitsMask.intersects(SignBitsMask))
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
// Does UnsetBitsMask contain any bits outside of SignBitsMask?
|
|
|
|
if (!UnsetBitsMask.isSubsetOf(SignBitsMask)) {
|
|
|
|
APInt OtherHighestBit = (~UnsetBitsMask) + 1U;
|
|
|
|
if (!OtherHighestBit.isPowerOf2())
|
|
|
|
return nullptr;
|
|
|
|
HighestBit = APIntOps::umin(HighestBit, OtherHighestBit);
|
|
|
|
}
|
|
|
|
// Else, if it does not, then all is ok as-is.
|
|
|
|
|
|
|
|
// %r = icmp ult %X, SignBit
|
|
|
|
return Builder.CreateICmpULT(X, ConstantInt::get(X->getType(), HighestBit),
|
|
|
|
CxtI.getName() + ".simplified");
|
|
|
|
}
|
|
|
|
|
2015-09-09 02:24:36 +08:00
|
|
|
/// Fold (icmp)&(icmp) if possible.
|
2017-06-16 13:10:37 +08:00
|
|
|
Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
|
|
|
|
Instruction &CxtI) {
|
|
|
|
// Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2)
|
|
|
|
// if K1 and K2 are a one-bit mask.
|
|
|
|
if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, true, CxtI))
|
|
|
|
return V;
|
|
|
|
|
2017-04-06 01:38:34 +08:00
|
|
|
ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
|
2010-01-05 15:50:36 +08:00
|
|
|
|
|
|
|
// (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
|
2017-04-06 01:38:34 +08:00
|
|
|
if (PredicatesFoldable(PredL, PredR)) {
|
2010-01-05 15:50:36 +08:00
|
|
|
if (LHS->getOperand(0) == RHS->getOperand(1) &&
|
|
|
|
LHS->getOperand(1) == RHS->getOperand(0))
|
|
|
|
LHS->swapOperands();
|
|
|
|
if (LHS->getOperand(0) == RHS->getOperand(0) &&
|
|
|
|
LHS->getOperand(1) == RHS->getOperand(1)) {
|
|
|
|
Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
|
|
|
|
unsigned Code = getICmpCode(LHS) & getICmpCode(RHS);
|
|
|
|
bool isSigned = LHS->isSigned() || RHS->isSigned();
|
2011-12-17 09:20:32 +08:00
|
|
|
return getNewICmpValue(isSigned, Code, Op0, Op1, Builder);
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
|
|
|
}
|
2010-09-09 06:16:17 +08:00
|
|
|
|
2011-02-10 13:17:27 +08:00
|
|
|
// handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E)
|
2013-09-04 19:57:13 +08:00
|
|
|
if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, true, Builder))
|
2011-02-10 13:17:27 +08:00
|
|
|
return V;
|
2012-12-20 15:09:41 +08:00
|
|
|
|
2014-12-03 18:39:15 +08:00
|
|
|
// E.g. (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n
|
|
|
|
if (Value *V = simplifyRangeCheck(LHS, RHS, /*Inverted=*/false))
|
|
|
|
return V;
|
|
|
|
|
|
|
|
// E.g. (icmp slt x, n) & (icmp sge x, 0) --> icmp ult x, n
|
|
|
|
if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/false))
|
|
|
|
return V;
|
|
|
|
|
2017-04-16 01:55:06 +08:00
|
|
|
if (Value *V = foldAndOrOfEqualityCmpsWithConstants(LHS, RHS, true, Builder))
|
|
|
|
return V;
|
|
|
|
|
[InstCombine] Re-land: Optimize redundant 'signed truncation check pattern'.
Summary:
This comes with `Implicit Conversion Sanitizer - integer sign change` (D50250):
```
signed char test(unsigned int x) { return x; }
```
`clang++ -fsanitize=implicit-conversion -S -emit-llvm -o - /tmp/test.cpp -O3`
* Old: {F6904292}
* With this patch: {F6904294}
General pattern:
X & Y
Where `Y` is checking that all the high bits (covered by a mask `4294967168`)
are uniform, i.e. `%arg & 4294967168` can be either `4294967168` or `0`
Pattern can be one of:
%t = add i32 %arg, 128
%r = icmp ult i32 %t, 256
Or
%t0 = shl i32 %arg, 24
%t1 = ashr i32 %t0, 24
%r = icmp eq i32 %t1, %arg
Or
%t0 = trunc i32 %arg to i8
%t1 = sext i8 %t0 to i32
%r = icmp eq i32 %t1, %arg
This pattern is a signed truncation check.
And `X` is checking that some bit in that same mask is zero.
I.e. can be one of:
%r = icmp sgt i32 %arg, -1
Or
%t = and i32 %arg, 2147483648
%r = icmp eq i32 %t, 0
Since we are checking that all the bits in that mask are the same,
and a particular bit is zero, what we are really checking is that all the
masked bits are zero.
So this should be transformed to:
%r = icmp ult i32 %arg, 128
The transform itself ended up being rather horrible, even though I omitted some cases.
Surely there is some infrastructure that can help clean this up that I missed?
https://rise4fun.com/Alive/3Ou
The initial commit (rL339610)
was reverted, since the first assert was being triggered.
The @positive_with_extra_and test now has coverage for that case.
Reviewers: spatel, craig.topper
Reviewed By: spatel
Subscribers: RKSimon, erichkeane, vsk, llvm-commits
Differential Revision: https://reviews.llvm.org/D50465
llvm-svn: 339621
2018-08-14 05:54:37 +08:00
|
|
|
if (Value *V = foldSignedTruncationCheck(LHS, RHS, CxtI, Builder))
|
|
|
|
return V;
|
|
|
|
|
2010-01-05 15:50:36 +08:00
|
|
|
// This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
|
2017-04-11 03:38:36 +08:00
|
|
|
Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
|
2017-04-06 01:38:34 +08:00
|
|
|
ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS->getOperand(1));
|
|
|
|
ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS->getOperand(1));
|
|
|
|
if (!LHSC || !RHSC)
|
|
|
|
return nullptr;
|
2012-12-20 15:09:41 +08:00
|
|
|
|
2017-04-06 01:38:34 +08:00
|
|
|
if (LHSC == RHSC && PredL == PredR) {
|
2010-01-05 15:50:36 +08:00
|
|
|
// (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
|
2016-01-19 03:17:58 +08:00
|
|
|
// where C is a power of 2 or
|
2010-01-05 15:50:36 +08:00
|
|
|
// (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
|
2017-04-06 01:38:34 +08:00
|
|
|
if ((PredL == ICmpInst::ICMP_ULT && LHSC->getValue().isPowerOf2()) ||
|
|
|
|
(PredL == ICmpInst::ICMP_EQ && LHSC->isZero())) {
|
2017-07-08 07:16:26 +08:00
|
|
|
Value *NewOr = Builder.CreateOr(LHS0, RHS0);
|
|
|
|
return Builder.CreateICmp(PredL, NewOr, LHSC);
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
|
|
|
}
|
2011-04-29 00:58:40 +08:00
|
|
|
|
2011-04-29 04:09:57 +08:00
|
|
|
// (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2
|
2011-04-29 00:58:40 +08:00
|
|
|
// where CMAX is the all ones value for the truncated type,
|
2012-09-27 18:14:43 +08:00
|
|
|
// iff the lower bits of C2 and CA are zero.
|
2017-04-06 01:38:34 +08:00
|
|
|
if (PredL == ICmpInst::ICMP_EQ && PredL == PredR && LHS->hasOneUse() &&
|
|
|
|
RHS->hasOneUse()) {
|
2011-04-29 00:58:40 +08:00
|
|
|
Value *V;
|
2017-04-06 01:38:34 +08:00
|
|
|
ConstantInt *AndC, *SmallC = nullptr, *BigC = nullptr;
|
2011-04-29 00:58:40 +08:00
|
|
|
|
|
|
|
// (trunc x) == C1 & (and x, CA) == C2
|
2012-12-20 15:15:54 +08:00
|
|
|
// (and x, CA) == C2 & (trunc x) == C1
|
2017-04-11 03:38:36 +08:00
|
|
|
if (match(RHS0, m_Trunc(m_Value(V))) &&
|
|
|
|
match(LHS0, m_And(m_Specific(V), m_ConstantInt(AndC)))) {
|
2017-04-06 01:38:34 +08:00
|
|
|
SmallC = RHSC;
|
|
|
|
BigC = LHSC;
|
2017-04-11 03:38:36 +08:00
|
|
|
} else if (match(LHS0, m_Trunc(m_Value(V))) &&
|
|
|
|
match(RHS0, m_And(m_Specific(V), m_ConstantInt(AndC)))) {
|
2017-04-06 01:38:34 +08:00
|
|
|
SmallC = LHSC;
|
|
|
|
BigC = RHSC;
|
2011-04-29 00:58:40 +08:00
|
|
|
}
|
|
|
|
|
2017-04-06 01:38:34 +08:00
|
|
|
if (SmallC && BigC) {
|
|
|
|
unsigned BigBitSize = BigC->getType()->getBitWidth();
|
|
|
|
unsigned SmallBitSize = SmallC->getType()->getBitWidth();
|
2011-04-29 00:58:40 +08:00
|
|
|
|
|
|
|
// Check that the low bits are zero.
|
|
|
|
APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize);
|
2017-06-07 15:40:37 +08:00
|
|
|
if ((Low & AndC->getValue()).isNullValue() &&
|
|
|
|
(Low & BigC->getValue()).isNullValue()) {
|
2017-07-08 07:16:26 +08:00
|
|
|
Value *NewAnd = Builder.CreateAnd(V, Low | AndC->getValue());
|
2017-04-06 01:38:34 +08:00
|
|
|
APInt N = SmallC->getValue().zext(BigBitSize) | BigC->getValue();
|
|
|
|
Value *NewVal = ConstantInt::get(AndC->getType()->getContext(), N);
|
2017-07-08 07:16:26 +08:00
|
|
|
return Builder.CreateICmp(PredL, NewAnd, NewVal);
|
2011-04-29 00:58:40 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2012-01-09 02:32:24 +08:00
|
|
|
|
2010-01-05 15:50:36 +08:00
|
|
|
// From here on, we only handle:
|
|
|
|
// (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
|
2017-04-11 03:38:36 +08:00
|
|
|
if (LHS0 != RHS0)
|
2017-04-06 01:38:34 +08:00
|
|
|
return nullptr;
|
2012-12-20 15:09:41 +08:00
|
|
|
|
2017-04-06 01:38:34 +08:00
|
|
|
// ICMP_[US][GL]E X, C is folded to ICMP_[US][GL]T elsewhere.
|
|
|
|
if (PredL == ICmpInst::ICMP_UGE || PredL == ICmpInst::ICMP_ULE ||
|
|
|
|
PredR == ICmpInst::ICMP_UGE || PredR == ICmpInst::ICMP_ULE ||
|
|
|
|
PredL == ICmpInst::ICMP_SGE || PredL == ICmpInst::ICMP_SLE ||
|
|
|
|
PredR == ICmpInst::ICMP_SGE || PredR == ICmpInst::ICMP_SLE)
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2011-03-01 23:05:01 +08:00
|
|
|
|
2010-01-05 15:50:36 +08:00
|
|
|
// We can't fold (ugt x, C) & (sgt x, C2).
|
2017-04-06 01:38:34 +08:00
|
|
|
if (!PredicatesFoldable(PredL, PredR))
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2012-12-20 15:09:41 +08:00
|
|
|
|
2010-01-05 15:50:36 +08:00
|
|
|
// Ensure that the larger constant is on the RHS.
|
|
|
|
bool ShouldSwap;
|
2017-04-11 23:57:32 +08:00
|
|
|
if (CmpInst::isSigned(PredL) ||
|
|
|
|
(ICmpInst::isEquality(PredL) && CmpInst::isSigned(PredR)))
|
2017-04-11 00:55:57 +08:00
|
|
|
ShouldSwap = LHSC->getValue().sgt(RHSC->getValue());
|
2017-04-11 23:57:32 +08:00
|
|
|
else
|
|
|
|
ShouldSwap = LHSC->getValue().ugt(RHSC->getValue());
|
2012-12-20 15:09:41 +08:00
|
|
|
|
2010-01-05 15:50:36 +08:00
|
|
|
if (ShouldSwap) {
|
|
|
|
std::swap(LHS, RHS);
|
2017-04-06 01:38:34 +08:00
|
|
|
std::swap(LHSC, RHSC);
|
|
|
|
std::swap(PredL, PredR);
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
|
|
|
|
2010-02-11 00:03:48 +08:00
|
|
|
// At this point, we know we have two icmp instructions
|
2010-01-05 15:50:36 +08:00
|
|
|
// comparing a value against two constants and and'ing the result
|
|
|
|
// together. Because of the above check, we know that we only have
|
2012-12-20 15:09:41 +08:00
|
|
|
// icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. We also know
|
|
|
|
// (from the icmp folding check above), that the two constants
|
2010-01-05 15:50:36 +08:00
|
|
|
// are not equal and that the larger constant is on the RHS
|
2017-04-06 01:38:34 +08:00
|
|
|
assert(LHSC != RHSC && "Compares not folded above?");
|
2010-01-05 15:50:36 +08:00
|
|
|
|
2017-04-06 01:38:34 +08:00
|
|
|
switch (PredL) {
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unknown integer condition code!");
|
2010-01-05 15:50:36 +08:00
|
|
|
case ICmpInst::ICMP_NE:
|
2017-04-06 01:38:34 +08:00
|
|
|
switch (PredR) {
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unknown integer condition code!");
|
2010-01-05 15:50:36 +08:00
|
|
|
case ICmpInst::ICMP_ULT:
|
2017-04-06 01:38:34 +08:00
|
|
|
if (LHSC == SubOne(RHSC)) // (X != 13 & X u< 14) -> X < 13
|
2017-07-08 07:16:26 +08:00
|
|
|
return Builder.CreateICmpULT(LHS0, LHSC);
|
2017-07-07 02:39:47 +08:00
|
|
|
if (LHSC->isZero()) // (X != 0 & X u< 14) -> X-1 u< 13
|
2017-04-11 03:38:36 +08:00
|
|
|
return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(),
|
2016-09-01 03:49:56 +08:00
|
|
|
false, true);
|
2017-04-06 01:38:34 +08:00
|
|
|
break; // (X != 13 & X u< 15) -> no change
|
2010-01-05 15:50:36 +08:00
|
|
|
case ICmpInst::ICMP_SLT:
|
2017-04-06 01:38:34 +08:00
|
|
|
if (LHSC == SubOne(RHSC)) // (X != 13 & X s< 14) -> X < 13
|
2017-07-08 07:16:26 +08:00
|
|
|
return Builder.CreateICmpSLT(LHS0, LHSC);
|
2017-04-06 01:38:34 +08:00
|
|
|
break; // (X != 13 & X s< 15) -> no change
|
2010-01-05 15:50:36 +08:00
|
|
|
case ICmpInst::ICMP_NE:
|
2017-04-15 03:23:50 +08:00
|
|
|
// Potential folds for this case should already be handled.
|
|
|
|
break;
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case ICmpInst::ICMP_UGT:
|
2017-04-06 01:38:34 +08:00
|
|
|
switch (PredR) {
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unknown integer condition code!");
|
2010-01-05 15:50:36 +08:00
|
|
|
case ICmpInst::ICMP_NE:
|
2017-04-06 01:38:34 +08:00
|
|
|
if (RHSC == AddOne(LHSC)) // (X u> 13 & X != 14) -> X u> 14
|
2017-07-08 07:16:26 +08:00
|
|
|
return Builder.CreateICmp(PredL, LHS0, RHSC);
|
2017-04-06 01:38:34 +08:00
|
|
|
break; // (X u> 13 & X != 15) -> no change
|
|
|
|
case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1
|
2017-04-11 03:38:36 +08:00
|
|
|
return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(),
|
|
|
|
false, true);
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case ICmpInst::ICMP_SGT:
|
2017-04-06 01:38:34 +08:00
|
|
|
switch (PredR) {
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unknown integer condition code!");
|
2010-01-05 15:50:36 +08:00
|
|
|
case ICmpInst::ICMP_NE:
|
2017-04-06 01:38:34 +08:00
|
|
|
if (RHSC == AddOne(LHSC)) // (X s> 13 & X != 14) -> X s> 14
|
2017-07-08 07:16:26 +08:00
|
|
|
return Builder.CreateICmp(PredL, LHS0, RHSC);
|
2017-04-06 01:38:34 +08:00
|
|
|
break; // (X s> 13 & X != 15) -> no change
|
|
|
|
case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1
|
2017-04-11 03:38:36 +08:00
|
|
|
return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), true,
|
2017-04-06 01:38:34 +08:00
|
|
|
true);
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2012-12-20 15:09:41 +08:00
|
|
|
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
|
|
|
|
2017-09-03 01:53:33 +08:00
|
|
|
/// Try to fold a bitwise 'and' (\p IsAnd == true) or 'or' (\p IsAnd == false)
/// of two fcmp instructions into a single value. Returns nullptr when none of
/// the folds below applies.
Value *InstCombiner::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd) {
  Value *LHS0 = LHS->getOperand(0);
  Value *LHS1 = LHS->getOperand(1);
  Value *RHS0 = RHS->getOperand(0);
  Value *RHS1 = RHS->getOperand(1);
  FCmpInst::Predicate PredL = LHS->getPredicate();
  FCmpInst::Predicate PredR = RHS->getPredicate();

  // If RHS compares the same two values in the opposite order, commute it
  // (operands and predicate) so that it lines up with LHS.
  if (LHS0 == RHS1 && RHS0 == LHS1) {
    PredR = FCmpInst::getSwappedPredicate(PredR);
    std::swap(RHS0, RHS1);
  }

  // Same operands on both sides: (fcmp cc0 x, y) op (fcmp cc1 x, y).
  //
  // Let R be the actual relation between x and y, one of U(1000), L(0100),
  // G(0010) or E(0001), and let CC0/CC1 be the bitmasks of relations accepted
  // by each predicate. (R & CC0) and (R & CC1) are each either R or 0, hence:
  //
  //   for 'and':  bool(R & CC0) && bool(R & CC1)
  //             = bool((R & CC0) & (R & CC1))
  //             = bool(R & (CC0 & CC1))
  //
  //   for 'or':   bool(R & CC0) || bool(R & CC1)
  //             = bool((R & CC0) | (R & CC1))
  //             = bool(R & (CC0 | CC1))
  //
  // So the combined compare simply uses the and/or of the two predicate codes.
  if (LHS0 == RHS0 && LHS1 == RHS1) {
    const unsigned CodeL = getFCmpCode(PredL);
    const unsigned CodeR = getFCmpCode(PredR);
    unsigned CombinedCode;
    if (IsAnd)
      CombinedCode = CodeL & CodeR;
    else
      CombinedCode = CodeL | CodeR;
    return getFCmpValue(CombinedCode, LHS0, LHS1, Builder);
  }

  // The remaining fold only handles (ord & ord) and (uno | uno).
  const bool AndOfOrd =
      IsAnd && PredL == FCmpInst::FCMP_ORD && PredR == FCmpInst::FCMP_ORD;
  const bool OrOfUno =
      !IsAnd && PredL == FCmpInst::FCMP_UNO && PredR == FCmpInst::FCMP_UNO;
  if (!AndOfOrd && !OrOfUno)
    return nullptr;

  // The two compared values must have the same type to be compared directly.
  if (LHS0->getType() != RHS0->getType())
    return nullptr;

  // FCmp canonicalization ensures that (fcmp ord/uno X, X) and
  // (fcmp ord/uno X, C) will be transformed to (fcmp X, +0.0), so only the
  // +0.0 form needs to be recognized here.
  if (!match(LHS1, m_PosZeroFP()) || !match(RHS1, m_PosZeroFP()))
    return nullptr;

  // Ignore the constants because they are obviously not NANs:
  // (fcmp ord x, 0.0) & (fcmp ord y, 0.0)  -> (fcmp ord x, y)
  // (fcmp uno x, 0.0) | (fcmp uno y, 0.0)  -> (fcmp uno x, y)
  return Builder.CreateFCmp(PredL, LHS0, RHS0);
}
|
|
|
|
|
2015-09-09 04:14:13 +08:00
|
|
|
/// Match De Morgan's Laws:
|
|
|
|
/// (~A & ~B) == (~(A | B))
|
|
|
|
/// (~A | ~B) == (~(A & B))
|
|
|
|
static Instruction *matchDeMorgansLaws(BinaryOperator &I,
|
2017-05-10 04:05:05 +08:00
|
|
|
InstCombiner::BuilderTy &Builder) {
|
2015-09-09 04:14:13 +08:00
|
|
|
auto Opcode = I.getOpcode();
|
|
|
|
assert((Opcode == Instruction::And || Opcode == Instruction::Or) &&
|
|
|
|
"Trying to match De Morgan's Laws with something other than and/or");
|
2017-05-10 04:05:05 +08:00
|
|
|
|
[InstCombine] match De Morgan's Law hidden by zext ops (PR22723)
This is a fix for PR22723:
https://llvm.org/bugs/show_bug.cgi?id=22723
My first attempt at this was to change what I thought was the root problem:
xor (zext i1 X to i32), 1 --> zext (xor i1 X, true) to i32
...but we create the opposite pattern in InstCombiner::visitZExt(), so infinite loop!
My next idea was to fix the matchIfNot() implementation in PatternMatch, but that would
mean potentially returning a different size for the match than what was input. I think
this would require all users of m_Not to check the size of the returned match, so I
abandoned that idea.
I settled on just fixing the exact case presented in the PR. This patch does allow the
2 functions in PR22723 to compile identically (x86):
bool test(bool x, bool y) { return !x | !y; }
bool test(bool x, bool y) { return !x || !y; }
...
andb %sil, %dil
xorb $1, %dil
movb %dil, %al
retq
Differential Revision: http://reviews.llvm.org/D12705
llvm-svn: 248634
2015-09-26 07:21:38 +08:00
|
|
|
// Flip the logic operation.
|
2017-05-10 04:05:05 +08:00
|
|
|
Opcode = (Opcode == Instruction::And) ? Instruction::Or : Instruction::And;
|
2015-09-09 04:14:13 +08:00
|
|
|
|
2017-05-10 04:05:05 +08:00
|
|
|
Value *A, *B;
|
|
|
|
if (match(I.getOperand(0), m_OneUse(m_Not(m_Value(A)))) &&
|
|
|
|
match(I.getOperand(1), m_OneUse(m_Not(m_Value(B)))) &&
|
|
|
|
!IsFreeToInvert(A, A->hasOneUse()) &&
|
|
|
|
!IsFreeToInvert(B, B->hasOneUse())) {
|
|
|
|
Value *AndOr = Builder.CreateBinOp(Opcode, A, B, I.getName() + ".demorgan");
|
|
|
|
return BinaryOperator::CreateNot(AndOr);
|
|
|
|
}
|
2015-09-09 04:14:13 +08:00
|
|
|
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
2016-07-19 17:06:08 +08:00
|
|
|
/// Return false for a cast that is expected to go away on its own: a no-op
/// cast, a cast of a constant, or a cast paired with an eliminable preceding
/// cast. Return true otherwise.
bool InstCombiner::shouldOptimizeCast(CastInst *CI) {
  Value *Src = CI->getOperand(0);

  // Noop casts should be eliminated trivially.
  if (CI->getSrcTy() == CI->getDestTy())
    return false;

  // So should casts of constants.
  if (isa<Constant>(Src))
    return false;

  // If this cast is fed by another cast and the pair can be eliminated, we
  // prefer to have that elimination happen.
  const auto *InnerCast = dyn_cast<CastInst>(Src);
  if (InnerCast && isEliminableCastPair(InnerCast, CI))
    return false;

  return true;
}
|
|
|
|
|
2016-09-12 08:16:23 +08:00
|
|
|
/// Fold {and,or,xor} (cast X), C.
|
|
|
|
static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast,
|
2017-07-08 07:16:26 +08:00
|
|
|
InstCombiner::BuilderTy &Builder) {
|
2017-08-09 14:17:48 +08:00
|
|
|
Constant *C = dyn_cast<Constant>(Logic.getOperand(1));
|
|
|
|
if (!C)
|
2016-09-12 08:16:23 +08:00
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
auto LogicOpc = Logic.getOpcode();
|
|
|
|
Type *DestTy = Logic.getType();
|
|
|
|
Type *SrcTy = Cast->getSrcTy();
|
|
|
|
|
2017-08-03 04:25:56 +08:00
|
|
|
// Move the logic operation ahead of a zext or sext if the constant is
|
|
|
|
// unchanged in the smaller source type. Performing the logic in a smaller
|
|
|
|
// type may provide more information to later folds, and the smaller logic
|
|
|
|
// instruction may be cheaper (particularly in the case of vectors).
|
2016-09-12 08:16:23 +08:00
|
|
|
Value *X;
|
|
|
|
if (match(Cast, m_OneUse(m_ZExt(m_Value(X))))) {
|
|
|
|
Constant *TruncC = ConstantExpr::getTrunc(C, SrcTy);
|
|
|
|
Constant *ZextTruncC = ConstantExpr::getZExt(TruncC, DestTy);
|
|
|
|
if (ZextTruncC == C) {
|
|
|
|
// LogicOpc (zext X), C --> zext (LogicOpc X, C)
|
2017-07-08 07:16:26 +08:00
|
|
|
Value *NewOp = Builder.CreateBinOp(LogicOpc, X, TruncC);
|
2016-09-12 08:16:23 +08:00
|
|
|
return new ZExtInst(NewOp, DestTy);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-08-03 04:25:56 +08:00
|
|
|
if (match(Cast, m_OneUse(m_SExt(m_Value(X))))) {
|
|
|
|
Constant *TruncC = ConstantExpr::getTrunc(C, SrcTy);
|
|
|
|
Constant *SextTruncC = ConstantExpr::getSExt(TruncC, DestTy);
|
|
|
|
if (SextTruncC == C) {
|
|
|
|
// LogicOpc (sext X), C --> sext (LogicOpc X, C)
|
|
|
|
Value *NewOp = Builder.CreateBinOp(LogicOpc, X, TruncC);
|
|
|
|
return new SExtInst(NewOp, DestTy);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-09-12 08:16:23 +08:00
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Fold {and,or,xor} (cast X), Y.
|
2016-02-24 00:36:07 +08:00
|
|
|
Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) {
|
2016-02-24 07:56:23 +08:00
|
|
|
auto LogicOpc = I.getOpcode();
|
2016-11-23 06:54:36 +08:00
|
|
|
assert(I.isBitwiseLogicOp() && "Unexpected opcode for bitwise logic folding");
|
2016-02-24 07:56:23 +08:00
|
|
|
|
2016-02-24 00:36:07 +08:00
|
|
|
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
|
2016-02-24 01:41:34 +08:00
|
|
|
CastInst *Cast0 = dyn_cast<CastInst>(Op0);
|
[InstCombine] transform bitcasted bitwise logic ops with constants (PR26702)
Given that we're not actually reducing the instruction count in the included
regression tests, I think we would call this a canonicalization step.
The motivation comes from the example in PR26702:
https://llvm.org/bugs/show_bug.cgi?id=26702
If we hoist the bitwise logic ahead of the bitcast, the previously unoptimizable
example of:
define <4 x i32> @is_negative(<4 x i32> %x) {
%lobit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%not = xor <4 x i32> %lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
%bc = bitcast <4 x i32> %not to <2 x i64>
%notnot = xor <2 x i64> %bc, <i64 -1, i64 -1>
%bc2 = bitcast <2 x i64> %notnot to <4 x i32>
ret <4 x i32> %bc2
}
Simplifies to the expected:
define <4 x i32> @is_negative(<4 x i32> %x) {
%lobit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
ret <4 x i32> %lobit
}
Differential Revision: http://reviews.llvm.org/D17583
llvm-svn: 262645
2016-03-04 03:19:04 +08:00
|
|
|
if (!Cast0)
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
// This must be a cast from an integer or integer vector source type to allow
|
|
|
|
// transformation of the logic operation to the source type.
|
|
|
|
Type *DestTy = I.getType();
|
|
|
|
Type *SrcTy = Cast0->getSrcTy();
|
|
|
|
if (!SrcTy->isIntOrIntVectorTy())
|
|
|
|
return nullptr;
|
|
|
|
|
2016-09-12 08:16:23 +08:00
|
|
|
if (Instruction *Ret = foldLogicCastConstant(I, Cast0, Builder))
|
|
|
|
return Ret;
|
2016-07-21 08:24:18 +08:00
|
|
|
|
2016-02-24 01:41:34 +08:00
|
|
|
CastInst *Cast1 = dyn_cast<CastInst>(Op1);
|
[InstCombine] transform bitcasted bitwise logic ops with constants (PR26702)
Given that we're not actually reducing the instruction count in the included
regression tests, I think we would call this a canonicalization step.
The motivation comes from the example in PR26702:
https://llvm.org/bugs/show_bug.cgi?id=26702
If we hoist the bitwise logic ahead of the bitcast, the previously unoptimizable
example of:
define <4 x i32> @is_negative(<4 x i32> %x) {
%lobit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%not = xor <4 x i32> %lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
%bc = bitcast <4 x i32> %not to <2 x i64>
%notnot = xor <2 x i64> %bc, <i64 -1, i64 -1>
%bc2 = bitcast <2 x i64> %notnot to <4 x i32>
ret <4 x i32> %bc2
}
Simplifies to the expected:
define <4 x i32> @is_negative(<4 x i32> %x) {
%lobit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
ret <4 x i32> %lobit
}
Differential Revision: http://reviews.llvm.org/D17583
llvm-svn: 262645
2016-03-04 03:19:04 +08:00
|
|
|
if (!Cast1)
|
2016-02-24 00:59:21 +08:00
|
|
|
return nullptr;
|
2016-02-24 00:36:07 +08:00
|
|
|
|
[InstCombine] transform bitcasted bitwise logic ops with constants (PR26702)
Given that we're not actually reducing the instruction count in the included
regression tests, I think we would call this a canonicalization step.
The motivation comes from the example in PR26702:
https://llvm.org/bugs/show_bug.cgi?id=26702
If we hoist the bitwise logic ahead of the bitcast, the previously unoptimizable
example of:
define <4 x i32> @is_negative(<4 x i32> %x) {
%lobit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%not = xor <4 x i32> %lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
%bc = bitcast <4 x i32> %not to <2 x i64>
%notnot = xor <2 x i64> %bc, <i64 -1, i64 -1>
%bc2 = bitcast <2 x i64> %notnot to <4 x i32>
ret <4 x i32> %bc2
}
Simplifies to the expected:
define <4 x i32> @is_negative(<4 x i32> %x) {
%lobit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
ret <4 x i32> %lobit
}
Differential Revision: http://reviews.llvm.org/D17583
llvm-svn: 262645
2016-03-04 03:19:04 +08:00
|
|
|
// Both operands of the logic operation are casts. The casts must be of the
|
|
|
|
// same type for reduction.
|
2016-02-24 01:41:34 +08:00
|
|
|
auto CastOpcode = Cast0->getOpcode();
|
[InstCombine] transform bitcasted bitwise logic ops with constants (PR26702)
Given that we're not actually reducing the instruction count in the included
regression tests, I think we would call this a canonicalization step.
The motivation comes from the example in PR26702:
https://llvm.org/bugs/show_bug.cgi?id=26702
If we hoist the bitwise logic ahead of the bitcast, the previously unoptimizable
example of:
define <4 x i32> @is_negative(<4 x i32> %x) {
%lobit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%not = xor <4 x i32> %lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
%bc = bitcast <4 x i32> %not to <2 x i64>
%notnot = xor <2 x i64> %bc, <i64 -1, i64 -1>
%bc2 = bitcast <2 x i64> %notnot to <4 x i32>
ret <4 x i32> %bc2
}
Simplifies to the expected:
define <4 x i32> @is_negative(<4 x i32> %x) {
%lobit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
ret <4 x i32> %lobit
}
Differential Revision: http://reviews.llvm.org/D17583
llvm-svn: 262645
2016-03-04 03:19:04 +08:00
|
|
|
if (CastOpcode != Cast1->getOpcode() || SrcTy != Cast1->getSrcTy())
|
2016-02-24 01:41:34 +08:00
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
Value *Cast0Src = Cast0->getOperand(0);
|
|
|
|
Value *Cast1Src = Cast1->getOperand(0);
|
|
|
|
|
2016-07-19 17:06:08 +08:00
|
|
|
// fold logic(cast(A), cast(B)) -> cast(logic(A, B))
|
[InstCombine] Refactor optimization of zext(or(icmp, icmp)) to enable more aggressive cast-folding
Summary:
InstCombine unfolds expressions of the form `zext(or(icmp, icmp))` to `or(zext(icmp), zext(icmp))` such that in a later iteration of InstCombine the exposed `zext(icmp)` instructions can be optimized. We now combine this unfolding and the subsequent `zext(icmp)` optimization to be performed together. Since the unfolding doesn't happen separately anymore, we also again enable the folding of `logic(cast(icmp), cast(icmp))` expressions to `cast(logic(icmp, icmp))` which had been disabled due to its interference with the unfolding transformation.
Tested via `make check` and `lnt`.
Background
==========
For a better understanding on how it came to this change we subsequently summarize its history. In commit r275989 we've already tried to enable the folding of `logic(cast(icmp), cast(icmp))` to `cast(logic(icmp, icmp))` which had to be reverted in r276106 because it could lead to an endless loop in InstCombine (also see http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20160718/374347.html). The root of this problem is that in `visitZExt()` in InstCombineCasts.cpp there also exists a reverse of the above folding transformation, that unfolds `zext(or(icmp, icmp))` to `or(zext(icmp), zext(icmp))` in order to expose `zext(icmp)` operations which would then possibly be eliminated by subsequent iterations of InstCombine. However, before these `zext(icmp)` would be eliminated the folding from r275989 could kick in and cause InstCombine to endlessly switch back and forth between the folding and the unfolding transformation. This is the reason why we now combine the `zext`-unfolding and the elimination of the exposed `zext(icmp)` to happen at one go because this enables us to still allow the cast-folding in `logic(cast(icmp), cast(icmp))` without entering an endless loop again.
Details on the submitted changes
================================
- In `visitZExt()` we combine the unfolding and optimization of `zext` instructions.
- In `transformZExtICmp()` we have to use `Builder->CreateIntCast()` instead of `CastInst::CreateIntegerCast()` to make sure that the new `CastInst` is inserted in a `BasicBlock`. The new calls to `transformZExtICmp()` that we introduce in `visitZExt()` would otherwise cause according assertions to be triggered (in our case this happend, for example, with lnt for the MultiSource/Applications/sqlite3 and SingleSource/Regression/C++/EH/recursive-throw tests). The subsequent usage of `replaceInstUsesWith()` is necessary to ensure that the new `CastInst` replaces the `ZExtInst` accordingly.
- In InstCombineAndOrXor.cpp we again allow the folding of casts on `icmp` instructions.
- The instruction order in the optimized IR for the zext-or-icmp.ll test case is different with the introduced changes.
- The test cases in zext.ll have been adopted from the reverted commits r275989 and r276105.
Reviewers: grosser, majnemer, spatel
Subscribers: eli.friedman, majnemer, llvm-commits
Differential Revision: https://reviews.llvm.org/D22864
Contributed-by: Matthias Reisinger <d412vv1n@gmail.com>
llvm-svn: 277635
2016-08-04 03:30:35 +08:00
|
|
|
if (shouldOptimizeCast(Cast0) && shouldOptimizeCast(Cast1)) {
|
2017-07-08 07:16:26 +08:00
|
|
|
Value *NewOp = Builder.CreateBinOp(LogicOpc, Cast0Src, Cast1Src,
|
2016-02-24 07:56:23 +08:00
|
|
|
I.getName());
|
2016-02-24 01:41:34 +08:00
|
|
|
return CastInst::Create(CastOpcode, NewOp, DestTy);
|
2016-02-24 00:36:07 +08:00
|
|
|
}
|
2016-02-24 01:41:34 +08:00
|
|
|
|
2016-02-25 01:00:34 +08:00
|
|
|
// For now, only 'and'/'or' have optimizations after this.
|
|
|
|
if (LogicOpc == Instruction::Xor)
|
|
|
|
return nullptr;
|
|
|
|
|
2016-02-24 07:56:23 +08:00
|
|
|
// If this is logic(cast(icmp), cast(icmp)), try to fold this even if the
|
2016-02-24 01:41:34 +08:00
|
|
|
// cast is otherwise not optimizable. This happens for vector sexts.
|
2016-02-24 07:56:23 +08:00
|
|
|
ICmpInst *ICmp0 = dyn_cast<ICmpInst>(Cast0Src);
|
|
|
|
ICmpInst *ICmp1 = dyn_cast<ICmpInst>(Cast1Src);
|
|
|
|
if (ICmp0 && ICmp1) {
|
2017-06-16 13:10:37 +08:00
|
|
|
Value *Res = LogicOpc == Instruction::And ? foldAndOfICmps(ICmp0, ICmp1, I)
|
2017-06-16 03:09:51 +08:00
|
|
|
: foldOrOfICmps(ICmp0, ICmp1, I);
|
2016-02-24 07:56:23 +08:00
|
|
|
if (Res)
|
|
|
|
return CastInst::Create(CastOpcode, Res, DestTy);
|
|
|
|
return nullptr;
|
|
|
|
}
|
2016-02-24 01:41:34 +08:00
|
|
|
|
2016-02-24 07:56:23 +08:00
|
|
|
// If this is logic(cast(fcmp), cast(fcmp)), try to fold this even if the
|
2016-02-24 01:41:34 +08:00
|
|
|
// cast is otherwise not optimizable. This happens for vector sexts.
|
2016-02-24 07:56:23 +08:00
|
|
|
FCmpInst *FCmp0 = dyn_cast<FCmpInst>(Cast0Src);
|
|
|
|
FCmpInst *FCmp1 = dyn_cast<FCmpInst>(Cast1Src);
|
2017-09-03 01:53:33 +08:00
|
|
|
if (FCmp0 && FCmp1)
|
|
|
|
if (Value *R = foldLogicOfFCmps(FCmp0, FCmp1, LogicOpc == Instruction::And))
|
|
|
|
return CastInst::Create(CastOpcode, R, DestTy);
|
2016-02-24 01:41:34 +08:00
|
|
|
|
2016-02-24 00:36:07 +08:00
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
2017-04-24 06:00:02 +08:00
|
|
|
static Instruction *foldAndToXor(BinaryOperator &I,
|
|
|
|
InstCombiner::BuilderTy &Builder) {
|
|
|
|
assert(I.getOpcode() == Instruction::And);
|
|
|
|
Value *Op0 = I.getOperand(0);
|
|
|
|
Value *Op1 = I.getOperand(1);
|
|
|
|
Value *A, *B;
|
|
|
|
|
|
|
|
// Operand complexity canonicalization guarantees that the 'or' is Op0.
|
|
|
|
// (A | B) & ~(A & B) --> A ^ B
|
|
|
|
// (A | B) & ~(B & A) --> A ^ B
|
[PatternMatch] Stabilize the matching order of commutative matchers
Summary:
Currently, we
1. match `LHS` matcher to the `first` operand of binary operator,
2. and then match `RHS` matcher to the `second` operand of binary operator.
If that does not match, we swap the `LHS` and `RHS` matchers:
1. match `RHS` matcher to the `first` operand of binary operator,
2. and then match `LHS` matcher to the `second` operand of binary operator.
This works ok.
But it complicates writing of commutative matchers, where one would like to match
(`m_Value()`) the value on one side, and use (`m_Specific()`) it on the other side.
This is additionally complicated by the fact that `m_Specific()` stores the `Value *`,
not `Value **`, so it won't work at all out of the box.
The last problem is trivially solved by adding a new `m_c_Specific()` that stores the
`Value **`, not `Value *`. I'm choosing to add a new matcher, not change the existing
one because i guess all the current users are ok with existing behavior,
and this additional pointer indirection may have performance drawbacks.
Also, i'm storing pointer, not reference, because for some mysterious-to-me reason
it did not work with the reference.
The first one appears trivial, too.
Currently, we
1. match `LHS` matcher to the `first` operand of binary operator,
2. and then match `RHS` matcher to the `second` operand of binary operator.
If that does not match, we swap the ~~`LHS` and `RHS` matchers~~ **operands**:
1. match ~~`RHS`~~ **`LHS`** matcher to the ~~`first`~~ **`second`** operand of binary operator,
2. and then match ~~`LHS`~~ **`RHS`** matcher to the ~~`second`~ **`first`** operand of binary operator.
Surprisingly, `$ ninja check-llvm` still passes with this.
But i expect the bots will disagree..
The motivational unittest is included.
I'd like to use this in D45664.
Reviewers: spatel, craig.topper, arsenm, RKSimon
Reviewed By: craig.topper
Subscribers: xbolva00, wdng, llvm-commits
Differential Revision: https://reviews.llvm.org/D45828
llvm-svn: 331085
2018-04-28 05:23:20 +08:00
|
|
|
if (match(&I, m_BinOp(m_Or(m_Value(A), m_Value(B)),
|
|
|
|
m_Not(m_c_And(m_Deferred(A), m_Deferred(B))))))
|
2017-04-24 06:00:02 +08:00
|
|
|
return BinaryOperator::CreateXor(A, B);
|
|
|
|
|
|
|
|
// (A | ~B) & (~A | B) --> ~(A ^ B)
|
|
|
|
// (A | ~B) & (B | ~A) --> ~(A ^ B)
|
|
|
|
// (~B | A) & (~A | B) --> ~(A ^ B)
|
|
|
|
// (~B | A) & (B | ~A) --> ~(A ^ B)
|
2017-06-23 00:12:02 +08:00
|
|
|
if (Op0->hasOneUse() || Op1->hasOneUse())
|
[PatternMatch] Stabilize the matching order of commutative matchers
Summary:
Currently, we
1. match `LHS` matcher to the `first` operand of binary operator,
2. and then match `RHS` matcher to the `second` operand of binary operator.
If that does not match, we swap the `LHS` and `RHS` matchers:
1. match `RHS` matcher to the `first` operand of binary operator,
2. and then match `LHS` matcher to the `second` operand of binary operator.
This works ok.
But it complicates writing of commutative matchers, where one would like to match
(`m_Value()`) the value on one side, and use (`m_Specific()`) it on the other side.
This is additionally complicated by the fact that `m_Specific()` stores the `Value *`,
not `Value **`, so it won't work at all out of the box.
The last problem is trivially solved by adding a new `m_c_Specific()` that stores the
`Value **`, not `Value *`. I'm choosing to add a new matcher, not change the existing
one because i guess all the current users are ok with existing behavior,
and this additional pointer indirection may have performance drawbacks.
Also, i'm storing pointer, not reference, because for some mysterious-to-me reason
it did not work with the reference.
The first one appears trivial, too.
Currently, we
1. match `LHS` matcher to the `first` operand of binary operator,
2. and then match `RHS` matcher to the `second` operand of binary operator.
If that does not match, we swap the ~~`LHS` and `RHS` matchers~~ **operands**:
1. match ~~`RHS`~~ **`LHS`** matcher to the ~~`first`~~ **`second`** operand of binary operator,
2. and then match ~~`LHS`~~ **`RHS`** matcher to the ~~`second`~ **`first`** operand of binary operator.
Surprisingly, `$ ninja check-llvm` still passes with this.
But i expect the bots will disagree..
The motivational unittest is included.
I'd like to use this in D45664.
Reviewers: spatel, craig.topper, arsenm, RKSimon
Reviewed By: craig.topper
Subscribers: xbolva00, wdng, llvm-commits
Differential Revision: https://reviews.llvm.org/D45828
llvm-svn: 331085
2018-04-28 05:23:20 +08:00
|
|
|
if (match(&I, m_BinOp(m_c_Or(m_Value(A), m_Not(m_Value(B))),
|
|
|
|
m_c_Or(m_Not(m_Deferred(A)), m_Deferred(B)))))
|
2017-06-23 00:12:02 +08:00
|
|
|
return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
|
2017-04-24 06:00:02 +08:00
|
|
|
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Recognize 'or' patterns that are really an xor or an inverted xor of two
/// values. Returns the replacement instruction, or nullptr if no pattern
/// matches.
static Instruction *foldOrToXor(BinaryOperator &I,
                                InstCombiner::BuilderTy &Builder) {
  assert(I.getOpcode() == Instruction::Or);
  Value *Op0 = I.getOperand(0);
  Value *Op1 = I.getOperand(1);
  Value *A, *B;

  // The inverted-xor form creates two instructions, so it is only attempted
  // when at least one operand of the 'or' has a single use.
  const bool AnyOneUse = Op0->hasOneUse() || Op1->hasOneUse();

  // Operand complexity canonicalization guarantees that the 'and' is Op0.
  // (A & B) | ~(A | B) --> ~(A ^ B)
  // (A & B) | ~(B | A) --> ~(A ^ B)
  if (AnyOneUse && match(Op0, m_And(m_Value(A), m_Value(B))) &&
      match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) {
    Value *Xor = Builder.CreateXor(A, B);
    return BinaryOperator::CreateNot(Xor);
  }

  // (A & ~B) | (~A & B) --> A ^ B
  // (A & ~B) | (B & ~A) --> A ^ B
  // (~B & A) | (~A & B) --> A ^ B
  // (~B & A) | (B & ~A) --> A ^ B
  if (!match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))))
    return nullptr;
  if (!match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B))))
    return nullptr;

  return BinaryOperator::CreateXor(A, B);
}
|
|
|
|
|
2018-01-26 00:34:36 +08:00
|
|
|
/// Return true if a constant shift amount is always less than the specified
|
|
|
|
/// bit-width. If not, the shift could create poison in the narrower type.
|
|
|
|
static bool canNarrowShiftAmt(Constant *C, unsigned BitWidth) {
|
|
|
|
if (auto *ScalarC = dyn_cast<ConstantInt>(C))
|
|
|
|
return ScalarC->getZExtValue() < BitWidth;
|
|
|
|
|
|
|
|
if (C->getType()->isVectorTy()) {
|
|
|
|
// Check each element of a constant vector.
|
|
|
|
unsigned NumElts = C->getType()->getVectorNumElements();
|
|
|
|
for (unsigned i = 0; i != NumElts; ++i) {
|
|
|
|
Constant *Elt = C->getAggregateElement(i);
|
|
|
|
if (!Elt)
|
|
|
|
return false;
|
|
|
|
if (isa<UndefValue>(Elt))
|
|
|
|
continue;
|
|
|
|
auto *CI = dyn_cast<ConstantInt>(Elt);
|
|
|
|
if (!CI || CI->getZExtValue() >= BitWidth)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// The constant is a constant expression or unknown.
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Try to use narrower ops (sink zext ops) for an 'and' with binop operand and
|
|
|
|
/// a common zext operand: and (binop (zext X), C), (zext X).
|
|
|
|
Instruction *InstCombiner::narrowMaskedBinOp(BinaryOperator &And) {
|
|
|
|
// This transform could also apply to {or, and, xor}, but there are better
|
|
|
|
// folds for those cases, so we don't expect those patterns here. AShr is not
|
|
|
|
// handled because it should always be transformed to LShr in this sequence.
|
|
|
|
// The subtract transform is different because it has a constant on the left.
|
|
|
|
// Add/mul commute the constant to RHS; sub with constant RHS becomes add.
|
|
|
|
Value *Op0 = And.getOperand(0), *Op1 = And.getOperand(1);
|
|
|
|
Constant *C;
|
|
|
|
if (!match(Op0, m_OneUse(m_Add(m_Specific(Op1), m_Constant(C)))) &&
|
|
|
|
!match(Op0, m_OneUse(m_Mul(m_Specific(Op1), m_Constant(C)))) &&
|
|
|
|
!match(Op0, m_OneUse(m_LShr(m_Specific(Op1), m_Constant(C)))) &&
|
|
|
|
!match(Op0, m_OneUse(m_Shl(m_Specific(Op1), m_Constant(C)))) &&
|
|
|
|
!match(Op0, m_OneUse(m_Sub(m_Constant(C), m_Specific(Op1)))))
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
Value *X;
|
2018-03-13 02:46:05 +08:00
|
|
|
if (!match(Op1, m_ZExt(m_Value(X))) || Op1->hasNUsesOrMore(3))
|
2018-01-26 00:34:36 +08:00
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
Type *Ty = And.getType();
|
|
|
|
if (!isa<VectorType>(Ty) && !shouldChangeType(Ty, X->getType()))
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
// If we're narrowing a shift, the shift amount must be safe (less than the
|
|
|
|
// width) in the narrower type. If the shift amount is greater, instsimplify
|
|
|
|
// usually handles that case, but we can't guarantee/assert it.
|
|
|
|
Instruction::BinaryOps Opc = cast<BinaryOperator>(Op0)->getOpcode();
|
|
|
|
if (Opc == Instruction::LShr || Opc == Instruction::Shl)
|
|
|
|
if (!canNarrowShiftAmt(C, X->getType()->getScalarSizeInBits()))
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
// and (sub C, (zext X)), (zext X) --> zext (and (sub C', X), X)
|
|
|
|
// and (binop (zext X), C), (zext X) --> zext (and (binop X, C'), X)
|
|
|
|
Value *NewC = ConstantExpr::getTrunc(C, X->getType());
|
|
|
|
Value *NewBO = Opc == Instruction::Sub ? Builder.CreateBinOp(Opc, NewC, X)
|
|
|
|
: Builder.CreateBinOp(Opc, X, NewC);
|
|
|
|
return new ZExtInst(Builder.CreateAnd(NewBO, X), Ty);
|
|
|
|
}
|
|
|
|
|
2016-12-19 02:49:48 +08:00
|
|
|
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.

/// Try a fixed sequence of folds on the 'and' instruction \p I and stop at the
/// first one that applies.
///
/// \returns a replacement instruction (or the result of replaceInstUsesWith)
///          when a fold fires, \c &I when one of the in-place simplification
///          helpers reports a change, or nullptr when nothing applied.
Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
  // If the whole 'and' simplifies to an existing value, use that value.
  if (Value *V = SimplifyAndInst(I.getOperand(0), I.getOperand(1),
                                 SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (SimplifyAssociativeOrCommutative(I))
    return &I;

  if (Instruction *X = foldVectorBinop(I))
    return X;

  // See if we can simplify any instructions used by the instruction whose sole
  // purpose is to compute bits we don't care about.
  if (SimplifyDemandedInstructionBits(I))
    return &I;

  // Do this before using distributive laws to catch simple and/or/not patterns.
  if (Instruction *Xor = foldAndToXor(I, Builder))
    return Xor;

  // (A|B)&(A|C) -> A|(B&C) etc
  if (Value *V = SimplifyUsingDistributiveLaws(I))
    return replaceInstUsesWith(I, V);

  if (Value *V = SimplifyBSwap(I, Builder))
    return replaceInstUsesWith(I, V);

  // Folds below this point work on the two operands directly. This first group
  // requires the right-hand operand to match m_APInt (bound to C).
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  const APInt *C;
  if (match(Op1, m_APInt(C))) {
    Value *X, *Y;
    if (match(Op0, m_OneUse(m_LogicalShift(m_One(), m_Value(X)))) &&
        C->isOneValue()) {
      // (1 << X) & 1 --> zext(X == 0)
      // (1 >> X) & 1 --> zext(X == 0)
      Value *IsZero = Builder.CreateICmpEQ(X, ConstantInt::get(I.getType(), 0));
      return new ZExtInst(IsZero, I.getType());
    }

    const APInt *XorC;
    if (match(Op0, m_OneUse(m_Xor(m_Value(X), m_APInt(XorC))))) {
      // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
      Constant *NewC = ConstantInt::get(I.getType(), *C & *XorC);
      Value *And = Builder.CreateAnd(X, Op1);
      // The new 'and' takes over the name of the xor it replaces.
      And->takeName(Op0);
      return BinaryOperator::CreateXor(And, NewC);
    }

    const APInt *OrC;
    if (match(Op0, m_OneUse(m_Or(m_Value(X), m_APInt(OrC))))) {
      // (X | C1) & C2 --> (X & C2^(C1&C2)) | (C1&C2)
      // NOTE: This reduces the number of bits set in the & mask, which
      // can expose opportunities for store narrowing for scalars.
      // NOTE: SimplifyDemandedBits should have already removed bits from C1
      // that aren't set in C2. Meaning we can replace (C1&C2) with C1 in
      // above, but this feels safer.
      APInt Together = *C & *OrC;
      Value *And = Builder.CreateAnd(X, ConstantInt::get(I.getType(),
                                                         Together ^ *C));
      // The new 'and' takes over the name of the 'or' it replaces.
      And->takeName(Op0);
      return BinaryOperator::CreateOr(And, ConstantInt::get(I.getType(),
                                                            Together));
    }

    // If the mask is only needed on one incoming arm, push the 'and' op up.
    if (match(Op0, m_OneUse(m_Xor(m_Value(X), m_Value(Y)))) ||
        match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
      // NotAndMask holds the bits the 'and' would clear. An arm that is known
      // zero in all of those bits does not need the mask.
      APInt NotAndMask(~(*C));
      BinaryOperator::BinaryOps BinOp = cast<BinaryOperator>(Op0)->getOpcode();
      if (MaskedValueIsZero(X, NotAndMask, 0, &I)) {
        // Not masking anything out for the LHS, move mask to RHS.
        // and ({x}or X, Y), C --> {x}or X, (and Y, C)
        Value *NewRHS = Builder.CreateAnd(Y, Op1, Y->getName() + ".masked");
        return BinaryOperator::Create(BinOp, X, NewRHS);
      }
      if (!isa<Constant>(Y) && MaskedValueIsZero(Y, NotAndMask, 0, &I)) {
        // Not masking anything out for the RHS, move mask to LHS.
        // and ({x}or X, Y), C --> {x}or (and X, C), Y
        Value *NewLHS = Builder.CreateAnd(X, Op1, X->getName() + ".masked");
        return BinaryOperator::Create(BinOp, NewLHS, Y);
      }
    }

  }

  // This group requires the right-hand operand to be a ConstantInt.
  if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
    const APInt &AndRHSMask = AndRHS->getValue();

    // Optimize a variety of ((val OP C1) & C2) combinations...
    if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
      // ((C1 OP zext(X)) & C2) -> zext((C1 OP X) & C2) if C2 fits in the
      // bitwidth of X and OP behaves well when given trunc(C1) and X.
      switch (Op0I->getOpcode()) {
      default:
        break;
      case Instruction::Xor:
      case Instruction::Or:
      case Instruction::Mul:
      case Instruction::Add:
      case Instruction::Sub:
        Value *X;
        ConstantInt *C1;
        if (match(Op0I, m_c_BinOp(m_ZExt(m_Value(X)), m_ConstantInt(C1)))) {
          if (AndRHSMask.isIntN(X->getType()->getScalarSizeInBits())) {
            auto *TruncC1 = ConstantExpr::getTrunc(C1, X->getType());
            Value *BinOp;
            // The matcher above is commutative, so rebuild the narrow binop
            // with its operands in the same order as the original one.
            Value *Op0LHS = Op0I->getOperand(0);
            if (isa<ZExtInst>(Op0LHS))
              BinOp = Builder.CreateBinOp(Op0I->getOpcode(), X, TruncC1);
            else
              BinOp = Builder.CreateBinOp(Op0I->getOpcode(), TruncC1, X);
            auto *TruncC2 = ConstantExpr::getTrunc(AndRHS, X->getType());
            auto *And = Builder.CreateAnd(BinOp, TruncC2);
            return new ZExtInst(And, I.getType());
          }
        }
      }

      // Otherwise hand (binop ?, constant) & constant to OptAndOp.
      if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
        if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I))
          return Res;
    }

    // If this is an integer truncation, and if the source is an 'and' with
    // immediate, transform it.  This frequently occurs for bitfield accesses.
    {
      Value *X = nullptr; ConstantInt *YC = nullptr;
      if (match(Op0, m_Trunc(m_And(m_Value(X), m_ConstantInt(YC))))) {
        // Change: and (trunc (and X, YC) to T), C2
        // into  : and (trunc X to T), trunc(YC) & C2
        // This will fold the two constants together, which may allow
        // other simplifications.
        Value *NewCast = Builder.CreateTrunc(X, I.getType(), "and.shrunk");
        Constant *C3 = ConstantExpr::getTrunc(YC, I.getType());
        C3 = ConstantExpr::getAnd(C3, AndRHS);
        return BinaryOperator::CreateAnd(NewCast, C3);
      }
    }
  }

  if (Instruction *Z = narrowMaskedBinOp(I))
    return Z;

  if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
    return FoldedLogic;

  if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder))
    return DeMorgan;

  // Folds where one operand is an xor (or an xor/or pair) built from the same
  // values as the other operand.
  {
    Value *A, *B, *C;
    // A & (A ^ B) --> A & ~B
    if (match(Op1, m_OneUse(m_c_Xor(m_Specific(Op0), m_Value(B)))))
      return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(B));
    // (A ^ B) & A --> A & ~B
    if (match(Op0, m_OneUse(m_c_Xor(m_Specific(Op1), m_Value(B)))))
      return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(B));

    // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
    if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
      if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A))))
        if (Op1->hasOneUse() || IsFreeToInvert(C, C->hasOneUse()))
          return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(C));

    // ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C
    if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B))))
      if (match(Op1, m_Xor(m_Specific(B), m_Specific(A))))
        if (Op0->hasOneUse() || IsFreeToInvert(C, C->hasOneUse()))
          return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(C));

    // (A | B) & ((~A) ^ B) -> (A & B)
    // (A | B) & (B ^ (~A)) -> (A & B)
    // (B | A) & ((~A) ^ B) -> (A & B)
    // (B | A) & (B ^ (~A)) -> (A & B)
    if (match(Op1, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) &&
        match(Op0, m_c_Or(m_Specific(A), m_Specific(B))))
      return BinaryOperator::CreateAnd(A, B);

    // ((~A) ^ B) & (A | B) -> (A & B)
    // ((~A) ^ B) & (B | A) -> (A & B)
    // (B ^ (~A)) & (A | B) -> (A & B)
    // (B ^ (~A)) & (B | A) -> (A & B)
    if (match(Op0, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) &&
        match(Op1, m_c_Or(m_Specific(A), m_Specific(B))))
      return BinaryOperator::CreateAnd(A, B);
  }

  // Folds of integer compares: either both operands are icmps, or one operand
  // is an icmp and the other is a one-use 'and' that has an icmp operand.
  {
    ICmpInst *LHS = dyn_cast<ICmpInst>(Op0);
    ICmpInst *RHS = dyn_cast<ICmpInst>(Op1);
    if (LHS && RHS)
      if (Value *Res = foldAndOfICmps(LHS, RHS, I))
        return replaceInstUsesWith(I, Res);

    // TODO: Make this recursive; it's a little tricky because an arbitrary
    // number of 'and' instructions might have to be created.
    Value *X, *Y;
    if (LHS && match(Op1, m_OneUse(m_And(m_Value(X), m_Value(Y))))) {
      // Fold LHS with whichever inner operand is an icmp, then 'and' the
      // result with the remaining inner operand.
      if (auto *Cmp = dyn_cast<ICmpInst>(X))
        if (Value *Res = foldAndOfICmps(LHS, Cmp, I))
          return replaceInstUsesWith(I, Builder.CreateAnd(Res, Y));
      if (auto *Cmp = dyn_cast<ICmpInst>(Y))
        if (Value *Res = foldAndOfICmps(LHS, Cmp, I))
          return replaceInstUsesWith(I, Builder.CreateAnd(Res, X));
    }
    if (RHS && match(Op0, m_OneUse(m_And(m_Value(X), m_Value(Y))))) {
      // Mirror image of the case above, with the icmp on the right.
      if (auto *Cmp = dyn_cast<ICmpInst>(X))
        if (Value *Res = foldAndOfICmps(Cmp, RHS, I))
          return replaceInstUsesWith(I, Builder.CreateAnd(Res, Y));
      if (auto *Cmp = dyn_cast<ICmpInst>(Y))
        if (Value *Res = foldAndOfICmps(Cmp, RHS, I))
          return replaceInstUsesWith(I, Builder.CreateAnd(Res, X));
    }
  }

  // Both operands are floating-point compares.
  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
      if (Value *Res = foldLogicOfFCmps(LHS, RHS, true))
        return replaceInstUsesWith(I, Res);

  if (Instruction *CastedAnd = foldCastedBitwiseLogic(I))
    return CastedAnd;

  // and(sext(A), B) / and(B, sext(A)) --> A ? B : 0, where A is i1 or <N x i1>.
  Value *A;
  if (match(Op0, m_OneUse(m_SExt(m_Value(A)))) &&
      A->getType()->isIntOrIntVectorTy(1))
    return SelectInst::Create(A, Op1, Constant::getNullValue(I.getType()));
  if (match(Op1, m_OneUse(m_SExt(m_Value(A)))) &&
      A->getType()->isIntOrIntVectorTy(1))
    return SelectInst::Create(A, Op0, Constant::getNullValue(I.getType()));

  return nullptr;
}
|
|
|
|
|
2016-05-26 00:22:14 +08:00
|
|
|
/// Given an OR instruction, check to see if this is a bswap idiom. If so,
|
|
|
|
/// insert the new intrinsic and return it.
|
|
|
|
Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
|
2016-05-26 22:58:51 +08:00
|
|
|
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
|
|
|
|
|
|
|
|
// Look through zero extends.
|
|
|
|
if (Instruction *Ext = dyn_cast<ZExtInst>(Op0))
|
|
|
|
Op0 = Ext->getOperand(0);
|
|
|
|
|
|
|
|
if (Instruction *Ext = dyn_cast<ZExtInst>(Op1))
|
|
|
|
Op1 = Ext->getOperand(0);
|
|
|
|
|
|
|
|
// (A | B) | C and A | (B | C) -> bswap if possible.
|
|
|
|
bool OrOfOrs = match(Op0, m_Or(m_Value(), m_Value())) ||
|
|
|
|
match(Op1, m_Or(m_Value(), m_Value()));
|
|
|
|
|
|
|
|
// (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible.
|
|
|
|
bool OrOfShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
|
|
|
|
match(Op1, m_LogicalShift(m_Value(), m_Value()));
|
|
|
|
|
|
|
|
// (A & B) | (C & D) -> bswap if possible.
|
|
|
|
bool OrOfAnds = match(Op0, m_And(m_Value(), m_Value())) &&
|
|
|
|
match(Op1, m_And(m_Value(), m_Value()));
|
|
|
|
|
2018-05-01 20:25:46 +08:00
|
|
|
// (A << B) | (C & D) -> bswap if possible.
|
|
|
|
// The bigger pattern here is ((A & C1) << C2) | ((B >> C2) & C1), which is a
|
|
|
|
// part of the bswap idiom for specific values of C1, C2 (e.g. C1 = 16711935,
|
|
|
|
// C2 = 8 for i32).
|
|
|
|
// This pattern can occur when the operands of the 'or' are not canonicalized
|
|
|
|
// for some reason (not having only one use, for example).
|
|
|
|
bool OrOfAndAndSh = (match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
|
|
|
|
match(Op1, m_And(m_Value(), m_Value()))) ||
|
|
|
|
(match(Op0, m_And(m_Value(), m_Value())) &&
|
|
|
|
match(Op1, m_LogicalShift(m_Value(), m_Value())));
|
|
|
|
|
|
|
|
if (!OrOfOrs && !OrOfShifts && !OrOfAnds && !OrOfAndAndSh)
|
2016-05-26 22:58:51 +08:00
|
|
|
return nullptr;
|
|
|
|
|
2016-01-15 17:20:19 +08:00
|
|
|
SmallVector<Instruction*, 4> Insts;
|
2016-05-26 00:22:14 +08:00
|
|
|
if (!recognizeBSwapOrBitReverseIdiom(&I, true, false, Insts))
|
2015-12-11 18:04:51 +08:00
|
|
|
return nullptr;
|
2016-01-15 17:20:19 +08:00
|
|
|
Instruction *LastInst = Insts.pop_back_val();
|
|
|
|
LastInst->removeFromParent();
|
2015-12-15 01:24:23 +08:00
|
|
|
|
2016-01-15 17:20:19 +08:00
|
|
|
for (auto *Inst : Insts)
|
|
|
|
Worklist.Add(Inst);
|
|
|
|
return LastInst;
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
|
|
|
|
[InstCombine] extend vector select matching for non-splat constants
In D21740, we discussed trying to make this a more general matcher. However, I didn't see a clean
way to handle the regular m_Not cases and these non-splat vector patterns, so I've opted for the
direct approach here. If there are other potential uses of areInverseVectorBitmasks(), we could
move that helper function to a higher level.
There is an open question as to which of these forms should be considered the canonical IR:
%sel = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a, <4 x i32> %b
%shuf = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
Differential Revision: http://reviews.llvm.org/D22114
llvm-svn: 275289
2016-07-14 02:07:02 +08:00
|
|
|
/// If all elements of two constant vectors are 0/-1 and inverses, return true.
|
|
|
|
static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) {
|
|
|
|
unsigned NumElts = C1->getType()->getVectorNumElements();
|
|
|
|
for (unsigned i = 0; i != NumElts; ++i) {
|
|
|
|
Constant *EltC1 = C1->getAggregateElement(i);
|
|
|
|
Constant *EltC2 = C2->getAggregateElement(i);
|
|
|
|
if (!EltC1 || !EltC2)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// One element must be all ones, and the other must be all zeros.
|
|
|
|
if (!((match(EltC1, m_Zero()) && match(EltC2, m_AllOnes())) ||
|
|
|
|
(match(EltC2, m_Zero()) && match(EltC1, m_AllOnes()))))
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2016-06-30 22:18:18 +08:00
|
|
|
/// We have an expression of the form (A & C) | (B & D). If A is a scalar or
|
|
|
|
/// vector composed of all-zeros or all-ones values and is the bitwise 'not' of
|
|
|
|
/// B, it can be used as the condition operand of a select instruction.
|
2018-10-16 23:26:08 +08:00
|
|
|
static Value *getSelectCondition(Value *A, Value *B,
|
|
|
|
InstCombiner::BuilderTy &Builder) {
|
|
|
|
// If these are scalars or vectors of i1, A can be used directly.
|
2016-06-30 22:18:18 +08:00
|
|
|
Type *Ty = A->getType();
|
2018-10-16 23:26:08 +08:00
|
|
|
if (match(A, m_Not(m_Specific(B))) && Ty->isIntOrIntVectorTy(1))
|
|
|
|
return A;
|
2016-06-30 22:18:18 +08:00
|
|
|
|
2018-10-16 23:26:08 +08:00
|
|
|
// If A and B are sign-extended, look through the sexts to find the booleans.
|
2016-06-30 22:18:18 +08:00
|
|
|
Value *Cond;
|
2017-06-22 23:46:54 +08:00
|
|
|
Value *NotB;
|
2016-06-30 22:18:18 +08:00
|
|
|
if (match(A, m_SExt(m_Value(Cond))) &&
|
2017-07-09 15:04:03 +08:00
|
|
|
Cond->getType()->isIntOrIntVectorTy(1) &&
|
2017-06-22 23:46:54 +08:00
|
|
|
match(B, m_OneUse(m_Not(m_Value(NotB))))) {
|
|
|
|
NotB = peekThroughBitcast(NotB, true);
|
|
|
|
if (match(NotB, m_SExt(m_Specific(Cond))))
|
|
|
|
return Cond;
|
|
|
|
}
|
2016-06-30 22:18:18 +08:00
|
|
|
|
[InstCombine] extend vector select matching for non-splat constants
In D21740, we discussed trying to make this a more general matcher. However, I didn't see a clean
way to handle the regular m_Not cases and these non-splat vector patterns, so I've opted for the
direct approach here. If there are other potential uses of areInverseVectorBitmasks(), we could
move that helper function to a higher level.
There is an open question as to which is of these forms should be considered the canonical IR:
%sel = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a, <4 x i32> %b
%shuf = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
Differential Revision: http://reviews.llvm.org/D22114
llvm-svn: 275289
2016-07-14 02:07:02 +08:00
|
|
|
// All scalar (and most vector) possibilities should be handled now.
|
|
|
|
// Try more matches that only apply to non-splat constant vectors.
|
|
|
|
if (!Ty->isVectorTy())
|
|
|
|
return nullptr;
|
|
|
|
|
2018-10-16 23:26:08 +08:00
|
|
|
// If both operands are constants, see if the constants are inverse bitmasks.
|
|
|
|
Constant *AC, *BC;
|
|
|
|
if (match(A, m_Constant(AC)) && match(B, m_Constant(BC)) &&
|
|
|
|
areInverseVectorBitmasks(AC, BC)) {
|
|
|
|
return Builder.CreateZExtOrTrunc(AC, CmpInst::makeCmpResultType(Ty));
|
|
|
|
}
|
|
|
|
|
[InstCombine] extend vector select matching for non-splat constants
In D21740, we discussed trying to make this a more general matcher. However, I didn't see a clean
way to handle the regular m_Not cases and these non-splat vector patterns, so I've opted for the
direct approach here. If there are other potential uses of areInverseVectorBitmasks(), we could
move that helper function to a higher level.
There is an open question as to which is of these forms should be considered the canonical IR:
%sel = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a, <4 x i32> %b
%shuf = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
Differential Revision: http://reviews.llvm.org/D22114
llvm-svn: 275289
2016-07-14 02:07:02 +08:00
|
|
|
// If both operands are xor'd with constants using the same sexted boolean
|
|
|
|
// operand, see if the constants are inverse bitmasks.
|
2018-10-16 23:26:08 +08:00
|
|
|
if (match(A, (m_Xor(m_SExt(m_Value(Cond)), m_Constant(AC)))) &&
|
|
|
|
match(B, (m_Xor(m_SExt(m_Specific(Cond)), m_Constant(BC)))) &&
|
2017-07-09 15:04:03 +08:00
|
|
|
Cond->getType()->isIntOrIntVectorTy(1) &&
|
2018-10-16 23:26:08 +08:00
|
|
|
areInverseVectorBitmasks(AC, BC)) {
|
|
|
|
AC = ConstantExpr::getTrunc(AC, CmpInst::makeCmpResultType(Ty));
|
|
|
|
return Builder.CreateXor(Cond, AC);
|
[InstCombine] extend vector select matching for non-splat constants
In D21740, we discussed trying to make this a more general matcher. However, I didn't see a clean
way to handle the regular m_Not cases and these non-splat vector patterns, so I've opted for the
direct approach here. If there are other potential uses of areInverseVectorBitmasks(), we could
move that helper function to a higher level.
There is an open question as to which is of these forms should be considered the canonical IR:
%sel = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a, <4 x i32> %b
%shuf = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
Differential Revision: http://reviews.llvm.org/D22114
llvm-svn: 275289
2016-07-14 02:07:02 +08:00
|
|
|
}
|
2016-06-30 22:18:18 +08:00
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// We have an expression of the form (A & C) | (B & D). Try to simplify this
|
|
|
|
/// to "A' ? C : D", where A' is a boolean or vector of booleans.
|
2018-10-16 23:26:08 +08:00
|
|
|
static Value *matchSelectFromAndOr(Value *A, Value *C, Value *B, Value *D,
|
|
|
|
InstCombiner::BuilderTy &Builder) {
|
2016-06-25 02:55:27 +08:00
|
|
|
// The potential condition of the select may be bitcasted. In that case, look
|
|
|
|
// through its bitcast and the corresponding bitcast of the 'not' condition.
|
|
|
|
Type *OrigType = A->getType();
|
2017-06-22 23:28:01 +08:00
|
|
|
A = peekThroughBitcast(A, true);
|
|
|
|
B = peekThroughBitcast(B, true);
|
[InstCombine] look through bitcasts to find selects
There was concern that creating bitcasts for the simpler potential select pattern:
define <2 x i64> @vecBitcastOp1(<4 x i1> %cmp, <2 x i64> %a) {
%a2 = add <2 x i64> %a, %a
%sext = sext <4 x i1> %cmp to <4 x i32>
%bc = bitcast <4 x i32> %sext to <2 x i64>
%and = and <2 x i64> %a2, %bc
ret <2 x i64> %and
}
might lead to worse code for some targets, so this patch is matching the larger
patterns seen in the test cases.
The motivating example for this patch is this IR produced via SSE intrinsics in C:
define <2 x i64> @gibson(<2 x i64> %a, <2 x i64> %b) {
%t0 = bitcast <2 x i64> %a to <4 x i32>
%t1 = bitcast <2 x i64> %b to <4 x i32>
%cmp = icmp sgt <4 x i32> %t0, %t1
%sext = sext <4 x i1> %cmp to <4 x i32>
%t2 = bitcast <4 x i32> %sext to <2 x i64>
%and = and <2 x i64> %t2, %a
%neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
%neg2 = bitcast <4 x i32> %neg to <2 x i64>
%and2 = and <2 x i64> %neg2, %b
%or = or <2 x i64> %and, %and2
ret <2 x i64> %or
}
For an AVX target, this is currently:
vpcmpgtd %xmm1, %xmm0, %xmm2
vpand %xmm0, %xmm2, %xmm0
vpandn %xmm1, %xmm2, %xmm1
vpor %xmm1, %xmm0, %xmm0
retq
With this patch, it becomes:
vpmaxsd %xmm1, %xmm0, %xmm0
Differential Revision: http://reviews.llvm.org/D20774
llvm-svn: 271676
2016-06-03 22:42:07 +08:00
|
|
|
|
2018-10-16 23:26:08 +08:00
|
|
|
if (Value *Cond = getSelectCondition(A, B, Builder)) {
|
[InstCombine] look through bitcasts to find selects
There was concern that creating bitcasts for the simpler potential select pattern:
define <2 x i64> @vecBitcastOp1(<4 x i1> %cmp, <2 x i64> %a) {
%a2 = add <2 x i64> %a, %a
%sext = sext <4 x i1> %cmp to <4 x i32>
%bc = bitcast <4 x i32> %sext to <2 x i64>
%and = and <2 x i64> %a2, %bc
ret <2 x i64> %and
}
might lead to worse code for some targets, so this patch is matching the larger
patterns seen in the test cases.
The motivating example for this patch is this IR produced via SSE intrinsics in C:
define <2 x i64> @gibson(<2 x i64> %a, <2 x i64> %b) {
%t0 = bitcast <2 x i64> %a to <4 x i32>
%t1 = bitcast <2 x i64> %b to <4 x i32>
%cmp = icmp sgt <4 x i32> %t0, %t1
%sext = sext <4 x i1> %cmp to <4 x i32>
%t2 = bitcast <4 x i32> %sext to <2 x i64>
%and = and <2 x i64> %t2, %a
%neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
%neg2 = bitcast <4 x i32> %neg to <2 x i64>
%and2 = and <2 x i64> %neg2, %b
%or = or <2 x i64> %and, %and2
ret <2 x i64> %or
}
For an AVX target, this is currently:
vpcmpgtd %xmm1, %xmm0, %xmm2
vpand %xmm0, %xmm2, %xmm0
vpandn %xmm1, %xmm2, %xmm1
vpor %xmm1, %xmm0, %xmm0
retq
With this patch, it becomes:
vpmaxsd %xmm1, %xmm0, %xmm0
Differential Revision: http://reviews.llvm.org/D20774
llvm-svn: 271676
2016-06-03 22:42:07 +08:00
|
|
|
// ((bc Cond) & C) | ((bc ~Cond) & D) --> bc (select Cond, (bc C), (bc D))
|
2016-06-25 02:55:27 +08:00
|
|
|
// The bitcasts will either all exist or all not exist. The builder will
|
|
|
|
// not create unnecessary casts if the types already match.
|
|
|
|
Value *BitcastC = Builder.CreateBitCast(C, A->getType());
|
|
|
|
Value *BitcastD = Builder.CreateBitCast(D, A->getType());
|
|
|
|
Value *Select = Builder.CreateSelect(Cond, BitcastC, BitcastD);
|
|
|
|
return Builder.CreateBitCast(Select, OrigType);
|
[InstCombine] look through bitcasts to find selects
There was concern that creating bitcasts for the simpler potential select pattern:
define <2 x i64> @vecBitcastOp1(<4 x i1> %cmp, <2 x i64> %a) {
%a2 = add <2 x i64> %a, %a
%sext = sext <4 x i1> %cmp to <4 x i32>
%bc = bitcast <4 x i32> %sext to <2 x i64>
%and = and <2 x i64> %a2, %bc
ret <2 x i64> %and
}
might lead to worse code for some targets, so this patch is matching the larger
patterns seen in the test cases.
The motivating example for this patch is this IR produced via SSE intrinsics in C:
define <2 x i64> @gibson(<2 x i64> %a, <2 x i64> %b) {
%t0 = bitcast <2 x i64> %a to <4 x i32>
%t1 = bitcast <2 x i64> %b to <4 x i32>
%cmp = icmp sgt <4 x i32> %t0, %t1
%sext = sext <4 x i1> %cmp to <4 x i32>
%t2 = bitcast <4 x i32> %sext to <2 x i64>
%and = and <2 x i64> %t2, %a
%neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
%neg2 = bitcast <4 x i32> %neg to <2 x i64>
%and2 = and <2 x i64> %neg2, %b
%or = or <2 x i64> %and, %and2
ret <2 x i64> %or
}
For an AVX target, this is currently:
vpcmpgtd %xmm1, %xmm0, %xmm2
vpand %xmm0, %xmm2, %xmm0
vpandn %xmm1, %xmm2, %xmm1
vpor %xmm1, %xmm0, %xmm0
retq
With this patch, it becomes:
vpmaxsd %xmm1, %xmm0, %xmm0
Differential Revision: http://reviews.llvm.org/D20774
llvm-svn: 271676
2016-06-03 22:42:07 +08:00
|
|
|
}
|
2016-06-03 02:03:05 +08:00
|
|
|
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
|
|
|
|
2015-09-09 02:24:36 +08:00
|
|
|
/// Fold (icmp)|(icmp) if possible.
|
2017-05-19 04:53:16 +08:00
|
|
|
Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
|
2017-06-16 03:09:51 +08:00
|
|
|
Instruction &CxtI) {
|
2013-11-13 06:38:59 +08:00
|
|
|
// Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
|
|
|
|
// if K1 and K2 are a one-bit mask.
|
2017-06-16 13:10:37 +08:00
|
|
|
if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, false, CxtI))
|
|
|
|
return V;
|
|
|
|
|
|
|
|
ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
|
|
|
|
|
2017-04-06 01:38:34 +08:00
|
|
|
ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS->getOperand(1));
|
|
|
|
ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS->getOperand(1));
|
2013-11-13 06:38:59 +08:00
|
|
|
|
2014-08-21 06:55:40 +08:00
|
|
|
// Fold (icmp ult/ule (A + C1), C3) | (icmp ult/ule (A + C2), C3)
|
|
|
|
// --> (icmp ult/ule ((A & ~(C1 ^ C2)) + max(C1, C2)), C3)
|
|
|
|
// The original condition actually refers to the following two ranges:
|
|
|
|
// [MAX_UINT-C1+1, MAX_UINT-C1+1+C3] and [MAX_UINT-C2+1, MAX_UINT-C2+1+C3]
|
|
|
|
// We can fold these two ranges if:
|
|
|
|
// 1) C1 and C2 are both unsigned greater than C3.
|
|
|
|
// 2) The two ranges are separated.
|
|
|
|
// 3) C1 ^ C2 is a one-bit mask.
|
|
|
|
// 4) LowRange1 ^ LowRange2 and HighRange1 ^ HighRange2 are one-bit masks.
|
|
|
|
// This implies all values in the two ranges differ by exactly one bit.
|
|
|
|
|
2017-04-06 01:38:34 +08:00
|
|
|
if ((PredL == ICmpInst::ICMP_ULT || PredL == ICmpInst::ICMP_ULE) &&
|
|
|
|
PredL == PredR && LHSC && RHSC && LHS->hasOneUse() && RHS->hasOneUse() &&
|
|
|
|
LHSC->getType() == RHSC->getType() &&
|
|
|
|
LHSC->getValue() == (RHSC->getValue())) {
|
2014-08-21 06:55:40 +08:00
|
|
|
|
|
|
|
Value *LAdd = LHS->getOperand(0);
|
|
|
|
Value *RAdd = RHS->getOperand(0);
|
|
|
|
|
|
|
|
Value *LAddOpnd, *RAddOpnd;
|
2017-04-06 01:38:34 +08:00
|
|
|
ConstantInt *LAddC, *RAddC;
|
|
|
|
if (match(LAdd, m_Add(m_Value(LAddOpnd), m_ConstantInt(LAddC))) &&
|
|
|
|
match(RAdd, m_Add(m_Value(RAddOpnd), m_ConstantInt(RAddC))) &&
|
|
|
|
LAddC->getValue().ugt(LHSC->getValue()) &&
|
|
|
|
RAddC->getValue().ugt(LHSC->getValue())) {
|
|
|
|
|
|
|
|
APInt DiffC = LAddC->getValue() ^ RAddC->getValue();
|
|
|
|
if (LAddOpnd == RAddOpnd && DiffC.isPowerOf2()) {
|
|
|
|
ConstantInt *MaxAddC = nullptr;
|
|
|
|
if (LAddC->getValue().ult(RAddC->getValue()))
|
|
|
|
MaxAddC = RAddC;
|
2014-08-21 06:55:40 +08:00
|
|
|
else
|
2017-04-06 01:38:34 +08:00
|
|
|
MaxAddC = LAddC;
|
2014-08-21 06:55:40 +08:00
|
|
|
|
2017-04-06 01:38:34 +08:00
|
|
|
APInt RRangeLow = -RAddC->getValue();
|
|
|
|
APInt RRangeHigh = RRangeLow + LHSC->getValue();
|
|
|
|
APInt LRangeLow = -LAddC->getValue();
|
|
|
|
APInt LRangeHigh = LRangeLow + LHSC->getValue();
|
2014-08-21 06:55:40 +08:00
|
|
|
APInt LowRangeDiff = RRangeLow ^ LRangeLow;
|
|
|
|
APInt HighRangeDiff = RRangeHigh ^ LRangeHigh;
|
|
|
|
APInt RangeDiff = LRangeLow.sgt(RRangeLow) ? LRangeLow - RRangeLow
|
|
|
|
: RRangeLow - LRangeLow;
|
|
|
|
|
|
|
|
if (LowRangeDiff.isPowerOf2() && LowRangeDiff == HighRangeDiff &&
|
2017-04-06 01:38:34 +08:00
|
|
|
RangeDiff.ugt(LHSC->getValue())) {
|
|
|
|
Value *MaskC = ConstantInt::get(LAddC->getType(), ~DiffC);
|
2014-08-21 06:55:40 +08:00
|
|
|
|
2017-07-08 07:16:26 +08:00
|
|
|
Value *NewAnd = Builder.CreateAnd(LAddOpnd, MaskC);
|
|
|
|
Value *NewAdd = Builder.CreateAdd(NewAnd, MaxAddC);
|
|
|
|
return Builder.CreateICmp(LHS->getPredicate(), NewAdd, LHSC);
|
2014-08-21 06:55:40 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-01-05 15:50:36 +08:00
|
|
|
// (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
|
2017-04-06 01:38:34 +08:00
|
|
|
if (PredicatesFoldable(PredL, PredR)) {
|
2010-01-05 15:50:36 +08:00
|
|
|
if (LHS->getOperand(0) == RHS->getOperand(1) &&
|
|
|
|
LHS->getOperand(1) == RHS->getOperand(0))
|
|
|
|
LHS->swapOperands();
|
|
|
|
if (LHS->getOperand(0) == RHS->getOperand(0) &&
|
|
|
|
LHS->getOperand(1) == RHS->getOperand(1)) {
|
|
|
|
Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
|
|
|
|
unsigned Code = getICmpCode(LHS) | getICmpCode(RHS);
|
|
|
|
bool isSigned = LHS->isSigned() || RHS->isSigned();
|
2011-12-17 09:20:32 +08:00
|
|
|
return getNewICmpValue(isSigned, Code, Op0, Op1, Builder);
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
|
|
|
}
|
2010-12-21 00:21:59 +08:00
|
|
|
|
|
|
|
// handle (roughly):
|
|
|
|
// (icmp ne (A & B), C) | (icmp ne (A & D), E)
|
2013-09-04 19:57:13 +08:00
|
|
|
if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, false, Builder))
|
2010-12-21 00:21:59 +08:00
|
|
|
return V;
|
2010-09-09 06:16:17 +08:00
|
|
|
|
2017-04-11 03:38:36 +08:00
|
|
|
Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
|
InstCombine: (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1)
This transform allows us to turn IR that looks like:
%1 = icmp eq i64 %b, 0
%2 = icmp ult i64 %a, %b
%3 = or i1 %1, %2
ret i1 %3
into:
%0 = add i64 %b, -1
%1 = icmp uge i64 %0, %a
ret i1 %1
which means we go from lowering:
cmpq %rsi, %rdi
setb %cl
testq %rsi, %rsi
sete %al
orb %cl, %al
ret
to lowering:
decq %rsi
cmpq %rdi, %rsi
setae %al
ret
llvm-svn: 185677
2013-07-05 08:31:17 +08:00
|
|
|
if (LHS->hasOneUse() || RHS->hasOneUse()) {
|
|
|
|
// (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1)
|
|
|
|
// (icmp eq B, 0) | (icmp ugt B, A) -> (icmp ule A, B-1)
|
2014-04-25 13:29:35 +08:00
|
|
|
Value *A = nullptr, *B = nullptr;
|
2017-04-06 01:38:34 +08:00
|
|
|
if (PredL == ICmpInst::ICMP_EQ && LHSC && LHSC->isZero()) {
|
2017-04-11 03:38:36 +08:00
|
|
|
B = LHS0;
|
|
|
|
if (PredR == ICmpInst::ICMP_ULT && LHS0 == RHS->getOperand(1))
|
|
|
|
A = RHS0;
|
|
|
|
else if (PredR == ICmpInst::ICMP_UGT && LHS0 == RHS0)
|
InstCombine: (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1)
This transform allows us to turn IR that looks like:
%1 = icmp eq i64 %b, 0
%2 = icmp ult i64 %a, %b
%3 = or i1 %1, %2
ret i1 %3
into:
%0 = add i64 %b, -1
%1 = icmp uge i64 %0, %a
ret i1 %1
which means we go from lowering:
cmpq %rsi, %rdi
setb %cl
testq %rsi, %rsi
sete %al
orb %cl, %al
ret
to lowering:
decq %rsi
cmpq %rdi, %rsi
setae %al
ret
llvm-svn: 185677
2013-07-05 08:31:17 +08:00
|
|
|
A = RHS->getOperand(1);
|
|
|
|
}
|
|
|
|
// (icmp ult A, B) | (icmp eq B, 0) -> (icmp ule A, B-1)
|
|
|
|
// (icmp ugt B, A) | (icmp eq B, 0) -> (icmp ule A, B-1)
|
2017-04-06 01:38:34 +08:00
|
|
|
else if (PredR == ICmpInst::ICMP_EQ && RHSC && RHSC->isZero()) {
|
2017-04-11 03:38:36 +08:00
|
|
|
B = RHS0;
|
|
|
|
if (PredL == ICmpInst::ICMP_ULT && RHS0 == LHS->getOperand(1))
|
|
|
|
A = LHS0;
|
|
|
|
else if (PredL == ICmpInst::ICMP_UGT && LHS0 == RHS0)
|
InstCombine: (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1)
This transform allows us to turn IR that looks like:
%1 = icmp eq i64 %b, 0
%2 = icmp ult i64 %a, %b
%3 = or i1 %1, %2
ret i1 %3
into:
%0 = add i64 %b, -1
%1 = icmp uge i64 %0, %a
ret i1 %1
which means we go from lowering:
cmpq %rsi, %rdi
setb %cl
testq %rsi, %rsi
sete %al
orb %cl, %al
ret
to lowering:
decq %rsi
cmpq %rdi, %rsi
setae %al
ret
llvm-svn: 185677
2013-07-05 08:31:17 +08:00
|
|
|
A = LHS->getOperand(1);
|
|
|
|
}
|
|
|
|
if (A && B)
|
2017-07-08 07:16:26 +08:00
|
|
|
return Builder.CreateICmp(
|
InstCombine: (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1)
This transform allows us to turn IR that looks like:
%1 = icmp eq i64 %b, 0
%2 = icmp ult i64 %a, %b
%3 = or i1 %1, %2
ret i1 %3
into:
%0 = add i64 %b, -1
%1 = icmp uge i64 %0, %a
ret i1 %1
which means we go from lowering:
cmpq %rsi, %rdi
setb %cl
testq %rsi, %rsi
sete %al
orb %cl, %al
ret
to lowering:
decq %rsi
cmpq %rdi, %rsi
setae %al
ret
llvm-svn: 185677
2013-07-05 08:31:17 +08:00
|
|
|
ICmpInst::ICMP_UGE,
|
2017-07-08 07:16:26 +08:00
|
|
|
Builder.CreateAdd(B, ConstantInt::getSigned(B->getType(), -1)), A);
|
InstCombine: (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1)
This transform allows us to turn IR that looks like:
%1 = icmp eq i64 %b, 0
%2 = icmp ult i64 %a, %b
%3 = or i1 %1, %2
ret i1 %3
into:
%0 = add i64 %b, -1
%1 = icmp uge i64 %0, %a
ret i1 %1
which means we go from lowering:
cmpq %rsi, %rdi
setb %cl
testq %rsi, %rsi
sete %al
orb %cl, %al
ret
to lowering:
decq %rsi
cmpq %rdi, %rsi
setae %al
ret
llvm-svn: 185677
2013-07-05 08:31:17 +08:00
|
|
|
}
|
|
|
|
|
2014-12-03 18:39:15 +08:00
|
|
|
// E.g. (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n
|
|
|
|
if (Value *V = simplifyRangeCheck(LHS, RHS, /*Inverted=*/true))
|
|
|
|
return V;
|
|
|
|
|
|
|
|
// E.g. (icmp sgt x, n) | (icmp slt x, 0) --> icmp ugt x, n
|
|
|
|
if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/true))
|
|
|
|
return V;
|
2016-08-05 09:09:48 +08:00
|
|
|
|
2017-04-16 01:55:06 +08:00
|
|
|
if (Value *V = foldAndOrOfEqualityCmpsWithConstants(LHS, RHS, false, Builder))
|
|
|
|
return V;
|
|
|
|
|
InstCombine: (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1)
This transform allows us to turn IR that looks like:
%1 = icmp eq i64 %b, 0
%2 = icmp ult i64 %a, %b
%3 = or i1 %1, %2
ret i1 %3
into:
%0 = add i64 %b, -1
%1 = icmp uge i64 %0, %a
ret i1 %1
which means we go from lowering:
cmpq %rsi, %rdi
setb %cl
testq %rsi, %rsi
sete %al
orb %cl, %al
ret
to lowering:
decq %rsi
cmpq %rdi, %rsi
setae %al
ret
llvm-svn: 185677
2013-07-05 08:31:17 +08:00
|
|
|
// This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
|
2017-04-06 01:38:34 +08:00
|
|
|
if (!LHSC || !RHSC)
|
|
|
|
return nullptr;
|
2010-01-05 15:50:36 +08:00
|
|
|
|
2017-04-06 01:38:34 +08:00
|
|
|
if (LHSC == RHSC && PredL == PredR) {
|
2010-08-02 17:32:13 +08:00
|
|
|
// (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
|
2017-04-06 01:38:34 +08:00
|
|
|
if (PredL == ICmpInst::ICMP_NE && LHSC->isZero()) {
|
2017-07-08 07:16:26 +08:00
|
|
|
Value *NewOr = Builder.CreateOr(LHS0, RHS0);
|
|
|
|
return Builder.CreateICmp(PredL, NewOr, LHSC);
|
2010-08-02 17:32:13 +08:00
|
|
|
}
|
2012-01-09 02:32:24 +08:00
|
|
|
}
|
|
|
|
|
2010-12-21 04:00:31 +08:00
|
|
|
// (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1)
|
2012-09-27 18:14:43 +08:00
|
|
|
// iff C2 + CA == C1.
|
2017-04-06 01:38:34 +08:00
|
|
|
if (PredL == ICmpInst::ICMP_ULT && PredR == ICmpInst::ICMP_EQ) {
|
|
|
|
ConstantInt *AddC;
|
2017-04-11 03:38:36 +08:00
|
|
|
if (match(LHS0, m_Add(m_Specific(RHS0), m_ConstantInt(AddC))))
|
2017-04-06 01:38:34 +08:00
|
|
|
if (RHSC->getValue() + AddC->getValue() == LHSC->getValue())
|
2017-07-08 07:16:26 +08:00
|
|
|
return Builder.CreateICmpULE(LHS0, LHSC);
|
2010-12-21 00:18:51 +08:00
|
|
|
}
|
|
|
|
|
2010-01-05 15:50:36 +08:00
|
|
|
// From here on, we only handle:
|
|
|
|
// (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
|
2017-04-11 03:38:36 +08:00
|
|
|
if (LHS0 != RHS0)
|
2017-04-06 01:38:34 +08:00
|
|
|
return nullptr;
|
2012-12-20 15:09:41 +08:00
|
|
|
|
2017-04-06 01:38:34 +08:00
|
|
|
// ICMP_[US][GL]E X, C is folded to ICMP_[US][GL]T elsewhere.
|
|
|
|
if (PredL == ICmpInst::ICMP_UGE || PredL == ICmpInst::ICMP_ULE ||
|
|
|
|
PredR == ICmpInst::ICMP_UGE || PredR == ICmpInst::ICMP_ULE ||
|
|
|
|
PredL == ICmpInst::ICMP_SGE || PredL == ICmpInst::ICMP_SLE ||
|
|
|
|
PredR == ICmpInst::ICMP_SGE || PredR == ICmpInst::ICMP_SLE)
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2012-12-20 15:09:41 +08:00
|
|
|
|
2010-01-05 15:50:36 +08:00
|
|
|
// We can't fold (ugt x, C) | (sgt x, C2).
|
2017-04-06 01:38:34 +08:00
|
|
|
if (!PredicatesFoldable(PredL, PredR))
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2012-12-20 15:09:41 +08:00
|
|
|
|
2010-01-05 15:50:36 +08:00
|
|
|
// Ensure that the larger constant is on the RHS.
|
|
|
|
bool ShouldSwap;
|
2017-04-11 23:57:32 +08:00
|
|
|
if (CmpInst::isSigned(PredL) ||
|
|
|
|
(ICmpInst::isEquality(PredL) && CmpInst::isSigned(PredR)))
|
2017-04-11 00:55:57 +08:00
|
|
|
ShouldSwap = LHSC->getValue().sgt(RHSC->getValue());
|
2017-04-11 23:57:32 +08:00
|
|
|
else
|
|
|
|
ShouldSwap = LHSC->getValue().ugt(RHSC->getValue());
|
2012-12-20 15:09:41 +08:00
|
|
|
|
2010-01-05 15:50:36 +08:00
|
|
|
if (ShouldSwap) {
|
|
|
|
std::swap(LHS, RHS);
|
2017-04-06 01:38:34 +08:00
|
|
|
std::swap(LHSC, RHSC);
|
|
|
|
std::swap(PredL, PredR);
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
2012-12-20 15:09:41 +08:00
|
|
|
|
2010-02-11 00:03:48 +08:00
|
|
|
// At this point, we know we have two icmp instructions
|
2010-01-05 15:50:36 +08:00
|
|
|
// comparing a value against two constants and or'ing the result
|
|
|
|
// together. Because of the above check, we know that we only have
|
|
|
|
// ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the
|
|
|
|
// icmp folding check above), that the two constants are not
|
|
|
|
// equal.
|
2017-04-06 01:38:34 +08:00
|
|
|
assert(LHSC != RHSC && "Compares not folded above?");
|
2010-01-05 15:50:36 +08:00
|
|
|
|
2017-04-06 01:38:34 +08:00
|
|
|
switch (PredL) {
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unknown integer condition code!");
|
2010-01-05 15:50:36 +08:00
|
|
|
case ICmpInst::ICMP_EQ:
|
2017-04-06 01:38:34 +08:00
|
|
|
switch (PredR) {
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unknown integer condition code!");
|
2010-01-05 15:50:36 +08:00
|
|
|
case ICmpInst::ICMP_EQ:
|
2017-04-15 03:23:50 +08:00
|
|
|
// Potential folds for this case should already be handled.
|
|
|
|
break;
|
2017-04-06 01:38:34 +08:00
|
|
|
case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change
|
|
|
|
case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change
|
2010-01-05 15:50:36 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case ICmpInst::ICMP_ULT:
|
2017-04-06 01:38:34 +08:00
|
|
|
switch (PredR) {
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unknown integer condition code!");
|
|
|
|
case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change
|
2010-01-05 15:50:36 +08:00
|
|
|
break;
|
2017-04-06 01:38:34 +08:00
|
|
|
case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2
|
2017-05-07 23:11:40 +08:00
|
|
|
assert(!RHSC->isMaxValue(false) && "Missed icmp simplification");
|
2017-04-11 03:38:36 +08:00
|
|
|
return insertRangeTest(LHS0, LHSC->getValue(), RHSC->getValue() + 1,
|
|
|
|
false, false);
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case ICmpInst::ICMP_SLT:
|
2017-04-06 01:38:34 +08:00
|
|
|
switch (PredR) {
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unknown integer condition code!");
|
|
|
|
case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change
|
2010-01-05 15:50:36 +08:00
|
|
|
break;
|
2017-04-06 01:38:34 +08:00
|
|
|
case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2
|
2017-05-07 23:11:40 +08:00
|
|
|
assert(!RHSC->isMaxValue(true) && "Missed icmp simplification");
|
2017-04-11 03:38:36 +08:00
|
|
|
return insertRangeTest(LHS0, LHSC->getValue(), RHSC->getValue() + 1, true,
|
2017-04-06 01:38:34 +08:00
|
|
|
false);
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2014-04-25 13:29:35 +08:00
|
|
|
return nullptr;
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
|
|
|
|
2016-12-19 02:49:48 +08:00
|
|
|
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
|
|
|
|
// here. We should standardize that construct where it is needed or choose some
|
|
|
|
// other way to ensure that commutated variants of patterns are not missed.
|
2010-01-05 15:50:36 +08:00
|
|
|
Instruction *InstCombiner::visitOr(BinaryOperator &I) {
|
2018-06-22 01:06:36 +08:00
|
|
|
if (Value *V = SimplifyOrInst(I.getOperand(0), I.getOperand(1),
|
|
|
|
SQ.getWithInstruction(&I)))
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(I, V);
|
2010-03-03 08:35:56 +08:00
|
|
|
|
2018-07-13 09:18:07 +08:00
|
|
|
if (SimplifyAssociativeOrCommutative(I))
|
|
|
|
return &I;
|
|
|
|
|
2018-10-03 23:20:58 +08:00
|
|
|
if (Instruction *X = foldVectorBinop(I))
|
2018-06-03 00:27:44 +08:00
|
|
|
return X;
|
|
|
|
|
2012-12-20 15:09:41 +08:00
|
|
|
// See if we can simplify any instructions used by the instruction whose sole
|
2010-01-05 15:50:36 +08:00
|
|
|
// purpose is to compute bits we don't care about.
|
|
|
|
if (SimplifyDemandedInstructionBits(I))
|
|
|
|
return &I;
|
|
|
|
|
2017-04-24 06:00:02 +08:00
|
|
|
// Do this before using distributive laws to catch simple and/or/not patterns.
|
2017-07-08 07:16:26 +08:00
|
|
|
if (Instruction *Xor = foldOrToXor(I, Builder))
|
2017-04-24 06:00:02 +08:00
|
|
|
return Xor;
|
|
|
|
|
|
|
|
// (A&B)|(A&C) -> A&(B|C) etc
|
|
|
|
if (Value *V = SimplifyUsingDistributiveLaws(I))
|
|
|
|
return replaceInstUsesWith(I, V);
|
|
|
|
|
2017-07-07 00:24:23 +08:00
|
|
|
if (Value *V = SimplifyBSwap(I, Builder))
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(I, V);
|
2014-12-04 17:44:01 +08:00
|
|
|
|
2018-03-01 00:36:24 +08:00
|
|
|
if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
|
|
|
|
return FoldedLogic;
|
2010-01-05 15:50:36 +08:00
|
|
|
|
2016-05-26 22:58:51 +08:00
|
|
|
if (Instruction *BSwap = MatchBSwap(I))
|
|
|
|
return BSwap;
|
|
|
|
|
2018-09-01 23:08:59 +08:00
|
|
|
Value *X, *Y;
|
|
|
|
const APInt *CV;
|
|
|
|
if (match(&I, m_c_Or(m_OneUse(m_Xor(m_Value(X), m_APInt(CV))), m_Value(Y))) &&
|
|
|
|
!CV->isAllOnesValue() && MaskedValueIsZero(Y, *CV, 0, &I)) {
|
|
|
|
// (X ^ C) | Y -> (X | Y) ^ C iff Y & C == 0
|
|
|
|
// The check for a 'not' op is for efficiency (if Y is known zero --> ~X).
|
|
|
|
Value *Or = Builder.CreateOr(X, Y);
|
|
|
|
return BinaryOperator::CreateXor(Or, ConstantInt::get(I.getType(), *CV));
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// (A & C)|(B & D)
|
2018-09-01 23:08:59 +08:00
|
|
|
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
|
|
|
|
Value *A, *B, *C, *D;
|
2010-01-05 15:50:36 +08:00
|
|
|
if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
|
|
|
|
match(Op1, m_And(m_Value(B), m_Value(D)))) {
|
2017-04-09 14:12:41 +08:00
|
|
|
ConstantInt *C1 = dyn_cast<ConstantInt>(C);
|
|
|
|
ConstantInt *C2 = dyn_cast<ConstantInt>(D);
|
2010-01-05 15:50:36 +08:00
|
|
|
if (C1 && C2) { // (A & C1)|(B & C2)
|
2017-08-14 08:04:21 +08:00
|
|
|
Value *V1 = nullptr, *V2 = nullptr;
|
2017-06-07 15:40:37 +08:00
|
|
|
if ((C1->getValue() & C2->getValue()).isNullValue()) {
|
add one more bitfield optimization, allowing clang to generate
good code on PR4216:
_test_bitfield: ## @test_bitfield
orl $32962, %edi
movl $4294941946, %eax
andq %rdi, %rax
ret
instead of:
_test_bitfield:
movl $4294941696, %ecx
movl %edi, %eax
orl $194, %edi
orl $32768, %eax
andq $250, %rdi
andq %rax, %rcx
movq %rdi, %rax
orq %rcx, %rax
ret
Evan is looking into the remaining andq+imm -> andl optimization.
llvm-svn: 93147
2010-01-11 14:55:24 +08:00
|
|
|
// ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2)
|
2012-09-27 18:14:43 +08:00
|
|
|
// iff (C1&C2) == 0 and (N&~C1) == 0
|
2010-01-05 15:50:36 +08:00
|
|
|
if (match(A, m_Or(m_Value(V1), m_Value(V2))) &&
|
Make use of @llvm.assume in ValueTracking (computeKnownBits, etc.)
This change, which allows @llvm.assume to be used from within computeKnownBits
(and other associated functions in ValueTracking), adds some (optional)
parameters to computeKnownBits and friends. These functions now (optionally)
take a "context" instruction pointer, an AssumptionTracker pointer, and also a
DomTree pointer, and most of the changes are just to pass this new information
when it is easily available from InstSimplify, InstCombine, etc.
As explained below, the significant conceptual change is that known properties
of a value might depend on the control-flow location of the use (because we
care that the @llvm.assume dominates the use because assumptions have
control-flow dependencies). This means that, when we ask if bits are known in a
value, we might get different answers for different uses.
The significant changes are all in ValueTracking. Two main changes: First, as
with the rest of the code, new parameters need to be passed around. To make
this easier, I grouped them into a structure, and I made internal static
versions of the relevant functions that take this structure as a parameter. The
new code does as you might expect, it looks for @llvm.assume calls that make
use of the value we're trying to learn something about (often indirectly),
attempts to pattern match that expression, and uses the result if successful.
By making use of the AssumptionTracker, the process of finding @llvm.assume
calls is not expensive.
Part of the structure being passed around inside ValueTracking is a set of
already-considered @llvm.assume calls. This is to prevent a query using, for
example, the assume(a == b), to recurse on itself. The context and DT params
are used to find applicable assumptions. An assumption needs to dominate the
context instruction, or come after it deterministically. In this latter case we
only handle the specific case where both the assumption and the context
instruction are in the same block, and we need to exclude assumptions from
being used to simplify their own ephemeral values (those which contribute only
to the assumption) because otherwise the assumption would prove its feeding
comparison trivial and would be removed.
This commit adds the plumbing and the logic for a simple masked-bit propagation
(just enough to write a regression test). Future commits add more patterns
(and, correspondingly, more regression tests).
llvm-svn: 217342
2014-09-08 02:57:58 +08:00
|
|
|
((V1 == B &&
|
|
|
|
MaskedValueIsZero(V2, ~C1->getValue(), 0, &I)) || // (V|N)
|
|
|
|
(V2 == B &&
|
|
|
|
MaskedValueIsZero(V1, ~C1->getValue(), 0, &I)))) // (N|V)
|
2010-01-05 15:50:36 +08:00
|
|
|
return BinaryOperator::CreateAnd(A,
|
2017-07-08 07:16:26 +08:00
|
|
|
Builder.getInt(C1->getValue()|C2->getValue()));
|
2010-01-05 15:50:36 +08:00
|
|
|
// Or commutes, try both ways.
|
|
|
|
if (match(B, m_Or(m_Value(V1), m_Value(V2))) &&
|
Make use of @llvm.assume in ValueTracking (computeKnownBits, etc.)
This change, which allows @llvm.assume to be used from within computeKnownBits
(and other associated functions in ValueTracking), adds some (optional)
parameters to computeKnownBits and friends. These functions now (optionally)
take a "context" instruction pointer, an AssumptionTracker pointer, and also a
DomTree pointer, and most of the changes are just to pass this new information
when it is easily available from InstSimplify, InstCombine, etc.
As explained below, the significant conceptual change is that known properties
of a value might depend on the control-flow location of the use (because we
care that the @llvm.assume dominates the use because assumptions have
control-flow dependencies). This means that, when we ask if bits are known in a
value, we might get different answers for different uses.
The significant changes are all in ValueTracking. Two main changes: First, as
with the rest of the code, new parameters need to be passed around. To make
this easier, I grouped them into a structure, and I made internal static
versions of the relevant functions that take this structure as a parameter. The
new code does as you might expect, it looks for @llvm.assume calls that make
use of the value we're trying to learn something about (often indirectly),
attempts to pattern match that expression, and uses the result if successful.
By making use of the AssumptionTracker, the process of finding @llvm.assume
calls is not expensive.
Part of the structure being passed around inside ValueTracking is a set of
already-considered @llvm.assume calls. This is to prevent a query using, for
example, the assume(a == b), to recurse on itself. The context and DT params
are used to find applicable assumptions. An assumption needs to dominate the
context instruction, or come after it deterministically. In this latter case we
only handle the specific case where both the assumption and the context
instruction are in the same block, and we need to exclude assumptions from
being used to simplify their own ephemeral values (those which contribute only
to the assumption) because otherwise the assumption would prove its feeding
comparison trivial and would be removed.
This commit adds the plumbing and the logic for a simple masked-bit propagation
(just enough to write a regression test). Future commits add more patterns
(and, correspondingly, more regression tests).
llvm-svn: 217342
2014-09-08 02:57:58 +08:00
|
|
|
((V1 == A &&
|
|
|
|
MaskedValueIsZero(V2, ~C2->getValue(), 0, &I)) || // (V|N)
|
|
|
|
(V2 == A &&
|
|
|
|
MaskedValueIsZero(V1, ~C2->getValue(), 0, &I)))) // (N|V)
|
2010-01-05 15:50:36 +08:00
|
|
|
return BinaryOperator::CreateAnd(B,
|
2017-07-08 07:16:26 +08:00
|
|
|
Builder.getInt(C1->getValue()|C2->getValue()));
|
2012-12-20 15:09:41 +08:00
|
|
|
|
add one more bitfield optimization, allowing clang to generate
good code on PR4216:
_test_bitfield: ## @test_bitfield
orl $32962, %edi
movl $4294941946, %eax
andq %rdi, %rax
ret
instead of:
_test_bitfield:
movl $4294941696, %ecx
movl %edi, %eax
orl $194, %edi
orl $32768, %eax
andq $250, %rdi
andq %rax, %rcx
movq %rdi, %rax
orq %rcx, %rax
ret
Evan is looking into the remaining andq+imm -> andl optimization.
llvm-svn: 93147
2010-01-11 14:55:24 +08:00
|
|
|
// ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2)
|
2012-09-27 18:14:43 +08:00
|
|
|
// iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0.
|
2014-04-25 13:29:35 +08:00
|
|
|
ConstantInt *C3 = nullptr, *C4 = nullptr;
|
add one more bitfield optimization, allowing clang to generate
good code on PR4216:
_test_bitfield: ## @test_bitfield
orl $32962, %edi
movl $4294941946, %eax
andq %rdi, %rax
ret
instead of:
_test_bitfield:
movl $4294941696, %ecx
movl %edi, %eax
orl $194, %edi
orl $32768, %eax
andq $250, %rdi
andq %rax, %rcx
movq %rdi, %rax
orq %rcx, %rax
ret
Evan is looking into the remaining andq+imm -> andl optimization.
llvm-svn: 93147
2010-01-11 14:55:24 +08:00
|
|
|
if (match(A, m_Or(m_Value(V1), m_ConstantInt(C3))) &&
|
2017-06-07 15:40:37 +08:00
|
|
|
(C3->getValue() & ~C1->getValue()).isNullValue() &&
|
add one more bitfield optimization, allowing clang to generate
good code on PR4216:
_test_bitfield: ## @test_bitfield
orl $32962, %edi
movl $4294941946, %eax
andq %rdi, %rax
ret
instead of:
_test_bitfield:
movl $4294941696, %ecx
movl %edi, %eax
orl $194, %edi
orl $32768, %eax
andq $250, %rdi
andq %rax, %rcx
movq %rdi, %rax
orq %rcx, %rax
ret
Evan is looking into the remaining andq+imm -> andl optimization.
llvm-svn: 93147
2010-01-11 14:55:24 +08:00
|
|
|
match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) &&
|
2017-06-07 15:40:37 +08:00
|
|
|
(C4->getValue() & ~C2->getValue()).isNullValue()) {
|
2017-07-08 07:16:26 +08:00
|
|
|
V2 = Builder.CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield");
|
add one more bitfield optimization, allowing clang to generate
good code on PR4216:
_test_bitfield: ## @test_bitfield
orl $32962, %edi
movl $4294941946, %eax
andq %rdi, %rax
ret
instead of:
_test_bitfield:
movl $4294941696, %ecx
movl %edi, %eax
orl $194, %edi
orl $32768, %eax
andq $250, %rdi
andq %rax, %rcx
movq %rdi, %rax
orq %rcx, %rax
ret
Evan is looking into the remaining andq+imm -> andl optimization.
llvm-svn: 93147
2010-01-11 14:55:24 +08:00
|
|
|
return BinaryOperator::CreateAnd(V2,
|
2017-07-08 07:16:26 +08:00
|
|
|
Builder.getInt(C1->getValue()|C2->getValue()));
|
add one more bitfield optimization, allowing clang to generate
good code on PR4216:
_test_bitfield: ## @test_bitfield
orl $32962, %edi
movl $4294941946, %eax
andq %rdi, %rax
ret
instead of:
_test_bitfield:
movl $4294941696, %ecx
movl %edi, %eax
orl $194, %edi
orl $32768, %eax
andq $250, %rdi
andq %rax, %rcx
movq %rdi, %rax
orq %rcx, %rax
ret
Evan is looking into the remaining andq+imm -> andl optimization.
llvm-svn: 93147
2010-01-11 14:55:24 +08:00
|
|
|
}
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
2017-08-14 08:04:21 +08:00
|
|
|
|
|
|
|
if (C1->getValue() == ~C2->getValue()) {
|
|
|
|
Value *X;
|
|
|
|
|
|
|
|
// ((X|B)&C1)|(B&C2) -> (X&C1) | B iff C1 == ~C2
|
|
|
|
if (match(A, m_c_Or(m_Value(X), m_Specific(B))))
|
|
|
|
return BinaryOperator::CreateOr(Builder.CreateAnd(X, C1), B);
|
|
|
|
// (A&C1)|((X|A)&C2) -> (X&C2) | A iff C1 == ~C2
|
|
|
|
if (match(B, m_c_Or(m_Specific(A), m_Value(X))))
|
|
|
|
return BinaryOperator::CreateOr(Builder.CreateAnd(X, C2), A);
|
|
|
|
|
|
|
|
// ((X^B)&C1)|(B&C2) -> (X&C1) ^ B iff C1 == ~C2
|
|
|
|
if (match(A, m_c_Xor(m_Value(X), m_Specific(B))))
|
|
|
|
return BinaryOperator::CreateXor(Builder.CreateAnd(X, C1), B);
|
|
|
|
// (A&C1)|((X^A)&C2) -> (X&C2) ^ A iff C1 == ~C2
|
|
|
|
if (match(B, m_c_Xor(m_Specific(A), m_Value(X))))
|
|
|
|
return BinaryOperator::CreateXor(Builder.CreateAnd(X, C2), A);
|
|
|
|
}
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
|
|
|
|
2016-07-09 04:53:29 +08:00
|
|
|
// Don't try to form a select if it's unlikely that we'll get rid of at
|
|
|
|
// least one of the operands. A select is generally more expensive than the
|
|
|
|
// 'or' that it is replacing.
|
|
|
|
if (Op0->hasOneUse() || Op1->hasOneUse()) {
|
|
|
|
// (Cond & C) | (~Cond & D) -> Cond ? C : D, and commuted variants.
|
2018-10-16 23:26:08 +08:00
|
|
|
if (Value *V = matchSelectFromAndOr(A, C, B, D, Builder))
|
2016-07-09 04:53:29 +08:00
|
|
|
return replaceInstUsesWith(I, V);
|
2018-10-16 23:26:08 +08:00
|
|
|
if (Value *V = matchSelectFromAndOr(A, C, D, B, Builder))
|
2016-07-09 04:53:29 +08:00
|
|
|
return replaceInstUsesWith(I, V);
|
2018-10-16 23:26:08 +08:00
|
|
|
if (Value *V = matchSelectFromAndOr(C, A, B, D, Builder))
|
2016-07-09 04:53:29 +08:00
|
|
|
return replaceInstUsesWith(I, V);
|
2018-10-16 23:26:08 +08:00
|
|
|
if (Value *V = matchSelectFromAndOr(C, A, D, B, Builder))
|
2016-07-09 04:53:29 +08:00
|
|
|
return replaceInstUsesWith(I, V);
|
2018-10-16 23:26:08 +08:00
|
|
|
if (Value *V = matchSelectFromAndOr(B, D, A, C, Builder))
|
2016-07-09 04:53:29 +08:00
|
|
|
return replaceInstUsesWith(I, V);
|
2018-10-16 23:26:08 +08:00
|
|
|
if (Value *V = matchSelectFromAndOr(B, D, C, A, Builder))
|
2016-07-09 04:53:29 +08:00
|
|
|
return replaceInstUsesWith(I, V);
|
2018-10-16 23:26:08 +08:00
|
|
|
if (Value *V = matchSelectFromAndOr(D, B, A, C, Builder))
|
2016-07-09 04:53:29 +08:00
|
|
|
return replaceInstUsesWith(I, V);
|
2018-10-16 23:26:08 +08:00
|
|
|
if (Value *V = matchSelectFromAndOr(D, B, C, A, Builder))
|
2016-07-09 04:53:29 +08:00
|
|
|
return replaceInstUsesWith(I, V);
|
|
|
|
}
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
2012-12-20 15:09:41 +08:00
|
|
|
|
InstCombine: Simplify (A ^ B) or/and (A ^ B ^ C)
While we can already transform A | (A ^ B) into A | B, things get bad
once we have (A ^ B) | (A ^ B ^ Cst) because reassociation will morph
this into (A ^ B) | ((A ^ Cst) ^ B). Our existing patterns fail once
this happens.
To fix this, we add a new pattern which looks through the tree of xor
binary operators to see that, in fact, there exists a redundant xor
operation.
What follows below is a correctness proof of the transform using CVC3.
$ cat t.cvc
A, B, C : BITVECTOR(64);
QUERY BVXOR(A, B) | BVXOR(BVXOR(B, C), A) = BVXOR(A, B) | C;
QUERY BVXOR(BVXOR(A, C), B) | BVXOR(A, B) = BVXOR(A, B) | C;
QUERY BVXOR(A, B) & BVXOR(BVXOR(B, C), A) = BVXOR(A, B) & ~C;
QUERY BVXOR(BVXOR(A, C), B) & BVXOR(A, B) = BVXOR(A, B) & ~C;
$ cvc3 < t.cvc
Valid.
Valid.
Valid.
Valid.
llvm-svn: 214342
2014-07-31 05:26:37 +08:00
|
|
|
// (A ^ B) | ((B ^ C) ^ A) -> (A ^ B) | C
|
|
|
|
if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
|
|
|
|
if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A))))
|
2017-06-20 00:23:49 +08:00
|
|
|
return BinaryOperator::CreateOr(Op0, C);
|
InstCombine: Simplify (A ^ B) or/and (A ^ B ^ C)
While we can already transform A | (A ^ B) into A | B, things get bad
once we have (A ^ B) | (A ^ B ^ Cst) because reassociation will morph
this into (A ^ B) | ((A ^ Cst) ^ B). Our existing patterns fail once
this happens.
To fix this, we add a new pattern which looks through the tree of xor
binary operators to see that, in fact, there exists a redundant xor
operation.
What follows below is a correctness proof of the transform using CVC3.
$ cat t.cvc
A, B, C : BITVECTOR(64);
QUERY BVXOR(A, B) | BVXOR(BVXOR(B, C), A) = BVXOR(A, B) | C;
QUERY BVXOR(BVXOR(A, C), B) | BVXOR(A, B) = BVXOR(A, B) | C;
QUERY BVXOR(A, B) & BVXOR(BVXOR(B, C), A) = BVXOR(A, B) & ~C;
QUERY BVXOR(BVXOR(A, C), B) & BVXOR(A, B) = BVXOR(A, B) & ~C;
$ cvc3 < t.cvc
Valid.
Valid.
Valid.
Valid.
llvm-svn: 214342
2014-07-31 05:26:37 +08:00
|
|
|
|
|
|
|
// ((A ^ C) ^ B) | (B ^ A) -> (B ^ A) | C
|
|
|
|
if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B))))
|
|
|
|
if (match(Op1, m_Xor(m_Specific(B), m_Specific(A))))
|
2017-06-20 00:23:49 +08:00
|
|
|
return BinaryOperator::CreateOr(Op1, C);
|
InstCombine: Simplify (A ^ B) or/and (A ^ B ^ C)
While we can already transform A | (A ^ B) into A | B, things get bad
once we have (A ^ B) | (A ^ B ^ Cst) because reassociation will morph
this into (A ^ B) | ((A ^ Cst) ^ B). Our existing patterns fail once
this happens.
To fix this, we add a new pattern which looks through the tree of xor
binary operators to see that, in fact, there exists a redundant xor
operation.
What follows below is a correctness proof of the transform using CVC3.
$ cat t.cvc
A, B, C : BITVECTOR(64);
QUERY BVXOR(A, B) | BVXOR(BVXOR(B, C), A) = BVXOR(A, B) | C;
QUERY BVXOR(BVXOR(A, C), B) | BVXOR(A, B) = BVXOR(A, B) | C;
QUERY BVXOR(A, B) & BVXOR(BVXOR(B, C), A) = BVXOR(A, B) & ~C;
QUERY BVXOR(BVXOR(A, C), B) & BVXOR(A, B) = BVXOR(A, B) & ~C;
$ cvc3 < t.cvc
Valid.
Valid.
Valid.
Valid.
llvm-svn: 214342
2014-07-31 05:26:37 +08:00
|
|
|
|
2014-08-14 14:41:38 +08:00
|
|
|
// ((B | C) & A) | B -> B | (A & C)
|
|
|
|
if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A))))
|
2017-07-08 07:16:26 +08:00
|
|
|
return BinaryOperator::CreateOr(Op1, Builder.CreateAnd(A, C));
|
2014-08-14 14:41:38 +08:00
|
|
|
|
2017-07-08 07:16:26 +08:00
|
|
|
if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder))
|
2015-09-09 04:14:13 +08:00
|
|
|
return DeMorgan;
|
2010-01-05 15:50:36 +08:00
|
|
|
|
2011-02-20 21:23:43 +08:00
|
|
|
// Canonicalize xor to the RHS.
|
2012-03-16 08:52:42 +08:00
|
|
|
bool SwappedForXor = false;
|
|
|
|
if (match(Op0, m_Xor(m_Value(), m_Value()))) {
|
2011-02-20 21:23:43 +08:00
|
|
|
std::swap(Op0, Op1);
|
2012-03-16 08:52:42 +08:00
|
|
|
SwappedForXor = true;
|
|
|
|
}
|
2011-02-20 21:23:43 +08:00
|
|
|
|
|
|
|
// A | ( A ^ B) -> A | B
|
|
|
|
// A | (~A ^ B) -> A | ~B
|
2012-04-27 07:29:14 +08:00
|
|
|
// (A & B) | (A ^ B)
|
2011-02-20 21:23:43 +08:00
|
|
|
if (match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
|
|
|
|
if (Op0 == A || Op0 == B)
|
|
|
|
return BinaryOperator::CreateOr(A, B);
|
|
|
|
|
2012-04-27 07:29:14 +08:00
|
|
|
if (match(Op0, m_And(m_Specific(A), m_Specific(B))) ||
|
|
|
|
match(Op0, m_And(m_Specific(B), m_Specific(A))))
|
|
|
|
return BinaryOperator::CreateOr(A, B);
|
|
|
|
|
2011-02-20 21:23:43 +08:00
|
|
|
if (Op1->hasOneUse() && match(A, m_Not(m_Specific(Op0)))) {
|
2017-07-08 07:16:26 +08:00
|
|
|
Value *Not = Builder.CreateNot(B, B->getName() + ".not");
|
2011-02-20 21:23:43 +08:00
|
|
|
return BinaryOperator::CreateOr(Not, Op0);
|
|
|
|
}
|
|
|
|
if (Op1->hasOneUse() && match(B, m_Not(m_Specific(Op0)))) {
|
2017-07-08 07:16:26 +08:00
|
|
|
Value *Not = Builder.CreateNot(A, A->getName() + ".not");
|
2011-02-20 21:23:43 +08:00
|
|
|
return BinaryOperator::CreateOr(Not, Op0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// A | ~(A | B) -> A | ~B
|
|
|
|
// A | ~(A ^ B) -> A | ~B
|
|
|
|
if (match(Op1, m_Not(m_Value(A))))
|
|
|
|
if (BinaryOperator *B = dyn_cast<BinaryOperator>(A))
|
2011-02-20 23:20:01 +08:00
|
|
|
if ((Op0 == B->getOperand(0) || Op0 == B->getOperand(1)) &&
|
|
|
|
Op1->hasOneUse() && (B->getOpcode() == Instruction::Or ||
|
|
|
|
B->getOpcode() == Instruction::Xor)) {
|
|
|
|
Value *NotOp = Op0 == B->getOperand(0) ? B->getOperand(1) :
|
|
|
|
B->getOperand(0);
|
2017-07-08 07:16:26 +08:00
|
|
|
Value *Not = Builder.CreateNot(NotOp, NotOp->getName() + ".not");
|
2011-02-20 23:20:01 +08:00
|
|
|
return BinaryOperator::CreateOr(Not, Op0);
|
|
|
|
}
|
2011-02-20 21:23:43 +08:00
|
|
|
|
2012-03-16 08:52:42 +08:00
|
|
|
if (SwappedForXor)
|
|
|
|
std::swap(Op0, Op1);
|
|
|
|
|
2014-11-29 03:58:29 +08:00
|
|
|
{
|
|
|
|
ICmpInst *LHS = dyn_cast<ICmpInst>(Op0);
|
|
|
|
ICmpInst *RHS = dyn_cast<ICmpInst>(Op1);
|
|
|
|
if (LHS && RHS)
|
2017-06-16 03:09:51 +08:00
|
|
|
if (Value *Res = foldOrOfICmps(LHS, RHS, I))
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(I, Res);
|
2012-12-20 15:09:41 +08:00
|
|
|
|
2014-11-29 03:58:29 +08:00
|
|
|
// TODO: Make this recursive; it's a little tricky because an arbitrary
|
|
|
|
// number of 'or' instructions might have to be created.
|
|
|
|
Value *X, *Y;
|
|
|
|
if (LHS && match(Op1, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
|
|
|
|
if (auto *Cmp = dyn_cast<ICmpInst>(X))
|
2017-06-16 03:09:51 +08:00
|
|
|
if (Value *Res = foldOrOfICmps(LHS, Cmp, I))
|
2017-07-08 07:16:26 +08:00
|
|
|
return replaceInstUsesWith(I, Builder.CreateOr(Res, Y));
|
2014-11-29 03:58:29 +08:00
|
|
|
if (auto *Cmp = dyn_cast<ICmpInst>(Y))
|
2017-06-16 03:09:51 +08:00
|
|
|
if (Value *Res = foldOrOfICmps(LHS, Cmp, I))
|
2017-07-08 07:16:26 +08:00
|
|
|
return replaceInstUsesWith(I, Builder.CreateOr(Res, X));
|
2014-11-29 03:58:29 +08:00
|
|
|
}
|
|
|
|
if (RHS && match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
|
|
|
|
if (auto *Cmp = dyn_cast<ICmpInst>(X))
|
2017-06-16 03:09:51 +08:00
|
|
|
if (Value *Res = foldOrOfICmps(Cmp, RHS, I))
|
2017-07-08 07:16:26 +08:00
|
|
|
return replaceInstUsesWith(I, Builder.CreateOr(Res, Y));
|
2014-11-29 03:58:29 +08:00
|
|
|
if (auto *Cmp = dyn_cast<ICmpInst>(Y))
|
2017-06-16 03:09:51 +08:00
|
|
|
if (Value *Res = foldOrOfICmps(Cmp, RHS, I))
|
2017-07-08 07:16:26 +08:00
|
|
|
return replaceInstUsesWith(I, Builder.CreateOr(Res, X));
|
2014-11-29 03:58:29 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-02-11 14:26:33 +08:00
|
|
|
if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
|
|
|
|
if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
|
2017-09-03 01:53:33 +08:00
|
|
|
if (Value *Res = foldLogicOfFCmps(LHS, RHS, false))
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(I, Res);
|
2012-12-20 15:09:41 +08:00
|
|
|
|
2016-02-24 07:56:23 +08:00
|
|
|
if (Instruction *CastedOr = foldCastedBitwiseLogic(I))
|
|
|
|
return CastedOr;
|
2011-04-15 06:41:27 +08:00
|
|
|
|
2016-07-09 01:01:15 +08:00
|
|
|
// or(sext(A), B) / or(B, sext(A)) --> A ? -1 : B, where A is i1 or <N x i1>.
|
2016-07-09 01:26:47 +08:00
|
|
|
if (match(Op0, m_OneUse(m_SExt(m_Value(A)))) &&
|
2017-07-09 15:04:03 +08:00
|
|
|
A->getType()->isIntOrIntVectorTy(1))
|
2011-04-15 06:41:27 +08:00
|
|
|
return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op1);
|
2016-07-09 01:26:47 +08:00
|
|
|
if (match(Op1, m_OneUse(m_SExt(m_Value(A)))) &&
|
2017-07-09 15:04:03 +08:00
|
|
|
A->getType()->isIntOrIntVectorTy(1))
|
2011-04-15 06:41:27 +08:00
|
|
|
return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op0);
|
|
|
|
|
2010-09-14 01:59:27 +08:00
|
|
|
// Note: If we've gotten to the point of visiting the outer OR, then the
|
|
|
|
// inner one couldn't be simplified. If it was a constant, then it won't
|
|
|
|
// be simplified by a later pass either, so we try swapping the inner/outer
|
|
|
|
// ORs in the hopes that we'll be able to simplify it this way.
|
|
|
|
// (X|C) | V --> (X|V) | C
|
2018-09-01 23:08:59 +08:00
|
|
|
ConstantInt *CI;
|
2010-09-14 01:59:27 +08:00
|
|
|
if (Op0->hasOneUse() && !isa<ConstantInt>(Op1) &&
|
2018-09-01 23:08:59 +08:00
|
|
|
match(Op0, m_Or(m_Value(A), m_ConstantInt(CI)))) {
|
2017-07-08 07:16:26 +08:00
|
|
|
Value *Inner = Builder.CreateOr(A, Op1);
|
2010-09-14 01:59:27 +08:00
|
|
|
Inner->takeName(Op0);
|
2018-09-01 23:08:59 +08:00
|
|
|
return BinaryOperator::CreateOr(Inner, CI);
|
2010-09-14 01:59:27 +08:00
|
|
|
}
|
2012-12-20 15:09:41 +08:00
|
|
|
|
2013-02-17 07:41:36 +08:00
|
|
|
// Change (or (bool?A:B),(bool?C:D)) --> (bool?(or A,C):(or B,D))
|
|
|
|
// Since this OR statement hasn't been optimized further yet, we hope
|
|
|
|
// that this transformation will allow the new ORs to be optimized.
|
|
|
|
{
|
2014-04-25 13:29:35 +08:00
|
|
|
Value *X = nullptr, *Y = nullptr;
|
2013-02-17 07:41:36 +08:00
|
|
|
if (Op0->hasOneUse() && Op1->hasOneUse() &&
|
|
|
|
match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) &&
|
|
|
|
match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) {
|
2017-07-08 07:16:26 +08:00
|
|
|
Value *orTrue = Builder.CreateOr(A, C);
|
|
|
|
Value *orFalse = Builder.CreateOr(B, D);
|
2013-02-17 07:41:36 +08:00
|
|
|
return SelectInst::Create(X, orTrue, orFalse);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-07-13 09:18:07 +08:00
|
|
|
return nullptr;
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
|
|
|
|
2017-04-24 00:03:00 +08:00
|
|
|
/// A ^ B can be specified using other logic ops in a variety of patterns. We
|
|
|
|
/// can fold these early and efficiently by morphing an existing instruction.
|
2017-07-02 09:15:51 +08:00
|
|
|
static Instruction *foldXorToXor(BinaryOperator &I,
|
|
|
|
InstCombiner::BuilderTy &Builder) {
|
2017-04-24 00:03:00 +08:00
|
|
|
assert(I.getOpcode() == Instruction::Xor);
|
|
|
|
Value *Op0 = I.getOperand(0);
|
|
|
|
Value *Op1 = I.getOperand(1);
|
|
|
|
Value *A, *B;
|
|
|
|
|
|
|
|
// There are 4 commuted variants for each of the basic patterns.
|
|
|
|
|
|
|
|
// (A & B) ^ (A | B) -> A ^ B
|
|
|
|
// (A & B) ^ (B | A) -> A ^ B
|
|
|
|
// (A | B) ^ (A & B) -> A ^ B
|
|
|
|
// (A | B) ^ (B & A) -> A ^ B
|
[PatternMatch] Stabilize the matching order of commutative matchers
Summary:
Currently, we
1. match `LHS` matcher to the `first` operand of binary operator,
2. and then match `RHS` matcher to the `second` operand of binary operator.
If that does not match, we swap the `LHS` and `RHS` matchers:
1. match `RHS` matcher to the `first` operand of binary operator,
2. and then match `LHS` matcher to the `second` operand of binary operator.
This works ok.
But it complicates writing of commutative matchers, where one would like to match
(`m_Value()`) the value on one side, and use (`m_Specific()`) it on the other side.
This is additionally complicated by the fact that `m_Specific()` stores the `Value *`,
not `Value **`, so it won't work at all out of the box.
The last problem is trivially solved by adding a new `m_c_Specific()` that stores the
`Value **`, not `Value *`. I'm choosing to add a new matcher, not change the existing
one because i guess all the current users are ok with existing behavior,
and this additional pointer indirection may have performance drawbacks.
Also, i'm storing pointer, not reference, because for some mysterious-to-me reason
it did not work with the reference.
The first one appears trivial, too.
Currently, we
1. match `LHS` matcher to the `first` operand of binary operator,
2. and then match `RHS` matcher to the `second` operand of binary operator.
If that does not match, we swap the ~~`LHS` and `RHS` matchers~~ **operands**:
1. match ~~`RHS`~~ **`LHS`** matcher to the ~~`first`~~ **`second`** operand of binary operator,
2. and then match ~~`LHS`~~ **`RHS`** matcher to the ~~`second`~ **`first`** operand of binary operator.
Surprisingly, `$ ninja check-llvm` still passes with this.
But i expect the bots will disagree..
The motivational unittest is included.
I'd like to use this in D45664.
Reviewers: spatel, craig.topper, arsenm, RKSimon
Reviewed By: craig.topper
Subscribers: xbolva00, wdng, llvm-commits
Differential Revision: https://reviews.llvm.org/D45828
llvm-svn: 331085
2018-04-28 05:23:20 +08:00
|
|
|
if (match(&I, m_c_Xor(m_And(m_Value(A), m_Value(B)),
|
|
|
|
m_c_Or(m_Deferred(A), m_Deferred(B))))) {
|
2017-04-24 00:03:00 +08:00
|
|
|
I.setOperand(0, A);
|
|
|
|
I.setOperand(1, B);
|
|
|
|
return &I;
|
|
|
|
}
|
|
|
|
|
|
|
|
// (A | ~B) ^ (~A | B) -> A ^ B
|
|
|
|
// (~B | A) ^ (~A | B) -> A ^ B
|
|
|
|
// (~A | B) ^ (A | ~B) -> A ^ B
|
|
|
|
// (B | ~A) ^ (A | ~B) -> A ^ B
|
[PatternMatch] Stabilize the matching order of commutative matchers
Summary:
Currently, we
1. match `LHS` matcher to the `first` operand of binary operator,
2. and then match `RHS` matcher to the `second` operand of binary operator.
If that does not match, we swap the `LHS` and `RHS` matchers:
1. match `RHS` matcher to the `first` operand of binary operator,
2. and then match `LHS` matcher to the `second` operand of binary operator.
This works ok.
But it complicates writing of commutative matchers, where one would like to match
(`m_Value()`) the value on one side, and use (`m_Specific()`) it on the other side.
This is additionally complicated by the fact that `m_Specific()` stores the `Value *`,
not `Value **`, so it won't work at all out of the box.
The last problem is trivially solved by adding a new `m_c_Specific()` that stores the
`Value **`, not `Value *`. I'm choosing to add a new matcher, not change the existing
one because i guess all the current users are ok with existing behavior,
and this additional pointer indirection may have performance drawbacks.
Also, i'm storing pointer, not reference, because for some mysterious-to-me reason
it did not work with the reference.
The first one appears trivial, too.
Currently, we
1. match `LHS` matcher to the `first` operand of binary operator,
2. and then match `RHS` matcher to the `second` operand of binary operator.
If that does not match, we swap the ~~`LHS` and `RHS` matchers~~ **operands**:
1. match ~~`RHS`~~ **`LHS`** matcher to the ~~`first`~~ **`second`** operand of binary operator,
2. and then match ~~`LHS`~~ **`RHS`** matcher to the ~~`second`~ **`first`** operand of binary operator.
Surprisingly, `$ ninja check-llvm` still passes with this.
But i expect the bots will disagree..
The motivational unittest is included.
I'd like to use this in D45664.
Reviewers: spatel, craig.topper, arsenm, RKSimon
Reviewed By: craig.topper
Subscribers: xbolva00, wdng, llvm-commits
Differential Revision: https://reviews.llvm.org/D45828
llvm-svn: 331085
2018-04-28 05:23:20 +08:00
|
|
|
if (match(&I, m_Xor(m_c_Or(m_Value(A), m_Not(m_Value(B))),
|
|
|
|
m_c_Or(m_Not(m_Deferred(A)), m_Deferred(B))))) {
|
2017-04-24 00:03:00 +08:00
|
|
|
I.setOperand(0, A);
|
|
|
|
I.setOperand(1, B);
|
|
|
|
return &I;
|
|
|
|
}
|
|
|
|
|
|
|
|
// (A & ~B) ^ (~A & B) -> A ^ B
|
|
|
|
// (~B & A) ^ (~A & B) -> A ^ B
|
|
|
|
// (~A & B) ^ (A & ~B) -> A ^ B
|
|
|
|
// (B & ~A) ^ (A & ~B) -> A ^ B
|
[PatternMatch] Stabilize the matching order of commutative matchers
Summary:
Currently, we
1. match `LHS` matcher to the `first` operand of binary operator,
2. and then match `RHS` matcher to the `second` operand of binary operator.
If that does not match, we swap the `LHS` and `RHS` matchers:
1. match `RHS` matcher to the `first` operand of binary operator,
2. and then match `LHS` matcher to the `second` operand of binary operator.
This works ok.
But it complicates writing of commutative matchers, where one would like to match
(`m_Value()`) the value on one side, and use (`m_Specific()`) it on the other side.
This is additionally complicated by the fact that `m_Specific()` stores the `Value *`,
not `Value **`, so it won't work at all out of the box.
The last problem is trivially solved by adding a new `m_c_Specific()` that stores the
`Value **`, not `Value *`. I'm choosing to add a new matcher, not change the existing
one because i guess all the current users are ok with existing behavior,
and this additional pointer indirection may have performance drawbacks.
Also, i'm storing pointer, not reference, because for some mysterious-to-me reason
it did not work with the reference.
The first one appears trivial, too.
Currently, we
1. match `LHS` matcher to the `first` operand of binary operator,
2. and then match `RHS` matcher to the `second` operand of binary operator.
If that does not match, we swap the ~~`LHS` and `RHS` matchers~~ **operands**:
1. match ~~`RHS`~~ **`LHS`** matcher to the ~~`first`~~ **`second`** operand of binary operator,
2. and then match ~~`LHS`~~ **`RHS`** matcher to the ~~`second`~ **`first`** operand of binary operator.
Surprisingly, `$ ninja check-llvm` still passes with this.
But i expect the bots will disagree..
The motivational unittest is included.
I'd like to use this in D45664.
Reviewers: spatel, craig.topper, arsenm, RKSimon
Reviewed By: craig.topper
Subscribers: xbolva00, wdng, llvm-commits
Differential Revision: https://reviews.llvm.org/D45828
llvm-svn: 331085
2018-04-28 05:23:20 +08:00
|
|
|
if (match(&I, m_Xor(m_c_And(m_Value(A), m_Not(m_Value(B))),
|
|
|
|
m_c_And(m_Not(m_Deferred(A)), m_Deferred(B))))) {
|
2017-04-24 00:03:00 +08:00
|
|
|
I.setOperand(0, A);
|
|
|
|
I.setOperand(1, B);
|
|
|
|
return &I;
|
|
|
|
}
|
|
|
|
|
2017-07-02 09:15:51 +08:00
|
|
|
// For the remaining cases we need to get rid of one of the operands.
|
|
|
|
if (!Op0->hasOneUse() && !Op1->hasOneUse())
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
// (A | B) ^ ~(A & B) -> ~(A ^ B)
|
|
|
|
// (A | B) ^ ~(B & A) -> ~(A ^ B)
|
|
|
|
// (A & B) ^ ~(A | B) -> ~(A ^ B)
|
|
|
|
// (A & B) ^ ~(B | A) -> ~(A ^ B)
|
|
|
|
// Complexity sorting ensures the not will be on the right side.
|
|
|
|
if ((match(Op0, m_Or(m_Value(A), m_Value(B))) &&
|
|
|
|
match(Op1, m_Not(m_c_And(m_Specific(A), m_Specific(B))))) ||
|
|
|
|
(match(Op0, m_And(m_Value(A), m_Value(B))) &&
|
|
|
|
match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))))
|
|
|
|
return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
|
|
|
|
|
2017-04-24 00:03:00 +08:00
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
2017-05-19 04:53:16 +08:00
|
|
|
/// Try to fold the xor of two integer compares into a simpler value.
///
/// Three strategies are attempted in order:
///  1. Both compares use the same operand pair (possibly commuted) and their
///     predicates are foldable: combine the predicate codes.
///  2. Both compares are sign-bit tests of same-typed integers: test the sign
///     bit of the xor of the tested values instead.
///  3. The 'or' and the 'and' of the two compares both simplify to one of the
///     compares: invert the predicate of the one-use compare in place and
///     emit an 'and'.
///
/// Returns the replacement value, or nullptr if nothing applies. Note that
/// \p LHS / \p RHS may be modified in place (operand swap, predicate change).
Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
  if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
    // Bring a commuted LHS into the same operand order as RHS.
    const bool IsCommuted = LHS->getOperand(0) == RHS->getOperand(1) &&
                            LHS->getOperand(1) == RHS->getOperand(0);
    if (IsCommuted)
      LHS->swapOperands();

    Value *Op0 = LHS->getOperand(0);
    Value *Op1 = LHS->getOperand(1);
    if (Op0 == RHS->getOperand(0) && Op1 == RHS->getOperand(1)) {
      // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
      unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
      bool isSigned = LHS->isSigned() || RHS->isSigned();
      return getNewICmpValue(isSigned, Code, Op0, Op1, Builder);
    }
  }

  // TODO: This can be generalized to compares of non-signbits using
  // decomposeBitTestICmp(). It could be enhanced more by using (something like)
  // foldLogOpOfMaskedICmps().
  ICmpInst::Predicate PredL = LHS->getPredicate();
  ICmpInst::Predicate PredR = RHS->getPredicate();
  Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
  Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);

  // 'V > -1': the sign bit of V is clear.
  auto IsSignBitClearTest = [](ICmpInst::Predicate Pred, Value *CmpRHS) {
    return Pred == CmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes());
  };
  // 'V < 0': the sign bit of V is set.
  auto IsSignBitSetTest = [](ICmpInst::Predicate Pred, Value *CmpRHS) {
    return Pred == CmpInst::ICMP_SLT && match(CmpRHS, m_Zero());
  };

  Type *CmpTy = LHS0->getType();
  if ((LHS->hasOneUse() || RHS->hasOneUse()) && CmpTy == RHS0->getType() &&
      CmpTy->isIntOrIntVectorTy()) {
    // (X > -1) ^ (Y > -1) --> (X ^ Y) < 0
    // (X <  0) ^ (Y <  0) --> (X ^ Y) < 0
    if ((IsSignBitClearTest(PredL, LHS1) && IsSignBitClearTest(PredR, RHS1)) ||
        (IsSignBitSetTest(PredL, LHS1) && IsSignBitSetTest(PredR, RHS1))) {
      Value *Zero = ConstantInt::getNullValue(CmpTy);
      Value *Xor = Builder.CreateXor(LHS0, RHS0);
      return Builder.CreateICmpSLT(Xor, Zero);
    }

    // (X > -1) ^ (Y <  0) --> (X ^ Y) > -1
    // (X <  0) ^ (Y > -1) --> (X ^ Y) > -1
    if ((IsSignBitClearTest(PredL, LHS1) && IsSignBitSetTest(PredR, RHS1)) ||
        (IsSignBitSetTest(PredL, LHS1) && IsSignBitClearTest(PredR, RHS1))) {
      Value *MinusOne = ConstantInt::getAllOnesValue(CmpTy);
      Value *Xor = Builder.CreateXor(LHS0, RHS0);
      return Builder.CreateICmpSGT(Xor, MinusOne);
    }
  }

  // Rather than duplicating the and/or folds for xor, decompose the xor into
  // those logic ops and try to end up with an and-of-icmps, for which many
  // folds exist.
  //
  // This relies on the truth table definition of xor:
  // X ^ Y --> (X | Y) & !(X & Y)
  Value *OrICmp = SimplifyBinOp(Instruction::Or, LHS, RHS, SQ);
  if (!OrICmp)
    return nullptr;

  // TODO: If OrICmp is true, then the definition of xor simplifies to !(X&Y).
  // TODO: If OrICmp is false, the whole thing is false (InstSimplify?).
  Value *AndICmp = SimplifyBinOp(Instruction::And, LHS, RHS, SQ);
  if (!AndICmp)
    return nullptr;

  // TODO: Independently handle cases where the 'and' side is a constant.
  if (OrICmp == LHS && AndICmp == RHS && RHS->hasOneUse()) {
    // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS
    RHS->setPredicate(RHS->getInversePredicate());
    return Builder.CreateAnd(LHS, RHS);
  }
  if (OrICmp == RHS && AndICmp == LHS && LHS->hasOneUse()) {
    // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS
    LHS->setPredicate(LHS->getInversePredicate());
    return Builder.CreateAnd(LHS, RHS);
  }

  return nullptr;
}
|
|
|
|
|
2018-04-28 23:45:07 +08:00
|
|
|
/// If we have a masked merge, in the canonical form of:
|
2018-05-01 01:59:33 +08:00
|
|
|
/// (assuming that A only has one use.)
|
2018-04-28 23:45:07 +08:00
|
|
|
/// | A | |B|
|
|
|
|
/// ((x ^ y) & M) ^ y
|
|
|
|
/// | D |
|
|
|
|
/// * If M is inverted:
|
|
|
|
/// | D |
|
|
|
|
/// ((x ^ y) & ~M) ^ y
|
2018-05-01 01:59:33 +08:00
|
|
|
/// We can canonicalize by swapping the final xor operand
|
|
|
|
/// to eliminate the 'not' of the mask.
|
2018-04-28 23:45:07 +08:00
|
|
|
/// ((x ^ y) & M) ^ x
|
2018-05-01 01:59:33 +08:00
|
|
|
/// * If M is a constant, and D has one use, we transform to 'and' / 'or' ops
|
|
|
|
/// because that shortens the dependency chain and improves analysis:
|
|
|
|
/// (x & M) | (y & ~M)
|
2018-04-28 23:45:07 +08:00
|
|
|
static Instruction *visitMaskedMerge(BinaryOperator &I,
|
|
|
|
InstCombiner::BuilderTy &Builder) {
|
|
|
|
Value *B, *X, *D;
|
|
|
|
Value *M;
|
|
|
|
if (!match(&I, m_c_Xor(m_Value(B),
|
|
|
|
m_OneUse(m_c_And(
|
|
|
|
m_CombineAnd(m_c_Xor(m_Deferred(B), m_Value(X)),
|
|
|
|
m_Value(D)),
|
|
|
|
m_Value(M))))))
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
Value *NotM;
|
|
|
|
if (match(M, m_Not(m_Value(NotM)))) {
|
|
|
|
// De-invert the mask and swap the value in B part.
|
|
|
|
Value *NewA = Builder.CreateAnd(D, NotM);
|
|
|
|
return BinaryOperator::CreateXor(NewA, X);
|
|
|
|
}
|
|
|
|
|
2018-05-01 01:59:33 +08:00
|
|
|
Constant *C;
|
|
|
|
if (D->hasOneUse() && match(M, m_Constant(C))) {
|
|
|
|
// Unfold.
|
|
|
|
Value *LHS = Builder.CreateAnd(X, C);
|
|
|
|
Value *NotC = Builder.CreateNot(C);
|
|
|
|
Value *RHS = Builder.CreateAnd(B, NotC);
|
|
|
|
return BinaryOperator::CreateOr(LHS, RHS);
|
|
|
|
}
|
|
|
|
|
2018-04-28 23:45:07 +08:00
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
2018-08-08 21:31:19 +08:00
|
|
|
// Transform
|
|
|
|
// ~(x ^ y)
|
|
|
|
// into:
|
|
|
|
// (~x) ^ y
|
|
|
|
// or into
|
|
|
|
// x ^ (~y)
|
|
|
|
static Instruction *sinkNotIntoXor(BinaryOperator &I,
|
|
|
|
InstCombiner::BuilderTy &Builder) {
|
|
|
|
Value *X, *Y;
|
|
|
|
// FIXME: one-use check is not needed in general, but currently we are unable
|
|
|
|
// to fold 'not' into 'icmp', if that 'icmp' has multiple uses. (D35182)
|
|
|
|
if (!match(&I, m_Not(m_OneUse(m_Xor(m_Value(X), m_Value(Y))))))
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
// We only want to do the transform if it is free to do.
|
|
|
|
if (IsFreeToInvert(X, X->hasOneUse())) {
|
|
|
|
// Ok, good.
|
|
|
|
} else if (IsFreeToInvert(Y, Y->hasOneUse())) {
|
|
|
|
std::swap(X, Y);
|
|
|
|
} else
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
Value *NotX = Builder.CreateNot(X, X->getName() + ".not");
|
|
|
|
return BinaryOperator::CreateXor(NotX, Y, I.getName() + ".demorgan");
|
|
|
|
}
|
|
|
|
|
2016-12-19 02:49:48 +08:00
|
|
|
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
|
|
|
|
// here. We should standardize that construct where it is needed or choose some
|
|
|
|
// other way to ensure that commutated variants of patterns are not missed.
|
2010-01-05 15:50:36 +08:00
|
|
|
Instruction *InstCombiner::visitXor(BinaryOperator &I) {
|
2018-06-22 01:06:36 +08:00
|
|
|
if (Value *V = SimplifyXorInst(I.getOperand(0), I.getOperand(1),
|
|
|
|
SQ.getWithInstruction(&I)))
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(I, V);
|
2010-01-05 15:50:36 +08:00
|
|
|
|
2018-07-13 09:18:07 +08:00
|
|
|
if (SimplifyAssociativeOrCommutative(I))
|
|
|
|
return &I;
|
|
|
|
|
2018-10-03 23:20:58 +08:00
|
|
|
if (Instruction *X = foldVectorBinop(I))
|
2018-06-03 00:27:44 +08:00
|
|
|
return X;
|
|
|
|
|
2017-07-08 07:16:26 +08:00
|
|
|
if (Instruction *NewXor = foldXorToXor(I, Builder))
|
2017-04-24 00:03:00 +08:00
|
|
|
return NewXor;
|
|
|
|
|
2010-12-22 21:36:08 +08:00
|
|
|
// (A&B)^(A&C) -> A&(B^C) etc
|
|
|
|
if (Value *V = SimplifyUsingDistributiveLaws(I))
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(I, V);
|
2010-11-23 22:23:47 +08:00
|
|
|
|
2012-12-20 15:09:41 +08:00
|
|
|
// See if we can simplify any instructions used by the instruction whose sole
|
2010-01-05 15:50:36 +08:00
|
|
|
// purpose is to compute bits we don't care about.
|
|
|
|
if (SimplifyDemandedInstructionBits(I))
|
|
|
|
return &I;
|
|
|
|
|
2017-07-07 00:24:23 +08:00
|
|
|
if (Value *V = SimplifyBSwap(I, Builder))
|
2016-02-02 06:23:39 +08:00
|
|
|
return replaceInstUsesWith(I, V);
|
2014-12-04 17:44:01 +08:00
|
|
|
|
2018-06-22 01:06:36 +08:00
|
|
|
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
|
2018-08-13 08:38:27 +08:00
|
|
|
|
|
|
|
// Fold (X & M) ^ (Y & ~M) -> (X & M) | (Y & ~M)
|
2018-08-13 08:54:23 +08:00
|
|
|
// This is a special case in haveNoCommonBitsSet, but the computeKnownBits
|
2018-08-13 08:38:27 +08:00
|
|
|
// calls in there are unnecessary as SimplifyDemandedInstructionBits should
|
|
|
|
// have already taken care of those cases.
|
|
|
|
Value *M;
|
|
|
|
if (match(&I, m_c_Xor(m_c_And(m_Not(m_Value(M)), m_Value()),
|
|
|
|
m_c_And(m_Deferred(M), m_Value()))))
|
2018-04-16 02:59:44 +08:00
|
|
|
return BinaryOperator::CreateOr(Op0, Op1);
|
|
|
|
|
2017-05-02 23:31:40 +08:00
|
|
|
// Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand.
|
|
|
|
Value *X, *Y;
|
|
|
|
|
|
|
|
// We must eliminate the and/or (one-use) for these transforms to not increase
|
|
|
|
// the instruction count.
|
|
|
|
// ~(~X & Y) --> (X | ~Y)
|
|
|
|
// ~(Y & ~X) --> (X | ~Y)
|
|
|
|
if (match(&I, m_Not(m_OneUse(m_c_And(m_Not(m_Value(X)), m_Value(Y)))))) {
|
2017-07-08 07:16:26 +08:00
|
|
|
Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
|
2017-05-02 23:31:40 +08:00
|
|
|
return BinaryOperator::CreateOr(X, NotY);
|
|
|
|
}
|
|
|
|
// ~(~X | Y) --> (X & ~Y)
|
|
|
|
// ~(Y | ~X) --> (X & ~Y)
|
|
|
|
if (match(&I, m_Not(m_OneUse(m_c_Or(m_Not(m_Value(X)), m_Value(Y)))))) {
|
2017-07-08 07:16:26 +08:00
|
|
|
Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
|
2017-05-02 23:31:40 +08:00
|
|
|
return BinaryOperator::CreateAnd(X, NotY);
|
|
|
|
}
|
|
|
|
|
2018-04-28 23:45:07 +08:00
|
|
|
if (Instruction *Xor = visitMaskedMerge(I, Builder))
|
|
|
|
return Xor;
|
|
|
|
|
2017-04-23 02:05:35 +08:00
|
|
|
// Is this a 'not' (~) fed by a binary operator?
|
2017-05-09 04:49:59 +08:00
|
|
|
BinaryOperator *NotVal;
|
|
|
|
if (match(&I, m_Not(m_BinOp(NotVal)))) {
|
|
|
|
if (NotVal->getOpcode() == Instruction::And ||
|
|
|
|
NotVal->getOpcode() == Instruction::Or) {
|
2017-05-02 23:31:40 +08:00
|
|
|
// Apply DeMorgan's Law when inverts are free:
|
|
|
|
// ~(X & Y) --> (~X | ~Y)
|
|
|
|
// ~(X | Y) --> (~X & ~Y)
|
2017-05-09 04:49:59 +08:00
|
|
|
if (IsFreeToInvert(NotVal->getOperand(0),
|
|
|
|
NotVal->getOperand(0)->hasOneUse()) &&
|
|
|
|
IsFreeToInvert(NotVal->getOperand(1),
|
|
|
|
NotVal->getOperand(1)->hasOneUse())) {
|
2017-07-08 07:16:26 +08:00
|
|
|
Value *NotX = Builder.CreateNot(NotVal->getOperand(0), "notlhs");
|
|
|
|
Value *NotY = Builder.CreateNot(NotVal->getOperand(1), "notrhs");
|
2017-05-09 04:49:59 +08:00
|
|
|
if (NotVal->getOpcode() == Instruction::And)
|
2017-04-23 02:05:35 +08:00
|
|
|
return BinaryOperator::CreateOr(NotX, NotY);
|
|
|
|
return BinaryOperator::CreateAnd(NotX, NotY);
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
2017-05-09 04:49:59 +08:00
|
|
|
}
|
|
|
|
|
2018-07-27 18:54:48 +08:00
|
|
|
// ~(X - Y) --> ~X + Y
|
2018-09-03 03:31:45 +08:00
|
|
|
if (match(NotVal, m_Sub(m_Value(X), m_Value(Y))))
|
|
|
|
if (isa<Constant>(X) || NotVal->hasOneUse())
|
|
|
|
return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y);
|
2018-07-27 18:54:48 +08:00
|
|
|
|
2017-05-09 04:49:59 +08:00
|
|
|
// ~(~X >>s Y) --> (X >>s Y)
|
|
|
|
if (match(NotVal, m_AShr(m_Not(m_Value(X)), m_Value(Y))))
|
|
|
|
return BinaryOperator::CreateAShr(X, Y);
|
|
|
|
|
|
|
|
// If we are inverting a right-shifted constant, we may be able to eliminate
|
|
|
|
// the 'not' by inverting the constant and using the opposite shift type.
|
|
|
|
// Canonicalization rules ensure that only a negative constant uses 'ashr',
|
|
|
|
// but we must check that in case that transform has not fired yet.
|
2018-09-04 02:40:56 +08:00
|
|
|
|
|
|
|
// ~(C >>s Y) --> ~C >>u Y (when inverting the replicated sign bits)
|
2018-02-11 05:46:09 +08:00
|
|
|
Constant *C;
|
|
|
|
if (match(NotVal, m_AShr(m_Constant(C), m_Value(Y))) &&
|
2018-09-04 02:40:56 +08:00
|
|
|
match(C, m_Negative()))
|
|
|
|
return BinaryOperator::CreateLShr(ConstantExpr::getNot(C), Y);
|
2017-05-09 04:49:59 +08:00
|
|
|
|
2018-09-04 02:40:56 +08:00
|
|
|
// ~(C >>u Y) --> ~C >>s Y (when inverting the replicated sign bits)
|
2018-02-11 05:46:09 +08:00
|
|
|
if (match(NotVal, m_LShr(m_Constant(C), m_Value(Y))) &&
|
2018-09-04 02:40:56 +08:00
|
|
|
match(C, m_NonNegative()))
|
|
|
|
return BinaryOperator::CreateAShr(ConstantExpr::getNot(C), Y);
|
2018-09-04 02:21:59 +08:00
|
|
|
|
|
|
|
// ~(X + C) --> -(C + 1) - X
|
|
|
|
if (match(Op0, m_Add(m_Value(X), m_Constant(C))))
|
|
|
|
return BinaryOperator::CreateSub(ConstantExpr::getNeg(AddOne(C)), X);
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
2012-12-20 15:09:41 +08:00
|
|
|
|
2018-09-07 00:23:40 +08:00
|
|
|
// Use DeMorgan and reassociation to eliminate a 'not' op.
|
|
|
|
Constant *C1;
|
|
|
|
if (match(Op1, m_Constant(C1))) {
|
|
|
|
Constant *C2;
|
|
|
|
if (match(Op0, m_OneUse(m_Or(m_Not(m_Value(X)), m_Constant(C2))))) {
|
|
|
|
// (~X | C2) ^ C1 --> ((X & ~C2) ^ -1) ^ C1 --> (X & ~C2) ^ ~C1
|
|
|
|
Value *And = Builder.CreateAnd(X, ConstantExpr::getNot(C2));
|
|
|
|
return BinaryOperator::CreateXor(And, ConstantExpr::getNot(C1));
|
|
|
|
}
|
|
|
|
if (match(Op0, m_OneUse(m_And(m_Not(m_Value(X)), m_Constant(C2))))) {
|
|
|
|
// (~X & C2) ^ C1 --> ((X | ~C2) ^ -1) ^ C1 --> (X | ~C2) ^ ~C1
|
|
|
|
Value *Or = Builder.CreateOr(X, ConstantExpr::getNot(C2));
|
|
|
|
return BinaryOperator::CreateXor(Or, ConstantExpr::getNot(C1));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-06-29 08:07:08 +08:00
|
|
|
// not (cmp A, B) = !cmp A, B
|
2017-07-06 04:31:00 +08:00
|
|
|
CmpInst::Predicate Pred;
|
2017-06-29 08:07:08 +08:00
|
|
|
if (match(&I, m_Not(m_OneUse(m_Cmp(Pred, m_Value(), m_Value()))))) {
|
2017-04-12 23:11:33 +08:00
|
|
|
cast<CmpInst>(Op0)->setPredicate(CmpInst::getInversePredicate(Pred));
|
|
|
|
return replaceInstUsesWith(I, Op0);
|
2015-02-13 04:26:46 +08:00
|
|
|
}
|
2010-01-05 15:50:36 +08:00
|
|
|
|
2017-08-06 14:28:41 +08:00
|
|
|
{
|
|
|
|
const APInt *RHSC;
|
|
|
|
if (match(Op1, m_APInt(RHSC))) {
|
2017-08-11 04:35:34 +08:00
|
|
|
Value *X;
|
2017-08-06 14:28:41 +08:00
|
|
|
const APInt *C;
|
2018-09-04 02:40:56 +08:00
|
|
|
if (RHSC->isSignMask() && match(Op0, m_Sub(m_APInt(C), m_Value(X)))) {
|
|
|
|
// (C - X) ^ signmask -> (C + signmask - X)
|
|
|
|
Constant *NewC = ConstantInt::get(I.getType(), *C + *RHSC);
|
|
|
|
return BinaryOperator::CreateSub(NewC, X);
|
|
|
|
}
|
|
|
|
if (RHSC->isSignMask() && match(Op0, m_Add(m_Value(X), m_APInt(C)))) {
|
|
|
|
// (X + C) ^ signmask -> (X + C + signmask)
|
|
|
|
Constant *NewC = ConstantInt::get(I.getType(), *C + *RHSC);
|
|
|
|
return BinaryOperator::CreateAdd(X, NewC);
|
2017-08-06 14:28:41 +08:00
|
|
|
}
|
2017-08-11 04:35:34 +08:00
|
|
|
|
|
|
|
// (X|C1)^C2 -> X^(C1^C2) iff X&~C1 == 0
|
|
|
|
if (match(Op0, m_Or(m_Value(X), m_APInt(C))) &&
|
|
|
|
MaskedValueIsZero(X, *C, 0, &I)) {
|
|
|
|
Constant *NewC = ConstantInt::get(I.getType(), *C ^ *RHSC);
|
|
|
|
Worklist.Add(cast<Instruction>(Op0));
|
|
|
|
I.setOperand(0, X);
|
|
|
|
I.setOperand(1, NewC);
|
|
|
|
return &I;
|
|
|
|
}
|
2017-08-06 14:28:41 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-04-09 14:12:31 +08:00
|
|
|
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1)) {
|
2010-01-05 15:50:36 +08:00
|
|
|
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
|
|
|
|
if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
|
2017-08-11 04:35:34 +08:00
|
|
|
if (Op0I->getOpcode() == Instruction::LShr) {
|
2012-11-27 05:44:25 +08:00
|
|
|
// ((X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3)
|
|
|
|
// E1 = "X ^ C1"
|
2012-12-20 15:09:41 +08:00
|
|
|
BinaryOperator *E1;
|
2012-11-27 05:44:25 +08:00
|
|
|
ConstantInt *C1;
|
|
|
|
if (Op0I->hasOneUse() &&
|
|
|
|
(E1 = dyn_cast<BinaryOperator>(Op0I->getOperand(0))) &&
|
|
|
|
E1->getOpcode() == Instruction::Xor &&
|
|
|
|
(C1 = dyn_cast<ConstantInt>(E1->getOperand(1)))) {
|
|
|
|
// fold (C1 >> C2) ^ C3
|
2017-04-09 14:12:31 +08:00
|
|
|
ConstantInt *C2 = Op0CI, *C3 = RHSC;
|
2012-11-27 05:44:25 +08:00
|
|
|
APInt FoldConst = C1->getValue().lshr(C2->getValue());
|
|
|
|
FoldConst ^= C3->getValue();
|
|
|
|
// Prepare the two operands.
|
2017-07-08 07:16:26 +08:00
|
|
|
Value *Opnd0 = Builder.CreateLShr(E1->getOperand(0), C2);
|
2012-11-27 05:44:25 +08:00
|
|
|
Opnd0->takeName(Op0I);
|
|
|
|
cast<Instruction>(Opnd0)->setDebugLoc(I.getDebugLoc());
|
|
|
|
Value *FoldVal = ConstantInt::get(Opnd0->getType(), FoldConst);
|
|
|
|
|
|
|
|
return BinaryOperator::CreateXor(Opnd0, FoldVal);
|
|
|
|
}
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2017-04-05 04:26:25 +08:00
|
|
|
}
|
2010-01-05 15:50:36 +08:00
|
|
|
|
2018-03-01 00:36:24 +08:00
|
|
|
if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
|
|
|
|
return FoldedLogic;
|
2010-01-05 15:50:36 +08:00
|
|
|
|
2018-09-05 05:00:13 +08:00
|
|
|
// Y ^ (X | Y) --> X & ~Y
|
|
|
|
// Y ^ (Y | X) --> X & ~Y
|
|
|
|
if (match(Op1, m_OneUse(m_c_Or(m_Value(X), m_Specific(Op0)))))
|
|
|
|
return BinaryOperator::CreateAnd(X, Builder.CreateNot(Op0));
|
|
|
|
// (X | Y) ^ Y --> X & ~Y
|
|
|
|
// (Y | X) ^ Y --> X & ~Y
|
|
|
|
if (match(Op0, m_OneUse(m_c_Or(m_Value(X), m_Specific(Op1)))))
|
|
|
|
return BinaryOperator::CreateAnd(X, Builder.CreateNot(Op1));
|
|
|
|
|
|
|
|
// Y ^ (X & Y) --> ~X & Y
|
|
|
|
// Y ^ (Y & X) --> ~X & Y
|
|
|
|
if (match(Op1, m_OneUse(m_c_And(m_Value(X), m_Specific(Op0)))))
|
|
|
|
return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(X));
|
|
|
|
// (X & Y) ^ Y --> ~X & Y
|
|
|
|
// (Y & X) ^ Y --> ~X & Y
|
2018-09-05 05:17:14 +08:00
|
|
|
// Canonical form is (X & C) ^ C; don't touch that.
|
2018-09-05 05:00:13 +08:00
|
|
|
// TODO: A 'not' op is better for analysis and codegen, but demanded bits must
|
|
|
|
// be fixed to prefer that (otherwise we get infinite looping).
|
2018-09-05 05:17:14 +08:00
|
|
|
if (!match(Op1, m_Constant()) &&
|
2018-09-05 05:00:13 +08:00
|
|
|
match(Op0, m_OneUse(m_c_And(m_Value(X), m_Specific(Op1)))))
|
|
|
|
return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(X));
|
2012-12-20 15:09:41 +08:00
|
|
|
|
2018-09-05 07:22:13 +08:00
|
|
|
Value *A, *B, *C;
|
|
|
|
// (A ^ B) ^ (A | C) --> (~A & C) ^ B -- There are 4 commuted variants.
|
|
|
|
if (match(&I, m_c_Xor(m_OneUse(m_Xor(m_Value(A), m_Value(B))),
|
|
|
|
m_OneUse(m_c_Or(m_Deferred(A), m_Value(C))))))
|
|
|
|
return BinaryOperator::CreateXor(
|
|
|
|
Builder.CreateAnd(Builder.CreateNot(A), C), B);
|
|
|
|
|
|
|
|
// (A ^ B) ^ (B | C) --> (~B & C) ^ A -- There are 4 commuted variants.
|
|
|
|
if (match(&I, m_c_Xor(m_OneUse(m_Xor(m_Value(A), m_Value(B))),
|
|
|
|
m_OneUse(m_c_Or(m_Deferred(B), m_Value(C))))))
|
|
|
|
return BinaryOperator::CreateXor(
|
|
|
|
Builder.CreateAnd(Builder.CreateNot(B), C), A);
|
|
|
|
|
|
|
|
// (A & B) ^ (A ^ B) -> (A | B)
|
|
|
|
if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
|
|
|
|
match(Op1, m_c_Xor(m_Specific(A), m_Specific(B))))
|
|
|
|
return BinaryOperator::CreateOr(A, B);
|
|
|
|
// (A ^ B) ^ (A & B) -> (A | B)
|
|
|
|
if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
|
|
|
|
match(Op1, m_c_And(m_Specific(A), m_Specific(B))))
|
|
|
|
return BinaryOperator::CreateOr(A, B);
|
2010-11-23 22:23:47 +08:00
|
|
|
|
2016-12-19 02:49:48 +08:00
|
|
|
// (A & ~B) ^ ~A -> ~(A & B)
|
|
|
|
// (~B & A) ^ ~A -> ~(A & B)
|
|
|
|
if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
|
2014-08-01 13:07:20 +08:00
|
|
|
match(Op1, m_Not(m_Specific(A))))
|
2017-07-08 07:16:26 +08:00
|
|
|
return BinaryOperator::CreateNot(Builder.CreateAnd(A, B));
|
2014-08-01 13:07:20 +08:00
|
|
|
|
2017-05-19 04:53:16 +08:00
|
|
|
if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
|
|
|
|
if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
|
|
|
|
if (Value *V = foldXorOfICmps(LHS, RHS))
|
|
|
|
return replaceInstUsesWith(I, V);
|
2010-01-05 15:50:36 +08:00
|
|
|
|
2016-02-25 01:00:34 +08:00
|
|
|
if (Instruction *CastedXor = foldCastedBitwiseLogic(I))
|
|
|
|
return CastedXor;
|
2010-01-05 15:50:36 +08:00
|
|
|
|
2018-06-07 05:58:12 +08:00
|
|
|
// Canonicalize a shifty way to code absolute value to the common pattern.
|
[InstCombine] canonicalize shifty abs(): ashr+add+xor --> cmp+neg+sel
We want to do this for 2 reasons:
1. Value tracking does not recognize the ashr variant, so it would fail to match for cases like D39766.
2. DAGCombiner does better at producing optimal codegen when we have the cmp+sel pattern.
More detail about what happens in the backend:
1. DAGCombiner has a generic transform for all targets to convert the scalar cmp+sel variant of abs
into the shift variant. That is the opposite of this IR canonicalization.
2. DAGCombiner has a generic transform for all targets to convert the vector cmp+sel variant of abs
into either an ABS node or the shift variant. That is again the opposite of this IR canonicalization.
3. DAGCombiner has a generic transform for all targets to convert the exact shift variants produced by #1 or #2
into an ISD::ABS node. Note: It would be an efficiency improvement if we had #1 go directly to an ABS node
when that's legal/custom.
4. The pattern matching above is incomplete, so it is possible to escape the intended/optimal codegen in a
variety of ways.
a. For #2, the vector path is missing the case for setlt with a '1' constant.
b. For #3, we are missing a match for commuted versions of the shift variants.
5. Therefore, this IR canonicalization can only help get us to the optimal codegen. The version of cmp+sel
produced by this patch will be recognized in the DAG and converted to an ABS node when possible or the
shift sequence when not.
6. In the following examples with this patch applied, we may get conditional moves rather than the shift
produced by the generic DAGCombiner transforms. The conditional move is created using a target-specific
decision for any given target. Whether it is optimal or not for a particular subtarget may be up for debate.
define i32 @abs_shifty(i32 %x) {
%signbit = ashr i32 %x, 31
%add = add i32 %signbit, %x
%abs = xor i32 %signbit, %add
ret i32 %abs
}
define i32 @abs_cmpsubsel(i32 %x) {
%cmp = icmp slt i32 %x, zeroinitializer
%sub = sub i32 zeroinitializer, %x
%abs = select i1 %cmp, i32 %sub, i32 %x
ret i32 %abs
}
define <4 x i32> @abs_shifty_vec(<4 x i32> %x) {
%signbit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%add = add <4 x i32> %signbit, %x
%abs = xor <4 x i32> %signbit, %add
ret <4 x i32> %abs
}
define <4 x i32> @abs_cmpsubsel_vec(<4 x i32> %x) {
%cmp = icmp slt <4 x i32> %x, zeroinitializer
%sub = sub <4 x i32> zeroinitializer, %x
%abs = select <4 x i1> %cmp, <4 x i32> %sub, <4 x i32> %x
ret <4 x i32> %abs
}
> $ ./opt -instcombine shiftyabs.ll -S | ./llc -o - -mtriple=x86_64 -mattr=avx
> abs_shifty:
> movl %edi, %eax
> negl %eax
> cmovll %edi, %eax
> retq
>
> abs_cmpsubsel:
> movl %edi, %eax
> negl %eax
> cmovll %edi, %eax
> retq
>
> abs_shifty_vec:
> vpabsd %xmm0, %xmm0
> retq
>
> abs_cmpsubsel_vec:
> vpabsd %xmm0, %xmm0
> retq
>
> $ ./opt -instcombine shiftyabs.ll -S | ./llc -o - -mtriple=aarch64
> abs_shifty:
> cmp w0, #0 // =0
> cneg w0, w0, mi
> ret
>
> abs_cmpsubsel:
> cmp w0, #0 // =0
> cneg w0, w0, mi
> ret
>
> abs_shifty_vec:
> abs v0.4s, v0.4s
> ret
>
> abs_cmpsubsel_vec:
> abs v0.4s, v0.4s
> ret
>
> $ ./opt -instcombine shiftyabs.ll -S | ./llc -o - -mtriple=powerpc64le
> abs_shifty:
> srawi 4, 3, 31
> add 3, 3, 4
> xor 3, 3, 4
> blr
>
> abs_cmpsubsel:
> srawi 4, 3, 31
> add 3, 3, 4
> xor 3, 3, 4
> blr
>
> abs_shifty_vec:
> vspltisw 3, -16
> vspltisw 4, 15
> vsubuwm 3, 4, 3
> vsraw 3, 2, 3
> vadduwm 2, 2, 3
> xxlxor 34, 34, 35
> blr
>
> abs_cmpsubsel_vec:
> vspltisw 3, -16
> vspltisw 4, 15
> vsubuwm 3, 4, 3
> vsraw 3, 2, 3
> vadduwm 2, 2, 3
> xxlxor 34, 34, 35
> blr
>
Differential Revision: https://reviews.llvm.org/D40984
llvm-svn: 320921
2017-12-17 00:41:17 +08:00
|
|
|
// There are 4 potential commuted variants. Move the 'ashr' candidate to Op1.
|
|
|
|
// We're relying on the fact that we only do this transform when the shift has
|
|
|
|
// exactly 2 uses and the add has exactly 1 use (otherwise, we might increase
|
|
|
|
// instructions).
|
2018-03-13 02:46:05 +08:00
|
|
|
if (Op0->hasNUses(2))
|
[InstCombine] canonicalize shifty abs(): ashr+add+xor --> cmp+neg+sel
We want to do this for 2 reasons:
1. Value tracking does not recognize the ashr variant, so it would fail to match for cases like D39766.
2. DAGCombiner does better at producing optimal codegen when we have the cmp+sel pattern.
More detail about what happens in the backend:
1. DAGCombiner has a generic transform for all targets to convert the scalar cmp+sel variant of abs
into the shift variant. That is the opposite of this IR canonicalization.
2. DAGCombiner has a generic transform for all targets to convert the vector cmp+sel variant of abs
into either an ABS node or the shift variant. That is again the opposite of this IR canonicalization.
3. DAGCombiner has a generic transform for all targets to convert the exact shift variants produced by #1 or #2
into an ISD::ABS node. Note: It would be an efficiency improvement if we had #1 go directly to an ABS node
when that's legal/custom.
4. The pattern matching above is incomplete, so it is possible to escape the intended/optimal codegen in a
variety of ways.
a. For #2, the vector path is missing the case for setlt with a '1' constant.
b. For #3, we are missing a match for commuted versions of the shift variants.
5. Therefore, this IR canonicalization can only help get us to the optimal codegen. The version of cmp+sel
produced by this patch will be recognized in the DAG and converted to an ABS node when possible or the
shift sequence when not.
6. In the following examples with this patch applied, we may get conditional moves rather than the shift
produced by the generic DAGCombiner transforms. The conditional move is created using a target-specific
decision for any given target. Whether it is optimal or not for a particular subtarget may be up for debate.
define i32 @abs_shifty(i32 %x) {
%signbit = ashr i32 %x, 31
%add = add i32 %signbit, %x
%abs = xor i32 %signbit, %add
ret i32 %abs
}
define i32 @abs_cmpsubsel(i32 %x) {
%cmp = icmp slt i32 %x, zeroinitializer
%sub = sub i32 zeroinitializer, %x
%abs = select i1 %cmp, i32 %sub, i32 %x
ret i32 %abs
}
define <4 x i32> @abs_shifty_vec(<4 x i32> %x) {
%signbit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%add = add <4 x i32> %signbit, %x
%abs = xor <4 x i32> %signbit, %add
ret <4 x i32> %abs
}
define <4 x i32> @abs_cmpsubsel_vec(<4 x i32> %x) {
%cmp = icmp slt <4 x i32> %x, zeroinitializer
%sub = sub <4 x i32> zeroinitializer, %x
%abs = select <4 x i1> %cmp, <4 x i32> %sub, <4 x i32> %x
ret <4 x i32> %abs
}
> $ ./opt -instcombine shiftyabs.ll -S | ./llc -o - -mtriple=x86_64 -mattr=avx
> abs_shifty:
> movl %edi, %eax
> negl %eax
> cmovll %edi, %eax
> retq
>
> abs_cmpsubsel:
> movl %edi, %eax
> negl %eax
> cmovll %edi, %eax
> retq
>
> abs_shifty_vec:
> vpabsd %xmm0, %xmm0
> retq
>
> abs_cmpsubsel_vec:
> vpabsd %xmm0, %xmm0
> retq
>
> $ ./opt -instcombine shiftyabs.ll -S | ./llc -o - -mtriple=aarch64
> abs_shifty:
> cmp w0, #0 // =0
> cneg w0, w0, mi
> ret
>
> abs_cmpsubsel:
> cmp w0, #0 // =0
> cneg w0, w0, mi
> ret
>
> abs_shifty_vec:
> abs v0.4s, v0.4s
> ret
>
> abs_cmpsubsel_vec:
> abs v0.4s, v0.4s
> ret
>
> $ ./opt -instcombine shiftyabs.ll -S | ./llc -o - -mtriple=powerpc64le
> abs_shifty:
> srawi 4, 3, 31
> add 3, 3, 4
> xor 3, 3, 4
> blr
>
> abs_cmpsubsel:
> srawi 4, 3, 31
> add 3, 3, 4
> xor 3, 3, 4
> blr
>
> abs_shifty_vec:
> vspltisw 3, -16
> vspltisw 4, 15
> vsubuwm 3, 4, 3
> vsraw 3, 2, 3
> vadduwm 2, 2, 3
> xxlxor 34, 34, 35
> blr
>
> abs_cmpsubsel_vec:
> vspltisw 3, -16
> vspltisw 4, 15
> vsubuwm 3, 4, 3
> vsraw 3, 2, 3
> vadduwm 2, 2, 3
> xxlxor 34, 34, 35
> blr
>
Differential Revision: https://reviews.llvm.org/D40984
llvm-svn: 320921
2017-12-17 00:41:17 +08:00
|
|
|
std::swap(Op0, Op1);
|
|
|
|
|
|
|
|
const APInt *ShAmt;
|
|
|
|
Type *Ty = I.getType();
|
|
|
|
if (match(Op1, m_AShr(m_Value(A), m_APInt(ShAmt))) &&
|
2018-03-13 02:46:05 +08:00
|
|
|
Op1->hasNUses(2) && *ShAmt == Ty->getScalarSizeInBits() - 1 &&
|
[InstCombine] canonicalize shifty abs(): ashr+add+xor --> cmp+neg+sel
We want to do this for 2 reasons:
1. Value tracking does not recognize the ashr variant, so it would fail to match for cases like D39766.
2. DAGCombiner does better at producing optimal codegen when we have the cmp+sel pattern.
More detail about what happens in the backend:
1. DAGCombiner has a generic transform for all targets to convert the scalar cmp+sel variant of abs
into the shift variant. That is the opposite of this IR canonicalization.
2. DAGCombiner has a generic transform for all targets to convert the vector cmp+sel variant of abs
into either an ABS node or the shift variant. That is again the opposite of this IR canonicalization.
3. DAGCombiner has a generic transform for all targets to convert the exact shift variants produced by #1 or #2
into an ISD::ABS node. Note: It would be an efficiency improvement if we had #1 go directly to an ABS node
when that's legal/custom.
4. The pattern matching above is incomplete, so it is possible to escape the intended/optimal codegen in a
variety of ways.
a. For #2, the vector path is missing the case for setlt with a '1' constant.
b. For #3, we are missing a match for commuted versions of the shift variants.
5. Therefore, this IR canonicalization can only help get us to the optimal codegen. The version of cmp+sel
produced by this patch will be recognized in the DAG and converted to an ABS node when possible or the
shift sequence when not.
6. In the following examples with this patch applied, we may get conditional moves rather than the shift
produced by the generic DAGCombiner transforms. The conditional move is created using a target-specific
decision for any given target. Whether it is optimal or not for a particular subtarget may be up for debate.
define i32 @abs_shifty(i32 %x) {
%signbit = ashr i32 %x, 31
%add = add i32 %signbit, %x
%abs = xor i32 %signbit, %add
ret i32 %abs
}
define i32 @abs_cmpsubsel(i32 %x) {
%cmp = icmp slt i32 %x, zeroinitializer
%sub = sub i32 zeroinitializer, %x
%abs = select i1 %cmp, i32 %sub, i32 %x
ret i32 %abs
}
define <4 x i32> @abs_shifty_vec(<4 x i32> %x) {
%signbit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%add = add <4 x i32> %signbit, %x
%abs = xor <4 x i32> %signbit, %add
ret <4 x i32> %abs
}
define <4 x i32> @abs_cmpsubsel_vec(<4 x i32> %x) {
%cmp = icmp slt <4 x i32> %x, zeroinitializer
%sub = sub <4 x i32> zeroinitializer, %x
%abs = select <4 x i1> %cmp, <4 x i32> %sub, <4 x i32> %x
ret <4 x i32> %abs
}
> $ ./opt -instcombine shiftyabs.ll -S | ./llc -o - -mtriple=x86_64 -mattr=avx
> abs_shifty:
> movl %edi, %eax
> negl %eax
> cmovll %edi, %eax
> retq
>
> abs_cmpsubsel:
> movl %edi, %eax
> negl %eax
> cmovll %edi, %eax
> retq
>
> abs_shifty_vec:
> vpabsd %xmm0, %xmm0
> retq
>
> abs_cmpsubsel_vec:
> vpabsd %xmm0, %xmm0
> retq
>
> $ ./opt -instcombine shiftyabs.ll -S | ./llc -o - -mtriple=aarch64
> abs_shifty:
> cmp w0, #0 // =0
> cneg w0, w0, mi
> ret
>
> abs_cmpsubsel:
> cmp w0, #0 // =0
> cneg w0, w0, mi
> ret
>
> abs_shifty_vec:
> abs v0.4s, v0.4s
> ret
>
> abs_cmpsubsel_vec:
> abs v0.4s, v0.4s
> ret
>
> $ ./opt -instcombine shiftyabs.ll -S | ./llc -o - -mtriple=powerpc64le
> abs_shifty:
> srawi 4, 3, 31
> add 3, 3, 4
> xor 3, 3, 4
> blr
>
> abs_cmpsubsel:
> srawi 4, 3, 31
> add 3, 3, 4
> xor 3, 3, 4
> blr
>
> abs_shifty_vec:
> vspltisw 3, -16
> vspltisw 4, 15
> vsubuwm 3, 4, 3
> vsraw 3, 2, 3
> vadduwm 2, 2, 3
> xxlxor 34, 34, 35
> blr
>
> abs_cmpsubsel_vec:
> vspltisw 3, -16
> vspltisw 4, 15
> vsubuwm 3, 4, 3
> vsraw 3, 2, 3
> vadduwm 2, 2, 3
> xxlxor 34, 34, 35
> blr
>
Differential Revision: https://reviews.llvm.org/D40984
llvm-svn: 320921
2017-12-17 00:41:17 +08:00
|
|
|
match(Op0, m_OneUse(m_c_Add(m_Specific(A), m_Specific(Op1))))) {
|
|
|
|
// B = ashr i32 A, 31 ; smear the sign bit
|
|
|
|
// xor (add A, B), B ; add -1 and flip bits if negative
|
|
|
|
// --> (A < 0) ? -A : A
|
|
|
|
Value *Cmp = Builder.CreateICmpSLT(A, ConstantInt::getNullValue(Ty));
|
2018-05-18 00:29:52 +08:00
|
|
|
// Copy the nuw/nsw flags from the add to the negate.
|
|
|
|
auto *Add = cast<BinaryOperator>(Op0);
|
|
|
|
Value *Neg = Builder.CreateNeg(A, "", Add->hasNoUnsignedWrap(),
|
|
|
|
Add->hasNoSignedWrap());
|
|
|
|
return SelectInst::Create(Cmp, Neg, A);
|
[InstCombine] canonicalize shifty abs(): ashr+add+xor --> cmp+neg+sel
We want to do this for 2 reasons:
1. Value tracking does not recognize the ashr variant, so it would fail to match for cases like D39766.
2. DAGCombiner does better at producing optimal codegen when we have the cmp+sel pattern.
More detail about what happens in the backend:
1. DAGCombiner has a generic transform for all targets to convert the scalar cmp+sel variant of abs
into the shift variant. That is the opposite of this IR canonicalization.
2. DAGCombiner has a generic transform for all targets to convert the vector cmp+sel variant of abs
into either an ABS node or the shift variant. That is again the opposite of this IR canonicalization.
3. DAGCombiner has a generic transform for all targets to convert the exact shift variants produced by #1 or #2
into an ISD::ABS node. Note: It would be an efficiency improvement if we had #1 go directly to an ABS node
when that's legal/custom.
4. The pattern matching above is incomplete, so it is possible to escape the intended/optimal codegen in a
variety of ways.
a. For #2, the vector path is missing the case for setlt with a '1' constant.
b. For #3, we are missing a match for commuted versions of the shift variants.
5. Therefore, this IR canonicalization can only help get us to the optimal codegen. The version of cmp+sel
produced by this patch will be recognized in the DAG and converted to an ABS node when possible or the
shift sequence when not.
6. In the following examples with this patch applied, we may get conditional moves rather than the shift
produced by the generic DAGCombiner transforms. The conditional move is created using a target-specific
decision for any given target. Whether it is optimal or not for a particular subtarget may be up for debate.
define i32 @abs_shifty(i32 %x) {
%signbit = ashr i32 %x, 31
%add = add i32 %signbit, %x
%abs = xor i32 %signbit, %add
ret i32 %abs
}
define i32 @abs_cmpsubsel(i32 %x) {
%cmp = icmp slt i32 %x, zeroinitializer
%sub = sub i32 zeroinitializer, %x
%abs = select i1 %cmp, i32 %sub, i32 %x
ret i32 %abs
}
define <4 x i32> @abs_shifty_vec(<4 x i32> %x) {
%signbit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%add = add <4 x i32> %signbit, %x
%abs = xor <4 x i32> %signbit, %add
ret <4 x i32> %abs
}
define <4 x i32> @abs_cmpsubsel_vec(<4 x i32> %x) {
%cmp = icmp slt <4 x i32> %x, zeroinitializer
%sub = sub <4 x i32> zeroinitializer, %x
%abs = select <4 x i1> %cmp, <4 x i32> %sub, <4 x i32> %x
ret <4 x i32> %abs
}
> $ ./opt -instcombine shiftyabs.ll -S | ./llc -o - -mtriple=x86_64 -mattr=avx
> abs_shifty:
> movl %edi, %eax
> negl %eax
> cmovll %edi, %eax
> retq
>
> abs_cmpsubsel:
> movl %edi, %eax
> negl %eax
> cmovll %edi, %eax
> retq
>
> abs_shifty_vec:
> vpabsd %xmm0, %xmm0
> retq
>
> abs_cmpsubsel_vec:
> vpabsd %xmm0, %xmm0
> retq
>
> $ ./opt -instcombine shiftyabs.ll -S | ./llc -o - -mtriple=aarch64
> abs_shifty:
> cmp w0, #0 // =0
> cneg w0, w0, mi
> ret
>
> abs_cmpsubsel:
> cmp w0, #0 // =0
> cneg w0, w0, mi
> ret
>
> abs_shifty_vec:
> abs v0.4s, v0.4s
> ret
>
> abs_cmpsubsel_vec:
> abs v0.4s, v0.4s
> ret
>
> $ ./opt -instcombine shiftyabs.ll -S | ./llc -o - -mtriple=powerpc64le
> abs_shifty:
> srawi 4, 3, 31
> add 3, 3, 4
> xor 3, 3, 4
> blr
>
> abs_cmpsubsel:
> srawi 4, 3, 31
> add 3, 3, 4
> xor 3, 3, 4
> blr
>
> abs_shifty_vec:
> vspltisw 3, -16
> vspltisw 4, 15
> vsubuwm 3, 4, 3
> vsraw 3, 2, 3
> vadduwm 2, 2, 3
> xxlxor 34, 34, 35
> blr
>
> abs_cmpsubsel_vec:
> vspltisw 3, -16
> vspltisw 4, 15
> vsubuwm 3, 4, 3
> vsraw 3, 2, 3
> vadduwm 2, 2, 3
> xxlxor 34, 34, 35
> blr
>
Differential Revision: https://reviews.llvm.org/D40984
llvm-svn: 320921
2017-12-17 00:41:17 +08:00
|
|
|
}
|
|
|
|
|
2018-04-11 18:29:37 +08:00
|
|
|
// Eliminate a bitwise 'not' op of 'not' min/max by inverting the min/max:
|
|
|
|
//
|
|
|
|
// %notx = xor i32 %x, -1
|
|
|
|
// %cmp1 = icmp sgt i32 %notx, %y
|
|
|
|
// %smax = select i1 %cmp1, i32 %notx, i32 %y
|
|
|
|
// %res = xor i32 %smax, -1
|
|
|
|
// =>
|
|
|
|
// %noty = xor i32 %y, -1
|
|
|
|
// %cmp2 = icmp slt i32 %x, %noty
|
|
|
|
// %res = select i1 %cmp2, i32 %x, i32 %noty
|
|
|
|
//
|
|
|
|
// Same is applicable for smin/umax/umin.
|
2018-08-22 03:17:00 +08:00
|
|
|
if (match(Op1, m_AllOnes()) && Op0->hasOneUse()) {
|
2018-04-11 18:29:37 +08:00
|
|
|
Value *LHS, *RHS;
|
|
|
|
SelectPatternFlavor SPF = matchSelectPattern(Op0, LHS, RHS).Flavor;
|
2018-08-22 03:17:00 +08:00
|
|
|
if (SelectPatternResult::isMinOrMax(SPF)) {
|
2018-09-08 00:19:50 +08:00
|
|
|
// It's possible we get here before the not has been simplified, so make
|
|
|
|
// sure the input to the not isn't freely invertible.
|
|
|
|
if (match(LHS, m_Not(m_Value(X))) && !IsFreeToInvert(X, X->hasOneUse())) {
|
2018-04-11 18:29:37 +08:00
|
|
|
Value *NotY = Builder.CreateNot(RHS);
|
|
|
|
return SelectInst::Create(
|
|
|
|
Builder.CreateICmp(getInverseMinMaxPred(SPF), X, NotY), X, NotY);
|
|
|
|
}
|
2018-09-08 00:19:50 +08:00
|
|
|
|
|
|
|
// It's possible we get here before the not has been simplified, so make
|
|
|
|
// sure the input to the not isn't freely invertible.
|
|
|
|
if (match(RHS, m_Not(m_Value(Y))) && !IsFreeToInvert(Y, Y->hasOneUse())) {
|
|
|
|
Value *NotX = Builder.CreateNot(LHS);
|
|
|
|
return SelectInst::Create(
|
|
|
|
Builder.CreateICmp(getInverseMinMaxPred(SPF), NotX, Y), NotX, Y);
|
|
|
|
}
|
2018-09-14 02:52:58 +08:00
|
|
|
|
|
|
|
// If both sides are freely invertible, then we can get rid of the xor
|
|
|
|
// completely.
|
|
|
|
if (IsFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) &&
|
|
|
|
IsFreeToInvert(RHS, !RHS->hasNUsesOrMore(3))) {
|
|
|
|
Value *NotLHS = Builder.CreateNot(LHS);
|
|
|
|
Value *NotRHS = Builder.CreateNot(RHS);
|
|
|
|
return SelectInst::Create(
|
|
|
|
Builder.CreateICmp(getInverseMinMaxPred(SPF), NotLHS, NotRHS),
|
|
|
|
NotLHS, NotRHS);
|
|
|
|
}
|
2018-04-11 18:29:37 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-08-08 21:31:19 +08:00
|
|
|
if (Instruction *NewXor = sinkNotIntoXor(I, Builder))
|
|
|
|
return NewXor;
|
|
|
|
|
2018-07-13 09:18:07 +08:00
|
|
|
return nullptr;
|
2010-01-05 15:50:36 +08:00
|
|
|
}
|