[InstCombine] allow more shuffle folds using safe constants

getSafeVectorConstantForBinop() was calling getBinOpIdentity() assuming
that the constant we wanted was operand 1 (RHS). That's wrong, but I
don't think we could expose a bug or even a suboptimal fold from that
because the callers have other guards for any binop that would have
been affected.

llvm-svn: 336617
This commit is contained in:
Sanjay Patel 2018-07-09 23:22:47 +00:00
parent a5bb6d53f2
commit 69faf464ed
4 changed files with 53 additions and 32 deletions

View File

@ -214,24 +214,55 @@ IntrinsicIDToOverflowCheckFlavor(unsigned ID) {
/// Some binary operators require special handling to avoid poison and undefined
/// behavior. If a constant vector has undef elements, replace those undefs with
/// identity constants because those are always safe to execute. If no identity
/// constant exists, replace undef with '1' or '1.0'.
/// identity constants if possible because those are always safe to execute.
/// If no identity constant exists, replace undef with some other safe constant.
static inline Constant *getSafeVectorConstantForBinop(
BinaryOperator::BinaryOps Opcode, Constant *In) {
Type *Ty = In->getType();
assert(Ty->isVectorTy() && "Not expecting scalars here");
BinaryOperator::BinaryOps Opcode, Constant *In, bool IsRHSConstant) {
assert(In->getType()->isVectorTy() && "Not expecting scalars here");
Type *EltTy = Ty->getVectorElementType();
Constant *IdentityC = ConstantExpr::getBinOpIdentity(Opcode, EltTy, true);
if (!IdentityC)
IdentityC = EltTy->isIntegerTy() ? ConstantInt::get(EltTy, 1):
ConstantFP::get(EltTy, 1.0);
unsigned NumElts = Ty->getVectorNumElements();
Type *EltTy = In->getType()->getVectorElementType();
auto *SafeC = ConstantExpr::getBinOpIdentity(Opcode, EltTy, IsRHSConstant);
if (!SafeC) {
// TODO: Should this be available as a constant utility function? It is
// similar to getBinOpAbsorber().
if (IsRHSConstant) {
switch (Opcode) {
case Instruction::SRem: // X % 1 = 0
case Instruction::URem: // X %u 1 = 0
SafeC = ConstantInt::get(EltTy, 1);
break;
case Instruction::FRem: // X % 1.0 (doesn't simplify, but it is safe)
SafeC = ConstantFP::get(EltTy, 1.0);
break;
default:
llvm_unreachable("Only rem opcodes have no identity constant for RHS");
}
} else {
switch (Opcode) {
case Instruction::Shl: // 0 << X = 0
case Instruction::LShr: // 0 >>u X = 0
case Instruction::AShr: // 0 >> X = 0
case Instruction::SDiv: // 0 / X = 0
case Instruction::UDiv: // 0 /u X = 0
case Instruction::SRem: // 0 % X = 0
case Instruction::URem: // 0 %u X = 0
case Instruction::Sub: // 0 - X (doesn't simplify, but it is safe)
case Instruction::FSub: // 0.0 - X (doesn't simplify, but it is safe)
case Instruction::FDiv: // 0.0 / X (doesn't simplify, but it is safe)
case Instruction::FRem: // 0.0 % X = 0
SafeC = Constant::getNullValue(EltTy);
break;
default:
llvm_unreachable("Expected to find identity constant for opcode");
}
}
}
assert(SafeC && "Must have safe constant for binop");
unsigned NumElts = In->getType()->getVectorNumElements();
SmallVector<Constant *, 16> Out(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
Constant *C = In->getAggregateElement(i);
Out[i] = isa<UndefValue>(C) ? IdentityC : C;
Out[i] = isa<UndefValue>(C) ? SafeC : C;
}
return ConstantVector::get(Out);
}

View File

@ -1293,14 +1293,8 @@ static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
Value *V;
if (X == Y) {
// The new binop constant must not have any potential for extra poison/UB.
if (MightCreatePoisonOrUB) {
// TODO: Use getBinOpAbsorber for LHS replacement constants?
if (!ConstantsAreOp1)
return nullptr;
// Replace undef elements with identity constants.
NewC = getSafeVectorConstantForBinop(BOpc, NewC);
}
if (MightCreatePoisonOrUB)
NewC = getSafeVectorConstantForBinop(BOpc, NewC, ConstantsAreOp1);
// Remove a binop and the shuffle by rearranging the constant:
// shuffle (op V, C0), (op V, C1), M --> op V, C'

View File

@ -1421,9 +1421,9 @@ Instruction *InstCombiner::foldShuffledBinop(BinaryOperator &Inst) {
// It may not be safe to execute a binop on a vector with undef elements
// because the entire instruction can be folded to undef or create poison
// that did not exist in the original code.
if (Inst.isIntDivRem() ||
(Inst.isShift() && isa<Constant>(Inst.getOperand(1))))
NewC = getSafeVectorConstantForBinop(Inst.getOpcode(), NewC);
bool ConstOp1 = isa<Constant>(Inst.getOperand(1));
if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
NewC = getSafeVectorConstantForBinop(Inst.getOpcode(), NewC, ConstOp1);
// Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
// Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)

View File

@ -720,13 +720,11 @@ define <4 x i32> @urem_urem(<4 x i32> %v0) {
ret <4 x i32> %t3
}
; TODO: This could be folded by using a safe constant.
; This is folded by using a safe constant.
define <4 x i32> @urem_urem_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @urem_urem_undef_mask_elt(
; CHECK-NEXT: [[T1:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT: [[T2:%.*]] = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V0]]
; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
; CHECK-NEXT: [[T3:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 7, i32 0>, [[V0:%.*]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
%t1 = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
@ -746,13 +744,11 @@ define <4 x i32> @srem_srem(<4 x i32> %v0) {
ret <4 x i32> %t3
}
; TODO: This could be folded by using a safe constant.
; This is folded by using a safe constant.
define <4 x i32> @srem_srem_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @srem_srem_undef_mask_elt(
; CHECK-NEXT: [[T1:%.*]] = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT: [[T2:%.*]] = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V0]]
; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 undef, i32 6, i32 3>
; CHECK-NEXT: [[T3:%.*]] = srem <4 x i32> <i32 1, i32 0, i32 7, i32 4>, [[V0:%.*]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
%t1 = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0