forked from OSchip/llvm-project
Revert r155136 "Defer some shl transforms to DAGCombine."
While the patch was perfect and defect free, it exposed a really nasty bug in X86 SelectionDAG that caused an llc crash when compiling lencod. I'll put the patch back in after fixing the SelectionDAG problem. llvm-svn: 155181
This commit is contained in:
parent
9c74e4310c
commit
205ee3b389
|
@ -529,19 +529,6 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
|
|||
ShiftOp = 0;
|
||||
|
||||
if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
|
||||
|
||||
// This is a constant shift of a constant shift. Be careful about hiding
|
||||
// shl instructions behind bit masks. They are used to represent multiplies
|
||||
// by a constant, and it is important that simple arithmetic expressions
|
||||
// are still recognizable by scalar evolution.
|
||||
//
|
||||
// The transforms applied to shl are very similar to the transforms applied
|
||||
// to mul by constant. We can be more aggressive about optimizing right
|
||||
// shifts.
|
||||
//
|
||||
// Combinations of right and left shifts will still be optimized in
|
||||
// DAGCombine where scalar evolution no longer applies.
|
||||
|
||||
ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
|
||||
uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
|
||||
uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits);
|
||||
|
@ -567,6 +554,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
|
|||
}
|
||||
|
||||
if (ShiftAmt1 == ShiftAmt2) {
|
||||
// If we have ((X >>? C) << C), turn this into X & (-1 << C).
|
||||
if (I.getOpcode() == Instruction::Shl &&
|
||||
ShiftOp->getOpcode() != Instruction::Shl) {
|
||||
APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
|
||||
return BinaryOperator::CreateAnd(X,
|
||||
ConstantInt::get(I.getContext(),Mask));
|
||||
}
|
||||
// If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
|
||||
if (I.getOpcode() == Instruction::LShr &&
|
||||
ShiftOp->getOpcode() == Instruction::Shl) {
|
||||
|
@ -576,23 +570,28 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
|
|||
}
|
||||
} else if (ShiftAmt1 < ShiftAmt2) {
|
||||
uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
|
||||
|
||||
// (X >>?,exact C1) << C2 --> X << (C2-C1)
|
||||
// The inexact version is deferred to DAGCombine so we don't hide shl
|
||||
// behind a bit mask.
|
||||
|
||||
// (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
|
||||
if (I.getOpcode() == Instruction::Shl &&
|
||||
ShiftOp->getOpcode() != Instruction::Shl &&
|
||||
ShiftOp->isExact()) {
|
||||
ShiftOp->getOpcode() != Instruction::Shl) {
|
||||
assert(ShiftOp->getOpcode() == Instruction::LShr ||
|
||||
ShiftOp->getOpcode() == Instruction::AShr);
|
||||
ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
|
||||
BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
|
||||
X, ShiftDiffCst);
|
||||
NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
|
||||
NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
|
||||
return NewShl;
|
||||
if (ShiftOp->isExact()) {
|
||||
// (X >>?,exact C1) << C2 --> X << (C2-C1)
|
||||
BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
|
||||
X, ShiftDiffCst);
|
||||
NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
|
||||
NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
|
||||
return NewShl;
|
||||
}
|
||||
Value *Shift = Builder->CreateShl(X, ShiftDiffCst);
|
||||
|
||||
APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
|
||||
return BinaryOperator::CreateAnd(Shift,
|
||||
ConstantInt::get(I.getContext(),Mask));
|
||||
}
|
||||
|
||||
|
||||
// (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
|
||||
if (I.getOpcode() == Instruction::LShr &&
|
||||
ShiftOp->getOpcode() == Instruction::Shl) {
|
||||
|
@ -628,19 +627,24 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
|
|||
assert(ShiftAmt2 < ShiftAmt1);
|
||||
uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
|
||||
|
||||
// (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
|
||||
// The inexact version is deferred to DAGCombine so we don't hide shl
|
||||
// behind a bit mask.
|
||||
// (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
|
||||
if (I.getOpcode() == Instruction::Shl &&
|
||||
ShiftOp->getOpcode() != Instruction::Shl &&
|
||||
ShiftOp->isExact()) {
|
||||
ShiftOp->getOpcode() != Instruction::Shl) {
|
||||
ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
|
||||
BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(),
|
||||
X, ShiftDiffCst);
|
||||
NewShr->setIsExact(true);
|
||||
return NewShr;
|
||||
if (ShiftOp->isExact()) {
|
||||
// (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
|
||||
BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(),
|
||||
X, ShiftDiffCst);
|
||||
NewShr->setIsExact(true);
|
||||
return NewShr;
|
||||
}
|
||||
Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(),
|
||||
X, ShiftDiffCst);
|
||||
APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
|
||||
return BinaryOperator::CreateAnd(Shift,
|
||||
ConstantInt::get(I.getContext(),Mask));
|
||||
}
|
||||
|
||||
|
||||
// (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2)
|
||||
if (I.getOpcode() == Instruction::LShr &&
|
||||
ShiftOp->getOpcode() == Instruction::Shl) {
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
define i32 @main(i32 %argc) nounwind ssp {
|
||||
entry:
|
||||
%tmp3151 = trunc i32 %argc to i8
|
||||
; CHECK: %tmp3163 = shl i8 %tmp3162, 6
|
||||
; CHECK: and i8 %tmp3163, 64
|
||||
; CHECK: %tmp3162 = shl i8 %tmp3151, 5
|
||||
; CHECK: and i8 %tmp3162, 64
|
||||
; CHECK-NOT: shl
|
||||
; CHECK-NOT: shr
|
||||
%tmp3161 = or i8 %tmp3151, -17
|
||||
|
@ -38,8 +38,8 @@ bb:
|
|||
%tmp10 = lshr i8 %tmp8, 7
|
||||
%tmp11 = shl i8 %tmp10, 5
|
||||
|
||||
; CHECK: %tmp10 = lshr i8 %tmp8, 7
|
||||
; CHECK: %tmp11 = shl nuw nsw i8 %tmp10, 5
|
||||
; CHECK: %0 = lshr i8 %tmp8, 2
|
||||
; CHECK: %tmp11 = and i8 %0, 32
|
||||
|
||||
%tmp12 = xor i8 %tmp11, %tmp9
|
||||
ret i8 %tmp12
|
||||
|
|
|
@ -47,21 +47,13 @@ define i32 @test5a(i32 %A) {
|
|||
}
|
||||
|
||||
; CHECK: @test6
|
||||
; CHECK: mul i55 %A, 6
|
||||
; CHECK-NOT: sh
|
||||
define i55 @test6(i55 %A) {
|
||||
%B = shl i55 %A, 1 ; <i55> [#uses=1]
|
||||
%C = mul i55 %B, 3 ; <i55> [#uses=1]
|
||||
ret i55 %C
|
||||
}
|
||||
|
||||
; CHECK: @test6a
|
||||
; CHECK: mul i55 %A, 6
|
||||
define i55 @test6a(i55 %A) {
|
||||
%B = mul i55 %A, 3 ; <i55> [#uses=1]
|
||||
%C = shl i55 %B, 1 ; <i55> [#uses=1]
|
||||
ret i55 %C
|
||||
}
|
||||
|
||||
; CHECK: @test7
|
||||
; CHECK-NOT: sh
|
||||
define i29 @test7(i8 %X) {
|
||||
|
@ -95,8 +87,7 @@ define i19 @test10(i19 %A) {
|
|||
}
|
||||
|
||||
; CHECK: @test11
|
||||
; Don't hide the shl from scalar evolution. DAGCombine will get it.
|
||||
; CHECK: shl
|
||||
; CHECK-NOT: sh
|
||||
define i23 @test11(i23 %A) {
|
||||
%a = mul i23 %A, 3 ; <i23> [#uses=1]
|
||||
%B = lshr i23 %a, 11 ; <i23> [#uses=1]
|
||||
|
@ -113,8 +104,7 @@ define i47 @test12(i47 %A) {
|
|||
}
|
||||
|
||||
; CHECK: @test13
|
||||
; Don't hide the shl from scalar evolution. DAGCombine will get it.
|
||||
; CHECK: shl
|
||||
; CHECK-NOT: sh
|
||||
define i18 @test13(i18 %A) {
|
||||
%a = mul i18 %A, 3 ; <i18> [#uses=1]
|
||||
%B = ashr i18 %a, 8 ; <i18> [#uses=1]
|
||||
|
|
|
@ -457,12 +457,10 @@ define i64 @test50(i64 %A) {
|
|||
%E = sext i32 %D to i64
|
||||
ret i64 %E
|
||||
; CHECK: @test50
|
||||
; lshr+shl will be handled by DAGCombine.
|
||||
; CHECK-NEXT: lshr i64 %A, 2
|
||||
; CHECK-NEXT: shl i64 %a, 32
|
||||
; CHECK-NEXT: shl i64 %A, 30
|
||||
; CHECK-NEXT: add i64 {{.*}}, -4294967296
|
||||
; CHECK-NEXT: %E = ashr exact i64 {{.*}}, 32
|
||||
; CHECK-NEXT: ret i64 %E
|
||||
; CHECK-NEXT: %sext = ashr i64 {{.*}}, 32
|
||||
; CHECK-NEXT: ret i64 %sext
|
||||
}
|
||||
|
||||
define i64 @test51(i64 %A, i1 %cond) {
|
||||
|
|
|
@ -65,17 +65,8 @@ define i32 @test6(i32 %A) {
|
|||
; CHECK: @test6
|
||||
; CHECK-NEXT: mul i32 %A, 6
|
||||
; CHECK-NEXT: ret i32
|
||||
%B = shl i32 %A, 1 ;; convert to an mul instruction
|
||||
%C = mul i32 %B, 3
|
||||
ret i32 %C
|
||||
}
|
||||
|
||||
define i32 @test6a(i32 %A) {
|
||||
; CHECK: @test6a
|
||||
; CHECK-NEXT: mul i32 %A, 6
|
||||
; CHECK-NEXT: ret i32
|
||||
%B = mul i32 %A, 3
|
||||
%C = shl i32 %B, 1 ;; convert to an mul instruction
|
||||
%B = shl i32 %A, 1 ;; convert to an mul instruction
|
||||
%C = mul i32 %B, 3
|
||||
ret i32 %C
|
||||
}
|
||||
|
||||
|
@ -106,9 +97,7 @@ define i8 @test9(i8 %A) {
|
|||
ret i8 %C
|
||||
}
|
||||
|
||||
;; This transformation is deferred to DAGCombine:
|
||||
;; (A >> 7) << 7 === A & 128
|
||||
;; The shl may be valuable to scalar evolution.
|
||||
define i8 @test10(i8 %A) {
|
||||
; CHECK: @test10
|
||||
; CHECK-NEXT: and i8 %A, -128
|
||||
|
@ -118,21 +107,11 @@ define i8 @test10(i8 %A) {
|
|||
ret i8 %C
|
||||
}
|
||||
|
||||
;; Allow the simplification when the lshr shift is exact.
|
||||
define i8 @test10a(i8 %A) {
|
||||
; CHECK: @test10a
|
||||
; CHECK-NEXT: ret i8 %A
|
||||
%B = lshr exact i8 %A, 7
|
||||
%C = shl i8 %B, 7
|
||||
ret i8 %C
|
||||
}
|
||||
|
||||
;; This transformation is deferred to DAGCombine:
|
||||
;; (A >> 3) << 4 === (A & 0x1F) << 1
|
||||
;; The shl may be valuable to scalar evolution.
|
||||
define i8 @test11(i8 %A) {
|
||||
; CHECK: @test11
|
||||
; CHECK: shl i8
|
||||
; CHECK-NEXT: mul i8 %A, 6
|
||||
; CHECK-NEXT: and i8
|
||||
; CHECK-NEXT: ret i8
|
||||
%a = mul i8 %A, 3 ; <i8> [#uses=1]
|
||||
%B = lshr i8 %a, 3 ; <i8> [#uses=1]
|
||||
|
@ -140,18 +119,6 @@ define i8 @test11(i8 %A) {
|
|||
ret i8 %C
|
||||
}
|
||||
|
||||
;; Allow the simplification in InstCombine when the lshr shift is exact.
|
||||
define i8 @test11a(i8 %A) {
|
||||
; CHECK: @test11a
|
||||
; CHECK-NEXT: mul i8 %A, 6
|
||||
; CHECK-NEXT: ret i8
|
||||
%a = mul i8 %A, 3
|
||||
%B = lshr exact i8 %a, 3
|
||||
%C = shl i8 %B, 4
|
||||
ret i8 %C
|
||||
}
|
||||
|
||||
;; This is deferred to DAGCombine unless %B is single-use.
|
||||
;; (A >> 8) << 8 === A & -256
|
||||
define i32 @test12(i32 %A) {
|
||||
; CHECK: @test12
|
||||
|
@ -162,12 +129,11 @@ define i32 @test12(i32 %A) {
|
|||
ret i32 %C
|
||||
}
|
||||
|
||||
;; This transformation is deferred to DAGCombine:
|
||||
;; (A >> 3) << 4 === (A & -8) * 2
|
||||
;; The shl may be valuable to scalar evolution.
|
||||
define i8 @test13(i8 %A) {
|
||||
; CHECK: @test13
|
||||
; CHECK: shl i8
|
||||
; CHECK-NEXT: mul i8 %A, 6
|
||||
; CHECK-NEXT: and i8
|
||||
; CHECK-NEXT: ret i8
|
||||
%a = mul i8 %A, 3 ; <i8> [#uses=1]
|
||||
%B = ashr i8 %a, 3 ; <i8> [#uses=1]
|
||||
|
@ -175,16 +141,6 @@ define i8 @test13(i8 %A) {
|
|||
ret i8 %C
|
||||
}
|
||||
|
||||
define i8 @test13a(i8 %A) {
|
||||
; CHECK: @test13a
|
||||
; CHECK-NEXT: mul i8 %A, 6
|
||||
; CHECK-NEXT: ret i8
|
||||
%a = mul i8 %A, 3
|
||||
%B = ashr exact i8 %a, 3
|
||||
%C = shl i8 %B, 4
|
||||
ret i8 %C
|
||||
}
|
||||
|
||||
;; D = ((B | 1234) << 4) === ((B << 4)|(1234 << 4)
|
||||
define i32 @test14(i32 %A) {
|
||||
; CHECK: @test14
|
||||
|
@ -521,11 +477,10 @@ entry:
|
|||
%tmp49 = lshr i8 %tmp48, 5
|
||||
%tmp50 = mul i8 %tmp49, 64
|
||||
%tmp51 = xor i8 %tmp50, %tmp5
|
||||
; CHECK: and i8 %0, 16
|
||||
%tmp52 = and i8 %tmp51, -128
|
||||
%tmp53 = lshr i8 %tmp52, 7
|
||||
; CHECK: lshr i8 %tmp51, 7
|
||||
%tmp54 = mul i8 %tmp53, 16
|
||||
; CHECK: shl nuw nsw i8 %tmp53, 4
|
||||
%tmp55 = xor i8 %tmp54, %tmp51
|
||||
; CHECK: ret i8 %tmp551
|
||||
ret i8 %tmp55
|
||||
|
|
|
@ -22,30 +22,3 @@ define void @test1() nounwind ssp {
|
|||
; CHECK: @test1
|
||||
; CHECK-NEXT: ret void
|
||||
}
|
||||
|
||||
; This function exposes a phase ordering problem when InstCombine is
|
||||
; turning %add into a bitmask, making it difficult to spot a 0 return value.
|
||||
;
|
||||
; It it also important that %add is expressed as a multiple of %div so scalar
|
||||
; evolution can recognize it.
|
||||
define i32 @test2(i32 %a, i32* %p) nounwind uwtable ssp {
|
||||
entry:
|
||||
%div = udiv i32 %a, 4
|
||||
%arrayidx = getelementptr inbounds i32* %p, i64 0
|
||||
store i32 %div, i32* %arrayidx, align 4
|
||||
%add = add i32 %div, %div
|
||||
%arrayidx1 = getelementptr inbounds i32* %p, i64 1
|
||||
store i32 %add, i32* %arrayidx1, align 4
|
||||
%arrayidx2 = getelementptr inbounds i32* %p, i64 1
|
||||
%0 = load i32* %arrayidx2, align 4
|
||||
%arrayidx3 = getelementptr inbounds i32* %p, i64 0
|
||||
%1 = load i32* %arrayidx3, align 4
|
||||
%mul = mul i32 2, %1
|
||||
%sub = sub i32 %0, %mul
|
||||
ret i32 %sub
|
||||
|
||||
; CHECK: @test2
|
||||
; CHECK: %div = lshr i32 %a, 2
|
||||
; CHECK: %add = shl nuw nsw i32 %div, 1
|
||||
; CHECK: ret i32 0
|
||||
}
|
||||
|
|
|
@ -1,64 +0,0 @@
|
|||
; RUN: opt -O3 -S -analyze -scalar-evolution %s | FileCheck %s
|
||||
;
|
||||
; This file contains phase ordering tests for scalar evolution.
|
||||
; Test that the standard passes don't obfuscate the IR so scalar evolution can't
|
||||
; recognize expressions.
|
||||
|
||||
; CHECK: test1
|
||||
; The loop body contains two increments by %div.
|
||||
; Make sure that 2*%div is recognizable, and not expressed as a bit mask of %d.
|
||||
; CHECK: --> {%p,+,(2 * (%d /u 4) * sizeof(i32))}
|
||||
define void @test1(i64 %d, i32* %p) nounwind uwtable ssp {
|
||||
entry:
|
||||
%div = udiv i64 %d, 4
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
%p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ]
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
|
||||
%cmp = icmp ne i32 %i.0, 64
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
store i32 0, i32* %p.addr.0, align 4
|
||||
%add.ptr = getelementptr inbounds i32* %p.addr.0, i64 %div
|
||||
store i32 1, i32* %add.ptr, align 4
|
||||
%add.ptr1 = getelementptr inbounds i32* %add.ptr, i64 %div
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body
|
||||
%inc = add i32 %i.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: test1a
|
||||
; Same thing as test1, but it is even more tempting to fold 2 * (%d /u 2)
|
||||
; CHECK: --> {%p,+,(2 * (%d /u 2) * sizeof(i32))}
|
||||
define void @test1a(i64 %d, i32* %p) nounwind uwtable ssp {
|
||||
entry:
|
||||
%div = udiv i64 %d, 2
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
%p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ]
|
||||
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
|
||||
%cmp = icmp ne i32 %i.0, 64
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
store i32 0, i32* %p.addr.0, align 4
|
||||
%add.ptr = getelementptr inbounds i32* %p.addr.0, i64 %div
|
||||
store i32 1, i32* %add.ptr, align 4
|
||||
%add.ptr1 = getelementptr inbounds i32* %add.ptr, i64 %div
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body
|
||||
%inc = add i32 %i.0, 1
|
||||
br label %for.cond
|
||||
|
||||
for.end: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue