forked from OSchip/llvm-project
Teach basicaa that x|c == x+c when the c bits of x are clear. This
allows us to compile the example in readme.txt into:

LBB1_1: ## %bb
    movl 4(%rdx,%rax), %ecx
    movl %ecx, %esi
    imull (%rdx,%rax), %esi
    imull %esi, %ecx
    movl %esi, 8(%rdx,%rax)
    imull %ecx, %esi
    movl %ecx, 12(%rdx,%rax)
    movl %esi, 16(%rdx,%rax)
    imull %ecx, %esi
    movl %esi, 20(%rdx,%rax)
    addq $16, %rax
    cmpq $4000, %rax
    jne LBB1_1

instead of:

LBB1_1:
    movl (%rdx,%rax), %ecx
    imull 4(%rdx,%rax), %ecx
    movl %ecx, 8(%rdx,%rax)
    imull 4(%rdx,%rax), %ecx
    movl %ecx, 12(%rdx,%rax)
    imull 8(%rdx,%rax), %ecx
    movl %ecx, 16(%rdx,%rax)
    imull 12(%rdx,%rax), %ecx
    movl %ecx, 20(%rdx,%rax)
    addq $16, %rax
    cmpq $4000, %rax
    jne LBB1_1

GCC (4.2) doesn't seem to be able to eliminate the loads in this testcase
either; it generates:

L2:
    movl (%rdx), %eax
    imull 4(%rdx), %eax
    movl %eax, 8(%rdx)
    imull 4(%rdx), %eax
    movl %eax, 12(%rdx)
    imull 8(%rdx), %eax
    movl %eax, 16(%rdx)
    imull 12(%rdx), %eax
    movl %eax, 20(%rdx)
    addl $4, %ecx
    addq $16, %rdx
    cmpl $1002, %ecx
    jne L2

llvm-svn: 89952
This commit is contained in:
parent
12dacdd359
commit
29bc8a91d3
|
@ -14,8 +14,6 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/CaptureTracking.h"
|
||||
#include "llvm/Analysis/MemoryBuiltins.h"
|
||||
#include "llvm/Analysis/Passes.h"
|
||||
#include "llvm/Constants.h"
|
||||
#include "llvm/DerivedTypes.h"
|
||||
|
@ -26,6 +24,9 @@
|
|||
#include "llvm/IntrinsicInst.h"
|
||||
#include "llvm/Operator.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Analysis/CaptureTracking.h"
|
||||
#include "llvm/Analysis/MemoryBuiltins.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/Target/TargetData.h"
|
||||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
|
@ -381,15 +382,22 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) {
|
|||
/// GetLinearExpression - Analyze the specified value as a linear expression:
|
||||
/// "A*V + B". Return the scale and offset values as APInts and return V as a
|
||||
/// Value*. The incoming Value is known to be a scalar integer.
|
||||
static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset) {
|
||||
static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
|
||||
const TargetData *TD) {
|
||||
assert(isa<IntegerType>(V->getType()) && "Not an integer value");
|
||||
|
||||
if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
|
||||
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
|
||||
switch (BOp->getOpcode()) {
|
||||
default: break;
|
||||
case Instruction::Or:
|
||||
// X|C == X+C if all the bits in C are unset in X. Otherwise we can't
|
||||
// analyze it.
|
||||
if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), TD))
|
||||
break;
|
||||
// FALL THROUGH.
|
||||
case Instruction::Add:
|
||||
V = GetLinearExpression(BOp->getOperand(0), Scale, Offset);
|
||||
V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, TD);
|
||||
Offset += RHSC->getValue();
|
||||
return V;
|
||||
// TODO: SHL, MUL, OR.
|
||||
|
@ -482,7 +490,7 @@ static const Value *DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
|
|||
|
||||
unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth();
|
||||
APInt IndexScale(Width, 0), IndexOffset(Width, 0);
|
||||
Index = GetLinearExpression(Index, IndexScale, IndexOffset);
|
||||
Index = GetLinearExpression(Index, IndexScale, IndexOffset, TD);
|
||||
|
||||
Scale *= IndexScale.getZExtValue();
|
||||
BaseOffs += IndexOffset.getZExtValue()*Scale;
|
||||
|
|
|
@ -443,23 +443,6 @@ entry:
|
|||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
"basicaa" should know how to look through "or" instructions that act like add
|
||||
instructions. For example in this code, the x*4+1 is turned into x*4 | 1, and
|
||||
basicaa can't analyze the array subscript, leading to duplicated loads in the
|
||||
generated code:
|
||||
|
||||
void test(int X, int Y, int a[]) {
|
||||
int i;
|
||||
for (i=2; i<1000; i+=4) {
|
||||
a[i+0] = a[i-1+0]*a[i-2+0];
|
||||
a[i+1] = a[i-1+1]*a[i-2+1];
|
||||
a[i+2] = a[i-1+2]*a[i-2+2];
|
||||
a[i+3] = a[i-1+3]*a[i-2+3];
|
||||
}
|
||||
}
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
We should investigate an instruction sinking pass. Consider this silly
|
||||
example in pic mode:
|
||||
|
||||
|
|
|
@ -86,4 +86,18 @@ define i32 @test5(i32* %p, i64 %i) {
|
|||
; CHECK: ret i32 0
|
||||
}
|
||||
|
||||
; p[i1*4] != p[(i1*4)|1]  (the shl leaves the low bits clear, so the 'or' acts as an add)
|
||||
define i32 @test6(i32* %p, i64 %i1) {
|
||||
%i = shl i64 %i1, 2
|
||||
%pi = getelementptr i32* %p, i64 %i
|
||||
%i.next = or i64 %i, 1
|
||||
%pi.next = getelementptr i32* %p, i64 %i.next
|
||||
%x = load i32* %pi
|
||||
store i32 42, i32* %pi.next
|
||||
%y = load i32* %pi
|
||||
%z = sub i32 %x, %y
|
||||
ret i32 %z
|
||||
; CHECK: @test6
|
||||
; CHECK: ret i32 0
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue