Change these tests to use regular loads instead of the
llvm.x86.sse2.loadu.dq intrinsic.

Enhance instcombine to use the preferred-alignment argument of
GetOrEnforceKnownAlignment in more cases, so that regular IR operations
are optimized in the same way that the intrinsics currently are.
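
For reference, the intrinsic and the regular load are equivalent:
llvm.x86.sse2.loadu.dq performs an unaligned 128-bit load, which plain IR
expresses as a load marked "align 1". A minimal sketch of the two forms,
using a hypothetical pointer %p and the typed-pointer IR syntax of this era:

; Old form: unaligned 128-bit load via the SSE2 intrinsic.
%old = tail call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %p)

; New form: a regular load marked align 1. Unlike the intrinsic call,
; instcombine can later raise this alignment when it can prove, or
; enforce, that %p is better aligned.
%q = bitcast i8* %p to <16 x i8>*
%new = load <16 x i8>* %q, align 1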

llvm-svn: 64623
Dan Gohman 2009-02-16 00:44:23 +00:00
parent aaee6c9523
commit 9cdfd44521
4 changed files with 20 additions and 25 deletions


@@ -9275,7 +9275,7 @@ unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V,
 Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
   unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1));
-  unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2));
+  unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2), DstAlign);
   unsigned MinAlign = std::min(DstAlign, SrcAlign);
   unsigned CopyAlign = MI->getAlignment()->getZExtValue();
@@ -11097,7 +11097,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
   Value *Op = LI.getOperand(0);
 
   // Attempt to improve the alignment.
-  unsigned KnownAlign = GetOrEnforceKnownAlignment(Op);
+  unsigned KnownAlign =
+    GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
   if (KnownAlign >
       (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) :
                                 LI.getAlignment()))
@@ -11376,7 +11377,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
   }
 
   // Attempt to improve the alignment.
-  unsigned KnownAlign = GetOrEnforceKnownAlignment(Ptr);
+  unsigned KnownAlign =
+    GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
   if (KnownAlign >
       (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
                                 SI.getAlignment()))
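
To illustrate the effect of passing a preferred alignment through:
GetOrEnforceKnownAlignment not only computes the alignment it can prove,
but, when given a preferred value, will also raise the alignment of an
alloca or internal global to meet it. A hedged sketch modeled on the
instcombine test at the end of this commit (@data and @example are
hypothetical names; on x86 the preferred alignment of <16 x i8> is 16):

; 4-byte aligned by its type alone.
@data = internal global [4 x i32] zeroinitializer

define <16 x i8> @example() {
entry:
  ; With a preferred alignment of 16, instcombine can force @data up
  ; to align 16 and then rewrite this load's "align 1" to "align 16".
  %v = load <16 x i8>* bitcast ([4 x i32]* @data to <16 x i8>*), align 1
  ret <16 x i8> %v
}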


@@ -14,8 +14,8 @@ cond_true: ; preds = %cond_true, %entry
   %k.0.0 = bitcast i32 %tmp.10 to i32 ; <i32> [#uses=2]
   %tmp31 = add i32 %k.0.0, -1 ; <i32> [#uses=4]
   %tmp32 = getelementptr i32* %mpp, i32 %tmp31 ; <i32*> [#uses=1]
-  %tmp34 = bitcast i32* %tmp32 to i8* ; <i8*> [#uses=1]
-  %tmp = tail call <16 x i8> @llvm.x86.sse2.loadu.dq( i8* %tmp34 ) ; <<16 x i8>> [#uses=1]
+  %tmp34 = bitcast i32* %tmp32 to <16 x i8>* ; <i8*> [#uses=1]
+  %tmp = load <16 x i8>* %tmp34, align 1
   %tmp42 = getelementptr i32* %tpmm, i32 %tmp31 ; <i32*> [#uses=1]
   %tmp42.upgrd.1 = bitcast i32* %tmp42 to <4 x i32>* ; <<4 x i32>*> [#uses=1]
   %tmp46 = load <4 x i32>* %tmp42.upgrd.1 ; <<4 x i32>> [#uses=1]
@@ -23,8 +23,8 @@ cond_true: ; preds = %cond_true, %entry
   %tmp55 = add <4 x i32> %tmp54, %tmp46 ; <<4 x i32>> [#uses=2]
   %tmp55.upgrd.2 = bitcast <4 x i32> %tmp55 to <2 x i64> ; <<2 x i64>> [#uses=1]
   %tmp62 = getelementptr i32* %ip, i32 %tmp31 ; <i32*> [#uses=1]
-  %tmp65 = bitcast i32* %tmp62 to i8* ; <i8*> [#uses=1]
-  %tmp66 = tail call <16 x i8> @llvm.x86.sse2.loadu.dq( i8* %tmp65 ) ; <<16 x i8>> [#uses=1]
+  %tmp65 = bitcast i32* %tmp62 to <16 x i8>* ; <i8*> [#uses=1]
+  %tmp66 = load <16 x i8>* %tmp65, align 1
   %tmp73 = getelementptr i32* %tpim, i32 %tmp31 ; <i32*> [#uses=1]
   %tmp73.upgrd.3 = bitcast i32* %tmp73 to <4 x i32>* ; <<4 x i32>*> [#uses=1]
   %tmp77 = load <4 x i32>* %tmp73.upgrd.3 ; <<4 x i32>> [#uses=1]
@@ -50,6 +50,4 @@ return: ; preds = %cond_true, %entry
   ret void
 }
-
-declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*)
 
 declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>)


@@ -160,23 +160,23 @@ bb9: ; preds = %bb9, %bb10.preheader
   %B_addr.0.sum = add i64 %B_addr.0.rec, %A_addr.440.rec ; <i64> [#uses=2]
   %B_addr.438 = getelementptr float* %B, i64 %B_addr.0.sum ; <float*> [#uses=1]
   %A_addr.440 = getelementptr float* %A, i64 %B_addr.0.sum ; <float*> [#uses=1]
-  %61 = bitcast float* %B_addr.438 to i8* ; <i8*> [#uses=1]
-  %62 = tail call <4 x float> @llvm.x86.sse.loadu.ps(i8* %61) nounwind readonly ; <<4 x float>> [#uses=1]
+  %61 = bitcast float* %B_addr.438 to <4 x float>* ; <i8*> [#uses=1]
+  %62 = load <4 x float>* %61, align 1
   %B_addr.438.sum169 = or i64 %A_addr.440.rec, 4 ; <i64> [#uses=1]
   %B_addr.0.sum187 = add i64 %B_addr.0.rec, %B_addr.438.sum169 ; <i64> [#uses=2]
   %63 = getelementptr float* %B, i64 %B_addr.0.sum187 ; <float*> [#uses=1]
-  %64 = bitcast float* %63 to i8* ; <i8*> [#uses=1]
-  %65 = tail call <4 x float> @llvm.x86.sse.loadu.ps(i8* %64) nounwind readonly ; <<4 x float>> [#uses=1]
+  %64 = bitcast float* %63 to <4 x float>* ; <i8*> [#uses=1]
+  %65 = load <4 x float>* %64, align 1
   %B_addr.438.sum168 = or i64 %A_addr.440.rec, 8 ; <i64> [#uses=1]
   %B_addr.0.sum186 = add i64 %B_addr.0.rec, %B_addr.438.sum168 ; <i64> [#uses=2]
   %66 = getelementptr float* %B, i64 %B_addr.0.sum186 ; <float*> [#uses=1]
-  %67 = bitcast float* %66 to i8* ; <i8*> [#uses=1]
-  %68 = tail call <4 x float> @llvm.x86.sse.loadu.ps(i8* %67) nounwind readonly ; <<4 x float>> [#uses=1]
+  %67 = bitcast float* %66 to <4 x float>* ; <i8*> [#uses=1]
+  %68 = load <4 x float>* %67, align 1
   %B_addr.438.sum167 = or i64 %A_addr.440.rec, 12 ; <i64> [#uses=1]
   %B_addr.0.sum185 = add i64 %B_addr.0.rec, %B_addr.438.sum167 ; <i64> [#uses=2]
   %69 = getelementptr float* %B, i64 %B_addr.0.sum185 ; <float*> [#uses=1]
-  %70 = bitcast float* %69 to i8* ; <i8*> [#uses=1]
-  %71 = tail call <4 x float> @llvm.x86.sse.loadu.ps(i8* %70) nounwind readonly ; <<4 x float>> [#uses=1]
+  %70 = bitcast float* %69 to <4 x float>* ; <i8*> [#uses=1]
+  %71 = load <4 x float>* %70, align 1
   %72 = bitcast float* %A_addr.440 to <4 x float>* ; <<4 x float>*> [#uses=1]
   %73 = load <4 x float>* %72, align 16 ; <<4 x float>> [#uses=1]
   %74 = mul <4 x float> %73, %62 ; <<4 x float>> [#uses=1]
@@ -214,8 +214,8 @@ bb11: ; preds = %bb11, %bb12.loopexit
   %A_addr.529.rec = shl i64 %indvar, 2 ; <i64> [#uses=3]
   %B_addr.527 = getelementptr float* %B_addr.4.lcssa, i64 %A_addr.529.rec ; <float*> [#uses=1]
   %A_addr.529 = getelementptr float* %A_addr.4.lcssa, i64 %A_addr.529.rec ; <float*> [#uses=1]
-  %95 = bitcast float* %B_addr.527 to i8* ; <i8*> [#uses=1]
-  %96 = tail call <4 x float> @llvm.x86.sse.loadu.ps(i8* %95) nounwind readonly ; <<4 x float>> [#uses=1]
+  %95 = bitcast float* %B_addr.527 to <4 x float>* ; <i8*> [#uses=1]
+  %96 = load <4 x float>* %95, align 1
   %97 = bitcast float* %A_addr.529 to <4 x float>* ; <<4 x float>*> [#uses=1]
   %98 = load <4 x float>* %97, align 16 ; <<4 x float>> [#uses=1]
   %99 = mul <4 x float> %98, %96 ; <<4 x float>> [#uses=1]
@@ -288,5 +288,3 @@ bb16: ; preds = %bb14, %bb13
   store float %Sum0.2.lcssa, float* %C, align 4
   ret void
 }
-
-declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readonly


@@ -3,12 +3,9 @@
 @GLOBAL = internal global [4 x i32] zeroinitializer
 
-declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*)
-
 define <16 x i8> @foo(<2 x i64> %x) {
 entry:
-  %tmp = tail call <16 x i8> @llvm.x86.sse2.loadu.dq( i8* bitcast ([4 x i32]* @GLOBAL to i8*) )
+  %tmp = load <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*), align 1
   ret <16 x i8> %tmp
 }