diff --git a/llvm/lib/Transforms/Scalar/InstructionCombining.cpp b/llvm/lib/Transforms/Scalar/InstructionCombining.cpp
index 7117bda70675..76b55663004a 100644
--- a/llvm/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -9275,7 +9275,7 @@ unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V,
 
 Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
   unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1));
-  unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2));
+  unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2), DstAlign);
   unsigned MinAlign = std::min(DstAlign, SrcAlign);
   unsigned CopyAlign = MI->getAlignment()->getZExtValue();
 
@@ -11097,7 +11097,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
   Value *Op = LI.getOperand(0);
 
   // Attempt to improve the alignment.
-  unsigned KnownAlign = GetOrEnforceKnownAlignment(Op);
+  unsigned KnownAlign =
+    GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
   if (KnownAlign >
       (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) :
                                 LI.getAlignment()))
@@ -11376,7 +11377,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
   }
 
   // Attempt to improve the alignment.
-  unsigned KnownAlign = GetOrEnforceKnownAlignment(Ptr);
+  unsigned KnownAlign =
+    GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
   if (KnownAlign >
       (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
                                 SI.getAlignment()))
diff --git a/llvm/test/CodeGen/X86/2006-05-11-InstrSched.ll b/llvm/test/CodeGen/X86/2006-05-11-InstrSched.ll
index 774e7243fd37..6c0e76b34ade 100644
--- a/llvm/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/llvm/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -14,8 +14,8 @@ cond_true:  ; preds = %cond_true, %entry
   %k.0.0 = bitcast i32 %tmp.10 to i32  ; <i32> [#uses=2]
   %tmp31 = add i32 %k.0.0, -1  ; <i32> [#uses=4]
   %tmp32 = getelementptr i32* %mpp, i32 %tmp31  ; <i32*> [#uses=1]
-  %tmp34 = bitcast i32* %tmp32 to i8*  ; <i8*> [#uses=1]
-  %tmp = tail call <16 x i8> @llvm.x86.sse2.loadu.dq( i8* %tmp34 )  ; <<16 x i8>> [#uses=1]
+  %tmp34 = bitcast i32* %tmp32 to <16 x i8>*  ; [#uses=1]
+  %tmp = load <16 x i8>* %tmp34, align 1
   %tmp42 = getelementptr i32* %tpmm, i32 %tmp31  ; <i32*> [#uses=1]
   %tmp42.upgrd.1 = bitcast i32* %tmp42 to <4 x i32>*  ; <<4 x i32>*> [#uses=1]
   %tmp46 = load <4 x i32>* %tmp42.upgrd.1  ; <<4 x i32>> [#uses=1]
@@ -23,8 +23,8 @@ cond_true:  ; preds = %cond_true, %entry
   %tmp55 = add <4 x i32> %tmp54, %tmp46  ; <<4 x i32>> [#uses=2]
   %tmp55.upgrd.2 = bitcast <4 x i32> %tmp55 to <2 x i64>  ; <<2 x i64>> [#uses=1]
   %tmp62 = getelementptr i32* %ip, i32 %tmp31  ; <i32*> [#uses=1]
-  %tmp65 = bitcast i32* %tmp62 to i8*  ; <i8*> [#uses=1]
-  %tmp66 = tail call <16 x i8> @llvm.x86.sse2.loadu.dq( i8* %tmp65 )  ; <<16 x i8>> [#uses=1]
+  %tmp65 = bitcast i32* %tmp62 to <16 x i8>*  ; [#uses=1]
+  %tmp66 = load <16 x i8>* %tmp65, align 1
   %tmp73 = getelementptr i32* %tpim, i32 %tmp31  ; <i32*> [#uses=1]
   %tmp73.upgrd.3 = bitcast i32* %tmp73 to <4 x i32>*  ; <<4 x i32>*> [#uses=1]
   %tmp77 = load <4 x i32>* %tmp73.upgrd.3  ; <<4 x i32>> [#uses=1]
@@ -50,6 +50,4 @@ return:  ; preds = %cond_true, %entry
   ret void
 }
 
-declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*)
-
 declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>)
diff --git a/llvm/test/CodeGen/X86/iv-users-in-other-loops.ll b/llvm/test/CodeGen/X86/iv-users-in-other-loops.ll
index 175f4c0c6a3f..e8762bc96895 100644
--- a/llvm/test/CodeGen/X86/iv-users-in-other-loops.ll
+++ b/llvm/test/CodeGen/X86/iv-users-in-other-loops.ll
@@ -160,23 +160,23 @@ bb9:  ; preds = %bb9, %bb10.preheader
   %B_addr.0.sum = add i64 %B_addr.0.rec, %A_addr.440.rec  ; <i64> [#uses=2]
   %B_addr.438 = getelementptr float* %B, i64 %B_addr.0.sum  ; <float*> [#uses=1]
   %A_addr.440 = getelementptr float* %A, i64 %B_addr.0.sum  ; <float*> [#uses=1]
-  %61 = bitcast float* %B_addr.438 to i8*  ; <i8*> [#uses=1]
-  %62 = tail call <4 x float> @llvm.x86.sse.loadu.ps(i8* %61) nounwind readonly  ; <<4 x float>> [#uses=1]
+  %61 = bitcast float* %B_addr.438 to <4 x float>*  ; [#uses=1]
+  %62 = load <4 x float>* %61, align 1
   %B_addr.438.sum169 = or i64 %A_addr.440.rec, 4  ; <i64> [#uses=1]
   %B_addr.0.sum187 = add i64 %B_addr.0.rec, %B_addr.438.sum169  ; <i64> [#uses=2]
   %63 = getelementptr float* %B, i64 %B_addr.0.sum187  ; <float*> [#uses=1]
-  %64 = bitcast float* %63 to i8*  ; <i8*> [#uses=1]
-  %65 = tail call <4 x float> @llvm.x86.sse.loadu.ps(i8* %64) nounwind readonly  ; <<4 x float>> [#uses=1]
+  %64 = bitcast float* %63 to <4 x float>*  ; [#uses=1]
+  %65 = load <4 x float>* %64, align 1
   %B_addr.438.sum168 = or i64 %A_addr.440.rec, 8  ; <i64> [#uses=1]
   %B_addr.0.sum186 = add i64 %B_addr.0.rec, %B_addr.438.sum168  ; <i64> [#uses=2]
   %66 = getelementptr float* %B, i64 %B_addr.0.sum186  ; <float*> [#uses=1]
-  %67 = bitcast float* %66 to i8*  ; <i8*> [#uses=1]
-  %68 = tail call <4 x float> @llvm.x86.sse.loadu.ps(i8* %67) nounwind readonly  ; <<4 x float>> [#uses=1]
+  %67 = bitcast float* %66 to <4 x float>*  ; [#uses=1]
+  %68 = load <4 x float>* %67, align 1
   %B_addr.438.sum167 = or i64 %A_addr.440.rec, 12  ; <i64> [#uses=1]
   %B_addr.0.sum185 = add i64 %B_addr.0.rec, %B_addr.438.sum167  ; <i64> [#uses=2]
   %69 = getelementptr float* %B, i64 %B_addr.0.sum185  ; <float*> [#uses=1]
-  %70 = bitcast float* %69 to i8*  ; <i8*> [#uses=1]
-  %71 = tail call <4 x float> @llvm.x86.sse.loadu.ps(i8* %70) nounwind readonly  ; <<4 x float>> [#uses=1]
+  %70 = bitcast float* %69 to <4 x float>*  ; [#uses=1]
+  %71 = load <4 x float>* %70, align 1
   %72 = bitcast float* %A_addr.440 to <4 x float>*  ; <<4 x float>*> [#uses=1]
   %73 = load <4 x float>* %72, align 16  ; <<4 x float>> [#uses=1]
   %74 = mul <4 x float> %73, %62  ; <<4 x float>> [#uses=1]
@@ -214,8 +214,8 @@ bb11:  ; preds = %bb11, %bb12.loopexit
   %A_addr.529.rec = shl i64 %indvar, 2  ; <i64> [#uses=3]
   %B_addr.527 = getelementptr float* %B_addr.4.lcssa, i64 %A_addr.529.rec  ; <float*> [#uses=1]
   %A_addr.529 = getelementptr float* %A_addr.4.lcssa, i64 %A_addr.529.rec  ; <float*> [#uses=1]
-  %95 = bitcast float* %B_addr.527 to i8*  ; <i8*> [#uses=1]
-  %96 = tail call <4 x float> @llvm.x86.sse.loadu.ps(i8* %95) nounwind readonly  ; <<4 x float>> [#uses=1]
+  %95 = bitcast float* %B_addr.527 to <4 x float>*  ; [#uses=1]
+  %96 = load <4 x float>* %95, align 1
   %97 = bitcast float* %A_addr.529 to <4 x float>*  ; <<4 x float>*> [#uses=1]
   %98 = load <4 x float>* %97, align 16  ; <<4 x float>> [#uses=1]
   %99 = mul <4 x float> %98, %96  ; <<4 x float>> [#uses=1]
@@ -288,5 +288,3 @@ bb16:  ; preds = %bb14, %bb13
   store float %Sum0.2.lcssa, float* %C, align 4
   ret void
 }
-
-declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readonly
diff --git a/llvm/test/Transforms/InstCombine/align-inc.ll b/llvm/test/Transforms/InstCombine/align-inc.ll
index 0ad01cb23498..104d9918a9c7 100644
--- a/llvm/test/Transforms/InstCombine/align-inc.ll
+++ b/llvm/test/Transforms/InstCombine/align-inc.ll
@@ -3,12 +3,9 @@
 @GLOBAL = internal global [4 x i32] zeroinitializer
 
-declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*)
-
-
 define <16 x i8> @foo(<2 x i64> %x) {
 entry:
-  %tmp = tail call <16 x i8> @llvm.x86.sse2.loadu.dq( i8* bitcast ([4 x i32]* @GLOBAL to i8*) )
+  %tmp = load <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*), align 1
   ret <16 x i8> %tmp
 }