diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index c0df914cb669..b24f0eafd5d5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -491,102 +491,81 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { return nullptr; } -/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P -/// when possible. This makes it generally easy to do alias analysis and/or -/// SROA/mem2reg of the memory object. -static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { - User *CI = cast(SI.getOperand(1)); - Value *CastOp = CI->getOperand(0); +/// \brief Combine stores to match the type of value being stored. +/// +/// The core idea here is that the memory does not have any intrinsic type and +/// where we can we should match the type of a store to the type of value being +/// stored. +/// +/// However, this routine must never change the width of a store or the number of +/// stores as that would introduce a semantic change. This combine is expected to +/// be a semantic no-op which just allows stores to more closely model the types +/// of their incoming values. +/// +/// Currently, we also refuse to change the precise type used for an atomic or +/// volatile store. This is debatable, and might be reasonable to change later. +/// However, it is risky in case some backend or other part of LLVM is relying +/// on the exact type stored to select appropriate atomic operations. +/// +/// \returns true if the store was successfully combined away. This indicates +/// the caller must erase the store instruction. We have to let the caller erase +/// the store instruction sas otherwise there is no way to signal whether it was +/// combined or not: IC.EraseInstFromFunction returns a null pointer. +static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) { + // FIXME: We could probably with some care handle both volatile and atomic + // stores here but it isn't clear that this is important. + if (!SI.isSimple()) + return false; - Type *DestPTy = CI->getType()->getPointerElementType(); - PointerType *SrcTy = dyn_cast(CastOp->getType()); - if (!SrcTy) return nullptr; + Value *Ptr = SI.getPointerOperand(); + Value *V = SI.getValueOperand(); + unsigned AS = SI.getPointerAddressSpace(); + SmallVector, 8> MD; + SI.getAllMetadata(MD); - Type *SrcPTy = SrcTy->getElementType(); + // Fold away bit casts of the stored value by storing the original type. + if (auto *BC = dyn_cast(V)) { + V = BC->getOperand(0); + StoreInst *NewStore = IC.Builder->CreateAlignedStore( + V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)), + SI.getAlignment()); + for (const auto &MDPair : MD) { + unsigned ID = MDPair.first; + MDNode *N = MDPair.second; + // Note, essentially every kind of metadata should be preserved here! This + // routine is supposed to clone a store instruction changing *only its + // type*. The only metadata it makes sense to drop is metadata which is + // invalidated when the pointer type changes. This should essentially + // never be the case in LLVM, but we explicitly switch over only known + // metadata to be conservatively correct. If you are adding metadata to + // LLVM which pertains to stores, you almost certainly want to add it + // here. + switch (ID) { + case LLVMContext::MD_dbg: + case LLVMContext::MD_tbaa: + case LLVMContext::MD_prof: + case LLVMContext::MD_fpmath: + case LLVMContext::MD_tbaa_struct: + case LLVMContext::MD_alias_scope: + case LLVMContext::MD_noalias: + case LLVMContext::MD_nontemporal: + case LLVMContext::MD_mem_parallel_loop_access: + case LLVMContext::MD_nonnull: + // All of these directly apply. + NewStore->setMetadata(ID, N); + break; - if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy()) - return nullptr; - - /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" - /// to its first element. This allows us to handle things like: - /// store i32 xxx, (bitcast {foo*, float}* %P to i32*) - /// on 32-bit hosts. - SmallVector NewGEPIndices; - - // If the source is an array, the code below will not succeed. Check to - // see if a trivial 'gep P, 0, 0' will help matters. Only do this for - // constants. - if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) { - // Index through pointer. - Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext())); - NewGEPIndices.push_back(Zero); - - while (1) { - if (StructType *STy = dyn_cast(SrcPTy)) { - if (!STy->getNumElements()) /* Struct can be empty {} */ - break; - NewGEPIndices.push_back(Zero); - SrcPTy = STy->getElementType(0); - } else if (ArrayType *ATy = dyn_cast(SrcPTy)) { - NewGEPIndices.push_back(Zero); - SrcPTy = ATy->getElementType(); - } else { + case LLVMContext::MD_invariant_load: + case LLVMContext::MD_range: break; } } - - SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); + return true; } - if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy()) - return nullptr; - - // If the pointers point into different address spaces don't do the - // transformation. - if (SrcTy->getAddressSpace() != CI->getType()->getPointerAddressSpace()) - return nullptr; - - // If the pointers point to values of different sizes don't do the - // transformation. - if (!IC.getDataLayout() || - IC.getDataLayout()->getTypeSizeInBits(SrcPTy) != - IC.getDataLayout()->getTypeSizeInBits(DestPTy)) - return nullptr; - - // If the pointers point to pointers to different address spaces don't do the - // transformation. It is not safe to introduce an addrspacecast instruction in - // this case since, depending on the target, addrspacecast may not be a no-op - // cast. - if (SrcPTy->isPointerTy() && DestPTy->isPointerTy() && - SrcPTy->getPointerAddressSpace() != DestPTy->getPointerAddressSpace()) - return nullptr; - - // Okay, we are casting from one integer or pointer type to another of - // the same size. Instead of casting the pointer before - // the store, cast the value to be stored. - Value *NewCast; - Instruction::CastOps opcode = Instruction::BitCast; - Type* CastSrcTy = DestPTy; - Type* CastDstTy = SrcPTy; - if (CastDstTy->isPointerTy()) { - if (CastSrcTy->isIntegerTy()) - opcode = Instruction::IntToPtr; - } else if (CastDstTy->isIntegerTy()) { - if (CastSrcTy->isPointerTy()) - opcode = Instruction::PtrToInt; - } - - // SIOp0 is a pointer to aggregate and this is a store to the first field, - // emit a GEP to index into its first field. - if (!NewGEPIndices.empty()) - CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices); - - Value *SIOp0 = SI.getOperand(0); - NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy, - SIOp0->getName()+".c"); - SI.setOperand(0, NewCast); - SI.setOperand(1, CastOp); - return &SI; + // FIXME: We should also canonicalize loads of vectors when their elements are + // cast to other types. + return false; } /// equivalentAddressValues - Test if A and B will obviously have the same @@ -622,6 +601,10 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { Value *Val = SI.getOperand(0); Value *Ptr = SI.getOperand(1); + // Try to canonicalize the stored type. + if (combineStoreToValueType(*this, SI)) + return EraseInstFromFunction(SI); + // Attempt to improve the alignment. if (DL) { unsigned KnownAlign = @@ -713,17 +696,6 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (isa(Val)) return EraseInstFromFunction(SI); - // If the pointer destination is a cast, see if we can fold the cast into the - // source instead. - if (isa(Ptr)) - if (Instruction *Res = InstCombineStoreToCast(*this, SI)) - return Res; - if (ConstantExpr *CE = dyn_cast(Ptr)) - if (CE->isCast()) - if (Instruction *Res = InstCombineStoreToCast(*this, SI)) - return Res; - - // If this store is the last instruction in the basic block (possibly // excepting debug info instructions), and if the block ends with an // unconditional branch, try to move it to the successor block. diff --git a/llvm/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll b/llvm/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll index 895b2603ad9e..c8f0351858ca 100644 --- a/llvm/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll +++ b/llvm/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll @@ -50,7 +50,8 @@ define void @fu2(i32 %parm) nounwind ssp { %7 = add i32 %6, 2048 ; CHECK: alloca i8 %8 = alloca i8, i32 %7 -; CHECK-NEXT: bitcast i8* +; CHECK-NEXT: bitcast double** +; CHECK-NEXT: store i8* %9 = bitcast i8* %8 to double* store double* %9, double** %ptr, align 4 br label %10 diff --git a/llvm/test/Transforms/InstCombine/bitcast-alias-function.ll b/llvm/test/Transforms/InstCombine/bitcast-alias-function.ll index bc36b25b6ded..cfec09200dbc 100644 --- a/llvm/test/Transforms/InstCombine/bitcast-alias-function.ll +++ b/llvm/test/Transforms/InstCombine/bitcast-alias-function.ll @@ -94,7 +94,8 @@ entry: ; CHECK: load i32* ; CHECK-NOT: fptoui ; CHECK-NOT: uitofp -; CHECK: bitcast i32 %call to float +; CHECK: bitcast float* %dest to i32* +; CHECK: store i32 %tmp = load float* %source, align 8 %call = call float @alias_i32_to_f32(float %tmp) nounwind store float %call, float* %dest, align 8 @@ -109,7 +110,8 @@ entry: ; CHECK: load <2 x i32>* ; CHECK-NOT: fptoui ; CHECK-NOT: uitofp -; CHECK: bitcast <2 x i32> %call to <2 x float> +; CHECK: bitcast <2 x float>* %dest to <2 x i32>* +; CHECK: store <2 x i32> %tmp = load <2 x float>* %source, align 8 %call = call <2 x float> @alias_v2i32_to_v2f32(<2 x float> %tmp) nounwind store <2 x float> %call, <2 x float>* %dest, align 8 @@ -123,7 +125,8 @@ entry: ; CHECK: bitcast <2 x float>* %source to i64* ; CHECK: load i64* ; CHECK: %call = call i64 @func_i64 -; CHECK: bitcast i64 %call to <2 x float> +; CHECK: bitcast <2 x float>* %dest to i64* +; CHECK: store i64 %tmp = load <2 x float>* %source, align 8 %call = call <2 x float> @alias_v2f32_to_i64(<2 x float> %tmp) nounwind store <2 x float> %call, <2 x float>* %dest, align 8 @@ -136,7 +139,8 @@ entry: ; CHECK: bitcast i64* %source to <2 x float>* ; CHECK: load <2 x float>* ; CHECK: call <2 x float> @func_v2f32 -; CHECK: bitcast <2 x float> %call to i64 +; CHECK: bitcast i64* %dest to <2 x float>* +; CHECK: store <2 x float> %tmp = load i64* %source, align 8 %call = call i64 @alias_i64_to_v2f32(i64 %tmp) nounwind store i64 %call, i64* %dest, align 8 @@ -149,7 +153,8 @@ entry: ; CHECK: bitcast <2 x i64*>* %source to <2 x i32*>* ; CHECK: load <2 x i32*>* ; CHECK: call <2 x i32*> @func_v2i32p -; CHECK: bitcast <2 x i32*> %call to <2 x i64*> +; CHECK: bitcast <2 x i64*>* %dest to <2 x i32*>* +; CHECK: store <2 x i32*> %tmp = load <2 x i64*>* %source, align 8 %call = call <2 x i64*> @alias_v2i32p_to_v2i64p(<2 x i64*> %tmp) nounwind store <2 x i64*> %call, <2 x i64*>* %dest, align 8 diff --git a/llvm/test/Transforms/InstCombine/bitcast-store.ll b/llvm/test/Transforms/InstCombine/bitcast-store.ll index e46b5c82d9ff..3081cb187166 100644 --- a/llvm/test/Transforms/InstCombine/bitcast-store.ll +++ b/llvm/test/Transforms/InstCombine/bitcast-store.ll @@ -10,11 +10,11 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 @G = external constant [5 x i8*] ; CHECK-LABEL: @foo -; CHECK: store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*]* @G, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 16, !tag !0 -define void @foo(%struct.A* %a) nounwind { +; CHECK: store i32 %x, i32* %{{.*}}, align 16, !noalias !0 +define void @foo(i32 %x, float* %p) nounwind { entry: - %0 = bitcast %struct.A* %a to i8*** - store i8** getelementptr inbounds ([5 x i8*]* @G, i64 0, i64 2), i8*** %0, align 16, !tag !0 + %x.cast = bitcast i32 %x to float + store float %x.cast, float* %p, align 16, !noalias !0 ret void } @@ -32,4 +32,4 @@ entry: ret void } -!0 = metadata !{metadata !"hello"} +!0 = metadata !{metadata !0} diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll index 578b16d98113..a6865557409d 100644 --- a/llvm/test/Transforms/InstCombine/cast.ll +++ b/llvm/test/Transforms/InstCombine/cast.ll @@ -199,15 +199,6 @@ define i1 @test24(i1 %C) { ; CHECK: ret i1 true } -define void @test25(i32** %P) { - %c = bitcast i32** %P to float** ; [#uses=1] - ;; Fold cast into null - store float* null, float** %c - ret void -; CHECK: store i32* null, i32** %P -; CHECK: ret void -} - define i32 @test26(float %F) { ;; no need to cast from float->double. %c = fpext float %F to double ; [#uses=1] diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll index bb4666219a71..94cc1803073a 100644 --- a/llvm/test/Transforms/InstCombine/getelementptr.ll +++ b/llvm/test/Transforms/InstCombine/getelementptr.ll @@ -602,8 +602,8 @@ entry: %C = load i8** %B, align 8 ret i8* %C ; CHECK-LABEL: @test34( -; CHECK: %V.c = inttoptr i64 %V to i8* -; CHECK: ret i8* %V.c +; CHECK: %[[C:.*]] = inttoptr i64 %V to i8* +; CHECK: ret i8* %[[C]] } %t0 = type { i8*, [19 x i8] } diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index f3fb76ee3e36..c8ae024c293d 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -1395,11 +1395,11 @@ define i8* @test83(i1 %flag) { ; CHECK-LABEL: @test83( ; CHECK: %[[X:.*]] = alloca i8* ; CHECK-NEXT: %[[Y:.*]] = alloca i8* -; CHECK: %[[V:.*]] = load i64* %[[X]] -; CHECK-NEXT: %[[C1:.*]] = inttoptr i64 %[[V]] to i8* -; CHECK-NEXT: store i8* %[[C1]], i8** %[[Y]] -; CHECK-NEXT: %[[C2:.*]] = inttoptr i64 %[[V]] to i8* -; CHECK-NEXT: %[[S:.*]] = select i1 %flag, i8* %[[C2]], i8* %[[C1]] +; CHECK-DAG: %[[X2:.*]] = bitcast i8** %[[X]] to i64* +; CHECK-DAG: %[[Y2:.*]] = bitcast i8** %[[Y]] to i64* +; CHECK: %[[V:.*]] = load i64* %[[X2]] +; CHECK-NEXT: store i64 %[[V]], i64* %[[Y2]] +; CHECK-NEXT: %[[C:.*]] = inttoptr i64 %[[V]] to i8* ; CHECK-NEXT: ret i8* %[[S]] entry: %x = alloca i8*