[InstCombine] try to narrow a truncated load

  trunc (load X) --> load (bitcast X to narrow type)

We have this transform in DAGCombiner::ReduceLoadWidth(), but the
truncated load pattern can interfere with other instcombine transforms,
so I'd like to allow the fold sooner.

Example:
  https://bugs.llvm.org/show_bug.cgi?id=16739
...in that report, we have bitcasts bracketing these ops, so those
could get eliminated too.

We've generally ruled out widening of loads early in IR (LoadCombine -
http://lists.llvm.org/pipermail/llvm-dev/2016-September/105291.html),
but that reasoning may not apply to narrowing if we can preserve
information such as the dereferenceable range.

Differential Revision: https://reviews.llvm.org/D64432

llvm-svn: 367011
commit bc4a63fd3c (parent 4e1d188be2)
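In IR terms, the fold looks like this: a minimal before/after sketch mirroring the truncload_deref test updated below (the function names here are illustrative, not from the patch):

; Before: load all 8 bytes, then discard the high 4.
define i32 @narrow_me(i64* dereferenceable(8) %ptr) {
  %x = load i64, i64* %ptr
  %r = trunc i64 %x to i32
  ret i32 %r
}

; After (little-endian targets only): load just the low 4 bytes.
define i32 @narrowed(i64* dereferenceable(8) %ptr) {
  %cast = bitcast i64* %ptr to i32*
  %r = load i32, i32* %cast, align 4
  ret i32 %r
}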
@@ -681,6 +681,42 @@ static Instruction *shrinkInsertElt(CastInst &Trunc,
   return nullptr;
 }
 
+static Instruction *narrowLoad(TruncInst &Trunc,
+                               InstCombiner::BuilderTy &Builder,
+                               const DataLayout &DL) {
+  // Check the layout to ensure we are not creating an unsupported operation.
+  // TODO: Create a GEP to offset the load?
+  if (!DL.isLittleEndian())
+    return nullptr;
+  unsigned NarrowBitWidth = Trunc.getDestTy()->getPrimitiveSizeInBits();
+  if (!DL.isLegalInteger(NarrowBitWidth))
+    return nullptr;
+
+  // Match a truncated load with no other uses.
+  Value *X;
+  if (!match(Trunc.getOperand(0), m_OneUse(m_Load(m_Value(X)))))
+    return nullptr;
+  LoadInst *WideLoad = cast<LoadInst>(Trunc.getOperand(0));
+  if (!WideLoad->isSimple())
+    return nullptr;
+
+  // Don't narrow this load if we would lose information about the
+  // dereferenceable range.
+  bool CanBeNull;
+  uint64_t DerefBits = X->getPointerDereferenceableBytes(DL, CanBeNull) * 8;
+  if (DerefBits < WideLoad->getType()->getPrimitiveSizeInBits())
+    return nullptr;
+
+  // trunc (load X) --> load (bitcast X)
+  PointerType *PtrTy = PointerType::get(Trunc.getDestTy(),
+                                        WideLoad->getPointerAddressSpace());
+  Value *Bitcast = Builder.CreatePointerCast(X, PtrTy);
+  LoadInst *NarrowLoad = new LoadInst(Trunc.getDestTy(), Bitcast);
+  NarrowLoad->setAlignment(WideLoad->getAlignment());
+  copyMetadataForLoad(*NarrowLoad, *WideLoad);
+  return NarrowLoad;
+}
+
 Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   if (Instruction *Result = commonCastTransforms(CI))
     return Result;
@@ -840,6 +876,9 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   if (Instruction *I = foldVecTruncToExtElt(CI, *this))
     return I;
 
+  if (Instruction *NewLoad = narrowLoad(CI, Builder, DL))
+    return NewLoad;
+
   return nullptr;
 }
 
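The endianness restriction exists because the fold reuses the wide load's address unchanged: on a little-endian target the low 32 bits of an i64 are the bytes at offset 0, while on a big-endian target they are the trailing 4 bytes. A sketch of what the big-endian variant hinted at in the TODO might emit; this GEP-offset form is an assumption, not part of this patch:

; Hypothetical BE narrowing: offset the pointer so the narrow load
; reads the last 4 bytes of the i64, where the low bits live.
define i32 @be_variant(i64* dereferenceable(8) %ptr) {
  %cast = bitcast i64* %ptr to i32*
  %off = getelementptr i32, i32* %cast, i64 1
  %r = load i32, i32* %off, align 4
  ret i32 %r
}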
@@ -29,10 +29,15 @@ define i32 @truncload_small_deref(i64* dereferenceable(7) %ptr) {
 ; On little-endian, we can narrow the load without an offset.
 
 define i32 @truncload_deref(i64* dereferenceable(8) %ptr) {
-; CHECK-LABEL: @truncload_deref(
-; CHECK-NEXT:    [[X:%.*]] = load i64, i64* [[PTR:%.*]], align 4
-; CHECK-NEXT:    [[R:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT:    ret i32 [[R]]
+; LE-LABEL: @truncload_deref(
+; LE-NEXT:    [[TMP1:%.*]] = bitcast i64* [[PTR:%.*]] to i32*
+; LE-NEXT:    [[R:%.*]] = load i32, i32* [[TMP1]], align 4
+; LE-NEXT:    ret i32 [[R]]
+;
+; BE-LABEL: @truncload_deref(
+; BE-NEXT:    [[X:%.*]] = load i64, i64* [[PTR:%.*]], align 4
+; BE-NEXT:    [[R:%.*]] = trunc i64 [[X]] to i32
+; BE-NEXT:    ret i32 [[R]]
 ;
   %x = load i64, i64* %ptr
   %r = trunc i64 %x to i32
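For contrast, the unchanged truncload_small_deref test named in the hunk header exercises the dereferenceability guard in narrowLoad: with only dereferenceable(7), DerefBits is 7 * 8 = 56, which is less than the 64 bits of the wide load, so the fold is skipped. A hypothetical reduced example of that guarded shape:

; Narrowing would shrink the known-dereferenceable range from 7 bytes
; to 4, so InstCombine must leave the trunc in place here.
define i32 @small_deref(i64* dereferenceable(7) %ptr) {
  %x = load i64, i64* %ptr
  %r = trunc i64 %x to i32
  ret i32 %r
}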
@@ -42,10 +47,15 @@ define i32 @truncload_deref(i64* dereferenceable(8) %ptr) {
 ; Preserve alignment.
 
 define i16 @truncload_align(i32* dereferenceable(14) %ptr) {
-; CHECK-LABEL: @truncload_align(
-; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[PTR:%.*]], align 16
-; CHECK-NEXT:    [[R:%.*]] = trunc i32 [[X]] to i16
-; CHECK-NEXT:    ret i16 [[R]]
+; LE-LABEL: @truncload_align(
+; LE-NEXT:    [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to i16*
+; LE-NEXT:    [[R:%.*]] = load i16, i16* [[TMP1]], align 16
+; LE-NEXT:    ret i16 [[R]]
+;
+; BE-LABEL: @truncload_align(
+; BE-NEXT:    [[X:%.*]] = load i32, i32* [[PTR:%.*]], align 16
+; BE-NEXT:    [[R:%.*]] = trunc i32 [[X]] to i16
+; BE-NEXT:    ret i16 [[R]]
 ;
   %x = load i32, i32* %ptr, align 16
   %r = trunc i32 %x to i16
@@ -98,12 +108,40 @@ define i32 @truncload_volatile(i64* dereferenceable(8) %ptr) {
 ; Preserve address space.
 
 define i32 @truncload_address_space(i64 addrspace(1)* dereferenceable(8) %ptr) {
-; CHECK-LABEL: @truncload_address_space(
-; CHECK-NEXT:    [[X:%.*]] = load i64, i64 addrspace(1)* [[PTR:%.*]], align 4
-; CHECK-NEXT:    [[R:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT:    ret i32 [[R]]
+; LE-LABEL: @truncload_address_space(
+; LE-NEXT:    [[TMP1:%.*]] = bitcast i64 addrspace(1)* [[PTR:%.*]] to i32 addrspace(1)*
+; LE-NEXT:    [[R:%.*]] = load i32, i32 addrspace(1)* [[TMP1]], align 4
+; LE-NEXT:    ret i32 [[R]]
+;
+; BE-LABEL: @truncload_address_space(
+; BE-NEXT:    [[X:%.*]] = load i64, i64 addrspace(1)* [[PTR:%.*]], align 4
+; BE-NEXT:    [[R:%.*]] = trunc i64 [[X]] to i32
+; BE-NEXT:    ret i32 [[R]]
 ;
   %x = load i64, i64 addrspace(1)* %ptr, align 4
   %r = trunc i64 %x to i32
   ret i32 %r
 }
+
+; Most metadata should be transferred to the narrow load.
+; TODO: We lost the range.
+
+define i32 @truncload_metadata(i64* dereferenceable(8) %ptr) {
+; LE-LABEL: @truncload_metadata(
+; LE-NEXT:    [[TMP1:%.*]] = bitcast i64* [[PTR:%.*]] to i32*
+; LE-NEXT:    [[R:%.*]] = load i32, i32* [[TMP1]], align 4, !invariant.load !0, !nontemporal !1
+; LE-NEXT:    ret i32 [[R]]
+;
+; BE-LABEL: @truncload_metadata(
+; BE-NEXT:    [[X:%.*]] = load i64, i64* [[PTR:%.*]], align 4, !range !0, !invariant.load !1, !nontemporal !2
+; BE-NEXT:    [[R:%.*]] = trunc i64 [[X]] to i32
+; BE-NEXT:    ret i32 [[R]]
+;
+  %x = load i64, i64* %ptr, align 4, !invariant.load !0, !nontemporal !1, !range !2
+  %r = trunc i64 %x to i32
+  ret i32 %r
+}
+
+!0 = !{}
+!1 = !{i32 1}
+!2 = !{i64 0, i64 2}
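For reference, the LE/BE prefixes imply this test file is run twice with opposite-endian data layouts. The RUN lines are not visible in this diff, so their exact shape below, including the data-layout strings, is an assumption; the n-spec matters because narrowLoad requires DL.isLegalInteger(NarrowBitWidth):

; Assumed shape of the RUN lines driving the LE/BE check prefixes:
; RUN: opt < %s -instcombine -S -data-layout="e-n8:16:32:64" | FileCheck %s --check-prefixes=CHECK,LE
; RUN: opt < %s -instcombine -S -data-layout="E-n8:16:32:64" | FileCheck %s --check-prefixes=CHECK,BE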