AMDGPU: Preserve metadata when widening loads
Preserves the low bound of the !range. I don't think it's legal to do anything with the top half since it's theoretically reading garbage.

llvm-svn: 334045
commit 57e541e87e
parent 9224c00d2b
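To make the metadata rewrite described above concrete, here is a minimal IR sketch (the @range_example, %cast, and !1 names are hypothetical; the constants mirror the tests added below). Only the lower bound of the original !range survives: it is zero-extended to i32 and paired with an upper bound of 0, giving a wrapping range that puts no constraint on the high 16 bits. If the lower bound is already 0, such a wrapped range would convey nothing, so the pass drops the metadata instead.

; Minimal sketch, not part of the commit: an extending scalar load from
; constant address space with a range of [5, 500) on the loaded i16.
define amdgpu_kernel void @range_example(i32 addrspace(1)* %out, i16 addrspace(4)* %in) {
  %ld = load i16, i16 addrspace(4)* %in, align 4, !range !0
  %ext = sext i16 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

!0 = !{i16 5, i16 500}

; After widening, the same dword is loaded as i32. The high 16 bits are
; whatever sits next to the i16 in memory, so the only fact that survives is
; "unsigned value is at least 5", expressed as the wrapping range [5, 0):
;   %wide = load i32, i32 addrspace(4)* %cast, align 4, !range !1
;   !1 = !{i32 5, i32 0}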
@@ -465,7 +465,7 @@ bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
   return Changed;
 }
 
-bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst  &I) {
+bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
   if ((I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
        I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
       canWidenScalarExtLoad(I)) {
@@ -475,7 +475,28 @@ bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
     Type *I32Ty = Builder.getInt32Ty();
     Type *PT = PointerType::get(I32Ty, I.getPointerAddressSpace());
     Value *BitCast= Builder.CreateBitCast(I.getPointerOperand(), PT);
-    Value *WidenLoad = Builder.CreateLoad(BitCast);
+    LoadInst *WidenLoad = Builder.CreateLoad(BitCast);
+    WidenLoad->copyMetadata(I);
+
+    // If we have range metadata, we need to convert the type, and not make
+    // assumptions about the high bits.
+    if (auto *Range = WidenLoad->getMetadata(LLVMContext::MD_range)) {
+      ConstantInt *Lower =
+        mdconst::extract<ConstantInt>(Range->getOperand(0));
+
+      if (Lower->getValue().isNullValue()) {
+        WidenLoad->setMetadata(LLVMContext::MD_range, nullptr);
+      } else {
+        Metadata *LowAndHigh[] = {
+          ConstantAsMetadata::get(ConstantInt::get(I32Ty, Lower->getValue().zext(32))),
+          // Don't make assumptions about the high bits.
+          ConstantAsMetadata::get(ConstantInt::get(I32Ty, 0))
+        };
+
+        WidenLoad->setMetadata(LLVMContext::MD_range,
+                               MDNode::get(Mod->getContext(), LowAndHigh));
+      }
+    }
 
     int TySize = Mod->getDataLayout().getTypeSizeInBits(I.getType());
     Type *IntNTy = Builder.getIntNTy(TySize);
@@ -189,4 +189,80 @@ define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
   ret void
 }
 
+; OPT-LABEL: @constant_load_i16_align4_range(
+; OPT: load i32, i32 addrspace(4)* %1, !range !0
+define amdgpu_kernel void @constant_load_i16_align4_range(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
+  %ld = load i16, i16 addrspace(4)* %in, align 4, !range !0
+  %ext = sext i16 %ld to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; OPT-LABEL: @constant_load_i16_align4_range_max(
+; OPT: load i32, i32 addrspace(4)* %1, !range !0
+define amdgpu_kernel void @constant_load_i16_align4_range_max(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
+  %ld = load i16, i16 addrspace(4)* %in, align 4, !range !1
+  %ext = sext i16 %ld to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; OPT-LABEL: @constant_load_i16_align4_complex_range(
+; OPT: load i32, i32 addrspace(4)* %1, !range !1
+define amdgpu_kernel void @constant_load_i16_align4_complex_range(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
+  %ld = load i16, i16 addrspace(4)* %in, align 4, !range !2
+  %ext = sext i16 %ld to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; OPT-LABEL: @constant_load_i16_align4_range_from_0(
+; OPT: load i32, i32 addrspace(4)* %1{{$}}
+define amdgpu_kernel void @constant_load_i16_align4_range_from_0(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
+  %ld = load i16, i16 addrspace(4)* %in, align 4, !range !3
+  %ext = sext i16 %ld to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; OPT-LABEL: @constant_load_i16_align4_range_from_neg(
+; OPT: load i32, i32 addrspace(4)* %1, !range !2
+define amdgpu_kernel void @constant_load_i16_align4_range_from_neg(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
+  %ld = load i16, i16 addrspace(4)* %in, align 4, !range !4
+  %ext = sext i16 %ld to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; OPT-LABEL: @constant_load_i16_align4_range_from_neg_to_0(
+; OPT: load i32, i32 addrspace(4)* %1, !range !2
+define amdgpu_kernel void @constant_load_i16_align4_range_from_neg_to_0(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
+  %ld = load i16, i16 addrspace(4)* %in, align 4, !range !5
+  %ext = sext i16 %ld to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; OPT-LABEL: @constant_load_i16_align4_invariant
+; OPT: load i32, i32 addrspace(4)* %1, !invariant.load !3
+define amdgpu_kernel void @constant_load_i16_align4_invariant(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
+  %ld = load i16, i16 addrspace(4)* %in, align 4, !invariant.load !6
+  %ext = sext i16 %ld to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
 attributes #0 = { nounwind }
+
+; OPT: !0 = !{i32 5, i32 0}
+; OPT: !1 = !{i32 8, i32 0}
+; OPT: !2 = !{i32 65520, i32 0}
+; OPT: !3 = !{}
+
+!0 = !{i16 5, i16 500}
+!1 = !{i16 5, i16 -1}
+!2 = !{i16 8, i16 12, i16 42, i16 99}
+!3 = !{i16 0, i16 255}
+!4 = !{i16 -16, i16 16}
+!5 = !{i16 -16, i16 0}
+!6 = !{}
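For reference, checks with the OPT prefix in this test are produced by running the pass standalone through opt and piping the output to FileCheck. The RUN line below is not part of this diff; the triple and pass flag are my assumption about how the test is driven:

; Assumed RUN line (not taken from the diff):
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare < %s | FileCheck -check-prefix=OPT %s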