forked from OSchip/llvm-project
[mlir][AMDGPU] Explicitly truncate memory addresses in buffer ops
As a precaution, truncate memory addresses passed to kernels to 48 bits, since bits 48-63 of the buffer descriptor are used for the stride field and, on gfx10, to control swizzling. Reviewed By: ThomasRaoux Differential Revision: https://reviews.llvm.org/D131016
This commit is contained in:
parent
35cc173846
commit
6329562249
|
@ -118,20 +118,29 @@ struct RawBufferOpLowering : public ConvertOpToLLVMPattern<GpuOp> {
|
|||
MemRefDescriptor memrefDescriptor(memref);
|
||||
Type llvmI64 = this->typeConverter->convertType(rewriter.getI64Type());
|
||||
Type llvm2xI32 = this->typeConverter->convertType(VectorType::get(2, i32));
|
||||
Value c32I64 = rewriter.create<LLVM::ConstantOp>(
|
||||
loc, llvmI64, rewriter.getI64IntegerAttr(32));
|
||||
|
||||
Value resource = rewriter.create<LLVM::UndefOp>(loc, llvm4xI32);
|
||||
|
||||
Value ptr = memrefDescriptor.alignedPtr(rewriter, loc);
|
||||
Value ptrAsInt = rewriter.create<LLVM::PtrToIntOp>(loc, llvmI64, ptr);
|
||||
Value ptrAsInts =
|
||||
rewriter.create<LLVM::BitcastOp>(loc, llvm2xI32, ptrAsInt);
|
||||
for (int64_t i = 0; i < 2; ++i) {
|
||||
Value idxConst = this->createIndexConstant(rewriter, loc, i);
|
||||
Value part =
|
||||
rewriter.create<LLVM::ExtractElementOp>(loc, ptrAsInts, idxConst);
|
||||
resource = rewriter.create<LLVM::InsertElementOp>(
|
||||
loc, llvm4xI32, resource, part, idxConst);
|
||||
}
|
||||
Value lowHalf = rewriter.create<LLVM::TruncOp>(loc, llvmI32, ptrAsInt);
|
||||
resource = rewriter.create<LLVM::InsertElementOp>(
|
||||
loc, llvm4xI32, resource, lowHalf,
|
||||
this->createIndexConstant(rewriter, loc, 0));
|
||||
|
||||
// Bits 48-63 are used both for the stride of the buffer and (on gfx10) for
|
||||
// enabling swizzling. Prevent the high bits of pointers from accidentally
|
||||
// setting those flags.
|
||||
Value highHalfShifted = rewriter.create<LLVM::TruncOp>(
|
||||
loc, llvmI32, rewriter.create<LLVM::LShrOp>(loc, ptrAsInt, c32I64));
|
||||
Value highHalfTruncated = rewriter.create<LLVM::AndOp>(
|
||||
loc, llvmI32, highHalfShifted,
|
||||
createI32Constant(rewriter, loc, 0x0000ffff));
|
||||
resource = rewriter.create<LLVM::InsertElementOp>(
|
||||
loc, llvm4xI32, resource, highHalfTruncated,
|
||||
this->createIndexConstant(rewriter, loc, 1));
|
||||
|
||||
Value numRecords;
|
||||
if (memrefType.hasStaticShape()) {
|
||||
|
|
|
@ -3,11 +3,18 @@
|
|||
|
||||
// CHECK-LABEL: func @gpu_gcn_raw_buffer_load_i32
|
||||
func.func @gpu_gcn_raw_buffer_load_i32(%buf: memref<64xi32>, %idx: i32) -> i32 {
|
||||
// CHECK: %[[ptr:.*]] = llvm.ptrtoint
|
||||
// CHECK: %[[lowHalf:.*]] = llvm.trunc %[[ptr]] : i64 to i32
|
||||
// CHECK: %[[resource_1:.*]] = llvm.insertelement %[[lowHalf]]
|
||||
// CHECK: %[[highHalfI64:.*]] = llvm.lshr %[[ptr]]
|
||||
// CHECK: %[[highHalfI32:.*]] = llvm.trunc %[[highHalfI64]] : i64 to i32
|
||||
// CHECK: %[[highHalf:.*]] = llvm.and %[[highHalfI32]], %{{.*}} : i32
|
||||
// CHECK: %[[resource_2:.*]] = llvm.insertelement %[[highHalf]], %[[resource_1]]
|
||||
// CHECK: %[[numRecords:.*]] = llvm.mlir.constant(256 : i32)
|
||||
// CHECK: llvm.insertelement{{.*}}%[[numRecords]]
|
||||
// CHECK: %[[resource_3:.*]] = llvm.insertelement %[[numRecords]], %[[resource_2]]
|
||||
// CHECK: %[[word3:.*]] = llvm.mlir.constant(159744 : i32)
|
||||
// RDNA: %[[word3:.*]] = llvm.mlir.constant(822243328 : i32)
|
||||
// CHECK: %[[resource:.*]] = llvm.insertelement{{.*}}%[[word3]]
|
||||
// CHECK: %[[resource:.*]] = llvm.insertelement %[[word3]], %[[resource_3]]
|
||||
// CHECK: %[[ret:.*]] = rocdl.raw.buffer.load %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32
|
||||
// CHECK: return %[[ret]]
|
||||
%0 = amdgpu.raw_buffer_load {boundsCheck = true} %buf[%idx] : memref<64xi32>, i32 -> i32
|
||||
|
|
Loading…
Reference in New Issue