forked from OSchip/llvm-project
[mlir][nvvm] Fix bug in ldmatrix intrinsic conversion
The ldmatrix intrinsic trans option was inverted. Bug found by @christopherbate! Differential Revision: https://reviews.llvm.org/D121666
This commit is contained in:
parent
0aaf480be9
commit
6d007e0278
|
@ -67,7 +67,7 @@ static llvm::Intrinsic::ID getShflIntrinsicId(llvm::Type *resultType,
|
|||
/// Return the intrinsic ID associated with ldmatrix for the given paramters.
|
||||
static llvm::Intrinsic::ID getLdMatrixIntrinsicId(NVVM::MMALayout layout,
|
||||
int32_t num) {
|
||||
if (layout == NVVM::MMALayout::col) {
|
||||
if (layout == NVVM::MMALayout::row) {
|
||||
switch (num) {
|
||||
case 1:
|
||||
return llvm::Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_b16;
|
||||
|
|
|
@ -178,12 +178,18 @@ llvm.func @cp_async(%arg0: !llvm.ptr<i8, 3>, %arg1: !llvm.ptr<i8, 1>) {
|
|||
|
||||
// CHECK-LABEL: @ld_matrix(
|
||||
llvm.func @ld_matrix(%arg0: !llvm.ptr<i32, 3>) {
|
||||
// CHECK: call i32 @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x1.trans.b16.p3i32(i32 addrspace(3)* %{{.*}})
|
||||
// CHECK: call i32 @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x1.b16.p3i32(i32 addrspace(3)* %{{.*}})
|
||||
%l1 = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout<row>} : (!llvm.ptr<i32, 3>) -> i32
|
||||
// CHECK: call { i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x2.trans.b16.p3i32(i32 addrspace(3)* %{{.*}})
|
||||
// CHECK: call { i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x2.b16.p3i32(i32 addrspace(3)* %{{.*}})
|
||||
%l2 = nvvm.ldmatrix %arg0 {num = 2 : i32, layout = #nvvm.mma_layout<row>} : (!llvm.ptr<i32, 3>) -> !llvm.struct<(i32, i32)>
|
||||
// CHECK: call { i32, i32, i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x4.trans.b16.p3i32(i32 addrspace(3)* %{{.*}})
|
||||
// CHECK: call { i32, i32, i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x4.b16.p3i32(i32 addrspace(3)* %{{.*}})
|
||||
%l4 = nvvm.ldmatrix %arg0 {num = 4 : i32, layout = #nvvm.mma_layout<row>} : (!llvm.ptr<i32, 3>) -> !llvm.struct<(i32, i32, i32, i32)>
|
||||
// CHECK: call i32 @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x1.trans.b16.p3i32(i32 addrspace(3)* %{{.*}})
|
||||
%l1t = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout<col>} : (!llvm.ptr<i32, 3>) -> i32
|
||||
// CHECK: call { i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x2.trans.b16.p3i32(i32 addrspace(3)* %{{.*}})
|
||||
%l2t = nvvm.ldmatrix %arg0 {num = 2 : i32, layout = #nvvm.mma_layout<col>} : (!llvm.ptr<i32, 3>) -> !llvm.struct<(i32, i32)>
|
||||
// CHECK: call { i32, i32, i32, i32 } @llvm.nvvm.ldmatrix.sync.aligned.m8n8.x4.trans.b16.p3i32(i32 addrspace(3)* %{{.*}})
|
||||
%l4t = nvvm.ldmatrix %arg0 {num = 4 : i32, layout = #nvvm.mma_layout<col>} : (!llvm.ptr<i32, 3>) -> !llvm.struct<(i32, i32, i32, i32)>
|
||||
llvm.return
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue