// RUN: mlir-translate -mlir-to-nvvmir %s | FileCheck %s
// Exercises the NVVM special-register read ops. Each op is expected to be
// translated into a call to the identically named llvm.nvvm.read.ptx.sreg.*
// intrinsic returning i32. The function carries no kernel attribute, so it
// must not be listed in the NVVM annotations (see the end of the file).
// NOTE(review): the first check line below also pins the LLVM value name %1;
// the remaining ones only match the call itself.
llvm.func @nvvm_special_regs() -> !llvm.i32 {
  // Thread index within the block.
  // CHECK: %1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
  %1 = nvvm.read.ptx.sreg.tid.x : !llvm.i32
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
  %2 = nvvm.read.ptx.sreg.tid.y : !llvm.i32
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
  %3 = nvvm.read.ptx.sreg.tid.z : !llvm.i32

  // ntid.{x,y,z} registers.
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
  %4 = nvvm.read.ptx.sreg.ntid.x : !llvm.i32
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
  %5 = nvvm.read.ptx.sreg.ntid.y : !llvm.i32
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
  %6 = nvvm.read.ptx.sreg.ntid.z : !llvm.i32

  // ctaid.{x,y,z} registers.
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
  %7 = nvvm.read.ptx.sreg.ctaid.x : !llvm.i32
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
  %8 = nvvm.read.ptx.sreg.ctaid.y : !llvm.i32
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
  %9 = nvvm.read.ptx.sreg.ctaid.z : !llvm.i32

  // nctaid.{x,y,z} registers.
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
  %10 = nvvm.read.ptx.sreg.nctaid.x : !llvm.i32
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
  %11 = nvvm.read.ptx.sreg.nctaid.y : !llvm.i32
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
  %12 = nvvm.read.ptx.sreg.nctaid.z : !llvm.i32

  // warpsize and laneid registers.
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
  %13 = nvvm.read.ptx.sreg.warpsize : !llvm.i32
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.laneid()
  %14 = nvvm.read.ptx.sreg.laneid : !llvm.i32

  llvm.return %1 : !llvm.i32
}
// Verifies that nvvm.barrier0 is translated into a call to the
// llvm.nvvm.barrier0 intrinsic.
// NOTE(review): this function is itself named after the intrinsic it is
// expected to call, so the emitted call may resolve to this definition instead
// of an intrinsic declaration. Consider renaming it (e.g. @llvm_nvvm_barrier0)
// once confirmed that nothing relies on the current name.
llvm.func @llvm.nvvm.barrier0() {
  // CHECK: call void @llvm.nvvm.barrier0()
  nvvm.barrier0
  llvm.return
}
// Verifies translation of nvvm.shfl.sync.bfly without the predicate result.
// The intrinsic is selected by the type of the shuffled value (second
// operand): i32 maps to the .i32 variant and float to the .f32 variant.
llvm.func @nvvm_shfl(
    %0 : !llvm.i32, %1 : !llvm.i32, %2 : !llvm.i32,
    %3 : !llvm.i32, %4 : !llvm.float) -> !llvm.i32 {
  // Shuffle of an i32 value (%3).
  // CHECK: call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
  %6 = nvvm.shfl.sync.bfly %0, %3, %1, %2 : !llvm.i32
  // Shuffle of a float value (%4).
  // CHECK: call float @llvm.nvvm.shfl.sync.bfly.f32(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
  %7 = nvvm.shfl.sync.bfly %0, %4, %1, %2 : !llvm.float
  llvm.return %6 : !llvm.i32
}
// Verifies translation of nvvm.shfl.sync.bfly when the
// `return_value_and_is_valid` unit attribute is present. The op then yields a
// { value, i1 } struct and is expected to map to the predicated intrinsic
// variants (.i32p / .f32p).
llvm.func @nvvm_shfl_pred(
    %0 : !llvm.i32, %1 : !llvm.i32, %2 : !llvm.i32,
    %3 : !llvm.i32, %4 : !llvm.float) -> !llvm<"{ i32, i1 }"> {
  // Predicated shuffle of an i32 value (%3).
  // CHECK: call { i32, i1 } @llvm.nvvm.shfl.sync.bfly.i32p(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
  %6 = nvvm.shfl.sync.bfly %0, %3, %1, %2 {return_value_and_is_valid} : !llvm<"{ i32, i1 }">
  // Predicated shuffle of a float value (%4).
  // CHECK: call { float, i1 } @llvm.nvvm.shfl.sync.bfly.f32p(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
  %7 = nvvm.shfl.sync.bfly %0, %4, %1, %2 {return_value_and_is_valid} : !llvm<"{ float, i1 }">
  llvm.return %6 : !llvm<"{ i32, i1 }">
}
// Verifies that nvvm.vote.ballot.sync, taking an i32 and an i1 operand, is
// translated into a call to the llvm.nvvm.vote.ballot.sync intrinsic
// returning i32.
llvm.func @nvvm_vote(%0 : !llvm.i32, %1 : !llvm.i1) -> !llvm.i32 {
  // CHECK: call i32 @llvm.nvvm.vote.ballot.sync(i32 %{{.*}}, i1 %{{.*}})
  %3 = nvvm.vote.ballot.sync %0, %1 : !llvm.i32
  llvm.return %3 : !llvm.i32
}
// Verifies translation of nvvm.mma.sync into the
// llvm.nvvm.mma.m8n8k4.row.col.f32.f32 intrinsic.
//
// Operands: two <2 x half> fragments for A (%a0, %a1), two for B (%b0, %b1)
// and eight float values (%c0..%c7); the result is a struct of eight floats.
//
// Only the alayout="row" / blayout="col" combination is exercised: the
// .row.col variant is the commonly used one and .row.row support was dropped
// from the op (https://reviews.llvm.org/D74655).
//
// The function declares the struct as its result type so that the signature
// agrees with the value passed to llvm.return.
llvm.func @nvvm_mma(%a0 : !llvm<"<2 x half>">, %a1 : !llvm<"<2 x half>">,
                    %b0 : !llvm<"<2 x half>">, %b1 : !llvm<"<2 x half>">,
                    %c0 : !llvm.float, %c1 : !llvm.float, %c2 : !llvm.float, %c3 : !llvm.float,
                    %c4 : !llvm.float, %c5 : !llvm.float, %c6 : !llvm.float, %c7 : !llvm.float)
    -> !llvm<"{ float, float, float, float, float, float, float, float }"> {
  // CHECK: call { float, float, float, float, float, float, float, float } @llvm.nvvm.mma.m8n8k4.row.col.f32.f32
  %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7
         {alayout="row", blayout="col"}
         : (!llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">,
            !llvm.float, !llvm.float, !llvm.float, !llvm.float,
            !llvm.float, !llvm.float, !llvm.float, !llvm.float)
         -> !llvm<"{ float, float, float, float, float, float, float, float }">
  llvm.return %0 : !llvm<"{ float, float, float, float, float, float, float, float }">
}
// This function has the "kernel" attribute attached and should appear in the
// NVVM annotations after conversion.
llvm.func @kernel_func() attributes {gpu.kernel} {
  llvm.return
}

// Module-level metadata: @kernel_func must be listed as a kernel, while
// @nvvm_special_regs (which has no kernel attribute) must not be.
// CHECK: !nvvm.annotations =
// CHECK-NOT: {i32 ()* @nvvm_special_regs, !"kernel", i32 1}
// CHECK: {void ()* @kernel_func, !"kernel", i32 1}