forked from OSchip/llvm-project
[mlir] Added missing GPU lowering ops.
Summary: This diff adds missing GPU lowering ops to MLIR. Reviewers: herhut, pifon2a, ftynse Tags: #pre-merge_beta_testing, #llvm Differential Revision: https://reviews.llvm.org/D72439
This commit is contained in:
parent
6b686703e6
commit
202ab273e6
|
@ -712,7 +712,8 @@ public:
|
|||
populateGpuToNVVMConversionPatterns(converter, patterns);
|
||||
ConversionTarget target(getContext());
|
||||
target.addIllegalDialect<gpu::GPUDialect>();
|
||||
target.addIllegalOp<LLVM::ExpOp>();
|
||||
target.addIllegalOp<LLVM::FAbsOp, LLVM::FCeilOp, LLVM::CosOp,
|
||||
LLVM::ExpOp>();
|
||||
target.addIllegalOp<FuncOp>();
|
||||
target.addLegalDialect<LLVM::LLVMDialect>();
|
||||
target.addLegalDialect<NVVM::NVVMDialect>();
|
||||
|
@ -739,6 +740,12 @@ void mlir::populateGpuToNVVMConversionPatterns(
|
|||
NVVM::GridDimYOp, NVVM::GridDimZOp>,
|
||||
GPUAllReduceOpLowering, GPUShuffleOpLowering, GPUFuncOpLowering,
|
||||
GPUReturnOpLowering>(converter);
|
||||
patterns.insert<OpToFuncCallLowering<AbsFOp>>(converter, "__nv_fabsf",
|
||||
"__nv_fabs");
|
||||
patterns.insert<OpToFuncCallLowering<CeilFOp>>(converter, "__nv_ceilf",
|
||||
"__nv_ceil");
|
||||
patterns.insert<OpToFuncCallLowering<CosOp>>(converter, "__nv_cosf",
|
||||
"__nv_cos");
|
||||
patterns.insert<OpToFuncCallLowering<ExpOp>>(converter, "__nv_expf",
|
||||
"__nv_exp");
|
||||
}
|
||||
|
|
|
@ -51,12 +51,19 @@ public:
|
|||
GPUIndexIntrinsicOpLowering<gpu::GridDimOp, ROCDL::GridDimXOp,
|
||||
ROCDL::GridDimYOp, ROCDL::GridDimZOp>>(
|
||||
converter);
|
||||
patterns.insert<OpToFuncCallLowering<AbsFOp>>(converter, "_ocml_fabs_f32",
|
||||
"_ocml_fabs_f64");
|
||||
patterns.insert<OpToFuncCallLowering<CeilFOp>>(converter, "_ocml_ceil_f32",
|
||||
"_ocml_ceil_f64");
|
||||
patterns.insert<OpToFuncCallLowering<CosOp>>(converter, "_ocml_cos_f32",
|
||||
"_ocml_cos_f64");
|
||||
patterns.insert<OpToFuncCallLowering<ExpOp>>(converter, "_ocml_exp_f32",
|
||||
"_ocml_exp_f64");
|
||||
|
||||
ConversionTarget target(getContext());
|
||||
target.addLegalDialect<LLVM::LLVMDialect, ROCDL::ROCDLDialect>();
|
||||
target.addIllegalOp<LLVM::ExpOp>();
|
||||
target.addIllegalOp<LLVM::FAbsOp, LLVM::FCeilOp, LLVM::CosOp,
|
||||
LLVM::ExpOp>();
|
||||
target.addDynamicallyLegalOp<FuncOp>(
|
||||
[&](FuncOp op) { return converter.isSignatureLegal(op.getType()); });
|
||||
if (failed(applyPartialConversion(m, target, patterns, &converter)))
|
||||
|
|
|
@ -111,6 +111,51 @@ module attributes {gpu.kernel_module} {
|
|||
|
||||
// -----
|
||||
|
||||
module attributes {gpu.kernel_module} {
|
||||
// CHECK: llvm.func @__nv_fabsf(!llvm.float) -> !llvm.float
|
||||
// CHECK: llvm.func @__nv_fabs(!llvm.double) -> !llvm.double
|
||||
// CHECK-LABEL: func @gpu_fabs
|
||||
func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) {
|
||||
%result32 = std.absf %arg_f32 : f32
|
||||
// CHECK: llvm.call @__nv_fabsf(%{{.*}}) : (!llvm.float) -> !llvm.float
|
||||
%result64 = std.absf %arg_f64 : f64
|
||||
// CHECK: llvm.call @__nv_fabs(%{{.*}}) : (!llvm.double) -> !llvm.double
|
||||
std.return
|
||||
}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
module attributes {gpu.kernel_module} {
|
||||
// CHECK: llvm.func @__nv_ceilf(!llvm.float) -> !llvm.float
|
||||
// CHECK: llvm.func @__nv_ceil(!llvm.double) -> !llvm.double
|
||||
// CHECK-LABEL: func @gpu_ceil
|
||||
func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) {
|
||||
%result32 = std.ceilf %arg_f32 : f32
|
||||
// CHECK: llvm.call @__nv_ceilf(%{{.*}}) : (!llvm.float) -> !llvm.float
|
||||
%result64 = std.ceilf %arg_f64 : f64
|
||||
// CHECK: llvm.call @__nv_ceil(%{{.*}}) : (!llvm.double) -> !llvm.double
|
||||
std.return
|
||||
}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
module attributes {gpu.kernel_module} {
|
||||
// CHECK: llvm.func @__nv_cosf(!llvm.float) -> !llvm.float
|
||||
// CHECK: llvm.func @__nv_cos(!llvm.double) -> !llvm.double
|
||||
// CHECK-LABEL: func @gpu_cos
|
||||
func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) {
|
||||
%result32 = std.cos %arg_f32 : f32
|
||||
// CHECK: llvm.call @__nv_cosf(%{{.*}}) : (!llvm.float) -> !llvm.float
|
||||
%result64 = std.cos %arg_f64 : f64
|
||||
// CHECK: llvm.call @__nv_cos(%{{.*}}) : (!llvm.double) -> !llvm.double
|
||||
std.return
|
||||
}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
module attributes {gpu.kernel_module} {
|
||||
// CHECK: llvm.func @__nv_expf(!llvm.float) -> !llvm.float
|
||||
// CHECK: llvm.func @__nv_exp(!llvm.double) -> !llvm.double
|
||||
|
|
|
@ -38,6 +38,51 @@ module attributes {gpu.kernel_module} {
|
|||
|
||||
// -----
|
||||
|
||||
module attributes {gpu.kernel_module} {
|
||||
// CHECK: llvm.func @_ocml_fabs_f32(!llvm.float) -> !llvm.float
|
||||
// CHECK: llvm.func @_ocml_fabs_f64(!llvm.double) -> !llvm.double
|
||||
// CHECK-LABEL: func @gpu_fabs
|
||||
func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) {
|
||||
%result32 = std.absf %arg_f32 : f32
|
||||
// CHECK: llvm.call @_ocml_fabs_f32(%{{.*}}) : (!llvm.float) -> !llvm.float
|
||||
%result64 = std.absf %arg_f64 : f64
|
||||
// CHECK: llvm.call @_ocml_fabs_f64(%{{.*}}) : (!llvm.double) -> !llvm.double
|
||||
std.return
|
||||
}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
module attributes {gpu.kernel_module} {
|
||||
// CHECK: llvm.func @_ocml_ceil_f32(!llvm.float) -> !llvm.float
|
||||
// CHECK: llvm.func @_ocml_ceil_f64(!llvm.double) -> !llvm.double
|
||||
// CHECK-LABEL: func @gpu_ceil
|
||||
func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) {
|
||||
%result32 = std.ceilf %arg_f32 : f32
|
||||
// CHECK: llvm.call @_ocml_ceil_f32(%{{.*}}) : (!llvm.float) -> !llvm.float
|
||||
%result64 = std.ceilf %arg_f64 : f64
|
||||
// CHECK: llvm.call @_ocml_ceil_f64(%{{.*}}) : (!llvm.double) -> !llvm.double
|
||||
std.return
|
||||
}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
module attributes {gpu.kernel_module} {
|
||||
// CHECK: llvm.func @_ocml_cos_f32(!llvm.float) -> !llvm.float
|
||||
// CHECK: llvm.func @_ocml_cos_f64(!llvm.double) -> !llvm.double
|
||||
// CHECK-LABEL: func @gpu_cos
|
||||
func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) {
|
||||
%result32 = std.cos %arg_f32 : f32
|
||||
// CHECK: llvm.call @_ocml_cos_f32(%{{.*}}) : (!llvm.float) -> !llvm.float
|
||||
%result64 = std.cos %arg_f64 : f64
|
||||
// CHECK: llvm.call @_ocml_cos_f64(%{{.*}}) : (!llvm.double) -> !llvm.double
|
||||
std.return
|
||||
}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
module attributes {gpu.kernel_module} {
|
||||
// CHECK: llvm.func @_ocml_exp_f32(!llvm.float) -> !llvm.float
|
||||
// CHECK: llvm.func @_ocml_exp_f64(!llvm.double) -> !llvm.double
|
||||
|
|
Loading…
Reference in New Issue