Skip to content

Commit 3182b7d

Browse files
fengxiezahiraam
authored and committed
generalize pass gpu-kernel-outlining for symbol op (llvm#72074)
This PR generalizes the gpu-kernel-outlining pass to handle any op that implements `SymbolOpInterface` instead of just `func::FuncOp`. Before this change, the gpu-kernel-outlining pass skipped `llvm.func`:
```mlir
module {
  llvm.func @main() {
    %c1 = arith.constant 1 : index
    gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %c1, %arg7 = %c1, %arg8 = %c1)
               threads(%arg3, %arg4, %arg5) in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) {
      gpu.terminator
    }
    llvm.return
  }
}
```
After this change, the gpu-kernel-outlining pass can handle `llvm.func` as well.
1 parent 3216aab commit 3182b7d

File tree

2 files changed

+38
-3
lines changed

2 files changed

+38
-3
lines changed

mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -349,13 +349,13 @@ class GpuKernelOutliningPass
349349
void runOnOperation() override {
350350
SymbolTable symbolTable(getOperation());
351351
bool modified = false;
352-
for (auto func : getOperation().getOps<func::FuncOp>()) {
352+
for (auto func : getOperation().getOps<SymbolOpInterface>()) {
353353
// Insert just after the function.
354354
Block::iterator insertPt(func->getNextNode());
355355
auto funcWalkResult = func.walk([&](gpu::LaunchOp op) {
356356
SetVector<Value> operands;
357357
std::string kernelFnName =
358-
Twine(op->getParentOfType<func::FuncOp>().getName(), "_kernel")
358+
Twine(op->getParentOfType<SymbolOpInterface>().getName(), "_kernel")
359359
.str();
360360

361361
gpu::GPUFuncOp outlinedFunc =

mlir/test/Dialect/GPU/outlining.mlir

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ func.func @launch() {
3737
}
3838

3939
// CHECK-DL-LABEL: gpu.module @launch_kernel attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<index, 32 : i32>>}
40-
4140
// CHECK-LABEL: gpu.module @launch_kernel
4241
// CHECK-NEXT: gpu.func @launch_kernel
4342
// CHECK-SAME: (%[[KERNEL_ARG0:.*]]: f32, %[[KERNEL_ARG1:.*]]: memref<?xf32, 1>)
@@ -63,6 +62,42 @@ func.func @launch() {
6362

6463
// -----
6564

65+
// This test checks that gpu-kernel-outlining can handle a gpu.launch inside an llvm.func.
66+
// CHECK-LABEL: @launch_from_llvm_func
67+
llvm.func @launch_from_llvm_func() {
68+
// CHECK: %[[ARG0:.*]] = "op"() : () -> f32
69+
%0 = "op"() : () -> (f32)
70+
// CHECK: %[[ARG1:.*]] = "op"() : () -> memref<?xf32, 1>
71+
%1 = "op"() : () -> (memref<?xf32, 1>)
72+
73+
// CHECK: %[[DIM:.*]] = arith.constant 1
74+
%dim = arith.constant 1 : index
75+
76+
// CHECK: gpu.launch_func @launch_from_llvm_func_kernel::@launch_from_llvm_func_kernel
77+
// CHECK-SAME: (%[[DIM]], %[[DIM]], %[[DIM]])
78+
// CHECK-SAME: (%[[DIM]], %[[DIM]], %[[DIM]]) args(%[[ARG0]] : f32, %[[ARG1]] : memref<?xf32, 1>)
79+
// CHECK-NEXT: llvm.return
80+
81+
// CHECK: gpu.func {{.*}} kernel attributes
82+
// CHECK-SAME: gpu.known_block_size = array<i32: 1, 1, 1>
83+
// CHECK-SAME: gpu.known_grid_size = array<i32: 1, 1, 1>
84+
// CHECK: gpu.return
85+
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %dim, %grid_y = %dim,
86+
%grid_z = %dim)
87+
threads(%tx, %ty, %tz) in (%block_x = %dim, %block_y = %dim,
88+
%block_z = %dim) {
89+
"use"(%0): (f32) -> ()
90+
"some_op"(%bx, %block_x) : (index, index) -> ()
91+
%2 = memref.load %1[%tx] : memref<?xf32, 1>
92+
gpu.terminator
93+
}
94+
llvm.return
95+
}
96+
97+
// CHECK-DL-LABEL: gpu.module @launch_from_llvm_func_kernel attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<index, 32 : i32>>}
98+
99+
// -----
100+
66101
// CHECK: module attributes {gpu.container_module}
67102
// CHECK-LABEL: @multiple_launches
68103
func.func @multiple_launches() {

0 commit comments

Comments
 (0)