Skip to content

Commit 3bcb419

Browse files
committed
[MLIR][Flang][OpenMP] Make omp.simdloop into a loop wrapper
This patch updates the definition of `omp.simdloop` to enforce the restrictions of a wrapper operation. It has been renamed to `omp.simd`, to better reflect the naming used in the spec. All uses of "simdloop" in function names have been updated accordingly. Some changes to Flang lowering and OpenMP to LLVM IR translation are introduced to prevent the introduction of compilation/test failures. The eventual long term solution might be different.
1 parent 99e8123 commit 3bcb419

File tree

19 files changed

+634
-549
lines changed

19 files changed

+634
-549
lines changed

flang/lib/Lower/OpenMP/OpenMP.cpp

Lines changed: 60 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,7 @@ struct OpWithBodyGenInfo {
521521
/// \param [in] op - the operation the body belongs to.
522522
/// \param [in] info - options controlling code-gen for the construction.
523523
template <typename Op>
524-
static void createBodyOfOp(Op &op, OpWithBodyGenInfo &info) {
524+
static void createBodyOfOp(mlir::Operation &op, OpWithBodyGenInfo &info) {
525525
fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder();
526526

527527
auto insertMarker = [](fir::FirOpBuilder &builder) {
@@ -537,10 +537,10 @@ static void createBodyOfOp(Op &op, OpWithBodyGenInfo &info) {
537537
auto regionArgs =
538538
[&]() -> llvm::SmallVector<const Fortran::semantics::Symbol *> {
539539
if (info.genRegionEntryCB != nullptr) {
540-
return info.genRegionEntryCB(op);
540+
return info.genRegionEntryCB(&op);
541541
}
542542

543-
firOpBuilder.createBlock(&op.getRegion());
543+
firOpBuilder.createBlock(&op.getRegion(0));
544544
return {};
545545
}();
546546
// Mark the earliest insertion point.
@@ -556,7 +556,7 @@ static void createBodyOfOp(Op &op, OpWithBodyGenInfo &info) {
556556
// Start with privatization, so that the lowering of the nested
557557
// code will use the right symbols.
558558
constexpr bool isLoop = std::is_same_v<Op, mlir::omp::WsloopOp> ||
559-
std::is_same_v<Op, mlir::omp::SimdLoopOp>;
559+
std::is_same_v<Op, mlir::omp::SimdOp>;
560560
bool privatize = info.clauses && !info.outerCombined;
561561

562562
firOpBuilder.setInsertionPoint(marker);
@@ -582,9 +582,9 @@ static void createBodyOfOp(Op &op, OpWithBodyGenInfo &info) {
582582
// a lot of complications for our approach if the terminator generation
583583
// is delayed past this point. Insert a temporary terminator here, then
584584
// delete it.
585-
firOpBuilder.setInsertionPointToEnd(&op.getRegion().back());
586-
auto *temp = Fortran::lower::genOpenMPTerminator(
587-
firOpBuilder, op.getOperation(), info.loc);
585+
firOpBuilder.setInsertionPointToEnd(&op.getRegion(0).back());
586+
auto *temp =
587+
Fortran::lower::genOpenMPTerminator(firOpBuilder, &op, info.loc);
588588
firOpBuilder.setInsertionPointAfter(marker);
589589
genNestedEvaluations(info.converter, info.eval);
590590
temp->erase();
@@ -626,23 +626,36 @@ static void createBodyOfOp(Op &op, OpWithBodyGenInfo &info) {
626626
return exit;
627627
};
628628

629-
if (auto *exitBlock = getUniqueExit(op.getRegion())) {
629+
if (auto *exitBlock = getUniqueExit(op.getRegion(0))) {
630630
firOpBuilder.setInsertionPointToEnd(exitBlock);
631-
auto *term = Fortran::lower::genOpenMPTerminator(
632-
firOpBuilder, op.getOperation(), info.loc);
631+
auto *term =
632+
Fortran::lower::genOpenMPTerminator(firOpBuilder, &op, info.loc);
633633
// Only insert lastprivate code when there actually is an exit block.
634634
// Such a block may not exist if the nested code produced an infinite
635635
// loop (this may not make sense in production code, but a user could
636636
// write that and we should handle it).
637637
firOpBuilder.setInsertionPoint(term);
638638
if (privatize) {
639+
// DataSharingProcessor::processStep2() may create operations before/after
640+
// the one passed as argument. We need to treat loop wrappers and their
641+
// nested loop as a unit, so we need to pass the top level wrapper (if
642+
// present). Otherwise, these operations will be inserted within a
643+
// wrapper region.
644+
mlir::Operation *privatizationTopLevelOp = &op;
645+
if (auto loopNest = llvm::dyn_cast<mlir::omp::LoopNestOp>(op)) {
646+
llvm::SmallVector<mlir::omp::LoopWrapperInterface> wrappers;
647+
loopNest.gatherWrappers(wrappers);
648+
if (!wrappers.empty())
649+
privatizationTopLevelOp = &*wrappers.back();
650+
}
651+
639652
if (!info.dsp) {
640653
assert(tempDsp.has_value());
641-
tempDsp->processStep2(op, isLoop);
654+
tempDsp->processStep2(privatizationTopLevelOp, isLoop);
642655
} else {
643656
if (isLoop && regionArgs.size() > 0)
644657
info.dsp->setLoopIV(info.converter.getSymbolAddress(*regionArgs[0]));
645-
info.dsp->processStep2(op, isLoop);
658+
info.dsp->processStep2(privatizationTopLevelOp, isLoop);
646659
}
647660
}
648661
}
@@ -719,7 +732,7 @@ template <typename OpTy, typename... Args>
719732
static OpTy genOpWithBody(OpWithBodyGenInfo &info, Args &&...args) {
720733
auto op = info.converter.getFirOpBuilder().create<OpTy>(
721734
info.loc, std::forward<Args>(args)...);
722-
createBodyOfOp<OpTy>(op, info);
735+
createBodyOfOp<OpTy>(*op, info);
723736
return op;
724737
}
725738

@@ -1689,13 +1702,12 @@ genLoopAndReductionVars(
16891702
return llvm::SmallVector<const Fortran::semantics::Symbol *>(loopArgs);
16901703
}
16911704

1692-
static void
1693-
createSimdLoop(Fortran::lower::AbstractConverter &converter,
1694-
Fortran::semantics::SemanticsContext &semaCtx,
1695-
Fortran::lower::pft::Evaluation &eval,
1696-
llvm::omp::Directive ompDirective,
1697-
const Fortran::parser::OmpClauseList &loopOpClauseList,
1698-
mlir::Location loc) {
1705+
static void createSimd(Fortran::lower::AbstractConverter &converter,
1706+
Fortran::semantics::SemanticsContext &semaCtx,
1707+
Fortran::lower::pft::Evaluation &eval,
1708+
llvm::omp::Directive ompDirective,
1709+
const Fortran::parser::OmpClauseList &loopOpClauseList,
1710+
mlir::Location loc) {
16991711
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
17001712
DataSharingProcessor dsp(converter, semaCtx, loopOpClauseList, eval);
17011713
dsp.processStep1();
@@ -1720,11 +1732,20 @@ createSimdLoop(Fortran::lower::AbstractConverter &converter,
17201732
cp.processTODO<clause::Aligned, clause::Allocate, clause::Linear,
17211733
clause::Nontemporal, clause::Order>(loc, ompDirective);
17221734

1735+
// Create omp.simd wrapper.
17231736
mlir::TypeRange resultType;
1724-
auto simdLoopOp = firOpBuilder.create<mlir::omp::SimdLoopOp>(
1725-
loc, resultType, lowerBound, upperBound, step, alignedVars,
1726-
/*alignment_values=*/nullptr, ifClauseOperand, nontemporalVars,
1727-
orderClauseOperand, simdlenClauseOperand, safelenClauseOperand,
1737+
auto simdOp = firOpBuilder.create<mlir::omp::SimdOp>(
1738+
loc, resultType, alignedVars, /*alignment_values=*/nullptr,
1739+
ifClauseOperand, nontemporalVars, orderClauseOperand,
1740+
simdlenClauseOperand, safelenClauseOperand);
1741+
1742+
firOpBuilder.createBlock(&simdOp.getRegion());
1743+
firOpBuilder.setInsertionPoint(
1744+
Fortran::lower::genOpenMPTerminator(firOpBuilder, simdOp, loc));
1745+
1746+
// Create nested omp.loop_nest and fill body with loop contents.
1747+
auto loopOp = firOpBuilder.create<mlir::omp::LoopNestOp>(
1748+
loc, lowerBound, upperBound, step,
17281749
/*inclusive=*/firOpBuilder.getUnitAttr());
17291750

17301751
auto *nestedEval = getCollapsedLoopEval(
@@ -1734,11 +1755,11 @@ createSimdLoop(Fortran::lower::AbstractConverter &converter,
17341755
return genLoopVars(op, converter, loc, iv);
17351756
};
17361757

1737-
createBodyOfOp<mlir::omp::SimdLoopOp>(
1738-
simdLoopOp, OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval)
1739-
.setClauses(&loopOpClauseList)
1740-
.setDataSharingProcessor(&dsp)
1741-
.setGenRegionEntryCb(ivCallback));
1758+
createBodyOfOp<mlir::omp::SimdOp>(
1759+
*loopOp, OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval)
1760+
.setClauses(&loopOpClauseList)
1761+
.setDataSharingProcessor(&dsp)
1762+
.setGenRegionEntryCb(ivCallback));
17421763
}
17431764

17441765
static void createWsloop(Fortran::lower::AbstractConverter &converter,
@@ -1819,11 +1840,11 @@ static void createWsloop(Fortran::lower::AbstractConverter &converter,
18191840
};
18201841

18211842
createBodyOfOp<mlir::omp::WsloopOp>(
1822-
wsLoopOp, OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval)
1823-
.setClauses(&beginClauseList)
1824-
.setDataSharingProcessor(&dsp)
1825-
.setReductions(&reductionSymbols, &reductionTypes)
1826-
.setGenRegionEntryCb(ivCallback));
1843+
*wsLoopOp, OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval)
1844+
.setClauses(&beginClauseList)
1845+
.setDataSharingProcessor(&dsp)
1846+
.setReductions(&reductionSymbols, &reductionTypes)
1847+
.setGenRegionEntryCb(ivCallback));
18271848
}
18281849

18291850
static void createSimdWsloop(
@@ -2200,7 +2221,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
22002221
global.getSymName()));
22012222
}();
22022223
auto genInfo = OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval);
2203-
createBodyOfOp<mlir::omp::CriticalOp>(criticalOp, genInfo);
2224+
createBodyOfOp<mlir::omp::CriticalOp>(*criticalOp, genInfo);
22042225
}
22052226

22062227
static void
@@ -2285,8 +2306,8 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
22852306

22862307
} else if (llvm::omp::allSimdSet.test(ompDirective)) {
22872308
// 2.9.3.1 SIMD construct
2288-
createSimdLoop(converter, semaCtx, eval, ompDirective, loopOpClauseList,
2289-
currentLocation);
2309+
createSimd(converter, semaCtx, eval, ompDirective, loopOpClauseList,
2310+
currentLocation);
22902311
genOpenMPReduction(converter, semaCtx, loopOpClauseList);
22912312
} else {
22922313
createWsloop(converter, semaCtx, eval, ompDirective, loopOpClauseList,
@@ -2410,10 +2431,9 @@ mlir::Operation *Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder,
24102431
mlir::Operation *op,
24112432
mlir::Location loc) {
24122433
if (mlir::isa<mlir::omp::WsloopOp, mlir::omp::DeclareReductionOp,
2413-
mlir::omp::AtomicUpdateOp, mlir::omp::SimdLoopOp>(op))
2434+
mlir::omp::AtomicUpdateOp, mlir::omp::LoopNestOp>(op))
24142435
return builder.create<mlir::omp::YieldOp>(loc);
2415-
else
2416-
return builder.create<mlir::omp::TerminatorOp>(loc);
2436+
return builder.create<mlir::omp::TerminatorOp>(loc);
24172437
}
24182438

24192439
void Fortran::lower::genOpenMPConstruct(

flang/test/Fir/convert-to-llvm-openmp-and-fir.fir

Lines changed: 54 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -180,14 +180,16 @@ func.func @_QPsimd1(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.ref
180180
omp.parallel {
181181
%1 = fir.alloca i32 {adapt.valuebyref, pinned}
182182
%2 = fir.load %arg0 : !fir.ref<i32>
183-
omp.simdloop for (%arg2) : i32 = (%c1_i32) to (%2) step (%c1_i32) {
184-
fir.store %arg2 to %1 : !fir.ref<i32>
185-
%3 = fir.load %1 : !fir.ref<i32>
186-
%4 = fir.convert %3 : (i32) -> i64
187-
%5 = arith.subi %4, %c1_i64 : i64
188-
%6 = fir.coordinate_of %arg1, %5 : (!fir.ref<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
189-
fir.store %3 to %6 : !fir.ref<i32>
190-
omp.yield
183+
omp.simd {
184+
omp.loop_nest (%arg2) : i32 = (%c1_i32) to (%2) step (%c1_i32) {
185+
fir.store %arg2 to %1 : !fir.ref<i32>
186+
%3 = fir.load %1 : !fir.ref<i32>
187+
%4 = fir.convert %3 : (i32) -> i64
188+
%5 = arith.subi %4, %c1_i64 : i64
189+
%6 = fir.coordinate_of %arg1, %5 : (!fir.ref<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
190+
fir.store %3 to %6 : !fir.ref<i32>
191+
omp.yield
192+
}
191193
}
192194
omp.terminator
193195
}
@@ -202,8 +204,8 @@ func.func @_QPsimd1(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.ref
202204
// CHECK: %[[ONE_3:.*]] = llvm.mlir.constant(1 : i64) : i64
203205
// CHECK: %[[I_VAR:.*]] = llvm.alloca %[[ONE_3]] x i32 {pinned} : (i64) -> !llvm.ptr
204206
// CHECK: %[[N:.*]] = llvm.load %[[N_REF]] : !llvm.ptr -> i32
205-
// CHECK: omp.simdloop
206-
// CHECK-SAME: (%[[I:.*]]) : i32 = (%[[ONE_2]]) to (%[[N]]) step (%[[ONE_2]]) {
207+
// CHECK: omp.simd {
208+
// CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[ONE_2]]) to (%[[N]]) step (%[[ONE_2]]) {
207209
// CHECK: llvm.store %[[I]], %[[I_VAR]] : i32, !llvm.ptr
208210
// CHECK: %[[I1:.*]] = llvm.load %[[I_VAR]] : !llvm.ptr -> i32
209211
// CHECK: %[[I1_EXT:.*]] = llvm.sext %[[I1]] : i32 to i64
@@ -212,6 +214,7 @@ func.func @_QPsimd1(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.ref
212214
// CHECK: llvm.store %[[I1]], %[[ARR_I_REF]] : i32, !llvm.ptr
213215
// CHECK: omp.yield
214216
// CHECK: }
217+
// CHECK: }
215218
// CHECK: omp.terminator
216219
// CHECK: }
217220
// CHECK: llvm.return
@@ -471,55 +474,59 @@ func.func @_QPomp_target() {
471474

472475
// -----
473476

474-
func.func @_QPsimdloop_with_nested_loop() {
477+
func.func @_QPsimd_with_nested_loop() {
475478
%0 = fir.alloca i32 {adapt.valuebyref}
476-
%1 = fir.alloca !fir.array<10xi32> {bindc_name = "a", uniq_name = "_QFsimdloop_with_nested_loopEa"}
477-
%2 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimdloop_with_nested_loopEi"}
478-
%3 = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFsimdloop_with_nested_loopEj"}
479+
%1 = fir.alloca !fir.array<10xi32> {bindc_name = "a", uniq_name = "_QFsimd_with_nested_loopEa"}
480+
%2 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimd_with_nested_loopEi"}
481+
%3 = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFsimd_with_nested_loopEj"}
479482
%c1_i32 = arith.constant 1 : i32
480483
%c10_i32 = arith.constant 10 : i32
481484
%c1_i32_0 = arith.constant 1 : i32
482-
omp.simdloop for (%arg0) : i32 = (%c1_i32) to (%c10_i32) inclusive step (%c1_i32_0) {
483-
fir.store %arg0 to %0 : !fir.ref<i32>
484-
%c1_i32_1 = arith.constant 1 : i32
485-
%4 = fir.convert %c1_i32_1 : (i32) -> index
486-
%c10_i32_2 = arith.constant 10 : i32
487-
%5 = fir.convert %c10_i32_2 : (i32) -> index
488-
%c1 = arith.constant 1 : index
489-
%6 = fir.do_loop %arg1 = %4 to %5 step %c1 -> index {
490-
%8 = fir.convert %arg1 : (index) -> i32
491-
fir.store %8 to %3 : !fir.ref<i32>
492-
%9 = fir.load %0 : !fir.ref<i32>
493-
%10 = fir.load %0 : !fir.ref<i32>
494-
%11 = fir.convert %10 : (i32) -> i64
495-
%c1_i64 = arith.constant 1 : i64
496-
%12 = arith.subi %11, %c1_i64 : i64
497-
%13 = fir.coordinate_of %1, %12 : (!fir.ref<!fir.array<10xi32>>, i64) -> !fir.ref<i32>
498-
fir.store %9 to %13 : !fir.ref<i32>
499-
%14 = arith.addi %arg1, %c1 : index
500-
fir.result %14 : index
485+
omp.simd {
486+
omp.loop_nest (%arg0) : i32 = (%c1_i32) to (%c10_i32) inclusive step (%c1_i32_0) {
487+
fir.store %arg0 to %0 : !fir.ref<i32>
488+
%c1_i32_1 = arith.constant 1 : i32
489+
%4 = fir.convert %c1_i32_1 : (i32) -> index
490+
%c10_i32_2 = arith.constant 10 : i32
491+
%5 = fir.convert %c10_i32_2 : (i32) -> index
492+
%c1 = arith.constant 1 : index
493+
%6 = fir.do_loop %arg1 = %4 to %5 step %c1 -> index {
494+
%8 = fir.convert %arg1 : (index) -> i32
495+
fir.store %8 to %3 : !fir.ref<i32>
496+
%9 = fir.load %0 : !fir.ref<i32>
497+
%10 = fir.load %0 : !fir.ref<i32>
498+
%11 = fir.convert %10 : (i32) -> i64
499+
%c1_i64 = arith.constant 1 : i64
500+
%12 = arith.subi %11, %c1_i64 : i64
501+
%13 = fir.coordinate_of %1, %12 : (!fir.ref<!fir.array<10xi32>>, i64) -> !fir.ref<i32>
502+
fir.store %9 to %13 : !fir.ref<i32>
503+
%14 = arith.addi %arg1, %c1 : index
504+
fir.result %14 : index
505+
}
506+
%7 = fir.convert %6 : (index) -> i32
507+
fir.store %7 to %3 : !fir.ref<i32>
508+
omp.yield
501509
}
502-
%7 = fir.convert %6 : (index) -> i32
503-
fir.store %7 to %3 : !fir.ref<i32>
504-
omp.yield
505510
}
506511
return
507512
}
508513

509-
// CHECK-LABEL: llvm.func @_QPsimdloop_with_nested_loop() {
514+
// CHECK-LABEL: llvm.func @_QPsimd_with_nested_loop() {
510515
// CHECK: %[[LOWER:.*]] = llvm.mlir.constant(1 : i32) : i32
511516
// CHECK: %[[UPPER:.*]] = llvm.mlir.constant(10 : i32) : i32
512517
// CHECK: %[[STEP:.*]] = llvm.mlir.constant(1 : i32) : i32
513-
// CHECK: omp.simdloop for (%[[CNT:.*]]) : i32 = (%[[LOWER]]) to (%[[UPPER]]) inclusive step (%[[STEP]]) {
514-
// CHECK: llvm.br ^bb1(%[[VAL_1:.*]], %[[VAL_2:.*]] : i64, i64)
515-
// CHECK: ^bb1(%[[VAL_3:.*]]: i64, %[[VAL_4:.*]]: i64):
516-
// CHECK: %[[VAL_5:.*]] = llvm.mlir.constant(0 : index) : i64
517-
// CHECK: %[[VAL_6:.*]] = llvm.icmp "sgt" %[[VAL_4]], %[[VAL_5]] : i64
518-
// CHECK: llvm.cond_br %[[VAL_6]], ^bb2, ^bb3
519-
// CHECK: ^bb2:
520-
// CHECK: llvm.br ^bb1(%[[VAL_7:.*]], %[[VAL_8:.*]] : i64, i64)
521-
// CHECK: ^bb3:
522-
// CHECK: omp.yield
518+
// CHECK: omp.simd {
519+
// CHECK-NEXT: omp.loop_nest (%[[CNT:.*]]) : i32 = (%[[LOWER]]) to (%[[UPPER]]) inclusive step (%[[STEP]]) {
520+
// CHECK: llvm.br ^bb1(%[[VAL_1:.*]], %[[VAL_2:.*]] : i64, i64)
521+
// CHECK: ^bb1(%[[VAL_3:.*]]: i64, %[[VAL_4:.*]]: i64):
522+
// CHECK: %[[VAL_5:.*]] = llvm.mlir.constant(0 : index) : i64
523+
// CHECK: %[[VAL_6:.*]] = llvm.icmp "sgt" %[[VAL_4]], %[[VAL_5]] : i64
524+
// CHECK: llvm.cond_br %[[VAL_6]], ^bb2, ^bb3
525+
// CHECK: ^bb2:
526+
// CHECK: llvm.br ^bb1(%[[VAL_7:.*]], %[[VAL_8:.*]] : i64, i64)
527+
// CHECK: ^bb3:
528+
// CHECK: omp.yield
529+
// CHECK: }
523530
// CHECK: }
524531
// CHECK: llvm.return
525532
// CHECK: }

0 commit comments

Comments
 (0)