Skip to content

[mlir][memref] Add a new ReifyResultShapes pass #145927

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions mlir/include/mlir/Dialect/MemRef/Transforms/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,68 @@ def ResolveShapedTypeResultDimsPass : Pass<"resolve-shaped-type-result-dims"> {
];
}

def ReifyResultShapesPass : Pass<"reify-result-shapes"> {
  let summary = "Reifies the results of `tensor::PadOp` and `tensor::ConcatOp`.";
  let description = [{
    This pass reifies the shapes of a subset of `ReifyRankedShapedTypeOpInterface`
    ops with `tensor` results.

    The pass currently only supports result shape type reification for:

    - tensor::PadOp
    - tensor::ConcatOp

    It addresses a representation gap where implicit op semantics are needed to
    infer static result types from dynamic operands.
    But it does so by using `ReifyRankedShapedTypeOpInterface` as the source of
    truth rather than the op itself. As a consequence, this cannot generalize
    today.

    TODO: in the future, we should consider coupling this information with op
    "transfer functions" (e.g. `IndexingMapOpInterface`) to provide a source of
    truth that can work across result shape inference, canonicalization and op
    verifiers.

    The pass replaces the operations with their reified versions, when more
    static information can be derived, and inserts casts when result shapes
    are updated.

    Example:
    ```mlir
    #map = affine_map<(d0) -> (-d0 + 256)>
    func.func @func(%arg0: f32, %arg1: index, %arg2: tensor<64x?x64xf32>)
        -> tensor<1x?x64xf32>
    {
      %0 = affine.apply #map(%arg1)
      %extracted_slice = tensor.extract_slice %arg2[0, 0, 0] [1, %arg1, 64] [1, 1, 1]
        : tensor<64x?x64xf32> to tensor<1x?x64xf32>
      %padded = tensor.pad %extracted_slice low[0, 0, 0] high[0, %0, 0] {
      ^bb0(%arg3: index, %arg4: index, %arg5: index):
        tensor.yield %arg0 : f32
      } : tensor<1x?x64xf32> to tensor<1x?x64xf32>
      return %padded : tensor<1x?x64xf32>
    }

    // mlir-opt --reify-result-shapes
    #map = affine_map<()[s0] -> (-s0 + 256)>
    func.func @func(%arg0: f32, %arg1: index, %arg2: tensor<64x?x64xf32>)
        -> tensor<1x?x64xf32>
    {
      %0 = affine.apply #map()[%arg1]
      %extracted_slice = tensor.extract_slice %arg2[0, 0, 0] [1, %arg1, 64] [1, 1, 1]
        : tensor<64x?x64xf32> to tensor<1x?x64xf32>
      %padded = tensor.pad %extracted_slice low[0, 0, 0] high[0, %0, 0] {
      ^bb0(%arg3: index, %arg4: index, %arg5: index):
        tensor.yield %arg0 : f32
      } : tensor<1x?x64xf32> to tensor<1x256x64xf32>
      %cast = tensor.cast %padded : tensor<1x256x64xf32> to tensor<1x?x64xf32>
      return %cast : tensor<1x?x64xf32>
    }
    ```
  }];
  let dependentDialects = [
    "affine::AffineDialect", "memref::MemRefDialect", "tensor::TensorDialect"
  ];
}

def ExpandStridedMetadataPass : Pass<"expand-strided-metadata"> {
let summary = "Expand memref operations into easier to analyze constructs";
let description = [{
Expand Down
2 changes: 1 addition & 1 deletion mlir/include/mlir/Dialect/MemRef/Transforms/Transforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class RewritePatternSet;
class RewriterBase;
class Value;
class ValueRange;
class ReifyRankedShapedTypeOpInterface;

namespace arith {
class WideIntEmulationConverter;
Expand Down Expand Up @@ -208,7 +209,6 @@ FailureOr<Value> replaceWithIndependentOp(RewriterBase &rewriter,
memref::AllocaOp allocToAlloca(
RewriterBase &rewriter, memref::AllocOp alloc,
function_ref<bool(memref::AllocOp, memref::DeallocOp)> filter = nullptr);

} // namespace memref
} // namespace mlir

Expand Down
1 change: 1 addition & 0 deletions mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ add_mlir_dialect_library(MLIRMemRefTransforms
IndependenceTransforms.cpp
MultiBuffer.cpp
NormalizeMemRefs.cpp
ReifyResultShapes.cpp
ResolveShapedTypeResultDims.cpp
RuntimeOpVerification.cpp

Expand Down
159 changes: 159 additions & 0 deletions mlir/lib/Dialect/MemRef/Transforms/ReifyResultShapes.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
//===- ReifyResultShapes.cpp - Reify result shapes ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transform reifies result shapes of `ReifyRankedShapedTypeOpInterface`
// operations with ranked `memref` and `tensor` results.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/MemRef/Transforms/Passes.h"

#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/MemRef/Transforms/Transforms.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Interfaces/DestinationStyleOpInterface.h"
#include "mlir/Interfaces/InferTypeOpInterface.h"
#include "llvm/Support/InterleavedRange.h"

#define DEBUG_TYPE "reify-result-shapes"
#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE << "]: ")

namespace mlir {
namespace memref {
#define GEN_PASS_DEF_REIFYRESULTSHAPESPASS
#include "mlir/Dialect/MemRef/Transforms/Passes.h.inc"
} // namespace memref
} // namespace mlir

using namespace mlir;

/// Reifies the result shapes of `op` and, when this yields a result type that
/// differs from the current one, replaces `op` with a clone carrying the
/// updated result types.
///
/// \param rewriter Rewriter used to reify the shapes, clone `op`, create the
///        casts and replace `op`. The pass driver in this file sets its
///        insertion point at `op` before calling.
/// \param op The op to process. When an update is required it must be a
///        `tensor::PadOp` or a `tensor::ConcatOp` (enforced by an assertion).
/// \returns `failure` if `mlir::reifyResultShapes` returned failure or
///          produced no shapes (a warning is emitted on `op`); otherwise it
///          always succeeds, including when no result type had to change.
///
/// Users of this transform should always expect it to modify the IR, even when
/// it fails. If any of the result types changes, the transform will insert
/// cast operations to the old type to keep the IR consistent.
static LogicalResult reifyOpResultShapes(RewriterBase &rewriter,
ReifyRankedShapedTypeOpInterface op) {
LLVM_DEBUG({ DBGS() << " reifying op: " << op << "\n"; });
// Get the reified out shapes.
ReifiedRankedShapedTypeDims reifiedResultShapes;
if (failed(mlir::reifyResultShapes(rewriter, op, reifiedResultShapes)) ||
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is my source of confusion. As far as I know this is meant to extract information about the shape of the result of the operation, but this is actually changing the operation itself. This seems like something that cannot be done just based on the interface/clone. The change in the result type might make the operation invalid (according to its verifier). This kind of rewrite cannot really be done just on the interface.

Copy link
Contributor

@fabianmcg fabianmcg Jun 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To me the interface description establishes an implicit contract allowing this:

    Interface to compute the shape of the result of an operation when
    the result is a ranked shape type, i.e. `RankedTensorType` or
    `MemRefType`.

Because, what would it mean for reifyResultShapes to return a shape that the op verifier will reject? The interface would produce inconsistent results with itself, rendering the interface implementation erroneous (IMO the verifier has higher precedence).

From my POV the interface solves this issue with the return of the LogicalResult, because then either the reifyResultShapes method should return failure or produce a shape that the verifier should accept. And if that's not the case then such an operation shouldn't implement the reify interface.

Nonetheless, I do see the argument for making the implicit contract explicit. So how about adding something along the lines of the following method to the interface?

    InterfaceMethod<
      /*desc=*/[{
        Reify the  shape of the result of an operation (typically in terms of the
        shape of its operands).
      }],
      /*retTy=*/"::llvm::LogicalResult",
      /*methodName=*/"resifyOpResult",
      /*args=*/(ins "::mlir::OpBuilder &":$builder,
        "unsigned":$resultNum)
    >

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To me the interface description establishes an implicit contract allowing this:

    Interface to compute the shape of the result of an operation when
    the result is a ranked shape type, i.e. `RankedTensorType` or
    `MemRefType`.

Because, what would it mean for reifyResultShapes to return a shape that the op verifier will reject? The interface would produce inconsistent results with itself, rendering the interface implementation erroneous (IMO the verifier has higher precedence).

I am not sure I fully follow the logic. Let's assume the op is currently valid and you are getting the shape of the result. Now you are changing the shape of the result without modifying any of its other operands. There is no way you can make an interface guarantee that the shape it found is considered valid by the operation. The operation method itself might not know that. The inconsistency could be coming from the verifier checking consistency between the other operands' values and its result type. You really cannot expect the reify method implementation to also know whether the op is valid or not; that is a kind of disconnected contract that can be a source of bugs.

reifiedResultShapes.empty()) {
// Failure path of the function contract: nothing usable was reified. The
// problem is reported as a warning on `op` rather than as an error.
return op->emitWarning() << "failed to get the reified shapes";
}

// Set as soon as one result type differs from the type `op` currently has.
bool modified = false;
// Compute the new output types.
SmallVector<Type> outTypes;
// Result types and reified shapes are paired positionally.
// NOTE(review): this assumes `reifiedResultShapes` holds one entry per result,
// in result order -- confirm for ops that also have non-shaped results.
for (const auto &[oldTy, reifiedShape] :
llvm::zip(op->getResultTypes(), reifiedResultShapes)) {
// Skip if it's not a memref or tensor type.
if (!isa<RankedTensorType, MemRefType>(oldTy)) {
outTypes.push_back(oldTy);
continue;
}

// The `isa` check above restricted `oldTy` to ranked tensor / memref types,
// so this cast is expected to succeed.
ShapedType shapedTy = dyn_cast<ShapedType>(oldTy);

// Start from a copy of the current shape and overwrite every dimension with
// its reified size. `dim` refers to an element of `shape`, so the
// assignments below update `shape` in place.
SmallVector<int64_t> shape = llvm::to_vector(shapedTy.getShape());
for (auto &&[dim, ofr] : llvm::zip_equal(shape, reifiedShape)) {
std::optional<int64_t> maybeCst = getConstantIntValue(ofr);
// If the reified dim is dynamic set it appropriately. Note that this also
// applies when the dimension is static in the current type.
if (!maybeCst.has_value()) {
dim = ShapedType::kDynamic;
continue;
}
// Set the static dim.
dim = *maybeCst;
}

// If the shape didn't change continue.
if (shape == shapedTy.getShape()) {
outTypes.push_back(oldTy);
continue;
}
modified = true;
// Same type with only the shape replaced; the element type is passed through
// unchanged.
outTypes.push_back(shapedTy.cloneWith(shape, shapedTy.getElementType()));
}

// Return if we don't need to update.
if (!modified) {
LLVM_DEBUG({ DBGS() << "- op doesn't require update\n"; });
return success();
}

LLVM_DEBUG({
DBGS() << "- oldTypes: " << llvm::interleaved_array(op->getResultTypes())
<< " \n";
DBGS() << "- outTypes: " << llvm::interleaved_array(outTypes) << " \n";
});

// We now have outTypes that need to be turned to cast ops.
Location loc = op->getLoc();
// Replacement values for the results of `op`, in result order.
SmallVector<Value> newResults;
// TODO: `mlir::reifyResultShapes` and op verifiers may not agree atm.
// This is a confluence problem that will need to be addressed.
// For now, we know PadOp and ConcatOp are fine.
// The pass driver in this file filters on the same two ops before calling, so
// the assertion guards against other callers.
assert((isa<tensor::PadOp, tensor::ConcatOp>(op.getOperation())) &&
"incorrect op");
// Clone `op` through the rewriter; the result types of the clone are patched
// in place below.
Operation *newOp = rewriter.clone(*op);
for (auto [reifiedTy, oldRes] : llvm::zip(outTypes, op->getResults())) {
OpResult newRes = newOp->getResult(oldRes.getResultNumber());
Type oldTy = oldRes.getType();
// Continue if the type remained invariant or is not shaped.
if (oldTy == reifiedTy || !isa<MemRefType, RankedTensorType>(oldTy)) {
newResults.push_back(newRes);
continue;
}

// Update the type.
newRes.setType(reifiedTy);
// Cast the refined result back to the original type so the replacement value
// has the type the old result had.
if (isa<RankedTensorType>(reifiedTy)) {
newResults.push_back(rewriter.create<tensor::CastOp>(loc, oldTy, newRes));
} else {
assert(isa<MemRefType>(reifiedTy) && "expected a memref type");
newResults.push_back(rewriter.create<memref::CastOp>(loc, oldTy, newRes));
}
}

LLVM_DEBUG({
DBGS() << "- reified results " << llvm::interleaved_array(newResults)
<< "\n";
});
// Swap the original op for the clone's results (or the casts built from them).
rewriter.replaceOp(op, newResults);
return success();
}

//===----------------------------------------------------------------------===//
// Pass registration
//===----------------------------------------------------------------------===//

namespace {
/// Pass that reifies the result shapes of the supported ops nested under the
/// operation it runs on.
struct ReifyResultShapesPass final
    : public memref::impl::ReifyResultShapesPassBase<ReifyResultShapesPass> {
  /// Gathers every supported op first and rewrites them afterwards, so the
  /// walk never visits IR that is being replaced.
  void runOnOperation() override {
    SmallVector<ReifyRankedShapedTypeOpInterface> worklist;
    getOperation()->walk(
        [&worklist](ReifyRankedShapedTypeOpInterface candidate) {
          // Only non-DPS ops that do not carry tied operand shapes are
          // handled. For now this is restricted to tensor::PadOp and
          // tensor::ConcatOp.
          if (isa<tensor::PadOp, tensor::ConcatOp>(candidate.getOperation()))
            worklist.push_back(candidate);
        });

    IRRewriter rewriter(&getContext());
    for (ReifyRankedShapedTypeOpInterface candidate : worklist) {
      // New ops are created right before the op being processed.
      rewriter.setInsertionPoint(candidate);
      // The result is intentionally ignored: the pass itself is not failed
      // when an individual op cannot be reified.
      (void)reifyOpResultShapes(rewriter, candidate);
    }
  }
};
} // namespace
31 changes: 31 additions & 0 deletions mlir/test/Dialect/Tensor/reify-shapes.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// RUN: mlir-opt -reify-result-shapes %s | FileCheck %s

// The test below checks concat op reification on two ops:
//  - the first concat already has a fully static result type, so it must be
//    left as is and no cast is inserted;
//  - the second concat is declared with a fully dynamic result type. Its first
//    two dimensions are expected to be refined to the static sizes 4 and 7,
//    while the concatenated dimension stays dynamic. A cast back to the
//    original type is then inserted.
// CHECK-LABEL: func.func @concat_reification
func.func @concat_reification(%arg0: tensor<4x7x3xf32>, %arg1 : tensor<4x4x3xf32>, %arg2: tensor<?x?x?xf32>)
-> (tensor<4x11x3xf32>, tensor<?x?x?xf32>) {
// CHECK: %[[RES0:.*]] = tensor.concat dim(1) %{{.*}} : (tensor<4x7x3xf32>, tensor<4x4x3xf32>) -> tensor<4x11x3xf32>
%1 = tensor.concat dim(1) %arg0, %arg1 : (tensor<4x7x3xf32>, tensor<4x4x3xf32>) -> tensor<4x11x3xf32>
// CHECK: %[[V0:.*]] = tensor.concat dim(2) %{{.*}} : (tensor<4x7x3xf32>, tensor<?x?x?xf32>) -> tensor<4x7x?xf32>
// CHECK: %[[RES1:.*]] = tensor.cast %[[V0]] : tensor<4x7x?xf32> to tensor<?x?x?xf32>
%2 = tensor.concat dim(2) %arg0, %arg2 : (tensor<4x7x3xf32>, tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
// CHECK: return %[[RES0]], %[[RES1]] : tensor<4x11x3xf32>, tensor<?x?x?xf32>
return %1, %2 : tensor<4x11x3xf32>, tensor<?x?x?xf32>
}

// The test below checks pad op reification. The slice has size %idx along
// dimension 1 and the high padding along that dimension is 256 - %idx, so the
// padded size is always 256. The pass is expected to make that dimension
// static on the pad result and to cast the result back to the original
// dynamic type.
// CHECK-LABEL: func.func @pad_reification
func.func @pad_reification(%cst : f32, %idx : index, %t: tensor<64x?x64xf32>) -> tensor<1x?x64xf32> {
%pad_amt = affine.apply affine_map<(d0) -> (-d0 + 256)>(%idx)
%es = tensor.extract_slice %t[0, 0, 0] [1, %idx, 64] [1, 1, 1]
: tensor<64x?x64xf32> to tensor<1x?x64xf32>

// CHECK: tensor.pad
// CHECK: : tensor<1x?x64xf32> to tensor<1x256x64xf32>
// CHECK: tensor.cast %{{.*}} : tensor<1x256x64xf32> to tensor<1x?x64xf32>
%padded = tensor.pad %es low[0, 0, 0] high[0, %pad_amt, 0] {
^bb0(%a: index, %b: index, %c: index):
tensor.yield %cst : f32
} : tensor<1x?x64xf32> to tensor<1x?x64xf32>

return %padded : tensor<1x?x64xf32>
}
Loading