Skip to content

[LV] Add initial legality checks for ee loops with stores #145663

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,14 @@ class LoopVectorizationLegality {
return hasUncountableEarlyExit() ? getUncountableEdge()->second : nullptr;
}

/// Returns true when an early exit condition load has been recorded, i.e.
/// this is an early exit loop containing a store.
bool isConditionCopyRequired() const {
  return EarlyExitLoad != std::nullopt;
}

/// Returns the load instruction, if any, that is directly used for the exit
/// comparison in an early exit loop containing state-changing or
/// potentially-faulting operations. The result is empty when no such load
/// has been recorded.
std::optional<LoadInst *> getEarlyExitLoad() const {
  return EarlyExitLoad;
}

/// Return true if there are store-load forwarding dependencies.
bool isSafeForAnyStoreLoadForwardDistances() const {
return LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances();
Expand Down Expand Up @@ -536,6 +544,12 @@ class LoopVectorizationLegality {
/// additional cases safely.
bool isVectorizableEarlyExitLoop();

/// Discards all early exit state gathered so far (the uncountable exit edge
/// and the recorded exit-condition load). Called when an early exit legality
/// check has failed.
void clearEarlyExitData() {
  EarlyExitLoad.reset();
  UncountableEdge.reset();
}

/// Return true if all of the instructions in the block can be speculatively
/// executed, and record the loads/stores that require masking.
/// \p SafePtrs is a list of addresses that are known to be legal and we know
Expand Down Expand Up @@ -654,6 +668,10 @@ class LoopVectorizationLegality {
/// Keep track of the loop edge to an uncountable exit, comprising a pair
/// of (Exiting, Exit) blocks, if there is exactly one early exit.
std::optional<std::pair<BasicBlock *, BasicBlock *>> UncountableEdge;

/// Keep track of the load used for early exits where state-changing or
/// potentially faulting operations occur inside the loop. It is only set by
/// isVectorizableEarlyExitLoop() for loops that contain a store, once the
/// load feeding the exit compare has been shown to be dereferenceable and
/// aligned in the loop and guaranteed to execute. It is reset by
/// clearEarlyExitData().
std::optional<LoadInst *> EarlyExitLoad;
};

} // namespace llvm
Expand Down
137 changes: 124 additions & 13 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
Expand Down Expand Up @@ -1207,8 +1208,42 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
});
}

if (!LAI->canVectorizeMemory())
return canVectorizeIndirectUnsafeDependences();
if (LAI->canVectorizeMemory()) {
// FIXME: Remove or reduce this restriction. We're in a bit of an odd spot
// since we're (potentially) doing the load out of its normal order
// in the loop and that may throw off dependency checking.
// A forward dependency should be fine, but a backwards dep may not
// be even if LAA thinks it is due to performing the load for the
// vector iteration i+1 in vector iteration i.
if (isConditionCopyRequired()) {
const MemoryDepChecker &DepChecker = LAI->getDepChecker();
const auto *Deps = DepChecker.getDependences();

for (const MemoryDepChecker::Dependence &Dep : *Deps) {
if (Dep.getDestination(DepChecker) == EarlyExitLoad ||
Dep.getSource(DepChecker) == EarlyExitLoad) {
// Refine language a little? This currently only applies when a store
// is present in the early exit loop.
reportVectorizationFailure(
"No dependencies allowed for early exit condition load",
"Early exit condition loads may not have a dependence with "
"another"
" memory operation.",
"CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
return false;
}
}
}
} else {
if (!isConditionCopyRequired())
return canVectorizeIndirectUnsafeDependences();
reportVectorizationFailure(
"Cannot vectorize unsafe dependencies in state-changing early exit "
"loop.",
"Unable to vectorize memory in an early exit loop with store",
"CantVectorizeUnsafeDependencyForEELoopWithStore", ORE, TheLoop);
return false;
}

if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
reportVectorizationFailure("We don't allow storing to uniform addresses",
Expand Down Expand Up @@ -1747,16 +1782,31 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
}
};

bool HasStore = false;
for (auto *BB : TheLoop->blocks())
for (auto &I : *BB) {
if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
HasStore = true;
if (SI->isSimple())
continue;

reportVectorizationFailure(
"Complex writes to memory unsupported in early exit loops",
"Cannot vectorize early exit loop with complex writes to memory",
"WritesInEarlyExitLoop", ORE, TheLoop);
return false;
}

if (I.mayWriteToMemory()) {
// We don't support writes to memory.
reportVectorizationFailure(
"Writes to memory unsupported in early exit loops",
"Cannot vectorize early exit loop with writes to memory",
"Complex writes to memory unsupported in early exit loops",
"Cannot vectorize early exit loop with complex writes to memory",
"WritesInEarlyExitLoop", ORE, TheLoop);
return false;
} else if (!IsSafeOperation(&I)) {
}

if (!IsSafeOperation(&I)) {
reportVectorizationFailure("Early exit loop contains operations that "
"cannot be speculatively executed",
"UnsafeOperationsEarlyExitLoop", ORE,
Expand All @@ -1771,13 +1821,65 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {

// TODO: Handle loops that may fault.
Predicates.clear();
if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
&Predicates)) {
reportVectorizationFailure(
"Loop may fault",
"Cannot vectorize potentially faulting early exit loop",
"PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
return false;
if (HasStore) {
// Record load for analysis by isDereferenceableAndAlignedInLoop
// and later by dependence analysis.
if (BranchInst *Br = dyn_cast<BranchInst>(
SingleUncountableEdge->first->getTerminator())) {
// FIXME: Handle exit conditions with multiple users, more complex exit
// conditions than br(icmp(load, loop_inv)).
ICmpInst *Cmp = dyn_cast<ICmpInst>(Br->getCondition());
if (Cmp && Cmp->hasOneUse() &&
TheLoop->isLoopInvariant(Cmp->getOperand(1))) {
LoadInst *Load = dyn_cast<LoadInst>(Cmp->getOperand(0));
if (Load && Load->hasOneUse() && !TheLoop->isLoopInvariant(Load)) {
if (isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(),
*DT, AC, &Predicates)) {
ICFLoopSafetyInfo SafetyInfo;
SafetyInfo.computeLoopSafetyInfo(TheLoop);
// FIXME: We may have multiple levels of conditional loads, so will
// need to improve on outright rejection at some point.
if (SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) {
EarlyExitLoad = Load;
} else {
reportVectorizationFailure(
"Early exit condition load not guaranteed to execute",
"Cannot vectorize early exit loop when condition load is not "
"guaranteed to execute",
"EarlyExitLoadNotGuaranteed", ORE, TheLoop);
}
} else {
reportVectorizationFailure(
"Uncounted loop condition not known safe",
"Cannot vectorize early exit loop with "
"possibly unsafe condition load",
"PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
return false;
}
}
}
}

if (!EarlyExitLoad) {
reportVectorizationFailure(
"Early exit loop with store but no condition load",
"Cannot vectorize early exit loop with store but no condition load",
"NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
return false;
}
} else {
// Read-only loop.
// FIXME: as with the loops with stores, only the loads contributing to
// the loop condition need to be guaranteed dereferenceable and
// aligned.
if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
&Predicates)) {
reportVectorizationFailure(
"Loop may fault",
"Cannot vectorize potentially faulting early exit loop",
"PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
return false;
}
}

[[maybe_unused]] const SCEV *SymbolicMaxBTC =
Expand Down Expand Up @@ -1861,7 +1963,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
return false;
} else {
if (!isVectorizableEarlyExitLoop()) {
UncountableEdge = std::nullopt;
clearEarlyExitData();
if (DoExtraAnalysis)
Result = false;
else
Expand All @@ -1879,6 +1981,15 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
return false;
}

// Bail out for state-changing EE loops for now.
if (EarlyExitLoad) {
reportVectorizationFailure(
"Writes to memory unsupported in early exit loops",
"Cannot vectorize early exit loop with writes to memory",
"WritesInEarlyExitLoop", ORE, TheLoop);
return false;
}

if (Result) {
LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
<< (LAI->getRuntimePointerChecking()->Need
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/LoopVectorize/control-flow.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
; return 0;
; }

; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize early exit loop with writes to memory
; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize early exit loop with possibly unsafe condition load
; CHECK: remark: source.cpp:5:9: loop not vectorized

; CHECK: _Z4testPii
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

define i64 @loop_contains_store(ptr %dest) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store'
; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops
; CHECK: LV: Not vectorizing: Early exit loop with store but no condition load.
entry:
%p1 = alloca [1024 x i8]
call void @init_mem(ptr %p1, i64 1024)
Expand Down Expand Up @@ -56,7 +56,7 @@ exit:

define void @loop_contains_store_ee_condition_is_invariant(ptr dereferenceable(40) noalias %array, i16 %ee.val) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_ee_condition_is_invariant'
; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
; CHECK: LV: Not vectorizing: Early exit loop with store but no condition load.
entry:
br label %for.body

Expand All @@ -80,7 +80,7 @@ exit:

define void @loop_contains_store_fcmp_condition(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_fcmp_condition'
; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
; CHECK: LV: Not vectorizing: Early exit loop with store but no condition load.
entry:
br label %for.body

Expand All @@ -106,7 +106,7 @@ exit:

define void @loop_contains_store_safe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(96) %pred) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_safe_dependency'
; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
; CHECK: LV: Not vectorizing: No dependencies allowed for early exit condition load.
entry:
%pred.plus.8 = getelementptr inbounds nuw i16, ptr %pred, i64 8
br label %for.body
Expand Down Expand Up @@ -135,7 +135,7 @@ exit:

define void @loop_contains_store_unsafe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(80) readonly %pred) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_unsafe_dependency'
; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
; CHECK: LV: Not vectorizing: Uncounted loop condition not known safe.
entry:
%unknown.offset = call i64 @get_an_unknown_offset()
%unknown.cmp = icmp ult i64 %unknown.offset, 20
Expand All @@ -149,10 +149,10 @@ for.body:
%data = load i16, ptr %st.addr, align 2
%inc = add nsw i16 %data, 1
store i16 %inc, ptr %st.addr, align 2
%ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
%ee.addr = getelementptr inbounds nuw i16, ptr %unknown.base, i64 %iv
%ee.val = load i16, ptr %ee.addr, align 2
%ee.cond = icmp sgt i16 %ee.val, 500
%some.addr = getelementptr inbounds nuw i16, ptr %unknown.base, i64 %iv
%some.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
store i16 42, ptr %some.addr, align 2
br i1 %ee.cond, label %exit, label %for.inc

Expand Down Expand Up @@ -194,7 +194,7 @@ exit:

define void @loop_contains_store_unknown_bounds(ptr align 2 dereferenceable(100) noalias %array, ptr align 2 dereferenceable(100) readonly %pred, i64 %n) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_unknown_bounds'
; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
; CHECK: LV: Not vectorizing: Uncounted loop condition not known safe.
entry:
br label %for.body

Expand All @@ -220,7 +220,7 @@ exit:

define void @loop_contains_store_volatile(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_volatile'
; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
; CHECK: LV: Not vectorizing: Complex writes to memory unsupported in early exit loops.
entry:
br label %for.body

Expand Down Expand Up @@ -324,7 +324,7 @@ exit:

define void @loop_contains_store_condition_load_is_chained(ptr dereferenceable(40) noalias %array, ptr align 8 dereferenceable(160) readonly %offsets, ptr align 2 dereferenceable(40) readonly %pred) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_condition_load_is_chained'
; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops.
; CHECK: LV: Not vectorizing: Uncounted loop condition not known safe.
entry:
br label %for.body

Expand Down
Loading