diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index d654ac3ec9273..aa34daf596b6b 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -407,6 +407,14 @@ class LoopVectorizationLegality { return hasUncountableEarlyExit() ? getUncountableEdge()->second : nullptr; } + /// Returns true if this is an early exit loop containing a store. + bool isConditionCopyRequired() const { return EarlyExitLoad.has_value(); } + + /// Returns the load instruction, if any, directly used for an exit comparison + /// in an early exit loop containing state-changing or potentially-faulting + /// operations. + std::optional getEarlyExitLoad() const { return EarlyExitLoad; } + /// Return true if there is store-load forwarding dependencies. bool isSafeForAnyStoreLoadForwardDistances() const { return LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances(); @@ -536,6 +544,12 @@ class LoopVectorizationLegality { /// additional cases safely. bool isVectorizableEarlyExitLoop(); + /// Clears any current early exit data gathered if a check failed. + void clearEarlyExitData() { + UncountableEdge = std::nullopt; + EarlyExitLoad = std::nullopt; + } + /// Return true if all of the instructions in the block can be speculatively /// executed, and record the loads/stores that require masking. /// \p SafePtrs is a list of addresses that are known to be legal and we know @@ -654,6 +668,10 @@ class LoopVectorizationLegality { /// Keep track of the loop edge to an uncountable exit, comprising a pair /// of (Exiting, Exit) blocks, if there is exactly one early exit. std::optional> UncountableEdge; + + /// Keep track of the load used for early exits where state-changing or + /// potentially faulting operations occur inside the loop. 
+ std::optional EarlyExitLoad; }; } // namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 969d225c6ef2e..eec660e5958ca 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -17,6 +17,7 @@ #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -1207,8 +1208,42 @@ bool LoopVectorizationLegality::canVectorizeMemory() { }); } - if (!LAI->canVectorizeMemory()) - return canVectorizeIndirectUnsafeDependences(); + if (LAI->canVectorizeMemory()) { + // FIXME: Remove or reduce this restriction. We're in a bit of an odd spot + // since we're (potentially) doing the load out of its normal order + // in the loop and that may throw off dependency checking. + // A forward dependency should be fine, but a backwards dep may not + // be even if LAA thinks it is due to performing the load for the + // vector iteration i+1 in vector iteration i. + if (isConditionCopyRequired()) { + const MemoryDepChecker &DepChecker = LAI->getDepChecker(); + const auto *Deps = DepChecker.getDependences(); + + for (const MemoryDepChecker::Dependence &Dep : *Deps) { + if (Dep.getDestination(DepChecker) == EarlyExitLoad || + Dep.getSource(DepChecker) == EarlyExitLoad) { + // TODO: Refine the wording below. This currently only applies when a + // store is present in the early exit loop. 
+ reportVectorizationFailure( + "No dependencies allowed for early exit condition load", + "Early exit condition loads may not have a dependence with " + "another" + " memory operation.", + "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop); + return false; + } + } + } + } else { + if (!isConditionCopyRequired()) + return canVectorizeIndirectUnsafeDependences(); + reportVectorizationFailure( + "Cannot vectorize unsafe dependencies in state-changing early exit " + "loop.", + "Unable to vectorize memory in an early exit loop with store", + "CantVectorizeUnsafeDependencyForEELoopWithStore", ORE, TheLoop); + return false; + } if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) { reportVectorizationFailure("We don't allow storing to uniform addresses", @@ -1747,16 +1782,31 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() { } }; + bool HasStore = false; for (auto *BB : TheLoop->blocks()) for (auto &I : *BB) { + if (StoreInst *SI = dyn_cast(&I)) { + HasStore = true; + if (SI->isSimple()) + continue; + + reportVectorizationFailure( + "Complex writes to memory unsupported in early exit loops", + "Cannot vectorize early exit loop with complex writes to memory", + "WritesInEarlyExitLoop", ORE, TheLoop); + return false; + } + if (I.mayWriteToMemory()) { // We don't support writes to memory. 
reportVectorizationFailure( - "Writes to memory unsupported in early exit loops", - "Cannot vectorize early exit loop with writes to memory", + "Complex writes to memory unsupported in early exit loops", + "Cannot vectorize early exit loop with complex writes to memory", "WritesInEarlyExitLoop", ORE, TheLoop); return false; - } else if (!IsSafeOperation(&I)) { + } + + if (!IsSafeOperation(&I)) { reportVectorizationFailure("Early exit loop contains operations that " "cannot be speculatively executed", "UnsafeOperationsEarlyExitLoop", ORE, @@ -1771,13 +1821,65 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() { // TODO: Handle loops that may fault. Predicates.clear(); - if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, - &Predicates)) { - reportVectorizationFailure( - "Loop may fault", - "Cannot vectorize potentially faulting early exit loop", - "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop); - return false; + if (HasStore) { + // Record load for analysis by isDereferenceableAndAlignedInLoop + // and later by dependence analysis. + if (BranchInst *Br = dyn_cast( + SingleUncountableEdge->first->getTerminator())) { + // FIXME: Handle exit conditions with multiple users, more complex exit + // conditions than br(icmp(load, loop_inv)). + ICmpInst *Cmp = dyn_cast(Br->getCondition()); + if (Cmp && Cmp->hasOneUse() && + TheLoop->isLoopInvariant(Cmp->getOperand(1))) { + LoadInst *Load = dyn_cast(Cmp->getOperand(0)); + if (Load && Load->hasOneUse() && !TheLoop->isLoopInvariant(Load)) { + if (isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(), + *DT, AC, &Predicates)) { + ICFLoopSafetyInfo SafetyInfo; + SafetyInfo.computeLoopSafetyInfo(TheLoop); + // FIXME: We may have multiple levels of conditional loads, so will + // need to improve on outright rejection at some point. 
+ if (SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) { + EarlyExitLoad = Load; + } else { + reportVectorizationFailure( + "Early exit condition load not guaranteed to execute", + "Cannot vectorize early exit loop when condition load is not " + "guaranteed to execute", + "EarlyExitLoadNotGuaranteed", ORE, TheLoop); + } + } else { + reportVectorizationFailure( + "Uncounted loop condition not known safe", + "Cannot vectorize early exit loop with " + "possibly unsafe condition load", + "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop); + return false; + } + } + } + } + + if (!EarlyExitLoad) { + reportVectorizationFailure( + "Early exit loop with store but no condition load", + "Cannot vectorize early exit loop with store but no condition load", + "NoConditionLoadForEarlyExitLoop", ORE, TheLoop); + return false; + } + } else { + // Read-only loop. + // FIXME: as with the loops with stores, only the loads contributing to + // the loop condition need to be guaranteed dereferenceable and + // aligned. + if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, + &Predicates)) { + reportVectorizationFailure( + "Loop may fault", + "Cannot vectorize potentially faulting early exit loop", + "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop); + return false; + } } [[maybe_unused]] const SCEV *SymbolicMaxBTC = @@ -1861,7 +1963,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { return false; } else { if (!isVectorizableEarlyExitLoop()) { - UncountableEdge = std::nullopt; + clearEarlyExitData(); if (DoExtraAnalysis) Result = false; else @@ -1879,6 +1981,15 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { return false; } + // Bail out for state-changing EE loops for now. 
+ if (EarlyExitLoad) { + reportVectorizationFailure( + "Writes to memory unsupported in early exit loops", + "Cannot vectorize early exit loop with writes to memory", + "WritesInEarlyExitLoop", ORE, TheLoop); + return false; + } + if (Result) { LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop" << (LAI->getRuntimePointerChecking()->Need diff --git a/llvm/test/Transforms/LoopVectorize/control-flow.ll b/llvm/test/Transforms/LoopVectorize/control-flow.ll index 3a8aec34dfe43..2578260fe878d 100644 --- a/llvm/test/Transforms/LoopVectorize/control-flow.ll +++ b/llvm/test/Transforms/LoopVectorize/control-flow.ll @@ -10,7 +10,7 @@ ; return 0; ; } -; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize early exit loop with writes to memory +; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize early exit loop with possibly unsafe condition load ; CHECK: remark: source.cpp:5:9: loop not vectorized ; CHECK: _Z4testPii diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll index 7c80dad006952..6724c703e4940 100644 --- a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll +++ b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll @@ -3,7 +3,7 @@ define i64 @loop_contains_store(ptr %dest) { ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store' -; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops +; CHECK: LV: Not vectorizing: Early exit loop with store but no condition load. entry: %p1 = alloca [1024 x i8] call void @init_mem(ptr %p1, i64 1024) @@ -56,7 +56,7 @@ exit: define void @loop_contains_store_ee_condition_is_invariant(ptr dereferenceable(40) noalias %array, i16 %ee.val) { ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_ee_condition_is_invariant' -; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops. 
+; CHECK: LV: Not vectorizing: Early exit loop with store but no condition load. entry: br label %for.body @@ -80,7 +80,7 @@ exit: define void @loop_contains_store_fcmp_condition(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) { ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_fcmp_condition' -; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops. +; CHECK: LV: Not vectorizing: Early exit loop with store but no condition load. entry: br label %for.body @@ -106,7 +106,7 @@ exit: define void @loop_contains_store_safe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(96) %pred) { ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_safe_dependency' -; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops. +; CHECK: LV: Not vectorizing: No dependencies allowed for early exit condition load. entry: %pred.plus.8 = getelementptr inbounds nuw i16, ptr %pred, i64 8 br label %for.body @@ -135,7 +135,7 @@ exit: define void @loop_contains_store_unsafe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(80) readonly %pred) { ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_unsafe_dependency' -; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops. +; CHECK: LV: Not vectorizing: Uncounted loop condition not known safe. 
entry: %unknown.offset = call i64 @get_an_unknown_offset() %unknown.cmp = icmp ult i64 %unknown.offset, 20 @@ -149,10 +149,10 @@ for.body: %data = load i16, ptr %st.addr, align 2 %inc = add nsw i16 %data, 1 store i16 %inc, ptr %st.addr, align 2 - %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv + %ee.addr = getelementptr inbounds nuw i16, ptr %unknown.base, i64 %iv %ee.val = load i16, ptr %ee.addr, align 2 %ee.cond = icmp sgt i16 %ee.val, 500 - %some.addr = getelementptr inbounds nuw i16, ptr %unknown.base, i64 %iv + %some.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv store i16 42, ptr %some.addr, align 2 br i1 %ee.cond, label %exit, label %for.inc @@ -194,7 +194,7 @@ exit: define void @loop_contains_store_unknown_bounds(ptr align 2 dereferenceable(100) noalias %array, ptr align 2 dereferenceable(100) readonly %pred, i64 %n) { ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_unknown_bounds' -; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops. +; CHECK: LV: Not vectorizing: Uncounted loop condition not known safe. entry: br label %for.body @@ -220,7 +220,7 @@ exit: define void @loop_contains_store_volatile(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) { ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_volatile' -; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops. +; CHECK: LV: Not vectorizing: Complex writes to memory unsupported in early exit loops. entry: br label %for.body @@ -324,7 +324,7 @@ exit: define void @loop_contains_store_condition_load_is_chained(ptr dereferenceable(40) noalias %array, ptr align 8 dereferenceable(160) readonly %offsets, ptr align 2 dereferenceable(40) readonly %pred) { ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_condition_load_is_chained' -; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops. 
+; CHECK: LV: Not vectorizing: Uncounted loop condition not known safe. entry: br label %for.body