llvm · NagyDonat · Sep 23, 2024 · Sep 24, 2024 · Sep 30, 2024 · Sep 30, 2024
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
@@ -121,6 +121,25 @@ struct EvalCallOptions {
   EvalCallOptions() {}
 };
 
+/// Simple control flow statements like `if` only produce a single state split,
+/// so the fact that they are included in the source code implies that both
+/// branches are possible (at least under some conditions) and the analyzer can
+/// freely assume either of them. (This is not entirely true, because there may
+/// be unmarked logical correlations between `if` statements, but is a good
+/// enough heuristic and the analyzer strongly relies on it.)
+/// On the other hand, in a loop the state may be split repeatedly at each
+/// evaluation of the loop condition, and this can lead to following "weak"
+/// assumptions even though the code does not imply that they're valid and the
+/// programmer intended to cover them.
+/// This function is called to mark the `State` when the engine makes an
+/// assumption which is weak. Checkers may use this heuristic mark to discard
+/// results and reduce the number of false positives.
+ProgramStateRef recordWeakLoopAssumption(ProgramStateRef State);
+
+/// Returns true if `recordWeakLoopAssumption()` was called on the execution
+/// path which produced `State`.
+bool seenWeakLoopAssumption(ProgramStateRef State);
+
 class ExprEngine {
   void anchor();
 
@@ -323,12 +342,13 @@ class ExprEngine {
 
   /// ProcessBranch - Called by CoreEngine.  Used to generate successor
   ///  nodes by processing the 'effects' of a branch condition.
-  void processBranch(const Stmt *Condition,
-                     NodeBuilderContext& BuilderCtx,
-                     ExplodedNode *Pred,
-                     ExplodedNodeSet &Dst,
-                     const CFGBlock *DstT,
-                     const CFGBlock *DstF);
+  /// If the branch condition is a loop condition, IterationsFinishedInLoop is
+  /// the number of already finished iterations (0, 1, 2, ...); otherwise it's
+  /// std::nullopt.
+  void processBranch(const Stmt *Condition, NodeBuilderContext &BuilderCtx,
+                     ExplodedNode *Pred, ExplodedNodeSet &Dst,
+                     const CFGBlock *DstT, const CFGBlock *DstF,
+                     std::optional<unsigned> IterationsFinishedInLoop);
 
   /// Called by CoreEngine.
   /// Used to generate successor nodes for temporary destructors depending
@@ -583,11 +603,11 @@ class ExprEngine {
                                 ExplodedNode *Pred,
                                 ExplodedNodeSet &Dst);
 
-  /// evalEagerlyAssumeBinOpBifurcation - Given the nodes in 'Src', eagerly assume symbolic
-  ///  expressions of the form 'x != 0' and generate new nodes (stored in Dst)
-  ///  with those assumptions.
-  void evalEagerlyAssumeBinOpBifurcation(ExplodedNodeSet &Dst, ExplodedNodeSet &Src,
-                         const Expr *Ex);
+  /// evalEagerlyAssumeOpBifurcation - Given the nodes in 'Src', eagerly assume
+  /// symbolic expressions of the form 'x != 0' or '!x' and generate new nodes
+  /// (stored in Dst) with those assumptions.
+  void evalEagerlyAssumeOpBifurcation(ExplodedNodeSet &Dst,
+                                      ExplodedNodeSet &Src, const Expr *Ex);
 
   static std::pair<const ProgramPointTag *, const ProgramPointTag *>
     geteagerlyAssumeBinOpBifurcationTags();

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -697,6 +697,11 @@ void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,
                                     ProgramStateRef ErrorState, Messages Msgs,
                                     NonLoc Offset, std::optional<NonLoc> Extent,
                                     bool IsTaintBug /*=false*/) const {
+  // Suppress results found through execution paths where in some loop the
+  // analyzer arbitrarily assumed either that the loop is skipped (0 iterations)
+  // or that 3 or more iterations are executed.
+  if (seenWeakLoopAssumption(ErrorState))
+    return;
 
   ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState);
   if (!ErrorNode)

diff --git a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
@@ -441,10 +441,33 @@ void CoreEngine::HandleCallEnter(const CallEnter &CE, ExplodedNode *Pred) {
 void CoreEngine::HandleBranch(const Stmt *Cond, const Stmt *Term,
                                 const CFGBlock * B, ExplodedNode *Pred) {
   assert(B->succ_size() == 2);
+
+  const LocationContext *LC = Pred->getLocationContext();
+  BlockCounter Counter = WList->getBlockCounter();
+  unsigned BlockCount =
+      Counter.getNumVisited(LC->getStackFrame(), B->getBlockID());
+  std::optional<unsigned> IterationsFinishedInLoop = std::nullopt;
+  if (isa<ForStmt, WhileStmt, CXXForRangeStmt>(Term)) {
+    // FIXME: This code approximates the number of finished iterations based on
+    // the block count, i.e. the number of evaluations of the terminator block
+    // on the current execution path (which includes the current evaluation, so
+    // is always at least 1). This is probably acceptable for the
+    // checker-specific false positive suppression that currently uses this
+    // value, but it would be better to calculate an accurate count of
+    // iterations.
+    assert(BlockCount >= 1);
+    IterationsFinishedInLoop = BlockCount - 1;
+  } else if (isa<DoStmt>(Term)) {
+    // FIXME: The fixme note in the previous branch also applies here.
+    // In a do-while loop one iteration happens before the first evaluation of
+    // the loop condition, so we don't subtract one from the block count.
+    IterationsFinishedInLoop = BlockCount;
+  }
+
   NodeBuilderContext Ctx(*this, B, Pred);
   ExplodedNodeSet Dst;
   ExprEng.processBranch(Cond, Ctx, Pred, Dst, *(B->succ_begin()),
-                       *(B->succ_begin() + 1));
+                        *(B->succ_begin() + 1), IterationsFinishedInLoop);
   // Enqueue the new frontier onto the worklist.
   enqueue(Dst);
 }

diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -212,6 +212,25 @@ typedef llvm::ImmutableMap<const LocationContext *, unsigned>
 REGISTER_TRAIT_WITH_PROGRAMSTATE(PendingArrayDestruction,
                                  PendingArrayDestructionMap)
 
+// This trait is used to heuristically filter out results produced from
+// execution paths that took "weak" assumptions within a loop.
+REGISTER_TRAIT_WITH_PROGRAMSTATE(SeenWeakLoopAssumption, bool)
+
+ProgramStateRef clang::ento::recordWeakLoopAssumption(ProgramStateRef State) {
+  return State->set<SeenWeakLoopAssumption>(true);
+}
+
+bool clang::ento::seenWeakLoopAssumption(ProgramStateRef State) {
+  return State->get<SeenWeakLoopAssumption>();
+}
+
+// This trait points to the last expression (logical operator) where an eager
+// assumption introduced a state split (i.e. both cases were feasible). This is
+// used by the WeakLoopAssumption heuristic to find situations where an
+// eager assumption introduces a state split within the evaluation of a loop
+// condition.
+REGISTER_TRAIT_WITH_PROGRAMSTATE(LastEagerlyAssumeAssumptionAt, const Expr *)
-REGISTER_TRAIT_WITH_PROGRAMSTATE(LastEagerlyAssumeAssumptionAt, const Expr *)
+REGISTER_TRAIT_WITH_PROGRAMSTATE(LastEagerAssumptionAt, const Expr *)
-REGISTER_TRAIT_WITH_PROGRAMSTATE(LastEagerlyAssumeAssumptionAt, const Expr *)
+REGISTER_TRAIT_WITH_PROGRAMSTATE(LastEagerAssumptionAt, const Expr *)
+
 //===----------------------------------------------------------------------===//
 // Engine construction and deletion.
 //===----------------------------------------------------------------------===//
@@ -2128,7 +2147,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
           (B->isRelationalOp() || B->isEqualityOp())) {
         ExplodedNodeSet Tmp;
         VisitBinaryOperator(cast<BinaryOperator>(S), Pred, Tmp);
-        evalEagerlyAssumeBinOpBifurcation(Dst, Tmp, cast<Expr>(S));
+        evalEagerlyAssumeOpBifurcation(Dst, Tmp, cast<Expr>(S));
       }
       else
         VisitBinaryOperator(cast<BinaryOperator>(S), Pred, Dst);
@@ -2401,7 +2420,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
       if (AMgr.options.ShouldEagerlyAssume && (U->getOpcode() == UO_LNot)) {
         ExplodedNodeSet Tmp;
         VisitUnaryOperator(U, Pred, Tmp);
-        evalEagerlyAssumeBinOpBifurcation(Dst, Tmp, U);
+        evalEagerlyAssumeOpBifurcation(Dst, Tmp, U);
       }
       else
         VisitUnaryOperator(U, Pred, Dst);
@@ -2761,12 +2780,10 @@ assumeCondition(const Stmt *Condition, ExplodedNode *N) {
   return State->assume(V);
 }
 
-void ExprEngine::processBranch(const Stmt *Condition,
-                               NodeBuilderContext& BldCtx,
-                               ExplodedNode *Pred,
-                               ExplodedNodeSet &Dst,
-                               const CFGBlock *DstT,
-                               const CFGBlock *DstF) {
+void ExprEngine::processBranch(
+    const Stmt *Condition, NodeBuilderContext &BldCtx, ExplodedNode *Pred,
+    ExplodedNodeSet &Dst, const CFGBlock *DstT, const CFGBlock *DstF,
+    std::optional<unsigned> IterationsFinishedInLoop) {
   assert((!Condition || !isa<CXXBindTemporaryExpr>(Condition)) &&
          "CXXBindTemporaryExprs are handled by processBindTemporary.");
   const LocationContext *LCtx = Pred->getLocationContext();
@@ -2808,27 +2825,63 @@ void ExprEngine::processBranch(const Stmt *Condition,
       std::tie(StTrue, StFalse) = *KnownCondValueAssumption;
     else {
       assert(!isa<ObjCForCollectionStmt>(Condition));
+      // TODO: instead of this shortcut perhaps it would be better to "rejoin"
+      // the common execution path with
+      // StTrue = StFalse = PrevState;
       builder.generateNode(PrevState, true, PredN);
       builder.generateNode(PrevState, false, PredN);
       continue;
     }
     if (StTrue && StFalse)
       assert(!isa<ObjCForCollectionStmt>(Condition));
 
+    const Expr *EagerlyAssumeExpr =
+        PrevState->get<LastEagerlyAssumeAssumptionAt>();
+    const Expr *ConditionExpr = dyn_cast<Expr>(Condition);
+    if (ConditionExpr)
+      ConditionExpr = ConditionExpr->IgnoreParenCasts();
+    bool DidEagerlyAssume = EagerlyAssumeExpr == ConditionExpr;
+    bool BothFeasible = (DidEagerlyAssume || (StTrue && StFalse)) &&
+                        builder.isFeasible(true) && builder.isFeasible(false);
+
     // Process the true branch.
     if (builder.isFeasible(true)) {
-      if (StTrue)
+      if (StTrue) {
+        if (BothFeasible && IterationsFinishedInLoop &&
+            *IterationsFinishedInLoop >= 2) {
+          // When programmers write a loop, they imply that at least two
+          // iterations are possible (otherwise they would just write an `if`),
+          // but the third iteration is not implied: there are situations where
+          // the programmer knows that there won't be a third iteration (e.g.
+          // they iterate over a structure that has <= 2 elements) but this is
+          // not marked in the source code.
+          // Checkers may use this heuristic mark to discard results found on
+          // branches that contain this "weak" assumption.
+          StTrue = recordWeakLoopAssumption(StTrue);
+        }
         builder.generateNode(StTrue, true, PredN);
-      else
+      } else {
         builder.markInfeasible(true);
+      }
     }
 
     // Process the false branch.
     if (builder.isFeasible(false)) {
-      if (StFalse)
+      if (StFalse) {
+        if (BothFeasible && IterationsFinishedInLoop &&
+            *IterationsFinishedInLoop == 0) {
+          // There are many situations where the programmers know that there
+          // will be at least one iteration in a loop (e.g. a structure is not
+          // empty) but the analyzer cannot deduce this and reports false
+          // positives after skipping the loop.
+          // Checkers may use this heuristic mark to discard results found on
+          // branches that contain this "weak" assumption.
+          StFalse = recordWeakLoopAssumption(StFalse);
+        }
         builder.generateNode(StFalse, false, PredN);
-      else
+      } else {
         builder.markInfeasible(false);
+      }
     }
   }
   currBldrCtx = nullptr;
@@ -3752,9 +3805,9 @@ ExprEngine::geteagerlyAssumeBinOpBifurcationTags() {
                         &eagerlyAssumeBinOpBifurcationFalse);
 }
 
-void ExprEngine::evalEagerlyAssumeBinOpBifurcation(ExplodedNodeSet &Dst,
-                                                   ExplodedNodeSet &Src,
-                                                   const Expr *Ex) {
+void ExprEngine::evalEagerlyAssumeOpBifurcation(ExplodedNodeSet &Dst,
+                                                ExplodedNodeSet &Src,
+                                                const Expr *Ex) {
   StmtNodeBuilder Bldr(Src, Dst, *currBldrCtx);
 
   for (const auto Pred : Src) {
@@ -3776,6 +3829,11 @@ void ExprEngine::evalEagerlyAssumeBinOpBifurcation(ExplodedNodeSet &Dst,
       ProgramStateRef StateTrue, StateFalse;
       std::tie(StateTrue, StateFalse) = state->assume(*SEV);
 
+      if (StateTrue && StateFalse) {
+        StateTrue = StateTrue->set<LastEagerlyAssumeAssumptionAt>(Ex);
+        StateFalse = StateFalse->set<LastEagerlyAssumeAssumptionAt>(Ex);
+      }
+
       // First assume that the condition is true.
       if (StateTrue) {
         SVal Val = svalBuilder.makeIntVal(1U, Ex->getType());

diff --git a/clang/test/Analysis/loop-unrolling.cpp b/clang/test/Analysis/loop-unrolling.cpp
@@ -349,7 +349,7 @@ int simple_unknown_bound_loop() {
 #ifdef DFS
     clang_analyzer_numTimesReached(); // expected-warning {{16}}
 #else
-    clang_analyzer_numTimesReached(); // expected-warning {{8}}
+    clang_analyzer_numTimesReached(); // expected-warning {{10}}
 #endif
   }
   return 0;
@@ -369,9 +369,9 @@ int nested_inlined_no_unroll1() {
   int k;
   for (int i = 0; i < 9; i++) {
 #ifdef DFS
-    clang_analyzer_numTimesReached(); // expected-warning {{18}}
+    clang_analyzer_numTimesReached(); // expected-warning {{20}}
 #else
-    clang_analyzer_numTimesReached(); // expected-warning {{14}}
+    clang_analyzer_numTimesReached(); // expected-warning {{18}}
 #endif
     k = simple_unknown_bound_loop();  // reevaluation without inlining, splits the state as well
   }

diff --git a/clang/test/Analysis/out-of-bounds.c b/clang/test/Analysis/out-of-bounds.c
@@ -1,4 +1,9 @@
 // RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,alpha.security.ArrayBoundV2,debug.ExprInspection -verify %s
+// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,alpha.security.ArrayBoundV2,debug.ExprInspection -analyzer-config eagerly-assume=false -verify %s
+
+// Note that eagerly-assume=false is tested separately because the
+// WeakLoopAssumption suppression heuristic uses different code paths to
+// achieve the same result with and without eagerly-assume.
 
 void clang_analyzer_eval(int);
 
@@ -194,3 +199,99 @@ char test_comparison_with_extent_symbol(struct incomplete *p) {
   return ((char *)p)[-1]; // no-warning
 }
 
+// WeakLoopAssumption suppression
+///////////////////////////////////////////////////////////////////////
+
+int GlobalArray[100];
+int loop_suppress_after_zero_iterations(unsigned len) {
+  for (unsigned i = 0; i < len; i++)
+    if (GlobalArray[i] > 0)
+      return GlobalArray[i];
+  // Previously this would have produced an overflow warning because splitting
+  // the state on the loop condition introduced an execution path where the
+  // analyzer thinks that len == 0.
+  // There are many situations where the programmer knows that an argument
+  // is positive, but this is not indicated in the source code, so we must
+  // avoid reporting errors (especially out of bounds errors) on these
+  // branches, because otherwise we'd get prohibitively many false positives.
+  return GlobalArray[len - 1]; // no-warning
+}
+
+void loop_report_in_second_iteration(int len) {
+  int buf[1] = {0};
+  for (int i = 0; i < len; i++) {
+    // When a programmer writes a loop, we may assume that they intended at
+    // least two iterations.
+    buf[i] = 1; // expected-warning{{Out of bound access to memory}}
+  }
+}
+
+void loop_suppress_in_third_iteration(int len) {
+  int buf[2] = {0};
+  for (int i = 0; i < len; i++) {
+    // We should suppress array bounds errors on the third and later iterations
+    // of loops, because sometimes programmers write a loop in situations
+    // where they know that there will be at most two iterations.
+    buf[i] = 1; // no-warning
+  }
+}
+
+void loop_suppress_in_third_iteration_cast(int len) {
+  int buf[2] = {0};
+  for (int i = 0; (unsigned)(i < len); i++) {
+    // Check that a (somewhat arbitrary) cast does not hinder the recognition
+    // of the condition expression.
+    buf[i] = 1; // no-warning
+  }
+}
+
+void loop_suppress_in_third_iteration_logical_and(int len, int flag) {
+  int buf[2] = {0};
+  for (int i = 0; i < len && flag; i++) {
+    // FIXME: In this case the checker should suppress the warning the same way
+    // as it's suppressed in loop_suppress_in_third_iteration, but the
+    // suppression is not activated because the terminator statement associated
+    // with the loop is just the expression 'flag', while 'i < len' is a
+    // separate terminator statement that's associated with the
+    // short-circuiting operator '&&'.
+    // I have seen a real-world FP that looks like this, but it is much rarer
+    // than the basic setup.
+    buf[i] = 1; // expected-warning{{Out of bound access to memory}}
+  }
+}
+
+void loop_suppress_in_third_iteration_logical_and_2(int len, int flag) {
+  int buf[2] = {0};
+  for (int i = 0; flag && i < len; i++) {
+    // If the two operands of '&&' are flipped, the suppression works.
+    buf[i] = 1; // no-warning
+  }
+}
+
+int coinflip(void);
+int do_while_report_after_one_iteration(void) {
+  int i = 0;
+  do {
+    i++;
+  } while (coinflip());
+  // Unlike `loop_suppress_after_zero_iterations`, running just one iteration
+  // in a do-while is not a corner case that would produce too many false
+  // positives, so don't suppress bounds errors in these situations.
+  return GlobalArray[i-2]; // expected-warning{{Out of bound access to memory}}
+}
+
+void do_while_report_in_second_iteration(int len) {
+  int buf[1] = {0};
+  int i = 0;
+  do {
+    buf[i] = 1; // expected-warning{{Out of bound access to memory}}
+  } while (i++ < len);
+}
+
+void do_while_suppress_in_third_iteration(int len) {
+  int buf[2] = {0};
+  int i = 0;
+  do {
+    buf[i] = 1; // no-warning
+  } while (i++ < len);
+}