2020#include " cfg/CFG.h"
2121#include " common/common.h"
2222#include " common/sort.h"
23+ #include " core/ErrorQueue.h"
2324#include " core/Loc.h"
2425#include " core/SymbolRef.h"
2526#include " core/Symbols.h"
@@ -67,11 +68,6 @@ template <typename T, typename Fn> static string vec_to_string(const vector<T> v
6768 return out.str ();
6869}
6970
70- static string bbvec_to_string (const vector<sorbet::cfg::BasicBlock *> &parents) {
71- return vec_to_string (
72- parents, [](const auto &ptr) -> auto { return fmt::format (" (id: {}, ptr: {})" , ptr->id , (void *)ptr); });
73- };
74-
7571template <typename T> static void drain (vector<T> &input, vector<T> &output) {
7672 output.reserve (output.size () + input.size ());
7773 for (auto &v : input) {
@@ -109,7 +105,7 @@ static bool isTemporary(const core::GlobalState &gs, const core::LocalVariable &
109105 }
110106 auto n = var._name ;
111107 return n == Names::blockPreCallTemp () || n == Names::blockTemp () || n == Names::blockPassTemp () ||
112- n == Names::forTemp () || n == Names::blkArg () || n == Names::blockCall () ||
108+ n == Names::blkArg () || n == Names::blockCall () || n == Names::blockBreakAssign () || n == Names::forTemp () ||
113109 // Insert checks because sometimes temporaries are initialized with a 0 unique value. 😬
114110 n == Names::finalReturn () || n == NameRef::noName () || n == Names::blockCall () || n == Names::selfLocal () ||
115111 n == Names::unconditional ();
@@ -198,6 +194,16 @@ class SCIPState {
198194public:
199195 UnorderedMap<core::FileRef, vector<scip::Occurrence>> occurrenceMap;
200196 UnorderedMap<core::FileRef, vector<scip::SymbolInformation>> symbolMap;
197+ // Cache of occurrences for locals that have been emitted in this function.
198+ //
199+ // Note that the SymbolRole is a part of the key too, because we can
200+ // have a read-reference and write-reference at the same location
201+ // (we don't merge those for now).
202+ //
203+ // The 'value' in the map is purely for sanity-checking. It's a bit
204+ // cumbersome to conditionalize the type to be a set in non-debug and
205+ // map in debug, so keeping it a map.
206+ UnorderedMap<std::pair<core::LocOffsets, /* SymbolRole*/ int32_t >, uint32_t > localOccurrenceCache;
201207 vector<scip::Document> documents;
202208 vector<scip::SymbolInformation> externalSymbols;
203209
@@ -271,14 +277,52 @@ class SCIPState {
271277 return absl::OkStatus ();
272278 }
273279
280+ // Returns true if there was a cache hit.
281+ //
282+ // Otherwise, inserts the location into the cache and returns false.
283+ bool cacheOccurrence (const core::GlobalState &gs, core::FileRef file, OwnedLocal occ, int32_t symbolRoles) {
284+ // Optimization:
285+ // Avoid emitting duplicate defs/refs for locals.
286+ // This can happen with constructs like:
287+ // z = if cond then expr else expr end
288+ // When this is lowered to a CFG, we will end up with
289+ // multiple bindings with the same LHS location.
290+ //
291+ // Repeated reads for the same occurrence can happen for rvalues too.
292+ // If the compiler has proof that a scrutinee variable is not modified,
293+ // each comparison in a case will perform a read.
294+ // case x
295+ // when 0 then <stuff>
296+ // when 1 then <stuff>
297+ // else <stuff>
298+ // end
299+ // The CFG for this involves two reads for x in calls to ==.
300+ // (This wouldn't have happened if x was always stashed away into
301+ // a temporary first, but the temporary only appears in the CFG if
302+ // evaluating one of the cases has a chance to modify x.)
303+ auto [it, inserted] = this ->localOccurrenceCache .insert ({{occ.offsets , symbolRoles}, occ.counter });
304+ if (inserted) {
305+ return false ;
306+ }
307+ auto savedCounter = it->second ;
308+ ENFORCE (occ.counter == savedCounter, " cannot have distinct local variable {} at same location {}" ,
309+ (symbolRoles & scip::SymbolRole::Definition) ? " definitions" : " references" ,
310+ core::Loc (file, occ.offsets ).showRaw (gs));
311+ return true ;
312+ }
313+
274314public:
275315 absl::Status saveDefinition (const core::GlobalState &gs, core::FileRef file, OwnedLocal occ) {
316+ if (this ->cacheOccurrence (gs, file, occ, scip::SymbolRole::Definition)) {
317+ return absl::OkStatus ();
318+ }
276319 return this ->saveDefinitionImpl (gs, file, occ.toString (gs, file), core::Loc (file, occ.offsets ));
277320 }
278321
279322 // Save definition when you have a sorbet Symbol.
280323 // Meant for methods, fields etc., but not local variables.
281324 absl::Status saveDefinition (const core::GlobalState &gs, core::FileRef file, core::SymbolRef symRef) {
325+ // TODO:(varun) Should we cache here too to avoid emitting duplicate definitions?
282326 scip::Symbol symbol;
283327 auto status = symbolForExpr (gs, symRef, symbol);
284328 if (!status.ok ()) {
@@ -297,11 +341,15 @@ class SCIPState {
297341 }
298342
299343 absl::Status saveReference (const core::GlobalState &gs, core::FileRef file, OwnedLocal occ, int32_t symbol_roles) {
344+ if (this ->cacheOccurrence (gs, file, occ, symbol_roles)) {
345+ return absl::OkStatus ();
346+ }
300347 return this ->saveReferenceImpl (gs, file, occ.toString (gs, file), occ.offsets , symbol_roles);
301348 }
302349
303350 absl::Status saveReference (const core::GlobalState &gs, core::FileRef file, core::SymbolRef symRef,
304351 core::LocOffsets occLoc, int32_t symbol_roles) {
352+ // TODO:(varun) Should we cache here to to avoid emitting duplicate references?
305353 absl::StatusOr<string *> valueOrStatus (this ->saveSymbolString (gs, symRef, nullptr ));
306354 if (!valueOrStatus.ok ()) {
307355 return valueOrStatus.status ();
@@ -357,7 +405,29 @@ core::SymbolRef lookupRecursive(const core::GlobalState &gs, const core::SymbolR
357405}
358406
359407class CFGTraversal final {
360- UnorderedMap<const cfg::BasicBlock *, UnorderedMap<cfg::LocalRef, core::Loc>> blockLocals;
408+ // A map from each basic block to the locals in it.
409+ //
410+ // The locals may be coming from the parents, or they may be defined in the
411+ // block. Locals coming from the parents may be in the form of basic block
412+ // arguments or they may be "directly referenced."
413+ //
414+ // For example, if you have code like:
415+ //
416+ // def f(x):
417+ // y = 0
418+ // if cond:
419+ // y = x + $z
420+ //
421+ // Then x and $z will be passed as arguments to the basic block for the
422+ // true branch, whereas 'y' won't be. However, 'y' is still technically
423+ // coming from the parent basic block, otherwise we'd end up marking the
424+ // assignment as a definition instead of a (write) reference.
425+ //
426+ // At the start of the traversal of a basic block, the entry for a basic
427+ // block is populated with the locals coming from the parents. Then,
428+ // we traverse each instruction and populate it with the locals defined
429+ // in the block.
430+ UnorderedMap<const cfg::BasicBlock *, UnorderedSet<cfg::LocalRef>> blockLocals;
361431 UnorderedMap<cfg::LocalRef, uint32_t > functionLocals;
362432
363433 // Local variable counter that is reset for every function.
@@ -370,8 +440,8 @@ class CFGTraversal final {
370440 : blockLocals(), functionLocals(), scipState(scipState), ctx(ctx) {}
371441
372442private:
373- void addLocal (const cfg::BasicBlock *bb, cfg::LocalRef localRef, core::Loc loc ) {
374- this ->blockLocals [bb][ localRef] = loc ;
443+ void addLocal (const cfg::BasicBlock *bb, cfg::LocalRef localRef) {
444+ this ->blockLocals [bb]. insert ( localRef) ;
375445 this ->functionLocals [localRef] = ++this ->counter ;
376446 }
377447
@@ -405,7 +475,7 @@ class CFGTraversal final {
405475 if (!this ->functionLocals .contains (localRef)) {
406476 isDefinition = true ; // If we're seeing this for the first time in topological order,
407477 // The current block must have a definition for the variable.
408- this ->addLocal (bb, localRef, this -> ctx . locAt (local. loc ) );
478+ this ->addLocal (bb, localRef);
409479 }
410480 // The variable wasn't passed in as an argument, and hasn't already been recorded
411481 // as a local in the block. So this must be a definition line.
@@ -419,7 +489,7 @@ class CFGTraversal final {
419489 // Ill-formed code where we're trying to access a variable
420490 // without setting it first. Emit a local as a best-effort.
421491 // TODO(varun): Will Sorbet error out before we get here?
422- this ->addLocal (bb, localRef, this -> ctx . locAt (local. loc ) );
492+ this ->addLocal (bb, localRef);
423493 }
424494 // TODO(varun): Will Sorbet error out before we get here?
425495 // It's possible that we have ill-formed code where the variable
@@ -448,42 +518,34 @@ class CFGTraversal final {
448518 return true ;
449519 }
450520
451- void addArgLocals (cfg::BasicBlock *bb, const cfg::CFG &cfg) {
452- this ->blockLocals [bb] = {};
453- for (auto &bbArgs : bb->args ) {
454- bool found = false ;
455- for (auto parentBB : bb->backEdges ) {
456- if (this ->blockLocals [parentBB].contains (bbArgs.variable )) {
457- this ->blockLocals [bb][bbArgs.variable ] = this ->blockLocals [parentBB][bbArgs.variable ];
458- found = true ;
459- break ;
460- }
521+ void copyLocalsFromParents (cfg::BasicBlock *bb, const cfg::CFG &cfg) {
522+ UnorderedSet<cfg::LocalRef> bbLocals{};
523+ for (auto parentBB : bb->backEdges ) {
524+ if (!this ->blockLocals .contains (parentBB)) { // e.g. loops
525+ continue ;
461526 }
462- if (!found) {
463- print_dbg (" # basic block argument {} did not come from parents {}\n " ,
464- bbArgs.toString (this ->ctx .state , cfg), bbvec_to_string (bb->backEdges ));
527+ auto &parentLocals = this ->blockLocals [parentBB];
528+ if (bbLocals.size () + parentLocals.size () > bbLocals.capacity ()) {
529+ bbLocals.reserve (bbLocals.size () + parentLocals.size ());
530+ }
531+ for (auto local : parentLocals) {
532+ bbLocals.insert (local);
465533 }
466534 }
535+ ENFORCE (!this ->blockLocals .contains (bb));
536+ this ->blockLocals [bb] = std::move (bbLocals);
467537 }
468538
469539public:
470540 void traverse (const cfg::CFG &cfg) {
471541 auto &gs = this ->ctx .state ;
472542 auto method = this ->ctx .owner ;
473543
474- auto print_map = [&cfg, &gs](const UnorderedMap<cfg::LocalRef, core::Loc> &map, cfg::BasicBlock *ptr) {
475- print_dbg (" # blockLocals (id: {}, ptr: {}) = {}\n " , ptr->id , (void *)ptr,
476- map_to_string (
477- map, [&](const auto &localref, const auto &loc) -> auto {
478- return fmt::format (" {} {}" , localref.data (cfg).toString (gs), loc.showRaw (gs));
479- }));
480- };
481-
482544 // I don't fully understand the doc comment for forwardsTopoSort; it seems backwards in practice.
483545 for (auto it = cfg.forwardsTopoSort .rbegin (); it != cfg.forwardsTopoSort .rend (); ++it) {
484546 cfg::BasicBlock *bb = *it;
485547 print_dbg (" # Looking at block id: {} ptr: {}\n " , bb->id , (void *)bb);
486- this ->addArgLocals (bb, cfg);
548+ this ->copyLocalsFromParents (bb, cfg);
487549 for (auto &binding : bb->exprs ) {
488550 if (auto *aliasInstr = cfg::cast_instruction<cfg::Alias>(binding.value )) {
489551 auto aliasName = aliasInstr->name ;
@@ -497,7 +559,7 @@ class CFGTraversal final {
497559 method.toString (gs));
498560 continue ;
499561 }
500- this ->addLocal (bb, binding.bind .variable , aliasedSym. loc (gs) );
562+ this ->addLocal (bb, binding.bind .variable );
501563 continue ;
502564 }
503565 if (!binding.loc .exists () || binding.loc .empty ()) { // TODO(varun): When can each case happen?
@@ -599,7 +661,6 @@ class CFGTraversal final {
599661 }
600662 }
601663 }
602- print_map (this ->blockLocals .at (bb), bb);
603664 }
604665 }
605666};
0 commit comments