Skip to content

Commit e77cbbb

Browse files
committed
fix!: previously the commit-traversal sorted by date must have been wonky & rename Sorting::Topological to Sorting::BreadthFirst.
This is now fixed by using a proven data structure, the `BinaryHeap` as priority queue. The rename is to differentiate `git log --topo-order` from what we are doing, which is actually quite different.
1 parent 832c773 commit e77cbbb

File tree

9 files changed

+488
-230
lines changed

9 files changed

+488
-230
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gix-traverse/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,6 @@ doctest = false
1717
gix-hash = { version = "^0.11.2", path = "../gix-hash" }
1818
gix-object = { version = "^0.30.0", path = "../gix-object" }
1919
gix-hashtable = { version = "^0.2.1", path = "../gix-hashtable" }
20+
gix-revwalk = { version = "^0.1.0", path = "../gix-revwalk" }
2021
smallvec = "1.10.0"
2122
thiserror = "1.0.32"

gix-traverse/src/commit.rs

Lines changed: 87 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,36 @@ pub enum Parents {
2020
}
2121

2222
/// Specify how to sort commits during traversal.
23+
///
24+
/// ### Sample History
25+
///
26+
/// The following history will be referred to for explaining how the sort order works, with the number denoting the commit timestamp
27+
/// (*their X-alignment doesn't matter*).
28+
///
29+
/// ```text
30+
/// ---1----2----4----7 <- second parent of 8
31+
/// \ \
32+
/// 3----5----6----8---
33+
/// ```
34+
2335
#[derive(Default, Debug, Copy, Clone)]
2436
pub enum Sorting {
2537
/// Commits are sorted as they are mentioned in the commit graph.
38+
///
39+
/// In the *sample history* the order would be `8, 6, 7, 5, 4, 3, 2, 1`
40+
///
41+
/// ### Note
42+
///
43+
/// This is not to be confused with `git log/rev-list --topo-order`, which is notably different from
44+
/// as it avoids overlapping branches.
2645
#[default]
27-
Topological,
46+
BreadthFirst,
2847
/// Commits are sorted by their commit time in descending order, that is newest first.
2948
///
3049
/// The sorting applies to all currently queued commit ids and thus is full.
3150
///
51+
/// In the *sample history* the order would be `8, 7, 6, 5, 4, 3, 2, 1`
52+
///
3253
/// # Performance
3354
///
3455
/// This mode benefits greatly from having an object_cache in `find()`
@@ -38,6 +59,8 @@ pub enum Sorting {
3859
/// a given time, stopping the iteration once no younger commits is queued to be traversed.
3960
///
4061
/// As the query is usually repeated with different cutoff dates, this search mode benefits greatly from an object cache.
62+
///
63+
/// In the *sample history* and a cut-off date of 4, the returned list of commits would be `8, 7, 6, 4`
4164
ByCommitTimeNewestFirstCutoffOlderThan {
4265
/// The amount of seconds since unix epoch, the same value obtained by any `gix_date::Time` structure and the way git counts time.
4366
time_in_seconds_since_epoch: u32,
@@ -53,7 +76,7 @@ pub struct Info {
5376
pub id: gix_hash::ObjectId,
5477
/// All parent ids we have encountered. Note that these will be at most one if [`Parents::First`] is enabled.
5578
pub parent_ids: ParentIds,
56-
/// The time at which the commit was created. It's only `Some(_)` if sorting is not [`Sorting::Topological`], as the walk
79+
/// The time at which the commit was created. It's only `Some(_)` if sorting is not [`Sorting::BreadthFirst`], as the walk
5780
/// needs to require the commit-date.
5881
pub commit_time: Option<u64>,
5982
}
@@ -63,7 +86,6 @@ pub mod ancestors {
6386
use std::{
6487
borrow::{Borrow, BorrowMut},
6588
collections::VecDeque,
66-
iter::FromIterator,
6789
};
6890

6991
use gix_hash::{oid, ObjectId};
@@ -88,31 +110,36 @@ pub mod ancestors {
88110
type TimeInSeconds = u32;
89111

90112
/// The state used and potentially shared by multiple graph traversals.
91-
#[derive(Default, Clone)]
113+
#[derive(Clone)]
92114
pub struct State {
93-
next: VecDeque<(ObjectId, TimeInSeconds)>,
115+
next: VecDeque<ObjectId>,
116+
queue: gix_revwalk::PriorityQueue<TimeInSeconds, ObjectId>,
94117
buf: Vec<u8>,
95118
seen: HashSet<ObjectId>,
96119
parents_buf: Vec<u8>,
97120
}
98121

122+
impl Default for State {
123+
fn default() -> Self {
124+
State {
125+
next: Default::default(),
126+
queue: gix_revwalk::PriorityQueue::new(),
127+
buf: vec![],
128+
seen: Default::default(),
129+
parents_buf: vec![],
130+
}
131+
}
132+
}
133+
99134
impl State {
100135
fn clear(&mut self) {
101136
self.next.clear();
137+
self.queue.clear();
102138
self.buf.clear();
103139
self.seen.clear();
104140
}
105141
}
106142

107-
/// Builder
108-
impl<Find, Predicate, StateMut> Ancestors<Find, Predicate, StateMut> {
109-
/// Change our commit parent handling mode to the given one.
110-
pub fn parents(mut self, mode: Parents) -> Self {
111-
self.parents = mode;
112-
self
113-
}
114-
}
115-
116143
/// Builder
117144
impl<Find, Predicate, StateMut, E> Ancestors<Find, Predicate, StateMut>
118145
where
@@ -123,32 +150,52 @@ pub mod ancestors {
123150
/// Set the sorting method, either topological or by author date
124151
pub fn sorting(mut self, sorting: Sorting) -> Result<Self, Error> {
125152
self.sorting = sorting;
126-
if !matches!(self.sorting, Sorting::Topological) {
127-
let mut cutoff_time_storage = self.sorting.cutoff_time().map(|cot| (cot, Vec::new()));
128-
let state = self.state.borrow_mut();
129-
for (commit_id, commit_time) in &mut state.next {
130-
let commit_iter = (self.find)(commit_id, &mut state.buf).map_err(|err| Error::FindExisting {
131-
oid: *commit_id,
132-
source: err.into(),
133-
})?;
134-
let time = commit_iter.committer()?.time.seconds_since_unix_epoch;
135-
match &mut cutoff_time_storage {
136-
Some((cutoff_time, storage)) if time >= *cutoff_time => {
137-
storage.push((*commit_id, time));
153+
match self.sorting {
154+
Sorting::BreadthFirst => {
155+
self.queue_to_vecdeque();
156+
}
157+
Sorting::ByCommitTimeNewestFirst | Sorting::ByCommitTimeNewestFirstCutoffOlderThan { .. } => {
158+
let cutoff_time = self.sorting.cutoff_time();
159+
let state = self.state.borrow_mut();
160+
for commit_id in state.next.drain(..) {
161+
let commit_iter =
162+
(self.find)(&commit_id, &mut state.buf).map_err(|err| Error::FindExisting {
163+
oid: commit_id,
164+
source: err.into(),
165+
})?;
166+
let time = commit_iter.committer()?.time.seconds_since_unix_epoch;
167+
match cutoff_time {
168+
Some(cutoff_time) if time >= cutoff_time => {
169+
state.queue.insert(time, commit_id);
170+
}
171+
Some(_) => {}
172+
None => {
173+
state.queue.insert(time, commit_id);
174+
}
138175
}
139-
Some(_) => {}
140-
None => *commit_time = time,
141176
}
142177
}
143-
let mut v = match cutoff_time_storage {
144-
Some((_, storage)) => storage,
145-
None => Vec::from_iter(std::mem::take(&mut state.next).into_iter()),
146-
};
147-
v.sort_by(|a, b| a.1.cmp(&b.1).reverse());
148-
state.next = v.into();
149178
}
150179
Ok(self)
151180
}
181+
182+
/// Change our commit parent handling mode to the given one.
183+
pub fn parents(mut self, mode: Parents) -> Self {
184+
self.parents = mode;
185+
if matches!(self.parents, Parents::First) {
186+
self.queue_to_vecdeque();
187+
}
188+
self
189+
}
190+
191+
fn queue_to_vecdeque(&mut self) {
192+
let state = self.state.borrow_mut();
193+
state.next.extend(
194+
std::mem::replace(&mut state.queue, gix_revwalk::PriorityQueue::new())
195+
.into_iter_unordered()
196+
.map(|(_time, id)| id),
197+
);
198+
}
152199
}
153200

154201
/// Initialization
@@ -207,7 +254,7 @@ pub mod ancestors {
207254
for tip in tips.map(Into::into) {
208255
let was_inserted = state.seen.insert(tip);
209256
if was_inserted && predicate(&tip) {
210-
state.next.push_back((tip, 0));
257+
state.next.push_back(tip);
211258
}
212259
}
213260
}
@@ -245,7 +292,7 @@ pub mod ancestors {
245292
self.next_by_topology()
246293
} else {
247294
match self.sorting {
248-
Sorting::Topological => self.next_by_topology(),
295+
Sorting::BreadthFirst => self.next_by_topology(),
249296
Sorting::ByCommitTimeNewestFirst => self.next_by_commit_date(None),
250297
Sorting::ByCommitTimeNewestFirstCutoffOlderThan {
251298
time_in_seconds_since_epoch,
@@ -278,7 +325,7 @@ pub mod ancestors {
278325
fn next_by_commit_date(&mut self, cutoff_older_than: Option<TimeInSeconds>) -> Option<Result<Info, Error>> {
279326
let state = self.state.borrow_mut();
280327

281-
let (oid, commit_time) = state.next.pop_front()?;
328+
let (commit_time, oid) = state.queue.pop()?;
282329
let mut parents: ParentIds = Default::default();
283330
match (self.find)(&oid, &mut state.buf) {
284331
Ok(commit_iter) => {
@@ -309,17 +356,9 @@ pub mod ancestors {
309356
})
310357
.unwrap_or_default();
311358

312-
let pos = match state.next.binary_search_by(|c| c.1.cmp(&parent_commit_time).reverse())
313-
{
314-
Ok(_) => None,
315-
Err(pos) => Some(pos),
316-
};
317359
match cutoff_older_than {
318360
Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue,
319-
Some(_) | None => match pos {
320-
Some(pos) => state.next.insert(pos, (id, parent_commit_time)),
321-
None => state.next.push_back((id, parent_commit_time)),
322-
},
361+
Some(_) | None => state.queue.insert(parent_commit_time, id),
323362
}
324363

325364
if is_first && matches!(self.parents, Parents::First) {
@@ -356,7 +395,7 @@ pub mod ancestors {
356395
{
357396
fn next_by_topology(&mut self) -> Option<Result<Info, Error>> {
358397
let state = self.state.borrow_mut();
359-
let (oid, _commit_time) = state.next.pop_front()?;
398+
let oid = state.next.pop_front()?;
360399
let mut parents: ParentIds = Default::default();
361400
match (self.find)(&oid, &mut state.buf) {
362401
Ok(commit_iter) => {
@@ -367,7 +406,7 @@ pub mod ancestors {
367406
parents.push(id);
368407
let was_inserted = state.seen.insert(id);
369408
if was_inserted && (self.predicate)(&id) {
370-
state.next.push_back((id, 0));
409+
state.next.push_back(id);
371410
}
372411
if matches!(self.parents, Parents::First) {
373412
break;

0 commit comments

Comments
 (0)