|
1 | 1 | use std::fmt;
|
2 | 2 | use std::collections::HashMap;
|
| 3 | +use std::cmp::Ordering; |
3 | 4 |
|
4 | 5 | use flow::prelude::*;
|
5 | 6 |
|
@@ -50,7 +51,11 @@ pub trait GroupedOperation: fmt::Debug + Clone {
|
50 | 51 |
|
51 | 52 | /// Given the `current` value, and a number of changes for a group (`diffs`), compute the
|
52 | 53 | /// updated group value.
|
53 |
| - fn apply(&self, current: Option<&DataType>, diffs: Vec<Self::Diff>) -> DataType; |
| 54 | + fn apply( |
| 55 | + &self, |
| 56 | + current: Option<&DataType>, |
| 57 | + diffs: &mut Iterator<Item = Self::Diff>, |
| 58 | + ) -> DataType; |
54 | 59 |
|
55 | 60 | fn description(&self) -> String;
|
56 | 61 | }
|
@@ -153,81 +158,123 @@ where
|
153 | 158 | };
|
154 | 159 | }
|
155 | 160 |
|
| 161 | + let group_by = &self.group_by; |
| 162 | + let cmp = |a: &Record, b: &Record| { |
| 163 | + group_by |
| 164 | + .iter() |
| 165 | + .map(|&col| &a[col]) |
| 166 | + .cmp(group_by.iter().map(|&col| &b[col])) |
| 167 | + }; |
| 168 | + |
156 | 169 | // First, we want to be smart about multiple added/removed rows with same group.
|
157 | 170 | // For example, if we get a -, then a +, for the same group, we don't want to
|
158 |
| - // execute two queries. |
159 |
| - let mut consolidate = HashMap::new(); |
160 |
| - for rec in rs { |
161 |
| - let val = self.inner.to_diff(&rec[..], rec.is_positive()); |
162 |
| - |
163 |
| - let mut group = rec.extract().0; |
164 |
| - for (i, &col) in self.group_by.iter().enumerate() { |
165 |
| - group[i] = group[col].clone(); |
166 |
| - } |
167 |
| - group.resize(self.group_by.len(), DataType::None); |
168 |
| - consolidate.entry(group).or_insert_with(Vec::new).push(val); |
169 |
| - } |
| 171 | + // execute two queries. We'll do this by sorting the batch by our group-by columns. |
| 172 | + let mut rs: Vec<_> = rs.into(); |
| 173 | + rs.sort_by(&cmp); |
170 | 174 |
|
| 175 | + // find the current value for this group |
171 | 176 | let us = self.us.unwrap();
|
172 |
| - let mut misses = Vec::new(); |
173 |
| - let mut out = Vec::with_capacity(2 * consolidate.len()); |
174 |
| - for (group, diffs) in consolidate { |
175 |
| - // find the current value for this group |
176 |
| - let db = state |
177 |
| - .get(&*us) |
178 |
| - .expect("grouped operators must have their own state materialized"); |
179 |
| - |
180 |
| - let old = match db.lookup(&self.out_key[..], &KeyType::from(&group[..])) { |
181 |
| - LookupResult::Some(rs) => { |
182 |
| - debug_assert!(rs.len() <= 1, "a group had more than 1 result"); |
183 |
| - rs.get(0) |
184 |
| - } |
185 |
| - LookupResult::Missing => { |
186 |
| - misses.push(Miss { |
187 |
| - node: *us, |
188 |
| - columns: self.out_key.clone(), |
189 |
| - replay_key: replay_key_col.map(|col| { |
190 |
| - // since group columns go first in our output, and the replay key must |
191 |
| - // be on our group by column (partial can't go through generated |
192 |
| - // columns), this column should be < group.len() |
193 |
| - debug_assert!(col < group.len()); |
194 |
| - vec![group[col].clone()] |
195 |
| - }), |
196 |
| - key: group, |
197 |
| - }); |
198 |
| - continue; |
199 |
| - } |
200 |
| - }; |
201 |
| - |
202 |
| - let (current, new) = { |
203 |
| - use std::borrow::Cow; |
| 177 | + let db = state |
| 178 | + .get(&*us) |
| 179 | + .expect("grouped operators must have their own state materialized"); |
204 | 180 |
|
205 |
| - // current value is in the last output column |
206 |
| - // or "" if there is no current group |
207 |
| - let current = old.map(|r| Cow::Borrowed(&r[r.len() - 1])); |
208 |
| - |
209 |
| - // new is the result of applying all diffs for the group to the current value |
210 |
| - let new = self.inner.apply(current.as_ref().map(|v| &**v), diffs); |
211 |
| - (current, new) |
212 |
| - }; |
213 |
| - |
214 |
| - match current { |
215 |
| - Some(ref current) if new == **current => { |
216 |
| - // no change |
217 |
| - } |
218 |
| - _ => { |
219 |
| - if let Some(old) = old { |
220 |
| - // revoke old value |
221 |
| - debug_assert!(current.is_some()); |
222 |
| - out.push(Record::Negative((**old).clone())); |
| 181 | + let mut misses = Vec::new(); |
| 182 | + let mut out = Vec::new(); |
| 183 | + { |
| 184 | + let out_key = &self.out_key; |
| 185 | + let mut handle_group = |
| 186 | + |inner: &mut T, group_r: Record, mut diffs: ::std::vec::Drain<_>| { |
| 187 | + let (group_r, _) = group_r.extract(); |
| 188 | + let mut group_by_i = 0; |
| 189 | + let mut group = Vec::with_capacity(group_by.len() + 1); |
| 190 | + for (col, v) in group_r.into_iter().enumerate() { |
| 191 | + if col == group_by[group_by_i] { |
| 192 | + group.push(v); |
| 193 | + group_by_i += 1; |
| 194 | + if group_by_i == group_by.len() { |
| 195 | + break; |
| 196 | + } |
| 197 | + } |
223 | 198 | }
|
224 | 199 |
|
225 |
| - // emit positive, which is group + new. |
226 |
| - let mut rec = group; |
227 |
| - rec.push(new); |
228 |
| - out.push(Record::Positive(rec)); |
| 200 | + let old = { |
| 201 | + match db.lookup(&out_key[..], &KeyType::from(&group[..])) { |
| 202 | + LookupResult::Some(rs) => { |
| 203 | + debug_assert!(rs.len() <= 1, "a group had more than 1 result"); |
| 204 | + rs.get(0) |
| 205 | + } |
| 206 | + LookupResult::Missing => { |
| 207 | + misses.push(Miss { |
| 208 | + node: *us, |
| 209 | + columns: out_key.clone(), |
| 210 | + replay_key: replay_key_col.map(|col| { |
| 211 | + // since group columns go first in our output, and the replay |
| 212 | + // key must be on our group by column (partial can't go through |
| 213 | + // generated columns), this column should be < group.len() |
| 214 | + debug_assert!(col < group.len()); |
| 215 | + vec![group[col].clone()] |
| 216 | + }), |
| 217 | + key: group, |
| 218 | + }); |
| 219 | + return; |
| 220 | + } |
| 221 | + } |
| 222 | + }; |
| 223 | + |
| 224 | + let (current, new) = { |
| 225 | + use std::borrow::Cow; |
| 226 | + |
| 227 | + // current value is in the last output column |
| 228 | + // or `None` if there is no current group |
| 229 | + let current = old.map(|r| Cow::Borrowed(&r[r.len() - 1])); |
| 230 | + |
| 231 | + // new is the result of applying all diffs for the group to the current value |
| 232 | + let new = inner.apply(current.as_ref().map(|v| &**v), &mut diffs as &mut _); |
| 233 | + (current, new) |
| 234 | + }; |
| 235 | + |
| 236 | + match current { |
| 237 | + Some(ref current) if new == **current => { |
| 238 | + // no change |
| 239 | + } |
| 240 | + _ => { |
| 241 | + if let Some(old) = old { |
| 242 | + // revoke old value |
| 243 | + debug_assert!(current.is_some()); |
| 244 | + out.push(Record::Negative((**old).clone())); |
| 245 | + } |
| 246 | + |
| 247 | + // emit positive, which is group + new. |
| 248 | + let mut rec = group; |
| 249 | + rec.push(new); |
| 250 | + out.push(Record::Positive(rec)); |
| 251 | + } |
| 252 | + } |
| 253 | + }; |
| 254 | + |
| 255 | + let mut prev_group_r = None; |
| 256 | + let mut diffs = Vec::new(); |
| 257 | + for r in rs { |
| 258 | + if prev_group_r.is_some() |
| 259 | + && cmp(prev_group_r.as_ref().unwrap(), &r) != Ordering::Equal |
| 260 | + { |
| 261 | + handle_group( |
| 262 | + &mut self.inner, |
| 263 | + prev_group_r.take().unwrap(), |
| 264 | + diffs.drain(..), |
| 265 | + ); |
| 266 | + } |
| 267 | + diffs.push(self.inner.to_diff(&r[..], r.is_positive())); |
| 268 | + if prev_group_r.is_none() { |
| 269 | + prev_group_r = Some(r); |
229 | 270 | }
|
230 | 271 | }
|
| 272 | + assert!(!diffs.is_empty()); |
| 273 | + handle_group( |
| 274 | + &mut self.inner, |
| 275 | + prev_group_r.take().unwrap(), |
| 276 | + diffs.drain(..), |
| 277 | + ); |
231 | 278 | }
|
232 | 279 |
|
233 | 280 | ProcessingResult {
|
|
0 commit comments