Skip to content

Commit ee7df97

Browse files
authored
feat(arena): optimize data frame deserialization (#453)
* feat(arena): optimize data frame deserialization
* fix: no serialization for response
1 parent 1e355f9 commit ee7df97

File tree

3 files changed

+106
-49
lines changed

3 files changed

+106
-49
lines changed

flock-function/src/aws/actor.rs

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -112,13 +112,11 @@ pub async fn handler(
112112
let (input, status) = prepare_data_sources(ctx, arena, event).await?;
113113

114114
if status == HashAggregateStatus::Processed {
115-
let info = format!("[Ok] Function {}: data is already processed.", ctx.name);
116-
info!("{}", info);
117-
return Ok(json!({ "response": info }));
115+
info!("[Ok] Function {}: data is already processed.", ctx.name);
116+
return Ok(Value::Null);
118117
} else if status == HashAggregateStatus::NotReady {
119-
let info = format!("[Ok] Function {}: data aggregation is not ready.", ctx.name);
120-
info!("{}", info);
121-
return Ok(json!({ "response": info }));
118+
info!("[Ok] Function {}: data aggregation is not ready.", ctx.name);
119+
return Ok(Value::Null);
122120
}
123121

124122
let output = collect(ctx, input).await?;
@@ -181,7 +179,8 @@ async fn prepare_data_sources(
181179
if status == HashAggregateStatus::Ready {
182180
info!("Received all data packets for the window: {:?}", window_id);
183181
arena
184-
.take_batches(&window_id)
182+
.take(&window_id)
183+
.await?
185184
.into_iter()
186185
.for_each(|b| input.push(b));
187186
PROCESSED_WINDOWS.lock().unwrap().insert(window_id);
@@ -223,7 +222,8 @@ async fn prepare_data_sources(
223222
if arena.is_complete(&window_id) {
224223
info!("Received all data packets for the window: {:?}", window_id);
225224
arena
226-
.take_batches(&window_id)
225+
.take(&window_id)
226+
.await?
227227
.into_iter()
228228
.for_each(|b| input.push(b));
229229
status = HashAggregateStatus::Ready;
@@ -287,7 +287,7 @@ async fn invoke_next_functions(
287287
.write(sink_type.clone(), DataSinkFormat::SerdeBinary)
288288
.await
289289
} else {
290-
Ok(json!({ "response": "No data to sink." }))
290+
Ok(Value::Null)
291291
}
292292
}
293293
CloudFunction::Lambda(group_name) => {
@@ -352,9 +352,7 @@ async fn invoke_next_functions(
352352
);
353353
lambda::invoke_function(group_name, &invocation_type, Some(bytes.into())).await?;
354354
}
355-
Ok(json!({
356-
"response": format!("next function: {}", group_name)
357-
}))
355+
Ok(Value::Null)
358356
}
359357
CloudFunction::Group((group_name, _)) => {
360358
if !ctx.is_shuffling().await? {
@@ -419,9 +417,7 @@ async fn invoke_next_functions(
419417

420418
futures::future::join_all(tasks).await;
421419

422-
Ok(json!({
423-
"response": format!("next function group: {}", group_name)
424-
}))
420+
Ok(Value::Null)
425421
} else {
426422
let output = Arc::new(output);
427423
let mut rng = StdRng::seed_from_u64(0xDEAD); // Predictable RNG clutch
@@ -533,9 +529,7 @@ async fn invoke_next_functions(
533529
.collect::<Vec<tokio::task::JoinHandle<Result<()>>>>();
534530
futures::future::join_all(tasks).await;
535531

536-
Ok(json!({
537-
"response": format!("next function group: {}", group_name)
538-
}))
532+
Ok(Value::Null)
539533
}
540534
}
541535
}

flock-function/src/aws/nexmark/source.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,5 +74,5 @@ pub async fn handler(ctx: &mut ExecutionContext, payload: Payload) -> Result<Val
7474
_ => unimplemented!(),
7575
};
7676

77-
Ok(json!({"name": &ctx.name, "type": "nexmark_bench".to_string()}))
77+
Ok(Value::Null)
7878
}

flock/src/runtime/arena/mod.rs

Lines changed: 93 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,18 @@
1818
mod bitmap;
1919
pub use bitmap::Bitmap;
2020

21+
use crate::encoding::Encoding;
2122
use crate::error::{FlockError, Result};
22-
use crate::runtime::payload::Payload;
23+
use crate::runtime::payload::{DataFrame, Payload};
24+
use crate::transmute::*;
2325
use datafusion::arrow::datatypes::SchemaRef;
2426
use datafusion::arrow::record_batch::RecordBatch;
27+
use datafusion::arrow_flight::utils::flight_data_to_arrow_batch;
28+
use datafusion::arrow_flight::FlightData;
2529
use hashbrown::HashMap;
30+
use rayon::prelude::*;
2631
use std::ops::{Deref, DerefMut};
32+
use tokio::task::JoinHandle;
2733

2834
type QueryId = String;
2935
type ShuffleId = usize;
@@ -61,29 +67,38 @@ pub struct Arena(HashMap<WindowId, WindowSession>);
6167
pub struct WindowSession {
6268
/// The number of data fragments in the window.
6369
/// [`WindowSession::size`] is equal to [`Uuid::seq_len`].
64-
pub size: usize,
65-
/// Aggregate record batches for the first relation.
66-
pub r1_records: Vec<Vec<RecordBatch>>,
67-
/// Aggregate record batches for the second relation.
68-
pub r2_records: Vec<Vec<RecordBatch>>,
70+
pub size: usize,
71+
/// Aggregate the encoded data frames for the first relation.
72+
/// https://arrow.apache.org/blog/2019/10/13/introducing-arrow-flight/
73+
pub r1_flight_data: Vec<Vec<DataFrame>>,
74+
/// The schema of the first relation.
75+
pub r1_schema: Vec<u8>,
76+
/// Aggregate the encoded data frames for the second relation.
77+
/// https://arrow.apache.org/blog/2019/10/13/introducing-arrow-flight/
78+
pub r2_flight_data: Vec<Vec<DataFrame>>,
79+
/// The schema of the second relation.
80+
pub r2_schema: Vec<u8>,
6981
/// Bitmap indicating the data existence in the window.
70-
pub bitmap: Bitmap,
82+
pub bitmap: Bitmap,
83+
/// The compression method.
84+
pub encoding: Encoding,
7185
}
7286

7387
impl WindowSession {
7488
/// Return the schema of data fragments in the temporal window.
7589
pub fn schema(&self) -> Result<(SchemaRef, Option<SchemaRef>)> {
76-
if self.r1_records.is_empty() || self.r1_records[0].is_empty() {
90+
if self.r1_schema.is_empty() {
7791
return Err(FlockError::Internal(
7892
"Record batches are empty.".to_string(),
7993
));
8094
}
81-
if !self.r2_records.is_empty() && !self.r2_records[0].is_empty() {
82-
Ok((self.r1_records[0][0].schema(), None))
95+
96+
if self.r2_schema.is_empty() {
97+
Ok((schema_from_bytes(&self.r1_schema)?, None))
8398
} else {
8499
Ok((
85-
self.r1_records[0][0].schema(),
86-
Some(self.r2_records[0][0].schema()),
100+
schema_from_bytes(&self.r1_schema)?,
101+
Some(schema_from_bytes(&self.r2_schema)?),
87102
))
88103
}
89104
}
@@ -95,12 +110,59 @@ impl Arena {
95110
Arena(HashMap::<WindowId, WindowSession>::new())
96111
}
97112

98-
/// Get the data fragments in the temporal window via the key.
99-
pub fn take_batches(&mut self, window_id: &WindowId) -> Vec<Vec<Vec<RecordBatch>>> {
113+
/// Take a window from the arena.
114+
pub async fn take(&mut self, window_id: &WindowId) -> Result<Vec<Vec<Vec<RecordBatch>>>> {
115+
let to_batches = |df: Vec<DataFrame>, schema: SchemaRef| -> Vec<RecordBatch> {
116+
df.into_par_iter()
117+
.map(|d| {
118+
flight_data_to_arrow_batch(
119+
&FlightData {
120+
data_body: d.body,
121+
data_header: d.header,
122+
app_metadata: vec![],
123+
flight_descriptor: None,
124+
},
125+
schema.clone(),
126+
&[],
127+
)
128+
.unwrap()
129+
})
130+
.collect()
131+
};
132+
100133
if let Some(window) = (*self).remove(window_id) {
101-
vec![window.r1_records, window.r2_records]
134+
let (schema1, schema2) = window.schema()?;
135+
136+
let mut tasks: Vec<JoinHandle<Vec<Vec<RecordBatch>>>> = vec![];
137+
138+
let encoding = window.encoding.clone();
139+
tasks.push(tokio::spawn(async move {
140+
window
141+
.r1_flight_data
142+
.into_par_iter()
143+
.map(|d| to_batches(unmarshal(d, encoding.clone()), schema1.clone()))
144+
.collect()
145+
}));
146+
147+
if schema2.is_some() {
148+
let encoding = window.encoding.clone();
149+
let schema2 = schema2.unwrap();
150+
tasks.push(tokio::spawn(async move {
151+
window
152+
.r2_flight_data
153+
.into_par_iter()
154+
.map(|d| to_batches(unmarshal(d, encoding.clone()), schema2.clone()))
155+
.collect()
156+
}));
157+
}
158+
159+
Ok(futures::future::join_all(tasks)
160+
.await
161+
.into_iter()
162+
.map(|r| r.unwrap())
163+
.collect())
102164
} else {
103-
vec![vec![], vec![]]
165+
Ok(vec![vec![], vec![]])
104166
}
105167
}
106168

@@ -112,7 +174,7 @@ impl Arena {
112174
/// Return true if all data fragments of the temporal window have been received.
113175
pub fn is_complete(&self, window_id: &WindowId) -> bool {
114176
self.get(window_id)
115-
.map(|window| window.size == window.r1_records.len())
177+
.map(|window| window.size == window.r1_flight_data.len())
116178
.unwrap_or(false)
117179
}
118180

@@ -132,12 +194,11 @@ impl Arena {
132194
Some(window) => {
133195
assert!(uuid.seq_len == window.size);
134196
if !window.bitmap.is_set(uuid.seq_num) {
135-
let (r1, r2) = payload.to_record_batch();
136-
window.r1_records.push(r1);
137-
window.r2_records.push(r2);
138-
assert!(window.r1_records.len() == window.r2_records.len());
197+
window.r1_flight_data.push(payload.data);
198+
window.r2_flight_data.push(payload.data2);
199+
assert!(window.r1_flight_data.len() == window.r2_flight_data.len());
139200
window.bitmap.set(uuid.seq_num);
140-
if window.size == window.r1_records.len() {
201+
if window.size == window.r1_flight_data.len() {
141202
HashAggregateStatus::Ready
142203
} else {
143204
HashAggregateStatus::NotReady
@@ -147,12 +208,14 @@ impl Arena {
147208
}
148209
}
149210
None => {
150-
let (r1, r2) = payload.to_record_batch();
151211
let mut window = WindowSession {
152-
size: uuid.seq_len,
153-
r1_records: vec![r1],
154-
r2_records: vec![r2],
155-
bitmap: Bitmap::new(uuid.seq_len + 1), // Starts from 1.
212+
size: uuid.seq_len,
213+
r1_flight_data: vec![payload.data],
214+
r2_flight_data: vec![payload.data2],
215+
r1_schema: payload.schema,
216+
r2_schema: payload.schema2,
217+
bitmap: Bitmap::new(uuid.seq_len + 1), // Starts from 1.
218+
encoding: payload.encoding,
156219
};
157220
// SEQ_NUM is used to indicate the data existence in the window via bitmap.
158221
window.bitmap.set(uuid.seq_num);
@@ -243,12 +306,12 @@ mod tests {
243306

244307
if let Some(window) = (*arena).get(&window_id) {
245308
assert_eq!(8, window.size);
246-
assert_eq!(8, window.r1_records.len());
309+
assert_eq!(8, window.r1_flight_data.len());
247310
(0..8).for_each(|i| assert!(window.bitmap.is_set(i + 1)));
248311
}
249312

250-
assert_eq!(8, arena.take_batches(&window_id)[0].len());
251-
assert_eq!(0, arena.take_batches(&("no exists".to_owned(), 0))[0].len());
313+
assert_eq!(8, arena.take(&window_id).await?[0].len());
314+
assert_eq!(0, arena.take(&("no exists".to_owned(), 0)).await?[0].len());
252315

253316
Ok(())
254317
}

0 commit comments

Comments
 (0)