Skip to content

Commit 231cf78

Browse files
authored
Improve parquet reading performance for columns with nulls by preserving bitmask when possible (#1037) (#1054)
* Preserve bitmask (#1037) * Remove now unnecessary box (#1061) * Fix handling of empty bitmasks * More docs * Add nested nullability test case * Add packed decoder test
1 parent 4218c74 commit 231cf78

File tree

8 files changed

+571
-86
lines changed

8 files changed

+571
-86
lines changed

arrow/src/array/builder.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,11 @@ impl BooleanBufferBuilder {
419419
);
420420
}
421421

422+
/// Returns the packed bits
423+
pub fn as_slice(&self) -> &[u8] {
424+
self.buffer.as_slice()
425+
}
426+
422427
#[inline]
423428
pub fn finish(&mut self) -> Buffer {
424429
let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));

parquet/benches/arrow_array_reader.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -301,8 +301,13 @@ fn create_int32_primitive_array_reader(
301301
column_desc: ColumnDescPtr,
302302
) -> impl ArrayReader {
303303
use parquet::arrow::array_reader::PrimitiveArrayReader;
304-
PrimitiveArrayReader::<Int32Type>::new(Box::new(page_iterator), column_desc, None)
305-
.unwrap()
304+
PrimitiveArrayReader::<Int32Type>::new_with_options(
305+
Box::new(page_iterator),
306+
column_desc,
307+
None,
308+
true,
309+
)
310+
.unwrap()
306311
}
307312

308313
fn create_string_arrow_array_reader(

parquet/src/arrow/array_reader.rs

Lines changed: 62 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,17 @@ where
247247
pages: Box<dyn PageIterator>,
248248
column_desc: ColumnDescPtr,
249249
arrow_type: Option<ArrowType>,
250+
) -> Result<Self> {
251+
Self::new_with_options(pages, column_desc, arrow_type, false)
252+
}
253+
254+
/// Construct primitive array reader with ability to only compute null mask and not
255+
/// buffer level data
256+
pub fn new_with_options(
257+
pages: Box<dyn PageIterator>,
258+
column_desc: ColumnDescPtr,
259+
arrow_type: Option<ArrowType>,
260+
null_mask_only: bool,
250261
) -> Result<Self> {
251262
// Check if Arrow type is specified, else create it from Parquet type
252263
let data_type = match arrow_type {
@@ -256,7 +267,7 @@ where
256267
.clone(),
257268
};
258269

259-
let record_reader = RecordReader::<T>::new(column_desc.clone());
270+
let record_reader = RecordReader::<T>::new_with_options(column_desc.clone(), null_mask_only);
260271

261272
Ok(Self {
262273
data_type,
@@ -1350,19 +1361,26 @@ impl<'a> TypeVisitor<Option<Box<dyn ArrayReader>>, &'a ArrayReaderBuilderContext
13501361
let mut new_context = context.clone();
13511362
new_context.path.append(vec![cur_type.name().to_string()]);
13521363

1353-
match cur_type.get_basic_info().repetition() {
1364+
let null_mask_only = match cur_type.get_basic_info().repetition() {
13541365
Repetition::REPEATED => {
13551366
new_context.def_level += 1;
13561367
new_context.rep_level += 1;
1368+
false
13571369
}
13581370
Repetition::OPTIONAL => {
13591371
new_context.def_level += 1;
1372+
1373+
// Can just compute null mask if no parent
1374+
context.def_level == 0 && context.rep_level == 0
13601375
}
1361-
_ => (),
1362-
}
1376+
_ => false,
1377+
};
13631378

1364-
let reader =
1365-
self.build_for_primitive_type_inner(cur_type.clone(), &new_context)?;
1379+
let reader = self.build_for_primitive_type_inner(
1380+
cur_type.clone(),
1381+
&new_context,
1382+
null_mask_only,
1383+
)?;
13661384

13671385
if cur_type.get_basic_info().repetition() == Repetition::REPEATED {
13681386
Err(ArrowError(
@@ -1641,6 +1659,7 @@ impl<'a> ArrayReaderBuilder {
16411659
&self,
16421660
cur_type: TypePtr,
16431661
context: &'a ArrayReaderBuilderContext,
1662+
null_mask_only: bool,
16441663
) -> Result<Box<dyn ArrayReader>> {
16451664
let column_desc = Arc::new(ColumnDescriptor::new(
16461665
cur_type.clone(),
@@ -1658,30 +1677,39 @@ impl<'a> ArrayReaderBuilder {
16581677
.map(|f| f.data_type().clone());
16591678

16601679
match cur_type.get_physical_type() {
1661-
PhysicalType::BOOLEAN => Ok(Box::new(PrimitiveArrayReader::<BoolType>::new(
1662-
page_iterator,
1663-
column_desc,
1664-
arrow_type,
1665-
)?)),
1680+
PhysicalType::BOOLEAN => Ok(Box::new(
1681+
PrimitiveArrayReader::<BoolType>::new_with_options(
1682+
page_iterator,
1683+
column_desc,
1684+
arrow_type,
1685+
null_mask_only,
1686+
)?,
1687+
)),
16661688
PhysicalType::INT32 => {
16671689
if let Some(ArrowType::Null) = arrow_type {
16681690
Ok(Box::new(NullArrayReader::<Int32Type>::new(
16691691
page_iterator,
16701692
column_desc,
16711693
)?))
16721694
} else {
1673-
Ok(Box::new(PrimitiveArrayReader::<Int32Type>::new(
1674-
page_iterator,
1675-
column_desc,
1676-
arrow_type,
1677-
)?))
1695+
Ok(Box::new(
1696+
PrimitiveArrayReader::<Int32Type>::new_with_options(
1697+
page_iterator,
1698+
column_desc,
1699+
arrow_type,
1700+
null_mask_only,
1701+
)?,
1702+
))
16781703
}
16791704
}
1680-
PhysicalType::INT64 => Ok(Box::new(PrimitiveArrayReader::<Int64Type>::new(
1681-
page_iterator,
1682-
column_desc,
1683-
arrow_type,
1684-
)?)),
1705+
PhysicalType::INT64 => Ok(Box::new(
1706+
PrimitiveArrayReader::<Int64Type>::new_with_options(
1707+
page_iterator,
1708+
column_desc,
1709+
arrow_type,
1710+
null_mask_only,
1711+
)?,
1712+
)),
16851713
PhysicalType::INT96 => {
16861714
// get the optional timezone information from arrow type
16871715
let timezone = arrow_type
@@ -1705,18 +1733,22 @@ impl<'a> ArrayReaderBuilder {
17051733
arrow_type,
17061734
)?))
17071735
}
1708-
PhysicalType::FLOAT => Ok(Box::new(PrimitiveArrayReader::<FloatType>::new(
1709-
page_iterator,
1710-
column_desc,
1711-
arrow_type,
1712-
)?)),
1713-
PhysicalType::DOUBLE => {
1714-
Ok(Box::new(PrimitiveArrayReader::<DoubleType>::new(
1736+
PhysicalType::FLOAT => Ok(Box::new(
1737+
PrimitiveArrayReader::<FloatType>::new_with_options(
17151738
page_iterator,
17161739
column_desc,
17171740
arrow_type,
1718-
)?))
1719-
}
1741+
null_mask_only,
1742+
)?,
1743+
)),
1744+
PhysicalType::DOUBLE => Ok(Box::new(
1745+
PrimitiveArrayReader::<DoubleType>::new_with_options(
1746+
page_iterator,
1747+
column_desc,
1748+
arrow_type,
1749+
null_mask_only,
1750+
)?,
1751+
)),
17201752
PhysicalType::BYTE_ARRAY => {
17211753
if cur_type.get_basic_info().converted_type() == ConvertedType::UTF8 {
17221754
if let Some(ArrowType::LargeUtf8) = arrow_type {

parquet/src/arrow/arrow_reader.rs

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -249,10 +249,11 @@ mod tests {
249249
use crate::file::properties::{WriterProperties, WriterVersion};
250250
use crate::file::reader::{FileReader, SerializedFileReader};
251251
use crate::file::writer::{FileWriter, SerializedFileWriter};
252+
use crate::schema::parser::parse_message_type;
252253
use crate::schema::types::{Type, TypePtr};
253-
use crate::util::test_common::{get_temp_filename, RandGen};
254+
use crate::util::test_common::{get_temp_file, get_temp_filename, RandGen};
254255
use arrow::array::*;
255-
use arrow::datatypes::DataType as ArrowDataType;
256+
use arrow::datatypes::{DataType as ArrowDataType, Field};
256257
use arrow::record_batch::RecordBatchReader;
257258
use rand::{thread_rng, RngCore};
258259
use serde_json::json;
@@ -916,4 +917,54 @@ mod tests {
916917
batch.unwrap();
917918
}
918919
}
920+
921+
#[test]
922+
fn test_nested_nullability() {
923+
let message_type = "message nested {
924+
OPTIONAL Group group {
925+
REQUIRED INT32 leaf;
926+
}
927+
}";
928+
929+
let file = get_temp_file("nested_nullability.parquet", &[]);
930+
let schema = Arc::new(parse_message_type(message_type).unwrap());
931+
932+
{
933+
// Write using low-level parquet API (#1167)
934+
let writer_props = Arc::new(WriterProperties::builder().build());
935+
let mut writer = SerializedFileWriter::new(
936+
file.try_clone().unwrap(),
937+
schema,
938+
writer_props,
939+
)
940+
.unwrap();
941+
942+
let mut row_group_writer = writer.next_row_group().unwrap();
943+
let mut column_writer = row_group_writer.next_column().unwrap().unwrap();
944+
945+
get_typed_column_writer_mut::<Int32Type>(&mut column_writer)
946+
.write_batch(&[34, 76], Some(&[0, 1, 0, 1]), None)
947+
.unwrap();
948+
949+
row_group_writer.close_column(column_writer).unwrap();
950+
writer.close_row_group(row_group_writer).unwrap();
951+
952+
writer.close().unwrap();
953+
}
954+
955+
let file_reader = Arc::new(SerializedFileReader::new(file).unwrap());
956+
let mut batch = ParquetFileArrowReader::new(file_reader);
957+
let reader = batch.get_record_reader_by_columns(vec![0], 1024).unwrap();
958+
959+
let expected_schema = arrow::datatypes::Schema::new(vec![Field::new(
960+
"group",
961+
ArrowDataType::Struct(vec![Field::new("leaf", ArrowDataType::Int32, false)]),
962+
true,
963+
)]);
964+
965+
let batch = reader.into_iter().next().unwrap().unwrap();
966+
assert_eq!(batch.schema().as_ref(), &expected_schema);
967+
assert_eq!(batch.num_rows(), 4);
968+
assert_eq!(batch.column(0).data().null_count(), 2);
969+
}
919970
}

parquet/src/arrow/record_reader.rs

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,35 @@ where
7373
V: ValuesBuffer + Default,
7474
CV: ColumnValueDecoder<Slice = V::Slice>,
7575
{
76+
/// Create a new [`GenericRecordReader`]
7677
pub fn new(desc: ColumnDescPtr) -> Self {
77-
let def_levels =
78-
(desc.max_def_level() > 0).then(|| DefinitionLevelBuffer::new(&desc));
78+
Self::new_with_options(desc, false)
79+
}
80+
81+
/// Create a new [`GenericRecordReader`] with the ability to only generate the bitmask
82+
///
83+
/// If `null_mask_only` is true only the null bitmask will be generated and
84+
/// [`Self::consume_def_levels`] and [`Self::consume_rep_levels`] will always return `None`
85+
///
86+
/// It is insufficient to solely check that the max definition level is 1 as we
87+
/// need there to be no nullable parent array that will require decoded definition levels
88+
///
89+
/// In particular consider the case of:
90+
///
91+
/// ```ignore
92+
/// message nested {
93+
/// OPTIONAL Group group {
94+
/// REQUIRED INT32 leaf;
95+
/// }
96+
/// }
97+
/// ```
98+
///
99+
/// The maximum definition level of leaf is 1, however, we still need to decode the
100+
/// definition levels so that the parent group can be constructed correctly
101+
///
102+
pub(crate) fn new_with_options(desc: ColumnDescPtr, null_mask_only: bool) -> Self {
103+
let def_levels = (desc.max_def_level() > 0)
104+
.then(|| DefinitionLevelBuffer::new(&desc, null_mask_only));
79105

80106
let rep_levels = (desc.max_rep_level() > 0).then(ScalarBuffer::new);
81107

@@ -171,7 +197,7 @@ where
171197
/// as record values, e.g. those from `self.num_values` to `self.values_written`.
172198
pub fn consume_def_levels(&mut self) -> Result<Option<Buffer>> {
173199
Ok(match self.def_levels.as_mut() {
174-
Some(x) => Some(x.split_off(self.num_values)),
200+
Some(x) => x.split_levels(self.num_values),
175201
None => None,
176202
})
177203
}
@@ -221,10 +247,7 @@ where
221247
.as_mut()
222248
.map(|levels| levels.spare_capacity_mut(batch_size));
223249

224-
let def_levels = self
225-
.def_levels
226-
.as_mut()
227-
.map(|levels| levels.spare_capacity_mut(batch_size));
250+
let def_levels = self.def_levels.as_mut();
228251

229252
let values = self.records.spare_capacity_mut(batch_size);
230253

parquet/src/arrow/record_reader/buffer.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,11 @@ impl<T: ScalarValue> ScalarBuffer<T> {
114114
self.len == 0
115115
}
116116

117+
pub fn resize(&mut self, len: usize) {
118+
self.buffer.resize(len * std::mem::size_of::<T>(), 0);
119+
self.len = len;
120+
}
121+
117122
#[inline]
118123
pub fn as_slice(&self) -> &[T] {
119124
let (prefix, buf, suffix) = unsafe { self.buffer.as_slice().align_to::<T>() };

0 commit comments

Comments
 (0)