10
10
// See the License for the specific language governing permissions and
11
11
// limitations under the License.
12
12
13
- use crate :: error;
13
+ use crate :: error:: { self , InvalidListArraySnafu } ;
14
14
use arrow:: array:: {
15
- Array , ArrayRef , ArrowPrimitiveType , BinaryArray , BooleanArray , DictionaryArray , Float32Array ,
16
- Float64Array , Int8Array , Int16Array , Int32Array , Int64Array , PrimitiveArray , RecordBatch ,
17
- StringArray , TimestampNanosecondArray , UInt8Array , UInt16Array , UInt32Array , UInt64Array ,
15
+ Array , ArrayRef , ArrowPrimitiveType , BinaryArray , BooleanArray , DictionaryArray ,
16
+ FixedSizeBinaryArray , Float32Array , Float64Array , Int8Array , Int16Array , Int32Array ,
17
+ Int64Array , PrimitiveArray , RecordBatch , StringArray , TimestampNanosecondArray , UInt8Array ,
18
+ UInt16Array , UInt32Array , UInt64Array ,
18
19
} ;
19
20
use arrow:: datatypes:: { ArrowDictionaryKeyType , TimeUnit } ;
20
21
use arrow:: datatypes:: { ArrowNativeType , DataType , UInt8Type , UInt16Type } ;
@@ -95,6 +96,18 @@ impl NullableArrayAccessor for BinaryArray {
95
96
}
96
97
}
97
98
99
+ impl NullableArrayAccessor for FixedSizeBinaryArray {
100
+ type Native = Vec < u8 > ;
101
+
102
+ fn value_at ( & self , idx : usize ) -> Option < Self :: Native > {
103
+ if self . is_valid ( idx) {
104
+ Some ( self . value ( idx) . to_vec ( ) )
105
+ } else {
106
+ None
107
+ }
108
+ }
109
+ }
110
+
98
111
impl NullableArrayAccessor for StringArray {
99
112
type Native = String ;
100
113
@@ -221,76 +234,190 @@ where
221
234
}
222
235
}
223
236
224
- pub type DictionaryStringArrayAccessor < ' a , K > = DictionaryArrayAccessor < ' a , K , StringArray > ;
237
+ /// Wrapper around various arrays that may return a byte slice. Note that
238
+ /// this delegates to the underlying NullableArrayAccessor implementation
239
+ /// for the Arrow array which copies the bytes when value_at is called
240
+ pub enum ByteArrayAccessor < ' a > {
241
+ Binary ( MaybeDictArrayAccessor < ' a , BinaryArray > ) ,
242
+ FixedSizeBinary ( MaybeDictArrayAccessor < ' a , FixedSizeBinaryArray > ) ,
243
+ }
225
244
226
- /// [StringArrayAccessor] allows to access string values from [StringArray]s and [DictionaryArray]s.
227
- pub enum StringArrayAccessor < ' a > {
228
- /// Plain StringArray
229
- String ( & ' a StringArray ) ,
230
- /// DictionaryArray with UInt8 keys and String values.
231
- Dictionary8 ( DictionaryStringArrayAccessor < ' a , UInt8Type > ) ,
232
- /// DictionaryArray with UInt16 keys and String values.
233
- Dictionary16 ( DictionaryStringArrayAccessor < ' a , UInt16Type > ) ,
245
+ impl < ' a > ByteArrayAccessor < ' a > {
246
+ pub fn try_new ( arr : & ' a ArrayRef ) -> error:: Result < Self > {
247
+ match arr. data_type ( ) {
248
+ DataType :: Binary => {
249
+ MaybeDictArrayAccessor :: < BinaryArray > :: try_new ( arr) . map ( Self :: Binary )
250
+ }
251
+ DataType :: FixedSizeBinary ( dims) => {
252
+ MaybeDictArrayAccessor :: < FixedSizeBinaryArray > :: try_new ( arr, * dims)
253
+ . map ( Self :: FixedSizeBinary )
254
+ }
255
+ DataType :: Dictionary ( _, val) => match * * val {
256
+ DataType :: Binary => {
257
+ MaybeDictArrayAccessor :: < BinaryArray > :: try_new ( arr) . map ( Self :: Binary )
258
+ }
259
+ DataType :: FixedSizeBinary ( dims) => {
260
+ MaybeDictArrayAccessor :: < FixedSizeBinaryArray > :: try_new ( arr, dims)
261
+ . map ( Self :: FixedSizeBinary )
262
+ }
263
+ _ => error:: UnsupportedDictionaryValueTypeSnafu {
264
+ expect_oneof : vec ! [ DataType :: Binary , DataType :: FixedSizeBinary ( -1 ) ] ,
265
+ actual : ( * * val) . clone ( ) ,
266
+ }
267
+ . fail ( ) ,
268
+ } ,
269
+ _ => InvalidListArraySnafu {
270
+ expect_oneof : vec ! [
271
+ DataType :: Binary ,
272
+ DataType :: FixedSizeBinary ( -1 ) ,
273
+ DataType :: Dictionary ( Box :: new( DataType :: UInt8 ) , Box :: new( DataType :: Binary ) ) ,
274
+ DataType :: Dictionary ( Box :: new( DataType :: UInt16 ) , Box :: new( DataType :: Binary ) ) ,
275
+ DataType :: Dictionary (
276
+ Box :: new( DataType :: UInt8 ) ,
277
+ Box :: new( DataType :: FixedSizeBinary ( -1 ) ) ,
278
+ ) ,
279
+ DataType :: Dictionary (
280
+ Box :: new( DataType :: UInt16 ) ,
281
+ Box :: new( DataType :: FixedSizeBinary ( -1 ) ) ,
282
+ ) ,
283
+ ] ,
284
+ actual : arr. data_type ( ) . clone ( ) ,
285
+ }
286
+ . fail ( ) ,
287
+ }
288
+ }
234
289
}
235
290
236
- impl NullableArrayAccessor for StringArrayAccessor < ' _ > {
237
- type Native = String ;
291
+ impl NullableArrayAccessor for ByteArrayAccessor < ' _ > {
292
+ type Native = Vec < u8 > ;
238
293
239
294
fn value_at ( & self , idx : usize ) -> Option < Self :: Native > {
240
295
match self {
241
- StringArrayAccessor :: String ( s) => s. value_at ( idx) ,
242
- StringArrayAccessor :: Dictionary8 ( d) => d. value_at ( idx) ,
243
- StringArrayAccessor :: Dictionary16 ( d) => d. value_at ( idx) ,
296
+ Self :: Binary ( b) => b. value_at ( idx) ,
297
+ Self :: FixedSizeBinary ( b) => b. value_at ( idx) ,
244
298
}
245
299
}
246
300
}
247
301
248
- impl < ' a > StringArrayAccessor < ' a > {
249
- pub fn new ( a : & ' a ArrayRef ) -> error:: Result < Self > {
250
- let result = match a. data_type ( ) {
251
- DataType :: Utf8 => {
252
- // safety: we've checked array data type
253
- Self :: String ( a. as_any ( ) . downcast_ref :: < StringArray > ( ) . unwrap ( ) )
254
- }
255
- DataType :: Dictionary ( key, v) => {
256
- ensure ! (
257
- * * v == DataType :: Utf8 ,
258
- error:: UnsupportedStringColumnTypeSnafu {
259
- data_type: ( * * v) . clone( )
260
- }
261
- ) ;
262
- match * * key {
263
- DataType :: UInt8 => Self :: Dictionary8 ( DictionaryArrayAccessor :: new (
264
- // safety: we've checked the key type
265
- a. as_any ( )
266
- . downcast_ref :: < DictionaryArray < UInt8Type > > ( )
267
- . unwrap ( ) ,
268
- ) ) ,
269
- DataType :: UInt16 => Self :: Dictionary16 ( DictionaryArrayAccessor :: new (
270
- // safety: we've checked the key type
271
- a. as_any ( )
272
- . downcast_ref :: < DictionaryArray < UInt16Type > > ( )
273
- . unwrap ( ) ,
274
- ) ) ,
275
- _ => {
276
- return error:: UnsupportedStringDictKeyTypeSnafu {
277
- data_type : a. data_type ( ) . clone ( ) ,
278
- }
279
- . fail ( ) ;
280
- }
302
+ /// Wrapper around an array that might be a dictionary or it might just be an unencoded
303
+ /// array of the base type
304
+ pub enum MaybeDictArrayAccessor < ' a , V > {
305
+ Native ( & ' a V ) ,
306
+ Dictionary8 ( DictionaryArrayAccessor < ' a , UInt8Type , V > ) ,
307
+ Dictionary16 ( DictionaryArrayAccessor < ' a , UInt16Type , V > ) ,
308
+ }
309
+
310
+ impl < ' a , T > NullableArrayAccessor for MaybeDictArrayAccessor < ' a , T >
311
+ where
312
+ T : Array + NullableArrayAccessor + ' static ,
313
+ {
314
+ type Native = T :: Native ;
315
+
316
+ fn value_at (
317
+ & self ,
318
+ idx : usize ,
319
+ ) -> Option < <MaybeDictArrayAccessor < ' a , T > as NullableArrayAccessor >:: Native > {
320
+ match self {
321
+ Self :: Native ( s) => s. value_at ( idx) ,
322
+ Self :: Dictionary8 ( d) => d. value_at ( idx) ,
323
+ Self :: Dictionary16 ( d) => d. value_at ( idx) ,
324
+ }
325
+ }
326
+ }
327
+
328
+ impl < ' a , T > MaybeDictArrayAccessor < ' a , T >
329
+ where
330
+ T : Array + NullableArrayAccessor + ' static ,
331
+ {
332
+ /// Inspects the given array to determine whether it can be treated as an array
333
+ /// of the specified data type. The array must either:
334
+ /// - Directly have the expected data type, or
335
+ /// - Be a dictionary array whose value type matches the expected data type.
336
+ ///
337
+ /// Returns a wrapped native array if the type matches.
338
+ /// Returns an error if the array type can't be treated as this datatype
339
+ fn try_new_with_datatype ( data_type : DataType , arr : & ' a ArrayRef ) -> error:: Result < Self > {
340
+ // if the type isn't a dictionary, we treat it as an unencoded array
341
+ if * arr. data_type ( ) == data_type {
342
+ return Ok ( Self :: Native ( arr. as_any ( ) . downcast_ref :: < T > ( ) . unwrap ( ) ) ) ;
343
+ }
344
+
345
+ // determine if the type is a dictionary where the value is the desired datatype
346
+ if let DataType :: Dictionary ( key, v) = arr. data_type ( ) {
347
+ ensure ! (
348
+ * * v == data_type,
349
+ error:: UnsupportedDictionaryValueTypeSnafu {
350
+ expect_oneof: vec![ data_type] ,
351
+ actual: ( * * v) . clone( )
281
352
}
282
- }
283
- _ => {
284
- return error:: UnsupportedStringColumnTypeSnafu {
285
- data_type : a. data_type ( ) . clone ( ) ,
353
+ ) ;
354
+
355
+ let result = match * * key {
356
+ DataType :: UInt8 => Self :: Dictionary8 ( DictionaryArrayAccessor :: new (
357
+ arr. as_any ( )
358
+ . downcast_ref :: < DictionaryArray < UInt8Type > > ( )
359
+ . unwrap ( ) ,
360
+ ) ) ,
361
+ DataType :: UInt16 => Self :: Dictionary16 ( DictionaryArrayAccessor :: new (
362
+ arr. as_any ( )
363
+ . downcast_ref :: < DictionaryArray < UInt16Type > > ( )
364
+ . unwrap ( ) ,
365
+ ) ) ,
366
+ _ => {
367
+ return error:: UnsupportedDictionaryKeyTypeSnafu {
368
+ expect_oneof : vec ! [ DataType :: UInt8 , DataType :: UInt16 ] ,
369
+ actual : ( * * key) . clone ( ) ,
370
+ }
371
+ . fail ( ) ;
286
372
}
287
- . fail ( ) ;
288
- }
289
- } ;
290
- Ok ( result)
373
+ } ;
374
+
375
+ return Ok ( result) ;
376
+ }
377
+
378
+ InvalidListArraySnafu {
379
+ expect_oneof : vec ! [
380
+ data_type. clone( ) ,
381
+ DataType :: Dictionary ( Box :: new( DataType :: UInt8 ) , Box :: new( data_type. clone( ) ) ) ,
382
+ DataType :: Dictionary ( Box :: new( DataType :: UInt16 ) , Box :: new( data_type. clone( ) ) ) ,
383
+ ] ,
384
+ actual : arr. data_type ( ) . clone ( ) ,
385
+ }
386
+ . fail ( )
291
387
}
292
388
}
293
389
390
+ impl < ' a , V > MaybeDictArrayAccessor < ' a , PrimitiveArray < V > >
391
+ where
392
+ V : ArrowPrimitiveType ,
393
+ {
394
+ pub fn try_new ( arr : & ' a ArrayRef ) -> error:: Result < Self > {
395
+ Self :: try_new_with_datatype ( V :: DATA_TYPE , arr)
396
+ }
397
+ }
398
+
399
+ impl < ' a > MaybeDictArrayAccessor < ' a , BinaryArray > {
400
+ pub fn try_new ( arr : & ' a ArrayRef ) -> error:: Result < Self > {
401
+ Self :: try_new_with_datatype ( BinaryArray :: DATA_TYPE , arr)
402
+ }
403
+ }
404
+
405
+ impl < ' a > MaybeDictArrayAccessor < ' a , FixedSizeBinaryArray > {
406
+ pub fn try_new ( arr : & ' a ArrayRef , dims : i32 ) -> error:: Result < Self > {
407
+ Self :: try_new_with_datatype ( DataType :: FixedSizeBinary ( dims) , arr)
408
+ }
409
+ }
410
+
411
+ impl < ' a > MaybeDictArrayAccessor < ' a , StringArray > {
412
+ pub fn try_new ( arr : & ' a ArrayRef ) -> error:: Result < Self > {
413
+ Self :: try_new_with_datatype ( StringArray :: DATA_TYPE , arr)
414
+ }
415
+ }
416
+
417
/// Accessor for `Int32Array` values, stored unencoded or as dictionary values.
pub type Int32ArrayAccessor<'a> = MaybeDictArrayAccessor<'a, Int32Array>;
/// Accessor for `Int64Array` values, stored unencoded or as dictionary values.
pub type Int64ArrayAccessor<'a> = MaybeDictArrayAccessor<'a, Int64Array>;
/// Accessor for `StringArray` values, stored unencoded or as dictionary values.
pub type StringArrayAccessor<'a> = MaybeDictArrayAccessor<'a, StringArray>;
420
+
294
421
pub struct DictionaryArrayAccessor < ' a , K , V >
295
422
where
296
423
K : ArrowDictionaryKeyType ,
@@ -311,8 +438,12 @@ where
311
438
}
312
439
313
440
/// Returns the dictionary value referenced by the key at `idx`.
///
/// Yields `None` when the key slot at `idx` is null, and otherwise whatever
/// the values accessor returns for the referenced entry.
pub fn value_at(&self, idx: usize) -> Option<V::Native> {
    // `key` already returns `None` for a null slot, so chaining on it covers
    // the validity check without a separate `is_valid` test or an `unwrap`.
    self.inner
        .key(idx)
        .and_then(|offset| self.value.value_at(offset))
}
317
448
}
318
449
@@ -327,7 +458,7 @@ mod tests {
327
458
fn test_dictionary_accessor ( ) {
328
459
let expected: DictionaryArray < UInt16Type > = vec ! [ "a" , "a" , "b" , "c" ] . into_iter ( ) . collect ( ) ;
329
460
let dict = Arc :: new ( expected) as ArrayRef ;
330
- let accessor = StringArrayAccessor :: new ( & dict) . unwrap ( ) ;
461
+ let accessor = StringArrayAccessor :: try_new ( & dict) . unwrap ( ) ;
331
462
assert_eq ! ( "a" , accessor. value_at( 0 ) . unwrap( ) ) ;
332
463
assert_eq ! ( "a" , accessor. value_at( 1 ) . unwrap( ) ) ;
333
464
assert_eq ! ( "b" , accessor. value_at( 2 ) . unwrap( ) ) ;
0 commit comments