@@ -17,60 +17,104 @@ create or replace table t(c int, s string) compression = 'lz4';
1717statement ok
1818insert into t_encoded(c, s) select number as c, to_string(number) as s from numbers(500000);
1919
20+ statement ok
21+ optimize table t_encoded compact;
22+
2023statement ok
2124insert into t(c, s) select number as c, to_string(number) as s from numbers(500000);
2225
23- query T
24- select humanize_size(file_size) from fuse_block('test_tbl_opt_parquet_encoding', 't_encoded');
25- ----
26- 2.30 MiB
26+ statement ok
27+ optimize table t compact;
2728
29+ # In this case, lz4 with encoding produces smaller block files
2830query T
29- select humanize_size(file_size) from fuse_block('test_tbl_opt_parquet_encoding', 't');
31+ with
32+ e as (select bytes_compressed c from fuse_snapshot('test_tbl_opt_parquet_encoding', 't_encoded') limit 1),
33+ p as (select bytes_compressed c from fuse_snapshot('test_tbl_opt_parquet_encoding', 't') limit 1)
34+ select e.c < p.c from e, p
3035----
31- 3.91 MiB
36+ 1
37+
3238
3339################################
3440# Alter table parquet encoding #
3541################################
3642
43+
44+ # 1. prepare plain-encoded data (parquet encoding not enabled) and record its compressed size
3745statement ok
3846create or replace table tbl (c int, s string) compression = 'lz4';
3947
4048statement ok
4149insert into tbl(c, s) select number as c, to_string(number) as s from numbers(500000);
4250
43- query T
44- select humanize_size(file_size) from fuse_block('test_tbl_opt_parquet_encoding', 'tbl');
45- ----
46- 3.91 MiB
51+ # insertion might be executed in a distributed manner, in which case data blocks might be fragmented; compact them
52+ statement ok
53+ optimize table tbl compact;
54+
55+ statement ok
56+ create temp table tbl_size(s uint64);
57+
58+ statement ok
59+ insert into tbl_size select bytes_compressed from fuse_snapshot('test_tbl_opt_parquet_encoding', 'tbl') limit 1;
60+
61+
62+ # 2. truncate table data and insert the same data with parquet encoding enabled
63+ statement ok
64+ truncate table tbl;
4765
4866statement ok
4967ALTER TABLE tbl SET OPTIONS (enable_parquet_encoding = 'true');
5068
5169statement ok
5270insert into tbl(c, s) select number as c, to_string(number) as s from numbers(500000);
5371
54- # newly created block should be smaller, since enable_parquet_encoding is 'true'
72+ # insertion might be executed in a distributed manner, in which case data blocks might be fragmented; compact them
73+ statement ok
74+ optimize table tbl compact;
75+
76+
77+ # 3. check that file size of newly created blocks with encoding is smaller
78+
5579query T
56- select humanize_size(file_size) from fuse_block('test_tbl_opt_parquet_encoding', 'tbl');
80+ with
81+ e as (select bytes_compressed c from fuse_snapshot('test_tbl_opt_parquet_encoding', 'tbl') limit 1),
82+ p as (select s as c from tbl_size)
83+ select e.c < p.c from e,p
5784----
58- 2.30 MiB
59- 3.91 MiB
85+ 1
86+
87+ # keep the size, will be used later
88+ statement ok
89+ create temp table e_tbl_size(s uint64);
90+
91+ statement ok
92+ insert into e_tbl_size select bytes_compressed from fuse_snapshot('test_tbl_opt_parquet_encoding', 'tbl') limit 1;
93+
94+ # 4. check that table option `enable_parquet_encoding` could be turned off
95+
96+ statement ok
97+ truncate table tbl;
6098
6199statement ok
62100ALTER TABLE tbl SET OPTIONS (enable_parquet_encoding = 'false');
63101
64102statement ok
65103insert into tbl(c, s) select number as c, to_string(number) as s from numbers(500000);
66104
67- # newly created block should be larger, since enable_parquet_encoding is 'false'
105+ statement ok
106+ optimize table tbl compact;
107+
108+
109+ # 5. check that newly created blocks, written with encoding turned off, are larger than the encoded ones
68110query T
69- select humanize_size(file_size) from fuse_block('test_tbl_opt_parquet_encoding', 'tbl');
111+ with
112+ p as (select bytes_compressed c from fuse_snapshot('test_tbl_opt_parquet_encoding', 'tbl') limit 1),
113+ e as (select s as c from e_tbl_size)
114+ select e.c < p.c from e,p
70115----
71- 3.91 MiB
72- 2.30 MiB
73- 3.91 MiB
116+ 1
117+
74118
75119# Test invalid option value
76120
0 commit comments