Skip to content

Commit c0eadc9

Browse files
[BugFix] Fix alter mv active bugs (backport #60291) (backport #60318) (#60690)
Signed-off-by: shuming.li <[email protected]> Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> Co-authored-by: shuming.li <[email protected]>
1 parent 5834028 commit c0eadc9

File tree

6 files changed

+216
-19
lines changed

6 files changed

+216
-19
lines changed

fe/fe-core/src/main/java/com/starrocks/alter/AlterJobMgr.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,8 +226,7 @@ public void alterMaterializedViewStatus(MaterializedView materializedView, Strin
226226
List<BaseTableInfo> baseTableInfos =
227227
Lists.newArrayList(MaterializedViewAnalyzer.getBaseTableInfos(mvQueryStatement, !isReplay));
228228
materializedView.setBaseTableInfos(baseTableInfos);
229-
materializedView.onReload();
230-
materializedView.setActive();
229+
materializedView.fixRelationship();
231230
} else if (AlterMaterializedViewStatusClause.INACTIVE.equalsIgnoreCase(status)) {
232231
materializedView.setInactiveAndReason(reason);
233232
// clear running & pending task runs since the mv has been inactive

fe/fe-core/src/main/java/com/starrocks/alter/AlterMVJobExecutor.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -501,7 +501,6 @@ public Void visitAlterMaterializedViewStatusClause(AlterMaterializedViewStatusCl
501501
try {
502502
if (AlterMaterializedViewStatusClause.ACTIVE.equalsIgnoreCase(status)) {
503503
// check if the materialized view can be activated without rebuilding relationships.
504-
materializedView.fixRelationship();
505504
if (materializedView.isActive()) {
506505
return null;
507506
}

fe/fe-core/src/main/java/com/starrocks/catalog/MaterializedView.java

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1053,8 +1053,31 @@ private void onDropImpl(Database db, boolean replay) {
10531053

10541054
@Override
10551055
public void onReload() {
1056+
onReload(false);
1057+
}
1058+
1059+
/**
1060+
* Reload the materialized view with original active state.
1061+
* NOTE: This method will not try to activate the materialized view.
1062+
* @param postLoadImage: whether this reload is called after FE's image loading process.
1063+
*/
1064+
public void onReload(boolean postLoadImage) {
1065+
onReload(postLoadImage, isActive());
1066+
}
1067+
1068+
/**
1069+
* `postLoadImage` is used to distinct wether it's called after FE's image loading process,
1070+
* such as FE startup or checkpointing.
1071+
*
1072+
* Note!! The `onReload` method is called in some other scenarios such as - schema change of a materialize view.
1073+
* The reloaded flag was introduced only to increase the speed of FE startup and checkpointing.
1074+
* It shouldn't affect the behavior of other operations which might indeed need to do a reload process.
1075+
*
1076+
* @param postLoadImage whether this reload is called after FE's image loading process.
1077+
* @param desiredActive whether the materialized view should be active after reload.
1078+
*/
1079+
private void onReload(boolean postLoadImage, boolean desiredActive) {
10561080
try {
1057-
boolean desiredActive = active;
10581081
active = false;
10591082
boolean reloadActive = onReloadImpl();
10601083
if (desiredActive && reloadActive) {
@@ -1073,7 +1096,7 @@ public void onReload() {
10731096
* NOTE: caller need to hold the db lock
10741097
*/
10751098
public void fixRelationship() {
1076-
onReload();
1099+
onReload(false, true);
10771100
}
10781101

10791102
/**
@@ -1925,6 +1948,20 @@ private void analyzePartitionExprs() {
19251948
analyzeRefBaseTablePartitionColumns();
19261949
// analyze partition retention condition
19271950
analyzeMVRetentionCondition(connectContext);
1951+
1952+
// add a check for partition columns to ensure they are not empty if the table is partitioned.
1953+
// throw exception is ok which will make mv inactive to avoid using it in query rewrite.
1954+
if (partitionExprMaps != null && !partitionExprMaps.isEmpty()) {
1955+
Preconditions.checkArgument(refBaseTablePartitionColumnsOpt.isPresent() &&
1956+
!refBaseTablePartitionColumnsOpt.get().isEmpty(), String.format("Ref base table " +
1957+
"partition columns should not be empty:%s", refBaseTablePartitionColumnsOpt));
1958+
Preconditions.checkArgument(refBaseTablePartitionExprsOpt.isPresent() &&
1959+
!refBaseTablePartitionExprsOpt.get().isEmpty(), String.format("Ref base table " +
1960+
"partition exprs should not be empty:%s", refBaseTablePartitionExprsOpt));
1961+
Preconditions.checkArgument(refBaseTablePartitionSlotsOpt.isPresent() &&
1962+
!refBaseTablePartitionSlotsOpt.get().isEmpty(), String.format("Ref base table " +
1963+
"partition column slots should not be empty:%s", refBaseTablePartitionSlotsOpt));
1964+
}
19281965
}
19291966

19301967
public synchronized void analyzeMVRetentionCondition(ConnectContext connectContext) {

test/lib/sr_sql_lib.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ def self_print(msg, color: ColorEnum = None, bold=False, logout=False, need_prin
139139
T_R_TABLE = "t_r_table"
140140

141141
SECRET_INFOS = {}
142+
TASK_RUN_SUCCESS_STATES = set(["SUCCESS", "MERGED", "SKIPPED"])
142143

143144

144145
class StarrocksSQLApiLib(object):
@@ -1942,19 +1943,11 @@ def is_all_finished3():
19421943
res = self.execute_sql(show_sql, True)
19431944
if not res["status"]:
19441945
tools.assert_true(False, "show mv state error")
1945-
success_cnt = get_success_count(res["result"])
1946+
success_cnt = self.get_task_run_success_count(res["result"])
19461947
if success_cnt >= check_count:
19471948
return True
19481949
return False
19491950

1950-
# information_schema.task_runs result
1951-
def get_success_count(results):
1952-
cnt = 0
1953-
for _res in results:
1954-
if _res[0] == "SUCCESS" or _res[0] == "MERGED":
1955-
cnt += 1
1956-
return cnt
1957-
19581951
max_loop_count = 180
19591952
is_all_ok = False
19601953
count = 0
@@ -1976,10 +1969,18 @@ def get_success_count(results):
19761969
count += 1
19771970
tools.assert_equal(True, is_all_ok, "wait async materialized view finish error")
19781971

1972+
# information_schema.task_runs result
1973+
def get_task_run_success_count(self, results):
1974+
cnt = 0
1975+
for _res in results:
1976+
if _res[0] in TASK_RUN_SUCCESS_STATES:
1977+
cnt += 1
1978+
return cnt
1979+
19791980
def wait_mv_refresh_count(self, db_name, mv_name, expect_count):
1980-
show_sql = """select count(*) from information_schema.materialized_views
1981+
show_sql = """select state from information_schema.materialized_views
19811982
join information_schema.task_runs using(task_name)
1982-
where table_schema='{}' and table_name='{}' and (state = 'SUCCESS' or state = 'MERGED')
1983+
where table_schema='{}' and table_name='{}';
19831984
""".format(
19841985
db_name, mv_name
19851986
)
@@ -1988,9 +1989,8 @@ def wait_mv_refresh_count(self, db_name, mv_name, expect_count):
19881989
cnt = 1
19891990
refresh_count = 0
19901991
while cnt < 60:
1991-
res = self.execute_sql(show_sql, True)
1992-
print(res)
1993-
refresh_count = res["result"][0][0]
1992+
res = self.retry_execute_sql(show_sql, True)
1993+
refresh_count = self.get_task_run_success_count(res["result"])
19941994
if refresh_count >= expect_count:
19951995
return
19961996
else:
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
-- name: test_mv_with_schema_change_column_rename
2+
create database db_${uuid0};
3+
-- result:
4+
-- !result
5+
use db_${uuid0};
6+
-- result:
7+
-- !result
8+
CREATE TABLE `duplicate_table_with_null_partition` (
9+
`k1` date,
10+
`k2` datetime,
11+
`k3` char(20),
12+
`k4` varchar(20),
13+
`k5` boolean,
14+
`k6` tinyint
15+
)
16+
DUPLICATE KEY(`k1`, `k2`, `k3`, `k4`, `k5`)
17+
PARTITION BY RANGE(`k1`)
18+
(
19+
PARTITION `p202006` VALUES LESS THAN ("2020-07-01"),
20+
PARTITION `p202007` VALUES LESS THAN ("2020-08-01"),
21+
PARTITION `p202008` VALUES LESS THAN ("2020-09-01")
22+
)
23+
DISTRIBUTED BY HASH(`k1`, `k2`, `k3`, `k4`, `k5`) BUCKETS 3;
24+
-- result:
25+
-- !result
26+
INSERT INTO duplicate_table_with_null_partition VALUES
27+
("2020-06-23","2020-06-23 00:00:00","beijing","haidian",-1,-128),
28+
("2020-07-23","2020-07-23 00:00:00","shanghai","pudong",0,0),
29+
("2020-07-23","2020-07-24 00:00:00","shanghai1","pudong1",1,1),
30+
("2020-08-23","2020-08-23 00:00:00","xian","zhonglou",1,127),
31+
(NULL,NULL,NULL,NULL,NULL,NULL);
32+
-- result:
33+
-- !result
34+
CREATE MATERIALIZED VIEW test_mv1 PARTITION BY k1
35+
DISTRIBUTED BY HASH(k1) BUCKETS 10
36+
REFRESH ASYNC
37+
AS SELECT k1, count(k2) as count_datetime, min(k3) as min_char, max(k4) as max_varchar, sum(k5) as sum_boolean, avg(k6) as avg_tinyint FROM duplicate_table_with_null_partition GROUP BY k1;
38+
-- result:
39+
-- !result
40+
function: wait_async_materialized_view_finish("db_${uuid0}", "test_mv1")
41+
-- result:
42+
None
43+
-- !result
44+
function: print_hit_materialized_view("SELECT k1, count(k2) as count_datetime, min(k3) as min_char, max(k4) as max_varchar, sum(k5) as sum_boolean, avg(k6) as avg_tinyint FROM duplicate_table_with_null_partition GROUP BY k1;", "test_mv1")
45+
-- result:
46+
True
47+
-- !result
48+
SELECT IS_ACTIVE, INACTIVE_REASON FROM information_schema.materialized_views WHERE table_name = 'test_mv1' and TABLE_SCHEMA='db_${uuid0}';
49+
-- result:
50+
true
51+
-- !result
52+
SELECT k1, count(k2) as count_datetime, min(k3) as min_char, max(k4) as max_varchar, sum(k5) as sum_boolean, avg(k6) as avg_tinyint FROM duplicate_table_with_null_partition GROUP BY k1 ORDER BY k1;
53+
-- result:
54+
None 0 None None None None
55+
2020-06-23 1 beijing haidian 1 -128.0
56+
2020-07-23 2 shanghai pudong1 1 0.5
57+
2020-08-23 1 xian zhonglou 1 127.0
58+
-- !result
59+
alter table duplicate_table_with_null_partition rename column k1 to k11;
60+
-- result:
61+
-- !result
62+
[UC]REFRESH MATERIALIZED VIEW test_mv1 with sync mode;
63+
function: print_hit_materialized_view("SELECT count(k2) as count_datetime, min(k3) as min_char, max(k4) as max_varchar, sum(k5) as sum_boolean, avg(k6) as avg_tinyint FROM duplicate_table_with_null_partition", "test_mv1")
64+
-- result:
65+
False
66+
-- !result
67+
SELECT count(k2) as count_datetime, min(k3) as min_char, max(k4) as max_varchar, sum(k5) as sum_boolean, avg(k6) as avg_tinyint FROM duplicate_table_with_null_partition ORDER BY count_datetime, min_char, max_varchar, sum_boolean, avg_tinyint;
68+
-- result:
69+
4 beijing zhonglou 3 0.0
70+
-- !result
71+
INSERT INTO duplicate_table_with_null_partition VALUES ("2020-06-24","2020-06-23 00:00:00","beijing","haidian",-1,-128);
72+
-- result:
73+
-- !result
74+
[UC]REFRESH MATERIALIZED VIEW test_mv1 with sync mode;
75+
SELECT IS_ACTIVE, INACTIVE_REASON FROM information_schema.materialized_views WHERE table_name = 'test_mv1' and TABLE_SCHEMA='db_${uuid0}';
76+
-- result:
77+
false base table schema changed for columns: k1
78+
-- !result
79+
alter table duplicate_table_with_null_partition rename column k11 to k1;
80+
-- result:
81+
-- !result
82+
INSERT INTO duplicate_table_with_null_partition VALUES ("2020-06-24","2020-06-23 00:00:00","beijing","haidian",-1,-128);
83+
-- result:
84+
-- !result
85+
[UC]REFRESH MATERIALIZED VIEW test_mv1 with sync mode;
86+
SELECT IS_ACTIVE, INACTIVE_REASON FROM information_schema.materialized_views WHERE table_name = 'test_mv1' and TABLE_SCHEMA='db_${uuid0}';
87+
-- result:
88+
true
89+
-- !result
90+
function: print_hit_materialized_view("SELECT k1, count(k2) as count_datetime, min(k3) as min_char, max(k4) as max_varchar, sum(k5) as sum_boolean, avg(k6) as avg_tinyint FROM duplicate_table_with_null_partition GROUP BY k1;", "test_mv1")
91+
-- result:
92+
True
93+
-- !result
94+
SELECT k1, count(k2) as count_datetime, min(k3) as min_char, max(k4) as max_varchar, sum(k5) as sum_boolean, avg(k6) as avg_tinyint FROM duplicate_table_with_null_partition GROUP BY k1 ORDER BY k1;
95+
-- result:
96+
None 0 None None None None
97+
2020-06-23 1 beijing haidian 1 -128.0
98+
2020-06-24 2 beijing haidian 2 -128.0
99+
2020-07-23 2 shanghai pudong1 1 0.5
100+
2020-08-23 1 xian zhonglou 1 127.0
101+
-- !result
102+
drop database db_${uuid0} force;
103+
-- result:
104+
-- !result
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
-- name: test_mv_with_schema_change_column_rename
2+
3+
create database db_${uuid0};
4+
use db_${uuid0};
5+
6+
CREATE TABLE `duplicate_table_with_null_partition` (
7+
`k1` date,
8+
`k2` datetime,
9+
`k3` char(20),
10+
`k4` varchar(20),
11+
`k5` boolean,
12+
`k6` tinyint
13+
)
14+
DUPLICATE KEY(`k1`, `k2`, `k3`, `k4`, `k5`)
15+
PARTITION BY RANGE(`k1`)
16+
(
17+
PARTITION `p202006` VALUES LESS THAN ("2020-07-01"),
18+
PARTITION `p202007` VALUES LESS THAN ("2020-08-01"),
19+
PARTITION `p202008` VALUES LESS THAN ("2020-09-01")
20+
)
21+
DISTRIBUTED BY HASH(`k1`, `k2`, `k3`, `k4`, `k5`) BUCKETS 3;
22+
23+
INSERT INTO duplicate_table_with_null_partition VALUES
24+
("2020-06-23","2020-06-23 00:00:00","beijing","haidian",-1,-128),
25+
("2020-07-23","2020-07-23 00:00:00","shanghai","pudong",0,0),
26+
("2020-07-23","2020-07-24 00:00:00","shanghai1","pudong1",1,1),
27+
("2020-08-23","2020-08-23 00:00:00","xian","zhonglou",1,127),
28+
(NULL,NULL,NULL,NULL,NULL,NULL);
29+
30+
CREATE MATERIALIZED VIEW test_mv1 PARTITION BY k1
31+
DISTRIBUTED BY HASH(k1) BUCKETS 10
32+
REFRESH ASYNC
33+
AS SELECT k1, count(k2) as count_datetime, min(k3) as min_char, max(k4) as max_varchar, sum(k5) as sum_boolean, avg(k6) as avg_tinyint FROM duplicate_table_with_null_partition GROUP BY k1;
34+
35+
function: wait_async_materialized_view_finish("db_${uuid0}", "test_mv1")
36+
function: print_hit_materialized_view("SELECT k1, count(k2) as count_datetime, min(k3) as min_char, max(k4) as max_varchar, sum(k5) as sum_boolean, avg(k6) as avg_tinyint FROM duplicate_table_with_null_partition GROUP BY k1;", "test_mv1")
37+
SELECT IS_ACTIVE, INACTIVE_REASON FROM information_schema.materialized_views WHERE table_name = 'test_mv1' and TABLE_SCHEMA='db_${uuid0}';
38+
SELECT k1, count(k2) as count_datetime, min(k3) as min_char, max(k4) as max_varchar, sum(k5) as sum_boolean, avg(k6) as avg_tinyint FROM duplicate_table_with_null_partition GROUP BY k1 ORDER BY k1;
39+
40+
alter table duplicate_table_with_null_partition rename column k1 to k11;
41+
[UC]REFRESH MATERIALIZED VIEW test_mv1 with sync mode;
42+
function: print_hit_materialized_view("SELECT count(k2) as count_datetime, min(k3) as min_char, max(k4) as max_varchar, sum(k5) as sum_boolean, avg(k6) as avg_tinyint FROM duplicate_table_with_null_partition", "test_mv1")
43+
SELECT count(k2) as count_datetime, min(k3) as min_char, max(k4) as max_varchar, sum(k5) as sum_boolean, avg(k6) as avg_tinyint FROM duplicate_table_with_null_partition ORDER BY count_datetime, min_char, max_varchar, sum_boolean, avg_tinyint;
44+
45+
INSERT INTO duplicate_table_with_null_partition VALUES ("2020-06-24","2020-06-23 00:00:00","beijing","haidian",-1,-128);
46+
[UC]REFRESH MATERIALIZED VIEW test_mv1 with sync mode;
47+
SELECT IS_ACTIVE, INACTIVE_REASON FROM information_schema.materialized_views WHERE table_name = 'test_mv1' and TABLE_SCHEMA='db_${uuid0}';
48+
49+
alter table duplicate_table_with_null_partition rename column k11 to k1;
50+
51+
INSERT INTO duplicate_table_with_null_partition VALUES ("2020-06-24","2020-06-23 00:00:00","beijing","haidian",-1,-128);
52+
[UC]REFRESH MATERIALIZED VIEW test_mv1 with sync mode;
53+
SELECT IS_ACTIVE, INACTIVE_REASON FROM information_schema.materialized_views WHERE table_name = 'test_mv1' and TABLE_SCHEMA='db_${uuid0}';
54+
55+
function: print_hit_materialized_view("SELECT k1, count(k2) as count_datetime, min(k3) as min_char, max(k4) as max_varchar, sum(k5) as sum_boolean, avg(k6) as avg_tinyint FROM duplicate_table_with_null_partition GROUP BY k1;", "test_mv1")
56+
SELECT k1, count(k2) as count_datetime, min(k3) as min_char, max(k4) as max_varchar, sum(k5) as sum_boolean, avg(k6) as avg_tinyint FROM duplicate_table_with_null_partition GROUP BY k1 ORDER BY k1;
57+
58+
drop database db_${uuid0} force;

0 commit comments

Comments
 (0)