Skip to content

Commit 5121f92

Browse files
devonherikjohnston
andauthored
Add background job to clear unreferenced state groups (#18154)
Fixes #18150 ### Pull Request Checklist <!-- Please read https://element-hq.github.io/synapse/latest/development/contributing_guide.html before submitting your pull request --> * [X] Pull request is based on the develop branch * [x] Pull request includes a [changelog file](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#changelog). The entry should: - Be a short description of your change which makes sense to users. "Fixed a bug that prevented receiving messages from other servers." instead of "Moved X method from `EventStore` to `EventWorkerStore`.". - Use markdown where necessary, mostly for `code blocks`. - End with either a period (.) or an exclamation mark (!). - Start with a capital letter. - Feel free to credit yourself, by adding a sentence "Contributed by @github_username." or "Contributed by [Your Name]." to the end of the entry. * [X] [Code style](https://element-hq.github.io/synapse/latest/code_style.html) is correct (run the [linters](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#run-the-linters)) --------- Co-authored-by: Erik Johnston <[email protected]>
1 parent b9276e2 commit 5121f92

File tree

10 files changed

+375
-74
lines changed

10 files changed

+375
-74
lines changed

changelog.d/18154.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add background job to clear unreferenced state groups.

docs/development/database_schema.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ by a unique name, the current status (stored in JSON), and some dependency infor
162162
* Whether the update requires a previous update to be complete.
163163
* A rough ordering for which to complete updates.
164164

165-
A new background updates needs to be added to the `background_updates` table:
165+
A new background update needs to be added to the `background_updates` table:
166166

167167
```sql
168168
INSERT INTO background_updates (ordering, update_name, depends_on, progress_json) VALUES

synapse/_scripts/synapse_port_db.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,11 @@
191191

192192

193193
IGNORED_TABLES = {
194+
# Porting the auto generated sequence in this table is non-trivial.
195+
# None of the entries in this list are mandatory for Synapse to keep working.
196+
# If state group disk space is an issue after the port, the
197+
# `delete_unreferenced_state_groups_bg_update` background task can be run again.
198+
"state_groups_pending_deletion",
194199
# We don't port these tables, as they're a faff and we can regenerate
195200
# them anyway.
196201
"user_directory",
@@ -216,6 +221,15 @@
216221
}
217222

218223

224+
# These background updates will not be applied upon creation of the postgres database.
225+
IGNORED_BACKGROUND_UPDATES = {
226+
# Reapplying this background update to the postgres database is unnecessary after
227+
# already having waited for the SQLite database to complete all running background
228+
# updates.
229+
"delete_unreferenced_state_groups_bg_update",
230+
}
231+
232+
219233
# Error returned by the run function. Used at the top-level part of the script to
220234
# handle errors and return codes.
221235
end_error: Optional[str] = None
@@ -687,6 +701,20 @@ def _is_sqlite_autovacuum_enabled(txn: LoggingTransaction) -> bool:
687701
# 0 means off. 1 means full. 2 means incremental.
688702
return autovacuum_setting != 0
689703

704+
async def remove_ignored_background_updates_from_database(self) -> None:
705+
def _remove_delete_unreferenced_state_groups_bg_updates(
706+
txn: LoggingTransaction,
707+
) -> None:
708+
txn.execute(
709+
"DELETE FROM background_updates WHERE update_name = ANY(?)",
710+
(list(IGNORED_BACKGROUND_UPDATES),),
711+
)
712+
713+
await self.postgres_store.db_pool.runInteraction(
714+
"remove_delete_unreferenced_state_groups_bg_updates",
715+
_remove_delete_unreferenced_state_groups_bg_updates,
716+
)
717+
690718
async def run(self) -> None:
691719
"""Ports the SQLite database to a PostgreSQL database.
692720
@@ -732,6 +760,8 @@ async def run(self) -> None:
732760
self.hs_config.database.get_single_database()
733761
)
734762

763+
await self.remove_ignored_background_updates_from_database()
764+
735765
await self.run_background_updates_on_postgres()
736766

737767
self.progress.set_state("Creating port tables")

synapse/storage/controllers/purge_events.py

Lines changed: 183 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,18 @@
2121

2222
import itertools
2323
import logging
24-
from typing import TYPE_CHECKING, Collection, Mapping, Set
24+
from typing import (
25+
TYPE_CHECKING,
26+
Collection,
27+
Mapping,
28+
Set,
29+
)
2530

2631
from synapse.logging.context import nested_logging_context
2732
from synapse.metrics.background_process_metrics import wrap_as_background_process
33+
from synapse.storage.database import LoggingTransaction
2834
from synapse.storage.databases import Databases
35+
from synapse.types.storage import _BackgroundUpdates
2936

3037
if TYPE_CHECKING:
3138
from synapse.server import HomeServer
@@ -44,6 +51,11 @@ def __init__(self, hs: "HomeServer", stores: Databases):
4451
self._delete_state_groups_loop, 60 * 1000
4552
)
4653

54+
self.stores.state.db_pool.updates.register_background_update_handler(
55+
_BackgroundUpdates.DELETE_UNREFERENCED_STATE_GROUPS_BG_UPDATE,
56+
self._background_delete_unrefereneced_state_groups,
57+
)
58+
4759
async def purge_room(self, room_id: str) -> None:
4860
"""Deletes all record of a room"""
4961

@@ -80,68 +92,6 @@ async def purge_history(
8092
sg_to_delete
8193
)
8294

83-
async def _find_unreferenced_groups(
84-
self, state_groups: Collection[int]
85-
) -> Set[int]:
86-
"""Used when purging history to figure out which state groups can be
87-
deleted.
88-
89-
Args:
90-
state_groups: Set of state groups referenced by events
91-
that are going to be deleted.
92-
93-
Returns:
94-
The set of state groups that can be deleted.
95-
"""
96-
# Set of events that we have found to be referenced by events
97-
referenced_groups = set()
98-
99-
# Set of state groups we've already seen
100-
state_groups_seen = set(state_groups)
101-
102-
# Set of state groups to handle next.
103-
next_to_search = set(state_groups)
104-
while next_to_search:
105-
# We bound size of groups we're looking up at once, to stop the
106-
# SQL query getting too big
107-
if len(next_to_search) < 100:
108-
current_search = next_to_search
109-
next_to_search = set()
110-
else:
111-
current_search = set(itertools.islice(next_to_search, 100))
112-
next_to_search -= current_search
113-
114-
referenced = await self.stores.main.get_referenced_state_groups(
115-
current_search
116-
)
117-
referenced_groups |= referenced
118-
119-
# We don't continue iterating up the state group graphs for state
120-
# groups that are referenced.
121-
current_search -= referenced
122-
123-
edges = await self.stores.state.get_previous_state_groups(current_search)
124-
125-
prevs = set(edges.values())
126-
# We don't bother re-handling groups we've already seen
127-
prevs -= state_groups_seen
128-
next_to_search |= prevs
129-
state_groups_seen |= prevs
130-
131-
# We also check to see if anything referencing the state groups are
132-
# also unreferenced. This helps ensure that we delete unreferenced
133-
# state groups, if we don't then we will de-delta them when we
134-
# delete the other state groups leading to increased DB usage.
135-
next_edges = await self.stores.state.get_next_state_groups(current_search)
136-
nexts = set(next_edges.keys())
137-
nexts -= state_groups_seen
138-
next_to_search |= nexts
139-
state_groups_seen |= nexts
140-
141-
to_delete = state_groups_seen - referenced_groups
142-
143-
return to_delete
144-
14595
@wrap_as_background_process("_delete_state_groups_loop")
14696
async def _delete_state_groups_loop(self) -> None:
14797
"""Background task that deletes any state groups that may be pending
@@ -203,3 +153,173 @@ async def _delete_state_groups(
203153
room_id,
204154
groups_to_sequences,
205155
)
156+
157+
async def _background_delete_unrefereneced_state_groups(
158+
self, progress: dict, batch_size: int
159+
) -> int:
160+
"""This background update will slowly delete any unreferenced state groups"""
161+
162+
last_checked_state_group = progress.get("last_checked_state_group")
163+
max_state_group = progress.get("max_state_group")
164+
165+
if last_checked_state_group is None or max_state_group is None:
166+
# This is the first run.
167+
last_checked_state_group = 0
168+
169+
max_state_group = await self.stores.state.db_pool.simple_select_one_onecol(
170+
table="state_groups",
171+
keyvalues={},
172+
retcol="MAX(id)",
173+
allow_none=True,
174+
desc="get_max_state_group",
175+
)
176+
if max_state_group is None:
177+
# There are no state groups so the background process is finished.
178+
await self.stores.state.db_pool.updates._end_background_update(
179+
_BackgroundUpdates.DELETE_UNREFERENCED_STATE_GROUPS_BG_UPDATE
180+
)
181+
return batch_size
182+
183+
(
184+
last_checked_state_group,
185+
final_batch,
186+
) = await self._delete_unreferenced_state_groups_batch(
187+
last_checked_state_group, batch_size, max_state_group
188+
)
189+
190+
if not final_batch:
191+
# There are more state groups to check.
192+
progress = {
193+
"last_checked_state_group": last_checked_state_group,
194+
"max_state_group": max_state_group,
195+
}
196+
await self.stores.state.db_pool.updates._background_update_progress(
197+
_BackgroundUpdates.DELETE_UNREFERENCED_STATE_GROUPS_BG_UPDATE,
198+
progress,
199+
)
200+
else:
201+
# This background process is finished.
202+
await self.stores.state.db_pool.updates._end_background_update(
203+
_BackgroundUpdates.DELETE_UNREFERENCED_STATE_GROUPS_BG_UPDATE
204+
)
205+
206+
return batch_size
207+
208+
async def _delete_unreferenced_state_groups_batch(
209+
self,
210+
last_checked_state_group: int,
211+
batch_size: int,
212+
max_state_group: int,
213+
) -> tuple[int, bool]:
214+
"""Looks for unreferenced state groups starting from the last state group
215+
checked, and any state groups which would become unreferenced if a state group
216+
was deleted, and marks them for deletion.
217+
218+
Args:
219+
last_checked_state_group: The last state group that was checked.
220+
batch_size: How many state groups to process in this iteration.
221+
222+
Returns:
223+
(last_checked_state_group, final_batch)
224+
"""
225+
226+
# Look for state groups that can be cleaned up.
227+
def get_next_state_groups_txn(txn: LoggingTransaction) -> Set[int]:
228+
state_group_sql = "SELECT id FROM state_groups WHERE ? < id AND id <= ? ORDER BY id LIMIT ?"
229+
txn.execute(
230+
state_group_sql, (last_checked_state_group, max_state_group, batch_size)
231+
)
232+
233+
next_set = {row[0] for row in txn}
234+
235+
return next_set
236+
237+
next_set = await self.stores.state.db_pool.runInteraction(
238+
"get_next_state_groups", get_next_state_groups_txn
239+
)
240+
241+
final_batch = False
242+
if len(next_set) < batch_size:
243+
final_batch = True
244+
else:
245+
last_checked_state_group = max(next_set)
246+
247+
if len(next_set) == 0:
248+
return last_checked_state_group, final_batch
249+
250+
# Find all state groups that can be deleted if the original set is deleted.
251+
# This set includes the original set, as well as any state groups that would
252+
# become unreferenced upon deleting the original set.
253+
to_delete = await self._find_unreferenced_groups(next_set)
254+
255+
if len(to_delete) == 0:
256+
return last_checked_state_group, final_batch
257+
258+
await self.stores.state_deletion.mark_state_groups_as_pending_deletion(
259+
to_delete
260+
)
261+
262+
return last_checked_state_group, final_batch
263+
264+
async def _find_unreferenced_groups(
265+
self,
266+
state_groups: Collection[int],
267+
) -> Set[int]:
268+
"""Used when purging history to figure out which state groups can be
269+
deleted.
270+
271+
Args:
272+
state_groups: Set of state groups referenced by events
273+
that are going to be deleted.
274+
275+
Returns:
276+
The set of state groups that can be deleted.
277+
"""
278+
# Set of events that we have found to be referenced by events
279+
referenced_groups = set()
280+
281+
# Set of state groups we've already seen
282+
state_groups_seen = set(state_groups)
283+
284+
# Set of state groups to handle next.
285+
next_to_search = set(state_groups)
286+
while next_to_search:
287+
# We bound size of groups we're looking up at once, to stop the
288+
# SQL query getting too big
289+
if len(next_to_search) < 100:
290+
current_search = next_to_search
291+
next_to_search = set()
292+
else:
293+
current_search = set(itertools.islice(next_to_search, 100))
294+
next_to_search -= current_search
295+
296+
referenced = await self.stores.main.get_referenced_state_groups(
297+
current_search
298+
)
299+
referenced_groups |= referenced
300+
301+
# We don't continue iterating up the state group graphs for state
302+
# groups that are referenced.
303+
current_search -= referenced
304+
305+
edges = await self.stores.state.get_previous_state_groups(current_search)
306+
307+
prevs = set(edges.values())
308+
# We don't bother re-handling groups we've already seen
309+
prevs -= state_groups_seen
310+
next_to_search |= prevs
311+
state_groups_seen |= prevs
312+
313+
# We also check to see if anything referencing the state groups are
314+
# also unreferenced. This helps ensure that we delete unreferenced
315+
# state groups, if we don't then we will de-delta them when we
316+
# delete the other state groups leading to increased DB usage.
317+
next_edges = await self.stores.state.get_next_state_groups(current_search)
318+
nexts = set(next_edges.keys())
319+
nexts -= state_groups_seen
320+
next_to_search |= nexts
321+
state_groups_seen |= nexts
322+
323+
to_delete = state_groups_seen - referenced_groups
324+
325+
return to_delete

synapse/storage/databases/state/bg_updates.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,15 @@
2020
#
2121

2222
import logging
23-
from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Tuple, Union
23+
from typing import (
24+
TYPE_CHECKING,
25+
Dict,
26+
List,
27+
Mapping,
28+
Optional,
29+
Tuple,
30+
Union,
31+
)
2432

2533
from synapse.logging.opentracing import tag_args, trace
2634
from synapse.storage._base import SQLBaseStore

0 commit comments

Comments
 (0)