Skip to content

Commit edd49b5

Browse files
Dreamsorcererpre-commit-ci[bot]webknjaz
authored
Add a graceful shutdown period to allow tasks to complete. (#7188)
When the server is shutting down gracefully, it should wait on pending tasks before running the application shutdown/cleanup steps and cancelling all remaining tasks. This helps ensure that tasks have a chance to finish writing to a DB, handlers can finish responding to clients etc. --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Sviatoslav Sydorenko <[email protected]>
1 parent 82c944c commit edd49b5

File tree

7 files changed

+251
-20
lines changed

7 files changed

+251
-20
lines changed

CHANGES/7188.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added a graceful shutdown period which allows pending tasks to complete before the application's cleanup is called. The period can be adjusted with the ``shutdown_timeout`` parameter -- by :user:`Dreamsorcerer`.

aiohttp/test_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ async def start_server(self, **kwargs: Any) -> None:
115115
if self.runner:
116116
return
117117
self._ssl = kwargs.pop("ssl", None)
118-
self.runner = await self._make_runner(**kwargs)
118+
self.runner = await self._make_runner(handler_cancellation=True, **kwargs)
119119
await self.runner.setup()
120120
if not self.port:
121121
self.port = 0

aiohttp/web_fileresponse.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,4 +283,4 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter
283283
try:
284284
return await self._sendfile(request, fobj, offset, count)
285285
finally:
286-
await loop.run_in_executor(None, fobj.close)
286+
await asyncio.shield(loop.run_in_executor(None, fobj.close))

aiohttp/web_runner.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import signal
33
import socket
44
from abc import ABC, abstractmethod
5+
from contextlib import suppress
56
from typing import Any, List, Optional, Set, Type
67

78
from yarl import URL
@@ -80,11 +81,26 @@ async def stop(self) -> None:
8081
# named pipes do not have wait_closed property
8182
if hasattr(self._server, "wait_closed"):
8283
await self._server.wait_closed()
84+
85+
# Wait for pending tasks for a given time limit.
86+
with suppress(asyncio.TimeoutError):
87+
await asyncio.wait_for(
88+
self._wait(asyncio.current_task()), timeout=self._shutdown_timeout
89+
)
90+
8391
await self._runner.shutdown()
8492
assert self._runner.server
8593
await self._runner.server.shutdown(self._shutdown_timeout)
8694
self._runner._unreg_site(self)
8795

96+
async def _wait(self, parent_task: Optional["asyncio.Task[object]"]) -> None:
    """Wait until no tasks other than the excluded ones are still running.

    The tasks recorded in ``self._runner.starting_tasks``, the task running
    this coroutine and ``parent_task`` are never waited on.  The set of
    pending tasks is recomputed after every wait, so tasks spawned while
    waiting are picked up as well.
    """
    ignored = self._runner.starting_tasks | {asyncio.current_task(), parent_task}
    # TODO(PY38): while pending := asyncio.all_tasks() - ignored:
    while True:
        pending = asyncio.all_tasks() - ignored
        if not pending:
            return
        await asyncio.wait(pending)
103+
88104

89105
class TCPSite(BaseSite):
90106
__slots__ = ("_host", "_port", "_reuse_address", "_reuse_port")
@@ -247,7 +263,7 @@ async def start(self) -> None:
247263

248264

249265
class BaseRunner(ABC):
250-
__slots__ = ("_handle_signals", "_kwargs", "_server", "_sites")
266+
__slots__ = ("starting_tasks", "_handle_signals", "_kwargs", "_server", "_sites")
251267

252268
def __init__(self, *, handle_signals: bool = False, **kwargs: Any) -> None:
253269
self._handle_signals = handle_signals
@@ -287,6 +303,11 @@ async def setup(self) -> None:
287303
pass
288304

289305
self._server = await self._make_server()
306+
# On shutdown we want to avoid waiting on tasks which run forever.
307+
# It's very likely that all tasks which run forever will have been created by
308+
# the time we have completed the application startup (in self._make_server()),
309+
# so we just record all running tasks here and exclude them later.
310+
self.starting_tasks = asyncio.all_tasks()
290311

291312
@abstractmethod
292313
async def shutdown(self) -> None:

docs/web_advanced.rst

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -927,8 +927,14 @@ Graceful shutdown
927927
Stopping *aiohttp web server* by just closing all connections is not
928928
always satisfactory.
929929

930-
The problem is: if application supports :term:`websocket`\s or *data
931-
streaming* it most likely has open connections at server
930+
The first thing aiohttp will do is to stop listening on the sockets,
931+
so new connections will be rejected. It will then wait a few
932+
seconds to allow any pending tasks to complete before continuing
933+
with application shutdown. The timeout can be adjusted with
934+
``shutdown_timeout`` in :func:`run_app`.
935+
936+
Another problem is that if the application supports :term:`websockets <websocket>` or
937+
*data streaming* it most likely has open connections at server
932938
shutdown time.
933939

934940
The *library* has no knowledge of how to close them gracefully but

docs/web_reference.rst

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2688,9 +2688,10 @@ application on specific TCP or Unix socket, e.g.::
26882688

26892689
:param int port: PORT to listen on, ``8080`` if ``None`` (default).
26902690

2691-
:param float shutdown_timeout: a timeout for closing opened
2692-
connections on :meth:`BaseSite.stop`
2693-
call.
2691+
:param float shutdown_timeout: a timeout used for both waiting on pending
2692+
tasks before application shutdown and for
2693+
closing opened connections on
2694+
:meth:`BaseSite.stop` call.
26942695

26952696
:param ssl_context: a :class:`ssl.SSLContext` instance for serving
26962697
SSL/TLS secure server, ``None`` for plain HTTP
@@ -2723,9 +2724,10 @@ application on specific TCP or Unix socket, e.g.::
27232724

27242725
:param str path: PATH to UNIX socket to listen.
27252726

2726-
:param float shutdown_timeout: a timeout for closing opened
2727-
connections on :meth:`BaseSite.stop`
2728-
call.
2727+
:param float shutdown_timeout: a timeout used for both waiting on pending
2728+
tasks before application shutdown and for
2729+
closing opened connections on
2730+
:meth:`BaseSite.stop` call.
27292731

27302732
:param ssl_context: a :class:`ssl.SSLContext` instance for serving
27312733
SSL/TLS secure server, ``None`` for plain HTTP
@@ -2745,9 +2747,10 @@ application on specific TCP or Unix socket, e.g.::
27452747

27462748
:param str path: PATH of named pipe to listen.
27472749

2748-
:param float shutdown_timeout: a timeout for closing opened
2749-
connections on :meth:`BaseSite.stop`
2750-
call.
2750+
:param float shutdown_timeout: a timeout used for both waiting on pending
2751+
tasks before application shutdown and for
2752+
closing opened connections on
2753+
:meth:`BaseSite.stop` call.
27512754

27522755
.. class:: SockSite(runner, sock, *, \
27532756
shutdown_timeout=60.0, ssl_context=None, \
@@ -2759,9 +2762,10 @@ application on specific TCP or Unix socket, e.g.::
27592762

27602763
:param sock: A :ref:`socket instance <socket-objects>` to listen to.
27612764

2762-
:param float shutdown_timeout: a timeout for closing opened
2763-
connections on :meth:`BaseSite.stop`
2764-
call.
2765+
:param float shutdown_timeout: a timeout used for both waiting on pending
2766+
tasks before application shutdown and for
2767+
closing opened connections on
2768+
:meth:`BaseSite.stop` call.
27652769

27662770
:param ssl_context: a :class:`ssl.SSLContext` instance for serving
27672771
SSL/TLS secure server, ``None`` for plain HTTP
@@ -2857,9 +2861,13 @@ Utilities
28572861
shutdown before disconnecting all
28582862
open client sockets the hard way.
28592863

2864+
This is used as a delay to wait for
2865+
pending tasks to complete and then
2866+
again to close any pending connections.
2867+
28602868
A system with properly
28612869
:ref:`aiohttp-web-graceful-shutdown`
2862-
implemented never waits for this
2870+
implemented never waits for the second
28632871
timeout but closes a server in a few
28642872
milliseconds.
28652873

tests/test_run_app.py

Lines changed: 197 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,15 @@
99
import ssl
1010
import subprocess
1111
import sys
12-
from typing import Any
12+
import time
13+
from typing import Any, Callable, NoReturn
1314
from unittest import mock
1415
from uuid import uuid4
1516

1617
import pytest
1718
from conftest import needs_unix
1819

19-
from aiohttp import web
20+
from aiohttp import ClientConnectorError, ClientSession, web
2021
from aiohttp.test_utils import make_mocked_coro
2122
from aiohttp.web_runner import BaseRunner
2223

@@ -926,3 +927,197 @@ async def init():
926927

927928
web.run_app(init(), print=stopper(patched_loop), loop=patched_loop)
928929
assert count == 3
930+
931+
932+
class TestShutdown:
    """End-to-end tests for the graceful shutdown period of ``web.run_app()``."""

    def raiser(self) -> NoReturn:
        """Raise ``KeyboardInterrupt``; used as a loop callback by :meth:`stop`."""
        raise KeyboardInterrupt

    async def stop(self, request: web.Request) -> web.Response:
        """Handle ``/stop``: schedule a ``KeyboardInterrupt`` and respond."""
        # call_soon() defers the interrupt until after this handler has returned.
        asyncio.get_running_loop().call_soon(self.raiser)
        return web.Response()

    def run_app(self, port: int, timeout: int, task, extra_test=None) -> asyncio.Task:
        """Run an application until ``/stop`` is requested.

        :param port: port passed to ``web.run_app()``.
        :param timeout: value passed to ``web.run_app()`` as ``shutdown_timeout``.
        :param task: coroutine function; the ``/`` handler runs it as a
                     background task.
        :param extra_test: optional coroutine function awaited with the client
                           session once ``/stop`` has been requested.
        :returns: the background task created by the ``/`` handler.
        """

        async def test() -> None:
            await asyncio.sleep(1)
            async with ClientSession() as sess:
                async with sess.get(f"http://localhost:{port}/"):
                    pass
                async with sess.get(f"http://localhost:{port}/stop"):
                    pass

                if extra_test:
                    await extra_test(sess)

        async def run_test(app: web.Application) -> None:
            # Cleanup context: start the client on startup, await it on cleanup.
            nonlocal test_task
            test_task = asyncio.create_task(test())
            yield
            await test_task

        async def handler(request: web.Request) -> web.Response:
            nonlocal t
            t = asyncio.create_task(task())
            return web.Response(text="FOO")

        t = test_task = None
        app = web.Application()
        app.cleanup_ctx.append(run_test)
        app.router.add_get("/", handler)
        app.router.add_get("/stop", self.stop)

        web.run_app(app, port=port, shutdown_timeout=timeout)
        assert test_task.exception() is None
        return t

    def test_shutdown_wait_for_task(
        self, aiohttp_unused_port: Callable[[], int]
    ) -> None:
        """A 2 second task completes when the shutdown timeout is 3 seconds."""
        port = aiohttp_unused_port()
        finished = False

        async def task():
            nonlocal finished
            await asyncio.sleep(2)
            finished = True

        t = self.run_app(port, 3, task)

        assert finished is True
        assert t.done()
        assert not t.cancelled()

    def test_shutdown_timeout_task(
        self, aiohttp_unused_port: Callable[[], int]
    ) -> None:
        """A 2 second task is cancelled when the shutdown timeout is 1 second."""
        port = aiohttp_unused_port()
        finished = False

        async def task():
            nonlocal finished
            await asyncio.sleep(2)
            finished = True

        t = self.run_app(port, 1, task)

        assert finished is False
        assert t.done()
        assert t.cancelled()

    def test_shutdown_wait_for_spawned_task(
        self, aiohttp_unused_port: Callable[[], int]
    ) -> None:
        """A task spawned by a pending task during shutdown is waited on too."""
        port = aiohttp_unused_port()
        finished = False
        finished_sub = False
        sub_t = None

        async def sub_task():
            nonlocal finished_sub
            await asyncio.sleep(1.5)
            finished_sub = True

        async def task():
            nonlocal finished, sub_t
            await asyncio.sleep(0.5)
            sub_t = asyncio.create_task(sub_task())
            finished = True

        t = self.run_app(port, 3, task)

        assert finished is True
        assert t.done()
        assert not t.cancelled()
        assert finished_sub is True
        assert sub_t.done()
        assert not sub_t.cancelled()

    def test_shutdown_timeout_not_reached(
        self, aiohttp_unused_port: Callable[[], int]
    ) -> None:
        """Shutdown ends once tasks finish rather than after the full timeout."""
        port = aiohttp_unused_port()
        finished = False

        async def task():
            nonlocal finished
            await asyncio.sleep(1)
            finished = True

        # Use a monotonic clock: wall-clock adjustments must not affect the
        # measured duration.
        start_time = time.monotonic()
        t = self.run_app(port, 15, task)

        assert finished is True
        assert t.done()
        # Verify run_app has not waited for timeout.
        assert time.monotonic() - start_time < 10

    def test_shutdown_new_conn_rejected(
        self, aiohttp_unused_port: Callable[[], int]
    ) -> None:
        """New connections are refused while shutdown waits on a pending task."""
        port = aiohttp_unused_port()
        finished = False

        async def task() -> None:
            nonlocal finished
            await asyncio.sleep(9)
            finished = True

        async def test(sess: ClientSession) -> None:
            # ``sess`` is supplied by run_app() but deliberately not used here.
            # Ensure we are in the middle of shutdown (waiting for task()).
            await asyncio.sleep(1)
            with pytest.raises(ClientConnectorError):
                # Use a new session to try and open a new connection.
                async with ClientSession() as new_sess:
                    async with new_sess.get(f"http://localhost:{port}/"):
                        pass
            assert finished is False

        t = self.run_app(port, 10, task, test)

        assert finished is True
        assert t.done()

    def test_shutdown_pending_handler_responds(
        self, aiohttp_unused_port: Callable[[], int]
    ) -> None:
        """A handler already in progress at shutdown still sends its response."""
        port = aiohttp_unused_port()
        finished = False

        async def test() -> None:
            async def test_resp(sess):
                async with sess.get(f"http://localhost:{port}/") as resp:
                    assert await resp.text() == "FOO"

            await asyncio.sleep(1)
            async with ClientSession() as sess:
                resp_task = asyncio.create_task(test_resp(sess))
                await asyncio.sleep(1)
                # Handler is in-progress while we trigger server shutdown.
                async with sess.get(f"http://localhost:{port}/stop"):
                    pass

                assert finished is False
                # Handler should still complete and produce a response.
                await resp_task

        async def run_test(app: web.Application) -> None:
            # Cleanup context: start the client on startup, await it on cleanup.
            nonlocal t
            t = asyncio.create_task(test())
            yield
            await t

        async def handler(request: web.Request) -> web.Response:
            nonlocal finished
            await asyncio.sleep(3)
            finished = True
            return web.Response(text="FOO")

        t = None
        app = web.Application()
        app.cleanup_ctx.append(run_test)
        app.router.add_get("/", handler)
        app.router.add_get("/stop", self.stop)

        web.run_app(app, port=port, shutdown_timeout=5)
        assert t.exception() is None
        assert finished is True

0 commit comments

Comments
 (0)