Skip to content

Commit f18b03f

Browse files
kkdwivedi authored and Alexei Starovoitov committed
bpf: Implement BPF exceptions
This patch implements BPF exceptions, and introduces a bpf_throw kfunc to allow programs to throw exceptions during their execution at runtime. A bpf_throw invocation is treated as an immediate termination of the program, returning back to its caller within the kernel, unwinding all stack frames. This allows the program to simplify its implementation, by testing for runtime conditions which the verifier has no visibility into, and assert that they are true. In case they are not, the program can simply throw an exception from the other branch. BPF exceptions are explicitly *NOT* an unlikely slowpath error handling primitive, and this objective has guided design choices of the implementation of the them within the kernel (with the bulk of the cost for unwinding the stack offloaded to the bpf_throw kfunc). The implementation of this mechanism requires use of add_hidden_subprog mechanism introduced in the previous patch, which generates a couple of instructions to move R1 to R0 and exit. The JIT then rewrites the prologue of this subprog to take the stack pointer and frame pointer as inputs and reset the stack frame, popping all callee-saved registers saved by the main subprog. The bpf_throw function then walks the stack at runtime, and invokes this exception subprog with the stack and frame pointers as parameters. Reviewers must take note that currently the main program is made to save all callee-saved registers on x86_64 during entry into the program. This is because we must do an equivalent of a lightweight context switch when unwinding the stack, therefore we need the callee-saved registers of the caller of the BPF program to be able to return with a sane state. Note that we have to additionally handle r12, even though it is not used by the program, because when throwing the exception the program makes an entry into the kernel which could clobber r12 after saving it on the stack. 
To be able to preserve the value we received on program entry, we push r12 and restore it from the generated subprogram when unwinding the stack. For now, bpf_throw invocation fails when lingering resources or locks exist in that path of the program. In a future followup, bpf_throw will be extended to perform frame-by-frame unwinding to release lingering resources for each stack frame, removing this limitation. Signed-off-by: Kumar Kartikeya Dwivedi <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent 335d1c5 commit f18b03f

File tree

8 files changed

+247
-27
lines changed

8 files changed

+247
-27
lines changed

arch/x86/net/bpf_jit_comp.c

Lines changed: 77 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
#include <asm/text-patching.h>
1919
#include <asm/unwind.h>
2020

21+
/* Pretend all four callee-saved regs (rbx, r13, r14, r15) are used, so that
 * push/pop_callee_regs() save and restore every one of them regardless of
 * what the program actually touches (needed at an exception boundary).
 */
static bool all_callee_regs_used[4] = {true, true, true, true};
22+
2123
static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
2224
{
2325
if (len == 1)
@@ -256,6 +258,14 @@ struct jit_context {
256258
/* Number of bytes that will be skipped on tailcall */
257259
#define X86_TAIL_CALL_OFFSET (11 + ENDBR_INSN_SIZE)
258260

261+
/* Emit "push r12". r12 is not mapped to any BPF register, but an
 * exception-boundary prog must still preserve it: throwing enters the
 * kernel, which may clobber r12 after the program saved it.
 */
static void push_r12(u8 **pprog)
{
	u8 *prog = *pprog;

	EMIT2(0x41, 0x54); /* push r12 */
	*pprog = prog;
}
268+
259269
static void push_callee_regs(u8 **pprog, bool *callee_regs_used)
260270
{
261271
u8 *prog = *pprog;
@@ -271,6 +281,14 @@ static void push_callee_regs(u8 **pprog, bool *callee_regs_used)
271281
*pprog = prog;
272282
}
273283

284+
/* Emit "pop r12" — counterpart of push_r12(), used when unwinding an
 * exception-boundary frame or exiting the program.
 */
static void pop_r12(u8 **pprog)
{
	u8 *prog = *pprog;

	EMIT2(0x41, 0x5C); /* pop r12 */
	*pprog = prog;
}
291+
274292
static void pop_callee_regs(u8 **pprog, bool *callee_regs_used)
275293
{
276294
u8 *prog = *pprog;
@@ -292,7 +310,8 @@ static void pop_callee_regs(u8 **pprog, bool *callee_regs_used)
292310
* while jumping to another program
293311
*/
294312
static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
295-
bool tail_call_reachable, bool is_subprog)
313+
bool tail_call_reachable, bool is_subprog,
314+
bool is_exception_cb)
296315
{
297316
u8 *prog = *pprog;
298317

@@ -312,8 +331,22 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
312331
/* Keep the same instruction layout. */
313332
EMIT2(0x66, 0x90); /* nop2 */
314333
}
315-
EMIT1(0x55); /* push rbp */
316-
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
334+
/* Exception callback receives FP as third parameter */
335+
if (is_exception_cb) {
336+
EMIT3(0x48, 0x89, 0xF4); /* mov rsp, rsi */
337+
EMIT3(0x48, 0x89, 0xD5); /* mov rbp, rdx */
338+
/* The main frame must have exception_boundary as true, so we
339+
* first restore those callee-saved regs from stack, before
340+
* reusing the stack frame.
341+
*/
342+
pop_callee_regs(&prog, all_callee_regs_used);
343+
pop_r12(&prog);
344+
/* Reset the stack frame. */
345+
EMIT3(0x48, 0x89, 0xEC); /* mov rsp, rbp */
346+
} else {
347+
EMIT1(0x55); /* push rbp */
348+
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
349+
}
317350

318351
/* X86_TAIL_CALL_OFFSET is here */
319352
EMIT_ENDBR();
@@ -472,7 +505,8 @@ static void emit_return(u8 **pprog, u8 *ip)
472505
* goto *(prog->bpf_func + prologue_size);
473506
* out:
474507
*/
475-
static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
508+
static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
509+
u8 **pprog, bool *callee_regs_used,
476510
u32 stack_depth, u8 *ip,
477511
struct jit_context *ctx)
478512
{
@@ -522,7 +556,12 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
522556
offset = ctx->tail_call_indirect_label - (prog + 2 - start);
523557
EMIT2(X86_JE, offset); /* je out */
524558

525-
pop_callee_regs(&prog, callee_regs_used);
559+
if (bpf_prog->aux->exception_boundary) {
560+
pop_callee_regs(&prog, all_callee_regs_used);
561+
pop_r12(&prog);
562+
} else {
563+
pop_callee_regs(&prog, callee_regs_used);
564+
}
526565

527566
EMIT1(0x58); /* pop rax */
528567
if (stack_depth)
@@ -546,7 +585,8 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
546585
*pprog = prog;
547586
}
548587

549-
static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
588+
static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
589+
struct bpf_jit_poke_descriptor *poke,
550590
u8 **pprog, u8 *ip,
551591
bool *callee_regs_used, u32 stack_depth,
552592
struct jit_context *ctx)
@@ -575,7 +615,13 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
575615
emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE,
576616
poke->tailcall_bypass);
577617

578-
pop_callee_regs(&prog, callee_regs_used);
618+
if (bpf_prog->aux->exception_boundary) {
619+
pop_callee_regs(&prog, all_callee_regs_used);
620+
pop_r12(&prog);
621+
} else {
622+
pop_callee_regs(&prog, callee_regs_used);
623+
}
624+
579625
EMIT1(0x58); /* pop rax */
580626
if (stack_depth)
581627
EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
@@ -1050,8 +1096,20 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
10501096

10511097
emit_prologue(&prog, bpf_prog->aux->stack_depth,
10521098
bpf_prog_was_classic(bpf_prog), tail_call_reachable,
1053-
bpf_is_subprog(bpf_prog));
1054-
push_callee_regs(&prog, callee_regs_used);
1099+
bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
1100+
/* Exception callback will clobber callee regs for its own use, and
1101+
* restore the original callee regs from main prog's stack frame.
1102+
*/
1103+
if (bpf_prog->aux->exception_boundary) {
1104+
/* We also need to save r12, which is not mapped to any BPF
1105+
* register, as we throw after entry into the kernel, which may
1106+
* overwrite r12.
1107+
*/
1108+
push_r12(&prog);
1109+
push_callee_regs(&prog, all_callee_regs_used);
1110+
} else {
1111+
push_callee_regs(&prog, callee_regs_used);
1112+
}
10551113

10561114
ilen = prog - temp;
10571115
if (rw_image)
@@ -1648,13 +1706,15 @@ st: if (is_imm8(insn->off))
16481706

16491707
case BPF_JMP | BPF_TAIL_CALL:
16501708
if (imm32)
1651-
emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
1709+
emit_bpf_tail_call_direct(bpf_prog,
1710+
&bpf_prog->aux->poke_tab[imm32 - 1],
16521711
&prog, image + addrs[i - 1],
16531712
callee_regs_used,
16541713
bpf_prog->aux->stack_depth,
16551714
ctx);
16561715
else
1657-
emit_bpf_tail_call_indirect(&prog,
1716+
emit_bpf_tail_call_indirect(bpf_prog,
1717+
&prog,
16581718
callee_regs_used,
16591719
bpf_prog->aux->stack_depth,
16601720
image + addrs[i - 1],
@@ -1907,7 +1967,12 @@ st: if (is_imm8(insn->off))
19071967
seen_exit = true;
19081968
/* Update cleanup_addr */
19091969
ctx->cleanup_addr = proglen;
1910-
pop_callee_regs(&prog, callee_regs_used);
1970+
if (bpf_prog->aux->exception_boundary) {
1971+
pop_callee_regs(&prog, all_callee_regs_used);
1972+
pop_r12(&prog);
1973+
} else {
1974+
pop_callee_regs(&prog, callee_regs_used);
1975+
}
19111976
EMIT1(0xC9); /* leave */
19121977
emit_return(&prog, image + addrs[i - 1] + (prog - temp));
19131978
break;

include/linux/bpf.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1410,6 +1410,8 @@ struct bpf_prog_aux {
14101410
bool sleepable;
14111411
bool tail_call_reachable;
14121412
bool xdp_has_frags;
1413+
bool exception_cb;
1414+
bool exception_boundary;
14131415
/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
14141416
const struct btf_type *attach_func_proto;
14151417
/* function name for valid attach_btf_id */
@@ -1432,6 +1434,7 @@ struct bpf_prog_aux {
14321434
int cgroup_atype; /* enum cgroup_bpf_attach_type */
14331435
struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
14341436
char name[BPF_OBJ_NAME_LEN];
1437+
unsigned int (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp);
14351438
#ifdef CONFIG_SECURITY
14361439
void *security;
14371440
#endif

include/linux/bpf_verifier.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -541,7 +541,9 @@ struct bpf_subprog_info {
541541
bool has_tail_call;
542542
bool tail_call_reachable;
543543
bool has_ld_abs;
544+
bool is_cb;
544545
bool is_async_cb;
546+
bool is_exception_cb;
545547
};
546548

547549
struct bpf_verifier_env;
@@ -589,13 +591,15 @@ struct bpf_verifier_env {
589591
u32 used_btf_cnt; /* number of used BTF objects */
590592
u32 id_gen; /* used to generate unique reg IDs */
591593
u32 hidden_subprog_cnt; /* number of hidden subprogs */
594+
int exception_callback_subprog;
592595
bool explore_alu_limits;
593596
bool allow_ptr_leaks;
594597
bool allow_uninit_stack;
595598
bool bpf_capable;
596599
bool bypass_spec_v1;
597600
bool bypass_spec_v4;
598601
bool seen_direct_write;
602+
bool seen_exception;
599603
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
600604
const struct bpf_line_info *prev_linfo;
601605
struct bpf_verifier_log log;

include/linux/filter.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1171,6 +1171,7 @@ const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
11711171
bool is_bpf_text_address(unsigned long addr);
11721172
int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
11731173
char *sym);
1174+
struct bpf_prog *bpf_prog_ksym_find(unsigned long addr);
11741175

11751176
static inline const char *
11761177
bpf_address_lookup(unsigned long addr, unsigned long *size,
@@ -1238,6 +1239,11 @@ static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value,
12381239
return -ERANGE;
12391240
}
12401241

1242+
/* Stub used when the real bpf_prog_ksym_find() in kernel/bpf/core.c is not
 * built in: no BPF program can own any text address, so always report no
 * match.  NOTE(review): the exact config gating this #else branch is outside
 * this hunk — confirm against include/linux/filter.h.
 */
static inline struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
{
	return NULL;
}
1246+
12411247
static inline const char *
12421248
bpf_address_lookup(unsigned long addr, unsigned long *size,
12431249
unsigned long *off, char **modname, char *sym)

kernel/bpf/core.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -733,7 +733,7 @@ bool is_bpf_text_address(unsigned long addr)
733733
return ret;
734734
}
735735

736-
static struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
736+
struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
737737
{
738738
struct bpf_ksym *ksym = bpf_ksym_find(addr);
739739

kernel/bpf/helpers.c

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2449,6 +2449,43 @@ __bpf_kfunc void bpf_rcu_read_unlock(void)
24492449
rcu_read_unlock();
24502450
}
24512451

2452+
/* State accumulated by bpf_stack_walker() while bpf_throw() unwinds the
 * stack looking for the main BPF program's frame.
 */
struct bpf_throw_ctx {
	struct bpf_prog_aux *aux;	/* aux of the main (non-subprog) prog found */
	u64 sp;				/* stack pointer of that frame */
	u64 bp;				/* frame pointer of that frame */
	int cnt;			/* number of BPF frames encountered so far */
};
2458+
2459+
/* Callback for arch_bpf_stack_walk(): walk frames until the main BPF
 * program's frame is reached, recording its prog aux, SP and BP in the
 * bpf_throw_ctx passed via @cookie.  Returning true continues the walk,
 * returning false stops it.
 */
static bool bpf_stack_walker(void *cookie, u64 ip, u64 sp, u64 bp)
{
	struct bpf_throw_ctx *ctx = cookie;
	struct bpf_prog *prog;

	if (!is_bpf_text_address(ip))
		/* Non-BPF frames (e.g. the kfunc call chain above us) are only
		 * skipped while no BPF frame has been seen yet; once we have
		 * seen BPF frames, leaving BPF text means we walked past the
		 * program, so stop.
		 */
		return !ctx->cnt;
	prog = bpf_prog_ksym_find(ip);
	ctx->cnt++;
	if (bpf_is_subprog(prog))
		/* Still inside a subprog frame; keep unwinding towards the
		 * main program.
		 */
		return true;
	/* Main prog frame found: capture what bpf_throw() needs, stop walk. */
	ctx->aux = prog->aux;
	ctx->sp = sp;
	ctx->bp = bp;
	return false;
}
2475+
2476+
/* bpf_throw kfunc: terminate the running BPF program at runtime by walking
 * back to the main program's frame and invoking its hidden exception
 * callback with that frame's SP/BP, which resets the stack and returns the
 * callback's value to the kernel caller of the program.
 */
__bpf_kfunc void bpf_throw(u64 cookie)
{
	struct bpf_throw_ctx ctx = {};

	arch_bpf_stack_walk(bpf_stack_walker, &ctx);
	WARN_ON_ONCE(!ctx.aux);
	if (ctx.aux)
		WARN_ON_ONCE(!ctx.aux->exception_boundary);
	WARN_ON_ONCE(!ctx.bp);
	WARN_ON_ONCE(!ctx.cnt);
	/* NOTE(review): if the stack walk failed, ctx.aux is NULL and this
	 * dereference will oops right after the WARN above.  This presumably
	 * relies on the verifier guaranteeing an exception boundary always
	 * exists on any path reaching bpf_throw — confirm.
	 */
	ctx.aux->bpf_exception_cb(cookie, ctx.sp, ctx.bp);
}
2488+
24522489
__diag_pop();
24532490

24542491
BTF_SET8_START(generic_btf_ids)
@@ -2478,6 +2515,7 @@ BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL)
24782515
BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU)
24792516
#endif
24802517
BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
2518+
BTF_ID_FLAGS(func, bpf_throw)
24812519
BTF_SET8_END(generic_btf_ids)
24822520

24832521
static const struct btf_kfunc_id_set generic_kfunc_set = {

0 commit comments

Comments
 (0)