Commit 482000c
Merge tag 'for-netdev' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Daniel Borkmann says:

====================
pull-request: bpf 2024-06-24

We've added 12 non-merge commits during the last 10 day(s) which contain
a total of 10 files changed, 412 insertions(+), 16 deletions(-).

The main changes are:

1) Fix a BPF verifier issue validating may_goto with a negative offset,
   from Alexei Starovoitov.

2) Fix a BPF verifier validation bug with may_goto combined with jump to
   the first instruction, also from Alexei Starovoitov.

3) Fix a bug with overrunning reservations in BPF ring buffer,
   from Daniel Borkmann.

4) Fix a bug in BPF verifier due to missing proper var_off setting
   related to movsx instruction, from Yonghong Song.

5) Silence unnecessary syzkaller-triggered warning in __xdp_reg_mem_model(),
   from Daniil Dulov.

* tag 'for-netdev' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  xdp: Remove WARN() from __xdp_reg_mem_model()
  selftests/bpf: Add tests for may_goto with negative offset.
  bpf: Fix may_goto with negative offset.
  selftests/bpf: Add more ring buffer test coverage
  bpf: Fix overrunning reservations in ringbuf
  selftests/bpf: Tests with may_goto and jumps to the 1st insn
  bpf: Fix the corner case with may_goto and jump to the 1st insn.
  bpf: Update BPF LSM maintainer list
  bpf: Fix remap of arena.
  selftests/bpf: Add a few tests to cover
  bpf: Add missed var_off setting in coerce_subreg_to_size_sx()
  bpf: Add missed var_off setting in set_sext32_default_val()
====================

Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
2 parents 058722e + 7e9f794 commit 482000c
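Background for the may_goto fixes below: may_goto is the conditional-jump instruction (BPF_JMP | BPF_JCOND) that lets the verifier bound otherwise open-ended loops with a hidden iteration counter. As rough orientation — not part of this commit — a program typically emits it from C via the can_loop/cond_break macros; the sketch below assumes those macros come from the selftests' bpf_experimental.h:

// SPDX-License-Identifier: GPL-2.0
/* Illustrative sketch only: a loop the verifier bounds via may_goto. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_experimental.h"	/* assumed source of can_loop */

char _license[] SEC("license") = "GPL";

SEC("raw_tp")
int bounded_loop(void *ctx)
{
	int i = 0;

	/* can_loop expands to a may_goto insn; the verifier rewrites it
	 * into a counter decrement plus conditional exit, so the loop is
	 * guaranteed to terminate at runtime.
	 */
	while (can_loop)
		i++;
	return 0;
}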

File tree

10 files changed: +412 -16 lines

MAINTAINERS

Lines changed: 2 additions & 1 deletion

@@ -4083,12 +4083,13 @@ F:	kernel/bpf/ringbuf.c
 
 BPF [SECURITY & LSM] (Security Audit and Enforcement using BPF)
 M:	KP Singh <[email protected]>
-R:	Matt Bobrowski <[email protected]>
+M:	Matt Bobrowski <[email protected]>
 L:	[email protected]
 S:	Maintained
 F:	Documentation/bpf/prog_lsm.rst
 F:	include/linux/bpf_lsm.h
 F:	kernel/bpf/bpf_lsm.c
+F:	kernel/trace/bpf_trace.c
 F:	security/bpf/
 
 BPF [SELFTESTS] (Test Runners & Infrastructure)

kernel/bpf/arena.c

Lines changed: 14 additions & 2 deletions

@@ -212,6 +212,7 @@ static u64 arena_map_mem_usage(const struct bpf_map *map)
 struct vma_list {
 	struct vm_area_struct *vma;
 	struct list_head head;
+	atomic_t mmap_count;
 };
 
 static int remember_vma(struct bpf_arena *arena, struct vm_area_struct *vma)
@@ -221,20 +222,30 @@ static int remember_vma(struct bpf_arena *arena, struct vm_area_struct *vma)
 	vml = kmalloc(sizeof(*vml), GFP_KERNEL);
 	if (!vml)
 		return -ENOMEM;
+	atomic_set(&vml->mmap_count, 1);
 	vma->vm_private_data = vml;
 	vml->vma = vma;
 	list_add(&vml->head, &arena->vma_list);
 	return 0;
 }
 
+static void arena_vm_open(struct vm_area_struct *vma)
+{
+	struct vma_list *vml = vma->vm_private_data;
+
+	atomic_inc(&vml->mmap_count);
+}
+
 static void arena_vm_close(struct vm_area_struct *vma)
 {
 	struct bpf_map *map = vma->vm_file->private_data;
 	struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
-	struct vma_list *vml;
+	struct vma_list *vml = vma->vm_private_data;
 
+	if (!atomic_dec_and_test(&vml->mmap_count))
+		return;
 	guard(mutex)(&arena->lock);
-	vml = vma->vm_private_data;
+	/* update link list under lock */
 	list_del(&vml->head);
 	vma->vm_private_data = NULL;
 	kfree(vml);
@@ -287,6 +298,7 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
 }
 
 static const struct vm_operations_struct arena_vm_ops = {
+	.open = arena_vm_open,
 	.close = arena_vm_close,
 	.fault = arena_vm_fault,
 };
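Why arena needs an open handler: mremap() and VMA splits duplicate a VMA and call vm_ops->open on the copy before vm_ops->close runs on the original. Both VMAs share the same vm_private_data, so without the mmap_count refcount a close on either one freed the vma_list node the other still referenced. A hypothetical user-space sketch of the remap sequence (names, and whether an arena accepts this exact flag combination, are assumptions — this is not the in-tree reproducer):

#define _GNU_SOURCE
#include <sys/mman.h>

/* arena_fd: fd of a BPF_MAP_TYPE_ARENA map; len: its mapped size */
static void remap_arena(int arena_fd, size_t len)
{
	void *p, *target, *q;

	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, arena_fd, 0);
	if (p == MAP_FAILED)
		return;
	target = mmap(NULL, len, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (target == MAP_FAILED) {
		munmap(p, len);
		return;
	}
	/* Moving the mapping calls arena_vm_open() on the new VMA, then
	 * arena_vm_close() on the old one. Before this fix, close() did
	 * list_del() + kfree() on the shared vma_list entry, leaving the
	 * moved VMA with a dangling vm_private_data pointer (use-after-free
	 * on later faults or close).
	 */
	q = mremap(p, len, len, MREMAP_MAYMOVE | MREMAP_FIXED, target);
	if (q != MAP_FAILED)
		munmap(q, len);
}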

kernel/bpf/ringbuf.c

Lines changed: 25 additions & 6 deletions

@@ -51,7 +51,8 @@ struct bpf_ringbuf {
 	 * This prevents a user-space application from modifying the
 	 * position and ruining in-kernel tracking. The permissions of the
 	 * pages depend on who is producing samples: user-space or the
-	 * kernel.
+	 * kernel. Note that the pending counter is placed in the same
+	 * page as the producer, so that it shares the same cache line.
 	 *
 	 * Kernel-producer
 	 * ---------------
@@ -70,6 +71,7 @@ struct bpf_ringbuf {
 	 */
 	unsigned long consumer_pos __aligned(PAGE_SIZE);
 	unsigned long producer_pos __aligned(PAGE_SIZE);
+	unsigned long pending_pos;
 	char data[] __aligned(PAGE_SIZE);
 };
 
@@ -179,6 +181,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
 	rb->mask = data_sz - 1;
 	rb->consumer_pos = 0;
 	rb->producer_pos = 0;
+	rb->pending_pos = 0;
 
 	return rb;
 }
@@ -404,9 +407,9 @@ bpf_ringbuf_restore_from_rec(struct bpf_ringbuf_hdr *hdr)
 
 static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
 {
-	unsigned long cons_pos, prod_pos, new_prod_pos, flags;
-	u32 len, pg_off;
+	unsigned long cons_pos, prod_pos, new_prod_pos, pend_pos, flags;
 	struct bpf_ringbuf_hdr *hdr;
+	u32 len, pg_off, tmp_size, hdr_len;
 
 	if (unlikely(size > RINGBUF_MAX_RECORD_SZ))
 		return NULL;
@@ -424,13 +427,29 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
 		spin_lock_irqsave(&rb->spinlock, flags);
 	}
 
+	pend_pos = rb->pending_pos;
 	prod_pos = rb->producer_pos;
 	new_prod_pos = prod_pos + len;
 
-	/* check for out of ringbuf space by ensuring producer position
-	 * doesn't advance more than (ringbuf_size - 1) ahead
+	while (pend_pos < prod_pos) {
+		hdr = (void *)rb->data + (pend_pos & rb->mask);
+		hdr_len = READ_ONCE(hdr->len);
+		if (hdr_len & BPF_RINGBUF_BUSY_BIT)
+			break;
+		tmp_size = hdr_len & ~BPF_RINGBUF_DISCARD_BIT;
+		tmp_size = round_up(tmp_size + BPF_RINGBUF_HDR_SZ, 8);
+		pend_pos += tmp_size;
+	}
+	rb->pending_pos = pend_pos;
+
+	/* check for out of ringbuf space:
+	 * - by ensuring producer position doesn't advance more than
+	 *   (ringbuf_size - 1) ahead
+	 * - by ensuring oldest not yet committed record until newest
+	 *   record does not span more than (ringbuf_size - 1)
 	 */
-	if (new_prod_pos - cons_pos > rb->mask) {
+	if (new_prod_pos - cons_pos > rb->mask ||
+	    new_prod_pos - pend_pos > rb->mask) {
 		spin_unlock_irqrestore(&rb->spinlock, flags);
 		return NULL;
 	}
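The arithmetic behind the new check, using the numbers from the selftest added further below (ring size 0x4000, consumer position forced to 0x3000, two 0x3000-byte reservations): the following stand-alone model — a simplification that ignores locking and real record headers — shows the old check passing exactly where the pending-position check now refuses.

/* Stand-alone user-space model of the reservation checks above.
 * Simplified: no locking, no real headers. Values mirror the
 * ringbuf_write selftest.
 */
#include <stdio.h>

#define RB_SIZE	0x4000UL		/* ring data size (power of 2) */
#define RB_MASK	(RB_SIZE - 1)
#define HDR_SZ	8UL			/* BPF_RINGBUF_HDR_SZ */

static unsigned long round_up8(unsigned long v) { return (v + 7) & ~7UL; }

int main(void)
{
	unsigned long cons_pos = 0x3000;	/* forced via mmap of consumer page */
	unsigned long pend_pos = 0;		/* first record reserved, still BUSY */
	unsigned long len = round_up8(0x3000 + HDR_SZ);	/* 0x3008 per record */
	unsigned long prod_pos = len;		/* after the first reserve */
	unsigned long new_prod_pos = prod_pos + len;	/* 0x6010 for the second */

	/* old check alone: 0x6010 - 0x3000 = 0x3010 <= 0x3fff -> would pass,
	 * letting the second record overwrite the first, uncommitted one */
	int old_ok = new_prod_pos - cons_pos <= RB_MASK;
	/* pending check: 0x6010 - 0x0 = 0x6010 > 0x3fff -> reserve fails */
	int new_ok = old_ok && new_prod_pos - pend_pos <= RB_MASK;

	printf("old check: %s, with pending check: %s\n",
	       old_ok ? "pass" : "fail", new_ok ? "pass" : "fail");
	return 0;
}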

kernel/bpf/verifier.c

Lines changed: 58 additions & 3 deletions

@@ -6236,6 +6236,7 @@ static void set_sext32_default_val(struct bpf_reg_state *reg, int size)
 	}
 	reg->u32_min_value = 0;
 	reg->u32_max_value = U32_MAX;
+	reg->var_off = tnum_subreg(tnum_unknown);
 }
 
 static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
@@ -6280,6 +6281,7 @@ static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
 		reg->s32_max_value = s32_max;
 		reg->u32_min_value = (u32)s32_min;
 		reg->u32_max_value = (u32)s32_max;
+		reg->var_off = tnum_subreg(tnum_range(s32_min, s32_max));
 		return;
 	}
 
@@ -12719,6 +12721,16 @@ static bool signed_add32_overflows(s32 a, s32 b)
 	return res < a;
 }
 
+static bool signed_add16_overflows(s16 a, s16 b)
+{
+	/* Do the add in u16, where overflow is well-defined */
+	s16 res = (s16)((u16)a + (u16)b);
+
+	if (b < 0)
+		return res > a;
+	return res < a;
+}
+
 static bool signed_sub_overflows(s64 a, s64 b)
 {
 	/* Do the sub in u64, where overflow is well-defined */
@@ -17448,11 +17460,11 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 				goto skip_inf_loop_check;
 			}
 			if (is_may_goto_insn_at(env, insn_idx)) {
-				if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
+				if (sl->state.may_goto_depth != cur->may_goto_depth &&
+				    states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
 					update_loop_entry(cur, &sl->state);
 					goto hit;
 				}
-				goto skip_inf_loop_check;
 			}
 			if (calls_callback(env, insn_idx)) {
 				if (states_equal(env, &sl->state, cur, RANGE_WITHIN))
@@ -18730,6 +18742,39 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
 	return new_prog;
 }
 
+/*
+ * For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the
+ * jump offset by 'delta'.
+ */
+static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
+{
+	struct bpf_insn *insn = prog->insnsi;
+	u32 insn_cnt = prog->len, i;
+
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		u8 code = insn->code;
+
+		if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
+		    BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
+			continue;
+
+		if (insn->code == (BPF_JMP32 | BPF_JA)) {
+			if (i + 1 + insn->imm != tgt_idx)
+				continue;
+			if (signed_add32_overflows(insn->imm, delta))
+				return -ERANGE;
+			insn->imm += delta;
+		} else {
+			if (i + 1 + insn->off != tgt_idx)
+				continue;
+			if (signed_add16_overflows(insn->imm, delta))
+				return -ERANGE;
+			insn->off += delta;
+		}
+	}
+	return 0;
+}
+
 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
 					      u32 off, u32 cnt)
 {
@@ -20004,7 +20049,10 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 
 			stack_depth_extra = 8;
 			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off);
-			insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
+			if (insn->off >= 0)
+				insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2);
+			else
+				insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
 			insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
 			insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off);
 			cnt = 4;
@@ -20546,6 +20594,13 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 		if (!new_prog)
 			return -ENOMEM;
 		env->prog = prog = new_prog;
+		/*
+		 * If may_goto is a first insn of a prog there could be a jmp
+		 * insn that points to it, hence adjust all such jmps to point
+		 * to insn after BPF_ST that inits may_goto count.
+		 * Adjustment will succeed because bpf_patch_insn_data() didn't fail.
+		 */
+		WARN_ON(adjust_jmp_off(env->prog, subprog_start, 1));
 	}
 
 	/* Since poke tab is now finalized, publish aux to tracker. */
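The corner case adjust_jmp_off() handles: when may_goto is instruction 0, do_misc_fixups() patches a prologue (a BPF_ST that initializes the loop count) in front of it, so any jump that targeted instruction 0 must be shifted past the inserted instructions. A selftest-style sketch of such a program — not the in-tree selftest; the __naked/__imm_insn conventions and the relative filter.h include are assumed from tools/testing/selftests/bpf:

// SPDX-License-Identifier: GPL-2.0
/* Sketch: may_goto as insn 0, plus a backward jump whose target is insn 0. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
#include "../../../include/linux/filter.h"	/* BPF_RAW_INSN, as in the verifier tests */

char _license[] SEC("license") = "GPL";

SEC("raw_tp")
__naked void may_goto_first_insn(void)
{
	asm volatile (
	/* insn 0: may_goto +2, i.e. jump to 'r0 = 0; exit' once the
	 * hidden iteration budget is exhausted */
	"l0_%=:	.8byte %[may_goto];"
	"	r0 = 1;"
	"	goto l0_%=;"	/* backward jump targeting insn 0 */
	"	r0 = 0;"
	"	exit;"
	:
	: __imm_insn(may_goto, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 2 /* off */, 0))
	: __clobber_all);
}

After the fixup, the "goto l0" no longer points at the may_goto itself but would land inside the inserted prologue; adjust_jmp_off() bumps its offset by the patch delta so it targets the instruction after the BPF_ST count initialization.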

net/core/xdp.c

Lines changed: 1 addition & 3 deletions

@@ -295,10 +295,8 @@ static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem,
 		mutex_lock(&mem_id_lock);
 		ret = __mem_id_init_hash_table();
 		mutex_unlock(&mem_id_lock);
-		if (ret < 0) {
-			WARN_ON(1);
+		if (ret < 0)
 			return ERR_PTR(ret);
-		}
 	}
 
 	xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);

tools/testing/selftests/bpf/Makefile

Lines changed: 1 addition & 1 deletion

@@ -457,7 +457,7 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
 LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \
 	trace_printk.c trace_vprintk.c map_ptr_kern.c \
 	core_kern.c core_kern_overflow.c test_ringbuf.c \
-	test_ringbuf_n.c test_ringbuf_map_key.c
+	test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c
 
 # Generate both light skeleton and libbpf skeleton for these
 LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \

tools/testing/selftests/bpf/prog_tests/ringbuf.c

Lines changed: 56 additions & 0 deletions

@@ -12,9 +12,11 @@
 #include <sys/sysinfo.h>
 #include <linux/perf_event.h>
 #include <linux/ring_buffer.h>
+
 #include "test_ringbuf.lskel.h"
 #include "test_ringbuf_n.lskel.h"
 #include "test_ringbuf_map_key.lskel.h"
+#include "test_ringbuf_write.lskel.h"
 
 #define EDONE 7777
 
@@ -84,6 +86,58 @@ static void *poll_thread(void *input)
 	return (void *)(long)ring_buffer__poll(ringbuf, timeout);
 }
 
+static void ringbuf_write_subtest(void)
+{
+	struct test_ringbuf_write_lskel *skel;
+	int page_size = getpagesize();
+	size_t *mmap_ptr;
+	int err, rb_fd;
+
+	skel = test_ringbuf_write_lskel__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	skel->maps.ringbuf.max_entries = 0x4000;
+
+	err = test_ringbuf_write_lskel__load(skel);
+	if (!ASSERT_OK(err, "skel_load"))
+		goto cleanup;
+
+	rb_fd = skel->maps.ringbuf.map_fd;
+
+	mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0);
+	if (!ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos"))
+		goto cleanup;
+	*mmap_ptr = 0x3000;
+	ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw");
+
+	skel->bss->pid = getpid();
+
+	ringbuf = ring_buffer__new(rb_fd, process_sample, NULL, NULL);
+	if (!ASSERT_OK_PTR(ringbuf, "ringbuf_new"))
+		goto cleanup;
+
+	err = test_ringbuf_write_lskel__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto cleanup_ringbuf;
+
+	skel->bss->discarded = 0;
+	skel->bss->passed = 0;
+
+	/* trigger exactly two samples */
+	syscall(__NR_getpgid);
+	syscall(__NR_getpgid);
+
+	ASSERT_EQ(skel->bss->discarded, 2, "discarded");
+	ASSERT_EQ(skel->bss->passed, 0, "passed");
+
+	test_ringbuf_write_lskel__detach(skel);
+cleanup_ringbuf:
+	ring_buffer__free(ringbuf);
+cleanup:
+	test_ringbuf_write_lskel__destroy(skel);
+}
+
 static void ringbuf_subtest(void)
 {
 	const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample);
@@ -451,4 +505,6 @@
 		ringbuf_n_subtest();
 	if (test__start_subtest("ringbuf_map_key"))
 		ringbuf_map_key_subtest();
+	if (test__start_subtest("ringbuf_write"))
+		ringbuf_write_subtest();
 }
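Once the Makefile change above regenerates the light skeletons, this subtest should be runnable on its own via ./test_progs -t ringbuf/ringbuf_write — assuming test_progs' usual test/subtest filter syntax.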
tools/testing/selftests/bpf/progs/test_ringbuf_write.c (new file)

Lines changed: 46 additions & 0 deletions

@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+/* inputs */
+int pid = 0;
+
+/* outputs */
+long passed = 0;
+long discarded = 0;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int test_ringbuf_write(void *ctx)
+{
+	int *foo, cur_pid = bpf_get_current_pid_tgid() >> 32;
+	void *sample1, *sample2;
+
+	if (cur_pid != pid)
+		return 0;
+
+	sample1 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0);
+	if (!sample1)
+		return 0;
+	/* first one can pass */
+	sample2 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0);
+	if (!sample2) {
+		bpf_ringbuf_discard(sample1, 0);
+		__sync_fetch_and_add(&discarded, 1);
+		return 0;
+	}
+	/* second one must not */
+	__sync_fetch_and_add(&passed, 1);
+	foo = sample2 + 4084;
+	*foo = 256;
+	bpf_ringbuf_discard(sample1, 0);
+	bpf_ringbuf_discard(sample2, 0);
+	return 0;
+}
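Why the stray write matters without the ringbuf fix: with max_entries = 0x4000 and consumer_pos forced to 0x3000, each reservation occupies round_up(0x3000 + 8, 8) = 0x3008 bytes. The second reservation would put new_prod_pos at 0x6010; the old check only compared against cons_pos (0x6010 - 0x3000 = 0x3010 <= 0x3fff, so it passed), while the new pending check sees 0x6010 - 0 > 0x3fff and returns NULL. Before the fix, sample2's data started at offset 0x3010, so the write to sample2 + 4084 landed at (0x3010 + 0xff4) & 0x3fff = 0x4 — inside the still-busy first record's 8-byte header (the ringbuf's data pages are mapped twice, which is why the wrapped write goes through), corrupting in-kernel bookkeeping. Hence the test expects both samples to be discarded (discarded == 2) and none to take the overwrite path (passed == 0).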
