diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c index 0d56cea71602..18db8b826836 100644 --- a/kernel/bpf/arena.c +++ b/kernel/bpf/arena.c @@ -550,6 +550,34 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt) } } +/* + * Reserve an arena virtual address range without populating it. This call stops + * bpf_arena_alloc_pages from adding pages to this range. + */ +static int arena_reserve_pages(struct bpf_arena *arena, long uaddr, u32 page_cnt) +{ + long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT; + long pgoff; + int ret; + + if (uaddr & ~PAGE_MASK) + return 0; + + pgoff = compute_pgoff(arena, uaddr); + if (pgoff + page_cnt > page_cnt_max) + return -EINVAL; + + guard(mutex)(&arena->lock); + + /* Cannot guard already allocated pages. */ + ret = is_range_tree_set(&arena->rt, pgoff, page_cnt); + if (ret) + return -EALREADY; + + /* "Allocate" the region to prevent it from being allocated. */ + return range_tree_clear(&arena->rt, pgoff, page_cnt); +} + __bpf_kfunc_start_defs(); __bpf_kfunc void *bpf_arena_alloc_pages(void *p__map, void *addr__ign, u32 page_cnt, @@ -573,11 +601,26 @@ __bpf_kfunc void bpf_arena_free_pages(void *p__map, void *ptr__ign, u32 page_cnt return; arena_free_pages(arena, (long)ptr__ign, page_cnt); } + +__bpf_kfunc int bpf_arena_reserve_pages(void *p__map, void *ptr__ign, u32 page_cnt) +{ + struct bpf_map *map = p__map; + struct bpf_arena *arena = container_of(map, struct bpf_arena, map); + + if (map->map_type != BPF_MAP_TYPE_ARENA) + return -EINVAL; + + if (!page_cnt) + return 0; + + return arena_reserve_pages(arena, (long)ptr__ign, page_cnt); +} __bpf_kfunc_end_defs(); BTF_KFUNCS_START(arena_kfuncs) BTF_ID_FLAGS(func, bpf_arena_alloc_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_RET | KF_ARENA_ARG2) BTF_ID_FLAGS(func, bpf_arena_free_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_ARG2) +BTF_ID_FLAGS(func, bpf_arena_reserve_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_ARG2) BTF_KFUNCS_END(arena_kfuncs) static const struct btf_kfunc_id_set common_kfunc_set = { diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h index 68a51dcc0669..16f8ce832004 100644 --- a/tools/testing/selftests/bpf/bpf_arena_common.h +++ b/tools/testing/selftests/bpf/bpf_arena_common.h @@ -46,8 +46,11 @@ void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt, int node_id, __u64 flags) __ksym __weak; +int bpf_arena_reserve_pages(void *map, void __arena *addr, __u32 page_cnt) __ksym __weak; void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak; +#define arena_base(map) ((void __arena *)((struct bpf_arena *)(map))->user_vm_start) + #else /* when compiled as user space code */ #define __arena diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c index 67509c5d3982..aded43ecbc27 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena.c @@ -114,6 +114,112 @@ int basic_alloc3(void *ctx) return 0; } +SEC("syscall") +__success __retval(0) +int basic_reserve1(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + char __arena *page; + int ret; + + page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + if (!page) + return 1; + + page += __PAGE_SIZE; + + /* Reserve the second page */ + ret = bpf_arena_reserve_pages(&arena, page, 1); + if (ret) + return 2; + + /* Try to explicitly allocate the reserved page. */ + page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0); + if (page) + return 3; + + /* Try to implicitly allocate the page (since there's only 2 of them). */ + page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + if (page) + return 4; +#endif + return 0; +} + +SEC("syscall") +__success __retval(0) +int basic_reserve2(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + char __arena *page; + int ret; + + page = arena_base(&arena); + ret = bpf_arena_reserve_pages(&arena, page, 1); + if (ret) + return 1; + + page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0); + if ((u64)page) + return 2; +#endif + return 0; +} + +/* Reserve the same page twice, should return -EALREADY. */ +SEC("syscall") +__success __retval(0) +int reserve_twice(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + char __arena *page; + int ret; + + page = arena_base(&arena); + + ret = bpf_arena_reserve_pages(&arena, page, 1); + if (ret) + return 1; + + /* Should be -EALREADY. */ + ret = bpf_arena_reserve_pages(&arena, page, 1); + if (ret != -114) + return 2; +#endif + return 0; +} + +/* Try to reserve past the end of the arena. */ +SEC("syscall") +__success __retval(0) +int reserve_invalid_region(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + char __arena *page; + int ret; + + /* Try a NULL pointer. */ + ret = bpf_arena_reserve_pages(&arena, NULL, 3); + if (ret != -22) + return 1; + + page = arena_base(&arena); + + ret = bpf_arena_reserve_pages(&arena, page, 3); + if (ret != -22) + return 2; + + ret = bpf_arena_reserve_pages(&arena, page, 4096); + if (ret != -22) + return 3; + + ret = bpf_arena_reserve_pages(&arena, page, (1ULL << 32) - 1); + if (ret != -22) + return 4; +#endif + return 0; +} + SEC("iter.s/bpf_map") __success __log_level(2) int iter_maps1(struct bpf_iter__bpf_map *ctx) diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c index f94f30cf1bb8..9eee51912280 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena_large.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c @@ -67,6 +67,101 @@ int big_alloc1(void *ctx) return 0; } +/* Try to access a reserved page. Behavior should be identical with accessing unallocated pages. */ +SEC("syscall") +__success __retval(0) +int access_reserved(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + volatile char __arena *page; + char __arena *base; + const size_t len = 4; + int ret, i; + + /* Get a separate region of the arena. */ + page = base = arena_base(&arena) + 16384 * PAGE_SIZE; + + ret = bpf_arena_reserve_pages(&arena, base, len); + if (ret) + return 1; + + /* Try to dirty reserved memory. */ + for (i = 0; i < len && can_loop; i++) + *page = 0x5a; + + for (i = 0; i < len && can_loop; i++) { + page = (volatile char __arena *)(base + i * PAGE_SIZE); + + /* + * Error out in case either the write went through, + * or the address has random garbage. + */ + if (*page == 0x5a) + return 2 + 2 * i; + + if (*page) + return 2 + 2 * i + 1; + } +#endif + return 0; +} + +/* Try to allocate a region overlapping with a reservation. */ +SEC("syscall") +__success __retval(0) +int request_partially_reserved(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + volatile char __arena *page; + char __arena *base; + int ret; + + /* Add an arbitrary page offset. */ + page = base = arena_base(&arena) + 4096 * __PAGE_SIZE; + + ret = bpf_arena_reserve_pages(&arena, base + 3 * __PAGE_SIZE, 4); + if (ret) + return 1; + + page = bpf_arena_alloc_pages(&arena, base, 5, NUMA_NO_NODE, 0); + if ((u64)page != 0ULL) + return 2; +#endif + return 0; +} + +SEC("syscall") +__success __retval(0) +int free_reserved(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + char __arena *addr; + char __arena *page; + int ret; + + /* Add an arbitrary page offset. */ + addr = arena_base(&arena) + 32768 * __PAGE_SIZE; + + page = bpf_arena_alloc_pages(&arena, addr, 4, NUMA_NO_NODE, 0); + if (!page) + return 1; + + ret = bpf_arena_reserve_pages(&arena, addr + 4 * __PAGE_SIZE, 4); + if (ret) + return 2; + + /* Freeing a reserved area, fully or partially, should succeed. */ + bpf_arena_free_pages(&arena, addr, 2); + bpf_arena_free_pages(&arena, addr + 2 * __PAGE_SIZE, 2); + + /* The free pages call above should have succeeded, so this allocation should too. */ + page = bpf_arena_alloc_pages(&arena, addr + 3 * __PAGE_SIZE, 1, NUMA_NO_NODE, 0); + if (!page) + return 3; +#endif + return 0; +} + #if defined(__BPF_FEATURE_ADDR_SPACE_CAST) #define PAGE_CNT 100 __u8 __arena * __arena page[PAGE_CNT]; /* occupies the first page */