
Commit 60bbf7e

netoptimizer authored and davem330 committed
mlx5: use page_pool for xdp_return_frame call
This patch shows how it is possible to have both the driver local page cache, which uses elevated refcnt for "catching"/avoiding that SKB put_page returns the page through the page allocator, and at the same time have pages returned to the page_pool from ndo_xdp_xmit DMA completion.

The performance improvement for XDP_REDIRECT in this patch is really good, especially considering that (currently) the xdp_return_frame API and page_pool_put_page() perform per-frame operations of both rhashtable ID-lookup and locked return into the (page_pool) ptr_ring. (The plan is to remove these per-frame operations in a followup patchset.)

The benchmark performed was RX on mlx5 and XDP_REDIRECT out ixgbe, with xdp_redirect_map (using devmap). The target/maximum capability of ixgbe is 13Mpps (on this HW setup).

Before this patch for mlx5, XDP-redirected frames were returned via the page allocator. The single-flow performance was 6Mpps, and if I started two flows the collective performance dropped to 4Mpps, because we hit the page allocator lock (further negative scaling occurs).

Two test scenarios need to be covered for the xdp_return_frame API: DMA-TX completion (and thus the frame free/return) running either on the same CPU as RX, or on a different CPU. Results were same-CPU=10Mpps and cross-CPU=12Mpps. This is very close to our 13Mpps max target. The reason the max target isn't reached in the cross-CPU test is likely RX-ring DMA unmap/map overhead (which doesn't occur in ixgbe-to-ixgbe testing). It is also planned to remove this unnecessary DMA unmap in a later patchset.

V2: Adjustments requested by Tariq
- Changed page_pool_create return codes to not return NULL, only ERR_PTR, as this simplifies err handling in drivers.
- Save a branch in mlx5e_page_release
- Correct page_pool size calc for MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ

V5: Updated patch desc

V8: Adjust for b0cedc8 ("net/mlx5e: Remove rq_headroom field from params")

V9:
- Adjust for 121e892 ("net/mlx5e: Refactor RQ XDP_TX indication")
- Adjust for 73281b7 ("net/mlx5e: Derive Striding RQ size from MTU")
- Correct handling if page_pool_create fails for MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ

V10: Req from Tariq
- Change pool_size calc for MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ

Signed-off-by: Jesper Dangaard Brouer <[email protected]>
Reviewed-by: Tariq Toukan <[email protected]>
Acked-by: Saeed Mahameed <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
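For orientation before the diff: the driver-facing pattern this patch adds boils down to creating one page_pool per RX queue and registering it as that queue's XDP memory model, so xdp_return_frame() can route frames back to the pool. The sketch below condenses that pattern from the diff; the wrapper function rq_page_pool_setup() and its signature are hypothetical, but every page_pool/xdp call in it appears verbatim in the patch (API as of this patch's kernel).

#include <net/page_pool.h>
#include <net/xdp.h>

/* Hypothetical wrapper condensing what mlx5e_alloc_rq() does in the
 * en_main.c hunk below: create a per-RQ page_pool sized to the RX ring
 * and register it as this rxq's XDP memory model.
 */
static int rq_page_pool_setup(struct mlx5e_rq *rq, struct mlx5e_channel *c,
                              u32 pool_size)
{
        struct page_pool_params pp_params = { 0 };
        int err;

        pp_params.order     = rq->buff.page_order;
        pp_params.flags     = 0;         /* driver keeps doing its own DMA mapping */
        pp_params.pool_size = pool_size; /* sized to the RX ring, see pool_size calc */
        pp_params.nid       = cpu_to_node(c->cpu);
        pp_params.dev       = c->pdev;
        pp_params.dma_dir   = rq->buff.map_dir;

        /* Returns a valid pool or an ERR_PTR(), never NULL (V2 note above) */
        rq->page_pool = page_pool_create(&pp_params);
        if (IS_ERR(rq->page_pool))
                return PTR_ERR(rq->page_pool);

        /* Tell the XDP core that frames from this rxq are page_pool pages,
         * so xdp_return_frame() returns them here instead of to the
         * page allocator.
         */
        err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_PAGE_POOL,
                                         rq->page_pool);
        if (err)
                page_pool_destroy(rq->page_pool);
        return err;
}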
1 parent 57d0a1c commit 60bbf7e

3 files changed: +48 −12 lines changed


drivers/net/ethernet/mellanox/mlx5/core/en.h

Lines changed: 3 additions & 0 deletions
@@ -53,6 +53,8 @@
 #include "mlx5_core.h"
 #include "en_stats.h"
 
+struct page_pool;
+
 #define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v)
 
 #define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
@@ -534,6 +536,7 @@ struct mlx5e_rq {
         unsigned int           hw_mtu;
         struct mlx5e_xdpsq     xdpsq;
         DECLARE_BITMAP(flags, 8);
+        struct page_pool      *page_pool;
 
         /* control */
         struct mlx5_wq_ctrl    wq_ctrl;
drivers/net/ethernet/mellanox/mlx5/core/en_main.c

Lines changed: 34 additions & 7 deletions
@@ -35,6 +35,7 @@
 #include <linux/mlx5/fs.h>
 #include <net/vxlan.h>
 #include <linux/bpf.h>
+#include <net/page_pool.h>
 #include "eswitch.h"
 #include "en.h"
 #include "en_tc.h"
@@ -389,10 +390,11 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
                           struct mlx5e_rq_param *rqp,
                           struct mlx5e_rq *rq)
 {
+        struct page_pool_params pp_params = { 0 };
         struct mlx5_core_dev *mdev = c->mdev;
         void *rqc = rqp->rqc;
         void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
-        u32 byte_count;
+        u32 byte_count, pool_size;
         int npages;
         int wq_sz;
         int err;
@@ -432,9 +434,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 
         rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
         rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params);
+        pool_size = 1 << params->log_rq_mtu_frames;
 
         switch (rq->wq_type) {
         case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+
+                pool_size = MLX5_MPWRQ_PAGES_PER_WQE << mlx5e_mpwqe_get_log_rq_size(params);
                 rq->post_wqes = mlx5e_post_rx_mpwqes;
                 rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
 
@@ -512,13 +517,31 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
                 rq->mkey_be = c->mkey_be;
         }
 
-        /* This must only be activate for order-0 pages */
-        if (rq->xdp_prog) {
-                err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
-                                                 MEM_TYPE_PAGE_ORDER0, NULL);
-                if (err)
-                        goto err_rq_wq_destroy;
+        /* Create a page_pool and register it with rxq */
+        pp_params.order = rq->buff.page_order;
+        pp_params.flags = 0; /* No-internal DMA mapping in page_pool */
+        pp_params.pool_size = pool_size;
+        pp_params.nid = cpu_to_node(c->cpu);
+        pp_params.dev = c->pdev;
+        pp_params.dma_dir = rq->buff.map_dir;
+
+        /* page_pool can be used even when there is no rq->xdp_prog,
+         * given page_pool does not handle DMA mapping there is no
+         * required state to clear. And page_pool gracefully handle
+         * elevated refcnt.
+         */
+        rq->page_pool = page_pool_create(&pp_params);
+        if (IS_ERR(rq->page_pool)) {
+                if (rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+                        kfree(rq->wqe.frag_info);
+                err = PTR_ERR(rq->page_pool);
+                rq->page_pool = NULL;
+                goto err_rq_wq_destroy;
         }
+        err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
+                                         MEM_TYPE_PAGE_POOL, rq->page_pool);
+        if (err)
+                goto err_rq_wq_destroy;
 
         for (i = 0; i < wq_sz; i++) {
                 struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
@@ -556,6 +579,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
         if (rq->xdp_prog)
                 bpf_prog_put(rq->xdp_prog);
         xdp_rxq_info_unreg(&rq->xdp_rxq);
+        if (rq->page_pool)
+                page_pool_destroy(rq->page_pool);
         mlx5_wq_destroy(&rq->wq_ctrl);
 
         return err;
@@ -569,6 +594,8 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
                 bpf_prog_put(rq->xdp_prog);
 
         xdp_rxq_info_unreg(&rq->xdp_rxq);
+        if (rq->page_pool)
+                page_pool_destroy(rq->page_pool);
 
         switch (rq->wq_type) {
         case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
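Two details in the mlx5e_alloc_rq() error path above are easy to miss, so here is that excerpt again with explanatory comments added; the code itself is unchanged from the diff.

        rq->page_pool = page_pool_create(&pp_params);
        if (IS_ERR(rq->page_pool)) {
                /* Non-striding RQs allocated wqe.frag_info earlier in this
                 * function; that allocation must be unwound by hand here.
                 */
                if (rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
                        kfree(rq->wqe.frag_info);
                /* Failure is always an ERR_PTR()-encoded errno, never NULL
                 * (per the V2 note), so a single IS_ERR() check suffices.
                 */
                err = PTR_ERR(rq->page_pool);
                /* NULL the pointer so the shared cleanup paths, which test
                 * rq->page_pool before calling page_pool_destroy(), stay safe.
                 */
                rq->page_pool = NULL;
                goto err_rq_wq_destroy;
        }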

drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

Lines changed: 11 additions & 5 deletions
@@ -37,6 +37,7 @@
 #include <linux/bpf_trace.h>
 #include <net/busy_poll.h>
 #include <net/ip6_checksum.h>
+#include <net/page_pool.h>
 #include "en.h"
 #include "en_tc.h"
 #include "eswitch.h"
@@ -221,7 +222,7 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
         if (mlx5e_rx_cache_get(rq, dma_info))
                 return 0;
 
-        dma_info->page = dev_alloc_pages(rq->buff.page_order);
+        dma_info->page = page_pool_dev_alloc_pages(rq->page_pool);
         if (unlikely(!dma_info->page))
                 return -ENOMEM;
 
@@ -246,11 +247,16 @@ static void mlx5e_page_dma_unmap(struct mlx5e_rq *rq,
 void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
                         bool recycle)
 {
-        if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info))
-                return;
+        if (likely(recycle)) {
+                if (mlx5e_rx_cache_put(rq, dma_info))
+                        return;
 
-        mlx5e_page_dma_unmap(rq, dma_info);
-        put_page(dma_info->page);
+                mlx5e_page_dma_unmap(rq, dma_info);
+                page_pool_recycle_direct(rq->page_pool, dma_info->page);
+        } else {
+                mlx5e_page_dma_unmap(rq, dma_info);
+                put_page(dma_info->page);
+        }
 }
 
 static inline bool mlx5e_page_reuse(struct mlx5e_rq *rq,
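To summarize the new release policy in en_rx.c: the recycle path now ends in the page_pool rather than the page allocator, which is exactly what removes the allocator-lock bottleneck described in the commit message. The same function again, annotated; the code is unchanged from the diff.

void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
                        bool recycle)
{
        if (likely(recycle)) {
                /* RX fast path: try the driver-local cache first; it keeps
                 * the DMA mapping alive, so a cache hit needs no further work.
                 */
                if (mlx5e_rx_cache_put(rq, dma_info))
                        return;

                /* Cache full: unmap, then return the page to the page_pool
                 * instead of the page allocator, avoiding the allocator lock
                 * that capped two-flow XDP_REDIRECT at 4Mpps.
                 */
                mlx5e_page_dma_unmap(rq, dma_info);
                page_pool_recycle_direct(rq->page_pool, dma_info->page);
        } else {
                /* Tear-down path: really free the page. */
                mlx5e_page_dma_unmap(rq, dma_info);
                put_page(dma_info->page);
        }
}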
