Add basic support for attaching an XDP program to the device and support for PASS/DROP/ABORT actions. In fbnic, buffers are always mapped as DMA_BIDIRECTIONAL. Testing: Hook a simple XDP program that passes all the packets destined for a specific port iperf3 -c 192.168.1.10 -P 5 -p 12345 Connecting to host 192.168.1.10, port 12345 [ 5] local 192.168.1.9 port 46702 connected to 192.168.1.10 port 12345 [ ID] Interval Transfer Bitrate Retr Cwnd - - - - - - - - - - - - - - - - - - - - - - - - - [SUM] 1.00-2.00 sec 3.86 GBytes 33.2 Gbits/sec 0 XDP_DROP: Hook an XDP program that drops packets destined for a specific port iperf3 -c 192.168.1.10 -P 5 -p 12345 ^C- - - - - - - - - - - - - - - - - - - - - - - - - [ ID] Interval Transfer Bitrate Retr [SUM] 0.00-0.00 sec 0.00 Bytes 0.00 bits/sec 0 sender [SUM] 0.00-0.00 sec 0.00 Bytes 0.00 bits/sec receiver iperf3: interrupt - the client has terminated XDP with HDS: - Validate XDP attachment failure when HDS is low ~] ethtool -G eth0 hds-thresh 512 ~] sudo ip link set eth0 xdpdrv obj xdp_pass_12345.o sec xdp ~] Error: fbnic: MTU too high, or HDS threshold is too low for single buffer XDP. - Validate successful XDP attachment when HDS threshold is appropriate ~] ethtool -G eth0 hds-thresh 1536 ~] sudo ip link set eth0 xdpdrv obj xdp_pass_12345.o sec xdp - Validate when the XDP program is attached, changing HDS thresh to a lower value fails ~] ethtool -G eth0 hds-thresh 512 ~] netlink error: fbnic: Use higher HDS threshold or multi-buf capable program - Validate HDS thresh does not matter when xdp frags support is available ~] ethtool -G eth0 hds-thresh 512 ~] sudo ip link set eth0 xdpdrv obj xdp_pass_mb_12345.o sec xdp.frags Signed-off-by: Jakub Kicinski Signed-off-by: Mohsin Bashir --- .../net/ethernet/meta/fbnic/fbnic_ethtool.c | 11 +++ .../net/ethernet/meta/fbnic/fbnic_netdev.c | 35 +++++++ .../net/ethernet/meta/fbnic/fbnic_netdev.h | 5 + drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 95 +++++++++++++++++-- drivers/net/ethernet/meta/fbnic/fbnic_txrx.h | 1 + 5 files changed, 140 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c index 84a0db9f1be0..d7b9eb267ead 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c @@ -329,6 +329,17 @@ fbnic_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, return -EINVAL; } + /* If an XDP program is attached, we should check for potential frame + * splitting. If the new HDS threshold can cause splitting, we should + * only allow if the attached XDP program can handle frags. + */ + if (fbnic_check_split_frames(fbn->xdp_prog, netdev->mtu, + kernel_ring->hds_thresh)) { + NL_SET_ERR_MSG_MOD(extack, + "Use higher HDS threshold or multi-buf capable program"); + return -EINVAL; + } + if (!netif_running(netdev)) { fbnic_set_rings(fbn, ring, kernel_ring); return 0; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index d039e1c7a0d5..0621b89cbf3d 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -504,6 +504,40 @@ static void fbnic_get_stats64(struct net_device *dev, } } +bool fbnic_check_split_frames(struct bpf_prog *prog, unsigned int mtu, + u32 hds_thresh) +{ + if (!prog) + return false; + + if (prog->aux->xdp_has_frags) + return false; + + return mtu + ETH_HLEN > hds_thresh; +} + +static int fbnic_bpf(struct net_device *netdev, struct netdev_bpf *bpf) +{ + struct bpf_prog *prog = bpf->prog, *prev_prog; + struct fbnic_net *fbn = netdev_priv(netdev); + + if (bpf->command != XDP_SETUP_PROG) + return -EINVAL; + + if (fbnic_check_split_frames(prog, netdev->mtu, + fbn->hds_thresh)) { + NL_SET_ERR_MSG_MOD(bpf->extack, + "MTU too high, or HDS threshold is too low for single buffer XDP"); + return -EOPNOTSUPP; + } + + prev_prog = xchg(&fbn->xdp_prog, prog); + if (prev_prog) + bpf_prog_put(prev_prog); + + return 0; +} + static const struct net_device_ops fbnic_netdev_ops = { .ndo_open = fbnic_open, .ndo_stop = fbnic_stop, @@ -513,6 +547,7 @@ static const struct net_device_ops fbnic_netdev_ops = { .ndo_set_mac_address = fbnic_set_mac, .ndo_set_rx_mode = fbnic_set_rx_mode, .ndo_get_stats64 = fbnic_get_stats64, + .ndo_bpf = fbnic_bpf, .ndo_hwtstamp_get = fbnic_hwtstamp_get, .ndo_hwtstamp_set = fbnic_hwtstamp_set, }; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h index 04c5c7ed6c3a..bfa79ea910d8 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h @@ -18,6 +18,8 @@ #define FBNIC_TUN_GSO_FEATURES NETIF_F_GSO_IPXIP6 struct fbnic_net { + struct bpf_prog *xdp_prog; + struct fbnic_ring *tx[FBNIC_MAX_TXQS]; struct fbnic_ring *rx[FBNIC_MAX_RXQS]; @@ -104,4 +106,7 @@ int fbnic_phylink_ethtool_ksettings_get(struct net_device *netdev, int fbnic_phylink_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam); int fbnic_phylink_init(struct net_device *netdev); + +bool fbnic_check_split_frames(struct bpf_prog *prog, + unsigned int mtu, u32 hds_threshold); #endif /* _FBNIC_NETDEV_H_ */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c index 71af7b9d5bcd..486c14e83ad5 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c @@ -2,17 +2,26 @@ /* Copyright (c) Meta Platforms, Inc. and affiliates. */ #include +#include +#include #include #include #include #include #include +#include #include "fbnic.h" #include "fbnic_csr.h" #include "fbnic_netdev.h" #include "fbnic_txrx.h" +enum { + FBNIC_XDP_PASS = 0, + FBNIC_XDP_CONSUME, + FBNIC_XDP_LEN_ERR, +}; + enum { FBNIC_XMIT_CB_TS = 0x01, }; @@ -877,7 +886,7 @@ static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd, headroom = hdr_pg_off - hdr_pg_start + FBNIC_RX_PAD; frame_sz = hdr_pg_end - hdr_pg_start; - xdp_init_buff(&pkt->buff, frame_sz, NULL); + xdp_init_buff(&pkt->buff, frame_sz, &qt->xdp_rxq); hdr_pg_start += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) * FBNIC_BD_FRAG_SIZE; @@ -966,6 +975,38 @@ static struct sk_buff *fbnic_build_skb(struct fbnic_napi_vector *nv, return skb; } +static struct sk_buff *fbnic_run_xdp(struct fbnic_napi_vector *nv, + struct fbnic_pkt_buff *pkt) +{ + struct fbnic_net *fbn = netdev_priv(nv->napi.dev); + struct bpf_prog *xdp_prog; + int act; + + xdp_prog = READ_ONCE(fbn->xdp_prog); + if (!xdp_prog) + goto xdp_pass; + + if (xdp_buff_has_frags(&pkt->buff) && !xdp_prog->aux->xdp_has_frags) + return ERR_PTR(-FBNIC_XDP_LEN_ERR); + + act = bpf_prog_run_xdp(xdp_prog, &pkt->buff); + switch (act) { + case XDP_PASS: +xdp_pass: + return fbnic_build_skb(nv, pkt); + default: + bpf_warn_invalid_xdp_action(nv->napi.dev, xdp_prog, act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(nv->napi.dev, xdp_prog, act); + fallthrough; + case XDP_DROP: + break; + } + + return ERR_PTR(-FBNIC_XDP_CONSUME); +} + static enum pkt_hash_types fbnic_skb_hash_type(u64 rcd) { return (FBNIC_RCD_META_L4_TYPE_MASK & rcd) ? PKT_HASH_TYPE_L4 : @@ -1064,7 +1105,7 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv, if (unlikely(pkt->add_frag_failed)) skb = NULL; else if (likely(!fbnic_rcd_metadata_err(rcd))) - skb = fbnic_build_skb(nv, pkt); + skb = fbnic_run_xdp(nv, pkt); /* Populate skb and invalidate XDP */ if (!IS_ERR_OR_NULL(skb)) { @@ -1250,6 +1291,7 @@ static void fbnic_free_napi_vector(struct fbnic_net *fbn, } for (j = 0; j < nv->rxt_count; j++, i++) { + xdp_rxq_info_unreg(&nv->qt[i].xdp_rxq); fbnic_remove_rx_ring(fbn, &nv->qt[i].sub0); fbnic_remove_rx_ring(fbn, &nv->qt[i].sub1); fbnic_remove_rx_ring(fbn, &nv->qt[i].cmpl); @@ -1422,6 +1464,11 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn, fbnic_ring_init(&qt->cmpl, db, rxq_idx, FBNIC_RING_F_STATS); fbn->rx[rxq_idx] = &qt->cmpl; + err = xdp_rxq_info_reg(&qt->xdp_rxq, fbn->netdev, rxq_idx, + nv->napi.napi_id); + if (err) + goto free_ring_cur_qt; + /* Update Rx queue index */ rxt_count--; rxq_idx += v_count; @@ -1432,6 +1479,25 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn, return 0; + while (rxt_count < nv->rxt_count) { + qt--; + + xdp_rxq_info_unreg(&qt->xdp_rxq); +free_ring_cur_qt: + fbnic_remove_rx_ring(fbn, &qt->sub0); + fbnic_remove_rx_ring(fbn, &qt->sub1); + fbnic_remove_rx_ring(fbn, &qt->cmpl); + rxt_count++; + } + while (txt_count < nv->txt_count) { + qt--; + + fbnic_remove_tx_ring(fbn, &qt->sub0); + fbnic_remove_tx_ring(fbn, &qt->cmpl); + + txt_count++; + } + fbnic_napi_free_irq(fbd, nv); pp_destroy: page_pool_destroy(nv->page_pool); napi_del: @@ -1708,8 +1774,10 @@ static void fbnic_free_nv_resources(struct fbnic_net *fbn, for (i = 0; i < nv->txt_count; i++) fbnic_free_qt_resources(fbn, &nv->qt[i]); - for (j = 0; j < nv->rxt_count; j++, i++) + for (j = 0; j < nv->rxt_count; j++, i++) { fbnic_free_qt_resources(fbn, &nv->qt[i]); + xdp_rxq_info_unreg_mem_model(&nv->qt[i].xdp_rxq); + } } static int fbnic_alloc_nv_resources(struct fbnic_net *fbn, @@ -1721,19 +1789,32 @@ static int fbnic_alloc_nv_resources(struct fbnic_net *fbn, for (i = 0; i < nv->txt_count; i++) { err = fbnic_alloc_tx_qt_resources(fbn, &nv->qt[i]); if (err) - goto free_resources; + goto free_qt_resources; } /* Allocate Rx Resources */ for (j = 0; j < nv->rxt_count; j++, i++) { + /* Register XDP memory model for completion queue */ + err = xdp_reg_mem_model(&nv->qt[i].xdp_rxq.mem, + MEM_TYPE_PAGE_POOL, + nv->page_pool); + if (err) + goto xdp_unreg_mem_model; + err = fbnic_alloc_rx_qt_resources(fbn, &nv->qt[i]); if (err) - goto free_resources; + goto xdp_unreg_cur_model; } return 0; -free_resources: +xdp_unreg_mem_model: + while (j-- && i--) { + fbnic_free_qt_resources(fbn, &nv->qt[i]); +xdp_unreg_cur_model: + xdp_rxq_info_unreg_mem_model(&nv->qt[i].xdp_rxq); + } +free_qt_resources: while (i--) fbnic_free_qt_resources(fbn, &nv->qt[i]); return err; @@ -2025,7 +2106,7 @@ void fbnic_flush(struct fbnic_net *fbn) memset(qt->cmpl.desc, 0, qt->cmpl.size); fbnic_put_pkt_buff(nv, qt->cmpl.pkt, 0); - qt->cmpl.pkt->buff.data_hard_start = NULL; + memset(qt->cmpl.pkt, 0, sizeof(struct fbnic_pkt_buff)); } } } diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h index be34962c465e..0fefd1f00196 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h @@ -129,6 +129,7 @@ struct fbnic_ring { struct fbnic_q_triad { struct fbnic_ring sub0, sub1, cmpl; + struct xdp_rxq_info xdp_rxq; }; struct fbnic_napi_vector { -- 2.47.1