From: Tao Lan add support for tracepoint to dump some fields of rx_desc Signed-off-by: Tao Lan Signed-off-by: Jijie Shao --- .../net/ethernet/hisilicon/hibmcge/Makefile | 1 + .../net/ethernet/hisilicon/hibmcge/hbg_reg.h | 4 + .../ethernet/hisilicon/hibmcge/hbg_trace.h | 84 +++++++++++++++++++ .../net/ethernet/hisilicon/hibmcge/hbg_txrx.c | 4 + 4 files changed, 93 insertions(+) create mode 100644 drivers/net/ethernet/hisilicon/hibmcge/hbg_trace.h diff --git a/drivers/net/ethernet/hisilicon/hibmcge/Makefile b/drivers/net/ethernet/hisilicon/hibmcge/Makefile index 1a9da564b306..d6610ba16855 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/Makefile +++ b/drivers/net/ethernet/hisilicon/hibmcge/Makefile @@ -3,6 +3,7 @@ # Makefile for the HISILICON BMC GE network device drivers. # +ccflags-y += -I$(src) obj-$(CONFIG_HIBMCGE) += hibmcge.o hibmcge-objs = hbg_main.o hbg_hw.o hbg_mdio.o hbg_irq.o hbg_txrx.o hbg_ethtool.o \ diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h index a39d1e796e4a..30b3903c8f2d 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h @@ -252,6 +252,8 @@ struct hbg_rx_desc { #define HBG_RX_DESC_W2_PKT_LEN_M GENMASK(31, 16) #define HBG_RX_DESC_W2_PORT_NUM_M GENMASK(15, 12) +#define HBG_RX_DESC_W3_IP_OFFSET_M GENMASK(23, 16) +#define HBG_RX_DESC_W3_VLAN_M GENMASK(15, 0) #define HBG_RX_DESC_W4_IP_TCP_UDP_M GENMASK(31, 30) #define HBG_RX_DESC_W4_IPSEC_B BIT(29) #define HBG_RX_DESC_W4_IP_VERSION_B BIT(28) @@ -269,6 +271,8 @@ struct hbg_rx_desc { #define HBG_RX_DESC_W4_L3_ERR_CODE_M GENMASK(12, 9) #define HBG_RX_DESC_W4_L2_ERR_B BIT(8) #define HBG_RX_DESC_W4_IDX_MATCH_B BIT(7) +#define HBG_RX_DESC_W4_PARSE_MODE_M GENMASK(6, 5) +#define HBG_RX_DESC_W5_VALID_SIZE_M GENMASK(15, 0) enum hbg_l3_err_code { HBG_L3_OK = 0, diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_trace.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_trace.h new 
file mode 100644 index 000000000000..b70fd960da8d --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_trace.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2025 Hisilicon Limited. */ + +/* This must be outside ifdef _HBG_TRACE_H_ */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hibmcge + +#if !defined(_HBG_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _HBG_TRACE_H_ + +#include +#include +#include +#include +#include "hbg_reg.h" + +TRACE_EVENT(hbg_rx_desc, + TP_PROTO(struct hbg_priv *priv, u32 index, + struct hbg_rx_desc *rx_desc), + TP_ARGS(priv, index, rx_desc), + + TP_STRUCT__entry(__field(u32, index) + __field(u8, port_num) + __field(u8, ip_offset) + __field(u8, parse_mode) + __field(u8, l4_error_code) + __field(u8, l3_error_code) + __field(u8, l2_error_code) + __field(u16, packet_len) + __field(u16, valid_size) + __field(u16, vlan) + __string(pciname, pci_name(priv->pdev)) + __string(devname, priv->netdev->name) + ), + + TP_fast_assign(__entry->index = index, + __entry->packet_len = + FIELD_GET(HBG_RX_DESC_W2_PKT_LEN_M, + rx_desc->word2); + __entry->port_num = + FIELD_GET(HBG_RX_DESC_W2_PORT_NUM_M, + rx_desc->word2); + __entry->ip_offset = + FIELD_GET(HBG_RX_DESC_W3_IP_OFFSET_M, + rx_desc->word3); + __entry->vlan = + FIELD_GET(HBG_RX_DESC_W3_VLAN_M, + rx_desc->word3); + __entry->parse_mode = + FIELD_GET(HBG_RX_DESC_W4_PARSE_MODE_M, + rx_desc->word4); + __entry->l4_error_code = + FIELD_GET(HBG_RX_DESC_W4_L4_ERR_CODE_M, + rx_desc->word4); + __entry->l3_error_code = + FIELD_GET(HBG_RX_DESC_W4_L3_ERR_CODE_M, + rx_desc->word4); + __entry->l2_error_code = + FIELD_GET(HBG_RX_DESC_W4_L2_ERR_B, + rx_desc->word4); + __entry->valid_size = + FIELD_GET(HBG_RX_DESC_W5_VALID_SIZE_M, + rx_desc->word5); + __assign_str(pciname); + __assign_str(devname); + ), + + TP_printk("%s %s index:%u, port num:%u, len:%u, valid size:%u, ip_offset:%u, vlan:0x%04x, parse mode:%u, l4_err:0x%x, l3_err:0x%x, l2_err:0x%x", + __get_str(pciname), 
__get_str(devname), __entry->index, + __entry->port_num, __entry->packet_len, + __entry->valid_size, __entry->ip_offset, __entry->vlan, + __entry->parse_mode, __entry->l4_error_code, + __entry->l3_error_code, __entry->l2_error_code + ) +); + +#endif /* _HBG_TRACE_H_ */ + +/* This must be outside ifdef _HBG_TRACE_H_ */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE hbg_trace +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.c index 8d814c8f19ea..5f2e48f1dd25 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.c @@ -7,6 +7,9 @@ #include "hbg_reg.h" #include "hbg_txrx.h" +#define CREATE_TRACE_POINTS +#include "hbg_trace.h" + #define netdev_get_tx_ring(netdev) \ (&(((struct hbg_priv *)netdev_priv(netdev))->tx_ring)) @@ -429,6 +432,7 @@ static int hbg_napi_rx_poll(struct napi_struct *napi, int budget) break; rx_desc = (struct hbg_rx_desc *)buffer->skb->data; pkt_len = FIELD_GET(HBG_RX_DESC_W2_PKT_LEN_M, rx_desc->word2); + trace_hbg_rx_desc(priv, ring->ntc, rx_desc); if (unlikely(!hbg_rx_pkt_check(priv, rx_desc, buffer->skb))) { hbg_buffer_free(buffer); -- 2.33.0 Under stress test scenarios, the hibmcge driver may not receive packets in a timely manner, which can lead to the buffer of the hardware queue being exhausted, resulting in packet drops. This patch doubles the software queue depth and uses half of the buffer to fill the hardware queue before receiving packets, thus preventing packet loss caused by the hardware queue buffer being exhausted. 
Signed-off-by: Jijie Shao --- .../net/ethernet/hisilicon/hibmcge/hbg_txrx.c | 47 +++++++++++++++---- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.c index 5f2e48f1dd25..ea691d564161 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.c @@ -377,7 +377,8 @@ static int hbg_rx_fill_one_buffer(struct hbg_priv *priv) struct hbg_buffer *buffer; int ret; - if (hbg_queue_is_full(ring->ntc, ring->ntu, ring)) + if (hbg_queue_is_full(ring->ntc, ring->ntu, ring) || + hbg_fifo_is_full(priv, ring->dir)) return 0; buffer = &ring->queue[ring->ntu]; @@ -396,6 +397,26 @@ static int hbg_rx_fill_one_buffer(struct hbg_priv *priv) return 0; } +static int hbg_rx_fill_buffers(struct hbg_priv *priv) +{ + u32 remained = hbg_hw_get_fifo_used_num(priv, HBG_DIR_RX); + u32 max_count = priv->dev_specs.rx_fifo_num; + u32 refill_count; + int ret; + + if (unlikely(remained >= max_count)) + return 0; + + refill_count = max_count - remained; + while (refill_count--) { + ret = hbg_rx_fill_one_buffer(priv); + if (unlikely(ret)) + break; + } + + return ret; +} + static bool hbg_sync_data_from_hw(struct hbg_priv *priv, struct hbg_buffer *buffer) { @@ -420,6 +441,7 @@ static int hbg_napi_rx_poll(struct napi_struct *napi, int budget) u32 packet_done = 0; u32 pkt_len; + hbg_rx_fill_buffers(priv); while (packet_done < budget) { if (unlikely(hbg_queue_is_empty(ring->ntc, ring->ntu, ring))) break; @@ -497,6 +519,16 @@ static int hbg_ring_init(struct hbg_priv *priv, struct hbg_ring *ring, u32 i, len; len = hbg_get_spec_fifo_max_num(priv, dir) + 1; + /* To improve receiving performance under high-stress scenarios, + * in the `hbg_napi_rx_poll()`, we first use the other half of + * the buffer to receive packets from the hardware via the + * `hbg_rx_fill_buffers()`, and then process the packets in the + * original half of the buffer to 
avoid packet loss caused by + * hardware overflow as much as possible. + */ + if (dir == HBG_DIR_RX) + len += hbg_get_spec_fifo_max_num(priv, dir); + ring->queue = dma_alloc_coherent(&priv->pdev->dev, len * sizeof(*ring->queue), &ring->queue_dma, GFP_KERNEL); @@ -545,21 +577,16 @@ static int hbg_tx_ring_init(struct hbg_priv *priv) static int hbg_rx_ring_init(struct hbg_priv *priv) { int ret; - u32 i; ret = hbg_ring_init(priv, &priv->rx_ring, hbg_napi_rx_poll, HBG_DIR_RX); if (ret) return ret; - for (i = 0; i < priv->rx_ring.len - 1; i++) { - ret = hbg_rx_fill_one_buffer(priv); - if (ret) { - hbg_ring_uninit(&priv->rx_ring); - return ret; - } - } + ret = hbg_rx_fill_buffers(priv); + if (ret) + hbg_ring_uninit(&priv->rx_ring); - return 0; + return ret; } int hbg_txrx_init(struct hbg_priv *priv) -- 2.33.0 add support for pagepool on rx, and remove the legacy path Signed-off-by: Jijie Shao --- ChangeLog: v1 -> v2: - remove the legacy path after using pagepool, suggested by Jakub. v1: https://lore.kernel.org/all/20251117174957.631e7b40@kernel.org/ --- drivers/net/ethernet/hisilicon/Kconfig | 1 + .../ethernet/hisilicon/hibmcge/hbg_common.h | 8 + .../net/ethernet/hisilicon/hibmcge/hbg_txrx.c | 166 ++++++++++++++---- 3 files changed, 142 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig index 38875c196cb6..18eca7d12c20 100644 --- a/drivers/net/ethernet/hisilicon/Kconfig +++ b/drivers/net/ethernet/hisilicon/Kconfig @@ -151,6 +151,7 @@ config HIBMCGE select FIXED_PHY select MOTORCOMM_PHY select REALTEK_PHY + select PAGE_POOL help If you wish to compile a kernel for a BMC with HIBMC-xx_gmac then you should answer Y to this. 
This makes this driver suitable for use diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h index 2097e4c2b3d7..8e134da3e217 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h @@ -7,6 +7,7 @@ #include #include #include +#include #include "hbg_reg.h" #define HBG_STATUS_DISABLE 0x0 @@ -55,6 +56,12 @@ struct hbg_buffer { dma_addr_t skb_dma; u32 skb_len; + struct page *page; + void *page_addr; + dma_addr_t page_dma; + u32 page_size; + u32 page_offset; + enum hbg_dir dir; struct hbg_ring *ring; struct hbg_priv *priv; @@ -78,6 +85,7 @@ struct hbg_ring { struct hbg_priv *priv; struct napi_struct napi; char *tout_log_buf; /* tx timeout log buffer */ + struct page_pool *page_pool; /* only for rx */ }; enum hbg_hw_event_type { diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.c index ea691d564161..a4ea92c31c2f 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_txrx.c @@ -31,6 +31,11 @@ typeof(ring) _ring = (ring); \ _ring->p = hbg_queue_next_prt(_ring->p, _ring); }) +#define hbg_get_page_order(ring) ({ \ + typeof(ring) _ring = (ring); \ + get_order(hbg_spec_max_frame_len(_ring->priv, _ring->dir)); }) +#define hbg_get_page_size(ring) (PAGE_SIZE << hbg_get_page_order((ring))) + #define HBG_TX_STOP_THRS 2 #define HBG_TX_START_THRS (2 * HBG_TX_STOP_THRS) @@ -65,6 +70,43 @@ static void hbg_dma_unmap(struct hbg_buffer *buffer) buffer->skb_dma = 0; } +static void hbg_buffer_free_page(struct hbg_buffer *buffer) +{ + struct hbg_ring *ring = buffer->ring; + + if (unlikely(!buffer->page)) + return; + + page_pool_put_full_page(ring->page_pool, buffer->page, false); + + buffer->page = NULL; + buffer->page_dma = 0; + buffer->page_addr = NULL; + buffer->page_size = 0; + buffer->page_offset = 0; +} + +static int 
hbg_buffer_alloc_page(struct hbg_buffer *buffer) +{ + struct hbg_ring *ring = buffer->ring; + u32 len = hbg_get_page_size(ring); + u32 offset; + + if (unlikely(!ring->page_pool)) + return 0; + + buffer->page = page_pool_dev_alloc_frag(ring->page_pool, &offset, len); + if (unlikely(!buffer->page)) + return -ENOMEM; + + buffer->page_dma = page_pool_get_dma_addr(buffer->page) + offset; + buffer->page_addr = page_address(buffer->page) + offset; + buffer->page_size = len; + buffer->page_offset = offset; + + return 0; +} + static void hbg_init_tx_desc(struct hbg_buffer *buffer, struct hbg_tx_desc *tx_desc) { @@ -138,24 +180,14 @@ static void hbg_buffer_free_skb(struct hbg_buffer *buffer) buffer->skb = NULL; } -static int hbg_buffer_alloc_skb(struct hbg_buffer *buffer) -{ - u32 len = hbg_spec_max_frame_len(buffer->priv, buffer->dir); - struct hbg_priv *priv = buffer->priv; - - buffer->skb = netdev_alloc_skb(priv->netdev, len); - if (unlikely(!buffer->skb)) - return -ENOMEM; - - buffer->skb_len = len; - memset(buffer->skb->data, 0, HBG_PACKET_HEAD_SIZE); - return 0; -} - static void hbg_buffer_free(struct hbg_buffer *buffer) { - hbg_dma_unmap(buffer); - hbg_buffer_free_skb(buffer); + if (buffer->skb) { + hbg_dma_unmap(buffer); + return hbg_buffer_free_skb(buffer); + } + + hbg_buffer_free_page(buffer); } static int hbg_napi_tx_recycle(struct napi_struct *napi, int budget) @@ -382,17 +414,15 @@ static int hbg_rx_fill_one_buffer(struct hbg_priv *priv) return 0; buffer = &ring->queue[ring->ntu]; - ret = hbg_buffer_alloc_skb(buffer); + ret = hbg_buffer_alloc_page(buffer); if (unlikely(ret)) return ret; - ret = hbg_dma_map(buffer); - if (unlikely(ret)) { - hbg_buffer_free_skb(buffer); - return ret; - } + memset(buffer->page_addr, 0, HBG_PACKET_HEAD_SIZE); + dma_sync_single_for_device(&priv->pdev->dev, buffer->page_dma, + HBG_PACKET_HEAD_SIZE, DMA_TO_DEVICE); - hbg_hw_fill_buffer(priv, buffer->skb_dma); + hbg_hw_fill_buffer(priv, buffer->page_dma); hbg_queue_move_next(ntu, ring); 
return 0; } @@ -425,13 +455,29 @@ static bool hbg_sync_data_from_hw(struct hbg_priv *priv, /* make sure HW write desc complete */ dma_rmb(); - dma_sync_single_for_cpu(&priv->pdev->dev, buffer->skb_dma, - buffer->skb_len, DMA_FROM_DEVICE); + dma_sync_single_for_cpu(&priv->pdev->dev, buffer->page_dma, + buffer->page_size, DMA_FROM_DEVICE); - rx_desc = (struct hbg_rx_desc *)buffer->skb->data; + rx_desc = (struct hbg_rx_desc *)buffer->page_addr; return FIELD_GET(HBG_RX_DESC_W2_PKT_LEN_M, rx_desc->word2) != 0; } +static int hbg_build_skb(struct hbg_priv *priv, + struct hbg_buffer *buffer, u32 pkt_len) +{ + net_prefetch(buffer->page_addr); + + buffer->skb = napi_build_skb(buffer->page_addr, buffer->page_size); + if (unlikely(!buffer->skb)) + return -ENOMEM; + skb_mark_for_recycle(buffer->skb); + + /* page will be freed together with the skb */ + buffer->page = NULL; + + return 0; +} + static int hbg_napi_rx_poll(struct napi_struct *napi, int budget) { struct hbg_ring *ring = container_of(napi, struct hbg_ring, napi); @@ -447,29 +493,33 @@ static int hbg_napi_rx_poll(struct napi_struct *napi, int budget) break; buffer = &ring->queue[ring->ntc]; - if (unlikely(!buffer->skb)) + if (unlikely(!buffer->page)) goto next_buffer; if (unlikely(!hbg_sync_data_from_hw(priv, buffer))) break; - rx_desc = (struct hbg_rx_desc *)buffer->skb->data; + rx_desc = (struct hbg_rx_desc *)buffer->page_addr; pkt_len = FIELD_GET(HBG_RX_DESC_W2_PKT_LEN_M, rx_desc->word2); trace_hbg_rx_desc(priv, ring->ntc, rx_desc); + if (unlikely(hbg_build_skb(priv, buffer, pkt_len))) { + hbg_buffer_free_page(buffer); + goto next_buffer; + } + if (unlikely(!hbg_rx_pkt_check(priv, rx_desc, buffer->skb))) { - hbg_buffer_free(buffer); + hbg_buffer_free_skb(buffer); goto next_buffer; } - hbg_dma_unmap(buffer); skb_reserve(buffer->skb, HBG_PACKET_HEAD_SIZE + NET_IP_ALIGN); skb_put(buffer->skb, pkt_len); buffer->skb->protocol = eth_type_trans(buffer->skb, priv->netdev); - dev_sw_netstats_rx_add(priv->netdev, pkt_len); 
napi_gro_receive(napi, buffer->skb); buffer->skb = NULL; + buffer->page = NULL; next_buffer: hbg_rx_fill_one_buffer(priv); @@ -484,6 +534,42 @@ static int hbg_napi_rx_poll(struct napi_struct *napi, int budget) return packet_done; } +static void hbg_ring_page_pool_destory(struct hbg_ring *ring) +{ + if (!ring->page_pool) + return; + + page_pool_destroy(ring->page_pool); + ring->page_pool = NULL; +} + +static int hbg_ring_page_pool_init(struct hbg_priv *priv, struct hbg_ring *ring) +{ + u32 buf_size = hbg_spec_max_frame_len(priv, ring->dir); + struct page_pool_params pp_params = { + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, + .order = hbg_get_page_order(ring), + .pool_size = ring->len * buf_size / hbg_get_page_size(ring), + .nid = dev_to_node(&priv->pdev->dev), + .dev = &priv->pdev->dev, + .napi = &ring->napi, + .dma_dir = DMA_FROM_DEVICE, + .offset = 0, + .max_len = hbg_get_page_size(ring), + }; + int ret = 0; + + ring->page_pool = page_pool_create(&pp_params); + if (IS_ERR(ring->page_pool)) { + ret = PTR_ERR(ring->page_pool); + dev_err(&priv->pdev->dev, + "failed to create page pool, ret = %d\n", ret); + ring->page_pool = NULL; + } + + return ret; +} + static void hbg_ring_uninit(struct hbg_ring *ring) { struct hbg_buffer *buffer; @@ -502,6 +588,7 @@ static void hbg_ring_uninit(struct hbg_ring *ring) buffer->priv = NULL; } + hbg_ring_page_pool_destory(ring); dma_free_coherent(&ring->priv->pdev->dev, ring->len * sizeof(*ring->queue), ring->queue, ring->queue_dma); @@ -517,6 +604,7 @@ static int hbg_ring_init(struct hbg_priv *priv, struct hbg_ring *ring, { struct hbg_buffer *buffer; u32 i, len; + int ret; len = hbg_get_spec_fifo_max_num(priv, dir) + 1; /* To improve receiving performance under high-stress scenarios, @@ -550,11 +638,23 @@ static int hbg_ring_init(struct hbg_priv *priv, struct hbg_ring *ring, ring->ntu = 0; ring->len = len; - if (dir == HBG_DIR_TX) + if (dir == HBG_DIR_TX) { netif_napi_add_tx(priv->netdev, &ring->napi, napi_poll); - else + } 
else { netif_napi_add(priv->netdev, &ring->napi, napi_poll); + ret = hbg_ring_page_pool_init(priv, ring); + if (ret) { + netif_napi_del(&ring->napi); + dma_free_coherent(&ring->priv->pdev->dev, + ring->len * sizeof(*ring->queue), + ring->queue, ring->queue_dma); + ring->queue = NULL; + ring->len = 0; + return ret; + } + } + napi_enable(&ring->napi); return 0; } -- 2.33.0