enic_remove() cancels the reset and change_mtu_work items but does not cancel tx_hang_reset. A TX timeout that fires while the device is being removed can schedule enic_tx_hang_reset() so that it runs after free_netdev(), resulting in a use-after-free. Cancel tx_hang_reset alongside the other work items before unregister_netdev(). This is a pre-existing issue, not introduced by the SR-IOV V2 series; it is included here as an independent fix. Fixes: 937317c7c109 ("enic: do hang reset only in case of tx timeout") Signed-off-by: Satish Kharat --- drivers/net/ethernet/cisco/enic/enic_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index e7125b818087..b65796d96efc 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -3012,6 +3012,7 @@ static void enic_remove(struct pci_dev *pdev) struct enic *enic = netdev_priv(netdev); cancel_work_sync(&enic->reset); + cancel_work_sync(&enic->tx_hang_reset); cancel_work_sync(&enic->change_mtu_work); unregister_netdev(netdev); enic_dev_deinit(enic); -- 2.43.0 During PF probe, query the firmware get-supported-feature interface to verify that the running firmware supports V2 SR-IOV. Firmware version 5.3(4.72) and later report VIC_FEATURE_SRIOV via CMD_GET_SUPP_FEATURE_VER. If the firmware does not support the feature, set vf_type to ENIC_VF_TYPE_NONE and log a warning so the admin knows a firmware upgrade is needed. V2 VFs are only ever enabled later through the sysfs .sriov_configure path (enic_sriov_configure()), which rejects ENIC_VF_TYPE_NONE before calling pci_enable_sriov(); there is no probe-time auto-enable, so firmware that lacks V2 support never exposes VFs. VIC_FEATURE_SRIOV is assigned the explicit value 4 to match the firmware ABI. 
Slot 3 (firmware's VIC_FEATURE_PTP) is reserved with a comment rather than a placeholder enum entry, since PTP is not used by the upstream driver. Suggested-by: Breno Leitao Signed-off-by: Satish Kharat --- drivers/net/ethernet/cisco/enic/enic_main.c | 21 ++++++++++++++++++++- drivers/net/ethernet/cisco/enic/vnic_devcmd.h | 2 ++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index b65796d96efc..6992411bd3b5 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2641,8 +2641,10 @@ static void enic_iounmap(struct enic *enic) static void enic_sriov_detect_vf_type(struct enic *enic) { struct pci_dev *pdev = enic->pdev; - int pos; + u64 supported_versions, a1 = 0; u16 vf_dev_id; + int pos; + int err; if (enic_is_sriov_vf(enic) || enic_is_dynamic(enic)) return; @@ -2669,6 +2671,23 @@ static void enic_sriov_detect_vf_type(struct enic *enic) enic->vf_type = ENIC_VF_TYPE_NONE; break; } + + if (enic->vf_type != ENIC_VF_TYPE_V2) + return; + + /* A successful command means firmware recognizes + * VIC_FEATURE_SRIOV; supported_versions is available + * for sub-feature versioning in the future. + */ + err = vnic_dev_get_supported_feature_ver(enic->vdev, + VIC_FEATURE_SRIOV, + &supported_versions, + &a1); + if (err) { + dev_warn(&pdev->dev, + "SR-IOV V2 not supported by current firmware. 
Upgrade to VIC FW 5.3(4.72) or higher.\n"); + enic->vf_type = ENIC_VF_TYPE_NONE; + } } #endif diff --git a/drivers/net/ethernet/cisco/enic/vnic_devcmd.h b/drivers/net/ethernet/cisco/enic/vnic_devcmd.h index 605ef17f967e..3b6efa743dba 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_devcmd.h +++ b/drivers/net/ethernet/cisco/enic/vnic_devcmd.h @@ -734,6 +734,8 @@ enum vic_feature_t { VIC_FEATURE_VXLAN, VIC_FEATURE_RDMA, VIC_FEATURE_VXLAN_PATCH, + /* slot 3 reserved for firmware VIC_FEATURE_PTP */ + VIC_FEATURE_SRIOV = 4, VIC_FEATURE_MAX, }; -- 2.43.0 The V2 SR-IOV design uses a dedicated admin channel (WQ/RQ/CQ/INTR on separate BAR resources) for PF-VF mailbox communication rather than firmware-proxied devcmds. Introduce enic_admin_channel_open() and enic_admin_channel_close(). Open allocates and initialises the admin WQ, RQ, and two CQs (one per direction), then issues CMD_QP_TYPE_SET to tell firmware the queues are admin-type. Close reverses the sequence. enic_admin_wq_buf_clean() unmaps and frees any WQ buffers still held at close time, fixing a DMA mapping leak when a send times out. Add CMD_QP_TYPE_SET (97), QP_TYPE_ADMIN/DATA, and QP_ENABLE/QP_DISABLE defines to vnic_devcmd.h. Add VNIC_CQ_* named constants to vnic_cq.h so CQ initialisation parameters are self-documenting from their first introduction. 
Signed-off-by: Satish Kharat --- drivers/net/ethernet/cisco/enic/Makefile | 3 +- drivers/net/ethernet/cisco/enic/enic.h | 5 + drivers/net/ethernet/cisco/enic/enic_admin.c | 227 ++++++++++++++++++++++++++ drivers/net/ethernet/cisco/enic/enic_admin.h | 15 ++ drivers/net/ethernet/cisco/enic/vnic_cq.h | 9 + drivers/net/ethernet/cisco/enic/vnic_devcmd.h | 11 ++ 6 files changed, 269 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cisco/enic/Makefile b/drivers/net/ethernet/cisco/enic/Makefile index a96b8332e6e2..7ae72fefc99a 100644 --- a/drivers/net/ethernet/cisco/enic/Makefile +++ b/drivers/net/ethernet/cisco/enic/Makefile @@ -3,5 +3,6 @@ obj-$(CONFIG_ENIC) := enic.o enic-y := enic_main.o vnic_cq.o vnic_intr.o vnic_wq.o \ enic_res.o enic_dev.o enic_pp.o vnic_dev.o vnic_rq.o vnic_vic.o \ - enic_ethtool.o enic_api.o enic_clsf.o enic_rq.o enic_wq.o + enic_ethtool.o enic_api.o enic_clsf.o enic_rq.o enic_wq.o \ + enic_admin.o diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index 08472420f3a1..398227448b37 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -292,6 +292,11 @@ struct enic { /* Admin channel resources for SR-IOV MBOX */ bool has_admin_channel; + /* true only while the admin WQ/RQ/CQ are allocated and enabled; gates + * enic_admin_channel_close() so it is a no-op after a failed (re)open + * left the resources freed. + */ + bool admin_chan_up; struct vnic_wq admin_wq; struct vnic_rq admin_rq; struct vnic_cq admin_cq[2]; diff --git a/drivers/net/ethernet/cisco/enic/enic_admin.c b/drivers/net/ethernet/cisco/enic/enic_admin.c new file mode 100644 index 000000000000..50b46b92c88f --- /dev/null +++ b/drivers/net/ethernet/cisco/enic/enic_admin.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright 2025 Cisco Systems, Inc. All rights reserved. 
+ +#include +#include + +#include "vnic_dev.h" +#include "vnic_wq.h" +#include "vnic_rq.h" +#include "vnic_cq.h" +#include "vnic_intr.h" +#include "vnic_resource.h" +#include "vnic_devcmd.h" +#include "enic.h" +#include "enic_admin.h" +#include "cq_desc.h" +#include "wq_enet_desc.h" +#include "rq_enet_desc.h" + +/* Clean up any admin WQ buffers still held by hardware at close time. + * Normally buffers are freed inline after send completion, but a timed-out + * send intentionally leaves the buffer live until the queue is stopped. + */ +static void enic_admin_wq_buf_clean(struct vnic_wq *wq, + struct vnic_wq_buf *buf) +{ + struct enic *enic = vnic_dev_priv(wq->vdev); + + if (buf->os_buf) { + dma_unmap_single(&enic->pdev->dev, buf->dma_addr, + buf->len, DMA_TO_DEVICE); + kfree(buf->os_buf); + buf->os_buf = NULL; + } +} + +/* No-op: admin RQ buffer teardown is handled in enic_admin_channel_close */ +static void enic_admin_rq_buf_clean(struct vnic_rq *rq, + struct vnic_rq_buf *buf) +{ +} + +static int enic_admin_qp_type_set(struct enic *enic, u32 enable) +{ + u64 a0 = QP_TYPE_ADMIN, a1 = enable; + int wait = 1000; + int err; + + spin_lock_bh(&enic->devcmd_lock); + err = vnic_dev_cmd(enic->vdev, CMD_QP_TYPE_SET, &a0, &a1, wait); + spin_unlock_bh(&enic->devcmd_lock); + + return err; +} + +static int enic_admin_alloc_resources(struct enic *enic) +{ + int err; + + err = vnic_wq_alloc_with_type(enic->vdev, &enic->admin_wq, 0, + ENIC_ADMIN_DESC_COUNT, + sizeof(struct wq_enet_desc), + RES_TYPE_ADMIN_WQ); + if (err) + return err; + + err = vnic_rq_alloc_with_type(enic->vdev, &enic->admin_rq, 0, + ENIC_ADMIN_DESC_COUNT, + sizeof(struct rq_enet_desc), + RES_TYPE_ADMIN_RQ); + if (err) + goto free_wq; + + /* admin_cq[0] is the WQ completion queue. WQ CQEs are always + * 16 bytes wide; firmware always writes 16-byte CQEs for WQ + * completions on every WQ, including the admin channel WQ. + * Use sizeof(struct cq_desc) accordingly. 
+ */ + err = vnic_cq_alloc_with_type(enic->vdev, &enic->admin_cq[0], 0, + ENIC_ADMIN_DESC_COUNT, + sizeof(struct cq_desc), + RES_TYPE_ADMIN_CQ); + if (err) + goto free_rq; + + /* admin_cq[1] is the RQ completion queue. Its descriptor size + * must match what firmware writes. enic_ext_cq() called earlier + * in probe issues CMD_CQ_ENTRY_SIZE_SET for VNIC_RQ_ALL, + * programming firmware to write CQ entries of (16 << enic->ext_cq) + * bytes for every RQ CQ on the vNIC, including the admin RQ CQ. + * Allocating with the same size keeps the host poller and + * firmware in lockstep: + * + * - The color/valid bit lives at byte (desc_size - 1) of every + * cq_enet_rq_desc[_32|_64] variant, so enic_admin_cq_color() + * reads it from the correct offset. + * - Only the first 15 bytes of the descriptor (vlan, + * bytes_written_flags, ...) are accessed by the admin path; + * these fields are identical across all three variants (see + * comment in enic_rq.c above cq_enet_rq_desc_dec()). + */ + err = vnic_cq_alloc_with_type(enic->vdev, &enic->admin_cq[1], 1, + ENIC_ADMIN_DESC_COUNT, + 16 << enic->ext_cq, + RES_TYPE_ADMIN_CQ); + if (err) + goto free_cq0; + + return 0; + +free_cq0: + vnic_cq_free(&enic->admin_cq[0]); +free_rq: + vnic_rq_free(&enic->admin_rq); +free_wq: + vnic_wq_free(&enic->admin_wq); + return err; +} + +static void enic_admin_free_resources(struct enic *enic) +{ + vnic_cq_free(&enic->admin_cq[1]); + vnic_cq_free(&enic->admin_cq[0]); + vnic_rq_free(&enic->admin_rq); + vnic_wq_free(&enic->admin_wq); +} + +static void enic_admin_init_resources(struct enic *enic) +{ + vnic_wq_init(&enic->admin_wq, + 0, 0, 0); /* cq_index, err_intr_enable, err_intr_offset */ + vnic_rq_init(&enic->admin_rq, + 1, 0, 0); /* cq_index, err_intr_enable, err_intr_offset */ + vnic_cq_init(&enic->admin_cq[0], + VNIC_CQ_FC_DISABLE, + VNIC_CQ_COLOR_ENABLE, + 0, 0, 1, /* cq_head, cq_tail, cq_tail_color */ + VNIC_CQ_INTR_DISABLE, + VNIC_CQ_ENTRY_ENABLE, + VNIC_CQ_MSG_DISABLE, + 0, /* 
interrupt_offset */ + 0 /* cq_message_addr */); + vnic_cq_init(&enic->admin_cq[1], + VNIC_CQ_FC_DISABLE, + VNIC_CQ_COLOR_ENABLE, + 0, 0, 1, /* cq_head, cq_tail, cq_tail_color */ + VNIC_CQ_INTR_DISABLE, + VNIC_CQ_ENTRY_ENABLE, + VNIC_CQ_MSG_DISABLE, + 0, /* interrupt_offset */ + 0 /* cq_message_addr */); +} + +int enic_admin_channel_open(struct enic *enic) +{ + int err; + + if (!enic->has_admin_channel) + return -ENODEV; + + err = enic_admin_alloc_resources(enic); + if (err) { + netdev_err(enic->netdev, + "Failed to alloc admin channel resources: %d\n", + err); + return err; + } + + enic_admin_init_resources(enic); + + vnic_wq_enable(&enic->admin_wq); + vnic_rq_enable(&enic->admin_rq); + + err = enic_admin_qp_type_set(enic, QP_ENABLE); + if (err) { + netdev_err(enic->netdev, + "Failed to set admin QP type: %d\n", err); + goto disable_queues; + } + + enic->admin_chan_up = true; + + return 0; + +disable_queues: + enic_admin_qp_type_set(enic, QP_DISABLE); + if (vnic_wq_disable(&enic->admin_wq)) + netdev_warn(enic->netdev, "Failed to disable admin WQ\n"); + if (vnic_rq_disable(&enic->admin_rq)) + netdev_warn(enic->netdev, "Failed to disable admin RQ\n"); + enic_admin_free_resources(enic); + return err; +} + +void enic_admin_channel_close(struct enic *enic) +{ + int err; + + if (!enic->has_admin_channel) + return; + + /* Nothing to tear down if the channel was never (re)opened, e.g. a + * failed enic_admin_channel_open() in probe or in the reset path; + * otherwise the disable/clean calls below dereference freed resources. 
+ */ + if (!enic->admin_chan_up) + return; + + enic_admin_qp_type_set(enic, QP_DISABLE); + + err = vnic_wq_disable(&enic->admin_wq); + if (err) + netdev_warn(enic->netdev, + "Failed to disable admin WQ: %d\n", err); + err = vnic_rq_disable(&enic->admin_rq); + if (err) + netdev_warn(enic->netdev, + "Failed to disable admin RQ: %d\n", err); + + vnic_wq_clean(&enic->admin_wq, enic_admin_wq_buf_clean); + vnic_rq_clean(&enic->admin_rq, enic_admin_rq_buf_clean); + vnic_cq_clean(&enic->admin_cq[0]); + vnic_cq_clean(&enic->admin_cq[1]); + enic_admin_free_resources(enic); + + enic->admin_chan_up = false; +} diff --git a/drivers/net/ethernet/cisco/enic/enic_admin.h b/drivers/net/ethernet/cisco/enic/enic_admin.h new file mode 100644 index 000000000000..569aadeb9312 --- /dev/null +++ b/drivers/net/ethernet/cisco/enic/enic_admin.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright 2025 Cisco Systems, Inc. All rights reserved. */ + +#ifndef _ENIC_ADMIN_H_ +#define _ENIC_ADMIN_H_ + +#define ENIC_ADMIN_DESC_COUNT 64 +#define ENIC_ADMIN_BUF_SIZE 2048 + +struct enic; + +int enic_admin_channel_open(struct enic *enic); +void enic_admin_channel_close(struct enic *enic); + +#endif /* _ENIC_ADMIN_H_ */ diff --git a/drivers/net/ethernet/cisco/enic/vnic_cq.h b/drivers/net/ethernet/cisco/enic/vnic_cq.h index d46d4d2ef6bb..35ffa3230713 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_cq.h +++ b/drivers/net/ethernet/cisco/enic/vnic_cq.h @@ -76,6 +76,15 @@ int vnic_cq_alloc(struct vnic_dev *vdev, struct vnic_cq *cq, unsigned int index, int vnic_cq_alloc_with_type(struct vnic_dev *vdev, struct vnic_cq *cq, unsigned int index, unsigned int desc_count, unsigned int desc_size, unsigned int res_type); +#define VNIC_CQ_FC_ENABLE 1 +#define VNIC_CQ_FC_DISABLE 0 +#define VNIC_CQ_COLOR_ENABLE 1 +#define VNIC_CQ_INTR_ENABLE 1 +#define VNIC_CQ_INTR_DISABLE 0 +#define VNIC_CQ_ENTRY_ENABLE 1 +#define VNIC_CQ_MSG_ENABLE 1 +#define VNIC_CQ_MSG_DISABLE 0 + void vnic_cq_init(struct 
vnic_cq *cq, unsigned int flow_control_enable, unsigned int color_enable, unsigned int cq_head, unsigned int cq_tail, unsigned int cq_tail_color, unsigned int interrupt_enable, diff --git a/drivers/net/ethernet/cisco/enic/vnic_devcmd.h b/drivers/net/ethernet/cisco/enic/vnic_devcmd.h index 3b6efa743dba..90ca06691ebd 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_devcmd.h +++ b/drivers/net/ethernet/cisco/enic/vnic_devcmd.h @@ -455,8 +455,19 @@ enum vnic_devcmd_cmd { */ CMD_CQ_ENTRY_SIZE_SET = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 90), + /* + * Set queue pair type (admin or data) + * in: (u32) a0 = queue pair type (0 = admin, 1 = data) + * in: (u32) a1 = enable (1) / disable (0) + */ + CMD_QP_TYPE_SET = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 97), }; +#define QP_TYPE_ADMIN 0 +#define QP_TYPE_DATA 1 +#define QP_ENABLE 1 +#define QP_DISABLE 0 + /* CMD_ENABLE2 flags */ #define CMD_ENABLE2_STANDBY 0x0 #define CMD_ENABLE2_ACTIVE 0x1 -- 2.43.0 The admin receive queue needs pre-posted DMA buffers for incoming mailbox messages from VFs. Each buffer is a kmalloc'd region mapped for DMA (2048 bytes, sufficient for any MBOX message). Add enic_admin_rq_fill(gfp) to post buffers at open time, and enic_admin_rq_drain() to unmap and free them at close time. Wire both into the admin channel open/close paths. The gfp_t parameter lets the caller pass the allocation context; both current callers -- channel open and the CQ-poll work handler that refills after draining (added in the next patch) -- run in process context and use GFP_KERNEL. 
Signed-off-by: Satish Kharat --- drivers/net/ethernet/cisco/enic/enic_admin.c | 66 +++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_admin.c b/drivers/net/ethernet/cisco/enic/enic_admin.c index 50b46b92c88f..b2be42092106 100644 --- a/drivers/net/ethernet/cisco/enic/enic_admin.c +++ b/drivers/net/ethernet/cisco/enic/enic_admin.c @@ -3,6 +3,7 @@ #include #include +#include #include "vnic_dev.h" #include "vnic_wq.h" @@ -34,10 +35,63 @@ static void enic_admin_wq_buf_clean(struct vnic_wq *wq, } } -/* No-op: admin RQ buffer teardown is handled in enic_admin_channel_close */ static void enic_admin_rq_buf_clean(struct vnic_rq *rq, struct vnic_rq_buf *buf) { + struct enic *enic = vnic_dev_priv(rq->vdev); + + if (!buf->os_buf) + return; + + dma_unmap_single(&enic->pdev->dev, buf->dma_addr, buf->len, + DMA_FROM_DEVICE); + kfree(buf->os_buf); + buf->os_buf = NULL; +} + +static int enic_admin_rq_post_one(struct enic *enic, gfp_t gfp) +{ + struct vnic_rq *rq = &enic->admin_rq; + struct rq_enet_desc *desc; + dma_addr_t dma_addr; + void *buf; + + buf = kmalloc(ENIC_ADMIN_BUF_SIZE, gfp); + if (!buf) + return -ENOMEM; + + dma_addr = dma_map_single(&enic->pdev->dev, buf, ENIC_ADMIN_BUF_SIZE, + DMA_FROM_DEVICE); + if (dma_mapping_error(&enic->pdev->dev, dma_addr)) { + kfree(buf); + return -ENOMEM; + } + + desc = vnic_rq_next_desc(rq); + rq_enet_desc_enc(desc, (u64)dma_addr | VNIC_PADDR_TARGET, + RQ_ENET_TYPE_ONLY_SOP, ENIC_ADMIN_BUF_SIZE); + vnic_rq_post(rq, buf, 0, dma_addr, ENIC_ADMIN_BUF_SIZE, 0); + + return 0; +} + +static int enic_admin_rq_fill(struct enic *enic, gfp_t gfp) +{ + struct vnic_rq *rq = &enic->admin_rq; + int err; + + while (vnic_rq_desc_avail(rq) > 0) { + err = enic_admin_rq_post_one(enic, gfp); + if (err) + return err; + } + + return 0; +} + +static void enic_admin_rq_drain(struct enic *enic) +{ + vnic_rq_clean(&enic->admin_rq, enic_admin_rq_buf_clean); } static int 
enic_admin_qp_type_set(struct enic *enic, u32 enable) @@ -171,6 +225,13 @@ int enic_admin_channel_open(struct enic *enic) vnic_wq_enable(&enic->admin_wq); vnic_rq_enable(&enic->admin_rq); + err = enic_admin_rq_fill(enic, GFP_KERNEL); + if (err) { + netdev_err(enic->netdev, + "Failed to fill admin RQ buffers: %d\n", err); + goto disable_queues; + } + err = enic_admin_qp_type_set(enic, QP_ENABLE); if (err) { netdev_err(enic->netdev, @@ -188,6 +249,7 @@ int enic_admin_channel_open(struct enic *enic) netdev_warn(enic->netdev, "Failed to disable admin WQ\n"); if (vnic_rq_disable(&enic->admin_rq)) netdev_warn(enic->netdev, "Failed to disable admin RQ\n"); + enic_admin_rq_drain(enic); enic_admin_free_resources(enic); return err; } @@ -218,7 +280,7 @@ void enic_admin_channel_close(struct enic *enic) "Failed to disable admin RQ: %d\n", err); vnic_wq_clean(&enic->admin_wq, enic_admin_wq_buf_clean); - vnic_rq_clean(&enic->admin_rq, enic_admin_rq_buf_clean); + enic_admin_rq_drain(enic); vnic_cq_clean(&enic->admin_cq[0]); vnic_cq_clean(&enic->admin_cq[1]); enic_admin_free_resources(enic); -- 2.43.0 Add completion queue (CQ) service for the admin channel work queue (WQ) and receive queue (RQ), driven by a dedicated MSI-X interrupt and a workqueue-based CQ poller. The admin WQ CQ service advances the completion ring and returns the number of descriptors consumed. The admin RQ CQ service does the same for receive completions and copies each received message into a preallocated buffer. Received messages are enqueued for deferred dispatch by a separate work_struct so the CQ poller stays short. When the MSI-X interrupt fires, the ISR schedules the CQ poll work_struct. The work handler drains all pending completions, kicks message dispatch if work was done, and returns credits to unmask the interrupt. Log a rate-limited warning when admin RQ buffer refill fails so that transient memory pressure is visible without flooding the log. 
Signed-off-by: Satish Kharat --- drivers/net/ethernet/cisco/enic/enic.h | 8 + drivers/net/ethernet/cisco/enic/enic_admin.c | 311 ++++++++++++++++++++++++++- drivers/net/ethernet/cisco/enic/enic_admin.h | 12 ++ 3 files changed, 327 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index 398227448b37..401123e6df1d 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -301,6 +301,14 @@ struct enic { struct vnic_rq admin_rq; struct vnic_cq admin_cq[2]; struct vnic_intr admin_intr; + struct work_struct admin_poll_work; + unsigned int admin_intr_index; + struct work_struct admin_msg_work; + spinlock_t admin_msg_lock; /* protects admin_msg_list */ + struct list_head admin_msg_list; + unsigned int admin_msg_count; /* current depth of admin_msg_list */ + void (*admin_rq_handler)(struct enic *enic, void *buf, + unsigned int len); }; static inline struct net_device *vnic_get_netdev(struct vnic_dev *vdev) diff --git a/drivers/net/ethernet/cisco/enic/enic_admin.c b/drivers/net/ethernet/cisco/enic/enic_admin.c index b2be42092106..6062a18043ba 100644 --- a/drivers/net/ethernet/cisco/enic/enic_admin.c +++ b/drivers/net/ethernet/cisco/enic/enic_admin.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "vnic_dev.h" #include "vnic_wq.h" @@ -15,6 +16,7 @@ #include "enic.h" #include "enic_admin.h" #include "cq_desc.h" +#include "cq_enet_desc.h" #include "wq_enet_desc.h" #include "rq_enet_desc.h" @@ -94,6 +96,254 @@ static void enic_admin_rq_drain(struct enic *enic) vnic_rq_clean(&enic->admin_rq, enic_admin_rq_buf_clean); } +static unsigned int enic_admin_cq_color(void *cq_desc, unsigned int desc_size) +{ + u8 type_color = *((u8 *)cq_desc + desc_size - 1); + + return (type_color >> CQ_DESC_COLOR_SHIFT) & CQ_DESC_COLOR_MASK; +} + +unsigned int enic_admin_wq_cq_service(struct enic *enic) +{ + struct vnic_cq *cq = &enic->admin_cq[0]; + unsigned int work = 0; + 
void *desc; + + desc = vnic_cq_to_clean(cq); + while (enic_admin_cq_color(desc, cq->ring.desc_size) != + cq->last_color) { + vnic_cq_inc_to_clean(cq); + work++; + desc = vnic_cq_to_clean(cq); + } + + return work; +} + +/* Upper bound on pending admin messages. A buggy or hostile VF could flood + * the PF admin channel faster than admin_msg_work drains it; cap the backlog + * so a guest cannot drive the host out of memory. + */ +#define ENIC_ADMIN_MSG_MAX 256 + +static void enic_admin_msg_enqueue(struct enic *enic, void *buf, + unsigned int len) +{ + struct enic_admin_msg *msg; + + msg = kmalloc(struct_size(msg, data, len), GFP_KERNEL); + if (!msg) + return; + + msg->len = len; + memcpy(msg->data, buf, len); + + spin_lock(&enic->admin_msg_lock); + if (enic->admin_msg_count >= ENIC_ADMIN_MSG_MAX) { + spin_unlock(&enic->admin_msg_lock); + kfree(msg); + if (net_ratelimit()) + netdev_warn(enic->netdev, + "admin msg backlog full (%u); dropping\n", + ENIC_ADMIN_MSG_MAX); + return; + } + list_add_tail(&msg->list, &enic->admin_msg_list); + enic->admin_msg_count++; + spin_unlock(&enic->admin_msg_lock); +} + +unsigned int enic_admin_rq_cq_service(struct enic *enic) +{ + struct vnic_cq *cq = &enic->admin_cq[1]; + struct vnic_rq *rq = &enic->admin_rq; + struct cq_enet_rq_desc *rq_desc; + struct vnic_rq_buf *buf; + u16 bwf, bytes_written; + unsigned int work = 0; + void *desc; + + /* The admin RQ and its CQ form a single in-order channel: firmware + * posts exactly one CQE per consumed RQ descriptor, in submission + * order. Each CQE therefore pairs with rq->to_clean below without a + * completed_index cross-check, mirroring the in-order assumption of + * the main enic RX path. + */ + desc = vnic_cq_to_clean(cq); + while (enic_admin_cq_color(desc, cq->ring.desc_size) != + cq->last_color) { + /* Ensure DMA descriptor fields are read after + * the color/valid check. dma_rmb() is the + * correct barrier for DMA-written descriptors. 
+ */ + dma_rmb(); + buf = rq->to_clean; + + /* Decode the actual number of bytes hardware wrote into + * the RX buffer. buf->len is the static allocation size + * (ENIC_ADMIN_BUF_SIZE) and would expose uninitialised + * heap memory beyond the real payload. bytes_written_flags + * is at the same offset in every cq_enet_rq_desc[_32|_64] + * variant. + */ + rq_desc = desc; + bwf = le16_to_cpu(rq_desc->bytes_written_flags); + bytes_written = bwf & CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK; + if (bytes_written > buf->len) + goto next_desc; + + dma_sync_single_for_cpu(&enic->pdev->dev, + buf->dma_addr, buf->len, + DMA_FROM_DEVICE); + + /* Drop on hardware error indications. Admin messages + * are internal to the VIC, not received over the wire. + * Firmware sets TRUNCATED when the message does not fit + * in the posted buffer, and FCS_OK is always set on + * healthy admin completions. + */ + if (bwf & CQ_ENET_RQ_DESC_FLAGS_TRUNCATED) { + netdev_warn_once(enic->netdev, + "admin RQ: truncated message dropped\n"); + goto next_desc; + } + if (!(rq_desc->flags & CQ_ENET_RQ_DESC_FLAGS_FCS_OK)) { + netdev_warn_once(enic->netdev, + "admin RQ: bad FCS, dropping message\n"); + goto next_desc; + } + + enic_admin_msg_enqueue(enic, buf->os_buf, bytes_written); + +next_desc: + enic_admin_rq_buf_clean(rq, rq->to_clean); + rq->to_clean = rq->to_clean->next; + rq->ring.desc_avail++; + + vnic_cq_inc_to_clean(cq); + work++; + desc = vnic_cq_to_clean(cq); + } + + if (enic_admin_rq_fill(enic, GFP_KERNEL) && net_ratelimit()) + netdev_warn(enic->netdev, + "admin RQ refill failed\n"); + + return work; +} + +static irqreturn_t enic_admin_isr_msix(int irq, void *data) +{ + struct enic *enic = data; + + schedule_work(&enic->admin_poll_work); + + return IRQ_HANDLED; +} + +static void enic_admin_msg_work_handler(struct work_struct *work) +{ + struct enic *enic = container_of(work, struct enic, admin_msg_work); + struct enic_admin_msg *msg, *tmp; + LIST_HEAD(local_list); + + 
spin_lock_bh(&enic->admin_msg_lock); + list_splice_init(&enic->admin_msg_list, &local_list); + enic->admin_msg_count = 0; + spin_unlock_bh(&enic->admin_msg_lock); + + list_for_each_entry_safe(msg, tmp, &local_list, list) { + if (enic->admin_rq_handler) + enic->admin_rq_handler(enic, msg->data, msg->len); + list_del(&msg->list); + kfree(msg); + } +} + +static void enic_admin_poll_work_handler(struct work_struct *work) +{ + struct enic *enic = container_of(work, struct enic, admin_poll_work); + unsigned int credits; + unsigned int rq_work; + + credits = vnic_intr_credits(&enic->admin_intr); + + rq_work = enic_admin_rq_cq_service(enic); + + if (rq_work > 0) + schedule_work(&enic->admin_msg_work); + + vnic_intr_return_credits(&enic->admin_intr, + credits ?: 1, + 1 /* unmask */, 0); +} + +static int enic_admin_setup_intr(struct enic *enic) +{ + unsigned int intr_index = enic->intr_count; + int err; + + if (vnic_dev_get_intr_mode(enic->vdev) != VNIC_DEV_INTR_MODE_MSIX || + intr_index >= enic->intr_avail) + return -ENODEV; + + /* The admin INTR uses a slot in the same RES_TYPE_INTR_CTRL + * strided array of per-vector control blocks (mask, coalescing + * timer, credit return) that the data-path IRQs occupy in BAR0. + * vnic_intr_alloc() defaults to RES_TYPE_INTR_CTRL, which is what + * we want here. + */ + err = vnic_intr_alloc(enic->vdev, &enic->admin_intr, intr_index); + if (err) { + netdev_warn(enic->netdev, + "Failed to alloc admin intr at index %u: %d\n", + intr_index, err); + return err; + } + + enic->admin_intr_index = intr_index; + + /* A V2 VF opens the admin channel during probe, before + * register_netdev() resolves the "eth%d" name template, so using + * netdev->name here would register the literal "eth%d-admin" in + * /proc/interrupts. Use the already-stable PCI device name instead. 
+ */ + snprintf(enic->msix[intr_index].devname, + sizeof(enic->msix[intr_index].devname), + "%s-admin", pci_name(enic->pdev)); + enic->msix[intr_index].isr = enic_admin_isr_msix; + enic->msix[intr_index].devid = enic; + + err = request_irq(enic->msix_entry[intr_index].vector, + enic->msix[intr_index].isr, 0, + enic->msix[intr_index].devname, + enic->msix[intr_index].devid); + if (err) { + netdev_warn(enic->netdev, + "Failed to request admin MSI-X irq: %d\n", err); + vnic_intr_free(&enic->admin_intr); + return err; + } + + enic->msix[intr_index].requested = 1; + + netdev_dbg(enic->netdev, + "admin channel using MSI-X interrupt (index %u)\n", + intr_index); + + return 0; +} + +static void enic_admin_teardown_intr(struct enic *enic) +{ + unsigned int intr_index = enic->admin_intr_index; + + free_irq(enic->msix_entry[intr_index].vector, + enic->msix[intr_index].devid); + cancel_work_sync(&enic->admin_poll_work); + enic->msix[intr_index].requested = 0; +} + static int enic_admin_qp_type_set(struct enic *enic, u32 enable) { u64 a0 = QP_TYPE_ADMIN, a1 = enable; @@ -173,6 +423,7 @@ static int enic_admin_alloc_resources(struct enic *enic) static void enic_admin_free_resources(struct enic *enic) { + vnic_intr_free(&enic->admin_intr); vnic_cq_free(&enic->admin_cq[1]); vnic_cq_free(&enic->admin_cq[0]); vnic_rq_free(&enic->admin_rq); @@ -181,6 +432,8 @@ static void enic_admin_free_resources(struct enic *enic) static void enic_admin_init_resources(struct enic *enic) { + unsigned int intr_offset = enic->admin_intr_index; + vnic_wq_init(&enic->admin_wq, 0, 0, 0); /* cq_index, err_intr_enable, err_intr_offset */ vnic_rq_init(&enic->admin_rq, @@ -189,20 +442,35 @@ static void enic_admin_init_resources(struct enic *enic) VNIC_CQ_FC_DISABLE, VNIC_CQ_COLOR_ENABLE, 0, 0, 1, /* cq_head, cq_tail, cq_tail_color */ - VNIC_CQ_INTR_DISABLE, + VNIC_CQ_INTR_DISABLE, /* polled synchronously by mbox send */ VNIC_CQ_ENTRY_ENABLE, VNIC_CQ_MSG_DISABLE, - 0, /* interrupt_offset */ + intr_offset, 0 /* 
cq_message_addr */); vnic_cq_init(&enic->admin_cq[1], VNIC_CQ_FC_DISABLE, VNIC_CQ_COLOR_ENABLE, 0, 0, 1, /* cq_head, cq_tail, cq_tail_color */ - VNIC_CQ_INTR_DISABLE, + VNIC_CQ_INTR_ENABLE, VNIC_CQ_ENTRY_ENABLE, VNIC_CQ_MSG_DISABLE, - 0, /* interrupt_offset */ + intr_offset, 0 /* cq_message_addr */); + vnic_intr_init(&enic->admin_intr, + 0, 0, 1); /* coalescing_timer, coalescing_type, mask_on_assertion */ +} + +static void enic_admin_msg_drain(struct enic *enic) +{ + struct enic_admin_msg *msg, *tmp; + + spin_lock_bh(&enic->admin_msg_lock); + list_for_each_entry_safe(msg, tmp, &enic->admin_msg_list, list) { + list_del(&msg->list); + kfree(msg); + } + enic->admin_msg_count = 0; + spin_unlock_bh(&enic->admin_msg_lock); } int enic_admin_channel_open(struct enic *enic) @@ -220,6 +488,19 @@ int enic_admin_channel_open(struct enic *enic) return err; } + spin_lock_init(&enic->admin_msg_lock); + INIT_LIST_HEAD(&enic->admin_msg_list); + INIT_WORK(&enic->admin_msg_work, enic_admin_msg_work_handler); + INIT_WORK(&enic->admin_poll_work, enic_admin_poll_work_handler); + + err = enic_admin_setup_intr(enic); + if (err) { + netdev_err(enic->netdev, + "Admin channel requires MSI-X, SR-IOV unavailable: %d\n", + err); + goto free_resources; + } + enic_admin_init_resources(enic); vnic_wq_enable(&enic->admin_wq); @@ -239,17 +520,31 @@ int enic_admin_channel_open(struct enic *enic) goto disable_queues; } + vnic_intr_unmask(&enic->admin_intr); + + netdev_dbg(enic->netdev, + "admin channel open: intr=%u wq_avail=%u rq_avail=%u cq0_color=%u cq1_color=%u\n", + enic->admin_intr_index, + vnic_wq_desc_avail(&enic->admin_wq), + vnic_rq_desc_avail(&enic->admin_rq), + enic->admin_cq[0].last_color, + enic->admin_cq[1].last_color); + enic->admin_chan_up = true; return 0; disable_queues: + enic_admin_teardown_intr(enic); enic_admin_qp_type_set(enic, QP_DISABLE); if (vnic_wq_disable(&enic->admin_wq)) netdev_warn(enic->netdev, "Failed to disable admin WQ\n"); if (vnic_rq_disable(&enic->admin_rq)) 
netdev_warn(enic->netdev, "Failed to disable admin RQ\n"); + cancel_work_sync(&enic->admin_msg_work); + enic_admin_msg_drain(enic); enic_admin_rq_drain(enic); +free_resources: enic_admin_free_resources(enic); return err; } @@ -268,6 +563,13 @@ void enic_admin_channel_close(struct enic *enic) if (!enic->admin_chan_up) return; + netdev_dbg(enic->netdev, "admin channel close\n"); + + vnic_intr_mask(&enic->admin_intr); + enic_admin_teardown_intr(enic); + cancel_work_sync(&enic->admin_msg_work); + enic_admin_msg_drain(enic); + enic_admin_qp_type_set(enic, QP_DISABLE); err = vnic_wq_disable(&enic->admin_wq); @@ -283,6 +585,7 @@ void enic_admin_channel_close(struct enic *enic) enic_admin_rq_drain(enic); vnic_cq_clean(&enic->admin_cq[0]); vnic_cq_clean(&enic->admin_cq[1]); + vnic_intr_clean(&enic->admin_intr); enic_admin_free_resources(enic); enic->admin_chan_up = false; diff --git a/drivers/net/ethernet/cisco/enic/enic_admin.h b/drivers/net/ethernet/cisco/enic/enic_admin.h index 569aadeb9312..62c80220b0ca 100644 --- a/drivers/net/ethernet/cisco/enic/enic_admin.h +++ b/drivers/net/ethernet/cisco/enic/enic_admin.h @@ -9,7 +9,19 @@ struct enic; +/* Wrapper for received admin messages queued for deferred processing. + * The admin CQ poll work handler enqueues these; a separate work handler + * processes them where sleeping (mutex, GFP_KERNEL) is safe. + */ +struct enic_admin_msg { + struct list_head list; + unsigned int len; + u8 data[] __aligned(8); +}; + int enic_admin_channel_open(struct enic *enic); void enic_admin_channel_close(struct enic *enic); +unsigned int enic_admin_wq_cq_service(struct enic *enic); +unsigned int enic_admin_rq_cq_service(struct enic *enic); #endif /* _ENIC_ADMIN_H_ */ -- 2.43.0 Define the mailbox protocol structures for PF-VF communication: message header, generic reply, and per-message-type payloads for capability negotiation, VF registration/unregistration, and link state notification/acknowledgment. 
Include linux/types.h and linux/bits.h for __le16/__le32/__le64 and BIT() used in the header. Message types use an even=request / odd=reply convention. The header carries source and destination VNIC IDs, a monotonically increasing message number, and the total message length. Signed-off-by: Satish Kharat --- drivers/net/ethernet/cisco/enic/enic_mbox.h | 83 +++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/drivers/net/ethernet/cisco/enic/enic_mbox.h b/drivers/net/ethernet/cisco/enic/enic_mbox.h new file mode 100644 index 000000000000..a52f1d25cb21 --- /dev/null +++ b/drivers/net/ethernet/cisco/enic/enic_mbox.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright 2025 Cisco Systems, Inc. All rights reserved. */ + +#ifndef _ENIC_MBOX_H_ +#define _ENIC_MBOX_H_ + +#include <linux/types.h> +#include <linux/bits.h> + +/* + * Mailbox protocol for PF-VF communication over the admin channel. + * + * Even numbers are requests, odd numbers are replies/acks. + * The prefix indicates the initiator: VF_ = VF-initiated, PF_ = PF-initiated.
+ */ +enum enic_mbox_msg_type { + ENIC_MBOX_VF_CAPABILITY_REQUEST = 0, + ENIC_MBOX_VF_CAPABILITY_REPLY = 1, + ENIC_MBOX_VF_REGISTER_REQUEST = 2, + ENIC_MBOX_VF_REGISTER_REPLY = 3, + ENIC_MBOX_VF_UNREGISTER_REQUEST = 4, + ENIC_MBOX_VF_UNREGISTER_REPLY = 5, + ENIC_MBOX_PF_LINK_STATE_NOTIF = 6, + ENIC_MBOX_PF_LINK_STATE_ACK = 7, + ENIC_MBOX_MAX +}; + +struct enic_mbox_hdr { + __le16 src_vnic_id; + __le16 dst_vnic_id; + u8 msg_type; + u8 flags; + __le16 msg_len; + __le64 msg_num; +}; + +struct enic_mbox_generic_reply { + __le16 ret_major; + __le16 ret_minor; +}; + +#define ENIC_MBOX_ERR_GENERIC BIT(0) +#define ENIC_MBOX_ERR_VF_NOT_REGISTERED BIT(1) +#define ENIC_MBOX_ERR_MSG_NOT_SUPPORTED BIT(2) + +/* ENIC_MBOX_VF_CAPABILITY_REQUEST / _REPLY */ +#define ENIC_MBOX_CAP_VERSION_0 0 +#define ENIC_MBOX_CAP_VERSION_1 1 + +struct enic_mbox_vf_capability_msg { + __le32 version; + __le32 reserved[32]; +}; + +/* The embedded enic_mbox_generic_reply has 2-byte alignment, but the + * __le32 members give this struct 4-byte natural alignment. Receive + * buffers come from kmalloc (>= 8-byte aligned), so there is no + * misaligned access risk when casting from the receive buffer. + */ +struct enic_mbox_vf_capability_reply_msg { + struct enic_mbox_generic_reply reply; + __le32 version; + __le32 reserved[32]; +}; + +/* ENIC_MBOX_VF_REGISTER / _UNREGISTER */ +struct enic_mbox_vf_register_reply_msg { + struct enic_mbox_generic_reply reply; +}; + +/* ENIC_MBOX_PF_LINK_STATE_NOTIF / _ACK */ +#define ENIC_MBOX_LINK_STATE_DISABLE 0 +#define ENIC_MBOX_LINK_STATE_ENABLE 1 + +struct enic_mbox_pf_link_state_notif_msg { + __le32 link_state; +}; + +struct enic_mbox_pf_link_state_ack_msg { + struct enic_mbox_generic_reply ack; +}; + +#endif /* _ENIC_MBOX_H_ */ -- 2.43.0 Implement the mailbox protocol engine used for PF-VF communication over the admin channel. 
The send path (enic_mbox_send_msg) builds a message with a common header, DMA-maps it, posts a single WQ descriptor with the destination vnic ID encoded in the VLAN tag field, and polls the WQ CQ for completion. MBOX sends are gated by enic->mbox_send_disabled: enic_mbox_send_msg() returns early while it is set. The flag is cleared in enic_admin_channel_open() only once the admin WQ/RQ/CQ and interrupt are fully programmed, and set again at the start of enic_admin_channel_close(), so a send can never race a not-yet-ready or torn-down admin channel. The receive path (enic_mbox_recv_handler) is installed as the admin RQ callback and validates incoming message headers. PF/VF-specific dispatch will be added in subsequent commits. Signed-off-by: Satish Kharat --- drivers/net/ethernet/cisco/enic/Makefile | 2 +- drivers/net/ethernet/cisco/enic/enic.h | 6 + drivers/net/ethernet/cisco/enic/enic_admin.c | 35 +++++- drivers/net/ethernet/cisco/enic/enic_mbox.c | 170 +++++++++++++++++++++++++++ drivers/net/ethernet/cisco/enic/enic_mbox.h | 8 ++ 5 files changed, 218 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/Makefile b/drivers/net/ethernet/cisco/enic/Makefile index 7ae72fefc99a..e38aaf34c148 100644 --- a/drivers/net/ethernet/cisco/enic/Makefile +++ b/drivers/net/ethernet/cisco/enic/Makefile @@ -4,5 +4,5 @@ obj-$(CONFIG_ENIC) := enic.o enic-y := enic_main.o vnic_cq.o vnic_intr.o vnic_wq.o \ enic_res.o enic_dev.o enic_pp.o vnic_dev.o vnic_rq.o vnic_vic.o \ enic_ethtool.o enic_api.o enic_clsf.o enic_rq.o enic_wq.o \ - enic_admin.o + enic_admin.o enic_mbox.o diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index 401123e6df1d..b009d87da4bd 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -297,6 +297,8 @@ struct enic { * left the resources freed. 
*/ bool admin_chan_up; + /* set on send timeout; cleared on channel re-open */ + bool mbox_send_disabled; struct vnic_wq admin_wq; struct vnic_rq admin_rq; struct vnic_cq admin_cq[2]; @@ -309,6 +311,10 @@ struct enic { unsigned int admin_msg_count; /* current depth of admin_msg_list */ void (*admin_rq_handler)(struct enic *enic, void *buf, unsigned int len); + + /* MBOX protocol state — mbox_lock serializes admin WQ sends */ + struct mutex mbox_lock; + u64 mbox_msg_num; }; static inline struct net_device *vnic_get_netdev(struct vnic_dev *vdev) diff --git a/drivers/net/ethernet/cisco/enic/enic_admin.c b/drivers/net/ethernet/cisco/enic/enic_admin.c index 6062a18043ba..d695b16765a1 100644 --- a/drivers/net/ethernet/cisco/enic/enic_admin.c +++ b/drivers/net/ethernet/cisco/enic/enic_admin.c @@ -19,6 +19,7 @@ #include "cq_enet_desc.h" #include "wq_enet_desc.h" #include "rq_enet_desc.h" +#include "enic_mbox.h" /* Clean up any admin WQ buffers still held by hardware at close time. * Normally buffers are freed inline after send completion, but a timed-out @@ -213,7 +214,26 @@ unsigned int enic_admin_rq_cq_service(struct enic *enic) goto next_desc; } - enic_admin_msg_enqueue(enic, buf->os_buf, bytes_written); + if (enic->admin_rq_handler) { + u16 sender_vlan; + + /* Firmware sets the CQ VLAN field to identify the + * sender: 0 = PF, 1-based = VF index. Overwrite + * the untrusted src_vnic_id in the MBOX header with + * the hardware-verified value. + */ + sender_vlan = le16_to_cpu(rq_desc->vlan); + if (bytes_written >= sizeof(struct enic_mbox_hdr)) { + struct enic_mbox_hdr *hdr = buf->os_buf; + + hdr->src_vnic_id = (sender_vlan == 0) ? 
+ cpu_to_le16(ENIC_MBOX_DST_PF) : + cpu_to_le16(sender_vlan - 1); + } + + enic_admin_msg_enqueue(enic, buf->os_buf, + bytes_written); + } next_desc: enic_admin_rq_buf_clean(rq, rq->to_clean); @@ -456,8 +476,9 @@ static void enic_admin_init_resources(struct enic *enic) VNIC_CQ_MSG_DISABLE, intr_offset, 0 /* cq_message_addr */); + /* coalescing_timer, coalescing_type, mask_on_assertion */ vnic_intr_init(&enic->admin_intr, - 0, 0, 1); /* coalescing_timer, coalescing_type, mask_on_assertion */ + 0, 0, 1); } static void enic_admin_msg_drain(struct enic *enic) @@ -522,6 +543,14 @@ int enic_admin_channel_open(struct enic *enic) vnic_intr_unmask(&enic->admin_intr); + /* Only now that the admin WQ/RQ/CQ and interrupt are fully allocated, + * programmed and enabled is it safe to allow MBOX sends. Clearing this + * earlier opened a window where a concurrent sender (e.g. link-notify + * work scheduled by a post-reset link-up) could call enic_mbox_send_msg() + * against a not-yet-allocated admin_wq and crash. + */ + WRITE_ONCE(enic->mbox_send_disabled, false); + netdev_dbg(enic->netdev, "admin channel open: intr=%u wq_avail=%u rq_avail=%u cq0_color=%u cq1_color=%u\n", enic->admin_intr_index, @@ -563,6 +592,8 @@ void enic_admin_channel_close(struct enic *enic) if (!enic->admin_chan_up) return; + WRITE_ONCE(enic->mbox_send_disabled, true); + netdev_dbg(enic->netdev, "admin channel close\n"); vnic_intr_mask(&enic->admin_intr); diff --git a/drivers/net/ethernet/cisco/enic/enic_mbox.c b/drivers/net/ethernet/cisco/enic/enic_mbox.c new file mode 100644 index 000000000000..3709704bee02 --- /dev/null +++ b/drivers/net/ethernet/cisco/enic/enic_mbox.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright 2025 Cisco Systems, Inc. All rights reserved. 
+ +#include +#include +#include +#include + +#include "vnic_dev.h" +#include "vnic_wq.h" +#include "vnic_cq.h" +#include "enic.h" +#include "enic_admin.h" +#include "enic_mbox.h" +#include "wq_enet_desc.h" + +#define ENIC_MBOX_POLL_TIMEOUT_US 5000000 +#define ENIC_MBOX_POLL_INTERVAL_US 100 + +static void enic_mbox_fill_hdr(struct enic *enic, struct enic_mbox_hdr *hdr, + u8 msg_type, u16 dst_vnic_id, u16 msg_len) +{ + memset(hdr, 0, sizeof(*hdr)); + hdr->dst_vnic_id = cpu_to_le16(dst_vnic_id); + hdr->msg_type = msg_type; + hdr->msg_len = cpu_to_le16(msg_len); + hdr->msg_num = cpu_to_le64(++enic->mbox_msg_num); +} + +int enic_mbox_send_msg(struct enic *enic, u8 msg_type, u16 dst_vnic_id, + void *payload, u16 payload_len) +{ + u16 total_len = sizeof(struct enic_mbox_hdr) + payload_len; + struct vnic_wq *wq = &enic->admin_wq; + struct wq_enet_desc *desc; + unsigned long timeout; + dma_addr_t dma_addr; + u16 vlan_tag; + void *buf; + int err; + + /* Serialize MBOX sends. The admin channel is a low-frequency + * control path; holding the mutex across the poll is acceptable. 
+ */ + mutex_lock(&enic->mbox_lock); + + if (!enic->has_admin_channel || READ_ONCE(enic->mbox_send_disabled)) { + err = -ENODEV; + goto unlock; + } + + if (vnic_wq_desc_avail(wq) == 0) { + err = -ENOSPC; + goto unlock; + } + + buf = kmalloc(total_len, GFP_KERNEL); + if (!buf) { + err = -ENOMEM; + goto unlock; + } + + enic_mbox_fill_hdr(enic, buf, msg_type, dst_vnic_id, total_len); + if (payload_len) { + void *dst = buf + sizeof(struct enic_mbox_hdr); + + memcpy(dst, payload, payload_len); + } + + dma_addr = dma_map_single(&enic->pdev->dev, buf, total_len, + DMA_TO_DEVICE); + if (dma_mapping_error(&enic->pdev->dev, dma_addr)) { + kfree(buf); + err = -ENOMEM; + goto unlock; + } + + /* Firmware uses vlan field for routing: 0 = PF, 1-based = VF index */ + if (dst_vnic_id == ENIC_MBOX_DST_PF) + vlan_tag = 0; + else + vlan_tag = dst_vnic_id + 1; + + desc = vnic_wq_next_desc(wq); + wq_enet_desc_enc(desc, (u64)dma_addr | VNIC_PADDR_TARGET, + total_len, + 0, 0, 0, /* mss, hdr_len, offload_mode */ + 1, 1, /* eop, cq_entry */ + 0, /* fcoe_encap */ + 1, vlan_tag, /* vlan_tag_insert, vlan_tag */ + 0); /* loopback */ + vnic_wq_post(wq, buf, dma_addr, total_len, + 1, 1, /* sop, eop */ + 1, 1, /* desc_skip_cnt, cq_entry */ + 0, 0); /* compressed_send, wrid */ + vnic_wq_doorbell(wq); + + timeout = jiffies + usecs_to_jiffies(ENIC_MBOX_POLL_TIMEOUT_US); + err = -ETIMEDOUT; + while (time_before(jiffies, timeout)) { + if (enic_admin_wq_cq_service(enic)) { + err = 0; + break; + } + usleep_range(ENIC_MBOX_POLL_INTERVAL_US, + ENIC_MBOX_POLL_INTERVAL_US + 50); + } + /* Final check in case completion arrived during the last sleep */ + if (err && enic_admin_wq_cq_service(enic)) + err = 0; + + if (!err) { + wq->to_clean = wq->to_clean->next; + wq->ring.desc_avail++; + dma_unmap_single(&enic->pdev->dev, dma_addr, total_len, + DMA_TO_DEVICE); + kfree(buf); + } else { + netdev_err(enic->netdev, + "MBOX send timed out (type %u dst %u), disabling channel\n", + msg_type, dst_vnic_id); + /* + * The 
WQ descriptor is still live in hardware. Do not unmap + * or free the buffer: the device may still DMA from dma_addr. + * Mark the channel unusable so no further sends are attempted. + */ + WRITE_ONCE(enic->mbox_send_disabled, true); + } + + netdev_dbg(enic->netdev, + "MBOX send msg_type %u dst %u vlan %u err %d\n", + msg_type, dst_vnic_id, vlan_tag, err); +unlock: + mutex_unlock(&enic->mbox_lock); + return err; +} + +static void enic_mbox_recv_handler(struct enic *enic, void *buf, + unsigned int len) +{ + struct enic_mbox_hdr *hdr = buf; + + if (len < sizeof(*hdr)) { + if (net_ratelimit()) + netdev_warn(enic->netdev, + "MBOX: truncated message (len %u < %zu)\n", + len, sizeof(*hdr)); + return; + } + + if (hdr->msg_type >= ENIC_MBOX_MAX) { + if (net_ratelimit()) + netdev_warn(enic->netdev, + "MBOX: unknown msg type %u\n", + hdr->msg_type); + return; + } + + netdev_dbg(enic->netdev, + "MBOX recv: type %u from vnic %u len %u\n", + hdr->msg_type, le16_to_cpu(hdr->src_vnic_id), + le16_to_cpu(hdr->msg_len)); +} + +void enic_mbox_init(struct enic *enic) +{ + enic->mbox_msg_num = 0; + mutex_init(&enic->mbox_lock); + enic->admin_rq_handler = enic_mbox_recv_handler; +} diff --git a/drivers/net/ethernet/cisco/enic/enic_mbox.h b/drivers/net/ethernet/cisco/enic/enic_mbox.h index a52f1d25cb21..73fd7f783ee2 100644 --- a/drivers/net/ethernet/cisco/enic/enic_mbox.h +++ b/drivers/net/ethernet/cisco/enic/enic_mbox.h @@ -80,4 +80,12 @@ struct enic_mbox_pf_link_state_ack_msg { struct enic_mbox_generic_reply ack; }; +#define ENIC_MBOX_DST_PF 0xFFFF + +struct enic; + +void enic_mbox_init(struct enic *enic); +int enic_mbox_send_msg(struct enic *enic, u8 msg_type, u16 dst_vnic_id, + void *payload, u16 payload_len); + #endif /* _ENIC_MBOX_H_ */ -- 2.43.0 Implement PF-side mailbox message processing for SR-IOV V2 admin channel communication. 
When the PF receives messages from VFs, the dispatch routes them to type-specific handlers: - VF_CAPABILITY_REQUEST: reply with protocol version 1 - VF_REGISTER_REQUEST: send the register reply, mark the VF registered on success, then send PF_LINK_STATE_NOTIF reflecting the PF's current carrier state - VF_UNREGISTER_REQUEST: mark VF unregistered, send reply - PF_LINK_STATE_ACK: log errors from VF acknowledgment Per-VF state (struct enic_vf_state) is tracked via enic->vf_state which will be allocated when SRIOV V2 is enabled. Remove the CONFIG_PCI_IOV guard from num_vfs in struct enic. The PF handlers reference enic->num_vfs for VF ID bounds checking in enic_mbox.c, which is compiled unconditionally. The field must be visible regardless of CONFIG_PCI_IOV to avoid build failures. Add enic_mbox_send_link_state() helper for PF-initiated link state notifications, also used later by ndo_set_vf_link_state. Signed-off-by: Satish Kharat --- drivers/net/ethernet/cisco/enic/enic.h | 7 +- drivers/net/ethernet/cisco/enic/enic_mbox.c | 190 +++++++++++++++++++++++++++- drivers/net/ethernet/cisco/enic/enic_mbox.h | 1 + 3 files changed, 194 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index b009d87da4bd..d459318c46fc 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -256,9 +256,7 @@ struct enic { struct enic_rx_coal rx_coalesce_setting; u32 rx_coalesce_usecs; u32 tx_coalesce_usecs; -#ifdef CONFIG_PCI_IOV u16 num_vfs; -#endif enum enic_vf_type vf_type; unsigned int enable_count; spinlock_t enic_api_lock; @@ -315,6 +313,11 @@ struct enic { /* MBOX protocol state — mbox_lock serializes admin WQ sends */ struct mutex mbox_lock; u64 mbox_msg_num; + + /* PF: per-VF MBOX state, allocated when SRIOV V2 is enabled */ + struct enic_vf_state { + bool registered; + } *vf_state; }; static inline struct net_device *vnic_get_netdev(struct vnic_dev *vdev) diff --git 
a/drivers/net/ethernet/cisco/enic/enic_mbox.c b/drivers/net/ethernet/cisco/enic/enic_mbox.c index 3709704bee02..b6f05b03ae26 100644 --- a/drivers/net/ethernet/cisco/enic/enic_mbox.c +++ b/drivers/net/ethernet/cisco/enic/enic_mbox.c @@ -135,10 +135,183 @@ int enic_mbox_send_msg(struct enic *enic, u8 msg_type, u16 dst_vnic_id, return err; } +int enic_mbox_send_link_state(struct enic *enic, u16 vf_id, u32 link_state) +{ + struct enic_mbox_pf_link_state_notif_msg notif = {}; + + if (!enic->vf_state || vf_id >= enic->num_vfs || + !enic->vf_state[vf_id].registered) { + netdev_dbg(enic->netdev, + "MBOX: skip link state to unregistered VF %u\n", + vf_id); + return 0; + } + + notif.link_state = cpu_to_le32(link_state); + return enic_mbox_send_msg(enic, ENIC_MBOX_PF_LINK_STATE_NOTIF, vf_id, + &notif, sizeof(notif)); +} + +static int enic_mbox_pf_handle_capability(struct enic *enic, void *msg, + u16 vf_id, u64 msg_num) +{ + struct enic_mbox_vf_capability_reply_msg reply = {}; + + reply.reply.ret_major = cpu_to_le16(0); + reply.version = cpu_to_le32(ENIC_MBOX_CAP_VERSION_1); + + return enic_mbox_send_msg(enic, ENIC_MBOX_VF_CAPABILITY_REPLY, vf_id, + &reply, sizeof(reply)); +} + +static int enic_mbox_pf_handle_register(struct enic *enic, void *msg, + u16 vf_id, u64 msg_num) +{ + struct enic_mbox_vf_register_reply_msg reply = {}; + u32 link_state; + int err; + + if (!enic->vf_state || vf_id >= enic->num_vfs) { + if (net_ratelimit()) + netdev_warn(enic->netdev, + "MBOX: register from invalid VF %u\n", + vf_id); + return -EINVAL; + } + + /* VF re-registering (e.g. guest reboot without clean unregister): + * mark the previous registration inactive before accepting the new one.
+ */ + if (enic->vf_state[vf_id].registered) { + netdev_dbg(enic->netdev, + "MBOX: VF %u re-register, cleaning previous state\n", + vf_id); + enic->vf_state[vf_id].registered = false; + } + + reply.reply.ret_major = cpu_to_le16(0); + err = enic_mbox_send_msg(enic, ENIC_MBOX_VF_REGISTER_REPLY, vf_id, + &reply, sizeof(reply)); + if (err) + return err; + + enic->vf_state[vf_id].registered = true; + if (net_ratelimit()) + netdev_info(enic->netdev, "VF %u registered via MBOX\n", vf_id); + + link_state = netif_carrier_ok(enic->netdev) ? + ENIC_MBOX_LINK_STATE_ENABLE : + ENIC_MBOX_LINK_STATE_DISABLE; + err = enic_mbox_send_link_state(enic, vf_id, link_state); + if (err && net_ratelimit()) + netdev_warn(enic->netdev, + "VF %u: failed to send initial link state: %d\n", + vf_id, err); + /* Registration succeeded; initial link state notification sent + * above. Subsequent link state changes are sent from the PF + * when enic_link_check() detects carrier changes. + */ + return 0; +} + +static int enic_mbox_pf_handle_unregister(struct enic *enic, void *msg, + u16 vf_id, u64 msg_num) +{ + struct enic_mbox_vf_register_reply_msg reply = {}; + int err; + + if (!enic->vf_state || vf_id >= enic->num_vfs) { + if (net_ratelimit()) + netdev_warn(enic->netdev, + "MBOX: unregister from invalid VF %u\n", + vf_id); + return -EINVAL; + } + + /* VF is unloading; clear local state regardless of whether + * the reply is successfully delivered to avoid the PF treating + * a dead VF as still registered. 
+ */ + enic->vf_state[vf_id].registered = false; + + reply.reply.ret_major = cpu_to_le16(0); + err = enic_mbox_send_msg(enic, ENIC_MBOX_VF_UNREGISTER_REPLY, vf_id, + &reply, sizeof(reply)); + + if (net_ratelimit()) + netdev_info(enic->netdev, + "VF %u unregistered via MBOX\n", vf_id); + + return err; +} + +static void enic_mbox_pf_process_msg(struct enic *enic, + struct enic_mbox_hdr *hdr, void *payload) +{ + u16 vf_id = le16_to_cpu(hdr->src_vnic_id); + u16 msg_len = le16_to_cpu(hdr->msg_len); + int err = 0; + + if (!enic->vf_state) { + netdev_dbg(enic->netdev, + "MBOX: PF received msg but SRIOV not active\n"); + return; + } + + if (vf_id >= enic->num_vfs) { + if (net_ratelimit()) + netdev_warn(enic->netdev, + "MBOX: PF received msg from invalid VF %u\n", + vf_id); + return; + } + + switch (hdr->msg_type) { + case ENIC_MBOX_VF_CAPABILITY_REQUEST: + err = enic_mbox_pf_handle_capability(enic, payload, vf_id, + le64_to_cpu(hdr->msg_num)); + break; + case ENIC_MBOX_VF_REGISTER_REQUEST: + err = enic_mbox_pf_handle_register(enic, payload, vf_id, + le64_to_cpu(hdr->msg_num)); + break; + case ENIC_MBOX_VF_UNREGISTER_REQUEST: + err = enic_mbox_pf_handle_unregister(enic, payload, vf_id, + le64_to_cpu(hdr->msg_num)); + break; + case ENIC_MBOX_PF_LINK_STATE_ACK: { + struct enic_mbox_pf_link_state_ack_msg *ack = payload; + + if (msg_len < sizeof(*hdr) + sizeof(*ack)) + break; + if (le16_to_cpu(ack->ack.ret_major) && net_ratelimit()) + netdev_warn(enic->netdev, + "MBOX: VF %u link state ACK error %u/%u\n", + vf_id, + le16_to_cpu(ack->ack.ret_major), + le16_to_cpu(ack->ack.ret_minor)); + break; + } + default: + netdev_dbg(enic->netdev, + "MBOX: PF unhandled msg type %u from VF %u\n", + hdr->msg_type, vf_id); + err = -EOPNOTSUPP; + break; + } + + if (err && net_ratelimit()) + netdev_warn(enic->netdev, + "MBOX: PF handler for msg type %u from VF %u failed: %d\n", + hdr->msg_type, vf_id, err); +} + static void enic_mbox_recv_handler(struct enic *enic, void *buf, unsigned int len) { 
struct enic_mbox_hdr *hdr = buf; + void *payload; + u16 msg_len; if (len < sizeof(*hdr)) { if (net_ratelimit()) @@ -156,10 +329,23 @@ static void enic_mbox_recv_handler(struct enic *enic, void *buf, return; } + msg_len = le16_to_cpu(hdr->msg_len); + if (msg_len < sizeof(*hdr) || msg_len > len) { + if (net_ratelimit()) + netdev_warn(enic->netdev, + "MBOX: invalid msg_len %u (buf len %u)\n", + msg_len, len); + return; + } + netdev_dbg(enic->netdev, "MBOX recv: type %u from vnic %u len %u\n", - hdr->msg_type, le16_to_cpu(hdr->src_vnic_id), - le16_to_cpu(hdr->msg_len)); + hdr->msg_type, le16_to_cpu(hdr->src_vnic_id), msg_len); + + payload = buf + sizeof(*hdr); + + if (enic->vf_state) + enic_mbox_pf_process_msg(enic, hdr, payload); } void enic_mbox_init(struct enic *enic) diff --git a/drivers/net/ethernet/cisco/enic/enic_mbox.h b/drivers/net/ethernet/cisco/enic/enic_mbox.h index 73fd7f783ee2..f1de67db1273 100644 --- a/drivers/net/ethernet/cisco/enic/enic_mbox.h +++ b/drivers/net/ethernet/cisco/enic/enic_mbox.h @@ -87,5 +87,6 @@ struct enic; void enic_mbox_init(struct enic *enic); int enic_mbox_send_msg(struct enic *enic, u8 msg_type, u16 dst_vnic_id, void *payload, u16 payload_len); +int enic_mbox_send_link_state(struct enic *enic, u16 vf_id, u32 link_state); #endif /* _ENIC_MBOX_H_ */ -- 2.43.0 Implement VF-side mailbox message processing for SR-IOV V2 admin channel communication. 
VF receive handlers: - VF_CAPABILITY_REPLY: store PF protocol version, signal completion - VF_REGISTER_REPLY: mark VF as registered, signal completion - VF_UNREGISTER_REPLY: mark VF as unregistered, signal completion - PF_LINK_STATE_NOTIF: update carrier state via netif_carrier_on/off, send ACK back to PF VF initiation functions for the probe-time handshake: - enic_mbox_vf_capability_check: send capability request, wait for PF reply via completion - enic_mbox_vf_register: send register request, wait for PF confirmation via completion - enic_mbox_vf_unregister: send unregister request, wait for PF confirmation The wait helper (enic_mbox_wait_reply) uses wait_for_completion_timeout, signaled when the admin ISR and CQ-poll/dispatch workqueue pipeline delivers the reply message. Signed-off-by: Satish Kharat --- drivers/net/ethernet/cisco/enic/enic.h | 11 ++ drivers/net/ethernet/cisco/enic/enic_mbox.c | 277 +++++++++++++++++++++++++++- drivers/net/ethernet/cisco/enic/enic_mbox.h | 3 + 3 files changed, 290 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index d459318c46fc..a9a376d2cf0e 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -258,6 +258,8 @@ struct enic { u32 tx_coalesce_usecs; u16 num_vfs; enum enic_vf_type vf_type; + bool vf_registered; + u32 pf_cap_version; unsigned int enable_count; spinlock_t enic_api_lock; bool enic_api_busy; @@ -313,6 +315,15 @@ struct enic { /* MBOX protocol state — mbox_lock serializes admin WQ sends */ struct mutex mbox_lock; u64 mbox_msg_num; + /* MBOX request-reply state. Written by the process-context request + * helpers (capability/register/unregister) and read/cleared by the + * admin_msg_work receive handlers. 
No explicit lock is needed because + * only one request is in flight at a time: requesters run under RTNL or + * single-threaded probe/remove, so each request is serialized and its + * reply completes mbox_comp before the next request is issued. + */ + struct completion mbox_comp; + u8 mbox_expected_reply; /* PF: per-VF MBOX state, allocated when SRIOV V2 is enabled */ struct enic_vf_state { diff --git a/drivers/net/ethernet/cisco/enic/enic_mbox.c b/drivers/net/ethernet/cisco/enic/enic_mbox.c index b6f05b03ae26..4676a9a15af5 100644 --- a/drivers/net/ethernet/cisco/enic/enic_mbox.c +++ b/drivers/net/ethernet/cisco/enic/enic_mbox.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "vnic_dev.h" #include "vnic_wq.h" @@ -135,6 +136,16 @@ int enic_mbox_send_msg(struct enic *enic, u8 msg_type, u16 dst_vnic_id, return err; } +static int enic_mbox_wait_reply(struct enic *enic, unsigned long timeout_ms) +{ + unsigned long left; + + left = wait_for_completion_timeout(&enic->mbox_comp, + msecs_to_jiffies(timeout_ms)); + + return left ? 
0 : -ETIMEDOUT; +} + int enic_mbox_send_link_state(struct enic *enic, u16 vf_id, u32 link_state) { struct enic_mbox_pf_link_state_notif_msg notif = {}; @@ -306,6 +317,166 @@ static void enic_mbox_pf_process_msg(struct enic *enic, hdr->msg_type, vf_id, err); } +static void enic_mbox_vf_handle_capability_reply(struct enic *enic, + void *payload) +{ + struct enic_mbox_vf_capability_reply_msg *reply = payload; + + if (enic->mbox_expected_reply != ENIC_MBOX_VF_CAPABILITY_REPLY) { + netdev_warn(enic->netdev, + "MBOX: stale capability reply (expected %u), drop\n", + enic->mbox_expected_reply); + return; + } + + if (le16_to_cpu(reply->reply.ret_major) == 0) + enic->pf_cap_version = le32_to_cpu(reply->version); + else + netdev_warn(enic->netdev, + "MBOX: PF rejected capability request: %u/%u\n", + le16_to_cpu(reply->reply.ret_major), + le16_to_cpu(reply->reply.ret_minor)); + complete(&enic->mbox_comp); +} + +static void enic_mbox_vf_handle_register_reply(struct enic *enic, + void *payload) +{ + struct enic_mbox_vf_register_reply_msg *reply = payload; + + if (enic->mbox_expected_reply != ENIC_MBOX_VF_REGISTER_REPLY) { + netdev_warn(enic->netdev, + "MBOX: stale register reply (expected %u), drop\n", + enic->mbox_expected_reply); + return; + } + + if (le16_to_cpu(reply->reply.ret_major)) { + netdev_warn(enic->netdev, + "MBOX: VF register rejected by PF: %u/%u\n", + le16_to_cpu(reply->reply.ret_major), + le16_to_cpu(reply->reply.ret_minor)); + } else { + enic->vf_registered = true; + } + complete(&enic->mbox_comp); +} + +static void enic_mbox_vf_handle_unregister_reply(struct enic *enic, + void *payload) +{ + struct enic_mbox_vf_register_reply_msg *reply = payload; + + if (enic->mbox_expected_reply != ENIC_MBOX_VF_UNREGISTER_REPLY) { + netdev_warn(enic->netdev, + "MBOX: stale unregister reply (expected %u), drop\n", + enic->mbox_expected_reply); + return; + } + + if (le16_to_cpu(reply->reply.ret_major)) { + netdev_warn(enic->netdev, + "MBOX: VF unregister rejected by PF: 
%u/%u\n", + le16_to_cpu(reply->reply.ret_major), + le16_to_cpu(reply->reply.ret_minor)); + } else { + enic->vf_registered = false; + } + complete(&enic->mbox_comp); +} + +static void enic_mbox_vf_handle_link_state(struct enic *enic, void *payload) +{ + struct enic_mbox_pf_link_state_notif_msg *notif = payload; + struct enic_mbox_pf_link_state_ack_msg ack = {}; + int err; + + switch (le32_to_cpu(notif->link_state)) { + case ENIC_MBOX_LINK_STATE_ENABLE: + if (!netif_carrier_ok(enic->netdev)) + netif_carrier_on(enic->netdev); + netdev_dbg(enic->netdev, "MBOX: link state -> UP\n"); + break; + case ENIC_MBOX_LINK_STATE_DISABLE: + if (netif_carrier_ok(enic->netdev)) + netif_carrier_off(enic->netdev); + netdev_dbg(enic->netdev, "MBOX: link state -> DOWN\n"); + break; + default: + netdev_warn(enic->netdev, "MBOX: unknown link state %u\n", + le32_to_cpu(notif->link_state)); + ack.ack.ret_major = cpu_to_le16(ENIC_MBOX_ERR_GENERIC); + break; + } + + err = enic_mbox_send_msg(enic, ENIC_MBOX_PF_LINK_STATE_ACK, + ENIC_MBOX_DST_PF, &ack, sizeof(ack)); + if (err && net_ratelimit()) + netdev_warn(enic->netdev, + "MBOX: failed to send link state ACK: %d\n", err); +} + +static bool enic_mbox_vf_payload_ok(struct enic *enic, u8 msg_type, + u16 payload_len, size_t min_len) +{ + if (payload_len < min_len) { + netdev_warn(enic->netdev, + "MBOX: short payload for type %u (%u < %zu)\n", + msg_type, payload_len, min_len); + return false; + } + return true; +} + +static void enic_mbox_vf_process_msg(struct enic *enic, + struct enic_mbox_hdr *hdr, void *payload, + u16 payload_len) +{ + switch (hdr->msg_type) { + case ENIC_MBOX_VF_CAPABILITY_REPLY: { + size_t exp = sizeof(struct enic_mbox_vf_capability_reply_msg); + + if (!enic_mbox_vf_payload_ok(enic, hdr->msg_type, + payload_len, exp)) + return; + enic_mbox_vf_handle_capability_reply(enic, payload); + break; + } + case ENIC_MBOX_VF_REGISTER_REPLY: { + size_t exp = sizeof(struct enic_mbox_vf_register_reply_msg); + + if 
(!enic_mbox_vf_payload_ok(enic, hdr->msg_type, + payload_len, exp)) + return; + enic_mbox_vf_handle_register_reply(enic, payload); + break; + } + case ENIC_MBOX_VF_UNREGISTER_REPLY: { + size_t exp = sizeof(struct enic_mbox_vf_register_reply_msg); + + if (!enic_mbox_vf_payload_ok(enic, hdr->msg_type, + payload_len, exp)) + return; + enic_mbox_vf_handle_unregister_reply(enic, payload); + break; + } + case ENIC_MBOX_PF_LINK_STATE_NOTIF: { + size_t exp = sizeof(struct enic_mbox_pf_link_state_notif_msg); + + if (!enic_mbox_vf_payload_ok(enic, hdr->msg_type, + payload_len, exp)) + return; + enic_mbox_vf_handle_link_state(enic, payload); + break; + } + default: + netdev_dbg(enic->netdev, + "MBOX: VF unhandled msg type %u\n", + hdr->msg_type); + break; + } +} + static void enic_mbox_recv_handler(struct enic *enic, void *buf, unsigned int len) { @@ -344,13 +515,117 @@ static void enic_mbox_recv_handler(struct enic *enic, void *buf, payload = buf + sizeof(*hdr); - if (enic->vf_state) + if (enic->vf_state) { enic_mbox_pf_process_msg(enic, hdr, payload); + } else if (le16_to_cpu(hdr->src_vnic_id) == ENIC_MBOX_DST_PF) { + /* src_vnic_id was overwritten from the hardware-verified CQ + * VLAN sender field, so a VF only accepts messages that the + * adapter attributes to the PF. Its sole admin-channel peer is + * the PF; drop anything else as a spoofed notification. 
+ */ + enic_mbox_vf_process_msg(enic, hdr, payload, + msg_len - (u16)sizeof(*hdr)); + } else if (net_ratelimit()) { + netdev_warn(enic->netdev, + "MBOX: VF dropping non-PF message from vnic %u\n", + le16_to_cpu(hdr->src_vnic_id)); + } +} + +int enic_mbox_vf_capability_check(struct enic *enic) +{ + struct enic_mbox_vf_capability_msg req = {}; + int err; + + enic->pf_cap_version = 0; + reinit_completion(&enic->mbox_comp); + enic->mbox_expected_reply = ENIC_MBOX_VF_CAPABILITY_REPLY; + req.version = cpu_to_le32(ENIC_MBOX_CAP_VERSION_1); + + err = enic_mbox_send_msg(enic, ENIC_MBOX_VF_CAPABILITY_REQUEST, + ENIC_MBOX_DST_PF, &req, sizeof(req)); + if (err) { + enic->mbox_expected_reply = 0; + return err; + } + + err = enic_mbox_wait_reply(enic, 3000); + enic->mbox_expected_reply = 0; + if (err) { + netdev_warn(enic->netdev, + "MBOX: no capability reply from PF\n"); + return err; + } + + if (enic->pf_cap_version < ENIC_MBOX_CAP_VERSION_1) { + netdev_warn(enic->netdev, + "MBOX: PF version %u too old\n", + enic->pf_cap_version); + return -EOPNOTSUPP; + } + + return 0; +} + +int enic_mbox_vf_register(struct enic *enic) +{ + int err; + + enic->vf_registered = false; + reinit_completion(&enic->mbox_comp); + enic->mbox_expected_reply = ENIC_MBOX_VF_REGISTER_REPLY; + + err = enic_mbox_send_msg(enic, ENIC_MBOX_VF_REGISTER_REQUEST, + ENIC_MBOX_DST_PF, NULL, 0); + if (err) { + enic->mbox_expected_reply = 0; + return err; + } + + err = enic_mbox_wait_reply(enic, 3000); + enic->mbox_expected_reply = 0; + if (err) { + netdev_warn(enic->netdev, + "MBOX: VF registration with PF timed out\n"); + return err; + } + + if (!enic->vf_registered) + return -ENODEV; + + return 0; +} + +int enic_mbox_vf_unregister(struct enic *enic) +{ + int err; + + if (!enic->vf_registered) + return 0; + + reinit_completion(&enic->mbox_comp); + enic->mbox_expected_reply = ENIC_MBOX_VF_UNREGISTER_REPLY; + + err = enic_mbox_send_msg(enic, ENIC_MBOX_VF_UNREGISTER_REQUEST, + ENIC_MBOX_DST_PF, NULL, 0); + if (err) { 
+ enic->mbox_expected_reply = 0; + return err; + } + + err = enic_mbox_wait_reply(enic, 3000); + enic->mbox_expected_reply = 0; + if (err) + return err; + if (enic->vf_registered) + return -EACCES; + return 0; } void enic_mbox_init(struct enic *enic) { enic->mbox_msg_num = 0; mutex_init(&enic->mbox_lock); + init_completion(&enic->mbox_comp); enic->admin_rq_handler = enic_mbox_recv_handler; } diff --git a/drivers/net/ethernet/cisco/enic/enic_mbox.h b/drivers/net/ethernet/cisco/enic/enic_mbox.h index f1de67db1273..15e30ee2b0ed 100644 --- a/drivers/net/ethernet/cisco/enic/enic_mbox.h +++ b/drivers/net/ethernet/cisco/enic/enic_mbox.h @@ -88,5 +88,8 @@ void enic_mbox_init(struct enic *enic); int enic_mbox_send_msg(struct enic *enic, u8 msg_type, u16 dst_vnic_id, void *payload, u16 payload_len); int enic_mbox_send_link_state(struct enic *enic, u16 vf_id, u32 link_state); +int enic_mbox_vf_capability_check(struct enic *enic); +int enic_mbox_vf_register(struct enic *enic); +int enic_mbox_vf_unregister(struct enic *enic); #endif /* _ENIC_MBOX_H_ */ -- 2.43.0 Extend enic_sriov_configure() to handle V2 SR-IOV VFs. When the PF detects V2 VF device IDs, the enable path allocates per-VF MBOX state, opens the admin channel, initializes the MBOX protocol, and then calls pci_enable_sriov(). The admin channel must be ready before VFs are created so that VF drivers can immediately begin the MBOX capability and registration handshake during their probe. The enic_sriov_configure() dispatcher and its V2 helpers (enic_sriov_v2_enable, enic_sriov_v2_disable) are defined here but intentionally not yet wired into struct pci_driver via .sriov_configure -- hence the __maybe_unused annotations. This series introduces only the admin channel and MBOX infrastructure; sysfs-driven V2 enable/disable will be activated in a follow-up patch by adding ".sriov_configure = enic_sriov_configure," to enic_driver. 
The disable path first clears ENIC_SRIOV_ENABLED and cancels the link-notify work with cancel_work_sync() (which also waits for an already-running instance to finish), so no further VF link-state broadcast can run, then calls pci_disable_sriov() (VF drivers unregister via MBOX), closes the admin channel, and frees per-VF state. Clearing the flag and cancelling the work before vf_state is freed closes a use-after-free window against the link-notify path. Notify registered VFs of PF link transitions: enic_link_check() schedules link_notify_work on each carrier up/down edge, and the work handler sends PF_LINK_STATE_NOTIF to the VFs from process context. The broadcast cannot run directly in enic_link_check() because the MBOX send path may sleep and link check runs in the notify timer/ISR context. Re-establish the admin/MBOX channel across a PF reset. enic_reset() and enic_tx_hang_reset() fully close the admin channel before the soft/hang reset (which wipes all hardware queues, including the admin WQ/RQ), then reopen it and re-run enic_mbox_init() after the data path is back up, and re-push the current link state to registered VFs. Reject VF port profile requests when V2 SR-IOV is active (enic_is_valid_pp_vf), since enic->pp is not reallocated for V2 VFs and the V2 protocol uses MBOX instead of port profiles. Update enic_remove() to run enic_dev_deinit() and vnic_dev_close() after SR-IOV teardown, so the PF device remains functional while VFs are being cleaned up. This ordering applies to both V1 and V2 SR-IOV paths. 
Signed-off-by: Satish Kharat --- drivers/net/ethernet/cisco/enic/enic.h | 2 + drivers/net/ethernet/cisco/enic/enic_admin.c | 3 + drivers/net/ethernet/cisco/enic/enic_main.c | 252 +++++++++++++++++++++++++-- drivers/net/ethernet/cisco/enic/enic_mbox.c | 13 +- drivers/net/ethernet/cisco/enic/enic_pp.c | 5 + drivers/net/ethernet/cisco/enic/enic_res.c | 1 + drivers/net/ethernet/cisco/enic/vnic_enet.h | 4 +- 7 files changed, 266 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index a9a376d2cf0e..b5a43fe04877 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -305,6 +305,7 @@ struct enic { struct vnic_intr admin_intr; struct work_struct admin_poll_work; unsigned int admin_intr_index; + struct work_struct link_notify_work; struct work_struct admin_msg_work; spinlock_t admin_msg_lock; /* protects admin_msg_list */ struct list_head admin_msg_list; @@ -324,6 +325,7 @@ struct enic { */ struct completion mbox_comp; u8 mbox_expected_reply; + bool mbox_initialized; /* PF: per-VF MBOX state, allocated when SRIOV V2 is enabled */ struct enic_vf_state { diff --git a/drivers/net/ethernet/cisco/enic/enic_admin.c b/drivers/net/ethernet/cisco/enic/enic_admin.c index d695b16765a1..242644fa2cbf 100644 --- a/drivers/net/ethernet/cisco/enic/enic_admin.c +++ b/drivers/net/ethernet/cisco/enic/enic_admin.c @@ -598,6 +598,7 @@ void enic_admin_channel_close(struct enic *enic) vnic_intr_mask(&enic->admin_intr); enic_admin_teardown_intr(enic); + cancel_work_sync(&enic->link_notify_work); cancel_work_sync(&enic->admin_msg_work); enic_admin_msg_drain(enic); @@ -617,6 +618,8 @@ void enic_admin_channel_close(struct enic *enic) vnic_cq_clean(&enic->admin_cq[0]); vnic_cq_clean(&enic->admin_cq[1]); vnic_intr_clean(&enic->admin_intr); + + enic->admin_rq_handler = NULL; enic_admin_free_resources(enic); enic->admin_chan_up = false; diff --git 
a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 6992411bd3b5..185da2fbc5c7 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -60,6 +60,8 @@ #include "enic_clsf.h" #include "enic_rq.h" #include "enic_wq.h" +#include "enic_admin.h" +#include "enic_mbox.h" #define ENIC_NOTIFY_TIMER_PERIOD (2 * HZ) @@ -411,6 +413,24 @@ static void enic_set_rx_coal_setting(struct enic *enic) rx_coal->use_adaptive_rx_coalesce = 1; } +static void enic_link_notify_work_handler(struct work_struct *work) +{ + struct enic *enic = container_of(work, struct enic, + link_notify_work); + u32 state; + u16 i; + + if (!enic_sriov_enabled(enic) || !enic->vf_state) + return; + + state = netif_carrier_ok(enic->netdev) ? + ENIC_MBOX_LINK_STATE_ENABLE : + ENIC_MBOX_LINK_STATE_DISABLE; + + for (i = 0; i < enic->num_vfs; i++) + enic_mbox_send_link_state(enic, i, state); +} + static void enic_link_check(struct enic *enic) { int link_status = vnic_dev_link_status(enic->vdev); @@ -420,9 +440,13 @@ static void enic_link_check(struct enic *enic) netdev_info(enic->netdev, "Link UP\n"); netif_carrier_on(enic->netdev); enic_set_rx_coal_setting(enic); + if (enic_sriov_enabled(enic) && enic->vf_state) + schedule_work(&enic->link_notify_work); } else if (!link_status && carrier_ok) { netdev_info(enic->netdev, "Link DOWN\n"); netif_carrier_off(enic->netdev); + if (enic_sriov_enabled(enic) && enic->vf_state) + schedule_work(&enic->link_notify_work); } } @@ -2154,15 +2178,47 @@ static void enic_reset(struct work_struct *work) /* Stop any activity from infiniband */ enic_set_api_busy(enic, true); + /* Fully tear down the V2 admin/MBOX channel before the soft reset. 
+ * The reset wipes all hardware queues including the admin WQ/RQ; + * closing first tells firmware to stop the admin QP (so it no longer + * DMAs from the about-to-be-reset rings) and frees the admin resources + * so they are cleanly re-allocated afterwards. + */ + if (enic_sriov_enabled(enic) && + enic->vf_type == ENIC_VF_TYPE_V2) + enic_admin_channel_close(enic); + enic_stop(enic->netdev); + enic_dev_soft_reset(enic); enic_reset_addr_lists(enic); enic_init_vnic_resources(enic); enic_set_rss_nic_cfg(enic); enic_dev_set_ig_vlan_rewrite_mode(enic); enic_ext_cq(enic); + enic_open(enic->netdev); + /* Re-establish the admin/MBOX channel after the data path is back up, + * mirroring the SR-IOV enable path (channel open + mbox init). The + * channel was fully torn down by enic_admin_channel_close() above. + */ + if (enic_sriov_enabled(enic) && + enic->vf_type == ENIC_VF_TYPE_V2) { + if (enic_admin_channel_open(enic)) { + netdev_err(enic->netdev, + "admin channel reopen after reset failed\n"); + } else { + enic_mbox_init(enic); + /* The link came back up during enic_open() above + * while MBOX sends were still disabled (channel not + * yet reopened), so that link-notify was dropped. + * Re-push current link state to registered VFs now. + */ + schedule_work(&enic->link_notify_work); + } + } + /* Allow infiniband to fiddle with the device again */ enic_set_api_busy(enic, false); @@ -2180,16 +2236,46 @@ static void enic_tx_hang_reset(struct work_struct *work) /* Stop any activity from infiniband */ enic_set_api_busy(enic, true); + /* Fully tear down the V2 admin/MBOX channel before the hang reset, for + * the same reason as the soft reset path: stop the admin QP and free + * the admin resources before the hardware queues are wiped. 
+ */ + if (enic_sriov_enabled(enic) && + enic->vf_type == ENIC_VF_TYPE_V2) + enic_admin_channel_close(enic); + enic_dev_hang_notify(enic); enic_stop(enic->netdev); + enic_dev_hang_reset(enic); enic_reset_addr_lists(enic); enic_init_vnic_resources(enic); enic_set_rss_nic_cfg(enic); enic_dev_set_ig_vlan_rewrite_mode(enic); enic_ext_cq(enic); + enic_open(enic->netdev); + /* Re-establish the admin/MBOX channel after the data path is back up, + * mirroring the SR-IOV enable path (channel open + mbox init). The + * channel was fully torn down by enic_admin_channel_close() above. + */ + if (enic_sriov_enabled(enic) && + enic->vf_type == ENIC_VF_TYPE_V2) { + if (enic_admin_channel_open(enic)) { + netdev_err(enic->netdev, + "admin channel reopen after reset failed\n"); + } else { + enic_mbox_init(enic); + /* The link came back up during enic_open() above + * while MBOX sends were still disabled (channel not + * yet reopened), so that link-notify was dropped. + * Re-push current link state to registered VFs now. + */ + schedule_work(&enic->link_notify_work); + } + } + /* Allow infiniband to fiddle with the device again */ enic_set_api_busy(enic, false); @@ -2200,6 +2286,8 @@ static void enic_tx_hang_reset(struct work_struct *work) static int enic_set_intr_mode(struct enic *enic) { + unsigned int admin_reserve = enic->has_admin_channel ? 
1 : 0; + unsigned int min_intr = ENIC_MSIX_MIN_INTR + admin_reserve; unsigned int i; int num_intr; @@ -2210,12 +2298,12 @@ static int enic_set_intr_mode(struct enic *enic) */ if (enic->config.intr_mode < 1 && - enic->intr_avail >= ENIC_MSIX_MIN_INTR) { + enic->intr_avail >= min_intr) { for (i = 0; i < enic->intr_avail; i++) enic->msix_entry[i].entry = i; num_intr = pci_enable_msix_range(enic->pdev, enic->msix_entry, - ENIC_MSIX_MIN_INTR, + min_intr, enic->intr_avail); if (num_intr > 0) { vnic_dev_set_intr_mode(enic->vdev, @@ -2310,7 +2398,13 @@ static int enic_adjust_resources(struct enic *enic) enic->cq_count = 2; enic->intr_count = enic->intr_avail; break; - case VNIC_DEV_INTR_MODE_MSIX: + case VNIC_DEV_INTR_MODE_MSIX: { + /* Reserve one MSI-X slot for the admin channel interrupt + * when V2 SR-IOV admin channel resources are present. + */ + unsigned int admin_reserve = + enic->has_admin_channel ? 1 : 0; + /* Adjust the number of wqs/rqs/cqs/interrupts that will be * used based on which resource is the most constrained */ @@ -2319,7 +2413,8 @@ static int enic_adjust_resources(struct enic *enic) ENIC_RQ_MIN_DEFAULT); rq_avail = min3(enic->rq_avail, ENIC_RQ_MAX, rq_default); max_queues = min(enic->cq_avail, - enic->intr_avail - ENIC_MSIX_RESERVED_INTR); + enic->intr_avail - ENIC_MSIX_RESERVED_INTR - + admin_reserve); if (wq_avail + rq_avail <= max_queues) { enic->rq_count = rq_avail; enic->wq_count = wq_avail; @@ -2337,6 +2432,7 @@ static int enic_adjust_resources(struct enic *enic) enic->intr_count = enic->cq_count + ENIC_MSIX_RESERVED_INTR; break; + } default: dev_err(enic_get_dev(enic), "Unknown interrupt mode\n"); return -EINVAL; @@ -2689,6 +2785,132 @@ static void enic_sriov_detect_vf_type(struct enic *enic) enic->vf_type = ENIC_VF_TYPE_NONE; } } + +static int __maybe_unused +enic_sriov_v2_enable(struct enic *enic, int num_vfs) +{ + int err; + + if (!enic->has_admin_channel) { + netdev_err(enic->netdev, + "V2 SR-IOV requires admin channel resources\n"); + 
return -EOPNOTSUPP; + } + + enic->vf_state = kcalloc(num_vfs, sizeof(*enic->vf_state), GFP_KERNEL); + if (!enic->vf_state) + return -ENOMEM; + + err = enic_admin_channel_open(enic); + if (err) { + netdev_err(enic->netdev, + "Failed to open admin channel: %d\n", err); + goto free_vf_state; + } + + enic_mbox_init(enic); + + enic->num_vfs = num_vfs; + + err = pci_enable_sriov(enic->pdev, num_vfs); + if (err) { + netdev_err(enic->netdev, + "pci_enable_sriov failed: %d\n", err); + goto close_admin; + } + + enic->priv_flags |= ENIC_SRIOV_ENABLED; + return num_vfs; + +close_admin: + enic->num_vfs = 0; + enic_admin_channel_close(enic); +free_vf_state: + kfree(enic->vf_state); + enic->vf_state = NULL; + return err; +} + +static void enic_sriov_v2_disable(struct enic *enic) +{ + /* Stop new VF link-state broadcasts before tearing down vf_state. + * Clearing ENIC_SRIOV_ENABLED makes enic_link_check() (called from + * the notify timer/ISR) skip the VF notify path, and cancelling + * link_notify_work ensures any already-queued broadcast has finished + * before vf_state is freed, closing a use-after-free window. 
+ */ + enic->priv_flags &= ~ENIC_SRIOV_ENABLED; + cancel_work_sync(&enic->link_notify_work); + + pci_disable_sriov(enic->pdev); + enic_admin_channel_close(enic); + kfree(enic->vf_state); + enic->vf_state = NULL; + enic->num_vfs = 0; +} + +static int __maybe_unused +enic_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct enic *enic = netdev_priv(netdev); + struct enic_port_profile *pp; + int err; + + if (num_vfs > 0) { + if (enic->config.mq_subvnic_count) { + netdev_err(netdev, + "SR-IOV not supported with multi-queue sub-vnics\n"); + return -EOPNOTSUPP; + } + + if (enic->vf_type == ENIC_VF_TYPE_NONE) { + netdev_err(netdev, + "SR-IOV not supported on this firmware version\n"); + return -EOPNOTSUPP; + } + + if (enic->vf_type == ENIC_VF_TYPE_V2) + return enic_sriov_v2_enable(enic, num_vfs); + + pp = kcalloc(num_vfs, sizeof(*pp), GFP_KERNEL); + if (!pp) + return -ENOMEM; + + err = pci_enable_sriov(pdev, num_vfs); + if (err) { + kfree(pp); + return err; + } + + kfree(enic->pp); + enic->pp = pp; + enic->num_vfs = num_vfs; + enic->priv_flags |= ENIC_SRIOV_ENABLED; + return num_vfs; + } + + if (!enic_sriov_enabled(enic)) + return 0; + + if (enic->vf_type == ENIC_VF_TYPE_V2) { + enic_sriov_v2_disable(enic); + return 0; + } + + pp = kzalloc_obj(*enic->pp, GFP_KERNEL); + if (!pp) + return -ENOMEM; + + pci_disable_sriov(pdev); + enic->num_vfs = 0; + enic->priv_flags &= ~ENIC_SRIOV_ENABLED; + + kfree(enic->pp); + enic->pp = pp; + + return 0; +} #endif static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) @@ -2787,12 +3009,18 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_vnic_unregister; #ifdef CONFIG_PCI_IOV - /* Get number of subvnics */ + enic_sriov_detect_vf_type(enic); + + /* Auto-enable SR-IOV if VFs were pre-configured (e.g. at boot). 
+ * V2 VFs require the admin channel, which is not yet set up at probe + * time; use sysfs (enic_sriov_configure) to enable V2 SR-IOV instead. + */ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); if (pos) { pci_read_config_word(pdev, pos + PCI_SRIOV_TOTAL_VF, &enic->num_vfs); - if (enic->num_vfs) { + if (enic->num_vfs && + enic->vf_type != ENIC_VF_TYPE_V2) { err = pci_enable_sriov(pdev, enic->num_vfs); if (err) { dev_err(dev, "SRIOV enable failed, aborting." @@ -2804,7 +3032,6 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) num_pps = enic->num_vfs; } } - enic_sriov_detect_vf_type(enic); #endif /* Allocate structure for port profiles */ @@ -2881,6 +3108,7 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&enic->reset, enic_reset); INIT_WORK(&enic->tx_hang_reset, enic_tx_hang_reset); INIT_WORK(&enic->change_mtu_work, enic_change_mtu_work); + INIT_WORK(&enic->link_notify_work, enic_link_notify_work_handler); for (i = 0; i < enic->wq_count; i++) spin_lock_init(&enic->wq[i].lock); @@ -3034,14 +3262,16 @@ static void enic_remove(struct pci_dev *pdev) cancel_work_sync(&enic->tx_hang_reset); cancel_work_sync(&enic->change_mtu_work); unregister_netdev(netdev); - enic_dev_deinit(enic); - vnic_dev_close(enic->vdev); #ifdef CONFIG_PCI_IOV if (enic_sriov_enabled(enic)) { - pci_disable_sriov(pdev); - enic->priv_flags &= ~ENIC_SRIOV_ENABLED; + if (enic->vf_type == ENIC_VF_TYPE_V2) + enic_sriov_v2_disable(enic); + else + pci_disable_sriov(pdev); } #endif + enic_dev_deinit(enic); + vnic_dev_close(enic->vdev); kfree(enic->pp); vnic_dev_unregister(enic->vdev); enic_iounmap(enic); diff --git a/drivers/net/ethernet/cisco/enic/enic_mbox.c b/drivers/net/ethernet/cisco/enic/enic_mbox.c index 4676a9a15af5..5cdaf5a0e524 100644 --- a/drivers/net/ethernet/cisco/enic/enic_mbox.c +++ b/drivers/net/ethernet/cisco/enic/enic_mbox.c @@ -624,8 +624,17 @@ int enic_mbox_vf_unregister(struct enic *enic) void 
enic_mbox_init(struct enic *enic) { + /* mbox_lock and mbox_comp must be initialized exactly once per + * device lifetime; the PF sriov_configure path can re-enter this + * on each enable cycle where these primitives are already set up. + */ + if (!enic->mbox_initialized) { + mutex_init(&enic->mbox_lock); + init_completion(&enic->mbox_comp); + enic->mbox_initialized = true; + } else { + reinit_completion(&enic->mbox_comp); + } enic->mbox_msg_num = 0; - mutex_init(&enic->mbox_lock); - init_completion(&enic->mbox_comp); enic->admin_rq_handler = enic_mbox_recv_handler; } diff --git a/drivers/net/ethernet/cisco/enic/enic_pp.c b/drivers/net/ethernet/cisco/enic/enic_pp.c index 4720a952725d..3f611e240c25 100644 --- a/drivers/net/ethernet/cisco/enic/enic_pp.c +++ b/drivers/net/ethernet/cisco/enic/enic_pp.c @@ -25,6 +25,11 @@ int enic_is_valid_pp_vf(struct enic *enic, int vf, int *err) if (vf != PORT_SELF_VF) { #ifdef CONFIG_PCI_IOV if (enic_sriov_enabled(enic)) { + /* V2 SR-IOV uses MBOX, not port profiles */ + if (enic->vf_type == ENIC_VF_TYPE_V2) { + *err = -EOPNOTSUPP; + goto err_out; + } if (vf < 0 || vf >= enic->num_vfs) { *err = -EINVAL; goto err_out; diff --git a/drivers/net/ethernet/cisco/enic/enic_res.c b/drivers/net/ethernet/cisco/enic/enic_res.c index 2b7545d6a67f..436326ace049 100644 --- a/drivers/net/ethernet/cisco/enic/enic_res.c +++ b/drivers/net/ethernet/cisco/enic/enic_res.c @@ -59,6 +59,7 @@ int enic_get_vnic_config(struct enic *enic) GET_CONFIG(intr_timer_usec); GET_CONFIG(loop_tag); GET_CONFIG(num_arfs); + GET_CONFIG(mq_subvnic_count); GET_CONFIG(max_rq_ring); GET_CONFIG(max_wq_ring); GET_CONFIG(max_cq_ring); diff --git a/drivers/net/ethernet/cisco/enic/vnic_enet.h b/drivers/net/ethernet/cisco/enic/vnic_enet.h index 9e8e86262a3f..519d2969990b 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_enet.h +++ b/drivers/net/ethernet/cisco/enic/vnic_enet.h @@ -21,7 +21,9 @@ struct vnic_enet_config { u16 loop_tag; u16 vf_rq_count; u16 num_arfs; - u8 reserved[66]; 
+ u8 reserved1[32]; + u16 mq_subvnic_count; + u8 reserved2[32]; u32 max_rq_ring; // MAX RQ ring size u32 max_wq_ring; // MAX WQ ring size u32 max_cq_ring; // MAX CQ ring size -- 2.43.0 When a V2 SR-IOV VF probes, open the admin channel, initialize the MBOX protocol, perform the capability check with the PF, and register with the PF. This establishes the PF-VF communication path that the PF uses to send link state notifications. The admin channel and MBOX registration happen after enic_dev_init() (which discovers admin channel resources) and before register_netdev() so the VF is fully initialized before the interface is visible to userspace. The admin channel is opened before enic_mbox_init() installs the receive handler. This is safe because enic_admin_rq_cq_service() checks admin_rq_handler before enqueuing received buffers, so any interrupt that fires between open and mbox_init is harmlessly discarded. On remove, the VF unregisters from the PF and closes its admin channel before tearing down data path resources. V2 VFs are not provisioned with an RES_TYPE_SRIOV_INTR resource by firmware, so bypass that check in the admin channel capability detection for V2 VFs. The PF still requires this resource. The admin MSI-X vector reserved by enic_set_intr_mode() is used for the admin channel interrupt. enic_adjust_resources() ensures the reserved slot is within intr_avail bounds even at maximum queue configurations. The admin INTR uses a RES_TYPE_INTR_CTRL slot shared with the data path. 
Signed-off-by: Satish Kharat --- drivers/net/ethernet/cisco/enic/enic.h | 1 + drivers/net/ethernet/cisco/enic/enic_main.c | 101 +++++++++++++++++++++++++--- drivers/net/ethernet/cisco/enic/enic_res.c | 3 +- 3 files changed, 94 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index b5a43fe04877..62b8941489d7 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -452,6 +452,7 @@ void enic_reset_addr_lists(struct enic *enic); int enic_sriov_enabled(struct enic *enic); int enic_is_valid_vf(struct enic *enic, int vf); int enic_is_dynamic(struct enic *enic); +int enic_is_sriov_vf_v2(struct enic *enic); void enic_set_ethtool_ops(struct net_device *netdev); int __enic_set_rsskey(struct enic *enic); void enic_ext_cq(struct enic *enic); diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 185da2fbc5c7..abb30e5457c1 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -316,6 +316,11 @@ static int enic_is_sriov_vf(struct enic *enic) enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_VF_V2; } +int enic_is_sriov_vf_v2(struct enic *enic) +{ + return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_VF_V2; +} + int enic_is_valid_vf(struct enic *enic, int vf) { #ifdef CONFIG_PCI_IOV @@ -2399,15 +2404,19 @@ static int enic_adjust_resources(struct enic *enic) enic->intr_count = enic->intr_avail; break; case VNIC_DEV_INTR_MODE_MSIX: { - /* Reserve one MSI-X slot for the admin channel interrupt - * when V2 SR-IOV admin channel resources are present. - */ - unsigned int admin_reserve = - enic->has_admin_channel ? 1 : 0; - /* Adjust the number of wqs/rqs/cqs/interrupts that will be - * used based on which resource is the most constrained + * used based on which resource is the most constrained. 
+ * Reserve one extra MSI-X slot for the admin channel INTR + * when has_admin_channel is set so that + * enic_admin_setup_intr() can allocate at intr_count + * within the intr_avail bounds even when the data queue + * count is maxed out. intr_count counts only the data-path + * IRQs (registered by enic_request_intr()); the admin INTR + * lives at msix index intr_count and is set up later by + * enic_admin_setup_intr(). */ + unsigned int admin_reserve = enic->has_admin_channel ? 1 : 0; + wq_avail = min(enic->wq_avail, ENIC_WQ_MAX); rq_default = max(netif_get_num_default_rss_queues(), ENIC_RQ_MIN_DEFAULT); @@ -3096,6 +3105,44 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_dev_close; } + /* Initialise link_notify_work before the V2-VF admin-open block below: + * its error path (err_out_admin_close -> enic_admin_channel_close() -> + * cancel_work_sync()) would otherwise act on an uninitialised work. + */ + INIT_WORK(&enic->link_notify_work, enic_link_notify_work_handler); + + /* V2 VF: open admin channel and register with PF. + * Must happen before register_netdev so the VF is fully + * initialized before the interface is visible to userspace. + * + * admin_channel_open() runs before enic_mbox_init() installs + * the receive handler. This is safe because + * enic_admin_rq_cq_service() checks admin_rq_handler before + * enqueuing any received buffer, so interrupts that fire + * between open and mbox_init are harmlessly discarded. 
+ */ + if (enic_is_sriov_vf_v2(enic)) { + err = enic_admin_channel_open(enic); + if (err) { + dev_err(dev, + "Failed to open admin channel: %d\n", err); + goto err_out_dev_deinit; + } + enic_mbox_init(enic); + err = enic_mbox_vf_capability_check(enic); + if (err) { + dev_err(dev, + "MBOX capability check failed: %d\n", err); + goto err_out_admin_close; + } + err = enic_mbox_vf_register(enic); + if (err) { + dev_err(dev, + "MBOX VF registration failed: %d\n", err); + goto err_out_admin_close; + } + } + netif_set_real_num_tx_queues(netdev, enic->wq_count); netif_set_real_num_rx_queues(netdev, enic->rq_count); @@ -3108,7 +3155,6 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&enic->reset, enic_reset); INIT_WORK(&enic->tx_hang_reset, enic_tx_hang_reset); INIT_WORK(&enic->change_mtu_work, enic_change_mtu_work); - INIT_WORK(&enic->link_notify_work, enic_link_notify_work_handler); for (i = 0; i < enic->wq_count; i++) spin_lock_init(&enic->wq[i].lock); @@ -3121,7 +3167,7 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = enic_set_mac_addr(netdev, enic->mac_addr); if (err) { dev_err(dev, "Invalid MAC address, aborting\n"); - goto err_out_dev_deinit; + goto err_out_admin_close; } enic->tx_coalesce_usecs = enic->config.intr_timer_usec; @@ -3219,11 +3265,23 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = register_netdev(netdev); if (err) { dev_err(dev, "Cannot register net device, aborting\n"); - goto err_out_dev_deinit; + goto err_out_admin_close; } return 0; +err_out_admin_close: + if (enic_is_sriov_vf_v2(enic)) { + if (enic->vf_registered) { + int unreg_err = enic_mbox_vf_unregister(enic); + + if (unreg_err) + netdev_warn(netdev, + "Failed to unregister from PF: %d\n", + unreg_err); + } + enic_admin_channel_close(enic); + } err_out_dev_deinit: enic_dev_deinit(enic); err_out_dev_close: @@ -3261,7 +3319,30 @@ static void enic_remove(struct pci_dev *pdev) 
cancel_work_sync(&enic->reset); cancel_work_sync(&enic->tx_hang_reset); cancel_work_sync(&enic->change_mtu_work); + + /* Close the admin channel and unregister from the PF before + * unregister_netdev() to prevent a late PF notification from + * touching a netdev that has been freed. + */ + if (enic_is_sriov_vf_v2(enic)) { + if (enic->vf_registered) { + int unreg_err = enic_mbox_vf_unregister(enic); + + if (unreg_err) + netdev_warn(netdev, + "Failed to unregister from PF: %d\n", + unreg_err); + } + enic_admin_channel_close(enic); + } + unregister_netdev(netdev); + /* unregister_netdev() -> enic_stop() stops the notify timer, so + * no new link_notify_work can be queued past this point. Cancel + * unconditionally to cover the narrow window where + * enic_link_check() scheduled it just as SR-IOV was disabled. + */ + cancel_work_sync(&enic->link_notify_work); #ifdef CONFIG_PCI_IOV if (enic_sriov_enabled(enic)) { if (enic->vf_type == ENIC_VF_TYPE_V2) diff --git a/drivers/net/ethernet/cisco/enic/enic_res.c b/drivers/net/ethernet/cisco/enic/enic_res.c index 436326ace049..74cd2ee3af5c 100644 --- a/drivers/net/ethernet/cisco/enic/enic_res.c +++ b/drivers/net/ethernet/cisco/enic/enic_res.c @@ -211,7 +211,8 @@ void enic_get_res_counts(struct enic *enic) vnic_dev_get_res_count(enic->vdev, RES_TYPE_ADMIN_RQ) >= 1 && vnic_dev_get_res_count(enic->vdev, RES_TYPE_ADMIN_CQ) >= ARRAY_SIZE(enic->admin_cq) && - vnic_dev_get_res_count(enic->vdev, RES_TYPE_SRIOV_INTR) >= 1; + (enic_is_sriov_vf_v2(enic) || + vnic_dev_get_res_count(enic->vdev, RES_TYPE_SRIOV_INTR) >= 1); dev_info(enic_get_dev(enic), "vNIC resources avail: wq %d rq %d cq %d intr %d admin %s\n", -- 2.43.0 The reset paths (enic_reset/enic_tx_hang_reset) tore down and re-opened the V2 admin/MBOX channel only for the PF: the close/reopen was gated on enic_sriov_enabled() && vf_type == ENIC_VF_TYPE_V2, which is never true on a VF (vf_type is set only on the PF; VFs are identified by enic_is_sriov_vf_v2()). 
A VF-initiated reset therefore left the VF admin QP wiped by the reset but never re-opened, and the VF never re-registered with the PF, so VF<->PF MBOX traffic (link state, MAC, packet filter) stopped working until the VF was re-probed. Factor the decision into enic_has_admin_chan() (true for a V2 PF while SR-IOV is enabled and for every V2 VF) and the reopen sequence into enic_admin_chan_reopen(). For a VF the helper additionally re-runs the probe-time handshake (enic_mbox_vf_capability_check() + enic_mbox_vf_register()) so the PF learns about the VF again; for a PF it re-pushes the current link state as before. Signed-off-by: Satish Kharat --- drivers/net/ethernet/cisco/enic/enic_main.c | 105 +++++++++++++++++----------- 1 file changed, 65 insertions(+), 40 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index abb30e5457c1..de90332dc40c 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2171,6 +2171,57 @@ static void enic_set_api_busy(struct enic *enic, bool busy) spin_unlock(&enic->enic_api_lock); } +/* The admin/MBOX channel exists on a V2 PF while SR-IOV is enabled and on + * every V2 VF. A reset wipes the admin WQ/RQ/CQ, so such devices must tear + * the channel down before the reset and re-establish it afterwards. + */ +static bool enic_has_admin_chan(struct enic *enic) +{ + return enic_is_sriov_vf_v2(enic) || + (enic_sriov_enabled(enic) && enic->vf_type == ENIC_VF_TYPE_V2); +} + +/* Re-establish the admin/MBOX channel after a reset has re-created the data + * path. Mirrors the relevant part of the probe / SR-IOV-enable sequence: + * reopen the channel and reinitialise MBOX, then for a VF re-run the PF + * handshake (its admin QP and PF-side registration were torn down by the + * reset), or for a PF re-push the current link state to registered VFs. 
+ */ +static void enic_admin_chan_reopen(struct enic *enic) +{ + int err; + + err = enic_admin_channel_open(enic); + if (err) { + netdev_err(enic->netdev, + "admin channel reopen after reset failed: %d\n", err); + return; + } + + enic_mbox_init(enic); + + if (enic_is_sriov_vf_v2(enic)) { + err = enic_mbox_vf_capability_check(enic); + if (err) { + netdev_err(enic->netdev, + "MBOX capability check after reset failed: %d\n", + err); + return; + } + err = enic_mbox_vf_register(enic); + if (err) + netdev_err(enic->netdev, + "MBOX VF re-registration after reset failed: %d\n", + err); + } else { + /* The link came back up during enic_open() above while MBOX + * sends were still disabled (channel not yet reopened), so that + * link-notify was dropped. Re-push current link state now. + */ + schedule_work(&enic->link_notify_work); + } +} + static void enic_reset(struct work_struct *work) { struct enic *enic = container_of(work, struct enic, reset); @@ -2189,8 +2240,7 @@ static void enic_reset(struct work_struct *work) * DMAs from the about-to-be-reset rings) and frees the admin resources * so they are cleanly re-allocated afterwards. */ - if (enic_sriov_enabled(enic) && - enic->vf_type == ENIC_VF_TYPE_V2) + if (enic_has_admin_chan(enic)) enic_admin_channel_close(enic); enic_stop(enic->netdev); @@ -2204,25 +2254,13 @@ static void enic_reset(struct work_struct *work) enic_open(enic->netdev); - /* Re-establish the admin/MBOX channel after the data path is back up, - * mirroring the SR-IOV enable path (channel open + mbox init). The - * channel was fully torn down by enic_admin_channel_close() above. + /* Re-establish the admin/MBOX channel after the data path is back up. + * It was fully torn down by enic_admin_channel_close() above; + * enic_admin_chan_reopen() reopens it and, for a PF re-pushes link + * state, or for a VF re-runs the probe-time PF handshake. 
*/ - if (enic_sriov_enabled(enic) && - enic->vf_type == ENIC_VF_TYPE_V2) { - if (enic_admin_channel_open(enic)) { - netdev_err(enic->netdev, - "admin channel reopen after reset failed\n"); - } else { - enic_mbox_init(enic); - /* The link came back up during enic_open() above - * while MBOX sends were still disabled (channel not - * yet reopened), so that link-notify was dropped. - * Re-push current link state to registered VFs now. - */ - schedule_work(&enic->link_notify_work); - } - } + if (enic_has_admin_chan(enic)) + enic_admin_chan_reopen(enic); /* Allow infiniband to fiddle with the device again */ enic_set_api_busy(enic, false); @@ -2245,8 +2283,7 @@ static void enic_tx_hang_reset(struct work_struct *work) * the same reason as the soft reset path: stop the admin QP and free * the admin resources before the hardware queues are wiped. */ - if (enic_sriov_enabled(enic) && - enic->vf_type == ENIC_VF_TYPE_V2) + if (enic_has_admin_chan(enic)) enic_admin_channel_close(enic); enic_dev_hang_notify(enic); @@ -2261,25 +2298,13 @@ static void enic_tx_hang_reset(struct work_struct *work) enic_open(enic->netdev); - /* Re-establish the admin/MBOX channel after the data path is back up, - * mirroring the SR-IOV enable path (channel open + mbox init). The - * channel was fully torn down by enic_admin_channel_close() above. + /* Re-establish the admin/MBOX channel after the data path is back up. + * It was fully torn down by enic_admin_channel_close() above; + * enic_admin_chan_reopen() reopens it and, for a PF re-pushes link + * state, or for a VF re-runs the probe-time PF handshake. */ - if (enic_sriov_enabled(enic) && - enic->vf_type == ENIC_VF_TYPE_V2) { - if (enic_admin_channel_open(enic)) { - netdev_err(enic->netdev, - "admin channel reopen after reset failed\n"); - } else { - enic_mbox_init(enic); - /* The link came back up during enic_open() above - * while MBOX sends were still disabled (channel not - * yet reopened), so that link-notify was dropped. 
- * Re-push current link state to registered VFs now. - */ - schedule_work(&enic->link_notify_work); - } - } + if (enic_has_admin_chan(enic)) + enic_admin_chan_reopen(enic); /* Allow infiniband to fiddle with the device again */ enic_set_api_busy(enic, false); -- 2.43.0