Make the log message parameter const, it's not modified and this lets us pass in strings which are const for the caller. Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_fw_log.h | 2 +- drivers/net/ethernet/meta/fbnic/fbnic_fw_log.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.h b/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.h index cb6555f40a24..50ec79003108 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.h @@ -41,5 +41,5 @@ void fbnic_fw_log_disable(struct fbnic_dev *fbd); int fbnic_fw_log_init(struct fbnic_dev *fbd); void fbnic_fw_log_free(struct fbnic_dev *fbd); int fbnic_fw_log_write(struct fbnic_dev *fbd, u64 index, u32 timestamp, - char *msg); + const char *msg); #endif /* _FBNIC_FW_LOG_H_ */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.c index c1663f042245..85a883dba385 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.c @@ -72,7 +72,7 @@ void fbnic_fw_log_free(struct fbnic_dev *fbd) } int fbnic_fw_log_write(struct fbnic_dev *fbd, u64 index, u32 timestamp, - char *msg) + const char *msg) { struct fbnic_fw_log_entry *entry, *head, *tail, *next; struct fbnic_fw_log *log = &fbd->fw_log; -- 2.51.0 Currently we only detect FW crashes when it stops responding to heartbeat messages. FW has a watchdog which will reset it in case of crashes. Use FW uptime sent in the heartbeat messages to detect that the watchdog has fired. Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic.h | 4 ++++ drivers/net/ethernet/meta/fbnic/fbnic_fw.h | 6 ++++++ drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 15 ++++++++++++++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index 311c7dda911a..09058d847729 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -84,6 +84,10 @@ struct fbnic_dev { /* Local copy of hardware statistics */ struct fbnic_hw_stats hw_stats; + /* Firmware time since boot in milliseconds */ + u64 firmware_time; + u64 prev_firmware_time; + struct fbnic_fw_log fw_log; }; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h index ec67b80809b0..c1de2793fa3d 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h @@ -201,6 +201,12 @@ enum { FBNIC_FW_OWNERSHIP_MSG_MAX }; +enum { + FBNIC_FW_HEARTBEAT_UPTIME = 0x0, + FBNIC_FW_HEARTBEAT_NUMBER_OF_MESSAGES = 0x1, + FBNIC_FW_HEARTBEAT_MSG_MAX +}; + enum { FBNIC_FW_START_UPGRADE_ERROR = 0x0, FBNIC_FW_START_UPGRADE_SECTION = 0x1, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index 6e580654493c..72f750eea055 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -495,6 +495,11 @@ int fbnic_fw_xmit_ownership_msg(struct fbnic_dev *fbd, bool take_ownership) fbd->last_heartbeat_request = req_time; + /* Set prev_firmware_time to 0 to avoid triggering firmware crash + * detection now that we received a response from firmware. + */ + fbd->prev_firmware_time = 0; + /* Set heartbeat detection based on if we are taking ownership */ fbd->fw_heartbeat_enabled = take_ownership; @@ -671,6 +676,9 @@ static int fbnic_fw_parse_ownership_resp(void *opaque, /* Count the ownership response as a heartbeat reply */ fbd->last_heartbeat_response = jiffies; + /* Capture firmware time for logging and firmware crash check */ + fbd->firmware_time = fta_get_uint(results, FBNIC_FW_HEARTBEAT_UPTIME); + return 0; } @@ -685,6 +693,9 @@ static int fbnic_fw_parse_heartbeat_resp(void *opaque, fbd->last_heartbeat_response = jiffies; + /* Capture firmware time for logging and firmware crash check */ + fbd->firmware_time = fta_get_uint(results, FBNIC_FW_HEARTBEAT_UPTIME); + return 0; } @@ -706,6 +717,7 @@ static int fbnic_fw_xmit_heartbeat_message(struct fbnic_dev *fbd) goto free_message; fbd->last_heartbeat_request = req_time; + fbd->prev_firmware_time = fbd->firmware_time; return err; @@ -766,7 +778,8 @@ void fbnic_fw_check_heartbeat(struct fbnic_dev *fbd) return; /* Was the last heartbeat response long time ago? */ - if (!fbnic_fw_heartbeat_current(fbd)) { + if (!fbnic_fw_heartbeat_current(fbd) || + fbd->firmware_time < fbd->prev_firmware_time) { dev_warn(fbd->dev, "Firmware did not respond to heartbeat message\n"); fbd->fw_heartbeat_enabled = false; -- 2.51.0 We'll want to wipe the driver TCAM state after FW crash, to force a re-programming. Factor out the clearing logic. Remove the micro- -optimization to skip clearing the BMC entry twice, it doesn't hurt. Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_rpc.c | 36 ++++++++++++--------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c index 4284b3cb7fcc..d944d0fdd3b7 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c @@ -1124,13 +1124,25 @@ void fbnic_write_ip_addr(struct fbnic_dev *fbd) } } -void fbnic_clear_rules(struct fbnic_dev *fbd) +static void fbnic_clear_valid_act_tcam(struct fbnic_dev *fbd) { - u32 dest = FIELD_PREP(FBNIC_RPC_ACT_TBL0_DEST_MASK, - FBNIC_RPC_ACT_TBL0_DEST_BMC); int i = FBNIC_RPC_TCAM_ACT_NUM_ENTRIES - 1; struct fbnic_act_tcam *act_tcam; + /* Work from the bottom up deleting all other rules from hardware */ + do { + act_tcam = &fbd->act_tcam[i]; + + if (act_tcam->state != FBNIC_TCAM_S_VALID) + continue; + + fbnic_clear_act_tcam(fbd, i); + act_tcam->state = FBNIC_TCAM_S_UPDATE; + } while (i--); +} + +void fbnic_clear_rules(struct fbnic_dev *fbd) +{ /* Clear MAC rules */ fbnic_clear_macda(fbd); @@ -1145,6 +1157,11 @@ void fbnic_clear_rules(struct fbnic_dev *fbd) * the interface back up. */ if (fbnic_bmc_present(fbd)) { + u32 dest = FIELD_PREP(FBNIC_RPC_ACT_TBL0_DEST_MASK, + FBNIC_RPC_ACT_TBL0_DEST_BMC); + int i = FBNIC_RPC_TCAM_ACT_NUM_ENTRIES - 1; + struct fbnic_act_tcam *act_tcam; + act_tcam = &fbd->act_tcam[i]; if (act_tcam->state == FBNIC_TCAM_S_VALID && @@ -1153,21 +1170,10 @@ void fbnic_clear_rules(struct fbnic_dev *fbd) wr32(fbd, FBNIC_RPC_ACT_TBL1(i), 0); act_tcam->state = FBNIC_TCAM_S_UPDATE; - - i--; } } - /* Work from the bottom up deleting all other rules from hardware */ - do { - act_tcam = &fbd->act_tcam[i]; - - if (act_tcam->state != FBNIC_TCAM_S_VALID) - continue; - - fbnic_clear_act_tcam(fbd, i); - act_tcam->state = FBNIC_TCAM_S_UPDATE; - } while (i--); + fbnic_clear_valid_act_tcam(fbd); } static void fbnic_delete_act_tcam(struct fbnic_dev *fbd, unsigned int idx) -- 2.51.0 FW may mess with the TCAM after it boots, to try to restore the traffic flow to the BMC (it may not be aware that the host is already up). Make sure that we reprogram the TCAMs after detecting a crash. Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic.h | 2 ++ drivers/net/ethernet/meta/fbnic/fbnic_pci.c | 23 ++++++++++++++------- drivers/net/ethernet/meta/fbnic/fbnic_rpc.c | 21 +++++++++++++++++++ 3 files changed, 39 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index 09058d847729..b364c2f0724b 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -191,6 +191,8 @@ void fbnic_dbg_fbd_exit(struct fbnic_dev *fbd); void fbnic_dbg_init(void); void fbnic_dbg_exit(void); +void fbnic_rpc_reset_valid_entries(struct fbnic_dev *fbd); + void fbnic_csr_get_regs(struct fbnic_dev *fbd, u32 *data, u32 *regs_version); int fbnic_csr_regs_len(struct fbnic_dev *fbd); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c index e246c50c8bf3..53690db14d77 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c @@ -167,6 +167,20 @@ void fbnic_down(struct fbnic_net *fbn) fbnic_flush(fbn); } +static int fbnic_fw_config_after_crash(struct fbnic_dev *fbd) +{ + if (fbnic_fw_xmit_ownership_msg(fbd, true)) { + dev_err(fbd->dev, "NIC failed to take ownership\n"); + + return -1; + } + + fbnic_rpc_reset_valid_entries(fbd); + __fbnic_set_rx_mode(fbd); + + return 0; +} + static void fbnic_health_check(struct fbnic_dev *fbd) { struct fbnic_fw_mbx *tx_mbx = &fbd->mbx[FBNIC_IPC_MBX_TX_IDX]; @@ -182,13 +196,8 @@ static void fbnic_health_check(struct fbnic_dev *fbd) if (tx_mbx->head != tx_mbx->tail) return; - /* TBD: Need to add a more thorough recovery here. - * Specifically I need to verify what all the firmware will have - * changed since we had setup and it rebooted. May just need to - * perform a down/up. For now we will just reclaim ownership so - * the heartbeat can catch the next fault. - */ - fbnic_fw_xmit_ownership_msg(fbd, true); + if (fbnic_fw_config_after_crash(fbd)) + dev_err(fbd->dev, "Firmware recovery failed after crash\n"); } static void fbnic_service_task(struct work_struct *work) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c index d944d0fdd3b7..7f31e890031c 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c @@ -596,6 +596,21 @@ static void fbnic_clear_macda(struct fbnic_dev *fbd) } } +static void fbnic_clear_valid_macda(struct fbnic_dev *fbd) +{ + int idx; + + for (idx = ARRAY_SIZE(fbd->mac_addr); idx--;) { + struct fbnic_mac_addr *mac_addr = &fbd->mac_addr[idx]; + + if (mac_addr->state == FBNIC_TCAM_S_VALID) { + fbnic_clear_macda_entry(fbd, idx); + + mac_addr->state = FBNIC_TCAM_S_UPDATE; + } + } +} + static void fbnic_write_macda_entry(struct fbnic_dev *fbd, unsigned int idx, struct fbnic_mac_addr *mac_addr) { @@ -1223,3 +1238,9 @@ void fbnic_write_rules(struct fbnic_dev *fbd) fbnic_update_act_tcam(fbd, i); } } + +void fbnic_rpc_reset_valid_entries(struct fbnic_dev *fbd) +{ + fbnic_clear_valid_act_tcam(fbd); + fbnic_clear_valid_macda(fbd); +} -- 2.51.0 Support allocating extra space after the FW completion. This makes it easy to pass extra variable size buffer space to FW response handlers without worrying about synchronization (completion itself is already refcounted). Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_fw.h | 2 ++ drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 10 ++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h index c1de2793fa3d..6a413e7296da 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h @@ -100,6 +100,8 @@ int fbnic_fw_xmit_tsene_read_msg(struct fbnic_dev *fbd, int fbnic_fw_xmit_send_logs(struct fbnic_dev *fbd, bool enable, bool send_log_history); int fbnic_fw_xmit_rpc_macda_sync(struct fbnic_dev *fbd); +struct fbnic_fw_completion *__fbnic_fw_alloc_cmpl(u32 msg_type, + size_t priv_size); struct fbnic_fw_completion *fbnic_fw_alloc_cmpl(u32 msg_type); void fbnic_fw_put_cmpl(struct fbnic_fw_completion *cmpl_data); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index 72f750eea055..1a92e4be010e 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -1542,11 +1542,12 @@ void fbnic_get_fw_ver_commit_str(struct fbnic_dev *fbd, char *fw_version, fw_version, str_sz); } -struct fbnic_fw_completion *fbnic_fw_alloc_cmpl(u32 msg_type) +struct fbnic_fw_completion *__fbnic_fw_alloc_cmpl(u32 msg_type, + size_t priv_size) { struct fbnic_fw_completion *cmpl; - cmpl = kzalloc(sizeof(*cmpl), GFP_KERNEL); + cmpl = kzalloc(sizeof(*cmpl) + priv_size, GFP_KERNEL); if (!cmpl) return NULL; @@ -1557,6 +1558,11 @@ struct fbnic_fw_completion *fbnic_fw_alloc_cmpl(u32 msg_type) return cmpl; } +struct fbnic_fw_completion *fbnic_fw_alloc_cmpl(u32 msg_type) +{ + return __fbnic_fw_alloc_cmpl(msg_type, 0); +} + void fbnic_fw_put_cmpl(struct fbnic_fw_completion *fw_cmpl) { kref_put(&fw_cmpl->ref_count, fbnic_fw_release_cmpl_data); -- 2.51.0 To read FW core dump we need to issue two commands to FW: - first get the FW core dump info - second read the dump chunk by chunk Implement these two FW commands. Subsequent commits will use them to expose FW dump via devlink heath. Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_fw.h | 38 ++++ drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 214 +++++++++++++++++++++ 2 files changed, 252 insertions(+) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h index 6a413e7296da..7d16a307ff3f 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h @@ -66,6 +66,14 @@ struct fbnic_fw_completion { struct kref ref_count; int result; union { + struct { + u32 size; + } coredump_info; + struct { + u32 size; + u16 stride; + u8 *data[]; + } coredump; struct { u32 offset; u32 length; @@ -89,6 +97,12 @@ void fbnic_mbx_flush_tx(struct fbnic_dev *fbd); int fbnic_fw_xmit_ownership_msg(struct fbnic_dev *fbd, bool take_ownership); int fbnic_fw_init_heartbeat(struct fbnic_dev *fbd, bool poll); void fbnic_fw_check_heartbeat(struct fbnic_dev *fbd); +int fbnic_fw_xmit_coredump_info_msg(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data, + bool force); +int fbnic_fw_xmit_coredump_read_msg(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data, + u32 offset, u32 length); int fbnic_fw_xmit_fw_start_upgrade(struct fbnic_dev *fbd, struct fbnic_fw_completion *cmpl_data, unsigned int id, unsigned int len); @@ -137,6 +151,10 @@ enum { FBNIC_TLV_MSG_ID_OWNERSHIP_RESP = 0x13, FBNIC_TLV_MSG_ID_HEARTBEAT_REQ = 0x14, FBNIC_TLV_MSG_ID_HEARTBEAT_RESP = 0x15, + FBNIC_TLV_MSG_ID_COREDUMP_GET_INFO_REQ = 0x18, + FBNIC_TLV_MSG_ID_COREDUMP_GET_INFO_RESP = 0x19, + FBNIC_TLV_MSG_ID_COREDUMP_READ_REQ = 0x20, + FBNIC_TLV_MSG_ID_COREDUMP_READ_RESP = 0x21, FBNIC_TLV_MSG_ID_FW_START_UPGRADE_REQ = 0x22, FBNIC_TLV_MSG_ID_FW_START_UPGRADE_RESP = 0x23, FBNIC_TLV_MSG_ID_FW_WRITE_CHUNK_REQ = 0x24, @@ -209,6 +227,26 @@ enum { FBNIC_FW_HEARTBEAT_MSG_MAX }; +enum { + FBNIC_FW_COREDUMP_REQ_INFO_CREATE = 0x0, + FBNIC_FW_COREDUMP_REQ_INFO_MSG_MAX +}; + +enum { + FBNIC_FW_COREDUMP_INFO_AVAILABLE = 0x0, + FBNIC_FW_COREDUMP_INFO_SIZE = 0x1, + FBNIC_FW_COREDUMP_INFO_ERROR = 0x2, + FBNIC_FW_COREDUMP_INFO_MSG_MAX +}; + +enum { + FBNIC_FW_COREDUMP_READ_OFFSET = 0x0, + FBNIC_FW_COREDUMP_READ_LENGTH = 0x1, + FBNIC_FW_COREDUMP_READ_DATA = 0x2, + FBNIC_FW_COREDUMP_READ_ERROR = 0x3, + FBNIC_FW_COREDUMP_READ_MSG_MAX +}; + enum { FBNIC_FW_START_UPGRADE_ERROR = 0x0, FBNIC_FW_START_UPGRADE_SECTION = 0x1, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index 1a92e4be010e..4644c9f5e300 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -791,6 +791,215 @@ void fbnic_fw_check_heartbeat(struct fbnic_dev *fbd) dev_warn(fbd->dev, "Failed to send heartbeat message\n"); } +/** + * fbnic_fw_xmit_coredump_info_msg - Create and transmit a coredump info message + * @fbd: FBNIC device structure + * @cmpl_data: Structure to store info in + * @force: Force coredump event if one hasn't already occurred + * + * Return: zero on success, negative errno on failure + * + * Asks the FW for info related to coredump. If a coredump doesn't exist it + * can optionally force one if force is true. + */ +int fbnic_fw_xmit_coredump_info_msg(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data, + bool force) +{ + struct fbnic_tlv_msg *msg; + int err = 0; + + msg = fbnic_tlv_msg_alloc(FBNIC_TLV_MSG_ID_COREDUMP_GET_INFO_REQ); + if (!msg) + return -ENOMEM; + + if (force) { + err = fbnic_tlv_attr_put_flag(msg, FBNIC_FW_COREDUMP_REQ_INFO_CREATE); + if (err) + goto free_msg; + } + + err = fbnic_mbx_map_req_w_cmpl(fbd, msg, cmpl_data); + if (err) + goto free_msg; + + return 0; + +free_msg: + free_page((unsigned long)msg); + return err; +} + +static const struct fbnic_tlv_index fbnic_coredump_info_resp_index[] = { + FBNIC_TLV_ATTR_FLAG(FBNIC_FW_COREDUMP_INFO_AVAILABLE), + FBNIC_TLV_ATTR_U32(FBNIC_FW_COREDUMP_INFO_SIZE), + FBNIC_TLV_ATTR_S32(FBNIC_FW_COREDUMP_INFO_ERROR), + FBNIC_TLV_ATTR_LAST +}; + +static int +fbnic_fw_parse_coredump_info_resp(void *opaque, struct fbnic_tlv_msg **results) +{ + struct fbnic_fw_completion *cmpl_data; + struct fbnic_dev *fbd = opaque; + u32 msg_type; + s32 err; + + /* Verify we have a completion pointer to provide with data */ + msg_type = FBNIC_TLV_MSG_ID_COREDUMP_GET_INFO_RESP; + cmpl_data = fbnic_fw_get_cmpl_by_type(fbd, msg_type); + if (!cmpl_data) + return -ENOSPC; + + err = fta_get_sint(results, FBNIC_FW_COREDUMP_INFO_ERROR); + if (err) + goto msg_err; + + if (!results[FBNIC_FW_COREDUMP_INFO_AVAILABLE]) { + err = -ENOENT; + goto msg_err; + } + + cmpl_data->u.coredump_info.size = + fta_get_uint(results, FBNIC_FW_COREDUMP_INFO_SIZE); + +msg_err: + cmpl_data->result = err; + complete(&cmpl_data->done); + fbnic_fw_put_cmpl(cmpl_data); + + return err; +} + +/** + * fbnic_fw_xmit_coredump_read_msg - Create and transmit a coredump read request + * @fbd: FBNIC device structure + * @cmpl_data: Completion struct to store coredump + * @offset: Offset into coredump requested + * @length: Length of section of cordeump to fetch + * + * Return: zero on success, negative errno on failure + * + * Asks the firmware to provide a section of the cordeump back in a message. + * The response will have an offset and size matching the values provided. + */ +int fbnic_fw_xmit_coredump_read_msg(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data, + u32 offset, u32 length) +{ + struct fbnic_tlv_msg *msg; + int err = 0; + + msg = fbnic_tlv_msg_alloc(FBNIC_TLV_MSG_ID_COREDUMP_READ_REQ); + if (!msg) + return -ENOMEM; + + if (offset) { + err = fbnic_tlv_attr_put_int(msg, FBNIC_FW_COREDUMP_READ_OFFSET, + offset); + if (err) + goto free_message; + } + + if (length) { + err = fbnic_tlv_attr_put_int(msg, FBNIC_FW_COREDUMP_READ_LENGTH, + length); + if (err) + goto free_message; + } + + err = fbnic_mbx_map_req_w_cmpl(fbd, msg, cmpl_data); + if (err) + goto free_message; + + return 0; + +free_message: + free_page((unsigned long)msg); + return err; +} + +static const struct fbnic_tlv_index fbnic_coredump_resp_index[] = { + FBNIC_TLV_ATTR_U32(FBNIC_FW_COREDUMP_READ_OFFSET), + FBNIC_TLV_ATTR_U32(FBNIC_FW_COREDUMP_READ_LENGTH), + FBNIC_TLV_ATTR_RAW_DATA(FBNIC_FW_COREDUMP_READ_DATA), + FBNIC_TLV_ATTR_S32(FBNIC_FW_COREDUMP_READ_ERROR), + FBNIC_TLV_ATTR_LAST +}; + +static int fbnic_fw_parse_coredump_resp(void *opaque, + struct fbnic_tlv_msg **results) +{ + struct fbnic_fw_completion *cmpl_data; + u32 index, last_offset, last_length; + struct fbnic_dev *fbd = opaque; + struct fbnic_tlv_msg *data_hdr; + u32 length, offset; + u32 msg_type; + s32 err; + + /* Verify we have a completion pointer to provide with data */ + msg_type = FBNIC_TLV_MSG_ID_COREDUMP_READ_RESP; + cmpl_data = fbnic_fw_get_cmpl_by_type(fbd, msg_type); + if (!cmpl_data) + return -ENOSPC; + + err = fta_get_sint(results, FBNIC_FW_COREDUMP_READ_ERROR); + if (err) + goto msg_err; + + data_hdr = results[FBNIC_FW_COREDUMP_READ_DATA]; + if (!data_hdr) { + err = -ENODATA; + goto msg_err; + } + + offset = fta_get_uint(results, FBNIC_FW_COREDUMP_READ_OFFSET); + length = fta_get_uint(results, FBNIC_FW_COREDUMP_READ_LENGTH); + + if (length > le16_to_cpu(data_hdr->hdr.len) - sizeof(u32)) { + dev_err(fbd->dev, "length greater than size of message\n"); + err = -EINVAL; + goto msg_err; + } + + /* Only the last offset can have a length != stride */ + last_length = + (cmpl_data->u.coredump.size % cmpl_data->u.coredump.stride) ? : + cmpl_data->u.coredump.stride; + last_offset = cmpl_data->u.coredump.size - last_length; + + /* Verify offset and length */ + if (offset % cmpl_data->u.coredump.stride || offset > last_offset) { + dev_err(fbd->dev, "offset %d out of range\n", offset); + err = -EINVAL; + } else if (length != ((offset == last_offset) ? + last_length : cmpl_data->u.coredump.stride)) { + dev_err(fbd->dev, "length %d out of range for offset %d\n", + length, offset); + err = -EINVAL; + } + if (err) + goto msg_err; + + /* If data pointer is NULL it is already filled, just skip the copy */ + index = offset / cmpl_data->u.coredump.stride; + if (!cmpl_data->u.coredump.data[index]) + goto msg_err; + + /* Copy data and mark index filled by setting pointer to NULL */ + memcpy(cmpl_data->u.coredump.data[index], + fbnic_tlv_attr_get_value_ptr(data_hdr), length); + cmpl_data->u.coredump.data[index] = NULL; + +msg_err: + cmpl_data->result = err; + complete(&cmpl_data->done); + fbnic_fw_put_cmpl(cmpl_data); + + return err; +} + int fbnic_fw_xmit_fw_start_upgrade(struct fbnic_dev *fbd, struct fbnic_fw_completion *cmpl_data, unsigned int id, unsigned int len) @@ -1220,6 +1429,11 @@ static const struct fbnic_tlv_parser fbnic_fw_tlv_parser[] = { fbnic_fw_parse_ownership_resp), FBNIC_TLV_PARSER(HEARTBEAT_RESP, fbnic_heartbeat_resp_index, fbnic_fw_parse_heartbeat_resp), + FBNIC_TLV_PARSER(COREDUMP_GET_INFO_RESP, + fbnic_coredump_info_resp_index, + fbnic_fw_parse_coredump_info_resp), + FBNIC_TLV_PARSER(COREDUMP_READ_RESP, fbnic_coredump_resp_index, + fbnic_fw_parse_coredump_resp), FBNIC_TLV_PARSER(FW_START_UPGRADE_RESP, fbnic_fw_start_upgrade_resp_index, fbnic_fw_parse_fw_start_upgrade_resp), -- 2.51.0 Add a health reporter to catch FW crashes. Dumping the reporter if FW has not crashed will create a snapshot of FW memory. Signed-off-by: Jakub Kicinski --- .../device_drivers/ethernet/meta/fbnic.rst | 10 ++ drivers/net/ethernet/meta/fbnic/fbnic.h | 5 + .../net/ethernet/meta/fbnic/fbnic_devlink.c | 157 ++++++++++++++++++ drivers/net/ethernet/meta/fbnic/fbnic_pci.c | 11 +- 4 files changed, 182 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst b/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst index fb6559fa4be4..62693566ff1f 100644 --- a/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst +++ b/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst @@ -69,6 +69,16 @@ On host boot the latest UEFI driver is always used, no explicit activation is required. Firmware activation is required to run new control firmware. cmrt firmware can only be activated by power cycling the NIC. +Health reporters +---------------- + +fw reporter +~~~~~~~~~~~ + +The ``fw`` health reporter tracks FW crashes. Dumping the reporter will +show the core dump of the most recent FW crash, and if no FW crash has +happened since power cycle - a snapshot of the FW memory. + Statistics ---------- diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index b364c2f0724b..5f99976de0bb 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -27,6 +27,7 @@ struct fbnic_dev { struct net_device *netdev; struct dentry *dbg_fbd; struct device *hwmon; + struct devlink_health_reporter *fw_reporter; u32 __iomem *uc_addr0; u32 __iomem *uc_addr4; @@ -159,8 +160,12 @@ extern char fbnic_driver_name[]; void fbnic_devlink_free(struct fbnic_dev *fbd); struct fbnic_dev *fbnic_devlink_alloc(struct pci_dev *pdev); +int fbnic_devlink_health_create(struct fbnic_dev *fbd); +void fbnic_devlink_health_destroy(struct fbnic_dev *fbd); void fbnic_devlink_register(struct fbnic_dev *fbd); void fbnic_devlink_unregister(struct fbnic_dev *fbd); +void __printf(2, 3) +fbnic_devlink_fw_report(struct fbnic_dev *fbd, const char *format, ...); int fbnic_fw_request_mbx(struct fbnic_dev *fbd); void fbnic_fw_free_mbx(struct fbnic_dev *fbd); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c b/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c index c5f81f139e7e..0e8920685da6 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c @@ -8,6 +8,7 @@ #include #include "fbnic.h" +#include "fbnic_fw.h" #include "fbnic_tlv.h" #define FBNIC_SN_STR_LEN 24 @@ -369,6 +370,162 @@ static const struct devlink_ops fbnic_devlink_ops = { .flash_update = fbnic_devlink_flash_update, }; +static int fbnic_fw_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct fbnic_dev *fbd = devlink_health_reporter_priv(reporter); + u32 offset, index, index_count, length, size; + struct fbnic_fw_completion *fw_cmpl; + u8 *dump_data, **data; + int err; + + fw_cmpl = fbnic_fw_alloc_cmpl(FBNIC_TLV_MSG_ID_COREDUMP_GET_INFO_RESP); + if (!fw_cmpl) + return -ENOMEM; + + err = fbnic_fw_xmit_coredump_info_msg(fbd, fw_cmpl, true); + if (err) { + dev_err(fbd->dev, + "Failed to transmit core dump info msg: %d\n", err); + goto cmpl_free; + } + if (!wait_for_completion_timeout(&fw_cmpl->done, 2 * HZ)) { + dev_err(fbd->dev, "Timed out waiting on core dump info\n"); + err = -ETIMEDOUT; + goto cmpl_cleanup; + } + + size = fw_cmpl->u.coredump_info.size; + err = fw_cmpl->result; + + fbnic_mbx_clear_cmpl(fbd, fw_cmpl); + fbnic_fw_put_cmpl(fw_cmpl); + + /* Handle error returned by firmware */ + if (err) { + if (err == -ETIMEDOUT) + dev_info(fbd->dev, "Firmware timed out on core dump\n"); + else + dev_err(fbd->dev, + "Firmware core dump returned error: %d\n", err); + return err; + } + if (!size) { + dev_err(fbd->dev, "Firmware core dump returned size 0\n"); + return -EIO; + } + + /* Read the dump, we can only transfer TLV_MAX_DATA at a time */ + index_count = DIV_ROUND_UP(size, TLV_MAX_DATA); + + fw_cmpl = __fbnic_fw_alloc_cmpl(FBNIC_TLV_MSG_ID_COREDUMP_READ_RESP, + sizeof(void *) * index_count + size); + if (!fw_cmpl) + return -ENOMEM; + + /* Populate pointer table w/ pointer offsets */ + dump_data = (void *)&fw_cmpl->u.coredump.data[index_count]; + data = fw_cmpl->u.coredump.data; + fw_cmpl->u.coredump.size = size; + fw_cmpl->u.coredump.stride = TLV_MAX_DATA; + + for (index = 0; index < index_count; index++) { + /* First iteration installs completion */ + struct fbnic_fw_completion *cmpl_arg = index ? NULL : fw_cmpl; + + offset = index * TLV_MAX_DATA; + length = min(size - offset, TLV_MAX_DATA); + + data[index] = dump_data + offset; + err = fbnic_fw_xmit_coredump_read_msg(fbd, cmpl_arg, + offset, length); + if (err) { + dev_err(fbd->dev, "Failed to transmit core dump msg: %d\n", + err); + if (cmpl_arg) + goto cmpl_free; + else + goto cmpl_cleanup; + } + + if (wait_for_completion_timeout(&fw_cmpl->done, 2 * HZ)) { + reinit_completion(&fw_cmpl->done); + } else { + dev_err(fbd->dev, + "Timed out waiting on core dump (%d/%d)\n", + index + 1, index_count); + err = -ETIMEDOUT; + goto cmpl_cleanup; + } + + /* If we didn't see the reply record as incomplete */ + if (fw_cmpl->u.coredump.data[index]) { + dev_err(fbd->dev, + "No data for core dump chunk (%d/%d)\n", + index + 1, index_count); + err = -EIO; + goto cmpl_cleanup; + } + } + + devlink_fmsg_binary_pair_nest_start(fmsg, "FW coredump"); + + for (offset = 0; offset < size; offset += length) { + length = min_t(u32, size - offset, TLV_MAX_DATA); + + devlink_fmsg_binary_put(fmsg, dump_data + offset, length); + } + + devlink_fmsg_binary_pair_nest_end(fmsg); + +cmpl_cleanup: + fbnic_mbx_clear_cmpl(fbd, fw_cmpl); +cmpl_free: + fbnic_fw_put_cmpl(fw_cmpl); + + return err; +} + +void __printf(2, 3) +fbnic_devlink_fw_report(struct fbnic_dev *fbd, const char *format, ...) +{ + char msg[FBNIC_FW_LOG_MAX_SIZE]; + va_list args; + + va_start(args, format); + vsnprintf(msg, FBNIC_FW_LOG_MAX_SIZE, format, args); + va_end(args); + + devlink_health_report(fbd->fw_reporter, msg, fbd); + if (fbnic_fw_log_ready(fbd)) + fbnic_fw_log_write(fbd, 0, fbd->firmware_time, msg); +} + +static const struct devlink_health_reporter_ops fbnic_fw_ops = { + .name = "fw", + .dump = fbnic_fw_reporter_dump, +}; + +int fbnic_devlink_health_create(struct fbnic_dev *fbd) +{ + fbd->fw_reporter = devlink_health_reporter_create(priv_to_devlink(fbd), + &fbnic_fw_ops, fbd); + if (IS_ERR(fbd->fw_reporter)) { + dev_warn(fbd->dev, + "Failed to create FW fault reporter: %pe\n", + fbd->fw_reporter); + return PTR_ERR(fbd->fw_reporter); + } + + return 0; +} + +void fbnic_devlink_health_destroy(struct fbnic_dev *fbd) +{ + devlink_health_reporter_destroy(fbd->fw_reporter); +} + void fbnic_devlink_free(struct fbnic_dev *fbd) { struct devlink *devlink = priv_to_devlink(fbd); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c index 53690db14d77..e71be857d692 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c @@ -196,6 +196,8 @@ static void fbnic_health_check(struct fbnic_dev *fbd) if (tx_mbx->head != tx_mbx->tail) return; + fbnic_devlink_fw_report(fbd, "Firmware crashed detected!"); + if (fbnic_fw_config_after_crash(fbd)) dev_err(fbd->dev, "Firmware recovery failed after crash\n"); } @@ -279,6 +281,10 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return -ENOMEM; } + err = fbnic_devlink_health_create(fbd); + if (err) + goto free_fbd; + /* Populate driver with hardware-specific info and handlers */ fbd->max_num_queues = info->max_num_queues; @@ -289,7 +295,7 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = fbnic_alloc_irqs(fbd); if (err) - goto free_fbd; + goto err_destroy_health; err = fbnic_mac_init(fbd); if (err) { @@ -358,6 +364,8 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; free_irqs: fbnic_free_irqs(fbd); +err_destroy_health: + fbnic_devlink_health_destroy(fbd); free_fbd: fbnic_devlink_free(fbd); @@ -392,6 +400,7 @@ static void fbnic_remove(struct pci_dev *pdev) fbnic_fw_free_mbx(fbd); fbnic_free_irqs(fbd); + fbnic_devlink_health_destroy(fbd); fbnic_devlink_free(fbd); } -- 2.51.0 FW crashes are detected based on uptime going back, expose the uptime via devlink health diagnose. Signed-off-by: Jakub Kicinski --- .../device_drivers/ethernet/meta/fbnic.rst | 4 +++- drivers/net/ethernet/meta/fbnic/fbnic_devlink.c | 13 +++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst b/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst index 62693566ff1f..3c81b58d8292 100644 --- a/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst +++ b/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst @@ -77,7 +77,9 @@ fw reporter The ``fw`` health reporter tracks FW crashes. Dumping the reporter will show the core dump of the most recent FW crash, and if no FW crash has -happened since power cycle - a snapshot of the FW memory. +happened since power cycle - a snapshot of the FW memory. Diagnose callback +shows current FW uptime (the crashes are detected by checking if uptime +goes down). Statistics ---------- diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c b/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c index 0e8920685da6..f3f3585c0aac 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c @@ -487,6 +487,18 @@ static int fbnic_fw_reporter_dump(struct devlink_health_reporter *reporter, return err; } +static int +fbnic_fw_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct fbnic_dev *fbd = devlink_health_reporter_priv(reporter); + + devlink_fmsg_u32_pair_put(fmsg, "FW uptime", fbd->firmware_time); + + return 0; +} + void __printf(2, 3) fbnic_devlink_fw_report(struct fbnic_dev *fbd, const char *format, ...) { @@ -505,6 +517,7 @@ fbnic_devlink_fw_report(struct fbnic_dev *fbd, const char *format, ...) static const struct devlink_health_reporter_ops fbnic_fw_ops = { .name = "fw", .dump = fbnic_fw_reporter_dump, + .diagnose = fbnic_fw_reporter_diagnose, }; int fbnic_devlink_health_create(struct fbnic_dev *fbd) -- 2.51.0 OTP memory ("fuses") are used for secure boot and anti-rollback protection. The OTP memory is ECC protected. Check for its health periodically to notice when the chip is starting to go bad. Signed-off-by: Jakub Kicinski --- .../device_drivers/ethernet/meta/fbnic.rst | 7 ++ drivers/net/ethernet/meta/fbnic/fbnic.h | 2 + drivers/net/ethernet/meta/fbnic/fbnic_csr.h | 18 +++++ .../net/ethernet/meta/fbnic/fbnic_devlink.c | 65 +++++++++++++++++++ drivers/net/ethernet/meta/fbnic/fbnic_pci.c | 5 ++ 5 files changed, 97 insertions(+) diff --git a/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst b/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst index 3c81b58d8292..5d364832f814 100644 --- a/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst +++ b/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst @@ -81,6 +81,13 @@ happened since power cycle - a snapshot of the FW memory. Diagnose callback shows current FW uptime (the crashes are detected by checking if uptime goes down). +otp reporter +~~~~~~~~~~~~ + +OTP memory ("fuses") are used for secure boot and anti-rollback +protection. The OTP memory is ECC protected, ECC errors indicate +either manufacturing defect or part deteriorating with age. + Statistics ---------- diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index 5f99976de0bb..b03e5a3d5144 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -28,6 +28,7 @@ struct fbnic_dev { struct dentry *dbg_fbd; struct device *hwmon; struct devlink_health_reporter *fw_reporter; + struct devlink_health_reporter *otp_reporter; u32 __iomem *uc_addr0; u32 __iomem *uc_addr4; @@ -166,6 +167,7 @@ void fbnic_devlink_register(struct fbnic_dev *fbd); void fbnic_devlink_unregister(struct fbnic_dev *fbd); void __printf(2, 3) fbnic_devlink_fw_report(struct fbnic_dev *fbd, const char *format, ...); +void fbnic_devlink_otp_check(struct fbnic_dev *fbd, const char *msg); int fbnic_fw_request_mbx(struct fbnic_dev *fbd); void fbnic_fw_free_mbx(struct fbnic_dev *fbd); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h index e2fffe1597e9..d3a7ad921f18 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h @@ -1178,4 +1178,22 @@ enum { #define FBNIC_IPC_MBX_DESC_FW_CMPL DESC_BIT(1) #define FBNIC_IPC_MBX_DESC_HOST_CMPL DESC_BIT(0) +/* OTP Registers + * These registers are accessible via bar4 offset and are written by CMRT + * on boot. For the write status, the register is broken up in half with OTP + * Write Data Status occupying the top 16 bits and the ECC status occupying the + * bottom 16 bits. + */ +#define FBNIC_NS_OTP_STATUS 0x0021d +#define FBNIC_NS_OTP_WRITE_STATUS 0x0021e + +#define FBNIC_NS_OTP_WRITE_DATA_STATUS_MASK CSR_GENMASK(31, 16) +#define FBNIC_NS_OTP_WRITE_ECC_STATUS_MASK CSR_GENMASK(15, 0) + +#define FBNIC_REGS_VERSION CSR_GENMASK(31, 16) +#define FBNIC_REGS_HW_TYPE CSR_GENMASK(15, 8) +enum{ + FBNIC_CSR_VERSION_V1_0_ASIC = 1, +}; + #endif /* _FBNIC_CSR_H_ */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c b/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c index f3f3585c0aac..227236d0676c 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c @@ -520,6 +520,60 @@ static const struct devlink_health_reporter_ops fbnic_fw_ops = { .diagnose = fbnic_fw_reporter_diagnose, }; +static u32 fbnic_read_otp_status(struct fbnic_dev *fbd) +{ + return fbnic_fw_rd32(fbd, FBNIC_NS_OTP_STATUS); +} + +static int +fbnic_otp_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct fbnic_dev *fbd = devlink_health_reporter_priv(reporter); + u32 otp_status, otp_write_status, m; + + otp_status = fbnic_read_otp_status(fbd); + otp_write_status = fbnic_fw_rd32(fbd, FBNIC_NS_OTP_WRITE_STATUS); + + /* Dump OTP status */ + devlink_fmsg_pair_nest_start(fmsg, "OTP"); + devlink_fmsg_obj_nest_start(fmsg); + + devlink_fmsg_u32_pair_put(fmsg, "Status", otp_status); + + /* Extract OTP Write Data status */ + m = FBNIC_NS_OTP_WRITE_DATA_STATUS_MASK; + devlink_fmsg_u32_pair_put(fmsg, "Data", + FIELD_GET(m, otp_write_status)); + + /* Extract OTP Write ECC status */ + m = FBNIC_NS_OTP_WRITE_ECC_STATUS_MASK; + devlink_fmsg_u32_pair_put(fmsg, "ECC", + FIELD_GET(m, otp_write_status)); + + devlink_fmsg_obj_nest_end(fmsg); + devlink_fmsg_pair_nest_end(fmsg); + + return 0; +} + +void fbnic_devlink_otp_check(struct fbnic_dev *fbd, const char *msg) +{ + /* Check if there is anything to report */ + if (!fbnic_read_otp_status(fbd)) + return; + + devlink_health_report(fbd->otp_reporter, msg, fbd); + if (fbnic_fw_log_ready(fbd)) + fbnic_fw_log_write(fbd, 0, fbd->firmware_time, msg); +} + +static const struct devlink_health_reporter_ops fbnic_otp_ops = { + .name = "otp", + .dump = fbnic_otp_reporter_dump, +}; + int fbnic_devlink_health_create(struct fbnic_dev *fbd) { fbd->fw_reporter = devlink_health_reporter_create(priv_to_devlink(fbd), @@ -531,11 +585,22 @@ int fbnic_devlink_health_create(struct fbnic_dev *fbd) return PTR_ERR(fbd->fw_reporter); } + fbd->otp_reporter = devlink_health_reporter_create(priv_to_devlink(fbd), + &fbnic_otp_ops, fbd); + if (IS_ERR(fbd->otp_reporter)) { + devlink_health_reporter_destroy(fbd->fw_reporter); + dev_warn(fbd->dev, + "Failed to create OTP fault reporter: %pe\n", + fbd->otp_reporter); + return PTR_ERR(fbd->otp_reporter); + } + return 0; } void fbnic_devlink_health_destroy(struct fbnic_dev *fbd) { + devlink_health_reporter_destroy(fbd->otp_reporter); devlink_health_reporter_destroy(fbd->fw_reporter); } diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c index e71be857d692..c934fb00ee64 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c @@ -197,6 +197,7 @@ static void fbnic_health_check(struct fbnic_dev *fbd) return; fbnic_devlink_fw_report(fbd, "Firmware crashed detected!"); + fbnic_devlink_otp_check(fbd, "error detected after firmware recovery"); if (fbnic_fw_config_after_crash(fbd)) dev_err(fbd->dev, "Firmware recovery failed after crash\n"); @@ -322,6 +323,7 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err); fbnic_devlink_register(fbd); + fbnic_devlink_otp_check(fbd, "error detected during probe"); fbnic_dbg_fbd_init(fbd); /* Capture snapshot of hardware stats so netdev can calculate delta */ @@ -475,6 +477,9 @@ static int __fbnic_pm_resume(struct device *dev) */ fbnic_fw_log_enable(fbd, list_empty(&fbd->fw_log.entries)); + /* Since the FW should be up, check if it reported OTP errors */ + fbnic_devlink_otp_check(fbd, "error detected after PM resume"); + /* No netdev means there isn't a network interface to bring up */ if (fbnic_init_failure(fbd)) return 0; -- 2.51.0