System controller allows programming per error threshold value, which it
uses to raise error events to the driver. Set it using mailbox command so
that it can be programmed by the user.

Signed-off-by: Raag Jadav
---
v2: Add RAS operation status codes (Riana)
---
 drivers/gpu/drm/xe/xe_ras.c                   | 88 +++++++++++++++++++
 drivers/gpu/drm/xe/xe_ras.h                   |  1 +
 drivers/gpu/drm/xe/xe_ras_types.h             | 28 ++++++
 drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h |  2 +
 4 files changed, 119 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_ras.c b/drivers/gpu/drm/xe/xe_ras.c
index 434dea8bbdb1..4548e5cb08b9 100644
--- a/drivers/gpu/drm/xe/xe_ras.c
+++ b/drivers/gpu/drm/xe/xe_ras.c
@@ -34,6 +34,17 @@ enum xe_ras_component {
 	XE_RAS_COMP_MAX
 };
 
+/* RAS operation status codes */
+enum xe_ras_status {
+	XE_RAS_STATUS_SUCCESS = 0,
+	XE_RAS_STATUS_INVALID_PARAM,
+	XE_RAS_STATUS_NOT_SUPPORTED,
+	XE_RAS_STATUS_TIMEOUT,
+	XE_RAS_STATUS_HARDWARE_FAILURE,
+	XE_RAS_STATUS_INSUFFICIENT_RESOURCES,
+	XE_RAS_STATUS_MAX
+};
+
 static const char *const xe_ras_severities[] = {
 	[XE_RAS_SEV_NOT_SUPPORTED] = "Not Supported",
 	[XE_RAS_SEV_CORRECTABLE] = "Correctable Error",
@@ -70,6 +81,28 @@ static const int drm_to_xe_ras_severities[] = {
 };
 static_assert(ARRAY_SIZE(drm_to_xe_ras_severities) == DRM_XE_RAS_ERR_SEV_MAX);
 
+/*
+ * Map a non-zero status from a RAS mailbox response to an errno. Any value
+ * without a case below, XE_RAS_STATUS_SUCCESS included, maps to -EPROTO.
+ */
+static int ras_status_to_errno(u32 status)
+{
+	switch (status) {
+	case XE_RAS_STATUS_INVALID_PARAM:
+		return -EINVAL;
+	case XE_RAS_STATUS_NOT_SUPPORTED:
+		return -EOPNOTSUPP;
+	case XE_RAS_STATUS_TIMEOUT:
+		return -ETIMEDOUT;
+	case XE_RAS_STATUS_HARDWARE_FAILURE:
+		return -EIO;
+	case XE_RAS_STATUS_INSUFFICIENT_RESOURCES:
+		return -ENOSPC;
+	default:
+		return -EPROTO;
+	}
+}
+
 static inline const char *sev_to_str(u8 severity)
 {
 	if (severity >= XE_RAS_SEV_MAX)
@@ -149,3 +182,58 @@ int xe_ras_get_threshold(struct xe_device *xe, u32 severity, u32 component, u32
 	       comp_to_str(counter.common.component), sev_to_str(counter.common.severity));
 	return 0;
 }
+
+/**
+ * xe_ras_set_threshold() - Program an error threshold via system controller
+ * @xe: xe device instance
+ * @severity: index into drm_to_xe_ras_severities[], not range checked here
+ * @component: index into drm_to_xe_ras_components[], not range checked here
+ * @threshold: threshold value to program
+ *
+ * Sends XE_SYSCTRL_CMD_SET_THRESHOLD and checks the response length and status.
+ * NOTE(review): confirm that callers validate @severity and @component.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int xe_ras_set_threshold(struct xe_device *xe, u32 severity, u32 component, u32 threshold)
+{
+	struct xe_ras_set_threshold_response response = {};
+	struct xe_ras_set_threshold_request request = {};
+	struct xe_sysctrl_mailbox_command command = {};
+	struct xe_ras_error_class counter = {};
+	size_t len;
+	int ret;
+
+	counter.common.severity = drm_to_xe_ras_severities[severity];
+	counter.common.component = drm_to_xe_ras_components[component];
+	request.counter = counter;
+	request.threshold = threshold;
+
+	xe_sysctrl_populate_command(&command, &request, &response, sizeof(request),
+				    sizeof(response), XE_SYSCTRL_GROUP_GFSP,
+				    XE_SYSCTRL_CMD_SET_THRESHOLD);
+
+	guard(xe_pm_runtime)(xe);
+	ret = xe_sysctrl_send_command(&xe->sc, &command, &len);
+	if (ret) {
+		xe_err(xe, "sysctrl: failed to set threshold %d\n", ret);
+		return ret;
+	}
+
+	if (len != sizeof(response)) {
+		xe_err(xe, "sysctrl: unexpected set threshold response length %zu (expected %zu)\n",
+		       len, sizeof(response));
+		return -EIO;
+	}
+
+	if (response.status) {
+		xe_err(xe, "sysctrl: set threshold operation failed %#x\n", response.status);
+		return ras_status_to_errno(response.status);
+	}
+
+	counter = response.counter;
+
+	xe_dbg(xe, "[RAS]: set threshold %u for %s %s\n", response.threshold,
+	       comp_to_str(counter.common.component), sev_to_str(counter.common.severity));
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_ras.h b/drivers/gpu/drm/xe/xe_ras.h
index 982bbe61461e..d1f71b1de723 100644
--- a/drivers/gpu/drm/xe/xe_ras.h
+++ b/drivers/gpu/drm/xe/xe_ras.h
@@ -14,5 +14,6 @@ struct xe_sysctrl_event_response;
 void xe_ras_counter_threshold_crossed(struct xe_device *xe,
 				      struct xe_sysctrl_event_response *response);
 int xe_ras_get_threshold(struct xe_device *xe, u32 severity, u32 component, u32 *threshold);
+int xe_ras_set_threshold(struct xe_device *xe, u32 severity, u32 component, u32 threshold);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_ras_types.h b/drivers/gpu/drm/xe/xe_ras_types.h
index c29e9a3d43ce..6047fd891022 100644
--- a/drivers/gpu/drm/xe/xe_ras_types.h
+++ b/drivers/gpu/drm/xe/xe_ras_types.h
@@ -92,4 +92,32 @@ struct xe_ras_get_threshold_response {
 	u32 reserved[4];
 } __packed;
 
+/**
+ * struct xe_ras_set_threshold_request - Request structure for set threshold
+ */
+struct xe_ras_set_threshold_request {
+	/** @counter: Counter to set threshold for */
+	struct xe_ras_error_class counter;
+	/** @threshold: Threshold value to set */
+	u32 threshold;
+	/** @reserved: Reserved for future use */
+	u32 reserved;
+} __packed;
+
+/**
+ * struct xe_ras_set_threshold_response - Response structure for set threshold
+ */
+struct xe_ras_set_threshold_response {
+	/** @counter: Counter ID */
+	struct xe_ras_error_class counter;
+	/** @threshold_prev: Previous threshold value */
+	u32 threshold_prev;
+	/** @threshold: Updated threshold value */
+	u32 threshold;
+	/** @status: Set threshold operation status */
+	u32 status;
+	/** @reserved: Reserved for future use */
+	u32 reserved[2];
+} __packed;
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h b/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h
index a1b71218deca..b865768e903b 100644
--- a/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h
+++ b/drivers/gpu/drm/xe/xe_sysctrl_mailbox_types.h
@@ -23,10 +23,12 @@ enum xe_sysctrl_group {
  * enum xe_sysctrl_gfsp_cmd - Commands supported by GFSP group
  *
  * @XE_SYSCTRL_CMD_GET_THRESHOLD: Retrieve error threshold
+ * @XE_SYSCTRL_CMD_SET_THRESHOLD: Set error threshold
  * @XE_SYSCTRL_CMD_GET_PENDING_EVENT: Retrieve pending event
  */
 enum xe_sysctrl_gfsp_cmd {
 	XE_SYSCTRL_CMD_GET_THRESHOLD = 0x05,
+	XE_SYSCTRL_CMD_SET_THRESHOLD = 0x06,
 	XE_SYSCTRL_CMD_GET_PENDING_EVENT = 0x07,
 };
 
-- 
2.43.0