The commit 97f0b13452198290799f ("tracing: add trace event for memory-failure") introduces the selection of RAS in memory-failure. This commit is just a tracing feature; in reality, there is no dependency between memory-failure and RAS. RAS increases the size of the bzImage image by 8k, which is very valuable for embedded devices. Move the memory-failure traceing code from ras_event.h to memory-failure.h and remove the selection of RAS. v2->v3: https://lore.kernel.org/20251104072306.100738-3-xieyuanbin1@huawei.com - Change define TRACE_SYSTEM from ras to memory_failure - Add include/trace/events/memory-failure.h to "HWPOISON MEMORY FAILURE HANDLING" section in MAINTAINERS - Rebase to latest linux-next source v1->v2: https://lore.kernel.org/20251103033536.52234-2-xieyuanbin1@huawei.com - Move the memory-failure traceing code from ras_event.h to memory-failure.h Signed-off-by: Xie Yuanbin Cc: David Hildenbrand (Red Hat) Cc: Borislav Petkov Cc: Miaohe Lin --- MAINTAINERS | 1 + include/ras/ras_event.h | 87 ------------------------ include/trace/events/memory-failure.h | 98 +++++++++++++++++++++++++++ mm/Kconfig | 1 - mm/memory-failure.c | 5 +- 5 files changed, 103 insertions(+), 89 deletions(-) create mode 100644 include/trace/events/memory-failure.h diff --git a/MAINTAINERS b/MAINTAINERS index 7310d9ca0370..43d6eb95fb05 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11631,10 +11631,11 @@ R: Naoya Horiguchi L: linux-mm@kvack.org S: Maintained F: include/linux/memory-failure.h F: mm/hwpoison-inject.c F: mm/memory-failure.c +F: include/trace/events/memory-failure.h HYCON HY46XX TOUCHSCREEN SUPPORT M: Giulio Benetti L: linux-input@vger.kernel.org S: Maintained diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index fecfeb7c8be7..1e5e87020eef 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -10,11 +10,10 @@ #include #include #include #include #include -#include /* * MCE Extended Error Log trace event * * These events are generated when hardware detects a corrected or @@ -337,95 +336,9 @@ TRACE_EVENT(aer_event, __entry->tlp_header_valid ? __print_array(__entry->tlp_header, PCIE_STD_MAX_TLP_HEADERLOG, 4) : "Not available") ); #endif /* CONFIG_PCIEAER */ - -/* - * memory-failure recovery action result event - * - * unsigned long pfn - Page Frame Number of the corrupted page - * int type - Page types of the corrupted page - * int result - Result of recovery action - */ - -#ifdef CONFIG_MEMORY_FAILURE -#define MF_ACTION_RESULT \ - EM ( MF_IGNORED, "Ignored" ) \ - EM ( MF_FAILED, "Failed" ) \ - EM ( MF_DELAYED, "Delayed" ) \ - EMe ( MF_RECOVERED, "Recovered" ) - -#define MF_PAGE_TYPE \ - EM ( MF_MSG_KERNEL, "reserved kernel page" ) \ - EM ( MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page" ) \ - EM ( MF_MSG_HUGE, "huge page" ) \ - EM ( MF_MSG_FREE_HUGE, "free huge page" ) \ - EM ( MF_MSG_GET_HWPOISON, "get hwpoison page" ) \ - EM ( MF_MSG_UNMAP_FAILED, "unmapping failed page" ) \ - EM ( MF_MSG_DIRTY_SWAPCACHE, "dirty swapcache page" ) \ - EM ( MF_MSG_CLEAN_SWAPCACHE, "clean swapcache page" ) \ - EM ( MF_MSG_DIRTY_MLOCKED_LRU, "dirty mlocked LRU page" ) \ - EM ( MF_MSG_CLEAN_MLOCKED_LRU, "clean mlocked LRU page" ) \ - EM ( MF_MSG_DIRTY_UNEVICTABLE_LRU, "dirty unevictable LRU page" ) \ - EM ( MF_MSG_CLEAN_UNEVICTABLE_LRU, "clean unevictable LRU page" ) \ - EM ( MF_MSG_DIRTY_LRU, "dirty LRU page" ) \ - EM ( MF_MSG_CLEAN_LRU, "clean LRU page" ) \ - EM ( MF_MSG_TRUNCATED_LRU, "already truncated LRU page" ) \ - EM ( MF_MSG_BUDDY, "free buddy page" ) \ - EM ( MF_MSG_DAX, "dax page" ) \ - EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" ) \ - EM ( MF_MSG_ALREADY_POISONED, "already poisoned" ) \ - EM ( MF_MSG_PFN_MAP, "non struct page pfn" ) \ - EMe ( MF_MSG_UNKNOWN, "unknown page" ) - -/* - * First define the enums in MM_ACTION_RESULT to be exported to userspace - * via TRACE_DEFINE_ENUM(). - */ -#undef EM -#undef EMe -#define EM(a, b) TRACE_DEFINE_ENUM(a); -#define EMe(a, b) TRACE_DEFINE_ENUM(a); - -MF_ACTION_RESULT -MF_PAGE_TYPE - -/* - * Now redefine the EM() and EMe() macros to map the enums to the strings - * that will be printed in the output. - */ -#undef EM -#undef EMe -#define EM(a, b) { a, b }, -#define EMe(a, b) { a, b } - -TRACE_EVENT(memory_failure_event, - TP_PROTO(unsigned long pfn, - int type, - int result), - - TP_ARGS(pfn, type, result), - - TP_STRUCT__entry( - __field(unsigned long, pfn) - __field(int, type) - __field(int, result) - ), - - TP_fast_assign( - __entry->pfn = pfn; - __entry->type = type; - __entry->result = result; - ), - - TP_printk("pfn %#lx: recovery action for %s: %s", - __entry->pfn, - __print_symbolic(__entry->type, MF_PAGE_TYPE), - __print_symbolic(__entry->result, MF_ACTION_RESULT) - ) -); -#endif /* CONFIG_MEMORY_FAILURE */ #endif /* _TRACE_HW_EVENT_MC_H */ /* This part must be outside protection */ #include diff --git a/include/trace/events/memory-failure.h b/include/trace/events/memory-failure.h new file mode 100644 index 000000000000..aa57cc8f896b --- /dev/null +++ b/include/trace/events/memory-failure.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM memory_failure +#define TRACE_INCLUDE_FILE memory-failure + +#if !defined(_TRACE_MEMORY_FAILURE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MEMORY_FAILURE_H + +#include +#include + +/* + * memory-failure recovery action result event + * + * unsigned long pfn - Page Frame Number of the corrupted page + * int type - Page types of the corrupted page + * int result - Result of recovery action + */ + +#define MF_ACTION_RESULT \ + EM ( MF_IGNORED, "Ignored" ) \ + EM ( MF_FAILED, "Failed" ) \ + EM ( MF_DELAYED, "Delayed" ) \ + EMe ( MF_RECOVERED, "Recovered" ) + +#define MF_PAGE_TYPE \ + EM ( MF_MSG_KERNEL, "reserved kernel page" ) \ + EM ( MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page" ) \ + EM ( MF_MSG_HUGE, "huge page" ) \ + EM ( MF_MSG_FREE_HUGE, "free huge page" ) \ + EM ( MF_MSG_GET_HWPOISON, "get hwpoison page" ) \ + EM ( MF_MSG_UNMAP_FAILED, "unmapping failed page" ) \ + EM ( MF_MSG_DIRTY_SWAPCACHE, "dirty swapcache page" ) \ + EM ( MF_MSG_CLEAN_SWAPCACHE, "clean swapcache page" ) \ + EM ( MF_MSG_DIRTY_MLOCKED_LRU, "dirty mlocked LRU page" ) \ + EM ( MF_MSG_CLEAN_MLOCKED_LRU, "clean mlocked LRU page" ) \ + EM ( MF_MSG_DIRTY_UNEVICTABLE_LRU, "dirty unevictable LRU page" ) \ + EM ( MF_MSG_CLEAN_UNEVICTABLE_LRU, "clean unevictable LRU page" ) \ + EM ( MF_MSG_DIRTY_LRU, "dirty LRU page" ) \ + EM ( MF_MSG_CLEAN_LRU, "clean LRU page" ) \ + EM ( MF_MSG_TRUNCATED_LRU, "already truncated LRU page" ) \ + EM ( MF_MSG_BUDDY, "free buddy page" ) \ + EM ( MF_MSG_DAX, "dax page" ) \ + EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" ) \ + EM ( MF_MSG_ALREADY_POISONED, "already poisoned" ) \ + EM ( MF_MSG_PFN_MAP, "non struct page pfn" ) \ + EMe ( MF_MSG_UNKNOWN, "unknown page" ) + +/* + * First define the enums in MM_ACTION_RESULT to be exported to userspace + * via TRACE_DEFINE_ENUM(). + */ +#undef EM +#undef EMe +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); + +MF_ACTION_RESULT +MF_PAGE_TYPE + +/* + * Now redefine the EM() and EMe() macros to map the enums to the strings + * that will be printed in the output. + */ +#undef EM +#undef EMe +#define EM(a, b) { a, b }, +#define EMe(a, b) { a, b } + +TRACE_EVENT(memory_failure_event, + TP_PROTO(unsigned long pfn, + int type, + int result), + + TP_ARGS(pfn, type, result), + + TP_STRUCT__entry( + __field(unsigned long, pfn) + __field(int, type) + __field(int, result) + ), + + TP_fast_assign( + __entry->pfn = pfn; + __entry->type = type; + __entry->result = result; + ), + + TP_printk("pfn %#lx: recovery action for %s: %s", + __entry->pfn, + __print_symbolic(__entry->type, MF_PAGE_TYPE), + __print_symbolic(__entry->result, MF_ACTION_RESULT) + ) +); +#endif /* _TRACE_MEMORY_FAILURE_H */ + +/* This part must be outside protection */ +#include diff --git a/mm/Kconfig b/mm/Kconfig index d548976d0e0a..bd0ea5454af8 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -738,11 +738,10 @@ config ARCH_SUPPORTS_MEMORY_FAILURE config MEMORY_FAILURE depends on MMU depends on ARCH_SUPPORTS_MEMORY_FAILURE bool "Enable recovery from hardware memory errors" - select RAS select INTERVAL_TREE help Enables code to recover from some memory failures on systems with MCA recovery. This allows a system to continue running even when some of its memory has uncorrected errors. This requires diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 7f908ad795ad..fbc5a01260c8 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -59,13 +59,16 @@ #include #include #include #include #include + +#define CREATE_TRACE_POINTS +#include + #include "swap.h" #include "internal.h" -#include "ras/ras_event.h" static int sysctl_memory_failure_early_kill __read_mostly; static int sysctl_memory_failure_recovery __read_mostly = 1; -- 2.51.0