When get_any_page() fails to grab a page reference, the *reason* it failed is known at the call site but is not surfaced to callers: the HWPoisonHandlable() rejection path (a stable kernel page that hwpoison cannot handle — slab, vmalloc, page tables, kernel stacks, ...) and the page_count() / put_page race paths (a transient page-allocator lifecycle race) all collapse to a single negative errno by the time memory_failure() sees them. memory_failure() can only observe the conflated result, so it reports both as MF_MSG_GET_HWPOISON. Surface the diagnosis explicitly. Add an mf_get_page_status enum, plumbed out through get_any_page() and get_hwpoison_page() (NULL is accepted by callers that do not care — unpoison_memory() and soft_offline_page() pass NULL). get_any_page() sets the status at the moment it gives up: MF_GET_PAGE_UNHANDLABLE — HWPoisonHandlable() rejected the page after retries. MF_GET_PAGE_RACE — exhausted retries on a refcount / lifecycle race with the allocator. memory_failure() then promotes the unhandlable case to MF_MSG_KERNEL alongside the existing PageReserved branch, and leaves the transient-race case as MF_MSG_GET_HWPOISON. This forms the foundation that a later patch will rely on to decide whether an unrecoverable failure should panic. Drop the "reserved" qualifier from action_page_types[MF_MSG_KERNEL] and the matching tracepoint string in MF_PAGE_TYPE: the enum value now covers both PageReserved pages and unhandlable kernel pages (slab, vmalloc, page tables, kernel stacks, ...), so "kernel page" is the accurate label for both populations. 
Suggested-by: Lance Yang Signed-off-by: Breno Leitao --- include/trace/events/memory-failure.h | 2 +- mm/memory-failure.c | 46 +++++++++++++++++++++++++++++------ 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/include/trace/events/memory-failure.h b/include/trace/events/memory-failure.h index aa57cc8f896be..8a860e6fcb4e9 100644 --- a/include/trace/events/memory-failure.h +++ b/include/trace/events/memory-failure.h @@ -24,7 +24,7 @@ EMe ( MF_RECOVERED, "Recovered" ) #define MF_PAGE_TYPE \ - EM ( MF_MSG_KERNEL, "reserved kernel page" ) \ + EM ( MF_MSG_KERNEL, "kernel page" ) \ EM ( MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page" ) \ EM ( MF_MSG_HUGE, "huge page" ) \ EM ( MF_MSG_FREE_HUGE, "free huge page" ) \ diff --git a/mm/memory-failure.c b/mm/memory-failure.c index f112fb27a8ff6..4210173060aac 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -878,7 +878,7 @@ static const char *action_name[] = { }; static const char * const action_page_types[] = { - [MF_MSG_KERNEL] = "reserved kernel page", + [MF_MSG_KERNEL] = "kernel page", [MF_MSG_KERNEL_HIGH_ORDER] = "high-order kernel page", [MF_MSG_HUGE] = "huge page", [MF_MSG_FREE_HUGE] = "free huge page", @@ -1389,11 +1389,29 @@ static int __get_hwpoison_page(struct page *page, unsigned long flags) #define GET_PAGE_MAX_RETRY_NUM 3 -static int get_any_page(struct page *p, unsigned long flags) +enum mf_get_page_status { + MF_GET_PAGE_OK = 0, + MF_GET_PAGE_RACE, + MF_GET_PAGE_UNHANDLABLE, +}; + +static void set_mf_get_page_status(enum mf_get_page_status *gp_status, + enum mf_get_page_status value) +{ + if (!gp_status) + return; + + *gp_status = value; +} + +static int get_any_page(struct page *p, unsigned long flags, + enum mf_get_page_status *gp_status) { int ret = 0, pass = 0; bool count_increased = false; + set_mf_get_page_status(gp_status, MF_GET_PAGE_OK); + if (flags & MF_COUNT_INCREASED) count_increased = true; @@ -1406,11 +1424,13 @@ static int get_any_page(struct page *p, unsigned long 
flags) if (pass++ < GET_PAGE_MAX_RETRY_NUM) goto try_again; ret = -EBUSY; + set_mf_get_page_status(gp_status, MF_GET_PAGE_RACE); } else if (!PageHuge(p) && !is_free_buddy_page(p)) { /* We raced with put_page, retry. */ if (pass++ < GET_PAGE_MAX_RETRY_NUM) goto try_again; ret = -EIO; + set_mf_get_page_status(gp_status, MF_GET_PAGE_RACE); } goto out; } else if (ret == -EBUSY) { @@ -1423,6 +1443,7 @@ static int get_any_page(struct page *p, unsigned long flags) goto try_again; } ret = -EIO; + set_mf_get_page_status(gp_status, MF_GET_PAGE_UNHANDLABLE); goto out; } } @@ -1442,6 +1463,7 @@ static int get_any_page(struct page *p, unsigned long flags) } put_page(p); ret = -EIO; + set_mf_get_page_status(gp_status, MF_GET_PAGE_UNHANDLABLE); } out: if (ret == -EIO) @@ -1480,6 +1502,7 @@ static int __get_unpoison_page(struct page *page) * get_hwpoison_page() - Get refcount for memory error handling * @p: Raw error page (hit by memory error) * @flags: Flags controlling behavior of error handling + * @gp_status: Optional output for the reason get_any_page() failed * * get_hwpoison_page() takes a page refcount of an error page to handle memory * error on it, after checking that the error page is in a well-defined state @@ -1503,7 +1526,8 @@ static int __get_unpoison_page(struct page *page) * operations like allocation and free, * -EHWPOISON when the page is hwpoisoned and taken off from buddy. 
*/ -static int get_hwpoison_page(struct page *p, unsigned long flags) +static int get_hwpoison_page(struct page *p, unsigned long flags, + enum mf_get_page_status *gp_status) { int ret; @@ -1511,7 +1535,7 @@ static int get_hwpoison_page(struct page *p, unsigned long flags) if (flags & MF_UNPOISON) ret = __get_unpoison_page(p); else - ret = get_any_page(p, flags); + ret = get_any_page(p, flags, gp_status); zone_pcp_enable(page_zone(p)); return ret; @@ -2349,6 +2373,7 @@ int memory_failure(unsigned long pfn, int flags) bool retry = true; int hugetlb = 0; bool is_reserved; + enum mf_get_page_status gp_status = MF_GET_PAGE_OK; if (!sysctl_memory_failure_recovery) panic("Memory failure on page %lx", pfn); @@ -2424,7 +2449,7 @@ int memory_failure(unsigned long pfn, int flags) */ is_reserved = PageReserved(p); - res = get_hwpoison_page(p, flags); + res = get_hwpoison_page(p, flags, &gp_status); if (!res) { if (is_free_buddy_page(p)) { if (take_page_off_buddy(p)) { @@ -2445,7 +2470,12 @@ int memory_failure(unsigned long pfn, int flags) } goto unlock_mutex; } else if (res < 0) { - if (is_reserved) + /* + * Promote a stable unhandlable kernel page diagnosed by + * get_hwpoison_page() to MF_MSG_KERNEL alongside reserved + * pages; transient lifecycle races stay as MF_MSG_GET_HWPOISON. + */ + if (is_reserved || gp_status == MF_GET_PAGE_UNHANDLABLE) res = action_result(pfn, MF_MSG_KERNEL, MF_IGNORED); else res = action_result(pfn, MF_MSG_GET_HWPOISON, @@ -2750,7 +2780,7 @@ int unpoison_memory(unsigned long pfn) goto unlock_mutex; } - ghp = get_hwpoison_page(p, MF_UNPOISON); + ghp = get_hwpoison_page(p, MF_UNPOISON, NULL); if (!ghp) { if (folio_test_hugetlb(folio)) { huge = true; @@ -2957,7 +2987,7 @@ int soft_offline_page(unsigned long pfn, int flags) retry: get_online_mems(); - ret = get_hwpoison_page(page, flags | MF_SOFT_OFFLINE); + ret = get_hwpoison_page(page, flags | MF_SOFT_OFFLINE, NULL); put_online_mems(); if (hwpoison_filter(page)) { -- 2.53.0-Meta