Add a new config, IOMMU_DEBUG_PAGEALLOC, which registers new data with
page_ext. This config will be used by the IOMMU API to track pages mapped
in the IOMMU, to catch drivers freeing kernel memory that they still have
mapped in their domains, which causes all kinds of memory corruption.

This behaviour is disabled by default and can be enabled using the kernel
cmdline iommu.debug_pagealloc.

Signed-off-by: Mostafa Saleh
---
 .../admin-guide/kernel-parameters.txt |  6 ++++
 drivers/iommu/Kconfig                 | 14 ++++++++
 drivers/iommu/Makefile                |  1 +
 drivers/iommu/iommu-debug.c           | 32 +++++++++++++++++++
 include/linux/iommu-debug.h           | 17 ++++++++++
 mm/page_ext.c                         |  4 +++
 6 files changed, 74 insertions(+)
 create mode 100644 drivers/iommu/iommu-debug.c
 create mode 100644 include/linux/iommu-debug.h

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 74ca438d2d6d..b2691a5527dd 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2539,6 +2539,12 @@
 			1 - Bypass the IOMMU for DMA.
 			unset - Use value of CONFIG_IOMMU_DEFAULT_PASSTHROUGH.
 
+	iommu.debug_pagealloc=
+			[KNL,EARLY] When CONFIG_IOMMU_DEBUG_PAGEALLOC is set, this
+			parameter enables the feature at boot time. By default, it
+			is disabled and the system will work mostly the same as a
+			kernel built without CONFIG_IOMMU_DEBUG_PAGEALLOC.
+
 	io7=		[HW] IO7 for Marvel-based Alpha systems
 			See comment before marvel_specify_io7
 			in arch/alpha/kernel/core_marvel.c.

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 70d29b14d851..5b40ec9b6e04 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -383,4 +383,18 @@ config SPRD_IOMMU
 	  Say Y here if you want to use the multimedia
 	  devices listed above.
 
+config IOMMU_DEBUG_PAGEALLOC
+	bool "Debug page memory allocations against IOMMU"
+	depends on DEBUG_PAGEALLOC && IOMMU_API && PAGE_EXTENSION
+	help
+	  This config checks, when a page is freed by the kernel, that
+	  it is not mapped in any IOMMU domain. It can help with
+	  debugging use-after-free from drivers doing DMA.
+	  This sanitizer can have false-negative cases where some
+	  problems won't be detected.
+	  Expect overhead when enabling this together with the kernel
+	  command line iommu.debug_pagealloc.
+
+	  If unsure, say N here.
+
 endif # IOMMU_SUPPORT

diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 355294fa9033..c834d3f70dfc 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -34,3 +34,4 @@ obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o
 obj-$(CONFIG_IOMMU_IOPF) += io-pgfault.o
 obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o
 obj-$(CONFIG_APPLE_DART) += apple-dart.o
+obj-$(CONFIG_IOMMU_DEBUG_PAGEALLOC) += iommu-debug.o
\ No newline at end of file

diff --git a/drivers/iommu/iommu-debug.c b/drivers/iommu/iommu-debug.c
new file mode 100644
index 000000000000..297a35137b38
--- /dev/null
+++ b/drivers/iommu/iommu-debug.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025 - Google Inc
+ * Author: Mostafa Saleh
+ * IOMMU API sanitizers and debug
+ */
+#include
+#include
+#include
+#include
+
+static bool needed;
+
+struct iommu_debug_metadata {
+	atomic_t ref;
+};
+
+static __init bool need_iommu_debug(void)
+{
+	return needed;
+}
+
+struct page_ext_operations page_iommu_debug_ops = {
+	.size = sizeof(struct iommu_debug_metadata),
+	.need = need_iommu_debug,
+};
+
+static int __init iommu_debug_pagealloc(char *str)
+{
+	return kstrtobool(str, &needed);
+}
+early_param("iommu.debug_pagealloc", iommu_debug_pagealloc);

diff --git a/include/linux/iommu-debug.h b/include/linux/iommu-debug.h
new file mode 100644
index 000000000000..a9c11855c4ed
--- /dev/null
+++ b/include/linux/iommu-debug.h
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025 - Google Inc
+ * Author: Mostafa Saleh
+ * IOMMU API sanitizers and debug
+ */
+
+#ifndef __LINUX_IOMMU_DEBUG_H
+#define __LINUX_IOMMU_DEBUG_H
+
+#ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC
+
+extern struct page_ext_operations page_iommu_debug_ops;
+
+#endif /* CONFIG_IOMMU_DEBUG_PAGEALLOC */
+
+#endif /* __LINUX_IOMMU_DEBUG_H */

diff --git a/mm/page_ext.c b/mm/page_ext.c
index d7396a8970e5..37c764a55a0f 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -11,6 +11,7 @@
 #include
 #include
 #include
+#include <linux/iommu-debug.h>
 
 /*
  * struct page extension
@@ -89,6 +90,9 @@ static struct page_ext_operations *page_ext_ops[] __initdata = {
 #ifdef CONFIG_PAGE_TABLE_CHECK
 	&page_table_check_ops,
 #endif
+#ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC
+	&page_iommu_debug_ops,
+#endif
 };
 
 unsigned long page_ext_size;
--
2.51.0.618.g983fd99d29-goog

Add calls for the new IOMMU debug config IOMMU_DEBUG_PAGEALLOC:
- iommu_debug_init: Enable the debug mode if configured by the user.
- iommu_debug_map: Track IOMMU pages mapped, using the physical address.
- iommu_debug_unmap: Track IOMMU pages unmapped, using the IO virtual address.
- iommu_debug_remap: Re-track IOMMU pages that are still mapped, using the IOVA.

We have to do the unmap/remap dance because, once pages are unmapped, we
lose the information about their physical address.

This is racy, but the API is racy by construction: it uses refcounts and
doesn't attempt to lock/synchronize with the IOMMU API, as that would be
costly, which means the possibility of false negatives exists.
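For example, for a partial unmap the accounting is intended to work as
follows. This is an illustrative sketch only (the addresses, sizes and
refcount values are made up, assuming a 4K IOMMU page size), not code
added by the patch:

/*
 * A domain has 4 pages mapped at IOVA 0x1000..0x4fff:
 *   iommu_debug_map(dom, phys, 0x4000)       refcounts: 1 1 1 1
 *
 * __iommu_unmap(dom, 0x1000, 0x4000) calls the hook first, while
 * everything is still mapped, so iova_to_phys() still works:
 *   iommu_debug_unmap(dom, 0x1000, 0x4000)   refcounts: 0 0 0 0
 *
 * The driver then stops unmapping after 2 pages, so only 0x2000 bytes
 * were actually unmapped.  The still-mapped tail is re-counted using
 * the already-advanced iova:
 *   iommu_debug_remap(dom, 0x3000, 0x2000)   refcounts: 0 0 1 1
 */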
Signed-off-by: Mostafa Saleh
---
 drivers/iommu/iommu-debug.c | 23 +++++++++++++++++++++++
 drivers/iommu/iommu.c       | 21 +++++++++++++++++++--
 include/linux/iommu-debug.h |  6 ++++++
 3 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/iommu-debug.c b/drivers/iommu/iommu-debug.c
index 297a35137b38..607f1fcf2235 100644
--- a/drivers/iommu/iommu-debug.c
+++ b/drivers/iommu/iommu-debug.c
@@ -5,11 +5,13 @@
  * IOMMU API sanitizers and debug
  */
 #include
+#include
 #include
 #include
 #include
 
 static bool needed;
+static DEFINE_STATIC_KEY_FALSE(iommu_debug_initialized);
 
 struct iommu_debug_metadata {
 	atomic_t ref;
@@ -25,6 +27,27 @@ struct page_ext_operations page_iommu_debug_ops = {
 	.need = need_iommu_debug,
 };
 
+void iommu_debug_map(struct iommu_domain *domain, phys_addr_t phys, size_t size)
+{
+}
+
+void iommu_debug_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
+{
+}
+
+void iommu_debug_remap(struct iommu_domain *domain, unsigned long iova, size_t size)
+{
+}
+
+void iommu_debug_init(void)
+{
+	if (!needed)
+		return;
+
+	pr_info("iommu: Debugging page allocations, expect overhead or disable iommu.debug_pagealloc\n");
+	static_branch_enable(&iommu_debug_initialized);
+}
+
 static int __init iommu_debug_pagealloc(char *str)
 {
 	return kstrtobool(str, &needed);

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 060ebe330ee1..56c89636a33c 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include <linux/iommu-debug.h>
 #include
 #include
 #include
@@ -231,6 +232,9 @@ static int __init iommu_subsys_init(void)
 	if (!nb)
 		return -ENOMEM;
 
+#ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC
+	iommu_debug_init();
+#endif
 	for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) {
 		nb[i].notifier_call = iommu_bus_notifier;
 		bus_register_notifier(iommu_buses[i], &nb[i]);
@@ -2518,10 +2522,14 @@ int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova,
 	}
 
 	/* unroll mapping in case something went wrong */
-	if (ret)
+	if (ret) {
 		iommu_unmap(domain, orig_iova, orig_size - size);
-	else
+	} else {
 		trace_map(orig_iova, orig_paddr, orig_size);
+#ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC
+		iommu_debug_map(domain, orig_paddr, orig_size);
+#endif
+	}
 
 	return ret;
 }
@@ -2583,6 +2591,10 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 
 	pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size);
 
+#ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC
+	iommu_debug_unmap(domain, iova, size);
+#endif
+
 	/*
 	 * Keep iterating until we either unmap 'size' bytes (or more)
 	 * or we hit an area that isn't mapped.
@@ -2602,6 +2614,11 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 		unmapped += unmapped_page;
 	}
 
+#ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC
+	if (unmapped < size)
+		iommu_debug_remap(domain, iova, size - unmapped);
+#endif
+
 	trace_unmap(orig_iova, size, unmapped);
 	return unmapped;
 }

diff --git a/include/linux/iommu-debug.h b/include/linux/iommu-debug.h
index a9c11855c4ed..8d3ea661660f 100644
--- a/include/linux/iommu-debug.h
+++ b/include/linux/iommu-debug.h
@@ -11,6 +11,12 @@
 #ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC
 
 extern struct page_ext_operations page_iommu_debug_ops;
+struct iommu_domain;
+
+void iommu_debug_map(struct iommu_domain *domain, phys_addr_t phys, size_t size);
+void iommu_debug_unmap(struct iommu_domain *domain, unsigned long iova, size_t size);
+void iommu_debug_remap(struct iommu_domain *domain, unsigned long iova, size_t size);
+void iommu_debug_init(void);
 
 #endif /* CONFIG_IOMMU_DEBUG_PAGEALLOC */
 
--
2.51.0.618.g983fd99d29-goog

Implement the new calls using an atomic refcount to track how many times
a page is mapped in any of the IOMMUs. For unmap we need to use
iova_to_phys() to get the physical address of the pages.

We use the smallest supported page size as the granularity of tracking
per domain. This is important as it is possible to map pages and then
unmap them with larger sizes (as in the map_sg() case).

Signed-off-by: Mostafa Saleh
---
 drivers/iommu/iommu-debug.c | 83 +++++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)

diff --git a/drivers/iommu/iommu-debug.c b/drivers/iommu/iommu-debug.c
index 607f1fcf2235..cec8f594c7fa 100644
--- a/drivers/iommu/iommu-debug.c
+++ b/drivers/iommu/iommu-debug.c
@@ -27,16 +27,99 @@ struct page_ext_operations page_iommu_debug_ops = {
 	.need = need_iommu_debug,
 };
 
+static struct page_ext *get_iommu_page_ext(phys_addr_t phys)
+{
+	struct page *page = phys_to_page(phys);
+	struct page_ext *page_ext = page_ext_get(page);
+
+	return page_ext;
+}
+
+static struct iommu_debug_metadata *get_iommu_data(struct page_ext *page_ext)
+{
+	return page_ext_data(page_ext, &page_iommu_debug_ops);
+}
+
+static void iommu_debug_inc_page(phys_addr_t phys)
+{
+	struct page_ext *page_ext = get_iommu_page_ext(phys);
+	struct iommu_debug_metadata *d = get_iommu_data(page_ext);
+
+	WARN_ON(atomic_inc_return(&d->ref) <= 0);
+	page_ext_put(page_ext);
+}
+
+static void iommu_debug_dec_page(phys_addr_t phys)
+{
+	struct page_ext *page_ext = get_iommu_page_ext(phys);
+	struct iommu_debug_metadata *d = get_iommu_data(page_ext);
+
+	WARN_ON(atomic_dec_return(&d->ref) < 0);
+	page_ext_put(page_ext);
+}
+
+/*
+ * The IOMMU page size might not match the CPU page size; in that case, we use
+ * the smallest IOMMU page size to refcount the pages in the vmemmap.
+ * That is important as both map and unmap have to use the same page size
+ * to update the refcount, to avoid double counting the same page.
+ * And as we can't know from iommu_unmap() which page size was originally
+ * used for map, we just use the minimum supported one for both.
+ */
+static size_t iommu_debug_page_size(struct iommu_domain *domain)
+{
+	return 1UL << __ffs(domain->pgsize_bitmap);
+}
+
 void iommu_debug_map(struct iommu_domain *domain, phys_addr_t phys, size_t size)
 {
+	size_t off;
+	size_t page_size = iommu_debug_page_size(domain);
+
+	if (!static_branch_likely(&iommu_debug_initialized))
+		return;
+
+	for (off = 0; off < size; off += page_size) {
+		if (!pfn_valid(__phys_to_pfn(phys + off)))
+			continue;
+		iommu_debug_inc_page(phys + off);
+	}
 }
 
 void iommu_debug_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
 {
+	size_t off;
+	size_t page_size = iommu_debug_page_size(domain);
+
+	if (!static_branch_likely(&iommu_debug_initialized))
+		return;
+
+	for (off = 0; off < size; off += page_size) {
+		phys_addr_t phys = iommu_iova_to_phys(domain, iova + off);
+
+		if (!phys || !pfn_valid(__phys_to_pfn(phys)))
+			continue;
+
+		iommu_debug_dec_page(phys);
+	}
 }
 
 void iommu_debug_remap(struct iommu_domain *domain, unsigned long iova, size_t size)
 {
+	size_t off;
+	size_t page_size = iommu_debug_page_size(domain);
+
+	if (!static_branch_likely(&iommu_debug_initialized))
+		return;
+
+	for (off = 0; off < size; off += page_size) {
+		phys_addr_t phys = iommu_iova_to_phys(domain, iova + off);
+
+		if (!phys || !pfn_valid(__phys_to_pfn(phys)))
+			continue;
+
+		iommu_debug_inc_page(phys);
+	}
 }
 
 void iommu_debug_init(void)
--
2.51.0.618.g983fd99d29-goog

Now that page_ext holds a count of the IOMMU mappings of each page, we can
use it to assert that any page being allocated or freed is indeed not
mapped in the IOMMU.

The sanitizer doesn't protect against mapping/unmapping while the page is
free; however, that is less harmful as the page is not used by the kernel
during that period.

Signed-off-by: Mostafa Saleh
---
 drivers/iommu/iommu-debug.c | 22 ++++++++++++++++++++++
 include/linux/iommu-debug.h |  1 +
 include/linux/mm.h          |  7 +++++++
 3 files changed, 30 insertions(+)

diff --git a/drivers/iommu/iommu-debug.c b/drivers/iommu/iommu-debug.c
index cec8f594c7fa..09157fef697e 100644
--- a/drivers/iommu/iommu-debug.c
+++ b/drivers/iommu/iommu-debug.c
@@ -71,6 +71,28 @@ static size_t iommu_debug_page_size(struct iommu_domain *domain)
 	return 1UL << __ffs(domain->pgsize_bitmap);
 }
 
+static unsigned int iommu_debug_page_count(phys_addr_t phys)
+{
+	unsigned int ref;
+	struct page_ext *page_ext = get_iommu_page_ext(phys);
+	struct iommu_debug_metadata *d = get_iommu_data(page_ext);
+
+	ref = atomic_read(&d->ref);
+	page_ext_put(page_ext);
+	return ref;
+}
+
+void iommu_debug_check_unmapped(const struct page *page, int numpages)
+{
+	if (!static_branch_likely(&iommu_debug_initialized))
+		return;
+
+	while (numpages--) {
+		WARN_ON(iommu_debug_page_count(page_to_phys(page)));
+		page++;
+	}
+}
+
 void iommu_debug_map(struct iommu_domain *domain, phys_addr_t phys, size_t size)
 {
 	size_t off;

diff --git a/include/linux/iommu-debug.h b/include/linux/iommu-debug.h
index 8d3ea661660f..aaf893cfafd0 100644
--- a/include/linux/iommu-debug.h
+++ b/include/linux/iommu-debug.h
@@ -17,6 +17,7 @@ void iommu_debug_map(struct iommu_domain *domain, phys_addr_t phys, size_t size
 void iommu_debug_unmap(struct iommu_domain *domain, unsigned long iova, size_t size);
 void iommu_debug_remap(struct iommu_domain *domain, unsigned long iova, size_t size);
 void iommu_debug_init(void);
+void iommu_debug_check_unmapped(const struct page *page, int numpages);
 
 #endif /* CONFIG_IOMMU_DEBUG_PAGEALLOC */
 

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 06978b4dbeb8..00f5de44faa0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -36,6 +36,7 @@
 #include
 #include
 #include
+#include <linux/iommu-debug.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -3806,12 +3807,18 @@ extern void __kernel_map_pages(struct page *page, int numpages, int enable);
 #ifdef CONFIG_DEBUG_PAGEALLOC
 static inline void debug_pagealloc_map_pages(struct page *page, int numpages)
 {
+#ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC
+	iommu_debug_check_unmapped(page, numpages);
+#endif
	if (debug_pagealloc_enabled_static())
 		__kernel_map_pages(page, numpages, 1);
 }
 
 static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages)
 {
+#ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC
+	iommu_debug_check_unmapped(page, numpages);
+#endif
 	if (debug_pagealloc_enabled_static())
 		__kernel_map_pages(page, numpages, 0);
 }
--
2.51.0.618.g983fd99d29-goog
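For reference, below is a sketch of the kind of driver bug this series is
meant to flag. It is illustrative only and not part of the series: the
functions buggy_dma_setup() and buggy_teardown() are hypothetical; only
the iommu_debug_* behaviour described in the comments comes from the
patches above.

/* Hypothetical driver snippet, for illustration only. */
#include <linux/gfp.h>
#include <linux/io.h>
#include <linux/iommu.h>
#include <linux/mm.h>

static int buggy_dma_setup(struct iommu_domain *domain, unsigned long iova,
			   struct page **out_page)
{
	struct page *page = alloc_page(GFP_KERNEL);

	if (!page)
		return -ENOMEM;

	*out_page = page;
	/* iommu_map() ends up calling iommu_debug_map(): refcount 0 -> 1 */
	return iommu_map(domain, iova, page_to_phys(page), PAGE_SIZE,
			 IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
}

static void buggy_teardown(struct iommu_domain *domain, unsigned long iova,
			   struct page *page)
{
	/*
	 * BUG: the page is freed while still mapped in the domain.  With
	 * iommu.debug_pagealloc enabled, debug_pagealloc_unmap_pages()
	 * calls iommu_debug_check_unmapped(), which WARNs because the
	 * per-page refcount is still 1.  The correct order would be:
	 *
	 *	iommu_unmap(domain, iova, PAGE_SIZE);
	 *	__free_page(page);
	 */
	__free_page(page);
}

With the series applied and iommu.debug_pagealloc on the command line, the
warning fires at page-free time instead of the corruption showing up later
in an unrelated allocation.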