From: Cong Wang

This commit introduces:

* Multikernel flag support in kexec_file_load by adding KEXEC_MULTIKERNEL to KEXEC_FILE_FLAGS, enabling user-space to specify multikernel operations through the file-based kexec interface with proper flag validation and ID extraction.

* Instance-based memory allocation for multikernel images through kexec_alloc_multikernel() that allocates kernel segments from instance-specific memory pools rather than system memory, ensuring compliance with device tree resource specifications.

* Multikernel control page allocation via kimage_alloc_multikernel_control_pages() that provides page-aligned control structures from instance pools with proper alignment validation and conflict detection against existing segments.

* Enhanced kimage_file_alloc_init() with multikernel instance association that extracts multikernel IDs from kexec flags, validates instance availability, establishes bidirectional cross-references, and updates instance states to LOADING during the load process.

* Integrated memory hole location in kexec_locate_mem_hole() that prioritizes multikernel instance pool allocation over system memory allocation, ensuring multikernel segments respect reserved memory boundaries and resource isolation.

The integration maintains compatibility with existing kexec_file_load() use cases, such as the crash kernel, while extending it for the multikernel case. Standard kexec operations continue to use system memory allocation, while multikernel operations automatically use instance-specific pools when the KEXEC_MULTIKERNEL flag is specified.

This enables user-space tools to load multikernel images using the more secure and flexible kexec_file_load interface rather than the legacy kexec_load syscall, providing better integration with modern security frameworks and signed kernel verification.
Signed-off-by: Cong Wang --- include/linux/kexec.h | 3 +- kernel/kexec_core.c | 61 ++++++++++++++++++++++ kernel/kexec_file.c | 116 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 177 insertions(+), 3 deletions(-) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 5e9e9ad1dfeb..b907b7a92fd2 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -477,7 +477,8 @@ bool kexec_load_permitted(int kexec_image_type); /* List of defined/legal kexec file flags */ #define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \ KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG | \ - KEXEC_FILE_NO_CMA | KEXEC_FILE_FORCE_DTB) + KEXEC_FILE_NO_CMA | KEXEC_FILE_FORCE_DTB | \ + KEXEC_MULTIKERNEL) /* flag to track if kexec reboot is in progress */ extern bool kexec_in_progress; diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 7db755e64dd6..61ad01acd034 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -476,6 +476,64 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image, } #endif +static struct page *kimage_alloc_multikernel_control_pages(struct kimage *image, + unsigned int order) +{ + /* Control pages for multikernel must be allocated from the instance's + * memory pool to ensure they stay within the reserved memory regions + * specified in the device tree configuration. + * + * We use mk_kimage_alloc() to get memory from the instance pool, + * then convert it to page structures. 
+ */ + void *virt_addr; + phys_addr_t phys_addr; + struct page *pages; + unsigned long size; + unsigned int count; + + if (!image->mk_instance) { + pr_err("Multikernel image has no associated instance\n"); + return NULL; + } + + count = 1 << order; + size = count << PAGE_SHIFT; + + /* Allocate from the multikernel instance pool (page aligned) */ + virt_addr = mk_kimage_alloc(image, size, PAGE_SIZE); + if (!virt_addr) { + pr_debug("Failed to allocate %lu bytes for multikernel control pages\n", size); + return NULL; + } + + /* Convert virtual address to physical */ + phys_addr = virt_to_phys(virt_addr); + + /* Check alignment requirements - control pages need page alignment */ + if (!IS_ALIGNED(phys_addr, PAGE_SIZE)) { + pr_err("Multikernel control page allocation not page-aligned: phys=0x%llx\n", + (unsigned long long)phys_addr); + mk_kimage_free(image, virt_addr, size); + return NULL; + } + + /* Get the page structure */ + pages = virt_to_page(virt_addr); + + /* Check for conflicts with existing segments */ + if (kimage_is_destination_range(image, phys_addr, phys_addr + size - 1)) { + pr_debug("Multikernel control pages conflict with existing segments: 0x%llx+0x%lx\n", + (unsigned long long)phys_addr, size); + mk_kimage_free(image, virt_addr, size); + return NULL; + } + + pr_debug("Allocated multikernel control pages: order=%u, phys=0x%llx, virt=%px\n", + order, (unsigned long long)phys_addr, virt_addr); + + return pages; +} struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order) @@ -491,6 +549,9 @@ struct page *kimage_alloc_control_pages(struct kimage *image, pages = kimage_alloc_crash_control_pages(image, order); break; #endif + case KEXEC_TYPE_MULTIKERNEL: + pages = kimage_alloc_multikernel_control_pages(image, order); + break; } return pages; diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 2d9d5626c8da..f9979c1d9f9e 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -27,6 +27,7 @@ #include #include #include 
+#include #include "kexec_internal.h" #ifdef CONFIG_KEXEC_SIG @@ -309,6 +310,7 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd, int ret; struct kimage *image; bool kexec_on_panic = flags & KEXEC_FILE_ON_CRASH; + bool multikernel_load = flags & KEXEC_MULTIKERNEL; image = do_kimage_alloc_init(); if (!image) @@ -322,8 +324,50 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd, /* Enable special crash kernel control page alloc policy. */ image->control_page = crashk_res.start; image->type = KEXEC_TYPE_CRASH; - } + } else #endif + if (multikernel_load) { + struct mk_instance *instance; + int mk_id = KEXEC_GET_MK_ID(flags); + + /* Set multikernel image type for proper memory allocation */ + image->type = KEXEC_TYPE_MULTIKERNEL; + + pr_info("kexec_file_load: multikernel load - flags=0x%lx, extracted mk_id=%d\n", + flags, mk_id); + + if (mk_id <= 0) { + pr_err("Invalid multikernel ID %d in flags\n", mk_id); + ret = -EINVAL; + goto out_free_image; + } + + /* Find the existing mk_instance */ + instance = mk_instance_find(mk_id); + if (!instance) { + pr_err("No multikernel instance found with ID %d\n", mk_id); + ret = -ENOENT; + goto out_free_image; + } + + /* Check if instance is already associated with a kimage */ + if (instance->kimage) { + pr_err("Multikernel instance %d already has an associated kimage\n", mk_id); + mk_instance_put(instance); + ret = -EBUSY; + goto out_free_image; + } + + /* Establish cross-references */ + image->mk_instance = instance; /* Transfer reference from find */ + image->mk_id = mk_id; + instance->kimage = image; + + /* Update instance state */ + mk_instance_set_state(instance, MK_STATE_LOADING); + + pr_info("Associated kimage with multikernel instance %d\n", mk_id); + } ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd, cmdline_ptr, cmdline_len, flags); @@ -731,6 +775,61 @@ static int kexec_alloc_contig(struct kexec_buf *kbuf) return 0; } +static int kexec_alloc_multikernel(struct kexec_buf *kbuf) +{ + 
void *virt_addr; + phys_addr_t phys_addr; + + pr_info("kexec_alloc_multikernel: called for segment size=0x%lx, buf_min=0x%lx, buf_max=0x%lx, align=0x%lx\n", + kbuf->memsz, kbuf->buf_min, kbuf->buf_max, kbuf->buf_align); + + /* Check if this is a multikernel image with an associated instance */ + if (!kbuf->image->mk_instance || kbuf->image->type != KEXEC_TYPE_MULTIKERNEL) { + pr_info("kexec_alloc_multikernel: not a multikernel image (mk_instance=%p, type=%d)\n", + kbuf->image->mk_instance, kbuf->image->type); + return -EPERM; + } + + /* Allocate from the multikernel instance pool using the proper API */ + virt_addr = mk_kimage_alloc(kbuf->image, kbuf->memsz, kbuf->buf_align); + if (!virt_addr) { + pr_info("Failed to allocate %lu bytes from multikernel instance pool (align=0x%lx)\n", + kbuf->memsz, kbuf->buf_align); + return -ENOMEM; + } + + /* Convert virtual address to physical */ + phys_addr = virt_to_phys(virt_addr); + + if (!IS_ALIGNED(phys_addr, kbuf->buf_align)) { + pr_info("Multikernel allocation not aligned: phys=0x%llx, required=0x%lx\n", + (unsigned long long)phys_addr, kbuf->buf_align); + mk_kimage_free(kbuf->image, virt_addr, kbuf->memsz); + return -ENOMEM; + } + + if (phys_addr < kbuf->buf_min || (phys_addr + kbuf->memsz - 1) > kbuf->buf_max) { + pr_info("Multikernel allocation out of bounds: phys=0x%llx, min=0x%lx, max=0x%lx\n", + (unsigned long long)phys_addr, kbuf->buf_min, kbuf->buf_max); + mk_kimage_free(kbuf->image, virt_addr, kbuf->memsz); + return -ENOMEM; + } + + if (kimage_is_destination_range(kbuf->image, phys_addr, phys_addr + kbuf->memsz - 1)) { + pr_info("Multikernel allocation conflicts with existing segments: 0x%llx+0x%lx\n", + (unsigned long long)phys_addr, kbuf->memsz); + mk_kimage_free(kbuf->image, virt_addr, kbuf->memsz); + return -EBUSY; + } + + kbuf->mem = phys_addr; + + pr_info("Allocated %lu bytes from multikernel pool at 0x%llx (virt=%px)\n", + kbuf->memsz, (unsigned long long)phys_addr, virt_addr); + + return 0; +} + /** * 
kexec_locate_mem_hole - find free memory for the purgatory or the next kernel * @kbuf: Parameters for the memory search. @@ -743,8 +842,21 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf) { int ret; + pr_info("kexec_locate_mem_hole: called for segment size=0x%lx, mem=0x%lx, image_type=%d\n", + kbuf->memsz, kbuf->mem, kbuf->image->type); + /* Arch knows where to place */ - if (kbuf->mem != KEXEC_BUF_MEM_UNKNOWN) + if (kbuf->mem != KEXEC_BUF_MEM_UNKNOWN) { + pr_info("kexec_locate_mem_hole: memory already specified (0x%lx), skipping allocation\n", kbuf->mem); + return 0; + } + + /* + * If this is a multikernel image, try to allocate from the instance's + * memory pool first. This ensures multikernel segments use pre-reserved + * memory from the device tree configuration and respects the pool management. + */ + if (!kexec_alloc_multikernel(kbuf)) return 0; /* -- 2.34.1