From: Ankit Agrawal It is possible for some system memory pages on the EGM to have retired pages with uncorrectable ECC errors. A list of pages known with such errors (referred as retired pages) are maintained by the Host UEFI. The Host UEFI populates such list in a reserved region. It communicates the SPA of this region through a ACPI DSDT property. nvgrace-egm module is responsible to store the list of retired page offsets to be made available for usermode processes. The module: 1. Get the reserved memory region SPA and maps to it to fetch the list of bad pages. 2. Calculate the retired page offsets in the EGM and stores it. Signed-off-by: Ankit Agrawal --- drivers/vfio/pci/nvgrace-gpu/egm.c | 75 ++++++++++++++++++++++++++ drivers/vfio/pci/nvgrace-gpu/egm_dev.c | 32 +++++++++-- drivers/vfio/pci/nvgrace-gpu/egm_dev.h | 5 +- drivers/vfio/pci/nvgrace-gpu/main.c | 8 +-- include/linux/nvgrace-egm.h | 2 + 5 files changed, 112 insertions(+), 10 deletions(-) diff --git a/drivers/vfio/pci/nvgrace-gpu/egm.c b/drivers/vfio/pci/nvgrace-gpu/egm.c index de7771a4145d..077de3833046 100644 --- a/drivers/vfio/pci/nvgrace-gpu/egm.c +++ b/drivers/vfio/pci/nvgrace-gpu/egm.c @@ -8,6 +8,11 @@ #define MAX_EGM_NODES 4 +struct h_node { + unsigned long mem_offset; + struct hlist_node node; +}; + static dev_t dev; static struct class *class; static DEFINE_XARRAY(egm_chardevs); @@ -16,6 +21,7 @@ struct chardev { struct device device; struct cdev cdev; atomic_t open_count; + DECLARE_HASHTABLE(htbl, 0x10); }; static struct nvgrace_egm_dev * @@ -174,20 +180,88 @@ static void del_egm_chardev(struct chardev *egm_chardev) put_device(&egm_chardev->device); } +static void cleanup_retired_pages(struct chardev *egm_chardev) +{ + struct h_node *cur_page; + unsigned long bkt; + struct hlist_node *temp_node; + + hash_for_each_safe(egm_chardev->htbl, bkt, temp_node, cur_page, node) { + hash_del(&cur_page->node); + kvfree(cur_page); + } +} + +static int nvgrace_egm_fetch_retired_pages(struct nvgrace_egm_dev *egm_dev, + struct chardev *egm_chardev) +{ + u64 count; + void *memaddr; + int index, ret = 0; + + memaddr = memremap(egm_dev->retiredpagesphys, PAGE_SIZE, MEMREMAP_WB); + if (!memaddr) + return -ENOMEM; + + count = *(u64 *)memaddr; + if (count > PAGE_SIZE / sizeof(count)) + return -EINVAL; + + for (index = 0; index < count; index++) { + struct h_node *retired_page; + + /* + * Since the EGM is linearly mapped, the offset in the + * carveout is the same offset in the VM system memory. + * + * Calculate the offset to communicate to the usermode + * apps. + */ + retired_page = kzalloc(sizeof(*retired_page), GFP_KERNEL); + if (!retired_page) { + ret = -ENOMEM; + break; + } + + retired_page->mem_offset = *((u64 *)memaddr + index + 1) - + egm_dev->egmphys; + hash_add(egm_chardev->htbl, &retired_page->node, + retired_page->mem_offset); + } + + memunmap(memaddr); + + if (ret) + cleanup_retired_pages(egm_chardev); + + return ret; +} + static int egm_driver_probe(struct auxiliary_device *aux_dev, const struct auxiliary_device_id *id) { struct nvgrace_egm_dev *egm_dev = container_of(aux_dev, struct nvgrace_egm_dev, aux_dev); struct chardev *egm_chardev; + int ret; egm_chardev = setup_egm_chardev(egm_dev); if (!egm_chardev) return -EINVAL; + hash_init(egm_chardev->htbl); + + ret = nvgrace_egm_fetch_retired_pages(egm_dev, egm_chardev); + if (ret) + goto error_exit; + xa_store(&egm_chardevs, egm_dev->egmpxm, egm_chardev, GFP_KERNEL); return 0; + +error_exit: + del_egm_chardev(egm_chardev); + return ret; } static void egm_driver_remove(struct auxiliary_device *aux_dev) @@ -199,6 +273,7 @@ static void egm_driver_remove(struct auxiliary_device *aux_dev) if (!egm_chardev) return; + cleanup_retired_pages(egm_chardev); del_egm_chardev(egm_chardev); } diff --git a/drivers/vfio/pci/nvgrace-gpu/egm_dev.c b/drivers/vfio/pci/nvgrace-gpu/egm_dev.c index 20291504aca8..6d716c3a3257 100644 --- a/drivers/vfio/pci/nvgrace-gpu/egm_dev.c +++ b/drivers/vfio/pci/nvgrace-gpu/egm_dev.c @@ -18,22 +18,41 @@ int nvgrace_gpu_has_egm_property(struct pci_dev *pdev, u64 *pegmpxm) } int nvgrace_gpu_fetch_egm_property(struct pci_dev *pdev, u64 *pegmphys, - u64 *pegmlength) + u64 *pegmlength, u64 *pretiredpagesphys) { int ret; /* - * The memory information is present in the system ACPI tables as DSD - * properties nvidia,egm-base-pa and nvidia,egm-size. + * The EGM memory information is present in the system ACPI tables + * as DSD properties nvidia,egm-base-pa and nvidia,egm-size. */ ret = device_property_read_u64(&pdev->dev, "nvidia,egm-size", pegmlength); if (ret) - return ret; + goto error_exit; ret = device_property_read_u64(&pdev->dev, "nvidia,egm-base-pa", pegmphys); + if (ret) + goto error_exit; + + /* + * SBIOS puts the list of retired pages on a region. The region + * SPA is exposed as "nvidia,egm-retired-pages-data-base". + */ + ret = device_property_read_u64(&pdev->dev, + "nvidia,egm-retired-pages-data-base", + pretiredpagesphys); + if (ret) + goto error_exit; + + /* Catch firmware bug and avoid a crash */ + if (*pretiredpagesphys == 0) { + dev_err(&pdev->dev, "Retired pages region is not setup\n"); + ret = -EINVAL; + } +error_exit: return ret; } @@ -74,7 +93,8 @@ static void nvgrace_gpu_release_aux_device(struct device *device) struct nvgrace_egm_dev * nvgrace_gpu_create_aux_device(struct pci_dev *pdev, const char *name, - u64 egmphys, u64 egmlength, u64 egmpxm) + u64 egmphys, u64 egmlength, u64 egmpxm, + u64 retiredpagesphys) { struct nvgrace_egm_dev *egm_dev; int ret; @@ -86,6 +106,8 @@ nvgrace_gpu_create_aux_device(struct pci_dev *pdev, const char *name, egm_dev->egmpxm = egmpxm; egm_dev->egmphys = egmphys; egm_dev->egmlength = egmlength; + egm_dev->retiredpagesphys = retiredpagesphys; + INIT_LIST_HEAD(&egm_dev->gpus); egm_dev->aux_dev.id = egmpxm; diff --git a/drivers/vfio/pci/nvgrace-gpu/egm_dev.h b/drivers/vfio/pci/nvgrace-gpu/egm_dev.h index 2e1612445898..2f329a05685d 100644 --- a/drivers/vfio/pci/nvgrace-gpu/egm_dev.h +++ b/drivers/vfio/pci/nvgrace-gpu/egm_dev.h @@ -16,8 +16,9 @@ void remove_gpu(struct nvgrace_egm_dev *egm_dev, struct pci_dev *pdev); struct nvgrace_egm_dev * nvgrace_gpu_create_aux_device(struct pci_dev *pdev, const char *name, - u64 egmphys, u64 egmlength, u64 egmpxm); + u64 egmphys, u64 egmlength, u64 egmpxm, + u64 retiredpagesphys); int nvgrace_gpu_fetch_egm_property(struct pci_dev *pdev, u64 *pegmphys, - u64 *pegmlength); + u64 *pegmlength, u64 *pretiredpagesphys); #endif /* EGM_DEV_H */ diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c index 0bb427cca31f..11bbecda1ad2 100644 --- a/drivers/vfio/pci/nvgrace-gpu/main.c +++ b/drivers/vfio/pci/nvgrace-gpu/main.c @@ -78,7 +78,7 @@ static struct list_head egm_dev_list; static int nvgrace_gpu_create_egm_aux_device(struct pci_dev *pdev) { struct nvgrace_egm_dev_entry *egm_entry = NULL; - u64 egmphys, egmlength, egmpxm; + u64 egmphys, egmlength, egmpxm, retiredpagesphys; int ret = 0; bool is_new_region = false; @@ -91,7 +91,8 @@ static int nvgrace_gpu_create_egm_aux_device(struct pci_dev *pdev) if (nvgrace_gpu_has_egm_property(pdev, &egmpxm)) goto exit; - ret = nvgrace_gpu_fetch_egm_property(pdev, &egmphys, &egmlength); + ret = nvgrace_gpu_fetch_egm_property(pdev, &egmphys, &egmlength, + &retiredpagesphys); if (ret) goto exit; @@ -114,7 +115,8 @@ static int nvgrace_gpu_create_egm_aux_device(struct pci_dev *pdev) egm_entry->egm_dev = nvgrace_gpu_create_aux_device(pdev, NVGRACE_EGM_DEV_NAME, - egmphys, egmlength, egmpxm); + egmphys, egmlength, egmpxm, + retiredpagesphys); if (!egm_entry->egm_dev) { ret = -EINVAL; goto free_egm_entry; diff --git a/include/linux/nvgrace-egm.h b/include/linux/nvgrace-egm.h index b9956e7e5a0e..9e0d190c7da0 100644 --- a/include/linux/nvgrace-egm.h +++ b/include/linux/nvgrace-egm.h @@ -7,6 +7,7 @@ #define NVGRACE_EGM_H #include +#include #define NVGRACE_EGM_DEV_NAME "egm" #define EGM_OFFSET_SHIFT 40 @@ -20,6 +21,7 @@ struct nvgrace_egm_dev { struct auxiliary_device aux_dev; phys_addr_t egmphys; size_t egmlength; + phys_addr_t retiredpagesphys; u64 egmpxm; struct list_head gpus; }; -- 2.34.1