Add a selftest to help investigate latencies when mapping pages in VFIO. To disambiguate the prefetch time from the pinning time, the mmap() flag MAP_POPULATE is used. With prefetching done in mmap(), the pinning latencies are exposed when gigantic pages are mapped.

For this test, 8G of memory is pinned to keep the test responsive. Pinning more memory could result in the test timing out, e.g. if it pinned 256G, because the majority of that time is spent prefetching the memory in mmap().

The test has 4 main phases: mmap(), iova_alloc(), iommu_map(), and iommu_unmap(). Each of these phases is timed and reported in milliseconds. The test doesn't set latency targets for its phases, as targets are error prone and lead to flaky tests; instead, it simply reports how long each phase took.

Signed-off-by: Aaron Lewis <aaronlewis@google.com>
---
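Note: the MAP_POPULATE timing strategy described above can be sanity-checked
outside of VFIO with a standalone sketch along these lines (illustrative only,
not part of this patch; it assumes a 1G HugeTLB page has already been
reserved):

  #include <stdio.h>
  #include <sys/mman.h>
  #include <time.h>

  #ifndef MAP_HUGE_1GB
  #define MAP_HUGE_1GB (30 << 26)	/* 30 << MAP_HUGE_SHIFT, per linux/mman.h */
  #endif

  /* Milliseconds elapsed between two CLOCK_MONOTONIC samples. */
  static double elapsed_ms(struct timespec a, struct timespec b)
  {
  	return (b.tv_sec - a.tv_sec) * 1e3 + (b.tv_nsec - a.tv_nsec) / 1e6;
  }

  int main(void)
  {
  	const int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_HUGETLB |
  			  MAP_HUGE_1GB | MAP_POPULATE;
  	const size_t size = 1UL << 30;	/* one 1G HugeTLB page */
  	struct timespec t0, t1;
  	void *mem;

  	clock_gettime(CLOCK_MONOTONIC, &t0);
  	mem = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
  	clock_gettime(CLOCK_MONOTONIC, &t1);
  	if (mem == MAP_FAILED) {
  		perror("mmap");
  		return 1;
  	}

  	/* With MAP_POPULATE set, this includes the prefetch/fault-in cost. */
  	printf("mmap() took %.2fms\n", elapsed_ms(t0, t1));
  	munmap(mem, size);
  	return 0;
  }

To reserve a page first, e.g.:

  echo 1 > /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages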
 tools/testing/selftests/vfio/Makefile         |   1 +
 .../vfio/vfio_dma_mapping_perf_test.c         | 247 ++++++++++++++++++
 2 files changed, 248 insertions(+)
 create mode 100644 tools/testing/selftests/vfio/vfio_dma_mapping_perf_test.c

diff --git a/tools/testing/selftests/vfio/Makefile b/tools/testing/selftests/vfio/Makefile
index 3c796ca99a509..134ce40b81790 100644
--- a/tools/testing/selftests/vfio/Makefile
+++ b/tools/testing/selftests/vfio/Makefile
@@ -1,5 +1,6 @@
 CFLAGS = $(KHDR_INCLUDES)
 TEST_GEN_PROGS += vfio_dma_mapping_test
+TEST_GEN_PROGS += vfio_dma_mapping_perf_test
 TEST_GEN_PROGS += vfio_iommufd_setup_test
 TEST_GEN_PROGS += vfio_pci_device_test
 TEST_GEN_PROGS += vfio_pci_device_init_perf_test
diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_perf_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_perf_test.c
new file mode 100644
index 0000000000000..c70f6935e0291
--- /dev/null
+++ b/tools/testing/selftests/vfio/vfio_dma_mapping_perf_test.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/mman.h>
+#include <time.h>
+#include <linux/limits.h>
+#include <linux/mman.h>
+#include <linux/sizes.h>
+#include <linux/time64.h>
+#include <linux/vfio.h>
+
+#include <vfio_util.h>
+
+#include "../kselftest_harness.h"
+
+static const char *device_bdf;
+
+struct iommu_mapping {
+	u64 pgd;
+	u64 p4d;
+	u64 pud;
+	u64 pmd;
+	u64 pte;
+};
+
+static s64 to_ns(struct timespec ts)
+{
+	return (s64)ts.tv_nsec + NSEC_PER_SEC * (s64)ts.tv_sec;
+}
+
+static double to_ms(struct timespec ts)
+{
+	return to_ns(ts) / 1000.0 / 1000.0;
+}
+
+static struct timespec to_timespec(s64 ns)
+{
+	struct timespec ts = {
+		.tv_nsec = ns % NSEC_PER_SEC,
+		.tv_sec = ns / NSEC_PER_SEC,
+	};
+
+	return ts;
+}
+
+static struct timespec timespec_sub(struct timespec a, struct timespec b)
+{
+	return to_timespec(to_ns(a) - to_ns(b));
+}
+
+static double timespec_elapsed_ms(struct timespec start)
+{
+	struct timespec end;
+
+	clock_gettime(CLOCK_MONOTONIC, &end);
+	return to_ms(timespec_sub(end, start));
+}
+
+static void parse_next_value(char **line, u64 *value)
+{
+	char *token;
+
+	token = strtok_r(*line, " \t|\n", line);
+	if (!token)
+		return;
+
+	/* The caller verifies `value`; no need to check the return value. */
+	sscanf(token, "0x%lx", value);
+}
+
+static int intel_iommu_mapping_get(const char *bdf, u64 iova,
+				   struct iommu_mapping *mapping)
+{
+	char iommu_mapping_path[PATH_MAX], line[PATH_MAX];
+	u64 line_iova = -1;
+	int ret = -ENOENT;
+	FILE *file;
+	char *rest;
+
+	snprintf(iommu_mapping_path, sizeof(iommu_mapping_path),
+		 "/sys/kernel/debug/iommu/intel/%s/domain_translation_struct",
+		 bdf);
+
+	file = fopen(iommu_mapping_path, "r");
+	VFIO_ASSERT_NOT_NULL(file, "fopen(%s) failed", iommu_mapping_path);
+
+	while (fgets(line, sizeof(line), file)) {
+		rest = line;
+
+		parse_next_value(&rest, &line_iova);
+		if (line_iova != (iova / getpagesize()))
+			continue;
+
+		/*
+		 * Ensure each struct field is initialized in case of empty
+		 * page table values.
+		 */
+		memset(mapping, 0, sizeof(*mapping));
+		parse_next_value(&rest, &mapping->pgd);
+		parse_next_value(&rest, &mapping->p4d);
+		parse_next_value(&rest, &mapping->pud);
+		parse_next_value(&rest, &mapping->pmd);
+		parse_next_value(&rest, &mapping->pte);
+
+		ret = 0;
+		break;
+	}
+
+	fclose(file);
+
+	return ret;
+}
+
+static int iommu_mapping_get(const char *bdf, u64 iova,
+			     struct iommu_mapping *mapping)
+{
+	if (!access("/sys/kernel/debug/iommu/intel", F_OK))
+		return intel_iommu_mapping_get(bdf, iova, mapping);
+
+	return -EOPNOTSUPP;
+}
+
+FIXTURE(vfio_dma_mapping_perf_test) {
+	struct iommu *iommu;
+	struct vfio_pci_device *device;
+	struct iova_allocator *iova_allocator;
+};
+
+FIXTURE_VARIANT(vfio_dma_mapping_perf_test) {
+	const char *iommu_mode;
+	u64 size;
+	int mmap_flags;
+};
+
+#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode, _name, _size, _mmap_flags)	\
+FIXTURE_VARIANT_ADD(vfio_dma_mapping_perf_test, _iommu_mode ## _ ## _name) {	\
+	.iommu_mode = #_iommu_mode,						\
+	.size = (_size),							\
+	.mmap_flags = MAP_ANONYMOUS | MAP_PRIVATE |				\
+		      MAP_POPULATE | (_mmap_flags),				\
+}
+
+FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous_hugetlb_1gb, SZ_1G, MAP_HUGETLB | MAP_HUGE_1GB);
+
+#undef FIXTURE_VARIANT_ADD_IOMMU_MODE
+
+FIXTURE_SETUP(vfio_dma_mapping_perf_test)
+{
+	self->iommu = iommu_init(variant->iommu_mode);
+	self->device = vfio_pci_device_init(device_bdf, self->iommu);
+	self->iova_allocator = iova_allocator_init(self->iommu);
+}
+
+FIXTURE_TEARDOWN(vfio_dma_mapping_perf_test)
+{
+	iova_allocator_cleanup(self->iova_allocator);
+	vfio_pci_device_cleanup(self->device);
+	iommu_cleanup(self->iommu);
+}
+
+TEST_F(vfio_dma_mapping_perf_test, dma_map_unmap)
+{
+	u64 mapping_size = variant->size ?: getpagesize();
+	const u64 size = 8ULL * SZ_1G;
+	const int flags = variant->mmap_flags;
+	struct dma_region region;
+	struct iommu_mapping mapping;
+	struct timespec start;
+	u64 unmapped;
+	int rc;
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	region.vaddr = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
+	printf("mmap() duration = %.2lfms\n", timespec_elapsed_ms(start));
+
+	/* Skip the test if there aren't enough HugeTLB pages available. */
+	if ((flags & MAP_HUGETLB) && region.vaddr == MAP_FAILED)
+		SKIP(return, "mmap() failed: %s (%d)\n", strerror(errno), errno);
+	else
+		ASSERT_NE(region.vaddr, MAP_FAILED);
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	region.iova = iova_allocator_alloc(self->iova_allocator, size);
+	region.size = size;
+	printf("IOVA alloc duration = %.2lfms\n", timespec_elapsed_ms(start));
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	iommu_map(self->iommu, &region);
+	printf("DMA map duration = %.2lfms\n", timespec_elapsed_ms(start));
+
+	ASSERT_EQ(region.iova, to_iova(self->device, region.vaddr));
+
+	rc = iommu_mapping_get(device_bdf, region.iova, &mapping);
+	if (rc == -EOPNOTSUPP)
+		goto unmap;
+
+	ASSERT_EQ(0, rc);
+
+	/*
+	 * IOMMUFD compatibility-mode does not support huge mappings when
+	 * using VFIO_TYPE1_IOMMU.
+	 */
+	if (!strcmp(variant->iommu_mode, "iommufd_compat_type1"))
+		mapping_size = SZ_4K;
+
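+	/*
+	 * Check which level of the I/O page table the IOVA was mapped at:
+	 * a 4K mapping fills in a PTE, a 2M mapping terminates at the PMD,
+	 * and a 1G mapping terminates at the PUD.
+	 */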
+	switch (mapping_size) {
+	case SZ_4K:
+		ASSERT_NE(0, mapping.pte);
+		break;
+	case SZ_2M:
+		ASSERT_EQ(0, mapping.pte);
+		ASSERT_NE(0, mapping.pmd);
+		break;
+	case SZ_1G:
+		ASSERT_EQ(0, mapping.pte);
+		ASSERT_EQ(0, mapping.pmd);
+		ASSERT_NE(0, mapping.pud);
+		break;
+	default:
+		VFIO_FAIL("Unrecognized size: 0x%lx\n", mapping_size);
+	}
+
+unmap:
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	rc = __iommu_unmap(self->iommu, &region, &unmapped);
+	printf("DMA unmap duration = %.2lfms\n", timespec_elapsed_ms(start));
+	ASSERT_EQ(rc, 0);
+	ASSERT_EQ(unmapped, region.size);
+	ASSERT_NE(0, __to_iova(self->device, region.vaddr, NULL));
+	ASSERT_NE(0, iommu_mapping_get(device_bdf, region.iova, &mapping));
+
+	ASSERT_TRUE(!munmap(region.vaddr, size));
+}
+
+int main(int argc, char *argv[])
+{
+	device_bdf = vfio_selftests_get_bdf(&argc, argv);
+	return test_harness_run(argc, argv);
+}
--
2.52.0.351.gbe84eed79e-goog