currently item size is u8, which would truncate APIC IDs that are greater than 255. Make it u32 so that it cna hold x2apic IDs as well. Signed-off-by: Igor Mammedov --- lib/x86/apic.h | 2 +- lib/x86/apic.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/x86/apic.h b/lib/x86/apic.h index cac6eab1..a76b1138 100644 --- a/lib/x86/apic.h +++ b/lib/x86/apic.h @@ -8,7 +8,7 @@ #include "processor.h" #include "smp.h" -extern u8 id_map[MAX_TEST_CPUS]; +extern u32 id_map[MAX_TEST_CPUS]; typedef struct { uint8_t vector; diff --git a/lib/x86/apic.c b/lib/x86/apic.c index 0d151476..c538fb5f 100644 --- a/lib/x86/apic.c +++ b/lib/x86/apic.c @@ -9,7 +9,7 @@ static void *g_apic = (void *)APIC_DEFAULT_PHYS_BASE; static void *g_ioapic = (void *)IO_APIC_DEFAULT_PHYS_BASE; -u8 id_map[MAX_TEST_CPUS]; +u32 id_map[MAX_TEST_CPUS]; struct apic_ops { u32 (*reg_read)(unsigned reg); -- 2.47.1 save_id() is called too early and uses xapic ops, which is fine up to 2566 cpus. However with that any CPU with APIC ID more than 255 will set wrong bit in online_cpus (since xapic ops only handle 8bit IDs, thus losing all higher bit x2apic might have). As result CPUs with id higher than 255 are not registered (they will trumple over elements in range 0-255 instead). To fix it move save_id() after the point where APs have switched to x2apic ops, to get non-truncated ID. Signed-off-by: Igor Mammedov --- lib/x86/setup.c | 2 +- x86/cstart.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/x86/setup.c b/lib/x86/setup.c index 122f0af3..c2f1c6d0 100644 --- a/lib/x86/setup.c +++ b/lib/x86/setup.c @@ -391,9 +391,9 @@ void ap_start64(void) setup_gdt_tss(); reset_apic(); load_idt(); - save_id(); enable_apic(); enable_x2apic(); + save_id(); ap_online(); } diff --git a/x86/cstart.S b/x86/cstart.S index 8fb7bdef..dafb330d 100644 --- a/x86/cstart.S +++ b/x86/cstart.S @@ -99,9 +99,9 @@ ap_start32: call load_idt call prepare_32 call reset_apic - call save_id call enable_apic call enable_x2apic + call save_id call ap_online /* ap_online() should never return */ -- 2.47.1 If number of CPUs is increased up to 2048, it will push available pages above 16Mb range and make smap/pku/pks tests fail with 'Could not reserve memory' error. Move pages used by tests to 32Mb to fix it. Signed-off-by: Igor Mammedov --- x86/pks.c | 2 +- x86/pku.c | 2 +- x86/smap.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/x86/pks.c b/x86/pks.c index f4d6ac83..9b9519ba 100644 --- a/x86/pks.c +++ b/x86/pks.c @@ -6,7 +6,7 @@ #include "x86/msr.h" #define PTE_PKEY_BIT 59 -#define SUPER_BASE (1 << 23) +#define SUPER_BASE (2 << 24) #define SUPER_VAR(v) (*((__typeof__(&(v))) (((unsigned long)&v) + SUPER_BASE))) volatile int pf_count = 0; diff --git a/x86/pku.c b/x86/pku.c index 6c0d72cc..544c6f24 100644 --- a/x86/pku.c +++ b/x86/pku.c @@ -6,7 +6,7 @@ #include "x86/msr.h" #define PTE_PKEY_BIT 59 -#define USER_BASE (1 << 23) +#define USER_BASE (2 << 24) #define USER_VAR(v) (*((__typeof__(&(v))) (((unsigned long)&v) + USER_BASE))) volatile int pf_count = 0; diff --git a/x86/smap.c b/x86/smap.c index 9a823a55..53c9f4ce 100644 --- a/x86/smap.c +++ b/x86/smap.c @@ -45,7 +45,7 @@ asm ("pf_tss:\n" "jmp pf_tss\n\t"); -#define USER_BASE (1 << 23) +#define USER_BASE (2 << 24) #define USER_VAR(v) (*((__typeof__(&(v))) (((unsigned long)&v) + USER_BASE))) #define USER_ADDR(v) ((void *)((unsigned long)(&v) + USER_BASE)) -- 2.47.1 this should allow run tests with more thatn 256 cpus Signed-off-by: Igor Mammedov --- lib/x86/smp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/x86/smp.h b/lib/x86/smp.h index bbe90daa..272aa5ee 100644 --- a/lib/x86/smp.h +++ b/lib/x86/smp.h @@ -1,7 +1,7 @@ #ifndef _X86_SMP_H_ #define _X86_SMP_H_ -#define MAX_TEST_CPUS (255) +#define MAX_TEST_CPUS (1024) /* * Allocate 12KiB of data for per-CPU usage. One page for per-CPU data, and -- 2.47.1 test is to be used for benchmarking/validating HPET main counter reading how to run: QEMU=/foo/qemu-system-x86_64 x86/run x86/hpet_read_test.flat -smp X where X is desired number of logical CPUs to test with it will 1st execute concurrent read benchmark and after that it will run torture test enabling/disabling HPET counter, while running readers in parallel. Goal is to verify counter that always goes up. Signed-off-by: Igor Mammedov --- v4: * use global for test cycles and siplify code a bit * use cpu number instead of APCI ID as index into latency array * report failure if a cpu measured 0 latency * replace on_cpus() with on_cpu_async() to avoid BSP interrupting itself * drop volatile v3: * measure lat inside threads so that, threads startup time wouldn't throw off results * fix BSP iterrupting itself by running read test and stalling other cpus as result. (fix it by exiting read test earlier if it's running on BSP) v2: * fix broken timer going backwards check * report # of fails * warn if number of vcpus is not sufficient for torture test and skip it * style fixups --- x86/Makefile.common | 2 + x86/hpet_read_test.c | 93 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 x86/hpet_read_test.c diff --git a/x86/Makefile.common b/x86/Makefile.common index 5663a65d..ef0e09a6 100644 --- a/x86/Makefile.common +++ b/x86/Makefile.common @@ -101,6 +101,8 @@ tests-common += $(TEST_DIR)/realmode.$(exe) \ realmode_bits := $(if $(call cc-option,-m16,""),16,32) endif +tests-common += $(TEST_DIR)/hpet_read_test.$(exe) + test_cases: $(tests-common) $(tests) $(TEST_DIR)/%.o: CFLAGS += -std=gnu99 -ffreestanding -I $(SRCDIR)/lib -I $(SRCDIR)/lib/x86 -I lib diff --git a/x86/hpet_read_test.c b/x86/hpet_read_test.c new file mode 100644 index 00000000..a44cdac2 --- /dev/null +++ b/x86/hpet_read_test.c @@ -0,0 +1,93 @@ +#include "libcflat.h" +#include "smp.h" +#include "apic.h" +#include "asm/barrier.h" +#include "x86/atomic.h" +#include "vmalloc.h" +#include "alloc.h" + +#define HPET_ADDR 0xFED00000L +#define HPET_COUNTER_ADDR ((uint8_t *)HPET_ADDR + 0xF0UL) +#define HPET_CONFIG_ADDR ((uint8_t *)HPET_ADDR + 0x10UL) +#define HPET_ENABLE_BIT 0x01UL +#define HPET_CLK_PERIOD 10 + +#define TEST_CYCLES 100000 + +static atomic_t fail; +static uint64_t latency[MAX_TEST_CPUS]; + +static void hpet_reader(void *data) +{ + long i; + uint64_t old_counter = 0, new_counter; + long id = (long)data; + + latency[id] = *(uint64_t *)HPET_COUNTER_ADDR; + for (i = 0; i < TEST_CYCLES; ++i) { + new_counter = *(uint64_t *)HPET_COUNTER_ADDR; + if (new_counter < old_counter) + atomic_inc(&fail); + old_counter = new_counter; + } + /* claculate job latency in ns */ + latency[id] = (*(uint64_t *)HPET_COUNTER_ADDR - latency[id]) + * HPET_CLK_PERIOD / TEST_CYCLES; +} + +static void hpet_writer(void *data) +{ + int i; + + for (i = 0; i < TEST_CYCLES; ++i) + if (i % 2) + *(uint64_t *)HPET_CONFIG_ADDR |= HPET_ENABLE_BIT; + else + *(uint64_t *)HPET_CONFIG_ADDR &= ~HPET_ENABLE_BIT; +} + +int main(void) +{ + unsigned long cpu, time_ns, lat = 0; + uint64_t start, end; + int ncpus = cpu_count(); + + do { + printf("* starting concurrent read bench on %d cpus\n", ncpus); + *(uint64_t *)HPET_CONFIG_ADDR |= HPET_ENABLE_BIT; + start = *(uint64_t *)HPET_COUNTER_ADDR; + + for (cpu = cpu_count() - 1; cpu > 0; --cpu) + on_cpu_async(cpu, hpet_reader, (void *)cpu); + while (cpus_active() > 1) + pause(); + + end = (*(uint64_t *)HPET_COUNTER_ADDR); + time_ns = (end - start) * HPET_CLK_PERIOD; + + for (cpu = 1; cpu < ncpus; cpu++) + if (latency[cpu]) + lat += latency[cpu]; + else + report_fail("cpu %lu reported invalid latency (0)\n", cpu); + lat = lat / ncpus; + + report(time_ns && !atomic_read(&fail), + "read test took %lu ms, avg read: %lu ns\n", time_ns/1000000, lat); + } while (0); + + do { + printf("* starting enable/disable with concurrent readers torture\n"); + if (ncpus > 2) { + for (cpu = 2; cpu < ncpus; cpu++) + on_cpu_async(cpu, hpet_reader, (void *)TEST_CYCLES); + + on_cpu(1, hpet_writer, (void *)TEST_CYCLES); + report(!atomic_read(&fail), "torture test, fails: %u\n", + atomic_read(&fail)); + } else + printf("SKIP: torture test: '-smp X' should be greater than 2\n"); + } while (0); + + return report_summary(); +} -- 2.47.1