From: dongsheng For Intel Atom CPUs, the PMU events "Instruction Retired" or "Branch Instruction Retired" may be overcounted for certain instructions, such as FAR CALL/JMP, RETF, IRET, VMENTRY/VMEXIT/VMPTRLD and complex SGX/SMX/CSTATE instructions/flows. Detailed information can be found in the errata (section SRF7): https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/sierra-forest/xeon-6700-series-processor-with-e-cores-specification-update/errata-details/ On Atom platforms up to and including Sierra Forest, both events, "Instruction Retired" and "Branch Instruction Retired", are overcounted for these instructions, whereas on Clearwater Forest only the "Instruction Retired" event is overcounted. So add a helper, detect_inst_overcount_flags(), to detect whether the platform has the overcount issue; later patches will relax the precise count checks based on the overcount flags returned by this helper. 
Signed-off-by: dongsheng [Rewrite comments and commit message - Dapeng] Signed-off-by: Dapeng Mi Tested-by: Yi Lai --- lib/x86/processor.h | 27 ++++++++++++++++++++++++++ x86/pmu.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/lib/x86/processor.h b/lib/x86/processor.h index 62f3d578..937f75e4 100644 --- a/lib/x86/processor.h +++ b/lib/x86/processor.h @@ -1188,4 +1188,31 @@ static inline bool is_lam_u57_enabled(void) return !!(read_cr3() & X86_CR3_LAM_U57); } +/* Copy from kernel arch/x86/lib/cpu.c */ +static inline u32 x86_family(u32 sig) +{ + u32 x86; + + x86 = (sig >> 8) & 0xf; + + if (x86 == 0xf) + x86 += (sig >> 20) & 0xff; + + return x86; +} + +static inline u32 x86_model(u32 sig) +{ + u32 fam, model; + + fam = x86_family(sig); + + model = (sig >> 4) & 0xf; + + if (fam >= 0x6) + model += ((sig >> 16) & 0xf) << 4; + + return model; +} + #endif diff --git a/x86/pmu.c b/x86/pmu.c index a6b0cfcc..87365aff 100644 --- a/x86/pmu.c +++ b/x86/pmu.c @@ -159,6 +159,14 @@ static struct pmu_event *gp_events; static unsigned int gp_events_size; static unsigned int fixed_counters_num; +/* + * Flags for Intel "Instruction Retired" and "Branch Instruction Retired" + * overcount flaws. + */ +#define INST_RETIRED_OVERCOUNT BIT(0) +#define BR_RETIRED_OVERCOUNT BIT(1) +static u32 intel_inst_overcount_flags; + static int has_ibpb(void) { return this_cpu_has(X86_FEATURE_SPEC_CTRL) || @@ -959,6 +967,43 @@ static void check_invalid_rdpmc_gp(void) "Expected #GP on RDPMC(64)"); } +/* + * For Intel Atom CPUs, the PMU events "Instruction Retired" or + * "Branch Instruction Retired" may be overcounted for some certain + * instructions, like FAR CALL/JMP, RETF, IRET, VMENTRY/VMEXIT/VMPTRLD + * and complex SGX/SMX/CSTATE instructions/flows. 
+ * + * The detailed information can be found in the errata (section SRF7): + * https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/sierra-forest/xeon-6700-series-processor-with-e-cores-specification-update/errata-details/ + * + * For the Atom platforms before Sierra Forest (including Sierra Forest), + * Both 2 events "Instruction Retired" and "Branch Instruction Retired" would + * be overcounted on these certain instructions, but for Clearwater Forest + * only "Instruction Retired" event is overcounted on these instructions. + */ +static u32 detect_inst_overcount_flags(void) +{ + u32 flags = 0; + struct cpuid c = cpuid(1); + + if (x86_family(c.a) == 0x6) { + switch (x86_model(c.a)) { + case 0xDD: /* Clearwater Forest */ + flags = INST_RETIRED_OVERCOUNT; + break; + + case 0xAF: /* Sierra Forest */ + case 0x4D: /* Avaton, Rangely */ + case 0x5F: /* Denverton */ + case 0x86: /* Jacobsville */ + flags = INST_RETIRED_OVERCOUNT | BR_RETIRED_OVERCOUNT; + break; + } + } + + return flags; +} + int main(int ac, char **av) { int instruction_idx; @@ -985,6 +1030,8 @@ int main(int ac, char **av) branch_idx = INTEL_BRANCHES_IDX; branch_miss_idx = INTEL_BRANCH_MISS_IDX; + intel_inst_overcount_flags = detect_inst_overcount_flags(); + /* * For legacy Intel CPUS without clflush/clflushopt support, * there is no way to force to trigger a LLC miss, thus set -- 2.34.1