From 18b4a4d59e97e7ff13ee84b5bec79f3fc70a9f0a Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Wed, 10 Feb 2010 10:03:34 +0100
Subject: [PATCH 01/12] oprofile: remove tracing build dependency

Commit 1155de4 ("ring-buffer: Make it generally available") already
made the ring buffer available without the TRACING option enabled.
This patch removes the TRACING dependency from oprofile. It also
fixes the oprofile configuration on ia64.

The patch also applies to the 2.6.32-stable kernel.

Reported-by: Tony Jones
Cc: stable@kernel.org
Signed-off-by: Robert Richter
---
 arch/Kconfig | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 9d055b4f0585..25e69f727a2e 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -6,8 +6,6 @@ config OPROFILE
 	tristate "OProfile system profiling (EXPERIMENTAL)"
 	depends on PROFILING
 	depends on HAVE_OPROFILE
-	depends on TRACING_SUPPORT
-	select TRACING
 	select RING_BUFFER
 	select RING_BUFFER_ALLOW_SWAP
 	help

From b309a294e5b24692d0f7ea1defa168074cea619e Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Fri, 26 Feb 2010 15:01:23 +0100
Subject: [PATCH 02/12] oprofile: remove EXPERIMENTAL from the config option description

OProfile has been in use for a long time and is no longer
experimental.

Signed-off-by: Robert Richter
---
 arch/Kconfig | 2 +-
 init/Kconfig | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 25e69f727a2e..d67787241813 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -3,7 +3,7 @@
 #
 config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
+	tristate "OProfile system profiling"
 	depends on PROFILING
 	depends on HAVE_OPROFILE
 	select RING_BUFFER
diff --git a/init/Kconfig b/init/Kconfig
index d95ca7cd5d45..b7c583956107 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1112,7 +1112,7 @@ config MMAP_ALLOW_UNINITIALIZED
 	  See Documentation/nommu-mmap.txt for more information.

 config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
+	bool "Profiling support"
 	help
 	  Say Y here to enable the extended profiling support mechanisms used
 	  by profilers such as OProfile.

From 013cfc50672bbb638796545231683231647edb07 Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Thu, 28 Jan 2010 18:05:26 +0100
Subject: [PATCH 03/12] oprofile/x86: remove OPROFILE_IBS config option

OProfile support for IBS has been in the kernel for several versions
now. The feature is stable and the code can be activated permanently.
As a side effect, IBS now also works on nosmp configs.

Signed-off-by: Robert Richter
---
 arch/Kconfig                     | 14 --------------
 arch/x86/oprofile/op_model_amd.c | 31 +------------------------------
 2 files changed, 1 insertion(+), 44 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index d67787241813..06a13729c8df 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -15,20 +15,6 @@ config OPROFILE

 	  If unsure, say N.

-config OPROFILE_IBS
-	bool "OProfile AMD IBS support (EXPERIMENTAL)"
-	default n
-	depends on OPROFILE && SMP && X86
-	help
-	  Instruction-Based Sampling (IBS) is a new profiling
-	  technique that provides rich, precise program performance
-	  information. IBS is introduced by AMD Family10h processors
-	  (AMD Opteron Quad-Core processor "Barcelona") to overcome
-	  the limitations of conventional performance counter
-	  sampling.
-
-	  If unsure, say N.
-
 config OPROFILE_EVENT_MULTIPLEX
 	bool "OProfile multiplexing support (EXPERIMENTAL)"
 	default n
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 39686c29f03a..2b9c68d868ed 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include

 #include "op_x86_model.h"
 #include "op_counter.h"
@@ -43,8 +44,6 @@

 static unsigned long reset_value[NUM_VIRT_COUNTERS];

-#ifdef CONFIG_OPROFILE_IBS
-
 /* IbsFetchCtl bits/masks */
 #define IBS_FETCH_RAND_EN	(1ULL<<57)
 #define IBS_FETCH_VAL		(1ULL<<49)
@@ -72,8 +71,6 @@ struct op_ibs_config {

 static struct op_ibs_config ibs_config;

-#endif
-
 #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX

 static void op_mux_fill_in_addresses(struct op_msrs * const msrs)
@@ -185,8 +182,6 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
 	}
 }

-#ifdef CONFIG_OPROFILE_IBS
-
 static inline void
 op_amd_handle_ibs(struct pt_regs * const regs,
 		  struct op_msrs const * const msrs)
@@ -272,15 +267,6 @@ static void op_amd_stop_ibs(void)
 	wrmsrl(MSR_AMD64_IBSOPCTL, 0);
 }

-#else
-
-static inline void op_amd_handle_ibs(struct pt_regs * const regs,
-				     struct op_msrs const * const msrs) { }
-static inline void op_amd_start_ibs(void) { }
-static inline void op_amd_stop_ibs(void) { }
-
-#endif
-
 static int op_amd_check_ctrs(struct pt_regs * const regs,
 			     struct op_msrs const * const msrs)
 {
@@ -355,8 +341,6 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
 	}
 }

-#ifdef CONFIG_OPROFILE_IBS
-
 static u8 ibs_eilvt_off;

 static inline void apic_init_ibs_nmi_per_cpu(void *arg)
@@ -507,19 +491,6 @@ static void op_amd_exit(void)
 	ibs_exit();
 }

-#else
-
-/* no IBS support */
-
-static int op_amd_init(struct oprofile_operations *ops)
-{
-	return 0;
-}
-
-static void op_amd_exit(void) {}
-
-#endif /* CONFIG_OPROFILE_IBS */
-
 struct op_x86_model_spec op_amd_spec = {
 	.num_counters		= NUM_COUNTERS,
 	.num_controls		= NUM_CONTROLS,

From 89baaaa98a10cad5cc8516c7208b02d9fc711890 Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Thu, 28 Jan 2010 16:50:45 +0100
Subject: [PATCH 04/12] oprofile/x86: remove node check in AMD IBS initialization

Standard AMD systems have the same number of nodes as there are
northbridge devices. However, kernel configurations (especially
32-bit ones) or system setups may exist where the node number is
different or cannot be detected properly. Thus the check is not
reliable and may fail even though the IBS setup is fine. For this
reason it is better to remove the check.

Cc: stable
Signed-off-by: Robert Richter
---
 arch/x86/oprofile/op_model_amd.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 2b9c68d868ed..4eb30715b1d5 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -389,16 +389,6 @@ static int init_ibs_nmi(void)
 		return 1;
 	}

-#ifdef CONFIG_NUMA
-	/* Sanity check */
-	/* Works only for 64bit with proper numa implementation. */
-	if (nodes != num_possible_nodes()) {
-		printk(KERN_DEBUG "Failed to setup CPU node(s) for IBS, "
-			"found: %d, expected %d",
-			nodes, num_possible_nodes());
-		return 1;
-	}
-#endif
 	return 0;
 }

From 64683da6643e8c6c93f1f99548399b08c029fd13 Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Thu, 4 Feb 2010 10:57:23 +0100
Subject: [PATCH 05/12] oprofile/x86: implement IBS cpuid feature detection

This patch adds IBS feature detection using cpuid flags.
An IBS capability mask is introduced to test for certain IBS
features. The bit mask is the same as for the IBS cpuid feature flags
(Fn8000_001B_EAX), but bit 0 is used to indicate the existence of
IBS.

The patch also changes the handling of the IbsOpCntCtl bit (periodic
op counter count control). The oprofilefs file for this feature
(ibs_op/dispatched_ops) is only exposed if the feature is available,
and the default for the bit is now to count clock cycles.

In general, userland can detect the availability of a feature by
checking for the corresponding file in oprofilefs: if the file
exists, the feature exists. This may lead to a dynamic file layout
that depends on the cpu type, which userland has to deal with.
Current opcontrol is compatible.
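As an illustration of this detection scheme (not part of the patch),
here is a minimal userland sketch, assuming oprofilefs is mounted at
its usual location /dev/oprofile; it simply probes for the feature
file created below:

  /* Illustration only: userland feature detection via oprofilefs. */
  #include <stdio.h>
  #include <unistd.h>

  int main(void)
  {
          /* ibs_op/dispatched_ops exists only if IBS_CAPS_OPCNT is set */
          if (access("/dev/oprofile/ibs_op/dispatched_ops", F_OK) == 0)
                  printf("IbsOpCntCtl available: can count dispatched ops\n");
          else
                  printf("IbsOpCntCtl not available: counting clock cycles\n");
          return 0;
  }

The same probe works for any feature-dependent file, since a file's
existence implies the feature's existence.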
Signed-off-by: Robert Richter
---
 arch/x86/oprofile/op_model_amd.c | 80 +++++++++++++++++++++++++-------
 1 file changed, 63 insertions(+), 17 deletions(-)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 4eb30715b1d5..6557683c190e 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -23,6 +23,8 @@
 #include
 #include
 #include
+#include
+#include

 #include "op_x86_model.h"
 #include "op_counter.h"
@@ -58,7 +60,7 @@
 #define IBS_FETCH_SIZE	6
 #define IBS_OP_SIZE	12

-static int has_ibs;	/* AMD Family10h and later */
+static u32 ibs_caps;

 struct op_ibs_config {
 	unsigned long op_enabled;
@@ -71,6 +73,40 @@ struct op_ibs_config {

 static struct op_ibs_config ibs_config;

+/*
+ * IBS cpuid feature detection
+ */
+
+#define IBS_CPUID_FEATURES	0x8000001b
+
+/*
+ * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but
+ * bit 0 is used to indicate the existence of IBS.
+ */
+#define IBS_CAPS_AVAIL		(1LL<<0)
+#define IBS_CAPS_OPCNT		(1LL<<4)
+
+static u32 get_ibs_caps(void)
+{
+	u32 ibs_caps;
+	unsigned int max_level;
+
+	if (!boot_cpu_has(X86_FEATURE_IBS))
+		return 0;
+
+	/* check IBS cpuid feature flags */
+	max_level = cpuid_eax(0x80000000);
+	if (max_level < IBS_CPUID_FEATURES)
+		return IBS_CAPS_AVAIL;
+
+	ibs_caps = cpuid_eax(IBS_CPUID_FEATURES);
+	if (!(ibs_caps & IBS_CAPS_AVAIL))
+		/* cpuid flags not valid */
+		return IBS_CAPS_AVAIL;
+
+	return ibs_caps;
+}
+
 #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX

 static void op_mux_fill_in_addresses(struct op_msrs * const msrs)
@@ -189,7 +225,7 @@ op_amd_handle_ibs(struct pt_regs * const regs,
 	u64 val, ctl;
 	struct op_entry entry;

-	if (!has_ibs)
+	if (!ibs_caps)
 		return;

 	if (ibs_config.fetch_enabled) {
@@ -241,16 +277,21 @@ op_amd_handle_ibs(struct pt_regs * const regs,
 static inline void op_amd_start_ibs(void)
 {
 	u64 val;
-	if (has_ibs && ibs_config.fetch_enabled) {
+
+	if (!ibs_caps)
+		return;
+
+	if (ibs_config.fetch_enabled) {
 		val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
 		val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
 		val |= IBS_FETCH_ENABLE;
 		wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
 	}

-	if (has_ibs && ibs_config.op_enabled) {
+	if (ibs_config.op_enabled) {
 		val = (ibs_config.max_cnt_op >> 4) & 0xFFFF;
-		val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
+		if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops)
+			val |= IBS_OP_CNT_CTL;
 		val |= IBS_OP_ENABLE;
 		wrmsrl(MSR_AMD64_IBSOPCTL, val);
 	}
@@ -258,11 +299,14 @@ static inline void op_amd_start_ibs(void)

 static void op_amd_stop_ibs(void)
 {
-	if (has_ibs && ibs_config.fetch_enabled)
+	if (!ibs_caps)
+		return;
+
+	if (ibs_config.fetch_enabled)
 		/* clear max count and enable */
 		wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);

-	if (has_ibs && ibs_config.op_enabled)
+	if (ibs_config.op_enabled)
 		/* clear max count and enable */
 		wrmsrl(MSR_AMD64_IBSOPCTL, 0);
 }
@@ -395,29 +439,30 @@ static int init_ibs_nmi(void)
 /* uninitialize the APIC for the IBS interrupts if needed */
 static void clear_ibs_nmi(void)
 {
-	if (has_ibs)
+	if (ibs_caps)
 		on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1);
 }

 /* initialize the APIC for the IBS interrupts if available */
 static void ibs_init(void)
 {
-	has_ibs = boot_cpu_has(X86_FEATURE_IBS);
+	ibs_caps = get_ibs_caps();

-	if (!has_ibs)
+	if (!ibs_caps)
 		return;

 	if (init_ibs_nmi()) {
-		has_ibs = 0;
+		ibs_caps = 0;
 		return;
 	}

-	printk(KERN_INFO "oprofile: AMD IBS detected\n");
+	printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n",
+	       (unsigned)ibs_caps);
 }

 static void ibs_exit(void)
 {
-	if (!has_ibs)
+	if (!ibs_caps)
 		return;

 	clear_ibs_nmi();
@@ -437,7 +482,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
 	if (ret)
 		return ret;

-	if (!has_ibs)
+	if (!ibs_caps)
 		return ret;

 	/* model specific files */
@@ -447,7 +492,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
 	ibs_config.fetch_enabled = 0;
 	ibs_config.max_cnt_op = 250000;
 	ibs_config.op_enabled = 0;
-	ibs_config.dispatched_ops = 1;
+	ibs_config.dispatched_ops = 0;

 	dir = oprofilefs_mkdir(sb, root, "ibs_fetch");
 	oprofilefs_create_ulong(sb, dir, "enable",
@@ -462,8 +507,9 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
 				&ibs_config.op_enabled);
 	oprofilefs_create_ulong(sb, dir, "max_count",
 				&ibs_config.max_cnt_op);
-	oprofilefs_create_ulong(sb, dir, "dispatched_ops",
-				&ibs_config.dispatched_ops);
+	if (ibs_caps & IBS_CAPS_OPCNT)
+		oprofilefs_create_ulong(sb, dir, "dispatched_ops",
+					&ibs_config.dispatched_ops);

 	return 0;
 }

From f125be1469303f7b9324447f251d74a0da24952f Mon Sep 17 00:00:00 2001
From: Suravee Suthikulpanit
Date: Mon, 18 Jan 2010 11:25:45 -0600
Subject: [PATCH 06/12] oprofile/x86: implement LFSR pseudo-random number generator for IBS

This patch implements a linear feedback shift register (LFSR) for
pseudo-random number generation for IBS.

For IBS measurements it would be good to minimize memory traffic in
the interrupt handler, since every access pollutes the data caches.
Computing a maximal-period LFSR needs just shifts, XORs, and ORs.

The LFSR method is good enough to randomize the ops at low overhead.
16 pseudo-random bits are enough for the implementation, and it
doesn't matter that the pattern repeats with a fairly short cycle; it
only needs to break up (hard) periodic sampling behavior.

The logic was designed by Paul Drongowski.
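For reference (again not part of the patch), the choice of taps can
be checked in isolation: the sketch below repeats the kernel's update
step and confirms the maximal period, i.e. that all 2^16 - 1 non-zero
states are visited before the sequence repeats:

  /* Illustration only: verify the LFSR below has maximal period. */
  #include <stdio.h>

  static unsigned int lfsr_value = 0xF00D;

  static unsigned int lfsr_random(void)
  {
          unsigned int bit;

          /* taps at bits 0, 2, 3, 5 <=> x^16 + x^14 + x^13 + x^11 + 1 */
          bit = ((lfsr_value >> 0) ^
                 (lfsr_value >> 2) ^
                 (lfsr_value >> 3) ^
                 (lfsr_value >> 5)) & 0x0001;

          lfsr_value = (lfsr_value >> 1) | (bit << 15);

          return lfsr_value;
  }

  int main(void)
  {
          unsigned int start = lfsr_value, period = 0;

          do {
                  lfsr_random();
                  period++;
          } while (lfsr_value != start);

          printf("period = %u\n", period);        /* prints 65535 */
          return 0;
  }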
Signed-off-by: Suravee Suthikulpanit
Signed-off-by: Robert Richter
---
 arch/x86/oprofile/op_model_amd.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 6557683c190e..97c84ebe3f24 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -218,6 +218,29 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
 	}
 }

+/*
+ * 16-bit Linear Feedback Shift Register (LFSR)
+ *
+ *                       16   14   13    11
+ * Feedback polynomial = X  +  X  +  X  +  X  + 1
+ */
+static unsigned int lfsr_random(void)
+{
+	static unsigned int lfsr_value = 0xF00D;
+	unsigned int bit;
+
+	/* Compute next bit to shift in */
+	bit = ((lfsr_value >> 0) ^
+	       (lfsr_value >> 2) ^
+	       (lfsr_value >> 3) ^
+	       (lfsr_value >> 5)) & 0x0001;
+
+	/* Advance to next register value */
+	lfsr_value = (lfsr_value >> 1) | (bit << 15);
+
+	return lfsr_value;
+}
+
 static inline void
 op_amd_handle_ibs(struct pt_regs * const regs,
 		  struct op_msrs const * const msrs)

From ba52078e1917c5116c0802298d88ad0e54a6728b Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Tue, 23 Feb 2010 15:46:49 +0100
Subject: [PATCH 07/12] oprofile/x86: implement randomization for IBS periodic op counter

IBS selects an op (execution operation) for sampling by counting
either cycles or dispatched ops. Better statistical samples can be
produced by adding a software-generated random offset to the periodic
op counter value with each sample.

This patch adds software randomization to the IBS periodic op
counter. The lower 12 bits of the 20-bit counter are randomized;
IbsOpCurCnt is initialized with a 12-bit random value. There is a
workaround for hardware that cannot write to IbsOpCurCnt: in that
case the lower 8 bits of the 16-bit IbsOpMaxCnt [15:0] value are
randomized in the range of -128 to +127 by adding/subtracting an
offset from the maximum count (IbsOpMaxCnt).

The linear feedback shift register (LFSR) algorithm is used for
pseudo-random number generation to keep the impact on the memory
system low.
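To make the two cases concrete, here is an illustration-only sketch
(simplified types, fixed stand-in value for lfsr_random()) of where
the random bits end up in the IbsOpCtl value:

  /* Illustration only: the two randomization paths of this patch. */
  #include <stdio.h>

  typedef unsigned long long u64;
  typedef signed char s8;

  #define IBS_RANDOM_BITS 12
  #define IBS_RANDOM_MASK ((1ULL << IBS_RANDOM_BITS) - 1)

  int main(void)
  {
          unsigned int random = 0xBEEF;   /* stand-in for lfsr_random() */
          u64 ibs_op_ctl = 250000 >> 4;   /* IbsOpMaxCnt field, default count */
          u64 val;

          /* IbsOpCurCnt writable: 12 random bits go into bits [43:32] */
          val = ibs_op_ctl | ((u64)(random & IBS_RANDOM_MASK) << 32);
          printf("IbsOpCurCnt path: 0x%llx\n", val);

          /* workaround: offset IbsOpMaxCnt by -128..+127 instead */
          val = ibs_op_ctl + (s8)(random >> 4);
          printf("IbsOpMaxCnt path: 0x%llx (offset %d)\n",
                 val, (s8)(random >> 4));
          return 0;
  }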
Signed-off-by: Robert Richter
---
 arch/x86/oprofile/op_model_amd.c | 69 +++++++++++++++++++++++++++++---
 1 file changed, 63 insertions(+), 6 deletions(-)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 97c84ebe3f24..a9d194734a8e 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -52,7 +52,7 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS];
 #define IBS_FETCH_ENABLE	(1ULL<<48)
 #define IBS_FETCH_CNT_MASK	0xFFFF0000ULL

-/*IbsOpCtl bits */
+/* IbsOpCtl bits */
 #define IBS_OP_CNT_CTL		(1ULL<<19)
 #define IBS_OP_VAL		(1ULL<<18)
 #define IBS_OP_ENABLE		(1ULL<<17)
@@ -72,6 +72,7 @@ struct op_ibs_config {
 };

 static struct op_ibs_config ibs_config;
+static u64 ibs_op_ctl;

 /*
  * IBS cpuid feature detection
@@ -84,8 +85,16 @@ static struct op_ibs_config ibs_config;
  * bit 0 is used to indicate the existence of IBS.
  */
 #define IBS_CAPS_AVAIL		(1LL<<0)
+#define IBS_CAPS_RDWROPCNT	(1LL<<3)
 #define IBS_CAPS_OPCNT		(1LL<<4)

+/*
+ * IBS randomization macros
+ */
+#define IBS_RANDOM_BITS			12
+#define IBS_RANDOM_MASK			((1ULL << IBS_RANDOM_BITS) - 1)
+#define IBS_RANDOM_MAXCNT_OFFSET	(1ULL << (IBS_RANDOM_BITS - 5))
+
 static u32 get_ibs_caps(void)
 {
 	u32 ibs_caps;
@@ -241,6 +250,38 @@ static unsigned int lfsr_random(void)
 	return lfsr_value;
 }

+/*
+ * IBS software randomization
+ *
+ * The IBS periodic op counter is randomized in software. The lower 12
+ * bits of the 20 bit counter are randomized. IbsOpCurCnt is
+ * initialized with a 12 bit random value.
+ */
+static inline u64 op_amd_randomize_ibs_op(u64 val)
+{
+	unsigned int random = lfsr_random();
+
+	if (!(ibs_caps & IBS_CAPS_RDWROPCNT))
+		/*
+		 * Work around if the hw can not write to IbsOpCurCnt
+		 *
+		 * Randomize the lower 8 bits of the 16 bit
+		 * IbsOpMaxCnt [15:0] value in the range of -128 to
+		 * +127 by adding/subtracting an offset to the
+		 * maximum count (IbsOpMaxCnt).
+		 *
+		 * To avoid over or underflows and protect upper bits
+		 * starting at bit 16, the initial value for
+		 * IbsOpMaxCnt must fit in the range from 0x0081 to
+		 * 0xff80.
+		 */
+		val += (s8)(random >> 4);
+	else
+		val |= (u64)(random & IBS_RANDOM_MASK) << 32;
+
+	return val;
+}
+
 static inline void
 op_amd_handle_ibs(struct pt_regs * const regs,
 		  struct op_msrs const * const msrs)
@@ -290,8 +331,7 @@ op_amd_handle_ibs(struct pt_regs * const regs,
 			oprofile_write_commit(&entry);

 			/* reenable the IRQ */
-			ctl &= ~IBS_OP_VAL & 0xFFFFFFFF;
-			ctl |= IBS_OP_ENABLE;
+			ctl = op_amd_randomize_ibs_op(ibs_op_ctl);
 			wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
 		}
 	}
@@ -312,10 +352,27 @@ static inline void op_amd_start_ibs(void)
 	}

 	if (ibs_config.op_enabled) {
-		val = (ibs_config.max_cnt_op >> 4) & 0xFFFF;
+		ibs_op_ctl = ibs_config.max_cnt_op >> 4;
+		if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
+			/*
+			 * IbsOpCurCnt not supported. See
+			 * op_amd_randomize_ibs_op() for details.
+			 */
+			ibs_op_ctl = clamp(ibs_op_ctl, 0x0081ULL, 0xFF80ULL);
+		} else {
+			/*
+			 * The start value is randomized with a
+			 * positive offset, we need to compensate it
+			 * with the half of the randomized range. Also
+			 * avoid underflows.
+			 */
+			ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET,
+					 0xFFFFULL);
+		}
 		if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops)
-			val |= IBS_OP_CNT_CTL;
-		val |= IBS_OP_ENABLE;
+			ibs_op_ctl |= IBS_OP_CNT_CTL;
+		ibs_op_ctl |= IBS_OP_ENABLE;
+		val = op_amd_randomize_ibs_op(ibs_op_ctl);
 		wrmsrl(MSR_AMD64_IBSOPCTL, val);
 	}
 }

From 98a2e73a0690b3610f049a64154d8145e5771713 Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Tue, 23 Feb 2010 18:14:58 +0100
Subject: [PATCH 08/12] oprofile/x86: warn user if a counter is already active

This patch generates a warning if a counter is already active.
Implemented for AMD and P6 models. P4 is not supported.
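As a hypothetical illustration (not part of the patch), the same
enabled-bit test can be reproduced from user space through the msr
driver; MSR 0xc0010000 is AMD's EVNTSEL0 and bit 22 is its enable bit
(ARCH_PERFMON_EVENTSEL0_ENABLE), but the program is only a sketch and
needs the msr module loaded plus root privileges:

  /* Illustration only: check from userland whether AMD control 0 is active. */
  #include <stdio.h>
  #include <stdint.h>
  #include <fcntl.h>
  #include <unistd.h>

  #define MSR_K7_EVNTSEL0 0xc0010000
  #define EVNTSEL_ENABLE  (1ULL << 22)

  int main(void)
  {
          uint64_t val;
          int fd = open("/dev/cpu/0/msr", O_RDONLY);

          if (fd < 0)
                  return 1;
          /* the pread offset selects the MSR to read */
          if (pread(fd, &val, sizeof(val), MSR_K7_EVNTSEL0) != sizeof(val))
                  return 1;
          if (val & EVNTSEL_ENABLE)
                  printf("counter #0 may already be used\n");
          close(fd);
          return 0;
  }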
Cc: Naga Chumbalkar
Cc: Shashi Belur
Cc: Tony Jones
Signed-off-by: Robert Richter
---
 arch/x86/oprofile/op_model_amd.c  | 11 ++++++++++-
 arch/x86/oprofile/op_model_ppro.c | 11 ++++++++++-
 arch/x86/oprofile/op_x86_model.h  | 11 +++++++++++
 3 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index a9d194734a8e..ef9d735dea35 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -194,9 +194,18 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,

 	/* clear all counters */
 	for (i = 0; i < NUM_CONTROLS; ++i) {
-		if (unlikely(!msrs->controls[i].addr))
+		if (unlikely(!msrs->controls[i].addr)) {
+			if (counter_config[i].enabled && !smp_processor_id())
+				/*
+				 * counter is reserved, this is on all
+				 * cpus, so report only for cpu #0
+				 */
+				op_x86_warn_reserved(i);
 			continue;
+		}
 		rdmsrl(msrs->controls[i].addr, val);
+		if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
+			op_x86_warn_in_use(i);
 		val &= model->reserved;
 		wrmsrl(msrs->controls[i].addr, val);
 	}
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 8eb05878554c..c344525ebb55 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -82,9 +82,18 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,

 	/* clear all counters */
 	for (i = 0; i < num_counters; ++i) {
-		if (unlikely(!msrs->controls[i].addr))
+		if (unlikely(!msrs->controls[i].addr)) {
+			if (counter_config[i].enabled && !smp_processor_id())
+				/*
+				 * counter is reserved, this is on all
+				 * cpus, so report only for cpu #0
+				 */
+				op_x86_warn_reserved(i);
 			continue;
+		}
 		rdmsrl(msrs->controls[i].addr, val);
+		if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
+			op_x86_warn_in_use(i);
 		val &= model->reserved;
 		wrmsrl(msrs->controls[i].addr, val);
 	}
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 7b8e75d16081..59fa2bdb0da3 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -57,6 +57,17 @@ struct op_x86_model_spec {

 struct op_counter_config;

+static inline void op_x86_warn_in_use(int counter)
+{
+	pr_warning("oprofile: counter #%d on cpu #%d may already be used\n",
+		   counter, smp_processor_id());
+}
+
+static inline void op_x86_warn_reserved(int counter)
+{
+	pr_warning("oprofile: counter #%d is already reserved\n", counter);
+}
+
 extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
 			   struct op_counter_config *counter_config);
 extern int op_x86_phys_to_virt(int phys);

From 8588d1067147e14d1dd521fbadd1d2564f8cc794 Mon Sep 17 00:00:00 2001
From: Naga Chumbalkar
Date: Tue, 23 Feb 2010 18:14:58 +0100
Subject: [PATCH 09/12] oprofile/x86: add comment to counter-in-use warning

Currently, oprofile fails silently on platforms where a non-OS entity
such as the system firmware "enables" and uses a performance counter.
There is a warning in the code for this case.

The warning indicates an already running counter. If oprofile doesn't
collect data, then try using a different performance counter on your
platform to monitor the desired event. Delete the counter from the
desired event by editing the /usr/share/oprofile///events file. If
the event cannot be monitored by any other counter, contact your
hardware or BIOS vendor.
Cc: Shashi Belur
Cc: Tony Jones
Signed-off-by: Naga Chumbalkar
Signed-off-by: Robert Richter
---
 arch/x86/oprofile/op_x86_model.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 59fa2bdb0da3..ff82a755edd4 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -59,6 +59,15 @@ struct op_counter_config;

 static inline void op_x86_warn_in_use(int counter)
 {
+	/*
+	 * The warning indicates an already running counter. If
+	 * oprofile doesn't collect data, then try using a different
+	 * performance counter on your platform to monitor the desired
+	 * event. Delete counter #%d from the desired event by editing
+	 * the /usr/share/oprofile/%s//events file. If the event
+	 * cannot be monitored by any other counter, contact your
+	 * hardware or BIOS vendor.
+	 */
 	pr_warning("oprofile: counter #%d on cpu #%d may already be used\n",
 		   counter, smp_processor_id());
 }

From 68dc819ce829f7e7977a56524e710473bdb55115 Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Thu, 25 Feb 2010 19:16:46 +0100
Subject: [PATCH 10/12] oprofile/x86: fix perfctr nmi reservation for multiplexing

Multiple virtual counters share one physical counter. The reservation
of virtual counters therefore fails due to duplicate allocation of
the same physical counter, which is already reserved. Thus, the
virtual counter reservation can be removed entirely. This also makes
the code simpler.

Cc: stable@kernel.org
Signed-off-by: Robert Richter
---
 arch/x86/oprofile/nmi_int.c      | 11 ++++++-----
 arch/x86/oprofile/op_model_amd.c | 19 -------------------
 2 files changed, 6 insertions(+), 24 deletions(-)

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 3347f696edc7..7170d1e29896 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -179,7 +179,6 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs)
 		if (counter_config[i].enabled) {
 			multiplex[i].saved = -(u64)counter_config[i].count;
 		} else {
-			multiplex[i].addr = 0;
 			multiplex[i].saved = 0;
 		}
 	}
@@ -189,25 +188,27 @@

 static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs)
 {
+	struct op_msr *counters = msrs->counters;
 	struct op_msr *multiplex = msrs->multiplex;
 	int i;

 	for (i = 0; i < model->num_counters; ++i) {
 		int virt = op_x86_phys_to_virt(i);
-		if (multiplex[virt].addr)
-			rdmsrl(multiplex[virt].addr, multiplex[virt].saved);
+		if (counters[i].addr)
+			rdmsrl(counters[i].addr, multiplex[virt].saved);
 	}
 }

 static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs)
 {
+	struct op_msr *counters = msrs->counters;
 	struct op_msr *multiplex = msrs->multiplex;
 	int i;

 	for (i = 0; i < model->num_counters; ++i) {
 		int virt = op_x86_phys_to_virt(i);
-		if (multiplex[virt].addr)
-			wrmsrl(multiplex[virt].addr, multiplex[virt].saved);
+		if (counters[i].addr)
+			wrmsrl(counters[i].addr, multiplex[virt].saved);
 	}
 }
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index ef9d735dea35..2aab018a7a56 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -118,19 +118,6 @@ static u32 get_ibs_caps(void)

 #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX

-static void op_mux_fill_in_addresses(struct op_msrs * const msrs)
-{
-	int i;
-
-	for (i = 0; i < NUM_VIRT_COUNTERS; i++) {
-		int hw_counter = op_x86_virt_to_phys(i);
-		if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
-			msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter;
-		else
-			msrs->multiplex[i].addr = 0;
-	}
-}
-
 static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
 			       struct op_msrs const * const msrs)
 {
@@ -149,10 +136,6 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
 	}
 }

-#else
-
-static inline void op_mux_fill_in_addresses(struct op_msrs * const msrs) { }
-
 #endif

 /* functions for op_amd_spec */
@@ -174,8 +157,6 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs)
 		else
 			msrs->controls[i].addr = 0;
 	}
-
-	op_mux_fill_in_addresses(msrs);
 }

From c17c8fbf349482e89b57d1b800e83e9f4cf40c47 Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Thu, 25 Feb 2010 20:20:25 +0100
Subject: [PATCH 11/12] oprofile/x86: use kzalloc() instead of kmalloc()

kzalloc() returns zeroed memory, so the MSR address arrays start out
cleared and the explicit addr = 0 initializations for unreserved
counters become redundant.

Cc: stable@kernel.org
Signed-off-by: Robert Richter
---
 arch/x86/oprofile/nmi_int.c       | 6 +++---
 arch/x86/oprofile/op_model_amd.c  | 4 ----
 arch/x86/oprofile/op_model_p4.c   | 6 ------
 arch/x86/oprofile/op_model_ppro.c | 6 +-----
 4 files changed, 4 insertions(+), 18 deletions(-)

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 7170d1e29896..2c505ee71014 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -159,7 +159,7 @@ static int nmi_setup_mux(void)

 	for_each_possible_cpu(i) {
 		per_cpu(cpu_msrs, i).multiplex =
-			kmalloc(multiplex_size, GFP_KERNEL);
+			kzalloc(multiplex_size, GFP_KERNEL);
 		if (!per_cpu(cpu_msrs, i).multiplex)
 			return 0;
 	}
@@ -304,11 +304,11 @@ static int allocate_msrs(void)
 	int i;

 	for_each_possible_cpu(i) {
-		per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
+		per_cpu(cpu_msrs, i).counters = kzalloc(counters_size,
 							GFP_KERNEL);
 		if (!per_cpu(cpu_msrs, i).counters)
 			return 0;
-		per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
+		per_cpu(cpu_msrs, i).controls = kzalloc(controls_size,
 							GFP_KERNEL);
 		if (!per_cpu(cpu_msrs, i).controls)
 			return 0;
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 2aab018a7a56..f4ebc4596da8 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -147,15 +147,11 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs)
 	for (i = 0; i < NUM_COUNTERS; i++) {
 		if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
 			msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
-		else
-			msrs->counters[i].addr = 0;
 	}

 	for (i = 0; i < NUM_CONTROLS; i++) {
 		if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i))
 			msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
-		else
-			msrs->controls[i].addr = 0;
 	}
 }
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index ac6b354becdf..e6a160a4684a 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -394,12 +394,6 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs)
 	setup_num_counters();
 	stag = get_stagger();

-	/* initialize some registers */
-	for (i = 0; i < num_counters; ++i)
-		msrs->counters[i].addr = 0;
-	for (i = 0; i < num_controls; ++i)
-		msrs->controls[i].addr = 0;
-
 	/* the counter & cccr registers we pay attention to */
 	for (i = 0; i < num_counters; ++i) {
 		addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index c344525ebb55..5d1727ba409e 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -37,15 +37,11 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs)
 	for (i = 0; i < num_counters; i++) {
 		if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
 			msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
-		else
-			msrs->counters[i].addr = 0;
 	}

 	for (i = 0; i < num_counters; i++) {
 		if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
 			msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
-		else
-			msrs->controls[i].addr = 0;
 	}
 }

@@ -57,7 +53,7 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
 	int i;

 	if (!reset_value) {
-		reset_value = kmalloc(sizeof(reset_value[0]) * num_counters,
+		reset_value = kzalloc(sizeof(reset_value[0]) * num_counters,
 				      GFP_ATOMIC);
 		if (!reset_value)
 			return;

From cfc9c0b450176a077205ef39092f0dc1a04e020a Mon Sep 17 00:00:00 2001
From: Robert Richter
Date: Fri, 26 Feb 2010 13:45:24 +0100
Subject: [PATCH 12/12] oprofile/x86: fix msr access to reserved counters

During the switching of virtual counters there is access to perfctr
MSRs. If a counter is not available, the access fails due to an
invalid address. This patch fixes it by checking reset_value instead,
which is nonzero only for counters whose MSRs were successfully
reserved.

Cc: stable@kernel.org
Signed-off-by: Robert Richter
---
 arch/x86/oprofile/op_model_amd.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index f4ebc4596da8..6a58256dce9f 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -127,7 +127,7 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
 	/* enable active counters */
 	for (i = 0; i < NUM_COUNTERS; ++i) {
 		int virt = op_x86_phys_to_virt(i);
-		if (!counter_config[virt].enabled)
+		if (!reset_value[virt])
 			continue;
 		rdmsrl(msrs->controls[i].addr, val);
 		val &= model->reserved;
@@ -163,7 +163,8 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,

 	/* setup reset_value */
 	for (i = 0; i < NUM_VIRT_COUNTERS; ++i) {
-		if (counter_config[i].enabled)
+		if (counter_config[i].enabled
+		    && msrs->counters[op_x86_virt_to_phys(i)].addr)
 			reset_value[i] = counter_config[i].count;
 		else
 			reset_value[i] = 0;
@@ -197,9 +198,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
 	/* enable active counters */
 	for (i = 0; i < NUM_COUNTERS; ++i) {
 		int virt = op_x86_phys_to_virt(i);
-		if (!counter_config[virt].enabled)
-			continue;
-		if (!msrs->counters[i].addr)
+		if (!reset_value[virt])
 			continue;

 		/* setup counter registers */