Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf tooling fixes from Thomas Gleixner:

 - fix 'perf test Session topology' segfault on s390 (Thomas Richter)

 - fix NULL return handling in bpf__prepare_load() (YueHaibing)

 - fix indexing on Coresight ETM packet queue decoder (Mathieu Poirier)

 - fix perf.data format description of NRCPUS header (Arnaldo Carvalho
   de Melo)

 - update perf.data documentation section on cpu topology

 - handle uncore event aliases in small groups properly (Kan Liang)

 - add missing perf_sample.addr into python sample dictionary (Leo Yan)

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf tools: Fix perf.data format description of NRCPUS header
  perf script python: Add addr into perf sample dict
  perf data: Update documentation section on cpu topology
  perf cs-etm: Fix indexing for decoder packet queue
  perf bpf: Fix NULL return handling in bpf__prepare_load()
  perf test: "Session topology" dumps core on s390
  perf parse-events: Handle uncore event aliases in small groups properly
This commit is contained in:
Linus Torvalds 2018-06-03 08:58:59 -07:00
commit 26bdace74c
9 changed files with 185 additions and 21 deletions

View File

@ -111,8 +111,8 @@ A perf_header_string with the CPU architecture (uname -m)
A structure defining the number of CPUs. A structure defining the number of CPUs.
struct nr_cpus { struct nr_cpus {
uint32_t nr_cpus_online;
uint32_t nr_cpus_available; /* CPUs not yet onlined */ uint32_t nr_cpus_available; /* CPUs not yet onlined */
uint32_t nr_cpus_online;
}; };
HEADER_CPUDESC = 8, HEADER_CPUDESC = 8,
@ -153,10 +153,18 @@ struct {
HEADER_CPU_TOPOLOGY = 13, HEADER_CPU_TOPOLOGY = 13,
String lists defining the core and CPU threads topology. String lists defining the core and CPU threads topology.
The string lists are followed by a variable length array
which contains core_id and socket_id of each cpu.
The number of entries can be determined by the size of the
section minus the sizes of both string lists.
struct { struct {
struct perf_header_string_list cores; /* Variable length */ struct perf_header_string_list cores; /* Variable length */
struct perf_header_string_list threads; /* Variable length */ struct perf_header_string_list threads; /* Variable length */
struct {
uint32_t core_id;
uint32_t socket_id;
} cpus[nr]; /* Variable length records */
}; };
Example: Example:

View File

@ -70,6 +70,27 @@ static int check_cpu_topology(char *path, struct cpu_map *map)
session = perf_session__new(&data, false, NULL); session = perf_session__new(&data, false, NULL);
TEST_ASSERT_VAL("can't get session", session); TEST_ASSERT_VAL("can't get session", session);
/* On platforms with large numbers of CPUs process_cpu_topology()
* might issue an error while reading the perf.data file section
* HEADER_CPU_TOPOLOGY and the cpu_topology_map pointed to by member
* cpu is a NULL pointer.
* Example: On s390
* CPU 0 is on core_id 0 and physical_package_id 6
* CPU 1 is on core_id 1 and physical_package_id 3
*
* Core_id and physical_package_id are platform and architecture
* dependend and might have higher numbers than the CPU id.
* This actually depends on the configuration.
*
* In this case process_cpu_topology() prints error message:
* "socket_id number is too big. You may need to upgrade the
* perf tool."
*
* This is the reason why this test might be skipped.
*/
if (!session->header.env.cpu)
return TEST_SKIP;
for (i = 0; i < session->header.env.nr_cpus_avail; i++) { for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
if (!cpu_map__has(map, i)) if (!cpu_map__has(map, i))
continue; continue;
@ -95,7 +116,7 @@ int test__session_topology(struct test *test __maybe_unused, int subtest __maybe
{ {
char path[PATH_MAX]; char path[PATH_MAX];
struct cpu_map *map; struct cpu_map *map;
int ret = -1; int ret = TEST_FAIL;
TEST_ASSERT_VAL("can't get templ file", !get_temp(path)); TEST_ASSERT_VAL("can't get templ file", !get_temp(path));
@ -110,12 +131,9 @@ int test__session_topology(struct test *test __maybe_unused, int subtest __maybe
goto free_path; goto free_path;
} }
if (check_cpu_topology(path, map)) ret = check_cpu_topology(path, map);
goto free_map;
ret = 0;
free_map:
cpu_map__put(map); cpu_map__put(map);
free_path: free_path:
unlink(path); unlink(path);
return ret; return ret;

View File

@ -66,7 +66,7 @@ bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name)
} }
obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, name); obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, name);
if (IS_ERR(obj)) { if (IS_ERR_OR_NULL(obj)) {
pr_debug("bpf: failed to load buffer\n"); pr_debug("bpf: failed to load buffer\n");
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
@ -102,14 +102,14 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source)
pr_debug("bpf: successfull builtin compilation\n"); pr_debug("bpf: successfull builtin compilation\n");
obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename); obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename);
if (!IS_ERR(obj) && llvm_param.dump_obj) if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj)
llvm__dump_obj(filename, obj_buf, obj_buf_sz); llvm__dump_obj(filename, obj_buf, obj_buf_sz);
free(obj_buf); free(obj_buf);
} else } else
obj = bpf_object__open(filename); obj = bpf_object__open(filename);
if (IS_ERR(obj)) { if (IS_ERR_OR_NULL(obj)) {
pr_debug("bpf: failed to load %s\n", filename); pr_debug("bpf: failed to load %s\n", filename);
return obj; return obj;
} }

View File

@ -96,11 +96,19 @@ int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder,
/* Nothing to do, might as well just return */ /* Nothing to do, might as well just return */
if (decoder->packet_count == 0) if (decoder->packet_count == 0)
return 0; return 0;
/*
* The queueing process in function cs_etm_decoder__buffer_packet()
* increments the tail *before* using it. This is somewhat counter
* intuitive but it has the advantage of centralizing tail management
* at a single location. Because of that we need to follow the same
* heuristic with the head, i.e we increment it before using its
* value. Otherwise the first element of the packet queue is not
* used.
*/
decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1);
*packet = decoder->packet_buffer[decoder->head]; *packet = decoder->packet_buffer[decoder->head];
decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1);
decoder->packet_count--; decoder->packet_count--;
return 1; return 1;

View File

@ -127,6 +127,7 @@ struct perf_evsel {
bool precise_max; bool precise_max;
bool ignore_missing_thread; bool ignore_missing_thread;
bool forced_leader; bool forced_leader;
bool use_uncore_alias;
/* parse modifier helper */ /* parse modifier helper */
int exclude_GH; int exclude_GH;
int nr_members; int nr_members;

View File

@ -1219,13 +1219,16 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
int parse_events_add_pmu(struct parse_events_state *parse_state, int parse_events_add_pmu(struct parse_events_state *parse_state,
struct list_head *list, char *name, struct list_head *list, char *name,
struct list_head *head_config, bool auto_merge_stats) struct list_head *head_config,
bool auto_merge_stats,
bool use_alias)
{ {
struct perf_event_attr attr; struct perf_event_attr attr;
struct perf_pmu_info info; struct perf_pmu_info info;
struct perf_pmu *pmu; struct perf_pmu *pmu;
struct perf_evsel *evsel; struct perf_evsel *evsel;
struct parse_events_error *err = parse_state->error; struct parse_events_error *err = parse_state->error;
bool use_uncore_alias;
LIST_HEAD(config_terms); LIST_HEAD(config_terms);
pmu = perf_pmu__find(name); pmu = perf_pmu__find(name);
@ -1244,11 +1247,14 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
memset(&attr, 0, sizeof(attr)); memset(&attr, 0, sizeof(attr));
} }
use_uncore_alias = (pmu->is_uncore && use_alias);
if (!head_config) { if (!head_config) {
attr.type = pmu->type; attr.type = pmu->type;
evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats); evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats);
if (evsel) { if (evsel) {
evsel->pmu_name = name; evsel->pmu_name = name;
evsel->use_uncore_alias = use_uncore_alias;
return 0; return 0;
} else { } else {
return -ENOMEM; return -ENOMEM;
@ -1282,6 +1288,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
evsel->metric_expr = info.metric_expr; evsel->metric_expr = info.metric_expr;
evsel->metric_name = info.metric_name; evsel->metric_name = info.metric_name;
evsel->pmu_name = name; evsel->pmu_name = name;
evsel->use_uncore_alias = use_uncore_alias;
} }
return evsel ? 0 : -ENOMEM; return evsel ? 0 : -ENOMEM;
@ -1317,7 +1324,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
list_add_tail(&term->list, head); list_add_tail(&term->list, head);
if (!parse_events_add_pmu(parse_state, list, if (!parse_events_add_pmu(parse_state, list,
pmu->name, head, true)) { pmu->name, head,
true, true)) {
pr_debug("%s -> %s/%s/\n", str, pr_debug("%s -> %s/%s/\n", str,
pmu->name, alias->str); pmu->name, alias->str);
ok++; ok++;
@ -1339,7 +1347,120 @@ int parse_events__modifier_group(struct list_head *list,
return parse_events__modifier_event(list, event_mod, true); return parse_events__modifier_event(list, event_mod, true);
} }
void parse_events__set_leader(char *name, struct list_head *list) /*
* Check if the two uncore PMUs are from the same uncore block
* The format of the uncore PMU name is uncore_#blockname_#pmuidx
*/
static bool is_same_uncore_block(const char *pmu_name_a, const char *pmu_name_b)
{
char *end_a, *end_b;
end_a = strrchr(pmu_name_a, '_');
end_b = strrchr(pmu_name_b, '_');
if (!end_a || !end_b)
return false;
if ((end_a - pmu_name_a) != (end_b - pmu_name_b))
return false;
return (strncmp(pmu_name_a, pmu_name_b, end_a - pmu_name_a) == 0);
}
static int
parse_events__set_leader_for_uncore_aliase(char *name, struct list_head *list,
struct parse_events_state *parse_state)
{
struct perf_evsel *evsel, *leader;
uintptr_t *leaders;
bool is_leader = true;
int i, nr_pmu = 0, total_members, ret = 0;
leader = list_first_entry(list, struct perf_evsel, node);
evsel = list_last_entry(list, struct perf_evsel, node);
total_members = evsel->idx - leader->idx + 1;
leaders = calloc(total_members, sizeof(uintptr_t));
if (WARN_ON(!leaders))
return 0;
/*
* Going through the whole group and doing sanity check.
* All members must use alias, and be from the same uncore block.
* Also, storing the leader events in an array.
*/
__evlist__for_each_entry(list, evsel) {
/* Only split the uncore group which members use alias */
if (!evsel->use_uncore_alias)
goto out;
/* The events must be from the same uncore block */
if (!is_same_uncore_block(leader->pmu_name, evsel->pmu_name))
goto out;
if (!is_leader)
continue;
/*
* If the event's PMU name starts to repeat, it must be a new
* event. That can be used to distinguish the leader from
* other members, even they have the same event name.
*/
if ((leader != evsel) && (leader->pmu_name == evsel->pmu_name)) {
is_leader = false;
continue;
}
/* The name is always alias name */
WARN_ON(strcmp(leader->name, evsel->name));
/* Store the leader event for each PMU */
leaders[nr_pmu++] = (uintptr_t) evsel;
}
/* only one event alias */
if (nr_pmu == total_members) {
parse_state->nr_groups--;
goto handled;
}
/*
* An uncore event alias is a joint name which means the same event
* runs on all PMUs of a block.
* Perf doesn't support mixed events from different PMUs in the same
* group. The big group has to be split into multiple small groups
* which only include the events from the same PMU.
*
* Here the uncore event aliases must be from the same uncore block.
* The number of PMUs must be same for each alias. The number of new
* small groups equals to the number of PMUs.
* Setting the leader event for corresponding members in each group.
*/
i = 0;
__evlist__for_each_entry(list, evsel) {
if (i >= nr_pmu)
i = 0;
evsel->leader = (struct perf_evsel *) leaders[i++];
}
/* The number of members and group name are same for each group */
for (i = 0; i < nr_pmu; i++) {
evsel = (struct perf_evsel *) leaders[i];
evsel->nr_members = total_members / nr_pmu;
evsel->group_name = name ? strdup(name) : NULL;
}
/* Take the new small groups into account */
parse_state->nr_groups += nr_pmu - 1;
handled:
ret = 1;
out:
free(leaders);
return ret;
}
void parse_events__set_leader(char *name, struct list_head *list,
struct parse_events_state *parse_state)
{ {
struct perf_evsel *leader; struct perf_evsel *leader;
@ -1348,6 +1469,9 @@ void parse_events__set_leader(char *name, struct list_head *list)
return; return;
} }
if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state))
return;
__perf_evlist__set_leader(list); __perf_evlist__set_leader(list);
leader = list_entry(list->next, struct perf_evsel, node); leader = list_entry(list->next, struct perf_evsel, node);
leader->group_name = name ? strdup(name) : NULL; leader->group_name = name ? strdup(name) : NULL;

View File

@ -167,7 +167,9 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx,
void *ptr, char *type, u64 len); void *ptr, char *type, u64 len);
int parse_events_add_pmu(struct parse_events_state *parse_state, int parse_events_add_pmu(struct parse_events_state *parse_state,
struct list_head *list, char *name, struct list_head *list, char *name,
struct list_head *head_config, bool auto_merge_stats); struct list_head *head_config,
bool auto_merge_stats,
bool use_alias);
int parse_events_multi_pmu_add(struct parse_events_state *parse_state, int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
char *str, char *str,
@ -178,7 +180,8 @@ int parse_events_copy_term_list(struct list_head *old,
enum perf_pmu_event_symbol_type enum perf_pmu_event_symbol_type
perf_pmu__parse_check(const char *name); perf_pmu__parse_check(const char *name);
void parse_events__set_leader(char *name, struct list_head *list); void parse_events__set_leader(char *name, struct list_head *list,
struct parse_events_state *parse_state);
void parse_events_update_lists(struct list_head *list_event, void parse_events_update_lists(struct list_head *list_event,
struct list_head *list_all); struct list_head *list_all);
void parse_events_evlist_error(struct parse_events_state *parse_state, void parse_events_evlist_error(struct parse_events_state *parse_state,

View File

@ -161,7 +161,7 @@ PE_NAME '{' events '}'
struct list_head *list = $3; struct list_head *list = $3;
inc_group_count(list, _parse_state); inc_group_count(list, _parse_state);
parse_events__set_leader($1, list); parse_events__set_leader($1, list, _parse_state);
$$ = list; $$ = list;
} }
| |
@ -170,7 +170,7 @@ PE_NAME '{' events '}'
struct list_head *list = $2; struct list_head *list = $2;
inc_group_count(list, _parse_state); inc_group_count(list, _parse_state);
parse_events__set_leader(NULL, list); parse_events__set_leader(NULL, list, _parse_state);
$$ = list; $$ = list;
} }
@ -232,7 +232,7 @@ PE_NAME opt_event_config
YYABORT; YYABORT;
ALLOC_LIST(list); ALLOC_LIST(list);
if (parse_events_add_pmu(_parse_state, list, $1, $2, false)) { if (parse_events_add_pmu(_parse_state, list, $1, $2, false, false)) {
struct perf_pmu *pmu = NULL; struct perf_pmu *pmu = NULL;
int ok = 0; int ok = 0;
char *pattern; char *pattern;
@ -251,7 +251,7 @@ PE_NAME opt_event_config
free(pattern); free(pattern);
YYABORT; YYABORT;
} }
if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true)) if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true, false))
ok++; ok++;
parse_events_terms__delete(terms); parse_events_terms__delete(terms);
} }

View File

@ -531,6 +531,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
PyLong_FromUnsignedLongLong(sample->period)); PyLong_FromUnsignedLongLong(sample->period));
pydict_set_item_string_decref(dict_sample, "phys_addr", pydict_set_item_string_decref(dict_sample, "phys_addr",
PyLong_FromUnsignedLongLong(sample->phys_addr)); PyLong_FromUnsignedLongLong(sample->phys_addr));
pydict_set_item_string_decref(dict_sample, "addr",
PyLong_FromUnsignedLongLong(sample->addr));
set_sample_read_in_dict(dict_sample, sample, evsel); set_sample_read_in_dict(dict_sample, sample, evsel);
pydict_set_item_string_decref(dict, "sample", dict_sample); pydict_set_item_string_decref(dict, "sample", dict_sample);