Commit 2565711f authored by Peter Zijlstra's avatar Peter Zijlstra Committed by Ingo Molnar

perf: Improve the perf_sample_data struct layout

This patch reorders fields in the perf_sample_data struct in order to
minimize the number of cachelines touched in perf_sample_data_init().
It also removes some initializations which are redundant with the code
in kernel/events/core.c.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1411559322-16548-7-git-send-email-eranian@google.com
Cc: cebbert.lkml@gmail.com
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: jolsa@redhat.com
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 4b6c5177
...@@ -580,35 +580,40 @@ extern u64 perf_event_read_value(struct perf_event *event, ...@@ -580,35 +580,40 @@ extern u64 perf_event_read_value(struct perf_event *event,
struct perf_sample_data { struct perf_sample_data {
u64 type; /*
* Fields set by perf_sample_data_init(), group so as to
* minimize the cachelines touched.
*/
u64 addr;
struct perf_raw_record *raw;
struct perf_branch_stack *br_stack;
u64 period;
u64 weight;
u64 txn;
union perf_mem_data_src data_src;
/*
* The other fields, optionally {set,used} by
* perf_{prepare,output}_sample().
*/
u64 type;
u64 ip; u64 ip;
struct { struct {
u32 pid; u32 pid;
u32 tid; u32 tid;
} tid_entry; } tid_entry;
u64 time; u64 time;
u64 addr;
u64 id; u64 id;
u64 stream_id; u64 stream_id;
struct { struct {
u32 cpu; u32 cpu;
u32 reserved; u32 reserved;
} cpu_entry; } cpu_entry;
u64 period;
union perf_mem_data_src data_src;
struct perf_callchain_entry *callchain; struct perf_callchain_entry *callchain;
struct perf_raw_record *raw;
struct perf_branch_stack *br_stack;
struct perf_regs regs_user; struct perf_regs regs_user;
struct perf_regs regs_intr; struct perf_regs regs_intr;
u64 stack_user_size; u64 stack_user_size;
u64 weight; } ____cacheline_aligned;
/*
* Transaction flags for abort events:
*/
u64 txn;
};
/* default value for data source */ /* default value for data source */
#define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\ #define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\
...@@ -625,14 +630,9 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, ...@@ -625,14 +630,9 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
data->raw = NULL; data->raw = NULL;
data->br_stack = NULL; data->br_stack = NULL;
data->period = period; data->period = period;
data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE;
data->regs_user.regs = NULL;
data->stack_user_size = 0;
data->weight = 0; data->weight = 0;
data->data_src.val = PERF_MEM_NA; data->data_src.val = PERF_MEM_NA;
data->txn = 0; data->txn = 0;
data->regs_intr.abi = PERF_SAMPLE_REGS_ABI_NONE;
data->regs_intr.regs = NULL;
} }
extern void perf_output_sample(struct perf_output_handle *handle, extern void perf_output_sample(struct perf_output_handle *handle,
......
...@@ -4471,8 +4471,11 @@ static void perf_sample_regs_user(struct perf_regs *regs_user, ...@@ -4471,8 +4471,11 @@ static void perf_sample_regs_user(struct perf_regs *regs_user,
} }
if (regs) { if (regs) {
regs_user->regs = regs;
regs_user->abi = perf_reg_abi(current); regs_user->abi = perf_reg_abi(current);
regs_user->regs = regs;
} else {
regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
regs_user->regs = NULL;
} }
} }
...@@ -4947,12 +4950,13 @@ void perf_prepare_sample(struct perf_event_header *header, ...@@ -4947,12 +4950,13 @@ void perf_prepare_sample(struct perf_event_header *header,
header->size += size; header->size += size;
} }
if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER))
perf_sample_regs_user(&data->regs_user, regs);
if (sample_type & PERF_SAMPLE_REGS_USER) { if (sample_type & PERF_SAMPLE_REGS_USER) {
/* regs dump ABI info */ /* regs dump ABI info */
int size = sizeof(u64); int size = sizeof(u64);
perf_sample_regs_user(&data->regs_user, regs);
if (data->regs_user.regs) { if (data->regs_user.regs) {
u64 mask = event->attr.sample_regs_user; u64 mask = event->attr.sample_regs_user;
size += hweight64(mask) * sizeof(u64); size += hweight64(mask) * sizeof(u64);
...@@ -4968,15 +4972,11 @@ void perf_prepare_sample(struct perf_event_header *header, ...@@ -4968,15 +4972,11 @@ void perf_prepare_sample(struct perf_event_header *header,
* in case new sample type is added, because we could eat * in case new sample type is added, because we could eat
* up the rest of the sample size. * up the rest of the sample size.
*/ */
struct perf_regs *uregs = &data->regs_user;
u16 stack_size = event->attr.sample_stack_user; u16 stack_size = event->attr.sample_stack_user;
u16 size = sizeof(u64); u16 size = sizeof(u64);
if (!uregs->abi)
perf_sample_regs_user(uregs, regs);
stack_size = perf_sample_ustack_size(stack_size, header->size, stack_size = perf_sample_ustack_size(stack_size, header->size,
uregs->regs); data->regs_user.regs);
/* /*
* If there is something to dump, add space for the dump * If there is something to dump, add space for the dump
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment