/* SPDX-License-Identifier: GPL-2.0 */ /* * Convert sample address to data type using DWARF debug info. * * Written by Namhyung Kim <namhyung@kernel.org> */ #include <stdio.h> #include <stdlib.h> #include <inttypes.h> #include "annotate.h" #include "annotate-data.h" #include "debuginfo.h" #include "debug.h" #include "dso.h" #include "dwarf-regs.h" #include "evsel.h" #include "evlist.h" #include "map.h" #include "map_symbol.h" #include "strbuf.h" #include "symbol.h" #include "symbol_conf.h" /* * Compare type name and size to maintain them in a tree. * I'm not sure if DWARF would have information of a single type in many * different places (compilation units). If not, it could compare the * offset of the type entry in the .debug_info section. */ static int data_type_cmp(const void *_key, const struct rb_node *node) { const struct annotated_data_type *key = _key; struct annotated_data_type *type; type = rb_entry(node, struct annotated_data_type, node); if (key->self.size != type->self.size) return key->self.size - type->self.size; return strcmp(key->self.type_name, type->self.type_name); } static bool data_type_less(struct rb_node *node_a, const struct rb_node *node_b) { struct annotated_data_type *a, *b; a = rb_entry(node_a, struct annotated_data_type, node); b = rb_entry(node_b, struct annotated_data_type, node); if (a->self.size != b->self.size) return a->self.size < b->self.size; return strcmp(a->self.type_name, b->self.type_name) < 0; } /* Recursively add new members for struct/union */ static int __add_member_cb(Dwarf_Die *die, void *arg) { struct annotated_member *parent = arg; struct annotated_member *member; Dwarf_Die member_type, die_mem; Dwarf_Word size, loc; Dwarf_Attribute attr; struct strbuf sb; int tag; if (dwarf_tag(die) != DW_TAG_member) return DIE_FIND_CB_SIBLING; member = zalloc(sizeof(*member)); if (member == NULL) return DIE_FIND_CB_END; strbuf_init(&sb, 32); die_get_typename(die, &sb); die_get_real_type(die, &member_type); if (dwarf_aggregate_size(&member_type, &size) < 0) size = 0; if (!dwarf_attr_integrate(die, DW_AT_data_member_location, &attr)) loc = 0; else dwarf_formudata(&attr, &loc); member->type_name = strbuf_detach(&sb, NULL); /* member->var_name can be NULL */ if (dwarf_diename(die)) member->var_name = strdup(dwarf_diename(die)); member->size = size; member->offset = loc + parent->offset; INIT_LIST_HEAD(&member->children); list_add_tail(&member->node, &parent->children); tag = dwarf_tag(&member_type); switch (tag) { case DW_TAG_structure_type: case DW_TAG_union_type: die_find_child(&member_type, __add_member_cb, member, &die_mem); break; default: break; } return DIE_FIND_CB_SIBLING; } static void add_member_types(struct annotated_data_type *parent, Dwarf_Die *type) { Dwarf_Die die_mem; die_find_child(type, __add_member_cb, &parent->self, &die_mem); } static void delete_members(struct annotated_member *member) { struct annotated_member *child, *tmp; list_for_each_entry_safe(child, tmp, &member->children, node) { list_del(&child->node); delete_members(child); free(child->type_name); free(child->var_name); free(child); } } static struct annotated_data_type *dso__findnew_data_type(struct dso *dso, Dwarf_Die *type_die) { struct annotated_data_type *result = NULL; struct annotated_data_type key; struct rb_node *node; struct strbuf sb; char *type_name; Dwarf_Word size; strbuf_init(&sb, 32); if (die_get_typename_from_type(type_die, &sb) < 0) strbuf_add(&sb, "(unknown type)", 14); type_name = strbuf_detach(&sb, NULL); dwarf_aggregate_size(type_die, &size); /* Check existing nodes in dso->data_types tree */ key.self.type_name = type_name; key.self.size = size; node = rb_find(&key, &dso->data_types, data_type_cmp); if (node) { result = rb_entry(node, struct annotated_data_type, node); free(type_name); return result; } /* If not, add a new one */ result = zalloc(sizeof(*result)); if (result == NULL) { free(type_name); return NULL; } result->self.type_name = type_name; result->self.size = size; INIT_LIST_HEAD(&result->self.children); if (symbol_conf.annotate_data_member) add_member_types(result, type_die); rb_add(&result->node, &dso->data_types, data_type_less); return result; } static bool find_cu_die(struct debuginfo *di, u64 pc, Dwarf_Die *cu_die) { Dwarf_Off off, next_off; size_t header_size; if (dwarf_addrdie(di->dbg, pc, cu_die) != NULL) return cu_die; /* * There are some kernels don't have full aranges and contain only a few * aranges entries. Fallback to iterate all CU entries in .debug_info * in case it's missing. */ off = 0; while (dwarf_nextcu(di->dbg, off, &next_off, &header_size, NULL, NULL, NULL) == 0) { if (dwarf_offdie(di->dbg, off + header_size, cu_die) && dwarf_haspc(cu_die, pc)) return true; off = next_off; } return false; } /* The type info will be saved in @type_die */ static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset, bool is_pointer) { Dwarf_Word size; /* Get the type of the variable */ if (die_get_real_type(var_die, type_die) == NULL) { pr_debug("variable has no type\n"); ann_data_stat.no_typeinfo++; return -1; } /* * Usually it expects a pointer type for a memory access. * Convert to a real type it points to. But global variables * and local variables are accessed directly without a pointer. */ if (is_pointer) { if ((dwarf_tag(type_die) != DW_TAG_pointer_type && dwarf_tag(type_die) != DW_TAG_array_type) || die_get_real_type(type_die, type_die) == NULL) { pr_debug("no pointer or no type\n"); ann_data_stat.no_typeinfo++; return -1; } } /* Get the size of the actual type */ if (dwarf_aggregate_size(type_die, &size) < 0) { pr_debug("type size is unknown\n"); ann_data_stat.invalid_size++; return -1; } /* Minimal sanity check */ if ((unsigned)offset >= size) { pr_debug("offset: %d is bigger than size: %" PRIu64 "\n", offset, size); ann_data_stat.bad_offset++; return -1; } return 0; } /* The result will be saved in @type_die */ static int find_data_type_die(struct debuginfo *di, u64 pc, u64 addr, const char *var_name, struct annotated_op_loc *loc, Dwarf_Die *type_die) { Dwarf_Die cu_die, var_die; Dwarf_Die *scopes = NULL; int reg, offset; int ret = -1; int i, nr_scopes; int fbreg = -1; bool is_fbreg = false; int fb_offset = 0; /* Get a compile_unit for this address */ if (!find_cu_die(di, pc, &cu_die)) { pr_debug("cannot find CU for address %" PRIx64 "\n", pc); ann_data_stat.no_cuinfo++; return -1; } reg = loc->reg1; offset = loc->offset; if (reg == DWARF_REG_PC) { if (die_find_variable_by_addr(&cu_die, pc, addr, &var_die, &offset)) { ret = check_variable(&var_die, type_die, offset, /*is_pointer=*/false); loc->offset = offset; goto out; } if (var_name && die_find_variable_at(&cu_die, var_name, pc, &var_die)) { ret = check_variable(&var_die, type_die, 0, /*is_pointer=*/false); /* loc->offset will be updated by the caller */ goto out; } } /* Get a list of nested scopes - i.e. (inlined) functions and blocks. */ nr_scopes = die_get_scopes(&cu_die, pc, &scopes); if (reg != DWARF_REG_PC && dwarf_hasattr(&scopes[0], DW_AT_frame_base)) { Dwarf_Attribute attr; Dwarf_Block block; /* Check if the 'reg' is assigned as frame base register */ if (dwarf_attr(&scopes[0], DW_AT_frame_base, &attr) != NULL && dwarf_formblock(&attr, &block) == 0 && block.length == 1) { switch (*block.data) { case DW_OP_reg0 ... DW_OP_reg31: fbreg = *block.data - DW_OP_reg0; break; case DW_OP_call_frame_cfa: if (die_get_cfa(di->dbg, pc, &fbreg, &fb_offset) < 0) fbreg = -1; break; default: break; } } } retry: is_fbreg = (reg == fbreg); if (is_fbreg) offset = loc->offset - fb_offset; /* Search from the inner-most scope to the outer */ for (i = nr_scopes - 1; i >= 0; i--) { if (reg == DWARF_REG_PC) { if (!die_find_variable_by_addr(&scopes[i], pc, addr, &var_die, &offset)) continue; } else { /* Look up variables/parameters in this scope */ if (!die_find_variable_by_reg(&scopes[i], pc, reg, &offset, is_fbreg, &var_die)) continue; } /* Found a variable, see if it's correct */ ret = check_variable(&var_die, type_die, offset, reg != DWARF_REG_PC && !is_fbreg); loc->offset = offset; goto out; } if (loc->multi_regs && reg == loc->reg1 && loc->reg1 != loc->reg2) { reg = loc->reg2; goto retry; } if (ret < 0) ann_data_stat.no_var++; out: free(scopes); return ret; } /** * find_data_type - Return a data type at the location * @ms: map and symbol at the location * @ip: instruction address of the memory access * @loc: instruction operand location * @addr: data address of the memory access * @var_name: global variable name * * This functions searches the debug information of the binary to get the data * type it accesses. The exact location is expressed by (@ip, reg, offset) * for pointer variables or (@ip, @addr) for global variables. Note that global * variables might update the @loc->offset after finding the start of the variable. * If it cannot find a global variable by address, it tried to fine a declaration * of the variable using @var_name. In that case, @loc->offset won't be updated. * * It return %NULL if not found. */ struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip, struct annotated_op_loc *loc, u64 addr, const char *var_name) { struct annotated_data_type *result = NULL; struct dso *dso = map__dso(ms->map); struct debuginfo *di; Dwarf_Die type_die; u64 pc; di = debuginfo__new(dso->long_name); if (di == NULL) { pr_debug("cannot get the debug info\n"); return NULL; } /* * IP is a relative instruction address from the start of the map, as * it can be randomized/relocated, it needs to translate to PC which is * a file address for DWARF processing. */ pc = map__rip_2objdump(ms->map, ip); if (find_data_type_die(di, pc, addr, var_name, loc, &type_die) < 0) goto out; result = dso__findnew_data_type(dso, &type_die); out: debuginfo__delete(di); return result; } static int alloc_data_type_histograms(struct annotated_data_type *adt, int nr_entries) { int i; size_t sz = sizeof(struct type_hist); sz += sizeof(struct type_hist_entry) * adt->self.size; /* Allocate a table of pointers for each event */ adt->nr_histograms = nr_entries; adt->histograms = calloc(nr_entries, sizeof(*adt->histograms)); if (adt->histograms == NULL) return -ENOMEM; /* * Each histogram is allocated for the whole size of the type. * TODO: Probably we can move the histogram to members. */ for (i = 0; i < nr_entries; i++) { adt->histograms[i] = zalloc(sz); if (adt->histograms[i] == NULL) goto err; } return 0; err: while (--i >= 0) free(adt->histograms[i]); free(adt->histograms); return -ENOMEM; } static void delete_data_type_histograms(struct annotated_data_type *adt) { for (int i = 0; i < adt->nr_histograms; i++) free(adt->histograms[i]); free(adt->histograms); } void annotated_data_type__tree_delete(struct rb_root *root) { struct annotated_data_type *pos; while (!RB_EMPTY_ROOT(root)) { struct rb_node *node = rb_first(root); rb_erase(node, root); pos = rb_entry(node, struct annotated_data_type, node); delete_members(&pos->self); delete_data_type_histograms(pos); free(pos->self.type_name); free(pos); } } /** * annotated_data_type__update_samples - Update histogram * @adt: Data type to update * @evsel: Event to update * @offset: Offset in the type * @nr_samples: Number of samples at this offset * @period: Event count at this offset * * This function updates type histogram at @ofs for @evsel. Samples are * aggregated before calling this function so it can be called with more * than one samples at a certain offset. */ int annotated_data_type__update_samples(struct annotated_data_type *adt, struct evsel *evsel, int offset, int nr_samples, u64 period) { struct type_hist *h; if (adt == NULL) return 0; if (adt->histograms == NULL) { int nr = evsel->evlist->core.nr_entries; if (alloc_data_type_histograms(adt, nr) < 0) return -1; } if (offset < 0 || offset >= adt->self.size) return -1; h = adt->histograms[evsel->core.idx]; h->nr_samples += nr_samples; h->addr[offset].nr_samples += nr_samples; h->period += period; h->addr[offset].period += period; return 0; }