• Artem Savkov's avatar
    perf report: Append inlines to non-DWARF callchains · 46d21ec0
    Artem Savkov authored
    Append information about inlined functions to FP and LBR callchains from
    DWARF debuginfo when available. Do so by calling append_inlines() from
    add_callchain_ip().
    
    Testing it:
    
    Frame-pointer mode recorded with 'perf record --call-graph=fp --freq=max -- ./a.out'
    
      #include <stdio.h>
      #include <stdint.h>
    
      /*
       * Innermost function of the test call chain: returns i + 10.
       * Marked noinline, which asks the compiler not to inline it; in the
       * perf report quoted in this message it shows up as a plain "func5"
       * symbol without the "(inlined)" tag.
       */
      static __attribute__((noinline)) uint32_t func5(uint32_t i)
      {
              return i + 10;
      }
    
      /*
       * Returns func5(i + 5).
       * Plain static with no inlining attribute, so inlining is left to the
       * compiler; the perf report quoted in this message lists it as
       * "func4 (inlined)".
       */
      static uint32_t func4(uint32_t i)
      {
              return func5(i + 5);
      }
    
      /*
       * Returns func4(i + 4).
       * Declared static inline; the perf report quoted in this message lists
       * it as "func3 (inlined)".
       */
      static inline uint32_t func3(uint32_t i)
      {
              return func4(i + 4);
      }
    
      /*
       * Returns func3(i + 3).
       * Marked noinline, which asks the compiler not to inline it; it has its
       * own "func2" entry in the perf report quoted in this message.
       */
      static __attribute__((noinline)) uint32_t func2(uint32_t i)
      {
              return func3(i + 3);
      }
    
      /*
       * Outermost helper of the chain: returns func2(i + 2).
       * Plain static with no inlining attribute; the perf report quoted in
       * this message lists it as "func1 (inlined)".
       */
      static uint32_t func1(uint32_t i)
      {
              return func2(i + 2);
      }
    
      /*
       * Workload driver: runs 1,000,000 iterations, calling each of
       * func1..func5 directly with the loop counter and folding the results
       * into a 64-bit accumulator, which is returned.
       * Marked noinline. Presumably each helper is called directly as well as
       * through the nested chain so that the samples cover several different
       * inlined and non-inlined callchains (intent not stated in the message).
       */
      __attribute__((noinline)) uint64_t entry(void)
      {
              uint64_t ret = 0;
              uint32_t i = 0;
              for (i = 0; i < 1000000; i++) {
                      /* Results are alternately added and subtracted. */
                      ret += func1(i);
                      ret -= func2(i);
                      ret += func3(i);
                      ret += func4(i);
                      ret -= func5(i);
              }
              return ret;
      }
    
      /*
       * Program entry point: prints its own function name ("main") and then
       * runs the workload. argc and argv are not used.
       * The uint64_t result of entry() is implicitly converted to int for the
       * return value.
       */
      int main(int argc, char **argv)
      {
              printf("%s\n", __func__);
              return entry();
      }
      ======
    
    Here is the output I get with '--call-graph callee --no-children'
    
      ======
      # To display the perf.data header info, please use --header/--header-only options.
      #
      #
      # Total Lost Samples: 0
      #
      # Samples: 250  of event 'cycles:u'
      # Event count (approx.): 26819859
      #
      # Overhead  Command  Shared Object         Symbol
      # ........  .......  ....................  .....................................
      #
          43.58%  a.out    a.out                 [.] func5
                  |
                  |--28.93%--entry
                  |          main
                  |          __libc_start_call_main
                  |
                   --14.65%--func4 (inlined)
                             |
                             |--10.45%--entry
                             |          main
                             |          __libc_start_call_main
                             |
                              --4.20%--func3 (inlined)
                                        entry
                                        main
                                        __libc_start_call_main
    
          38.80%  a.out    a.out                 [.] entry
                  |
                  |--23.27%--func4 (inlined)
                  |          |
                  |          |--20.28%--func3 (inlined)
                  |          |          func2
                  |          |          main
                  |          |          __libc_start_call_main
                  |          |
                  |           --2.99%--entry
                  |                     main
                  |                     __libc_start_call_main
                  |
                  |--8.17%--func5
                  |          main
                  |          __libc_start_call_main
                  |
                  |--3.89%--func1 (inlined)
                  |          entry
                  |          main
                  |          __libc_start_call_main
                  |
                   --3.48%--entry
                             main
                             __libc_start_call_main
    
          13.07%  a.out    a.out                 [.] func2
                  |
                  ---func5
                     main
                     __libc_start_call_main
    
           1.54%  a.out    [unknown]             [k] 0xffffffff81e011b7
           1.16%  a.out    [unknown]             [k] 0xffffffff81e00193
                  |
                   --0.57%--__mmap64 (inlined)
                             __mmap64 (inlined)
    
           0.34%  a.out    ld-linux-x86-64.so.2  [.] __tunable_get_val
           0.34%  a.out    ld-linux-x86-64.so.2  [.] strcmp
           0.32%  a.out    libc.so.6             [.] strchr
           0.31%  a.out    ld-linux-x86-64.so.2  [.] _dl_relocate_object
           0.22%  a.out    ld-linux-x86-64.so.2  [.] _dl_init_paths
           0.18%  a.out    ld-linux-x86-64.so.2  [.] get_common_cache_info.constprop.0
           0.14%  a.out    ld-linux-x86-64.so.2  [.] __GI___tunables_init
    
      #
      # (Tip: Show individual samples with: perf script)
      #
      ======
    
      This output does not seem to be out of order, or at least it is
      consistent with what I get with the DWARF unwinders.
    
    Committer notes:
    
    Adrian Hunter pointed out that this breaks --branch-history, so don't do
    it for branches, see the second Link below.
    
    Suggested-by: Andrii Nakryiko <andrii.nakryiko@gmail.com>
    Signed-off-by: Artem Savkov <asavkov@redhat.com>
    Acked-by: Namhyung Kim <namhyung@kernel.org>
    Cc: Adrian Hunter <adrian.hunter@intel.com>
    Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
    Cc: Ian Rogers <irogers@google.com>
    Cc: Ingo Molnar <mingo@redhat.com>
    Cc: Jiri Olsa <jolsa@kernel.org>
    Cc: Mark Rutland <mark.rutland@arm.com>
    Cc: Masami Hiramatsu <mhiramat@kernel.org>
    Cc: Milian Wolff <milian.wolff@kdab.com>
    Cc: Peter Zijlstra <peterz@infradead.org>
    Link: https://lore.kernel.org/r/20230316133557.868731-2-asavkov@redhat.com
    Link: https://lore.kernel.org/r/54129783-2960-84e1-05e9-97ac70ffb432@intel.com
    Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
    46d21ec0
machine.c 83 KB