• Arnaldo Carvalho de Melo's avatar
    perf bpf skels: Stop using vmlinux.h generated from BTF, use subset of used structs + CO-RE · a8874665
    Arnaldo Carvalho de Melo authored
    Linus reported a build break due to using a vmlinux without a BTF elf
    section to generate the vmlinux.h header with bpftool for use in the BPF
    tools in tools/perf/util/bpf_skel/*.bpf.c.
    
    Instead add a vmlinux.h file with the structs needed with the fields the
    tools need, marking the structs with __attribute__((preserve_access_index)),
    so that libbpf's CO-RE code can fixup the struct field offsets.
    
    In some cases the vmlinux.h file that was being generated by bpftool
    from the kernel BTF information was not needed at all: just including
    linux/bpf.h, and sometimes linux/perf_event.h, was enough, as non-UAPI
    types were not being used.
    
    To keep the patch small, include those UAPI headers from the trimmed down
    vmlinux.h file, that then provides the tools with just the structs and
    the subset of its fields needed for them.
    
    Testing it:
    
      # perf lock contention -b find / > /dev/null
      ^C contended   total wait     max wait     avg wait         type   caller
    
               7     53.59 us     10.86 us      7.66 us     rwlock:R   start_this_handle+0xa0
               2     30.35 us     21.99 us     15.17 us	 rwsem:R   iterate_dir+0x52
               1	  9.04 us      9.04 us      9.04 us     rwlock:W   start_this_handle+0x291
               1	  8.73 us      8.73 us      8.73 us     spinlock   raw_spin_rq_lock_nested+0x1e
      #
      # perf lock contention -abl find / > /dev/null
      ^C contended   total wait     max wait     avg wait            address   symbol
    
               1    262.96 ms    262.96 ms    262.96 ms   ffff8e67502d0170    (mutex)
              12    244.24 us     39.91 us     20.35 us   ffff8e6af56f8070   mmap_lock (rwsem)
               7     30.28 us      6.85 us      4.33 us   ffff8e6c865f1d40   rq_lock (spinlock)
               3	  7.42 us      4.03 us      2.47 us   ffff8e6c864b1d40   rq_lock (spinlock)
               2	  3.72 us      2.19 us      1.86 us   ffff8e6c86571d40   rq_lock (spinlock)
               1	  2.42 us      2.42 us      2.42 us   ffff8e6c86471d40   rq_lock (spinlock)
               4	  2.11 us	559 ns       527 ns   ffffffff9a146c80   rcu_state (spinlock)
               3	  1.45 us	818 ns       482 ns   ffff8e674ae8384c    (rwlock)
               1	   870 ns	870 ns       870 ns   ffff8e68456ee060    (rwlock)
               1	   663 ns	663 ns       663 ns   ffff8e6c864f1d40   rq_lock (spinlock)
               1	   573 ns	573 ns       573 ns   ffff8e6c86531d40   rq_lock (spinlock)
               1	   472 ns	472 ns       472 ns   ffff8e6c86431740    (spinlock)
               1	   397 ns	397 ns       397 ns   ffff8e67413a4f04    (spinlock)
      #
      # perf test offcpu
      95: perf record offcpu profiling tests                              : Ok
      #
      # perf kwork latency --use-bpf
      Starting trace, Hit <Ctrl+C> to stop and report
      ^C
        Kwork Name                     | Cpu  | Avg delay     | Count     | Max delay     | Max delay start     | Max delay end	  |
       --------------------------------------------------------------------------------------------------------------------------------
        (w)flush_memcg_stats_dwork     | 0000 |   1056.212 ms |         2 |   2112.345 ms |     550113.229573 s |     550115.341919 s |
        (w)toggle_allocation_gate	   | 0000 |     10.144 ms |        62 |    416.389 ms |     550113.453518 s |     550113.869907 s |
        (w)0xffff8e6748e28080          | 0002 |	 0.623 ms |         1 |      0.623 ms |     550110.989841 s |     550110.990464 s |
        (w)vmstat_shepherd             | 0000 |	 0.586 ms |        10 |      2.828 ms |     550111.971536 s |     550111.974364 s |
        (w)vmstat_update               | 0007 |	 0.363 ms |         5 |      1.634 ms |     550113.222520 s |     550113.224154 s |
        (w)vmstat_update               | 0000 |	 0.324 ms |        10 |      2.827 ms |     550111.971526 s |     550111.974354 s |
        (w)0xffff8e674c5f4a58          | 0002 |	 0.102 ms |         5 |      0.134 ms |     550110.989839 s |     550110.989972 s |
        (w)psi_avgs_work               | 0001 |	 0.086 ms |         3 |      0.107 ms |     550114.957852 s |     550114.957959 s |
        (w)psi_avgs_work               | 0000 |	 0.079 ms |         5 |      0.100 ms |     550118.605668 s |     550118.605768 s |
        (w)kfree_rcu_monitor           | 0006 |	 0.079 ms |         1 |      0.079 ms |     550110.925821 s |     550110.925900 s |
        (w)psi_avgs_work               | 0004 |	 0.079 ms |         1 |      0.079 ms |     550109.581835 s |     550109.581914 s |
        (w)psi_avgs_work               | 0001 |	 0.078 ms |         1 |      0.078 ms |     550109.197809 s |     550109.197887 s |
        (w)psi_avgs_work               | 0002 |	 0.077 ms |         5 |      0.086 ms |     550110.669819 s |     550110.669905 s |
      <SNIP>
      # strace -e bpf -o perf-stat-bpf-counters.output perf stat -e cycles --bpf-counters sleep 1
    
       Performance counter stats for 'sleep 1':
    
               6,197,983	  cycles
    
             1.003922848 seconds time elapsed
    
             0.000000000 seconds user
             0.002032000 seconds sys
    
      # head -7 perf-stat-bpf-counters.output
      bpf(BPF_OBJ_GET, {pathname="/sys/fs/bpf/perf_attr_map", bpf_fd=0, file_flags=0}, 16) = 3
      bpf(BPF_OBJ_GET_INFO_BY_FD, {info={bpf_fd=3, info_len=88, info=0x7ffcead64990}}, 16) = 0
      bpf(BPF_MAP_LOOKUP_ELEM, {map_fd=3, key=0x24129e0, value=0x7ffcead65a48, flags=BPF_ANY}, 32) = 0
      bpf(BPF_LINK_GET_FD_BY_ID, {link_id=1252}, 12) = -1 ENOENT (No such file or directory)
      bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_SOCKET_FILTER, insn_cnt=2, insns=0x7ffcead65780, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0,
    +func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0}, 116) = 4
      bpf(BPF_PROG_LOAD, {prog_type=BPF_PROG_TYPE_SOCKET_FILTER, insn_cnt=2, insns=0x7ffcead65920, license="GPL", log_level=0, log_size=0, log_buf=NULL, kern_version=KERNEL_VERSION(0, 0, 0), prog_flags=0, prog_name="", prog_ifindex=0, expected_attach_type=BPF_CGROUP_INET_INGRESS, prog_btf_fd=0, func_info_rec_size=0,
    +func_info=NULL, func_info_cnt=0, line_info_rec_size=0, line_info=NULL, line_info_cnt=0, attach_btf_id=0, attach_prog_fd=0, fd_array=NULL}, 128) = 4
      bpf(BPF_BTF_LOAD, {btf="\237\353\1\0\30\0\0\0\0\0\0\0\20\0\0\0\20\0\0\0\5\0\0\0\1\0\0\0\0\0\0\1"..., btf_log_buf=NULL, btf_size=45, btf_log_size=0, btf_log_level=0}, 28) = 4
      #
    Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
    Suggested-by: Andrii Nakryiko <andrii@kernel.org>
    Tested-by: Namhyung Kim <namhyung@kernel.org>
    Tested-by: Song Liu <song@kernel.org>
    Cc: Adrian Hunter <adrian.hunter@intel.com>
    Cc: Alexei Starovoitov <ast@kernel.org>
    Cc: Ian Rogers <irogers@google.com>
    Cc: Jiri Olsa <jolsa@kernel.org>
    Co-developed-by: Jiri Olsa <jolsa@kernel.org>
    Link: https://lore.kernel.org/lkml/ZFU1PJrn8YtHIqno@kernel.org
    Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
    a8874665
Makefile.perf 39.8 KB