Commit b5b65f13 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'perf-tools-for-v5.15-2021-09-11' of...

Merge tag 'perf-tools-for-v5.15-2021-09-11' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull more perf tools updates from Arnaldo Carvalho de Melo:

 - Add missing fields and remove some duplicate fields when printing a
   perf_event_attr.

 - Fix hybrid config terms list corruption.

 - Update kernel header copies, some resulted in new kernel features
   being automagically added to 'perf trace' syscall/tracepoint argument
   id->string translators.

 - Add a file generated during the documentation build to .gitignore.

 - Add an option to build without libbfd, as some distros, like Debian
   consider its ABI unstable.

 - Add support to print a textual representation of IBS raw sample data
   in 'perf report'.

 - Fix bpf 'perf test' sample mismatch reporting

 - Fix passing arguments to stackcollapse report in a 'perf script'
   python script.

 - Allow build-id with trailing zeros.

 - Look for ImageBase in PE file to compute .text offset.

* tag 'perf-tools-for-v5.15-2021-09-11' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (25 commits)
  tools headers UAPI: Update tools's copy of drm.h headers
  tools headers UAPI: Sync drm/i915_drm.h with the kernel sources
  tools headers UAPI: Sync linux/fs.h with the kernel sources
  tools headers UAPI: Sync linux/in.h copy with the kernel sources
  perf tools: Add an option to build without libbfd
  perf tools: Allow build-id with trailing zeros
  perf tools: Fix hybrid config terms list corruption
  perf tools: Factor out copy_config_terms() and free_config_terms()
  perf tools: Fix perf_event_attr__fprintf() missing/dupl. fields
  perf tools: Ignore Documentation dependency file
  perf bpf: Provide a weak btf__load_from_kernel_by_id() for older libbpf versions
  tools include UAPI: Update linux/mount.h copy
  perf beauty: Cover more flags in the  move_mount syscall argument beautifier
  tools headers UAPI: Sync linux/prctl.h with the kernel sources
  tools include UAPI: Sync sound/asound.h copy with the kernel sources
  tools headers UAPI: Sync linux/kvm.h with the kernel sources
  tools headers UAPI: Sync x86's asm/kvm.h with the kernel sources
  perf report: Add support to print a textual representation of IBS raw sample data
  perf report: Add tools/arch/x86/include/asm/amd-ibs.h
  perf env: Add perf_env__cpuid, perf_env__{nr_}pmu_mappings
  ...
parents c3e46874 17a99e52
/* SPDX-License-Identifier: GPL-2.0 */
/*
* From PPR Vol 1 for AMD Family 19h Model 01h B1
* 55898 Rev 0.35 - Feb 5, 2021
*/
#include "msr-index.h"
/*
* IBS Hardware MSRs
*/
/* MSR 0xc0011030: IBS Fetch Control */
union ibs_fetch_ctl {
__u64 val;
struct {
__u64 fetch_maxcnt:16,/* 0-15: instruction fetch max. count */
fetch_cnt:16, /* 16-31: instruction fetch count */
fetch_lat:16, /* 32-47: instruction fetch latency */
fetch_en:1, /* 48: instruction fetch enable */
fetch_val:1, /* 49: instruction fetch valid */
fetch_comp:1, /* 50: instruction fetch complete */
ic_miss:1, /* 51: i-cache miss */
phy_addr_valid:1,/* 52: physical address valid */
l1tlb_pgsz:2, /* 53-54: i-cache L1TLB page size
* (needs IbsPhyAddrValid) */
l1tlb_miss:1, /* 55: i-cache fetch missed in L1TLB */
l2tlb_miss:1, /* 56: i-cache fetch missed in L2TLB */
rand_en:1, /* 57: random tagging enable */
fetch_l2_miss:1,/* 58: L2 miss for sampled fetch
* (needs IbsFetchComp) */
reserved:5; /* 59-63: reserved */
};
};
/* MSR 0xc0011033: IBS Execution Control */
union ibs_op_ctl {
__u64 val;
struct {
__u64 opmaxcnt:16, /* 0-15: periodic op max. count */
reserved0:1, /* 16: reserved */
op_en:1, /* 17: op sampling enable */
op_val:1, /* 18: op sample valid */
cnt_ctl:1, /* 19: periodic op counter control */
opmaxcnt_ext:7, /* 20-26: upper 7 bits of periodic op maximum count */
reserved1:5, /* 27-31: reserved */
opcurcnt:27, /* 32-58: periodic op counter current count */
reserved2:5; /* 59-63: reserved */
};
};
/* MSR 0xc0011035: IBS Op Data 2 */
union ibs_op_data {
__u64 val;
struct {
__u64 comp_to_ret_ctr:16, /* 0-15: op completion to retire count */
tag_to_ret_ctr:16, /* 15-31: op tag to retire count */
reserved1:2, /* 32-33: reserved */
op_return:1, /* 34: return op */
op_brn_taken:1, /* 35: taken branch op */
op_brn_misp:1, /* 36: mispredicted branch op */
op_brn_ret:1, /* 37: branch op retired */
op_rip_invalid:1, /* 38: RIP is invalid */
op_brn_fuse:1, /* 39: fused branch op */
op_microcode:1, /* 40: microcode op */
reserved2:23; /* 41-63: reserved */
};
};
/* MSR 0xc0011036: IBS Op Data 2 */
union ibs_op_data2 {
__u64 val;
struct {
__u64 data_src:3, /* 0-2: data source */
reserved0:1, /* 3: reserved */
rmt_node:1, /* 4: destination node */
cache_hit_st:1, /* 5: cache hit state */
reserved1:57; /* 5-63: reserved */
};
};
/* MSR 0xc0011037: IBS Op Data 3 */
union ibs_op_data3 {
__u64 val;
struct {
__u64 ld_op:1, /* 0: load op */
st_op:1, /* 1: store op */
dc_l1tlb_miss:1, /* 2: data cache L1TLB miss */
dc_l2tlb_miss:1, /* 3: data cache L2TLB hit in 2M page */
dc_l1tlb_hit_2m:1, /* 4: data cache L1TLB hit in 2M page */
dc_l1tlb_hit_1g:1, /* 5: data cache L1TLB hit in 1G page */
dc_l2tlb_hit_2m:1, /* 6: data cache L2TLB hit in 2M page */
dc_miss:1, /* 7: data cache miss */
dc_mis_acc:1, /* 8: misaligned access */
reserved:4, /* 9-12: reserved */
dc_wc_mem_acc:1, /* 13: write combining memory access */
dc_uc_mem_acc:1, /* 14: uncacheable memory access */
dc_locked_op:1, /* 15: locked operation */
dc_miss_no_mab_alloc:1, /* 16: DC miss with no MAB allocated */
dc_lin_addr_valid:1, /* 17: data cache linear address valid */
dc_phy_addr_valid:1, /* 18: data cache physical address valid */
dc_l2_tlb_hit_1g:1, /* 19: data cache L2 hit in 1GB page */
l2_miss:1, /* 20: L2 cache miss */
sw_pf:1, /* 21: software prefetch */
op_mem_width:4, /* 22-25: load/store size in bytes */
op_dc_miss_open_mem_reqs:6, /* 26-31: outstanding mem reqs on DC fill */
dc_miss_lat:16, /* 32-47: data cache miss latency */
tlb_refill_lat:16; /* 48-63: L1 TLB refill latency */
};
};
/* MSR 0xc001103c: IBS Fetch Control Extended */
union ic_ibs_extd_ctl {
__u64 val;
struct {
__u64 itlb_refill_lat:16, /* 0-15: ITLB Refill latency for sampled fetch */
reserved:48; /* 16-63: reserved */
};
};
/*
* IBS driver related
*/
struct perf_ibs_data {
u32 size;
union {
u32 data[0]; /* data buffer starts here */
u32 caps;
};
u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
};
......@@ -295,6 +295,7 @@ struct kvm_debug_exit_arch {
#define KVM_GUESTDBG_USE_HW_BP 0x00020000
#define KVM_GUESTDBG_INJECT_DB 0x00040000
#define KVM_GUESTDBG_INJECT_BP 0x00080000
#define KVM_GUESTDBG_BLOCKIRQ 0x00100000
/* for KVM_SET_GUEST_DEBUG */
struct kvm_guest_debug_arch {
......
......@@ -673,15 +673,15 @@ __SYSCALL(__NR_madvise, sys_madvise)
#define __NR_remap_file_pages 234
__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages)
#define __NR_mbind 235
__SC_COMP(__NR_mbind, sys_mbind, compat_sys_mbind)
__SYSCALL(__NR_mbind, sys_mbind)
#define __NR_get_mempolicy 236
__SC_COMP(__NR_get_mempolicy, sys_get_mempolicy, compat_sys_get_mempolicy)
__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy)
#define __NR_set_mempolicy 237
__SC_COMP(__NR_set_mempolicy, sys_set_mempolicy, compat_sys_set_mempolicy)
__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy)
#define __NR_migrate_pages 238
__SC_COMP(__NR_migrate_pages, sys_migrate_pages, compat_sys_migrate_pages)
__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
#define __NR_move_pages 239
__SC_COMP(__NR_move_pages, sys_move_pages, compat_sys_move_pages)
__SYSCALL(__NR_move_pages, sys_move_pages)
#endif
#define __NR_rt_tgsigqueueinfo 240
......@@ -877,9 +877,11 @@ __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
#define __NR_memfd_secret 447
__SYSCALL(__NR_memfd_secret, sys_memfd_secret)
#endif
#define __NR_process_mrelease 448
__SYSCALL(__NR_process_mrelease, sys_process_mrelease)
#undef __NR_syscalls
#define __NR_syscalls 448
#define __NR_syscalls 449
/*
* 32 bit systems traditionally used different
......
......@@ -635,8 +635,8 @@ struct drm_gem_open {
/**
* DRM_CAP_VBLANK_HIGH_CRTC
*
* If set to 1, the kernel supports specifying a CRTC index in the high bits of
* &drm_wait_vblank_request.type.
* If set to 1, the kernel supports specifying a :ref:`CRTC index<crtc_index>`
* in the high bits of &drm_wait_vblank_request.type.
*
* Starting kernel version 2.6.39, this capability is always set to 1.
*/
......@@ -1050,6 +1050,16 @@ extern "C" {
#define DRM_IOCTL_MODE_GETPROPBLOB DRM_IOWR(0xAC, struct drm_mode_get_blob)
#define DRM_IOCTL_MODE_GETFB DRM_IOWR(0xAD, struct drm_mode_fb_cmd)
#define DRM_IOCTL_MODE_ADDFB DRM_IOWR(0xAE, struct drm_mode_fb_cmd)
/**
* DRM_IOCTL_MODE_RMFB - Remove a framebuffer.
*
* This removes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL
* argument is a framebuffer object ID.
*
* Warning: removing a framebuffer currently in-use on an enabled plane will
* disable that plane. The CRTC the plane is linked to may also be disabled
* (depending on driver capabilities).
*/
#define DRM_IOCTL_MODE_RMFB DRM_IOWR(0xAF, unsigned int)
#define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip)
#define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd)
......
......@@ -572,6 +572,15 @@ typedef struct drm_i915_irq_wait {
#define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2)
#define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3)
#define I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4)
/*
* Indicates the 2k user priority levels are statically mapped into 3 buckets as
* follows:
*
* -1k to -1 Low priority
* 0 Normal priority
* 1 to 1k Highest priority
*/
#define I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP (1ul << 5)
#define I915_PARAM_HUC_STATUS 42
......@@ -674,6 +683,9 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
/* Query if the kernel supports the I915_USERPTR_PROBE flag. */
#define I915_PARAM_HAS_USERPTR_PROBE 56
/* Must be kept compact -- no holes and well documented */
typedef struct drm_i915_getparam {
......@@ -849,45 +861,113 @@ struct drm_i915_gem_mmap_gtt {
__u64 offset;
};
/**
* struct drm_i915_gem_mmap_offset - Retrieve an offset so we can mmap this buffer object.
*
* This struct is passed as argument to the `DRM_IOCTL_I915_GEM_MMAP_OFFSET` ioctl,
* and is used to retrieve the fake offset to mmap an object specified by &handle.
*
* The legacy way of using `DRM_IOCTL_I915_GEM_MMAP` is removed on gen12+.
* `DRM_IOCTL_I915_GEM_MMAP_GTT` is an older supported alias to this struct, but will behave
* as setting the &extensions to 0, and &flags to `I915_MMAP_OFFSET_GTT`.
*/
struct drm_i915_gem_mmap_offset {
/** Handle for the object being mapped. */
/** @handle: Handle for the object being mapped. */
__u32 handle;
/** @pad: Must be zero */
__u32 pad;
/**
* Fake offset to use for subsequent mmap call
* @offset: The fake offset to use for subsequent mmap call
*
* This is a fixed-size type for 32/64 compatibility.
*/
__u64 offset;
/**
* Flags for extended behaviour.
* @flags: Flags for extended behaviour.
*
* It is mandatory that one of the MMAP_OFFSET types
* (GTT, WC, WB, UC, etc) should be included.
* It is mandatory that one of the `MMAP_OFFSET` types
* should be included:
*
* - `I915_MMAP_OFFSET_GTT`: Use mmap with the object bound to GTT. (Write-Combined)
* - `I915_MMAP_OFFSET_WC`: Use Write-Combined caching.
* - `I915_MMAP_OFFSET_WB`: Use Write-Back caching.
* - `I915_MMAP_OFFSET_FIXED`: Use object placement to determine caching.
*
* On devices with local memory `I915_MMAP_OFFSET_FIXED` is the only valid
* type. On devices without local memory, this caching mode is invalid.
*
* As caching mode when specifying `I915_MMAP_OFFSET_FIXED`, WC or WB will
* be used, depending on the object placement on creation. WB will be used
* when the object can only exist in system memory, WC otherwise.
*/
__u64 flags;
#define I915_MMAP_OFFSET_GTT 0
#define I915_MMAP_OFFSET_WC 1
#define I915_MMAP_OFFSET_WB 2
#define I915_MMAP_OFFSET_UC 3
#define I915_MMAP_OFFSET_FIXED 4
/*
* Zero-terminated chain of extensions.
/**
* @extensions: Zero-terminated chain of extensions.
*
* No current extensions defined; mbz.
*/
__u64 extensions;
};
/**
* struct drm_i915_gem_set_domain - Adjust the objects write or read domain, in
* preparation for accessing the pages via some CPU domain.
*
* Specifying a new write or read domain will flush the object out of the
* previous domain(if required), before then updating the objects domain
* tracking with the new domain.
*
* Note this might involve waiting for the object first if it is still active on
* the GPU.
*
* Supported values for @read_domains and @write_domain:
*
* - I915_GEM_DOMAIN_WC: Uncached write-combined domain
* - I915_GEM_DOMAIN_CPU: CPU cache domain
* - I915_GEM_DOMAIN_GTT: Mappable aperture domain
*
* All other domains are rejected.
*
* Note that for discrete, starting from DG1, this is no longer supported, and
* is instead rejected. On such platforms the CPU domain is effectively static,
* where we also only support a single &drm_i915_gem_mmap_offset cache mode,
* which can't be set explicitly and instead depends on the object placements,
* as per the below.
*
* Implicit caching rules, starting from DG1:
*
* - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions)
* contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and
* mapped as write-combined only.
*
* - Everything else is always allocated and mapped as write-back, with the
* guarantee that everything is also coherent with the GPU.
*
* Note that this is likely to change in the future again, where we might need
* more flexibility on future devices, so making this all explicit as part of a
* new &drm_i915_gem_create_ext extension is probable.
*/
struct drm_i915_gem_set_domain {
/** Handle for the object */
/** @handle: Handle for the object. */
__u32 handle;
/** New read domains */
/** @read_domains: New read domains. */
__u32 read_domains;
/** New write domain */
/**
* @write_domain: New write domain.
*
* Note that having something in the write domain implies it's in the
* read domain, and only that read domain.
*/
__u32 write_domain;
};
......@@ -1348,12 +1428,11 @@ struct drm_i915_gem_busy {
* reading from the object simultaneously.
*
* The value of each engine class is the same as specified in the
* I915_CONTEXT_SET_ENGINES parameter and via perf, i.e.
* I915_CONTEXT_PARAM_ENGINES context parameter and via perf, i.e.
* I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc.
* reported as active itself. Some hardware may have parallel
* execution engines, e.g. multiple media engines, which are
* mapped to the same class identifier and so are not separately
* reported for busyness.
* Some hardware may have parallel execution engines, e.g. multiple
* media engines, which are mapped to the same class identifier and so
* are not separately reported for busyness.
*
* Caveat emptor:
* Only the boolean result of this query is reliable; that is whether
......@@ -1364,43 +1443,79 @@ struct drm_i915_gem_busy {
};
/**
* I915_CACHING_NONE
*
* GPU access is not coherent with cpu caches. Default for machines without an
* LLC.
*/
#define I915_CACHING_NONE 0
/**
* I915_CACHING_CACHED
*
* GPU access is coherent with cpu caches and furthermore the data is cached in
* last-level caches shared between cpu cores and the gpu GT. Default on
* machines with HAS_LLC.
*/
#define I915_CACHING_CACHED 1
/**
* I915_CACHING_DISPLAY
*
* Special GPU caching mode which is coherent with the scanout engines.
* Transparently falls back to I915_CACHING_NONE on platforms where no special
* cache mode (like write-through or gfdt flushing) is available. The kernel
* automatically sets this mode when using a buffer as a scanout target.
* Userspace can manually set this mode to avoid a costly stall and clflush in
* the hotpath of drawing the first frame.
* struct drm_i915_gem_caching - Set or get the caching for given object
* handle.
*
* Allow userspace to control the GTT caching bits for a given object when the
* object is later mapped through the ppGTT(or GGTT on older platforms lacking
* ppGTT support, or if the object is used for scanout). Note that this might
* require unbinding the object from the GTT first, if its current caching value
* doesn't match.
*
* Note that this all changes on discrete platforms, starting from DG1, the
* set/get caching is no longer supported, and is now rejected. Instead the CPU
* caching attributes(WB vs WC) will become an immutable creation time property
* for the object, along with the GTT caching level. For now we don't expose any
* new uAPI for this, instead on DG1 this is all implicit, although this largely
* shouldn't matter since DG1 is coherent by default(without any way of
* controlling it).
*
* Implicit caching rules, starting from DG1:
*
* - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions)
* contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and
* mapped as write-combined only.
*
* - Everything else is always allocated and mapped as write-back, with the
* guarantee that everything is also coherent with the GPU.
*
* Note that this is likely to change in the future again, where we might need
* more flexibility on future devices, so making this all explicit as part of a
* new &drm_i915_gem_create_ext extension is probable.
*
* Side note: Part of the reason for this is that changing the at-allocation-time CPU
* caching attributes for the pages might be required(and is expensive) if we
* need to then CPU map the pages later with different caching attributes. This
* inconsistent caching behaviour, while supported on x86, is not universally
* supported on other architectures. So for simplicity we opt for setting
* everything at creation time, whilst also making it immutable, on discrete
* platforms.
*/
#define I915_CACHING_DISPLAY 2
struct drm_i915_gem_caching {
/**
* Handle of the buffer to set/get the caching level of. */
* @handle: Handle of the buffer to set/get the caching level.
*/
__u32 handle;
/**
* Cacheing level to apply or return value
* @caching: The GTT caching level to apply or possible return value.
*
* The supported @caching values:
*
* I915_CACHING_NONE:
*
* GPU access is not coherent with CPU caches. Default for machines
* without an LLC. This means manual flushing might be needed, if we
* want GPU access to be coherent.
*
* bits0-15 are for generic caching control (i.e. the above defined
* values). bits16-31 are reserved for platform-specific variations
* (e.g. l3$ caching on gen7). */
* I915_CACHING_CACHED:
*
* GPU access is coherent with CPU caches and furthermore the data is
* cached in last-level caches shared between CPU cores and the GPU GT.
*
* I915_CACHING_DISPLAY:
*
* Special GPU caching mode which is coherent with the scanout engines.
* Transparently falls back to I915_CACHING_NONE on platforms where no
* special cache mode (like write-through or gfdt flushing) is
* available. The kernel automatically sets this mode when using a
* buffer as a scanout target. Userspace can manually set this mode to
* avoid a costly stall and clflush in the hotpath of drawing the first
* frame.
*/
#define I915_CACHING_NONE 0
#define I915_CACHING_CACHED 1
#define I915_CACHING_DISPLAY 2
__u32 caching;
};
......@@ -1639,6 +1754,10 @@ struct drm_i915_gem_context_param {
__u32 size;
__u64 param;
#define I915_CONTEXT_PARAM_BAN_PERIOD 0x1
/* I915_CONTEXT_PARAM_NO_ZEROMAP has been removed. On the off chance
* someone somewhere has attempted to use it, never re-use this context
* param number.
*/
#define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2
#define I915_CONTEXT_PARAM_GTT_SIZE 0x3
#define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4
......@@ -1723,24 +1842,8 @@ struct drm_i915_gem_context_param {
*/
#define I915_CONTEXT_PARAM_PERSISTENCE 0xb
/*
* I915_CONTEXT_PARAM_RINGSIZE:
*
* Sets the size of the CS ringbuffer to use for logical ring contexts. This
* applies a limit of how many batches can be queued to HW before the caller
* is blocked due to lack of space for more commands.
*
* Only reliably possible to be set prior to first use, i.e. during
* construction. At any later point, the current execution must be flushed as
* the ring can only be changed while the context is idle. Note, the ringsize
* can be specified as a constructor property, see
* I915_CONTEXT_CREATE_EXT_SETPARAM, but can also be set later if required.
*
* Only applies to the current set of engine and lost when those engines
* are replaced by a new mapping (see I915_CONTEXT_PARAM_ENGINES).
*
* Must be between 4 - 512 KiB, in intervals of page size [4 KiB].
* Default is 16 KiB.
/* This API has been removed. On the off chance someone somewhere has
* attempted to use it, never re-use this context param number.
*/
#define I915_CONTEXT_PARAM_RINGSIZE 0xc
/* Must be kept compact -- no holes and well documented */
......@@ -1807,6 +1910,69 @@ struct drm_i915_gem_context_param_sseu {
__u32 rsvd;
};
/**
* DOC: Virtual Engine uAPI
*
* Virtual engine is a concept where userspace is able to configure a set of
* physical engines, submit a batch buffer, and let the driver execute it on any
* engine from the set as it sees fit.
*
* This is primarily useful on parts which have multiple instances of a same
* class engine, like for example GT3+ Skylake parts with their two VCS engines.
*
* For instance userspace can enumerate all engines of a certain class using the
* previously described `Engine Discovery uAPI`_. After that userspace can
* create a GEM context with a placeholder slot for the virtual engine (using
* `I915_ENGINE_CLASS_INVALID` and `I915_ENGINE_CLASS_INVALID_NONE` for class
* and instance respectively) and finally using the
* `I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE` extension place a virtual engine in
* the same reserved slot.
*
* Example of creating a virtual engine and submitting a batch buffer to it:
*
* .. code-block:: C
*
* I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(virtual, 2) = {
* .base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE,
* .engine_index = 0, // Place this virtual engine into engine map slot 0
* .num_siblings = 2,
* .engines = { { I915_ENGINE_CLASS_VIDEO, 0 },
* { I915_ENGINE_CLASS_VIDEO, 1 }, },
* };
* I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1) = {
* .engines = { { I915_ENGINE_CLASS_INVALID,
* I915_ENGINE_CLASS_INVALID_NONE } },
* .extensions = to_user_pointer(&virtual), // Chains after load_balance extension
* };
* struct drm_i915_gem_context_create_ext_setparam p_engines = {
* .base = {
* .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
* },
* .param = {
* .param = I915_CONTEXT_PARAM_ENGINES,
* .value = to_user_pointer(&engines),
* .size = sizeof(engines),
* },
* };
* struct drm_i915_gem_context_create_ext create = {
* .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
* .extensions = to_user_pointer(&p_engines);
* };
*
* ctx_id = gem_context_create_ext(drm_fd, &create);
*
* // Now we have created a GEM context with its engine map containing a
* // single virtual engine. Submissions to this slot can go either to
* // vcs0 or vcs1, depending on the load balancing algorithm used inside
* // the driver. The load balancing is dynamic from one batch buffer to
* // another and transparent to userspace.
*
* ...
* execbuf.rsvd1 = ctx_id;
* execbuf.flags = 0; // Submits to index 0 which is the virtual engine
* gem_execbuf(drm_fd, &execbuf);
*/
/*
* i915_context_engines_load_balance:
*
......@@ -1883,6 +2049,61 @@ struct i915_context_engines_bond {
struct i915_engine_class_instance engines[N__]; \
} __attribute__((packed)) name__
/**
* DOC: Context Engine Map uAPI
*
* Context engine map is a new way of addressing engines when submitting batch-
* buffers, replacing the existing way of using identifiers like `I915_EXEC_BLT`
* inside the flags field of `struct drm_i915_gem_execbuffer2`.
*
* To use it created GEM contexts need to be configured with a list of engines
* the user is intending to submit to. This is accomplished using the
* `I915_CONTEXT_PARAM_ENGINES` parameter and `struct
* i915_context_param_engines`.
*
* For such contexts the `I915_EXEC_RING_MASK` field becomes an index into the
* configured map.
*
* Example of creating such context and submitting against it:
*
* .. code-block:: C
*
* I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = {
* .engines = { { I915_ENGINE_CLASS_RENDER, 0 },
* { I915_ENGINE_CLASS_COPY, 0 } }
* };
* struct drm_i915_gem_context_create_ext_setparam p_engines = {
* .base = {
* .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
* },
* .param = {
* .param = I915_CONTEXT_PARAM_ENGINES,
* .value = to_user_pointer(&engines),
* .size = sizeof(engines),
* },
* };
* struct drm_i915_gem_context_create_ext create = {
* .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
* .extensions = to_user_pointer(&p_engines);
* };
*
* ctx_id = gem_context_create_ext(drm_fd, &create);
*
* // We have now created a GEM context with two engines in the map:
* // Index 0 points to rcs0 while index 1 points to bcs0. Other engines
* // will not be accessible from this context.
*
* ...
* execbuf.rsvd1 = ctx_id;
* execbuf.flags = 0; // Submits to index 0, which is rcs0 for this context
* gem_execbuf(drm_fd, &execbuf);
*
* ...
* execbuf.rsvd1 = ctx_id;
* execbuf.flags = 1; // Submits to index 0, which is bcs0 for this context
* gem_execbuf(drm_fd, &execbuf);
*/
struct i915_context_param_engines {
__u64 extensions; /* linked chain of extension blocks, 0 terminates */
#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
......@@ -1901,20 +2122,10 @@ struct drm_i915_gem_context_create_ext_setparam {
struct drm_i915_gem_context_param param;
};
struct drm_i915_gem_context_create_ext_clone {
/* This API has been removed. On the off chance someone somewhere has
* attempted to use it, never re-use this extension number.
*/
#define I915_CONTEXT_CREATE_EXT_CLONE 1
struct i915_user_extension base;
__u32 clone_id;
__u32 flags;
#define I915_CONTEXT_CLONE_ENGINES (1u << 0)
#define I915_CONTEXT_CLONE_FLAGS (1u << 1)
#define I915_CONTEXT_CLONE_SCHEDATTR (1u << 2)
#define I915_CONTEXT_CLONE_SSEU (1u << 3)
#define I915_CONTEXT_CLONE_TIMELINE (1u << 4)
#define I915_CONTEXT_CLONE_VM (1u << 5)
#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
__u64 rsvd;
};
struct drm_i915_gem_context_destroy {
__u32 ctx_id;
......@@ -1986,14 +2197,69 @@ struct drm_i915_reset_stats {
__u32 pad;
};
/**
* struct drm_i915_gem_userptr - Create GEM object from user allocated memory.
*
* Userptr objects have several restrictions on what ioctls can be used with the
* object handle.
*/
struct drm_i915_gem_userptr {
/**
* @user_ptr: The pointer to the allocated memory.
*
* Needs to be aligned to PAGE_SIZE.
*/
__u64 user_ptr;
/**
* @user_size:
*
* The size in bytes for the allocated memory. This will also become the
* object size.
*
* Needs to be aligned to PAGE_SIZE, and should be at least PAGE_SIZE,
* or larger.
*/
__u64 user_size;
/**
* @flags:
*
* Supported flags:
*
* I915_USERPTR_READ_ONLY:
*
* Mark the object as readonly, this also means GPU access can only be
* readonly. This is only supported on HW which supports readonly access
* through the GTT. If the HW can't support readonly access, an error is
* returned.
*
* I915_USERPTR_PROBE:
*
* Probe the provided @user_ptr range and validate that the @user_ptr is
* indeed pointing to normal memory and that the range is also valid.
* For example if some garbage address is given to the kernel, then this
* should complain.
*
* Returns -EFAULT if the probe failed.
*
* Note that this doesn't populate the backing pages, and also doesn't
* guarantee that the object will remain valid when the object is
* eventually used.
*
* The kernel supports this feature if I915_PARAM_HAS_USERPTR_PROBE
* returns a non-zero value.
*
* I915_USERPTR_UNSYNCHRONIZED:
*
* NOT USED. Setting this flag will result in an error.
*/
__u32 flags;
#define I915_USERPTR_READ_ONLY 0x1
#define I915_USERPTR_PROBE 0x2
#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
/**
* Returned handle for the object.
* @handle: Returned handle for the object.
*
* Object handles are nonzero.
*/
......@@ -2376,6 +2642,76 @@ struct drm_i915_query_topology_info {
__u8 data[];
};
/**
* DOC: Engine Discovery uAPI
*
* Engine discovery uAPI is a way of enumerating physical engines present in a
* GPU associated with an open i915 DRM file descriptor. This supersedes the old
* way of using `DRM_IOCTL_I915_GETPARAM` and engine identifiers like
* `I915_PARAM_HAS_BLT`.
*
* The need for this interface came starting with Icelake and newer GPUs, which
* started to establish a pattern of having multiple engines of a same class,
* where not all instances were always completely functionally equivalent.
*
* Entry point for this uapi is `DRM_IOCTL_I915_QUERY` with the
* `DRM_I915_QUERY_ENGINE_INFO` as the queried item id.
*
* Example for getting the list of engines:
*
* .. code-block:: C
*
* struct drm_i915_query_engine_info *info;
* struct drm_i915_query_item item = {
* .query_id = DRM_I915_QUERY_ENGINE_INFO;
* };
* struct drm_i915_query query = {
* .num_items = 1,
* .items_ptr = (uintptr_t)&item,
* };
* int err, i;
*
* // First query the size of the blob we need, this needs to be large
* // enough to hold our array of engines. The kernel will fill out the
* // item.length for us, which is the number of bytes we need.
* //
* // Alternatively a large buffer can be allocated straight away enabling
* // querying in one pass, in which case item.length should contain the
* // length of the provided buffer.
* err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
* if (err) ...
*
* info = calloc(1, item.length);
* // Now that we allocated the required number of bytes, we call the ioctl
* // again, this time with the data_ptr pointing to our newly allocated
* // blob, which the kernel can then populate with info on all engines.
* item.data_ptr = (uintptr_t)&info,
*
* err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
* if (err) ...
*
* // We can now access each engine in the array
* for (i = 0; i < info->num_engines; i++) {
* struct drm_i915_engine_info einfo = info->engines[i];
* u16 class = einfo.engine.class;
* u16 instance = einfo.engine.instance;
* ....
* }
*
* free(info);
*
* Each of the enumerated engines, apart from being defined by its class and
* instance (see `struct i915_engine_class_instance`), also can have flags and
* capabilities defined as documented in i915_drm.h.
*
* For instance video engines which support HEVC encoding will have the
* `I915_VIDEO_CLASS_CAPABILITY_HEVC` capability bit set.
*
* Engine discovery only fully comes to its own when combined with the new way
* of addressing engines when submitting batch buffers using contexts with
* engine maps configured.
*/
/**
* struct drm_i915_engine_info
*
......
......@@ -184,6 +184,7 @@ struct fsxattr {
#define BLKSECDISCARD _IO(0x12,125)
#define BLKROTATIONAL _IO(0x12,126)
#define BLKZEROOUT _IO(0x12,127)
#define BLKGETDISKSEQ _IOR(0x12,128,__u64)
/*
* A jump here: 130-136 are reserved for zoned block devices
* (see uapi/linux/blkzoned.h)
......
......@@ -188,11 +188,22 @@ struct ip_mreq_source {
};
struct ip_msfilter {
union {
struct {
__be32 imsf_multiaddr_aux;
__be32 imsf_interface_aux;
__u32 imsf_fmode_aux;
__u32 imsf_numsrc_aux;
__be32 imsf_slist[1];
};
struct {
__be32 imsf_multiaddr;
__be32 imsf_interface;
__u32 imsf_fmode;
__u32 imsf_numsrc;
__be32 imsf_slist[1];
__be32 imsf_slist_flex[];
};
};
};
#define IP_MSFILTER_SIZE(numsrc) \
......@@ -211,11 +222,22 @@ struct group_source_req {
};
struct group_filter {
union {
struct {
__u32 gf_interface_aux; /* interface index */
struct __kernel_sockaddr_storage gf_group_aux; /* multicast address */
__u32 gf_fmode_aux; /* filter mode */
__u32 gf_numsrc_aux; /* number of sources */
struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */
};
struct {
__u32 gf_interface; /* interface index */
struct __kernel_sockaddr_storage gf_group; /* multicast address */
__u32 gf_fmode; /* filter mode */
__u32 gf_numsrc; /* number of sources */
struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */
struct __kernel_sockaddr_storage gf_slist_flex[]; /* interface index */
};
};
};
#define GROUP_FILTER_SIZE(numsrc) \
......
......@@ -1965,7 +1965,9 @@ struct kvm_stats_header {
#define KVM_STATS_TYPE_CUMULATIVE (0x0 << KVM_STATS_TYPE_SHIFT)
#define KVM_STATS_TYPE_INSTANT (0x1 << KVM_STATS_TYPE_SHIFT)
#define KVM_STATS_TYPE_PEAK (0x2 << KVM_STATS_TYPE_SHIFT)
#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_PEAK
#define KVM_STATS_TYPE_LINEAR_HIST (0x3 << KVM_STATS_TYPE_SHIFT)
#define KVM_STATS_TYPE_LOG_HIST (0x4 << KVM_STATS_TYPE_SHIFT)
#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_LOG_HIST
#define KVM_STATS_UNIT_SHIFT 4
#define KVM_STATS_UNIT_MASK (0xF << KVM_STATS_UNIT_SHIFT)
......@@ -1988,8 +1990,9 @@ struct kvm_stats_header {
* @size: The number of data items for this stats.
* Every data item is of type __u64.
* @offset: The offset of the stats to the start of stat structure in
* struture kvm or kvm_vcpu.
* @unused: Unused field for future usage. Always 0 for now.
* structure kvm or kvm_vcpu.
* @bucket_size: A parameter value used for histogram stats. It is only used
* for linear histogram stats, specifying the size of the bucket;
* @name: The name string for the stats. Its size is indicated by the
* &kvm_stats_header->name_size.
*/
......@@ -1998,7 +2001,7 @@ struct kvm_stats_desc {
__s16 exponent;
__u16 size;
__u32 offset;
__u32 unused;
__u32 bucket_size;
char name[];
};
......
......@@ -73,7 +73,8 @@
#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */
#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */
#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
#define MOVE_MOUNT__MASK 0x00000077
#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */
#define MOVE_MOUNT__MASK 0x00000177
/*
* fsopen() flags.
......
......@@ -213,6 +213,7 @@ struct prctl_mm_map {
/* Speculation control variants */
# define PR_SPEC_STORE_BYPASS 0
# define PR_SPEC_INDIRECT_BRANCH 1
# define PR_SPEC_L1D_FLUSH 2
/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
# define PR_SPEC_NOT_AFFECTED 0
# define PR_SPEC_PRCTL (1UL << 0)
......@@ -234,14 +235,15 @@ struct prctl_mm_map {
#define PR_GET_TAGGED_ADDR_CTRL 56
# define PR_TAGGED_ADDR_ENABLE (1UL << 0)
/* MTE tag check fault modes */
# define PR_MTE_TCF_SHIFT 1
# define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT)
# define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT)
# define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT)
# define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT)
# define PR_MTE_TCF_NONE 0
# define PR_MTE_TCF_SYNC (1UL << 1)
# define PR_MTE_TCF_ASYNC (1UL << 2)
# define PR_MTE_TCF_MASK (PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC)
/* MTE tag inclusion mask */
# define PR_MTE_TAG_SHIFT 3
# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT)
/* Unused; kept only for source compatibility */
# define PR_MTE_TCF_SHIFT 1
/* Control reclaim behavior when allocating memory */
#define PR_SET_IO_FLUSHER 57
......
......@@ -299,6 +299,7 @@ typedef int __bitwise snd_pcm_subformat_t;
#define SNDRV_PCM_INFO_HAS_LINK_ABSOLUTE_ATIME 0x02000000 /* report absolute hardware link audio time, not reset on startup */
#define SNDRV_PCM_INFO_HAS_LINK_ESTIMATED_ATIME 0x04000000 /* report estimated link audio time */
#define SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME 0x08000000 /* report synchronized audio/system time */
#define SNDRV_PCM_INFO_EXPLICIT_SYNC 0x10000000 /* needs explicit sync of pointers and data */
#define SNDRV_PCM_INFO_DRAIN_TRIGGER 0x40000000 /* internal kernel flag - trigger in drain */
#define SNDRV_PCM_INFO_FIFO_IN_FRAMES 0x80000000 /* internal kernel flag - FIFO size is in frames */
......
......@@ -39,3 +39,4 @@ pmu-events/jevents
feature/
fixdep
libtraceevent-dynamic-list
Documentation/doc.dep
......@@ -827,9 +827,11 @@ else
endif
endif
ifeq ($(feature-libbfd), 1)
ifndef NO_LIBBFD
ifeq ($(feature-libbfd), 1)
EXTLIBS += -lbfd -lopcodes
else
else
# we are on a system that requires -liberty and (maybe) -lz
# to link against -lbfd; test each case individually here
......@@ -848,12 +850,13 @@ else
endif
endif
$(call feature_check,disassembler-four-args)
endif
endif
ifeq ($(feature-libbfd-buildid), 1)
ifeq ($(feature-libbfd-buildid), 1)
CFLAGS += -DHAVE_LIBBFD_BUILDID_SUPPORT
else
else
msg := $(warning Old version of libbfd/binutils things like PE executable profiling will not be available);
endif
endif
ifdef NO_DEMANGLE
......
......@@ -361,3 +361,5 @@
444 n64 landlock_create_ruleset sys_landlock_create_ruleset
445 n64 landlock_add_rule sys_landlock_add_rule
446 n64 landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 n64 process_mrelease sys_process_mrelease
......@@ -330,10 +330,10 @@
256 64 sys_debug_setcontext sys_ni_syscall
256 spu sys_debug_setcontext sys_ni_syscall
# 257 reserved for vserver
258 nospu migrate_pages sys_migrate_pages compat_sys_migrate_pages
259 nospu mbind sys_mbind compat_sys_mbind
260 nospu get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
261 nospu set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy
258 nospu migrate_pages sys_migrate_pages
259 nospu mbind sys_mbind
260 nospu get_mempolicy sys_get_mempolicy
261 nospu set_mempolicy sys_set_mempolicy
262 nospu mq_open sys_mq_open compat_sys_mq_open
263 nospu mq_unlink sys_mq_unlink
264 32 mq_timedsend sys_mq_timedsend_time32
......@@ -381,7 +381,7 @@
298 common faccessat sys_faccessat
299 common get_robust_list sys_get_robust_list compat_sys_get_robust_list
300 common set_robust_list sys_set_robust_list compat_sys_set_robust_list
301 common move_pages sys_move_pages compat_sys_move_pages
301 common move_pages sys_move_pages
302 common getcpu sys_getcpu
303 nospu epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait
304 32 utimensat sys_utimensat_time32
......@@ -526,3 +526,5 @@
444 common landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
......@@ -122,7 +122,7 @@
131 common quotactl sys_quotactl sys_quotactl
132 common getpgid sys_getpgid sys_getpgid
133 common fchdir sys_fchdir sys_fchdir
134 common bdflush - -
134 common bdflush sys_ni_syscall sys_ni_syscall
135 common sysfs sys_sysfs sys_sysfs
136 common personality sys_s390_personality sys_s390_personality
137 common afs_syscall - -
......@@ -274,9 +274,9 @@
265 common statfs64 sys_statfs64 compat_sys_statfs64
266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
267 common remap_file_pages sys_remap_file_pages sys_remap_file_pages
268 common mbind sys_mbind compat_sys_mbind
269 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
270 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy
268 common mbind sys_mbind sys_mbind
269 common get_mempolicy sys_get_mempolicy sys_get_mempolicy
270 common set_mempolicy sys_set_mempolicy sys_set_mempolicy
271 common mq_open sys_mq_open compat_sys_mq_open
272 common mq_unlink sys_mq_unlink sys_mq_unlink
273 common mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32
......@@ -293,7 +293,7 @@
284 common inotify_init sys_inotify_init sys_inotify_init
285 common inotify_add_watch sys_inotify_add_watch sys_inotify_add_watch
286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch
287 common migrate_pages sys_migrate_pages compat_sys_migrate_pages
287 common migrate_pages sys_migrate_pages sys_migrate_pages
288 common openat sys_openat compat_sys_openat
289 common mkdirat sys_mkdirat sys_mkdirat
290 common mknodat sys_mknodat sys_mknodat
......@@ -317,7 +317,7 @@
307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range
308 common tee sys_tee sys_tee
309 common vmsplice sys_vmsplice sys_vmsplice
310 common move_pages sys_move_pages compat_sys_move_pages
310 common move_pages sys_move_pages sys_move_pages
311 common getcpu sys_getcpu sys_getcpu
312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait
313 common utimes sys_utimes sys_utimes_time32
......@@ -449,3 +449,5 @@
444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease sys_process_mrelease
......@@ -369,6 +369,7 @@
445 common landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self
447 common memfd_secret sys_memfd_secret
448 common process_mrelease sys_process_mrelease
#
# Due to a historical design error, certain syscalls are numbered differently
......@@ -397,7 +398,7 @@
530 x32 set_robust_list compat_sys_set_robust_list
531 x32 get_robust_list compat_sys_get_robust_list
532 x32 vmsplice sys_vmsplice
533 x32 move_pages compat_sys_move_pages
533 x32 move_pages sys_move_pages
534 x32 preadv compat_sys_preadv64
535 x32 pwritev compat_sys_pwritev64
536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
......
......@@ -144,6 +144,7 @@ done
# diff with extra ignore lines
check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))"'
check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"'
check arch/x86/include/asm/amd-ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"'
check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"'
check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"'
check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"'
......
#!/bin/sh
# description: produce callgraphs in short form for scripting use
perf script -s "$PERF_EXEC_PATH"/scripts/python/stackcollapse.py -- "$@"
perf script -s "$PERF_EXEC_PATH"/scripts/python/stackcollapse.py "$@"
......@@ -192,7 +192,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
}
if (count != expect * evlist->core.nr_entries) {
pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect, count);
pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect * evlist->core.nr_entries, count);
goto out_delete_evlist;
}
......
......@@ -223,8 +223,11 @@ struct ucred {
* reuses AF_INET address family
*/
#define AF_XDP 44 /* XDP sockets */
#define AF_MCTP 45 /* Management component
* transport protocol
*/
#define AF_MAX 45 /* For now.. */
#define AF_MAX 46 /* For now.. */
/* Protocol families, same as address families. */
#define PF_UNSPEC AF_UNSPEC
......@@ -274,6 +277,7 @@ struct ucred {
#define PF_QIPCRTR AF_QIPCRTR
#define PF_SMC AF_SMC
#define PF_XDP AF_XDP
#define PF_MCTP AF_MCTP
#define PF_MAX AF_MAX
/* Maximum queue length specifiable by listen. */
......@@ -421,6 +425,9 @@ extern int __sys_accept4_file(struct file *file, unsigned file_flags,
struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags,
unsigned long nofile);
extern struct file *do_accept(struct file *file, unsigned file_flags,
struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags);
extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags);
extern int __sys_socket(int family, int type, int protocol);
......
......@@ -10,7 +10,7 @@ fi
linux_mount=${linux_header_dir}/mount.h
printf "static const char *move_mount_flags[] = {\n"
regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([FT]_[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([^_]+_[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
egrep $regex ${linux_mount} | \
sed -r "s/$regex/\2 \1/g" | \
xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
......
......@@ -59,6 +59,7 @@ perf-y += pstack.o
perf-y += session.o
perf-y += sample-raw.o
perf-y += s390-sample-raw.o
perf-y += amd-sample-raw.o
perf-$(CONFIG_TRACE) += syscalltbl.o
perf-y += ordered-events.o
perf-y += namespaces.o
......
// SPDX-License-Identifier: GPL-2.0
/*
* AMD specific. Provide textual annotation for IBS raw sample data.
*/
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include <linux/string.h>
#include "../../arch/x86/include/asm/amd-ibs.h"
#include "debug.h"
#include "session.h"
#include "evlist.h"
#include "sample-raw.h"
#include "pmu-events/pmu-events.h"
static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type;
static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg)
{
const char * const ic_miss_strs[] = {
" IcMiss 0",
" IcMiss 1",
};
const char * const l1tlb_pgsz_strs[] = {
" L1TlbPgSz 4KB",
" L1TlbPgSz 2MB",
" L1TlbPgSz 1GB",
" L1TlbPgSz RESERVED"
};
const char * const l1tlb_pgsz_strs_erratum1347[] = {
" L1TlbPgSz 4KB",
" L1TlbPgSz 16KB",
" L1TlbPgSz 2MB",
" L1TlbPgSz 1GB"
};
const char *ic_miss_str = NULL;
const char *l1tlb_pgsz_str = NULL;
if (cpu_family == 0x19 && cpu_model < 0x10) {
/*
* Erratum #1238 workaround is to ignore MSRC001_1030[IbsIcMiss]
* Erratum #1347 workaround is to use table provided in erratum
*/
if (reg.phy_addr_valid)
l1tlb_pgsz_str = l1tlb_pgsz_strs_erratum1347[reg.l1tlb_pgsz];
} else {
if (reg.phy_addr_valid)
l1tlb_pgsz_str = l1tlb_pgsz_strs[reg.l1tlb_pgsz];
ic_miss_str = ic_miss_strs[reg.ic_miss];
}
printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d Lat %5d En %d Val %d Comp %d%s "
"PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s\n",
reg.val, reg.fetch_maxcnt << 4, reg.fetch_cnt << 4, reg.fetch_lat,
reg.fetch_en, reg.fetch_val, reg.fetch_comp, ic_miss_str ? : "",
reg.phy_addr_valid, l1tlb_pgsz_str ? : "", reg.l1tlb_miss, reg.l2tlb_miss,
reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : "");
}
static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg)
{
printf("ic_ibs_ext_ctl:\t%016llx IbsItlbRefillLat %3d\n", reg.val, reg.itlb_refill_lat);
}
static void pr_ibs_op_ctl(union ibs_op_ctl reg)
{
printf("ibs_op_ctl:\t%016llx MaxCnt %9d En %d Val %d CntCtl %d=%s CurCnt %9d\n",
reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, reg.op_en, reg.op_val,
reg.cnt_ctl, reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt);
}
static void pr_ibs_op_data(union ibs_op_data reg)
{
printf("ibs_op_data:\t%016llx CompToRetCtr %5d TagToRetCtr %5d%s%s%s BrnRet %d "
" RipInvalid %d BrnFuse %d Microcode %d\n",
reg.val, reg.comp_to_ret_ctr, reg.tag_to_ret_ctr,
reg.op_brn_ret ? (reg.op_return ? " OpReturn 1" : " OpReturn 0") : "",
reg.op_brn_ret ? (reg.op_brn_taken ? " OpBrnTaken 1" : " OpBrnTaken 0") : "",
reg.op_brn_ret ? (reg.op_brn_misp ? " OpBrnMisp 1" : " OpBrnMisp 0") : "",
reg.op_brn_ret, reg.op_rip_invalid, reg.op_brn_fuse, reg.op_microcode);
}
static void pr_ibs_op_data2(union ibs_op_data2 reg)
{
static const char * const data_src_str[] = {
"",
" DataSrc 1=(reserved)",
" DataSrc 2=Local node cache",
" DataSrc 3=DRAM",
" DataSrc 4=Remote node cache",
" DataSrc 5=(reserved)",
" DataSrc 6=(reserved)",
" DataSrc 7=Other"
};
printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val,
reg.data_src == 2 ? (reg.cache_hit_st ? "CacheHitSt 1=O-State "
: "CacheHitSt 0=M-state ") : "",
reg.rmt_node, data_src_str[reg.data_src]);
}
static void pr_ibs_op_data3(union ibs_op_data3 reg)
{
char l2_miss_str[sizeof(" L2Miss _")] = "";
char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = "";
char op_dc_miss_open_mem_reqs_str[sizeof(" OpDcMissOpenMemReqs __")] = "";
/*
* Erratum #1293
* Ignore L2Miss and OpDcMissOpenMemReqs (and opdata2) if DcMissNoMabAlloc or SwPf set
*/
if (!(cpu_family == 0x19 && cpu_model < 0x10 && (reg.dc_miss_no_mab_alloc || reg.sw_pf))) {
snprintf(l2_miss_str, sizeof(l2_miss_str), " L2Miss %d", reg.l2_miss);
snprintf(op_dc_miss_open_mem_reqs_str, sizeof(op_dc_miss_open_mem_reqs_str),
" OpDcMissOpenMemReqs %2d", reg.op_dc_miss_open_mem_reqs);
}
if (reg.op_mem_width)
snprintf(op_mem_width_str, sizeof(op_mem_width_str),
" OpMemWidth %2d bytes", 1 << (reg.op_mem_width - 1));
printf("ibs_op_data3:\t%016llx LdOp %d StOp %d DcL1TlbMiss %d DcL2TlbMiss %d "
"DcL1TlbHit2M %d DcL1TlbHit1G %d DcL2TlbHit2M %d DcMiss %d DcMisAcc %d "
"DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d DcLinAddrValid %d "
"DcPhyAddrValid %d DcL2TlbHit1G %d%s SwPf %d%s%s DcMissLat %5d TlbRefillLat %5d\n",
reg.val, reg.ld_op, reg.st_op, reg.dc_l1tlb_miss, reg.dc_l2tlb_miss,
reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g, reg.dc_l2tlb_hit_2m, reg.dc_miss,
reg.dc_mis_acc, reg.dc_wc_mem_acc, reg.dc_uc_mem_acc, reg.dc_locked_op,
reg.dc_miss_no_mab_alloc, reg.dc_lin_addr_valid, reg.dc_phy_addr_valid,
reg.dc_l2_tlb_hit_1g, l2_miss_str, reg.sw_pf, op_mem_width_str,
op_dc_miss_open_mem_reqs_str, reg.dc_miss_lat, reg.tlb_refill_lat);
}
/*
* IBS Op/Execution MSRs always saved, in order, are:
* IBS_OP_CTL, IBS_OP_RIP, IBS_OP_DATA, IBS_OP_DATA2,
* IBS_OP_DATA3, IBS_DC_LINADDR, IBS_DC_PHYSADDR, BP_IBSTGT_RIP
*/
static void amd_dump_ibs_op(struct perf_sample *sample)
{
struct perf_ibs_data *data = sample->raw_data;
union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data;
__u64 *rip = (__u64 *)op_ctl + 1;
union ibs_op_data *op_data = (union ibs_op_data *)(rip + 1);
union ibs_op_data3 *op_data3 = (union ibs_op_data3 *)(rip + 3);
pr_ibs_op_ctl(*op_ctl);
if (!op_data->op_rip_invalid)
printf("IbsOpRip:\t%016llx\n", *rip);
pr_ibs_op_data(*op_data);
/*
* Erratum #1293: ignore op_data2 if DcMissNoMabAlloc or SwPf are set
*/
if (!(cpu_family == 0x19 && cpu_model < 0x10 &&
(op_data3->dc_miss_no_mab_alloc || op_data3->sw_pf)))
pr_ibs_op_data2(*(union ibs_op_data2 *)(rip + 2));
pr_ibs_op_data3(*op_data3);
if (op_data3->dc_lin_addr_valid)
printf("IbsDCLinAd:\t%016llx\n", *(rip + 4));
if (op_data3->dc_phy_addr_valid)
printf("IbsDCPhysAd:\t%016llx\n", *(rip + 5));
if (op_data->op_brn_ret && *(rip + 6))
printf("IbsBrTarget:\t%016llx\n", *(rip + 6));
}
/*
* IBS Fetch MSRs always saved, in order, are:
* IBS_FETCH_CTL, IBS_FETCH_LINADDR, IBS_FETCH_PHYSADDR, IC_IBS_EXTD_CTL
*/
static void amd_dump_ibs_fetch(struct perf_sample *sample)
{
struct perf_ibs_data *data = sample->raw_data;
union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data;
__u64 *addr = (__u64 *)fetch_ctl + 1;
union ic_ibs_extd_ctl *extd_ctl = (union ic_ibs_extd_ctl *)addr + 2;
pr_ibs_fetch_ctl(*fetch_ctl);
printf("IbsFetchLinAd:\t%016llx\n", *addr++);
if (fetch_ctl->phy_addr_valid)
printf("IbsFetchPhysAd:\t%016llx\n", *addr);
pr_ic_ibs_extd_ctl(*extd_ctl);
}
/*
* Test for enable and valid bits in captured control MSRs.
*/
static bool is_valid_ibs_fetch_sample(struct perf_sample *sample)
{
struct perf_ibs_data *data = sample->raw_data;
union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data;
if (fetch_ctl->fetch_en && fetch_ctl->fetch_val)
return true;
return false;
}
static bool is_valid_ibs_op_sample(struct perf_sample *sample)
{
struct perf_ibs_data *data = sample->raw_data;
union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data;
if (op_ctl->op_en && op_ctl->op_val)
return true;
return false;
}
/* AMD vendor specific raw sample function. Check for PERF_RECORD_SAMPLE events
* and if the event was triggered by IBS, display its raw data with decoded text.
* The function is only invoked when the dump flag -D is set.
*/
void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event,
struct perf_sample *sample)
{
struct evsel *evsel;
if (event->header.type != PERF_RECORD_SAMPLE || !sample->raw_size)
return;
evsel = evlist__event2evsel(evlist, event);
if (!evsel)
return;
if (evsel->core.attr.type == ibs_fetch_type) {
if (!is_valid_ibs_fetch_sample(sample)) {
pr_debug("Invalid raw IBS Fetch MSR data encountered\n");
return;
}
amd_dump_ibs_fetch(sample);
} else if (evsel->core.attr.type == ibs_op_type) {
if (!is_valid_ibs_op_sample(sample)) {
pr_debug("Invalid raw IBS Op MSR data encountered\n");
return;
}
amd_dump_ibs_op(sample);
}
}
static void parse_cpuid(struct perf_env *env)
{
const char *cpuid;
int ret;
cpuid = perf_env__cpuid(env);
/*
* cpuid = "AuthenticAMD,family,model,stepping"
*/
ret = sscanf(cpuid, "%*[^,],%u,%u", &cpu_family, &cpu_model);
if (ret != 2)
pr_debug("problem parsing cpuid\n");
}
/*
* Find and assign the type number used for ibs_op or ibs_fetch samples.
* Device names can be large - we are only interested in the first 9 characters,
* to match "ibs_fetch".
*/
bool evlist__has_amd_ibs(struct evlist *evlist)
{
struct perf_env *env = evlist->env;
int ret, nr_pmu_mappings = perf_env__nr_pmu_mappings(env);
const char *pmu_mapping = perf_env__pmu_mappings(env);
char name[sizeof("ibs_fetch")];
u32 type;
while (nr_pmu_mappings--) {
ret = sscanf(pmu_mapping, "%u:%9s", &type, name);
if (ret == 2) {
if (strstarts(name, "ibs_op"))
ibs_op_type = type;
else if (strstarts(name, "ibs_fetch"))
ibs_fetch_type = type;
}
pmu_mapping += strlen(pmu_mapping) + 1 /* '\0' */;
}
if (ibs_fetch_type || ibs_op_type) {
if (!cpu_family)
parse_cpuid(env);
return true;
}
return false;
}
......@@ -21,6 +21,14 @@
#include "record.h"
#include "util/synthetic-events.h"
struct btf * __weak btf__load_from_kernel_by_id(__u32 id)
{
struct btf *btf;
int err = btf__get_from_id(id, &btf);
return err ? ERR_PTR(err) : btf;
}
#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr))
static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len)
......
......@@ -1349,6 +1349,16 @@ void dso__set_build_id(struct dso *dso, struct build_id *bid)
bool dso__build_id_equal(const struct dso *dso, struct build_id *bid)
{
if (dso->bid.size > bid->size && dso->bid.size == BUILD_ID_SIZE) {
/*
* For the backward compatibility, it allows a build-id has
* trailing zeros.
*/
return !memcmp(dso->bid.data, bid->data, bid->size) &&
!memchr_inv(&dso->bid.data[bid->size], 0,
dso->bid.size - bid->size);
}
return dso->bid.size == bid->size &&
memcmp(dso->bid.data, bid->data, dso->bid.size) == 0;
}
......
......@@ -10,6 +10,7 @@
#include <sys/utsname.h>
#include <stdlib.h>
#include <string.h>
#include "strbuf.h"
struct perf_env perf_env;
......@@ -306,6 +307,45 @@ int perf_env__read_cpu_topology_map(struct perf_env *env)
return 0;
}
int perf_env__read_pmu_mappings(struct perf_env *env)
{
struct perf_pmu *pmu = NULL;
u32 pmu_num = 0;
struct strbuf sb;
while ((pmu = perf_pmu__scan(pmu))) {
if (!pmu->name)
continue;
pmu_num++;
}
if (!pmu_num) {
pr_debug("pmu mappings not available\n");
return -ENOENT;
}
env->nr_pmu_mappings = pmu_num;
if (strbuf_init(&sb, 128 * pmu_num) < 0)
return -ENOMEM;
while ((pmu = perf_pmu__scan(pmu))) {
if (!pmu->name)
continue;
if (strbuf_addf(&sb, "%u:%s", pmu->type, pmu->name) < 0)
goto error;
/* include a NULL character at the end */
if (strbuf_add(&sb, "", 1) < 0)
goto error;
}
env->pmu_mappings = strbuf_detach(&sb, NULL);
return 0;
error:
strbuf_release(&sb);
return -1;
}
int perf_env__read_cpuid(struct perf_env *env)
{
char cpuid[128];
......@@ -404,6 +444,44 @@ const char *perf_env__arch(struct perf_env *env)
return normalize_arch(arch_name);
}
const char *perf_env__cpuid(struct perf_env *env)
{
int status;
if (!env || !env->cpuid) { /* Assume local operation */
status = perf_env__read_cpuid(env);
if (status)
return NULL;
}
return env->cpuid;
}
int perf_env__nr_pmu_mappings(struct perf_env *env)
{
int status;
if (!env || !env->nr_pmu_mappings) { /* Assume local operation */
status = perf_env__read_pmu_mappings(env);
if (status)
return 0;
}
return env->nr_pmu_mappings;
}
const char *perf_env__pmu_mappings(struct perf_env *env)
{
int status;
if (!env || !env->pmu_mappings) { /* Assume local operation */
status = perf_env__read_pmu_mappings(env);
if (status)
return NULL;
}
return env->pmu_mappings;
}
int perf_env__numa_node(struct perf_env *env, int cpu)
{
......
......@@ -149,11 +149,16 @@ int perf_env__kernel_is_64_bit(struct perf_env *env);
int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]);
int perf_env__read_cpuid(struct perf_env *env);
int perf_env__read_pmu_mappings(struct perf_env *env);
int perf_env__nr_pmu_mappings(struct perf_env *env);
const char *perf_env__pmu_mappings(struct perf_env *env);
int perf_env__read_cpu_topology_map(struct perf_env *env);
void cpu_cache_level__free(struct cpu_cache_level *cache);
const char *perf_env__arch(struct perf_env *env);
const char *perf_env__cpuid(struct perf_env *env);
const char *perf_env__raw_arch(struct perf_env *env);
int perf_env__nr_cpus_avail(struct perf_env *env);
......
......@@ -333,11 +333,11 @@ struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config)
goto out;
}
static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src)
int copy_config_terms(struct list_head *dst, struct list_head *src)
{
struct evsel_config_term *pos, *tmp;
list_for_each_entry(pos, &src->config_terms, list) {
list_for_each_entry(pos, src, list) {
tmp = malloc(sizeof(*tmp));
if (tmp == NULL)
return -ENOMEM;
......@@ -350,11 +350,16 @@ static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src)
return -ENOMEM;
}
}
list_add_tail(&tmp->list, &dst->config_terms);
list_add_tail(&tmp->list, dst);
}
return 0;
}
static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src)
{
return copy_config_terms(&dst->config_terms, &src->config_terms);
}
/**
* evsel__clone - create a new evsel copied from @orig
* @orig: original evsel
......@@ -1385,11 +1390,11 @@ int evsel__disable(struct evsel *evsel)
return err;
}
static void evsel__free_config_terms(struct evsel *evsel)
void free_config_terms(struct list_head *config_terms)
{
struct evsel_config_term *term, *h;
list_for_each_entry_safe(term, h, &evsel->config_terms, list) {
list_for_each_entry_safe(term, h, config_terms, list) {
list_del_init(&term->list);
if (term->free_str)
zfree(&term->val.str);
......@@ -1397,6 +1402,11 @@ static void evsel__free_config_terms(struct evsel *evsel)
}
}
static void evsel__free_config_terms(struct evsel *evsel)
{
free_config_terms(&evsel->config_terms);
}
void evsel__exit(struct evsel *evsel)
{
assert(list_empty(&evsel->core.node));
......
......@@ -213,6 +213,9 @@ static inline struct evsel *evsel__new(struct perf_event_attr *attr)
struct evsel *evsel__clone(struct evsel *orig);
struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx);
int copy_config_terms(struct list_head *dst, struct list_head *src);
void free_config_terms(struct list_head *config_terms);
/*
* Returns pointer with encoded error via <linux/err.h> interface.
*/
......
......@@ -76,12 +76,16 @@ static int add_hw_hybrid(struct parse_events_state *parse_state,
int ret;
perf_pmu__for_each_hybrid_pmu(pmu) {
LIST_HEAD(terms);
if (pmu_cmp(parse_state, pmu))
continue;
copy_config_terms(&terms, config_terms);
ret = create_event_hybrid(PERF_TYPE_HARDWARE,
&parse_state->idx, list, attr, name,
config_terms, pmu);
&terms, pmu);
free_config_terms(&terms);
if (ret)
return ret;
}
......@@ -115,11 +119,15 @@ static int add_raw_hybrid(struct parse_events_state *parse_state,
int ret;
perf_pmu__for_each_hybrid_pmu(pmu) {
LIST_HEAD(terms);
if (pmu_cmp(parse_state, pmu))
continue;
copy_config_terms(&terms, config_terms);
ret = create_raw_event_hybrid(&parse_state->idx, list, attr,
name, config_terms, pmu);
name, &terms, pmu);
free_config_terms(&terms);
if (ret)
return ret;
}
......@@ -165,11 +173,15 @@ int parse_events__add_cache_hybrid(struct list_head *list, int *idx,
*hybrid = true;
perf_pmu__for_each_hybrid_pmu(pmu) {
LIST_HEAD(terms);
if (pmu_cmp(parse_state, pmu))
continue;
copy_config_terms(&terms, config_terms);
ret = create_event_hybrid(PERF_TYPE_HW_CACHE, idx, list,
attr, name, config_terms, pmu);
attr, name, &terms, pmu);
free_config_terms(&terms);
if (ret)
return ret;
}
......
......@@ -387,7 +387,7 @@ __add_event(struct list_head *list, int *idx,
evsel->name = strdup(name);
if (config_terms)
list_splice(config_terms, &evsel->config_terms);
list_splice_init(config_terms, &evsel->config_terms);
if (list)
list_add_tail(&evsel->core.node, list);
......@@ -535,9 +535,12 @@ int parse_events_add_cache(struct list_head *list, int *idx,
config_name ? : name, &config_terms,
&hybrid, parse_state);
if (hybrid)
return ret;
goto out_free_terms;
return add_event(list, idx, &attr, config_name ? : name, &config_terms);
ret = add_event(list, idx, &attr, config_name ? : name, &config_terms);
out_free_terms:
free_config_terms(&config_terms);
return ret;
}
static void tracepoint_error(struct parse_events_error *e, int err,
......@@ -1457,10 +1460,13 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
get_config_name(head_config),
&config_terms, &hybrid);
if (hybrid)
return ret;
goto out_free_terms;
return add_event(list, &parse_state->idx, &attr,
ret = add_event(list, &parse_state->idx, &attr,
get_config_name(head_config), &config_terms);
out_free_terms:
free_config_terms(&config_terms);
return ret;
}
int parse_events_add_tool(struct parse_events_state *parse_state,
......@@ -1608,14 +1614,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
}
if (!parse_state->fake_pmu && perf_pmu__config(pmu, &attr, head_config, parse_state->error)) {
struct evsel_config_term *pos, *tmp;
list_for_each_entry_safe(pos, tmp, &config_terms, list) {
list_del_init(&pos->list);
if (pos->free_str)
zfree(&pos->val.str);
free(pos);
}
free_config_terms(&config_terms);
return -EINVAL;
}
......
......@@ -137,6 +137,9 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
PRINT_ATTRf(cgroup, p_unsigned);
PRINT_ATTRf(text_poke, p_unsigned);
PRINT_ATTRf(build_id, p_unsigned);
PRINT_ATTRf(inherit_thread, p_unsigned);
PRINT_ATTRf(remove_on_exec, p_unsigned);
PRINT_ATTRf(sigtrap, p_unsigned);
PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
PRINT_ATTRf(bp_type, p_unsigned);
......@@ -150,7 +153,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
PRINT_ATTRf(aux_watermark, p_unsigned);
PRINT_ATTRf(sample_max_stack, p_unsigned);
PRINT_ATTRf(aux_sample_size, p_unsigned);
PRINT_ATTRf(text_poke, p_unsigned);
PRINT_ATTRf(sig_data, p_unsigned);
return ret;
}
/* SPDX-License-Identifier: GPL-2.0 */
#include <string.h>
#include <linux/string.h>
#include "evlist.h"
#include "env.h"
#include "header.h"
#include "sample-raw.h"
/*
......@@ -12,7 +14,13 @@
void evlist__init_trace_event_sample_raw(struct evlist *evlist)
{
const char *arch_pf = perf_env__arch(evlist->env);
const char *cpuid = perf_env__cpuid(evlist->env);
if (arch_pf && !strcmp("s390", arch_pf))
evlist->trace_event_sample_raw = evlist__s390_sample_raw;
else if (arch_pf && !strcmp("x86", arch_pf) &&
cpuid && strstarts(cpuid, "AuthenticAMD") &&
evlist__has_amd_ibs(evlist)) {
evlist->trace_event_sample_raw = evlist__amd_sample_raw;
}
}
......@@ -6,6 +6,10 @@ struct evlist;
union perf_event;
struct perf_sample;
void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample);
void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event,
struct perf_sample *sample);
bool evlist__has_amd_ibs(struct evlist *evlist);
void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event,
struct perf_sample *sample);
void evlist__init_trace_event_sample_raw(struct evlist *evlist);
#endif /* __PERF_EVLIST_H */
......@@ -1581,10 +1581,6 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile)
if (bfd_get_flavour(abfd) == bfd_target_elf_flavour)
goto out_close;
section = bfd_get_section_by_name(abfd, ".text");
if (section)
dso->text_offset = section->vma - section->filepos;
symbols_size = bfd_get_symtab_upper_bound(abfd);
if (symbols_size == 0) {
bfd_close(abfd);
......@@ -1602,6 +1598,22 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile)
if (symbols_count < 0)
goto out_free;
section = bfd_get_section_by_name(abfd, ".text");
if (section) {
for (i = 0; i < symbols_count; ++i) {
if (!strcmp(bfd_asymbol_name(symbols[i]), "__ImageBase") ||
!strcmp(bfd_asymbol_name(symbols[i]), "__image_base__"))
break;
}
if (i < symbols_count) {
/* PE symbols can only have 4 bytes, so use .text high bits */
dso->text_offset = section->vma - (u32)section->vma;
dso->text_offset += (u32)bfd_asymbol_value(symbols[i]);
} else {
dso->text_offset = section->vma - section->filepos;
}
}
qsort(symbols, symbols_count, sizeof(asymbol *), bfd_symbols__cmpvalue);
#ifdef bfd_get_section
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment