Commit 4b50239a authored by Ingo Molnar

Merge tag 'perf-core-for-mingo-4.15-20171003' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

- Multithread the synthesizing of PERF_RECORD_ events for pre-existing
  threads in 'perf top', speeding up that phase, greatly improving the
  user experience in systems such as Intel's Knights Mill (Kan Liang)

- 'perf test' fixes for the perf_event_attr test case (Jiri Olsa, Thomas Richter)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents a47ba4d7 f6a9820d
@@ -88,6 +88,12 @@ struct kvm_s390_io_adapter_req {
 /* kvm attributes for KVM_S390_VM_TOD */
 #define KVM_S390_VM_TOD_LOW 0
 #define KVM_S390_VM_TOD_HIGH 1
+#define KVM_S390_VM_TOD_EXT 2
+
+struct kvm_s390_vm_tod_clock {
+	__u8 epoch_idx;
+	__u64 tod;
+};
 
 /* kvm attributes for KVM_S390_VM_CPU_MODEL */
 /* processor related attributes are r/w */
...
@@ -196,6 +196,7 @@
 #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
 
 #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
@@ -287,6 +288,7 @@
 #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
 #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
 #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
+#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
 #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
...
@@ -21,11 +21,13 @@
 # define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31))
 # define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31))
 # define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31))
+# define DISABLE_PCID 0
 #else
 # define DISABLE_VME 0
 # define DISABLE_K6_MTRR 0
 # define DISABLE_CYRIX_ARR 0
 # define DISABLE_CENTAUR_MCR 0
+# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31))
 #endif /* CONFIG_X86_64 */
 
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
@@ -49,7 +51,7 @@
 #define DISABLED_MASK1 0
 #define DISABLED_MASK2 0
 #define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
-#define DISABLED_MASK4 0
+#define DISABLED_MASK4 (DISABLE_PCID)
 #define DISABLED_MASK5 0
 #define DISABLED_MASK6 0
 #define DISABLED_MASK7 0
...
#ifndef _ASM_GENERIC_HUGETLB_ENCODE_H_
#define _ASM_GENERIC_HUGETLB_ENCODE_H_
/*
* Several system calls take a flag to request "hugetlb" huge pages.
* Without further specification, these system calls will use the
* system's default huge page size. If a system supports multiple
* huge page sizes, the desired huge page size can be specified in
* bits [26:31] of the flag arguments. The value in these 6 bits
* will encode the log2 of the huge page size.
*
* The following definitions are associated with this huge page size
* encoding in flag arguments. System call specific header files
* that use this encoding should include this file. They can then
* provide definitions based on these with their own specific prefix.
* for example:
* #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
*/
#define HUGETLB_FLAG_ENCODE_SHIFT 26
#define HUGETLB_FLAG_ENCODE_MASK 0x3f
#define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT)
#endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */
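Since the six bits carry the log2 of the page size, each constant above is just the size's base-two exponent shifted into place: 2MB is 2^21, so HUGETLB_FLAG_ENCODE_2MB is 21 << 26. A minimal sketch of the same arithmetic (the helper name is ours, not part of this header; it assumes the header above is included):

#include <stdint.h>

/* Illustrative only: encode a power-of-two huge page size into
 * bits [26:31] of a flags word, as HUGETLB_FLAG_ENCODE_* does. */
static inline uint64_t hugetlb_encode_size(uint64_t size)
{
	unsigned int log2 = 0;

	while ((1ULL << log2) < size)
		log2++;
	/* size = 2MB = 1 << 21  ->  21 << 26, i.e. HUGETLB_FLAG_ENCODE_2MB */
	return (uint64_t)log2 << HUGETLB_FLAG_ENCODE_SHIFT;
}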
@@ -700,6 +700,7 @@ struct drm_prime_handle {
 
 struct drm_syncobj_create {
 	__u32 handle;
+#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
 	__u32 flags;
 };
 
@@ -718,6 +719,24 @@ struct drm_syncobj_handle {
 	__u32 pad;
 };
 
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
+struct drm_syncobj_wait {
+	__u64 handles;
+	/* absolute timeout */
+	__s64 timeout_nsec;
+	__u32 count_handles;
+	__u32 flags;
+	__u32 first_signaled; /* only valid when not waiting all */
+	__u32 pad;
+};
+
+struct drm_syncobj_array {
+	__u64 handles;
+	__u32 count_handles;
+	__u32 pad;
+};
+
 #if defined(__cplusplus)
 }
 #endif
@@ -840,6 +859,9 @@ extern "C" {
 #define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy)
 #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle)
 #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle)
+#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait)
+#define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array)
+#define DRM_IOCTL_SYNCOBJ_SIGNAL DRM_IOWR(0xC5, struct drm_syncobj_array)
 
 /**
  * Device specific ioctls should only be in their respective headers
...
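The new wait ioctl takes the struct above; a hedged sketch of the user-space side, assuming an open DRM device fd and syncobj handles created via DRM_IOCTL_SYNCOBJ_CREATE (error handling omitted, include path may vary by install):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/drm.h> /* struct drm_syncobj_wait, DRM_IOCTL_SYNCOBJ_WAIT */

/* Block until all of the given syncobjs signal, or the absolute
 * timeout in nanoseconds expires. */
static int syncobj_wait_all(int drm_fd, const uint32_t *handles,
			    uint32_t count, int64_t abs_timeout_nsec)
{
	struct drm_syncobj_wait wait;

	memset(&wait, 0, sizeof(wait));
	wait.handles = (uint64_t)(uintptr_t)handles; /* user pointer as u64 */
	wait.count_handles = count;
	wait.timeout_nsec = abs_timeout_nsec;        /* absolute, per the header */
	wait.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;

	return ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
}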
@@ -260,6 +260,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_CONTEXT_GETPARAM 0x34
 #define DRM_I915_GEM_CONTEXT_SETPARAM 0x35
 #define DRM_I915_PERF_OPEN 0x36
+#define DRM_I915_PERF_ADD_CONFIG 0x37
+#define DRM_I915_PERF_REMOVE_CONFIG 0x38
 
 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -315,6 +317,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param)
 #define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param)
 #define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param)
+#define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
+#define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -431,6 +435,11 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_BATCH_FIRST 48
 
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
+ * drm_i915_gem_exec_fence structures.  See I915_EXEC_FENCE_ARRAY.
+ */
+#define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
@@ -812,6 +821,17 @@ struct drm_i915_gem_exec_object2 {
 	__u64 rsvd2;
 };
 
+struct drm_i915_gem_exec_fence {
+	/**
+	 * User's handle for a drm_syncobj to wait on or signal.
+	 */
+	__u32 handle;
+
+#define I915_EXEC_FENCE_WAIT (1<<0)
+#define I915_EXEC_FENCE_SIGNAL (1<<1)
+	__u32 flags;
+};
+
 struct drm_i915_gem_execbuffer2 {
 	/**
 	 * List of gem_exec_object2 structs
@@ -826,7 +846,11 @@ struct drm_i915_gem_execbuffer2 {
 	__u32 DR1;
 	__u32 DR4;
 	__u32 num_cliprects;
-	/** This is a struct drm_clip_rect *cliprects */
+	/**
+	 * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY
+	 * is not set.  If I915_EXEC_FENCE_ARRAY is set, then this is a
+	 * struct drm_i915_gem_exec_fence *fences.
+	 */
 	__u64 cliprects_ptr;
 #define I915_EXEC_RING_MASK (7<<0)
 #define I915_EXEC_DEFAULT (0<<0)
@@ -927,7 +951,14 @@ struct drm_i915_gem_execbuffer2 {
  * element).
  */
 #define I915_EXEC_BATCH_FIRST (1<<18)
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_BATCH_FIRST<<1))
+
+/* Setting I915_FENCE_ARRAY implies that num_cliprects and cliprects_ptr
+ * define an array of i915_gem_exec_fence structures which specify a set of
+ * dma fences to wait upon or signal.
+ */
+#define I915_EXEC_FENCE_ARRAY (1<<19)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
 
 #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
@@ -1467,6 +1498,22 @@ enum drm_i915_perf_record_type {
 	DRM_I915_PERF_RECORD_MAX /* non-ABI */
 };
 
+/**
+ * Structure to upload perf dynamic configuration into the kernel.
+ */
+struct drm_i915_perf_oa_config {
+	/** String formatted like "%08x-%04x-%04x-%04x-%012x" */
+	char uuid[36];
+
+	__u32 n_mux_regs;
+	__u32 n_boolean_regs;
+	__u32 n_flex_regs;
+
+	__u64 __user mux_regs_ptr;
+	__u64 __user boolean_regs_ptr;
+	__u64 __user flex_regs_ptr;
+};
+
 #if defined(__cplusplus)
 }
 #endif
...
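Putting the two execbuffer additions together, a sketch of a fence-array submission. This is not the full i915 submission path: fd, the filled object array and the two syncobj handles are assumed to exist already (include <stdint.h>, <sys/ioctl.h> and the i915 uapi header, e.g. <drm/i915_drm.h>):

/* Sketch only: submit with one syncobj to wait on and one to signal. */
static int execbuf_with_fences(int fd, struct drm_i915_gem_exec_object2 *objects,
			       unsigned int n_objects,
			       uint32_t wait_handle, uint32_t signal_handle)
{
	struct drm_i915_gem_exec_fence fences[2] = {
		{ .handle = wait_handle,   .flags = I915_EXEC_FENCE_WAIT   },
		{ .handle = signal_handle, .flags = I915_EXEC_FENCE_SIGNAL },
	};
	struct drm_i915_gem_execbuffer2 execbuf = {
		.buffers_ptr   = (uint64_t)(uintptr_t)objects,
		.buffer_count  = n_objects,
		/* With I915_EXEC_FENCE_ARRAY set, num_cliprects/cliprects_ptr
		 * carry the fence array instead of clip rectangles. */
		.num_cliprects = 2,
		.cliprects_ptr = (uint64_t)(uintptr_t)fences,
		.flags         = I915_EXEC_FENCE_ARRAY,
	};

	return ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}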
@@ -143,12 +143,6 @@ enum bpf_attach_type {
 
 #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
 
-enum bpf_sockmap_flags {
-	BPF_SOCKMAP_UNSPEC,
-	BPF_SOCKMAP_STRPARSER,
-	__MAX_BPF_SOCKMAP_FLAG
-};
-
 /* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
  * to the given target_fd cgroup the descendent cgroup will be able to
  * override effective bpf program that was inherited from this cgroup
@@ -368,9 +362,20 @@ union bpf_attr {
  * int bpf_redirect(ifindex, flags)
  *     redirect to another netdev
  *     @ifindex: ifindex of the net device
- *     @flags: bit 0 - if set, redirect to ingress instead of egress
- *             other bits - reserved
- *     Return: TC_ACT_REDIRECT
+ *     @flags:
+ *       cls_bpf:
+ *         bit 0 - if set, redirect to ingress instead of egress
+ *         other bits - reserved
+ *       xdp_bpf:
+ *         all bits - reserved
+ *     Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error
+ *             xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error
+ *
+ * int bpf_redirect_map(map, key, flags)
+ *     redirect to endpoint in map
+ *     @map: pointer to dev map
+ *     @key: index in map to lookup
+ *     @flags: --
+ *     Return: XDP_REDIRECT on success or XDP_ABORT on error
  *
  * u32 bpf_get_route_realm(skb)
  *     retrieve a dst's tclassid
@@ -632,7 +637,7 @@ union bpf_attr {
 	FN(skb_adjust_room), \
 	FN(redirect_map), \
 	FN(sk_redirect_map), \
-	FN(sock_map_update),
+	FN(sock_map_update), \
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -753,20 +758,23 @@ struct bpf_sock {
 	__u32 family;
 	__u32 type;
 	__u32 protocol;
+	__u32 mark;
+	__u32 priority;
 };
 
 #define XDP_PACKET_HEADROOM 256
 
 /* User return codes for XDP prog type.
  * A valid XDP program must return one of these defined values. All other
- * return codes are reserved for future use. Unknown return codes will result
- * in packet drop.
+ * return codes are reserved for future use. Unknown return codes will
+ * result in packet drops and a warning via bpf_warn_invalid_xdp_action().
  */
 enum xdp_action {
 	XDP_ABORTED = 0,
 	XDP_DROP,
 	XDP_PASS,
 	XDP_TX,
+	XDP_REDIRECT,
 };
 
 /* user accessible metadata for XDP packet hook
...
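The bpf_redirect_map() helper documented above pairs with a device map populated from user space; a minimal XDP sketch in modern libbpf style (map name and layout are our own illustration, not part of this header):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h> /* SEC(), __uint/__type, bpf_redirect_map() */

/* Devmap holding egress ifindexes; filled in from user space. */
struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(max_entries, 64);
	__type(key, __u32);
	__type(value, __u32);
} tx_port SEC(".maps");

SEC("xdp")
int xdp_redirect_prog(struct xdp_md *ctx)
{
	/* Redirect everything to the device stored at index 0:
	 * XDP_REDIRECT on success, XDP_ABORTED/XDP_ABORT on error. */
	return bpf_redirect_map(&tx_port, 0, 0);
}

char _license[] SEC("license") = "GPL";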
@@ -711,7 +711,8 @@ struct kvm_ppc_one_seg_page_size {
 struct kvm_ppc_smmu_info {
 	__u64 flags;
 	__u32 slb_size;
-	__u32 pad;
+	__u16 data_keys;  /* # storage keys supported for data */
+	__u16 instr_keys; /* # storage keys supported for instructions */
 	struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
 };
...
 #ifndef _UAPI_LINUX_MMAN_H
 #define _UAPI_LINUX_MMAN_H
 
-#include <uapi/asm/mman.h>
+#include <asm/mman.h>
+#include <asm-generic/hugetlb_encode.h>
 
 #define MREMAP_MAYMOVE 1
 #define MREMAP_FIXED 2
@@ -10,4 +11,25 @@
 #define OVERCOMMIT_ALWAYS 1
 #define OVERCOMMIT_NEVER 2
 
+/*
+ * Huge page size encoding when MAP_HUGETLB is specified, and a huge page
+ * size other than the default is desired.  See hugetlb_encode.h.
+ * All known huge page size encodings are provided here.  It is the
+ * responsibility of the application to know which sizes are supported on
+ * the running system.  See mmap(2) man page for details.
+ */
+#define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
+#define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK
+
+#define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB
+#define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB
+#define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB
+#define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB
+#define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB
+#define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB
+#define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
+#define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB
+#define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
+#define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
+
 #endif /* _UAPI_LINUX_MMAN_H */
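With these definitions an application can pick a specific huge page size at mmap() time; a small sketch, assuming the libc headers export the new names (otherwise MAP_HUGE_2MB is just 21 << MAP_HUGE_SHIFT):

#include <stddef.h>
#include <sys/mman.h>

/* Map 'len' bytes backed by 2MB huge pages; returns MAP_FAILED if the
 * system has no 2MB huge pages reserved. */
static void *map_huge_2mb(size_t len)
{
	return mmap(NULL, len, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_2MB,
		    -1, 0);
}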
@@ -240,6 +240,9 @@ Default is to monitor all CPUS.
 --force::
 	Don't do ownership validation.
 
+--num-thread-synthesize::
+	The number of threads to run when synthesizing events for existing processes.
+	By default, the number of threads equals the number of online CPUs.
+
 INTERACTIVE PROMPTING KEYS
 --------------------------
...
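For instance, "perf top --num-thread-synthesize 8" caps the synthesis phase at eight worker threads instead of one per online CPU.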
 libperf-y += header.o
+libperf-y += sym-handling.o
 libperf-y += kvm-stat.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
...
/*
* Architecture specific ELF symbol handling and relocation mapping.
*
* Copyright 2017 IBM Corp.
* Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
* as published by the Free Software Foundation.
*/
#include "symbol.h"
#ifdef HAVE_LIBELF_SUPPORT
bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
{
if (ehdr.e_type == ET_EXEC)
return false;
return ehdr.e_type == ET_REL || ehdr.e_type == ET_DYN;
}
void arch__adjust_sym_map_offset(GElf_Sym *sym,
GElf_Shdr *shdr __maybe_unused,
struct map *map)
{
if (map->type == MAP__FUNCTION)
sym->st_value += map->start;
}
#endif
@@ -1441,7 +1441,8 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
 	perf_session__set_id_hdr_size(kvm->session);
 	ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true);
 	machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target,
-				    kvm->evlist->threads, false, kvm->opts.proc_map_timeout);
+				    kvm->evlist->threads, false,
+				    kvm->opts.proc_map_timeout, 1);
 	err = kvm_live_open_events(kvm);
 	if (err)
 		goto out;
...
@@ -863,7 +863,7 @@ static int record__synthesize(struct record *rec, bool tail)
 	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
 					    process_synthesized_event, opts->sample_address,
-					    opts->proc_map_timeout);
+					    opts->proc_map_timeout, 1);
 out:
 	return err;
 }
...
@@ -958,8 +958,16 @@ static int __cmd_top(struct perf_top *top)
 	if (perf_session__register_idle_thread(top->session) < 0)
 		goto out_delete;
 
+	if (top->nr_threads_synthesize > 1)
+		perf_set_multithreaded();
+
 	machine__synthesize_threads(&top->session->machines.host, &opts->target,
-				    top->evlist->threads, false, opts->proc_map_timeout);
+				    top->evlist->threads, false,
+				    opts->proc_map_timeout,
+				    top->nr_threads_synthesize);
+
+	if (top->nr_threads_synthesize > 1)
+		perf_set_singlethreaded();
 
 	if (perf_hpp_list.socket) {
 		ret = perf_env__read_cpu_topology_map(&perf_env);
@@ -1112,6 +1120,7 @@ int cmd_top(int argc, const char **argv)
 		},
 		.max_stack = sysctl_perf_event_max_stack,
 		.sym_pcnt_filter = 5,
+		.nr_threads_synthesize = UINT_MAX,
 	};
 	struct record_opts *opts = &top.record_opts;
 	struct target *target = &opts->target;
@@ -1221,6 +1230,8 @@ int cmd_top(int argc, const char **argv)
 	OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
 		    "Show entries in a hierarchy"),
 	OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"),
+	OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize,
+		     "number of thread to run event synthesize"),
 	OPT_END()
 	};
 	const char * const top_usage[] = {
...
@@ -1131,7 +1131,7 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
 	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
 					    evlist->threads, trace__tool_process, false,
-					    trace->opts.proc_map_timeout);
+					    trace->opts.proc_map_timeout, 1);
 	if (err)
 		symbol__exit();
...
@@ -166,7 +166,7 @@ static int run_dir(const char *d, const char *perf)
 	snprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s",
 		 d, d, perf, vcnt, v);
 
-	return system(cmd);
+	return system(cmd) ? TEST_FAIL : TEST_OK;
 }
 
 int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused)
...
@@ -237,6 +237,7 @@ class Test(object):
         # events in result. Fail if there's not any.
         for exp_name, exp_event in expect.items():
             exp_list = []
+            res_event = {}
             log.debug("    matching [%s]" % exp_name)
             for res_name, res_event in result.items():
                 log.debug("      to [%s]" % res_name)
@@ -252,6 +253,9 @@ class Test(object):
             if not exp_list:
                 if exp_event.optional():
                     log.debug("    %s does not match, but is optional" % exp_name)
                 else:
-                    exp_event.diff(res_event)
+                    if not res_event:
+                        log.debug("    res_event is empty");
+                    else:
+                        exp_event.diff(res_event)
                     raise Fail(self, 'match failure');
...
@@ -23,7 +23,7 @@ comm=1
 freq=1
 inherit_stat=0
 enable_on_exec=1
-task=0
+task=1
 watermark=0
 precise_ip=0|1|2|3
 mmap_data=0
...
@@ -17,5 +17,6 @@ sample_type=327
 read_format=4
 mmap=0
 comm=0
+task=0
 enable_on_exec=0
 disabled=0
@@ -23,7 +23,7 @@ sample_type=343
 # PERF_FORMAT_ID | PERF_FORMAT_GROUP
 read_format=12
+task=0
 mmap=0
 comm=0
 enable_on_exec=0
...
@@ -18,5 +18,6 @@ sample_type=327
 read_format=4
 mmap=0
 comm=0
+task=0
 enable_on_exec=0
 disabled=0
@@ -6,6 +6,7 @@ ret = 1
 [event-1:base-stat]
 fd=1
 group_fd=-1
+read_format=3|15
 
 [event-2:base-stat]
 fd=2
@@ -13,3 +14,4 @@ group_fd=1
 config=1
 disabled=0
 enable_on_exec=0
+read_format=3|15
@@ -6,6 +6,7 @@ ret = 1
 [event-1:base-stat]
 fd=1
 group_fd=-1
+read_format=3|15
 
 [event-2:base-stat]
 fd=2
@@ -13,3 +14,4 @@ group_fd=1
 config=1
 disabled=0
 enable_on_exec=0
+read_format=3|15
@@ -131,7 +131,7 @@ static int synth_all(struct machine *machine)
 {
 	return perf_event__synthesize_threads(NULL,
 					      perf_event__process,
-					      machine, 0, 500);
+					      machine, 0, 500, 1);
 }
 
 static int synth_process(struct machine *machine)
...
@@ -65,8 +65,6 @@ static int parse_callchain_mode(const char *value)
 		callchain_param.mode = CHAIN_FOLDED;
 		return 0;
 	}
-
-	pr_err("Invalid callchain mode: %s\n", value);
 	return -1;
 }
 
@@ -82,8 +80,6 @@ static int parse_callchain_order(const char *value)
 		callchain_param.order_set = true;
 		return 0;
 	}
-
-	pr_err("Invalid callchain order: %s\n", value);
 	return -1;
 }
 
@@ -105,8 +101,6 @@ static int parse_callchain_sort_key(const char *value)
 		callchain_param.branch_callstack = 1;
 		return 0;
 	}
-
-	pr_err("Invalid callchain sort key: %s\n", value);
 	return -1;
 }
 
@@ -124,8 +118,6 @@ static int parse_callchain_value(const char *value)
 		callchain_param.value = CCVAL_COUNT;
 		return 0;
 	}
-
-	pr_err("Invalid callchain config key: %s\n", value);
 	return -1;
 }
 
@@ -319,12 +311,27 @@ int perf_callchain_config(const char *var, const char *value)
 		return ret;
 	}
 
-	if (!strcmp(var, "print-type"))
-		return parse_callchain_mode(value);
-	if (!strcmp(var, "order"))
-		return parse_callchain_order(value);
-	if (!strcmp(var, "sort-key"))
-		return parse_callchain_sort_key(value);
+	if (!strcmp(var, "print-type")){
+		int ret;
+		ret = parse_callchain_mode(value);
+		if (ret == -1)
+			pr_err("Invalid callchain mode: %s\n", value);
+		return ret;
+	}
+	if (!strcmp(var, "order")){
+		int ret;
+		ret = parse_callchain_order(value);
+		if (ret == -1)
+			pr_err("Invalid callchain order: %s\n", value);
+		return ret;
+	}
+	if (!strcmp(var, "sort-key")){
+		int ret;
+		ret = parse_callchain_sort_key(value);
+		if (ret == -1)
+			pr_err("Invalid callchain sort key: %s\n", value);
+		return ret;
+	}
+
 	if (!strcmp(var, "threshold")) {
 		callchain_param.min_percent = strtod(value, &endptr);
 		if (value == endptr) {
...
@@ -5,6 +5,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <linux/refcount.h>
+#include "rwsem.h"
 
 struct comm_str {
 	char *str;
@@ -14,6 +15,7 @@ struct comm_str {
 
 /* Should perhaps be moved to struct machine */
 static struct rb_root comm_str_root;
+static struct rw_semaphore comm_str_lock = {.lock = PTHREAD_RWLOCK_INITIALIZER,};
 
 static struct comm_str *comm_str__get(struct comm_str *cs)
 {
@@ -25,7 +27,9 @@ static struct comm_str *comm_str__get(struct comm_str *cs)
 static void comm_str__put(struct comm_str *cs)
 {
 	if (cs && refcount_dec_and_test(&cs->refcnt)) {
+		down_write(&comm_str_lock);
 		rb_erase(&cs->rb_node, &comm_str_root);
+		up_write(&comm_str_lock);
 		zfree(&cs->str);
 		free(cs);
 	}
@@ -50,7 +54,8 @@ static struct comm_str *comm_str__alloc(const char *str)
 	return cs;
 }
 
-static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root)
+static
+struct comm_str *__comm_str__findnew(const char *str, struct rb_root *root)
 {
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent = NULL;
@@ -81,6 +86,17 @@ static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root)
 	return new;
 }
 
+static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root)
+{
+	struct comm_str *cs;
+
+	down_write(&comm_str_lock);
+	cs = __comm_str__findnew(str, root);
+	up_write(&comm_str_lock);
+
+	return cs;
+}
+
 struct comm *comm__new(const char *str, u64 timestamp, bool exec)
 {
 	struct comm *comm = zalloc(sizeof(*comm));
...
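Note that the findnew wrapper takes comm_str_lock in write mode even for pure lookups: a miss falls through to an insertion, so the rb tree can be modified on any call, and comm_str__put() erases nodes under the same semaphore, which is what makes concurrent synthesizer threads safe here.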
@@ -678,23 +678,21 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
 	return err;
 }
 
-int perf_event__synthesize_threads(struct perf_tool *tool,
-				   perf_event__handler_t process,
-				   struct machine *machine,
-				   bool mmap_data,
-				   unsigned int proc_map_timeout)
+static int __perf_event__synthesize_threads(struct perf_tool *tool,
+					    perf_event__handler_t process,
+					    struct machine *machine,
+					    bool mmap_data,
+					    unsigned int proc_map_timeout,
+					    struct dirent **dirent,
+					    int start,
+					    int num)
 {
 	union perf_event *comm_event, *mmap_event, *fork_event;
 	union perf_event *namespaces_event;
-	char proc_path[PATH_MAX];
-	struct dirent **dirent;
 	int err = -1;
 	char *end;
 	pid_t pid;
-	int n, i;
-
-	if (machine__is_default_guest(machine))
-		return 0;
+	int i;
 
 	comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
 	if (comm_event == NULL)
@@ -714,19 +712,14 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
 	if (namespaces_event == NULL)
 		goto out_free_fork;
 
-	snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
-	n = scandir(proc_path, &dirent, 0, alphasort);
-	if (n < 0)
-		goto out_free_namespaces;
-
-	for (i = 0; i < n; i++) {
+	for (i = start; i < start + num; i++) {
 		if (!isdigit(dirent[i]->d_name[0]))
 			continue;
 
 		pid = (pid_t)strtol(dirent[i]->d_name, &end, 10);
 		/* only interested in proper numerical dirents */
-		if (!*end) {
-			/*
-			 * We may race with exiting thread, so don't stop just because
-			 * one thread couldn't be synthesized.
-			 */
+		if (*end)
+			continue;
+		/*
+		 * We may race with exiting thread, so don't stop just because
+		 * one thread couldn't be synthesized.
+		 */
@@ -736,12 +729,8 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
-			__event__synthesize_thread(comm_event, mmap_event, fork_event,
-						   namespaces_event, pid, 1, process,
-						   tool, machine, mmap_data,
-						   proc_map_timeout);
-		}
-		free(dirent[i]);
+		__event__synthesize_thread(comm_event, mmap_event, fork_event,
+					   namespaces_event, pid, 1, process,
+					   tool, machine, mmap_data,
+					   proc_map_timeout);
 	}
-	free(dirent);
+
 	err = 0;
 
-out_free_namespaces:
 	free(namespaces_event);
 out_free_fork:
 	free(fork_event);
@@ -753,6 +742,118 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
 	return err;
 }
 
+struct synthesize_threads_arg {
+	struct perf_tool *tool;
+	perf_event__handler_t process;
+	struct machine *machine;
+	bool mmap_data;
+	unsigned int proc_map_timeout;
+	struct dirent **dirent;
+	int num;
+	int start;
+};
+
+static void *synthesize_threads_worker(void *arg)
+{
+	struct synthesize_threads_arg *args = arg;
+
+	__perf_event__synthesize_threads(args->tool, args->process,
+					 args->machine, args->mmap_data,
+					 args->proc_map_timeout, args->dirent,
+					 args->start, args->num);
+	return NULL;
+}
+
+int perf_event__synthesize_threads(struct perf_tool *tool,
+				   perf_event__handler_t process,
+				   struct machine *machine,
+				   bool mmap_data,
+				   unsigned int proc_map_timeout,
+				   unsigned int nr_threads_synthesize)
+{
+	struct synthesize_threads_arg *args = NULL;
+	pthread_t *synthesize_threads = NULL;
+	char proc_path[PATH_MAX];
+	struct dirent **dirent;
+	int num_per_thread;
+	int m, n, i, j;
+	int thread_nr;
+	int base = 0;
+	int err = -1;
+
+	if (machine__is_default_guest(machine))
+		return 0;
+
+	snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
+	n = scandir(proc_path, &dirent, 0, alphasort);
+	if (n < 0)
+		return err;
+
+	if (nr_threads_synthesize == UINT_MAX)
+		thread_nr = sysconf(_SC_NPROCESSORS_ONLN);
+	else
+		thread_nr = nr_threads_synthesize;
+
+	if (thread_nr <= 1) {
+		err = __perf_event__synthesize_threads(tool, process,
+						       machine, mmap_data,
+						       proc_map_timeout,
+						       dirent, base, n);
+		goto free_dirent;
+	}
+	if (thread_nr > n)
+		thread_nr = n;
+
+	synthesize_threads = calloc(sizeof(pthread_t), thread_nr);
+	if (synthesize_threads == NULL)
+		goto free_dirent;
+
+	args = calloc(sizeof(*args), thread_nr);
+	if (args == NULL)
+		goto free_threads;
+
+	num_per_thread = n / thread_nr;
+	m = n % thread_nr;
+
+	for (i = 0; i < thread_nr; i++) {
+		args[i].tool = tool;
+		args[i].process = process;
+		args[i].machine = machine;
+		args[i].mmap_data = mmap_data;
+		args[i].proc_map_timeout = proc_map_timeout;
+		args[i].dirent = dirent;
+	}
+	for (i = 0; i < m; i++) {
+		args[i].num = num_per_thread + 1;
+		args[i].start = i * args[i].num;
+	}
+	if (i != 0)
+		base = args[i-1].start + args[i-1].num;
+	for (j = i; j < thread_nr; j++) {
+		args[j].num = num_per_thread;
+		args[j].start = base + (j - i) * args[i].num;
+	}
+
+	for (i = 0; i < thread_nr; i++) {
+		if (pthread_create(&synthesize_threads[i], NULL,
+				   synthesize_threads_worker, &args[i]))
+			goto out_join;
+	}
+	err = 0;
+out_join:
+	for (i = 0; i < thread_nr; i++)
+		pthread_join(synthesize_threads[i], NULL);
+	free(args);
+free_threads:
+	free(synthesize_threads);
+free_dirent:
+	for (i = 0; i < n; i++)
+		free(dirent[i]);
+	free(dirent);
+
+	return err;
+}
+
 struct process_symbol_args {
 	const char *name;
 	u64 start;
...
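The split gives the first n % thread_nr workers one extra dirent each. For example, with n = 10 /proc entries and thread_nr = 4: num_per_thread = 2 and m = 2, so the four workers scan the ranges [0,3), [3,6), [6,8) and [8,10).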
@@ -680,7 +680,8 @@ int perf_event__synthesize_cpu_map(struct perf_tool *tool,
 int perf_event__synthesize_threads(struct perf_tool *tool,
 				   perf_event__handler_t process,
 				   struct machine *machine, bool mmap_data,
-				   unsigned int proc_map_timeout);
+				   unsigned int proc_map_timeout,
+				   unsigned int nr_threads_synthesize);
 int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
 				       perf_event__handler_t process,
 				       struct machine *machine);
...
@@ -271,12 +271,17 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
 	return evsel;
 }
 
+static bool perf_event_can_profile_kernel(void)
+{
+	return geteuid() == 0 || perf_event_paranoid() == -1;
+}
+
 struct perf_evsel *perf_evsel__new_cycles(bool precise)
 {
 	struct perf_event_attr attr = {
 		.type = PERF_TYPE_HARDWARE,
 		.config = PERF_COUNT_HW_CPU_CYCLES,
-		.exclude_kernel = geteuid() != 0,
+		.exclude_kernel = !perf_event_can_profile_kernel(),
 	};
 	struct perf_evsel *evsel;
...
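perf_event_paranoid() reads /proc/sys/kernel/perf_event_paranoid, where -1 allows unprivileged users to use (almost) all events, so the default cycles event now only sets exclude_kernel when the user is neither root nor running under that relaxed setting.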
@@ -2218,12 +2218,16 @@ int machines__for_each_thread(struct machines *machines,
 int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
 				  struct target *target, struct thread_map *threads,
 				  perf_event__handler_t process, bool data_mmap,
-				  unsigned int proc_map_timeout)
+				  unsigned int proc_map_timeout,
+				  unsigned int nr_threads_synthesize)
 {
 	if (target__has_task(target))
 		return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap, proc_map_timeout);
 	else if (target__has_cpu(target))
-		return perf_event__synthesize_threads(tool, process, machine, data_mmap, proc_map_timeout);
+		return perf_event__synthesize_threads(tool, process,
+						      machine, data_mmap,
+						      proc_map_timeout,
+						      nr_threads_synthesize);
 	/* command specified */
 	return 0;
 }
...
@@ -257,15 +257,18 @@ int machines__for_each_thread(struct machines *machines,
 int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
 				  struct target *target, struct thread_map *threads,
 				  perf_event__handler_t process, bool data_mmap,
-				  unsigned int proc_map_timeout);
+				  unsigned int proc_map_timeout,
+				  unsigned int nr_threads_synthesize);
 static inline
 int machine__synthesize_threads(struct machine *machine, struct target *target,
 				struct thread_map *threads, bool data_mmap,
-				unsigned int proc_map_timeout)
+				unsigned int proc_map_timeout,
+				unsigned int nr_threads_synthesize)
 {
 	return __machine__synthesize_threads(machine, NULL, target, threads,
 					     perf_event__process, data_mmap,
-					     proc_map_timeout);
+					     proc_map_timeout,
+					     nr_threads_synthesize);
 }
 
 pid_t machine__get_current_tid(struct machine *machine, int cpu);
...
@@ -810,12 +810,6 @@ static u64 ref_reloc(struct kmap *kmap)
 void __weak arch__sym_update(struct symbol *s __maybe_unused,
 			     GElf_Sym *sym __maybe_unused) { }
 
-void __weak arch__adjust_sym_map_offset(GElf_Sym *sym, GElf_Shdr *shdr,
-					struct map *map __maybe_unused)
-{
-	sym->st_value -= shdr->sh_addr - shdr->sh_offset;
-}
-
 int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 		  struct symsrc *runtime_ss, int kmodule)
 {
@@ -996,7 +990,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 
 		/* Adjust symbol to map to file offset */
 		if (adjust_kernel_syms)
-			arch__adjust_sym_map_offset(&sym, &shdr, map);
+			sym.st_value -= shdr.sh_addr - shdr.sh_offset;
 
 		if (strcmp(section_name,
 			   (curr_dso->short_name +
...
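The now-inlined adjustment converts a symbol's virtual address into a file offset. With made-up numbers for illustration: if a section has sh_addr = 0xffffffff81000000 and sh_offset = 0x200000, a symbol at st_value = 0xffffffff81001000 becomes file offset 0x201000.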
@@ -344,9 +344,6 @@ int setup_intlist(struct intlist **list, const char *list_str,
 #ifdef HAVE_LIBELF_SUPPORT
 bool elf__needs_adjust_symbols(GElf_Ehdr ehdr);
 void arch__sym_update(struct symbol *s, GElf_Sym *sym);
-void arch__adjust_sym_map_offset(GElf_Sym *sym,
-				 GElf_Shdr *shdr __maybe_unused,
-				 struct map *map __maybe_unused);
 #endif
 
 #define SYMBOL_A 0
...
@@ -15,9 +15,9 @@
 #include "syscalltbl.h"
 #include <stdlib.h>
+#include <linux/compiler.h>
 
 #ifdef HAVE_SYSCALL_TABLE
-#include <linux/compiler.h>
 #include <string.h>
 #include "string2.h"
 #include "util.h"
...
@@ -45,6 +45,8 @@ struct thread *thread__new(pid_t pid, pid_t tid)
 		thread->cpu = -1;
 		INIT_LIST_HEAD(&thread->namespaces_list);
 		INIT_LIST_HEAD(&thread->comm_list);
+		init_rwsem(&thread->namespaces_lock);
+		init_rwsem(&thread->comm_lock);
 
 		comm_str = malloc(32);
 		if (!comm_str)
@@ -83,18 +85,26 @@ void thread__delete(struct thread *thread)
 		map_groups__put(thread->mg);
 		thread->mg = NULL;
 	}
+	down_write(&thread->namespaces_lock);
 	list_for_each_entry_safe(namespaces, tmp_namespaces,
 				 &thread->namespaces_list, list) {
 		list_del(&namespaces->list);
 		namespaces__free(namespaces);
 	}
+	up_write(&thread->namespaces_lock);
+
+	down_write(&thread->comm_lock);
 	list_for_each_entry_safe(comm, tmp_comm, &thread->comm_list, list) {
 		list_del(&comm->list);
 		comm__free(comm);
 	}
+	up_write(&thread->comm_lock);
+
 	unwind__finish_access(thread);
 	nsinfo__zput(thread->nsinfo);
 
+	exit_rwsem(&thread->namespaces_lock);
+	exit_rwsem(&thread->comm_lock);
 	free(thread);
 }
 
@@ -125,7 +135,7 @@ struct namespaces *thread__namespaces(const struct thread *thread)
 	return list_first_entry(&thread->namespaces_list, struct namespaces, list);
 }
 
-int thread__set_namespaces(struct thread *thread, u64 timestamp,
-			   struct namespaces_event *event)
+static int __thread__set_namespaces(struct thread *thread, u64 timestamp,
+				    struct namespaces_event *event)
 {
 	struct namespaces *new, *curr = thread__namespaces(thread);
@@ -149,6 +159,17 @@ int thread__set_namespaces(struct thread *thread, u64 timestamp,
 	return 0;
 }
 
+int thread__set_namespaces(struct thread *thread, u64 timestamp,
+			   struct namespaces_event *event)
+{
+	int ret;
+
+	down_write(&thread->namespaces_lock);
+	ret = __thread__set_namespaces(thread, timestamp, event);
+	up_write(&thread->namespaces_lock);
+
+	return ret;
+}
+
 struct comm *thread__comm(const struct thread *thread)
 {
 	if (list_empty(&thread->comm_list))
@@ -170,8 +191,8 @@ struct comm *thread__exec_comm(const struct thread *thread)
 	return last;
 }
 
-int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp,
-		       bool exec)
+static int ____thread__set_comm(struct thread *thread, const char *str,
+				u64 timestamp, bool exec)
 {
 	struct comm *new, *curr = thread__comm(thread);
@@ -195,6 +216,17 @@ int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp,
 	return 0;
 }
 
+int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp,
+		       bool exec)
+{
+	int ret;
+
+	down_write(&thread->comm_lock);
+	ret = ____thread__set_comm(thread, str, timestamp, exec);
+	up_write(&thread->comm_lock);
+
+	return ret;
+}
+
 int thread__set_comm_from_proc(struct thread *thread)
 {
 	char path[64];
@@ -212,7 +244,7 @@ int thread__set_comm_from_proc(struct thread *thread)
 	return err;
 }
 
-const char *thread__comm_str(const struct thread *thread)
+static const char *__thread__comm_str(const struct thread *thread)
 {
 	const struct comm *comm = thread__comm(thread);
 
@@ -222,6 +254,17 @@ const char *thread__comm_str(const struct thread *thread)
 	return comm__str(comm);
 }
 
+const char *thread__comm_str(const struct thread *thread)
+{
+	const char *str;
+
+	down_read((struct rw_semaphore *)&thread->comm_lock);
+	str = __thread__comm_str(thread);
+	up_read((struct rw_semaphore *)&thread->comm_lock);
+
+	return str;
+}
+
 /* CHECKME: it should probably better return the max comm len from its comm list */
 int thread__comm_len(struct thread *thread)
 {
...
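The casts in thread__comm_str() are needed because the function keeps its const-qualified signature for existing callers while down_read()/up_read() take a mutable rw_semaphore pointer; the function itself only reads the comm list.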
@@ -9,6 +9,7 @@
 #include "symbol.h"
 #include <strlist.h>
 #include <intlist.h>
+#include "rwsem.h"
 
 struct thread_stack;
 struct unwind_libunwind_ops;
@@ -29,7 +30,9 @@ struct thread {
 	int comm_len;
 	bool dead; /* if set thread has exited */
 	struct list_head namespaces_list;
+	struct rw_semaphore namespaces_lock;
 	struct list_head comm_list;
+	struct rw_semaphore comm_lock;
 	u64 db_id;
 
 	void *priv;
...
@@ -37,6 +37,7 @@ struct perf_top {
 	int sym_pcnt_filter;
 	const char *sym_filter;
 	float min_percent;
+	unsigned int nr_threads_synthesize;
 };
 
 #define CONSOLE_CLEAR ""
...