Commit 49be4fb2 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'perf-tools-fixes-for-v6.3-1-2023-03-09' of...

Merge tag 'perf-tools-fixes-for-v6.3-1-2023-03-09' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull perf tools fixes from Arnaldo Carvalho de Melo:

 - Add Adrian Hunter to MAINTAINERS as a perf tools reviewer

 - Sync various tools/ copies of kernel headers with the kernel sources,
   this time trying to avoid first merging with upstream to then update
   but instead copy from upstream so that a merge is avoided and the end
   result after merging this pull request is the one expected,
   tools/perf/check-headers.sh (mostly) happy, less warnings while
   building tools/perf/

 - Fix counting when initial delay configured by setting
   perf_attr.enable_on_exec when starting workloads from the perf
   command line

 - Don't avoid emitting a PERF_RECORD_MMAP2 in 'perf inject
   --buildid-all' when that record comes with a build-id, otherwise we
   end up not being able to resolve symbols

 - Don't use comma as the CSV output separator the "stat+csv_output"
   test, as comma can appear on some tests as a modifier for an event,
   use @ instead, ditto for the JSON linter test

 - The offcpu test was looking for some bits being set on
   task_struct->prev_state without masking other bits not important for
   this specific 'perf test', fix it

* tag 'perf-tools-fixes-for-v6.3-1-2023-03-09' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux:
  perf tools: Add Adrian Hunter to MAINTAINERS as a reviewer
  tools headers UAPI: Sync linux/perf_event.h with the kernel sources
  tools headers x86 cpufeatures: Sync with the kernel sources
  tools include UAPI: Sync linux/vhost.h with the kernel sources
  tools arch x86: Sync the msr-index.h copy with the kernel sources
  tools headers kvm: Sync uapi/{asm/linux} kvm.h headers with the kernel sources
  tools include UAPI: Synchronize linux/fcntl.h with the kernel sources
  tools headers: Synchronize {linux,vdso}/bits.h with the kernel sources
  tools headers UAPI: Sync linux/prctl.h with the kernel sources
  tools headers: Update the copy of x86's mem{cpy,set}_64.S used in 'perf bench'
  perf stat: Fix counting when initial delay configured
  tools headers svm: Sync svm headers with the kernel sources
  perf test: Avoid counting commas in json linter
  perf tests stat+csv_output: Switch CSV separator to @
  perf inject: Fix --buildid-all not to eat up MMAP2
  tools arch x86: Sync the msr-index.h copy with the kernel sources
  perf test: Fix offcpu test prev_state check
parents 44889ba5 5b201a82
......@@ -16391,6 +16391,7 @@ R: Alexander Shishkin <alexander.shishkin@linux.intel.com>
R: Jiri Olsa <jolsa@kernel.org>
R: Namhyung Kim <namhyung@kernel.org>
R: Ian Rogers <irogers@google.com>
R: Adrian Hunter <adrian.hunter@intel.com>
L: linux-perf-users@vger.kernel.org
L: linux-kernel@vger.kernel.org
S: Supported
......
......@@ -109,6 +109,7 @@ struct kvm_regs {
#define KVM_ARM_VCPU_SVE 4 /* enable SVE for this CPU */
#define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */
#define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */
#define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */
struct kvm_vcpu_init {
__u32 target;
......
......@@ -13,7 +13,7 @@
/*
* Defines x86 CPU feature bits
*/
#define NCAPINTS 20 /* N 32-bit words worth of info */
#define NCAPINTS 21 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */
/*
......
......@@ -124,6 +124,7 @@
#define DISABLED_MASK17 0
#define DISABLED_MASK18 0
#define DISABLED_MASK19 0
#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20)
#define DISABLED_MASK20 0
#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
#endif /* _ASM_X86_DISABLED_FEATURES_H */
......@@ -25,6 +25,7 @@
#define _EFER_SVME 12 /* Enable virtualization */
#define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */
#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */
#define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */
#define EFER_SCE (1<<_EFER_SCE)
#define EFER_LME (1<<_EFER_LME)
......@@ -33,6 +34,7 @@
#define EFER_SVME (1<<_EFER_SVME)
#define EFER_LMSLE (1<<_EFER_LMSLE)
#define EFER_FFXSR (1<<_EFER_FFXSR)
#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS)
/* Intel MSRs. Some also available on other CPUs */
......@@ -49,6 +51,10 @@
#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
/* A mask for bits which the kernel toggles when controlling mitigations */
#define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \
| SPEC_CTRL_RRSBA_DIS_S)
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
......@@ -189,6 +195,9 @@
#define MSR_TURBO_RATIO_LIMIT1 0x000001ae
#define MSR_TURBO_RATIO_LIMIT2 0x000001af
#define MSR_SNOOP_RSP_0 0x00001328
#define MSR_SNOOP_RSP_1 0x00001329
#define MSR_LBR_SELECT 0x000001c8
#define MSR_LBR_TOS 0x000001c9
......@@ -566,6 +575,26 @@
#define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
#define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
/* SNP feature bits enabled by the hypervisor */
#define MSR_AMD64_SNP_VTOM BIT_ULL(3)
#define MSR_AMD64_SNP_REFLECT_VC BIT_ULL(4)
#define MSR_AMD64_SNP_RESTRICTED_INJ BIT_ULL(5)
#define MSR_AMD64_SNP_ALT_INJ BIT_ULL(6)
#define MSR_AMD64_SNP_DEBUG_SWAP BIT_ULL(7)
#define MSR_AMD64_SNP_PREVENT_HOST_IBS BIT_ULL(8)
#define MSR_AMD64_SNP_BTB_ISOLATION BIT_ULL(9)
#define MSR_AMD64_SNP_VMPL_SSS BIT_ULL(10)
#define MSR_AMD64_SNP_SECURE_TSC BIT_ULL(11)
#define MSR_AMD64_SNP_VMGEXIT_PARAM BIT_ULL(12)
#define MSR_AMD64_SNP_IBS_VIRT BIT_ULL(14)
#define MSR_AMD64_SNP_VMSA_REG_PROTECTION BIT_ULL(16)
#define MSR_AMD64_SNP_SMT_PROTECTION BIT_ULL(17)
/* SNP feature bits reserved for future use. */
#define MSR_AMD64_SNP_RESERVED_BIT13 BIT_ULL(13)
#define MSR_AMD64_SNP_RESERVED_BIT15 BIT_ULL(15)
#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, 18)
#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
/* AMD Collaborative Processor Performance Control MSRs */
......@@ -1061,6 +1090,8 @@
/* - AMD: */
#define MSR_IA32_MBA_BW_BASE 0xc0000200
#define MSR_IA32_SMBA_BW_BASE 0xc0000280
#define MSR_IA32_EVT_CFG_BASE 0xc0000400
/* MSR_IA32_VMX_MISC bits */
#define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14)
......
......@@ -98,6 +98,7 @@
#define REQUIRED_MASK17 0
#define REQUIRED_MASK18 0
#define REQUIRED_MASK19 0
#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20)
#define REQUIRED_MASK20 0
#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
#endif /* _ASM_X86_REQUIRED_FEATURES_H */
......@@ -9,6 +9,7 @@
#include <linux/types.h>
#include <linux/ioctl.h>
#include <linux/stddef.h>
#define KVM_PIO_PAGE_OFFSET 1
#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
......@@ -507,8 +508,8 @@ struct kvm_nested_state {
* KVM_{GET,PUT}_NESTED_STATE ioctl values.
*/
union {
struct kvm_vmx_nested_state_data vmx[0];
struct kvm_svm_nested_state_data svm[0];
__DECLARE_FLEX_ARRAY(struct kvm_vmx_nested_state_data, vmx);
__DECLARE_FLEX_ARRAY(struct kvm_svm_nested_state_data, svm);
} data;
};
......@@ -525,6 +526,35 @@ struct kvm_pmu_event_filter {
#define KVM_PMU_EVENT_ALLOW 0
#define KVM_PMU_EVENT_DENY 1
#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS BIT(0)
#define KVM_PMU_EVENT_FLAGS_VALID_MASK (KVM_PMU_EVENT_FLAG_MASKED_EVENTS)
/*
* Masked event layout.
* Bits Description
* ---- -----------
* 7:0 event select (low bits)
* 15:8 umask match
* 31:16 unused
* 35:32 event select (high bits)
* 36:54 unused
* 55 exclude bit
* 63:56 umask mask
*/
#define KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, exclude) \
(((event_select) & 0xFFULL) | (((event_select) & 0XF00ULL) << 24) | \
(((mask) & 0xFFULL) << 56) | \
(((match) & 0xFFULL) << 8) | \
((__u64)(!!(exclude)) << 55))
#define KVM_PMU_MASKED_ENTRY_EVENT_SELECT \
(GENMASK_ULL(7, 0) | GENMASK_ULL(35, 32))
#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (GENMASK_ULL(63, 56))
#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (GENMASK_ULL(15, 8))
#define KVM_PMU_MASKED_ENTRY_EXCLUDE (BIT_ULL(55))
#define KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT (56)
/* for KVM_{GET,SET,HAS}_DEVICE_ATTR */
#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */
#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */
......
......@@ -116,6 +116,12 @@
#define SVM_VMGEXIT_AP_CREATE 1
#define SVM_VMGEXIT_AP_DESTROY 2
#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd
#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe
#define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \
/* SW_EXITINFO1[3:0] */ \
(((((u64)reason_set) & 0xf)) | \
/* SW_EXITINFO1[11:4] */ \
((((u64)reason_code) & 0xff) << 4))
#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff
/* Exit code reserved for hypervisor/software use */
......
......@@ -7,7 +7,7 @@
#include <asm/alternative.h>
#include <asm/export.h>
.pushsection .noinstr.text, "ax"
.section .noinstr.text, "ax"
/*
* We build a jump to memcpy_orig by default which gets NOPped out on
......@@ -42,7 +42,7 @@ SYM_TYPED_FUNC_START(__memcpy)
SYM_FUNC_END(__memcpy)
EXPORT_SYMBOL(__memcpy)
SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)
SYM_FUNC_ALIAS(memcpy, __memcpy)
EXPORT_SYMBOL(memcpy)
/*
......@@ -183,4 +183,3 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
RET
SYM_FUNC_END(memcpy_orig)
.popsection
......@@ -6,6 +6,8 @@
#include <asm/alternative.h>
#include <asm/export.h>
.section .noinstr.text, "ax"
/*
* ISO C memset - set a memory block to a byte value. This function uses fast
* string to get better performance than the original function. The code is
......@@ -43,7 +45,7 @@ SYM_FUNC_START(__memset)
SYM_FUNC_END(__memset)
EXPORT_SYMBOL(__memset)
SYM_FUNC_ALIAS_WEAK(memset, __memset)
SYM_FUNC_ALIAS(memset, __memset)
EXPORT_SYMBOL(memset)
/*
......
......@@ -6,7 +6,6 @@
#include <vdso/bits.h>
#include <asm/bitsperlong.h>
#define BIT_ULL(nr) (ULL(1) << (nr))
#define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG))
#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
#define BIT_ULL_MASK(nr) (ULL(1) << ((nr) % BITS_PER_LONG_LONG))
......
......@@ -43,6 +43,7 @@
#define F_SEAL_GROW 0x0004 /* prevent file from growing */
#define F_SEAL_WRITE 0x0008 /* prevent writes */
#define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes while mapped */
#define F_SEAL_EXEC 0x0020 /* prevent chmod modifying exec bits */
/* (1U << 31) is reserved for signed error codes */
/*
......
......@@ -583,6 +583,8 @@ struct kvm_s390_mem_op {
struct {
__u8 ar; /* the access register number */
__u8 key; /* access key, ignored if flag unset */
__u8 pad1[6]; /* ignored */
__u64 old_addr; /* ignored if cmpxchg flag unset */
};
__u32 sida_offset; /* offset into the sida */
__u8 reserved[32]; /* ignored */
......@@ -595,11 +597,17 @@ struct kvm_s390_mem_op {
#define KVM_S390_MEMOP_SIDA_WRITE 3
#define KVM_S390_MEMOP_ABSOLUTE_READ 4
#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5
#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6
/* flags for kvm_s390_mem_op->flags */
#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0)
#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1)
#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2)
/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */
#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0)
#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1)
/* for KVM_INTERRUPT */
struct kvm_interrupt {
/* in */
......@@ -1175,6 +1183,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223
#define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224
#define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
#define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226
#ifdef KVM_CAP_IRQ_ROUTING
......
......@@ -374,6 +374,7 @@ enum perf_event_read_format {
#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */
#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */
#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */
#define PERF_ATTR_SIZE_VER8 136 /* add: config3 */
/*
* Hardware event_id to monitor via a performance monitoring event:
......@@ -515,6 +516,8 @@ struct perf_event_attr {
* truncated accordingly on 32 bit architectures.
*/
__u64 sig_data;
__u64 config3; /* extension of config2 */
};
/*
......
......@@ -281,6 +281,12 @@ struct prctl_mm_map {
# define PR_SME_VL_LEN_MASK 0xffff
# define PR_SME_VL_INHERIT (1 << 17) /* inherit across exec */
/* Memory deny write / execute */
#define PR_SET_MDWE 65
# define PR_MDWE_REFUSE_EXEC_GAIN 1
#define PR_GET_MDWE 66
#define PR_SET_VMA 0x53564d41
# define PR_SET_VMA_ANON_NAME 0
......
......@@ -180,4 +180,12 @@
*/
#define VHOST_VDPA_SUSPEND _IO(VHOST_VIRTIO, 0x7D)
/* Resume a device so it can resume processing virtqueue requests
*
* After the return of this ioctl the device will have restored all the
* necessary states and it is fully operational to continue processing the
* virtqueue descriptors.
*/
#define VHOST_VDPA_RESUME _IO(VHOST_VIRTIO, 0x7E)
#endif
......@@ -5,5 +5,6 @@
#include <vdso/const.h>
#define BIT(nr) (UL(1) << (nr))
#define BIT_ULL(nr) (ULL(1) << (nr))
#endif /* __VDSO_BITS_H */
......@@ -538,6 +538,7 @@ static int perf_event__repipe_buildid_mmap2(struct perf_tool *tool,
dso->hit = 1;
}
dso__put(dso);
perf_event__repipe(tool, event, sample, machine);
return 0;
}
......
......@@ -539,12 +539,7 @@ static int enable_counters(void)
return err;
}
/*
* We need to enable counters only if:
* - we don't have tracee (attaching to task or cpu)
* - we have initial delay configured
*/
if (!target__none(&target)) {
if (!target__enable_on_exec(&target)) {
if (!all_counters_use_bpf)
evlist__enable(evsel_list);
}
......@@ -914,7 +909,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
return err;
}
if (stat_config.initial_delay) {
if (target.initial_delay) {
pr_info(EVLIST_DISABLED_MSG);
} else {
err = enable_counters();
......@@ -926,8 +921,8 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
if (forks)
evlist__start_workload(evsel_list);
if (stat_config.initial_delay > 0) {
usleep(stat_config.initial_delay * USEC_PER_MSEC);
if (target.initial_delay > 0) {
usleep(target.initial_delay * USEC_PER_MSEC);
err = enable_counters();
if (err)
return -1;
......@@ -1248,7 +1243,7 @@ static struct option stat_options[] = {
"aggregate counts per thread", AGGR_THREAD),
OPT_SET_UINT(0, "per-node", &stat_config.aggr_mode,
"aggregate counts per numa node", AGGR_NODE),
OPT_INTEGER('D', "delay", &stat_config.initial_delay,
OPT_INTEGER('D', "delay", &target.initial_delay,
"ms to wait before starting measurement after program start (-1: start with events disabled)"),
OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
"Only print computed metrics. No raw values", enable_metric_only),
......
......@@ -40,19 +40,6 @@ def is_counter_value(num):
return isfloat(num) or num == '<not counted>' or num == '<not supported>'
def check_json_output(expected_items):
if expected_items != -1:
for line in Lines:
if 'failed' not in line:
count = 0
count = line.count(',')
if count != expected_items and count >= 1 and count <= 3 and 'metric-value' in line:
# Events that generate >1 metric may have isolated metric
# values and possibly other prefixes like interval, core and
# aggregate-number.
continue
if count != expected_items:
raise RuntimeError(f'wrong number of fields. counted {count} expected {expected_items}'
f' in \'{line}\'')
checks = {
'aggregate-number': lambda x: isfloat(x),
'core': lambda x: True,
......@@ -73,6 +60,16 @@ def check_json_output(expected_items):
}
input = '[\n' + ','.join(Lines) + '\n]'
for item in json.loads(input):
if expected_items != -1:
count = len(item)
if count != expected_items and count >= 1 and count <= 4 and 'metric-value' in item:
# Events that generate >1 metric may have isolated metric
# values and possibly other prefixes like interval, core and
# aggregate-number.
pass
elif count != expected_items:
raise RuntimeError(f'wrong number of fields. counted {count} expected {expected_items}'
f' in \'{item}\'')
for key, value in item.items():
if key not in checks:
raise RuntimeError(f'Unexpected key: key={key} value={value}')
......@@ -82,11 +79,11 @@ def check_json_output(expected_items):
try:
if args.no_args or args.system_wide or args.event:
expected_items = 6
elif args.interval or args.per_thread or args.system_wide_no_aggr:
expected_items = 7
elif args.per_core or args.per_socket or args.per_node or args.per_die:
elif args.interval or args.per_thread or args.system_wide_no_aggr:
expected_items = 8
elif args.per_core or args.per_socket or args.per_node or args.per_die:
expected_items = 9
else:
# If no option is specified, don't check the number of items.
expected_items = -1
......
......@@ -7,6 +7,7 @@
set -e
skip_test=0
csv_sep=@
function commachecker()
{
......@@ -34,7 +35,7 @@ function commachecker()
[ "$x" = "Failed" ] && continue
# Count the number of commas
x=$(echo $line | tr -d -c ',')
x=$(echo $line | tr -d -c $csv_sep)
cnt="${#x}"
# echo $line $cnt
[[ ! "$cnt" =~ $exp ]] && {
......@@ -54,7 +55,7 @@ function ParanoidAndNotRoot()
check_no_args()
{
echo -n "Checking CSV output: no args "
perf stat -x, true 2>&1 | commachecker --no-args
perf stat -x$csv_sep true 2>&1 | commachecker --no-args
echo "[Success]"
}
......@@ -66,7 +67,7 @@ check_system_wide()
echo "[Skip] paranoid and not root"
return
fi
perf stat -x, -a true 2>&1 | commachecker --system-wide
perf stat -x$csv_sep -a true 2>&1 | commachecker --system-wide
echo "[Success]"
}
......@@ -79,14 +80,14 @@ check_system_wide_no_aggr()
return
fi
echo -n "Checking CSV output: system wide no aggregation "
perf stat -x, -A -a --no-merge true 2>&1 | commachecker --system-wide-no-aggr
perf stat -x$csv_sep -A -a --no-merge true 2>&1 | commachecker --system-wide-no-aggr
echo "[Success]"
}
check_interval()
{
echo -n "Checking CSV output: interval "
perf stat -x, -I 1000 true 2>&1 | commachecker --interval
perf stat -x$csv_sep -I 1000 true 2>&1 | commachecker --interval
echo "[Success]"
}
......@@ -94,7 +95,7 @@ check_interval()
check_event()
{
echo -n "Checking CSV output: event "
perf stat -x, -e cpu-clock true 2>&1 | commachecker --event
perf stat -x$csv_sep -e cpu-clock true 2>&1 | commachecker --event
echo "[Success]"
}
......@@ -106,7 +107,7 @@ check_per_core()
echo "[Skip] paranoid and not root"
return
fi
perf stat -x, --per-core -a true 2>&1 | commachecker --per-core
perf stat -x$csv_sep --per-core -a true 2>&1 | commachecker --per-core
echo "[Success]"
}
......@@ -118,7 +119,7 @@ check_per_thread()
echo "[Skip] paranoid and not root"
return
fi
perf stat -x, --per-thread -a true 2>&1 | commachecker --per-thread
perf stat -x$csv_sep --per-thread -a true 2>&1 | commachecker --per-thread
echo "[Success]"
}
......@@ -130,7 +131,7 @@ check_per_die()
echo "[Skip] paranoid and not root"
return
fi
perf stat -x, --per-die -a true 2>&1 | commachecker --per-die
perf stat -x$csv_sep --per-die -a true 2>&1 | commachecker --per-die
echo "[Success]"
}
......@@ -142,7 +143,7 @@ check_per_node()
echo "[Skip] paranoid and not root"
return
fi
perf stat -x, --per-node -a true 2>&1 | commachecker --per-node
perf stat -x$csv_sep --per-node -a true 2>&1 | commachecker --per-node
echo "[Success]"
}
......@@ -154,7 +155,7 @@ check_per_socket()
echo "[Skip] paranoid and not root"
return
fi
perf stat -x, --per-socket -a true 2>&1 | commachecker --per-socket
perf stat -x$csv_sep --per-socket -a true 2>&1 | commachecker --per-socket
echo "[Success]"
}
......
......@@ -277,7 +277,7 @@ int on_switch(u64 *ctx)
else
prev_state = get_task_state(prev);
return off_cpu_stat(ctx, prev, next, prev_state);
return off_cpu_stat(ctx, prev, next, prev_state & 0xff);
}
char LICENSE[] SEC("license") = "Dual BSD/GPL";
......@@ -842,11 +842,7 @@ int create_perf_stat_counter(struct evsel *evsel,
if (evsel__is_group_leader(evsel)) {
attr->disabled = 1;
/*
* In case of initial_delay we enable tracee
* events manually.
*/
if (target__none(target) && !config->initial_delay)
if (target__enable_on_exec(target))
attr->enable_on_exec = 1;
}
......
......@@ -166,7 +166,6 @@ struct perf_stat_config {
FILE *output;
unsigned int interval;
unsigned int timeout;
int initial_delay;
unsigned int unit_width;
unsigned int metric_only_len;
int times;
......
......@@ -18,6 +18,7 @@ struct target {
bool per_thread;
bool use_bpf;
bool hybrid;
int initial_delay;
const char *attr_map;
};
......@@ -72,6 +73,17 @@ static inline bool target__none(struct target *target)
return !target__has_task(target) && !target__has_cpu(target);
}
static inline bool target__enable_on_exec(struct target *target)
{
/*
* Normally enable_on_exec should be set if:
* 1) The tracee process is forked (not attaching to existed task or cpu).
* 2) And initial_delay is not configured.
* Otherwise, we enable tracee events manually.
*/
return target__none(target) && !target->initial_delay;
}
static inline bool target__has_per_thread(struct target *target)
{
return target->system_wide && target->per_thread;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment