Commit c7f4f994 authored by Ingo Molnar's avatar Ingo Molnar

Merge tag 'perf-core-for-mingo-4.14-20170823' of...

Merge tag 'perf-core-for-mingo-4.14-20170823' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

- Expression parser enhancements for metrics (Andi Kleen)

- Fix buffer overflow while freeing events in 'perf stat' (Andi Kleen)

- Fix static linking with elfutils's libdf and with libunwind
  in Debian/Ubuntu (Konstantin Khlebnikov)

- Tighten detection of BPF events, avoiding matching some other PMU
  events such as 'cpu/uops_executed.core,cmask=1/' as a .c source
  file that ended up being considered a BPF event (Andi Kleen)

- Add Skylake server uncore JSON vendor events (Andi Kleen)

- Add support for printing new mem_info encodings, including
  'perf test' checks (Andi Kleen)

- Really install manpages via 'make install-man' (Konstantin Khlebnikov)

- Fix documentation for perf_event_paranoid and perf_event_mlock_kb
  sysctls (Konstantin Khlebnikov)
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents 93da8b22 60913e00
...@@ -61,6 +61,7 @@ show up in /proc/sys/kernel: ...@@ -61,6 +61,7 @@ show up in /proc/sys/kernel:
- perf_cpu_time_max_percent - perf_cpu_time_max_percent
- perf_event_paranoid - perf_event_paranoid
- perf_event_max_stack - perf_event_max_stack
- perf_event_mlock_kb
- perf_event_max_contexts_per_stack - perf_event_max_contexts_per_stack
- pid_max - pid_max
- powersave-nap [ PPC only ] - powersave-nap [ PPC only ]
...@@ -654,7 +655,9 @@ Controls use of the performance events system by unprivileged ...@@ -654,7 +655,9 @@ Controls use of the performance events system by unprivileged
users (without CAP_SYS_ADMIN). The default value is 2. users (without CAP_SYS_ADMIN). The default value is 2.
-1: Allow use of (almost) all events by all users -1: Allow use of (almost) all events by all users
>=0: Disallow raw tracepoint access by users without CAP_IOC_LOCK Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK
>=0: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN
Disallow raw tracepoint access by users without CAP_SYS_ADMIN
>=1: Disallow CPU event access by users without CAP_SYS_ADMIN >=1: Disallow CPU event access by users without CAP_SYS_ADMIN
>=2: Disallow kernel profiling by users without CAP_SYS_ADMIN >=2: Disallow kernel profiling by users without CAP_SYS_ADMIN
...@@ -673,6 +676,14 @@ The default value is 127. ...@@ -673,6 +676,14 @@ The default value is 127.
============================================================== ==============================================================
perf_event_mlock_kb:
Control size of per-cpu ring buffer not counted agains mlock limit.
The default value is 512 + 1 page
==============================================================
perf_event_max_contexts_per_stack: perf_event_max_contexts_per_stack:
Controls maximum number of stack frame context entries for Controls maximum number of stack frame context entries for
......
...@@ -954,14 +954,20 @@ union perf_mem_data_src { ...@@ -954,14 +954,20 @@ union perf_mem_data_src {
mem_snoop:5, /* snoop mode */ mem_snoop:5, /* snoop mode */
mem_lock:2, /* lock instr */ mem_lock:2, /* lock instr */
mem_dtlb:7, /* tlb access */ mem_dtlb:7, /* tlb access */
mem_rsvd:31; mem_lvl_num:4, /* memory hierarchy level number */
mem_remote:1, /* remote */
mem_snoopx:2, /* snoop mode, ext */
mem_rsvd:24;
}; };
}; };
#elif defined(__BIG_ENDIAN_BITFIELD) #elif defined(__BIG_ENDIAN_BITFIELD)
union perf_mem_data_src { union perf_mem_data_src {
__u64 val; __u64 val;
struct { struct {
__u64 mem_rsvd:31, __u64 mem_rsvd:24,
mem_snoopx:2, /* snoop mode, ext */
mem_remote:1, /* remote */
mem_lvl_num:4, /* memory hierarchy level number */
mem_dtlb:7, /* tlb access */ mem_dtlb:7, /* tlb access */
mem_lock:2, /* lock instr */ mem_lock:2, /* lock instr */
mem_snoop:5, /* snoop mode */ mem_snoop:5, /* snoop mode */
...@@ -998,6 +1004,22 @@ union perf_mem_data_src { ...@@ -998,6 +1004,22 @@ union perf_mem_data_src {
#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ #define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
#define PERF_MEM_LVL_SHIFT 5 #define PERF_MEM_LVL_SHIFT 5
#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */
#define PERF_MEM_REMOTE_SHIFT 37
#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */
#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */
#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */
#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */
/* 5-0xa available */
#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */
#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */
#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */
#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */
#define PERF_MEM_LVLNUM_SHIFT 33
/* snoop mode */ /* snoop mode */
#define PERF_MEM_SNOOP_NA 0x01 /* not available */ #define PERF_MEM_SNOOP_NA 0x01 /* not available */
#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */ #define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */
...@@ -1006,6 +1028,10 @@ union perf_mem_data_src { ...@@ -1006,6 +1028,10 @@ union perf_mem_data_src {
#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */ #define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */
#define PERF_MEM_SNOOP_SHIFT 19 #define PERF_MEM_SNOOP_SHIFT 19
#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */
/* 1 free */
#define PERF_MEM_SNOOPX_SHIFT 37
/* locked instruction */ /* locked instruction */
#define PERF_MEM_LOCK_NA 0x01 /* not available */ #define PERF_MEM_LOCK_NA 0x01 /* not available */
#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */ #define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */
......
...@@ -192,7 +192,7 @@ do-install-man: man ...@@ -192,7 +192,7 @@ do-install-man: man
# $(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir); \ # $(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir); \
# $(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir) # $(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
install-man: check-man-tools man install-man: check-man-tools man do-install-man
ifdef missing_tools ifdef missing_tools
DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed) DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed)
......
...@@ -35,7 +35,7 @@ ifeq ($(SRCARCH),x86) ...@@ -35,7 +35,7 @@ ifeq ($(SRCARCH),x86)
ifeq (${IS_64_BIT}, 1) ifeq (${IS_64_BIT}, 1)
CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated
ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
LIBUNWIND_LIBS = -lunwind -lunwind-x86_64 LIBUNWIND_LIBS = -lunwind-x86_64 -lunwind -llzma
$(call detected,CONFIG_X86_64) $(call detected,CONFIG_X86_64)
else else
LIBUNWIND_LIBS = -lunwind-x86 -llzma -lunwind LIBUNWIND_LIBS = -lunwind-x86 -llzma -lunwind
...@@ -103,8 +103,12 @@ ifdef LIBDW_DIR ...@@ -103,8 +103,12 @@ ifdef LIBDW_DIR
LIBDW_CFLAGS := -I$(LIBDW_DIR)/include LIBDW_CFLAGS := -I$(LIBDW_DIR)/include
LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib
endif endif
DWARFLIBS := -ldw
ifeq ($(findstring -static,${LDFLAGS}),-static)
DWARFLIBS += -lelf -lebl -ldl -lz -llzma -lbz2
endif
FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS) FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS)
FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) $(DWARFLIBS)
# for linking with debug library, run like: # for linking with debug library, run like:
# make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/ # make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/
...@@ -365,10 +369,6 @@ ifndef NO_LIBELF ...@@ -365,10 +369,6 @@ ifndef NO_LIBELF
else else
CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS)
LDFLAGS += $(LIBDW_LDFLAGS) LDFLAGS += $(LIBDW_LDFLAGS)
DWARFLIBS := -ldw
ifeq ($(findstring -static,${LDFLAGS}),-static)
DWARFLIBS += -lelf -lebl -lz -llzma -lbz2
endif
EXTLIBS += ${DWARFLIBS} EXTLIBS += ${DWARFLIBS}
$(call detected,CONFIG_DWARF) $(call detected,CONFIG_DWARF)
endif # PERF_HAVE_DWARF_REGS endif # PERF_HAVE_DWARF_REGS
...@@ -505,6 +505,10 @@ ifndef NO_LOCAL_LIBUNWIND ...@@ -505,6 +505,10 @@ ifndef NO_LOCAL_LIBUNWIND
EXTLIBS += $(LIBUNWIND_LIBS) EXTLIBS += $(LIBUNWIND_LIBS)
LDFLAGS += $(LIBUNWIND_LIBS) LDFLAGS += $(LIBUNWIND_LIBS)
endif endif
ifeq ($(findstring -static,${LDFLAGS}),-static)
# gcc -static links libgcc_eh which contans piece of libunwind
LIBUNWIND_LDFLAGS += -Wl,--allow-multiple-definition
endif
ifndef NO_LIBUNWIND ifndef NO_LIBUNWIND
CFLAGS += -DHAVE_LIBUNWIND_SUPPORT CFLAGS += -DHAVE_LIBUNWIND_SUPPORT
......
...@@ -34,3 +34,4 @@ GenuineIntel-6-2C,v2,westmereep-dp,core ...@@ -34,3 +34,4 @@ GenuineIntel-6-2C,v2,westmereep-dp,core
GenuineIntel-6-2C,v2,westmereep-dp,core GenuineIntel-6-2C,v2,westmereep-dp,core
GenuineIntel-6-25,v2,westmereep-sp,core GenuineIntel-6-25,v2,westmereep-sp,core
GenuineIntel-6-2F,v2,westmereex,core GenuineIntel-6-2F,v2,westmereex,core
GenuineIntel-6-55,v1,skylakex,core
This diff is collapsed.
[
{
"EventCode": "0xC7",
"UMask": "0x1",
"BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
"Counter": "0,1,2,3",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC7",
"UMask": "0x2",
"BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
"Counter": "0,1,2,3",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC7",
"UMask": "0x4",
"BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
"Counter": "0,1,2,3",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC7",
"UMask": "0x8",
"BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ",
"Counter": "0,1,2,3",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
"PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC7",
"UMask": "0x10",
"BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
"Counter": "0,1,2,3",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC7",
"UMask": "0x20",
"BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
"Counter": "0,1,2,3",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC7",
"UMask": "0x40",
"BriefDescription": "Number of Packed Double-Precision FP arithmetic instructions (Use operation multiplier of 8)",
"Counter": "0,1,2,3",
"EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
"PublicDescription": "Number of Packed Double-Precision FP arithmetic instructions (Use operation multiplier of 8).",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xC7",
"UMask": "0x80",
"BriefDescription": "Number of Packed Single-Precision FP arithmetic instructions (Use operation multiplier of 16)",
"Counter": "0,1,2,3",
"EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
"PublicDescription": "Number of Packed Single-Precision FP arithmetic instructions (Use operation multiplier of 16).",
"SampleAfterValue": "2000003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xCA",
"UMask": "0x1e",
"BriefDescription": "Cycles with any input/output SSE or FP assist",
"Counter": "0,1,2,3",
"EventName": "FP_ASSIST.ANY",
"CounterMask": "1",
"PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
}
]
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
[
{
"EventCode": "0x28",
"UMask": "0x7",
"BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the Non-AVX turbo schedule.",
"Counter": "0,1,2,3",
"EventName": "CORE_POWER.LVL0_TURBO_LICENSE",
"PublicDescription": "Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes.",
"SampleAfterValue": "200003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x28",
"UMask": "0x18",
"BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX2 turbo schedule.",
"Counter": "0,1,2,3",
"EventName": "CORE_POWER.LVL1_TURBO_LICENSE",
"PublicDescription": "Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions.",
"SampleAfterValue": "200003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x28",
"UMask": "0x20",
"BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.",
"Counter": "0,1,2,3",
"EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
"PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server michroarchtecture). This includes high current AVX 512-bit instructions.",
"SampleAfterValue": "200003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0x28",
"UMask": "0x40",
"BriefDescription": "Core cycles the core was throttled due to a pending power level request.",
"Counter": "0,1,2,3",
"EventName": "CORE_POWER.THROTTLE",
"PublicDescription": "Core cycles the out-of-order engine was throttled due to a pending power level request.",
"SampleAfterValue": "200003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xCB",
"UMask": "0x1",
"BriefDescription": "Number of hardware interrupts received by the processor.",
"Counter": "0,1,2,3",
"EventName": "HW_INTERRUPTS.RECEIVED",
"PublicDescription": "Counts the number of hardware interruptions received by the processor.",
"SampleAfterValue": "203",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xFE",
"UMask": "0x2",
"BriefDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly",
"Counter": "0,1,2,3",
"EventName": "IDI_MISC.WB_UPGRADE",
"PublicDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
},
{
"EventCode": "0xFE",
"UMask": "0x4",
"BriefDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly",
"Counter": "0,1,2,3",
"EventName": "IDI_MISC.WB_DOWNGRADE",
"PublicDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly.",
"SampleAfterValue": "100003",
"CounterHTOff": "0,1,2,3,4,5,6,7"
}
]
This diff is collapsed.
[
{
"BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd",
"Counter": "0,1,2,3",
"EventCode": "0x4",
"EventName": "LLC_MISSES.MEM_READ",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x3",
"Unit": "iMC"
},
{
"BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr",
"Counter": "0,1,2,3",
"EventCode": "0x4",
"EventName": "LLC_MISSES.MEM_WRITE",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0xC",
"Unit": "iMC"
},
{
"BriefDescription": "Memory controller clock ticks",
"Counter": "0,1,2,3",
"EventName": "UNC_M_CLOCKTICKS",
"PerPkg": "1",
"Unit": "iMC"
},
{
"BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode",
"Counter": "0,1,2,3",
"EventCode": "0x85",
"EventName": "UNC_M_POWER_CHANNEL_PPD",
"MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.",
"MetricName": "power_channel_ppd %",
"PerPkg": "1",
"Unit": "iMC"
},
{
"BriefDescription": "Cycles Memory is in self refresh power mode",
"Counter": "0,1,2,3",
"EventCode": "0x43",
"EventName": "UNC_M_POWER_SELF_REFRESH",
"MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.",
"MetricName": "power_self_refresh %",
"PerPkg": "1",
"Unit": "iMC"
},
{
"BriefDescription": "Pre-charges due to page misses",
"Counter": "0,1,2,3",
"EventCode": "0x2",
"EventName": "UNC_M_PRE_COUNT.PAGE_MISS",
"PerPkg": "1",
"UMask": "0x1",
"Unit": "iMC"
},
{
"BriefDescription": "Pre-charge for reads",
"Counter": "0,1,2,3",
"EventCode": "0x2",
"EventName": "UNC_M_PRE_COUNT.RD",
"PerPkg": "1",
"UMask": "0x4",
"Unit": "iMC"
},
{
"BriefDescription": "Pre-charge for writes",
"Counter": "0,1,2,3",
"EventCode": "0x2",
"EventName": "UNC_M_PRE_COUNT.WR",
"PerPkg": "1",
"UMask": "0x8",
"Unit": "iMC"
},
{
"BriefDescription": "DRAM Page Activate commands sent due to a write request",
"Counter": "0,1,2,3",
"EventCode": "0x1",
"EventName": "UNC_M_ACT_COUNT.WR",
"PerPkg": "1",
"PublicDescription": "Counts DRAM Page Activate commands sent on this channel due to a write request to the iMC (Memory Controller). Activate commands are issued to open up a page on the DRAM devices so that it can be read or written to with a CAS (Column Access Select) command.",
"UMask": "0x2",
"Unit": "iMC"
},
{
"BriefDescription": "All DRAM CAS Commands issued",
"Counter": "0,1,2,3",
"EventCode": "0x4",
"EventName": "UNC_M_CAS_COUNT.ALL",
"PerPkg": "1",
"PublicDescription": "Counts all CAS (Column Address Select) commands issued to DRAM per memory channel. CAS commands are issued to specify the address to read or write on DRAM, so this event increments for every read and write. This event counts whether AutoPrecharge (which closes the DRAM Page automatically after a read/write) is enabled or not.",
"UMask": "0xF",
"Unit": "iMC"
},
{
"BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd",
"Counter": "0,1,2,3",
"EventCode": "0x4",
"EventName": "LLC_MISSES.MEM_READ",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0x3",
"Unit": "iMC"
},
{
"BriefDescription": "All DRAM Read CAS Commands issued (does not include underfills) ",
"Counter": "0,1,2,3",
"EventCode": "0x4",
"EventName": "UNC_M_CAS_COUNT.RD_REG",
"PerPkg": "1",
"PublicDescription": "Counts CAS (Column Access Select) regular read commands issued to DRAM on a per channel basis. CAS commands are issued to specify the address to read or write on DRAM, and this event increments for every regular read. This event only counts regular reads and does not includes underfill reads due to partial write requests. This event counts whether AutoPrecharge (which closes the DRAM Page automatically after a read/write) is enabled or not.",
"UMask": "0x1",
"Unit": "iMC"
},
{
"BriefDescription": "DRAM Underfill Read CAS Commands issued",
"Counter": "0,1,2,3",
"EventCode": "0x4",
"EventName": "UNC_M_CAS_COUNT.RD_UNDERFILL",
"PerPkg": "1",
"PublicDescription": "Counts CAS (Column Access Select) underfill read commands issued to DRAM due to a partial write, on a per channel basis. CAS commands are issued to specify the address to read or write on DRAM, and this command counts underfill reads. Partial writes must be completed by first reading in the underfill from DRAM and then merging in the partial write data before writing the full line back to DRAM. This event will generally count about the same as the number of partial writes, but may be slightly less because of partials hitting in the WPQ (due to a previous write request). ",
"UMask": "0x2",
"Unit": "iMC"
},
{
"BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr",
"Counter": "0,1,2,3",
"EventCode": "0x4",
"EventName": "LLC_MISSES.MEM_WRITE",
"PerPkg": "1",
"ScaleUnit": "64Bytes",
"UMask": "0xC",
"Unit": "iMC"
},
{
"BriefDescription": "Read Pending Queue Allocations",
"Counter": "0,1,2,3",
"EventCode": "0x10",
"EventName": "UNC_M_RPQ_INSERTS",
"PerPkg": "1",
"PublicDescription": "Counts the number of read requests allocated into the Read Pending Queue (RPQ). This queue is used to schedule reads out to the memory controller and to track the requests. Requests allocate into the RPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC. The requests deallocate after the read CAS command has been issued to DRAM. This event counts both Isochronous and non-Isochronous requests which were issued to the RPQ. ",
"Unit": "iMC"
},
{
"BriefDescription": "Read Pending Queue Occupancy",
"Counter": "0,1,2,3",
"EventCode": "0x80",
"EventName": "UNC_M_RPQ_OCCUPANCY",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries in the Read Pending Queue (RPQ) at each cycle. This can then be used to calculate both the average occupancy of the queue (in conjunction with the number of cycles not empty) and the average latency in the queue (in conjunction with the number of allocations). The RPQ is used to schedule reads out to the memory controller and to track the requests. Requests allocate into the RPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC. They deallocate from the RPQ after the CAS command has been issued to memory.",
"Unit": "iMC"
},
{
"BriefDescription": "Write Pending Queue Allocations",
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "UNC_M_WPQ_INSERTS",
"PerPkg": "1",
"PublicDescription": "Counts the number of writes requests allocated into the Write Pending Queue (WPQ). The WPQ is used to schedule writes out to the memory controller and to track the requests. Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC (Memory Controller). The write requests deallocate after being issued to DRAM. Write requests themselves are able to complete (from the perspective of the rest of the system) as soon they have 'posted' to the iMC.",
"Unit": "iMC"
},
{
"BriefDescription": "Write Pending Queue Occupancy",
"Counter": "0,1,2,3",
"EventCode": "0x81",
"EventName": "UNC_M_WPQ_OCCUPANCY",
"PerPkg": "1",
"PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule writes out to the memory controller and to track the requests.",
"Unit": "iMC"
}
]
This diff is collapsed.
This diff is collapsed.
...@@ -34,6 +34,7 @@ perf-y += thread-map.o ...@@ -34,6 +34,7 @@ perf-y += thread-map.o
perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o llvm-src-relocation.o perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o llvm-src-relocation.o
perf-y += bpf.o perf-y += bpf.o
perf-y += topology.o perf-y += topology.o
perf-y += mem.o
perf-y += cpumap.o perf-y += cpumap.o
perf-y += stat.o perf-y += stat.o
perf-y += event_update.o perf-y += event_update.o
......
...@@ -47,6 +47,10 @@ static struct test generic_tests[] = { ...@@ -47,6 +47,10 @@ static struct test generic_tests[] = {
.desc = "Read samples using the mmap interface", .desc = "Read samples using the mmap interface",
.func = test__basic_mmap, .func = test__basic_mmap,
}, },
{
.desc = "Test data source output",
.func = test__mem,
},
{ {
.desc = "Parse event definition strings", .desc = "Parse event definition strings",
.func = test__parse_events, .func = test__parse_events,
......
This diff is collapsed.
This diff is collapsed.
...@@ -115,7 +115,7 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int ...@@ -115,7 +115,7 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int
perf_evsel__free_counts(evsel); perf_evsel__free_counts(evsel);
out_close_fd: out_close_fd:
perf_evsel__close_fd(evsel, 1, threads->nr); perf_evsel__close_fd(evsel);
out_evsel_delete: out_evsel_delete:
perf_evsel__delete(evsel); perf_evsel__delete(evsel);
out_thread_map_delete: out_thread_map_delete:
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment