perf/core improvements and fixes:
- Intel PT timestamp fixes (Adrian Hunter) - Fix Intel JSON fixed counter conversions (Andi Kleen) - Sync memcpy, cpufeatures and bpf headers with the kernel (Arnaldo Carvalho de Melo) - Add some more tool tips (Donghyun Kim, Kim SeonYoung, Nambong Ha) - Fix libtraceevent's kbuffer_read_at_offset() handling of offsets before or equal the first event (Namhyung Kim) - Fix uretprobe probe placement on ppc64le (Ravi Bangoria) - Support building C++ source files and add feature detection for g++, prep work for supporting a builtin clang/llvm, to remove the need for having that toolchain installed to automagically build BPF scriptlets that then gets uploaded to the kernel via sys_bpf() (Wang Nan) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> -----BEGIN PGP SIGNATURE----- Version: GnuPG v2 iQIcBAABCAAGBQJX9Y7/AAoJENZQFvNTUqpA47AP/RcGgy9L9Bc8ov45xUobCy2k vJJNsJZKhM7BDifdabN5HJ+7AjfFKVN8XY4lRaRwryfbBvxY+/ivD0dj+nYRBYFO VeOCWGBsRJrU9Wtyh/h24bVB67gVincDLmhbDkNgLR5ik3mYPCC/vzqfGNxkbR2y d7m2Xzt3Pe2QXcEA89YuW7alYcUGqyuYvPyL0VTNsZc0eIFIpZaXh/jD0i66YGFQ D2rWTh+WQQjQlEt0IiioL8V+Il5T5ehOvw6p1V0tE+Jkhdwyu6bzveai2WcIKj5H 7yEMKecA6NzA+0r6Tpqz/k6GS+QVmgk8L+BS/5msPFTAPYjsBCJjTh3QwbKJg47x 3nc5rl2KZ9Lkvb1AHxQHYke1IlsBfTMyZIk14zCM+++/8QA6IQkoosB7xb37MAt7 k7C63Bu/Uf8c+2r7NMiqV0DRPzoV0EsNFyaA7q01aPjzHEc5/goEJYKZhInslQ8F AHpLy8sGklt7uHdWsrC337yaObzuZ9Z24c2S4ixdTgsx7c1qHNUjcWbwt6Z052PM fARZaFzFfhzyTcIKsgg7SuJRatleg3BtjvDV9VS/lTypNjr5qaRpH0tv/nQKLzrf CuuQ/DbuaSMVQ/qnsXwIIB/ATe+cnXYJI4nBastiOpTCMB0bHhLD4k0l8ZKbYn5j kSXSWF57tjB0Hr/Chx2F =1K3j -----END PGP SIGNATURE----- Merge tag 'perf-core-for-mingo-20161005' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: - Intel PT timestamp fixes (Adrian Hunter) - Fix Intel JSON fixed counter conversions (Andi Kleen) - Sync memcpy, cpufeatures and bpf headers with the kernel (Arnaldo Carvalho de Melo) - Add some more tool tips (Donghyun Kim, Kim 
SeonYoung, Nambong Ha) - Fix libtraceevent's kbuffer_read_at_offset() handling of offsets before or equal the first event (Namhyung Kim) - Fix uretprobe probe placement on ppc64le (Ravi Bangoria) - Support building C++ source files and add feature detection for g++, prep work for supporting a builtin clang/llvm, to remove the need for having that toolchain installed to automagically build BPF scriptlets that then gets uploaded to the kernel via sys_bpf() (Wang Nan) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
commit
c68306ce20
|
@ -106,7 +106,6 @@
|
|||
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
|
||||
#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
|
||||
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
|
||||
#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */
|
||||
|
||||
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
|
||||
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
|
||||
|
|
|
@ -181,11 +181,11 @@ ENDPROC(memcpy_orig)
|
|||
|
||||
#ifndef CONFIG_UML
|
||||
/*
|
||||
* memcpy_mcsafe - memory copy with machine check exception handling
|
||||
* memcpy_mcsafe_unrolled - memory copy with machine check exception handling
|
||||
* Note that we only catch machine checks when reading the source addresses.
|
||||
* Writes to target are posted and don't generate machine checks.
|
||||
*/
|
||||
ENTRY(memcpy_mcsafe)
|
||||
ENTRY(memcpy_mcsafe_unrolled)
|
||||
cmpl $8, %edx
|
||||
/* Less than 8 bytes? Go to byte copy loop */
|
||||
jb .L_no_whole_words
|
||||
|
@ -273,7 +273,7 @@ ENTRY(memcpy_mcsafe)
|
|||
.L_done_memcpy_trap:
|
||||
xorq %rax, %rax
|
||||
ret
|
||||
ENDPROC(memcpy_mcsafe)
|
||||
ENDPROC(memcpy_mcsafe_unrolled)
|
||||
|
||||
.section .fixup, "ax"
|
||||
/* Return -EFAULT for any failure */
|
||||
|
|
|
@ -90,6 +90,7 @@ if_changed = $(if $(strip $(any-prereq) $(arg-check)), \
|
|||
# - per object C flags
|
||||
# - BUILD_STR macro to allow '-D"$(variable)"' constructs
|
||||
c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj))
|
||||
cxx_flags = -Wp,-MD,$(depfile),-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXXFLAGS_$(basetarget).o) $(CXXFLAGS_$(obj))
|
||||
|
||||
###
|
||||
## HOSTCC C flags
|
||||
|
|
|
@ -61,6 +61,9 @@ quiet_cmd_cc_o_c = CC $@
|
|||
quiet_cmd_host_cc_o_c = HOSTCC $@
|
||||
cmd_host_cc_o_c = $(HOSTCC) $(host_c_flags) -c -o $@ $<
|
||||
|
||||
quiet_cmd_cxx_o_c = CXX $@
|
||||
cmd_cxx_o_c = $(CXX) $(cxx_flags) -c -o $@ $<
|
||||
|
||||
quiet_cmd_cpp_i_c = CPP $@
|
||||
cmd_cpp_i_c = $(CC) $(c_flags) -E -o $@ $<
|
||||
|
||||
|
@ -88,6 +91,10 @@ $(OUTPUT)%.o: %.c FORCE
|
|||
$(call rule_mkdir)
|
||||
$(call if_changed_dep,$(host)cc_o_c)
|
||||
|
||||
$(OUTPUT)%.o: %.cpp FORCE
|
||||
$(call rule_mkdir)
|
||||
$(call if_changed_dep,cxx_o_c)
|
||||
|
||||
$(OUTPUT)%.o: %.S FORCE
|
||||
$(call rule_mkdir)
|
||||
$(call if_changed_dep,$(host)cc_o_c)
|
||||
|
|
|
@ -7,7 +7,7 @@ endif
|
|||
|
||||
feature_check = $(eval $(feature_check_code))
|
||||
define feature_check_code
|
||||
feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
|
||||
feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
|
||||
endef
|
||||
|
||||
feature_set = $(eval $(feature_set_code))
|
||||
|
|
|
@ -46,11 +46,13 @@ FILES= \
|
|||
test-lzma.bin \
|
||||
test-bpf.bin \
|
||||
test-get_cpuid.bin \
|
||||
test-sdt.bin
|
||||
test-sdt.bin \
|
||||
test-cxx.bin
|
||||
|
||||
FILES := $(addprefix $(OUTPUT),$(FILES))
|
||||
|
||||
CC := $(CROSS_COMPILE)gcc -MD
|
||||
CXX := $(CROSS_COMPILE)g++ -MD
|
||||
PKG_CONFIG := $(CROSS_COMPILE)pkg-config
|
||||
|
||||
all: $(FILES)
|
||||
|
@ -58,6 +60,9 @@ all: $(FILES)
|
|||
__BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
|
||||
BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1
|
||||
|
||||
__BUILDXX = $(CXX) $(CXXFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
|
||||
BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1
|
||||
|
||||
###############################
|
||||
|
||||
$(OUTPUT)test-all.bin:
|
||||
|
@ -217,6 +222,9 @@ $(OUTPUT)test-bpf.bin:
|
|||
$(OUTPUT)test-sdt.bin:
|
||||
$(BUILD)
|
||||
|
||||
$(OUTPUT)test-cxx.bin:
|
||||
$(BUILDXX) -std=gnu++11
|
||||
|
||||
-include $(OUTPUT)*.d
|
||||
|
||||
###############################
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
#include <iostream>
|
||||
#include <memory>
|
||||
|
||||
// Print `s` followed by a newline on standard output.
// The string is taken by value, so an rvalue argument is moved into `s`
// rather than copied.
static void print_str(std::string s)
|
||||
{
|
||||
std::cout << s << std::endl;
|
||||
}
|
||||
|
||||
// Minimal program exercising std::string, iostream and std::move.
// NOTE(review): presumably this is the test-cxx.cpp feature probe that the
// test-cxx.bin rule compiles with -std=gnu++11 -- confirm the file name.
int main()
|
||||
{
|
||||
std::string s("Hello World!");
|
||||
// Hand the string over as an rvalue; `s` is left in a moved-from state.
print_str(std::move(s));
|
||||
// Prints whatever the moved-from string now holds between two '|' markers
// (valid but unspecified contents per the C++ standard).
std::cout << "|" << s << "|" << std::endl;
|
||||
return 0;
|
||||
}
|
|
@ -339,7 +339,7 @@ enum bpf_func_id {
|
|||
BPF_FUNC_skb_change_type,
|
||||
|
||||
/**
|
||||
* bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
|
||||
* bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb
|
||||
* @skb: pointer to skb
|
||||
* @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
|
||||
* @index: index of the cgroup in the bpf_map
|
||||
|
@ -348,7 +348,7 @@ enum bpf_func_id {
|
|||
* == 1 skb succeeded the cgroup2 descendant test
|
||||
* < 0 error
|
||||
*/
|
||||
BPF_FUNC_skb_in_cgroup,
|
||||
BPF_FUNC_skb_under_cgroup,
|
||||
|
||||
/**
|
||||
* bpf_get_hash_recalc(skb)
|
||||
|
|
|
@ -622,6 +622,7 @@ void *kbuffer_read_at_offset(struct kbuffer *kbuf, int offset,
|
|||
|
||||
/* Reset the buffer */
|
||||
kbuffer_load_subbuffer(kbuf, kbuf->subbuffer);
|
||||
data = kbuffer_read_event(kbuf, ts);
|
||||
|
||||
while (kbuf->curr < offset) {
|
||||
data = kbuffer_next_event(kbuf, ts);
|
||||
|
|
|
@ -28,3 +28,7 @@ To change sampling frequency to 100 Hz: perf record -F 100
|
|||
See assembly instructions with percentage: perf annotate <symbol>
|
||||
If you prefer Intel style assembly, try: perf annotate -M intel
|
||||
For hierarchical output, try: perf report --hierarchy
|
||||
Order by the overhead of source file name and line number: perf report -s srcline
|
||||
System-wide collection from all CPUs: perf record -a
|
||||
Show current config key-value pairs: perf config --list
|
||||
Show user configuration overrides: perf config --user --list
|
||||
|
|
|
@ -82,7 +82,8 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev,
|
|||
*
|
||||
* In addition, we shouldn't specify an offset for kretprobes.
|
||||
*/
|
||||
if (pev->point.offset || pev->point.retprobe || !map || !sym)
|
||||
if (pev->point.offset || (!pev->uprobes && pev->point.retprobe) ||
|
||||
!map || !sym)
|
||||
return;
|
||||
|
||||
lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym);
|
||||
|
|
|
@ -312,6 +312,8 @@ static struct fixed {
|
|||
const char *event;
|
||||
} fixed[] = {
|
||||
{ "inst_retired.any", "event=0xc0" },
|
||||
{ "inst_retired.any_p", "event=0xc0" },
|
||||
{ "cpu_clk_unhalted.ref", "event=0x0,umask=0x03" },
|
||||
{ "cpu_clk_unhalted.thread", "event=0x3c" },
|
||||
{ "cpu_clk_unhalted.thread_any", "event=0x3c,any=1" },
|
||||
{ NULL, NULL},
|
||||
|
|
|
@ -90,6 +90,7 @@ struct intel_pt_decoder {
|
|||
bool pge;
|
||||
bool have_tma;
|
||||
bool have_cyc;
|
||||
bool fixup_last_mtc;
|
||||
uint64_t pos;
|
||||
uint64_t last_ip;
|
||||
uint64_t ip;
|
||||
|
@ -586,10 +587,31 @@ struct intel_pt_calc_cyc_to_tsc_info {
|
|||
uint64_t tsc_timestamp;
|
||||
uint64_t timestamp;
|
||||
bool have_tma;
|
||||
bool fixup_last_mtc;
|
||||
bool from_mtc;
|
||||
double cbr_cyc_to_tsc;
|
||||
};
|
||||
|
||||
/*
|
||||
* MTC provides an 8-bit slice of CTC but the TMA packet only provides the lower
|
||||
* 16 bits of CTC. If mtc_shift > 8 then some of the MTC bits are not in the CTC
|
||||
* provided by the TMA packet. Fix-up the last_mtc calculated from the TMA
|
||||
* packet by copying the missing bits from the current MTC assuming the least
|
||||
* difference between the two, and that the current MTC comes after last_mtc.
|
||||
*/
|
||||
static void intel_pt_fixup_last_mtc(uint32_t mtc, int mtc_shift,
|
||||
uint32_t *last_mtc)
|
||||
{
|
||||
/*
 * Lowest bit of the MTC value that corresponds to a CTC bit at position 16
 * or above. The shift is only well defined for 0 <= 16 - mtc_shift < 32;
 * the callers visible in this file invoke this only when mtc_shift > 8.
 */
uint32_t first_missing_bit = 1U << (16 - mtc_shift);
|
||||
/* Selects first_missing_bit and every bit above it. */
uint32_t mask = ~(first_missing_bit - 1);
|
||||
|
||||
/* Take the bits *last_mtc is missing from the current MTC value. */
*last_mtc |= mtc & mask;
|
||||
/*
 * If the result is not earlier than the current MTC, step back by one unit
 * of the first missing bit and wrap the result to 8 bits.
 */
if (*last_mtc >= mtc) {
|
||||
*last_mtc -= first_missing_bit;
|
||||
*last_mtc &= 0xff;
|
||||
}
|
||||
}
|
||||
|
||||
static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
|
||||
{
|
||||
struct intel_pt_decoder *decoder = pkt_info->decoder;
|
||||
|
@ -619,6 +641,11 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
|
|||
return 0;
|
||||
|
||||
mtc = pkt_info->packet.payload;
|
||||
if (decoder->mtc_shift > 8 && data->fixup_last_mtc) {
|
||||
data->fixup_last_mtc = false;
|
||||
intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift,
|
||||
&data->last_mtc);
|
||||
}
|
||||
if (mtc > data->last_mtc)
|
||||
mtc_delta = mtc - data->last_mtc;
|
||||
else
|
||||
|
@ -687,6 +714,7 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
|
|||
|
||||
data->ctc_delta = 0;
|
||||
data->have_tma = true;
|
||||
data->fixup_last_mtc = true;
|
||||
|
||||
return 0;
|
||||
|
||||
|
@ -753,6 +781,7 @@ static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder,
|
|||
.tsc_timestamp = decoder->tsc_timestamp,
|
||||
.timestamp = decoder->timestamp,
|
||||
.have_tma = decoder->have_tma,
|
||||
.fixup_last_mtc = decoder->fixup_last_mtc,
|
||||
.from_mtc = from_mtc,
|
||||
.cbr_cyc_to_tsc = 0,
|
||||
};
|
||||
|
@ -1271,6 +1300,7 @@ static void intel_pt_calc_tma(struct intel_pt_decoder *decoder)
|
|||
}
|
||||
decoder->ctc_delta = 0;
|
||||
decoder->have_tma = true;
|
||||
decoder->fixup_last_mtc = true;
|
||||
intel_pt_log("CTC timestamp " x64_fmt " last MTC %#x CTC rem %#x\n",
|
||||
decoder->ctc_timestamp, decoder->last_mtc, ctc_rem);
|
||||
}
|
||||
|
@ -1285,6 +1315,12 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
|
|||
|
||||
mtc = decoder->packet.payload;
|
||||
|
||||
if (decoder->mtc_shift > 8 && decoder->fixup_last_mtc) {
|
||||
decoder->fixup_last_mtc = false;
|
||||
intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift,
|
||||
&decoder->last_mtc);
|
||||
}
|
||||
|
||||
if (mtc > decoder->last_mtc)
|
||||
mtc_delta = mtc - decoder->last_mtc;
|
||||
else
|
||||
|
@ -1353,6 +1389,8 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
|
|||
timestamp, decoder->timestamp);
|
||||
else
|
||||
decoder->timestamp = timestamp;
|
||||
|
||||
decoder->timestamp_insn_cnt = 0;
|
||||
}
|
||||
|
||||
/* Walk PSB+ packets when already in sync. */
|
||||
|
|
Loading…
Reference in New Issue