perf/core improvements and fixes:

- Intel PT timestamp fixes (Adrian Hunter)
 
 - Fix Intel JSON fixed counter conversions (Andi Kleen)
 
 - Sync memcpy, cpufeatures and bpf headers with the kernel (Arnaldo Carvalho de Melo)
 
 - Add some more tool tips (Donghyun Kim, Kim SeonYoung, Nambong Ha)
 
 - Fix libtraceevent's kbuffer_read_at_offset() handling of offsets before or
   equal to the first event (Namhyung Kim)
 
 - Fix uretprobe probe placement on ppc64le (Ravi Bangoria)
 
 - Support building C++ source files and add feature detection for g++,
   prep work for supporting a builtin clang/llvm, to remove the need for having
   that toolchain installed to automagically build BPF scriptlets that then
   get uploaded to the kernel via sys_bpf() (Wang Nan)
 
 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iQIcBAABCAAGBQJX9Y7/AAoJENZQFvNTUqpA47AP/RcGgy9L9Bc8ov45xUobCy2k
 vJJNsJZKhM7BDifdabN5HJ+7AjfFKVN8XY4lRaRwryfbBvxY+/ivD0dj+nYRBYFO
 VeOCWGBsRJrU9Wtyh/h24bVB67gVincDLmhbDkNgLR5ik3mYPCC/vzqfGNxkbR2y
 d7m2Xzt3Pe2QXcEA89YuW7alYcUGqyuYvPyL0VTNsZc0eIFIpZaXh/jD0i66YGFQ
 D2rWTh+WQQjQlEt0IiioL8V+Il5T5ehOvw6p1V0tE+Jkhdwyu6bzveai2WcIKj5H
 7yEMKecA6NzA+0r6Tpqz/k6GS+QVmgk8L+BS/5msPFTAPYjsBCJjTh3QwbKJg47x
 3nc5rl2KZ9Lkvb1AHxQHYke1IlsBfTMyZIk14zCM+++/8QA6IQkoosB7xb37MAt7
 k7C63Bu/Uf8c+2r7NMiqV0DRPzoV0EsNFyaA7q01aPjzHEc5/goEJYKZhInslQ8F
 AHpLy8sGklt7uHdWsrC337yaObzuZ9Z24c2S4ixdTgsx7c1qHNUjcWbwt6Z052PM
 fARZaFzFfhzyTcIKsgg7SuJRatleg3BtjvDV9VS/lTypNjr5qaRpH0tv/nQKLzrf
 CuuQ/DbuaSMVQ/qnsXwIIB/ATe+cnXYJI4nBastiOpTCMB0bHhLD4k0l8ZKbYn5j
 kSXSWF57tjB0Hr/Chx2F
 =1K3j
 -----END PGP SIGNATURE-----

Merge tag 'perf-core-for-mingo-20161005' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

- Intel PT timestamp fixes (Adrian Hunter)

- Fix Intel JSON fixed counter conversions (Andi Kleen)

- Sync memcpy, cpufeatures and bpf headers with the kernel (Arnaldo Carvalho de Melo)

- Add some more tool tips (Donghyun Kim, Kim SeonYoung, Nambong Ha)

- Fix libtraceevent's kbuffer_read_at_offset() handling of offsets before or
  equal to the first event (Namhyung Kim)

- Fix uretprobe probe placement on ppc64le (Ravi Bangoria)

- Support building C++ source files and add feature detection for g++,
  prep work for supporting a builtin clang/llvm, to remove the need for having
  that toolchain installed to automagically build BPF scriptlets that then
  get uploaded to the kernel via sys_bpf() (Wang Nan)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2016-10-07 00:36:49 +02:00
commit c68306ce20
13 changed files with 85 additions and 9 deletions

View File

@ -106,7 +106,6 @@
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */

View File

@ -181,11 +181,11 @@ ENDPROC(memcpy_orig)
#ifndef CONFIG_UML
/*
* memcpy_mcsafe - memory copy with machine check exception handling
* memcpy_mcsafe_unrolled - memory copy with machine check exception handling
* Note that we only catch machine checks when reading the source addresses.
* Writes to target are posted and don't generate machine checks.
*/
ENTRY(memcpy_mcsafe)
ENTRY(memcpy_mcsafe_unrolled)
cmpl $8, %edx
/* Less than 8 bytes? Go to byte copy loop */
jb .L_no_whole_words
@ -273,7 +273,7 @@ ENTRY(memcpy_mcsafe)
.L_done_memcpy_trap:
xorq %rax, %rax
ret
ENDPROC(memcpy_mcsafe)
ENDPROC(memcpy_mcsafe_unrolled)
.section .fixup, "ax"
/* Return -EFAULT for any failure */

View File

@ -90,6 +90,7 @@ if_changed = $(if $(strip $(any-prereq) $(arg-check)), \
# - per object C flags
# - BUILD_STR macro to allow '-D"$(variable)"' constructs
c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj))
cxx_flags = -Wp,-MD,$(depfile),-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXXFLAGS_$(basetarget).o) $(CXXFLAGS_$(obj))
###
## HOSTCC C flags

View File

@ -61,6 +61,9 @@ quiet_cmd_cc_o_c = CC $@
quiet_cmd_host_cc_o_c = HOSTCC $@
cmd_host_cc_o_c = $(HOSTCC) $(host_c_flags) -c -o $@ $<
quiet_cmd_cxx_o_c = CXX $@
cmd_cxx_o_c = $(CXX) $(cxx_flags) -c -o $@ $<
quiet_cmd_cpp_i_c = CPP $@
cmd_cpp_i_c = $(CC) $(c_flags) -E -o $@ $<
@ -88,6 +91,10 @@ $(OUTPUT)%.o: %.c FORCE
$(call rule_mkdir)
$(call if_changed_dep,$(host)cc_o_c)
$(OUTPUT)%.o: %.cpp FORCE
$(call rule_mkdir)
$(call if_changed_dep,cxx_o_c)
$(OUTPUT)%.o: %.S FORCE
$(call rule_mkdir)
$(call if_changed_dep,$(host)cc_o_c)

View File

@ -7,7 +7,7 @@ endif
feature_check = $(eval $(feature_check_code))
define feature_check_code
feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
endef
feature_set = $(eval $(feature_set_code))

View File

@ -46,11 +46,13 @@ FILES= \
test-lzma.bin \
test-bpf.bin \
test-get_cpuid.bin \
test-sdt.bin
test-sdt.bin \
test-cxx.bin
FILES := $(addprefix $(OUTPUT),$(FILES))
CC := $(CROSS_COMPILE)gcc -MD
CXX := $(CROSS_COMPILE)g++ -MD
PKG_CONFIG := $(CROSS_COMPILE)pkg-config
all: $(FILES)
@ -58,6 +60,9 @@ all: $(FILES)
__BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1
__BUILDXX = $(CXX) $(CXXFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1
###############################
$(OUTPUT)test-all.bin:
@ -217,6 +222,9 @@ $(OUTPUT)test-bpf.bin:
$(OUTPUT)test-sdt.bin:
$(BUILD)
$(OUTPUT)test-cxx.bin:
$(BUILDXX) -std=gnu++11
-include $(OUTPUT)*.d
###############################

View File

@ -0,0 +1,15 @@
#include <iostream>
#include <memory>
// Write the string s to standard output, followed by std::endl.
// s is received by value, so a caller may hand it over with std::move().
static void print_str(std::string s)
{
std::cout << s << std::endl;
}
// Build a std::string, pass it to print_str() through std::move(), then
// print whatever s holds afterwards between '|' markers. Always returns 0.
int main()
{
std::string s("Hello World!");
print_str(std::move(s));
// s has been moved from at this point; the C++ standard leaves its
// contents valid but unspecified, so the text between the bars may vary.
std::cout << "|" << s << "|" << std::endl;
return 0;
}

View File

@ -339,7 +339,7 @@ enum bpf_func_id {
BPF_FUNC_skb_change_type,
/**
* bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
* bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb
* @skb: pointer to skb
* @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
* @index: index of the cgroup in the bpf_map
@ -348,7 +348,7 @@ enum bpf_func_id {
* == 1 skb succeeded the cgroup2 descendant test
* < 0 error
*/
BPF_FUNC_skb_in_cgroup,
BPF_FUNC_skb_under_cgroup,
/**
* bpf_get_hash_recalc(skb)

View File

@ -622,6 +622,7 @@ void *kbuffer_read_at_offset(struct kbuffer *kbuf, int offset,
/* Reset the buffer */
kbuffer_load_subbuffer(kbuf, kbuf->subbuffer);
data = kbuffer_read_event(kbuf, ts);
while (kbuf->curr < offset) {
data = kbuffer_next_event(kbuf, ts);

View File

@ -28,3 +28,7 @@ To change sampling frequency to 100 Hz: perf record -F 100
See assembly instructions with percentage: perf annotate <symbol>
If you prefer Intel style assembly, try: perf annotate -M intel
For hierarchical output, try: perf report --hierarchy
Order by the overhead of source file name and line number: perf report -s srcline
System-wide collection from all CPUs: perf record -a
Show current config key-value pairs: perf config --list
Show user configuration overrides: perf config --user --list

View File

@ -82,7 +82,8 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev,
*
* In addition, we shouldn't specify an offset for kretprobes.
*/
if (pev->point.offset || pev->point.retprobe || !map || !sym)
if (pev->point.offset || (!pev->uprobes && pev->point.retprobe) ||
!map || !sym)
return;
lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym);

View File

@ -312,6 +312,8 @@ static struct fixed {
const char *event;
} fixed[] = {
{ "inst_retired.any", "event=0xc0" },
{ "inst_retired.any_p", "event=0xc0" },
{ "cpu_clk_unhalted.ref", "event=0x0,umask=0x03" },
{ "cpu_clk_unhalted.thread", "event=0x3c" },
{ "cpu_clk_unhalted.thread_any", "event=0x3c,any=1" },
{ NULL, NULL},

View File

@ -90,6 +90,7 @@ struct intel_pt_decoder {
bool pge;
bool have_tma;
bool have_cyc;
bool fixup_last_mtc;
uint64_t pos;
uint64_t last_ip;
uint64_t ip;
@ -586,10 +587,31 @@ struct intel_pt_calc_cyc_to_tsc_info {
uint64_t tsc_timestamp;
uint64_t timestamp;
bool have_tma;
bool fixup_last_mtc;
bool from_mtc;
double cbr_cyc_to_tsc;
};
/*
 * An MTC packet carries an 8-bit slice of CTC, whereas the TMA packet only
 * supplies the lower 16 bits of CTC. When mtc_shift > 8, the upper bits of
 * that MTC slice therefore lie outside what the TMA packet provided, and the
 * last_mtc value derived from TMA lacks them.
 *
 * Repair *last_mtc by borrowing the missing upper bits from the current MTC
 * value, choosing the candidate closest to mtc under the assumption that the
 * current MTC comes after last_mtc: if the patched value is not strictly
 * below mtc, step it back by one unit of the lowest missing bit and wrap it
 * to 8 bits.
 *
 * @mtc:       payload of the current MTC packet
 * @mtc_shift: MTC shift in effect; callers only invoke this when it is > 8
 * @last_mtc:  in/out - last MTC value computed from TMA, fixed up in place
 */
static void intel_pt_fixup_last_mtc(uint32_t mtc, int mtc_shift,
				    uint32_t *last_mtc)
{
	/* Lowest bit of the MTC slice that the TMA-provided CTC does not cover */
	const uint32_t missing_lsb = 1U << (16 - mtc_shift);
	/* Bits of the current MTC at or above that position */
	const uint32_t borrowed = mtc & ~(missing_lsb - 1);
	uint32_t fixed = *last_mtc | borrowed;

	/* last_mtc must precede mtc: step back one period and wrap to 8 bits */
	if (fixed >= mtc)
		fixed = (fixed - missing_lsb) & 0xff;

	*last_mtc = fixed;
}
static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
{
struct intel_pt_decoder *decoder = pkt_info->decoder;
@ -619,6 +641,11 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
return 0;
mtc = pkt_info->packet.payload;
if (decoder->mtc_shift > 8 && data->fixup_last_mtc) {
data->fixup_last_mtc = false;
intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift,
&data->last_mtc);
}
if (mtc > data->last_mtc)
mtc_delta = mtc - data->last_mtc;
else
@ -687,6 +714,7 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
data->ctc_delta = 0;
data->have_tma = true;
data->fixup_last_mtc = true;
return 0;
@ -753,6 +781,7 @@ static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder,
.tsc_timestamp = decoder->tsc_timestamp,
.timestamp = decoder->timestamp,
.have_tma = decoder->have_tma,
.fixup_last_mtc = decoder->fixup_last_mtc,
.from_mtc = from_mtc,
.cbr_cyc_to_tsc = 0,
};
@ -1271,6 +1300,7 @@ static void intel_pt_calc_tma(struct intel_pt_decoder *decoder)
}
decoder->ctc_delta = 0;
decoder->have_tma = true;
decoder->fixup_last_mtc = true;
intel_pt_log("CTC timestamp " x64_fmt " last MTC %#x CTC rem %#x\n",
decoder->ctc_timestamp, decoder->last_mtc, ctc_rem);
}
@ -1285,6 +1315,12 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
mtc = decoder->packet.payload;
if (decoder->mtc_shift > 8 && decoder->fixup_last_mtc) {
decoder->fixup_last_mtc = false;
intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift,
&decoder->last_mtc);
}
if (mtc > decoder->last_mtc)
mtc_delta = mtc - decoder->last_mtc;
else
@ -1353,6 +1389,8 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
timestamp, decoder->timestamp);
else
decoder->timestamp = timestamp;
decoder->timestamp_insn_cnt = 0;
}
/* Walk PSB+ packets when already in sync. */