linux/arch/x86
Peter Zijlstra 87e24f4b67 perf/x86: Fix local vs remote memory events for NHM/WSM
Verified using the proglet below. Before:

[root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 0
remote write

 Performance counter stats for './numa 0':

         2,101,554 node-stores
         2,096,931 node-store-misses

       5.021546079 seconds time elapsed

[root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 1
local write

 Performance counter stats for './numa 1':

           501,137 node-stores
               199 node-store-misses

       5.124451068 seconds time elapsed

After:

[root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 0
remote write

 Performance counter stats for './numa 0':

         2,107,516 node-stores
         2,097,187 node-store-misses

       5.012755149 seconds time elapsed

[root@westmere ~]# perf stat -e node-stores -e node-store-misses ./numa 1
local write

 Performance counter stats for './numa 1':

         2,063,355 node-stores
               165 node-store-misses

       5.082091494 seconds time elapsed

#define _GNU_SOURCE

#include <sched.h>
#include <stdio.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <dirent.h>
#include <signal.h>
#include <unistd.h>
#include <numaif.h>
#include <stdlib.h>

#define SIZE (32*1024*1024)

/*
 * Termination flag for the access loops in main().
 *
 * It is written from a signal handler, so it must be a
 * volatile sig_atomic_t: that is the only object type the C standard
 * lets a handler store to with defined behaviour.
 */
volatile sig_atomic_t done;

/*
 * Signal handler: request that the running access loop stop.
 *
 * @sig: number of the delivered signal (unused).
 */
void sig_done(int sig)
{
	(void)sig;	/* only one signal is routed here; its number is irrelevant */
	done = 1;
}

/*
 * Generate five seconds of memory traffic against a buffer bound to NUMA
 * node 0, from either a node-0 CPU ("local") or any other CPU ("remote").
 *
 * argv[1] selects the mode:
 *   0 - remote write, 1 - local write, 2 - remote read, 3 - local read
 *
 * Returns 0 on normal completion (and after printing usage), or exits
 * with EXIT_FAILURE when a resource needed to run the loop at all
 * (CPU masks, the node0 sysfs directory, the buffer) cannot be obtained.
 * Failures of sched_setaffinity() and mbind() are reported but not fatal.
 */
int main(int argc, char **argv)
{
	cpu_set_t *mask, *mask2, *affinity;
	size_t size;
	int i, err;
	int t = 0;	/* sink for the read loop; must start defined */
	int nrcpus = 1024;
	char *mem;
	unsigned long nodemask = 0x01; /* node 0 */
	DIR *node;
	struct dirent *de;
	int do_read = 0;
	int local = 0;

	if (argc < 2) {
		printf("usage: %s [0-3]\n", argv[0]);
		printf("  bit0 - local/remote\n");
		printf("  bit1 - read/write\n");
		exit(0);
	}

	switch (atoi(argv[1])) {
	case 0:
		printf("remote write\n");
		break;
	case 1:
		printf("local write\n");
		local = 1;
		break;
	case 2:
		printf("remote read\n");
		do_read = 1;
		break;
	case 3:
		printf("local read\n");
		local = 1;
		do_read = 1;
		break;
	default:
		/* any other value runs as remote write without a banner */
		break;
	}

	/* size is the mask size in BYTES; nrcpus is the number of CPU bits. */
	size = CPU_ALLOC_SIZE(nrcpus);
	mask = CPU_ALLOC(nrcpus);
	mask2 = CPU_ALLOC(nrcpus);
	if (!mask || !mask2) {
		perror("CPU_ALLOC");
		exit(EXIT_FAILURE);
	}
	CPU_ZERO_S(size, mask);
	CPU_ZERO_S(size, mask2);

	/* mask := every cpuN entry listed under node0 */
	node = opendir("/sys/devices/system/node/node0/");
	if (!node) {
		perror("opendir");
		exit(EXIT_FAILURE);	/* readdir(NULL) would crash */
	}
	while ((de = readdir(node))) {
		int cpu;

		if (sscanf(de->d_name, "cpu%d", &cpu) == 1)
			CPU_SET_S(cpu, size, mask);
	}
	closedir(node);

	/*
	 * mask2 := all CPUs except those in mask.  Iterate over CPU
	 * indices (nrcpus), not over the byte size of the set, so that
	 * CPUs beyond index CPU_ALLOC_SIZE(nrcpus) are included too.
	 */
	for (i = 0; i < nrcpus; i++)
		CPU_SET_S(i, size, mask2);
	CPU_XOR_S(size, mask2, mask2, mask); // invert

	/* keep both allocations reachable so they can be freed below */
	affinity = local ? mask : mask2;

	err = sched_setaffinity(0, size, affinity);
	if (err)
		perror("sched_setaffinity");

	mem = mmap(0, SIZE, PROT_READ|PROT_WRITE,
			MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	if (mem == MAP_FAILED) {
		perror("mmap");
		exit(EXIT_FAILURE);
	}
	err = mbind(mem, SIZE, MPOL_BIND, &nodemask, 8*sizeof(nodemask), MPOL_MF_MOVE);
	if (err)
		perror("mbind");

	/* sig_done() sets 'done' when the 5 second alarm fires */
	signal(SIGALRM, sig_done);
	alarm(5);

	if (!do_read) {
		while (!done) {
			for (i = 0; i < SIZE; i++)
				mem[i] = 0x01;
		}
	} else {
		while (!done) {
			/* volatile access keeps the loads from being elided */
			for (i = 0; i < SIZE; i++)
				t += *(volatile char *)(mem + i);
		}
	}
	(void)t;

	munmap(mem, SIZE);
	CPU_FREE(mask);
	CPU_FREE(mask2);

	return 0;
}

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Cc: <stable@kernel.org>
Link: http://lkml.kernel.org/n/tip-tq73sxus35xmqpojf7ootxgs@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2012-03-12 20:43:41 +01:00
..
boot x86/boot-image: Don't leak phdrs in arch/x86/boot/compressed/misc.c::Parse_elf() 2012-01-26 11:30:29 +01:00
configs iommu: Rename the DMAR and INTR_REMAP config options 2011-09-21 10:22:03 +02:00
crypto crypto: serpent-sse2 - remove unneeded LRW/XTS #ifdefs 2011-12-20 15:20:08 +08:00
ia32 aout: move setup_arg_pages() prior to reading/mapping the binary 2012-03-05 13:51:32 -08:00
include/asm perf/x86/kvm: Fix Host-Only/Guest-Only counting with SVM disabled 2012-03-02 12:16:39 +01:00
kernel perf/x86: Fix local vs remote memory events for NHM/WSM 2012-03-12 20:43:41 +01:00
kvm perf/x86/kvm: Fix Host-Only/Guest-Only counting with SVM disabled 2012-03-02 12:16:39 +01:00
lguest lguest: Make sure interrupt is allocated ok by lguest_setup_irq 2012-01-12 15:44:47 +10:30
lib x86: Derandom delay_tsc for 64 bit 2012-03-09 12:43:27 -08:00
math-emu
mm x86: fix typo in recent find_vma_prev purge 2012-03-06 18:48:13 -08:00
net net: bpf_jit: fix divide by 0 generation 2012-01-18 16:04:26 -05:00
oprofile Merge branch 'core' of git://amd64.org/linux/rric into perf/core 2011-11-15 11:05:18 +01:00
pci x86/PCI: do not tie MSI MS-7253 use_crs quirk to BIOS version 2012-03-01 10:56:37 -08:00
platform x86/uv: Fix uninitialized spinlocks 2012-01-26 10:58:34 +01:00
power x86: Fix files explicitly requiring export.h for EXPORT_SYMBOL/THIS_MODULE 2011-10-31 19:30:35 -04:00
syscalls x86, syscall: Allow syscall offset to be symbolic 2011-11-18 17:01:19 -08:00
tools x86/tools: Add decoded instruction dump mode 2011-12-05 14:53:23 +01:00
um uml: fix compile for x86-64 2012-01-18 19:26:11 -08:00
vdso Merge branch 'x86-cpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip 2011-10-28 05:03:12 -07:00
video x86: fix up files really needing to include module.h 2011-10-31 19:30:36 -04:00
xen xen/pat: Disable PAT support for now. 2012-02-20 10:41:35 -05:00
.gitignore x86/kprobes: Add arch/x86/tools/insn_sanity to .gitignore 2012-01-16 08:21:59 +01:00
Kbuild net: filter: Just In Time compiler for x86-64 2011-04-27 23:05:08 -07:00
Kconfig x86/numachip: Drop unnecessary conflict with EDAC 2012-01-26 11:03:03 +01:00
Kconfig.cpu mm,x86,um: move CMPXCHG_DOUBLE config option 2012-01-12 20:13:03 -08:00
Kconfig.debug Merge branch 'x86-platform-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip 2012-01-11 19:13:40 -08:00
Makefile x86: Generate system call tables and unistd_*.h from tables 2011-11-17 13:35:37 -08:00
Makefile.um um: take arch/um/sys-x86 to arch/x86/um 2011-11-02 14:15:05 +01:00
Makefile_32.cpu x86, cpu: Move AMD Elan Kconfig under "Processor family" 2011-04-08 13:01:25 -07:00