mirror of https://gitee.com/openkylin/numactl.git
326 lines
7.7 KiB
C
326 lines
7.7 KiB
C
/* Copyright (C) 2003,2004 Andi Kleen, SuSE Labs.
|
|
Manage shared memory policy for numactl.
|
|
The actual policy is set in numactl itself, this just sets up and maps
|
|
the shared memory segments and dumps them.
|
|
|
|
numactl is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public
|
|
License as published by the Free Software Foundation; version
|
|
2.
|
|
|
|
numactl is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
General Public License for more details.
|
|
|
|
You should find a copy of v2 of the GNU General Public License somewhere
|
|
on your Linux system; if not, write to the Free Software Foundation,
|
|
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
|
|
|
|
#define _GNU_SOURCE 1
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <sys/mman.h>
|
|
#include <sys/ipc.h>
|
|
#include <sys/shm.h>
|
|
#include <sys/fcntl.h>
|
|
#include <sys/stat.h>
|
|
#include <stdarg.h>
|
|
#include <errno.h>
|
|
#include <unistd.h>
|
|
#include "numa.h"
|
|
#include "numaif.h"
|
|
#include "numaint.h"
|
|
#include "util.h"
|
|
#include "shm.h"
|
|
|
|
int shmfd = -1;
|
|
long shmid = 0;
|
|
char *shmptr;
|
|
unsigned long long shmlen;
|
|
mode_t shmmode = 0600;
|
|
unsigned long long shmoffset;
|
|
int shmflags;
|
|
static int shm_pagesize;
|
|
|
|
long huge_page_size(void)
|
|
{
|
|
size_t len = 0;
|
|
char *line = NULL;
|
|
FILE *f = fopen("/proc/meminfo", "r");
|
|
if (f != NULL) {
|
|
while (getdelim(&line, &len, '\n', f) > 0) {
|
|
int ps;
|
|
if (sscanf(line, "Hugepagesize: %d kB", &ps) == 1)
|
|
return ps * 1024;
|
|
}
|
|
free(line);
|
|
fclose(f);
|
|
}
|
|
return getpagesize();
|
|
}
|
|
|
|
static void check_region(char *opt)
|
|
{
|
|
if (((unsigned long)shmptr % shm_pagesize) || (shmlen % shm_pagesize)) {
|
|
fprintf(stderr, "numactl: policy region not page aligned\n");
|
|
exit(1);
|
|
}
|
|
if (!shmlen) {
|
|
fprintf(stderr,
|
|
"numactl: policy region length not specified before %s\n",
|
|
opt);
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
static key_t sysvkey(char *name)
|
|
{
|
|
int fd;
|
|
key_t key = ftok(name, shmid);
|
|
if (key >= 0)
|
|
return key;
|
|
|
|
fprintf(stderr, "numactl: Creating shm key file %s mode %04o\n",
|
|
name, shmmode);
|
|
fd = creat(name, shmmode);
|
|
if (fd < 0)
|
|
nerror("cannot create key for shm %s\n", name);
|
|
key = ftok(name, shmid);
|
|
if (key < 0)
|
|
nerror("cannot get key for newly created shm key file %s",
|
|
name);
|
|
return key;
|
|
}
|
|
|
|
/* Attach a sysv style shared memory segment. */
|
|
void attach_sysvshm(char *name, char *opt)
|
|
{
|
|
struct shmid_ds s;
|
|
key_t key = sysvkey(name);
|
|
|
|
shmfd = shmget(key, shmlen, shmflags);
|
|
if (shmfd < 0 && errno == ENOENT) {
|
|
if (shmlen == 0)
|
|
complain(
|
|
"need a --length to create a sysv shared memory segment");
|
|
fprintf(stderr,
|
|
"numactl: Creating shared memory segment %s id %ld mode %04o length %.fMB\n",
|
|
name, shmid, shmmode, ((double)shmlen) / (1024*1024) );
|
|
shmfd = shmget(key, shmlen, IPC_CREAT|shmmode|shmflags);
|
|
if (shmfd < 0)
|
|
nerror("cannot create shared memory segment");
|
|
}
|
|
|
|
if (shmlen == 0) {
|
|
if (shmctl(shmfd, IPC_STAT, &s) < 0)
|
|
err("shmctl IPC_STAT");
|
|
shmlen = s.shm_segsz;
|
|
}
|
|
|
|
shmptr = shmat(shmfd, NULL, SHM_RDONLY);
|
|
if (shmptr == (void*)-1)
|
|
err("shmat");
|
|
shmptr += shmoffset;
|
|
|
|
shm_pagesize = (shmflags & SHM_HUGETLB) ? huge_page_size() : getpagesize();
|
|
|
|
check_region(opt);
|
|
}
|
|
|
|
/* Attach a shared memory file. */
|
|
void attach_shared(char *name, char *opt)
|
|
{
|
|
struct stat64 st;
|
|
|
|
shmfd = open(name, O_RDONLY);
|
|
if (shmfd < 0) {
|
|
errno = 0;
|
|
if (shmlen == 0)
|
|
complain("need a --length to create a shared file");
|
|
shmfd = open(name, O_RDWR|O_CREAT, shmmode);
|
|
if (shmfd < 0)
|
|
nerror("cannot create file %s", name);
|
|
}
|
|
if (fstat64(shmfd, &st) < 0)
|
|
err("shm stat");
|
|
if (shmlen > st.st_size) {
|
|
if (ftruncate64(shmfd, shmlen) < 0) {
|
|
/* XXX: we could do it by hand, but it would it
|
|
would be impossible to apply policy then.
|
|
need to fix that in the kernel. */
|
|
perror("ftruncate");
|
|
}
|
|
}
|
|
|
|
shm_pagesize = st.st_blksize;
|
|
|
|
check_region(opt);
|
|
|
|
/* RED-PEN For shmlen > address space may need to map in pieces.
|
|
Left for some poor 32bit soul. */
|
|
shmptr = mmap64(NULL, shmlen, PROT_READ, MAP_SHARED, shmfd, shmoffset);
|
|
if (shmptr == (char*)-1)
|
|
err("shm mmap");
|
|
|
|
}
|
|
|
|
static void
|
|
dumppol(unsigned long long start, unsigned long long end, int pol, struct bitmask *mask)
|
|
{
|
|
if (pol == MPOL_DEFAULT)
|
|
return;
|
|
printf("%016llx-%016llx: %s ",
|
|
shmoffset+start,
|
|
shmoffset+end,
|
|
policy_name(pol));
|
|
printmask("", mask);
|
|
}
|
|
|
|
/* Dump policies in a shared memory segment. */
|
|
void dump_shm(void)
|
|
{
|
|
struct bitmask *nodes, *prevnodes;
|
|
int prevpol = -1, pol;
|
|
unsigned long long c, start;
|
|
|
|
start = 0;
|
|
if (shmlen == 0) {
|
|
printf("nothing to dump\n");
|
|
return;
|
|
}
|
|
|
|
nodes = numa_allocate_nodemask();
|
|
prevnodes = numa_allocate_nodemask();
|
|
|
|
for (c = 0; c < shmlen; c += shm_pagesize) {
|
|
if (get_mempolicy(&pol, nodes->maskp, nodes->size, c+shmptr,
|
|
MPOL_F_ADDR) < 0)
|
|
err("get_mempolicy on shm");
|
|
if (pol == prevpol)
|
|
continue;
|
|
if (prevpol != -1)
|
|
dumppol(start, c, prevpol, prevnodes);
|
|
prevnodes = nodes;
|
|
prevpol = pol;
|
|
start = c;
|
|
}
|
|
dumppol(start, c, prevpol, prevnodes);
|
|
}
|
|
|
|
static void dumpnode(unsigned long long start, unsigned long long end, int node)
|
|
{
|
|
printf("%016llx-%016llx: %d\n", shmoffset+start, shmoffset+end, node);
|
|
}
|
|
|
|
/* Dump nodes in a shared memory segment. */
|
|
void dump_shm_nodes(void)
|
|
{
|
|
int prevnode = -1, node;
|
|
unsigned long long c, start;
|
|
|
|
start = 0;
|
|
if (shmlen == 0) {
|
|
printf("nothing to dump\n");
|
|
return;
|
|
}
|
|
|
|
for (c = 0; c < shmlen; c += shm_pagesize) {
|
|
if (get_mempolicy(&node, NULL, 0, c+shmptr,
|
|
MPOL_F_ADDR|MPOL_F_NODE) < 0)
|
|
err("get_mempolicy on shm");
|
|
if (node == prevnode)
|
|
continue;
|
|
if (prevnode != -1)
|
|
dumpnode(start, c, prevnode);
|
|
prevnode = node;
|
|
start = c;
|
|
}
|
|
dumpnode(start, c, prevnode);
|
|
}
|
|
|
|
static void vwarn(char *ptr, char *fmt, ...)
|
|
{
|
|
va_list ap;
|
|
unsigned long off = (unsigned long)ptr - (unsigned long)shmptr;
|
|
va_start(ap,fmt);
|
|
printf("numactl verify %lx(%lx): ", (unsigned long)ptr, off);
|
|
vprintf(fmt, ap);
|
|
va_end(ap);
|
|
exitcode = 1;
|
|
}
|
|
|
|
static unsigned interleave_next(unsigned cur, struct bitmask *mask)
|
|
{
|
|
int numa_num_nodes = numa_num_possible_nodes();
|
|
|
|
++cur;
|
|
while (!numa_bitmask_isbitset(mask, cur)) {
|
|
cur = (cur+1) % numa_num_nodes;
|
|
}
|
|
return cur;
|
|
}
|
|
|
|
/* Verify policy in a shared memory segment */
|
|
void verify_shm(int policy, struct bitmask *nodes)
|
|
{
|
|
char *p;
|
|
int ilnode, node;
|
|
int pol2;
|
|
struct bitmask *nodes2;
|
|
|
|
nodes2 = numa_allocate_nodemask();
|
|
|
|
if (policy == MPOL_INTERLEAVE) {
|
|
if (get_mempolicy(&ilnode, NULL, 0, shmptr,
|
|
MPOL_F_ADDR|MPOL_F_NODE)
|
|
< 0)
|
|
err("get_mempolicy");
|
|
}
|
|
|
|
for (p = shmptr; p - (char *)shmptr < shmlen; p += shm_pagesize) {
|
|
if (get_mempolicy(&pol2, nodes2->maskp, nodes2->size, p,
|
|
MPOL_F_ADDR) < 0)
|
|
err("get_mempolicy");
|
|
if (pol2 != policy) {
|
|
vwarn(p, "wrong policy %s, expected %s\n",
|
|
policy_name(pol2), policy_name(policy));
|
|
return;
|
|
}
|
|
if (memcmp(nodes2, nodes, numa_bitmask_nbytes(nodes))) {
|
|
vwarn(p, "mismatched node mask\n");
|
|
printmask("expected", nodes);
|
|
printmask("real", nodes2);
|
|
}
|
|
|
|
if (get_mempolicy(&node, NULL, 0, p, MPOL_F_ADDR|MPOL_F_NODE) < 0)
|
|
err("get_mempolicy");
|
|
|
|
switch (policy) {
|
|
case MPOL_INTERLEAVE:
|
|
if (node < 0 || !numa_bitmask_isbitset(nodes2, node))
|
|
vwarn(p, "interleave node out of range %d\n", node);
|
|
if (node != ilnode) {
|
|
vwarn(p, "expected interleave node %d, got %d\n",
|
|
ilnode,node);
|
|
return;
|
|
}
|
|
ilnode = interleave_next(ilnode, nodes2);
|
|
break;
|
|
case MPOL_PREFERRED:
|
|
case MPOL_BIND:
|
|
if (!numa_bitmask_isbitset(nodes2, node)) {
|
|
vwarn(p, "unexpected node %d\n", node);
|
|
printmask("expected", nodes2);
|
|
}
|
|
break;
|
|
|
|
case MPOL_DEFAULT:
|
|
break;
|
|
|
|
}
|
|
}
|
|
|
|
}
|