linux/drivers/oprofile/event_buffer.c

210 lines
4.5 KiB
C
Raw Normal View History

/**
* @file event_buffer.c
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
*
* This is the global event buffer that the user-space
* daemon reads from. The event buffer is an untyped array
* of unsigned longs. Entries are prefixed by the
* escape value ESCAPE_CODE followed by an identifying code.
*/
#include <linux/vmalloc.h>
#include <linux/oprofile.h>
#include <linux/sched/signal.h>
#include <linux/capability.h>
#include <linux/dcookies.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include "oprof.h"
#include "event_buffer.h"
#include "oprofile_stats.h"
DEFINE_MUTEX(buffer_mutex);
static unsigned long buffer_opened;
static DECLARE_WAIT_QUEUE_HEAD(buffer_wait);
static unsigned long *event_buffer;
static unsigned long buffer_size;
static unsigned long buffer_watershed;
static size_t buffer_pos;
/* atomic_t because wait_event checks it outside of buffer_mutex */
static atomic_t buffer_ready = ATOMIC_INIT(0);
/*
* Add an entry to the event buffer. When we get near to the end we
* wake up the process sleeping on the read() of the file. To protect
* the event_buffer this function may only be called when buffer_mutex
* is set.
*/
void add_event_entry(unsigned long value)
{
/*
* This shouldn't happen since all workqueues or handlers are
* canceled or flushed before the event buffer is freed.
*/
if (!event_buffer) {
WARN_ON_ONCE(1);
return;
}
if (buffer_pos == buffer_size) {
atomic_inc(&oprofile_stats.event_lost_overflow);
return;
}
event_buffer[buffer_pos] = value;
if (++buffer_pos == buffer_size - buffer_watershed) {
atomic_set(&buffer_ready, 1);
wake_up(&buffer_wait);
}
}
/* Wake up the waiting process if any. This happens
* on "echo 0 >/dev/oprofile/enable" so the daemon
* processes the data remaining in the event buffer.
*/
void wake_up_buffer_waiter(void)
{
mutex_lock(&buffer_mutex);
atomic_set(&buffer_ready, 1);
wake_up(&buffer_wait);
mutex_unlock(&buffer_mutex);
}
int alloc_event_buffer(void)
{
unsigned long flags;
raw_spin_lock_irqsave(&oprofilefs_lock, flags);
buffer_size = oprofile_buffer_size;
buffer_watershed = oprofile_buffer_watershed;
raw_spin_unlock_irqrestore(&oprofilefs_lock, flags);
if (buffer_watershed >= buffer_size)
return -EINVAL;
buffer_pos = 0;
treewide: Use array_size() in vmalloc() The vmalloc() function has no 2-factor argument form, so multiplication factors need to be wrapped in array_size(). This patch replaces cases of: vmalloc(a * b) with: vmalloc(array_size(a, b)) as well as handling cases of: vmalloc(a * b * c) with: vmalloc(array3_size(a, b, c)) This does, however, attempt to ignore constant size factors like: vmalloc(4 * 1024) though any constants defined via macros get caught up in the conversion. Any factors with a sizeof() of "unsigned char", "char", and "u8" were dropped, since they're redundant. The Coccinelle script used for this was: // Fix redundant parens around sizeof(). @@ type TYPE; expression THING, E; @@ ( vmalloc( - (sizeof(TYPE)) * E + sizeof(TYPE) * E , ...) | vmalloc( - (sizeof(THING)) * E + sizeof(THING) * E , ...) ) // Drop single-byte sizes and redundant parens. @@ expression COUNT; typedef u8; typedef __u8; @@ ( vmalloc( - sizeof(u8) * (COUNT) + COUNT , ...) | vmalloc( - sizeof(__u8) * (COUNT) + COUNT , ...) | vmalloc( - sizeof(char) * (COUNT) + COUNT , ...) | vmalloc( - sizeof(unsigned char) * (COUNT) + COUNT , ...) | vmalloc( - sizeof(u8) * COUNT + COUNT , ...) | vmalloc( - sizeof(__u8) * COUNT + COUNT , ...) | vmalloc( - sizeof(char) * COUNT + COUNT , ...) | vmalloc( - sizeof(unsigned char) * COUNT + COUNT , ...) ) // 2-factor product with sizeof(type/expression) and identifier or constant. @@ type TYPE; expression THING; identifier COUNT_ID; constant COUNT_CONST; @@ ( vmalloc( - sizeof(TYPE) * (COUNT_ID) + array_size(COUNT_ID, sizeof(TYPE)) , ...) | vmalloc( - sizeof(TYPE) * COUNT_ID + array_size(COUNT_ID, sizeof(TYPE)) , ...) | vmalloc( - sizeof(TYPE) * (COUNT_CONST) + array_size(COUNT_CONST, sizeof(TYPE)) , ...) | vmalloc( - sizeof(TYPE) * COUNT_CONST + array_size(COUNT_CONST, sizeof(TYPE)) , ...) | vmalloc( - sizeof(THING) * (COUNT_ID) + array_size(COUNT_ID, sizeof(THING)) , ...) | vmalloc( - sizeof(THING) * COUNT_ID + array_size(COUNT_ID, sizeof(THING)) , ...) | vmalloc( - sizeof(THING) * (COUNT_CONST) + array_size(COUNT_CONST, sizeof(THING)) , ...) | vmalloc( - sizeof(THING) * COUNT_CONST + array_size(COUNT_CONST, sizeof(THING)) , ...) ) // 2-factor product, only identifiers. @@ identifier SIZE, COUNT; @@ vmalloc( - SIZE * COUNT + array_size(COUNT, SIZE) , ...) // 3-factor product with 1 sizeof(type) or sizeof(expression), with // redundant parens removed. @@ expression THING; identifier STRIDE, COUNT; type TYPE; @@ ( vmalloc( - sizeof(TYPE) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | vmalloc( - sizeof(TYPE) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | vmalloc( - sizeof(TYPE) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | vmalloc( - sizeof(TYPE) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | vmalloc( - sizeof(THING) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | vmalloc( - sizeof(THING) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | vmalloc( - sizeof(THING) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | vmalloc( - sizeof(THING) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) ) // 3-factor product with 2 sizeof(variable), with redundant parens removed. @@ expression THING1, THING2; identifier COUNT; type TYPE1, TYPE2; @@ ( vmalloc( - sizeof(TYPE1) * sizeof(TYPE2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | vmalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | vmalloc( - sizeof(THING1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | vmalloc( - sizeof(THING1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | vmalloc( - sizeof(TYPE1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) | vmalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) ) // 3-factor product, only identifiers, with redundant parens removed. @@ identifier STRIDE, SIZE, COUNT; @@ ( vmalloc( - (COUNT) * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | vmalloc( - COUNT * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | vmalloc( - COUNT * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | vmalloc( - (COUNT) * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | vmalloc( - COUNT * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | vmalloc( - (COUNT) * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | vmalloc( - (COUNT) * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | vmalloc( - COUNT * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) ) // Any remaining multi-factor products, first at least 3-factor products // when they're not all constants... @@ expression E1, E2, E3; constant C1, C2, C3; @@ ( vmalloc(C1 * C2 * C3, ...) | vmalloc( - E1 * E2 * E3 + array3_size(E1, E2, E3) , ...) ) // And then all remaining 2 factors products when they're not all constants. @@ expression E1, E2; constant C1, C2; @@ ( vmalloc(C1 * C2, ...) | vmalloc( - E1 * E2 + array_size(E1, E2) , ...) ) Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-13 05:27:11 +08:00
event_buffer = vmalloc(array_size(buffer_size, sizeof(unsigned long)));
if (!event_buffer)
return -ENOMEM;
return 0;
}
void free_event_buffer(void)
{
mutex_lock(&buffer_mutex);
vfree(event_buffer);
buffer_pos = 0;
event_buffer = NULL;
mutex_unlock(&buffer_mutex);
}
static int event_buffer_open(struct inode *inode, struct file *file)
{
int err = -EPERM;
drivers/oprofile: Open access for CAP_PERFMON privileged process Open access to monitoring for CAP_PERFMON privileged process. Providing the access under CAP_PERFMON capability singly, without the rest of CAP_SYS_ADMIN credentials, excludes chances to misuse the credentials and makes operation more secure. CAP_PERFMON implements the principle of least privilege for performance monitoring and observability operations (POSIX IEEE 1003.1e 2.2.2.39 principle of least privilege: A security design principle that states that a process or program be granted only those privileges (e.g., capabilities) necessary to accomplish its legitimate function, and only for the time that such privileges are actually required) For backward compatibility reasons access to the monitoring remains open for CAP_SYS_ADMIN privileged processes but CAP_SYS_ADMIN usage for secure monitoring is discouraged with respect to CAP_PERFMON capability. Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com> Acked-by: James Morris <jamorris@linux.microsoft.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Andi Kleen <ak@linux.intel.com> Cc: Igor Lubashev <ilubashe@akamai.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Serge Hallyn <serge@hallyn.com> Cc: Song Liu <songliubraving@fb.com> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: intel-gfx@lists.freedesktop.org Cc: linux-doc@vger.kernel.org Cc: linux-man@vger.kernel.org Cc: linux-security-module@vger.kernel.org Cc: selinux@vger.kernel.org Link: http://lore.kernel.org/lkml/691f1096-b15f-9b12-50a0-c2b93918149e@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2020-04-02 16:53:07 +08:00
if (!perfmon_capable())
return -EPERM;
if (test_and_set_bit_lock(0, &buffer_opened))
return -EBUSY;
/* Register as a user of dcookies
* to ensure they persist for the lifetime of
* the open event file
*/
err = -EINVAL;
file->private_data = dcookie_register();
if (!file->private_data)
goto out;
if ((err = oprofile_setup()))
goto fail;
/* NB: the actual start happens from userspace
* echo 1 >/dev/oprofile/enable
*/
return nonseekable_open(inode, file);
fail:
dcookie_unregister(file->private_data);
out:
__clear_bit_unlock(0, &buffer_opened);
return err;
}
static int event_buffer_release(struct inode *inode, struct file *file)
{
oprofile_stop();
oprofile_shutdown();
dcookie_unregister(file->private_data);
buffer_pos = 0;
atomic_set(&buffer_ready, 0);
__clear_bit_unlock(0, &buffer_opened);
return 0;
}
static ssize_t event_buffer_read(struct file *file, char __user *buf,
size_t count, loff_t *offset)
{
int retval = -EINVAL;
size_t const max = buffer_size * sizeof(unsigned long);
/* handling partial reads is more trouble than it's worth */
if (count != max || *offset)
return -EINVAL;
wait_event_interruptible(buffer_wait, atomic_read(&buffer_ready));
if (signal_pending(current))
return -EINTR;
/* can't currently happen */
if (!atomic_read(&buffer_ready))
return -EAGAIN;
mutex_lock(&buffer_mutex);
/* May happen if the buffer is freed during pending reads. */
if (!event_buffer) {
retval = -EINTR;
goto out;
}
atomic_set(&buffer_ready, 0);
retval = -EFAULT;
count = buffer_pos * sizeof(unsigned long);
if (copy_to_user(buf, event_buffer, count))
goto out;
retval = count;
buffer_pos = 0;
out:
mutex_unlock(&buffer_mutex);
return retval;
}
const struct file_operations event_buffer_fops = {
.open = event_buffer_open,
.release = event_buffer_release,
.read = event_buffer_read,
.llseek = no_llseek,
};