Revert 5.15.37 merge into android13-5.15

This reverts the merge of 5.15.37 into the android13-5.15
There are lots of ABI issues, and many of the commits are not needed in
the Android tree at this time.  Revert the merge (except for the
Makefile change), so that future merges will continue to work, and the
needed individual changes from this release will be manually added to
the tree at a later point in time.

Fixes: f7dace75d276 ("Merge 5.15.37 into android13-5.15")
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I0632858e5c0fb94fc14c0f4216997330eca260a7
This commit is contained in:
Greg Kroah-Hartman 2022-05-17 22:03:32 +02:00
parent ef5fed3c1e
commit ecda2085fd
51 changed files with 636 additions and 1441 deletions

View File

@ -782,7 +782,7 @@ ocram: sram@ffff0000 {
};
qspi: spi@ff705000 {
compatible = "intel,socfpga-qspi", "cdns,qspi-nor";
compatible = "cdns,qspi-nor";
#address-cells = <1>;
#size-cells = <0>;
reg = <0xff705000 0x1000>,

View File

@ -756,7 +756,7 @@ usb0-ecc@ff8c8800 {
};
qspi: spi@ff809000 {
compatible = "intel,socfpga-qspi", "cdns,qspi-nor";
compatible = "cdns,qspi-nor";
#address-cells = <1>;
#size-cells = <0>;
reg = <0xff809000 0x100>,

View File

@ -594,7 +594,7 @@ emac0-tx-ecc@ff8c0400 {
};
qspi: spi@ff8d2000 {
compatible = "intel,socfpga-qspi", "cdns,qspi-nor";
compatible = "cdns,qspi-nor";
#address-cells = <1>;
#size-cells = <0>;
reg = <0xff8d2000 0x100>,

View File

@ -628,7 +628,7 @@ sdmmca-ecc@ff8c8c00 {
};
qspi: spi@ff8d2000 {
compatible = "intel,socfpga-qspi", "cdns,qspi-nor";
compatible = "cdns,qspi-nor";
#address-cells = <1>;
#size-cells = <0>;
reg = <0xff8d2000 0x100>,

View File

@ -669,8 +669,7 @@ static void __init kvm_use_magic_page(void)
on_each_cpu(kvm_map_magic_page, &features, 1);
/* Quick self-test to see if the mapping works */
if (fault_in_readable((const char __user *)KVM_MAGIC_PAGE,
sizeof(u32))) {
if (fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) {
kvm_patching_worked = false;
return;
}

View File

@ -1048,7 +1048,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
if (new_ctx == NULL)
return 0;
if (!access_ok(new_ctx, ctx_size) ||
fault_in_readable((char __user *)new_ctx, ctx_size))
fault_in_pages_readable((u8 __user *)new_ctx, ctx_size))
return -EFAULT;
/*
@ -1239,7 +1239,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
#endif
if (!access_ok(ctx, sizeof(*ctx)) ||
fault_in_readable((char __user *)ctx, sizeof(*ctx)))
fault_in_pages_readable((u8 __user *)ctx, sizeof(*ctx)))
return -EFAULT;
/*

View File

@ -688,7 +688,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
if (new_ctx == NULL)
return 0;
if (!access_ok(new_ctx, ctx_size) ||
fault_in_readable((char __user *)new_ctx, ctx_size))
fault_in_pages_readable((u8 __user *)new_ctx, ctx_size))
return -EFAULT;
/*

View File

@ -205,7 +205,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
fpregs_unlock();
if (ret) {
if (!fault_in_writeable(buf_fx, fpu_user_xstate_size))
if (!fault_in_pages_writeable(buf_fx, fpu_user_xstate_size))
goto retry;
return -EFAULT;
}
@ -278,9 +278,10 @@ static int restore_fpregs_from_user(void __user *buf, u64 xrestore,
if (ret != -EFAULT)
return -EINVAL;
if (!fault_in_readable(buf, size))
ret = fault_in_pages_readable(buf, size);
if (!ret)
goto retry;
return -EFAULT;
return ret;
}
/*

View File

@ -33,22 +33,6 @@ config BLK_DEV_FD
To compile this driver as a module, choose M here: the
module will be called floppy.
config BLK_DEV_FD_RAWCMD
bool "Support for raw floppy disk commands (DEPRECATED)"
depends on BLK_DEV_FD
help
If you want to use actual physical floppies and expect to do
special low-level hardware accesses to them (access and use
non-standard formats, for example), then enable this.
Note that the code enabled by this option is rarely used and
might be unstable or insecure, and distros should not enable it.
Note: FDRAWCMD is deprecated and will be removed from the kernel
in the near future.
If unsure, say N.
config AMIGA_FLOPPY
tristate "Amiga floppy support"
depends on AMIGA

View File

@ -2984,8 +2984,6 @@ static const char *drive_name(int type, int drive)
return "(null)";
}
#ifdef CONFIG_BLK_DEV_FD_RAWCMD
/* raw commands */
static void raw_cmd_done(int flag)
{
@ -3185,35 +3183,6 @@ static int raw_cmd_ioctl(int cmd, void __user *param)
return ret;
}
static int floppy_raw_cmd_ioctl(int type, int drive, int cmd,
void __user *param)
{
int ret;
pr_warn_once("Note: FDRAWCMD is deprecated and will be removed from the kernel in the near future.\n");
if (type)
return -EINVAL;
if (lock_fdc(drive))
return -EINTR;
set_floppy(drive);
ret = raw_cmd_ioctl(cmd, param);
if (ret == -EINTR)
return -EINTR;
process_fd_request();
return ret;
}
#else /* CONFIG_BLK_DEV_FD_RAWCMD */
static int floppy_raw_cmd_ioctl(int type, int drive, int cmd,
void __user *param)
{
return -EOPNOTSUPP;
}
#endif
static int invalidate_drive(struct block_device *bdev)
{
/* invalidate the buffer track to force a reread */
@ -3402,6 +3371,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
{
int drive = (long)bdev->bd_disk->private_data;
int type = ITYPE(drive_state[drive].fd_device);
int i;
int ret;
int size;
union inparam {
@ -3552,7 +3522,16 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
outparam = &write_errors[drive];
break;
case FDRAWCMD:
return floppy_raw_cmd_ioctl(type, drive, cmd, (void __user *)param);
if (type)
return -EINVAL;
if (lock_fdc(drive))
return -EINTR;
set_floppy(drive);
i = raw_cmd_ioctl(cmd, (void __user *)param);
if (i == -EINTR)
return -EINTR;
process_fd_request();
return i;
case FDTWADDLE:
if (lock_fdc(drive))
return -EINTR;

View File

@ -336,7 +336,7 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data,
struct drm_armada_gem_pwrite *args = data;
struct armada_gem_object *dobj;
char __user *ptr;
int ret = 0;
int ret;
DRM_DEBUG_DRIVER("handle %u off %u size %u ptr 0x%llx\n",
args->handle, args->offset, args->size, args->ptr);
@ -349,8 +349,9 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data,
if (!access_ok(ptr, args->size))
return -EFAULT;
if (fault_in_readable(ptr, args->size))
return -EFAULT;
ret = fault_in_pages_readable(ptr, args->size);
if (ret)
return ret;
dobj = armada_gem_object_lookup(file, args->handle);
if (dobj == NULL)

View File

@ -36,7 +36,6 @@
/* Quirks */
#define CQSPI_NEEDS_WR_DELAY BIT(0)
#define CQSPI_DISABLE_DAC_MODE BIT(1)
#define CQSPI_NO_SUPPORT_WR_COMPLETION BIT(3)
/* Capabilities */
#define CQSPI_SUPPORTS_OCTAL BIT(0)
@ -84,7 +83,6 @@ struct cqspi_st {
u32 wr_delay;
bool use_direct_mode;
struct cqspi_flash_pdata f_pdata[CQSPI_MAX_CHIPSELECT];
bool wr_completion;
};
struct cqspi_driver_platdata {
@ -799,11 +797,9 @@ static int cqspi_write_setup(struct cqspi_flash_pdata *f_pdata,
* polling on the controller's side. spinand and spi-nor will take
* care of polling the status register.
*/
if (cqspi->wr_completion) {
reg = readl(reg_base + CQSPI_REG_WR_COMPLETION_CTRL);
reg |= CQSPI_REG_WR_DISABLE_AUTO_POLL;
writel(reg, reg_base + CQSPI_REG_WR_COMPLETION_CTRL);
}
reg = readl(reg_base + CQSPI_REG_WR_COMPLETION_CTRL);
reg |= CQSPI_REG_WR_DISABLE_AUTO_POLL;
writel(reg, reg_base + CQSPI_REG_WR_COMPLETION_CTRL);
reg = readl(reg_base + CQSPI_REG_SIZE);
reg &= ~CQSPI_REG_SIZE_ADDRESS_MASK;
@ -1536,10 +1532,6 @@ static int cqspi_probe(struct platform_device *pdev)
cqspi->master_ref_clk_hz = clk_get_rate(cqspi->clk);
master->max_speed_hz = cqspi->master_ref_clk_hz;
/* write completion is supported by default */
cqspi->wr_completion = true;
ddata = of_device_get_match_data(dev);
if (ddata) {
if (ddata->quirks & CQSPI_NEEDS_WR_DELAY)
@ -1549,8 +1541,6 @@ static int cqspi_probe(struct platform_device *pdev)
master->mode_bits |= SPI_RX_OCTAL | SPI_TX_OCTAL;
if (!(ddata->quirks & CQSPI_DISABLE_DAC_MODE))
cqspi->use_direct_mode = true;
if (ddata->quirks & CQSPI_NO_SUPPORT_WR_COMPLETION)
cqspi->wr_completion = false;
}
ret = devm_request_irq(dev, irq, cqspi_irq_handler, 0,
@ -1659,10 +1649,6 @@ static const struct cqspi_driver_platdata intel_lgm_qspi = {
.quirks = CQSPI_DISABLE_DAC_MODE,
};
static const struct cqspi_driver_platdata socfpga_qspi = {
.quirks = CQSPI_NO_SUPPORT_WR_COMPLETION,
};
static const struct of_device_id cqspi_dt_ids[] = {
{
.compatible = "cdns,qspi-nor",
@ -1680,10 +1666,6 @@ static const struct of_device_id cqspi_dt_ids[] = {
.compatible = "intel,lgm-qspi",
.data = &intel_lgm_qspi,
},
{
.compatible = "intel,socfpga-qspi",
.data = (void *)&socfpga_qspi,
},
{ /* end of table */ }
};

View File

@ -1709,7 +1709,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
* Fault pages before locking them in prepare_pages
* to avoid recursive lock
*/
if (unlikely(fault_in_iov_iter_readable(i, write_bytes))) {
if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
ret = -EFAULT;
break;
}
@ -1903,17 +1903,16 @@ static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
const bool is_sync_write = (iocb->ki_flags & IOCB_DSYNC);
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
loff_t pos;
ssize_t written = 0;
ssize_t written_buffered;
size_t prev_left = 0;
loff_t endbyte;
ssize_t err;
unsigned int ilock_flags = 0;
struct iomap_dio *dio = NULL;
if (iocb->ki_flags & IOCB_NOWAIT)
ilock_flags |= BTRFS_ILOCK_TRY;
@ -1956,80 +1955,23 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
goto buffered;
}
/*
* We remove IOCB_DSYNC so that we don't deadlock when iomap_dio_rw()
* calls generic_write_sync() (through iomap_dio_complete()), because
* that results in calling fsync (btrfs_sync_file()) which will try to
* lock the inode in exclusive/write mode.
*/
if (is_sync_write)
iocb->ki_flags &= ~IOCB_DSYNC;
/*
* The iov_iter can be mapped to the same file range we are writing to.
* If that's the case, then we will deadlock in the iomap code, because
* it first calls our callback btrfs_dio_iomap_begin(), which will create
* an ordered extent, and after that it will fault in the pages that the
* iov_iter refers to. During the fault in we end up in the readahead
* pages code (starting at btrfs_readahead()), which will lock the range,
* find that ordered extent and then wait for it to complete (at
* btrfs_lock_and_flush_ordered_range()), resulting in a deadlock since
* obviously the ordered extent can never complete as we didn't submit
* yet the respective bio(s). This always happens when the buffer is
* memory mapped to the same file range, since the iomap DIO code always
* invalidates pages in the target file range (after starting and waiting
* for any writeback).
*
* So here we disable page faults in the iov_iter and then retry if we
* got -EFAULT, faulting in the pages before the retry.
*/
again:
from->nofault = true;
err = iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
IOMAP_DIO_PARTIAL, written);
from->nofault = false;
/* No increment (+=) because iomap returns a cumulative value. */
if (err > 0)
written = err;
if (iov_iter_count(from) > 0 && (err == -EFAULT || err > 0)) {
const size_t left = iov_iter_count(from);
/*
* We have more data left to write. Try to fault in as many as
* possible of the remainder pages and retry. We do this without
* releasing and locking again the inode, to prevent races with
* truncate.
*
* Also, in case the iov refers to pages in the file range of the
* file we want to write to (due to a mmap), we could enter an
* infinite loop if we retry after faulting the pages in, since
* iomap will invalidate any pages in the range early on, before
* it tries to fault in the pages of the iov. So we keep track of
* how much was left of iov in the previous EFAULT and fallback
* to buffered IO in case we haven't made any progress.
*/
if (left == prev_left) {
err = -ENOTBLK;
} else {
fault_in_iov_iter_readable(from, left);
prev_left = left;
goto again;
}
}
dio = __iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
0, 0);
btrfs_inode_unlock(inode, ilock_flags);
/*
* Add back IOCB_DSYNC. Our caller, btrfs_file_write_iter(), will do
* the fsync (call generic_write_sync()).
*/
if (is_sync_write)
iocb->ki_flags |= IOCB_DSYNC;
if (IS_ERR_OR_NULL(dio)) {
err = PTR_ERR_OR_ZERO(dio);
if (err < 0 && err != -ENOTBLK)
goto out;
} else {
written = iomap_dio_complete(dio);
}
/* If 'err' is -ENOTBLK then it means we must fallback to buffered IO. */
if ((err < 0 && err != -ENOTBLK) || !iov_iter_count(from))
if (written < 0 || !iov_iter_count(from)) {
err = written;
goto out;
}
buffered:
pos = iocb->ki_pos;
@ -2054,7 +1996,7 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
invalidate_mapping_pages(file->f_mapping, pos >> PAGE_SHIFT,
endbyte >> PAGE_SHIFT);
out:
return err < 0 ? err : written;
return written ? written : err;
}
static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
@ -3718,8 +3660,6 @@ static int check_direct_read(struct btrfs_fs_info *fs_info,
static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to)
{
struct inode *inode = file_inode(iocb->ki_filp);
size_t prev_left = 0;
ssize_t read = 0;
ssize_t ret;
if (fsverity_active(inode))
@ -3729,57 +3669,10 @@ static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to)
return 0;
btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
again:
/*
* This is similar to what we do for direct IO writes, see the comment
* at btrfs_direct_write(), but we also disable page faults in addition
* to disabling them only at the iov_iter level. This is because when
* reading from a hole or prealloc extent, iomap calls iov_iter_zero(),
* which can still trigger page fault ins despite having set ->nofault
* to true of our 'to' iov_iter.
*
* The difference to direct IO writes is that we deadlock when trying
* to lock the extent range in the inode's tree during he page reads
* triggered by the fault in (while for writes it is due to waiting for
* our own ordered extent). This is because for direct IO reads,
* btrfs_dio_iomap_begin() returns with the extent range locked, which
* is only unlocked in the endio callback (end_bio_extent_readpage()).
*/
pagefault_disable();
to->nofault = true;
ret = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
IOMAP_DIO_PARTIAL, read);
to->nofault = false;
pagefault_enable();
/* No increment (+=) because iomap returns a cumulative value. */
if (ret > 0)
read = ret;
if (iov_iter_count(to) > 0 && (ret == -EFAULT || ret > 0)) {
const size_t left = iov_iter_count(to);
if (left == prev_left) {
/*
* We didn't make any progress since the last attempt,
* fallback to a buffered read for the remainder of the
* range. This is just to avoid any possibility of looping
* for too long.
*/
ret = read;
} else {
/*
* We made some progress since the last retry or this is
* the first time we are retrying. Fault in as many pages
* as possible and retry.
*/
fault_in_iov_iter_writeable(to, left);
prev_left = left;
goto again;
}
}
0, 0);
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
return ret < 0 ? ret : read;
return ret;
}
static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)

View File

@ -7961,34 +7961,6 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
}
len = min(len, em->len - (start - em->start));
/*
* If we have a NOWAIT request and the range contains multiple extents
* (or a mix of extents and holes), then we return -EAGAIN to make the
* caller fallback to a context where it can do a blocking (without
* NOWAIT) request. This way we avoid doing partial IO and returning
* success to the caller, which is not optimal for writes and for reads
* it can result in unexpected behaviour for an application.
*
* When doing a read, because we use IOMAP_DIO_PARTIAL when calling
* iomap_dio_rw(), we can end up returning less data then what the caller
* asked for, resulting in an unexpected, and incorrect, short read.
* That is, the caller asked to read N bytes and we return less than that,
* which is wrong unless we are crossing EOF. This happens if we get a
* page fault error when trying to fault in pages for the buffer that is
* associated to the struct iov_iter passed to iomap_dio_rw(), and we
* have previously submitted bios for other extents in the range, in
* which case iomap_dio_rw() may return us EIOCBQUEUED if not all of
* those bios have completed by the time we get the page fault error,
* which we return back to our caller - we should only return EIOCBQUEUED
* after we have submitted bios for all the extents in the range.
*/
if ((flags & IOMAP_NOWAIT) && len < length) {
free_extent_map(em);
ret = -EAGAIN;
goto unlock_err;
}
if (write) {
ret = btrfs_get_blocks_direct_write(&em, inode, dio_data,
start, len);

View File

@ -2258,8 +2258,9 @@ static noinline int search_ioctl(struct inode *inode,
key.offset = sk->min_offset;
while (1) {
ret = -EFAULT;
if (fault_in_writeable(ubuf + sk_offset, *buf_size - sk_offset))
ret = fault_in_pages_writeable(ubuf + sk_offset,
*buf_size - sk_offset);
if (ret)
break;
ret = btrfs_search_forward(root, &key, path, sk->min_transid);

View File

@ -4414,7 +4414,7 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
return 0;
/* If it will be a short write, don't bother. */
if (fault_in_iov_iter_readable(iter, count))
if (iov_iter_fault_in_readable(iter, count))
return 0;
if (f2fs_has_inline_data(inode)) {

View File

@ -1166,7 +1166,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
again:
err = -EFAULT;
if (fault_in_iov_iter_readable(ii, bytes))
if (iov_iter_fault_in_readable(ii, bytes))
break;
err = -ENOMEM;

View File

@ -961,6 +961,46 @@ static int __gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
goto out;
}
static int gfs2_write_lock(struct inode *inode)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
int error;
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
error = gfs2_glock_nq(&ip->i_gh);
if (error)
goto out_uninit;
if (&ip->i_inode == sdp->sd_rindex) {
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
GL_NOCACHE, &m_ip->i_gh);
if (error)
goto out_unlock;
}
return 0;
out_unlock:
gfs2_glock_dq(&ip->i_gh);
out_uninit:
gfs2_holder_uninit(&ip->i_gh);
return error;
}
static void gfs2_write_unlock(struct inode *inode)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
if (&ip->i_inode == sdp->sd_rindex) {
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
gfs2_glock_dq_uninit(&m_ip->i_gh);
}
gfs2_glock_dq_uninit(&ip->i_gh);
}
static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
unsigned len)
{
@ -1078,6 +1118,11 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
return ret;
}
static inline bool gfs2_iomap_need_write_lock(unsigned flags)
{
return (flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT);
}
static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
unsigned flags, struct iomap *iomap,
struct iomap *srcmap)
@ -1090,6 +1135,12 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
iomap->flags |= IOMAP_F_BUFFER_HEAD;
trace_gfs2_iomap_start(ip, pos, length, flags);
if (gfs2_iomap_need_write_lock(flags)) {
ret = gfs2_write_lock(inode);
if (ret)
goto out;
}
ret = __gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
if (ret)
goto out_unlock;
@ -1117,7 +1168,10 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp);
out_unlock:
if (ret && gfs2_iomap_need_write_lock(flags))
gfs2_write_unlock(inode);
release_metapath(&mp);
out:
trace_gfs2_iomap_end(ip, iomap, ret);
return ret;
}
@ -1165,11 +1219,15 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
}
if (unlikely(!written))
return 0;
goto out_unlock;
if (iomap->flags & IOMAP_F_SIZE_CHANGED)
mark_inode_dirty(inode);
set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
out_unlock:
if (gfs2_iomap_need_write_lock(flags))
gfs2_write_unlock(inode);
return 0;
}

View File

@ -777,99 +777,27 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
return ret ? ret : ret1;
}
static inline bool should_fault_in_pages(ssize_t ret, struct iov_iter *i,
size_t *prev_count,
size_t *window_size)
{
char __user *p = i->iov[0].iov_base + i->iov_offset;
size_t count = iov_iter_count(i);
int pages = 1;
if (likely(!count))
return false;
if (ret <= 0 && ret != -EFAULT)
return false;
if (!iter_is_iovec(i))
return false;
if (*prev_count != count || !*window_size) {
int pages, nr_dirtied;
pages = min_t(int, BIO_MAX_VECS,
DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE));
nr_dirtied = max(current->nr_dirtied_pause -
current->nr_dirtied, 1);
pages = min(pages, nr_dirtied);
}
*prev_count = count;
*window_size = (size_t)PAGE_SIZE * pages - offset_in_page(p);
return true;
}
static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to,
struct gfs2_holder *gh)
{
struct file *file = iocb->ki_filp;
struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
size_t prev_count = 0, window_size = 0;
size_t written = 0;
size_t count = iov_iter_count(to);
ssize_t ret;
/*
* In this function, we disable page faults when we're holding the
* inode glock while doing I/O. If a page fault occurs, we indicate
* that the inode glock may be dropped, fault in the pages manually,
* and retry.
*
* Unlike generic_file_read_iter, for reads, iomap_dio_rw can trigger
* physical as well as manual page faults, and we need to disable both
* kinds.
*
* For direct I/O, gfs2 takes the inode glock in deferred mode. This
* locking mode is compatible with other deferred holders, so multiple
* processes and nodes can do direct I/O to a file at the same time.
* There's no guarantee that reads or writes will be atomic. Any
* coordination among readers and writers needs to happen externally.
*/
if (!iov_iter_count(to))
if (!count)
return 0; /* skip atime */
gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh);
retry:
ret = gfs2_glock_nq(gh);
if (ret)
goto out_uninit;
retry_under_glock:
pagefault_disable();
to->nofault = true;
ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL,
IOMAP_DIO_PARTIAL, written);
to->nofault = false;
pagefault_enable();
if (ret > 0)
written = ret;
if (should_fault_in_pages(ret, to, &prev_count, &window_size)) {
size_t leftover;
gfs2_holder_allow_demote(gh);
leftover = fault_in_iov_iter_writeable(to, window_size);
gfs2_holder_disallow_demote(gh);
if (leftover != window_size) {
if (!gfs2_holder_queued(gh))
goto retry;
goto retry_under_glock;
}
}
if (gfs2_holder_queued(gh))
gfs2_glock_dq(gh);
ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, 0, 0);
gfs2_glock_dq(gh);
out_uninit:
gfs2_holder_uninit(gh);
if (ret < 0)
return ret;
return written;
return ret;
}
static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
@ -878,20 +806,10 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
size_t prev_count = 0, window_size = 0;
size_t read = 0;
size_t len = iov_iter_count(from);
loff_t offset = iocb->ki_pos;
ssize_t ret;
/*
* In this function, we disable page faults when we're holding the
* inode glock while doing I/O. If a page fault occurs, we indicate
* that the inode glock may be dropped, fault in the pages manually,
* and retry.
*
* For writes, iomap_dio_rw only triggers manual page faults, so we
* don't need to disable physical ones.
*/
/*
* Deferred lock, even if its a write, since we do no allocation on
* this path. All we need to change is the atime, and this lock mode
@ -901,62 +819,31 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
* VFS does.
*/
gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh);
retry:
ret = gfs2_glock_nq(gh);
if (ret)
goto out_uninit;
retry_under_glock:
/* Silently fall back to buffered I/O when writing beyond EOF */
if (iocb->ki_pos + iov_iter_count(from) > i_size_read(&ip->i_inode))
if (offset + len > i_size_read(&ip->i_inode))
goto out;
from->nofault = true;
ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL,
IOMAP_DIO_PARTIAL, read);
from->nofault = false;
ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, 0, 0);
if (ret == -ENOTBLK)
ret = 0;
if (ret > 0)
read = ret;
if (should_fault_in_pages(ret, from, &prev_count, &window_size)) {
size_t leftover;
gfs2_holder_allow_demote(gh);
leftover = fault_in_iov_iter_readable(from, window_size);
gfs2_holder_disallow_demote(gh);
if (leftover != window_size) {
if (!gfs2_holder_queued(gh))
goto retry;
goto retry_under_glock;
}
}
out:
if (gfs2_holder_queued(gh))
gfs2_glock_dq(gh);
gfs2_glock_dq(gh);
out_uninit:
gfs2_holder_uninit(gh);
if (ret < 0)
return ret;
return read;
return ret;
}
static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct gfs2_inode *ip;
struct gfs2_holder gh;
size_t prev_count = 0, window_size = 0;
size_t written = 0;
ssize_t ret;
/*
* In this function, we disable page faults when we're holding the
* inode glock while doing I/O. If a page fault occurs, we indicate
* that the inode glock may be dropped, fault in the pages manually,
* and retry.
*/
if (iocb->ki_flags & IOCB_DIRECT) {
ret = gfs2_file_direct_read(iocb, to, &gh);
if (likely(ret != -ENOTBLK))
@ -978,118 +865,18 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
}
ip = GFS2_I(iocb->ki_filp->f_mapping->host);
gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
retry:
ret = gfs2_glock_nq(&gh);
if (ret)
goto out_uninit;
retry_under_glock:
pagefault_disable();
ret = generic_file_read_iter(iocb, to);
pagefault_enable();
if (ret > 0)
written += ret;
if (should_fault_in_pages(ret, to, &prev_count, &window_size)) {
size_t leftover;
gfs2_holder_allow_demote(&gh);
leftover = fault_in_iov_iter_writeable(to, window_size);
gfs2_holder_disallow_demote(&gh);
if (leftover != window_size) {
if (!gfs2_holder_queued(&gh)) {
if (written)
goto out_uninit;
goto retry;
}
goto retry_under_glock;
}
}
if (gfs2_holder_queued(&gh))
gfs2_glock_dq(&gh);
gfs2_glock_dq(&gh);
out_uninit:
gfs2_holder_uninit(&gh);
return written ? written : ret;
}
static ssize_t gfs2_file_buffered_write(struct kiocb *iocb,
struct iov_iter *from,
struct gfs2_holder *gh)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_holder *statfs_gh = NULL;
size_t prev_count = 0, window_size = 0;
size_t read = 0;
ssize_t ret;
/*
* In this function, we disable page faults when we're holding the
* inode glock while doing I/O. If a page fault occurs, we indicate
* that the inode glock may be dropped, fault in the pages manually,
* and retry.
*/
if (inode == sdp->sd_rindex) {
statfs_gh = kmalloc(sizeof(*statfs_gh), GFP_NOFS);
if (!statfs_gh)
return -ENOMEM;
}
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, gh);
retry:
ret = gfs2_glock_nq(gh);
if (ret)
goto out_uninit;
retry_under_glock:
if (inode == sdp->sd_rindex) {
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
ret = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
GL_NOCACHE, statfs_gh);
if (ret)
goto out_unlock;
}
current->backing_dev_info = inode_to_bdi(inode);
pagefault_disable();
ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
pagefault_enable();
current->backing_dev_info = NULL;
if (ret > 0) {
iocb->ki_pos += ret;
read += ret;
}
if (inode == sdp->sd_rindex)
gfs2_glock_dq_uninit(statfs_gh);
if (should_fault_in_pages(ret, from, &prev_count, &window_size)) {
size_t leftover;
gfs2_holder_allow_demote(gh);
leftover = fault_in_iov_iter_readable(from, window_size);
gfs2_holder_disallow_demote(gh);
if (leftover != window_size) {
if (!gfs2_holder_queued(gh)) {
if (read)
goto out_uninit;
goto retry;
}
goto retry_under_glock;
}
}
out_unlock:
if (gfs2_holder_queued(gh))
gfs2_glock_dq(gh);
out_uninit:
gfs2_holder_uninit(gh);
if (statfs_gh)
kfree(statfs_gh);
return read ? read : ret;
}
/**
* gfs2_file_write_iter - Perform a write to a file
* @iocb: The io context
@ -1141,7 +928,9 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
goto out_unlock;
iocb->ki_flags |= IOCB_DSYNC;
buffered = gfs2_file_buffered_write(iocb, from, &gh);
current->backing_dev_info = inode_to_bdi(inode);
buffered = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
current->backing_dev_info = NULL;
if (unlikely(buffered <= 0)) {
if (!ret)
ret = buffered;
@ -1155,6 +944,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
* the direct I/O range as we don't know if the buffered pages
* made it to disk.
*/
iocb->ki_pos += buffered;
ret2 = generic_write_sync(iocb, buffered);
invalidate_mapping_pages(mapping,
(iocb->ki_pos - buffered) >> PAGE_SHIFT,
@ -1162,9 +952,13 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (!ret || ret2 > 0)
ret += ret2;
} else {
ret = gfs2_file_buffered_write(iocb, from, &gh);
if (likely(ret > 0))
current->backing_dev_info = inode_to_bdi(inode);
ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
current->backing_dev_info = NULL;
if (likely(ret > 0)) {
iocb->ki_pos += ret;
ret = generic_write_sync(iocb, ret);
}
}
out_unlock:

View File

@ -58,7 +58,6 @@ struct gfs2_glock_iter {
typedef void (*glock_examiner) (struct gfs2_glock * gl);
static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
static void __gfs2_glock_dq(struct gfs2_holder *gh);
static struct dentry *gfs2_root;
static struct workqueue_struct *glock_workqueue;
@ -198,12 +197,6 @@ static int demote_ok(const struct gfs2_glock *gl)
if (gl->gl_state == LM_ST_UNLOCKED)
return 0;
/*
* Note that demote_ok is used for the lru process of disposing of
* glocks. For this purpose, we don't care if the glock's holders
* have the HIF_MAY_DEMOTE flag set or not. If someone is using
* them, don't demote.
*/
if (!list_empty(&gl->gl_holders))
return 0;
if (glops->go_demote_ok)
@ -308,59 +301,46 @@ void gfs2_glock_put(struct gfs2_glock *gl)
}
/**
* may_grant - check if it's ok to grant a new lock
* may_grant - check if its ok to grant a new lock
* @gl: The glock
* @current_gh: One of the current holders of @gl
* @gh: The lock request which we wish to grant
*
* With our current compatibility rules, if a glock has one or more active
* holders (HIF_HOLDER flag set), any of those holders can be passed in as
* @current_gh; they are all the same as far as compatibility with the new @gh
* goes.
*
* Returns true if it's ok to grant the lock.
* Returns: true if its ok to grant the lock
*/
static inline bool may_grant(struct gfs2_glock *gl,
struct gfs2_holder *current_gh,
struct gfs2_holder *gh)
static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh)
{
if (current_gh) {
GLOCK_BUG_ON(gl, !test_bit(HIF_HOLDER, &current_gh->gh_iflags));
const struct gfs2_holder *gh_head = list_first_entry(&gl->gl_holders, const struct gfs2_holder, gh_list);
switch(current_gh->gh_state) {
case LM_ST_EXCLUSIVE:
/*
* Here we make a special exception to grant holders
* who agree to share the EX lock with other holders
* who also have the bit set. If the original holder
* has the LM_FLAG_NODE_SCOPE bit set, we grant more
* holders with the bit set.
*/
return gh->gh_state == LM_ST_EXCLUSIVE &&
(current_gh->gh_flags & LM_FLAG_NODE_SCOPE) &&
(gh->gh_flags & LM_FLAG_NODE_SCOPE);
case LM_ST_SHARED:
case LM_ST_DEFERRED:
return gh->gh_state == current_gh->gh_state;
default:
return false;
}
if (gh != gh_head) {
/**
* Here we make a special exception to grant holders who agree
* to share the EX lock with other holders who also have the
* bit set. If the original holder has the LM_FLAG_NODE_SCOPE bit
* is set, we grant more holders with the bit set.
*/
if (gh_head->gh_state == LM_ST_EXCLUSIVE &&
(gh_head->gh_flags & LM_FLAG_NODE_SCOPE) &&
gh->gh_state == LM_ST_EXCLUSIVE &&
(gh->gh_flags & LM_FLAG_NODE_SCOPE))
return 1;
if ((gh->gh_state == LM_ST_EXCLUSIVE ||
gh_head->gh_state == LM_ST_EXCLUSIVE))
return 0;
}
if (gl->gl_state == gh->gh_state)
return true;
return 1;
if (gh->gh_flags & GL_EXACT)
return false;
return 0;
if (gl->gl_state == LM_ST_EXCLUSIVE) {
return gh->gh_state == LM_ST_SHARED ||
gh->gh_state == LM_ST_DEFERRED;
if (gh->gh_state == LM_ST_SHARED && gh_head->gh_state == LM_ST_SHARED)
return 1;
if (gh->gh_state == LM_ST_DEFERRED && gh_head->gh_state == LM_ST_DEFERRED)
return 1;
}
if (gh->gh_flags & LM_FLAG_ANY)
return gl->gl_state != LM_ST_UNLOCKED;
return false;
if (gl->gl_state != LM_ST_UNLOCKED && (gh->gh_flags & LM_FLAG_ANY))
return 1;
return 0;
}
static void gfs2_holder_wake(struct gfs2_holder *gh)
@ -386,7 +366,7 @@ static void do_error(struct gfs2_glock *gl, const int ret)
struct gfs2_holder *gh, *tmp;
list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
if (!test_bit(HIF_WAIT, &gh->gh_iflags))
if (test_bit(HIF_HOLDER, &gh->gh_iflags))
continue;
if (ret & LM_OUT_ERROR)
gh->gh_error = -EIO;
@ -400,78 +380,6 @@ static void do_error(struct gfs2_glock *gl, const int ret)
}
}
/**
* demote_incompat_holders - demote incompatible demoteable holders
* @gl: the glock we want to promote
* @new_gh: the new holder to be promoted
*/
static void demote_incompat_holders(struct gfs2_glock *gl,
struct gfs2_holder *new_gh)
{
struct gfs2_holder *gh;
/*
* Demote incompatible holders before we make ourselves eligible.
* (This holder may or may not allow auto-demoting, but we don't want
* to demote the new holder before it's even granted.)
*/
list_for_each_entry(gh, &gl->gl_holders, gh_list) {
/*
* Since holders are at the front of the list, we stop when we
* find the first non-holder.
*/
if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
return;
if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags) &&
!may_grant(gl, new_gh, gh)) {
/*
* We should not recurse into do_promote because
* __gfs2_glock_dq only calls handle_callback,
* gfs2_glock_add_to_lru and __gfs2_glock_queue_work.
*/
__gfs2_glock_dq(gh);
}
}
}
/**
* find_first_holder - find the first "holder" gh
* @gl: the glock
*/
static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
{
struct gfs2_holder *gh;
if (!list_empty(&gl->gl_holders)) {
gh = list_first_entry(&gl->gl_holders, struct gfs2_holder,
gh_list);
if (test_bit(HIF_HOLDER, &gh->gh_iflags))
return gh;
}
return NULL;
}
/**
* find_first_strong_holder - find the first non-demoteable holder
* @gl: the glock
*
* Find the first holder that doesn't have the HIF_MAY_DEMOTE flag set.
*/
static inline struct gfs2_holder *
find_first_strong_holder(struct gfs2_glock *gl)
{
struct gfs2_holder *gh;
list_for_each_entry(gh, &gl->gl_holders, gh_list) {
if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
return NULL;
if (!test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags))
return gh;
}
return NULL;
}
/**
* do_promote - promote as many requests as possible on the current queue
* @gl: The glock
@ -485,21 +393,14 @@ __releases(&gl->gl_lockref.lock)
__acquires(&gl->gl_lockref.lock)
{
const struct gfs2_glock_operations *glops = gl->gl_ops;
struct gfs2_holder *gh, *tmp, *first_gh;
bool incompat_holders_demoted = false;
struct gfs2_holder *gh, *tmp;
int ret;
restart:
first_gh = find_first_strong_holder(gl);
list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
if (!test_bit(HIF_WAIT, &gh->gh_iflags))
if (test_bit(HIF_HOLDER, &gh->gh_iflags))
continue;
if (may_grant(gl, first_gh, gh)) {
if (!incompat_holders_demoted) {
demote_incompat_holders(gl, first_gh);
incompat_holders_demoted = true;
first_gh = gh;
}
if (may_grant(gl, gh)) {
if (gh->gh_list.prev == &gl->gl_holders &&
glops->go_lock) {
spin_unlock(&gl->gl_lockref.lock);
@ -525,11 +426,6 @@ __acquires(&gl->gl_lockref.lock)
gfs2_holder_wake(gh);
continue;
}
/*
* If we get here, it means we may not grant this holder for
* some reason. If this holder is the head of the list, it
* means we have a blocked holder at the head, so return 1.
*/
if (gh->gh_list.prev == &gl->gl_holders)
return 1;
do_error(gl, 0);
@ -826,6 +722,23 @@ __acquires(&gl->gl_lockref.lock)
spin_lock(&gl->gl_lockref.lock);
}
/**
* find_first_holder - find the first "holder" gh
* @gl: the glock
*/
static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
{
struct gfs2_holder *gh;
if (!list_empty(&gl->gl_holders)) {
gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list);
if (test_bit(HIF_HOLDER, &gh->gh_iflags))
return gh;
}
return NULL;
}
/**
* run_queue - do all outstanding tasks related to a glock
* @gl: The glock in question
@ -1441,20 +1354,15 @@ __acquires(&gl->gl_lockref.lock)
GLOCK_BUG_ON(gl, true);
if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
if (test_bit(GLF_LOCK, &gl->gl_flags)) {
struct gfs2_holder *first_gh;
first_gh = find_first_strong_holder(gl);
try_futile = !may_grant(gl, first_gh, gh);
}
if (test_bit(GLF_LOCK, &gl->gl_flags))
try_futile = !may_grant(gl, gh);
if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
goto fail;
}
list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid &&
(gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK) &&
!test_bit(HIF_MAY_DEMOTE, &gh2->gh_iflags)))
(gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK)))
goto trap_recursive;
if (try_futile &&
!(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
@ -1550,83 +1458,51 @@ int gfs2_glock_poll(struct gfs2_holder *gh)
return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1;
}
static inline bool needs_demote(struct gfs2_glock *gl)
{
return (test_bit(GLF_DEMOTE, &gl->gl_flags) ||
test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags));
}
/**
* gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
* @gh: the glock holder
*
*/
static void __gfs2_glock_dq(struct gfs2_holder *gh)
void gfs2_glock_dq(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
unsigned delay = 0;
int fast_path = 0;
spin_lock(&gl->gl_lockref.lock);
/*
* This while loop is similar to function demote_incompat_holders:
* If the glock is due to be demoted (which may be from another node
* or even if this holder is GL_NOCACHE), the weak holders are
* demoted as well, allowing the glock to be demoted.
* If we're in the process of file system withdraw, we cannot just
* dequeue any glocks until our journal is recovered, lest we
* introduce file system corruption. We need two exceptions to this
* rule: We need to allow unlocking of nondisk glocks and the glock
* for our own journal that needs recovery.
*/
while (gh) {
/*
* If we're in the process of file system withdraw, we cannot
* just dequeue any glocks until our journal is recovered, lest
* we introduce file system corruption. We need two exceptions
* to this rule: We need to allow unlocking of nondisk glocks
* and the glock for our own journal that needs recovery.
*/
if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
glock_blocked_by_withdraw(gl) &&
gh->gh_gl != sdp->sd_jinode_gl) {
sdp->sd_glock_dqs_held++;
spin_unlock(&gl->gl_lockref.lock);
might_sleep();
wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
TASK_UNINTERRUPTIBLE);
spin_lock(&gl->gl_lockref.lock);
}
/*
* This holder should not be cached, so mark it for demote.
* Note: this should be done before the check for needs_demote
* below.
*/
if (gh->gh_flags & GL_NOCACHE)
handle_callback(gl, LM_ST_UNLOCKED, 0, false);
list_del_init(&gh->gh_list);
clear_bit(HIF_HOLDER, &gh->gh_iflags);
trace_gfs2_glock_queue(gh, 0);
/*
* If there hasn't been a demote request we are done.
* (Let the remaining holders, if any, keep holding it.)
*/
if (!needs_demote(gl)) {
if (list_empty(&gl->gl_holders))
fast_path = 1;
break;
}
/*
* If we have another strong holder (we cannot auto-demote)
* we are done. It keeps holding it until it is done.
*/
if (find_first_strong_holder(gl))
break;
/*
* If we have a weak holder at the head of the list, it
* (and all others like it) must be auto-demoted. If there
* are no more weak holders, we exit the while loop.
*/
gh = find_first_holder(gl);
if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
glock_blocked_by_withdraw(gl) &&
gh->gh_gl != sdp->sd_jinode_gl) {
sdp->sd_glock_dqs_held++;
spin_unlock(&gl->gl_lockref.lock);
might_sleep();
wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
TASK_UNINTERRUPTIBLE);
spin_lock(&gl->gl_lockref.lock);
}
if (gh->gh_flags & GL_NOCACHE)
handle_callback(gl, LM_ST_UNLOCKED, 0, false);
list_del_init(&gh->gh_list);
clear_bit(HIF_HOLDER, &gh->gh_iflags);
if (list_empty(&gl->gl_holders) &&
!test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
!test_bit(GLF_DEMOTE, &gl->gl_flags))
fast_path = 1;
if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
gfs2_glock_add_to_lru(gl);
trace_gfs2_glock_queue(gh, 0);
if (unlikely(!fast_path)) {
gl->gl_lockref.count++;
if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
@ -1635,19 +1511,6 @@ static void __gfs2_glock_dq(struct gfs2_holder *gh)
delay = gl->gl_hold_time;
__gfs2_glock_queue_work(gl, delay);
}
}
/**
* gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
* @gh: the glock holder
*
*/
void gfs2_glock_dq(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
spin_lock(&gl->gl_lockref.lock);
__gfs2_glock_dq(gh);
spin_unlock(&gl->gl_lockref.lock);
}
@ -1810,7 +1673,6 @@ void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
{
struct gfs2_holder mock_gh = { .gh_gl = gl, .gh_state = state, };
unsigned long delay = 0;
unsigned long holdtime;
unsigned long now = jiffies;
@ -1825,28 +1687,6 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
delay = gl->gl_hold_time;
}
/*
* Note 1: We cannot call demote_incompat_holders from handle_callback
* or gfs2_set_demote due to recursion problems like: gfs2_glock_dq ->
* handle_callback -> demote_incompat_holders -> gfs2_glock_dq
* Plus, we only want to demote the holders if the request comes from
* a remote cluster node because local holder conflicts are resolved
* elsewhere.
*
* Note 2: if a remote node wants this glock in EX mode, lock_dlm will
* request that we set our state to UNLOCKED. Here we mock up a holder
* to make it look like someone wants the lock EX locally. Any SH
* and DF requests should be able to share the lock without demoting.
*
* Note 3: We only want to demote the demoteable holders when there
* are no more strong holders. The demoteable holders might as well
* keep the glock until the last strong holder is done with it.
*/
if (!find_first_strong_holder(gl)) {
if (state == LM_ST_UNLOCKED)
mock_gh.gh_state = LM_ST_EXCLUSIVE;
demote_incompat_holders(gl, &mock_gh);
}
handle_callback(gl, state, delay, true);
__gfs2_glock_queue_work(gl, delay);
spin_unlock(&gl->gl_lockref.lock);
@ -2238,8 +2078,6 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
*p++ = 'H';
if (test_bit(HIF_WAIT, &iflags))
*p++ = 'W';
if (test_bit(HIF_MAY_DEMOTE, &iflags))
*p++ = 'D';
*p = 0;
return buf;
}

View File

@ -150,8 +150,6 @@ static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *
list_for_each_entry(gh, &gl->gl_holders, gh_list) {
if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
break;
if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags))
continue;
if (gh->gh_owner_pid == pid)
goto out;
}
@ -327,24 +325,6 @@ static inline void glock_clear_object(struct gfs2_glock *gl, void *object)
spin_unlock(&gl->gl_lockref.lock);
}
static inline void gfs2_holder_allow_demote(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
spin_lock(&gl->gl_lockref.lock);
set_bit(HIF_MAY_DEMOTE, &gh->gh_iflags);
spin_unlock(&gl->gl_lockref.lock);
}
static inline void gfs2_holder_disallow_demote(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
spin_lock(&gl->gl_lockref.lock);
clear_bit(HIF_MAY_DEMOTE, &gh->gh_iflags);
spin_unlock(&gl->gl_lockref.lock);
}
extern void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation);
extern bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation);

View File

@ -252,7 +252,6 @@ struct gfs2_lkstats {
enum {
/* States */
HIF_MAY_DEMOTE = 1,
HIF_HOLDER = 6, /* Set for gh that "holds" the glock */
HIF_WAIT = 10,
};
@ -387,8 +386,9 @@ struct gfs2_inode {
u64 i_generation;
u64 i_eattr;
unsigned long i_flags; /* GIF_... */
struct gfs2_glock *i_gl;
struct gfs2_glock *i_gl; /* Move into i_gh? */
struct gfs2_holder i_iopen_gh;
struct gfs2_holder i_gh; /* for prepare/commit_write only */
struct gfs2_qadata *i_qadata; /* quota allocation data */
struct gfs2_holder i_rgd_gh;
struct gfs2_blkreserv i_res; /* rgrp multi-block reservation */

View File

@ -757,7 +757,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
* same page as we're writing to, without it being marked
* up-to-date.
*/
if (unlikely(fault_in_iov_iter_readable(i, bytes))) {
if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
status = -EFAULT;
break;
}

View File

@ -381,8 +381,6 @@ static loff_t iomap_dio_hole_iter(const struct iomap_iter *iter,
loff_t length = iov_iter_zero(iomap_length(iter), dio->submit.iter);
dio->size += length;
if (!length)
return -EFAULT;
return length;
}
@ -414,8 +412,6 @@ static loff_t iomap_dio_inline_iter(const struct iomap_iter *iomi,
copied = copy_to_iter(inline_data, length, iter);
}
dio->size += copied;
if (!copied)
return -EFAULT;
return copied;
}
@ -600,12 +596,6 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
if (iov_iter_rw(iter) == READ && iomi.pos >= dio->i_size)
iov_iter_revert(iter, iomi.pos - dio->i_size);
if (ret == -EFAULT && dio->size && (dio_flags & IOMAP_DIO_PARTIAL)) {
if (!(iocb->ki_flags & IOCB_NOWAIT))
wait_for_completion = true;
ret = 0;
}
/* magic error code to fall back to buffered I/O */
if (ret == -ENOTBLK) {
wait_for_completion = true;

View File

@ -1829,7 +1829,7 @@ static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i,
* pages being swapped out between us bringing them into memory
* and doing the actual copying.
*/
if (unlikely(fault_in_iov_iter_readable(i, bytes))) {
if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
status = -EFAULT;
break;
}

View File

@ -989,7 +989,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
frame_vbo = pos & ~(frame_size - 1);
index = frame_vbo >> PAGE_SHIFT;
if (unlikely(fault_in_iov_iter_readable(from, bytes))) {
if (unlikely(iov_iter_fault_in_readable(from, bytes))) {
err = -EFAULT;
goto out;
}

View File

@ -299,34 +299,6 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0,
extern const struct bpf_map_ops bpf_map_offload_ops;
/* bpf_type_flag contains a set of flags that are applicable to the values of
* arg_type, ret_type and reg_type. For example, a pointer value may be null,
* or a memory is read-only. We classify types into two categories: base types
* and extended types. Extended types are base types combined with a type flag.
*
* Currently there are no more than 32 base types in arg_type, ret_type and
* reg_types.
*/
#define BPF_BASE_TYPE_BITS 8
enum bpf_type_flag {
/* PTR may be NULL. */
PTR_MAYBE_NULL = BIT(0 + BPF_BASE_TYPE_BITS),
/* MEM is read-only. When applied on bpf_arg, it indicates the arg is
* compatible with both mutable and immutable memory.
*/
MEM_RDONLY = BIT(1 + BPF_BASE_TYPE_BITS),
__BPF_TYPE_LAST_FLAG = MEM_RDONLY,
};
/* Max number of base types. */
#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS)
/* Max number of all types. */
#define BPF_TYPE_LIMIT (__BPF_TYPE_LAST_FLAG | (__BPF_TYPE_LAST_FLAG - 1))
/* function argument constraints */
enum bpf_arg_type {
ARG_DONTCARE = 0, /* unused argument in helper function */
@ -338,11 +310,13 @@ enum bpf_arg_type {
ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */
ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */
ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */
ARG_PTR_TO_MAP_VALUE_OR_NULL, /* pointer to stack used as map value or NULL */
/* the following constraints used to prototype bpf_memcmp() and other
* functions that access data on eBPF program stack
*/
ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */
ARG_PTR_TO_MEM_OR_NULL, /* pointer to valid memory or NULL */
ARG_PTR_TO_UNINIT_MEM, /* pointer to memory does not need to be initialized,
* helper function must fill all bytes or clear
* them in error case.
@ -352,65 +326,42 @@ enum bpf_arg_type {
ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */
ARG_PTR_TO_CTX, /* pointer to context */
ARG_PTR_TO_CTX_OR_NULL, /* pointer to context or NULL */
ARG_ANYTHING, /* any (initialized) argument is ok */
ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */
ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
ARG_PTR_TO_INT, /* pointer to int */
ARG_PTR_TO_LONG, /* pointer to long */
ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */
ARG_PTR_TO_SOCKET_OR_NULL, /* pointer to bpf_sock (fullsock) or NULL */
ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */
ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */
ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */
ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */
ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */
ARG_PTR_TO_FUNC, /* pointer to a bpf program function */
ARG_PTR_TO_STACK, /* pointer to stack */
ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */
ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */
ARG_PTR_TO_TIMER, /* pointer to bpf_timer */
__BPF_ARG_TYPE_MAX,
/* Extended arg_types. */
ARG_PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MAP_VALUE,
ARG_PTR_TO_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MEM,
ARG_PTR_TO_CTX_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_CTX,
ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET,
ARG_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM,
ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK,
/* This must be the last entry. Its purpose is to ensure the enum is
* wide enough to hold the higher bits reserved for bpf_type_flag.
*/
__BPF_ARG_TYPE_LIMIT = BPF_TYPE_LIMIT,
};
static_assert(__BPF_ARG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT);
/* type of values returned from helper functions */
enum bpf_return_type {
RET_INTEGER, /* function returns integer */
RET_VOID, /* function doesn't return anything */
RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */
RET_PTR_TO_SOCKET, /* returns a pointer to a socket */
RET_PTR_TO_TCP_SOCK, /* returns a pointer to a tcp_sock */
RET_PTR_TO_SOCK_COMMON, /* returns a pointer to a sock_common */
RET_PTR_TO_ALLOC_MEM, /* returns a pointer to dynamically allocated memory */
RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */
RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */
RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */
RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */
RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */
RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */
RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */
RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */
RET_PTR_TO_BTF_ID, /* returns a pointer to a btf_id */
__BPF_RET_TYPE_MAX,
/* Extended ret_types. */
RET_PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_MAP_VALUE,
RET_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCKET,
RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK,
RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON,
RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM,
RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID,
/* This must be the last entry. Its purpose is to ensure the enum is
* wide enough to hold the higher bits reserved for bpf_type_flag.
*/
__BPF_RET_TYPE_LIMIT = BPF_TYPE_LIMIT,
};
static_assert(__BPF_RET_TYPE_MAX <= BPF_BASE_TYPE_LIMIT);
/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
* to in-kernel helper functions and for adjusting imm32 field in BPF_CALL
@ -472,15 +423,18 @@ enum bpf_reg_type {
PTR_TO_CTX, /* reg points to bpf_context */
CONST_PTR_TO_MAP, /* reg points to struct bpf_map */
PTR_TO_MAP_VALUE, /* reg points to map element value */
PTR_TO_MAP_KEY, /* reg points to a map element key */
PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
PTR_TO_STACK, /* reg == frame_pointer + offset */
PTR_TO_PACKET_META, /* skb->data - meta_len */
PTR_TO_PACKET, /* reg points to skb->data */
PTR_TO_PACKET_END, /* skb->data + headlen */
PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */
PTR_TO_SOCKET, /* reg points to struct bpf_sock */
PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */
PTR_TO_SOCK_COMMON, /* reg points to sock_common */
PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */
PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */
PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */
/* PTR_TO_BTF_ID points to a kernel struct that does not need
@ -498,25 +452,18 @@ enum bpf_reg_type {
* been checked for null. Used primarily to inform the verifier
* an explicit null check is required for this struct.
*/
PTR_TO_BTF_ID_OR_NULL,
PTR_TO_MEM, /* reg points to valid memory region */
PTR_TO_BUF, /* reg points to a read/write buffer */
PTR_TO_MEM_OR_NULL, /* reg points to valid memory region or NULL */
PTR_TO_RDONLY_BUF, /* reg points to a readonly buffer */
PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */
PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */
PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */
PTR_TO_FUNC, /* reg points to a bpf program function */
PTR_TO_MAP_KEY, /* reg points to a map element key */
__BPF_REG_TYPE_MAX,
/* Extended reg_types. */
PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | PTR_TO_MAP_VALUE,
PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCKET,
PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCK_COMMON,
PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | PTR_TO_TCP_SOCK,
PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | PTR_TO_BTF_ID,
/* This must be the last entry. Its purpose is to ensure the enum is
* wide enough to hold the higher bits reserved for bpf_type_flag.
*/
__BPF_REG_TYPE_LIMIT = BPF_TYPE_LIMIT,
};
static_assert(__BPF_REG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT);
/* The information passed from prog-specific *_is_valid_access
* back to the verifier.

View File

@ -19,8 +19,6 @@
* that converting umax_value to int cannot overflow.
*/
#define BPF_MAX_VAR_SIZ (1 << 29)
/* size of type_str_buf in bpf_verifier. */
#define TYPE_STR_BUF_LEN 64
/* Liveness marks, used for registers and spilled-regs (in stack slots).
* Read marks propagate upwards until they find a write mark; they record that
@ -479,8 +477,6 @@ struct bpf_verifier_env {
/* longest register parentage chain walked for liveness marking */
u32 longest_mark_read_walk;
bpfptr_t fd_array;
/* buffer used in reg_type_str() to generate reg_type string */
char type_str_buf[TYPE_STR_BUF_LEN];
ANDROID_KABI_RESERVE(1);
ANDROID_KABI_RESERVE(2);
@ -545,18 +541,4 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
u32 btf_id,
struct bpf_attach_target_info *tgt_info);
#define BPF_BASE_TYPE_MASK GENMASK(BPF_BASE_TYPE_BITS - 1, 0)
/* extract base type from bpf_{arg, return, reg}_type. */
static inline u32 base_type(u32 type)
{
return type & BPF_BASE_TYPE_MASK;
}
/* extract flags from an extended type. See bpf_type_flag in bpf.h. */
static inline u32 type_flag(u32 type)
{
return type & ~BPF_BASE_TYPE_MASK;
}
#endif /* _LINUX_BPF_VERIFIER_H */

View File

@ -332,13 +332,6 @@ struct iomap_dio_ops {
*/
#define IOMAP_DIO_OVERWRITE_ONLY (1 << 1)
/*
* When a page fault occurs, return a partial synchronous result and allow
* the caller to retry the rest of the operation after dealing with the page
* fault.
*/
#define IOMAP_DIO_PARTIAL (1 << 2)
ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
unsigned int dio_flags, size_t done_before);

View File

@ -2903,8 +2903,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
#define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */
#define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO
* and return without waiting upon it */
#define FOLL_POPULATE 0x40 /* fault in pages (with FOLL_MLOCK) */
#define FOLL_NOFAULT 0x80 /* do not fault in pages */
#define FOLL_POPULATE 0x40 /* fault in page */
#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */
#define FOLL_NUMA 0x200 /* force NUMA hinting page fault */
#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */

View File

@ -731,11 +731,61 @@ int wait_on_page_private_2_killable(struct page *page);
extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter);
/*
* Fault in userspace address range.
* Fault everything in given userspace address range in.
*/
size_t fault_in_writeable(char __user *uaddr, size_t size);
size_t fault_in_safe_writeable(const char __user *uaddr, size_t size);
size_t fault_in_readable(const char __user *uaddr, size_t size);
static inline int fault_in_pages_writeable(char __user *uaddr, size_t size)
{
char __user *end = uaddr + size - 1;
if (unlikely(size == 0))
return 0;
if (unlikely(uaddr > end))
return -EFAULT;
/*
* Writing zeroes into userspace here is OK, because we know that if
* the zero gets there, we'll be overwriting it.
*/
do {
if (unlikely(__put_user(0, uaddr) != 0))
return -EFAULT;
uaddr += PAGE_SIZE;
} while (uaddr <= end);
/* Check whether the range spilled into the next page. */
if (((unsigned long)uaddr & PAGE_MASK) ==
((unsigned long)end & PAGE_MASK))
return __put_user(0, end);
return 0;
}
static inline int fault_in_pages_readable(const char __user *uaddr, size_t size)
{
volatile char c;
const char __user *end = uaddr + size - 1;
if (unlikely(size == 0))
return 0;
if (unlikely(uaddr > end))
return -EFAULT;
do {
if (unlikely(__get_user(c, uaddr) != 0))
return -EFAULT;
uaddr += PAGE_SIZE;
} while (uaddr <= end);
/* Check whether the range spilled into the next page. */
if (((unsigned long)uaddr & PAGE_MASK) ==
((unsigned long)end & PAGE_MASK)) {
return __get_user(c, end);
}
(void)c;
return 0;
}
int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
pgoff_t index, gfp_t gfp_mask);

View File

@ -35,7 +35,6 @@ struct iov_iter_state {
struct iov_iter {
u8 iter_type;
bool nofault;
bool data_source;
size_t iov_offset;
size_t count;
@ -134,8 +133,7 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset,
size_t bytes, struct iov_iter *i);
void iov_iter_advance(struct iov_iter *i, size_t bytes);
void iov_iter_revert(struct iov_iter *i, size_t bytes);
size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes);
size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t bytes);
int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes);
size_t iov_iter_single_seg_count(const struct iov_iter *i);
size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
struct iov_iter *i);

View File

@ -4800,12 +4800,10 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
/* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */
for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
u32 type, flag;
type = base_type(ctx_arg_info->reg_type);
flag = type_flag(ctx_arg_info->reg_type);
if (ctx_arg_info->offset == off && type == PTR_TO_BUF &&
(flag & PTR_MAYBE_NULL)) {
if (ctx_arg_info->offset == off &&
(ctx_arg_info->reg_type == PTR_TO_RDONLY_BUF_OR_NULL ||
ctx_arg_info->reg_type == PTR_TO_RDWR_BUF_OR_NULL)) {
info->reg_type = ctx_arg_info->reg_type;
return true;
}
@ -5510,9 +5508,9 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
if (reg->type == PTR_TO_BTF_ID) {
reg_btf = reg->btf;
reg_ref_id = reg->btf_id;
} else if (reg2btf_ids[base_type(reg->type)]) {
} else if (reg2btf_ids[reg->type]) {
reg_btf = btf_vmlinux;
reg_ref_id = *reg2btf_ids[base_type(reg->type)];
reg_ref_id = *reg2btf_ids[reg->type];
} else {
bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d is not a pointer to btf_id\n",
func_name, i,
@ -5719,7 +5717,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
return -EINVAL;
}
reg->type = PTR_TO_MEM | PTR_MAYBE_NULL;
reg->type = PTR_TO_MEM_OR_NULL;
reg->id = ++env->id_gen;
continue;
@ -6232,7 +6230,7 @@ const struct bpf_func_proto bpf_btf_find_by_name_kind_proto = {
.func = bpf_btf_find_by_name_kind,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg1_type = ARG_PTR_TO_MEM,
.arg2_type = ARG_CONST_SIZE,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_ANYTHING,

View File

@ -1753,7 +1753,7 @@ static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
};

View File

@ -530,7 +530,7 @@ const struct bpf_func_proto bpf_strtol_proto = {
.func = bpf_strtol,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg1_type = ARG_PTR_TO_MEM,
.arg2_type = ARG_CONST_SIZE,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_LONG,
@ -558,7 +558,7 @@ const struct bpf_func_proto bpf_strtoul_proto = {
.func = bpf_strtoul,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg1_type = ARG_PTR_TO_MEM,
.arg2_type = ARG_CONST_SIZE,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_LONG,
@ -630,7 +630,7 @@ const struct bpf_func_proto bpf_event_output_data_proto = {
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg4_type = ARG_PTR_TO_MEM,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
@ -667,7 +667,7 @@ BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
.func = bpf_per_cpu_ptr,
.gpl_only = false,
.ret_type = RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY,
.ret_type = RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL,
.arg1_type = ARG_PTR_TO_PERCPU_BTF_ID,
.arg2_type = ARG_ANYTHING,
};
@ -680,7 +680,7 @@ BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
.func = bpf_this_cpu_ptr,
.gpl_only = false,
.ret_type = RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY,
.ret_type = RET_PTR_TO_MEM_OR_BTF_ID,
.arg1_type = ARG_PTR_TO_PERCPU_BTF_ID,
};
@ -1013,7 +1013,7 @@ const struct bpf_func_proto bpf_snprintf_proto = {
.arg1_type = ARG_PTR_TO_MEM_OR_NULL,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_PTR_TO_CONST_STR,
.arg4_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
.arg4_type = ARG_PTR_TO_MEM_OR_NULL,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};

View File

@ -174,9 +174,9 @@ static const struct bpf_iter_reg bpf_map_elem_reg_info = {
.ctx_arg_info_size = 2,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__bpf_map_elem, key),
PTR_TO_BUF | PTR_MAYBE_NULL | MEM_RDONLY },
PTR_TO_RDONLY_BUF_OR_NULL },
{ offsetof(struct bpf_iter__bpf_map_elem, value),
PTR_TO_BUF | PTR_MAYBE_NULL },
PTR_TO_RDWR_BUF_OR_NULL },
},
};

View File

@ -444,7 +444,7 @@ const struct bpf_func_proto bpf_ringbuf_output_proto = {
.func = bpf_ringbuf_output,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
};

View File

@ -4757,7 +4757,7 @@ static const struct bpf_func_proto bpf_sys_bpf_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_ANYTHING,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
};

View File

@ -445,6 +445,18 @@ static bool reg_type_not_null(enum bpf_reg_type type)
type == PTR_TO_SOCK_COMMON;
}
static bool reg_type_may_be_null(enum bpf_reg_type type)
{
return type == PTR_TO_MAP_VALUE_OR_NULL ||
type == PTR_TO_SOCKET_OR_NULL ||
type == PTR_TO_SOCK_COMMON_OR_NULL ||
type == PTR_TO_TCP_SOCK_OR_NULL ||
type == PTR_TO_BTF_ID_OR_NULL ||
type == PTR_TO_MEM_OR_NULL ||
type == PTR_TO_RDONLY_BUF_OR_NULL ||
type == PTR_TO_RDWR_BUF_OR_NULL;
}
static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
{
return reg->type == PTR_TO_MAP_VALUE &&
@ -453,14 +465,12 @@ static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
{
return base_type(type) == PTR_TO_SOCKET ||
base_type(type) == PTR_TO_TCP_SOCK ||
base_type(type) == PTR_TO_MEM;
}
static bool type_is_rdonly_mem(u32 type)
{
return type & MEM_RDONLY;
return type == PTR_TO_SOCKET ||
type == PTR_TO_SOCKET_OR_NULL ||
type == PTR_TO_TCP_SOCK ||
type == PTR_TO_TCP_SOCK_OR_NULL ||
type == PTR_TO_MEM ||
type == PTR_TO_MEM_OR_NULL;
}
static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
@ -468,9 +478,14 @@ static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
return type == ARG_PTR_TO_SOCK_COMMON;
}
static bool type_may_be_null(u32 type)
static bool arg_type_may_be_null(enum bpf_arg_type type)
{
return type & PTR_MAYBE_NULL;
return type == ARG_PTR_TO_MAP_VALUE_OR_NULL ||
type == ARG_PTR_TO_MEM_OR_NULL ||
type == ARG_PTR_TO_CTX_OR_NULL ||
type == ARG_PTR_TO_SOCKET_OR_NULL ||
type == ARG_PTR_TO_ALLOC_MEM_OR_NULL ||
type == ARG_PTR_TO_STACK_OR_NULL;
}
/* Determine whether the function releases some resources allocated by another
@ -530,54 +545,39 @@ static bool is_cmpxchg_insn(const struct bpf_insn *insn)
insn->imm == BPF_CMPXCHG;
}
/* string representation of 'enum bpf_reg_type'
*
* Note that reg_type_str() can not appear more than once in a single verbose()
* statement.
*/
static const char *reg_type_str(struct bpf_verifier_env *env,
enum bpf_reg_type type)
{
char postfix[16] = {0}, prefix[16] = {0};
static const char * const str[] = {
[NOT_INIT] = "?",
[SCALAR_VALUE] = "inv",
[PTR_TO_CTX] = "ctx",
[CONST_PTR_TO_MAP] = "map_ptr",
[PTR_TO_MAP_VALUE] = "map_value",
[PTR_TO_STACK] = "fp",
[PTR_TO_PACKET] = "pkt",
[PTR_TO_PACKET_META] = "pkt_meta",
[PTR_TO_PACKET_END] = "pkt_end",
[PTR_TO_FLOW_KEYS] = "flow_keys",
[PTR_TO_SOCKET] = "sock",
[PTR_TO_SOCK_COMMON] = "sock_common",
[PTR_TO_TCP_SOCK] = "tcp_sock",
[PTR_TO_TP_BUFFER] = "tp_buffer",
[PTR_TO_XDP_SOCK] = "xdp_sock",
[PTR_TO_BTF_ID] = "ptr_",
[PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_",
[PTR_TO_MEM] = "mem",
[PTR_TO_BUF] = "buf",
[PTR_TO_FUNC] = "func",
[PTR_TO_MAP_KEY] = "map_key",
};
if (type & PTR_MAYBE_NULL) {
if (base_type(type) == PTR_TO_BTF_ID ||
base_type(type) == PTR_TO_PERCPU_BTF_ID)
strncpy(postfix, "or_null_", 16);
else
strncpy(postfix, "_or_null", 16);
}
if (type & MEM_RDONLY)
strncpy(prefix, "rdonly_", 16);
snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
prefix, str[base_type(type)], postfix);
return env->type_str_buf;
}
/* string representation of 'enum bpf_reg_type' */
static const char * const reg_type_str[] = {
[NOT_INIT] = "?",
[SCALAR_VALUE] = "inv",
[PTR_TO_CTX] = "ctx",
[CONST_PTR_TO_MAP] = "map_ptr",
[PTR_TO_MAP_VALUE] = "map_value",
[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
[PTR_TO_STACK] = "fp",
[PTR_TO_PACKET] = "pkt",
[PTR_TO_PACKET_META] = "pkt_meta",
[PTR_TO_PACKET_END] = "pkt_end",
[PTR_TO_FLOW_KEYS] = "flow_keys",
[PTR_TO_SOCKET] = "sock",
[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
[PTR_TO_SOCK_COMMON] = "sock_common",
[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
[PTR_TO_TCP_SOCK] = "tcp_sock",
[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
[PTR_TO_TP_BUFFER] = "tp_buffer",
[PTR_TO_XDP_SOCK] = "xdp_sock",
[PTR_TO_BTF_ID] = "ptr_",
[PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_",
[PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_",
[PTR_TO_MEM] = "mem",
[PTR_TO_MEM_OR_NULL] = "mem_or_null",
[PTR_TO_RDONLY_BUF] = "rdonly_buf",
[PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null",
[PTR_TO_RDWR_BUF] = "rdwr_buf",
[PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null",
[PTR_TO_FUNC] = "func",
[PTR_TO_MAP_KEY] = "map_key",
};
static char slot_type_char[] = {
[STACK_INVALID] = '?',
@ -628,7 +628,7 @@ static void print_verifier_state(struct bpf_verifier_env *env,
continue;
verbose(env, " R%d", i);
print_liveness(env, reg->live);
verbose(env, "=%s", reg_type_str(env, t));
verbose(env, "=%s", reg_type_str[t]);
if (t == SCALAR_VALUE && reg->precise)
verbose(env, "P");
if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
@ -636,8 +636,9 @@ static void print_verifier_state(struct bpf_verifier_env *env,
/* reg->off should be 0 for SCALAR_VALUE */
verbose(env, "%lld", reg->var_off.value + reg->off);
} else {
if (base_type(t) == PTR_TO_BTF_ID ||
base_type(t) == PTR_TO_PERCPU_BTF_ID)
if (t == PTR_TO_BTF_ID ||
t == PTR_TO_BTF_ID_OR_NULL ||
t == PTR_TO_PERCPU_BTF_ID)
verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id));
verbose(env, "(id=%d", reg->id);
if (reg_type_may_be_refcounted_or_null(t))
@ -646,9 +647,10 @@ static void print_verifier_state(struct bpf_verifier_env *env,
verbose(env, ",off=%d", reg->off);
if (type_is_pkt_pointer(t))
verbose(env, ",r=%d", reg->range);
else if (base_type(t) == CONST_PTR_TO_MAP ||
base_type(t) == PTR_TO_MAP_KEY ||
base_type(t) == PTR_TO_MAP_VALUE)
else if (t == CONST_PTR_TO_MAP ||
t == PTR_TO_MAP_KEY ||
t == PTR_TO_MAP_VALUE ||
t == PTR_TO_MAP_VALUE_OR_NULL)
verbose(env, ",ks=%d,vs=%d",
reg->map_ptr->key_size,
reg->map_ptr->value_size);
@ -718,7 +720,7 @@ static void print_verifier_state(struct bpf_verifier_env *env,
if (state->stack[i].slot_type[0] == STACK_SPILL) {
reg = &state->stack[i].spilled_ptr;
t = reg->type;
verbose(env, "=%s", reg_type_str(env, t));
verbose(env, "=%s", reg_type_str[t]);
if (t == SCALAR_VALUE && reg->precise)
verbose(env, "P");
if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
@ -1131,7 +1133,8 @@ static void mark_reg_known_zero(struct bpf_verifier_env *env,
static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
{
if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
switch (reg->type) {
case PTR_TO_MAP_VALUE_OR_NULL: {
const struct bpf_map *map = reg->map_ptr;
if (map->inner_map_meta) {
@ -1150,10 +1153,32 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
} else {
reg->type = PTR_TO_MAP_VALUE;
}
return;
break;
}
case PTR_TO_SOCKET_OR_NULL:
reg->type = PTR_TO_SOCKET;
break;
case PTR_TO_SOCK_COMMON_OR_NULL:
reg->type = PTR_TO_SOCK_COMMON;
break;
case PTR_TO_TCP_SOCK_OR_NULL:
reg->type = PTR_TO_TCP_SOCK;
break;
case PTR_TO_BTF_ID_OR_NULL:
reg->type = PTR_TO_BTF_ID;
break;
case PTR_TO_MEM_OR_NULL:
reg->type = PTR_TO_MEM;
break;
case PTR_TO_RDONLY_BUF_OR_NULL:
reg->type = PTR_TO_RDONLY_BUF;
break;
case PTR_TO_RDWR_BUF_OR_NULL:
reg->type = PTR_TO_RDWR_BUF;
break;
default:
WARN_ONCE(1, "unknown nullable register type");
}
reg->type &= ~PTR_MAYBE_NULL;
}
static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
@ -1881,7 +1906,7 @@ static int mark_reg_read(struct bpf_verifier_env *env,
break;
if (parent->live & REG_LIVE_DONE) {
verbose(env, "verifier BUG type %s var_off %lld off %d\n",
reg_type_str(env, parent->type),
reg_type_str[parent->type],
parent->var_off.value, parent->off);
return -EFAULT;
}
@ -2539,8 +2564,9 @@ static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
static bool is_spillable_regtype(enum bpf_reg_type type)
{
switch (base_type(type)) {
switch (type) {
case PTR_TO_MAP_VALUE:
case PTR_TO_MAP_VALUE_OR_NULL:
case PTR_TO_STACK:
case PTR_TO_CTX:
case PTR_TO_PACKET:
@ -2549,13 +2575,21 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case PTR_TO_FLOW_KEYS:
case CONST_PTR_TO_MAP:
case PTR_TO_SOCKET:
case PTR_TO_SOCKET_OR_NULL:
case PTR_TO_SOCK_COMMON:
case PTR_TO_SOCK_COMMON_OR_NULL:
case PTR_TO_TCP_SOCK:
case PTR_TO_TCP_SOCK_OR_NULL:
case PTR_TO_XDP_SOCK:
case PTR_TO_BTF_ID:
case PTR_TO_BUF:
case PTR_TO_BTF_ID_OR_NULL:
case PTR_TO_RDONLY_BUF:
case PTR_TO_RDONLY_BUF_OR_NULL:
case PTR_TO_RDWR_BUF:
case PTR_TO_RDWR_BUF_OR_NULL:
case PTR_TO_PERCPU_BTF_ID:
case PTR_TO_MEM:
case PTR_TO_MEM_OR_NULL:
case PTR_TO_FUNC:
case PTR_TO_MAP_KEY:
return true;
@ -3371,7 +3405,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
*/
*reg_type = info.reg_type;
if (base_type(*reg_type) == PTR_TO_BTF_ID) {
if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL) {
*btf = info.btf;
*btf_id = info.btf_id;
} else {
@ -3439,7 +3473,7 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
}
verbose(env, "R%d invalid %s access off=%d size=%d\n",
regno, reg_type_str(env, reg->type), off, size);
regno, reg_type_str[reg->type], off, size);
return -EACCES;
}
@ -4166,30 +4200,15 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
mark_reg_unknown(env, regs, value_regno);
}
}
} else if (base_type(reg->type) == PTR_TO_MEM) {
bool rdonly_mem = type_is_rdonly_mem(reg->type);
if (type_may_be_null(reg->type)) {
verbose(env, "R%d invalid mem access '%s'\n", regno,
reg_type_str(env, reg->type));
return -EACCES;
}
if (t == BPF_WRITE && rdonly_mem) {
verbose(env, "R%d cannot write into %s\n",
regno, reg_type_str(env, reg->type));
return -EACCES;
}
} else if (reg->type == PTR_TO_MEM) {
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
verbose(env, "R%d leaks addr into mem\n", value_regno);
return -EACCES;
}
err = check_mem_region_access(env, regno, off, size,
reg->mem_size, false);
if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
} else if (reg->type == PTR_TO_CTX) {
enum bpf_reg_type reg_type = SCALAR_VALUE;
@ -4219,7 +4238,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
} else {
mark_reg_known_zero(env, regs,
value_regno);
if (type_may_be_null(reg_type))
if (reg_type_may_be_null(reg_type))
regs[value_regno].id = ++env->id_gen;
/* A load of ctx field could have different
* actual load size with the one encoded in the
@ -4227,7 +4246,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
* a sub-register.
*/
regs[value_regno].subreg_def = DEF_NOT_SUBREG;
if (base_type(reg_type) == PTR_TO_BTF_ID) {
if (reg_type == PTR_TO_BTF_ID ||
reg_type == PTR_TO_BTF_ID_OR_NULL) {
regs[value_regno].btf = btf;
regs[value_regno].btf_id = btf_id;
}
@ -4280,7 +4300,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
} else if (type_is_sk_pointer(reg->type)) {
if (t == BPF_WRITE) {
verbose(env, "R%d cannot write into %s\n",
regno, reg_type_str(env, reg->type));
regno, reg_type_str[reg->type]);
return -EACCES;
}
err = check_sock_access(env, insn_idx, regno, off, size, t);
@ -4296,32 +4316,26 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
} else if (reg->type == CONST_PTR_TO_MAP) {
err = check_ptr_to_map_access(env, regs, regno, off, size, t,
value_regno);
} else if (base_type(reg->type) == PTR_TO_BUF) {
bool rdonly_mem = type_is_rdonly_mem(reg->type);
const char *buf_info;
u32 *max_access;
if (rdonly_mem) {
if (t == BPF_WRITE) {
verbose(env, "R%d cannot write into %s\n",
regno, reg_type_str(env, reg->type));
return -EACCES;
}
buf_info = "rdonly";
max_access = &env->prog->aux->max_rdonly_access;
} else {
buf_info = "rdwr";
max_access = &env->prog->aux->max_rdwr_access;
} else if (reg->type == PTR_TO_RDONLY_BUF) {
if (t == BPF_WRITE) {
verbose(env, "R%d cannot write into %s\n",
regno, reg_type_str[reg->type]);
return -EACCES;
}
err = check_buffer_access(env, reg, regno, off, size, false,
buf_info, max_access);
if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
"rdonly",
&env->prog->aux->max_rdonly_access);
if (!err && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
} else if (reg->type == PTR_TO_RDWR_BUF) {
err = check_buffer_access(env, reg, regno, off, size, false,
"rdwr",
&env->prog->aux->max_rdwr_access);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
} else {
verbose(env, "R%d invalid mem access '%s'\n", regno,
reg_type_str(env, reg->type));
reg_type_str[reg->type]);
return -EACCES;
}
@ -4395,7 +4409,7 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
is_sk_reg(env, insn->dst_reg)) {
verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
insn->dst_reg,
reg_type_str(env, reg_state(env, insn->dst_reg)->type));
reg_type_str[reg_state(env, insn->dst_reg)->type]);
return -EACCES;
}
@ -4578,10 +4592,8 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
struct bpf_call_arg_meta *meta)
{
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
const char *buf_info;
u32 *max_access;
switch (base_type(reg->type)) {
switch (reg->type) {
case PTR_TO_PACKET:
case PTR_TO_PACKET_META:
return check_packet_access(env, regno, reg->off, access_size,
@ -4600,20 +4612,18 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
return check_mem_region_access(env, regno, reg->off,
access_size, reg->mem_size,
zero_size_allowed);
case PTR_TO_BUF:
if (type_is_rdonly_mem(reg->type)) {
if (meta && meta->raw_mode)
return -EACCES;
buf_info = "rdonly";
max_access = &env->prog->aux->max_rdonly_access;
} else {
buf_info = "rdwr";
max_access = &env->prog->aux->max_rdwr_access;
}
case PTR_TO_RDONLY_BUF:
if (meta && meta->raw_mode)
return -EACCES;
return check_buffer_access(env, reg, regno, reg->off,
access_size, zero_size_allowed,
buf_info, max_access);
"rdonly",
&env->prog->aux->max_rdonly_access);
case PTR_TO_RDWR_BUF:
return check_buffer_access(env, reg, regno, reg->off,
access_size, zero_size_allowed,
"rdwr",
&env->prog->aux->max_rdwr_access);
case PTR_TO_STACK:
return check_stack_range_initialized(
env,
@ -4625,9 +4635,9 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
register_is_null(reg))
return 0;
verbose(env, "R%d type=%s ", regno,
reg_type_str(env, reg->type));
verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
verbose(env, "R%d type=%s expected=%s\n", regno,
reg_type_str[reg->type],
reg_type_str[PTR_TO_STACK]);
return -EACCES;
}
}
@ -4638,7 +4648,7 @@ int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
if (register_is_null(reg))
return 0;
if (type_may_be_null(reg->type)) {
if (reg_type_may_be_null(reg->type)) {
/* Assuming that the register contains a value check if the memory
* access is safe. Temporarily save and restore the register's state as
* the conversion shouldn't be visible to a caller.
@ -4786,8 +4796,9 @@ static int process_timer_func(struct bpf_verifier_env *env, int regno,
static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
{
return base_type(type) == ARG_PTR_TO_MEM ||
base_type(type) == ARG_PTR_TO_UNINIT_MEM;
return type == ARG_PTR_TO_MEM ||
type == ARG_PTR_TO_MEM_OR_NULL ||
type == ARG_PTR_TO_UNINIT_MEM;
}
static bool arg_type_is_mem_size(enum bpf_arg_type type)
@ -4889,7 +4900,8 @@ static const struct bpf_reg_types mem_types = {
PTR_TO_MAP_KEY,
PTR_TO_MAP_VALUE,
PTR_TO_MEM,
PTR_TO_BUF,
PTR_TO_RDONLY_BUF,
PTR_TO_RDWR_BUF,
},
};
@ -4920,26 +4932,31 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
[ARG_PTR_TO_MAP_VALUE] = &map_key_value_types,
[ARG_PTR_TO_UNINIT_MAP_VALUE] = &map_key_value_types,
[ARG_PTR_TO_MAP_VALUE_OR_NULL] = &map_key_value_types,
[ARG_CONST_SIZE] = &scalar_types,
[ARG_CONST_SIZE_OR_ZERO] = &scalar_types,
[ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types,
[ARG_CONST_MAP_PTR] = &const_map_ptr_types,
[ARG_PTR_TO_CTX] = &context_types,
[ARG_PTR_TO_CTX_OR_NULL] = &context_types,
[ARG_PTR_TO_SOCK_COMMON] = &sock_types,
#ifdef CONFIG_NET
[ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
#endif
[ARG_PTR_TO_SOCKET] = &fullsock_types,
[ARG_PTR_TO_SOCKET_OR_NULL] = &fullsock_types,
[ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
[ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
[ARG_PTR_TO_MEM] = &mem_types,
[ARG_PTR_TO_MEM_OR_NULL] = &mem_types,
[ARG_PTR_TO_UNINIT_MEM] = &mem_types,
[ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types,
[ARG_PTR_TO_ALLOC_MEM_OR_NULL] = &alloc_mem_types,
[ARG_PTR_TO_INT] = &int_ptr_types,
[ARG_PTR_TO_LONG] = &int_ptr_types,
[ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
[ARG_PTR_TO_FUNC] = &func_ptr_types,
[ARG_PTR_TO_STACK] = &stack_ptr_types,
[ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types,
[ARG_PTR_TO_CONST_STR] = &const_str_ptr_types,
[ARG_PTR_TO_TIMER] = &timer_types,
};
@ -4953,27 +4970,12 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
const struct bpf_reg_types *compatible;
int i, j;
compatible = compatible_reg_types[base_type(arg_type)];
compatible = compatible_reg_types[arg_type];
if (!compatible) {
verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
return -EFAULT;
}
/* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
* but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
*
* Same for MAYBE_NULL:
*
* ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
* but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
*
* Therefore we fold these flags depending on the arg_type before comparison.
*/
if (arg_type & MEM_RDONLY)
type &= ~MEM_RDONLY;
if (arg_type & PTR_MAYBE_NULL)
type &= ~PTR_MAYBE_NULL;
for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
expected = compatible->types[i];
if (expected == NOT_INIT)
@ -4983,14 +4985,14 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
goto found;
}
verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]);
for (j = 0; j + 1 < i; j++)
verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
verbose(env, "%s, ", reg_type_str[compatible->types[j]]);
verbose(env, "%s\n", reg_type_str[compatible->types[j]]);
return -EACCES;
found:
if (reg->type == PTR_TO_BTF_ID) {
if (type == PTR_TO_BTF_ID) {
if (!arg_btf_id) {
if (!compatible->btf_id) {
verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
@ -5049,14 +5051,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
return -EACCES;
}
if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE ||
base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) {
if (arg_type == ARG_PTR_TO_MAP_VALUE ||
arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) {
err = resolve_map_arg_type(env, meta, &arg_type);
if (err)
return err;
}
if (register_is_null(reg) && type_may_be_null(arg_type))
if (register_is_null(reg) && arg_type_may_be_null(arg_type))
/* A NULL register has a SCALAR_VALUE type, so skip
* type checking.
*/
@ -5125,11 +5128,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
err = check_helper_mem_access(env, regno,
meta->map_ptr->key_size, false,
NULL);
} else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE ||
base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) {
if (type_may_be_null(arg_type) && register_is_null(reg))
return 0;
} else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
(arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL &&
!register_is_null(reg)) ||
arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
/* bpf_map_xxx(..., map_ptr, ..., value) call:
* check [value, value + map->value_size) validity
*/
@ -6204,8 +6206,6 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
int *insn_idx_p)
{
const struct bpf_func_proto *fn = NULL;
enum bpf_return_type ret_type;
enum bpf_type_flag ret_flag;
struct bpf_reg_state *regs;
struct bpf_call_arg_meta meta;
int insn_idx = *insn_idx_p;
@ -6339,14 +6339,13 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
/* update return register (already marked as written above) */
ret_type = fn->ret_type;
ret_flag = type_flag(fn->ret_type);
if (ret_type == RET_INTEGER) {
if (fn->ret_type == RET_INTEGER) {
/* sets type to SCALAR_VALUE */
mark_reg_unknown(env, regs, BPF_REG_0);
} else if (ret_type == RET_VOID) {
} else if (fn->ret_type == RET_VOID) {
regs[BPF_REG_0].type = NOT_INIT;
} else if (base_type(ret_type) == RET_PTR_TO_MAP_VALUE) {
} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
fn->ret_type == RET_PTR_TO_MAP_VALUE) {
/* There is no offset yet applied, variable or fixed */
mark_reg_known_zero(env, regs, BPF_REG_0);
/* remember map_ptr, so that check_map_access()
@ -6360,25 +6359,28 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
}
regs[BPF_REG_0].map_ptr = meta.map_ptr;
regs[BPF_REG_0].map_uid = meta.map_uid;
regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
if (!type_may_be_null(ret_type) &&
map_value_has_spin_lock(meta.map_ptr)) {
regs[BPF_REG_0].id = ++env->id_gen;
if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
if (map_value_has_spin_lock(meta.map_ptr))
regs[BPF_REG_0].id = ++env->id_gen;
} else {
regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
}
} else if (base_type(ret_type) == RET_PTR_TO_SOCKET) {
} else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
} else if (base_type(ret_type) == RET_PTR_TO_SOCK_COMMON) {
regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
} else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) {
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
} else if (base_type(ret_type) == RET_PTR_TO_TCP_SOCK) {
regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL;
} else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
} else if (base_type(ret_type) == RET_PTR_TO_ALLOC_MEM) {
regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
} else if (fn->ret_type == RET_PTR_TO_ALLOC_MEM_OR_NULL) {
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
regs[BPF_REG_0].mem_size = meta.mem_size;
} else if (base_type(ret_type) == RET_PTR_TO_MEM_OR_BTF_ID) {
} else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL ||
fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) {
const struct btf_type *t;
mark_reg_known_zero(env, regs, BPF_REG_0);
@ -6396,30 +6398,29 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
tname, PTR_ERR(ret));
return -EINVAL;
}
regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
regs[BPF_REG_0].type =
fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
PTR_TO_MEM : PTR_TO_MEM_OR_NULL;
regs[BPF_REG_0].mem_size = tsize;
} else {
/* MEM_RDONLY may be carried from ret_flag, but it
* doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
* it will confuse the check of PTR_TO_BTF_ID in
* check_mem_access().
*/
ret_flag &= ~MEM_RDONLY;
regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
regs[BPF_REG_0].type =
fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL;
regs[BPF_REG_0].btf = meta.ret_btf;
regs[BPF_REG_0].btf_id = meta.ret_btf_id;
}
} else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) {
} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL ||
fn->ret_type == RET_PTR_TO_BTF_ID) {
int ret_btf_id;
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
regs[BPF_REG_0].type = fn->ret_type == RET_PTR_TO_BTF_ID ?
PTR_TO_BTF_ID :
PTR_TO_BTF_ID_OR_NULL;
ret_btf_id = *fn->ret_btf_id;
if (ret_btf_id == 0) {
verbose(env, "invalid return type %u of func %s#%d\n",
base_type(ret_type), func_id_name(func_id),
func_id);
verbose(env, "invalid return type %d of func %s#%d\n",
fn->ret_type, func_id_name(func_id), func_id);
return -EINVAL;
}
/* current BPF helper definitions are only coming from
@ -6428,12 +6429,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
regs[BPF_REG_0].btf = btf_vmlinux;
regs[BPF_REG_0].btf_id = ret_btf_id;
} else {
verbose(env, "unknown return type %u of func %s#%d\n",
base_type(ret_type), func_id_name(func_id), func_id);
verbose(env, "unknown return type %d of func %s#%d\n",
fn->ret_type, func_id_name(func_id), func_id);
return -EINVAL;
}
if (type_may_be_null(regs[BPF_REG_0].type))
if (reg_type_may_be_null(regs[BPF_REG_0].type))
regs[BPF_REG_0].id = ++env->id_gen;
if (is_ptr_cast_function(func_id)) {
@ -6632,25 +6633,25 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env,
if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
verbose(env, "math between %s pointer and %lld is not allowed\n",
reg_type_str(env, type), val);
reg_type_str[type], val);
return false;
}
if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
verbose(env, "%s pointer offset %d is not allowed\n",
reg_type_str(env, type), reg->off);
reg_type_str[type], reg->off);
return false;
}
if (smin == S64_MIN) {
verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
reg_type_str(env, type));
reg_type_str[type]);
return false;
}
if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
verbose(env, "value %lld makes %s pointer be out of bounds\n",
smin, reg_type_str(env, type));
smin, reg_type_str[type]);
return false;
}
@ -7027,13 +7028,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
return -EACCES;
}
if (ptr_reg->type & PTR_MAYBE_NULL) {
switch (ptr_reg->type) {
case PTR_TO_MAP_VALUE_OR_NULL:
verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
dst, reg_type_str(env, ptr_reg->type));
dst, reg_type_str[ptr_reg->type]);
return -EACCES;
}
switch (base_type(ptr_reg->type)) {
case CONST_PTR_TO_MAP:
/* smin_val represents the known value */
if (known && smin_val == 0 && opcode == BPF_ADD)
@ -7046,10 +7045,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
case PTR_TO_XDP_SOCK:
reject:
verbose(env, "R%d pointer arithmetic on %s prohibited\n",
dst, reg_type_str(env, ptr_reg->type));
dst, reg_type_str[ptr_reg->type]);
return -EACCES;
default:
if (type_may_be_null(ptr_reg->type))
if (reg_type_may_be_null(ptr_reg->type))
goto reject;
break;
}
@ -8771,7 +8770,7 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
struct bpf_reg_state *reg, u32 id,
bool is_null)
{
if (type_may_be_null(reg->type) && reg->id == id &&
if (reg_type_may_be_null(reg->type) && reg->id == id &&
!WARN_ON_ONCE(!reg->id)) {
if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
!tnum_equals_const(reg->var_off, 0) ||
@ -9149,7 +9148,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
*/
if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
type_may_be_null(dst_reg->type)) {
reg_type_may_be_null(dst_reg->type)) {
/* Mark all identical registers in each branch as either
* safe or unknown depending R == 0 or R != 0 conditional.
*/
@ -9208,7 +9207,7 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
dst_reg->type = aux->btf_var.reg_type;
switch (base_type(dst_reg->type)) {
switch (dst_reg->type) {
case PTR_TO_MEM:
dst_reg->mem_size = aux->btf_var.mem_size;
break;
@ -9405,7 +9404,7 @@ static int check_return_code(struct bpf_verifier_env *env)
/* enforce return zero from async callbacks like timer */
if (reg->type != SCALAR_VALUE) {
verbose(env, "In async callback the register R0 is not a known value (%s)\n",
reg_type_str(env, reg->type));
reg_type_str[reg->type]);
return -EINVAL;
}
@ -9419,7 +9418,7 @@ static int check_return_code(struct bpf_verifier_env *env)
if (is_subprog) {
if (reg->type != SCALAR_VALUE) {
verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
reg_type_str(env, reg->type));
reg_type_str[reg->type]);
return -EINVAL;
}
return 0;
@ -9483,7 +9482,7 @@ static int check_return_code(struct bpf_verifier_env *env)
if (reg->type != SCALAR_VALUE) {
verbose(env, "At program exit the register R0 is not a known value (%s)\n",
reg_type_str(env, reg->type));
reg_type_str[reg->type]);
return -EINVAL;
}
@ -10264,7 +10263,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
return true;
if (rcur->type == NOT_INIT)
return false;
switch (base_type(rold->type)) {
switch (rold->type) {
case SCALAR_VALUE:
if (env->explore_alu_limits)
return false;
@ -10286,22 +10285,6 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
}
case PTR_TO_MAP_KEY:
case PTR_TO_MAP_VALUE:
/* a PTR_TO_MAP_VALUE could be safe to use as a
* PTR_TO_MAP_VALUE_OR_NULL into the same map.
* However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
* checked, doing so could have affected others with the same
* id, and we can't check for that because we lost the id when
* we converted to a PTR_TO_MAP_VALUE.
*/
if (type_may_be_null(rold->type)) {
if (!type_may_be_null(rcur->type))
return false;
if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
return false;
/* Check our ids match any regs they're supposed to */
return check_ids(rold->id, rcur->id, idmap);
}
/* If the new min/max/var_off satisfy the old ones and
* everything else matches, we are OK.
* 'id' is not compared, since it's only used for maps with
@ -10313,6 +10296,20 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
range_within(rold, rcur) &&
tnum_in(rold->var_off, rcur->var_off);
case PTR_TO_MAP_VALUE_OR_NULL:
/* a PTR_TO_MAP_VALUE could be safe to use as a
* PTR_TO_MAP_VALUE_OR_NULL into the same map.
* However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
* checked, doing so could have affected others with the same
* id, and we can't check for that because we lost the id when
* we converted to a PTR_TO_MAP_VALUE.
*/
if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
return false;
if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
return false;
/* Check our ids match any regs they're supposed to */
return check_ids(rold->id, rcur->id, idmap);
case PTR_TO_PACKET_META:
case PTR_TO_PACKET:
if (rcur->type != rold->type)
@ -10341,8 +10338,11 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
case PTR_TO_PACKET_END:
case PTR_TO_FLOW_KEYS:
case PTR_TO_SOCKET:
case PTR_TO_SOCKET_OR_NULL:
case PTR_TO_SOCK_COMMON:
case PTR_TO_SOCK_COMMON_OR_NULL:
case PTR_TO_TCP_SOCK:
case PTR_TO_TCP_SOCK_OR_NULL:
case PTR_TO_XDP_SOCK:
/* Only valid matches are exact, which memcmp() above
* would have accepted
@ -10868,13 +10868,17 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
/* Return true if it's OK to have the same insn return a different type. */
static bool reg_type_mismatch_ok(enum bpf_reg_type type)
{
switch (base_type(type)) {
switch (type) {
case PTR_TO_CTX:
case PTR_TO_SOCKET:
case PTR_TO_SOCKET_OR_NULL:
case PTR_TO_SOCK_COMMON:
case PTR_TO_SOCK_COMMON_OR_NULL:
case PTR_TO_TCP_SOCK:
case PTR_TO_TCP_SOCK_OR_NULL:
case PTR_TO_XDP_SOCK:
case PTR_TO_BTF_ID:
case PTR_TO_BTF_ID_OR_NULL:
return false;
default:
return true;
@ -11098,7 +11102,7 @@ static int do_check(struct bpf_verifier_env *env)
if (is_ctx_reg(env, insn->dst_reg)) {
verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
insn->dst_reg,
reg_type_str(env, reg_state(env, insn->dst_reg)->type));
reg_type_str[reg_state(env, insn->dst_reg)->type]);
return -EACCES;
}
@ -11349,7 +11353,7 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
err = -EINVAL;
goto err_put;
}
aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
aux->btf_var.reg_type = PTR_TO_MEM;
aux->btf_var.mem_size = tsize;
} else {
aux->btf_var.reg_type = PTR_TO_BTF_ID;
@ -13171,7 +13175,7 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
mark_reg_known_zero(env, regs, i);
else if (regs[i].type == SCALAR_VALUE)
mark_reg_unknown(env, regs, i);
else if (base_type(regs[i].type) == PTR_TO_MEM) {
else if (regs[i].type == PTR_TO_MEM_OR_NULL) {
const u32 mem_size = regs[i].mem_size;
mark_reg_known_zero(env, regs, i);

View File

@ -345,7 +345,7 @@ static const struct bpf_func_proto bpf_probe_write_user_proto = {
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_ANYTHING,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
};
@ -394,7 +394,7 @@ static const struct bpf_func_proto bpf_trace_printk_proto = {
.func = bpf_trace_printk,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg1_type = ARG_PTR_TO_MEM,
.arg2_type = ARG_CONST_SIZE,
};
@ -446,9 +446,9 @@ static const struct bpf_func_proto bpf_seq_printf_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &btf_seq_file_ids[0],
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
.arg4_type = ARG_PTR_TO_MEM_OR_NULL,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
@ -463,7 +463,7 @@ static const struct bpf_func_proto bpf_seq_write_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &btf_seq_file_ids[0],
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
};
@ -487,7 +487,7 @@ static const struct bpf_func_proto bpf_seq_printf_btf_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &btf_seq_file_ids[0],
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
};
@ -648,7 +648,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg4_type = ARG_PTR_TO_MEM,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
@ -958,7 +958,7 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_MEM,
.arg2_type = ARG_CONST_SIZE,
.arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg3_type = ARG_PTR_TO_MEM,
.arg4_type = ARG_CONST_SIZE,
.arg5_type = ARG_ANYTHING,
};
@ -1207,7 +1207,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg4_type = ARG_PTR_TO_MEM,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
@ -1429,7 +1429,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg4_type = ARG_PTR_TO_MEM,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
@ -1483,7 +1483,7 @@ static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
};

View File

@ -191,7 +191,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b
buf = iov->iov_base + skip;
copy = min(bytes, iov->iov_len - skip);
if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_writeable(buf, copy)) {
if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
kaddr = kmap_atomic(page);
from = kaddr + offset;
@ -275,7 +275,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
buf = iov->iov_base + skip;
copy = min(bytes, iov->iov_len - skip);
if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_readable(buf, copy)) {
if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
kaddr = kmap_atomic(page);
to = kaddr + offset;
@ -431,81 +431,35 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by
}
/*
* fault_in_iov_iter_readable - fault in iov iterator for reading
* @i: iterator
* @size: maximum length
*
* Fault in one or more iovecs of the given iov_iter, to a maximum length of
* @size. For each iovec, fault in each page that constitutes the iovec.
* bytes. For each iovec, fault in each page that constitutes the iovec.
*
* Returns the number of bytes not faulted in (like copy_to_user() and
* copy_from_user()).
*
* Always returns 0 for non-userspace iterators.
* Return 0 on success, or non-zero if the memory could not be accessed (i.e.
* because it is an invalid address).
*/
size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size)
int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes)
{
if (iter_is_iovec(i)) {
size_t count = min(size, iov_iter_count(i));
const struct iovec *p;
size_t skip;
size -= count;
for (p = i->iov, skip = i->iov_offset; count; p++, skip = 0) {
size_t len = min(count, p->iov_len - skip);
size_t ret;
if (bytes > i->count)
bytes = i->count;
for (p = i->iov, skip = i->iov_offset; bytes; p++, skip = 0) {
size_t len = min(bytes, p->iov_len - skip);
int err;
if (unlikely(!len))
continue;
ret = fault_in_readable(p->iov_base + skip, len);
count -= len - ret;
if (ret)
break;
err = fault_in_pages_readable(p->iov_base + skip, len);
if (unlikely(err))
return err;
bytes -= len;
}
return count + size;
}
return 0;
}
EXPORT_SYMBOL(fault_in_iov_iter_readable);
/*
* fault_in_iov_iter_writeable - fault in iov iterator for writing
* @i: iterator
* @size: maximum length
*
* Faults in the iterator using get_user_pages(), i.e., without triggering
* hardware page faults. This is primarily useful when we already know that
* some or all of the pages in @i aren't in memory.
*
* Returns the number of bytes not faulted in, like copy_to_user() and
* copy_from_user().
*
* Always returns 0 for non-user-space iterators.
*/
size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
{
if (iter_is_iovec(i)) {
size_t count = min(size, iov_iter_count(i));
const struct iovec *p;
size_t skip;
size -= count;
for (p = i->iov, skip = i->iov_offset; count; p++, skip = 0) {
size_t len = min(count, p->iov_len - skip);
size_t ret;
if (unlikely(!len))
continue;
ret = fault_in_safe_writeable(p->iov_base + skip, len);
count -= len - ret;
if (ret)
break;
}
return count + size;
}
return 0;
}
EXPORT_SYMBOL(fault_in_iov_iter_writeable);
EXPORT_SYMBOL(iov_iter_fault_in_readable);
void iov_iter_init(struct iov_iter *i, unsigned int direction,
const struct iovec *iov, unsigned long nr_segs,
@ -514,7 +468,6 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction,
WARN_ON(direction & ~(READ | WRITE));
*i = (struct iov_iter) {
.iter_type = ITER_IOVEC,
.nofault = false,
.data_source = direction,
.iov = iov,
.nr_segs = nr_segs,
@ -1530,17 +1483,13 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
return 0;
if (likely(iter_is_iovec(i))) {
unsigned int gup_flags = 0;
unsigned long addr;
if (iov_iter_rw(i) != WRITE)
gup_flags |= FOLL_WRITE;
if (i->nofault)
gup_flags |= FOLL_NOFAULT;
addr = first_iovec_segment(i, &len, start, maxsize, maxpages);
n = DIV_ROUND_UP(len, PAGE_SIZE);
res = get_user_pages_fast(addr, n, gup_flags, pages);
res = get_user_pages_fast(addr, n,
iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0,
pages);
if (unlikely(res <= 0))
return res;
return (res == n ? len : res * PAGE_SIZE) - *start;
@ -1656,20 +1605,15 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
return 0;
if (likely(iter_is_iovec(i))) {
unsigned int gup_flags = 0;
unsigned long addr;
if (iov_iter_rw(i) != WRITE)
gup_flags |= FOLL_WRITE;
if (i->nofault)
gup_flags |= FOLL_NOFAULT;
addr = first_iovec_segment(i, &len, start, maxsize, ~0U);
n = DIV_ROUND_UP(len, PAGE_SIZE);
p = get_pages_array(n);
if (!p)
return -ENOMEM;
res = get_user_pages_fast(addr, n, gup_flags, p);
res = get_user_pages_fast(addr, n,
iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, p);
if (unlikely(res <= 0)) {
kvfree(p);
*pages = NULL;

View File

@ -90,7 +90,7 @@
* ->lock_page (filemap_fault, access_process_vm)
*
* ->i_rwsem (generic_perform_write)
* ->mmap_lock (fault_in_readable->do_page_fault)
* ->mmap_lock (fault_in_pages_readable->do_page_fault)
*
* bdi->wb.list_lock
* sb_lock (fs/fs-writeback.c)
@ -3810,7 +3810,7 @@ ssize_t generic_perform_write(struct file *file,
* same page as we're writing to, without it being marked
* up-to-date.
*/
if (unlikely(fault_in_iov_iter_readable(i, bytes))) {
if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
status = -EFAULT;
break;
}

120
mm/gup.c
View File

@ -943,8 +943,6 @@ static int faultin_page(struct vm_area_struct *vma,
/* mlock all present pages, but do not fault in new pages */
if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
return -ENOENT;
if (*flags & FOLL_NOFAULT)
return -EFAULT;
if (*flags & FOLL_WRITE)
fault_flags |= FAULT_FLAG_WRITE;
if (*flags & FOLL_REMOTE)
@ -1683,122 +1681,6 @@ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
}
#endif /* !CONFIG_MMU */
/**
* fault_in_writeable - fault in userspace address range for writing
* @uaddr: start of address range
* @size: size of address range
*
* Returns the number of bytes not faulted in (like copy_to_user() and
* copy_from_user()).
*/
size_t fault_in_writeable(char __user *uaddr, size_t size)
{
char __user *start = uaddr, *end;
if (unlikely(size == 0))
return 0;
if (!PAGE_ALIGNED(uaddr)) {
if (unlikely(__put_user(0, uaddr) != 0))
return size;
uaddr = (char __user *)PAGE_ALIGN((unsigned long)uaddr);
}
end = (char __user *)PAGE_ALIGN((unsigned long)start + size);
if (unlikely(end < start))
end = NULL;
while (uaddr != end) {
if (unlikely(__put_user(0, uaddr) != 0))
goto out;
uaddr += PAGE_SIZE;
}
out:
if (size > uaddr - start)
return size - (uaddr - start);
return 0;
}
EXPORT_SYMBOL(fault_in_writeable);
/*
* fault_in_safe_writeable - fault in an address range for writing
* @uaddr: start of address range
* @size: length of address range
*
* Faults in an address range for writing. This is primarily useful when we
* already know that some or all of the pages in the address range aren't in
* memory.
*
* Unlike fault_in_writeable(), this function is non-destructive.
*
* Note that we don't pin or otherwise hold the pages referenced that we fault
* in. There's no guarantee that they'll stay in memory for any duration of
* time.
*
* Returns the number of bytes not faulted in, like copy_to_user() and
* copy_from_user().
*/
size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
{
unsigned long start = (unsigned long)uaddr, end;
struct mm_struct *mm = current->mm;
bool unlocked = false;
if (unlikely(size == 0))
return 0;
end = PAGE_ALIGN(start + size);
if (end < start)
end = 0;
mmap_read_lock(mm);
do {
if (fixup_user_fault(mm, start, FAULT_FLAG_WRITE, &unlocked))
break;
start = (start + PAGE_SIZE) & PAGE_MASK;
} while (start != end);
mmap_read_unlock(mm);
if (size > (unsigned long)uaddr - start)
return size - ((unsigned long)uaddr - start);
return 0;
}
EXPORT_SYMBOL(fault_in_safe_writeable);
/**
* fault_in_readable - fault in userspace address range for reading
* @uaddr: start of user address range
* @size: size of user address range
*
* Returns the number of bytes not faulted in (like copy_to_user() and
* copy_from_user()).
*/
size_t fault_in_readable(const char __user *uaddr, size_t size)
{
const char __user *start = uaddr, *end;
volatile char c;
if (unlikely(size == 0))
return 0;
if (!PAGE_ALIGNED(uaddr)) {
if (unlikely(__get_user(c, uaddr) != 0))
return size;
uaddr = (const char __user *)PAGE_ALIGN((unsigned long)uaddr);
}
end = (const char __user *)PAGE_ALIGN((unsigned long)start + size);
if (unlikely(end < start))
end = NULL;
while (uaddr != end) {
if (unlikely(__get_user(c, uaddr) != 0))
goto out;
uaddr += PAGE_SIZE;
}
out:
(void)c;
if (size > uaddr - start)
return size - (uaddr - start);
return 0;
}
EXPORT_SYMBOL(fault_in_readable);
/**
* get_dump_page() - pin user page in memory while writing it to core dump
* @addr: user address
@ -2851,7 +2733,7 @@ static int internal_get_user_pages_fast(unsigned long start,
if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
FOLL_FORCE | FOLL_PIN | FOLL_GET |
FOLL_FAST_ONLY | FOLL_NOFAULT)))
FOLL_FAST_ONLY)))
return -EINVAL;
if (gup_flags & FOLL_PIN)

View File

@ -528,8 +528,6 @@ static bool __init kfence_init_pool(void)
* enters __slab_free() slow-path.
*/
for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) {
struct page *page = &pages[i];
if (!i || (i % 2))
continue;
@ -537,11 +535,7 @@ static bool __init kfence_init_pool(void)
if (WARN_ON(compound_head(&pages[i]) != &pages[i]))
goto err;
__SetPageSlab(page);
#ifdef CONFIG_MEMCG
page->memcg_data = (unsigned long)&kfence_metadata[i / 2 - 1].objcg |
MEMCG_DATA_OBJCGS;
#endif
__SetPageSlab(&pages[i]);
}
/*
@ -917,9 +911,6 @@ void __kfence_free(void *addr)
{
struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);
#ifdef CONFIG_MEMCG
KFENCE_WARN_ON(meta->objcg);
#endif
/*
* If the objects of the cache are SLAB_TYPESAFE_BY_RCU, defer freeing
* the object, as the object page may be recycled for other-typed

View File

@ -89,9 +89,6 @@ struct kfence_metadata {
struct kfence_track free_track;
/* For updating alloc_covered on frees. */
u32 alloc_stack_hash;
#ifdef CONFIG_MEMCG
struct obj_cgroup *objcg;
#endif
};
extern struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS];

View File

@ -929,7 +929,7 @@ static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
{ offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
PTR_TO_BTF_ID_OR_NULL },
{ offsetof(struct bpf_iter__bpf_sk_storage_map, value),
PTR_TO_BUF | PTR_MAYBE_NULL },
PTR_TO_RDWR_BUF_OR_NULL },
},
.seq_info = &iter_seq_info,
};

View File

@ -1713,7 +1713,7 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg3_type = ARG_PTR_TO_MEM,
.arg4_type = ARG_CONST_SIZE,
.arg5_type = ARG_ANYTHING,
};
@ -2018,9 +2018,9 @@ static const struct bpf_func_proto bpf_csum_diff_proto = {
.gpl_only = false,
.pkt_access = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
.arg1_type = ARG_PTR_TO_MEM_OR_NULL,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
.arg3_type = ARG_PTR_TO_MEM_OR_NULL,
.arg4_type = ARG_CONST_SIZE_OR_ZERO,
.arg5_type = ARG_ANYTHING,
};
@ -2541,7 +2541,7 @@ static const struct bpf_func_proto bpf_redirect_neigh_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_ANYTHING,
.arg2_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM_OR_NULL,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
};
@ -4177,7 +4177,7 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = {
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg4_type = ARG_PTR_TO_MEM,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
@ -4191,7 +4191,7 @@ const struct bpf_func_proto bpf_skb_output_proto = {
.arg1_btf_id = &bpf_skb_output_btf_ids[0],
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg4_type = ARG_PTR_TO_MEM,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
@ -4374,7 +4374,7 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
};
@ -4400,7 +4400,7 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
};
@ -4570,7 +4570,7 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg4_type = ARG_PTR_TO_MEM,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
@ -4584,7 +4584,7 @@ const struct bpf_func_proto bpf_xdp_output_proto = {
.arg1_btf_id = &bpf_xdp_output_btf_ids[0],
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg4_type = ARG_PTR_TO_MEM,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
@ -5072,7 +5072,7 @@ const struct bpf_func_proto bpf_sk_setsockopt_proto = {
.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg4_type = ARG_PTR_TO_MEM,
.arg5_type = ARG_CONST_SIZE,
};
@ -5106,7 +5106,7 @@ static const struct bpf_func_proto bpf_sock_addr_setsockopt_proto = {
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg4_type = ARG_PTR_TO_MEM,
.arg5_type = ARG_CONST_SIZE,
};
@ -5140,7 +5140,7 @@ static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg4_type = ARG_PTR_TO_MEM,
.arg5_type = ARG_CONST_SIZE,
};
@ -5315,7 +5315,7 @@ static const struct bpf_func_proto bpf_bind_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
};
@ -5903,7 +5903,7 @@ static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg3_type = ARG_PTR_TO_MEM,
.arg4_type = ARG_CONST_SIZE
};
@ -5913,7 +5913,7 @@ static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg3_type = ARG_PTR_TO_MEM,
.arg4_type = ARG_CONST_SIZE
};
@ -5956,7 +5956,7 @@ static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg3_type = ARG_PTR_TO_MEM,
.arg4_type = ARG_CONST_SIZE
};
@ -6044,7 +6044,7 @@ static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg3_type = ARG_PTR_TO_MEM,
.arg4_type = ARG_CONST_SIZE
};
@ -6269,7 +6269,7 @@ static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = {
.pkt_access = true,
.ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
@ -6288,7 +6288,7 @@ static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
.pkt_access = true,
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
@ -6307,7 +6307,7 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
.pkt_access = true,
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
@ -6344,7 +6344,7 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
.pkt_access = true,
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
@ -6367,7 +6367,7 @@ static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = {
.pkt_access = true,
.ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
@ -6390,7 +6390,7 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
.pkt_access = true,
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
@ -6409,7 +6409,7 @@ static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = {
.gpl_only = false,
.ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
@ -6428,7 +6428,7 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
.gpl_only = false,
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
@ -6447,7 +6447,7 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
.gpl_only = false,
.ret_type = RET_PTR_TO_SOCKET_OR_NULL,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
@ -6769,9 +6769,9 @@ static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
.pkt_access = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg4_type = ARG_PTR_TO_MEM,
.arg5_type = ARG_CONST_SIZE,
};
@ -6838,9 +6838,9 @@ static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
.pkt_access = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg4_type = ARG_PTR_TO_MEM,
.arg5_type = ARG_CONST_SIZE,
};
@ -7069,7 +7069,7 @@ static const struct bpf_func_proto bpf_sock_ops_store_hdr_opt_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
};

View File

@ -1575,7 +1575,7 @@ static struct bpf_iter_reg sock_map_iter_reg = {
.ctx_arg_info_size = 2,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__sockmap, key),
PTR_TO_BUF | PTR_MAYBE_NULL | MEM_RDONLY },
PTR_TO_RDONLY_BUF_OR_NULL },
{ offsetof(struct bpf_iter__sockmap, sk),
PTR_TO_BTF_ID_OR_NULL },
},

View File

@ -7,7 +7,6 @@
#include "test_ksyms_btf.skel.h"
#include "test_ksyms_btf_null_check.skel.h"
#include "test_ksyms_weak.skel.h"
#include "test_ksyms_btf_write_check.skel.h"
static int duration;
@ -110,16 +109,6 @@ static void test_weak_syms(void)
test_ksyms_weak__destroy(skel);
}
static void test_write_check(void)
{
struct test_ksyms_btf_write_check *skel;
skel = test_ksyms_btf_write_check__open_and_load();
ASSERT_ERR_PTR(skel, "unexpected load of a prog writing to ksym memory\n");
test_ksyms_btf_write_check__destroy(skel);
}
void test_ksyms_btf(void)
{
int percpu_datasec;
@ -147,7 +136,4 @@ void test_ksyms_btf(void)
if (test__start_subtest("weak_ksyms"))
test_weak_syms();
if (test__start_subtest("write_check"))
test_write_check();
}

View File

@ -1,29 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Google */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
extern const int bpf_prog_active __ksym; /* int type global var. */
SEC("raw_tp/sys_enter")
int handler(const void *ctx)
{
int *active;
__u32 cpu;
cpu = bpf_get_smp_processor_id();
active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
if (active) {
/* Kernel memory obtained from bpf_{per,this}_cpu_ptr
* is read-only, should _not_ pass verification.
*/
/* WRITE_ONCE */
*(volatile int *)active = -1;
}
return 0;
}
char _license[] SEC("license") = "GPL";

View File

@ -107,25 +107,6 @@
.result = REJECT,
.errstr = "R0 min value is outside of the allowed memory range",
},
{
"calls: trigger reg2btf_ids[reg->type] for reg->type > __BPF_REG_TYPE_MAX",
.insns = {
BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.errstr = "arg#0 pointer type STRUCT prog_test_ref_kfunc must point",
.fixup_kfunc_btf_id = {
{ "bpf_kfunc_call_test_acquire", 3 },
{ "bpf_kfunc_call_test_release", 5 },
},
},
{
"calls: overlapping caller/callee",
.insns = {