Merge branch 'bpf-af-xdp-cleanups'

Björn Töpel says:

====================
This is the second follow-up set. The first four patches are uapi
changes:

* Removing rebind support
* Getting rid of structure hole
* Removing explicit cache line alignment
* Stricter bind checks

The last patches do some cleanups, where the umem and refcount_t
changes were suggested by Daniel.

* Add a missing write-barrier and use READ_ONCE for data-dependencies
* Clean up umem and do proper locking
* Convert atomic_t to refcount_t
====================

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
This commit is contained in:
Daniel Borkmann 2018-05-22 10:25:07 +02:00
commit fd0bfa8d6e
6 changed files with 226 additions and 157 deletions

View File

@ -17,19 +17,33 @@
struct sockaddr_xdp {
__u16 sxdp_family;
__u16 sxdp_flags;
__u32 sxdp_ifindex;
__u32 sxdp_queue_id;
__u32 sxdp_shared_umem_fd;
__u16 sxdp_flags;
};
/* Byte offsets, relative to the start of one ring's memory area, of that
 * ring's producer index, consumer index and descriptor array. Reported to
 * user space through the XDP_MMAP_OFFSETS socket option.
 */
struct xdp_ring_offset {
__u64 producer; /* offset of the producer index */
__u64 consumer; /* offset of the consumer index */
__u64 desc; /* offset of the first descriptor */
};
/* Result of the XDP_MMAP_OFFSETS socket option: one xdp_ring_offset for
 * each of the four rings of an XDP socket.
 */
struct xdp_mmap_offsets {
struct xdp_ring_offset rx;
struct xdp_ring_offset tx;
struct xdp_ring_offset fr; /* Fill */
struct xdp_ring_offset cr; /* Completion */
};
/* XDP socket options */
#define XDP_RX_RING 1
#define XDP_TX_RING 2
#define XDP_UMEM_REG 3
#define XDP_UMEM_FILL_RING 4
#define XDP_UMEM_COMPLETION_RING 5
#define XDP_STATISTICS 6
#define XDP_MMAP_OFFSETS 1
#define XDP_RX_RING 2
#define XDP_TX_RING 3
#define XDP_UMEM_REG 4
#define XDP_UMEM_FILL_RING 5
#define XDP_UMEM_COMPLETION_RING 6
#define XDP_STATISTICS 7
struct xdp_umem_reg {
__u64 addr; /* Start of packet data area */
@ -50,6 +64,7 @@ struct xdp_statistics {
#define XDP_UMEM_PGOFF_FILL_RING 0x100000000
#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000
/* Rx/Tx descriptor */
struct xdp_desc {
__u32 idx;
__u32 len;
@ -58,21 +73,6 @@ struct xdp_desc {
__u8 padding[5];
};
struct xdp_ring {
__u32 producer __attribute__((aligned(64)));
__u32 consumer __attribute__((aligned(64)));
};
/* Used for the RX and TX queues for packets */
struct xdp_rxtx_ring {
struct xdp_ring ptrs;
struct xdp_desc desc[0] __attribute__((aligned(64)));
};
/* Used for the fill and completion queues for buffers */
struct xdp_umem_ring {
struct xdp_ring ptrs;
__u32 desc[0] __attribute__((aligned(64)));
};
/* UMEM descriptor is __u32 */
#endif /* _LINUX_IF_XDP_H */

View File

@ -16,39 +16,25 @@
#define XDP_UMEM_MIN_FRAME_SIZE 2048
int xdp_umem_create(struct xdp_umem **umem)
{
*umem = kzalloc(sizeof(**umem), GFP_KERNEL);
if (!*umem)
return -ENOMEM;
return 0;
}
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
unsigned int i;
if (umem->pgs) {
for (i = 0; i < umem->npgs; i++) {
struct page *page = umem->pgs[i];
for (i = 0; i < umem->npgs; i++) {
struct page *page = umem->pgs[i];
set_page_dirty_lock(page);
put_page(page);
}
kfree(umem->pgs);
umem->pgs = NULL;
set_page_dirty_lock(page);
put_page(page);
}
kfree(umem->pgs);
umem->pgs = NULL;
}
static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
if (umem->user) {
atomic_long_sub(umem->npgs, &umem->user->locked_vm);
free_uid(umem->user);
}
atomic_long_sub(umem->npgs, &umem->user->locked_vm);
free_uid(umem->user);
}
static void xdp_umem_release(struct xdp_umem *umem)
@ -66,22 +52,18 @@ static void xdp_umem_release(struct xdp_umem *umem)
umem->cq = NULL;
}
if (umem->pgs) {
xdp_umem_unpin_pages(umem);
xdp_umem_unpin_pages(umem);
task = get_pid_task(umem->pid, PIDTYPE_PID);
put_pid(umem->pid);
if (!task)
goto out;
mm = get_task_mm(task);
put_task_struct(task);
if (!mm)
goto out;
mmput(mm);
umem->pgs = NULL;
}
task = get_pid_task(umem->pid, PIDTYPE_PID);
put_pid(umem->pid);
if (!task)
goto out;
mm = get_task_mm(task);
put_task_struct(task);
if (!mm)
goto out;
mmput(mm);
xdp_umem_unaccount_pages(umem);
out:
kfree(umem);
@ -96,7 +78,7 @@ static void xdp_umem_release_deferred(struct work_struct *work)
void xdp_get_umem(struct xdp_umem *umem)
{
atomic_inc(&umem->users);
refcount_inc(&umem->users);
}
void xdp_put_umem(struct xdp_umem *umem)
@ -104,7 +86,7 @@ void xdp_put_umem(struct xdp_umem *umem)
if (!umem)
return;
if (atomic_dec_and_test(&umem->users)) {
if (refcount_dec_and_test(&umem->users)) {
INIT_WORK(&umem->work, xdp_umem_release_deferred);
schedule_work(&umem->work);
}
@ -167,16 +149,13 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
return 0;
}
int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
u32 frame_size = mr->frame_size, frame_headroom = mr->frame_headroom;
u64 addr = mr->addr, size = mr->len;
unsigned int nframes, nfpp;
int size_chk, err;
if (!umem)
return -EINVAL;
if (frame_size < XDP_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) {
/* Strictly speaking we could support this, if:
* - huge pages, or*
@ -227,7 +206,7 @@ int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
umem->frame_size_log2 = ilog2(frame_size);
umem->nfpp_mask = nfpp - 1;
umem->nfpplog2 = ilog2(nfpp);
atomic_set(&umem->users, 1);
refcount_set(&umem->users, 1);
err = xdp_umem_account_pages(umem);
if (err)
@ -245,6 +224,24 @@ int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
return err;
}
/* Allocate a zeroed umem and register it using the parameters in @mr.
 *
 * Returns the new umem on success. On failure returns an ERR_PTR():
 * -ENOMEM if the allocation fails, otherwise the error code returned by
 * xdp_umem_reg(). Callers must test the result with IS_ERR(), not
 * against NULL.
 */
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
struct xdp_umem *umem;
int err;
umem = kzalloc(sizeof(*umem), GFP_KERNEL);
if (!umem)
return ERR_PTR(-ENOMEM);
err = xdp_umem_reg(umem, mr);
if (err) {
/* Registration failed: free the allocation made above. */
kfree(umem);
return ERR_PTR(err);
}
return umem;
}
bool xdp_umem_validate_queues(struct xdp_umem *umem)
{
return umem->fq && umem->cq;

View File

@ -27,7 +27,7 @@ struct xdp_umem {
struct pid *pid;
unsigned long address;
size_t size;
atomic_t users;
refcount_t users;
struct work_struct work;
};
@ -50,9 +50,8 @@ static inline char *xdp_umem_get_data_with_headroom(struct xdp_umem *umem,
}
bool xdp_umem_validate_queues(struct xdp_umem *umem);
int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr);
void xdp_get_umem(struct xdp_umem *umem);
void xdp_put_umem(struct xdp_umem *umem);
int xdp_umem_create(struct xdp_umem **umem);
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr);
#endif /* XDP_UMEM_H_ */

View File

@ -142,6 +142,11 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
goto out;
}
if (xs->queue_id >= xs->dev->real_num_tx_queues) {
err = -ENXIO;
goto out;
}
skb = sock_alloc_send_skb(sk, len, !need_wait, &err);
if (unlikely(!skb)) {
err = -EAGAIN;
@ -223,18 +228,12 @@ static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
if (!q)
return -ENOMEM;
/* Make sure queue is ready before it can be seen by others */
smp_wmb();
*queue = q;
return 0;
}
static void __xsk_release(struct xdp_sock *xs)
{
/* Wait for driver to stop using the xdp socket. */
synchronize_net();
dev_put(xs->dev);
}
static int xsk_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@ -251,7 +250,9 @@ static int xsk_release(struct socket *sock)
local_bh_enable();
if (xs->dev) {
__xsk_release(xs);
/* Wait for driver to stop using the xdp socket. */
synchronize_net();
dev_put(xs->dev);
xs->dev = NULL;
}
@ -285,9 +286,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
struct sock *sk = sock->sk;
struct net_device *dev, *dev_curr;
struct xdp_sock *xs = xdp_sk(sk);
struct xdp_umem *old_umem = NULL;
struct net_device *dev;
int err = 0;
if (addr_len < sizeof(struct sockaddr_xdp))
@ -296,7 +296,11 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
return -EINVAL;
mutex_lock(&xs->mutex);
dev_curr = xs->dev;
if (xs->dev) {
err = -EBUSY;
goto out_release;
}
dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex);
if (!dev) {
err = -ENODEV;
@ -308,7 +312,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
goto out_unlock;
}
if (sxdp->sxdp_queue_id >= dev->num_rx_queues) {
if ((xs->rx && sxdp->sxdp_queue_id >= dev->real_num_rx_queues) ||
(xs->tx && sxdp->sxdp_queue_id >= dev->real_num_tx_queues)) {
err = -EINVAL;
goto out_unlock;
}
@ -343,7 +348,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
}
xdp_get_umem(umem_xs->umem);
old_umem = xs->umem;
xs->umem = umem_xs->umem;
sockfd_put(sock);
} else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
@ -355,14 +359,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xskq_set_umem(xs->umem->cq, &xs->umem->props);
}
/* Rebind? */
if (dev_curr && (dev_curr != dev ||
xs->queue_id != sxdp->sxdp_queue_id)) {
__xsk_release(xs);
if (old_umem)
xdp_put_umem(old_umem);
}
xs->dev = dev;
xs->queue_id = sxdp->sxdp_queue_id;
@ -410,25 +406,23 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
struct xdp_umem_reg mr;
struct xdp_umem *umem;
if (xs->umem)
return -EBUSY;
if (copy_from_user(&mr, optval, sizeof(mr)))
return -EFAULT;
mutex_lock(&xs->mutex);
err = xdp_umem_create(&umem);
err = xdp_umem_reg(umem, &mr);
if (err) {
kfree(umem);
if (xs->umem) {
mutex_unlock(&xs->mutex);
return err;
return -EBUSY;
}
umem = xdp_umem_create(&mr);
if (IS_ERR(umem)) {
mutex_unlock(&xs->mutex);
return PTR_ERR(umem);
}
/* Make sure umem is ready before it can be seen by others */
smp_wmb();
xs->umem = umem;
mutex_unlock(&xs->mutex);
return 0;
@ -439,13 +433,15 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
struct xsk_queue **q;
int entries;
if (!xs->umem)
return -EINVAL;
if (copy_from_user(&entries, optval, sizeof(entries)))
return -EFAULT;
mutex_lock(&xs->mutex);
if (!xs->umem) {
mutex_unlock(&xs->mutex);
return -EINVAL;
}
q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq :
&xs->umem->cq;
err = xsk_init_queue(entries, q, true);
@ -495,6 +491,35 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname,
return 0;
}
case XDP_MMAP_OFFSETS:
{
struct xdp_mmap_offsets off;
if (len < sizeof(off))
return -EINVAL;
off.rx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
off.rx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
off.rx.desc = offsetof(struct xdp_rxtx_ring, desc);
off.tx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
off.tx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
off.tx.desc = offsetof(struct xdp_rxtx_ring, desc);
off.fr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
off.fr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
off.fr.desc = offsetof(struct xdp_umem_ring, desc);
off.cr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
off.cr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
off.cr.desc = offsetof(struct xdp_umem_ring, desc);
len = sizeof(off);
if (copy_to_user(optval, &off, len))
return -EFAULT;
if (put_user(len, optlen))
return -EFAULT;
return 0;
}
default:
break;
}
@ -509,21 +534,23 @@ static int xsk_mmap(struct file *file, struct socket *sock,
unsigned long size = vma->vm_end - vma->vm_start;
struct xdp_sock *xs = xdp_sk(sock->sk);
struct xsk_queue *q = NULL;
struct xdp_umem *umem;
unsigned long pfn;
struct page *qpg;
if (offset == XDP_PGOFF_RX_RING) {
q = xs->rx;
q = READ_ONCE(xs->rx);
} else if (offset == XDP_PGOFF_TX_RING) {
q = xs->tx;
q = READ_ONCE(xs->tx);
} else {
if (!xs->umem)
umem = READ_ONCE(xs->umem);
if (!umem)
return -EINVAL;
if (offset == XDP_UMEM_PGOFF_FILL_RING)
q = xs->umem->fq;
q = READ_ONCE(umem->fq);
else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
q = xs->umem->cq;
q = READ_ONCE(umem->cq);
}
if (!q)

View File

@ -13,6 +13,23 @@
#define RX_BATCH_SIZE 16
/* Producer and consumer indices shared by both ring types below.
 * NOTE(review): each index is marked ____cacheline_aligned_in_smp,
 * presumably so the two do not share a cache line on SMP builds - confirm.
 */
struct xdp_ring {
u32 producer ____cacheline_aligned_in_smp;
u32 consumer ____cacheline_aligned_in_smp;
};
/* Used for the RX and TX queues for packets.
 * Layout: the producer/consumer indices, followed by a trailing array of
 * struct xdp_desc entries whose length is not fixed by the type.
 */
struct xdp_rxtx_ring {
struct xdp_ring ptrs;
struct xdp_desc desc[0] ____cacheline_aligned_in_smp;
};
/* Used for the fill and completion queues for buffers.
 * Layout: the producer/consumer indices, followed by a trailing array of
 * u32 entries whose length is not fixed by the type.
 */
struct xdp_umem_ring {
struct xdp_ring ptrs;
u32 desc[0] ____cacheline_aligned_in_smp;
};
struct xsk_queue {
struct xdp_umem_props umem_props;
u32 ring_mask;

View File

@ -79,7 +79,10 @@ struct xdp_umem_uqueue {
u32 cached_cons;
u32 mask;
u32 size;
struct xdp_umem_ring *ring;
u32 *producer;
u32 *consumer;
u32 *ring;
void *map;
};
struct xdp_umem {
@ -94,7 +97,10 @@ struct xdp_uqueue {
u32 cached_cons;
u32 mask;
u32 size;
struct xdp_rxtx_ring *ring;
u32 *producer;
u32 *consumer;
struct xdp_desc *ring;
void *map;
};
struct xdpsock {
@ -155,7 +161,7 @@ static inline u32 umem_nb_free(struct xdp_umem_uqueue *q, u32 nb)
return free_entries;
/* Refresh the local tail pointer */
q->cached_cons = q->ring->ptrs.consumer;
q->cached_cons = *q->consumer;
return q->size - (q->cached_prod - q->cached_cons);
}
@ -168,7 +174,7 @@ static inline u32 xq_nb_free(struct xdp_uqueue *q, u32 ndescs)
return free_entries;
/* Refresh the local tail pointer */
q->cached_cons = q->ring->ptrs.consumer + q->size;
q->cached_cons = *q->consumer + q->size;
return q->cached_cons - q->cached_prod;
}
@ -177,7 +183,7 @@ static inline u32 umem_nb_avail(struct xdp_umem_uqueue *q, u32 nb)
u32 entries = q->cached_prod - q->cached_cons;
if (entries == 0) {
q->cached_prod = q->ring->ptrs.producer;
q->cached_prod = *q->producer;
entries = q->cached_prod - q->cached_cons;
}
@ -189,7 +195,7 @@ static inline u32 xq_nb_avail(struct xdp_uqueue *q, u32 ndescs)
u32 entries = q->cached_prod - q->cached_cons;
if (entries == 0) {
q->cached_prod = q->ring->ptrs.producer;
q->cached_prod = *q->producer;
entries = q->cached_prod - q->cached_cons;
}
@ -208,12 +214,12 @@ static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq,
for (i = 0; i < nb; i++) {
u32 idx = fq->cached_prod++ & fq->mask;
fq->ring->desc[idx] = d[i].idx;
fq->ring[idx] = d[i].idx;
}
u_smp_wmb();
fq->ring->ptrs.producer = fq->cached_prod;
*fq->producer = fq->cached_prod;
return 0;
}
@ -229,12 +235,12 @@ static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u32 *d,
for (i = 0; i < nb; i++) {
u32 idx = fq->cached_prod++ & fq->mask;
fq->ring->desc[idx] = d[i];
fq->ring[idx] = d[i];
}
u_smp_wmb();
fq->ring->ptrs.producer = fq->cached_prod;
*fq->producer = fq->cached_prod;
return 0;
}
@ -248,13 +254,13 @@ static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq,
for (i = 0; i < entries; i++) {
idx = cq->cached_cons++ & cq->mask;
d[i] = cq->ring->desc[idx];
d[i] = cq->ring[idx];
}
if (entries > 0) {
u_smp_wmb();
cq->ring->ptrs.consumer = cq->cached_cons;
*cq->consumer = cq->cached_cons;
}
return entries;
@ -270,7 +276,7 @@ static inline int xq_enq(struct xdp_uqueue *uq,
const struct xdp_desc *descs,
unsigned int ndescs)
{
struct xdp_rxtx_ring *r = uq->ring;
struct xdp_desc *r = uq->ring;
unsigned int i;
if (xq_nb_free(uq, ndescs) < ndescs)
@ -279,21 +285,21 @@ static inline int xq_enq(struct xdp_uqueue *uq,
for (i = 0; i < ndescs; i++) {
u32 idx = uq->cached_prod++ & uq->mask;
r->desc[idx].idx = descs[i].idx;
r->desc[idx].len = descs[i].len;
r->desc[idx].offset = descs[i].offset;
r[idx].idx = descs[i].idx;
r[idx].len = descs[i].len;
r[idx].offset = descs[i].offset;
}
u_smp_wmb();
r->ptrs.producer = uq->cached_prod;
*uq->producer = uq->cached_prod;
return 0;
}
static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
__u32 idx, unsigned int ndescs)
{
struct xdp_rxtx_ring *q = uq->ring;
struct xdp_desc *r = uq->ring;
unsigned int i;
if (xq_nb_free(uq, ndescs) < ndescs)
@ -302,14 +308,14 @@ static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
for (i = 0; i < ndescs; i++) {
u32 idx = uq->cached_prod++ & uq->mask;
q->desc[idx].idx = idx + i;
q->desc[idx].len = sizeof(pkt_data) - 1;
q->desc[idx].offset = 0;
r[idx].idx = idx + i;
r[idx].len = sizeof(pkt_data) - 1;
r[idx].offset = 0;
}
u_smp_wmb();
q->ptrs.producer = uq->cached_prod;
*uq->producer = uq->cached_prod;
return 0;
}
@ -317,7 +323,7 @@ static inline int xq_deq(struct xdp_uqueue *uq,
struct xdp_desc *descs,
int ndescs)
{
struct xdp_rxtx_ring *r = uq->ring;
struct xdp_desc *r = uq->ring;
unsigned int idx;
int i, entries;
@ -327,13 +333,13 @@ static inline int xq_deq(struct xdp_uqueue *uq,
for (i = 0; i < entries; i++) {
idx = uq->cached_cons++ & uq->mask;
descs[i] = r->desc[idx];
descs[i] = r[idx];
}
if (entries > 0) {
u_smp_wmb();
r->ptrs.consumer = uq->cached_cons;
*uq->consumer = uq->cached_cons;
}
return entries;
@ -392,8 +398,10 @@ static size_t gen_eth_frame(char *frame)
static struct xdp_umem *xdp_umem_configure(int sfd)
{
int fq_size = FQ_NUM_DESCS, cq_size = CQ_NUM_DESCS;
struct xdp_mmap_offsets off;
struct xdp_umem_reg mr;
struct xdp_umem *umem;
socklen_t optlen;
void *bufs;
umem = calloc(1, sizeof(*umem));
@ -413,25 +421,35 @@ static struct xdp_umem *xdp_umem_configure(int sfd)
lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size,
sizeof(int)) == 0);
umem->fq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
FQ_NUM_DESCS * sizeof(u32),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_UMEM_PGOFF_FILL_RING);
lassert(umem->fq.ring != MAP_FAILED);
optlen = sizeof(off);
lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
&optlen) == 0);
umem->fq.map = mmap(0, off.fr.desc +
FQ_NUM_DESCS * sizeof(u32),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_UMEM_PGOFF_FILL_RING);
lassert(umem->fq.map != MAP_FAILED);
umem->fq.mask = FQ_NUM_DESCS - 1;
umem->fq.size = FQ_NUM_DESCS;
umem->fq.producer = umem->fq.map + off.fr.producer;
umem->fq.consumer = umem->fq.map + off.fr.consumer;
umem->fq.ring = umem->fq.map + off.fr.desc;
umem->cq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
umem->cq.map = mmap(0, off.cr.desc +
CQ_NUM_DESCS * sizeof(u32),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_UMEM_PGOFF_COMPLETION_RING);
lassert(umem->cq.ring != MAP_FAILED);
lassert(umem->cq.map != MAP_FAILED);
umem->cq.mask = CQ_NUM_DESCS - 1;
umem->cq.size = CQ_NUM_DESCS;
umem->cq.producer = umem->cq.map + off.cr.producer;
umem->cq.consumer = umem->cq.map + off.cr.consumer;
umem->cq.ring = umem->cq.map + off.cr.desc;
umem->frames = (char (*)[FRAME_SIZE])bufs;
umem->fd = sfd;
@ -449,9 +467,11 @@ static struct xdp_umem *xdp_umem_configure(int sfd)
static struct xdpsock *xsk_configure(struct xdp_umem *umem)
{
struct sockaddr_xdp sxdp = {};
struct xdp_mmap_offsets off;
int sfd, ndescs = NUM_DESCS;
struct xdpsock *xsk;
bool shared = true;
socklen_t optlen;
u32 i;
sfd = socket(PF_XDP, SOCK_RAW, 0);
@ -474,15 +494,18 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem)
&ndescs, sizeof(int)) == 0);
lassert(setsockopt(sfd, SOL_XDP, XDP_TX_RING,
&ndescs, sizeof(int)) == 0);
optlen = sizeof(off);
lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
&optlen) == 0);
/* Rx */
xsk->rx.ring = mmap(NULL,
sizeof(struct xdp_ring) +
NUM_DESCS * sizeof(struct xdp_desc),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_PGOFF_RX_RING);
lassert(xsk->rx.ring != MAP_FAILED);
xsk->rx.map = mmap(NULL,
off.rx.desc +
NUM_DESCS * sizeof(struct xdp_desc),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_PGOFF_RX_RING);
lassert(xsk->rx.map != MAP_FAILED);
if (!shared) {
for (i = 0; i < NUM_DESCS / 2; i++)
@ -491,19 +514,25 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem)
}
/* Tx */
xsk->tx.ring = mmap(NULL,
sizeof(struct xdp_ring) +
NUM_DESCS * sizeof(struct xdp_desc),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_PGOFF_TX_RING);
lassert(xsk->tx.ring != MAP_FAILED);
xsk->tx.map = mmap(NULL,
off.tx.desc +
NUM_DESCS * sizeof(struct xdp_desc),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_PGOFF_TX_RING);
lassert(xsk->tx.map != MAP_FAILED);
xsk->rx.mask = NUM_DESCS - 1;
xsk->rx.size = NUM_DESCS;
xsk->rx.producer = xsk->rx.map + off.rx.producer;
xsk->rx.consumer = xsk->rx.map + off.rx.consumer;
xsk->rx.ring = xsk->rx.map + off.rx.desc;
xsk->tx.mask = NUM_DESCS - 1;
xsk->tx.size = NUM_DESCS;
xsk->tx.producer = xsk->tx.map + off.tx.producer;
xsk->tx.consumer = xsk->tx.map + off.tx.consumer;
xsk->tx.ring = xsk->tx.map + off.tx.desc;
sxdp.sxdp_family = PF_XDP;
sxdp.sxdp_ifindex = opt_ifindex;