From e0d3974ac77b0d581b92affe5851fc40ad2f42a4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Dec 2017 18:39:03 -0800 Subject: [PATCH 1/9] bpf: offload: don't require rtnl for dev list manipulation We don't need the RTNL lock for all operations on offload state. We only need to hold it around ndo calls. The device offload initialization doesn't require it. The soon-to-come querying of the offload info will only need it partially. We will also be able to remove the waitqueue in following patches. Use struct rw_semaphore because map offload will require sleeping with the semaphore held for read. Suggested-by: Kirill Tkhai Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- kernel/bpf/offload.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 8455b89d1bbf..032079754d88 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -20,13 +20,16 @@ #include #include #include +#include -/* protected by RTNL */ +/* Protects bpf_prog_offload_devs and offload members of all progs. + * RTNL lock cannot be taken when holding this lock. + */ +static DECLARE_RWSEM(bpf_devs_lock); static LIST_HEAD(bpf_prog_offload_devs); int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) { - struct net *net = current->nsproxy->net_ns; struct bpf_dev_offload *offload; if (attr->prog_type != BPF_PROG_TYPE_SCHED_CLS && @@ -43,19 +46,26 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) offload->prog = prog; init_waitqueue_head(&offload->verifier_done); - rtnl_lock(); - offload->netdev = __dev_get_by_index(net, attr->prog_ifindex); - if (!offload->netdev) { - rtnl_unlock(); - kfree(offload); - return -EINVAL; - } + offload->netdev = dev_get_by_index(current->nsproxy->net_ns, + attr->prog_ifindex); + if (!offload->netdev) + goto err_free; + down_write(&bpf_devs_lock); + if (offload->netdev->reg_state != NETREG_REGISTERED) + goto err_unlock; prog->aux->offload = offload; list_add_tail(&offload->offloads, &bpf_prog_offload_devs); - rtnl_unlock(); + dev_put(offload->netdev); + up_write(&bpf_devs_lock); return 0; +err_unlock: + up_write(&bpf_devs_lock); + dev_put(offload->netdev); +err_free: + kfree(offload); + return -EINVAL; } static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd, @@ -126,7 +136,9 @@ void bpf_prog_offload_destroy(struct bpf_prog *prog) wake_up(&offload->verifier_done); rtnl_lock(); + down_write(&bpf_devs_lock); __bpf_prog_offload_destroy(prog); + up_write(&bpf_devs_lock); rtnl_unlock(); kfree(offload); @@ -181,11 +193,13 @@ static int bpf_offload_notification(struct notifier_block *notifier, if (netdev->reg_state != NETREG_UNREGISTERING) break; + down_write(&bpf_devs_lock); list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs, offloads) { if (offload->netdev == netdev) __bpf_prog_offload_destroy(offload->prog); } + up_write(&bpf_devs_lock); break; default: break; From 9a18eedb145d080d542766af1d7513ebfccd1604 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Dec 2017 18:39:04 -0800 Subject: [PATCH 2/9] bpf: offload: don't use prog->aux->offload as boolean We currently use aux->offload to indicate that program is bound to a specific device. This forces us to keep the offload structure around even after the device is gone. Add a bool member to struct bpf_prog_aux to indicate if offload was requested. Suggested-by: Alexei Starovoitov Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 3 ++- kernel/bpf/syscall.c | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index da54ef644fcd..838eee10e979 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -201,6 +201,7 @@ struct bpf_prog_aux { u32 stack_depth; u32 id; u32 func_cnt; + bool offload_requested; struct bpf_prog **func; void *jit_data; /* JIT specific data. arch dependent */ struct latch_tree_node ksym_tnode; @@ -529,7 +530,7 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux) { - return aux->offload; + return aux->offload_requested; } #else static inline int bpf_prog_offload_init(struct bpf_prog *prog, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 007802c5ca7d..e0afc2e39fd5 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1157,6 +1157,8 @@ static int bpf_prog_load(union bpf_attr *attr) if (!prog) return -ENOMEM; + prog->aux->offload_requested = !!attr->prog_ifindex; + err = security_bpf_prog_alloc(prog->aux); if (err) goto free_prog_nouncharge; @@ -1178,7 +1180,7 @@ static int bpf_prog_load(union bpf_attr *attr) atomic_set(&prog->aux->refcnt, 1); prog->gpl_compatible = is_gpl ? 1 : 0; - if (attr->prog_ifindex) { + if (bpf_prog_is_dev_bound(prog->aux)) { err = bpf_prog_offload_init(prog, attr); if (err) goto free_prog; From cae1927c0b4a93ae15de824faca1f6f611a44fcd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Dec 2017 18:39:05 -0800 Subject: [PATCH 3/9] bpf: offload: allow netdev to disappear while verifier is running To allow verifier instruction callbacks without any extra locking NETDEV_UNREGISTER notification would wait on a waitqueue for verifier to finish. This design decision was made when rtnl lock was providing all the locking. Use the read/write lock instead and remove the workqueue. Verifier will now call into the offload code, so dev_ops are moved to offload structure. Since verifier calls are all under bpf_prog_is_dev_bound() we no longer need static inline implementations to please builds with CONFIG_NET=n. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/main.h | 2 +- .../net/ethernet/netronome/nfp/bpf/verifier.c | 2 +- drivers/net/netdevsim/bpf.c | 2 +- include/linux/bpf.h | 9 ++++-- include/linux/bpf_verifier.h | 16 ++-------- include/linux/netdevice.h | 4 +-- kernel/bpf/offload.c | 30 ++++++++++--------- kernel/bpf/verifier.c | 20 +++++-------- 8 files changed, 37 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index aae1be9ed056..89a9b6393882 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -238,7 +238,7 @@ struct nfp_bpf_vnic { int nfp_bpf_jit(struct nfp_prog *prog); -extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops; +extern const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops; struct netdev_bpf; struct nfp_app; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 9c2608445bd8..d8870c2f11f3 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -260,6 +260,6 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) return 0; } -const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = { +const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops = { .insn_hook = nfp_verify_insn, }; diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index a243fa7ae02f..5134d5c1306c 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -66,7 +66,7 @@ nsim_bpf_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn) return 0; } -static const struct bpf_ext_analyzer_ops nsim_bpf_analyzer_ops = { +static const struct bpf_prog_offload_ops nsim_bpf_analyzer_ops = { .insn_hook = nsim_bpf_verify_insn, }; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 838eee10e979..669549f7e3e8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -17,6 +17,7 @@ #include #include +struct bpf_verifier_env; struct perf_event; struct bpf_prog; struct bpf_map; @@ -184,14 +185,18 @@ struct bpf_verifier_ops { struct bpf_prog *prog, u32 *target_size); }; +struct bpf_prog_offload_ops { + int (*insn_hook)(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx); +}; + struct bpf_dev_offload { struct bpf_prog *prog; struct net_device *netdev; void *dev_priv; struct list_head offloads; bool dev_state; - bool verifier_running; - wait_queue_head_t verifier_done; + const struct bpf_prog_offload_ops *dev_ops; }; struct bpf_prog_aux { diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 883a35d50cd5..2feb218c001d 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -166,12 +166,6 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifer_log *log) return log->len_used >= log->len_total - 1; } -struct bpf_verifier_env; -struct bpf_ext_analyzer_ops { - int (*insn_hook)(struct bpf_verifier_env *env, - int insn_idx, int prev_insn_idx); -}; - #define BPF_MAX_SUBPROGS 256 /* single container for all structs @@ -185,7 +179,6 @@ struct bpf_verifier_env { bool strict_alignment; /* perform strict pointer alignment checks */ struct bpf_verifier_state *cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ - const struct bpf_ext_analyzer_ops *dev_ops; /* device analyzer ops */ struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ u32 used_map_cnt; /* number of used maps */ u32 id_gen; /* used to generate unique reg IDs */ @@ -206,13 +199,8 @@ static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) return cur->frame[cur->curframe]->regs; } -#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env); -#else -static inline int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env) -{ - return -EOPNOTSUPP; -} -#endif +int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx); #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 352066e4eeef..49bfc6eec74c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -804,7 +804,7 @@ enum bpf_netdev_command { BPF_OFFLOAD_DESTROY, }; -struct bpf_ext_analyzer_ops; +struct bpf_prog_offload_ops; struct netlink_ext_ack; struct netdev_bpf { @@ -826,7 +826,7 @@ struct netdev_bpf { /* BPF_OFFLOAD_VERIFIER_PREP */ struct { struct bpf_prog *prog; - const struct bpf_ext_analyzer_ops *ops; /* callee set */ + const struct bpf_prog_offload_ops *ops; /* callee set */ } verifier; /* BPF_OFFLOAD_TRANSLATE, BPF_OFFLOAD_DESTROY */ struct { diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 032079754d88..69ddc3899bab 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -44,7 +44,6 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) return -ENOMEM; offload->prog = prog; - init_waitqueue_head(&offload->verifier_done); offload->netdev = dev_get_by_index(current->nsproxy->net_ns, attr->prog_ifindex); @@ -97,15 +96,28 @@ int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env) if (err) goto exit_unlock; - env->dev_ops = data.verifier.ops; - + env->prog->aux->offload->dev_ops = data.verifier.ops; env->prog->aux->offload->dev_state = true; - env->prog->aux->offload->verifier_running = true; exit_unlock: rtnl_unlock(); return err; } +int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx) +{ + struct bpf_dev_offload *offload; + int ret = -ENODEV; + + down_read(&bpf_devs_lock); + offload = env->prog->aux->offload; + if (offload->netdev) + ret = offload->dev_ops->insn_hook(env, insn_idx, prev_insn_idx); + up_read(&bpf_devs_lock); + + return ret; +} + static void __bpf_prog_offload_destroy(struct bpf_prog *prog) { struct bpf_dev_offload *offload = prog->aux->offload; @@ -117,9 +129,6 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog) data.offload.prog = prog; - if (offload->verifier_running) - wait_event(offload->verifier_done, !offload->verifier_running); - if (offload->dev_state) WARN_ON(__bpf_offload_ndo(prog, BPF_OFFLOAD_DESTROY, &data)); @@ -132,9 +141,6 @@ void bpf_prog_offload_destroy(struct bpf_prog *prog) { struct bpf_dev_offload *offload = prog->aux->offload; - offload->verifier_running = false; - wake_up(&offload->verifier_done); - rtnl_lock(); down_write(&bpf_devs_lock); __bpf_prog_offload_destroy(prog); @@ -146,15 +152,11 @@ void bpf_prog_offload_destroy(struct bpf_prog *prog) static int bpf_prog_offload_translate(struct bpf_prog *prog) { - struct bpf_dev_offload *offload = prog->aux->offload; struct netdev_bpf data = {}; int ret; data.offload.prog = prog; - offload->verifier_running = false; - wake_up(&offload->verifier_done); - rtnl_lock(); ret = __bpf_offload_ndo(prog, BPF_OFFLOAD_TRANSLATE, &data); rtnl_unlock(); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 98d8637cf70d..a2b211262c25 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4438,15 +4438,6 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) return 0; } -static int ext_analyzer_insn_hook(struct bpf_verifier_env *env, - int insn_idx, int prev_insn_idx) -{ - if (env->dev_ops && env->dev_ops->insn_hook) - return env->dev_ops->insn_hook(env, insn_idx, prev_insn_idx); - - return 0; -} - static int do_check(struct bpf_verifier_env *env) { struct bpf_verifier_state *state; @@ -4531,9 +4522,12 @@ static int do_check(struct bpf_verifier_env *env) print_bpf_insn(&cbs, env, insn, env->allow_ptr_leaks); } - err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx); - if (err) - return err; + if (bpf_prog_is_dev_bound(env->prog->aux)) { + err = bpf_prog_offload_verify_insn(env, insn_idx, + prev_insn_idx); + if (err) + return err; + } regs = cur_regs(env); env->insn_aux_data[insn_idx].seen = true; @@ -5463,7 +5457,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) env->strict_alignment = true; - if (env->prog->aux->offload) { + if (bpf_prog_is_dev_bound(env->prog->aux)) { ret = bpf_prog_offload_verifier_prep(env); if (ret) goto err_unlock; From ce3b9db4db0e0e2b9761c56d08615ea0159e4a1b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Dec 2017 18:39:06 -0800 Subject: [PATCH 4/9] bpf: offload: free prog->aux->offload when device disappears All bpf offload operations should now be under bpf_devs_lock, it's safe to free and clear the entire offload structure, not only the netdev pointer. __bpf_prog_offload_destroy() will no longer be called multiple times. Suggested-by: Alexei Starovoitov Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/offload.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 69ddc3899bab..3126e1a842e6 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -70,12 +70,14 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd, struct netdev_bpf *data) { - struct net_device *netdev = prog->aux->offload->netdev; + struct bpf_dev_offload *offload = prog->aux->offload; + struct net_device *netdev; ASSERT_RTNL(); - if (!netdev) + if (!offload) return -ENODEV; + netdev = offload->netdev; if (!netdev->netdev_ops->ndo_bpf) return -EOPNOTSUPP; @@ -111,7 +113,7 @@ int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env, down_read(&bpf_devs_lock); offload = env->prog->aux->offload; - if (offload->netdev) + if (offload) ret = offload->dev_ops->insn_hook(env, insn_idx, prev_insn_idx); up_read(&bpf_devs_lock); @@ -123,31 +125,24 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog) struct bpf_dev_offload *offload = prog->aux->offload; struct netdev_bpf data = {}; - /* Caution - if netdev is destroyed before the program, this function - * will be called twice. - */ - data.offload.prog = prog; if (offload->dev_state) WARN_ON(__bpf_offload_ndo(prog, BPF_OFFLOAD_DESTROY, &data)); - offload->dev_state = false; list_del_init(&offload->offloads); - offload->netdev = NULL; + kfree(offload); + prog->aux->offload = NULL; } void bpf_prog_offload_destroy(struct bpf_prog *prog) { - struct bpf_dev_offload *offload = prog->aux->offload; - rtnl_lock(); down_write(&bpf_devs_lock); - __bpf_prog_offload_destroy(prog); + if (prog->aux->offload) + __bpf_prog_offload_destroy(prog); up_write(&bpf_devs_lock); rtnl_unlock(); - - kfree(offload); } static int bpf_prog_offload_translate(struct bpf_prog *prog) From ad8ad79f4f6078f456792f7f8d344da2be9bc74f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Dec 2017 18:39:07 -0800 Subject: [PATCH 5/9] bpf: offload: free program id when device disappears Bound programs are quite useless after their device disappears. They are simply waiting for reference count to go to zero, don't list them in BPF_PROG_GET_NEXT_ID by freeing their ID early. Note that orphaned offload programs will return -ENODEV on BPF_OBJ_GET_INFO_BY_FD so user will never see ID 0. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 2 ++ kernel/bpf/offload.c | 3 +++ kernel/bpf/syscall.c | 9 +++++++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 669549f7e3e8..9a916ab34299 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -357,6 +357,8 @@ void bpf_prog_put(struct bpf_prog *prog); int __bpf_prog_charge(struct user_struct *user, u32 pages); void __bpf_prog_uncharge(struct user_struct *user, u32 pages); +void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock); + struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref); diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 3126e1a842e6..e4f1668a021c 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -130,6 +130,9 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog) if (offload->dev_state) WARN_ON(__bpf_offload_ndo(prog, BPF_OFFLOAD_DESTROY, &data)); + /* Make sure BPF_PROG_GET_NEXT_ID can't find this dead program */ + bpf_prog_free_id(prog, true); + list_del_init(&offload->offloads); kfree(offload); prog->aux->offload = NULL; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e0afc2e39fd5..e02dafa6f402 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -905,9 +905,13 @@ static int bpf_prog_alloc_id(struct bpf_prog *prog) return id > 0 ? 0 : id; } -static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) +void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) { - /* cBPF to eBPF migrations are currently not in the idr store. */ + /* cBPF to eBPF migrations are currently not in the idr store. + * Offloaded programs are removed from the store when their device + * disappears - even if someone grabs an fd to them they are unusable, + * simply waiting for refcnt to drop to be freed. + */ if (!prog->aux->id) return; @@ -917,6 +921,7 @@ static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) __acquire(&prog_idr_lock); idr_remove(&prog_idr, prog->aux->id); + prog->aux->id = 0; if (do_idr_lock) spin_unlock_bh(&prog_idr_lock); From cdab6ba8668d68f031bfd5d237b4586ec4f8cd88 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Dec 2017 18:39:08 -0800 Subject: [PATCH 6/9] nsfs: generalize ns_get_path() for path resolution with a task ns_get_path() takes struct task_struct and proc_ns_ops as its parameters. For path resolution directly from a namespace, e.g. based on a networking device's net name space, we need more flexibility. Add a ns_get_path_cb() helper which will allow callers to use any method of obtaining the name space reference. Convert ns_get_path() to use ns_get_path_cb(). Following patches will bring a networking user. CC: Eric W. Biederman Suggested-by: Daniel Borkmann Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- fs/nsfs.c | 29 ++++++++++++++++++++++++++--- include/linux/proc_ns.h | 3 +++ 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/fs/nsfs.c b/fs/nsfs.c index 7c6f76d29f56..36b0772701a0 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -103,14 +103,14 @@ static void *__ns_get_path(struct path *path, struct ns_common *ns) goto got_it; } -void *ns_get_path(struct path *path, struct task_struct *task, - const struct proc_ns_operations *ns_ops) +void *ns_get_path_cb(struct path *path, ns_get_path_helper_t *ns_get_cb, + void *private_data) { struct ns_common *ns; void *ret; again: - ns = ns_ops->get(task); + ns = ns_get_cb(private_data); if (!ns) return ERR_PTR(-ENOENT); @@ -120,6 +120,29 @@ void *ns_get_path(struct path *path, struct task_struct *task, return ret; } +struct ns_get_path_task_args { + const struct proc_ns_operations *ns_ops; + struct task_struct *task; +}; + +static struct ns_common *ns_get_path_task(void *private_data) +{ + struct ns_get_path_task_args *args = private_data; + + return args->ns_ops->get(args->task); +} + +void *ns_get_path(struct path *path, struct task_struct *task, + const struct proc_ns_operations *ns_ops) +{ + struct ns_get_path_task_args args = { + .ns_ops = ns_ops, + .task = task, + }; + + return ns_get_path_cb(path, ns_get_path_task, &args); +} + int open_related_ns(struct ns_common *ns, struct ns_common *(*get_ns)(struct ns_common *ns)) { diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 2ff18c9840a7..d31cb6215905 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -78,6 +78,9 @@ extern struct file *proc_ns_fget(int fd); #define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private) extern void *ns_get_path(struct path *path, struct task_struct *task, const struct proc_ns_operations *ns_ops); +typedef struct ns_common *ns_get_path_helper_t(void *); +extern void *ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb, + void *private_data); extern int ns_get_name(char *buf, size_t size, struct task_struct *task, const struct proc_ns_operations *ns_ops); From 675fc275a3a2d905535207237402c6d8dcb5fa4b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Dec 2017 18:39:09 -0800 Subject: [PATCH 7/9] bpf: offload: report device information for offloaded programs Report to the user ifindex and namespace information of offloaded programs. If device has disappeared return -ENODEV. Specify the namespace using dev/inode combination. CC: Eric W. Biederman Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 2 ++ include/uapi/linux/bpf.h | 3 ++ kernel/bpf/offload.c | 59 ++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 6 ++++ tools/include/uapi/linux/bpf.h | 3 ++ 5 files changed, 73 insertions(+) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9a916ab34299..7810ae57b357 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -531,6 +531,8 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, int bpf_prog_offload_compile(struct bpf_prog *prog); void bpf_prog_offload_destroy(struct bpf_prog *prog); +int bpf_prog_offload_info_fill(struct bpf_prog_info *info, + struct bpf_prog *prog); #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 69eabfcb9bdb..f2f8b36e2ad4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -921,6 +921,9 @@ struct bpf_prog_info { __u32 nr_map_ids; __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u64 netns_dev; + __u64 netns_ino; } __attribute__((aligned(8))); struct bpf_map_info { diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index e4f1668a021c..040d4e0edf3f 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -16,9 +16,11 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -176,6 +178,63 @@ int bpf_prog_offload_compile(struct bpf_prog *prog) return bpf_prog_offload_translate(prog); } +struct ns_get_path_bpf_prog_args { + struct bpf_prog *prog; + struct bpf_prog_info *info; +}; + +static struct ns_common *bpf_prog_offload_info_fill_ns(void *private_data) +{ + struct ns_get_path_bpf_prog_args *args = private_data; + struct bpf_prog_aux *aux = args->prog->aux; + struct ns_common *ns; + struct net *net; + + rtnl_lock(); + down_read(&bpf_devs_lock); + + if (aux->offload) { + args->info->ifindex = aux->offload->netdev->ifindex; + net = dev_net(aux->offload->netdev); + get_net(net); + ns = &net->ns; + } else { + args->info->ifindex = 0; + ns = NULL; + } + + up_read(&bpf_devs_lock); + rtnl_unlock(); + + return ns; +} + +int bpf_prog_offload_info_fill(struct bpf_prog_info *info, + struct bpf_prog *prog) +{ + struct ns_get_path_bpf_prog_args args = { + .prog = prog, + .info = info, + }; + struct inode *ns_inode; + struct path ns_path; + void *res; + + res = ns_get_path_cb(&ns_path, bpf_prog_offload_info_fill_ns, &args); + if (IS_ERR(res)) { + if (!info->ifindex) + return -ENODEV; + return PTR_ERR(res); + } + + ns_inode = ns_path.dentry->d_inode; + info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev); + info->netns_ino = ns_inode->i_ino; + path_put(&ns_path); + + return 0; +} + const struct bpf_prog_ops bpf_offload_prog_ops = { }; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e02dafa6f402..ebf0fb23e237 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1707,6 +1707,12 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, return -EFAULT; } + if (bpf_prog_is_dev_bound(prog->aux)) { + err = bpf_prog_offload_info_fill(&info, prog); + if (err) + return err; + } + done: if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index db1b0923a308..4e8c60acfa32 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -921,6 +921,9 @@ struct bpf_prog_info { __u32 nr_map_ids; __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u64 netns_dev; + __u64 netns_ino; } __attribute__((aligned(8))); struct bpf_map_info { From 522622104ebabbc3372d2fad706b4d30cee13319 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Dec 2017 18:39:10 -0800 Subject: [PATCH 8/9] tools: bpftool: report device information for offloaded programs Print the just-exposed device information about device to which program is bound. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/common.c | 52 ++++++++++++++++++++++++++++++++++++++ tools/bpf/bpftool/main.h | 2 ++ tools/bpf/bpftool/prog.c | 3 +++ 3 files changed, 57 insertions(+) diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index b62c94e3997a..6601c95a9258 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -44,7 +44,9 @@ #include #include #include +#include #include +#include #include #include @@ -412,3 +414,53 @@ void delete_pinned_obj_table(struct pinned_obj_table *tab) free(obj); } } + +static char * +ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf) +{ + struct stat st; + int err; + + err = stat("/proc/self/ns/net", &st); + if (err) { + p_err("Can't stat /proc/self: %s", strerror(errno)); + return NULL; + } + + if (st.st_dev != ns_dev || st.st_ino != ns_ino) + return NULL; + + return if_indextoname(ifindex, buf); +} + +void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode) +{ + char name[IF_NAMESIZE]; + + if (!ifindex) + return; + + printf(" dev "); + if (ifindex_to_name_ns(ifindex, ns_dev, ns_inode, name)) + printf("%s", name); + else + printf("ifindex %u ns_dev %llu ns_ino %llu", + ifindex, ns_dev, ns_inode); +} + +void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode) +{ + char name[IF_NAMESIZE]; + + if (!ifindex) + return; + + jsonw_name(json_wtr, "dev"); + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "ifindex", ifindex); + jsonw_uint_field(json_wtr, "ns_dev", ns_dev); + jsonw_uint_field(json_wtr, "ns_inode", ns_inode); + if (ifindex_to_name_ns(ifindex, ns_dev, ns_inode, name)) + jsonw_string_field(json_wtr, "ifname", name); + jsonw_end_object(json_wtr); +} diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 8f6d3cac0347..65b526fe6e7e 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -96,6 +96,8 @@ struct pinned_obj { int build_pinned_obj_table(struct pinned_obj_table *table, enum bpf_obj_type type); void delete_pinned_obj_table(struct pinned_obj_table *tab); +void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode); +void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode); struct cmd { const char *cmd; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index fd0873178503..98f871ed53d6 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -230,6 +230,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) info->tag[0], info->tag[1], info->tag[2], info->tag[3], info->tag[4], info->tag[5], info->tag[6], info->tag[7]); + print_dev_json(info->ifindex, info->netns_dev, info->netns_ino); + if (info->load_time) { char buf[32]; @@ -287,6 +289,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) printf("tag "); fprint_hex(stdout, info->tag, BPF_TAG_SIZE, ""); + print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino); printf("\n"); if (info->load_time) { From 752d7b4501c250bead233ab041738db84436b1af Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 27 Dec 2017 18:39:11 -0800 Subject: [PATCH 9/9] selftests/bpf: test device info reporting for bound progs Check if bound programs report correct device info. Test in local namespace, in remote one, back to the local ns, remove the device and check that information is cleared. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_offload.py | 112 ++++++++++++++++++-- 1 file changed, 101 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index c940505c2978..e3c750f17cb8 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -18,6 +18,8 @@ import argparse import json import os import pprint +import random +import string import subprocess import time @@ -27,6 +29,7 @@ bpf_test_dir = os.path.dirname(os.path.realpath(__file__)) pp = pprint.PrettyPrinter() devs = [] # devices we created for clean up files = [] # files to be removed +netns = [] # net namespaces to be removed def log_get_sec(level=0): return "*" * (log_level + level) @@ -128,22 +131,25 @@ def rm(f): if f in files: files.remove(f) -def tool(name, args, flags, JSON=True, fail=True): +def tool(name, args, flags, JSON=True, ns="", fail=True): params = "" if JSON: params += "%s " % (flags["json"]) - ret, out = cmd(name + " " + params + args, fail=fail) + if ns != "": + ns = "ip netns exec %s " % (ns) + + ret, out = cmd(ns + name + " " + params + args, fail=fail) if JSON and len(out.strip()) != 0: return ret, json.loads(out) else: return ret, out -def bpftool(args, JSON=True, fail=True): - return tool("bpftool", args, {"json":"-p"}, JSON=JSON, fail=fail) +def bpftool(args, JSON=True, ns="", fail=True): + return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail) -def bpftool_prog_list(expected=None): - _, progs = bpftool("prog show", JSON=True, fail=True) +def bpftool_prog_list(expected=None, ns=""): + _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True) if expected is not None: if len(progs) != expected: fail(True, "%d BPF programs loaded, expected %d" % @@ -158,13 +164,13 @@ def bpftool_prog_list_wait(expected=0, n_retry=20): time.sleep(0.05) raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs)) -def ip(args, force=False, JSON=True, fail=True): +def ip(args, force=False, JSON=True, ns="", fail=True): if force: args = "-force " + args - return tool("ip", args, {"json":"-j"}, JSON=JSON, fail=fail) + return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns, fail=fail) -def tc(args, JSON=True, fail=True): - return tool("tc", args, {"json":"-p"}, JSON=JSON, fail=fail) +def tc(args, JSON=True, ns="", fail=True): + return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail) def ethtool(dev, opt, args, fail=True): return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail) @@ -178,6 +184,15 @@ def bpf_pinned(name): def bpf_bytecode(bytecode): return "bytecode \"%s\"" % (bytecode) +def mknetns(n_retry=10): + for i in range(n_retry): + name = ''.join([random.choice(string.ascii_letters) for i in range(8)]) + ret, _ = ip("netns add %s" % (name), fail=False) + if ret == 0: + netns.append(name) + return name + return None + class DebugfsDir: """ Class for accessing DebugFS directories as a dictionary. @@ -237,6 +252,8 @@ class NetdevSim: self.dev = self._netdevsim_create() devs.append(self) + self.ns = "" + self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname']) self.dfs_refresh() @@ -257,7 +274,7 @@ class NetdevSim: def remove(self): devs.remove(self) - ip("link del dev %s" % (self.dev["ifname"])) + ip("link del dev %s" % (self.dev["ifname"]), ns=self.ns) def dfs_refresh(self): self.dfs = DebugfsDir(self.dfs_dir) @@ -285,6 +302,11 @@ class NetdevSim: time.sleep(0.05) raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs)) + def set_ns(self, ns): + name = "1" if ns == "" else ns + ip("link set dev %s netns %s" % (self.dev["ifname"], name), ns=self.ns) + self.ns = ns + def set_mtu(self, mtu, fail=True): return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu), fail=fail) @@ -372,6 +394,8 @@ def clean_up(): dev.remove() for f in files: cmd("rm -f %s" % (f)) + for ns in netns: + cmd("ip netns delete %s" % (ns)) def pin_prog(file_name, idx=0): progs = bpftool_prog_list(expected=(idx + 1)) @@ -381,6 +405,35 @@ def pin_prog(file_name, idx=0): return file_name, bpf_pinned(file_name) +def check_dev_info(other_ns, ns, pin_file=None, removed=False): + if removed: + bpftool_prog_list(expected=0) + ret, err = bpftool("prog show pin %s" % (pin_file), fail=False) + fail(ret == 0, "Showing prog with removed device did not fail") + fail(err["error"].find("No such device") == -1, + "Showing prog with removed device expected ENODEV, error is %s" % + (err["error"])) + return + progs = bpftool_prog_list(expected=int(not removed), ns=ns) + prog = progs[0] + + fail("dev" not in prog.keys(), "Device parameters not reported") + dev = prog["dev"] + fail("ifindex" not in dev.keys(), "Device parameters not reported") + fail("ns_dev" not in dev.keys(), "Device parameters not reported") + fail("ns_inode" not in dev.keys(), "Device parameters not reported") + + if not removed and not other_ns: + fail("ifname" not in dev.keys(), "Ifname not reported") + fail(dev["ifname"] != sim["ifname"], + "Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"])) + else: + fail("ifname" in dev.keys(), "Ifname is reported for other ns") + if removed: + fail(dev["ifindex"] != 0, "Device perameters not zero on removed") + fail(dev["ns_dev"] != 0, "Device perameters not zero on removed") + fail(dev["ns_inode"] != 0, "Device perameters not zero on removed") + # Parse command line parser = argparse.ArgumentParser() parser.add_argument("--log", help="output verbose log to given file") @@ -417,6 +470,12 @@ for s in samples: skip(ret != 0, "sample %s/%s not found, please compile it" % (bpf_test_dir, s)) +# Check if net namespaces seem to work +ns = mknetns() +skip(ns is None, "Could not create a net namespace") +cmd("ip netns delete %s" % (ns)) +netns = [] + try: obj = bpf_obj("sample_ret0.o") bytecode = bpf_bytecode("1,6 0 0 4294967295,") @@ -549,6 +608,8 @@ try: progs = bpftool_prog_list(expected=1) fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"], "Loaded program has wrong ID") + fail("dev" in progs[0].keys(), + "Device parameters reported for non-offloaded program") start_test("Test XDP prog replace with bad flags...") ret, _ = sim.set_xdp(obj, "offload", force=True, fail=False) @@ -673,6 +734,35 @@ try: fail(time_diff < delay_sec, "Removal process took %s, expected %s" % (time_diff, delay_sec)) + # Remove all pinned files and reinstantiate the netdev + clean_up() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + sim.set_ethtool_tc_offloads(True) + sim.set_xdp(obj, "offload") + + start_test("Test bpftool bound info reporting (own ns)...") + check_dev_info(False, "") + + start_test("Test bpftool bound info reporting (other ns)...") + ns = mknetns() + sim.set_ns(ns) + check_dev_info(True, "") + + start_test("Test bpftool bound info reporting (remote ns)...") + check_dev_info(False, ns) + + start_test("Test bpftool bound info reporting (back to own ns)...") + sim.set_ns("") + check_dev_info(False, "") + + pin_file, _ = pin_prog("/sys/fs/bpf/tmp") + sim.remove() + + start_test("Test bpftool bound info reporting (removed dev)...") + check_dev_info(True, "", pin_file=pin_file, removed=True) + print("%s: OK" % (os.path.basename(__file__))) finally: