From 46f7487e161b195a1bd7ddbd9c6aba9c93ec881a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 Mar 2019 12:44:17 +0100 Subject: [PATCH 1/5] netfilter: nat: don't register device notifier twice Otherwise, we get notifier list corruption. This is the most simple fix: remove the device notifier call chain from the ipv6 masquerade register function and handle it only in the ipv4 version. The better fix is merge nf_nat_masquerade_ipv4/6_(un)register_notifier into a single nf_nat_masquerade_(un)register_notifiers but to do this its needed to first merge the two masquerade modules into a single xt_MASQUERADE. Furthermore, we need to use different refcounts for ipv4/ipv6 until we can merge MASQUERADE. Fixes: d1aca8ab3104a ("netfilter: nat: merge ipv4 and ipv6 masquerade functionality") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_masquerade.c | 35 +++++++++++++++++-------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c index 86fa4dcc63c5..d85c4d902e7b 100644 --- a/net/netfilter/nf_nat_masquerade.c +++ b/net/netfilter/nf_nat_masquerade.c @@ -11,7 +11,8 @@ #include static DEFINE_MUTEX(masq_mutex); -static unsigned int masq_refcnt __read_mostly; +static unsigned int masq_refcnt4 __read_mostly; +static unsigned int masq_refcnt6 __read_mostly; unsigned int nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, @@ -141,8 +142,13 @@ int nf_nat_masquerade_ipv4_register_notifier(void) int ret = 0; mutex_lock(&masq_mutex); + if (WARN_ON_ONCE(masq_refcnt4 == UINT_MAX)) { + ret = -EOVERFLOW; + goto out_unlock; + } + /* check if the notifier was already set */ - if (++masq_refcnt > 1) + if (++masq_refcnt4 > 1) goto out_unlock; /* Register for device down reports */ @@ -160,7 +166,7 @@ int nf_nat_masquerade_ipv4_register_notifier(void) err_unregister: unregister_netdevice_notifier(&masq_dev_notifier); err_dec: - masq_refcnt--; + masq_refcnt4--; out_unlock: mutex_unlock(&masq_mutex); return ret; @@ -171,7 +177,7 @@ void nf_nat_masquerade_ipv4_unregister_notifier(void) { mutex_lock(&masq_mutex); /* check if the notifier still has clients */ - if (--masq_refcnt > 0) + if (--masq_refcnt4 > 0) goto out_unlock; unregister_netdevice_notifier(&masq_dev_notifier); @@ -321,25 +327,23 @@ int nf_nat_masquerade_ipv6_register_notifier(void) int ret = 0; mutex_lock(&masq_mutex); - /* check if the notifier is already set */ - if (++masq_refcnt > 1) + if (WARN_ON_ONCE(masq_refcnt6 == UINT_MAX)) { + ret = -EOVERFLOW; goto out_unlock; + } - ret = register_netdevice_notifier(&masq_dev_notifier); - if (ret) - goto err_dec; + /* check if the notifier is already set */ + if (++masq_refcnt6 > 1) + goto out_unlock; ret = register_inet6addr_notifier(&masq_inet6_notifier); if (ret) - goto err_unregister; + goto err_dec; mutex_unlock(&masq_mutex); return ret; - -err_unregister: - unregister_netdevice_notifier(&masq_dev_notifier); err_dec: - masq_refcnt--; + masq_refcnt6--; out_unlock: mutex_unlock(&masq_mutex); return ret; @@ -350,11 +354,10 @@ void nf_nat_masquerade_ipv6_unregister_notifier(void) { mutex_lock(&masq_mutex); /* check if the notifier still has clients */ - if (--masq_refcnt > 0) + if (--masq_refcnt6 > 0) goto out_unlock; unregister_inet6addr_notifier(&masq_inet6_notifier); - unregister_netdevice_notifier(&masq_dev_notifier); out_unlock: mutex_unlock(&masq_mutex); } From 40ba1d9b4d19796afc9b7ece872f5f3e8f5e2c13 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 8 Mar 2019 00:58:53 +0100 Subject: [PATCH 2/5] netfilter: nf_tables: fix set double-free in abort path The abort path can cause a double-free of an anonymous set. Added-and-to-be-aborted rule looks like this: udp dport { 137, 138 } drop The to-be-aborted transaction list looks like this: newset newsetelem newsetelem rule This gets walked in reverse order, so first pass disables the rule, the set elements, then the set. After synchronize_rcu(), we then destroy those in same order: rule, set element, set element, newset. Problem is that the anonymous set has already been bound to the rule, so the rule (lookup expression destructor) already frees the set, when then cause use-after-free when trying to delete the elements from this set, then try to free the set again when handling the newset expression. Rule releases the bound set in first place from the abort path, this causes the use-after-free on set element removal when undoing the new element transactions. To handle this, skip new element transaction if set is bound from the abort path. This is still causes the use-after-free on set element removal. To handle this, remove transaction from the list when the set is already bound. Joint work with Florian Westphal. Fixes: f6ac85858976 ("netfilter: nf_tables: unbind set in rule from commit path") Bugzilla: https://bugzilla.netfilter.org/show_bug.cgi?id=1325 Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 6 ++---- net/netfilter/nf_tables_api.c | 17 +++++++++++------ 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index c331e96a713b..ed0687b0e0f4 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -416,7 +416,8 @@ struct nft_set { unsigned char *udata; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; - u16 flags:14, + u16 flags:13, + bound:1, genmask:2; u8 klen; u8 dlen; @@ -1344,15 +1345,12 @@ struct nft_trans_rule { struct nft_trans_set { struct nft_set *set; u32 set_id; - bool bound; }; #define nft_trans_set(trans) \ (((struct nft_trans_set *)trans->data)->set) #define nft_trans_set_id(trans) \ (((struct nft_trans_set *)trans->data)->set_id) -#define nft_trans_set_bound(trans) \ - (((struct nft_trans_set *)trans->data)->bound) struct nft_trans_chain { bool update; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index faf6bd10a19f..1333bf97dc26 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -142,7 +142,7 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) list_for_each_entry_reverse(trans, &net->nft.commit_list, list) { if (trans->msg_type == NFT_MSG_NEWSET && nft_trans_set(trans) == set) { - nft_trans_set_bound(trans) = true; + set->bound = true; break; } } @@ -6709,8 +6709,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); break; case NFT_MSG_NEWSET: - if (!nft_trans_set_bound(trans)) - nft_set_destroy(nft_trans_set(trans)); + nft_set_destroy(nft_trans_set(trans)); break; case NFT_MSG_NEWSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), @@ -6783,8 +6782,11 @@ static int __nf_tables_abort(struct net *net) break; case NFT_MSG_NEWSET: trans->ctx.table->use--; - if (!nft_trans_set_bound(trans)) - list_del_rcu(&nft_trans_set(trans)->list); + if (nft_trans_set(trans)->bound) { + nft_trans_destroy(trans); + break; + } + list_del_rcu(&nft_trans_set(trans)->list); break; case NFT_MSG_DELSET: trans->ctx.table->use++; @@ -6792,8 +6794,11 @@ static int __nf_tables_abort(struct net *net) nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: + if (nft_trans_elem_set(trans)->bound) { + nft_trans_destroy(trans); + break; + } te = (struct nft_trans_elem *)trans->data; - te->set->ops->remove(net, te->set, &te->elem); atomic_dec(&te->set->nelems); break; From 273fe3f1006ea5ebc63d6729e43e8e45e32b256a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 8 Mar 2019 15:30:03 +0100 Subject: [PATCH 3/5] netfilter: nf_tables: bogus EBUSY when deleting set after flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set deletion after flush coming in the same batch results in EBUSY. Add set use counter to track the number of references to this set from rules. We cannot rely on the list of bindings for this since such list is still populated from the preparation phase. Reported-by: Václav Zindulka Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 6 ++++++ net/netfilter/nf_tables_api.c | 28 +++++++++++++++++++++++++++- net/netfilter/nft_dynset.c | 13 +++++++++---- net/netfilter/nft_lookup.c | 13 +++++++++---- net/netfilter/nft_objref.c | 13 +++++++++---- 5 files changed, 60 insertions(+), 13 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index ed0687b0e0f4..3e9ab643eedf 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -382,6 +382,7 @@ void nft_unregister_set(struct nft_set_type *type); * @dtype: data type (verdict or numeric type defined by userspace) * @objtype: object type (see NFT_OBJECT_* definitions) * @size: maximum set size + * @use: number of rules references to this set * @nelems: number of elements * @ndeact: number of deactivated elements queued for removal * @timeout: default timeout value in jiffies @@ -407,6 +408,7 @@ struct nft_set { u32 dtype; u32 objtype; u32 size; + u32 use; atomic_t nelems; u32 ndeact; u64 timeout; @@ -467,6 +469,10 @@ struct nft_set_binding { u32 flags; }; +enum nft_trans_phase; +void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding, + enum nft_trans_phase phase); int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding); void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 1333bf97dc26..ca09ef037ca5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3672,6 +3672,9 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, static void nft_set_destroy(struct nft_set *set) { + if (WARN_ON(set->use > 0)) + return; + set->ops->destroy(set); module_put(to_set_type(set->ops)->owner); kfree(set->name); @@ -3712,7 +3715,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk, NL_SET_BAD_ATTR(extack, attr); return PTR_ERR(set); } - if (!list_empty(&set->bindings) || + if (set->use || (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0)) { NL_SET_BAD_ATTR(extack, attr); return -EBUSY; @@ -3742,6 +3745,9 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *i; struct nft_set_iter iter; + if (set->use == UINT_MAX) + return -EOVERFLOW; + if (!list_empty(&set->bindings) && nft_set_is_anonymous(set)) return -EBUSY; @@ -3769,6 +3775,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, binding->chain = ctx->chain; list_add_tail_rcu(&binding->list, &set->bindings); nft_set_trans_bind(ctx, set); + set->use++; return 0; } @@ -3788,6 +3795,25 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, } EXPORT_SYMBOL_GPL(nf_tables_unbind_set); +void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding, + enum nft_trans_phase phase) +{ + switch (phase) { + case NFT_TRANS_PREPARE: + set->use--; + return; + case NFT_TRANS_ABORT: + case NFT_TRANS_RELEASE: + set->use--; + /* fall through */ + default: + nf_tables_unbind_set(ctx, set, binding, + phase == NFT_TRANS_COMMIT); + } +} +EXPORT_SYMBOL_GPL(nf_tables_deactivate_set); + void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set) { if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index a8a74a16f9c4..e461007558e8 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -240,11 +240,15 @@ static void nft_dynset_deactivate(const struct nft_ctx *ctx, { struct nft_dynset *priv = nft_expr_priv(expr); - if (phase == NFT_TRANS_PREPARE) - return; + nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase); +} - nf_tables_unbind_set(ctx, priv->set, &priv->binding, - phase == NFT_TRANS_COMMIT); +static void nft_dynset_activate(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_dynset *priv = nft_expr_priv(expr); + + priv->set->use++; } static void nft_dynset_destroy(const struct nft_ctx *ctx, @@ -292,6 +296,7 @@ static const struct nft_expr_ops nft_dynset_ops = { .eval = nft_dynset_eval, .init = nft_dynset_init, .destroy = nft_dynset_destroy, + .activate = nft_dynset_activate, .deactivate = nft_dynset_deactivate, .dump = nft_dynset_dump, }; diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 14496da5141d..161c3451a747 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -127,11 +127,15 @@ static void nft_lookup_deactivate(const struct nft_ctx *ctx, { struct nft_lookup *priv = nft_expr_priv(expr); - if (phase == NFT_TRANS_PREPARE) - return; + nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase); +} - nf_tables_unbind_set(ctx, priv->set, &priv->binding, - phase == NFT_TRANS_COMMIT); +static void nft_lookup_activate(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_lookup *priv = nft_expr_priv(expr); + + priv->set->use++; } static void nft_lookup_destroy(const struct nft_ctx *ctx, @@ -222,6 +226,7 @@ static const struct nft_expr_ops nft_lookup_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), .eval = nft_lookup_eval, .init = nft_lookup_init, + .activate = nft_lookup_activate, .deactivate = nft_lookup_deactivate, .destroy = nft_lookup_destroy, .dump = nft_lookup_dump, diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 79ef074c18ca..457a9ceb46af 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -162,11 +162,15 @@ static void nft_objref_map_deactivate(const struct nft_ctx *ctx, { struct nft_objref_map *priv = nft_expr_priv(expr); - if (phase == NFT_TRANS_PREPARE) - return; + nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase); +} - nf_tables_unbind_set(ctx, priv->set, &priv->binding, - phase == NFT_TRANS_COMMIT); +static void nft_objref_map_activate(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_objref_map *priv = nft_expr_priv(expr); + + priv->set->use++; } static void nft_objref_map_destroy(const struct nft_ctx *ctx, @@ -183,6 +187,7 @@ static const struct nft_expr_ops nft_objref_map_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)), .eval = nft_objref_map_eval, .init = nft_objref_map_init, + .activate = nft_objref_map_activate, .deactivate = nft_objref_map_deactivate, .destroy = nft_objref_map_destroy, .dump = nft_objref_map_dump, From 3f3a390dbd59d236f62cff8e8b20355ef7069e3d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 11 Mar 2019 13:04:16 +0100 Subject: [PATCH 4/5] netfilter: nf_tables: use-after-free in dynamic operations Smatch reports: net/netfilter/nf_tables_api.c:2167 nf_tables_expr_destroy() error: dereferencing freed memory 'expr->ops' net/netfilter/nf_tables_api.c 2162 static void nf_tables_expr_destroy(const struct nft_ctx *ctx, 2163 struct nft_expr *expr) 2164 { 2165 if (expr->ops->destroy) 2166 expr->ops->destroy(ctx, expr); ^^^^ --> 2167 module_put(expr->ops->type->owner); ^^^^^^^^^ 2168 } Smatch says there are three functions which free expr->ops. Fixes: b8e204006340 ("netfilter: nft_compat: use .release_ops and remove list of extension") Reported-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ca09ef037ca5..9d8f51dfc593 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2162,9 +2162,11 @@ static int nf_tables_newexpr(const struct nft_ctx *ctx, static void nf_tables_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr) { + const struct nft_expr_type *type = expr->ops->type; + if (expr->ops->destroy) expr->ops->destroy(ctx, expr); - module_put(expr->ops->type->owner); + module_put(type->owner); } struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, From b8b27498659c65034032af79842913844a6cc79a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 7 Mar 2019 23:20:11 +0100 Subject: [PATCH 5/5] netfilter: nf_tables: return immediately on empty commit When running 'nft flush ruleset' while no rules exist, we will increment the generation counter and announce a new genid to userspace, yet nothing had changed in the first place. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9d8f51dfc593..513f93118604 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -6564,6 +6564,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) struct nft_chain *chain; struct nft_table *table; + if (list_empty(&net->nft.commit_list)) { + mutex_unlock(&net->nft.commit_mutex); + return 0; + } + /* 0. Validate ruleset, otherwise roll back for error reporting. */ if (nf_tables_validate(net) < 0) return -EAGAIN;