netfilter: ipset: Prepare the ipset core to use RCU at set level

Replace rwlock_t with spinlock_t in "struct ip_set" and change the locking
accordingly. Convert the comment extension into an rcu-aware object. Also,
simplify the timeout routines.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
This commit is contained in:
Jozsef Kadlecsik 2015-06-13 14:22:25 +02:00
parent bd55389cc3
commit b57b2d1fa5
4 changed files with 66 additions and 50 deletions

View File

@ -108,8 +108,13 @@ struct ip_set_counter {
atomic64_t packets;
};
/* RCU-managed storage for a comment string.
 * The object is allocated as sizeof(*c) + len + 1 bytes, so the string
 * lives directly behind the header, and it is released via
 * kfree_rcu(c, rcu).
 */
struct ip_set_comment_rcu {
/* Callback head handed to kfree_rcu() when the comment is released */
struct rcu_head rcu;
/* Trailing, NUL-terminated comment text; sized at allocation time */
char str[0];
};
struct ip_set_comment {
char *str;
struct ip_set_comment_rcu __rcu *c;
};
struct ip_set_skbinfo {
@ -226,7 +231,7 @@ struct ip_set {
/* The name of the set */
char name[IPSET_MAXNAMELEN];
/* Lock protecting the set data */
rwlock_t lock;
spinlock_t lock;
/* References to the set */
u32 ref;
/* The core set type */

View File

@ -16,41 +16,57 @@ ip_set_comment_uget(struct nlattr *tb)
return nla_data(tb);
}
/* Called from uadd only, protected by the set spinlock.
* The kadt functions don't use the comment extensions in any way.
*/
static inline void
ip_set_init_comment(struct ip_set_comment *comment,
const struct ip_set_ext *ext)
{
struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
size_t len = ext->comment ? strlen(ext->comment) : 0;
if (unlikely(comment->str)) {
kfree(comment->str);
comment->str = NULL;
if (unlikely(c)) {
kfree_rcu(c, rcu);
rcu_assign_pointer(comment->c, NULL);
}
if (!len)
return;
if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
len = IPSET_MAX_COMMENT_SIZE;
comment->str = kzalloc(len + 1, GFP_ATOMIC);
if (unlikely(!comment->str))
c = kzalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
if (unlikely(!c))
return;
strlcpy(comment->str, ext->comment, len + 1);
strlcpy(c->str, ext->comment, len + 1);
rcu_assign_pointer(comment->c, c);
}
/* Used only when dumping a set, protected by rcu_read_lock_bh() */
static inline int
ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment)
{
if (!comment->str)
struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c);
if (!c)
return 0;
return nla_put_string(skb, IPSET_ATTR_COMMENT, comment->str);
return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
}
/* Called from uadd/udel, flush or the garbage collectors protected
* by the set spinlock.
* Called when the set is destroyed and when there can't be any user
* of the set data anymore.
*/
static inline void
ip_set_comment_free(struct ip_set_comment *comment)
{
if (unlikely(!comment->str))
struct ip_set_comment_rcu *c;
c = rcu_dereference_protected(comment->c, 1);
if (unlikely(!c))
return;
kfree(comment->str);
comment->str = NULL;
kfree_rcu(c, rcu);
rcu_assign_pointer(comment->c, NULL);
}
#endif

View File

@ -40,31 +40,26 @@ ip_set_timeout_uget(struct nlattr *tb)
}
static inline bool
ip_set_timeout_test(unsigned long timeout)
ip_set_timeout_expired(unsigned long *t)
{
return timeout == IPSET_ELEM_PERMANENT ||
time_is_after_jiffies(timeout);
}
static inline bool
ip_set_timeout_expired(unsigned long *timeout)
{
return *timeout != IPSET_ELEM_PERMANENT &&
time_is_before_jiffies(*timeout);
return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t);
}
static inline void
ip_set_timeout_set(unsigned long *timeout, u32 t)
ip_set_timeout_set(unsigned long *timeout, u32 value)
{
if (!t) {
unsigned long t;
if (!value) {
*timeout = IPSET_ELEM_PERMANENT;
return;
}
*timeout = msecs_to_jiffies(t * MSEC_PER_SEC) + jiffies;
if (*timeout == IPSET_ELEM_PERMANENT)
t = msecs_to_jiffies(value * MSEC_PER_SEC) + jiffies;
if (t == IPSET_ELEM_PERMANENT)
/* Bingo! :-) */
(*timeout)--;
t--;
*timeout = t;
}
static inline u32

View File

@ -209,15 +209,15 @@ ip_set_type_register(struct ip_set_type *type)
pr_warn("ip_set type %s, family %s with revision min %u already registered!\n",
type->name, family_name(type->family),
type->revision_min);
ret = -EINVAL;
goto unlock;
ip_set_type_unlock();
return -EINVAL;
}
list_add_rcu(&type->list, &ip_set_type_list);
pr_debug("type %s, family %s, revision %u:%u registered.\n",
type->name, family_name(type->family),
type->revision_min, type->revision_max);
unlock:
ip_set_type_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(ip_set_type_register);
@ -231,12 +231,12 @@ ip_set_type_unregister(struct ip_set_type *type)
pr_warn("ip_set type %s, family %s with revision min %u not registered\n",
type->name, family_name(type->family),
type->revision_min);
goto unlock;
ip_set_type_unlock();
return;
}
list_del_rcu(&type->list);
pr_debug("type %s, family %s with revision min %u unregistered.\n",
type->name, family_name(type->family), type->revision_min);
unlock:
ip_set_type_unlock();
synchronize_rcu();
@ -531,16 +531,16 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return 0;
read_lock_bh(&set->lock);
rcu_read_lock_bh();
ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
read_unlock_bh(&set->lock);
rcu_read_unlock_bh();
if (ret == -EAGAIN) {
/* Type requests element to be completed */
pr_debug("element must be completed, ADD is triggered\n");
write_lock_bh(&set->lock);
spin_lock_bh(&set->lock);
set->variant->kadt(set, skb, par, IPSET_ADD, opt);
write_unlock_bh(&set->lock);
spin_unlock_bh(&set->lock);
ret = 1;
} else {
/* --return-nomatch: invert matched element */
@ -570,9 +570,9 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return -IPSET_ERR_TYPE_MISMATCH;
write_lock_bh(&set->lock);
spin_lock_bh(&set->lock);
ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
write_unlock_bh(&set->lock);
spin_unlock_bh(&set->lock);
return ret;
}
@ -593,9 +593,9 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return -IPSET_ERR_TYPE_MISMATCH;
write_lock_bh(&set->lock);
spin_lock_bh(&set->lock);
ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
write_unlock_bh(&set->lock);
spin_unlock_bh(&set->lock);
return ret;
}
@ -880,7 +880,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
set = kzalloc(sizeof(struct ip_set), GFP_KERNEL);
if (!set)
return -ENOMEM;
rwlock_init(&set->lock);
spin_lock_init(&set->lock);
strlcpy(set->name, name, IPSET_MAXNAMELEN);
set->family = family;
set->revision = revision;
@ -1062,9 +1062,9 @@ ip_set_flush_set(struct ip_set *set)
{
pr_debug("set: %s\n", set->name);
write_lock_bh(&set->lock);
spin_lock_bh(&set->lock);
set->variant->flush(set);
write_unlock_bh(&set->lock);
spin_unlock_bh(&set->lock);
}
static int
@ -1377,9 +1377,9 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
set->variant->uref(set, cb, true);
/* Fall through and add elements */
default:
read_lock_bh(&set->lock);
rcu_read_lock_bh();
ret = set->variant->list(set, skb, cb);
read_unlock_bh(&set->lock);
rcu_read_unlock_bh();
if (!cb->args[IPSET_CB_ARG0])
/* Set is done, proceed with next one */
goto next_set;
@ -1462,9 +1462,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
do {
write_lock_bh(&set->lock);
spin_lock_bh(&set->lock);
ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
write_unlock_bh(&set->lock);
spin_unlock_bh(&set->lock);
retried = true;
} while (ret == -EAGAIN &&
set->variant->resize &&
@ -1644,9 +1644,9 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
set->type->adt_policy))
return -IPSET_ERR_PROTOCOL;
read_lock_bh(&set->lock);
rcu_read_lock_bh();
ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0);
read_unlock_bh(&set->lock);
rcu_read_unlock_bh();
/* Userspace can't trigger element to be re-added */
if (ret == -EAGAIN)
ret = 1;