Merge branch 'ovs_flowids'

Joe Stringer says:

====================
openvswitch: Introduce 128-bit unique flow identifiers.

This series extends the openvswitch datapath interface for flow commands to use
128-bit unique identifiers as an alternative to the netlink-formatted flow key.
This significantly reduces the cost of assembling messages between the kernel
and userspace, in particular improving Open vSwitch revalidation performance by
40% or more.

v14:
- Perform lookup using unmasked key in legacy case.
- Fix minor checkpatch.pl style violations.

v13:
- Embed sw_flow_id in sw_flow to save memory allocation in UFID case.
- Malloc unmasked key for id in non-UFID case.
- Fix bug where non-UFID case could double-serialize keys.

v12:
- Userspace patches fully merged into Open vSwitch master
- New minor refactor patches (2,3,4)
- Merge unmasked_key, ufid representation of flow identifier in sw_flow
- Improve memory allocation sizes when serializing ufid
- Handle corner case where a flow_new is requested with a flow that has an
  identical ufid to an existing flow, but a different flow key
- Limit UFID to 1-16 octets inclusive.
- Add various helper functions to improve readability

v11:
- Pushed most of the prerequisite patches for this series to OVS master.
- Split out openvswitch.h interface changes from datapath implementation
- Datapath implementation to be reviewed on net-next, separately

v10:
- New patch allowing datapath to serialize masked keys
- Simplify datapath interface by accepting UFID or flow_key, but not both
- Flows set up with UFID must be queried/deleted using UFID
- Reduce sw_flow memory usage for UFID
- Don't periodically rehash UFID table in linux datapath
- Remove kernel_only UFID in linux datapath

v9:
- No kernel changes

v8:
- Rename UID -> UFID
- Fix null dereference in datapath when paired with older userspace
- All patches are reviewed/acked except datapath changes.

v7:
- Remove OVS_DP_F_INDEX_BY_UID
- Rework datapath UID serialization for variable length UIDs

v6:
- Reduce netlink conversions for all datapaths
- Various bugfixes

v5:
- Various bugfixes
- Improve logging

v4:
- Datapath memory leak fixes
- Enable UID-based terse dumping and deleting by default
- Various fixes

RFCv3:
- Add datapath implementation
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller committed 2015-01-26 15:45:55 -08:00
commit 6039e3dff0

9 changed files with 519 additions and 134 deletions

diff --git a/Documentation/networking/openvswitch.txt b/Documentation/networking/openvswitch.txt

@@ -131,6 +131,19 @@ performs best-effort detection of overlapping wildcarded flows and may reject
 some but not all of them. However, this behavior may change in future versions.
 
+Unique flow identifiers
+-----------------------
+
+An alternative to using the original match portion of a key as the handle for
+flow identification is a unique flow identifier, or "UFID". UFIDs are optional
+for both the kernel and user space program.
+
+User space programs that support UFID are expected to provide it during flow
+setup in addition to the flow, then refer to the flow using the UFID for all
+future operations. The kernel is not required to index flows by the original
+flow key if a UFID is specified.
+
 Basic rule for evolving flow keys
 ---------------------------------
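In user-space terms, that contract might look like the following hedged libnl-3 sketch (not part of this series; flow_new_with_ufid is a hypothetical helper, and the OVS_FLOW_ATTR_KEY/OVS_FLOW_ATTR_ACTIONS nests are elided). A flow is installed with a caller-chosen UFID, which later get/del requests can carry instead of the full netlink flow key:

#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/attr.h>
#include <linux/openvswitch.h>

/* Hypothetical sketch: install a flow identified by a caller-chosen
 * UFID (1-16 octets). Error handling and the OVS_FLOW_ATTR_KEY /
 * OVS_FLOW_ATTR_ACTIONS nests are elided for brevity. */
static int flow_new_with_ufid(struct nl_sock *sk, int ovs_flow_family,
                              int dp_ifindex, const void *ufid, int ufid_len)
{
        struct nl_msg *msg = nlmsg_alloc();
        struct ovs_header *oh;
        int err;

        oh = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, ovs_flow_family,
                         sizeof(*oh), 0, OVS_FLOW_CMD_NEW, OVS_FLOW_VERSION);
        oh->dp_ifindex = dp_ifindex;

        /* Index the flow by UFID; subsequent get/del requests may then
         * carry only this attribute instead of the netlink flow key. */
        nla_put(msg, OVS_FLOW_ATTR_UFID, ufid_len, ufid);
        /* ... nest OVS_FLOW_ATTR_KEY and OVS_FLOW_ATTR_ACTIONS here ... */

        err = nl_send_auto(sk, msg);
        nlmsg_free(msg);
        return err;
}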

diff --git a/include/net/genetlink.h b/include/net/genetlink.h

@@ -205,6 +205,23 @@ static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr,
                               NLMSG_HDRLEN);
 }
 
+/**
+ * genlmsg_parse - parse attributes of a genetlink message
+ * @nlh: netlink message header
+ * @family: genetlink message family
+ * @tb: destination array with maxtype+1 elements
+ * @maxtype: maximum attribute type to be expected
+ * @policy: validation policy
+ */
+static inline int genlmsg_parse(const struct nlmsghdr *nlh,
+                                const struct genl_family *family,
+                                struct nlattr *tb[], int maxtype,
+                                const struct nla_policy *policy)
+{
+        return nlmsg_parse(nlh, family->hdrsize + GENL_HDRLEN, tb, maxtype,
+                           policy);
+}
+
 /**
  * genl_dump_check_consistent - check if sequence is consistent and advertise if not
  * @cb: netlink callback structure that stores the sequence number
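The new helper is exercised later in this commit by ovs_flow_cmd_dump(); as a minimal stand-alone sketch (my_genl_family, my_policy and MY_ATTR_MAX are placeholders for a real family's definitions), a dump callback picks up its request attributes like so:

/* Sketch only: parse the attributes of a dump request inside a
 * netlink_callback handler, mirroring ovs_flow_cmd_dump() below. */
static int my_family_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct nlattr *a[MY_ATTR_MAX + 1];
        int err;

        err = genlmsg_parse(cb->nlh, &my_genl_family, a, MY_ATTR_MAX,
                            my_policy);
        if (err)
                return err;

        /* ... the request's attributes are now available in a[] ... */
        return 0;
}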

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h

@@ -459,6 +459,14 @@ struct ovs_key_nd {
  * a wildcarded match. Omitting attribute is treated as wildcarding all
  * corresponding fields. Optional for all requests. If not present,
  * all flow key bits are exact match bits.
+ * @OVS_FLOW_ATTR_UFID: A value between 1-16 octets specifying a unique
+ * identifier for the flow. Causes the flow to be indexed by this value rather
+ * than the value of the %OVS_FLOW_ATTR_KEY attribute. Optional for all
+ * requests. Present in notifications if the flow was created with this
+ * attribute.
+ * @OVS_FLOW_ATTR_UFID_FLAGS: A 32-bit value of OR'd %OVS_UFID_F_*
+ * flags that provide alternative semantics for flow installation and
+ * retrieval. Optional for all requests.
  *
  * These attributes follow the &struct ovs_header within the Generic Netlink
  * payload for %OVS_FLOW_* commands.
@@ -474,11 +482,23 @@ enum ovs_flow_attr {
        OVS_FLOW_ATTR_MASK,      /* Sequence of OVS_KEY_ATTR_* attributes. */
        OVS_FLOW_ATTR_PROBE,     /* Flow operation is a feature probe, error
                                  * logging should be suppressed. */
+       OVS_FLOW_ATTR_UFID,      /* Variable length unique flow identifier. */
+       OVS_FLOW_ATTR_UFID_FLAGS,/* u32 of OVS_UFID_F_*. */
        __OVS_FLOW_ATTR_MAX
 };
 
 #define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1)
 
+/**
+ * Omit attributes for notifications.
+ *
+ * If a datapath request contains an %OVS_UFID_F_OMIT_* flag, then the datapath
+ * may omit the corresponding %OVS_FLOW_ATTR_* from the response.
+ */
+#define OVS_UFID_F_OMIT_KEY      (1 << 0)
+#define OVS_UFID_F_OMIT_MASK     (1 << 1)
+#define OVS_UFID_F_OMIT_ACTIONS  (1 << 2)
+
 /**
  * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
  * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
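As a hedged illustration of the flags (request_terse_replies is a hypothetical libnl-3 helper, continuing the sketch style above), a dumper that only needs UFIDs and stats can ask the datapath to omit the comparatively large key, mask and actions attributes from each reply:

#include <netlink/attr.h>
#include <linux/openvswitch.h>

/* Hypothetical helper: request that flow replies carry only the UFID
 * and stats, omitting key, mask and actions. */
static int request_terse_replies(struct nl_msg *msg)
{
        return nla_put_u32(msg, OVS_FLOW_ATTR_UFID_FLAGS,
                           OVS_UFID_F_OMIT_KEY |
                           OVS_UFID_F_OMIT_MASK |
                           OVS_UFID_F_OMIT_ACTIONS);
}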

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c

@@ -65,6 +65,8 @@ static struct genl_family dp_packet_genl_family;
 static struct genl_family dp_flow_genl_family;
 static struct genl_family dp_datapath_genl_family;
 
+static const struct nla_policy flow_policy[];
+
 static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
        .name = OVS_FLOW_MCGROUP,
 };
@@ -461,10 +463,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
                          0, upcall_info->cmd);
        upcall->dp_ifindex = dp_ifindex;
 
-       nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
-       err = ovs_nla_put_flow(key, key, user_skb);
+       err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
        BUG_ON(err);
-       nla_nest_end(user_skb, nla);
 
        if (upcall_info->userdata)
                __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
@@ -664,46 +664,48 @@ static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
        }
 }
 
-static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
+static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
 {
-       return NLMSG_ALIGN(sizeof(struct ovs_header))
-               + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_KEY */
-               + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_MASK */
-               + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
-               + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
-               + nla_total_size(8) /* OVS_FLOW_ATTR_USED */
-               + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
+       return ovs_identifier_is_ufid(sfid) &&
+              !(ufid_flags & OVS_UFID_F_OMIT_KEY);
 }
 
-/* Called with ovs_mutex or RCU read lock. */
-static int ovs_flow_cmd_fill_match(const struct sw_flow *flow,
-                                  struct sk_buff *skb)
+static bool should_fill_mask(uint32_t ufid_flags)
 {
-       struct nlattr *nla;
-       int err;
+       return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
+}
 
-       /* Fill flow key. */
-       nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
-       if (!nla)
-               return -EMSGSIZE;
+static bool should_fill_actions(uint32_t ufid_flags)
+{
+       return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
+}
 
-       err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb);
-       if (err)
-               return err;
+static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
+                                   const struct sw_flow_id *sfid,
+                                   uint32_t ufid_flags)
+{
+       size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
 
-       nla_nest_end(skb, nla);
+       /* OVS_FLOW_ATTR_UFID */
+       if (sfid && ovs_identifier_is_ufid(sfid))
+               len += nla_total_size(sfid->ufid_len);
 
-       /* Fill flow mask. */
-       nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
-       if (!nla)
-               return -EMSGSIZE;
+       /* OVS_FLOW_ATTR_KEY */
+       if (!sfid || should_fill_key(sfid, ufid_flags))
+               len += nla_total_size(ovs_key_attr_size());
 
-       err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
-       if (err)
-               return err;
+       /* OVS_FLOW_ATTR_MASK */
+       if (should_fill_mask(ufid_flags))
+               len += nla_total_size(ovs_key_attr_size());
 
-       nla_nest_end(skb, nla);
-       return 0;
+       /* OVS_FLOW_ATTR_ACTIONS */
+       if (should_fill_actions(ufid_flags))
+               len += nla_total_size(acts->actions_len);
+
+       return len
+               + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
+               + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
+               + nla_total_size(8); /* OVS_FLOW_ATTR_USED */
 }
 
 /* Called with ovs_mutex or RCU read lock. */
@@ -774,7 +776,7 @@ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
 /* Called with ovs_mutex or RCU read lock. */
 static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
                                  struct sk_buff *skb, u32 portid,
-                                 u32 seq, u32 flags, u8 cmd)
+                                 u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
 {
        const int skb_orig_len = skb->len;
        struct ovs_header *ovs_header;
@@ -787,17 +789,31 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
 
        ovs_header->dp_ifindex = dp_ifindex;
 
-       err = ovs_flow_cmd_fill_match(flow, skb);
+       err = ovs_nla_put_identifier(flow, skb);
        if (err)
                goto error;
 
+       if (should_fill_key(&flow->id, ufid_flags)) {
+               err = ovs_nla_put_masked_key(flow, skb);
+               if (err)
+                       goto error;
+       }
+
+       if (should_fill_mask(ufid_flags)) {
+               err = ovs_nla_put_mask(flow, skb);
+               if (err)
+                       goto error;
+       }
+
        err = ovs_flow_cmd_fill_stats(flow, skb);
        if (err)
                goto error;
 
-       err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
-       if (err)
-               goto error;
+       if (should_fill_actions(ufid_flags)) {
+               err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
+               if (err)
+                       goto error;
+       }
 
        genlmsg_end(skb, ovs_header);
        return 0;
@@ -809,15 +825,19 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
 
 /* May not be called with RCU read lock. */
 static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
+                                              const struct sw_flow_id *sfid,
                                               struct genl_info *info,
-                                              bool always)
+                                              bool always,
+                                              uint32_t ufid_flags)
 {
        struct sk_buff *skb;
+       size_t len;
 
        if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
                return NULL;
 
-       skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL);
+       len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
+       skb = genlmsg_new_unicast(len, info, GFP_KERNEL);
        if (!skb)
                return ERR_PTR(-ENOMEM);
 
@@ -828,19 +848,19 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act
 static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
                                               int dp_ifindex,
                                               struct genl_info *info, u8 cmd,
-                                              bool always)
+                                              bool always, u32 ufid_flags)
 {
        struct sk_buff *skb;
        int retval;
 
-       skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info,
-                                     always);
+       skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
+                                     &flow->id, info, always, ufid_flags);
        if (IS_ERR_OR_NULL(skb))
                return skb;
 
        retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
                                        info->snd_portid, info->snd_seq, 0,
-                                       cmd);
+                                       cmd, ufid_flags);
        BUG_ON(retval < 0);
        return skb;
 }
@@ -849,12 +869,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 {
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
-       struct sw_flow *flow, *new_flow;
+       struct sw_flow *flow = NULL, *new_flow;
        struct sw_flow_mask mask;
        struct sk_buff *reply;
        struct datapath *dp;
+       struct sw_flow_key key;
        struct sw_flow_actions *acts;
        struct sw_flow_match match;
+       u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int error;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
 
@@ -879,13 +901,19 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
        }
 
        /* Extract key. */
-       ovs_match_init(&match, &new_flow->unmasked_key, &mask);
+       ovs_match_init(&match, &key, &mask);
        error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
                                  a[OVS_FLOW_ATTR_MASK], log);
        if (error)
                goto err_kfree_flow;
 
-       ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask);
+       ovs_flow_mask_key(&new_flow->key, &key, &mask);
+
+       /* Extract flow identifier. */
+       error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
+                                      &key, log);
+       if (error)
+               goto err_kfree_flow;
 
        /* Validate actions. */
        error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
@@ -895,7 +923,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
                goto err_kfree_flow;
        }
 
-       reply = ovs_flow_cmd_alloc_info(acts, info, false);
+       reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
+                                       ufid_flags);
        if (IS_ERR(reply)) {
                error = PTR_ERR(reply);
                goto err_kfree_acts;
@@ -907,8 +936,12 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
                error = -ENODEV;
                goto err_unlock_ovs;
        }
+
        /* Check if this is a duplicate flow */
-       flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key);
+       if (ovs_identifier_is_ufid(&new_flow->id))
+               flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
+       if (!flow)
+               flow = ovs_flow_tbl_lookup(&dp->table, &key);
        if (likely(!flow)) {
                rcu_assign_pointer(new_flow->sf_acts, acts);
 
@@ -924,7 +957,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
-                                                      OVS_FLOW_CMD_NEW);
+                                                      OVS_FLOW_CMD_NEW,
+                                                      ufid_flags);
                        BUG_ON(error < 0);
                }
                ovs_unlock();
@@ -942,10 +976,15 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
                        error = -EEXIST;
                        goto err_unlock_ovs;
                }
-               /* The unmasked key has to be the same for flow updates. */
-               if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) {
-                       /* Look for any overlapping flow. */
-                       flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+               /* The flow identifier has to be the same for flow updates.
+                * Look for any overlapping flow.
+                */
+               if (unlikely(!ovs_flow_cmp(flow, &match))) {
+                       if (ovs_identifier_is_key(&flow->id))
+                               flow = ovs_flow_tbl_lookup_exact(&dp->table,
+                                                                &match);
+                       else /* UFID matches but key is different */
+                               flow = NULL;
                        if (!flow) {
                                error = -ENOENT;
                                goto err_unlock_ovs;
@@ -960,7 +999,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
-                                                      OVS_FLOW_CMD_NEW);
+                                                      OVS_FLOW_CMD_NEW,
+                                                      ufid_flags);
                        BUG_ON(error < 0);
                }
                ovs_unlock();
@@ -1016,8 +1056,11 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
        struct datapath *dp;
        struct sw_flow_actions *old_acts = NULL, *acts = NULL;
        struct sw_flow_match match;
+       struct sw_flow_id sfid;
+       u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int error;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
+       bool ufid_present;
 
        /* Extract key. */
        error = -EINVAL;
@@ -1026,6 +1069,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
                goto error;
        }
 
+       ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
        ovs_match_init(&match, &key, &mask);
        error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
                                  a[OVS_FLOW_ATTR_MASK], log);
@@ -1042,7 +1086,8 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
        }
 
        /* Can allocate before locking if have acts. */
-       reply = ovs_flow_cmd_alloc_info(acts, info, false);
+       reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
+                                       ufid_flags);
        if (IS_ERR(reply)) {
                error = PTR_ERR(reply);
                goto err_kfree_acts;
@@ -1056,7 +1101,10 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
                goto err_unlock_ovs;
        }
        /* Check that the flow exists. */
-       flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+       if (ufid_present)
+               flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
+       else
+               flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (unlikely(!flow)) {
                error = -ENOENT;
                goto err_unlock_ovs;
@@ -1072,13 +1120,16 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
-                                                      OVS_FLOW_CMD_NEW);
+                                                      OVS_FLOW_CMD_NEW,
+                                                      ufid_flags);
                        BUG_ON(error < 0);
                }
        } else {
                /* Could not alloc without acts before locking. */
                reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
-                                               info, OVS_FLOW_CMD_NEW, false);
+                                               info, OVS_FLOW_CMD_NEW, false,
+                                               ufid_flags);
+
                if (unlikely(IS_ERR(reply))) {
                        error = PTR_ERR(reply);
                        goto err_unlock_ovs;
@@ -1115,17 +1166,22 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
        struct sw_flow *flow;
        struct datapath *dp;
        struct sw_flow_match match;
-       int err;
+       struct sw_flow_id ufid;
+       u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
+       int err = 0;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
+       bool ufid_present;
 
-       if (!a[OVS_FLOW_ATTR_KEY]) {
+       ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
+       if (a[OVS_FLOW_ATTR_KEY]) {
+               ovs_match_init(&match, &key, NULL);
+               err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
+                                       log);
+       } else if (!ufid_present) {
                OVS_NLERR(log,
                          "Flow get message rejected, Key attribute missing.");
-               return -EINVAL;
+               err = -EINVAL;
        }
-
-       ovs_match_init(&match, &key, NULL);
-       err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log);
        if (err)
                return err;
 
@@ -1136,14 +1192,17 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
                goto unlock;
        }
 
-       flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+       if (ufid_present)
+               flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
+       else
+               flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (!flow) {
                err = -ENOENT;
                goto unlock;
        }
 
        reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
-                                       OVS_FLOW_CMD_NEW, true);
+                                       OVS_FLOW_CMD_NEW, true, ufid_flags);
        if (IS_ERR(reply)) {
                err = PTR_ERR(reply);
                goto unlock;
@@ -1162,13 +1221,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sk_buff *reply;
-       struct sw_flow *flow;
+       struct sw_flow *flow = NULL;
        struct datapath *dp;
        struct sw_flow_match match;
+       struct sw_flow_id ufid;
+       u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int err;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
+       bool ufid_present;
 
-       if (likely(a[OVS_FLOW_ATTR_KEY])) {
+       ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
+       if (a[OVS_FLOW_ATTR_KEY]) {
                ovs_match_init(&match, &key, NULL);
                err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
                                        log);
@@ -1183,12 +1246,15 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
                goto unlock;
        }
 
-       if (unlikely(!a[OVS_FLOW_ATTR_KEY])) {
+       if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
                err = ovs_flow_tbl_flush(&dp->table);
                goto unlock;
        }
 
-       flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+       if (ufid_present)
+               flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
+       else
+               flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (unlikely(!flow)) {
                err = -ENOENT;
                goto unlock;
@@ -1198,14 +1264,15 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
        ovs_unlock();
 
        reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
-                                       info, false);
+                                       &flow->id, info, false, ufid_flags);
        if (likely(reply)) {
                if (likely(!IS_ERR(reply))) {
                        rcu_read_lock();        /*To keep RCU checker happy. */
                        err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
                                                     reply, info->snd_portid,
                                                     info->snd_seq, 0,
-                                                    OVS_FLOW_CMD_DEL);
+                                                    OVS_FLOW_CMD_DEL,
+                                                    ufid_flags);
                        rcu_read_unlock();
                        BUG_ON(err < 0);
 
@@ -1224,9 +1291,18 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 
 static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
+       struct nlattr *a[__OVS_FLOW_ATTR_MAX];
        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct table_instance *ti;
        struct datapath *dp;
+       u32 ufid_flags;
+       int err;
+
+       err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a,
+                           OVS_FLOW_ATTR_MAX, flow_policy);
+       if (err)
+               return err;
+       ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
 
        rcu_read_lock();
        dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
@@ -1249,7 +1325,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
                if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
                                           NETLINK_CB(cb->skb).portid,
                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
-                                          OVS_FLOW_CMD_NEW) < 0)
+                                          OVS_FLOW_CMD_NEW, ufid_flags) < 0)
                        break;
 
                cb->args[0] = bucket;
@@ -1265,6 +1341,8 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
        [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
        [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
+       [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
+       [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
 };
 
 static const struct genl_ops dp_flow_genl_ops[] = {

diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h

@@ -197,6 +197,16 @@ struct sw_flow_match {
        struct sw_flow_mask *mask;
 };
 
+#define MAX_UFID_LENGTH 16 /* 128 bits */
+
+struct sw_flow_id {
+       u32 ufid_len;
+       union {
+               u32 ufid[MAX_UFID_LENGTH / 4];
+               struct sw_flow_key *unmasked_key;
+       };
+};
+
 struct sw_flow_actions {
        struct rcu_head rcu;
        u32 actions_len;
@@ -213,13 +223,15 @@ struct flow_stats {
 
 struct sw_flow {
        struct rcu_head rcu;
-       struct hlist_node hash_node[2];
-       u32 hash;
+       struct {
+               struct hlist_node node[2];
+               u32 hash;
+       } flow_table, ufid_table;
        int stats_last_writer;          /* NUMA-node id of the last writer on
                                         * 'stats[0]'.
                                         */
        struct sw_flow_key key;
-       struct sw_flow_key unmasked_key;
+       struct sw_flow_id id;
        struct sw_flow_mask *mask;
        struct sw_flow_actions __rcu *sf_acts;
        struct flow_stats __rcu *stats[]; /* One for each NUMA node.  First one
@@ -243,6 +255,16 @@ struct arp_eth_header {
        unsigned char       ar_tip[4];          /* target IP address */
 } __packed;
 
+static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid)
+{
+       return sfid->ufid_len;
+}
+
+static inline bool ovs_identifier_is_key(const struct sw_flow_id *sfid)
+{
+       return !ovs_identifier_is_ufid(sfid);
+}
+
 void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags,
                           const struct sk_buff *);
 void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
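The sw_flow_id union overlays the 16 inline UFID octets with the fallback unmasked-key pointer, and ufid_len doubles as the discriminator: zero means the flow is identified by its unmasked key, 1-16 means an inline UFID. A minimal user-space sketch of the same pattern, with libc types standing in for the kernel ones:

#include <assert.h>
#include <stdint.h>
#include <string.h>

#define MAX_UFID_LENGTH 16 /* 128 bits */

struct flow_id {
        uint32_t len;
        union {
                uint32_t ufid[MAX_UFID_LENGTH / 4];
                void *unmasked_key;     /* valid only when len == 0 */
        };
};

int main(void)
{
        static uint8_t key_stub[64];
        const uint8_t ufid[16] = { 0xde, 0xad, 0xbe, 0xef };
        struct flow_id id = { 0 };

        id.unmasked_key = key_stub;     /* len == 0: key-based identifier */
        assert(id.len == 0);

        id.len = sizeof(ufid);          /* nonzero: UFID-based identifier */
        memcpy(id.ufid, ufid, sizeof(ufid));
        assert(id.len >= 1 && id.len <= MAX_UFID_LENGTH);
        return 0;
}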

diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c

@@ -1180,6 +1180,59 @@ int ovs_nla_get_match(struct sw_flow_match *match,
        return err;
 }
 
+static size_t get_ufid_len(const struct nlattr *attr, bool log)
+{
+       size_t len;
+
+       if (!attr)
+               return 0;
+
+       len = nla_len(attr);
+       if (len < 1 || len > MAX_UFID_LENGTH) {
+               OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)",
+                         nla_len(attr), MAX_UFID_LENGTH);
+               return 0;
+       }
+
+       return len;
+}
+
+/* Initializes 'flow->ufid', returning true if 'attr' contains a valid UFID,
+ * or false otherwise.
+ */
+bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr,
+                     bool log)
+{
+       sfid->ufid_len = get_ufid_len(attr, log);
+       if (sfid->ufid_len)
+               memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len);
+
+       return sfid->ufid_len;
+}
+
+int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
+                          const struct sw_flow_key *key, bool log)
+{
+       struct sw_flow_key *new_key;
+
+       if (ovs_nla_get_ufid(sfid, ufid, log))
+               return 0;
+
+       /* If UFID was not provided, use unmasked key. */
+       new_key = kmalloc(sizeof(*new_key), GFP_KERNEL);
+       if (!new_key)
+               return -ENOMEM;
+       memcpy(new_key, key, sizeof(*key));
+       sfid->unmasked_key = new_key;
+
+       return 0;
+}
+
+u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
+{
+       return attr ? nla_get_u32(attr) : 0;
+}
+
 /**
  * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
  * @key: Receives extracted in_port, priority, tun_key and skb_mark.
@@ -1216,12 +1269,12 @@ int ovs_nla_get_flow_metadata(const struct nlattr *attr,
        return metadata_from_nlattrs(&match, &attrs, a, false, log);
 }
 
-int ovs_nla_put_flow(const struct sw_flow_key *swkey,
-                    const struct sw_flow_key *output, struct sk_buff *skb)
+static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
+                            const struct sw_flow_key *output, bool is_mask,
+                            struct sk_buff *skb)
 {
        struct ovs_key_ethernet *eth_key;
        struct nlattr *nla, *encap;
-       bool is_mask = (swkey != output);
 
        if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
                goto nla_put_failure;
@@ -1431,6 +1484,49 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
        return -EMSGSIZE;
 }
 
+int ovs_nla_put_key(const struct sw_flow_key *swkey,
+                   const struct sw_flow_key *output, int attr, bool is_mask,
+                   struct sk_buff *skb)
+{
+       int err;
+       struct nlattr *nla;
+
+       nla = nla_nest_start(skb, attr);
+       if (!nla)
+               return -EMSGSIZE;
+       err = __ovs_nla_put_key(swkey, output, is_mask, skb);
+       if (err)
+               return err;
+       nla_nest_end(skb, nla);
+
+       return 0;
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb)
+{
+       if (ovs_identifier_is_ufid(&flow->id))
+               return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len,
+                              flow->id.ufid);
+
+       return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key,
+                              OVS_FLOW_ATTR_KEY, false, skb);
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb)
+{
+       return ovs_nla_put_key(&flow->mask->key, &flow->key,
+                              OVS_FLOW_ATTR_KEY, false, skb);
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
+{
+       return ovs_nla_put_key(&flow->key, &flow->mask->key,
+                              OVS_FLOW_ATTR_MASK, true, skb);
+}
+
 #define MAX_ACTIONS_BUFSIZE    (32 * 1024)
 
 static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log)

diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h

@@ -43,16 +43,25 @@ size_t ovs_key_attr_size(void);
 void ovs_match_init(struct sw_flow_match *match,
                    struct sw_flow_key *key, struct sw_flow_mask *mask);
 
-int ovs_nla_put_flow(const struct sw_flow_key *,
-                    const struct sw_flow_key *, struct sk_buff *);
+int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *,
+                   int attr, bool is_mask, struct sk_buff *);
 int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *,
                              bool log);
 
+int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb);
+int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb);
+int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb);
+
 int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key,
                      const struct nlattr *mask, bool log);
 int ovs_nla_put_egress_tunnel_key(struct sk_buff *,
                                  const struct ovs_tunnel_info *);
 
+bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log);
+int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
+                          const struct sw_flow_key *key, bool log);
+u32 ovs_nla_get_ufid_flags(const struct nlattr *attr);
+
 int ovs_nla_copy_actions(const struct nlattr *attr,
                         const struct sw_flow_key *key,
                         struct sw_flow_actions **sfa, bool log);

diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c

@@ -139,6 +139,8 @@ static void flow_free(struct sw_flow *flow)
 {
        int node;
 
+       if (ovs_identifier_is_key(&flow->id))
+               kfree(flow->id.unmasked_key);
        kfree((struct sw_flow_actions __force *)flow->sf_acts);
        for_each_node(node)
                if (flow->stats[node])
@@ -200,18 +202,28 @@ static struct table_instance *table_instance_alloc(int new_size)
 
 int ovs_flow_tbl_init(struct flow_table *table)
 {
-       struct table_instance *ti;
+       struct table_instance *ti, *ufid_ti;
 
        ti = table_instance_alloc(TBL_MIN_BUCKETS);
 
        if (!ti)
                return -ENOMEM;
 
+       ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
+       if (!ufid_ti)
+               goto free_ti;
+
        rcu_assign_pointer(table->ti, ti);
+       rcu_assign_pointer(table->ufid_ti, ufid_ti);
        INIT_LIST_HEAD(&table->mask_list);
        table->last_rehash = jiffies;
        table->count = 0;
+       table->ufid_count = 0;
        return 0;
+
+free_ti:
+       __table_instance_destroy(ti);
+       return -ENOMEM;
 }
 
 static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
@@ -221,13 +233,16 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
        __table_instance_destroy(ti);
 }
 
-static void table_instance_destroy(struct table_instance *ti, bool deferred)
+static void table_instance_destroy(struct table_instance *ti,
+                                  struct table_instance *ufid_ti,
+                                  bool deferred)
 {
        int i;
 
        if (!ti)
                return;
 
+       BUG_ON(!ufid_ti);
        if (ti->keep_flows)
                goto skip_flows;
 
@@ -236,18 +251,24 @@ static void table_instance_destroy(struct table_instance *ti, bool deferred)
                struct hlist_head *head = flex_array_get(ti->buckets, i);
                struct hlist_node *n;
                int ver = ti->node_ver;
+               int ufid_ver = ufid_ti->node_ver;
 
-               hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
-                       hlist_del_rcu(&flow->hash_node[ver]);
+               hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) {
+                       hlist_del_rcu(&flow->flow_table.node[ver]);
+                       if (ovs_identifier_is_ufid(&flow->id))
+                               hlist_del_rcu(&flow->ufid_table.node[ufid_ver]);
                        ovs_flow_free(flow, deferred);
                }
        }
 
 skip_flows:
-       if (deferred)
+       if (deferred) {
                call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
-       else
+               call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb);
+       } else {
                __table_instance_destroy(ti);
+               __table_instance_destroy(ufid_ti);
+       }
 }
 
 /* No need for locking this function is called from RCU callback or
@@ -256,8 +277,9 @@ static void table_instance_destroy(struct table_instance *ti, bool deferred)
 void ovs_flow_tbl_destroy(struct flow_table *table)
 {
        struct table_instance *ti = rcu_dereference_raw(table->ti);
+       struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
 
-       table_instance_destroy(ti, false);
+       table_instance_destroy(ti, ufid_ti, false);
 }
 
 struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
@@ -272,7 +294,7 @@ struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
        while (*bucket < ti->n_buckets) {
                i = 0;
                head = flex_array_get(ti->buckets, *bucket);
-               hlist_for_each_entry_rcu(flow, head, hash_node[ver]) {
+               hlist_for_each_entry_rcu(flow, head, flow_table.node[ver]) {
                        if (i < *last) {
                                i++;
                                continue;
@@ -294,16 +316,26 @@ static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash)
                                (hash & (ti->n_buckets - 1)));
 }
 
-static void table_instance_insert(struct table_instance *ti, struct sw_flow *flow)
+static void table_instance_insert(struct table_instance *ti,
+                                 struct sw_flow *flow)
 {
        struct hlist_head *head;
 
-       head = find_bucket(ti, flow->hash);
-       hlist_add_head_rcu(&flow->hash_node[ti->node_ver], head);
+       head = find_bucket(ti, flow->flow_table.hash);
+       hlist_add_head_rcu(&flow->flow_table.node[ti->node_ver], head);
+}
+
+static void ufid_table_instance_insert(struct table_instance *ti,
+                                      struct sw_flow *flow)
+{
+       struct hlist_head *head;
+
+       head = find_bucket(ti, flow->ufid_table.hash);
+       hlist_add_head_rcu(&flow->ufid_table.node[ti->node_ver], head);
 }
 
 static void flow_table_copy_flows(struct table_instance *old,
-                                 struct table_instance *new)
+                                 struct table_instance *new, bool ufid)
 {
        int old_ver;
        int i;
@@ -318,15 +350,21 @@ static void flow_table_copy_flows(struct table_instance *old,
 
                head = flex_array_get(old->buckets, i);
 
-               hlist_for_each_entry(flow, head, hash_node[old_ver])
-                       table_instance_insert(new, flow);
+               if (ufid)
+                       hlist_for_each_entry(flow, head,
+                                            ufid_table.node[old_ver])
+                               ufid_table_instance_insert(new, flow);
+               else
+                       hlist_for_each_entry(flow, head,
+                                            flow_table.node[old_ver])
+                               table_instance_insert(new, flow);
        }
 
        old->keep_flows = true;
 }
 
 static struct table_instance *table_instance_rehash(struct table_instance *ti,
-                                                   int n_buckets)
+                                                   int n_buckets, bool ufid)
 {
        struct table_instance *new_ti;
 
@@ -334,32 +372,45 @@ static struct table_instance *table_instance_rehash(struct table_instance *ti,
        if (!new_ti)
                return NULL;
 
-       flow_table_copy_flows(ti, new_ti);
+       flow_table_copy_flows(ti, new_ti, ufid);
 
        return new_ti;
 }
 
 int ovs_flow_tbl_flush(struct flow_table *flow_table)
 {
-       struct table_instance *old_ti;
-       struct table_instance *new_ti;
+       struct table_instance *old_ti, *new_ti;
+       struct table_instance *old_ufid_ti, *new_ufid_ti;
 
-       old_ti = ovsl_dereference(flow_table->ti);
        new_ti = table_instance_alloc(TBL_MIN_BUCKETS);
        if (!new_ti)
                return -ENOMEM;
+       new_ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
+       if (!new_ufid_ti)
+               goto err_free_ti;
+
+       old_ti = ovsl_dereference(flow_table->ti);
+       old_ufid_ti = ovsl_dereference(flow_table->ufid_ti);
 
        rcu_assign_pointer(flow_table->ti, new_ti);
+       rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti);
        flow_table->last_rehash = jiffies;
        flow_table->count = 0;
+       flow_table->ufid_count = 0;
 
-       table_instance_destroy(old_ti, true);
+       table_instance_destroy(old_ti, old_ufid_ti, true);
        return 0;
+
+err_free_ti:
+       __table_instance_destroy(new_ti);
+       return -ENOMEM;
 }
 
-static u32 flow_hash(const struct sw_flow_key *key, int key_start,
-                    int key_end)
+static u32 flow_hash(const struct sw_flow_key *key,
+                    const struct sw_flow_key_range *range)
 {
+       int key_start = range->start;
+       int key_end = range->end;
        const u32 *hash_key = (const u32 *)((const u8 *)key + key_start);
        int hash_u32s = (key_end - key_start) >> 2;
 
@@ -395,19 +446,20 @@ static bool cmp_key(const struct sw_flow_key *key1,
 
 static bool flow_cmp_masked_key(const struct sw_flow *flow,
                                const struct sw_flow_key *key,
-                               int key_start, int key_end)
+                               const struct sw_flow_key_range *range)
 {
-       return cmp_key(&flow->key, key, key_start, key_end);
+       return cmp_key(&flow->key, key, range->start, range->end);
 }
 
-bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
-                              const struct sw_flow_match *match)
+static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
+                                     const struct sw_flow_match *match)
 {
        struct sw_flow_key *key = match->key;
        int key_start = flow_key_start(key);
        int key_end = match->range.end;
 
-       return cmp_key(&flow->unmasked_key, key, key_start, key_end);
+       BUG_ON(ovs_identifier_is_ufid(&flow->id));
+       return cmp_key(flow->id.unmasked_key, key, key_start, key_end);
 }
 
 static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
@@ -416,18 +468,15 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
 {
        struct sw_flow *flow;
        struct hlist_head *head;
-       int key_start = mask->range.start;
-       int key_end = mask->range.end;
        u32 hash;
        struct sw_flow_key masked_key;
 
        ovs_flow_mask_key(&masked_key, unmasked, mask);
-       hash = flow_hash(&masked_key, key_start, key_end);
+       hash = flow_hash(&masked_key, &mask->range);
        head = find_bucket(ti, hash);
-       hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) {
-               if (flow->mask == mask && flow->hash == hash &&
-                   flow_cmp_masked_key(flow, &masked_key,
-                                       key_start, key_end))
+       hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) {
+               if (flow->mask == mask && flow->flow_table.hash == hash &&
+                   flow_cmp_masked_key(flow, &masked_key, &mask->range))
                        return flow;
        }
        return NULL;
@@ -469,7 +518,48 @@ struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
        /* Always called under ovs-mutex. */
        list_for_each_entry(mask, &tbl->mask_list, list) {
                flow = masked_flow_lookup(ti, match->key, mask);
-               if (flow && ovs_flow_cmp_unmasked_key(flow, match))  /* Found */
+               if (flow && ovs_identifier_is_key(&flow->id) &&
+                   ovs_flow_cmp_unmasked_key(flow, match))
+                       return flow;
+       }
+       return NULL;
+}
+
+static u32 ufid_hash(const struct sw_flow_id *sfid)
+{
+       return jhash(sfid->ufid, sfid->ufid_len, 0);
+}
+
+static bool ovs_flow_cmp_ufid(const struct sw_flow *flow,
+                             const struct sw_flow_id *sfid)
+{
+       if (flow->id.ufid_len != sfid->ufid_len)
+               return false;
+
+       return !memcmp(flow->id.ufid, sfid->ufid, sfid->ufid_len);
+}
+
+bool ovs_flow_cmp(const struct sw_flow *flow, const struct sw_flow_match *match)
+{
+       if (ovs_identifier_is_ufid(&flow->id))
+               return flow_cmp_masked_key(flow, match->key, &match->range);
+
+       return ovs_flow_cmp_unmasked_key(flow, match);
+}
+
+struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
+                                        const struct sw_flow_id *ufid)
+{
+       struct table_instance *ti = rcu_dereference_ovsl(tbl->ufid_ti);
+       struct sw_flow *flow;
+       struct hlist_head *head;
+       u32 hash;
+
+       hash = ufid_hash(ufid);
+       head = find_bucket(ti, hash);
+       hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver]) {
+               if (flow->ufid_table.hash == hash &&
+                   ovs_flow_cmp_ufid(flow, ufid))
                        return flow;
        }
        return NULL;
@@ -486,9 +576,10 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table)
        return num;
 }
 
-static struct table_instance *table_instance_expand(struct table_instance *ti)
+static struct table_instance *table_instance_expand(struct table_instance *ti,
+                                                   bool ufid)
 {
-       return table_instance_rehash(ti, ti->n_buckets * 2);
+       return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
 }
 
 /* Remove 'mask' from the mask list, if it is not needed any more. */
@@ -513,10 +604,15 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
 void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
 {
        struct table_instance *ti = ovsl_dereference(table->ti);
+       struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
 
        BUG_ON(table->count == 0);
-       hlist_del_rcu(&flow->hash_node[ti->node_ver]);
+       hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
        table->count--;
+       if (ovs_identifier_is_ufid(&flow->id)) {
+               hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
+               table->ufid_count--;
+       }
 
        /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
        * accessible as long as the RCU read lock is held.
@@ -585,34 +681,64 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
 }
 
 /* Must be called with OVS mutex held. */
-int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
-                       const struct sw_flow_mask *mask)
+static void flow_key_insert(struct flow_table *table, struct sw_flow *flow)
 {
        struct table_instance *new_ti = NULL;
        struct table_instance *ti;
-       int err;
 
-       err = flow_mask_insert(table, flow, mask);
-       if (err)
-               return err;
-
-       flow->hash = flow_hash(&flow->key, flow->mask->range.start,
-                       flow->mask->range.end);
+       flow->flow_table.hash = flow_hash(&flow->key, &flow->mask->range);
        ti = ovsl_dereference(table->ti);
        table_instance_insert(ti, flow);
        table->count++;
 
        /* Expand table, if necessary, to make room. */
        if (table->count > ti->n_buckets)
-               new_ti = table_instance_expand(ti);
+               new_ti = table_instance_expand(ti, false);
        else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL))
-               new_ti = table_instance_rehash(ti, ti->n_buckets);
+               new_ti = table_instance_rehash(ti, ti->n_buckets, false);
 
        if (new_ti) {
                rcu_assign_pointer(table->ti, new_ti);
-               table_instance_destroy(ti, true);
+               call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
                table->last_rehash = jiffies;
        }
+}
+
+/* Must be called with OVS mutex held. */
+static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow)
+{
+       struct table_instance *ti;
+
+       flow->ufid_table.hash = ufid_hash(&flow->id);
+       ti = ovsl_dereference(table->ufid_ti);
+       ufid_table_instance_insert(ti, flow);
+       table->ufid_count++;
+
+       /* Expand table, if necessary, to make room. */
+       if (table->ufid_count > ti->n_buckets) {
+               struct table_instance *new_ti;
+
+               new_ti = table_instance_expand(ti, true);
+               if (new_ti) {
+                       rcu_assign_pointer(table->ufid_ti, new_ti);
+                       call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
+               }
+       }
+}
+
+/* Must be called with OVS mutex held. */
+int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
+                       const struct sw_flow_mask *mask)
+{
+       int err;
+
+       err = flow_mask_insert(table, flow, mask);
+       if (err)
+               return err;
+       flow_key_insert(table, flow);
+       if (ovs_identifier_is_ufid(&flow->id))
+               flow_ufid_insert(table, flow);
+
        return 0;
 }
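ufid_hash() feeds the same find_bucket() helper as the flow-key hash. Because a table instance's n_buckets is always a power of two (starting at TBL_MIN_BUCKETS and doubled by table_instance_expand()), the bucket is selected by masking rather than a modulo; a stand-alone sketch of that reduction:

#include <stdint.h>
#include <stdio.h>

/* Mirrors find_bucket() above: valid only when n_buckets is a
 * power of two, which the doubling expansion preserves. */
static unsigned int bucket_index(uint32_t hash, unsigned int n_buckets)
{
        return hash & (n_buckets - 1);
}

int main(void)
{
        printf("%u\n", bucket_index(0xdeadbeefu, 1024)); /* prints 751 */
        return 0;
}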

diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h

@@ -47,9 +47,11 @@ struct table_instance {
 
 struct flow_table {
        struct table_instance __rcu *ti;
+       struct table_instance __rcu *ufid_ti;
        struct list_head mask_list;
        unsigned long last_rehash;
        unsigned int count;
+       unsigned int ufid_count;
 };
 
 extern struct kmem_cache *flow_stats_cache;
@@ -78,8 +80,10 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
                                    const struct sw_flow_key *);
 struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
                                          const struct sw_flow_match *match);
-bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
-                              const struct sw_flow_match *match);
+struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *,
+                                        const struct sw_flow_id *);
+bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *);
+
 void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
                       const struct sw_flow_mask *mask);