diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 7f41f7d0000f..66d1c3ccfd8e 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -578,10 +578,25 @@ enum ovs_sample_attr { OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */ OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ __OVS_SAMPLE_ATTR_MAX, + +#ifdef __KERNEL__ + OVS_SAMPLE_ATTR_ARG /* struct sample_arg */ +#endif }; #define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1) +#ifdef __KERNEL__ +struct sample_arg { + bool exec; /* When true, actions in sample will not + * change flow keys. False otherwise. + */ + u32 probability; /* Same value as + * 'OVS_SAMPLE_ATTR_PROBABILITY'. + */ +}; +#endif + /** * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action. * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index c82301ce3fff..e4610676299b 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2014 Nicira, Inc. + * Copyright (c) 2007-2017 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -44,13 +44,10 @@ #include "conntrack.h" #include "vport.h" -static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, - struct sw_flow_key *key, - const struct nlattr *attr, int len); - struct deferred_action { struct sk_buff *skb; const struct nlattr *actions; + int actions_len; /* Store pkt_key clone when creating deferred action. */ struct sw_flow_key pkt_key; @@ -82,14 +79,31 @@ struct action_fifo { struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE]; }; -struct recirc_keys { +struct action_flow_keys { struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD]; }; static struct action_fifo __percpu *action_fifos; -static struct recirc_keys __percpu *recirc_keys; +static struct action_flow_keys __percpu *flow_keys; static DEFINE_PER_CPU(int, exec_actions_level); +/* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys' + * space. Return NULL if out of key spaces. + */ +static struct sw_flow_key *clone_key(const struct sw_flow_key *key_) +{ + struct action_flow_keys *keys = this_cpu_ptr(flow_keys); + int level = this_cpu_read(exec_actions_level); + struct sw_flow_key *key = NULL; + + if (level <= OVS_DEFERRED_ACTION_THRESHOLD) { + key = &keys->key[level - 1]; + *key = *key_; + } + + return key; +} + static void action_fifo_init(struct action_fifo *fifo) { fifo->head = 0; @@ -119,8 +133,9 @@ static struct deferred_action *action_fifo_put(struct action_fifo *fifo) /* Return true if fifo is not full */ static struct deferred_action *add_deferred_actions(struct sk_buff *skb, - const struct sw_flow_key *key, - const struct nlattr *attr) + const struct sw_flow_key *key, + const struct nlattr *actions, + const int actions_len) { struct action_fifo *fifo; struct deferred_action *da; @@ -129,7 +144,8 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb, da = action_fifo_put(fifo); if (da) { da->skb = skb; - da->actions = attr; + da->actions = actions; + da->actions_len = actions_len; da->pkt_key = *key; } @@ -146,6 +162,12 @@ static bool is_flow_key_valid(const struct sw_flow_key *key) return !(key->mac_proto & SW_FLOW_KEY_INVALID); } +static int clone_execute(struct datapath *dp, struct sk_buff *skb, + struct sw_flow_key *key, + u32 recirc_id, + const struct nlattr *actions, int len, + bool last, bool clone_flow_key); + static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr, __be16 ethertype) { @@ -908,72 +930,35 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, return ovs_dp_upcall(dp, skb, key, &upcall, cutlen); } +/* When 'last' is true, sample() should always consume the 'skb'. + * Otherwise, sample() should keep 'skb' intact regardless what + * actions are executed within sample(). + */ static int sample(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, const struct nlattr *attr, - const struct nlattr *actions, int actions_len) + bool last) { - const struct nlattr *acts_list = NULL; - const struct nlattr *a; - int rem; - u32 cutlen = 0; + struct nlattr *actions; + struct nlattr *sample_arg; + int rem = nla_len(attr); + const struct sample_arg *arg; + bool clone_flow_key; - for (a = nla_data(attr), rem = nla_len(attr); rem > 0; - a = nla_next(a, &rem)) { - u32 probability; + /* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */ + sample_arg = nla_data(attr); + arg = nla_data(sample_arg); + actions = nla_next(sample_arg, &rem); - switch (nla_type(a)) { - case OVS_SAMPLE_ATTR_PROBABILITY: - probability = nla_get_u32(a); - if (!probability || prandom_u32() > probability) - return 0; - break; - - case OVS_SAMPLE_ATTR_ACTIONS: - acts_list = a; - break; - } - } - - rem = nla_len(acts_list); - a = nla_data(acts_list); - - /* Actions list is empty, do nothing */ - if (unlikely(!rem)) + if ((arg->probability != U32_MAX) && + (!arg->probability || prandom_u32() > arg->probability)) { + if (last) + consume_skb(skb); return 0; - - /* The only known usage of sample action is having a single user-space - * action, or having a truncate action followed by a single user-space - * action. Treat this usage as a special case. - * The output_userspace() should clone the skb to be sent to the - * user space. This skb will be consumed by its caller. - */ - if (unlikely(nla_type(a) == OVS_ACTION_ATTR_TRUNC)) { - struct ovs_action_trunc *trunc = nla_data(a); - - if (skb->len > trunc->max_len) - cutlen = skb->len - trunc->max_len; - - a = nla_next(a, &rem); } - if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE && - nla_is_last(a, rem))) - return output_userspace(dp, skb, key, a, actions, - actions_len, cutlen); - - skb = skb_clone(skb, GFP_ATOMIC); - if (!skb) - /* Skip the sample action when out of memory. */ - return 0; - - if (!add_deferred_actions(skb, key, a)) { - if (net_ratelimit()) - pr_warn("%s: deferred actions limit reached, dropping sample action\n", - ovs_dp_name(dp)); - - kfree_skb(skb); - } - return 0; + clone_flow_key = !arg->exec; + return clone_execute(dp, skb, key, 0, actions, rem, last, + clone_flow_key); } static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key, @@ -1084,10 +1069,9 @@ static int execute_masked_set_action(struct sk_buff *skb, static int execute_recirc(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, - const struct nlattr *a, int rem) + const struct nlattr *a, bool last) { - struct deferred_action *da; - int level; + u32 recirc_id; if (!is_flow_key_valid(key)) { int err; @@ -1098,43 +1082,8 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb, } BUG_ON(!is_flow_key_valid(key)); - if (!nla_is_last(a, rem)) { - /* Recirc action is the not the last action - * of the action list, need to clone the skb. - */ - skb = skb_clone(skb, GFP_ATOMIC); - - /* Skip the recirc action when out of memory, but - * continue on with the rest of the action list. - */ - if (!skb) - return 0; - } - - level = this_cpu_read(exec_actions_level); - if (level <= OVS_DEFERRED_ACTION_THRESHOLD) { - struct recirc_keys *rks = this_cpu_ptr(recirc_keys); - struct sw_flow_key *recirc_key = &rks->key[level - 1]; - - *recirc_key = *key; - recirc_key->recirc_id = nla_get_u32(a); - ovs_dp_process_packet(skb, recirc_key); - - return 0; - } - - da = add_deferred_actions(skb, key, NULL); - if (da) { - da->pkt_key.recirc_id = nla_get_u32(a); - } else { - kfree_skb(skb); - - if (net_ratelimit()) - pr_warn("%s: deferred action limit reached, drop recirc action\n", - ovs_dp_name(dp)); - } - - return 0; + recirc_id = nla_get_u32(a); + return clone_execute(dp, skb, key, recirc_id, NULL, 0, last, true); } /* Execute a list of actions against 'skb'. */ @@ -1206,9 +1155,11 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, err = pop_vlan(skb, key); break; - case OVS_ACTION_ATTR_RECIRC: - err = execute_recirc(dp, skb, key, a, rem); - if (nla_is_last(a, rem)) { + case OVS_ACTION_ATTR_RECIRC: { + bool last = nla_is_last(a, rem); + + err = execute_recirc(dp, skb, key, a, last); + if (last) { /* If this is the last action, the skb has * been consumed or freed. * Return immediately. @@ -1216,6 +1167,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, return err; } break; + } case OVS_ACTION_ATTR_SET: err = execute_set_action(skb, key, nla_data(a)); @@ -1226,9 +1178,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, err = execute_masked_set_action(skb, key, nla_data(a)); break; - case OVS_ACTION_ATTR_SAMPLE: - err = sample(dp, skb, key, a, attr, len); + case OVS_ACTION_ATTR_SAMPLE: { + bool last = nla_is_last(a, rem); + + err = sample(dp, skb, key, a, last); + if (last) + return err; + break; + } case OVS_ACTION_ATTR_CT: if (!is_flow_key_valid(key)) { @@ -1264,6 +1222,79 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, return 0; } +/* Execute the actions on the clone of the packet. The effect of the + * execution does not affect the original 'skb' nor the original 'key'. + * + * The execution may be deferred in case the actions can not be executed + * immediately. + */ +static int clone_execute(struct datapath *dp, struct sk_buff *skb, + struct sw_flow_key *key, u32 recirc_id, + const struct nlattr *actions, int len, + bool last, bool clone_flow_key) +{ + struct deferred_action *da; + struct sw_flow_key *clone; + + skb = last ? skb : skb_clone(skb, GFP_ATOMIC); + if (!skb) { + /* Out of memory, skip this action. + */ + return 0; + } + + /* When clone_flow_key is false, the 'key' will not be change + * by the actions, then the 'key' can be used directly. + * Otherwise, try to clone key from the next recursion level of + * 'flow_keys'. If clone is successful, execute the actions + * without deferring. + */ + clone = clone_flow_key ? clone_key(key) : key; + if (clone) { + int err = 0; + + if (actions) { /* Sample action */ + if (clone_flow_key) + __this_cpu_inc(exec_actions_level); + + err = do_execute_actions(dp, skb, clone, + actions, len); + + if (clone_flow_key) + __this_cpu_dec(exec_actions_level); + } else { /* Recirc action */ + clone->recirc_id = recirc_id; + ovs_dp_process_packet(skb, clone); + } + return err; + } + + /* Out of 'flow_keys' space. Defer actions */ + da = add_deferred_actions(skb, key, actions, len); + if (da) { + if (!actions) { /* Recirc action */ + key = &da->pkt_key; + key->recirc_id = recirc_id; + } + } else { + /* Out of per CPU action FIFO space. Drop the 'skb' and + * log an error. + */ + kfree_skb(skb); + + if (net_ratelimit()) { + if (actions) { /* Sample action */ + pr_warn("%s: deferred action limit reached, drop sample action\n", + ovs_dp_name(dp)); + } else { /* Recirc action */ + pr_warn("%s: deferred action limit reached, drop recirc action\n", + ovs_dp_name(dp)); + } + } + } + return 0; +} + static void process_deferred_actions(struct datapath *dp) { struct action_fifo *fifo = this_cpu_ptr(action_fifos); @@ -1278,10 +1309,10 @@ static void process_deferred_actions(struct datapath *dp) struct sk_buff *skb = da->skb; struct sw_flow_key *key = &da->pkt_key; const struct nlattr *actions = da->actions; + int actions_len = da->actions_len; if (actions) - do_execute_actions(dp, skb, key, actions, - nla_len(actions)); + do_execute_actions(dp, skb, key, actions, actions_len); else ovs_dp_process_packet(skb, key); } while (!action_fifo_is_empty(fifo)); @@ -1323,8 +1354,8 @@ int action_fifos_init(void) if (!action_fifos) return -ENOMEM; - recirc_keys = alloc_percpu(struct recirc_keys); - if (!recirc_keys) { + flow_keys = alloc_percpu(struct action_flow_keys); + if (!flow_keys) { free_percpu(action_fifos); return -ENOMEM; } @@ -1335,5 +1366,5 @@ int action_fifos_init(void) void action_fifos_exit(void) { free_percpu(action_fifos); - free_percpu(recirc_keys); + free_percpu(flow_keys); } diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 1c6e9377436d..da931bdef8a7 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -34,8 +34,6 @@ #define DP_MAX_PORTS USHRT_MAX #define DP_VPORT_HASH_BUCKETS 1024 -#define SAMPLE_ACTION_DEPTH 3 - /** * struct dp_stats_percpu - per-cpu packet processing statistics for a given * datapath. diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 6f5fa50f716d..2acfb5af2c45 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2014 Nicira, Inc. + * Copyright (c) 2007-2017 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -59,6 +59,39 @@ struct ovs_len_tbl { #define OVS_ATTR_NESTED -1 #define OVS_ATTR_VARIABLE -2 +static bool actions_may_change_flow(const struct nlattr *actions) +{ + struct nlattr *nla; + int rem; + + nla_for_each_nested(nla, actions, rem) { + u16 action = nla_type(nla); + + switch (action) { + case OVS_ACTION_ATTR_OUTPUT: + case OVS_ACTION_ATTR_RECIRC: + case OVS_ACTION_ATTR_TRUNC: + case OVS_ACTION_ATTR_USERSPACE: + break; + + case OVS_ACTION_ATTR_CT: + case OVS_ACTION_ATTR_HASH: + case OVS_ACTION_ATTR_POP_ETH: + case OVS_ACTION_ATTR_POP_MPLS: + case OVS_ACTION_ATTR_POP_VLAN: + case OVS_ACTION_ATTR_PUSH_ETH: + case OVS_ACTION_ATTR_PUSH_MPLS: + case OVS_ACTION_ATTR_PUSH_VLAN: + case OVS_ACTION_ATTR_SAMPLE: + case OVS_ACTION_ATTR_SET: + case OVS_ACTION_ATTR_SET_MASKED: + default: + return true; + } + } + return false; +} + static void update_range(struct sw_flow_match *match, size_t offset, size_t size, bool is_mask) { @@ -2021,18 +2054,20 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa, static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, - int depth, struct sw_flow_actions **sfa, + struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, bool log); static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, - const struct sw_flow_key *key, int depth, + const struct sw_flow_key *key, struct sw_flow_actions **sfa, - __be16 eth_type, __be16 vlan_tci, bool log) + __be16 eth_type, __be16 vlan_tci, + bool log, bool last) { const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *probability, *actions; const struct nlattr *a; - int rem, start, err, st_acts; + int rem, start, err; + struct sample_arg arg; memset(attrs, 0, sizeof(attrs)); nla_for_each_nested(a, attr, rem) { @@ -2056,20 +2091,32 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log); if (start < 0) return start; - err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, - nla_data(probability), sizeof(u32), log); + + /* When both skb and flow may be changed, put the sample + * into a deferred fifo. On the other hand, if only skb + * may be modified, the actions can be executed in place. + * + * Do this analysis at the flow installation time. + * Set 'clone_action->exec' to true if the actions can be + * executed without being deferred. + * + * If the sample is the last action, it can always be excuted + * rather than deferred. + */ + arg.exec = last || !actions_may_change_flow(actions); + arg.probability = nla_get_u32(probability); + + err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg), + log); if (err) return err; - st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log); - if (st_acts < 0) - return st_acts; - err = __ovs_nla_copy_actions(net, actions, key, depth + 1, sfa, + err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type, vlan_tci, log); + if (err) return err; - add_nested_action_end(*sfa, st_acts); add_nested_action_end(*sfa, start); return 0; @@ -2406,16 +2453,13 @@ static int copy_action(const struct nlattr *from, static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, - int depth, struct sw_flow_actions **sfa, + struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, bool log) { u8 mac_proto = ovs_key_mac_proto(key); const struct nlattr *a; int rem, err; - if (depth >= SAMPLE_ACTION_DEPTH) - return -EOVERFLOW; - nla_for_each_nested(a, attr, rem) { /* Expected argument lengths, (u32)-1 for variable length. */ static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { @@ -2553,13 +2597,17 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, return err; break; - case OVS_ACTION_ATTR_SAMPLE: - err = validate_and_copy_sample(net, a, key, depth, sfa, - eth_type, vlan_tci, log); + case OVS_ACTION_ATTR_SAMPLE: { + bool last = nla_is_last(a, rem); + + err = validate_and_copy_sample(net, a, key, sfa, + eth_type, vlan_tci, + log, last); if (err) return err; skip_copy = true; break; + } case OVS_ACTION_ATTR_CT: err = ovs_ct_copy_action(net, a, key, sfa, log); @@ -2613,7 +2661,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, return PTR_ERR(*sfa); (*sfa)->orig_len = nla_len(attr); - err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type, + err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type, key->eth.vlan.tci, log); if (err) ovs_nla_free_flow_actions(*sfa); @@ -2621,39 +2669,44 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, return err; } -static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) +static int sample_action_to_attr(const struct nlattr *attr, + struct sk_buff *skb) { - const struct nlattr *a; - struct nlattr *start; - int err = 0, rem; + struct nlattr *start, *ac_start = NULL, *sample_arg; + int err = 0, rem = nla_len(attr); + const struct sample_arg *arg; + struct nlattr *actions; start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); if (!start) return -EMSGSIZE; - nla_for_each_nested(a, attr, rem) { - int type = nla_type(a); - struct nlattr *st_sample; + sample_arg = nla_data(attr); + arg = nla_data(sample_arg); + actions = nla_next(sample_arg, &rem); - switch (type) { - case OVS_SAMPLE_ATTR_PROBABILITY: - if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, - sizeof(u32), nla_data(a))) - return -EMSGSIZE; - break; - case OVS_SAMPLE_ATTR_ACTIONS: - st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); - if (!st_sample) - return -EMSGSIZE; - err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); - if (err) - return err; - nla_nest_end(skb, st_sample); - break; - } + if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) { + err = -EMSGSIZE; + goto out; + } + + ac_start = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); + if (!ac_start) { + err = -EMSGSIZE; + goto out; + } + + err = ovs_nla_put_actions(actions, rem, skb); + +out: + if (err) { + nla_nest_cancel(skb, ac_start); + nla_nest_cancel(skb, start); + } else { + nla_nest_end(skb, ac_start); + nla_nest_end(skb, start); } - nla_nest_end(skb, start); return err; }