net/mlx5e: Add accelerated RFS support

Implement ndo_rx_flow_steer ndo.
A new flow steering rule will be composed from the
skb 4-tuple and added to the hardware aRFS flow table.

Each rule is stored in an internal hash table, if such
skb 4-tuple rule already exists we update the corresponding
hardware steering rule with the new destination.

For garbage collection rps_may_expire_flow will be
invoked for a limited amount of old rules upon any
ndo_rx_flow_steer invocation.

Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Maor Gottlieb 2016-04-29 01:36:41 +03:00 committed by David S. Miller
parent 1cabe6b096
commit 18c908e477
2 changed files with 436 additions and 1 deletions

View File

@ -448,9 +448,12 @@ struct mlx5e_ttc_table {
struct mlx5_flow_rule *rules[MLX5E_NUM_TT];
};
#define ARFS_HASH_SHIFT BITS_PER_BYTE
#define ARFS_HASH_SIZE BIT(BITS_PER_BYTE)
struct arfs_table {
struct mlx5e_flow_table ft;
struct mlx5_flow_rule *default_rule;
struct hlist_head rules_hash[ARFS_HASH_SIZE];
};
enum arfs_type {
@ -463,6 +466,11 @@ enum arfs_type {
struct mlx5e_arfs_tables {
struct arfs_table arfs_tables[ARFS_NUM_TYPES];
/* Protect aRFS rules list */
spinlock_t arfs_lock;
struct list_head rules;
int last_filter_id;
struct workqueue_struct *wq;
};
/* NIC prio FTS */
@ -685,6 +693,8 @@ static inline void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv) {}
#else
int mlx5e_arfs_create_tables(struct mlx5e_priv *priv);
void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv);
int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id);
#endif
u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev);

View File

@ -30,8 +30,47 @@
* SOFTWARE.
*/
#include "en.h"
#include <linux/hash.h>
#include <linux/mlx5/fs.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include "en.h"
struct arfs_tuple {
__be16 etype;
u8 ip_proto;
union {
__be32 src_ipv4;
struct in6_addr src_ipv6;
};
union {
__be32 dst_ipv4;
struct in6_addr dst_ipv6;
};
__be16 src_port;
__be16 dst_port;
};
struct arfs_rule {
struct mlx5e_priv *priv;
struct work_struct arfs_work;
struct mlx5_flow_rule *rule;
struct hlist_node hlist;
int rxq;
/* Flow ID passed to ndo_rx_flow_steer */
int flow_id;
/* Filter ID returned by ndo_rx_flow_steer */
int filter_id;
struct arfs_tuple tuple;
};
#define mlx5e_for_each_arfs_rule(hn, tmp, arfs_tables, i, j) \
for (i = 0; i < ARFS_NUM_TYPES; i++) \
mlx5e_for_each_hash_arfs_rule(hn, tmp, arfs_tables[i].rules_hash, j)
#define mlx5e_for_each_hash_arfs_rule(hn, tmp, hash, j) \
for (j = 0; j < ARFS_HASH_SIZE; j++) \
hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist)
static void arfs_destroy_table(struct arfs_table *arfs_t)
{
@ -39,12 +78,17 @@ static void arfs_destroy_table(struct arfs_table *arfs_t)
mlx5e_destroy_flow_table(&arfs_t->ft);
}
static void arfs_del_rules(struct mlx5e_priv *priv);
void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv)
{
int i;
if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
return;
arfs_del_rules(priv);
destroy_workqueue(priv->fs.arfs.wq);
for (i = 0; i < ARFS_NUM_TYPES; i++) {
if (!IS_ERR_OR_NULL(priv->fs.arfs.arfs_tables[i].ft.t))
arfs_destroy_table(&priv->fs.arfs.arfs_tables[i]);
@ -239,6 +283,12 @@ int mlx5e_arfs_create_tables(struct mlx5e_priv *priv)
if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
return 0;
spin_lock_init(&priv->fs.arfs.arfs_lock);
INIT_LIST_HEAD(&priv->fs.arfs.rules);
priv->fs.arfs.wq = create_singlethread_workqueue("mlx5e_arfs");
if (!priv->fs.arfs.wq)
return -ENOMEM;
for (i = 0; i < ARFS_NUM_TYPES; i++) {
err = arfs_create_table(priv, i);
if (err)
@ -249,3 +299,378 @@ int mlx5e_arfs_create_tables(struct mlx5e_priv *priv)
mlx5e_arfs_destroy_tables(priv);
return err;
}
#define MLX5E_ARFS_EXPIRY_QUOTA 60
static void arfs_may_expire_flow(struct mlx5e_priv *priv)
{
struct arfs_rule *arfs_rule;
struct hlist_node *htmp;
int quota = 0;
int i;
int j;
HLIST_HEAD(del_list);
spin_lock_bh(&priv->fs.arfs.arfs_lock);
mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs.arfs_tables, i, j) {
if (quota++ > MLX5E_ARFS_EXPIRY_QUOTA)
break;
if (!work_pending(&arfs_rule->arfs_work) &&
rps_may_expire_flow(priv->netdev,
arfs_rule->rxq, arfs_rule->flow_id,
arfs_rule->filter_id)) {
hlist_del_init(&arfs_rule->hlist);
hlist_add_head(&arfs_rule->hlist, &del_list);
}
}
spin_unlock_bh(&priv->fs.arfs.arfs_lock);
hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) {
if (arfs_rule->rule)
mlx5_del_flow_rule(arfs_rule->rule);
hlist_del(&arfs_rule->hlist);
kfree(arfs_rule);
}
}
static void arfs_del_rules(struct mlx5e_priv *priv)
{
struct hlist_node *htmp;
struct arfs_rule *rule;
int i;
int j;
HLIST_HEAD(del_list);
spin_lock_bh(&priv->fs.arfs.arfs_lock);
mlx5e_for_each_arfs_rule(rule, htmp, priv->fs.arfs.arfs_tables, i, j) {
hlist_del_init(&rule->hlist);
hlist_add_head(&rule->hlist, &del_list);
}
spin_unlock_bh(&priv->fs.arfs.arfs_lock);
hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) {
cancel_work_sync(&rule->arfs_work);
if (rule->rule)
mlx5_del_flow_rule(rule->rule);
hlist_del(&rule->hlist);
kfree(rule);
}
}
static struct hlist_head *
arfs_hash_bucket(struct arfs_table *arfs_t, __be16 src_port,
__be16 dst_port)
{
unsigned long l;
int bucket_idx;
l = (__force unsigned long)src_port |
((__force unsigned long)dst_port << 2);
bucket_idx = hash_long(l, ARFS_HASH_SHIFT);
return &arfs_t->rules_hash[bucket_idx];
}
static u8 arfs_get_ip_proto(const struct sk_buff *skb)
{
return (skb->protocol == htons(ETH_P_IP)) ?
ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
}
static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs,
u8 ip_proto, __be16 etype)
{
if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_TCP)
return &arfs->arfs_tables[ARFS_IPV4_TCP];
if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_UDP)
return &arfs->arfs_tables[ARFS_IPV4_UDP];
if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_TCP)
return &arfs->arfs_tables[ARFS_IPV6_TCP];
if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_UDP)
return &arfs->arfs_tables[ARFS_IPV6_UDP];
return NULL;
}
static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv,
struct arfs_rule *arfs_rule)
{
struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
struct arfs_tuple *tuple = &arfs_rule->tuple;
struct mlx5_flow_rule *rule = NULL;
struct mlx5_flow_destination dest;
struct arfs_table *arfs_table;
u8 match_criteria_enable = 0;
struct mlx5_flow_table *ft;
u32 *match_criteria;
u32 *match_value;
int err = 0;
match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param));
if (!match_value || !match_criteria) {
netdev_err(priv->netdev, "%s: alloc failed\n", __func__);
err = -ENOMEM;
goto out;
}
match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
MLX5_SET_TO_ONES(fte_match_param, match_criteria,
outer_headers.ethertype);
MLX5_SET(fte_match_param, match_value, outer_headers.ethertype,
ntohs(tuple->etype));
arfs_table = arfs_get_table(arfs, tuple->ip_proto, tuple->etype);
if (!arfs_table) {
err = -EINVAL;
goto out;
}
ft = arfs_table->ft.t;
if (tuple->ip_proto == IPPROTO_TCP) {
MLX5_SET_TO_ONES(fte_match_param, match_criteria,
outer_headers.tcp_dport);
MLX5_SET_TO_ONES(fte_match_param, match_criteria,
outer_headers.tcp_sport);
MLX5_SET(fte_match_param, match_value, outer_headers.tcp_dport,
ntohs(tuple->dst_port));
MLX5_SET(fte_match_param, match_value, outer_headers.tcp_sport,
ntohs(tuple->src_port));
} else {
MLX5_SET_TO_ONES(fte_match_param, match_criteria,
outer_headers.udp_dport);
MLX5_SET_TO_ONES(fte_match_param, match_criteria,
outer_headers.udp_sport);
MLX5_SET(fte_match_param, match_value, outer_headers.udp_dport,
ntohs(tuple->dst_port));
MLX5_SET(fte_match_param, match_value, outer_headers.udp_sport,
ntohs(tuple->src_port));
}
if (tuple->etype == htons(ETH_P_IP)) {
memcpy(MLX5_ADDR_OF(fte_match_param, match_value,
outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4),
&tuple->src_ipv4,
4);
memcpy(MLX5_ADDR_OF(fte_match_param, match_value,
outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
&tuple->dst_ipv4,
4);
MLX5_SET_TO_ONES(fte_match_param, match_criteria,
outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
MLX5_SET_TO_ONES(fte_match_param, match_criteria,
outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
} else {
memcpy(MLX5_ADDR_OF(fte_match_param, match_value,
outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
&tuple->src_ipv6,
16);
memcpy(MLX5_ADDR_OF(fte_match_param, match_value,
outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
&tuple->dst_ipv6,
16);
memset(MLX5_ADDR_OF(fte_match_param, match_criteria,
outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
0xff,
16);
memset(MLX5_ADDR_OF(fte_match_param, match_criteria,
outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
0xff,
16);
}
dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn;
rule = mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria,
match_value, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
MLX5_FS_DEFAULT_FLOW_TAG,
&dest);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
netdev_err(priv->netdev, "%s: add rule(filter id=%d, rq idx=%d) failed, err=%d\n",
__func__, arfs_rule->filter_id, arfs_rule->rxq, err);
}
out:
kvfree(match_criteria);
kvfree(match_value);
return err ? ERR_PTR(err) : rule;
}
static void arfs_modify_rule_rq(struct mlx5e_priv *priv,
struct mlx5_flow_rule *rule, u16 rxq)
{
struct mlx5_flow_destination dst;
int err = 0;
dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
dst.tir_num = priv->direct_tir[rxq].tirn;
err = mlx5_modify_rule_destination(rule, &dst);
if (err)
netdev_warn(priv->netdev,
"Failed to modfiy aRFS rule destination to rq=%d\n", rxq);
}
static void arfs_handle_work(struct work_struct *work)
{
struct arfs_rule *arfs_rule = container_of(work,
struct arfs_rule,
arfs_work);
struct mlx5e_priv *priv = arfs_rule->priv;
struct mlx5_flow_rule *rule;
mutex_lock(&priv->state_lock);
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
spin_lock_bh(&priv->fs.arfs.arfs_lock);
hlist_del(&arfs_rule->hlist);
spin_unlock_bh(&priv->fs.arfs.arfs_lock);
mutex_unlock(&priv->state_lock);
kfree(arfs_rule);
goto out;
}
mutex_unlock(&priv->state_lock);
if (!arfs_rule->rule) {
rule = arfs_add_rule(priv, arfs_rule);
if (IS_ERR(rule))
goto out;
arfs_rule->rule = rule;
} else {
arfs_modify_rule_rq(priv, arfs_rule->rule,
arfs_rule->rxq);
}
out:
arfs_may_expire_flow(priv);
}
/* return L4 destination port from ip4/6 packets */
static __be16 arfs_get_dst_port(const struct sk_buff *skb)
{
char *transport_header;
transport_header = skb_transport_header(skb);
if (arfs_get_ip_proto(skb) == IPPROTO_TCP)
return ((struct tcphdr *)transport_header)->dest;
return ((struct udphdr *)transport_header)->dest;
}
/* return L4 source port from ip4/6 packets */
static __be16 arfs_get_src_port(const struct sk_buff *skb)
{
char *transport_header;
transport_header = skb_transport_header(skb);
if (arfs_get_ip_proto(skb) == IPPROTO_TCP)
return ((struct tcphdr *)transport_header)->source;
return ((struct udphdr *)transport_header)->source;
}
static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv,
struct arfs_table *arfs_t,
const struct sk_buff *skb,
u16 rxq, u32 flow_id)
{
struct arfs_rule *rule;
struct arfs_tuple *tuple;
rule = kzalloc(sizeof(*rule), GFP_ATOMIC);
if (!rule)
return NULL;
rule->priv = priv;
rule->rxq = rxq;
INIT_WORK(&rule->arfs_work, arfs_handle_work);
tuple = &rule->tuple;
tuple->etype = skb->protocol;
if (tuple->etype == htons(ETH_P_IP)) {
tuple->src_ipv4 = ip_hdr(skb)->saddr;
tuple->dst_ipv4 = ip_hdr(skb)->daddr;
} else {
memcpy(&tuple->src_ipv6, &ipv6_hdr(skb)->saddr,
sizeof(struct in6_addr));
memcpy(&tuple->dst_ipv6, &ipv6_hdr(skb)->daddr,
sizeof(struct in6_addr));
}
tuple->ip_proto = arfs_get_ip_proto(skb);
tuple->src_port = arfs_get_src_port(skb);
tuple->dst_port = arfs_get_dst_port(skb);
rule->flow_id = flow_id;
rule->filter_id = priv->fs.arfs.last_filter_id++ % RPS_NO_FILTER;
hlist_add_head(&rule->hlist,
arfs_hash_bucket(arfs_t, tuple->src_port,
tuple->dst_port));
return rule;
}
static bool arfs_cmp_ips(struct arfs_tuple *tuple,
const struct sk_buff *skb)
{
if (tuple->etype == htons(ETH_P_IP) &&
tuple->src_ipv4 == ip_hdr(skb)->saddr &&
tuple->dst_ipv4 == ip_hdr(skb)->daddr)
return true;
if (tuple->etype == htons(ETH_P_IPV6) &&
(!memcmp(&tuple->src_ipv6, &ipv6_hdr(skb)->saddr,
sizeof(struct in6_addr))) &&
(!memcmp(&tuple->dst_ipv6, &ipv6_hdr(skb)->daddr,
sizeof(struct in6_addr))))
return true;
return false;
}
static struct arfs_rule *arfs_find_rule(struct arfs_table *arfs_t,
const struct sk_buff *skb)
{
struct arfs_rule *arfs_rule;
struct hlist_head *head;
__be16 src_port = arfs_get_src_port(skb);
__be16 dst_port = arfs_get_dst_port(skb);
head = arfs_hash_bucket(arfs_t, src_port, dst_port);
hlist_for_each_entry(arfs_rule, head, hlist) {
if (arfs_rule->tuple.src_port == src_port &&
arfs_rule->tuple.dst_port == dst_port &&
arfs_cmp_ips(&arfs_rule->tuple, skb)) {
return arfs_rule;
}
}
return NULL;
}
int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
struct arfs_table *arfs_t;
struct arfs_rule *arfs_rule;
if (skb->protocol != htons(ETH_P_IP) &&
skb->protocol != htons(ETH_P_IPV6))
return -EPROTONOSUPPORT;
arfs_t = arfs_get_table(arfs, arfs_get_ip_proto(skb), skb->protocol);
if (!arfs_t)
return -EPROTONOSUPPORT;
spin_lock_bh(&arfs->arfs_lock);
arfs_rule = arfs_find_rule(arfs_t, skb);
if (arfs_rule) {
if (arfs_rule->rxq == rxq_index) {
spin_unlock_bh(&arfs->arfs_lock);
return arfs_rule->filter_id;
}
arfs_rule->rxq = rxq_index;
} else {
arfs_rule = arfs_alloc_rule(priv, arfs_t, skb,
rxq_index, flow_id);
if (!arfs_rule) {
spin_unlock_bh(&arfs->arfs_lock);
return -ENOMEM;
}
}
queue_work(priv->fs.arfs.wq, &arfs_rule->arfs_work);
spin_unlock_bh(&arfs->arfs_lock);
return arfs_rule->filter_id;
}