Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Merge in overtime fixes, no conflicts. Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
89695196f0
|
@ -10770,7 +10770,6 @@ L7 BPF FRAMEWORK
|
|||
M: John Fastabend <john.fastabend@gmail.com>
|
||||
M: Daniel Borkmann <daniel@iogearbox.net>
|
||||
M: Jakub Sitnicki <jakub@cloudflare.com>
|
||||
M: Lorenz Bauer <lmb@cloudflare.com>
|
||||
L: netdev@vger.kernel.org
|
||||
L: bpf@vger.kernel.org
|
||||
S: Maintained
|
||||
|
|
|
@ -76,7 +76,7 @@ static inline void bcmgenet_writel(u32 value, void __iomem *offset)
|
|||
if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
|
||||
__raw_writel(value, offset);
|
||||
else
|
||||
writel_relaxed(value, offset);
|
||||
writel(value, offset);
|
||||
}
|
||||
|
||||
static inline u32 bcmgenet_readl(void __iomem *offset)
|
||||
|
@ -84,7 +84,7 @@ static inline u32 bcmgenet_readl(void __iomem *offset)
|
|||
if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
|
||||
return __raw_readl(offset);
|
||||
else
|
||||
return readl_relaxed(offset);
|
||||
return readl(offset);
|
||||
}
|
||||
|
||||
static inline void dmadesc_set_length_status(struct bcmgenet_priv *priv,
|
||||
|
|
|
@ -1430,6 +1430,15 @@ static int __ibmvnic_open(struct net_device *netdev)
|
|||
return rc;
|
||||
}
|
||||
|
||||
adapter->tx_queues_active = true;
|
||||
|
||||
/* Since queues were stopped until now, there shouldn't be any
|
||||
* one in ibmvnic_complete_tx() or ibmvnic_xmit() so maybe we
|
||||
* don't need the synchronize_rcu()? Leaving it for consistency
|
||||
* with setting ->tx_queues_active = false.
|
||||
*/
|
||||
synchronize_rcu();
|
||||
|
||||
netif_tx_start_all_queues(netdev);
|
||||
|
||||
if (prev_state == VNIC_CLOSED) {
|
||||
|
@ -1604,6 +1613,14 @@ static void ibmvnic_cleanup(struct net_device *netdev)
|
|||
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
|
||||
|
||||
/* ensure that transmissions are stopped if called by do_reset */
|
||||
|
||||
adapter->tx_queues_active = false;
|
||||
|
||||
/* Ensure complete_tx() and ibmvnic_xmit() see ->tx_queues_active
|
||||
* update so they don't restart a queue after we stop it below.
|
||||
*/
|
||||
synchronize_rcu();
|
||||
|
||||
if (test_bit(0, &adapter->resetting))
|
||||
netif_tx_disable(netdev);
|
||||
else
|
||||
|
@ -1843,15 +1860,22 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
|
|||
tx_buff->skb = NULL;
|
||||
adapter->netdev->stats.tx_dropped++;
|
||||
}
|
||||
|
||||
ind_bufp->index = 0;
|
||||
|
||||
if (atomic_sub_return(entries, &tx_scrq->used) <=
|
||||
(adapter->req_tx_entries_per_subcrq / 2) &&
|
||||
__netif_subqueue_stopped(adapter->netdev, queue_num) &&
|
||||
!test_bit(0, &adapter->resetting)) {
|
||||
__netif_subqueue_stopped(adapter->netdev, queue_num)) {
|
||||
rcu_read_lock();
|
||||
|
||||
if (adapter->tx_queues_active) {
|
||||
netif_wake_subqueue(adapter->netdev, queue_num);
|
||||
netdev_dbg(adapter->netdev, "Started queue %d\n",
|
||||
queue_num);
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
}
|
||||
|
||||
static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter,
|
||||
|
@ -1905,11 +1929,12 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
|
|||
int index = 0;
|
||||
u8 proto = 0;
|
||||
|
||||
tx_scrq = adapter->tx_scrq[queue_num];
|
||||
txq = netdev_get_tx_queue(netdev, queue_num);
|
||||
ind_bufp = &tx_scrq->ind_buf;
|
||||
|
||||
if (test_bit(0, &adapter->resetting)) {
|
||||
/* If a reset is in progress, drop the packet since
|
||||
* the scrqs may get torn down. Otherwise use the
|
||||
* rcu to ensure reset waits for us to complete.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
if (!adapter->tx_queues_active) {
|
||||
dev_kfree_skb_any(skb);
|
||||
|
||||
tx_send_failed++;
|
||||
|
@ -1918,6 +1943,10 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
|
|||
goto out;
|
||||
}
|
||||
|
||||
tx_scrq = adapter->tx_scrq[queue_num];
|
||||
txq = netdev_get_tx_queue(netdev, queue_num);
|
||||
ind_bufp = &tx_scrq->ind_buf;
|
||||
|
||||
if (ibmvnic_xmit_workarounds(skb, netdev)) {
|
||||
tx_dropped++;
|
||||
tx_send_failed++;
|
||||
|
@ -1925,6 +1954,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
|
|||
ibmvnic_tx_scrq_flush(adapter, tx_scrq);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (skb_is_gso(skb))
|
||||
tx_pool = &adapter->tso_pool[queue_num];
|
||||
else
|
||||
|
@ -2079,6 +2109,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
|
|||
netif_carrier_off(netdev);
|
||||
}
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
netdev->stats.tx_dropped += tx_dropped;
|
||||
netdev->stats.tx_bytes += tx_bytes;
|
||||
netdev->stats.tx_packets += tx_packets;
|
||||
|
@ -3749,9 +3780,15 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
|
|||
(adapter->req_tx_entries_per_subcrq / 2) &&
|
||||
__netif_subqueue_stopped(adapter->netdev,
|
||||
scrq->pool_index)) {
|
||||
netif_wake_subqueue(adapter->netdev, scrq->pool_index);
|
||||
netdev_dbg(adapter->netdev, "Started queue %d\n",
|
||||
rcu_read_lock();
|
||||
if (adapter->tx_queues_active) {
|
||||
netif_wake_subqueue(adapter->netdev,
|
||||
scrq->pool_index);
|
||||
netdev_dbg(adapter->netdev,
|
||||
"Started queue %d\n",
|
||||
scrq->pool_index);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1006,11 +1006,14 @@ struct ibmvnic_adapter {
|
|||
struct work_struct ibmvnic_reset;
|
||||
struct delayed_work ibmvnic_delayed_reset;
|
||||
unsigned long resetting;
|
||||
bool napi_enabled, from_passive_init;
|
||||
bool login_pending;
|
||||
/* last device reset time */
|
||||
unsigned long last_reset_time;
|
||||
|
||||
bool napi_enabled;
|
||||
bool from_passive_init;
|
||||
bool login_pending;
|
||||
/* protected by rcu */
|
||||
bool tx_queues_active;
|
||||
bool failover_pending;
|
||||
bool force_reset_recovery;
|
||||
|
||||
|
|
|
@ -290,6 +290,7 @@ enum ice_pf_state {
|
|||
ICE_LINK_DEFAULT_OVERRIDE_PENDING,
|
||||
ICE_PHY_INIT_COMPLETE,
|
||||
ICE_FD_VF_FLUSH_CTX, /* set at FD Rx IRQ or timeout */
|
||||
ICE_AUX_ERR_PENDING,
|
||||
ICE_STATE_NBITS /* must be last */
|
||||
};
|
||||
|
||||
|
@ -557,6 +558,7 @@ struct ice_pf {
|
|||
wait_queue_head_t reset_wait_queue;
|
||||
|
||||
u32 hw_csum_rx_error;
|
||||
u32 oicr_err_reg;
|
||||
u16 oicr_idx; /* Other interrupt cause MSIX vector index */
|
||||
u16 num_avail_sw_msix; /* remaining MSIX SW vectors left unclaimed */
|
||||
u16 max_pf_txqs; /* Total Tx queues PF wide */
|
||||
|
|
|
@ -34,6 +34,9 @@ void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_event *event)
|
|||
{
|
||||
struct iidc_auxiliary_drv *iadrv;
|
||||
|
||||
if (WARN_ON_ONCE(!in_task()))
|
||||
return;
|
||||
|
||||
if (!pf->adev)
|
||||
return;
|
||||
|
||||
|
|
|
@ -2278,6 +2278,19 @@ static void ice_service_task(struct work_struct *work)
|
|||
return;
|
||||
}
|
||||
|
||||
if (test_and_clear_bit(ICE_AUX_ERR_PENDING, pf->state)) {
|
||||
struct iidc_event *event;
|
||||
|
||||
event = kzalloc(sizeof(*event), GFP_KERNEL);
|
||||
if (event) {
|
||||
set_bit(IIDC_EVENT_CRIT_ERR, event->type);
|
||||
/* report the entire OICR value to AUX driver */
|
||||
swap(event->reg, pf->oicr_err_reg);
|
||||
ice_send_event_to_aux(pf, event);
|
||||
kfree(event);
|
||||
}
|
||||
}
|
||||
|
||||
if (test_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags)) {
|
||||
/* Plug aux device per request */
|
||||
ice_plug_aux_dev(pf);
|
||||
|
@ -3064,17 +3077,9 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
|
|||
|
||||
#define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_PUSH_M)
|
||||
if (oicr & ICE_AUX_CRIT_ERR) {
|
||||
struct iidc_event *event;
|
||||
|
||||
pf->oicr_err_reg |= oicr;
|
||||
set_bit(ICE_AUX_ERR_PENDING, pf->state);
|
||||
ena_mask &= ~ICE_AUX_CRIT_ERR;
|
||||
event = kzalloc(sizeof(*event), GFP_ATOMIC);
|
||||
if (event) {
|
||||
set_bit(IIDC_EVENT_CRIT_ERR, event->type);
|
||||
/* report the entire OICR value to AUX driver */
|
||||
event->reg = oicr;
|
||||
ice_send_event_to_aux(pf, event);
|
||||
kfree(event);
|
||||
}
|
||||
}
|
||||
|
||||
/* Report any remaining unexpected interrupts */
|
||||
|
|
|
@ -755,7 +755,7 @@ static int __maybe_unused bam_dmux_runtime_resume(struct device *dev)
|
|||
return 0;
|
||||
|
||||
dmux->tx = dma_request_chan(dev, "tx");
|
||||
if (IS_ERR(dmux->rx)) {
|
||||
if (IS_ERR(dmux->tx)) {
|
||||
dev_err(dev, "Failed to request TX DMA channel: %pe\n", dmux->tx);
|
||||
dmux->tx = NULL;
|
||||
bam_dmux_runtime_suspend(dev);
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
#include <linux/netfilter/nf_conntrack_tuple_common.h>
|
||||
#include <net/flow_offload.h>
|
||||
#include <net/dst.h>
|
||||
#include <linux/if_pppox.h>
|
||||
#include <linux/ppp_defs.h>
|
||||
|
||||
struct nf_flowtable;
|
||||
struct nf_flow_rule;
|
||||
|
@ -317,4 +319,20 @@ int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
|
|||
int nf_flow_table_offload_init(void);
|
||||
void nf_flow_table_offload_exit(void);
|
||||
|
||||
static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
|
||||
{
|
||||
__be16 proto;
|
||||
|
||||
proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
|
||||
sizeof(struct pppoe_hdr)));
|
||||
switch (proto) {
|
||||
case htons(PPP_IP):
|
||||
return htons(ETH_P_IP);
|
||||
case htons(PPP_IPV6):
|
||||
return htons(ETH_P_IPV6);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* _NF_FLOW_TABLE_H */
|
||||
|
|
|
@ -89,18 +89,20 @@ static void ax25_kill_by_device(struct net_device *dev)
|
|||
sk = s->sk;
|
||||
if (!sk) {
|
||||
spin_unlock_bh(&ax25_list_lock);
|
||||
s->ax25_dev = NULL;
|
||||
ax25_disconnect(s, ENETUNREACH);
|
||||
s->ax25_dev = NULL;
|
||||
spin_lock_bh(&ax25_list_lock);
|
||||
goto again;
|
||||
}
|
||||
sock_hold(sk);
|
||||
spin_unlock_bh(&ax25_list_lock);
|
||||
lock_sock(sk);
|
||||
ax25_disconnect(s, ENETUNREACH);
|
||||
s->ax25_dev = NULL;
|
||||
if (sk->sk_socket) {
|
||||
dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker);
|
||||
ax25_dev_put(ax25_dev);
|
||||
ax25_disconnect(s, ENETUNREACH);
|
||||
}
|
||||
release_sock(sk);
|
||||
spin_lock_bh(&ax25_list_lock);
|
||||
sock_put(sk);
|
||||
|
@ -979,14 +981,20 @@ static int ax25_release(struct socket *sock)
|
|||
{
|
||||
struct sock *sk = sock->sk;
|
||||
ax25_cb *ax25;
|
||||
ax25_dev *ax25_dev;
|
||||
|
||||
if (sk == NULL)
|
||||
return 0;
|
||||
|
||||
sock_hold(sk);
|
||||
sock_orphan(sk);
|
||||
lock_sock(sk);
|
||||
sock_orphan(sk);
|
||||
ax25 = sk_to_ax25(sk);
|
||||
ax25_dev = ax25->ax25_dev;
|
||||
if (ax25_dev) {
|
||||
dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker);
|
||||
ax25_dev_put(ax25_dev);
|
||||
}
|
||||
|
||||
if (sk->sk_type == SOCK_SEQPACKET) {
|
||||
switch (ax25->state) {
|
||||
|
|
|
@ -261,12 +261,20 @@ void ax25_disconnect(ax25_cb *ax25, int reason)
|
|||
{
|
||||
ax25_clear_queues(ax25);
|
||||
|
||||
if (reason == ENETUNREACH) {
|
||||
del_timer_sync(&ax25->timer);
|
||||
del_timer_sync(&ax25->t1timer);
|
||||
del_timer_sync(&ax25->t2timer);
|
||||
del_timer_sync(&ax25->t3timer);
|
||||
del_timer_sync(&ax25->idletimer);
|
||||
} else {
|
||||
if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY))
|
||||
ax25_stop_heartbeat(ax25);
|
||||
ax25_stop_t1timer(ax25);
|
||||
ax25_stop_t2timer(ax25);
|
||||
ax25_stop_t3timer(ax25);
|
||||
ax25_stop_idletimer(ax25);
|
||||
}
|
||||
|
||||
ax25->state = AX25_STATE_0;
|
||||
|
||||
|
|
|
@ -1786,6 +1786,10 @@ void dsa_switch_shutdown(struct dsa_switch *ds)
|
|||
struct dsa_port *dp;
|
||||
|
||||
mutex_lock(&dsa2_mutex);
|
||||
|
||||
if (!ds->setup)
|
||||
goto out;
|
||||
|
||||
rtnl_lock();
|
||||
|
||||
dsa_switch_for_each_user_port(dp, ds) {
|
||||
|
@ -1802,6 +1806,7 @@ void dsa_switch_shutdown(struct dsa_switch *ds)
|
|||
dp->master->dsa_ptr = NULL;
|
||||
|
||||
rtnl_unlock();
|
||||
out:
|
||||
mutex_unlock(&dsa2_mutex);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dsa_switch_shutdown);
|
||||
|
|
|
@ -498,6 +498,15 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
|
|||
}
|
||||
EXPORT_SYMBOL(__ip_select_ident);
|
||||
|
||||
static void ip_rt_fix_tos(struct flowi4 *fl4)
|
||||
{
|
||||
__u8 tos = RT_FL_TOS(fl4);
|
||||
|
||||
fl4->flowi4_tos = tos & IPTOS_RT_MASK;
|
||||
fl4->flowi4_scope = tos & RTO_ONLINK ?
|
||||
RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
|
||||
}
|
||||
|
||||
static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
|
||||
const struct sock *sk,
|
||||
const struct iphdr *iph,
|
||||
|
@ -823,6 +832,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
|
|||
rt = (struct rtable *) dst;
|
||||
|
||||
__build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
|
||||
ip_rt_fix_tos(&fl4);
|
||||
__ip_do_redirect(rt, skb, &fl4, true);
|
||||
}
|
||||
|
||||
|
@ -1047,6 +1057,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
|
|||
struct flowi4 fl4;
|
||||
|
||||
ip_rt_build_flow_key(&fl4, sk, skb);
|
||||
ip_rt_fix_tos(&fl4);
|
||||
|
||||
/* Don't make lookup fail for bridged encapsulations */
|
||||
if (skb && netif_is_any_bridge_port(skb->dev))
|
||||
|
@ -1121,6 +1132,8 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
|
|||
goto out;
|
||||
|
||||
new = true;
|
||||
} else {
|
||||
ip_rt_fix_tos(&fl4);
|
||||
}
|
||||
|
||||
__ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu);
|
||||
|
@ -2609,7 +2622,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
|
|||
struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
|
||||
const struct sk_buff *skb)
|
||||
{
|
||||
__u8 tos = RT_FL_TOS(fl4);
|
||||
struct fib_result res = {
|
||||
.type = RTN_UNSPEC,
|
||||
.fi = NULL,
|
||||
|
@ -2619,9 +2631,7 @@ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
|
|||
struct rtable *rth;
|
||||
|
||||
fl4->flowi4_iif = LOOPBACK_IFINDEX;
|
||||
fl4->flowi4_tos = tos & IPTOS_RT_MASK;
|
||||
fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
|
||||
RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
|
||||
ip_rt_fix_tos(fl4);
|
||||
|
||||
rcu_read_lock();
|
||||
rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
|
||||
|
|
|
@ -3730,6 +3730,7 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
|
|||
*/
|
||||
static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
|
||||
{
|
||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||
struct tcp_sock *tp = tcp_sk(sk);
|
||||
struct tcp_fastopen_request *fo = tp->fastopen_req;
|
||||
int space, err = 0;
|
||||
|
@ -3744,8 +3745,10 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
|
|||
* private TCP options. The cost is reduced data space in SYN :(
|
||||
*/
|
||||
tp->rx_opt.mss_clamp = tcp_mss_clamp(tp, tp->rx_opt.mss_clamp);
|
||||
/* Sync mss_cache after updating the mss_clamp */
|
||||
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
|
||||
|
||||
space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
|
||||
space = __tcp_mtu_to_mss(sk, icsk->icsk_pmtu_cookie) -
|
||||
MAX_TCP_OPTION_SPACE;
|
||||
|
||||
space = min_t(size_t, space, fo->size);
|
||||
|
|
|
@ -1199,6 +1199,7 @@ static struct sk_buff *__mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, g
|
|||
tcp_skb_entail(ssk, skb);
|
||||
return skb;
|
||||
}
|
||||
tcp_skb_tsorted_anchor_cleanup(skb);
|
||||
kfree_skb(skb);
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
@ -6,12 +6,29 @@
|
|||
#include <linux/rhashtable.h>
|
||||
#include <net/netfilter/nf_flow_table.h>
|
||||
#include <net/netfilter/nf_tables.h>
|
||||
#include <linux/if_vlan.h>
|
||||
|
||||
static unsigned int
|
||||
nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
|
||||
const struct nf_hook_state *state)
|
||||
{
|
||||
struct vlan_ethhdr *veth;
|
||||
__be16 proto;
|
||||
|
||||
switch (skb->protocol) {
|
||||
case htons(ETH_P_8021Q):
|
||||
veth = (struct vlan_ethhdr *)skb_mac_header(skb);
|
||||
proto = veth->h_vlan_encapsulated_proto;
|
||||
break;
|
||||
case htons(ETH_P_PPP_SES):
|
||||
proto = nf_flow_pppoe_proto(skb);
|
||||
break;
|
||||
default:
|
||||
proto = skb->protocol;
|
||||
break;
|
||||
}
|
||||
|
||||
switch (proto) {
|
||||
case htons(ETH_P_IP):
|
||||
return nf_flow_offload_ip_hook(priv, skb, state);
|
||||
case htons(ETH_P_IPV6):
|
||||
|
|
|
@ -8,8 +8,6 @@
|
|||
#include <linux/ipv6.h>
|
||||
#include <linux/netdevice.h>
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/if_pppox.h>
|
||||
#include <linux/ppp_defs.h>
|
||||
#include <net/ip.h>
|
||||
#include <net/ipv6.h>
|
||||
#include <net/ip6_route.h>
|
||||
|
@ -260,22 +258,6 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
|
|||
return NF_STOLEN;
|
||||
}
|
||||
|
||||
static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
|
||||
{
|
||||
__be16 proto;
|
||||
|
||||
proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
|
||||
sizeof(struct pppoe_hdr)));
|
||||
switch (proto) {
|
||||
case htons(PPP_IP):
|
||||
return htons(ETH_P_IP);
|
||||
case htons(PPP_IPV6):
|
||||
return htons(ETH_P_IPV6);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
|
||||
u32 *offset)
|
||||
{
|
||||
|
|
|
@ -9363,17 +9363,23 @@ int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(nft_parse_u32_check);
|
||||
|
||||
static unsigned int nft_parse_register(const struct nlattr *attr)
|
||||
static unsigned int nft_parse_register(const struct nlattr *attr, u32 *preg)
|
||||
{
|
||||
unsigned int reg;
|
||||
|
||||
reg = ntohl(nla_get_be32(attr));
|
||||
switch (reg) {
|
||||
case NFT_REG_VERDICT...NFT_REG_4:
|
||||
return reg * NFT_REG_SIZE / NFT_REG32_SIZE;
|
||||
*preg = reg * NFT_REG_SIZE / NFT_REG32_SIZE;
|
||||
break;
|
||||
case NFT_REG32_00...NFT_REG32_15:
|
||||
*preg = reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
|
||||
break;
|
||||
default:
|
||||
return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
|
||||
return -ERANGE;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -9415,7 +9421,10 @@ int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len)
|
|||
u32 reg;
|
||||
int err;
|
||||
|
||||
reg = nft_parse_register(attr);
|
||||
err = nft_parse_register(attr, &reg);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
err = nft_validate_register_load(reg, len);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
@ -9470,7 +9479,10 @@ int nft_parse_register_store(const struct nft_ctx *ctx,
|
|||
int err;
|
||||
u32 reg;
|
||||
|
||||
reg = nft_parse_register(attr);
|
||||
err = nft_parse_register(attr, &reg);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
err = nft_validate_register_store(ctx, reg, data, type, len);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
|
|
@ -215,7 +215,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
|
|||
const struct nft_rule_dp *rule, *last_rule;
|
||||
const struct net *net = nft_net(pkt);
|
||||
const struct nft_expr *expr, *last;
|
||||
struct nft_regs regs;
|
||||
struct nft_regs regs = {};
|
||||
unsigned int stackptr = 0;
|
||||
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
|
||||
bool genbit = READ_ONCE(net->nft.gencursor);
|
||||
|
|
|
@ -159,6 +159,8 @@ EXPORT_SYMBOL(do_trace_netlink_extack);
|
|||
|
||||
static inline u32 netlink_group_mask(u32 group)
|
||||
{
|
||||
if (group > 32)
|
||||
return 0;
|
||||
return group ? 1 << (group - 1) : 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -734,6 +734,57 @@ static bool skb_nfct_cached(struct net *net,
|
|||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_NF_NAT)
|
||||
static void ovs_nat_update_key(struct sw_flow_key *key,
|
||||
const struct sk_buff *skb,
|
||||
enum nf_nat_manip_type maniptype)
|
||||
{
|
||||
if (maniptype == NF_NAT_MANIP_SRC) {
|
||||
__be16 src;
|
||||
|
||||
key->ct_state |= OVS_CS_F_SRC_NAT;
|
||||
if (key->eth.type == htons(ETH_P_IP))
|
||||
key->ipv4.addr.src = ip_hdr(skb)->saddr;
|
||||
else if (key->eth.type == htons(ETH_P_IPV6))
|
||||
memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr,
|
||||
sizeof(key->ipv6.addr.src));
|
||||
else
|
||||
return;
|
||||
|
||||
if (key->ip.proto == IPPROTO_UDP)
|
||||
src = udp_hdr(skb)->source;
|
||||
else if (key->ip.proto == IPPROTO_TCP)
|
||||
src = tcp_hdr(skb)->source;
|
||||
else if (key->ip.proto == IPPROTO_SCTP)
|
||||
src = sctp_hdr(skb)->source;
|
||||
else
|
||||
return;
|
||||
|
||||
key->tp.src = src;
|
||||
} else {
|
||||
__be16 dst;
|
||||
|
||||
key->ct_state |= OVS_CS_F_DST_NAT;
|
||||
if (key->eth.type == htons(ETH_P_IP))
|
||||
key->ipv4.addr.dst = ip_hdr(skb)->daddr;
|
||||
else if (key->eth.type == htons(ETH_P_IPV6))
|
||||
memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr,
|
||||
sizeof(key->ipv6.addr.dst));
|
||||
else
|
||||
return;
|
||||
|
||||
if (key->ip.proto == IPPROTO_UDP)
|
||||
dst = udp_hdr(skb)->dest;
|
||||
else if (key->ip.proto == IPPROTO_TCP)
|
||||
dst = tcp_hdr(skb)->dest;
|
||||
else if (key->ip.proto == IPPROTO_SCTP)
|
||||
dst = sctp_hdr(skb)->dest;
|
||||
else
|
||||
return;
|
||||
|
||||
key->tp.dst = dst;
|
||||
}
|
||||
}
|
||||
|
||||
/* Modelled after nf_nat_ipv[46]_fn().
|
||||
* range is only used for new, uninitialized NAT state.
|
||||
* Returns either NF_ACCEPT or NF_DROP.
|
||||
|
@ -741,7 +792,7 @@ static bool skb_nfct_cached(struct net *net,
|
|||
static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
|
||||
enum ip_conntrack_info ctinfo,
|
||||
const struct nf_nat_range2 *range,
|
||||
enum nf_nat_manip_type maniptype)
|
||||
enum nf_nat_manip_type maniptype, struct sw_flow_key *key)
|
||||
{
|
||||
int hooknum, nh_off, err = NF_ACCEPT;
|
||||
|
||||
|
@ -813,60 +864,13 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
|
|||
push:
|
||||
skb_push_rcsum(skb, nh_off);
|
||||
|
||||
/* Update the flow key if NAT successful. */
|
||||
if (err == NF_ACCEPT)
|
||||
ovs_nat_update_key(key, skb, maniptype);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void ovs_nat_update_key(struct sw_flow_key *key,
|
||||
const struct sk_buff *skb,
|
||||
enum nf_nat_manip_type maniptype)
|
||||
{
|
||||
if (maniptype == NF_NAT_MANIP_SRC) {
|
||||
__be16 src;
|
||||
|
||||
key->ct_state |= OVS_CS_F_SRC_NAT;
|
||||
if (key->eth.type == htons(ETH_P_IP))
|
||||
key->ipv4.addr.src = ip_hdr(skb)->saddr;
|
||||
else if (key->eth.type == htons(ETH_P_IPV6))
|
||||
memcpy(&key->ipv6.addr.src, &ipv6_hdr(skb)->saddr,
|
||||
sizeof(key->ipv6.addr.src));
|
||||
else
|
||||
return;
|
||||
|
||||
if (key->ip.proto == IPPROTO_UDP)
|
||||
src = udp_hdr(skb)->source;
|
||||
else if (key->ip.proto == IPPROTO_TCP)
|
||||
src = tcp_hdr(skb)->source;
|
||||
else if (key->ip.proto == IPPROTO_SCTP)
|
||||
src = sctp_hdr(skb)->source;
|
||||
else
|
||||
return;
|
||||
|
||||
key->tp.src = src;
|
||||
} else {
|
||||
__be16 dst;
|
||||
|
||||
key->ct_state |= OVS_CS_F_DST_NAT;
|
||||
if (key->eth.type == htons(ETH_P_IP))
|
||||
key->ipv4.addr.dst = ip_hdr(skb)->daddr;
|
||||
else if (key->eth.type == htons(ETH_P_IPV6))
|
||||
memcpy(&key->ipv6.addr.dst, &ipv6_hdr(skb)->daddr,
|
||||
sizeof(key->ipv6.addr.dst));
|
||||
else
|
||||
return;
|
||||
|
||||
if (key->ip.proto == IPPROTO_UDP)
|
||||
dst = udp_hdr(skb)->dest;
|
||||
else if (key->ip.proto == IPPROTO_TCP)
|
||||
dst = tcp_hdr(skb)->dest;
|
||||
else if (key->ip.proto == IPPROTO_SCTP)
|
||||
dst = sctp_hdr(skb)->dest;
|
||||
else
|
||||
return;
|
||||
|
||||
key->tp.dst = dst;
|
||||
}
|
||||
}
|
||||
|
||||
/* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */
|
||||
static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
|
||||
const struct ovs_conntrack_info *info,
|
||||
|
@ -906,7 +910,7 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
|
|||
} else {
|
||||
return NF_ACCEPT; /* Connection is not NATed. */
|
||||
}
|
||||
err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype);
|
||||
err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype, key);
|
||||
|
||||
if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
|
||||
if (ct->status & IPS_SRC_NAT) {
|
||||
|
@ -916,17 +920,13 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
|
|||
maniptype = NF_NAT_MANIP_SRC;
|
||||
|
||||
err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range,
|
||||
maniptype);
|
||||
maniptype, key);
|
||||
} else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
|
||||
err = ovs_ct_nat_execute(skb, ct, ctinfo, NULL,
|
||||
NF_NAT_MANIP_SRC);
|
||||
NF_NAT_MANIP_SRC, key);
|
||||
}
|
||||
}
|
||||
|
||||
/* Mark NAT done if successful and update the flow key. */
|
||||
if (err == NF_ACCEPT)
|
||||
ovs_nat_update_key(key, skb, maniptype);
|
||||
|
||||
return err;
|
||||
}
|
||||
#else /* !CONFIG_NF_NAT */
|
||||
|
|
|
@ -2852,7 +2852,8 @@ static void tipc_sk_retry_connect(struct sock *sk, struct sk_buff_head *list)
|
|||
|
||||
/* Try again later if dest link is congested */
|
||||
if (tsk->cong_link_cnt) {
|
||||
sk_reset_timer(sk, &sk->sk_timer, msecs_to_jiffies(100));
|
||||
sk_reset_timer(sk, &sk->sk_timer,
|
||||
jiffies + msecs_to_jiffies(100));
|
||||
return;
|
||||
}
|
||||
/* Prepare SYN for retransmit */
|
||||
|
|
|
@ -2084,7 +2084,7 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other
|
|||
if (ousk->oob_skb)
|
||||
consume_skb(ousk->oob_skb);
|
||||
|
||||
ousk->oob_skb = skb;
|
||||
WRITE_ONCE(ousk->oob_skb, skb);
|
||||
|
||||
scm_stat_add(other, skb);
|
||||
skb_queue_tail(&other->sk_receive_queue, skb);
|
||||
|
@ -2602,9 +2602,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
|
|||
|
||||
oob_skb = u->oob_skb;
|
||||
|
||||
if (!(state->flags & MSG_PEEK)) {
|
||||
u->oob_skb = NULL;
|
||||
}
|
||||
if (!(state->flags & MSG_PEEK))
|
||||
WRITE_ONCE(u->oob_skb, NULL);
|
||||
|
||||
unix_state_unlock(sk);
|
||||
|
||||
|
@ -2639,7 +2638,7 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
|
|||
skb = NULL;
|
||||
} else if (sock_flag(sk, SOCK_URGINLINE)) {
|
||||
if (!(flags & MSG_PEEK)) {
|
||||
u->oob_skb = NULL;
|
||||
WRITE_ONCE(u->oob_skb, NULL);
|
||||
consume_skb(skb);
|
||||
}
|
||||
} else if (!(flags & MSG_PEEK)) {
|
||||
|
@ -3094,11 +3093,10 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
|
|||
case SIOCATMARK:
|
||||
{
|
||||
struct sk_buff *skb;
|
||||
struct unix_sock *u = unix_sk(sk);
|
||||
int answ = 0;
|
||||
|
||||
skb = skb_peek(&sk->sk_receive_queue);
|
||||
if (skb && skb == u->oob_skb)
|
||||
if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
|
||||
answ = 1;
|
||||
err = put_user(answ, (int __user *)arg);
|
||||
}
|
||||
|
@ -3139,6 +3137,10 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
|
|||
mask |= EPOLLIN | EPOLLRDNORM;
|
||||
if (sk_is_readable(sk))
|
||||
mask |= EPOLLIN | EPOLLRDNORM;
|
||||
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
|
||||
if (READ_ONCE(unix_sk(sk)->oob_skb))
|
||||
mask |= EPOLLPRI;
|
||||
#endif
|
||||
|
||||
/* Connection-based need to check for termination and startup */
|
||||
if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
|
||||
|
|
|
@ -402,18 +402,8 @@ EXPORT_SYMBOL(xsk_tx_peek_release_desc_batch);
|
|||
static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
|
||||
{
|
||||
struct net_device *dev = xs->dev;
|
||||
int err;
|
||||
|
||||
rcu_read_lock();
|
||||
err = dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
|
||||
rcu_read_unlock();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int xsk_zc_xmit(struct xdp_sock *xs)
|
||||
{
|
||||
return xsk_wakeup(xs, XDP_WAKEUP_TX);
|
||||
return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
|
||||
}
|
||||
|
||||
static void xsk_destruct_skb(struct sk_buff *skb)
|
||||
|
@ -532,6 +522,12 @@ static int xsk_generic_xmit(struct sock *sk)
|
|||
|
||||
mutex_lock(&xs->mutex);
|
||||
|
||||
/* Since we dropped the RCU read lock, the socket state might have changed. */
|
||||
if (unlikely(!xsk_is_bound(xs))) {
|
||||
err = -ENXIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (xs->queue_id >= xs->dev->real_num_tx_queues)
|
||||
goto out;
|
||||
|
||||
|
@ -595,16 +591,26 @@ static int xsk_generic_xmit(struct sock *sk)
|
|||
return err;
|
||||
}
|
||||
|
||||
static int __xsk_sendmsg(struct sock *sk)
|
||||
static int xsk_xmit(struct sock *sk)
|
||||
{
|
||||
struct xdp_sock *xs = xdp_sk(sk);
|
||||
int ret;
|
||||
|
||||
if (unlikely(!(xs->dev->flags & IFF_UP)))
|
||||
return -ENETDOWN;
|
||||
if (unlikely(!xs->tx))
|
||||
return -ENOBUFS;
|
||||
|
||||
return xs->zc ? xsk_zc_xmit(xs) : xsk_generic_xmit(sk);
|
||||
if (xs->zc)
|
||||
return xsk_wakeup(xs, XDP_WAKEUP_TX);
|
||||
|
||||
/* Drop the RCU lock since the SKB path might sleep. */
|
||||
rcu_read_unlock();
|
||||
ret = xsk_generic_xmit(sk);
|
||||
/* Reacquire RCU lock before going into common code. */
|
||||
rcu_read_lock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool xsk_no_wakeup(struct sock *sk)
|
||||
|
@ -618,7 +624,7 @@ static bool xsk_no_wakeup(struct sock *sk)
|
|||
#endif
|
||||
}
|
||||
|
||||
static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
|
||||
static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
|
||||
{
|
||||
bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
|
||||
struct sock *sk = sock->sk;
|
||||
|
@ -638,11 +644,22 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
|
|||
|
||||
pool = xs->pool;
|
||||
if (pool->cached_need_wakeup & XDP_WAKEUP_TX)
|
||||
return __xsk_sendmsg(sk);
|
||||
return xsk_xmit(sk);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
|
||||
static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
|
||||
{
|
||||
int ret;
|
||||
|
||||
rcu_read_lock();
|
||||
ret = __xsk_sendmsg(sock, m, total_len);
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
|
||||
{
|
||||
bool need_wait = !(flags & MSG_DONTWAIT);
|
||||
struct sock *sk = sock->sk;
|
||||
|
@ -668,6 +685,17 @@ static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int fl
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
|
||||
{
|
||||
int ret;
|
||||
|
||||
rcu_read_lock();
|
||||
ret = __xsk_recvmsg(sock, m, len, flags);
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static __poll_t xsk_poll(struct file *file, struct socket *sock,
|
||||
struct poll_table_struct *wait)
|
||||
{
|
||||
|
@ -678,8 +706,11 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
|
|||
|
||||
sock_poll_wait(file, sock, wait);
|
||||
|
||||
if (unlikely(!xsk_is_bound(xs)))
|
||||
rcu_read_lock();
|
||||
if (unlikely(!xsk_is_bound(xs))) {
|
||||
rcu_read_unlock();
|
||||
return mask;
|
||||
}
|
||||
|
||||
pool = xs->pool;
|
||||
|
||||
|
@ -688,7 +719,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
|
|||
xsk_wakeup(xs, pool->cached_need_wakeup);
|
||||
else
|
||||
/* Poll needs to drive Tx also in copy mode */
|
||||
__xsk_sendmsg(sk);
|
||||
xsk_xmit(sk);
|
||||
}
|
||||
|
||||
if (xs->rx && !xskq_prod_is_empty(xs->rx))
|
||||
|
@ -696,6 +727,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
|
|||
if (xs->tx && xsk_tx_writeable(xs))
|
||||
mask |= EPOLLOUT | EPOLLWRNORM;
|
||||
|
||||
rcu_read_unlock();
|
||||
return mask;
|
||||
}
|
||||
|
||||
|
@ -727,7 +759,6 @@ static void xsk_unbind_dev(struct xdp_sock *xs)
|
|||
|
||||
/* Wait for driver to stop using the xdp socket. */
|
||||
xp_del_xsk(xs->pool, xs);
|
||||
xs->dev = NULL;
|
||||
synchronize_net();
|
||||
dev_put(dev);
|
||||
}
|
||||
|
|
|
@ -218,10 +218,10 @@ main(int argc, char **argv)
|
|||
|
||||
/* Test 1:
|
||||
* verify that SIGURG is
|
||||
* delivered and 63 bytes are
|
||||
* read and oob is '@'
|
||||
* delivered, 63 bytes are
|
||||
* read, oob is '@', and POLLPRI works.
|
||||
*/
|
||||
wait_for_data(pfd, POLLIN | POLLPRI);
|
||||
wait_for_data(pfd, POLLPRI);
|
||||
read_oob(pfd, &oob);
|
||||
len = read_data(pfd, buf, 1024);
|
||||
if (!signal_recvd || len != 63 || oob != '@') {
|
||||
|
|
|
@ -26,6 +26,15 @@
|
|||
# - pmtu_ipv6
|
||||
# Same as pmtu_ipv4, except for locked PMTU tests, using IPv6
|
||||
#
|
||||
# - pmtu_ipv4_dscp_icmp_exception
|
||||
# Set up the same network topology as pmtu_ipv4, but use non-default
|
||||
# routing table in A. A fib-rule is used to jump to this routing table
|
||||
# based on DSCP. Send ICMPv4 packets with the expected DSCP value and
|
||||
# verify that ECN doesn't interfere with the creation of PMTU exceptions.
|
||||
#
|
||||
# - pmtu_ipv4_dscp_udp_exception
|
||||
# Same as pmtu_ipv4_dscp_icmp_exception, but use UDP instead of ICMP.
|
||||
#
|
||||
# - pmtu_ipv4_vxlan4_exception
|
||||
# Set up the same network topology as pmtu_ipv4, create a VXLAN tunnel
|
||||
# over IPv4 between A and B, routed via R1. On the link between R1 and B,
|
||||
|
@ -203,6 +212,8 @@ which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
|
|||
tests="
|
||||
pmtu_ipv4_exception ipv4: PMTU exceptions 1
|
||||
pmtu_ipv6_exception ipv6: PMTU exceptions 1
|
||||
pmtu_ipv4_dscp_icmp_exception ICMPv4 with DSCP and ECN: PMTU exceptions 1
|
||||
pmtu_ipv4_dscp_udp_exception UDPv4 with DSCP and ECN: PMTU exceptions 1
|
||||
pmtu_ipv4_vxlan4_exception IPv4 over vxlan4: PMTU exceptions 1
|
||||
pmtu_ipv6_vxlan4_exception IPv6 over vxlan4: PMTU exceptions 1
|
||||
pmtu_ipv4_vxlan6_exception IPv4 over vxlan6: PMTU exceptions 1
|
||||
|
@ -323,6 +334,9 @@ routes_nh="
|
|||
B 6 default 61
|
||||
"
|
||||
|
||||
policy_mark=0x04
|
||||
rt_table=main
|
||||
|
||||
veth4_a_addr="192.168.1.1"
|
||||
veth4_b_addr="192.168.1.2"
|
||||
veth4_c_addr="192.168.2.10"
|
||||
|
@ -346,6 +360,7 @@ dummy6_mask="64"
|
|||
err_buf=
|
||||
tcpdump_pids=
|
||||
nettest_pids=
|
||||
socat_pids=
|
||||
|
||||
err() {
|
||||
err_buf="${err_buf}${1}
|
||||
|
@ -723,7 +738,7 @@ setup_routing_old() {
|
|||
|
||||
ns_name="$(nsname ${ns})"
|
||||
|
||||
ip -n ${ns_name} route add ${addr} via ${gw}
|
||||
ip -n "${ns_name}" route add "${addr}" table "${rt_table}" via "${gw}"
|
||||
|
||||
ns=""; addr=""; gw=""
|
||||
done
|
||||
|
@ -753,7 +768,7 @@ setup_routing_new() {
|
|||
|
||||
ns_name="$(nsname ${ns})"
|
||||
|
||||
ip -n ${ns_name} -${fam} route add ${addr} nhid ${nhid}
|
||||
ip -n "${ns_name}" -"${fam}" route add "${addr}" table "${rt_table}" nhid "${nhid}"
|
||||
|
||||
ns=""; fam=""; addr=""; nhid=""
|
||||
done
|
||||
|
@ -798,6 +813,24 @@ setup_routing() {
|
|||
return 0
|
||||
}
|
||||
|
||||
setup_policy_routing() {
|
||||
setup_routing
|
||||
|
||||
ip -netns "${NS_A}" -4 rule add dsfield "${policy_mark}" \
|
||||
table "${rt_table}"
|
||||
|
||||
# Set the IPv4 Don't Fragment bit with tc, since socat doesn't seem to
|
||||
# have an option to do it.
|
||||
tc -netns "${NS_A}" qdisc replace dev veth_A-R1 root prio
|
||||
tc -netns "${NS_A}" qdisc replace dev veth_A-R2 root prio
|
||||
tc -netns "${NS_A}" filter add dev veth_A-R1 \
|
||||
protocol ipv4 flower ip_proto udp \
|
||||
action pedit ex munge ip df set 0x40 pipe csum ip and udp
|
||||
tc -netns "${NS_A}" filter add dev veth_A-R2 \
|
||||
protocol ipv4 flower ip_proto udp \
|
||||
action pedit ex munge ip df set 0x40 pipe csum ip and udp
|
||||
}
|
||||
|
||||
setup_bridge() {
|
||||
run_cmd ${ns_a} ip link add br0 type bridge || return $ksft_skip
|
||||
run_cmd ${ns_a} ip link set br0 up
|
||||
|
@ -903,6 +936,11 @@ cleanup() {
|
|||
done
|
||||
nettest_pids=
|
||||
|
||||
for pid in ${socat_pids}; do
|
||||
kill "${pid}"
|
||||
done
|
||||
socat_pids=
|
||||
|
||||
for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
|
||||
ip netns del ${n} 2> /dev/null
|
||||
done
|
||||
|
@ -950,15 +988,21 @@ link_get_mtu() {
|
|||
route_get_dst_exception() {
|
||||
ns_cmd="${1}"
|
||||
dst="${2}"
|
||||
dsfield="${3}"
|
||||
|
||||
${ns_cmd} ip route get "${dst}"
|
||||
if [ -z "${dsfield}" ]; then
|
||||
dsfield=0
|
||||
fi
|
||||
|
||||
${ns_cmd} ip route get "${dst}" dsfield "${dsfield}"
|
||||
}
|
||||
|
||||
route_get_dst_pmtu_from_exception() {
|
||||
ns_cmd="${1}"
|
||||
dst="${2}"
|
||||
dsfield="${3}"
|
||||
|
||||
mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})"
|
||||
mtu_parse "$(route_get_dst_exception "${ns_cmd}" "${dst}" "${dsfield}")"
|
||||
}
|
||||
|
||||
check_pmtu_value() {
|
||||
|
@ -1068,6 +1112,95 @@ test_pmtu_ipv6_exception() {
|
|||
test_pmtu_ipvX 6
|
||||
}
|
||||
|
||||
test_pmtu_ipv4_dscp_icmp_exception() {
|
||||
rt_table=100
|
||||
|
||||
setup namespaces policy_routing || return $ksft_skip
|
||||
trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
|
||||
"${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
|
||||
"${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
|
||||
"${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2
|
||||
|
||||
# Set up initial MTU values
|
||||
mtu "${ns_a}" veth_A-R1 2000
|
||||
mtu "${ns_r1}" veth_R1-A 2000
|
||||
mtu "${ns_r1}" veth_R1-B 1400
|
||||
mtu "${ns_b}" veth_B-R1 1400
|
||||
|
||||
mtu "${ns_a}" veth_A-R2 2000
|
||||
mtu "${ns_r2}" veth_R2-A 2000
|
||||
mtu "${ns_r2}" veth_R2-B 1500
|
||||
mtu "${ns_b}" veth_B-R2 1500
|
||||
|
||||
len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R1
|
||||
|
||||
dst1="${prefix4}.${b_r1}.1"
|
||||
dst2="${prefix4}.${b_r2}.1"
|
||||
|
||||
# Create route exceptions
|
||||
dsfield=${policy_mark} # No ECN bit set (Not-ECT)
|
||||
run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst1}"
|
||||
|
||||
dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0))
|
||||
run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst2}"
|
||||
|
||||
# Check that exceptions have been created with the correct PMTU
|
||||
pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"
|
||||
check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
|
||||
|
||||
pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"
|
||||
check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
|
||||
}
|
||||
|
||||
test_pmtu_ipv4_dscp_udp_exception() {
|
||||
rt_table=100
|
||||
|
||||
if ! which socat > /dev/null 2>&1; then
|
||||
echo "'socat' command not found; skipping tests"
|
||||
return $ksft_skip
|
||||
fi
|
||||
|
||||
setup namespaces policy_routing || return $ksft_skip
|
||||
trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
|
||||
"${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
|
||||
"${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
|
||||
"${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2
|
||||
|
||||
# Set up initial MTU values
|
||||
mtu "${ns_a}" veth_A-R1 2000
|
||||
mtu "${ns_r1}" veth_R1-A 2000
|
||||
mtu "${ns_r1}" veth_R1-B 1400
|
||||
mtu "${ns_b}" veth_B-R1 1400
|
||||
|
||||
mtu "${ns_a}" veth_A-R2 2000
|
||||
mtu "${ns_r2}" veth_R2-A 2000
|
||||
mtu "${ns_r2}" veth_R2-B 1500
|
||||
mtu "${ns_b}" veth_B-R2 1500
|
||||
|
||||
len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R1
|
||||
|
||||
dst1="${prefix4}.${b_r1}.1"
|
||||
dst2="${prefix4}.${b_r2}.1"
|
||||
|
||||
# Create route exceptions
|
||||
run_cmd_bg "${ns_b}" socat UDP-LISTEN:50000 OPEN:/dev/null,wronly=1
|
||||
socat_pids="${socat_pids} $!"
|
||||
|
||||
dsfield=${policy_mark} # No ECN bit set (Not-ECT)
|
||||
run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \
|
||||
UDP:"${dst1}":50000,tos="${dsfield}"
|
||||
|
||||
dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0))
|
||||
run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \
|
||||
UDP:"${dst2}":50000,tos="${dsfield}"
|
||||
|
||||
# Check that exceptions have been created with the correct PMTU
|
||||
pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"
|
||||
check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
|
||||
pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"
|
||||
check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
|
||||
}
|
||||
|
||||
test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {
|
||||
type=${1}
|
||||
family=${2}
|
||||
|
|
Loading…
Reference in New Issue