diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index d5df40c75aa4..b183e2b606c8 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -946,15 +946,20 @@ igmp_max_memberships - INTEGER The value 5459 assumes no IP header options, so in practice this number may be lower. - conf/interface/* changes special settings per interface (where - "interface" is the name of your network interface) - - conf/all/* is special, changes the settings for all interfaces +igmp_max_msf - INTEGER + Maximum number of addresses allowed in the source filter list for a + multicast group. + Default: 10 igmp_qrv - INTEGER - Controls the IGMP query robustness variable (see RFC2236 8.1). - Default: 2 (as specified by RFC2236 8.1) - Minimum: 1 (as specified by RFC6636 4.5) + Controls the IGMP query robustness variable (see RFC2236 8.1). + Default: 2 (as specified by RFC2236 8.1) + Minimum: 1 (as specified by RFC6636 4.5) + +conf/interface/* changes special settings per interface (where +"interface" is the name of your network interface) + +conf/all/* is special, changes the settings for all interfaces log_martians - BOOLEAN Log packets with impossible addresses to kernel log. diff --git a/drivers/isdn/mISDN/clock.c b/drivers/isdn/mISDN/clock.c index 693fb7c9b59a..f8f659f1ce1b 100644 --- a/drivers/isdn/mISDN/clock.c +++ b/drivers/isdn/mISDN/clock.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include "core.h" @@ -45,15 +46,15 @@ static u_int *debug; static LIST_HEAD(iclock_list); static DEFINE_RWLOCK(iclock_lock); static u16 iclock_count; /* counter of last clock */ -static struct timeval iclock_tv; /* time stamp of last clock */ -static int iclock_tv_valid; /* already received one timestamp */ +static ktime_t iclock_timestamp; /* time stamp of last clock */ +static int iclock_timestamp_valid; /* already received one timestamp */ static struct mISDNclock *iclock_current; void mISDN_init_clock(u_int *dp) { debug = dp; - do_gettimeofday(&iclock_tv); + iclock_timestamp = ktime_get(); } static void @@ -86,7 +87,7 @@ select_iclock(void) } if (bestclock != iclock_current) { /* no clock received yet */ - iclock_tv_valid = 0; + iclock_timestamp_valid = 0; } iclock_current = bestclock; } @@ -139,12 +140,11 @@ mISDN_unregister_clock(struct mISDNclock *iclock) EXPORT_SYMBOL(mISDN_unregister_clock); void -mISDN_clock_update(struct mISDNclock *iclock, int samples, struct timeval *tv) +mISDN_clock_update(struct mISDNclock *iclock, int samples, ktime_t *timestamp) { u_long flags; - struct timeval tv_now; - time_t elapsed_sec; - int elapsed_8000th; + ktime_t timestamp_now; + u16 delta; write_lock_irqsave(&iclock_lock, flags); if (iclock_current != iclock) { @@ -156,33 +156,27 @@ mISDN_clock_update(struct mISDNclock *iclock, int samples, struct timeval *tv) write_unlock_irqrestore(&iclock_lock, flags); return; } - if (iclock_tv_valid) { + if (iclock_timestamp_valid) { /* increment sample counter by given samples */ iclock_count += samples; - if (tv) { /* tv must be set, if function call is delayed */ - iclock_tv.tv_sec = tv->tv_sec; - iclock_tv.tv_usec = tv->tv_usec; - } else - do_gettimeofday(&iclock_tv); + if (timestamp) { /* timestamp must be set, if function call is delayed */ + iclock_timestamp = *timestamp; + } else { + iclock_timestamp = ktime_get(); + } } else { /* calc elapsed time by system clock */ - if (tv) { /* tv must be set, if function call is delayed */ - tv_now.tv_sec = tv->tv_sec; - tv_now.tv_usec = tv->tv_usec; - } else - do_gettimeofday(&tv_now); - elapsed_sec = tv_now.tv_sec - iclock_tv.tv_sec; - elapsed_8000th = (tv_now.tv_usec / 125) - - (iclock_tv.tv_usec / 125); - if (elapsed_8000th < 0) { - elapsed_sec -= 1; - elapsed_8000th += 8000; + if (timestamp) { /* timestamp must be set, if function call is delayed */ + timestamp_now = *timestamp; + } else { + timestamp_now = ktime_get(); } + delta = ktime_divns(ktime_sub(timestamp_now, iclock_timestamp), + (NSEC_PER_SEC / 8000)); /* add elapsed time to counter and set new timestamp */ - iclock_count += elapsed_sec * 8000 + elapsed_8000th; - iclock_tv.tv_sec = tv_now.tv_sec; - iclock_tv.tv_usec = tv_now.tv_usec; - iclock_tv_valid = 1; + iclock_count += delta; + iclock_timestamp = timestamp_now; + iclock_timestamp_valid = 1; if (*debug & DEBUG_CLOCK) printk("Received first clock from source '%s'.\n", iclock_current ? iclock_current->name : "nothing"); @@ -195,22 +189,17 @@ unsigned short mISDN_clock_get(void) { u_long flags; - struct timeval tv_now; - time_t elapsed_sec; - int elapsed_8000th; + ktime_t timestamp_now; + u16 delta; u16 count; read_lock_irqsave(&iclock_lock, flags); /* calc elapsed time by system clock */ - do_gettimeofday(&tv_now); - elapsed_sec = tv_now.tv_sec - iclock_tv.tv_sec; - elapsed_8000th = (tv_now.tv_usec / 125) - (iclock_tv.tv_usec / 125); - if (elapsed_8000th < 0) { - elapsed_sec -= 1; - elapsed_8000th += 8000; - } + timestamp_now = ktime_get(); + delta = ktime_divns(ktime_sub(timestamp_now, iclock_timestamp), + (NSEC_PER_SEC / 8000)); /* add elapsed time to counter */ - count = iclock_count + elapsed_sec * 8000 + elapsed_8000th; + count = iclock_count + delta; read_unlock_irqrestore(&iclock_lock, flags); return count; } diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 3ce6095ced3d..6619178ed77b 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -2959,7 +2959,7 @@ static int macb_probe(struct platform_device *pdev) int gpio = of_get_named_gpio(phy_node, "reset-gpios", 0); if (gpio_is_valid(gpio)) bp->reset_gpio = gpio_to_desc(gpio); - gpiod_set_value(bp->reset_gpio, GPIOD_OUT_HIGH); + gpiod_direction_output(bp->reset_gpio, 1); } of_node_put(phy_node); @@ -3029,7 +3029,7 @@ static int macb_remove(struct platform_device *pdev) mdiobus_free(bp->mii_bus); /* Shutdown the PHY if there is a GPIO reset */ - gpiod_set_value(bp->reset_gpio, GPIOD_OUT_LOW); + gpiod_set_value(bp->reset_gpio, 0); unregister_netdev(dev); clk_disable_unprepare(bp->tx_clk); diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c index 79a210aaf0bb..ea83712a6d62 100644 --- a/drivers/net/ethernet/freescale/fman/fman.c +++ b/drivers/net/ethernet/freescale/fman/fman.c @@ -35,6 +35,7 @@ #include "fman.h" #include "fman_muram.h" +#include #include #include #include @@ -1871,6 +1872,90 @@ static int fman_config(struct fman *fman) return -EINVAL; } +static int fman_reset(struct fman *fman) +{ + u32 count; + int err = 0; + + if (fman->state->rev_info.major < 6) { + iowrite32be(FPM_RSTC_FM_RESET, &fman->fpm_regs->fm_rstc); + /* Wait for reset completion */ + count = 100; + do { + udelay(1); + } while (((ioread32be(&fman->fpm_regs->fm_rstc)) & + FPM_RSTC_FM_RESET) && --count); + if (count == 0) + err = -EBUSY; + + goto _return; + } else { + struct device_node *guts_node; + struct ccsr_guts __iomem *guts_regs; + u32 devdisr2, reg; + + /* Errata A007273 */ + guts_node = + of_find_compatible_node(NULL, NULL, + "fsl,qoriq-device-config-2.0"); + if (!guts_node) { + dev_err(fman->dev, "%s: Couldn't find guts node\n", + __func__); + goto guts_node; + } + + guts_regs = of_iomap(guts_node, 0); + if (!guts_regs) { + dev_err(fman->dev, "%s: Couldn't map %s regs\n", + __func__, guts_node->full_name); + goto guts_regs; + } +#define FMAN1_ALL_MACS_MASK 0xFCC00000 +#define FMAN2_ALL_MACS_MASK 0x000FCC00 + /* Read current state */ + devdisr2 = ioread32be(&guts_regs->devdisr2); + if (fman->dts_params.id == 0) + reg = devdisr2 & ~FMAN1_ALL_MACS_MASK; + else + reg = devdisr2 & ~FMAN2_ALL_MACS_MASK; + + /* Enable all MACs */ + iowrite32be(reg, &guts_regs->devdisr2); + + /* Perform FMan reset */ + iowrite32be(FPM_RSTC_FM_RESET, &fman->fpm_regs->fm_rstc); + + /* Wait for reset completion */ + count = 100; + do { + udelay(1); + } while (((ioread32be(&fman->fpm_regs->fm_rstc)) & + FPM_RSTC_FM_RESET) && --count); + if (count == 0) { + iounmap(guts_regs); + of_node_put(guts_node); + err = -EBUSY; + goto _return; + } + + /* Restore devdisr2 value */ + iowrite32be(devdisr2, &guts_regs->devdisr2); + + iounmap(guts_regs); + of_node_put(guts_node); + + goto _return; + +guts_regs: + of_node_put(guts_node); +guts_node: + dev_dbg(fman->dev, "%s: Didn't perform FManV3 reset due to Errata A007273!\n", + __func__); + } +_return: + return err; +} + static int fman_init(struct fman *fman) { struct fman_cfg *cfg = NULL; @@ -1914,22 +1999,9 @@ static int fman_init(struct fman *fman) fman->liodn_base[i] = liodn_base; } - /* FMan Reset (supported only for FMan V2) */ - if (fman->state->rev_info.major >= 6) { - /* Errata A007273 */ - dev_dbg(fman->dev, "%s: FManV3 reset is not supported!\n", - __func__); - } else { - iowrite32be(FPM_RSTC_FM_RESET, &fman->fpm_regs->fm_rstc); - /* Wait for reset completion */ - count = 100; - do { - udelay(1); - } while (((ioread32be(&fman->fpm_regs->fm_rstc)) & - FPM_RSTC_FM_RESET) && --count); - if (count == 0) - return -EBUSY; - } + err = fman_reset(fman); + if (err) + return err; if (ioread32be(&fman->qmi_regs->fmqm_gs) & QMI_GS_HALT_NOT_BUSY) { resume(fman->fpm_regs); diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index 1cbcb9fa3fb5..37d0cce392be 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -147,6 +147,8 @@ enum hnae_led_state { #define HNSV2_TXD_BUFNUM_S 0 #define HNSV2_TXD_BUFNUM_M (0x7 << HNSV2_TXD_BUFNUM_S) +#define HNSV2_TXD_PORTID_S 4 +#define HNSV2_TXD_PORTID_M (0X7 << HNSV2_TXD_PORTID_S) #define HNSV2_TXD_RI_B 1 #define HNSV2_TXD_L4CS_B 2 #define HNSV2_TXD_L3CS_B 3 @@ -516,6 +518,7 @@ struct hnae_handle { int q_num; int vf_id; u32 eport_id; + u32 dport_id; /* v2 tx bd should fill the dport_id */ enum hnae_port_type port_type; struct list_head node; /* list to hnae_ae_dev->handle_list */ struct hnae_buf_ops *bops; /* operation for the buffer */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index d4f92ed322d6..285c893ab135 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -175,6 +175,7 @@ struct hnae_handle *hns_ae_get_handle(struct hnae_ae_dev *dev, ae_handle->phy_node = vf_cb->mac_cb->phy_node; ae_handle->if_support = vf_cb->mac_cb->if_support; ae_handle->port_type = vf_cb->mac_cb->mac_type; + ae_handle->dport_id = port_idx; return ae_handle; vf_id_err: @@ -419,7 +420,10 @@ static int hns_ae_set_autoneg(struct hnae_handle *handle, u8 enable) static void hns_ae_set_promisc_mode(struct hnae_handle *handle, u32 en) { + struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle); + hns_dsaf_set_promisc_mode(hns_ae_get_dsaf_dev(handle->dev), en); + hns_mac_set_promisc(mac_cb, (u8)!!en); } static int hns_ae_get_autoneg(struct hnae_handle *handle) @@ -787,7 +791,8 @@ static int hns_ae_get_rss(struct hnae_handle *handle, u32 *indir, u8 *key, memcpy(key, ppe_cb->rss_key, HNS_PPEV2_RSS_KEY_SIZE); /* update the current hash->queue mappings from the shadow RSS table */ - memcpy(indir, ppe_cb->rss_indir_table, HNS_PPEV2_RSS_IND_TBL_SIZE); + memcpy(indir, ppe_cb->rss_indir_table, + HNS_PPEV2_RSS_IND_TBL_SIZE * sizeof(*indir)); return 0; } @@ -799,10 +804,11 @@ static int hns_ae_set_rss(struct hnae_handle *handle, const u32 *indir, /* set the RSS Hash Key if specififed by the user */ if (key) - hns_ppe_set_rss_key(ppe_cb, (int *)key); + hns_ppe_set_rss_key(ppe_cb, (u32 *)key); /* update the shadow RSS table with user specified qids */ - memcpy(ppe_cb->rss_indir_table, indir, HNS_PPEV2_RSS_IND_TBL_SIZE); + memcpy(ppe_cb->rss_indir_table, indir, + HNS_PPEV2_RSS_IND_TBL_SIZE * sizeof(*indir)); /* now update the hardware */ hns_ppe_set_indir_table(ppe_cb, ppe_cb->rss_indir_table); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c index b8517b00e706..6e2b76ede075 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c @@ -290,6 +290,24 @@ static int hns_gmac_adjust_link(void *mac_drv, enum mac_speed speed, return 0; } +static void hns_gmac_set_uc_match(void *mac_drv, u16 en) +{ + struct mac_driver *drv = mac_drv; + + dsaf_set_dev_bit(drv, GMAC_REC_FILT_CONTROL_REG, + GMAC_UC_MATCH_EN_B, !en); + dsaf_set_dev_bit(drv, GMAC_STATION_ADDR_HIGH_2_REG, + GMAC_ADDR_EN_B, !en); +} + +static void hns_gmac_set_promisc(void *mac_drv, u8 en) +{ + struct mac_driver *drv = mac_drv; + + if (drv->mac_cb->mac_type == HNAE_PORT_DEBUG) + hns_gmac_set_uc_match(mac_drv, en); +} + static void hns_gmac_init(void *mac_drv) { u32 port; @@ -305,6 +323,8 @@ static void hns_gmac_init(void *mac_drv) mdelay(10); hns_gmac_disable(mac_drv, MAC_COMM_MODE_RX_AND_TX); hns_gmac_tx_loop_pkt_dis(mac_drv); + if (drv->mac_cb->mac_type == HNAE_PORT_DEBUG) + hns_gmac_set_uc_match(mac_drv, 0); } void hns_gmac_update_stats(void *mac_drv) @@ -402,14 +422,17 @@ static void hns_gmac_set_mac_addr(void *mac_drv, char *mac_addr) { struct mac_driver *drv = (struct mac_driver *)mac_drv; - if (drv->mac_id >= DSAF_SERVICE_NW_NUM) { - u32 high_val = mac_addr[1] | (mac_addr[0] << 8); + u32 high_val = mac_addr[1] | (mac_addr[0] << 8); - u32 low_val = mac_addr[5] | (mac_addr[4] << 8) - | (mac_addr[3] << 16) | (mac_addr[2] << 24); - dsaf_write_dev(drv, GMAC_STATION_ADDR_LOW_2_REG, low_val); - dsaf_write_dev(drv, GMAC_STATION_ADDR_HIGH_2_REG, high_val); - } + u32 low_val = mac_addr[5] | (mac_addr[4] << 8) + | (mac_addr[3] << 16) | (mac_addr[2] << 24); + + u32 val = dsaf_read_dev(drv, GMAC_STATION_ADDR_HIGH_2_REG); + u32 sta_addr_en = dsaf_get_bit(val, GMAC_ADDR_EN_B); + + dsaf_write_dev(drv, GMAC_STATION_ADDR_LOW_2_REG, low_val); + dsaf_write_dev(drv, GMAC_STATION_ADDR_HIGH_2_REG, + high_val | (sta_addr_en << GMAC_ADDR_EN_B)); } static int hns_gmac_config_loopback(void *mac_drv, enum hnae_loop loop_mode, @@ -699,6 +722,7 @@ void *hns_gmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param) mac_drv->get_sset_count = hns_gmac_get_sset_count; mac_drv->get_strings = hns_gmac_get_strings; mac_drv->update_stats = hns_gmac_update_stats; + mac_drv->set_promiscuous = hns_gmac_set_promisc; return (void *)mac_drv; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 5ef0e96e918a..a38084a22bf2 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -467,8 +467,13 @@ int hns_mac_set_mtu(struct hns_mac_cb *mac_cb, u32 new_mtu) struct mac_driver *drv = hns_mac_get_drv(mac_cb); u32 buf_size = mac_cb->dsaf_dev->buf_size; u32 new_frm = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + u32 max_frm = AE_IS_VER1(mac_cb->dsaf_dev->dsaf_ver) ? + MAC_MAX_MTU : MAC_MAX_MTU_V2; - if ((new_mtu < MAC_MIN_MTU) || (new_frm > MAC_MAX_MTU) || + if (mac_cb->mac_type == HNAE_PORT_DEBUG) + max_frm = MAC_MAX_MTU_DBG; + + if ((new_mtu < MAC_MIN_MTU) || (new_frm > max_frm) || (new_frm > HNS_RCB_RING_MAX_BD_PER_PKT * buf_size)) return -EINVAL; @@ -861,6 +866,14 @@ int hns_mac_get_sset_count(struct hns_mac_cb *mac_cb, int stringset) return mac_ctrl_drv->get_sset_count(stringset); } +void hns_mac_set_promisc(struct hns_mac_cb *mac_cb, u8 en) +{ + struct mac_driver *mac_ctrl_drv = hns_mac_get_drv(mac_cb); + + if (mac_ctrl_drv->set_promiscuous) + mac_ctrl_drv->set_promiscuous(mac_ctrl_drv, en); +} + int hns_mac_get_regs_count(struct hns_mac_cb *mac_cb) { struct mac_driver *mac_ctrl_drv = hns_mac_get_drv(mac_cb); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h index 0b052191d751..823b6e78c8aa 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h @@ -26,7 +26,9 @@ struct dsaf_device; #define MAC_DEFAULT_MTU (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN + ETH_DATA_LEN) #define MAC_MAX_MTU 9600 +#define MAC_MAX_MTU_V2 9728 #define MAC_MIN_MTU 68 +#define MAC_MAX_MTU_DBG MAC_DEFAULT_MTU #define MAC_DEFAULT_PAUSE_TIME 0xff @@ -365,7 +367,7 @@ struct mac_driver { /*config rx pause enable*/ void (*set_rx_ignore_pause_frames)(void *mac_drv, u32 enable); /* config rx mode for promiscuous*/ - int (*set_promiscuous)(void *mac_drv, u8 enable); + void (*set_promiscuous)(void *mac_drv, u8 enable); /* get mac id */ void (*mac_get_id)(void *mac_drv, u8 *mac_id); void (*mac_pausefrm_cfg)(void *mac_drv, u32 rx_en, u32 tx_en); @@ -453,4 +455,6 @@ int hns_mac_get_regs_count(struct hns_mac_cb *mac_cb); void hns_set_led_opt(struct hns_mac_cb *mac_cb); int hns_cpld_led_set_id(struct hns_mac_cb *mac_cb, enum hnae_led_state status); +void hns_mac_set_promisc(struct hns_mac_cb *mac_cb, u8 en); + #endif /* _HNS_DSAF_MAC_H */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 38fc5be3870c..5c1ac9ba1bf2 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -748,8 +748,9 @@ static void hns_dsaf_tbl_stat_en(struct dsaf_device *dsaf_dev) */ static void hns_dsaf_rocee_bp_en(struct dsaf_device *dsaf_dev) { - dsaf_set_dev_bit(dsaf_dev, DSAF_XGE_CTRL_SIG_CFG_0_REG, - DSAF_FC_XGE_TX_PAUSE_S, 1); + if (AE_IS_VER1(dsaf_dev->dsaf_ver)) + dsaf_set_dev_bit(dsaf_dev, DSAF_XGE_CTRL_SIG_CFG_0_REG, + DSAF_FC_XGE_TX_PAUSE_S, 1); } /* set msk for dsaf exception irq*/ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c index f302ef9073c6..5b7ae5ff43e8 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c @@ -27,7 +27,7 @@ void hns_ppe_set_tso_enable(struct hns_ppe_cb *ppe_cb, u32 value) void hns_ppe_set_rss_key(struct hns_ppe_cb *ppe_cb, const u32 rss_key[HNS_PPEV2_RSS_KEY_NUM]) { - int key_item = 0; + u32 key_item; for (key_item = 0; key_item < HNS_PPEV2_RSS_KEY_NUM; key_item++) dsaf_write_dev(ppe_cb, PPEV2_RSS_KEY_REG + key_item * 0x4, @@ -343,6 +343,9 @@ static void hns_ppe_init_hw(struct hns_ppe_cb *ppe_cb) if (!AE_IS_VER1(dsaf_dev->dsaf_ver)) { hns_ppe_set_vlan_strip(ppe_cb, 0); + dsaf_write_dev(ppe_cb, PPE_CFG_MAX_FRAME_LEN_REG, + HNS_PPEV2_MAX_FRAME_LEN); + /* set default RSS key in h/w */ hns_ppe_set_rss_key(ppe_cb, ppe_cb->rss_key); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h index 0f5cb6962acf..e9c0ec2fa0dd 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h @@ -30,6 +30,8 @@ #define HNS_PPEV2_RSS_KEY_SIZE 40 /* in bytes or 320 bits */ #define HNS_PPEV2_RSS_KEY_NUM (HNS_PPEV2_RSS_KEY_SIZE / sizeof(u32)) +#define HNS_PPEV2_MAX_FRAME_LEN 0X980 + enum ppe_qid_mode { PPE_QID_MODE0 = 0, /* fixed queue id mode */ PPE_QID_MODE1, /* switch:128VM non switch:6Port/4VM/4TC */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h index 60d695daa471..bf62687e5ea7 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h @@ -922,6 +922,8 @@ #define GMAC_LP_REG_CF2MI_LP_EN_B 2 #define GMAC_MODE_CHANGE_EB_B 0 +#define GMAC_UC_MATCH_EN_B 0 +#define GMAC_ADDR_EN_B 16 #define GMAC_RECV_CTRL_STRIP_PAD_EN_B 3 #define GMAC_RECV_CTRL_RUNT_PKT_EN_B 4 diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 3f77ff77abbc..71aa37b4b338 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -48,7 +48,6 @@ static void fill_v2_desc(struct hnae_ring *ring, void *priv, struct iphdr *iphdr; struct ipv6hdr *ipv6hdr; struct sk_buff *skb; - int skb_tmp_len; __be16 protocol; u8 bn_pid = 0; u8 rrcfv = 0; @@ -66,10 +65,14 @@ static void fill_v2_desc(struct hnae_ring *ring, void *priv, desc->addr = cpu_to_le64(dma); desc->tx.send_size = cpu_to_le16((u16)size); - /*config bd buffer end */ + /* config bd buffer end */ hnae_set_bit(rrcfv, HNSV2_TXD_VLD_B, 1); hnae_set_field(bn_pid, HNSV2_TXD_BUFNUM_M, 0, buf_num - 1); + /* fill port_id in the tx bd for sending management pkts */ + hnae_set_field(bn_pid, HNSV2_TXD_PORTID_M, + HNSV2_TXD_PORTID_S, ring->q->handle->dport_id); + if (type == DESC_TYPE_SKB) { skb = (struct sk_buff *)priv; @@ -90,13 +93,13 @@ static void fill_v2_desc(struct hnae_ring *ring, void *priv, hnae_set_bit(rrcfv, HNSV2_TXD_L4CS_B, 1); /* check for tcp/udp header */ - if (iphdr->protocol == IPPROTO_TCP) { + if (iphdr->protocol == IPPROTO_TCP && + skb_is_gso(skb)) { hnae_set_bit(tvsvsn, HNSV2_TXD_TSE_B, 1); - skb_tmp_len = SKB_TMP_LEN(skb); l4_len = tcp_hdrlen(skb); - mss = mtu - skb_tmp_len - ETH_FCS_LEN; - paylen = skb->len - skb_tmp_len; + mss = skb_shinfo(skb)->gso_size; + paylen = skb->len - SKB_TMP_LEN(skb); } } else if (skb->protocol == htons(ETH_P_IPV6)) { hnae_set_bit(tvsvsn, HNSV2_TXD_IPV6_B, 1); @@ -104,13 +107,13 @@ static void fill_v2_desc(struct hnae_ring *ring, void *priv, hnae_set_bit(rrcfv, HNSV2_TXD_L4CS_B, 1); /* check for tcp/udp header */ - if (ipv6hdr->nexthdr == IPPROTO_TCP) { + if (ipv6hdr->nexthdr == IPPROTO_TCP && + skb_is_gso(skb) && skb_is_gso_v6(skb)) { hnae_set_bit(tvsvsn, HNSV2_TXD_TSE_B, 1); - skb_tmp_len = SKB_TMP_LEN(skb); l4_len = tcp_hdrlen(skb); - mss = mtu - skb_tmp_len - ETH_FCS_LEN; - paylen = skb->len - skb_tmp_len; + mss = skb_shinfo(skb)->gso_size; + paylen = skb->len - SKB_TMP_LEN(skb); } } desc->tx.ip_offset = ip_offset; @@ -564,6 +567,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, struct sk_buff *skb; struct hnae_desc *desc; struct hnae_desc_cb *desc_cb; + struct ethhdr *eh; unsigned char *va; int bnum, length, i; int pull_len; @@ -670,6 +674,14 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, return -EFAULT; } + /* filter out multicast pkt with the same src mac as this port */ + eh = eth_hdr(skb); + if (unlikely(is_multicast_ether_addr(eh->h_dest) && + ether_addr_equal(ndev->dev_addr, eh->h_source))) { + dev_kfree_skb_any(skb); + return -EFAULT; + } + ring->stats.rx_pkts++; ring->stats.rx_bytes += skb->len; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 3c4a3bc31a89..9c3ba65988e1 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -1173,18 +1173,15 @@ hns_get_rss_key_size(struct net_device *netdev) { struct hns_nic_priv *priv = netdev_priv(netdev); struct hnae_ae_ops *ops; - u32 ret; if (AE_IS_VER1(priv->enet_ver)) { netdev_err(netdev, "RSS feature is not supported on this hardware\n"); - return -EOPNOTSUPP; + return 0; } ops = priv->ae_handle->dev->ops; - ret = ops->get_rss_key_size(priv->ae_handle); - - return ret; + return ops->get_rss_key_size(priv->ae_handle); } static u32 @@ -1192,18 +1189,15 @@ hns_get_rss_indir_size(struct net_device *netdev) { struct hns_nic_priv *priv = netdev_priv(netdev); struct hnae_ae_ops *ops; - u32 ret; if (AE_IS_VER1(priv->enet_ver)) { netdev_err(netdev, "RSS feature is not supported on this hardware\n"); - return -EOPNOTSUPP; + return 0; } ops = priv->ae_handle->dev->ops; - ret = ops->get_rss_indir_size(priv->ae_handle); - - return ret; + return ops->get_rss_indir_size(priv->ae_handle); } static int @@ -1211,7 +1205,6 @@ hns_get_rss(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { struct hns_nic_priv *priv = netdev_priv(netdev); struct hnae_ae_ops *ops; - int ret; if (AE_IS_VER1(priv->enet_ver)) { netdev_err(netdev, @@ -1224,9 +1217,7 @@ hns_get_rss(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) if (!indir) return 0; - ret = ops->get_rss(priv->ae_handle, indir, key, hfunc); - - return 0; + return ops->get_rss(priv->ae_handle, indir, key, hfunc); } static int @@ -1235,7 +1226,6 @@ hns_set_rss(struct net_device *netdev, const u32 *indir, const u8 *key, { struct hns_nic_priv *priv = netdev_priv(netdev); struct hnae_ae_ops *ops; - int ret; if (AE_IS_VER1(priv->enet_ver)) { netdev_err(netdev, @@ -1252,7 +1242,22 @@ hns_set_rss(struct net_device *netdev, const u32 *indir, const u8 *key, if (!indir) return 0; - ret = ops->set_rss(priv->ae_handle, indir, key, hfunc); + return ops->set_rss(priv->ae_handle, indir, key, hfunc); +} + +static int hns_get_rxnfc(struct net_device *netdev, + struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + struct hns_nic_priv *priv = netdev_priv(netdev); + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = priv->ae_handle->q_num; + break; + default: + return -EOPNOTSUPP; + } return 0; } @@ -1280,6 +1285,7 @@ static struct ethtool_ops hns_ethtool_ops = { .get_rxfh_indir_size = hns_get_rss_indir_size, .get_rxfh = hns_get_rss, .set_rxfh = hns_set_rss, + .get_rxnfc = hns_get_rxnfc, }; void hns_ethtool_set_ops(struct net_device *ndev) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 7f2126b6a179..e0b68afea56e 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1690,8 +1690,8 @@ static int mtk_probe(struct platform_device *pdev) return -ENOMEM; eth->base = devm_ioremap_resource(&pdev->dev, res); - if (!eth->base) - return -EADDRNOTAVAIL; + if (IS_ERR(eth->base)) + return PTR_ERR(eth->base); spin_lock_init(ð->page_lock); diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 42d8de892bfe..6aa73972d478 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -39,8 +39,6 @@ #include "mlx4.h" -static const u8 zero_gid[16]; /* automatically initialized to 0 */ - int mlx4_get_mgm_entry_size(struct mlx4_dev *dev) { return 1 << dev->oper_log_mgm_entry_size; diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 3f5711061432..a733868a43aa 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -1791,9 +1791,11 @@ static int smc911x_probe(struct net_device *dev) unsigned int val, chip_id, revision; const char *version_string; unsigned long irq_flags; +#ifdef SMC_USE_DMA struct dma_slave_config config; dma_cap_mask_t mask; struct pxad_param param; +#endif DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 192631a345df..bc168894bda3 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -843,8 +843,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, if (info) { fl6->daddr = info->key.u.ipv6.dst; fl6->saddr = info->key.u.ipv6.src; - fl6->flowi6_tos = RT_TOS(info->key.tos); - fl6->flowlabel = info->key.label; + fl6->flowlabel = ip6_make_flowinfo(RT_TOS(info->key.tos), + info->key.label); dst_cache = &info->dst_cache; } else { prio = geneve->tos; @@ -855,8 +855,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, use_cache = false; } - fl6->flowi6_tos = RT_TOS(prio); - fl6->flowlabel = geneve->label; + fl6->flowlabel = ip6_make_flowinfo(RT_TOS(prio), + geneve->label); fl6->daddr = geneve->remote.sin6.sin6_addr; dst_cache = &geneve->dst_cache; } @@ -1049,7 +1049,8 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (unlikely(err)) goto err; - prio = ip_tunnel_ecn_encap(fl6.flowi6_tos, iip, skb); + prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel), + iip, skb); ttl = geneve->ttl; if (!ttl && ipv6_addr_is_multicast(&fl6.daddr)) ttl = 1; diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index b4c68783dfc3..8b3bd8ecd1c4 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -619,6 +619,7 @@ struct nvsp_message { #define NETVSC_PACKET_SIZE 4096 #define VRSS_SEND_TAB_SIZE 16 +#define VRSS_CHANNEL_MAX 64 #define RNDIS_MAX_PKT_DEFAULT 8 #define RNDIS_PKT_ALIGN_DEFAULT 8 @@ -700,13 +701,13 @@ struct netvsc_device { struct net_device *ndev; - struct vmbus_channel *chn_table[NR_CPUS]; + struct vmbus_channel *chn_table[VRSS_CHANNEL_MAX]; u32 send_table[VRSS_SEND_TAB_SIZE]; u32 max_chn; u32 num_chn; spinlock_t sc_lock; /* Protects num_sc_offered variable */ u32 num_sc_offered; - atomic_t queue_sends[NR_CPUS]; + atomic_t queue_sends[VRSS_CHANNEL_MAX]; /* Holds rndis device info */ void *extension; @@ -718,7 +719,7 @@ struct netvsc_device { /* The sub channel callback buffer */ unsigned char *sub_cb_buf; - struct multi_send_data msd[NR_CPUS]; + struct multi_send_data msd[VRSS_CHANNEL_MAX]; u32 max_pkt; /* max number of pkt in one send, e.g. 8 */ u32 pkt_align; /* alignment bytes, e.g. 8 */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 08608499fa17..b8121eba33ff 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -858,6 +858,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) struct netvsc_device *nvdev = hv_get_drvdata(hdev); struct netvsc_device_info device_info; int limit = ETH_DATA_LEN; + u32 num_chn; int ret = 0; if (nvdev == NULL || nvdev->destroy) @@ -873,6 +874,8 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) if (ret) goto out; + num_chn = nvdev->num_chn; + nvdev->start_remove = true; rndis_filter_device_remove(hdev); @@ -883,7 +886,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) memset(&device_info, 0, sizeof(device_info)); device_info.ring_size = ring_size; - device_info.num_chn = nvdev->num_chn; + device_info.num_chn = num_chn; device_info.max_num_vrss_chns = max_num_vrss_chns; rndis_filter_device_add(hdev, &device_info); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 47d07c576a34..c4e1e0408433 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -986,12 +986,6 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) nvscdev = hv_get_drvdata(new_sc->primary_channel->device_obj); - spin_lock_irqsave(&nvscdev->sc_lock, flags); - nvscdev->num_sc_offered--; - spin_unlock_irqrestore(&nvscdev->sc_lock, flags); - if (nvscdev->num_sc_offered == 0) - complete(&nvscdev->channel_init_wait); - if (chn_index >= nvscdev->num_chn) return; @@ -1004,6 +998,12 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) if (ret == 0) nvscdev->chn_table[chn_index] = new_sc; + + spin_lock_irqsave(&nvscdev->sc_lock, flags); + nvscdev->num_sc_offered--; + spin_unlock_irqrestore(&nvscdev->sc_lock, flags); + if (nvscdev->num_sc_offered == 0) + complete(&nvscdev->channel_init_wait); } int rndis_filter_device_add(struct hv_device *dev, @@ -1113,9 +1113,9 @@ int rndis_filter_device_add(struct hv_device *dev, if (ret || rsscap.num_recv_que < 2) goto out; - num_rss_qs = min(device_info->max_num_vrss_chns, rsscap.num_recv_que); + net_device->max_chn = min_t(u32, VRSS_CHANNEL_MAX, rsscap.num_recv_que); - net_device->max_chn = rsscap.num_recv_que; + num_rss_qs = min(device_info->max_num_vrss_chns, net_device->max_chn); /* * We will limit the VRSS channels to the number CPUs in the NUMA node diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 1e901c7cfaac..b3ffaee30858 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -277,12 +277,16 @@ static int at803x_probe(struct phy_device *phydev) if (!priv) return -ENOMEM; - gpiod_reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); + if (phydev->drv->phy_id != ATH8030_PHY_ID) + goto does_not_require_reset_workaround; + + gpiod_reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW); if (IS_ERR(gpiod_reset)) return PTR_ERR(gpiod_reset); priv->gpiod_reset = gpiod_reset; +does_not_require_reset_workaround: phydev->priv = priv; return 0; @@ -362,10 +366,10 @@ static void at803x_link_change_notify(struct phy_device *phydev) at803x_context_save(phydev, &context); - gpiod_set_value(priv->gpiod_reset, 0); - msleep(1); gpiod_set_value(priv->gpiod_reset, 1); msleep(1); + gpiod_set_value(priv->gpiod_reset, 0); + msleep(1); at803x_context_restore(phydev, &context); diff --git a/drivers/net/phy/mdio-sun4i.c b/drivers/net/phy/mdio-sun4i.c index f70522c35163..135296508a7e 100644 --- a/drivers/net/phy/mdio-sun4i.c +++ b/drivers/net/phy/mdio-sun4i.c @@ -122,6 +122,7 @@ static int sun4i_mdio_probe(struct platform_device *pdev) return -EPROBE_DEFER; dev_info(&pdev->dev, "no regulator found\n"); + data->regulator = NULL; } else { ret = regulator_enable(data->regulator); if (ret) @@ -137,7 +138,8 @@ static int sun4i_mdio_probe(struct platform_device *pdev) return 0; err_out_disable_regulator: - regulator_disable(data->regulator); + if (data->regulator) + regulator_disable(data->regulator); err_out_free_mdiobus: mdiobus_free(bus); return ret; diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 4fd861063ed4..f572b31a2b20 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -2307,7 +2307,7 @@ int ppp_register_net_channel(struct net *net, struct ppp_channel *chan) pch->ppp = NULL; pch->chan = chan; - pch->chan_net = net; + pch->chan_net = get_net(net); chan->ppp = pch; init_ppp_file(&pch->file, CHANNEL); pch->file.hdrlen = chan->hdrlen; @@ -2404,6 +2404,8 @@ ppp_unregister_channel(struct ppp_channel *chan) spin_lock_bh(&pn->all_channels_lock); list_del(&pch->list); spin_unlock_bh(&pn->all_channels_lock); + put_net(pch->chan_net); + pch->chan_net = NULL; pch->file.dead = 1; wake_up_interruptible(&pch->file.rwait); diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index d36d5ebf37f3..f20890ee03f3 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3261,54 +3261,6 @@ void lan78xx_tx_timeout(struct net_device *net) tasklet_schedule(&dev->bh); } -struct rtnl_link_stats64 *lan78xx_get_stats64(struct net_device *netdev, - struct rtnl_link_stats64 *storage) -{ - struct lan78xx_net *dev = netdev_priv(netdev); - struct lan78xx_statstage64 stats; - - /* curr_stat is updated by timer. - * periodic reading from HW will prevent from entering USB auto suspend. - * if autosuspend is disabled, read from HW. - */ - if (!dev->udev->dev.power.runtime_auto) - lan78xx_update_stats(dev); - - mutex_lock(&dev->stats.access_lock); - memcpy(&stats, &dev->stats.curr_stat, sizeof(stats)); - mutex_unlock(&dev->stats.access_lock); - - /* calc by driver */ - storage->rx_packets = (__u64)netdev->stats.rx_packets; - storage->tx_packets = (__u64)netdev->stats.tx_packets; - storage->rx_bytes = (__u64)netdev->stats.rx_bytes; - storage->tx_bytes = (__u64)netdev->stats.tx_bytes; - - /* use counter */ - storage->rx_length_errors = stats.rx_undersize_frame_errors + - stats.rx_oversize_frame_errors; - storage->rx_crc_errors = stats.rx_fcs_errors; - storage->rx_frame_errors = stats.rx_alignment_errors; - storage->rx_fifo_errors = stats.rx_dropped_frames; - storage->rx_over_errors = stats.rx_oversize_frame_errors; - storage->rx_errors = stats.rx_fcs_errors + - stats.rx_alignment_errors + - stats.rx_fragment_errors + - stats.rx_jabber_errors + - stats.rx_undersize_frame_errors + - stats.rx_oversize_frame_errors + - stats.rx_dropped_frames; - - storage->tx_carrier_errors = stats.tx_carrier_errors; - storage->tx_errors = stats.tx_fcs_errors + - stats.tx_excess_deferral_errors + - stats.tx_carrier_errors; - - storage->multicast = stats.rx_multicast_frames; - - return storage; -} - static const struct net_device_ops lan78xx_netdev_ops = { .ndo_open = lan78xx_open, .ndo_stop = lan78xx_stop, @@ -3322,7 +3274,6 @@ static const struct net_device_ops lan78xx_netdev_ops = { .ndo_set_features = lan78xx_set_features, .ndo_vlan_rx_add_vid = lan78xx_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = lan78xx_vlan_rx_kill_vid, - .ndo_get_stats64 = lan78xx_get_stats64, }; static void lan78xx_stat_monitor(unsigned long param) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 800106a7246c..1c0fa364323e 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1143,7 +1143,7 @@ static int vxlan_igmp_leave(struct vxlan_dev *vxlan) static bool vxlan_remcsum(struct vxlanhdr *unparsed, struct sk_buff *skb, u32 vxflags) { - size_t start, offset, plen; + size_t start, offset; if (!(unparsed->vx_flags & VXLAN_HF_RCO) || skb->remcsum_offload) goto out; @@ -1151,9 +1151,7 @@ static bool vxlan_remcsum(struct vxlanhdr *unparsed, start = vxlan_rco_start(unparsed->vx_vni); offset = start + vxlan_rco_offset(unparsed->vx_vni); - plen = sizeof(struct vxlanhdr) + offset + sizeof(u16); - - if (!pskb_may_pull(skb, plen)) + if (!pskb_may_pull(skb, offset + sizeof(u16))) return false; skb_remcsum_process(skb, (void *)(vxlan_hdr(skb) + 1), start, offset, @@ -1810,10 +1808,9 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; - fl6.flowi6_tos = RT_TOS(tos); fl6.daddr = *daddr; fl6.saddr = vxlan->cfg.saddr.sin6.sin6_addr; - fl6.flowlabel = label; + fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label); fl6.flowi6_mark = skb->mark; fl6.flowi6_proto = IPPROTO_UDP; diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 246a3529ecf6..ac02c54520e9 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -596,7 +596,7 @@ static inline struct mISDNdevice *dev_to_mISDN(struct device *dev) } extern void set_channel_address(struct mISDNchannel *, u_int, u_int); -extern void mISDN_clock_update(struct mISDNclock *, int, struct timeval *); +extern void mISDN_clock_update(struct mISDNclock *, int, ktime_t *); extern unsigned short mISDN_clock_get(void); extern const char *mISDNDevName4ch(struct mISDNchannel *); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 009c85adae4c..cb0d5d09c2e4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -81,8 +81,8 @@ void netdev_set_default_ethtool_ops(struct net_device *dev, * function. Real network devices commonly used with qdiscs should only return * the driver transmit return codes though - when qdiscs are used, the actual * transmission happens asynchronously, so the value is not propagated to - * higher layers. Virtual network devices transmit synchronously, in this case - * the driver transmit return codes are consumed by dev_queue_xmit(), all + * higher layers. Virtual network devices transmit synchronously; in this case + * the driver transmit return codes are consumed by dev_queue_xmit(), and all * others are propagated to higher layers. */ @@ -129,7 +129,7 @@ static inline bool dev_xmit_complete(int rc) } /* - * Compute the worst case header length according to the protocols + * Compute the worst-case header length according to the protocols * used. */ @@ -246,7 +246,7 @@ struct hh_cache { unsigned long hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)]; }; -/* Reserve HH_DATA_MOD byte aligned hard_header_len, but at least that much. +/* Reserve HH_DATA_MOD byte-aligned hard_header_len, but at least that much. * Alternative is: * dev->hard_header_len ? (dev->hard_header_len + * (HH_DATA_MOD - 1)) & ~(HH_DATA_MOD - 1) : 0 @@ -272,7 +272,7 @@ struct header_ops { }; /* These flag bits are private to the generic network queueing - * layer, they may not be explicitly referenced by any other + * layer; they may not be explicitly referenced by any other * code. */ @@ -286,7 +286,7 @@ enum netdev_state_t { /* - * This structure holds at boot time configured netdevice settings. They + * This structure holds boot-time configured netdevice settings. They * are then used in the device probing. */ struct netdev_boot_setup { @@ -304,7 +304,7 @@ struct napi_struct { /* The poll_list must only be managed by the entity which * changes the state of the NAPI_STATE_SCHED bit. This means * whoever atomically sets that bit can add this napi_struct - * to the per-cpu poll_list, and whoever clears that bit + * to the per-CPU poll_list, and whoever clears that bit * can remove from the list right before clearing the bit. */ struct list_head poll_list; @@ -350,7 +350,7 @@ typedef enum gro_result gro_result_t; * @RX_HANDLER_ANOTHER: Do another round in receive path. This is indicated in * case skb->dev was changed by rx_handler. * @RX_HANDLER_EXACT: Force exact delivery, no wildcard. - * @RX_HANDLER_PASS: Do nothing, passe the skb as if no rx_handler was called. + * @RX_HANDLER_PASS: Do nothing, pass the skb as if no rx_handler was called. * * rx_handlers are functions called from inside __netif_receive_skb(), to do * special processing of the skb, prior to delivery to protocol handlers. @@ -365,19 +365,19 @@ typedef enum gro_result gro_result_t; * Upon return, rx_handler is expected to tell __netif_receive_skb() what to * do with the skb. * - * If the rx_handler consumed to skb in some way, it should return + * If the rx_handler consumed the skb in some way, it should return * RX_HANDLER_CONSUMED. This is appropriate when the rx_handler arranged for - * the skb to be delivered in some other ways. + * the skb to be delivered in some other way. * * If the rx_handler changed skb->dev, to divert the skb to another * net_device, it should return RX_HANDLER_ANOTHER. The rx_handler for the * new device will be called if it exists. * - * If the rx_handler consider the skb should be ignored, it should return + * If the rx_handler decides the skb should be ignored, it should return * RX_HANDLER_EXACT. The skb will only be delivered to protocol handlers that * are registered on exact device (ptype->dev == skb->dev). * - * If the rx_handler didn't changed skb->dev, but want the skb to be normally + * If the rx_handler didn't change skb->dev, but wants the skb to be normally * delivered, it should return RX_HANDLER_PASS. * * A device without a registered rx_handler will behave as if rx_handler @@ -402,11 +402,11 @@ static inline bool napi_disable_pending(struct napi_struct *n) } /** - * napi_schedule_prep - check if napi can be scheduled - * @n: napi context + * napi_schedule_prep - check if NAPI can be scheduled + * @n: NAPI context * * Test if NAPI routine is already running, and if not mark - * it as running. This is used as a condition variable + * it as running. This is used as a condition variable to * insure only one NAPI poll instance runs. We also make * sure there is no pending NAPI disable. */ @@ -418,7 +418,7 @@ static inline bool napi_schedule_prep(struct napi_struct *n) /** * napi_schedule - schedule NAPI poll - * @n: napi context + * @n: NAPI context * * Schedule NAPI poll routine to be called if it is not already * running. @@ -431,7 +431,7 @@ static inline void napi_schedule(struct napi_struct *n) /** * napi_schedule_irqoff - schedule NAPI poll - * @n: napi context + * @n: NAPI context * * Variant of napi_schedule(), assuming hard irqs are masked. */ @@ -455,7 +455,7 @@ void __napi_complete(struct napi_struct *n); void napi_complete_done(struct napi_struct *n, int work_done); /** * napi_complete - NAPI processing complete - * @n: napi context + * @n: NAPI context * * Mark NAPI processing as complete. * Consider using napi_complete_done() instead. @@ -467,32 +467,32 @@ static inline void napi_complete(struct napi_struct *n) /** * napi_hash_add - add a NAPI to global hashtable - * @napi: napi context + * @napi: NAPI context * - * generate a new napi_id and store a @napi under it in napi_hash - * Used for busy polling (CONFIG_NET_RX_BUSY_POLL) + * Generate a new napi_id and store a @napi under it in napi_hash. + * Used for busy polling (CONFIG_NET_RX_BUSY_POLL). * Note: This is normally automatically done from netif_napi_add(), - * so might disappear in a future linux version. + * so might disappear in a future Linux version. */ void napi_hash_add(struct napi_struct *napi); /** * napi_hash_del - remove a NAPI from global table - * @napi: napi context + * @napi: NAPI context * - * Warning: caller must observe rcu grace period + * Warning: caller must observe RCU grace period * before freeing memory containing @napi, if * this function returns true. * Note: core networking stack automatically calls it - * from netif_napi_del() + * from netif_napi_del(). * Drivers might want to call this helper to combine all - * the needed rcu grace periods into a single one. + * the needed RCU grace periods into a single one. */ bool napi_hash_del(struct napi_struct *napi); /** * napi_disable - prevent NAPI from scheduling - * @n: napi context + * @n: NAPI context * * Stop NAPI from being scheduled on this context. * Waits till any outstanding processing completes. @@ -501,7 +501,7 @@ void napi_disable(struct napi_struct *n); /** * napi_enable - enable NAPI scheduling - * @n: napi context + * @n: NAPI context * * Resume NAPI from being scheduled on this context. * Must be paired with napi_disable. @@ -516,7 +516,7 @@ static inline void napi_enable(struct napi_struct *n) /** * napi_synchronize - wait until NAPI is not running - * @n: napi context + * @n: NAPI context * * Wait until NAPI is done being scheduled on this context. * Waits till any outstanding processing completes but @@ -559,7 +559,7 @@ enum netdev_queue_state_t { struct netdev_queue { /* - * read mostly part + * read-mostly part */ struct net_device *dev; struct Qdisc __rcu *qdisc; @@ -571,7 +571,7 @@ struct netdev_queue { int numa_node; #endif /* - * write mostly part + * write-mostly part */ spinlock_t _xmit_lock ____cacheline_aligned_in_smp; int xmit_lock_owner; @@ -648,11 +648,11 @@ struct rps_dev_flow_table { /* * The rps_sock_flow_table contains mappings of flows to the last CPU * on which they were processed by the application (set in recvmsg). - * Each entry is a 32bit value. Upper part is the high order bits - * of flow hash, lower part is cpu number. + * Each entry is a 32bit value. Upper part is the high-order bits + * of flow hash, lower part is CPU number. * rps_cpu_mask is used to partition the space, depending on number of - * possible cpus : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 - * For example, if 64 cpus are possible, rps_cpu_mask = 0x3f, + * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 + * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f, * meaning we use 32-6=26 bits for the hash. */ struct rps_sock_flow_table { @@ -674,7 +674,7 @@ static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, unsigned int index = hash & table->mask; u32 val = hash & ~rps_cpu_mask; - /* We only give a hint, preemption can change cpu under us */ + /* We only give a hint, preemption can change CPU under us */ val |= raw_smp_processor_id(); if (table->ents[index] != val) @@ -807,21 +807,21 @@ struct tc_to_netdev { * optional and can be filled with a null pointer. * * int (*ndo_init)(struct net_device *dev); - * This function is called once when network device is registered. - * The network device can use this to any late stage initializaton - * or semantic validattion. It can fail with an error code which will - * be propogated back to register_netdev + * This function is called once when a network device is registered. + * The network device can use this for any late stage initialization + * or semantic validation. It can fail with an error code which will + * be propagated back to register_netdev. * * void (*ndo_uninit)(struct net_device *dev); * This function is called when device is unregistered or when registration * fails. It is not called if init fails. * * int (*ndo_open)(struct net_device *dev); - * This function is called when network device transistions to the up + * This function is called when a network device transitions to the up * state. * * int (*ndo_stop)(struct net_device *dev); - * This function is called when network device transistions to the down + * This function is called when a network device transitions to the down * state. * * netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, @@ -832,7 +832,7 @@ struct tc_to_netdev { * corner cases, but the stack really does a non-trivial amount * of useless work if you return NETDEV_TX_BUSY. * (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX) - * Required can not be NULL. + * Required; cannot be NULL. * * netdev_features_t (*ndo_fix_features)(struct net_device *dev, * netdev_features_t features); @@ -842,34 +842,34 @@ struct tc_to_netdev { * * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, * void *accel_priv, select_queue_fallback_t fallback); - * Called to decide which queue to when device supports multiple + * Called to decide which queue to use when device supports multiple * transmit queues. * * void (*ndo_change_rx_flags)(struct net_device *dev, int flags); * This function is called to allow device receiver to make - * changes to configuration when multicast or promiscious is enabled. + * changes to configuration when multicast or promiscuous is enabled. * * void (*ndo_set_rx_mode)(struct net_device *dev); * This function is called device changes address list filtering. * If driver handles unicast address filtering, it should set - * IFF_UNICAST_FLT to its priv_flags. + * IFF_UNICAST_FLT in its priv_flags. * * int (*ndo_set_mac_address)(struct net_device *dev, void *addr); * This function is called when the Media Access Control address * needs to be changed. If this interface is not defined, the - * mac address can not be changed. + * MAC address can not be changed. * * int (*ndo_validate_addr)(struct net_device *dev); * Test if Media Access Control address is valid for the device. * * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); - * Called when a user request an ioctl which can't be handled by - * the generic interface code. If not defined ioctl's return + * Called when a user requests an ioctl which can't be handled by + * the generic interface code. If not defined ioctls return * not supported error code. * * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); * Used to set network devices bus interface parameters. This interface - * is retained for legacy reason, new devices should use the bus + * is retained for legacy reasons; new devices should use the bus * interface (PCI) for low level management. * * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); @@ -878,7 +878,7 @@ struct tc_to_netdev { * will return an error. * * void (*ndo_tx_timeout)(struct net_device *dev); - * Callback uses when the transmitter has not made any progress + * Callback used when the transmitter has not made any progress * for dev->watchdog ticks. * * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev, @@ -896,11 +896,11 @@ struct tc_to_netdev { * neither operation. * * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16 vid); - * If device support VLAN filtering this function is called when a + * If device supports VLAN filtering this function is called when a * VLAN id is registered. * * int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, __be16 proto, u16 vid); - * If device support VLAN filtering this function is called when a + * If device supports VLAN filtering this function is called when a * VLAN id is unregistered. * * void (*ndo_poll_controller)(struct net_device *dev); @@ -920,7 +920,7 @@ struct tc_to_netdev { * * Enable or disable the VF ability to query its RSS Redirection Table and * Hash Key. This is needed since on some devices VF share this information - * with PF and querying it may adduce a theoretical security risk. + * with PF and querying it may introduce a theoretical security risk. * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); * int (*ndo_setup_tc)(struct net_device *dev, u8 tc) @@ -1030,20 +1030,20 @@ struct tc_to_netdev { * * void (*ndo_add_vxlan_port)(struct net_device *dev, * sa_family_t sa_family, __be16 port); - * Called by vxlan to notiy a driver about the UDP port and socket - * address family that vxlan is listnening to. It is called only when + * Called by vxlan to notify a driver about the UDP port and socket + * address family that vxlan is listening to. It is called only when * a new port starts listening. The operation is protected by the * vxlan_net->sock_lock. * * void (*ndo_add_geneve_port)(struct net_device *dev, - * sa_family_t sa_family, __be16 port); + * sa_family_t sa_family, __be16 port); * Called by geneve to notify a driver about the UDP port and socket * address family that geneve is listnening to. It is called only when * a new port starts listening. The operation is protected by the * geneve_net->sock_lock. * * void (*ndo_del_geneve_port)(struct net_device *dev, - * sa_family_t sa_family, __be16 port); + * sa_family_t sa_family, __be16 port); * Called by geneve to notify the driver about a UDP port and socket * address family that geneve is not listening to anymore. The operation * is protected by the geneve_net->sock_lock. @@ -1072,9 +1072,9 @@ struct tc_to_netdev { * Callback to use for xmit over the accelerated station. This * is used in place of ndo_start_xmit on accelerated net * devices. - * netdev_features_t (*ndo_features_check) (struct sk_buff *skb, - * struct net_device *dev - * netdev_features_t features); + * netdev_features_t (*ndo_features_check)(struct sk_buff *skb, + * struct net_device *dev + * netdev_features_t features); * Called by core transmit path to determine if device is capable of * performing offload operations on a given packet. This is to give * the device an opportunity to implement any restrictions that cannot @@ -1088,7 +1088,7 @@ struct tc_to_netdev { * int (*ndo_get_iflink)(const struct net_device *dev); * Called to get the iflink value of this device. * void (*ndo_change_proto_down)(struct net_device *dev, - * bool proto_down); + * bool proto_down); * This function is used to pass protocol port error state information * to the switch driver. The switch driver can react to the proto_down * by doing a phys down on the associated switch port. @@ -1100,7 +1100,7 @@ struct tc_to_netdev { * This function is used to specify the headroom that the skb must * consider when allocation skb during packet reception. Setting * appropriate rx headroom value allows avoiding skb head copy on - * forward. Setting a negative value reset the rx headroom to the + * forward. Setting a negative value resets the rx headroom to the * default value. * */ @@ -1299,7 +1299,7 @@ struct net_device_ops { * * These are the &struct net_device, they are only set internally * by drivers and used in the kernel. These flags are invisible to - * userspace, this means that the order of these flags can change + * userspace; this means that the order of these flags can change * during any kernel release. * * You should have a pretty good reason to be extending these flags. @@ -1323,6 +1323,10 @@ struct net_device_ops { * @IFF_LIVE_ADDR_CHANGE: device supports hardware address * change when it's running * @IFF_MACVLAN: Macvlan device + * @IFF_XMIT_DST_RELEASE_PERM: IFF_XMIT_DST_RELEASE not taking into account + * underlying stacked devices + * @IFF_IPVLAN_MASTER: IPvlan master device + * @IFF_IPVLAN_SLAVE: IPvlan slave device * @IFF_L3MDEV_MASTER: device is an L3 master device * @IFF_NO_QUEUE: device can run without qdisc attached * @IFF_OPENVSWITCH: device is a Open vSwitch master @@ -1413,10 +1417,12 @@ enum netdev_priv_flags { * * @state: Generic network queuing layer state, see netdev_state_t * @dev_list: The global list of network devices - * @napi_list: List entry, that is used for polling napi devices - * @unreg_list: List entry, that is used, when we are unregistering the - * device, see the function unregister_netdev - * @close_list: List entry, that is used, when we are closing the device + * @napi_list: List entry used for polling NAPI devices + * @unreg_list: List entry when we are unregistering the + * device; see the function unregister_netdev + * @close_list: List entry used when we are closing the device + * @ptype_all: Device-specific packet handlers for all protocols + * @ptype_specific: Device-specific, protocol-specific packet handlers * * @adj_list: Directly linked devices, like slaves for bonding * @all_adj_list: All linked devices, *including* neighbours @@ -1434,7 +1440,7 @@ enum netdev_priv_flags { * @mpls_features: Mask of features inheritable by MPLS * * @ifindex: interface index - * @group: The group, that the device belongs to + * @group: The group the device belongs to * * @stats: Statistics struct, which was left as a legacy, use * rtnl_link_stats64 instead @@ -1488,7 +1494,7 @@ enum netdev_priv_flags { * @dev_port: Used to differentiate devices that share * the same function * @addr_list_lock: XXX: need comments on this one - * @uc_promisc: Counter, that indicates, that promiscuous mode + * @uc_promisc: Counter that indicates promiscuous mode * has been enabled due to the need to listen to * additional unicast addresses in a device that * does not implement ndo_set_rx_mode() @@ -1496,9 +1502,9 @@ enum netdev_priv_flags { * @mc: multicast mac addresses * @dev_addrs: list of device hw addresses * @queues_kset: Group of all Kobjects in the Tx and RX queues - * @promiscuity: Number of times, the NIC is told to work in - * Promiscuous mode, if it becomes 0 the NIC will - * exit from working in Promiscuous mode + * @promiscuity: Number of times the NIC is told to work in + * promiscuous mode; if it becomes 0 the NIC will + * exit promiscuous mode * @allmulti: Counter, enables or disables allmulticast mode * * @vlan_info: VLAN info @@ -1544,7 +1550,7 @@ enum netdev_priv_flags { * * @trans_start: Time (in jiffies) of last Tx * @watchdog_timeo: Represents the timeout that is used by - * the watchdog ( see dev_watchdog() ) + * the watchdog (see dev_watchdog()) * @watchdog_timer: List of timers * * @pcpu_refcnt: Number of references to this device @@ -1661,8 +1667,8 @@ struct net_device { atomic_long_t rx_nohandler; #ifdef CONFIG_WIRELESS_EXT - const struct iw_handler_def * wireless_handlers; - struct iw_public_data * wireless_data; + const struct iw_handler_def *wireless_handlers; + struct iw_public_data *wireless_data; #endif const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; @@ -1715,7 +1721,7 @@ struct net_device { unsigned int allmulti; - /* Protocol specific pointers */ + /* Protocol-specific pointers */ #if IS_ENABLED(CONFIG_VLAN_8021Q) struct vlan_info __rcu *vlan_info; @@ -1745,13 +1751,11 @@ struct net_device { /* Interface address info used in eth_type_trans() */ unsigned char *dev_addr; - #ifdef CONFIG_SYSFS struct netdev_rx_queue *_rx; unsigned int num_rx_queues; unsigned int real_num_rx_queues; - #endif unsigned long gro_flush_timeout; @@ -1843,7 +1847,7 @@ struct net_device { struct garp_port __rcu *garp_port; struct mrp_port __rcu *mrp_port; - struct device dev; + struct device dev; const struct attribute_group *sysfs_groups[4]; const struct attribute_group *sysfs_rx_queue_group; @@ -1858,9 +1862,9 @@ struct net_device { #ifdef CONFIG_DCB const struct dcbnl_rtnl_ops *dcbnl_ops; #endif - u8 num_tc; - struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; - u8 prio_tc_map[TC_BITMASK + 1]; + u8 num_tc; + struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; + u8 prio_tc_map[TC_BITMASK + 1]; #if IS_ENABLED(CONFIG_FCOE) unsigned int fcoe_ddp_xid; @@ -1868,9 +1872,9 @@ struct net_device { #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) struct netprio_map __rcu *priomap; #endif - struct phy_device *phydev; - struct lock_class_key *qdisc_tx_busylock; - bool proto_down; + struct phy_device *phydev; + struct lock_class_key *qdisc_tx_busylock; + bool proto_down; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -2018,7 +2022,7 @@ static inline void *netdev_priv(const struct net_device *dev) /* Set the sysfs device type for the network logical device to allow * fine-grained identification of different network device types. For - * example Ethernet, Wirelss LAN, Bluetooth, WiMAX etc. + * example Ethernet, Wireless LAN, Bluetooth, WiMAX etc. */ #define SET_NETDEV_DEVTYPE(net, devtype) ((net)->dev.type = (devtype)) @@ -2028,22 +2032,22 @@ static inline void *netdev_priv(const struct net_device *dev) #define NAPI_POLL_WEIGHT 64 /** - * netif_napi_add - initialize a napi context + * netif_napi_add - initialize a NAPI context * @dev: network device - * @napi: napi context + * @napi: NAPI context * @poll: polling function * @weight: default weight * - * netif_napi_add() must be used to initialize a napi context prior to calling - * *any* of the other napi related functions. + * netif_napi_add() must be used to initialize a NAPI context prior to calling + * *any* of the other NAPI-related functions. */ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight); /** - * netif_tx_napi_add - initialize a napi context + * netif_tx_napi_add - initialize a NAPI context * @dev: network device - * @napi: napi context + * @napi: NAPI context * @poll: polling function * @weight: default weight * @@ -2061,22 +2065,22 @@ static inline void netif_tx_napi_add(struct net_device *dev, } /** - * netif_napi_del - remove a napi context - * @napi: napi context + * netif_napi_del - remove a NAPI context + * @napi: NAPI context * - * netif_napi_del() removes a napi context from the network device napi list + * netif_napi_del() removes a NAPI context from the network device NAPI list */ void netif_napi_del(struct napi_struct *napi); struct napi_gro_cb { /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ - void *frag0; + void *frag0; /* Length of frag0. */ unsigned int frag0_len; /* This indicates where we are processing relative to skb->data. */ - int data_offset; + int data_offset; /* This is non-zero if the packet cannot be merged with the new skb. */ u16 flush; @@ -2099,8 +2103,8 @@ struct napi_gro_cb { /* This is non-zero if the packet may be of the same flow. */ u8 same_flow:1; - /* Used in udp_gro_receive */ - u8 udp_mark:1; + /* Used in tunnel GRO receive */ + u8 encap_mark:1; /* GRO checksum is valid */ u8 csum_valid:1; @@ -2172,7 +2176,7 @@ struct udp_offload { struct udp_offload_callbacks callbacks; }; -/* often modified stats are per cpu, other are shared (netdev->stats) */ +/* often modified stats are per-CPU, other are shared (netdev->stats) */ struct pcpu_sw_netstats { u64 rx_packets; u64 rx_bytes; @@ -2269,7 +2273,7 @@ struct netdev_notifier_changeupper_info { struct netdev_notifier_info info; /* must be first */ struct net_device *upper_dev; /* new upper dev */ bool master; /* is upper dev master */ - bool linking; /* is the nofication for link or unlink */ + bool linking; /* is the notification for link or unlink */ void *upper_info; /* upper dev info */ }; @@ -2734,7 +2738,7 @@ extern int netdev_flow_limit_table_len; #endif /* CONFIG_NET_FLOW_LIMIT */ /* - * Incoming packets are placed on per-cpu queues + * Incoming packets are placed on per-CPU queues */ struct softnet_data { struct list_head poll_list; @@ -2904,7 +2908,7 @@ netif_xmit_frozen_or_drv_stopped(const struct netdev_queue *dev_queue) * @dev_queue: pointer to transmit queue * * BQL enabled drivers might use this helper in their ndo_start_xmit(), - * to give appropriate hint to the cpu. + * to give appropriate hint to the CPU. */ static inline void netdev_txq_bql_enqueue_prefetchw(struct netdev_queue *dev_queue) { @@ -2918,7 +2922,7 @@ static inline void netdev_txq_bql_enqueue_prefetchw(struct netdev_queue *dev_que * @dev_queue: pointer to transmit queue * * BQL enabled drivers might use this helper in their TX completion path, - * to give appropriate hint to the cpu. + * to give appropriate hint to the CPU. */ static inline void netdev_txq_bql_complete_prefetchw(struct netdev_queue *dev_queue) { @@ -3057,7 +3061,7 @@ static inline bool netif_running(const struct net_device *dev) } /* - * Routines to manage the subqueues on a device. We only need start + * Routines to manage the subqueues on a device. We only need start, * stop, and a check if it's stopped. All other device management is * done at the overall netdevice level. * Also test the device if we're multiqueue. @@ -3341,7 +3345,6 @@ void netif_carrier_off(struct net_device *dev); * in a "pending" state, waiting for some external event. For "on- * demand" interfaces, this new state identifies the situation where the * interface is waiting for events to place it in the up state. - * */ static inline void netif_dormant_on(struct net_device *dev) { @@ -3676,7 +3679,7 @@ void dev_uc_init(struct net_device *dev); * * Add newly added addresses to the interface, and release * addresses that have been deleted. - **/ + */ static inline int __dev_uc_sync(struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *), @@ -3692,7 +3695,7 @@ static inline int __dev_uc_sync(struct net_device *dev, * @unsync: function to call if address should be removed * * Remove all addresses that were added to the device by dev_uc_sync(). - **/ + */ static inline void __dev_uc_unsync(struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *)) @@ -3720,7 +3723,7 @@ void dev_mc_init(struct net_device *dev); * * Add newly added addresses to the interface, and release * addresses that have been deleted. - **/ + */ static inline int __dev_mc_sync(struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *), @@ -3736,7 +3739,7 @@ static inline int __dev_mc_sync(struct net_device *dev, * @unsync: function to call if address should be removed * * Remove all addresses that were added to the device by dev_mc_sync(). - **/ + */ static inline void __dev_mc_unsync(struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *)) diff --git a/include/net/flow.h b/include/net/flow.h index 83969eebebf3..d47ef4bb5423 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -127,7 +127,6 @@ struct flowi6 { #define flowi6_oif __fl_common.flowic_oif #define flowi6_iif __fl_common.flowic_iif #define flowi6_mark __fl_common.flowic_mark -#define flowi6_tos __fl_common.flowic_tos #define flowi6_scope __fl_common.flowic_scope #define flowi6_proto __fl_common.flowic_proto #define flowi6_flags __fl_common.flowic_flags @@ -135,6 +134,7 @@ struct flowi6 { #define flowi6_tun_key __fl_common.flowic_tun_key struct in6_addr daddr; struct in6_addr saddr; + /* Note: flowi6_tos is encoded in flowlabel, too. */ __be32 flowlabel; union flowi_uli uli; #define fl6_sport uli.ports.sport diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index 064cfbe639d0..954ad6bfb56a 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -15,7 +15,6 @@ #include -struct in6_addr; struct inet_bind_bucket; struct request_sock; struct sk_buff; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index c35dda9ec991..56050f913339 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -305,6 +305,22 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask); +static inline int iptunnel_pull_offloads(struct sk_buff *skb) +{ + if (skb_is_gso(skb)) { + int err; + + err = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(err)) + return err; + skb_shinfo(skb)->gso_type &= ~(NETIF_F_GSO_ENCAP_ALL >> + NETIF_F_GSO_SHIFT); + } + + skb->encapsulation = 0; + return 0; +} + static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) { if (pkt_len > 0) { diff --git a/include/net/ipv6.h b/include/net/ipv6.h index f3c9857c645d..d0aeb97aec5d 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -835,6 +835,12 @@ static inline u8 ip6_tclass(__be32 flowinfo) { return ntohl(flowinfo & IPV6_TCLASS_MASK) >> IPV6_TCLASS_SHIFT; } + +static inline __be32 ip6_make_flowinfo(unsigned int tclass, __be32 flowlabel) +{ + return htonl(tclass << IPV6_TCLASS_SHIFT) | flowlabel; +} + /* * Prototypes exported by ipv6 */ diff --git a/include/net/ping.h b/include/net/ping.h index 5fd7cc244833..4cd90d6b5c25 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -79,7 +79,6 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len); int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, void *user_icmph, size_t icmph_len); -int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); bool ping_rcv(struct sk_buff *skb); diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 835aa2ed9870..65521cfdcade 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -82,6 +82,11 @@ #define SCTP_PROTOSW_FLAG INET_PROTOSW_PERMANENT #endif +/* Round an int up to the next multiple of 4. */ +#define WORD_ROUND(s) (((s)+3)&~3) +/* Truncate to the previous multiple of 4. */ +#define WORD_TRUNC(s) ((s)&~3) + /* * Function declarations. */ @@ -426,7 +431,7 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu) if (asoc->user_frag) frag = min_t(int, frag, asoc->user_frag); - frag = min_t(int, frag, SCTP_MAX_CHUNK_LEN); + frag = WORD_TRUNC(min_t(int, frag, SCTP_MAX_CHUNK_LEN)); return frag; } @@ -475,9 +480,6 @@ for (pos = chunk->subh.fwdtsn_hdr->skip;\ (void *)pos <= (void *)chunk->subh.fwdtsn_hdr->skip + end - sizeof(struct sctp_fwdtsn_skip);\ pos++) -/* Round an int up to the next multiple of 4. */ -#define WORD_ROUND(s) (((s)+3)&~3) - /* External references. */ extern struct proto sctp_prot; diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index e2ac0620d4be..6df1ce7a411c 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1097,7 +1097,7 @@ int sctp_bind_addr_dup(struct sctp_bind_addr *dest, const struct sctp_bind_addr *src, gfp_t gfp); int sctp_add_bind_addr(struct sctp_bind_addr *, union sctp_addr *, - __u8 addr_state, gfp_t gfp); + int new_size, __u8 addr_state, gfp_t gfp); int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *); int sctp_bind_addr_match(struct sctp_bind_addr *, const union sctp_addr *, struct sctp_sock *); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index a763c96ecde4..73ed2e951c02 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -271,36 +271,36 @@ static inline struct vxlanhdr *vxlan_hdr(struct sk_buff *skb) static inline __be32 vxlan_vni(__be32 vni_field) { #if defined(__BIG_ENDIAN) - return vni_field >> 8; + return (__force __be32)((__force u32)vni_field >> 8); #else - return (vni_field & VXLAN_VNI_MASK) << 8; + return (__force __be32)((__force u32)(vni_field & VXLAN_VNI_MASK) << 8); #endif } static inline __be32 vxlan_vni_field(__be32 vni) { #if defined(__BIG_ENDIAN) - return vni << 8; + return (__force __be32)((__force u32)vni << 8); #else - return vni >> 8; + return (__force __be32)((__force u32)vni >> 8); #endif } static inline __be32 vxlan_tun_id_to_vni(__be64 tun_id) { #if defined(__BIG_ENDIAN) - return tun_id; + return (__force __be32)tun_id; #else - return tun_id >> 32; + return (__force __be32)((__force u64)tun_id >> 32); #endif } static inline __be64 vxlan_vni_to_tun_id(__be32 vni) { #if defined(__BIG_ENDIAN) - return (__be64)vni; + return (__force __be64)vni; #else - return (__be64)vni << 32; + return (__force __be64)((u64)(__force u32)vni << 32); #endif } diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h index 4cf6bac4686d..d60096cddb2a 100644 --- a/include/trace/events/fib6.h +++ b/include/trace/events/fib6.h @@ -37,7 +37,7 @@ TRACE_EVENT(fib6_table_lookup, __entry->tb_id = tb_id; __entry->oif = flp->flowi6_oif; __entry->iif = flp->flowi6_iif; - __entry->tos = flp->flowi6_tos; + __entry->tos = ip6_tclass(flp->flowlabel); __entry->scope = flp->flowi6_scope; __entry->flags = flp->flowi6_flags; diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 2835b07416b7..9222db8ccccc 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1648,9 +1648,9 @@ enum ethtool_reset_flags { * %ETHTOOL_GLINKSETTINGS: on entry, number of words passed by user * (>= 0); on return, if handshake in progress, negative if * request size unsupported by kernel: absolute value indicates - * kernel recommended size and cmd field is 0, as well as all the - * other fields; otherwise (handshake completed), strictly - * positive to indicate size used by kernel and cmd field is + * kernel expected size and all the other fields but cmd + * are 0; otherwise (handshake completed), strictly positive + * to indicate size used by kernel and cmd field stays * %ETHTOOL_GLINKSETTINGS, all other fields populated by driver. For * %ETHTOOL_SLINKSETTINGS: must be valid on entry, ie. a positive * value returned previously by %ETHTOOL_GLINKSETTINGS, otherwise diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index a62a0129d614..c488066fb53a 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -153,6 +153,8 @@ enum { IFLA_LINK_NETNSID, IFLA_PHYS_PORT_NAME, IFLA_PROTO_DOWN, + IFLA_GSO_MAX_SEGS, + IFLA_GSO_MAX_SIZE, __IFLA_MAX }; diff --git a/net/Kconfig b/net/Kconfig index e13449870d06..a8934d8c8fda 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -397,7 +397,7 @@ config LWTUNNEL with light weight tunnel state associated with fib routes. config DST_CACHE - bool "dst cache" + bool default n config NET_DEVLINK diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index a73df3315df9..8217aecf025b 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -437,6 +437,20 @@ int br_min_mtu(const struct net_bridge *br) return mtu; } +static void br_set_gso_limits(struct net_bridge *br) +{ + unsigned int gso_max_size = GSO_MAX_SIZE; + u16 gso_max_segs = GSO_MAX_SEGS; + const struct net_bridge_port *p; + + list_for_each_entry(p, &br->port_list, list) { + gso_max_size = min(gso_max_size, p->dev->gso_max_size); + gso_max_segs = min(gso_max_segs, p->dev->gso_max_segs); + } + br->dev->gso_max_size = gso_max_size; + br->dev->gso_max_segs = gso_max_segs; +} + /* * Recomputes features using slave's features */ @@ -564,6 +578,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); dev_set_mtu(br->dev, br_min_mtu(br)); + br_set_gso_limits(br); kobject_uevent(&p->kobj, KOBJ_ADD); @@ -610,6 +625,7 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) del_nbp(p); dev_set_mtu(br->dev, br_min_mtu(br)); + br_set_gso_limits(br); spin_lock_bh(&br->lock); changed_addr = br_stp_recalculate_bridge_id(br); diff --git a/net/core/dev.c b/net/core/dev.c index edb7179bc051..b9bcbe77d913 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4438,7 +4438,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; - NAPI_GRO_CB(skb)->udp_mark = 0; + NAPI_GRO_CB(skb)->encap_mark = 0; NAPI_GRO_CB(skb)->gro_remcsum_start = 0; /* Setup for GRO checksum validation */ @@ -6445,6 +6445,7 @@ EXPORT_SYMBOL(dev_get_phys_port_id); * dev_get_phys_port_name - Get device physical port name * @dev: device * @name: port name + * @len: limit of bytes to copy to name * * Get device physical port name */ diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 92d886f4adcb..4573d81093fe 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -191,6 +191,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats /** * gen_new_estimator - create a new rate estimator * @bstats: basic statistics + * @cpu_bstats: bstats per cpu * @rate_est: rate estimator statistics * @stats_lock: statistics lock * @opt: rate estimator configuration TLV @@ -287,6 +288,7 @@ EXPORT_SYMBOL(gen_kill_estimator); /** * gen_replace_estimator - replace rate estimator configuration * @bstats: basic statistics + * @cpu_bstats: bstats per cpu * @rate_est: rate estimator statistics * @stats_lock: statistics lock * @opt: rate estimator configuration TLV diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 1e2f46a69d50..e640462ea8bf 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -140,6 +140,7 @@ EXPORT_SYMBOL(__gnet_stats_copy_basic); /** * gnet_stats_copy_basic - copy basic statistics into statistic TLV * @d: dumping handle + * @cpu: copy statistic per cpu * @b: basic statistics * * Appends the basic statistics to the top level TLV created by diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 167883e09317..f2066772d0f3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -895,6 +895,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_PROMISCUITY */ + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */ + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ + + nla_total_size(4) /* IFLA_MAX_GSO_SEGS */ + + nla_total_size(4) /* IFLA_MAX_GSO_SIZE */ + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ @@ -1223,6 +1225,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put_u32(skb, IFLA_GROUP, dev->group) || nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) || nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) || + nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) || + nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) || #ifdef CONFIG_RPS nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || #endif diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f044f970f1a6..d04c2d1c8c87 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -815,7 +815,7 @@ void napi_consume_skb(struct sk_buff *skb, int budget) trace_consume_skb(skb); /* if SKB is a clone, don't handle this case */ - if (unlikely(skb->fclone != SKB_FCLONE_UNAVAILABLE)) { + if (skb->fclone != SKB_FCLONE_UNAVAILABLE) { __kfree_skb(skb); return; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 0cc923f83e10..9e481992dbae 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1380,6 +1380,19 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, return pp; } +static struct sk_buff **ipip_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + if (NAPI_GRO_CB(skb)->encap_mark) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + NAPI_GRO_CB(skb)->encap_mark = 1; + + return inet_gro_receive(head, skb); +} + #define SECONDS_PER_DAY 86400 /* inet_current_timestamp - Return IP network timestamp @@ -1402,7 +1415,7 @@ __be32 inet_current_timestamp(void) msecs += (u32)ts.tv_nsec / NSEC_PER_MSEC; /* Convert to network byte order. */ - return htons(msecs); + return htonl(msecs); } EXPORT_SYMBOL(inet_current_timestamp); @@ -1448,6 +1461,13 @@ static int inet_gro_complete(struct sk_buff *skb, int nhoff) return err; } +static int ipip_gro_complete(struct sk_buff *skb, int nhoff) +{ + skb->encapsulation = 1; + skb_shinfo(skb)->gso_type |= SKB_GSO_IPIP; + return inet_gro_complete(skb, nhoff); +} + int inet_ctl_sock_create(struct sock **sk, unsigned short family, unsigned short type, unsigned char protocol, struct net *net) @@ -1675,8 +1695,8 @@ static struct packet_offload ip_packet_offload __read_mostly = { static const struct net_offload ipip_offload = { .callbacks = { .gso_segment = inet_gso_segment, - .gro_receive = inet_gro_receive, - .gro_complete = inet_gro_complete, + .gro_receive = ipip_gro_receive, + .gro_complete = ipip_gro_complete, }, }; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 21add552e56a..8a9246deccfe 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -280,7 +280,6 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) struct in_device *in_dev; struct fib_result res; struct rtable *rt; - struct flowi4 fl4; struct net *net; int scope; @@ -296,14 +295,13 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) scope = RT_SCOPE_UNIVERSE; if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { - fl4.flowi4_oif = 0; - fl4.flowi4_iif = LOOPBACK_IFINDEX; - fl4.daddr = ip_hdr(skb)->saddr; - fl4.saddr = 0; - fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); - fl4.flowi4_scope = scope; - fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0; - fl4.flowi4_tun_key.tun_id = 0; + struct flowi4 fl4 = { + .flowi4_iif = LOOPBACK_IFINDEX, + .daddr = ip_hdr(skb)->saddr, + .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), + .flowi4_scope = scope, + .flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0, + }; if (!fib_lookup(net, &fl4, &res, 0)) return FIB_RES_PREFSRC(net, res); } else { diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 780484243e14..a0586b4a197d 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -48,7 +48,7 @@ static inline struct fou *fou_from_sock(struct sock *sk) return sk->sk_user_data; } -static void fou_recv_pull(struct sk_buff *skb, size_t len) +static int fou_recv_pull(struct sk_buff *skb, size_t len) { struct iphdr *iph = ip_hdr(skb); @@ -59,6 +59,7 @@ static void fou_recv_pull(struct sk_buff *skb, size_t len) __skb_pull(skb, len); skb_postpull_rcsum(skb, udp_hdr(skb), len); skb_reset_transport_header(skb); + return iptunnel_pull_offloads(skb); } static int fou_udp_recv(struct sock *sk, struct sk_buff *skb) @@ -68,9 +69,14 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb) if (!fou) return 1; - fou_recv_pull(skb, sizeof(struct udphdr)); + if (fou_recv_pull(skb, sizeof(struct udphdr))) + goto drop; return -fou->protocol; + +drop: + kfree_skb(skb); + return 0; } static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, @@ -170,6 +176,9 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) __skb_pull(skb, sizeof(struct udphdr) + hdrlen); skb_reset_transport_header(skb); + if (iptunnel_pull_offloads(skb)) + goto drop; + return -guehdr->proto_ctype; drop: diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 540866dbd27d..c47539d04b88 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -49,6 +49,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, /* setup inner skb. */ skb->encapsulation = 0; + SKB_GSO_CB(skb)->encap_level = 0; __skb_pull(skb, tnl_hlen); skb_reset_mac_header(skb); skb_set_network_header(skb, skb_inner_network_offset(skb)); @@ -126,6 +127,11 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, struct packet_offload *ptype; __be16 type; + if (NAPI_GRO_CB(skb)->encap_mark) + goto out; + + NAPI_GRO_CB(skb)->encap_mark = 1; + off = skb_gro_offset(skb); hlen = off + sizeof(*greh); greh = skb_gro_header_fast(skb, off); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index d27276f6f8dd..02dd990af542 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -114,7 +114,8 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, skb->vlan_tci = 0; skb_set_queue_mapping(skb, 0); skb_scrub_packet(skb, xnet); - return 0; + + return iptunnel_pull_offloads(skb); } EXPORT_SYMBOL_GPL(iptunnel_pull_header); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 836abe58a9c5..08eed5e16df0 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2070,10 +2070,14 @@ void udp_v4_early_demux(struct sk_buff *skb) if (!in_dev) return; - ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, - iph->protocol); - if (!ours) - return; + /* we are supposed to accept bcast packets */ + if (skb->pkt_type == PACKET_MULTICAST) { + ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, + iph->protocol); + if (!ours) + return; + } + sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, uh->source, iph->saddr, dif); } else if (skb->pkt_type == PACKET_HOST) { diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 8a3405a80260..0ed2dafb7cc4 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -56,6 +56,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, /* setup inner skb. */ skb->encapsulation = 0; + SKB_GSO_CB(skb)->encap_level = 0; __skb_pull(skb, tnl_hlen); skb_reset_mac_header(skb); skb_set_network_header(skb, skb_inner_network_offset(skb)); @@ -311,14 +312,14 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, unsigned int off = skb_gro_offset(skb); int flush = 1; - if (NAPI_GRO_CB(skb)->udp_mark || + if (NAPI_GRO_CB(skb)->encap_mark || (skb->ip_summed != CHECKSUM_PARTIAL && NAPI_GRO_CB(skb)->csum_cnt == 0 && !NAPI_GRO_CB(skb)->csum_valid)) goto out; - /* mark that this skb passed once through the udp gro layer */ - NAPI_GRO_CB(skb)->udp_mark = 1; + /* mark that this skb passed once through the tunnel gro layer */ + NAPI_GRO_CB(skb)->encap_mark = 1; rcu_read_lock(); uo_priv = rcu_dereference(udp_offload_base); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index eeca943f12dc..82e9f3076028 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -258,6 +258,19 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, return pp; } +static struct sk_buff **sit_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + if (NAPI_GRO_CB(skb)->encap_mark) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + NAPI_GRO_CB(skb)->encap_mark = 1; + + return ipv6_gro_receive(head, skb); +} + static int ipv6_gro_complete(struct sk_buff *skb, int nhoff) { const struct net_offload *ops; @@ -302,7 +315,7 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { static const struct net_offload sit_offload = { .callbacks = { .gso_segment = ipv6_gso_segment, - .gro_receive = ipv6_gro_receive, + .gro_receive = sit_gro_receive, .gro_complete = sit_gro_complete, }, }; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 263a5164a6f5..c382db7a2e73 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -26,35 +26,6 @@ #include #include -struct proto pingv6_prot = { - .name = "PINGv6", - .owner = THIS_MODULE, - .init = ping_init_sock, - .close = ping_close, - .connect = ip6_datagram_connect_v6_only, - .disconnect = udp_disconnect, - .setsockopt = ipv6_setsockopt, - .getsockopt = ipv6_getsockopt, - .sendmsg = ping_v6_sendmsg, - .recvmsg = ping_recvmsg, - .bind = ping_bind, - .backlog_rcv = ping_queue_rcv_skb, - .hash = ping_hash, - .unhash = ping_unhash, - .get_port = ping_get_port, - .obj_size = sizeof(struct raw6_sock), -}; -EXPORT_SYMBOL_GPL(pingv6_prot); - -static struct inet_protosw pingv6_protosw = { - .type = SOCK_DGRAM, - .protocol = IPPROTO_ICMPV6, - .prot = &pingv6_prot, - .ops = &inet6_dgram_ops, - .flags = INET_PROTOSW_REUSE, -}; - - /* Compatibility glue so we can support IPv6 when it's compiled as a module */ static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) @@ -77,7 +48,7 @@ static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, return 0; } -int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) +static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -192,6 +163,34 @@ int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return len; } +struct proto pingv6_prot = { + .name = "PINGv6", + .owner = THIS_MODULE, + .init = ping_init_sock, + .close = ping_close, + .connect = ip6_datagram_connect_v6_only, + .disconnect = udp_disconnect, + .setsockopt = ipv6_setsockopt, + .getsockopt = ipv6_getsockopt, + .sendmsg = ping_v6_sendmsg, + .recvmsg = ping_recvmsg, + .bind = ping_bind, + .backlog_rcv = ping_queue_rcv_skb, + .hash = ping_hash, + .unhash = ping_unhash, + .get_port = ping_get_port, + .obj_size = sizeof(struct raw6_sock), +}; +EXPORT_SYMBOL_GPL(pingv6_prot); + +static struct inet_protosw pingv6_protosw = { + .type = SOCK_DGRAM, + .protocol = IPPROTO_ICMPV6, + .prot = &pingv6_prot, + .ops = &inet6_dgram_ops, + .flags = INET_PROTOSW_REUSE, +}; + #ifdef CONFIG_PROC_FS static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos) { diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index f45b8ffc2840..83384308d032 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -681,14 +681,16 @@ static int ipip6_rcv(struct sk_buff *skb) skb->mac_header = skb->network_header; skb_reset_network_header(skb); IPCB(skb)->flags = 0; - skb->protocol = htons(ETH_P_IPV6); + skb->dev = tunnel->dev; if (packet_is_spoofed(skb, iph, tunnel)) { tunnel->dev->stats.rx_errors++; goto out; } - __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); + if (iptunnel_pull_header(skb, 0, htons(ETH_P_IPV6), + !net_eq(tunnel->net, dev_net(tunnel->dev)))) + goto out; err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c8416792cce0..215fc08c02ab 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1033,6 +1033,14 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, return 0; } +static int netlink_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + /* try to hand this ioctl down to the NIC drivers. + */ + return -ENOIOCTLCMD; +} + static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) { struct sock *sock; @@ -2494,7 +2502,7 @@ static const struct proto_ops netlink_ops = { .accept = sock_no_accept, .getname = netlink_getname, .poll = datagram_poll, - .ioctl = sock_no_ioctl, + .ioctl = netlink_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = netlink_setsockopt, diff --git a/net/sctp/associola.c b/net/sctp/associola.c index a19b3e607703..e1849f3714ad 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1406,7 +1406,8 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { if (t->pmtu_pending && t->dst) { - sctp_transport_update_pmtu(sk, t, dst_mtu(t->dst)); + sctp_transport_update_pmtu(sk, t, + WORD_TRUNC(dst_mtu(t->dst))); t->pmtu_pending = 0; } if (!pmtu || (t->pathmtu < pmtu)) diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 871cdf9567e6..401c60750b20 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -111,7 +111,8 @@ int sctp_bind_addr_dup(struct sctp_bind_addr *dest, dest->port = src->port; list_for_each_entry(addr, &src->address_list, list) { - error = sctp_add_bind_addr(dest, &addr->a, 1, gfp); + error = sctp_add_bind_addr(dest, &addr->a, sizeof(addr->a), + 1, gfp); if (error < 0) break; } @@ -150,7 +151,7 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp) /* Add an address to the bind address list in the SCTP_bind_addr structure. */ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, - __u8 addr_state, gfp_t gfp) + int new_size, __u8 addr_state, gfp_t gfp) { struct sctp_sockaddr_entry *addr; @@ -159,7 +160,7 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, if (!addr) return -ENOMEM; - memcpy(&addr->a, new, sizeof(*new)); + memcpy(&addr->a, new, min_t(size_t, sizeof(*new), new_size)); /* Fix up the port if it has not yet been set. * Both v4 and v6 have the port at the same offset. @@ -291,7 +292,8 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list, } af->from_addr_param(&addr, rawaddr, htons(port), 0); - retval = sctp_add_bind_addr(bp, &addr, SCTP_ADDR_SRC, gfp); + retval = sctp_add_bind_addr(bp, &addr, sizeof(addr), + SCTP_ADDR_SRC, gfp); if (retval) { /* Can't finish building the list, clean up. */ sctp_bind_addr_clean(bp); @@ -453,8 +455,8 @@ static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest, (((AF_INET6 == addr->sa.sa_family) && (flags & SCTP_ADDR6_ALLOWED) && (flags & SCTP_ADDR6_PEERSUPP)))) - error = sctp_add_bind_addr(dest, addr, SCTP_ADDR_SRC, - gfp); + error = sctp_add_bind_addr(dest, addr, sizeof(*addr), + SCTP_ADDR_SRC, gfp); } return error; diff --git a/net/sctp/input.c b/net/sctp/input.c index db76f1ab4ac2..00b8445364e3 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -606,7 +606,8 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) /* PMTU discovery (RFC1191) */ if (ICMP_FRAG_NEEDED == code) { - sctp_icmp_frag_needed(sk, asoc, transport, info); + sctp_icmp_frag_needed(sk, asoc, transport, + WORD_TRUNC(info)); goto out_unlock; } else { if (ICMP_PROT_UNREACH == code) { diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index f03541d0f12d..8d3d3625130e 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -978,8 +978,12 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) (new_transport->state == SCTP_UNCONFIRMED) || (new_transport->state == SCTP_PF))) new_transport = asoc->peer.active_path; - if (new_transport->state == SCTP_UNCONFIRMED) + if (new_transport->state == SCTP_UNCONFIRMED) { + WARN_ONCE(1, "Atempt to send packet on unconfirmed path."); + sctp_chunk_fail(chunk, 0); + sctp_chunk_free(chunk); continue; + } /* Change packets if necessary. */ if (new_transport != transport) { diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 1099e99a53c4..d3d50daa248b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -216,6 +216,7 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp, (copy_flags & SCTP_ADDR6_ALLOWED) && (copy_flags & SCTP_ADDR6_PEERSUPP)))) { error = sctp_add_bind_addr(bp, &addr->a, + sizeof(addr->a), SCTP_ADDR_SRC, GFP_ATOMIC); if (error) goto end_copy; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index e47abf254ff3..7f0bf798205b 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1849,7 +1849,8 @@ struct sctp_association *sctp_unpack_cookie( /* Also, add the destination address. */ if (list_empty(&retval->base.bind_addr.address_list)) { sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest, - SCTP_ADDR_SRC, GFP_ATOMIC); + sizeof(chunk->dest), SCTP_ADDR_SRC, + GFP_ATOMIC); } retval->next_tsn = retval->c.initial_tsn; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 3c22c41a2bc2..7fe56d0acabf 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -215,10 +215,14 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); } else { + __u32 old_a_rwnd = asoc->a_rwnd; + asoc->a_rwnd = asoc->rwnd; sack = sctp_make_sack(asoc); - if (!sack) + if (!sack) { + asoc->a_rwnd = old_a_rwnd; goto nomem; + } asoc->peer.sack_needed = 0; asoc->peer.sack_cnt = 0; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6f2653d86961..878d28eda1a6 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -386,7 +386,8 @@ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) /* Add the address to the bind address list. * Use GFP_ATOMIC since BHs will be disabled. */ - ret = sctp_add_bind_addr(bp, addr, SCTP_ADDR_SRC, GFP_ATOMIC); + ret = sctp_add_bind_addr(bp, addr, af->sockaddr_len, + SCTP_ADDR_SRC, GFP_ATOMIC); /* Copy back into socket for getsockname() use. */ if (!ret) { @@ -577,6 +578,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk, af = sctp_get_af_specific(addr->v4.sin_family); memcpy(&saveaddr, addr, af->sockaddr_len); retval = sctp_add_bind_addr(bp, &saveaddr, + sizeof(saveaddr), SCTP_ADDR_NEW, GFP_ATOMIC); addr_buf += af->sockaddr_len; } diff --git a/net/sctp/transport.c b/net/sctp/transport.c index d517153891a6..9b6b48c7524e 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -226,7 +226,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) } if (transport->dst) { - transport->pathmtu = dst_mtu(transport->dst); + transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst)); } else transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } @@ -280,7 +280,7 @@ void sctp_transport_route(struct sctp_transport *transport, return; } if (transport->dst) { - transport->pathmtu = dst_mtu(transport->dst); + transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst)); /* Initialize sk->sk_rcv_saddr, if the transport is the * association's active path for getsockname(). diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index bbe65dcb9738..3dce53ebea92 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1209,10 +1209,14 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, if (signal_pending(current)) { err = sock_intr_errno(timeout); - goto out_wait_error; + sk->sk_state = SS_UNCONNECTED; + sock->state = SS_UNCONNECTED; + goto out_wait; } else if (timeout == 0) { err = -ETIMEDOUT; - goto out_wait_error; + sk->sk_state = SS_UNCONNECTED; + sock->state = SS_UNCONNECTED; + goto out_wait; } prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); @@ -1220,20 +1224,17 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, if (sk->sk_err) { err = -sk->sk_err; - goto out_wait_error; - } else + sk->sk_state = SS_UNCONNECTED; + sock->state = SS_UNCONNECTED; + } else { err = 0; + } out_wait: finish_wait(sk_sleep(sk), &wait); out: release_sock(sk); return err; - -out_wait_error: - sk->sk_state = SS_UNCONNECTED; - sock->state = SS_UNCONNECTED; - goto out_wait; } static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) @@ -1270,18 +1271,20 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) listener->sk_err == 0) { release_sock(listener); timeout = schedule_timeout(timeout); + finish_wait(sk_sleep(listener), &wait); lock_sock(listener); if (signal_pending(current)) { err = sock_intr_errno(timeout); - goto out_wait; + goto out; } else if (timeout == 0) { err = -EAGAIN; - goto out_wait; + goto out; } prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); } + finish_wait(sk_sleep(listener), &wait); if (listener->sk_err) err = -listener->sk_err; @@ -1301,19 +1304,15 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) */ if (err) { vconnected->rejected = true; - release_sock(connected); - sock_put(connected); - goto out_wait; + } else { + newsock->state = SS_CONNECTED; + sock_graft(connected, newsock); } - newsock->state = SS_CONNECTED; - sock_graft(connected, newsock); release_sock(connected); sock_put(connected); } -out_wait: - finish_wait(sk_sleep(listener), &wait); out: release_sock(listener); return err; @@ -1557,9 +1556,11 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, if (err < 0) goto out; + while (total_written < len) { ssize_t written; + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); while (vsock_stream_has_space(vsk) == 0 && sk->sk_err == 0 && !(sk->sk_shutdown & SEND_SHUTDOWN) && @@ -1568,27 +1569,33 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, /* Don't wait for non-blocking sockets. */ if (timeout == 0) { err = -EAGAIN; - goto out_wait; + finish_wait(sk_sleep(sk), &wait); + goto out_err; } err = transport->notify_send_pre_block(vsk, &send_data); - if (err < 0) - goto out_wait; + if (err < 0) { + finish_wait(sk_sleep(sk), &wait); + goto out_err; + } release_sock(sk); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); timeout = schedule_timeout(timeout); - finish_wait(sk_sleep(sk), &wait); lock_sock(sk); if (signal_pending(current)) { err = sock_intr_errno(timeout); - goto out_wait; + finish_wait(sk_sleep(sk), &wait); + goto out_err; } else if (timeout == 0) { err = -EAGAIN; - goto out_wait; + finish_wait(sk_sleep(sk), &wait); + goto out_err; } + prepare_to_wait(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); } + finish_wait(sk_sleep(sk), &wait); /* These checks occur both as part of and after the loop * conditional since we need to check before and after @@ -1596,16 +1603,16 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, */ if (sk->sk_err) { err = -sk->sk_err; - goto out_wait; + goto out_err; } else if ((sk->sk_shutdown & SEND_SHUTDOWN) || (vsk->peer_shutdown & RCV_SHUTDOWN)) { err = -EPIPE; - goto out_wait; + goto out_err; } err = transport->notify_send_pre_enqueue(vsk, &send_data); if (err < 0) - goto out_wait; + goto out_err; /* Note that enqueue will only write as many bytes as are free * in the produce queue, so we don't need to ensure len is @@ -1618,7 +1625,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, len - total_written); if (written < 0) { err = -ENOMEM; - goto out_wait; + goto out_err; } total_written += written; @@ -1626,11 +1633,11 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, err = transport->notify_send_post_enqueue( vsk, written, &send_data); if (err < 0) - goto out_wait; + goto out_err; } -out_wait: +out_err: if (total_written > 0) err = total_written; out: @@ -1715,18 +1722,59 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, while (1) { - s64 ready = vsock_stream_has_data(vsk); + s64 ready; - if (ready < 0) { - /* Invalid queue pair content. XXX This should be - * changed to a connection reset in a later change. - */ + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + ready = vsock_stream_has_data(vsk); - err = -ENOMEM; - goto out; - } else if (ready > 0) { + if (ready == 0) { + if (sk->sk_err != 0 || + (sk->sk_shutdown & RCV_SHUTDOWN) || + (vsk->peer_shutdown & SEND_SHUTDOWN)) { + finish_wait(sk_sleep(sk), &wait); + break; + } + /* Don't wait for non-blocking sockets. */ + if (timeout == 0) { + err = -EAGAIN; + finish_wait(sk_sleep(sk), &wait); + break; + } + + err = transport->notify_recv_pre_block( + vsk, target, &recv_data); + if (err < 0) { + finish_wait(sk_sleep(sk), &wait); + break; + } + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + finish_wait(sk_sleep(sk), &wait); + break; + } else if (timeout == 0) { + err = -EAGAIN; + finish_wait(sk_sleep(sk), &wait); + break; + } + } else { ssize_t read; + finish_wait(sk_sleep(sk), &wait); + + if (ready < 0) { + /* Invalid queue pair content. XXX This should + * be changed to a connection reset in a later + * change. + */ + + err = -ENOMEM; + goto out; + } + err = transport->notify_recv_pre_dequeue( vsk, target, &recv_data); if (err < 0) @@ -1752,35 +1800,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, break; target -= read; - } else { - if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN) - || (vsk->peer_shutdown & SEND_SHUTDOWN)) { - break; - } - /* Don't wait for non-blocking sockets. */ - if (timeout == 0) { - err = -EAGAIN; - break; - } - - err = transport->notify_recv_pre_block( - vsk, target, &recv_data); - if (err < 0) - break; - - release_sock(sk); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - timeout = schedule_timeout(timeout); - finish_wait(sk_sleep(sk), &wait); - lock_sock(sk); - - if (signal_pending(current)) { - err = sock_intr_errno(timeout); - break; - } else if (timeout == 0) { - err = -EAGAIN; - break; - } } }