From 406f353c857e4b2dbddb7cd20c67941d829b8b15 Mon Sep 17 00:00:00 2001 From: Matheos Worku Date: Fri, 4 Jan 2008 23:48:26 -0800 Subject: [PATCH 01/36] [NIU]: Fix slowpath interrupt handling. niu_slowpath_interrupt() expects values to be setup in lp->{v0,v1,v2} but they aren't. That's only done by niu_schedule_napi() which is done later in the interrupt path. If niu_rx_error() returns zero, and v0 is clear, hit the RX_DMA_CTL_STATE register with a RX_DMA_CTL_STAT_MEX. Only emit verbose RX error logs if a fatal channel or port error is signalled. Other cases will be recorded into statistics by niu_log_rxchan_errors(). Signed-off-by: David S. Miller --- drivers/net/niu.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/drivers/net/niu.c b/drivers/net/niu.c index abfc61c3a38c..32ed87d54706 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -2508,15 +2508,19 @@ static int niu_rx_error(struct niu *np, struct rx_ring_info *rp) u64 stat = nr64(RX_DMA_CTL_STAT(rp->rx_channel)); int err = 0; - dev_err(np->device, PFX "%s: RX channel %u error, stat[%llx]\n", - np->dev->name, rp->rx_channel, (unsigned long long) stat); - - niu_log_rxchan_errors(np, rp, stat); if (stat & (RX_DMA_CTL_STAT_CHAN_FATAL | RX_DMA_CTL_STAT_PORT_FATAL)) err = -EINVAL; + if (err) { + dev_err(np->device, PFX "%s: RX channel %u error, stat[%llx]\n", + np->dev->name, rp->rx_channel, + (unsigned long long) stat); + + niu_log_rxchan_errors(np, rp, stat); + } + nw64(RX_DMA_CTL_STAT(rp->rx_channel), stat & RX_DMA_CTL_WRITE_CLEAR_ERRS); @@ -2749,13 +2753,16 @@ static int niu_device_error(struct niu *np) return -ENODEV; } -static int niu_slowpath_interrupt(struct niu *np, struct niu_ldg *lp) +static int niu_slowpath_interrupt(struct niu *np, struct niu_ldg *lp, + u64 v0, u64 v1, u64 v2) { - u64 v0 = lp->v0; - u64 v1 = lp->v1; - u64 v2 = lp->v2; + int i, err = 0; + lp->v0 = v0; + lp->v1 = v1; + lp->v2 = v2; + if (v1 & 0x00000000ffffffffULL) { u32 rx_vec = (v1 & 0xffffffff); @@ -2764,8 +2771,13 @@ static int niu_slowpath_interrupt(struct niu *np, struct niu_ldg *lp) if (rx_vec & (1 << rp->rx_channel)) { int r = niu_rx_error(np, rp); - if (r) + if (r) { err = r; + } else { + if (!v0) + nw64(RX_DMA_CTL_STAT(rp->rx_channel), + RX_DMA_CTL_STAT_MEX); + } } } } @@ -2803,7 +2815,7 @@ static int niu_slowpath_interrupt(struct niu *np, struct niu_ldg *lp) if (err) niu_enable_interrupts(np, 0); - return -EINVAL; + return err; } static void niu_rxchan_intr(struct niu *np, struct rx_ring_info *rp, @@ -2905,7 +2917,7 @@ static irqreturn_t niu_interrupt(int irq, void *dev_id) } if (unlikely((v0 & ((u64)1 << LDN_MIF)) || v1 || v2)) { - int err = niu_slowpath_interrupt(np, lp); + int err = niu_slowpath_interrupt(np, lp, v0, v1, v2); if (err) goto out; } From 792dd90f114a48c210c566f3642b26f699702cb7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 4 Jan 2008 23:52:06 -0800 Subject: [PATCH 02/36] [NIU]: Missing ->last_rx update. Noticed by Paul Lodridge. Signed-off-by: David S. Miller --- drivers/net/niu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/niu.c b/drivers/net/niu.c index 32ed87d54706..babb1ef37e34 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -2241,6 +2241,8 @@ static int niu_process_rx_pkt(struct niu *np, struct rx_ring_info *rp) skb->protocol = eth_type_trans(skb, np->dev); netif_receive_skb(skb); + np->dev->last_rx = jiffies; + return num_rcr; } From 3ebebccf89b1b6e4fec4de05b245d6c459f27ce8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 4 Jan 2008 23:54:06 -0800 Subject: [PATCH 03/36] [NIU]: Fix potentially stuck TCP socket send queues. It is possible for the TX ring to have packets sit in it for unbounded amounts of time. The only way to defer TX interrupts in the chip is to periodically set "mark" bits, when processing of a TX descriptor with the mark bit set is complete it triggers the interrupt for the TX queue's LDG. A consequence of this kind of scheme is that if packet flow suddenly stops, the remaining TX packets will just sit there. If this happens, since those packets could be charged to TCP socket send queues, such sockets could get stuck. The simplest solution is to divorce the socket ownership of the packet once the device takes the SKB, by using skb_orphan() in niu_start_xmit(). In hindsight, it would have been much nicer if the chip provided two interrupt sources for TX (like basically every other ethernet chip does). Namely, keep the "mark" bit, but also signal the LDG when the TX queue becomes completely empty. That way there is no need to have a deadlock breaker like this. Signed-off-by: David S. Miller --- drivers/net/niu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/niu.c b/drivers/net/niu.c index babb1ef37e34..ac361642567a 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -5208,7 +5208,8 @@ static int niu_start_xmit(struct sk_buff *skb, struct net_device *dev) } kfree_skb(skb); skb = skb_new; - } + } else + skb_orphan(skb); align = ((unsigned long) skb->data & (16 - 1)); headroom = align + sizeof(struct tx_pkt_hdr); From cb77df3ec88f07c6141924dfe6fd96a2f541cc09 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 5 Jan 2008 00:02:59 -0800 Subject: [PATCH 04/36] [NIU]: Update driver version and release date. Signed-off-by: David S. Miller --- drivers/net/niu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/niu.c b/drivers/net/niu.c index ac361642567a..9a0c6d3adfe9 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -33,8 +33,8 @@ #define DRV_MODULE_NAME "niu" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "0.5" -#define DRV_MODULE_RELDATE "October 5, 2007" +#define DRV_MODULE_VERSION "0.6" +#define DRV_MODULE_RELDATE "January 5, 2008" static char version[] __devinitdata = DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; From f844c74fe07321953e2dd227fe35280075f18f60 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 5 Jan 2008 23:14:44 -0800 Subject: [PATCH 05/36] [IPV4] raw: Strengthen check on validity of iph->ihl We currently check that iph->ihl is bounded by the real length and that the real length is greater than the minimum IP header length. However, we did not check the caes where iph->ihl is less than the minimum IP header length. This breaks because some ip_fast_csum implementations assume that which is quite reasonable. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/raw.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 66b42f547bf9..e7050f8eabeb 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -271,6 +271,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, int hh_len; struct iphdr *iph; struct sk_buff *skb; + unsigned int iphlen; int err; if (length > rt->u.dst.dev->mtu) { @@ -304,7 +305,8 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, goto error_fault; /* We don't modify invalid header */ - if (length >= sizeof(*iph) && iph->ihl * 4U <= length) { + iphlen = iph->ihl * 4; + if (iphlen >= sizeof(*iph) && iphlen <= length) { if (!iph->saddr) iph->saddr = rt->rt_src; iph->check = 0; From 92ffb85dd33d62ac1dad8b44da62365f2aad413d Mon Sep 17 00:00:00 2001 From: Amos Waterland Date: Sat, 5 Jan 2008 23:23:06 -0800 Subject: [PATCH 06/36] [IPV4] ipconfig: Fix regression in ip command line processing The recent changes for ip command line processing fixed some problems but unfortunately broke some common usage scenarios. In current 2.6.24-rc6 the following command line results in no IP address assignment, which is surely a regression: ip=10.0.2.15::10.0.2.2:255.255.255.0::eth0:off Please find below a patch that works for all cases I can find. Signed-off-by: Amos Waterland Signed-off-by: David S. Miller --- Documentation/nfsroot.txt | 1 + net/ipv4/ipconfig.c | 22 ++++++++++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/Documentation/nfsroot.txt b/Documentation/nfsroot.txt index c86dd38e2281..31b329172343 100644 --- a/Documentation/nfsroot.txt +++ b/Documentation/nfsroot.txt @@ -145,6 +145,7 @@ ip=:::::: this option. off or none: don't use autoconfiguration + (do static IP assignment instead) on or any: use any protocol available in the kernel (default) dhcp: use DHCP diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 56a675734ea5..b8f7763b2261 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1404,8 +1404,7 @@ static int __init ic_proto_name(char *name) return 1; } if (!strcmp(name, "off") || !strcmp(name, "none")) { - ic_enable = 0; - return 1; + return 0; } #ifdef CONFIG_IP_PNP_DHCP else if (!strcmp(name, "dhcp")) { @@ -1442,10 +1441,22 @@ static int __init ip_auto_config_setup(char *addrs) ic_set_manually = 1; ic_enable = 1; + /* + * If any dhcp, bootp etc options are set, leave autoconfig on + * and skip the below static IP processing. + */ if (ic_proto_name(addrs)) return 1; - /* Parse the whole string */ + /* If no static IP is given, turn off autoconfig and bail. */ + if (*addrs == 0 || + strcmp(addrs, "off") == 0 || + strcmp(addrs, "none") == 0) { + ic_enable = 0; + return 1; + } + + /* Parse string for static IP assignment. */ ip = addrs; while (ip && *ip) { if ((cp = strchr(ip, ':'))) @@ -1483,7 +1494,10 @@ static int __init ip_auto_config_setup(char *addrs) strlcpy(user_dev_name, ip, sizeof(user_dev_name)); break; case 6: - ic_proto_name(ip); + if (ic_proto_name(ip) == 0 && + ic_myaddr == NONE) { + ic_enable = 0; + } break; } } From 9a262d5c24c63d2b7bea05e41d9b3bfbef63e903 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 5 Jan 2008 23:55:13 -0800 Subject: [PATCH 07/36] [NET]: Fix netx-eth.c compilation. This was missed when commit e2ac455a18806b31c2d0da0a51d8740af5010b7a fixed the compile errors in drivers/net/netx-eth.c caused by commit 09f75cd7bf13720738e6a196cc0107ce9a5bd5a0. Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- drivers/net/netx-eth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/netx-eth.c b/drivers/net/netx-eth.c index 5267e031daa0..78d34af13a1c 100644 --- a/drivers/net/netx-eth.c +++ b/drivers/net/netx-eth.c @@ -169,8 +169,8 @@ static void netx_eth_receive(struct net_device *ndev) ndev->last_rx = jiffies; skb->protocol = eth_type_trans(skb, ndev); netif_rx(skb); - dev->stats.rx_packets++; - dev->stats.rx_bytes += len; + ndev->stats.rx_packets++; + ndev->stats.rx_bytes += len; } static irqreturn_t From edba2a1fefc6296bc527754dee1c72a625bb675a Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Sun, 6 Jan 2008 00:21:47 -0800 Subject: [PATCH 08/36] [METH]: Fix MAC address handling. meth didn't set a valid mac address during probing, but later during open. Newer kernel refuse to open device with 00:00:00:00:00:00 as mac address -> dead ethernet. This patch sets the mac address in the probe function and uses only the mac address from the netdevice struct when setting up the hardware. Signed-off-by: Thomas Bogendoerfer Signed-off-by: David S. Miller --- drivers/net/meth.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/meth.c b/drivers/net/meth.c index 0c89b028a80c..cdaa8fc21809 100644 --- a/drivers/net/meth.c +++ b/drivers/net/meth.c @@ -95,11 +95,14 @@ static inline void load_eaddr(struct net_device *dev) { int i; DECLARE_MAC_BUF(mac); + u64 macaddr; - for (i = 0; i < 6; i++) - dev->dev_addr[i] = o2meth_eaddr[i]; DPRINTK("Loading MAC Address: %s\n", print_mac(mac, dev->dev_addr)); - mace->eth.mac_addr = (*(unsigned long*)o2meth_eaddr) >> 16; + macaddr = 0; + for (i = 0; i < 6; i++) + macaddr |= dev->dev_addr[i] << ((5 - i) * 8); + + mace->eth.mac_addr = macaddr; } /* @@ -794,6 +797,7 @@ static int __init meth_probe(struct platform_device *pdev) #endif dev->irq = MACE_ETHERNET_IRQ; dev->base_addr = (unsigned long)&mace->eth; + memcpy(dev->dev_addr, o2meth_eaddr, 6); priv = netdev_priv(dev); spin_lock_init(&priv->meth_lock); From c6a1b62de9d043f274ec3ae2e207908c6d5feff3 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 7 Jan 2008 00:23:04 -0800 Subject: [PATCH 09/36] [TULIP]: NAPI full quantum bug. This should fix the kernel warn/oops reported while routing. The tulip driver has a fencepost bug with new NAPI in 2.6.24 It has an off by one bug if a full quantum is reached. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/tulip/interrupt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/tulip/interrupt.c b/drivers/net/tulip/interrupt.c index 365331446387..0461956d3b50 100644 --- a/drivers/net/tulip/interrupt.c +++ b/drivers/net/tulip/interrupt.c @@ -151,7 +151,8 @@ int tulip_poll(struct napi_struct *napi, int budget) if (tulip_debug > 5) printk(KERN_DEBUG "%s: In tulip_rx(), entry %d %8.8x.\n", dev->name, entry, status); - if (work_done++ >= budget) + + if (++work_done >= budget) goto not_done; if ((status & 0x38008300) != 0x0300) { From 52961955aa180959158faeb9fd6b4f8a591450f5 Mon Sep 17 00:00:00 2001 From: Chas Williams Date: Mon, 7 Jan 2008 00:26:22 -0800 Subject: [PATCH 10/36] [ATM]: [nicstar] delay irq setup until card is configured Signed-off-by: Chas Williams Signed-off-by: David S. Miller --- drivers/atm/nicstar.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index 14ced85b3f54..0c205b000e8b 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -625,14 +625,6 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev) if (mac[i] == NULL) nicstar_init_eprom(card->membase); - if (request_irq(pcidev->irq, &ns_irq_handler, IRQF_DISABLED | IRQF_SHARED, "nicstar", card) != 0) - { - printk("nicstar%d: can't allocate IRQ %d.\n", i, pcidev->irq); - error = 9; - ns_init_card_error(card, error); - return error; - } - /* Set the VPI/VCI MSb mask to zero so we can receive OAM cells */ writel(0x00000000, card->membase + VPM); @@ -858,8 +850,6 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev) card->iovpool.count++; } - card->intcnt = 0; - /* Configure NICStAR */ if (card->rct_size == 4096) ns_cfg_rctsize = NS_CFG_RCTSIZE_4096_ENTRIES; @@ -868,6 +858,15 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev) card->efbie = 1; + card->intcnt = 0; + if (request_irq(pcidev->irq, &ns_irq_handler, IRQF_DISABLED | IRQF_SHARED, "nicstar", card) != 0) + { + printk("nicstar%d: can't allocate IRQ %d.\n", i, pcidev->irq); + error = 9; + ns_init_card_error(card, error); + return error; + } + /* Register device */ card->atmdev = atm_dev_register("nicstar", &atm_ops, -1, NULL); if (card->atmdev == NULL) From f691724c4d3b150bfa9cc8a969ea2020e20dfb12 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 7 Jan 2008 00:27:16 -0800 Subject: [PATCH 11/36] [SCTP]: Fix the name of the authentication event. The even should be called SCTP_AUTHENTICATION_INDICATION. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/user.h | 2 +- net/sctp/ulpevent.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h index 00848b641f59..954090b1e354 100644 --- a/include/net/sctp/user.h +++ b/include/net/sctp/user.h @@ -450,7 +450,7 @@ enum sctp_sn_type { SCTP_SHUTDOWN_EVENT, SCTP_PARTIAL_DELIVERY_EVENT, SCTP_ADAPTATION_INDICATION, - SCTP_AUTHENTICATION_EVENT, + SCTP_AUTHENTICATION_INDICATION, }; /* Notification error codes used to fill up the error fields in some diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 2c17c7efad46..307314356e16 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -830,7 +830,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_authkey( ak = (struct sctp_authkey_event *) skb_put(skb, sizeof(struct sctp_authkey_event)); - ak->auth_type = SCTP_AUTHENTICATION_EVENT; + ak->auth_type = SCTP_AUTHENTICATION_INDICATION; ak->auth_flags = 0; ak->auth_length = sizeof(struct sctp_authkey_event); From 6df9cfc1ad45839e2a11330ab354330c6128cb73 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 7 Jan 2008 00:27:45 -0800 Subject: [PATCH 12/36] [SCTP]: Correctly handle AUTH parameters in unexpected INIT When processing an unexpected INIT chunk, we do not need to do any preservation of the old AUTH parameters. In fact, doing such preservations will nullify AUTH and allow connection stealing. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_statefuns.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 5fb84778846d..d247ed4ee423 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1309,26 +1309,6 @@ static void sctp_tietags_populate(struct sctp_association *new_asoc, new_asoc->c.initial_tsn = asoc->c.initial_tsn; } -static void sctp_auth_params_populate(struct sctp_association *new_asoc, - const struct sctp_association *asoc) -{ - /* Only perform this if AUTH extension is enabled */ - if (!sctp_auth_enable) - return; - - /* We need to provide the same parameter information as - * was in the original INIT. This means that we need to copy - * the HMACS, CHUNKS, and RANDOM parameter from the original - * assocaition. - */ - memcpy(new_asoc->c.auth_random, asoc->c.auth_random, - sizeof(asoc->c.auth_random)); - memcpy(new_asoc->c.auth_hmacs, asoc->c.auth_hmacs, - sizeof(asoc->c.auth_hmacs)); - memcpy(new_asoc->c.auth_chunks, asoc->c.auth_chunks, - sizeof(asoc->c.auth_chunks)); -} - /* * Compare vtag/tietag values to determine unexpected COOKIE-ECHO * handling action. @@ -1486,8 +1466,6 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( sctp_tietags_populate(new_asoc, asoc); - sctp_auth_params_populate(new_asoc, asoc); - /* B) "Z" shall respond immediately with an INIT ACK chunk. */ /* If there are errors need to be reported for unknown parameters, From 036b579b1146f52c51398f1ab663cf659094107d Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 7 Jan 2008 00:28:16 -0800 Subject: [PATCH 13/36] [SCTP]: Add back the code that accounted for FORWARD_TSN parameter in INIT. Some recent changes completely removed accounting for the FORWARD_TSN parameter length in the INIT and INIT-ACK chunk. This is wrong and should be restored. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index ed7c9e30ebc9..3cc629d3c9ff 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -210,6 +210,9 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, chunksize = sizeof(init) + addrs_len + SCTP_SAT_LEN(num_types); chunksize += sizeof(ecap_param); + if (sctp_prsctp_enable) + chunksize += sizeof(prsctp_param); + /* ADDIP: Section 4.2.7: * An implementation supporting this extension [ADDIP] MUST list * the ASCONF,the ASCONF-ACK, and the AUTH chunks in its INIT and @@ -369,6 +372,9 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, if (asoc->peer.ecn_capable) chunksize += sizeof(ecap_param); + if (sctp_prsctp_enable) + chunksize += sizeof(prsctp_param); + if (sctp_addip_enable) { extensions[num_ext] = SCTP_CID_ASCONF; extensions[num_ext+1] = SCTP_CID_ASCONF_ACK; From 9e8d6f8959c356d8294d45f11231331c3e1bcae6 Mon Sep 17 00:00:00 2001 From: maximilian attems Date: Mon, 7 Jan 2008 00:30:57 -0800 Subject: [PATCH 14/36] [IRDA]: irda_create() nuke user triggable printk easy to trigger as user with sfuzz. irda_create() is quiet on unknown sock->type, match this behaviour for SOCK_DGRAM unknown protocol Signed-off-by: maximilian attems Signed-off-by: David S. Miller --- net/irda/af_irda.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 48ce59a6e026..d5e4dd75200b 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1118,8 +1118,6 @@ static int irda_create(struct net *net, struct socket *sock, int protocol) self->max_sdu_size_rx = TTP_SAR_UNBOUND; break; default: - IRDA_ERROR("%s: protocol not supported!\n", - __FUNCTION__); return -ESOCKTNOSUPPORT; } break; From d987160b710c98997015832422a05e18d9f0f925 Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Wed, 2 Jan 2008 18:55:53 +0100 Subject: [PATCH 15/36] b43: Fix rxheader channel parsing This patch fixes the parsing of the RX data header channel field. The current code parses the header incorrectly and passes a wrong channel number and frequency for each frame to mac80211. The FIXMEs added by this patch don't matter for now as the code where they live won't get executed anyway. They will be fixed later. Signed-off-by: Michael Buesch Signed-off-by: John W. Linville --- drivers/net/wireless/b43/b43.h | 2 + drivers/net/wireless/b43/main.h | 20 ++-------- drivers/net/wireless/b43/xmit.c | 27 ++++++++++---- drivers/net/wireless/b43/xmit.h | 65 ++++++++++++++++++--------------- 4 files changed, 61 insertions(+), 53 deletions(-) diff --git a/drivers/net/wireless/b43/b43.h b/drivers/net/wireless/b43/b43.h index a28ad230d63e..7b6fc1ab2b90 100644 --- a/drivers/net/wireless/b43/b43.h +++ b/drivers/net/wireless/b43/b43.h @@ -273,6 +273,8 @@ enum { #define B43_PHYTYPE_A 0x00 #define B43_PHYTYPE_B 0x01 #define B43_PHYTYPE_G 0x02 +#define B43_PHYTYPE_N 0x04 +#define B43_PHYTYPE_LP 0x05 /* PHYRegisters */ #define B43_PHY_ILT_A_CTRL 0x0072 diff --git a/drivers/net/wireless/b43/main.h b/drivers/net/wireless/b43/main.h index 284d17da17d1..08e2e56e48f4 100644 --- a/drivers/net/wireless/b43/main.h +++ b/drivers/net/wireless/b43/main.h @@ -39,11 +39,11 @@ #define PAD_BYTES(nr_bytes) P4D_BYTES( __LINE__ , (nr_bytes)) /* Lightweight function to convert a frequency (in Mhz) to a channel number. */ -static inline u8 b43_freq_to_channel_a(int freq) +static inline u8 b43_freq_to_channel_5ghz(int freq) { return ((freq - 5000) / 5); } -static inline u8 b43_freq_to_channel_bg(int freq) +static inline u8 b43_freq_to_channel_2ghz(int freq) { u8 channel; @@ -54,19 +54,13 @@ static inline u8 b43_freq_to_channel_bg(int freq) return channel; } -static inline u8 b43_freq_to_channel(struct b43_wldev *dev, int freq) -{ - if (dev->phy.type == B43_PHYTYPE_A) - return b43_freq_to_channel_a(freq); - return b43_freq_to_channel_bg(freq); -} /* Lightweight function to convert a channel number to a frequency (in Mhz). */ -static inline int b43_channel_to_freq_a(u8 channel) +static inline int b43_channel_to_freq_5ghz(u8 channel) { return (5000 + (5 * channel)); } -static inline int b43_channel_to_freq_bg(u8 channel) +static inline int b43_channel_to_freq_2ghz(u8 channel) { int freq; @@ -77,12 +71,6 @@ static inline int b43_channel_to_freq_bg(u8 channel) return freq; } -static inline int b43_channel_to_freq(struct b43_wldev *dev, u8 channel) -{ - if (dev->phy.type == B43_PHYTYPE_A) - return b43_channel_to_freq_a(channel); - return b43_channel_to_freq_bg(channel); -} static inline int b43_is_cck_rate(int rate) { diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c index 0bd6f8a348a8..3307ba1856b1 100644 --- a/drivers/net/wireless/b43/xmit.c +++ b/drivers/net/wireless/b43/xmit.c @@ -531,21 +531,32 @@ void b43_rx(struct b43_wldev *dev, struct sk_buff *skb, const void *_rxhdr) switch (chanstat & B43_RX_CHAN_PHYTYPE) { case B43_PHYTYPE_A: status.phymode = MODE_IEEE80211A; - status.freq = chanid; - status.channel = b43_freq_to_channel_a(chanid); - break; - case B43_PHYTYPE_B: - status.phymode = MODE_IEEE80211B; - status.freq = chanid + 2400; - status.channel = b43_freq_to_channel_bg(chanid + 2400); + B43_WARN_ON(1); + /* FIXME: We don't really know which value the "chanid" contains. + * So the following assignment might be wrong. */ + status.channel = chanid; + status.freq = b43_channel_to_freq_5ghz(status.channel); break; case B43_PHYTYPE_G: status.phymode = MODE_IEEE80211G; + /* chanid is the radio channel cookie value as used + * to tune the radio. */ status.freq = chanid + 2400; - status.channel = b43_freq_to_channel_bg(chanid + 2400); + status.channel = b43_freq_to_channel_2ghz(status.freq); + break; + case B43_PHYTYPE_N: + status.phymode = 0xDEAD /*FIXME MODE_IEEE80211N*/; + /* chanid is the SHM channel cookie. Which is the plain + * channel number in b43. */ + status.channel = chanid; + if (chanstat & B43_RX_CHAN_5GHZ) + status.freq = b43_freq_to_channel_5ghz(status.freq); + else + status.freq = b43_freq_to_channel_2ghz(status.freq); break; default: B43_WARN_ON(1); + goto drop; } dev->stats.last_rx = jiffies; diff --git a/drivers/net/wireless/b43/xmit.h b/drivers/net/wireless/b43/xmit.h index 03bddd251618..6dc079382f7f 100644 --- a/drivers/net/wireless/b43/xmit.h +++ b/drivers/net/wireless/b43/xmit.h @@ -142,49 +142,56 @@ struct b43_rxhdr_fw4 { } __attribute__ ((__packed__)); /* PHY RX Status 0 */ -#define B43_RX_PHYST0_GAINCTL 0x4000 /* Gain Control */ -#define B43_RX_PHYST0_PLCPHCF 0x0200 -#define B43_RX_PHYST0_PLCPFV 0x0100 -#define B43_RX_PHYST0_SHORTPRMBL 0x0080 /* Received with Short Preamble */ +#define B43_RX_PHYST0_GAINCTL 0x4000 /* Gain Control */ +#define B43_RX_PHYST0_PLCPHCF 0x0200 +#define B43_RX_PHYST0_PLCPFV 0x0100 +#define B43_RX_PHYST0_SHORTPRMBL 0x0080 /* Received with Short Preamble */ #define B43_RX_PHYST0_LCRS 0x0040 -#define B43_RX_PHYST0_ANT 0x0020 /* Antenna */ -#define B43_RX_PHYST0_UNSRATE 0x0010 +#define B43_RX_PHYST0_ANT 0x0020 /* Antenna */ +#define B43_RX_PHYST0_UNSRATE 0x0010 #define B43_RX_PHYST0_CLIP 0x000C #define B43_RX_PHYST0_CLIP_SHIFT 2 -#define B43_RX_PHYST0_FTYPE 0x0003 /* Frame type */ -#define B43_RX_PHYST0_CCK 0x0000 /* Frame type: CCK */ -#define B43_RX_PHYST0_OFDM 0x0001 /* Frame type: OFDM */ -#define B43_RX_PHYST0_PRE_N 0x0002 /* Pre-standard N-PHY frame */ -#define B43_RX_PHYST0_STD_N 0x0003 /* Standard N-PHY frame */ +#define B43_RX_PHYST0_FTYPE 0x0003 /* Frame type */ +#define B43_RX_PHYST0_CCK 0x0000 /* Frame type: CCK */ +#define B43_RX_PHYST0_OFDM 0x0001 /* Frame type: OFDM */ +#define B43_RX_PHYST0_PRE_N 0x0002 /* Pre-standard N-PHY frame */ +#define B43_RX_PHYST0_STD_N 0x0003 /* Standard N-PHY frame */ /* PHY RX Status 2 */ -#define B43_RX_PHYST2_LNAG 0xC000 /* LNA Gain */ +#define B43_RX_PHYST2_LNAG 0xC000 /* LNA Gain */ #define B43_RX_PHYST2_LNAG_SHIFT 14 -#define B43_RX_PHYST2_PNAG 0x3C00 /* PNA Gain */ +#define B43_RX_PHYST2_PNAG 0x3C00 /* PNA Gain */ #define B43_RX_PHYST2_PNAG_SHIFT 10 -#define B43_RX_PHYST2_FOFF 0x03FF /* F offset */ +#define B43_RX_PHYST2_FOFF 0x03FF /* F offset */ /* PHY RX Status 3 */ -#define B43_RX_PHYST3_DIGG 0x1800 /* DIG Gain */ +#define B43_RX_PHYST3_DIGG 0x1800 /* DIG Gain */ #define B43_RX_PHYST3_DIGG_SHIFT 11 -#define B43_RX_PHYST3_TRSTATE 0x0400 /* TR state */ +#define B43_RX_PHYST3_TRSTATE 0x0400 /* TR state */ /* MAC RX Status */ -#define B43_RX_MAC_BEACONSENT 0x00008000 /* Beacon send flag */ -#define B43_RX_MAC_KEYIDX 0x000007E0 /* Key index */ -#define B43_RX_MAC_KEYIDX_SHIFT 5 -#define B43_RX_MAC_DECERR 0x00000010 /* Decrypt error */ -#define B43_RX_MAC_DEC 0x00000008 /* Decryption attempted */ -#define B43_RX_MAC_PADDING 0x00000004 /* Pad bytes present */ -#define B43_RX_MAC_RESP 0x00000002 /* Response frame transmitted */ -#define B43_RX_MAC_FCSERR 0x00000001 /* FCS error */ +#define B43_RX_MAC_RXST_VALID 0x01000000 /* PHY RXST valid */ +#define B43_RX_MAC_TKIP_MICERR 0x00100000 /* TKIP MIC error */ +#define B43_RX_MAC_TKIP_MICATT 0x00080000 /* TKIP MIC attempted */ +#define B43_RX_MAC_AGGTYPE 0x00060000 /* Aggregation type */ +#define B43_RX_MAC_AGGTYPE_SHIFT 17 +#define B43_RX_MAC_AMSDU 0x00010000 /* A-MSDU mask */ +#define B43_RX_MAC_BEACONSENT 0x00008000 /* Beacon sent flag */ +#define B43_RX_MAC_KEYIDX 0x000007E0 /* Key index */ +#define B43_RX_MAC_KEYIDX_SHIFT 5 +#define B43_RX_MAC_DECERR 0x00000010 /* Decrypt error */ +#define B43_RX_MAC_DEC 0x00000008 /* Decryption attempted */ +#define B43_RX_MAC_PADDING 0x00000004 /* Pad bytes present */ +#define B43_RX_MAC_RESP 0x00000002 /* Response frame transmitted */ +#define B43_RX_MAC_FCSERR 0x00000001 /* FCS error */ /* RX channel */ -#define B43_RX_CHAN_GAIN 0xFC00 /* Gain */ -#define B43_RX_CHAN_GAIN_SHIFT 10 -#define B43_RX_CHAN_ID 0x03FC /* Channel ID */ -#define B43_RX_CHAN_ID_SHIFT 2 -#define B43_RX_CHAN_PHYTYPE 0x0003 /* PHY type */ +#define B43_RX_CHAN_40MHZ 0x1000 /* 40 Mhz channel width */ +#define B43_RX_CHAN_5GHZ 0x0800 /* 5 Ghz band */ +#define B43_RX_CHAN_ID 0x07F8 /* Channel ID */ +#define B43_RX_CHAN_ID_SHIFT 3 +#define B43_RX_CHAN_PHYTYPE 0x0007 /* PHY type */ + u8 b43_plcp_get_ratecode_cck(const u8 bitrate); u8 b43_plcp_get_ratecode_ofdm(const u8 bitrate); From bdb95b1792664f25eb2a4d13a587d2020aa93002 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 7 Jan 2008 20:26:59 -0800 Subject: [PATCH 16/36] [NET]: Do not grab device reference when scheduling a NAPI poll. It is pointless, because everything that can make a device go away will do a napi_disable() first. The main impetus behind this is that now we can legally do a NAPI completion in generic code like net_rx_action() which a following changeset needs to do. net_rx_action() can only perform actions in NAPI centric ways, because there may be a one to many mapping between NAPI contexts and network devices (SKY2 is one example). We also want to get rid of this because it's an extra atomic in the NAPI paths, and also because it is one of the last instances where the NAPI interfaces care about net devices. The one remaining netdev detail the NAPI stuff cares about is the netif_running() check which will be killed off in a subsequent changeset. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1e6af4f174b6..e393995d283a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1277,7 +1277,6 @@ static inline int netif_rx_schedule_prep(struct net_device *dev, static inline void __netif_rx_schedule(struct net_device *dev, struct napi_struct *napi) { - dev_hold(dev); __napi_schedule(napi); } @@ -1308,7 +1307,6 @@ static inline void __netif_rx_complete(struct net_device *dev, struct napi_struct *napi) { __napi_complete(napi); - dev_put(dev); } /* Remove interface from poll list: it must be in the poll list From a0a46196cd98af5cc015842bba757571f02a8c30 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 7 Jan 2008 20:35:07 -0800 Subject: [PATCH 17/36] [NET]: Add NAPI_STATE_DISABLE. Create a bit to signal that a napi_disable() is in progress. This sets up infrastructure such that net_rx_action() can generically break out of the ->poll() loop on a NAPI context that has a pending napi_disable() yet is being bombed with packets (and thus would otherwise poll endlessly and not allow the napi_disable() to finish). Now, what napi_disable() does is first set the NAPI_STATE_DISABLE bit (to indicate that a disable is pending), then it polls for the NAPI_STATE_SCHED bit, and once the NAPI_STATE_SCHED bit is acquired the NAPI_STATE_DISABLE bit is cleared. Here, the test_and_set_bit() provides the necessary memory barrier between the various bitops. napi_schedule_prep() now tests for a pending disable as it's first action and won't try to obtain the NAPI_STATE_SCHED bit if a disable is pending. As a result, we can remove the netif_running() check in netif_rx_schedule_prep() because the NAPI disable pending state serves this purpose. And, it does so in a NAPI centric manner which is what we really want. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e393995d283a..b0813c3286b1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -319,21 +319,29 @@ struct napi_struct { enum { NAPI_STATE_SCHED, /* Poll is scheduled */ + NAPI_STATE_DISABLE, /* Disable pending */ }; extern void FASTCALL(__napi_schedule(struct napi_struct *n)); +static inline int napi_disable_pending(struct napi_struct *n) +{ + return test_bit(NAPI_STATE_DISABLE, &n->state); +} + /** * napi_schedule_prep - check if napi can be scheduled * @n: napi context * * Test if NAPI routine is already running, and if not mark * it as running. This is used as a condition variable - * insure only one NAPI poll instance runs + * insure only one NAPI poll instance runs. We also make + * sure there is no pending NAPI disable. */ static inline int napi_schedule_prep(struct napi_struct *n) { - return !test_and_set_bit(NAPI_STATE_SCHED, &n->state); + return !napi_disable_pending(n) && + !test_and_set_bit(NAPI_STATE_SCHED, &n->state); } /** @@ -389,8 +397,10 @@ static inline void napi_complete(struct napi_struct *n) */ static inline void napi_disable(struct napi_struct *n) { + set_bit(NAPI_STATE_DISABLE, &n->state); while (test_and_set_bit(NAPI_STATE_SCHED, &n->state)) msleep(1); + clear_bit(NAPI_STATE_DISABLE, &n->state); } /** @@ -1268,7 +1278,7 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) static inline int netif_rx_schedule_prep(struct net_device *dev, struct napi_struct *napi) { - return netif_running(dev) && napi_schedule_prep(napi); + return napi_schedule_prep(napi); } /* Add interface to tail of rx poll list. This assumes that _prep has From 4ec2411980d0fd2995e8dea8a06fe57aa47523cb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 7 Jan 2008 20:48:21 -0800 Subject: [PATCH 18/36] [NET]: Do not check netif_running() and carrier state in ->poll() Drivers do this to try to break out of the ->poll()'ing loop when the device is being brought administratively down. Now that we have a napi_disable() "pending" state we are going to solve that problem generically. Signed-off-by: David S. Miller --- drivers/net/e100.c | 2 +- drivers/net/e1000/e1000_main.c | 8 +------- drivers/net/e1000e/netdev.c | 8 +------- drivers/net/epic100.c | 2 +- drivers/net/fec_8xx/fec_main.c | 5 ----- drivers/net/fs_enet/fs_enet-main.c | 3 --- drivers/net/ixgb/ixgb_main.c | 2 +- drivers/net/ixgbe/ixgbe_main.c | 8 +------- drivers/net/ixp2000/ixpdev.c | 2 -- drivers/net/myri10ge/myri10ge.c | 2 +- drivers/net/natsemi.c | 2 +- drivers/net/qla3xxx.c | 7 +------ drivers/net/s2io.c | 3 --- drivers/net/tulip/interrupt.c | 5 ----- drivers/net/xen-netfront.c | 5 ----- 15 files changed, 9 insertions(+), 55 deletions(-) diff --git a/drivers/net/e100.c b/drivers/net/e100.c index 2b06e4b4dabc..68316f15a7cb 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -1997,7 +1997,7 @@ static int e100_poll(struct napi_struct *napi, int budget) tx_cleaned = e100_tx_clean(nic); /* If no Rx and Tx cleanup work was done, exit polling mode. */ - if((!tx_cleaned && (work_done == 0)) || !netif_running(netdev)) { + if((!tx_cleaned && (work_done == 0))) { netif_rx_complete(netdev, napi); e100_enable_irq(nic); } diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 4f37506ad374..9de71443ef89 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -3924,10 +3924,6 @@ e1000_clean(struct napi_struct *napi, int budget) /* Must NOT use netdev_priv macro here. */ adapter = poll_dev->priv; - /* Keep link state information with original netdev */ - if (!netif_carrier_ok(poll_dev)) - goto quit_polling; - /* e1000_clean is called per-cpu. This lock protects * tx_ring[0] from being cleaned by multiple cpus * simultaneously. A failure obtaining the lock means @@ -3942,9 +3938,7 @@ e1000_clean(struct napi_struct *napi, int budget) &work_done, budget); /* If no Tx and not enough Rx work done, exit the polling mode */ - if ((!tx_cleaned && (work_done == 0)) || - !netif_running(poll_dev)) { -quit_polling: + if ((!tx_cleaned && (work_done == 0))) { if (likely(adapter->itr_setting & 3)) e1000_set_itr(adapter); netif_rx_complete(poll_dev, napi); diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 4fd2e23720b6..dd9698cfbb9d 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -1389,10 +1389,6 @@ static int e1000_clean(struct napi_struct *napi, int budget) /* Must NOT use netdev_priv macro here. */ adapter = poll_dev->priv; - /* Keep link state information with original netdev */ - if (!netif_carrier_ok(poll_dev)) - goto quit_polling; - /* e1000_clean is called per-cpu. This lock protects * tx_ring from being cleaned by multiple cpus * simultaneously. A failure obtaining the lock means @@ -1405,9 +1401,7 @@ static int e1000_clean(struct napi_struct *napi, int budget) adapter->clean_rx(adapter, &work_done, budget); /* If no Tx and not enough Rx work done, exit the polling mode */ - if ((!tx_cleaned && (work_done < budget)) || - !netif_running(poll_dev)) { -quit_polling: + if ((!tx_cleaned && (work_done < budget))) { if (adapter->itr_setting & 3) e1000_set_itr(adapter); netif_rx_complete(poll_dev, napi); diff --git a/drivers/net/epic100.c b/drivers/net/epic100.c index ecdd3fc8d70c..0b365b8d947b 100644 --- a/drivers/net/epic100.c +++ b/drivers/net/epic100.c @@ -1273,7 +1273,7 @@ static int epic_poll(struct napi_struct *napi, int budget) epic_rx_err(dev, ep); - if (netif_running(dev) && (work_done < budget)) { + if (work_done < budget) { unsigned long flags; int more; diff --git a/drivers/net/fec_8xx/fec_main.c b/drivers/net/fec_8xx/fec_main.c index 8d2904fa5789..ab9637ab3a8d 100644 --- a/drivers/net/fec_8xx/fec_main.c +++ b/drivers/net/fec_8xx/fec_main.c @@ -476,11 +476,6 @@ static int fec_enet_rx_common(struct fec_enet_private *ep, __u16 pkt_len, sc; int curidx; - if (fpi->use_napi) { - if (!netif_running(dev)) - return 0; - } - /* * First, grab all of the stats for the incoming packet. * These get messed up if we get called due to a busy condition. diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c index f2a4d399a6e5..3e1a57a42f11 100644 --- a/drivers/net/fs_enet/fs_enet-main.c +++ b/drivers/net/fs_enet/fs_enet-main.c @@ -96,9 +96,6 @@ static int fs_enet_rx_napi(struct napi_struct *napi, int budget) u16 pkt_len, sc; int curidx; - if (!netif_running(dev)) - return 0; - /* * First, grab all of the stats for the incoming packet. * These get messed up if we get called due to a busy condition. diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c index bf9085fe035a..a8bef52870fd 100644 --- a/drivers/net/ixgb/ixgb_main.c +++ b/drivers/net/ixgb/ixgb_main.c @@ -1794,7 +1794,7 @@ ixgb_clean(struct napi_struct *napi, int budget) ixgb_clean_rx_irq(adapter, &work_done, budget); /* if no Tx and not enough Rx work done, exit the polling mode */ - if((!tx_cleaned && (work_done == 0)) || !netif_running(netdev)) { + if((!tx_cleaned && (work_done == 0))) { netif_rx_complete(netdev, napi); ixgb_irq_enable(adapter); } diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 00bc525c6560..7c319303f0f8 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -1470,19 +1470,13 @@ static int ixgbe_clean(struct napi_struct *napi, int budget) struct net_device *netdev = adapter->netdev; int tx_cleaned = 0, work_done = 0; - /* Keep link state information with original netdev */ - if (!netif_carrier_ok(adapter->netdev)) - goto quit_polling; - /* In non-MSIX case, there is no multi-Tx/Rx queue */ tx_cleaned = ixgbe_clean_tx_irq(adapter, adapter->tx_ring); ixgbe_clean_rx_irq(adapter, &adapter->rx_ring[0], &work_done, budget); /* If no Tx and not enough Rx work done, exit the polling mode */ - if ((!tx_cleaned && (work_done < budget)) || - !netif_running(adapter->netdev)) { -quit_polling: + if ((!tx_cleaned && (work_done < budget))) { netif_rx_complete(netdev, napi); ixgbe_irq_enable(adapter); } diff --git a/drivers/net/ixp2000/ixpdev.c b/drivers/net/ixp2000/ixpdev.c index 6c0dd49149d0..484cb2ba717f 100644 --- a/drivers/net/ixp2000/ixpdev.c +++ b/drivers/net/ixp2000/ixpdev.c @@ -135,8 +135,6 @@ static int ixpdev_poll(struct napi_struct *napi, int budget) struct net_device *dev = ip->dev; int rx; - /* @@@ Have to stop polling when nds[0] is administratively - * downed while we are polling. */ rx = 0; do { ixp2000_reg_write(IXP2000_IRQ_THD_RAW_STATUS_A_0, 0x00ff); diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c index 8def8657251f..c90958f6d3fe 100644 --- a/drivers/net/myri10ge/myri10ge.c +++ b/drivers/net/myri10ge/myri10ge.c @@ -1239,7 +1239,7 @@ static int myri10ge_poll(struct napi_struct *napi, int budget) /* process as many rx events as NAPI will allow */ work_done = myri10ge_clean_rx_done(mgp, budget); - if (work_done < budget || !netif_running(netdev)) { + if (work_done < budget) { netif_rx_complete(netdev, napi); put_be32(htonl(3), mgp->irq_claim); } diff --git a/drivers/net/natsemi.c b/drivers/net/natsemi.c index 87cde062fd63..c329a4f5840c 100644 --- a/drivers/net/natsemi.c +++ b/drivers/net/natsemi.c @@ -2266,7 +2266,7 @@ static int natsemi_poll(struct napi_struct *napi, int budget) /* Reenable interrupts providing nothing is trying to shut * the chip down. */ spin_lock(&np->lock); - if (!np->hands_off && netif_running(dev)) + if (!np->hands_off) natsemi_irq_enable(dev); spin_unlock(&np->lock); diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c index a5791114b7bd..cf0774de6c41 100644 --- a/drivers/net/qla3xxx.c +++ b/drivers/net/qla3xxx.c @@ -2320,14 +2320,9 @@ static int ql_poll(struct napi_struct *napi, int budget) unsigned long hw_flags; struct ql3xxx_port_registers __iomem *port_regs = qdev->mem_map_registers; - if (!netif_carrier_ok(ndev)) - goto quit_polling; - ql_tx_rx_clean(qdev, &tx_cleaned, &rx_cleaned, budget); - if (tx_cleaned + rx_cleaned != budget || - !netif_running(ndev)) { -quit_polling: + if (tx_cleaned + rx_cleaned != budget) { spin_lock_irqsave(&qdev->hw_lock, hw_flags); __netif_rx_complete(ndev, napi); ql_update_small_bufq_prod_index(qdev); diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 9d80f1cf73ac..fa57c49c0c51 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -2704,9 +2704,6 @@ static int s2io_poll(struct napi_struct *napi, int budget) struct XENA_dev_config __iomem *bar0 = nic->bar0; int i; - if (!is_s2io_card_up(nic)) - return 0; - mac_control = &nic->mac_control; config = &nic->config; diff --git a/drivers/net/tulip/interrupt.c b/drivers/net/tulip/interrupt.c index 0461956d3b50..6284afd14bbb 100644 --- a/drivers/net/tulip/interrupt.c +++ b/drivers/net/tulip/interrupt.c @@ -117,9 +117,6 @@ int tulip_poll(struct napi_struct *napi, int budget) int received = 0; #endif - if (!netif_running(dev)) - goto done; - #ifdef CONFIG_TULIP_NAPI_HW_MITIGATION /* that one buffer is needed for mit activation; or might be a @@ -261,8 +258,6 @@ int tulip_poll(struct napi_struct *napi, int budget) * finally: amount of IO did not increase at all. */ } while ((ioread32(tp->base_addr + CSR5) & RxIntr)); -done: - #ifdef CONFIG_TULIP_NAPI_HW_MITIGATION /* We use this simplistic scheme for IM. It's proven by diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 2a8fc431099f..bca37bf0f545 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -852,11 +852,6 @@ static int xennet_poll(struct napi_struct *napi, int budget) spin_lock(&np->rx_lock); - if (unlikely(!netif_carrier_ok(dev))) { - spin_unlock(&np->rx_lock); - return 0; - } - skb_queue_head_init(&rxq); skb_queue_head_init(&errq); skb_queue_head_init(&tmpq); From 87c4ac841c1d524416ab36c19689550bf302dab1 Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Thu, 3 Jan 2008 18:59:25 +0100 Subject: [PATCH 19/36] ssb: Fix probing of PCI cores if PCI and PCIE core is available This will make sure that always the correct core is selected, even if there are both a PCI and PCI-E core on a PCI or PCI-E card. Signed-off-by: Michael Buesch Signed-off-by: John W. Linville --- drivers/ssb/scan.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/ssb/scan.c b/drivers/ssb/scan.c index 96258c60919d..63ee5cfbefbb 100644 --- a/drivers/ssb/scan.c +++ b/drivers/ssb/scan.c @@ -388,6 +388,17 @@ int ssb_bus_scan(struct ssb_bus *bus, case SSB_DEV_PCI: case SSB_DEV_PCIE: #ifdef CONFIG_SSB_DRIVER_PCICORE + if (bus->bustype == SSB_BUSTYPE_PCI) { + /* Ignore PCI cores on PCI-E cards. + * Ignore PCI-E cores on PCI cards. */ + if (dev->id.coreid == SSB_DEV_PCI) { + if (bus->host_pci->is_pcie) + continue; + } else { + if (!bus->host_pci->is_pcie) + continue; + } + } if (bus->pcicore.dev) { ssb_printk(KERN_WARNING PFX "WARNING: Multiple PCI(E) cores found\n"); From 5cdfed54e7200dde2e846e2ef153d1694ce44875 Mon Sep 17 00:00:00 2001 From: Andrew Lutomirski Date: Thu, 3 Jan 2008 21:03:19 -0800 Subject: [PATCH 20/36] mac80211: return an error when SIWRATE doesn't match any rate Currently mac80211 fails silently when trying to set a nonexistent rate. Return an error instead. Signed-Off-By: Andy Lutomirski Signed-off-by: John W. Linville --- net/mac80211/ieee80211_ioctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c index 7027eed4d4ae..308bbe4a1333 100644 --- a/net/mac80211/ieee80211_ioctl.c +++ b/net/mac80211/ieee80211_ioctl.c @@ -591,7 +591,7 @@ static int ieee80211_ioctl_siwrate(struct net_device *dev, sdata->bss->force_unicast_rateidx = -1; if (rate->value < 0) return 0; - for (i=0; i< mode->num_rates; i++) { + for (i=0; i < mode->num_rates; i++) { struct ieee80211_rate *rates = &mode->rates[i]; int this_rate = rates->rate; @@ -599,10 +599,10 @@ static int ieee80211_ioctl_siwrate(struct net_device *dev, sdata->bss->max_ratectrl_rateidx = i; if (rate->fixed) sdata->bss->force_unicast_rateidx = i; - break; + return 0; } } - return 0; + return -EINVAL; } static int ieee80211_ioctl_giwrate(struct net_device *dev, From 1706287f6eb58726a9a0e5cbbde87f49757615e3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 7 Jan 2008 20:51:29 -0800 Subject: [PATCH 21/36] [NETXEN]: Fix ->poll() done logic. If work_done >= budget we should always elide the NAPI completion. Signed-off-by: David S. Miller --- drivers/net/netxen/netxen_nic_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index a80f0cd6b528..454226f7baa8 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -1321,7 +1321,7 @@ static int netxen_nic_poll(struct napi_struct *napi, int budget) budget / MAX_RCV_CTX); } - if (work_done >= budget && netxen_nic_rx_has_work(adapter) != 0) + if (work_done >= budget) done = 0; if (netxen_process_cmd_ring((unsigned long)adapter) == 0) From d1d08d1265810ef1f165864850416dcbc9725ee7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 7 Jan 2008 20:53:33 -0800 Subject: [PATCH 22/36] [NET]: Fix drivers to handle napi_disable() disabling interrupts. When we add the generic napi_disable_pending() breakout logic to net_rx_action() it means that napi_disable() can cause NAPI poll interrupt events to be disabled. And this is exactly what we want. If a napi_disable() is pending, and we are looping in the ->poll(), we want ->poll() event interrupts to stay disabled and we want to complete the NAPI poll ASAP. When ->poll() break out during device down was being handled on a per-driver basis, often these drivers would turn interrupts back on when '!netif_running()' was detected. And this would just cause a reschedule of the NAPI ->poll() in the interrupt handler before the napi_disable() could get in there and grab the NAPI_STATE_SCHED bit. The vast majority of drivers don't care if napi_disable() might have the side effect of disabling NAPI ->poll() event interrupts. In all such cases, when a napi_disable() is performed, the driver just disabled interrupts or is about to. However there were three exceptions to this in PCNET32, R8169, and SKY2. To fix those cases, at the subsequent napi_enable() points, I added code to ensure that the ->poll() interrupt events are enabled in the hardware. Signed-off-by: David S. Miller Acked-by: Don Fry --- drivers/net/pcnet32.c | 5 +++++ drivers/net/r8169.c | 2 ++ drivers/net/sky2.c | 3 +++ 3 files changed, 10 insertions(+) diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c index ff92aca0a7b3..90498ffe26f2 100644 --- a/drivers/net/pcnet32.c +++ b/drivers/net/pcnet32.c @@ -455,9 +455,14 @@ static void pcnet32_netif_start(struct net_device *dev) { #ifdef CONFIG_PCNET32_NAPI struct pcnet32_private *lp = netdev_priv(dev); + ulong ioaddr = dev->base_addr; + u16 val; #endif netif_wake_queue(dev); #ifdef CONFIG_PCNET32_NAPI + val = lp->a.read_csr(ioaddr, CSR3); + val &= 0x00ff; + lp->a.write_csr(ioaddr, CSR3, val); napi_enable(&lp->napi); #endif } diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 5863190894cc..af8030981f10 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -2398,6 +2398,8 @@ static void rtl8169_wait_for_quiescence(struct net_device *dev) rtl8169_irq_mask_and_ack(ioaddr); #ifdef CONFIG_R8169_NAPI + tp->intr_mask = 0xffff; + RTL_W16(IntrMask, tp->intr_event); napi_enable(&tp->napi); #endif } diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index a74fc11a6482..52ec89b82f64 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -1168,6 +1168,7 @@ static void sky2_vlan_rx_register(struct net_device *dev, struct vlan_group *grp TX_VLAN_TAG_OFF); } + sky2_read32(hw, B0_Y2_SP_LISR); napi_enable(&hw->napi); netif_tx_unlock_bh(dev); } @@ -2043,6 +2044,7 @@ static int sky2_change_mtu(struct net_device *dev, int new_mtu) err = sky2_rx_start(sky2); sky2_write32(hw, B0_IMSK, imask); + sky2_read32(hw, B0_Y2_SP_LISR); napi_enable(&hw->napi); if (err) @@ -3861,6 +3863,7 @@ static int sky2_debug_show(struct seq_file *seq, void *v) last = sky2_read16(hw, Y2_QADDR(rxqaddr[port], PREF_UNIT_PUT_IDX)), sky2_read16(hw, Y2_QADDR(rxqaddr[port], PREF_UNIT_LAST_IDX))); + sky2_read32(hw, B0_Y2_SP_LISR); napi_enable(&hw->napi); return 0; } From fed17f3094b960d3a54b10f17abbe4b57e976eec Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 7 Jan 2008 21:00:40 -0800 Subject: [PATCH 23/36] [NET]: Stop polling when napi_disable() is pending. This finally adds the code in net_rx_action() to break out of the ->poll()'ing loop when a napi_disable() is found to be pending. Now, even if a device is being flooded with packets it can be cleanly brought down. Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index be9d3015beaa..0879f52115eb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2207,8 +2207,12 @@ static void net_rx_action(struct softirq_action *h) * still "owns" the NAPI instance and therefore can * move the instance around on the list at-will. */ - if (unlikely(work == weight)) - list_move_tail(&n->poll_list, list); + if (unlikely(work == weight)) { + if (unlikely(napi_disable_pending(n))) + __napi_complete(n); + else + list_move_tail(&n->poll_list, list); + } netpoll_poll_unlock(have); } From 53e52c729cc169db82a6105fac7a166e10c2ec36 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 7 Jan 2008 21:06:12 -0800 Subject: [PATCH 24/36] [NET]: Make ->poll() breakout consistent in Intel ethernet drivers. This makes the ->poll() routines of the E100, E1000, E1000E, IXGB, and IXGBE drivers complete ->poll() consistently. Now they will all break out when the amount of RX work done is less than 'budget'. At a later time, we may want put back code to include the TX work as well (as at least one other NAPI driver does, but by in large NAPI drivers do not do this). But if so, it should be done consistently across the board to all of these drivers. Signed-off-by: David S. Miller Acked-by: Auke Kok --- drivers/net/e100.c | 7 +++---- drivers/net/e1000/e1000_main.c | 10 +++++----- drivers/net/e1000e/netdev.c | 8 ++++---- drivers/net/ixgb/ixgb_main.c | 7 +++---- drivers/net/ixgbe/ixgbe_main.c | 8 ++++---- 5 files changed, 19 insertions(+), 21 deletions(-) diff --git a/drivers/net/e100.c b/drivers/net/e100.c index 68316f15a7cb..b87402bc8308 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -1991,13 +1991,12 @@ static int e100_poll(struct napi_struct *napi, int budget) struct nic *nic = container_of(napi, struct nic, napi); struct net_device *netdev = nic->netdev; unsigned int work_done = 0; - int tx_cleaned; e100_rx_clean(nic, &work_done, budget); - tx_cleaned = e100_tx_clean(nic); + e100_tx_clean(nic); - /* If no Rx and Tx cleanup work was done, exit polling mode. */ - if((!tx_cleaned && (work_done == 0))) { + /* If budget not fully consumed, exit the polling mode */ + if (work_done < budget) { netif_rx_complete(netdev, napi); e100_enable_irq(nic); } diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 9de71443ef89..13d57b0a88fa 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -3919,7 +3919,7 @@ e1000_clean(struct napi_struct *napi, int budget) { struct e1000_adapter *adapter = container_of(napi, struct e1000_adapter, napi); struct net_device *poll_dev = adapter->netdev; - int tx_cleaned = 0, work_done = 0; + int work_done = 0; /* Must NOT use netdev_priv macro here. */ adapter = poll_dev->priv; @@ -3929,16 +3929,16 @@ e1000_clean(struct napi_struct *napi, int budget) * simultaneously. A failure obtaining the lock means * tx_ring[0] is currently being cleaned anyway. */ if (spin_trylock(&adapter->tx_queue_lock)) { - tx_cleaned = e1000_clean_tx_irq(adapter, - &adapter->tx_ring[0]); + e1000_clean_tx_irq(adapter, + &adapter->tx_ring[0]); spin_unlock(&adapter->tx_queue_lock); } adapter->clean_rx(adapter, &adapter->rx_ring[0], &work_done, budget); - /* If no Tx and not enough Rx work done, exit the polling mode */ - if ((!tx_cleaned && (work_done == 0))) { + /* If budget not fully consumed, exit the polling mode */ + if (work_done < budget) { if (likely(adapter->itr_setting & 3)) e1000_set_itr(adapter); netif_rx_complete(poll_dev, napi); diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index dd9698cfbb9d..4a6fc7453776 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -1384,7 +1384,7 @@ static int e1000_clean(struct napi_struct *napi, int budget) { struct e1000_adapter *adapter = container_of(napi, struct e1000_adapter, napi); struct net_device *poll_dev = adapter->netdev; - int tx_cleaned = 0, work_done = 0; + int work_done = 0; /* Must NOT use netdev_priv macro here. */ adapter = poll_dev->priv; @@ -1394,14 +1394,14 @@ static int e1000_clean(struct napi_struct *napi, int budget) * simultaneously. A failure obtaining the lock means * tx_ring is currently being cleaned anyway. */ if (spin_trylock(&adapter->tx_queue_lock)) { - tx_cleaned = e1000_clean_tx_irq(adapter); + e1000_clean_tx_irq(adapter); spin_unlock(&adapter->tx_queue_lock); } adapter->clean_rx(adapter, &work_done, budget); - /* If no Tx and not enough Rx work done, exit the polling mode */ - if ((!tx_cleaned && (work_done < budget))) { + /* If budget not fully consumed, exit the polling mode */ + if (work_done < budget) { if (adapter->itr_setting & 3) e1000_set_itr(adapter); netif_rx_complete(poll_dev, napi); diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c index a8bef52870fd..d2fb88d5cda2 100644 --- a/drivers/net/ixgb/ixgb_main.c +++ b/drivers/net/ixgb/ixgb_main.c @@ -1787,14 +1787,13 @@ ixgb_clean(struct napi_struct *napi, int budget) { struct ixgb_adapter *adapter = container_of(napi, struct ixgb_adapter, napi); struct net_device *netdev = adapter->netdev; - int tx_cleaned; int work_done = 0; - tx_cleaned = ixgb_clean_tx_irq(adapter); + ixgb_clean_tx_irq(adapter); ixgb_clean_rx_irq(adapter, &work_done, budget); - /* if no Tx and not enough Rx work done, exit the polling mode */ - if((!tx_cleaned && (work_done == 0))) { + /* If budget not fully consumed, exit the polling mode */ + if (work_done < budget) { netif_rx_complete(netdev, napi); ixgb_irq_enable(adapter); } diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 7c319303f0f8..a56491617661 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -1468,15 +1468,15 @@ static int ixgbe_clean(struct napi_struct *napi, int budget) struct ixgbe_adapter *adapter = container_of(napi, struct ixgbe_adapter, napi); struct net_device *netdev = adapter->netdev; - int tx_cleaned = 0, work_done = 0; + int work_done = 0; /* In non-MSIX case, there is no multi-Tx/Rx queue */ - tx_cleaned = ixgbe_clean_tx_irq(adapter, adapter->tx_ring); + ixgbe_clean_tx_irq(adapter, adapter->tx_ring); ixgbe_clean_rx_irq(adapter, &adapter->rx_ring[0], &work_done, budget); - /* If no Tx and not enough Rx work done, exit the polling mode */ - if ((!tx_cleaned && (work_done < budget))) { + /* If budget not fully consumed, exit the polling mode */ + if (work_done < budget) { netif_rx_complete(netdev, napi); ixgbe_irq_enable(adapter); } From 204246596b8b51c0ba44420e60f60561101b2b31 Mon Sep 17 00:00:00 2001 From: Auke Kok Date: Mon, 7 Jan 2008 21:47:25 -0800 Subject: [PATCH 25/36] [NET] Intel ethernet drivers: update MAINTAINERS Unfortunately Jeb decided to move away from our group. We wish Jeb good luck with his new group! Reordered people a bit so most active team members are on top. Signed-off-by: Auke Kok Signed-off-by: David S. Miller --- MAINTAINERS | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 56e6159a1b5d..b4f611c60cfc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1984,29 +1984,27 @@ L: netdev@vger.kernel.org S: Maintained INTEL PRO/100 ETHERNET SUPPORT -P: John Ronciak -M: john.ronciak@intel.com +P: Auke Kok +M: auke-jan.h.kok@intel.com P: Jesse Brandeburg M: jesse.brandeburg@intel.com P: Jeff Kirsher M: jeffrey.t.kirsher@intel.com -P: Auke Kok -M: auke-jan.h.kok@intel.com +P: John Ronciak +M: john.ronciak@intel.com L: e1000-devel@lists.sourceforge.net W: http://sourceforge.net/projects/e1000/ S: Supported INTEL PRO/1000 GIGABIT ETHERNET SUPPORT -P: Jeb Cramer -M: cramerj@intel.com -P: John Ronciak -M: john.ronciak@intel.com +P: Auke Kok +M: auke-jan.h.kok@intel.com P: Jesse Brandeburg M: jesse.brandeburg@intel.com P: Jeff Kirsher M: jeffrey.t.kirsher@intel.com -P: Auke Kok -M: auke-jan.h.kok@intel.com +P: John Ronciak +M: john.ronciak@intel.com L: e1000-devel@lists.sourceforge.net W: http://sourceforge.net/projects/e1000/ S: Supported From 2b2b2e35b71e5be8bc06cc0ff38df15dfedda19b Mon Sep 17 00:00:00 2001 From: Russ Dill Date: Mon, 7 Jan 2008 21:48:12 -0800 Subject: [PATCH 26/36] [NET]: kaweth was forgotten in msec switchover of usb_start_wait_urb Back in 2.6.12-pre, usb_start_wait_urb was switched over to take milliseconds instead of jiffies. kaweth.c was never updated to match. Signed-off-by: Russ Dill Signed-off-by: David S. Miller --- drivers/net/usb/kaweth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index 58a53a641754..569ad8bfd383 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -70,7 +70,7 @@ #define KAWETH_TX_TIMEOUT (5 * HZ) #define KAWETH_SCRATCH_SIZE 32 #define KAWETH_FIRMWARE_BUF_SIZE 4096 -#define KAWETH_CONTROL_TIMEOUT (30 * HZ) +#define KAWETH_CONTROL_TIMEOUT (30000) #define KAWETH_STATUS_BROKEN 0x0000001 #define KAWETH_STATUS_CLOSING 0x0000002 From d8c9283089287341c85a0a69de32c2287a990e71 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Jan 2008 21:52:14 -0800 Subject: [PATCH 27/36] [IPV4] ROUTE: ip_rt_dump() is unecessary slow I noticed "ip route list cache x.y.z.t" can be *very* slow. While strace-ing -T it I also noticed that first part of route cache is fetched quite fast : recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"p\0\0\0\30\0\2\0\254i\202 GXm\0\0\2 \0\376\0\0\2\0\2\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3772 <0.000047> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\234\0\0\0\30\0\2\0\254i\ 202GXm\0\0\2 \0\376\0\0\1\0\2"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3736 <0.000042> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\204\0\0\0\30\0\2\0\254i\ 202GXm\0\0\2 \0\376\0\0\1\0\2"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3740 <0.000055> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\234\0\0\0\30\0\2\0\254i\ 202GXm\0\0\2 \0\376\0\0\1\0\2"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3712 <0.000043> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\204\0\0\0\30\0\2\0\254i\ 202GXm\0\0\2 \0\376\0\0\1\0\2"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3732 <0.000053> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"p\0\0\0\30\0\2\0\254i\202 GXm\0\0\2 \0\376\0\0\2\0\2\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3708 <0.000052> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"p\0\0\0\30\0\2\0\254i\202 GXm\0\0\2 \0\376\0\0\2\0\2\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3680 <0.000041> while the part at the end of the table is more expensive: recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\204\0\0\0\30\0\2\0\254i\202GXm\0\0\2 \0\376\0\0\1\0\2"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3656 <0.003857> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\204\0\0\0\30\0\2\0\254i\202GXm\0\0\2 \0\376\0\0\1\0\2"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3772 <0.003891> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"p\0\0\0\30\0\2\0\254i\202GXm\0\0\2 \0\376\0\0\2\0\2\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3712 <0.003765> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"p\0\0\0\30\0\2\0\254i\202GXm\0\0\2 \0\376\0\0\2\0\2\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3700 <0.003879> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"p\0\0\0\30\0\2\0\254i\202GXm\0\0\2 \0\376\0\0\2\0\2\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3676 <0.003797> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"p\0\0\0\30\0\2\0\254i\202GXm\0\0\2 \0\376\0\0\2\0\2\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3724 <0.003856> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\234\0\0\0\30\0\2\0\254i\202GXm\0\0\2 \0\376\0\0\1\0\2"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3736 <0.003848> The following patch corrects this performance/latency problem, removing quadratic behavior. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d2bc6148a737..d3377069ce05 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2626,11 +2626,10 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) int idx, s_idx; s_h = cb->args[0]; + if (s_h < 0) + s_h = 0; s_idx = idx = cb->args[1]; - for (h = 0; h <= rt_hash_mask; h++) { - if (h < s_h) continue; - if (h > s_h) - s_idx = 0; + for (h = s_h; h <= rt_hash_mask; h++) { rcu_read_lock_bh(); for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; rt = rcu_dereference(rt->u.dst.rt_next), idx++) { @@ -2647,6 +2646,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) dst_release(xchg(&skb->dst, NULL)); } rcu_read_unlock_bh(); + s_idx = 0; } done: From 02f1c89d6e36507476f78108a3dcc78538be460b Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 7 Jan 2008 21:56:41 -0800 Subject: [PATCH 28/36] [NET]: Clone the sk_buff 'iif' field in __skb_clone() Both NetLabel and SELinux (other LSMs may grow to use it as well) rely on the 'iif' field to determine the receiving network interface of inbound packets. Unfortunately, at present this field is not preserved across a skb clone operation which can lead to garbage values if the cloned skb is sent back through the network stack. This patch corrects this problem by properly copying the 'iif' field in __skb_clone() and removing the 'iif' field assignment from skb_act_clone() since it is no longer needed. Also, while we are here, put the assignments in the same order as the offsets to reduce cacheline bounces. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/net/sch_generic.h | 1 - net/core/skbuff.c | 11 ++++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c9265518a378..4c3b35153c37 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -325,7 +325,6 @@ static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask) n->tc_verd = SET_TC_VERD(n->tc_verd, 0); n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd); n->tc_verd = CLR_TC_MUNGED(n->tc_verd); - n->iif = skb->iif; } return n; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5b4ce9b4dd20..b6283779e93d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -416,16 +416,17 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) C(len); C(data_len); C(mac_len); - n->cloned = 1; n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; + n->cloned = 1; n->nohdr = 0; n->destructor = NULL; - C(truesize); - atomic_set(&n->users, 1); - C(head); - C(data); + C(iif); C(tail); C(end); + C(head); + C(data); + C(truesize); + atomic_set(&n->users, 1); atomic_inc(&(skb_shinfo(skb)->dataref)); skb->cloned = 1; From 877364e60edeea06efa8ceb9d29201f195af8a47 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Mon, 7 Jan 2008 22:09:08 -0800 Subject: [PATCH 29/36] [LRO] Fix lro_mgr->features checks lro_mgr->features contains a bitmask of LRO_F_* values which are defined as power of two, not as bit indexes. They must be checked with x&LRO_F_FOO, not with test_bit(LRO_F_FOO,&x). Signed-off-by: Brice Goglin Acked-by: Andrew Gallatin Signed-off-by: David S. Miller --- net/ipv4/inet_lro.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index 9a96c277393d..4a4d49fca1f2 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -310,7 +310,7 @@ static void lro_flush(struct net_lro_mgr *lro_mgr, skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss; if (lro_desc->vgrp) { - if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + if (lro_mgr->features & LRO_F_NAPI) vlan_hwaccel_receive_skb(lro_desc->parent, lro_desc->vgrp, lro_desc->vlan_tag); @@ -320,7 +320,7 @@ static void lro_flush(struct net_lro_mgr *lro_mgr, lro_desc->vlan_tag); } else { - if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + if (lro_mgr->features & LRO_F_NAPI) netif_receive_skb(lro_desc->parent); else netif_rx(lro_desc->parent); @@ -352,7 +352,7 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, goto out; if ((skb->protocol == htons(ETH_P_8021Q)) - && !test_bit(LRO_F_EXTRACT_VLAN_ID, &lro_mgr->features)) + && !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID)) vlan_hdr_len = VLAN_HLEN; if (!lro_desc->active) { /* start new lro session */ @@ -474,7 +474,7 @@ static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr, goto out; if ((skb->protocol == htons(ETH_P_8021Q)) - && !test_bit(LRO_F_EXTRACT_VLAN_ID, &lro_mgr->features)) + && !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID)) vlan_hdr_len = VLAN_HLEN; iph = (void *)(skb->data + vlan_hdr_len); @@ -516,7 +516,7 @@ void lro_receive_skb(struct net_lro_mgr *lro_mgr, void *priv) { if (__lro_proc_skb(lro_mgr, skb, NULL, 0, priv)) { - if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + if (lro_mgr->features & LRO_F_NAPI) netif_receive_skb(skb); else netif_rx(skb); @@ -531,7 +531,7 @@ void lro_vlan_hwaccel_receive_skb(struct net_lro_mgr *lro_mgr, void *priv) { if (__lro_proc_skb(lro_mgr, skb, vgrp, vlan_tag, priv)) { - if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + if (lro_mgr->features & LRO_F_NAPI) vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag); else vlan_hwaccel_rx(skb, vgrp, vlan_tag); @@ -550,7 +550,7 @@ void lro_receive_frags(struct net_lro_mgr *lro_mgr, if (!skb) return; - if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + if (lro_mgr->features & LRO_F_NAPI) netif_receive_skb(skb); else netif_rx(skb); @@ -570,7 +570,7 @@ void lro_vlan_hwaccel_receive_frags(struct net_lro_mgr *lro_mgr, if (!skb) return; - if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + if (lro_mgr->features & LRO_F_NAPI) vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag); else vlan_hwaccel_rx(skb, vgrp, vlan_tag); From 1d39da3dcaad4231f0fa75024b1d6d710a2ced74 Mon Sep 17 00:00:00 2001 From: Russ Dill Date: Mon, 7 Jan 2008 23:15:41 -0800 Subject: [PATCH 30/36] [NET]: mcs7830 passes msecs instead of jiffies to usb_control_msg usb_control_msg was changed long ago (2.6.12-pre) to take milliseconds instead of jiffies. Oddly, mcs7830 wasn't added until 2.6.19-rc3. Signed-off-by: Russ Dill Signed-off-by: David S. Miller --- drivers/net/usb/mcs7830.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c index f55a5951733a..5ea7411e1337 100644 --- a/drivers/net/usb/mcs7830.c +++ b/drivers/net/usb/mcs7830.c @@ -94,7 +94,7 @@ static int mcs7830_get_reg(struct usbnet *dev, u16 index, u16 size, void *data) ret = usb_control_msg(xdev, usb_rcvctrlpipe(xdev, 0), MCS7830_RD_BREQ, MCS7830_RD_BMREQ, 0x0000, index, data, - size, msecs_to_jiffies(MCS7830_CTRL_TIMEOUT)); + size, MCS7830_CTRL_TIMEOUT); return ret; } @@ -105,7 +105,7 @@ static int mcs7830_set_reg(struct usbnet *dev, u16 index, u16 size, void *data) ret = usb_control_msg(xdev, usb_sndctrlpipe(xdev, 0), MCS7830_WR_BREQ, MCS7830_WR_BMREQ, 0x0000, index, data, - size, msecs_to_jiffies(MCS7830_CTRL_TIMEOUT)); + size, MCS7830_CTRL_TIMEOUT); return ret; } From 2e3884b5b16795c03a7bf295797c1b2402885b88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=F6rn=20Steinbrink?= Date: Mon, 7 Jan 2008 23:22:53 -0800 Subject: [PATCH 31/36] [FORCEDETH]: Fix reversing the MAC address on suspend. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For cards that initially have the MAC address stored in reverse order, the forcedeth driver uses a flag to signal whether the address was already corrected, so that it is not reversed again on a subsequent probe. Unfortunately this flag, which is stored in a register of the card, seems to get lost during suspend, resulting in the MAC address being reversed again. To fix that, the MAC address needs to be written back in reversed order before we suspend and the flag needs to be reset. The flag is still required because at least kexec will never write back the reversed address and thus needs to know what state the card is in. Signed-off-by: Björn Steinbrink Signed-off-by: David S. Miller --- drivers/net/forcedeth.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index a96583cceb5e..f84c752997a4 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -5199,10 +5199,6 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i dev->dev_addr[3] = (np->orig_mac[0] >> 16) & 0xff; dev->dev_addr[4] = (np->orig_mac[0] >> 8) & 0xff; dev->dev_addr[5] = (np->orig_mac[0] >> 0) & 0xff; - /* set permanent address to be correct aswell */ - np->orig_mac[0] = (dev->dev_addr[0] << 0) + (dev->dev_addr[1] << 8) + - (dev->dev_addr[2] << 16) + (dev->dev_addr[3] << 24); - np->orig_mac[1] = (dev->dev_addr[4] << 0) + (dev->dev_addr[5] << 8); writel(txreg|NVREG_TRANSMITPOLL_MAC_ADDR_REV, base + NvRegTransmitPoll); } memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); @@ -5414,6 +5410,8 @@ static void __devexit nv_remove(struct pci_dev *pci_dev) */ writel(np->orig_mac[0], base + NvRegMacAddrA); writel(np->orig_mac[1], base + NvRegMacAddrB); + writel(readl(base + NvRegTransmitPoll) & ~NVREG_TRANSMITPOLL_MAC_ADDR_REV, + base + NvRegTransmitPoll); /* free all structures */ free_rings(dev); From 0f99be0d115a5716292c58dfdb20d2eddd0f3387 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Jan 2008 23:39:06 -0800 Subject: [PATCH 32/36] [XFRM]: xfrm_algo_clone() allocates too much memory alg_key_len is the length in bits of the key, not in bytes. Best way to fix this is to move alg_len() function from net/xfrm/xfrm_user.c to include/net/xfrm.h, and to use it in xfrm_algo_clone() alg_len() is renamed to xfrm_alg_len() because of its global exposition. Signed-off-by: Eric Dumazet Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 7 ++++++- net/xfrm/xfrm_user.c | 17 ++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 58dfa82889aa..1dd20cf17982 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1188,10 +1188,15 @@ static inline int xfrm_aevent_is_on(void) return ret; } +static inline int xfrm_alg_len(struct xfrm_algo *alg) +{ + return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); +} + #ifdef CONFIG_XFRM_MIGRATE static inline struct xfrm_algo *xfrm_algo_clone(struct xfrm_algo *orig) { - return (struct xfrm_algo *)kmemdup(orig, sizeof(*orig) + orig->alg_key_len, GFP_KERNEL); + return kmemdup(orig, xfrm_alg_len(orig), GFP_KERNEL); } static inline void xfrm_states_put(struct xfrm_state **states, int n) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e75dbdcb08a4..c4f6419b1769 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -31,11 +31,6 @@ #include #endif -static inline int alg_len(struct xfrm_algo *alg) -{ - return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); -} - static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) { struct nlattr *rt = attrs[type]; @@ -45,7 +40,7 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) return 0; algp = nla_data(rt); - if (nla_len(rt) < alg_len(algp)) + if (nla_len(rt) < xfrm_alg_len(algp)) return -EINVAL; switch (type) { @@ -204,7 +199,7 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, return -ENOSYS; *props = algo->desc.sadb_alg_id; - p = kmemdup(ualg, alg_len(ualg), GFP_KERNEL); + p = kmemdup(ualg, xfrm_alg_len(ualg), GFP_KERNEL); if (!p) return -ENOMEM; @@ -516,9 +511,9 @@ static int copy_to_user_state_extra(struct xfrm_state *x, NLA_PUT_U64(skb, XFRMA_LASTUSED, x->lastused); if (x->aalg) - NLA_PUT(skb, XFRMA_ALG_AUTH, alg_len(x->aalg), x->aalg); + NLA_PUT(skb, XFRMA_ALG_AUTH, xfrm_alg_len(x->aalg), x->aalg); if (x->ealg) - NLA_PUT(skb, XFRMA_ALG_CRYPT, alg_len(x->ealg), x->ealg); + NLA_PUT(skb, XFRMA_ALG_CRYPT, xfrm_alg_len(x->ealg), x->ealg); if (x->calg) NLA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); @@ -1978,9 +1973,9 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) { size_t l = 0; if (x->aalg) - l += nla_total_size(alg_len(x->aalg)); + l += nla_total_size(xfrm_alg_len(x->aalg)); if (x->ealg) - l += nla_total_size(alg_len(x->ealg)); + l += nla_total_size(xfrm_alg_len(x->ealg)); if (x->calg) l += nla_total_size(sizeof(*x->calg)); if (x->encap) From 9d3e44425e3498eb33f25d94392b4fd0d56a5176 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Jan 2008 23:41:28 -0800 Subject: [PATCH 33/36] [SOCK]: Adds a rcu_dereference() in sk_filter It seems commit fda9ef5d679b07c9d9097aaf6ef7f069d794a8f9 introduced a RCU protection for sk_filter(), without a rcu_dereference() Either we need a rcu_dereference(), either a comment should explain why we dont need it. I vote for the former. Signed-off-by: Eric Dumazet Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/sock.h b/include/net/sock.h index 67e35c7e230c..6e1542da33a1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -944,7 +944,7 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb) return err; rcu_read_lock_bh(); - filter = sk->sk_filter; + filter = rcu_dereference(sk->sk_filter); if (filter) { unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len); From cf585ae8ae9ac7287a6d078425ea32f22bf7f1f7 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 8 Jan 2008 23:44:44 -0800 Subject: [PATCH 34/36] [CONNECTOR]: Don't touch queue dev after decrement of ref count. cn_queue_free_callback() will touch 'dev'(i.e. cbq->pdev), so it should be called before atomic_dec(&dev->refcnt). Signed-off-by: Li Zefan Signed-off-by: David S. Miller --- drivers/connector/cn_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/connector/cn_queue.c b/drivers/connector/cn_queue.c index 296f51002b55..12ceed54ab18 100644 --- a/drivers/connector/cn_queue.c +++ b/drivers/connector/cn_queue.c @@ -99,8 +99,8 @@ int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id spin_unlock_bh(&dev->queue_lock); if (found) { - atomic_dec(&dev->refcnt); cn_queue_free_callback(cbq); + atomic_dec(&dev->refcnt); return -EINVAL; } From 1ac4f008857487bf45b709248d71c5b3f4cae7b5 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Tue, 8 Jan 2008 23:52:21 -0800 Subject: [PATCH 35/36] [IPV6]: IPV6_MULTICAST_IF setting is ignored on link-local connect() Signed-off-by: Brian Haley Acked-by: David L Stevens Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 2ed689ac449e..5d4245ab4183 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -123,11 +123,11 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) goto out; } sk->sk_bound_dev_if = usin->sin6_scope_id; - if (!sk->sk_bound_dev_if && - (addr_type & IPV6_ADDR_MULTICAST)) - fl.oif = np->mcast_oif; } + if (!sk->sk_bound_dev_if && (addr_type & IPV6_ADDR_MULTICAST)) + sk->sk_bound_dev_if = np->mcast_oif; + /* Connect to link-local address requires an interface */ if (!sk->sk_bound_dev_if) { err = -EINVAL; From 1c9b7aa1eb40ab708ef3242f74b9a61487623168 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 9 Jan 2008 03:51:59 -0800 Subject: [PATCH 36/36] [ATM]: Check IP header validity in mpc_send_packet Al went through the ip_fast_csum callers and found this piece of code that did not validate the IP header. While root crashing the machine by sending bogus packets through raw or AF_PACKET sockets isn't that serious, it is still nice to react gracefully. This patch ensures that the skb has enough data for an IP header and that the header length field is valid. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/atm/mpc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 2086396de177..9c7f712fc7e9 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -542,6 +542,13 @@ static int mpc_send_packet(struct sk_buff *skb, struct net_device *dev) if (eth->h_proto != htons(ETH_P_IP)) goto non_ip; /* Multi-Protocol Over ATM :-) */ + /* Weed out funny packets (e.g., AF_PACKET or raw). */ + if (skb->len < ETH_HLEN + sizeof(struct iphdr)) + goto non_ip; + skb_set_network_header(skb, ETH_HLEN); + if (skb->len < ETH_HLEN + ip_hdr(skb)->ihl * 4 || ip_hdr(skb)->ihl < 5) + goto non_ip; + while (i < mpc->number_of_mps_macs) { if (!compare_ether_addr(eth->h_dest, (mpc->mps_macs + i*ETH_ALEN))) if ( send_via_shortcut(skb, mpc) == 0 ) /* try shortcut */