diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index f79b02e80e75..4d2bdbdd34e8 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -420,7 +420,8 @@ static void tg3_enable_ints(struct tg3 *tp) { tw32(TG3PCI_MISC_HOST_CTRL, (tp->misc_host_ctrl & ~MISC_HOST_CTRL_MASK_PCI_INT)); - tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000000); + tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, + (tp->last_tag << 24)); tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW); tg3_cond_int(tp); @@ -455,10 +456,16 @@ static void tg3_restart_ints(struct tg3 *tp) { tw32(TG3PCI_MISC_HOST_CTRL, (tp->misc_host_ctrl & ~MISC_HOST_CTRL_MASK_PCI_INT)); - tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000000); + tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, + tp->last_tag << 24); mmiowb(); - if (tg3_has_work(tp)) + /* When doing tagged status, this work check is unnecessary. + * The last_tag we write above tells the chip which piece of + * work we've completed. + */ + if (!(tp->tg3_flags & TG3_FLAG_TAGGED_STATUS) && + tg3_has_work(tp)) tw32(HOSTCC_MODE, tp->coalesce_mode | (HOSTCC_MODE_ENABLE | HOSTCC_MODE_NOW)); } @@ -2500,7 +2507,7 @@ static int tg3_setup_phy(struct tg3 *tp, int force_reset) if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) { if (netif_carrier_ok(tp->dev)) { tw32(HOSTCC_STAT_COAL_TICKS, - DEFAULT_STAT_COAL_TICKS); + tp->coal.stats_block_coalesce_usecs); } else { tw32(HOSTCC_STAT_COAL_TICKS, 0); } @@ -2886,7 +2893,6 @@ static int tg3_poll(struct net_device *netdev, int *budget) * All RX "locking" is done by ensuring outside * code synchronizes with dev->poll() */ - done = 1; if (sblk->idx[0].rx_producer != tp->rx_rcb_ptr) { int orig_budget = *budget; int work_done; @@ -2898,12 +2904,14 @@ static int tg3_poll(struct net_device *netdev, int *budget) *budget -= work_done; netdev->quota -= work_done; - - if (work_done >= orig_budget) - done = 0; } + if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS) + tp->last_tag = sblk->status_tag; + rmb(); + /* if no more work, tell net stack and NIC we're done */ + done = !tg3_has_work(tp); if (done) { spin_lock_irqsave(&tp->lock, flags); __netif_rx_complete(netdev); @@ -2928,22 +2936,21 @@ static irqreturn_t tg3_msi(int irq, void *dev_id, struct pt_regs *regs) spin_lock_irqsave(&tp->lock, flags); /* - * writing any value to intr-mbox-0 clears PCI INTA# and + * Writing any value to intr-mbox-0 clears PCI INTA# and * chip-internal interrupt pending events. - * writing non-zero to intr-mbox-0 additional tells the + * Writing non-zero to intr-mbox-0 additional tells the * NIC to stop sending us irqs, engaging "in-intr-handler" * event coalescing. */ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001); + tp->last_tag = sblk->status_tag; sblk->status &= ~SD_STATUS_UPDATED; - if (likely(tg3_has_work(tp))) netif_rx_schedule(dev); /* schedule NAPI poll */ else { - /* no work, re-enable interrupts - */ + /* No work, re-enable interrupts. */ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, - 0x00000000); + tp->last_tag << 24); } spin_unlock_irqrestore(&tp->lock, flags); @@ -2961,6 +2968,52 @@ static irqreturn_t tg3_interrupt(int irq, void *dev_id, struct pt_regs *regs) spin_lock_irqsave(&tp->lock, flags); + /* In INTx mode, it is possible for the interrupt to arrive at + * the CPU before the status block posted prior to the interrupt. + * Reading the PCI State register will confirm whether the + * interrupt is ours and will flush the status block. + */ + if ((sblk->status & SD_STATUS_UPDATED) || + !(tr32(TG3PCI_PCISTATE) & PCISTATE_INT_NOT_ACTIVE)) { + /* + * Writing any value to intr-mbox-0 clears PCI INTA# and + * chip-internal interrupt pending events. + * Writing non-zero to intr-mbox-0 additional tells the + * NIC to stop sending us irqs, engaging "in-intr-handler" + * event coalescing. + */ + tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, + 0x00000001); + sblk->status &= ~SD_STATUS_UPDATED; + if (likely(tg3_has_work(tp))) + netif_rx_schedule(dev); /* schedule NAPI poll */ + else { + /* No work, shared interrupt perhaps? re-enable + * interrupts, and flush that PCI write + */ + tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, + 0x00000000); + tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW); + } + } else { /* shared interrupt */ + handled = 0; + } + + spin_unlock_irqrestore(&tp->lock, flags); + + return IRQ_RETVAL(handled); +} + +static irqreturn_t tg3_interrupt_tagged(int irq, void *dev_id, struct pt_regs *regs) +{ + struct net_device *dev = dev_id; + struct tg3 *tp = netdev_priv(dev); + struct tg3_hw_status *sblk = tp->hw_status; + unsigned long flags; + unsigned int handled = 1; + + spin_lock_irqsave(&tp->lock, flags); + /* In INTx mode, it is possible for the interrupt to arrive at * the CPU before the status block posted prior to the interrupt. * Reading the PCI State register will confirm whether the @@ -2977,13 +3030,8 @@ static irqreturn_t tg3_interrupt(int irq, void *dev_id, struct pt_regs *regs) */ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001); - /* - * Flush PCI write. This also guarantees that our - * status block has been flushed to host memory. - */ - tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW); + tp->last_tag = sblk->status_tag; sblk->status &= ~SD_STATUS_UPDATED; - if (likely(tg3_has_work(tp))) netif_rx_schedule(dev); /* schedule NAPI poll */ else { @@ -2991,7 +3039,7 @@ static irqreturn_t tg3_interrupt(int irq, void *dev_id, struct pt_regs *regs) * interrupts, and flush that PCI write */ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, - 0x00000000); + tp->last_tag << 24); tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW); } } else { /* shared interrupt */ @@ -5044,6 +5092,27 @@ static void tg3_set_bdinfo(struct tg3 *tp, u32 bdinfo_addr, } static void __tg3_set_rx_mode(struct net_device *); +static void tg3_set_coalesce(struct tg3 *tp, struct ethtool_coalesce *ec) +{ + tw32(HOSTCC_RXCOL_TICKS, ec->rx_coalesce_usecs); + tw32(HOSTCC_TXCOL_TICKS, ec->tx_coalesce_usecs); + tw32(HOSTCC_RXMAX_FRAMES, ec->rx_max_coalesced_frames); + tw32(HOSTCC_TXMAX_FRAMES, ec->tx_max_coalesced_frames); + if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) { + tw32(HOSTCC_RXCOAL_TICK_INT, ec->rx_coalesce_usecs_irq); + tw32(HOSTCC_TXCOAL_TICK_INT, ec->tx_coalesce_usecs_irq); + } + tw32(HOSTCC_RXCOAL_MAXF_INT, ec->rx_max_coalesced_frames_irq); + tw32(HOSTCC_TXCOAL_MAXF_INT, ec->tx_max_coalesced_frames_irq); + if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) { + u32 val = ec->stats_block_coalesce_usecs; + + if (!netif_carrier_ok(tp->dev)) + val = 0; + + tw32(HOSTCC_STAT_COAL_TICKS, val); + } +} /* tp->lock is held. */ static int tg3_reset_hw(struct tg3 *tp) @@ -5366,16 +5435,7 @@ static int tg3_reset_hw(struct tg3 *tp) udelay(10); } - tw32(HOSTCC_RXCOL_TICKS, 0); - tw32(HOSTCC_TXCOL_TICKS, LOW_TXCOL_TICKS); - tw32(HOSTCC_RXMAX_FRAMES, 1); - tw32(HOSTCC_TXMAX_FRAMES, LOW_RXMAX_FRAMES); - if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) { - tw32(HOSTCC_RXCOAL_TICK_INT, 0); - tw32(HOSTCC_TXCOAL_TICK_INT, 0); - } - tw32(HOSTCC_RXCOAL_MAXF_INT, 1); - tw32(HOSTCC_TXCOAL_MAXF_INT, 0); + tg3_set_coalesce(tp, &tp->coal); /* set status block DMA address */ tw32(HOSTCC_STATUS_BLK_HOST_ADDR + TG3_64BIT_REG_HIGH, @@ -5388,8 +5448,6 @@ static int tg3_reset_hw(struct tg3 *tp) * the tg3_periodic_fetch_stats call there, and * tg3_get_stats to see how this works for 5705/5750 chips. */ - tw32(HOSTCC_STAT_COAL_TICKS, - DEFAULT_STAT_COAL_TICKS); tw32(HOSTCC_STATS_BLK_HOST_ADDR + TG3_64BIT_REG_HIGH, ((u64) tp->stats_mapping >> 32)); tw32(HOSTCC_STATS_BLK_HOST_ADDR + TG3_64BIT_REG_LOW, @@ -5445,7 +5503,8 @@ static int tg3_reset_hw(struct tg3 *tp) udelay(100); tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0); - tr32(MAILBOX_INTERRUPT_0); + tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW); + tp->last_tag = 0; if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) { tw32_f(DMAC_MODE, DMAC_MODE_ENABLE); @@ -5723,31 +5782,33 @@ static void tg3_timer(unsigned long __opaque) spin_lock_irqsave(&tp->lock, flags); spin_lock(&tp->tx_lock); - /* All of this garbage is because when using non-tagged - * IRQ status the mailbox/status_block protocol the chip - * uses with the cpu is race prone. - */ - if (tp->hw_status->status & SD_STATUS_UPDATED) { - tw32(GRC_LOCAL_CTRL, - tp->grc_local_ctrl | GRC_LCLCTRL_SETINT); - } else { - tw32(HOSTCC_MODE, tp->coalesce_mode | - (HOSTCC_MODE_ENABLE | HOSTCC_MODE_NOW)); - } + if (!(tp->tg3_flags & TG3_FLAG_TAGGED_STATUS)) { + /* All of this garbage is because when using non-tagged + * IRQ status the mailbox/status_block protocol the chip + * uses with the cpu is race prone. + */ + if (tp->hw_status->status & SD_STATUS_UPDATED) { + tw32(GRC_LOCAL_CTRL, + tp->grc_local_ctrl | GRC_LCLCTRL_SETINT); + } else { + tw32(HOSTCC_MODE, tp->coalesce_mode | + (HOSTCC_MODE_ENABLE | HOSTCC_MODE_NOW)); + } - if (!(tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) { - tp->tg3_flags2 |= TG3_FLG2_RESTART_TIMER; - spin_unlock(&tp->tx_lock); - spin_unlock_irqrestore(&tp->lock, flags); - schedule_work(&tp->reset_task); - return; + if (!(tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) { + tp->tg3_flags2 |= TG3_FLG2_RESTART_TIMER; + spin_unlock(&tp->tx_lock); + spin_unlock_irqrestore(&tp->lock, flags); + schedule_work(&tp->reset_task); + return; + } } - if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS) - tg3_periodic_fetch_stats(tp); - /* This part only runs once per second. */ if (!--tp->timer_counter) { + if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS) + tg3_periodic_fetch_stats(tp); + if (tp->tg3_flags & TG3_FLAG_USE_LINKCHG_REG) { u32 mac_stat; int phy_event; @@ -5846,9 +5907,13 @@ static int tg3_test_interrupt(struct tg3 *tp) if (tp->tg3_flags2 & TG3_FLG2_USING_MSI) err = request_irq(tp->pdev->irq, tg3_msi, SA_SAMPLE_RANDOM, dev->name, dev); - else - err = request_irq(tp->pdev->irq, tg3_interrupt, + else { + irqreturn_t (*fn)(int, void *, struct pt_regs *)=tg3_interrupt; + if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS) + fn = tg3_interrupt_tagged; + err = request_irq(tp->pdev->irq, fn, SA_SHIRQ | SA_SAMPLE_RANDOM, dev->name, dev); + } if (err) return err; @@ -5900,9 +5965,14 @@ static int tg3_test_msi(struct tg3 *tp) tp->tg3_flags2 &= ~TG3_FLG2_USING_MSI; - err = request_irq(tp->pdev->irq, tg3_interrupt, - SA_SHIRQ | SA_SAMPLE_RANDOM, dev->name, dev); + { + irqreturn_t (*fn)(int, void *, struct pt_regs *)=tg3_interrupt; + if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS) + fn = tg3_interrupt_tagged; + err = request_irq(tp->pdev->irq, fn, + SA_SHIRQ | SA_SAMPLE_RANDOM, dev->name, dev); + } if (err) return err; @@ -5948,7 +6018,13 @@ static int tg3_open(struct net_device *dev) if ((tp->tg3_flags2 & TG3_FLG2_5750_PLUS) && (GET_CHIP_REV(tp->pci_chip_rev_id) != CHIPREV_5750_AX) && (GET_CHIP_REV(tp->pci_chip_rev_id) != CHIPREV_5750_BX)) { - if (pci_enable_msi(tp->pdev) == 0) { + /* All MSI supporting chips should support tagged + * status. Assert that this is the case. + */ + if (!(tp->tg3_flags & TG3_FLAG_TAGGED_STATUS)) { + printk(KERN_WARNING PFX "%s: MSI without TAGGED? " + "Not using MSI.\n", tp->dev->name); + } else if (pci_enable_msi(tp->pdev) == 0) { u32 msi_mode; msi_mode = tr32(MSGINT_MODE); @@ -5959,9 +6035,14 @@ static int tg3_open(struct net_device *dev) if (tp->tg3_flags2 & TG3_FLG2_USING_MSI) err = request_irq(tp->pdev->irq, tg3_msi, SA_SAMPLE_RANDOM, dev->name, dev); - else - err = request_irq(tp->pdev->irq, tg3_interrupt, + else { + irqreturn_t (*fn)(int, void *, struct pt_regs *)=tg3_interrupt; + if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS) + fn = tg3_interrupt_tagged; + + err = request_irq(tp->pdev->irq, fn, SA_SHIRQ | SA_SAMPLE_RANDOM, dev->name, dev); + } if (err) { if (tp->tg3_flags2 & TG3_FLG2_USING_MSI) { @@ -5980,9 +6061,16 @@ static int tg3_open(struct net_device *dev) tg3_halt(tp, 1); tg3_free_rings(tp); } else { - tp->timer_offset = HZ / 10; - tp->timer_counter = tp->timer_multiplier = 10; - tp->asf_counter = tp->asf_multiplier = (10 * 120); + if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS) + tp->timer_offset = HZ; + else + tp->timer_offset = HZ / 10; + + BUG_ON(tp->timer_offset > HZ); + tp->timer_counter = tp->timer_multiplier = + (HZ / tp->timer_offset); + tp->asf_counter = tp->asf_multiplier = + ((HZ / tp->timer_offset) * 120); init_timer(&tp->timer); tp->timer.expires = jiffies + tp->timer_offset; @@ -6005,6 +6093,7 @@ static int tg3_open(struct net_device *dev) if (tp->tg3_flags2 & TG3_FLG2_USING_MSI) { err = tg3_test_msi(tp); + if (err) { spin_lock_irq(&tp->lock); spin_lock(&tp->tx_lock); @@ -7203,6 +7292,14 @@ static void tg3_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) } #endif +static int tg3_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec) +{ + struct tg3 *tp = netdev_priv(dev); + + memcpy(ec, &tp->coal, sizeof(*ec)); + return 0; +} + static struct ethtool_ops tg3_ethtool_ops = { .get_settings = tg3_get_settings, .set_settings = tg3_set_settings, @@ -7235,6 +7332,7 @@ static struct ethtool_ops tg3_ethtool_ops = { .get_strings = tg3_get_strings, .get_stats_count = tg3_get_stats_count, .get_ethtool_stats = tg3_get_ethtool_stats, + .get_coalesce = tg3_get_coalesce, }; static void __devinit tg3_get_eeprom_size(struct tg3 *tp) @@ -8422,15 +8520,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS) tp->tg3_flags2 |= TG3_FLG2_PHY_BER_BUG; - /* Only 5701 and later support tagged irq status mode. - * Also, 5788 chips cannot use tagged irq status. - * - * However, since we are using NAPI avoid tagged irq status - * because the interrupt condition is more difficult to - * fully clear in that mode. - */ tp->coalesce_mode = 0; - if (GET_CHIP_REV(tp->pci_chip_rev_id) != CHIPREV_5700_AX && GET_CHIP_REV(tp->pci_chip_rev_id) != CHIPREV_5700_BX) tp->coalesce_mode |= HOSTCC_MODE_32BYTE; @@ -8494,6 +8584,18 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) grc_misc_cfg == GRC_MISC_CFG_BOARD_ID_5788M)) tp->tg3_flags2 |= TG3_FLG2_IS_5788; + if (!(tp->tg3_flags2 & TG3_FLG2_IS_5788) && + (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5700)) + tp->tg3_flags |= TG3_FLAG_TAGGED_STATUS; + if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS) { + tp->coalesce_mode |= (HOSTCC_MODE_CLRTICK_RXBD | + HOSTCC_MODE_CLRTICK_TXBD); + + tp->misc_host_ctrl |= MISC_HOST_CTRL_TAGGED_STATUS; + pci_write_config_dword(tp->pdev, TG3PCI_MISC_HOST_CTRL, + tp->misc_host_ctrl); + } + /* these are limited to 10/100 only */ if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5703 && (grc_misc_cfg == 0x8000 || grc_misc_cfg == 0x4000)) || @@ -8671,6 +8773,146 @@ static int __devinit tg3_get_device_address(struct tg3 *tp) return 0; } +#define BOUNDARY_SINGLE_CACHELINE 1 +#define BOUNDARY_MULTI_CACHELINE 2 + +static u32 __devinit tg3_calc_dma_bndry(struct tg3 *tp, u32 val) +{ + int cacheline_size; + u8 byte; + int goal; + + pci_read_config_byte(tp->pdev, PCI_CACHE_LINE_SIZE, &byte); + if (byte == 0) + cacheline_size = 1024; + else + cacheline_size = (int) byte * 4; + + /* On 5703 and later chips, the boundary bits have no + * effect. + */ + if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5700 && + GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5701 && + !(tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS)) + goto out; + +#if defined(CONFIG_PPC64) || defined(CONFIG_IA64) || defined(CONFIG_PARISC) + goal = BOUNDARY_MULTI_CACHELINE; +#else +#if defined(CONFIG_SPARC64) || defined(CONFIG_ALPHA) + goal = BOUNDARY_SINGLE_CACHELINE; +#else + goal = 0; +#endif +#endif + + if (!goal) + goto out; + + /* PCI controllers on most RISC systems tend to disconnect + * when a device tries to burst across a cache-line boundary. + * Therefore, letting tg3 do so just wastes PCI bandwidth. + * + * Unfortunately, for PCI-E there are only limited + * write-side controls for this, and thus for reads + * we will still get the disconnects. We'll also waste + * these PCI cycles for both read and write for chips + * other than 5700 and 5701 which do not implement the + * boundary bits. + */ + if ((tp->tg3_flags & TG3_FLAG_PCIX_MODE) && + !(tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS)) { + switch (cacheline_size) { + case 16: + case 32: + case 64: + case 128: + if (goal == BOUNDARY_SINGLE_CACHELINE) { + val |= (DMA_RWCTRL_READ_BNDRY_128_PCIX | + DMA_RWCTRL_WRITE_BNDRY_128_PCIX); + } else { + val |= (DMA_RWCTRL_READ_BNDRY_384_PCIX | + DMA_RWCTRL_WRITE_BNDRY_384_PCIX); + } + break; + + case 256: + val |= (DMA_RWCTRL_READ_BNDRY_256_PCIX | + DMA_RWCTRL_WRITE_BNDRY_256_PCIX); + break; + + default: + val |= (DMA_RWCTRL_READ_BNDRY_384_PCIX | + DMA_RWCTRL_WRITE_BNDRY_384_PCIX); + break; + }; + } else if (tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS) { + switch (cacheline_size) { + case 16: + case 32: + case 64: + if (goal == BOUNDARY_SINGLE_CACHELINE) { + val &= ~DMA_RWCTRL_WRITE_BNDRY_DISAB_PCIE; + val |= DMA_RWCTRL_WRITE_BNDRY_64_PCIE; + break; + } + /* fallthrough */ + case 128: + default: + val &= ~DMA_RWCTRL_WRITE_BNDRY_DISAB_PCIE; + val |= DMA_RWCTRL_WRITE_BNDRY_128_PCIE; + break; + }; + } else { + switch (cacheline_size) { + case 16: + if (goal == BOUNDARY_SINGLE_CACHELINE) { + val |= (DMA_RWCTRL_READ_BNDRY_16 | + DMA_RWCTRL_WRITE_BNDRY_16); + break; + } + /* fallthrough */ + case 32: + if (goal == BOUNDARY_SINGLE_CACHELINE) { + val |= (DMA_RWCTRL_READ_BNDRY_32 | + DMA_RWCTRL_WRITE_BNDRY_32); + break; + } + /* fallthrough */ + case 64: + if (goal == BOUNDARY_SINGLE_CACHELINE) { + val |= (DMA_RWCTRL_READ_BNDRY_64 | + DMA_RWCTRL_WRITE_BNDRY_64); + break; + } + /* fallthrough */ + case 128: + if (goal == BOUNDARY_SINGLE_CACHELINE) { + val |= (DMA_RWCTRL_READ_BNDRY_128 | + DMA_RWCTRL_WRITE_BNDRY_128); + break; + } + /* fallthrough */ + case 256: + val |= (DMA_RWCTRL_READ_BNDRY_256 | + DMA_RWCTRL_WRITE_BNDRY_256); + break; + case 512: + val |= (DMA_RWCTRL_READ_BNDRY_512 | + DMA_RWCTRL_WRITE_BNDRY_512); + break; + case 1024: + default: + val |= (DMA_RWCTRL_READ_BNDRY_1024 | + DMA_RWCTRL_WRITE_BNDRY_1024); + break; + }; + } + +out: + return val; +} + static int __devinit tg3_do_test_dma(struct tg3 *tp, u32 *buf, dma_addr_t buf_dma, int size, int to_device) { struct tg3_internal_buffer_desc test_desc; @@ -8757,7 +8999,7 @@ static int __devinit tg3_do_test_dma(struct tg3 *tp, u32 *buf, dma_addr_t buf_dm static int __devinit tg3_test_dma(struct tg3 *tp) { dma_addr_t buf_dma; - u32 *buf; + u32 *buf, saved_dma_rwctrl; int ret; buf = pci_alloc_consistent(tp->pdev, TEST_BUFFER_SIZE, &buf_dma); @@ -8769,46 +9011,7 @@ static int __devinit tg3_test_dma(struct tg3 *tp) tp->dma_rwctrl = ((0x7 << DMA_RWCTRL_PCI_WRITE_CMD_SHIFT) | (0x6 << DMA_RWCTRL_PCI_READ_CMD_SHIFT)); -#ifndef CONFIG_X86 - { - u8 byte; - int cacheline_size; - pci_read_config_byte(tp->pdev, PCI_CACHE_LINE_SIZE, &byte); - - if (byte == 0) - cacheline_size = 1024; - else - cacheline_size = (int) byte * 4; - - switch (cacheline_size) { - case 16: - case 32: - case 64: - case 128: - if ((tp->tg3_flags & TG3_FLAG_PCIX_MODE) && - !(tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS)) { - tp->dma_rwctrl |= - DMA_RWCTRL_WRITE_BNDRY_384_PCIX; - break; - } else if (tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS) { - tp->dma_rwctrl &= - ~(DMA_RWCTRL_PCI_WRITE_CMD); - tp->dma_rwctrl |= - DMA_RWCTRL_WRITE_BNDRY_128_PCIE; - break; - } - /* fallthrough */ - case 256: - if (!(tp->tg3_flags & TG3_FLAG_PCIX_MODE) && - !(tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS)) - tp->dma_rwctrl |= - DMA_RWCTRL_WRITE_BNDRY_256; - else if (!(tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS)) - tp->dma_rwctrl |= - DMA_RWCTRL_WRITE_BNDRY_256_PCIX; - }; - } -#endif + tp->dma_rwctrl = tg3_calc_dma_bndry(tp, tp->dma_rwctrl); if (tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS) { /* DMA read watermark not used on PCIE */ @@ -8827,7 +9030,7 @@ static int __devinit tg3_test_dma(struct tg3 *tp) if (ccval == 0x6 || ccval == 0x7) tp->dma_rwctrl |= DMA_RWCTRL_ONE_DMA; - /* Set bit 23 to renable PCIX hw bug fix */ + /* Set bit 23 to enable PCIX hw bug fix */ tp->dma_rwctrl |= 0x009f0000; } else { tp->dma_rwctrl |= 0x001b000f; @@ -8868,6 +9071,13 @@ static int __devinit tg3_test_dma(struct tg3 *tp) GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5701) goto out; + /* It is best to perform DMA test with maximum write burst size + * to expose the 5700/5701 write DMA bug. + */ + saved_dma_rwctrl = tp->dma_rwctrl; + tp->dma_rwctrl &= ~DMA_RWCTRL_WRITE_BNDRY_MASK; + tw32(TG3PCI_DMA_RW_CTRL, tp->dma_rwctrl); + while (1) { u32 *p = buf, i; @@ -8906,8 +9116,9 @@ static int __devinit tg3_test_dma(struct tg3 *tp) if (p[i] == i) continue; - if ((tp->dma_rwctrl & DMA_RWCTRL_WRITE_BNDRY_MASK) == - DMA_RWCTRL_WRITE_BNDRY_DISAB) { + if ((tp->dma_rwctrl & DMA_RWCTRL_WRITE_BNDRY_MASK) != + DMA_RWCTRL_WRITE_BNDRY_16) { + tp->dma_rwctrl &= ~DMA_RWCTRL_WRITE_BNDRY_MASK; tp->dma_rwctrl |= DMA_RWCTRL_WRITE_BNDRY_16; tw32(TG3PCI_DMA_RW_CTRL, tp->dma_rwctrl); break; @@ -8924,6 +9135,14 @@ static int __devinit tg3_test_dma(struct tg3 *tp) break; } } + if ((tp->dma_rwctrl & DMA_RWCTRL_WRITE_BNDRY_MASK) != + DMA_RWCTRL_WRITE_BNDRY_16) { + /* DMA test passed without adjusting DMA boundary, + * just restore the calculated DMA boundary + */ + tp->dma_rwctrl = saved_dma_rwctrl; + tw32(TG3PCI_DMA_RW_CTRL, tp->dma_rwctrl); + } out: pci_free_consistent(tp->pdev, TEST_BUFFER_SIZE, buf, buf_dma); @@ -9011,6 +9230,31 @@ static struct pci_dev * __devinit tg3_find_5704_peer(struct tg3 *tp) return peer; } +static void __devinit tg3_init_coal(struct tg3 *tp) +{ + struct ethtool_coalesce *ec = &tp->coal; + + memset(ec, 0, sizeof(*ec)); + ec->cmd = ETHTOOL_GCOALESCE; + ec->rx_coalesce_usecs = LOW_RXCOL_TICKS; + ec->tx_coalesce_usecs = LOW_TXCOL_TICKS; + ec->rx_max_coalesced_frames = LOW_RXMAX_FRAMES; + ec->tx_max_coalesced_frames = LOW_TXMAX_FRAMES; + ec->rx_coalesce_usecs_irq = DEFAULT_RXCOAL_TICK_INT; + ec->tx_coalesce_usecs_irq = DEFAULT_TXCOAL_TICK_INT; + ec->rx_max_coalesced_frames_irq = DEFAULT_RXCOAL_MAXF_INT; + ec->tx_max_coalesced_frames_irq = DEFAULT_TXCOAL_MAXF_INT; + ec->stats_block_coalesce_usecs = DEFAULT_STAT_COAL_TICKS; + + if (tp->coalesce_mode & (HOSTCC_MODE_CLRTICK_RXBD | + HOSTCC_MODE_CLRTICK_TXBD)) { + ec->rx_coalesce_usecs = LOW_RXCOL_TICKS_CLRTCKS; + ec->rx_coalesce_usecs_irq = DEFAULT_RXCOAL_TICK_INT_CLRTCKS; + ec->tx_coalesce_usecs = LOW_TXCOL_TICKS_CLRTCKS; + ec->tx_coalesce_usecs_irq = DEFAULT_TXCOAL_TICK_INT_CLRTCKS; + } +} + static int __devinit tg3_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -9256,6 +9500,8 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, /* flow control autonegotiation is default behavior */ tp->tg3_flags |= TG3_FLAG_PAUSE_AUTONEG; + tg3_init_coal(tp); + err = register_netdev(dev); if (err) { printk(KERN_ERR PFX "Cannot register net device, " @@ -9298,6 +9544,8 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, (tp->tg3_flags & TG3_FLAG_SPLIT_MODE) != 0, (tp->tg3_flags2 & TG3_FLG2_NO_ETH_WIRE_SPEED) == 0, (tp->tg3_flags2 & TG3_FLG2_TSO_CAPABLE) != 0); + printk(KERN_INFO "%s: dma_rwctrl[%08x]\n", + dev->name, tp->dma_rwctrl); return 0; diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index 8de6f21037ba..993f84c93dc4 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -876,10 +876,12 @@ #define HOSTCC_STATUS_ERROR_ATTN 0x00000004 #define HOSTCC_RXCOL_TICKS 0x00003c08 #define LOW_RXCOL_TICKS 0x00000032 +#define LOW_RXCOL_TICKS_CLRTCKS 0x00000014 #define DEFAULT_RXCOL_TICKS 0x00000048 #define HIGH_RXCOL_TICKS 0x00000096 #define HOSTCC_TXCOL_TICKS 0x00003c0c #define LOW_TXCOL_TICKS 0x00000096 +#define LOW_TXCOL_TICKS_CLRTCKS 0x00000048 #define DEFAULT_TXCOL_TICKS 0x0000012c #define HIGH_TXCOL_TICKS 0x00000145 #define HOSTCC_RXMAX_FRAMES 0x00003c10 @@ -892,8 +894,10 @@ #define HIGH_TXMAX_FRAMES 0x00000052 #define HOSTCC_RXCOAL_TICK_INT 0x00003c18 #define DEFAULT_RXCOAL_TICK_INT 0x00000019 +#define DEFAULT_RXCOAL_TICK_INT_CLRTCKS 0x00000014 #define HOSTCC_TXCOAL_TICK_INT 0x00003c1c #define DEFAULT_TXCOAL_TICK_INT 0x00000019 +#define DEFAULT_TXCOAL_TICK_INT_CLRTCKS 0x00000014 #define HOSTCC_RXCOAL_MAXF_INT 0x00003c20 #define DEFAULT_RXCOAL_MAXF_INT 0x00000005 #define HOSTCC_TXCOAL_MAXF_INT 0x00003c24 @@ -2023,6 +2027,7 @@ struct tg3 { struct tg3_hw_status *hw_status; dma_addr_t status_mapping; + u32 last_tag; u32 msg_enable; @@ -2068,6 +2073,7 @@ struct tg3 { u32 rx_offset; u32 tg3_flags; +#define TG3_FLAG_TAGGED_STATUS 0x00000001 #define TG3_FLAG_TXD_MBOX_HWBUG 0x00000002 #define TG3_FLAG_RX_CHECKSUMS 0x00000004 #define TG3_FLAG_USE_LINKCHG_REG 0x00000008 @@ -2225,7 +2231,7 @@ struct tg3 { #define SST_25VF0X0_PAGE_SIZE 4098 - + struct ethtool_coalesce coal; }; #endif /* !(_T3_H) */ diff --git a/fs/namei.c b/fs/namei.c index defe6781e003..dd78f01b6de8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1580,6 +1580,7 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir) fail: return dentry; } +EXPORT_SYMBOL_GPL(lookup_create); int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) { diff --git a/include/net/act_generic.h b/include/net/act_generic.h index 95b120781c14..c9daa7e52300 100644 --- a/include/net/act_generic.h +++ b/include/net/act_generic.h @@ -2,8 +2,8 @@ * include/net/act_generic.h * */ -#ifndef ACT_GENERIC_H -#define ACT_GENERIC_H +#ifndef _NET_ACT_GENERIC_H +#define _NET_ACT_GENERIC_H static inline int tcf_defact_release(struct tcf_defact *p, int bind) { int ret = 0; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index daebd93fd8a0..760dc8238d65 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -490,6 +490,14 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) /* Partially cloned skb? */ if (skb_shared(frag)) goto slow_path; + + BUG_ON(frag->sk); + if (skb->sk) { + sock_hold(skb->sk); + frag->sk = skb->sk; + frag->destructor = sock_wfree; + skb->truesize -= frag->truesize; + } } /* Everything is OK. Generate! */ diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index faa6176bbeb1..de21da00057f 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -508,7 +508,6 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, rc = NF_ACCEPT; /* do not touch skb anymore */ atomic_inc(&cp->in_pkts); - __ip_vs_conn_put(cp); goto out; } diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 28d9425d5c39..09e824622977 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -940,37 +940,25 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct, struct sk_buff * ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user) { - struct sock *sk = skb->sk; #ifdef CONFIG_NETFILTER_DEBUG unsigned int olddebug = skb->nf_debug; #endif - if (sk) { - sock_hold(sk); - skb_orphan(skb); - } + skb_orphan(skb); local_bh_disable(); skb = ip_defrag(skb, user); local_bh_enable(); - if (!skb) { - if (sk) - sock_put(sk); - return skb; - } - - if (sk) { - skb_set_owner_w(skb, sk); - sock_put(sk); - } - - ip_send_check(skb->nh.iph); - skb->nfcache |= NFC_ALTERED; + if (skb) { + ip_send_check(skb->nh.iph); + skb->nfcache |= NFC_ALTERED; #ifdef CONFIG_NETFILTER_DEBUG - /* Packet path as if nothing had happened. */ - skb->nf_debug = olddebug; + /* Packet path as if nothing had happened. */ + skb->nf_debug = olddebug; #endif + } + return skb; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 0f0711417c9d..b78a53586804 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -552,13 +552,17 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) skb_headroom(frag) < hlen) goto slow_path; - /* Correct socket ownership. */ - if (frag->sk == NULL) - goto slow_path; - /* Partially cloned skb? */ if (skb_shared(frag)) goto slow_path; + + BUG_ON(frag->sk); + if (skb->sk) { + sock_hold(skb->sk); + frag->sk = skb->sk; + frag->destructor = sock_wfree; + skb->truesize -= frag->truesize; + } } err = 0; @@ -1116,12 +1120,10 @@ int ip6_push_pending_frames(struct sock *sk) tail_skb = &(tmp_skb->next); skb->len += tmp_skb->len; skb->data_len += tmp_skb->len; -#if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */ skb->truesize += tmp_skb->truesize; __sock_put(tmp_skb->sk); tmp_skb->destructor = NULL; tmp_skb->sk = NULL; -#endif } ipv6_addr_copy(final_dst, &fl->fl6_dst); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 733bf52cef3e..e41ce458c2a9 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -735,11 +735,15 @@ static inline int do_one_broadcast(struct sock *sk, sock_hold(sk); if (p->skb2 == NULL) { - if (atomic_read(&p->skb->users) != 1) { + if (skb_shared(p->skb)) { p->skb2 = skb_clone(p->skb, p->allocation); } else { - p->skb2 = p->skb; - atomic_inc(&p->skb->users); + p->skb2 = skb_get(p->skb); + /* + * skb ownership may have been set when + * delivered to a previous socket. + */ + skb_orphan(p->skb2); } } if (p->skb2 == NULL) { @@ -785,11 +789,12 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) do_one_broadcast(sk, &info); + kfree_skb(skb); + netlink_unlock_table(); if (info.skb2) kfree_skb(info.skb2); - kfree_skb(skb); if (info.delivered) { if (info.congested && (allocation & __GFP_WAIT)) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c478fc8db776..c420eba4876b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -770,33 +770,12 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd); if (err) goto out_mknod_parent; - /* - * Yucky last component or no last component at all? - * (foo/., foo/.., /////) - */ - err = -EEXIST; - if (nd.last_type != LAST_NORM) - goto out_mknod; - /* - * Lock the directory. - */ - down(&nd.dentry->d_inode->i_sem); - /* - * Do the final lookup. - */ - dentry = lookup_hash(&nd.last, nd.dentry); + + dentry = lookup_create(&nd, 0); err = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_mknod_unlock; - err = -ENOENT; - /* - * Special case - lookup gave negative, but... we had foo/bar/ - * From the vfs_mknod() POV we just have a negative dentry - - * all is fine. Let's be bastards - you had / on the end, you've - * been asking for (non-existent) directory. -ENOENT for you. - */ - if (nd.last.name[nd.last.len] && !dentry->d_inode) - goto out_mknod_dput; + /* * All right, let's create it. */ @@ -845,7 +824,6 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) dput(dentry); out_mknod_unlock: up(&nd.dentry->d_inode->i_sem); -out_mknod: path_release(&nd); out_mknod_parent: if (err==-EEXIST) diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 080aae243ce0..2f4531fcaca2 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -698,7 +698,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) return -ENOMEM; if (skb1->sk) - skb_set_owner_w(skb, skb1->sk); + skb_set_owner_w(skb2, skb1->sk); /* Looking around. Are we still alive? * OK, link new skb, drop old one */ diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 5ddda2c98af9..97509011c274 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -34,14 +34,21 @@ static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type) { struct rtattr *rt = xfrma[type - 1]; struct xfrm_algo *algp; + int len; if (!rt) return 0; - if ((rt->rta_len - sizeof(*rt)) < sizeof(*algp)) + len = (rt->rta_len - sizeof(*rt)) - sizeof(*algp); + if (len < 0) return -EINVAL; algp = RTA_DATA(rt); + + len -= (algp->alg_key_len + 7U) / 8; + if (len < 0) + return -EINVAL; + switch (type) { case XFRMA_ALG_AUTH: if (!algp->alg_key_len && @@ -162,6 +169,7 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, struct rtattr *rta = u_arg; struct xfrm_algo *p, *ualg; struct xfrm_algo_desc *algo; + int len; if (!rta) return 0; @@ -173,11 +181,12 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, return -ENOSYS; *props = algo->desc.sadb_alg_id; - p = kmalloc(sizeof(*ualg) + ualg->alg_key_len, GFP_KERNEL); + len = sizeof(*ualg) + (ualg->alg_key_len + 7U) / 8; + p = kmalloc(len, GFP_KERNEL); if (!p) return -ENOMEM; - memcpy(p, ualg, sizeof(*ualg) + ualg->alg_key_len); + memcpy(p, ualg, len); *algpp = p; return 0; }