2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* INET An implementation of the TCP/IP protocol suite for the LINUX
|
|
|
|
* operating system. INET is implemented using the BSD Socket
|
|
|
|
* interface as the means of communication with the user level.
|
|
|
|
*
|
|
|
|
* Pseudo-driver for the loopback interface.
|
|
|
|
*
|
|
|
|
* Version: @(#)loopback.c 1.0.4b 08/16/93
|
|
|
|
*
|
2005-05-06 07:16:16 +08:00
|
|
|
* Authors: Ross Biro
|
2005-04-17 06:20:36 +08:00
|
|
|
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
|
|
|
|
* Donald Becker, <becker@scyld.com>
|
|
|
|
*
|
|
|
|
* Alan Cox : Fixed oddments for NET3.014
|
|
|
|
* Alan Cox : Rejig for NET3.029 snap #3
|
|
|
|
* Alan Cox : Fixed NET3.029 bugs and sped up
|
|
|
|
* Larry McVoy : Tiny tweak to double performance
|
|
|
|
* Alan Cox : Backed out LMV's tweak - the linux mm
|
|
|
|
* can't take it...
|
|
|
|
* Michael Griffith: Don't bother computing the checksums
|
|
|
|
* on packets received on the loopback
|
|
|
|
* interface.
|
|
|
|
* Alexey Kuznetsov: Potential hang under some extreme
|
|
|
|
* cases removed.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
*/
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/jiffies.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/interrupt.h>
|
|
|
|
#include <linux/fs.h>
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/string.h>
|
|
|
|
#include <linux/socket.h>
|
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/fcntl.h>
|
|
|
|
#include <linux/in.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
|
|
|
|
#include <asm/system.h>
|
|
|
|
#include <asm/uaccess.h>
|
|
|
|
#include <asm/io.h>
|
|
|
|
|
|
|
|
#include <linux/inet.h>
|
|
|
|
#include <linux/netdevice.h>
|
|
|
|
#include <linux/etherdevice.h>
|
|
|
|
#include <linux/skbuff.h>
|
|
|
|
#include <linux/ethtool.h>
|
|
|
|
#include <net/sock.h>
|
|
|
|
#include <net/checksum.h>
|
|
|
|
#include <linux/if_ether.h> /* For the statistics structure. */
|
|
|
|
#include <linux/if_arp.h> /* For ARPHRD_ETHER */
|
|
|
|
#include <linux/ip.h>
|
|
|
|
#include <linux/tcp.h>
|
|
|
|
#include <linux/percpu.h>
|
2007-09-27 13:10:56 +08:00
|
|
|
#include <net/net_namespace.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2006-10-19 11:51:57 +08:00
|
|
|
struct pcpu_lstats {
|
2010-06-14 13:59:22 +08:00
|
|
|
u64 packets;
|
|
|
|
u64 bytes;
|
|
|
|
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
|
|
|
|
seqcount_t seq;
|
|
|
|
#endif
|
2009-04-18 06:03:10 +08:00
|
|
|
unsigned long drops;
|
2006-10-19 11:51:57 +08:00
|
|
|
};
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2010-06-14 13:59:22 +08:00
|
|
|
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
|
|
|
|
static void inline lstats_update_begin(struct pcpu_lstats *lstats)
|
|
|
|
{
|
|
|
|
write_seqcount_begin(&lstats->seq);
|
|
|
|
}
|
|
|
|
static void inline lstats_update_end(struct pcpu_lstats *lstats)
|
|
|
|
{
|
|
|
|
write_seqcount_end(&lstats->seq);
|
|
|
|
}
|
|
|
|
static void inline lstats_fetch_and_add(u64 *packets, u64 *bytes, const struct pcpu_lstats *lstats)
|
|
|
|
{
|
|
|
|
u64 tpackets, tbytes;
|
|
|
|
unsigned int seq;
|
|
|
|
|
|
|
|
do {
|
|
|
|
seq = read_seqcount_begin(&lstats->seq);
|
|
|
|
tpackets = lstats->packets;
|
|
|
|
tbytes = lstats->bytes;
|
|
|
|
} while (read_seqcount_retry(&lstats->seq, seq));
|
|
|
|
|
|
|
|
*packets += tpackets;
|
|
|
|
*bytes += tbytes;
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
static void inline lstats_update_begin(struct pcpu_lstats *lstats)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
static void inline lstats_update_end(struct pcpu_lstats *lstats)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
static void inline lstats_fetch_and_add(u64 *packets, u64 *bytes, const struct pcpu_lstats *lstats)
|
|
|
|
{
|
|
|
|
*packets += lstats->packets;
|
|
|
|
*bytes += lstats->bytes;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* The higher levels take care of making this non-reentrant (it's
|
|
|
|
* called with bh's disabled).
|
|
|
|
*/
|
2009-09-01 03:50:58 +08:00
|
|
|
static netdev_tx_t loopback_xmit(struct sk_buff *skb,
|
|
|
|
struct net_device *dev)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2010-02-16 23:21:08 +08:00
|
|
|
struct pcpu_lstats __percpu *pcpu_lstats;
|
|
|
|
struct pcpu_lstats *lb_stats;
|
2009-04-18 06:03:10 +08:00
|
|
|
int len;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
skb_orphan(skb);
|
|
|
|
|
2009-04-18 06:03:10 +08:00
|
|
|
skb->protocol = eth_type_trans(skb, dev);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2007-09-28 08:09:39 +08:00
|
|
|
/* it's OK to use per_cpu_ptr() because BHs are off */
|
2010-02-16 23:21:08 +08:00
|
|
|
pcpu_lstats = (void __percpu __force *)dev->ml_priv;
|
2009-10-03 18:48:22 +08:00
|
|
|
lb_stats = this_cpu_ptr(pcpu_lstats);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2009-04-18 06:03:10 +08:00
|
|
|
len = skb->len;
|
|
|
|
if (likely(netif_rx(skb) == NET_RX_SUCCESS)) {
|
2010-06-14 13:59:22 +08:00
|
|
|
lstats_update_begin(lb_stats);
|
2009-04-18 06:03:10 +08:00
|
|
|
lb_stats->bytes += len;
|
|
|
|
lb_stats->packets++;
|
2010-06-14 13:59:22 +08:00
|
|
|
lstats_update_end(lb_stats);
|
2009-04-18 06:03:10 +08:00
|
|
|
} else
|
|
|
|
lb_stats->drops++;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2009-06-23 14:03:08 +08:00
|
|
|
return NETDEV_TX_OK;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2010-06-14 13:59:22 +08:00
|
|
|
static struct rtnl_link_stats64 *loopback_get_stats64(struct net_device *dev)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2010-02-16 23:21:08 +08:00
|
|
|
const struct pcpu_lstats __percpu *pcpu_lstats;
|
2010-06-14 13:59:22 +08:00
|
|
|
struct rtnl_link_stats64 *stats = &dev->stats64;
|
|
|
|
u64 bytes = 0;
|
|
|
|
u64 packets = 0;
|
|
|
|
u64 drops = 0;
|
2005-04-17 06:20:36 +08:00
|
|
|
int i;
|
|
|
|
|
2010-02-16 23:21:08 +08:00
|
|
|
pcpu_lstats = (void __percpu __force *)dev->ml_priv;
|
2006-03-28 17:56:37 +08:00
|
|
|
for_each_possible_cpu(i) {
|
2006-10-19 11:51:57 +08:00
|
|
|
const struct pcpu_lstats *lb_stats;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2007-09-27 13:08:12 +08:00
|
|
|
lb_stats = per_cpu_ptr(pcpu_lstats, i);
|
2010-06-14 13:59:22 +08:00
|
|
|
lstats_fetch_and_add(&packets, &bytes, lb_stats);
|
2009-04-18 06:03:10 +08:00
|
|
|
drops += lb_stats->drops;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
2006-10-19 11:51:57 +08:00
|
|
|
stats->rx_packets = packets;
|
|
|
|
stats->tx_packets = packets;
|
2009-04-18 06:03:10 +08:00
|
|
|
stats->rx_dropped = drops;
|
|
|
|
stats->rx_errors = drops;
|
|
|
|
stats->rx_bytes = bytes;
|
|
|
|
stats->tx_bytes = bytes;
|
2005-04-17 06:20:36 +08:00
|
|
|
return stats;
|
|
|
|
}
|
|
|
|
|
2006-09-28 11:33:34 +08:00
|
|
|
/*
 * ethtool getter shared by several loopback_ethtool_ops entries:
 * ignores @dev and unconditionally reports 1.
 */
static u32 always_on(struct net_device *dev)
{
	const u32 enabled = 1;

	return enabled;
}
|
|
|
|
|
2006-09-14 02:30:00 +08:00
|
|
|
static const struct ethtool_ops loopback_ethtool_ops = {
|
2006-09-28 11:33:34 +08:00
|
|
|
.get_link = always_on,
|
2005-04-17 06:20:36 +08:00
|
|
|
.set_tso = ethtool_op_set_tso,
|
2006-09-28 11:33:34 +08:00
|
|
|
.get_tx_csum = always_on,
|
|
|
|
.get_sg = always_on,
|
|
|
|
.get_rx_csum = always_on,
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
2007-09-27 13:08:12 +08:00
|
|
|
static int loopback_dev_init(struct net_device *dev)
|
|
|
|
{
|
2010-02-16 23:21:08 +08:00
|
|
|
struct pcpu_lstats __percpu *lstats;
|
2007-09-27 13:08:12 +08:00
|
|
|
|
|
|
|
lstats = alloc_percpu(struct pcpu_lstats);
|
|
|
|
if (!lstats)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2010-02-16 23:21:08 +08:00
|
|
|
dev->ml_priv = (void __force *)lstats;
|
2007-09-27 13:08:12 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void loopback_dev_free(struct net_device *dev)
|
|
|
|
{
|
2010-02-16 23:21:08 +08:00
|
|
|
struct pcpu_lstats __percpu *lstats =
|
|
|
|
(void __percpu __force *)dev->ml_priv;
|
2007-09-27 13:08:12 +08:00
|
|
|
|
|
|
|
free_percpu(lstats);
|
|
|
|
free_netdev(dev);
|
|
|
|
}
|
|
|
|
|
2008-11-20 13:46:18 +08:00
|
|
|
static const struct net_device_ops loopback_ops = {
|
|
|
|
.ndo_init = loopback_dev_init,
|
2008-11-21 12:14:53 +08:00
|
|
|
.ndo_start_xmit= loopback_xmit,
|
2010-06-14 13:59:22 +08:00
|
|
|
.ndo_get_stats64 = loopback_get_stats64,
|
2008-11-20 13:46:18 +08:00
|
|
|
};
|
|
|
|
|
2006-09-28 11:33:34 +08:00
|
|
|
/*
|
2007-09-28 08:09:39 +08:00
|
|
|
* The loopback device is special. There is only one instance
|
|
|
|
* per network namespace.
|
2006-09-28 11:33:34 +08:00
|
|
|
*/
|
2007-09-26 10:18:04 +08:00
|
|
|
static void loopback_setup(struct net_device *dev)
|
|
|
|
{
|
|
|
|
dev->mtu = (16 * 1024) + 20 + 20 + 12;
|
|
|
|
dev->hard_header_len = ETH_HLEN; /* 14 */
|
|
|
|
dev->addr_len = ETH_ALEN; /* 6 */
|
|
|
|
dev->tx_queue_len = 0;
|
|
|
|
dev->type = ARPHRD_LOOPBACK; /* 0x0001*/
|
|
|
|
dev->flags = IFF_LOOPBACK;
|
net: release dst entry in dev_hard_start_xmit()
One point of contention in high network loads is the dst_release() performed
when a transmited skb is freed. This is because NIC tx completion calls
dev_kree_skb() long after original call to dev_queue_xmit(skb).
CPU cache is cold and the atomic op in dst_release() stalls. On SMP, this is
quite visible if one CPU is 100% handling softirqs for a network device,
since dst_clone() is done by other cpus, involving cache line ping pongs.
It seems right place to release dst is in dev_hard_start_xmit(), for most
devices but ones that are virtual, and some exceptions.
David Miller suggested to define a new device flag, set in alloc_netdev_mq()
(so that most devices set it at init time), and carefuly unset in devices
which dont want a NULL skb->dst in their ndo_start_xmit().
List of devices that must clear this flag is :
- loopback device, because it calls netif_rx() and quoting Patrick :
"ip_route_input() doesn't accept loopback addresses, so loopback packets
already need to have a dst_entry attached."
- appletalk/ipddp.c : needs skb->dst in its xmit function
- And all devices that call again dev_queue_xmit() from their xmit function
(as some classifiers need skb->dst) : bonding, vlan, macvlan, eql, ifb, hdlc_fr
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2009-05-19 13:19:19 +08:00
|
|
|
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
|
2007-09-26 10:18:04 +08:00
|
|
|
dev->features = NETIF_F_SG | NETIF_F_FRAGLIST
|
|
|
|
| NETIF_F_TSO
|
|
|
|
| NETIF_F_NO_CSUM
|
|
|
|
| NETIF_F_HIGHDMA
|
|
|
|
| NETIF_F_LLTX
|
2007-12-27 22:17:22 +08:00
|
|
|
| NETIF_F_NETNS_LOCAL;
|
2007-09-26 10:18:04 +08:00
|
|
|
dev->ethtool_ops = &loopback_ethtool_ops;
|
2007-10-09 16:40:57 +08:00
|
|
|
dev->header_ops = ð_header_ops;
|
2008-11-20 13:46:18 +08:00
|
|
|
dev->netdev_ops = &loopback_ops;
|
|
|
|
dev->destructor = loopback_dev_free;
|
2007-09-26 10:18:04 +08:00
|
|
|
}
|
2007-09-26 10:16:28 +08:00
|
|
|
|
2005-08-19 05:05:18 +08:00
|
|
|
/* Setup and register the loopback device. */
|
2007-10-09 11:38:39 +08:00
|
|
|
static __net_init int loopback_net_init(struct net *net)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2007-09-26 10:18:04 +08:00
|
|
|
struct net_device *dev;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
err = -ENOMEM;
|
|
|
|
dev = alloc_netdev(0, "lo", loopback_setup);
|
|
|
|
if (!dev)
|
|
|
|
goto out;
|
2007-07-31 07:37:19 +08:00
|
|
|
|
2008-03-25 20:47:49 +08:00
|
|
|
dev_net_set(dev, net);
|
2007-09-26 10:18:04 +08:00
|
|
|
err = register_netdev(dev);
|
2007-07-31 07:37:19 +08:00
|
|
|
if (err)
|
2007-09-26 10:18:04 +08:00
|
|
|
goto out_free_netdev;
|
2007-07-31 07:37:19 +08:00
|
|
|
|
2007-09-27 13:10:56 +08:00
|
|
|
net->loopback_dev = dev;
|
2007-10-16 03:55:33 +08:00
|
|
|
return 0;
|
2007-09-26 10:18:04 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2007-09-26 10:18:04 +08:00
|
|
|
out_free_netdev:
|
|
|
|
free_netdev(dev);
|
2007-10-16 03:55:33 +08:00
|
|
|
out:
|
2009-11-26 07:14:13 +08:00
|
|
|
if (net_eq(net, &init_net))
|
2007-10-16 03:55:33 +08:00
|
|
|
panic("loopback: Failed to register netdevice: %d\n", err);
|
|
|
|
return err;
|
2007-09-26 10:18:04 +08:00
|
|
|
}
|
|
|
|
|
2008-11-08 14:54:20 +08:00
|
|
|
/* Registered in net/core/dev.c */
|
|
|
|
struct pernet_operations __net_initdata loopback_net_ops = {
|
2007-09-27 13:10:56 +08:00
|
|
|
.init = loopback_net_init,
|
|
|
|
};
|