mirror of https://gitee.com/openkylin/linux.git
Merge branch 'mw-3.1-jul25' of git://oss.oracle.com/git/smushran/linux-2.6 into ocfs2-fixes
This commit is contained in:
commit
99b1bb61b2
8
CREDITS
8
CREDITS
|
@ -518,6 +518,14 @@ N: Zach Brown
|
|||
E: zab@zabbo.net
|
||||
D: maestro pci sound
|
||||
|
||||
N: David Brownell
|
||||
D: Kernel engineer, mentor, and friend. Maintained USB EHCI and
|
||||
D: gadget layers, SPI subsystem, GPIO subsystem, and more than a few
|
||||
D: device drivers. His encouragement also helped many engineers get
|
||||
D: started working on the Linux kernel. David passed away in early
|
||||
D: 2011, and will be greatly missed.
|
||||
W: https://lkml.org/lkml/2011/4/5/36
|
||||
|
||||
N: Gary Brubaker
|
||||
E: xavyer@ix.netcom.com
|
||||
D: USB Serial Empeg Empeg-car Mark I/II Driver
|
||||
|
|
|
@ -0,0 +1,103 @@
|
|||
What: /dev/fw[0-9]+
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: linux1394-devel@lists.sourceforge.net
|
||||
Description:
|
||||
The character device files /dev/fw* are the interface between
|
||||
firewire-core and IEEE 1394 device drivers implemented in
|
||||
userspace. The ioctl(2)- and read(2)-based ABI is defined and
|
||||
documented in <linux/firewire-cdev.h>.
|
||||
|
||||
This ABI offers most of the features which firewire-core also
|
||||
exposes to kernelspace IEEE 1394 drivers.
|
||||
|
||||
Each /dev/fw* is associated with one IEEE 1394 node, which can
|
||||
be a remote or local node. Operations on a /dev/fw* file have
|
||||
different scope:
|
||||
- The 1394 node which is associated with the file:
|
||||
- Asynchronous request transmission
|
||||
- Get the Configuration ROM
|
||||
- Query node ID
|
||||
- Query maximum speed of the path between this node
|
||||
and local node
|
||||
- The 1394 bus (i.e. "card") to which the node is attached:
|
||||
- Isochronous stream transmission and reception
|
||||
- Asynchronous stream transmission and reception
|
||||
- Asynchronous broadcast request transmission
|
||||
- PHY packet transmission and reception
|
||||
- Allocate, reallocate, deallocate isochronous
|
||||
resources (channels, bandwidth) at the bus's IRM
|
||||
- Query node IDs of local node, root node, IRM, bus
|
||||
manager
|
||||
- Query cycle time
|
||||
- Bus reset initiation, bus reset event reception
|
||||
- All 1394 buses:
|
||||
- Allocation of IEEE 1212 address ranges on the local
|
||||
link layers, reception of inbound requests to such
|
||||
an address range, asynchronous response transmission
|
||||
to inbound requests
|
||||
- Addition of descriptors or directories to the local
|
||||
nodes' Configuration ROM
|
||||
|
||||
Due to the different scope of operations and in order to let
|
||||
userland implement different access permission models, some
|
||||
operations are restricted to /dev/fw* files that are associated
|
||||
with a local node:
|
||||
- Addition of descriptors or directories to the local
|
||||
nodes' Configuration ROM
|
||||
- PHY packet transmission and reception
|
||||
|
||||
A /dev/fw* file remains associated with one particular node
|
||||
during its entire lifetime. Bus topology changes, and hence
|
||||
node ID changes, are tracked by firewire-core. ABI users do not
|
||||
need to be aware of topology.
|
||||
|
||||
The following file operations are supported:
|
||||
|
||||
open(2)
|
||||
Currently the only useful flags are O_RDWR.
|
||||
|
||||
ioctl(2)
|
||||
Initiate various actions. Some take immediate effect, others
|
||||
are performed asynchronously while or after the ioctl returns.
|
||||
See the inline documentation in <linux/firewire-cdev.h> for
|
||||
descriptions of all ioctls.
|
||||
|
||||
poll(2), select(2), epoll_wait(2) etc.
|
||||
Watch for events to become available to be read.
|
||||
|
||||
read(2)
|
||||
Receive various events. There are solicited events like
|
||||
outbound asynchronous transaction completion or isochronous
|
||||
buffer completion, and unsolicited events such as bus resets,
|
||||
request reception, or PHY packet reception. Always use a read
|
||||
buffer which is large enough to receive the largest event that
|
||||
could ever arrive. See <linux/firewire-cdev.h> for descriptions
|
||||
of all event types and for which ioctls affect reception of
|
||||
events.
|
||||
|
||||
mmap(2)
|
||||
Allocate a DMA buffer for isochronous reception or transmission
|
||||
and map it into the process address space. The arguments should
|
||||
be used as follows: addr = NULL, length = the desired buffer
|
||||
size, i.e. number of packets times size of largest packet,
|
||||
prot = at least PROT_READ for reception and at least PROT_WRITE
|
||||
for transmission, flags = MAP_SHARED, fd = the handle to the
|
||||
/dev/fw*, offset = 0.
|
||||
|
||||
Isochronous reception works in packet-per-buffer fashion except
|
||||
for multichannel reception which works in buffer-fill mode.
|
||||
|
||||
munmap(2)
|
||||
Unmap the isochronous I/O buffer from the process address space.
|
||||
|
||||
close(2)
|
||||
Besides stopping and freeing I/O contexts that were associated
|
||||
with the file descriptor, back out any changes to the local
|
||||
nodes' Configuration ROM. Deallocate isochronous channels and
|
||||
bandwidth at the IRM that were marked for kernel-assisted
|
||||
re- and deallocation.
|
||||
|
||||
Users: libraw1394
|
||||
libdc1394
|
||||
tools like jujuutils, fwhack, ...
|
|
@ -0,0 +1,122 @@
|
|||
What: /sys/bus/firewire/devices/fw[0-9]+/
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: linux1394-devel@lists.sourceforge.net
|
||||
Description:
|
||||
IEEE 1394 node device attributes.
|
||||
Read-only. Mutable during the node device's lifetime.
|
||||
See IEEE 1212 for semantic definitions.
|
||||
|
||||
config_rom
|
||||
Contents of the Configuration ROM register.
|
||||
Binary attribute; an array of host-endian u32.
|
||||
|
||||
guid
|
||||
The node's EUI-64 in the bus information block of
|
||||
Configuration ROM.
|
||||
Hexadecimal string representation of an u64.
|
||||
|
||||
|
||||
What: /sys/bus/firewire/devices/fw[0-9]+/units
|
||||
Date: June 2009
|
||||
KernelVersion: 2.6.31
|
||||
Contact: linux1394-devel@lists.sourceforge.net
|
||||
Description:
|
||||
IEEE 1394 node device attribute.
|
||||
Read-only. Mutable during the node device's lifetime.
|
||||
See IEEE 1212 for semantic definitions.
|
||||
|
||||
units
|
||||
Summary of all units present in an IEEE 1394 node.
|
||||
Contains space-separated tuples of specifier_id and
|
||||
version of each unit present in the node. Specifier_id
|
||||
and version are hexadecimal string representations of
|
||||
u24 of the respective unit directory entries.
|
||||
Specifier_id and version within each tuple are separated
|
||||
by a colon.
|
||||
|
||||
Users: udev rules to set ownership and access permissions or ACLs of
|
||||
/dev/fw[0-9]+ character device files
|
||||
|
||||
|
||||
What: /sys/bus/firewire/devices/fw[0-9]+[.][0-9]+/
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: linux1394-devel@lists.sourceforge.net
|
||||
Description:
|
||||
IEEE 1394 unit device attributes.
|
||||
Read-only. Immutable during the unit device's lifetime.
|
||||
See IEEE 1212 for semantic definitions.
|
||||
|
||||
modalias
|
||||
Same as MODALIAS in the uevent at device creation.
|
||||
|
||||
rom_index
|
||||
Offset of the unit directory within the parent device's
|
||||
(node device's) Configuration ROM, in quadlets.
|
||||
Decimal string representation.
|
||||
|
||||
|
||||
What: /sys/bus/firewire/devices/*/
|
||||
Date: May 2007
|
||||
KernelVersion: 2.6.22
|
||||
Contact: linux1394-devel@lists.sourceforge.net
|
||||
Description:
|
||||
Attributes common to IEEE 1394 node devices and unit devices.
|
||||
Read-only. Mutable during the node device's lifetime.
|
||||
Immutable during the unit device's lifetime.
|
||||
See IEEE 1212 for semantic definitions.
|
||||
|
||||
These attributes are only created if the root directory of an
|
||||
IEEE 1394 node or the unit directory of an IEEE 1394 unit
|
||||
actually contains the corresponding entries.
|
||||
|
||||
hardware_version
|
||||
Hexadecimal string representation of an u24.
|
||||
|
||||
hardware_version_name
|
||||
Contents of a respective textual descriptor leaf.
|
||||
|
||||
model
|
||||
Hexadecimal string representation of an u24.
|
||||
|
||||
model_name
|
||||
Contents of a respective textual descriptor leaf.
|
||||
|
||||
specifier_id
|
||||
Hexadecimal string representation of an u24.
|
||||
Mandatory in unit directories according to IEEE 1212.
|
||||
|
||||
vendor
|
||||
Hexadecimal string representation of an u24.
|
||||
Mandatory in the root directory according to IEEE 1212.
|
||||
|
||||
vendor_name
|
||||
Contents of a respective textual descriptor leaf.
|
||||
|
||||
version
|
||||
Hexadecimal string representation of an u24.
|
||||
Mandatory in unit directories according to IEEE 1212.
|
||||
|
||||
|
||||
What: /sys/bus/firewire/drivers/sbp2/fw*/host*/target*/*:*:*:*/ieee1394_id
|
||||
formerly
|
||||
/sys/bus/ieee1394/drivers/sbp2/fw*/host*/target*/*:*:*:*/ieee1394_id
|
||||
Date: Feb 2004
|
||||
KernelVersion: 2.6.4
|
||||
Contact: linux1394-devel@lists.sourceforge.net
|
||||
Description:
|
||||
SCSI target port identifier and logical unit identifier of a
|
||||
logical unit of an SBP-2 target. The identifiers are specified
|
||||
in SAM-2...SAM-4 annex A. They are persistent and world-wide
|
||||
unique properties of the SBP-2 attached target.
|
||||
|
||||
Read-only attribute, immutable during the target's lifetime.
|
||||
Format, as exposed by firewire-sbp2 since 2.6.22, May 2007:
|
||||
Colon-separated hexadecimal string representations of
|
||||
u64 EUI-64 : u24 directory_ID : u16 LUN
|
||||
without 0x prefixes, without whitespace. The former sbp2 driver
|
||||
(removed in 2.6.37 after being superseded by firewire-sbp2) used
|
||||
a somewhat shorter format which was not as close to SAM.
|
||||
|
||||
Users: udev rules to create /dev/disk/by-id/ symlinks
|
|
@ -0,0 +1,27 @@
|
|||
On some architectures, when the kernel loads any userspace program it
|
||||
maps an ELF DSO into that program's address space. This DSO is called
|
||||
the vDSO and it often contains useful and highly-optimized alternatives
|
||||
to real syscalls.
|
||||
|
||||
These functions are called just like ordinary C functions according to
|
||||
your platform's ABI. Call them from a sensible context. (For example,
|
||||
if you set CS on x86 to something strange, the vDSO functions are
|
||||
within their rights to crash.) In addition, if you pass a bad
|
||||
pointer to a vDSO function, you might get SIGSEGV instead of -EFAULT.
|
||||
|
||||
To find the DSO, parse the auxiliary vector passed to the program's
|
||||
entry point. The AT_SYSINFO_EHDR entry will point to the vDSO.
|
||||
|
||||
The vDSO uses symbol versioning; whenever you request a symbol from the
|
||||
vDSO, specify the version you are expecting.
|
||||
|
||||
Programs that dynamically link to glibc will use the vDSO automatically.
|
||||
Otherwise, you can use the reference parser in Documentation/vDSO/parse_vdso.c.
|
||||
|
||||
Unless otherwise noted, the set of symbols with any given version and the
|
||||
ABI of those symbols is considered stable. It may vary across architectures,
|
||||
though.
|
||||
|
||||
(As of this writing, this ABI documentation has been confirmed for x86_64.
|
||||
The maintainers of the other vDSO-using architectures should confirm
|
||||
that it is correct for their architecture.)
|
|
@ -0,0 +1,56 @@
|
|||
What: /sys/class/backlight/<backlight>/<ambient light zone>_max
|
||||
What: /sys/class/backlight/<backlight>/l1_daylight_max
|
||||
What: /sys/class/backlight/<backlight>/l2_bright_max
|
||||
What: /sys/class/backlight/<backlight>/l3_office_max
|
||||
What: /sys/class/backlight/<backlight>/l4_indoor_max
|
||||
What: /sys/class/backlight/<backlight>/l5_dark_max
|
||||
Date: May 2011
|
||||
KernelVersion: 2.6.40
|
||||
Contact: device-drivers-devel@blackfin.uclinux.org
|
||||
Description:
|
||||
Control the maximum brightness for <ambient light zone>
|
||||
on this <backlight>. Values are between 0 and 127. This file
|
||||
will also show the brightness level stored for this
|
||||
<ambient light zone>.
|
||||
|
||||
What: /sys/class/backlight/<backlight>/<ambient light zone>_dim
|
||||
What: /sys/class/backlight/<backlight>/l2_bright_dim
|
||||
What: /sys/class/backlight/<backlight>/l3_office_dim
|
||||
What: /sys/class/backlight/<backlight>/l4_indoor_dim
|
||||
What: /sys/class/backlight/<backlight>/l5_dark_dim
|
||||
Date: May 2011
|
||||
KernelVersion: 2.6.40
|
||||
Contact: device-drivers-devel@blackfin.uclinux.org
|
||||
Description:
|
||||
Control the dim brightness for <ambient light zone>
|
||||
on this <backlight>. Values are between 0 and 127, typically
|
||||
set to 0. Full off when the backlight is disabled.
|
||||
This file will also show the dim brightness level stored for
|
||||
this <ambient light zone>.
|
||||
|
||||
What: /sys/class/backlight/<backlight>/ambient_light_level
|
||||
Date: May 2011
|
||||
KernelVersion: 2.6.40
|
||||
Contact: device-drivers-devel@blackfin.uclinux.org
|
||||
Description:
|
||||
Get conversion value of the light sensor.
|
||||
This value is updated every 80 ms (when the light sensor
|
||||
is enabled). Returns integer between 0 (dark) and
|
||||
8000 (max ambient brightness).
|
||||
|
||||
What: /sys/class/backlight/<backlight>/ambient_light_zone
|
||||
Date: May 2011
|
||||
KernelVersion: 2.6.40
|
||||
Contact: device-drivers-devel@blackfin.uclinux.org
|
||||
Description:
|
||||
Get/Set current ambient light zone. Reading returns
|
||||
integer between 1..5 (1 = daylight, 2 = bright, ..., 5 = dark).
|
||||
Writing a value between 1..5 forces the backlight controller
|
||||
to enter the corresponding ambient light zone.
|
||||
Writing 0 returns to normal/automatic ambient light level
|
||||
operation. The ambient light sensing feature on these devices
|
||||
is an extension to the API documented in
|
||||
Documentation/ABI/stable/sysfs-class-backlight.
|
||||
It can be enabled by writing the value stored in
|
||||
/sys/class/backlight/<backlight>/max_brightness to
|
||||
/sys/class/backlight/<backlight>/brightness.
|
|
@ -92,6 +92,14 @@ Description: The mouse has a tracking- and a distance-control-unit. These
|
|||
This file is writeonly.
|
||||
Users: http://roccat.sourceforge.net
|
||||
|
||||
What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/koneplus/roccatkoneplus<minor>/talk
|
||||
Date: May 2011
|
||||
Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
|
||||
Description: Used to activate some easy* functions of the mouse from outside.
|
||||
The data has to be 16 bytes long.
|
||||
This file is writeonly.
|
||||
Users: http://roccat.sourceforge.net
|
||||
|
||||
What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/koneplus/roccatkoneplus<minor>/tcu
|
||||
Date: October 2010
|
||||
Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
What: /sys/bus/hid/drivers/wiimote/<dev>/led1
|
||||
What: /sys/bus/hid/drivers/wiimote/<dev>/led2
|
||||
What: /sys/bus/hid/drivers/wiimote/<dev>/led3
|
||||
What: /sys/bus/hid/drivers/wiimote/<dev>/led4
|
||||
Date: July 2011
|
||||
KernelVersion: 3.1
|
||||
Contact: David Herrmann <dh.herrmann@googlemail.com>
|
||||
Description: Make it possible to set/get current led state. Reading from it
|
||||
returns 0 if led is off and 1 if it is on. Writing 0 to it
|
||||
disables the led, writing 1 enables it.
|
|
@ -2,13 +2,7 @@ Intro
|
|||
=====
|
||||
|
||||
This document is designed to provide a list of the minimum levels of
|
||||
software necessary to run the 2.6 kernels, as well as provide brief
|
||||
instructions regarding any other "Gotchas" users may encounter when
|
||||
trying life on the Bleeding Edge. If upgrading from a pre-2.4.x
|
||||
kernel, please consult the Changes file included with 2.4.x kernels for
|
||||
additional information; most of that information will not be repeated
|
||||
here. Basically, this document assumes that your system is already
|
||||
functional and running at least 2.4.x kernels.
|
||||
software necessary to run the 3.0 kernels.
|
||||
|
||||
This document is originally based on my "Changes" file for 2.0.x kernels
|
||||
and therefore owes credit to the same people as that file (Jared Mauch,
|
||||
|
@ -22,11 +16,10 @@ Upgrade to at *least* these software revisions before thinking you've
|
|||
encountered a bug! If you're unsure what version you're currently
|
||||
running, the suggested command should tell you.
|
||||
|
||||
Again, keep in mind that this list assumes you are already
|
||||
functionally running a Linux 2.4 kernel. Also, not all tools are
|
||||
necessary on all systems; obviously, if you don't have any ISDN
|
||||
hardware, for example, you probably needn't concern yourself with
|
||||
isdn4k-utils.
|
||||
Again, keep in mind that this list assumes you are already functionally
|
||||
running a Linux kernel. Also, not all tools are necessary on all
|
||||
systems; obviously, if you don't have any ISDN hardware, for example,
|
||||
you probably needn't concern yourself with isdn4k-utils.
|
||||
|
||||
o Gnu C 3.2 # gcc --version
|
||||
o Gnu make 3.80 # make --version
|
||||
|
@ -114,12 +107,12 @@ Ksymoops
|
|||
|
||||
If the unthinkable happens and your kernel oopses, you may need the
|
||||
ksymoops tool to decode it, but in most cases you don't.
|
||||
In the 2.6 kernel it is generally preferred to build the kernel with
|
||||
CONFIG_KALLSYMS so that it produces readable dumps that can be used as-is
|
||||
(this also produces better output than ksymoops).
|
||||
If for some reason your kernel is not build with CONFIG_KALLSYMS and
|
||||
you have no way to rebuild and reproduce the Oops with that option, then
|
||||
you can still decode that Oops with ksymoops.
|
||||
It is generally preferred to build the kernel with CONFIG_KALLSYMS so
|
||||
that it produces readable dumps that can be used as-is (this also
|
||||
produces better output than ksymoops). If for some reason your kernel
|
||||
is not built with CONFIG_KALLSYMS and you have no way to rebuild and
|
||||
reproduce the Oops with that option, then you can still decode that Oops
|
||||
with ksymoops.
|
||||
|
||||
Module-Init-Tools
|
||||
-----------------
|
||||
|
@ -261,8 +254,8 @@ needs to be recompiled or (preferably) upgraded.
|
|||
NFS-utils
|
||||
---------
|
||||
|
||||
In 2.4 and earlier kernels, the nfs server needed to know about any
|
||||
client that expected to be able to access files via NFS. This
|
||||
In ancient (2.4 and earlier) kernels, the nfs server needed to know
|
||||
about any client that expected to be able to access files via NFS. This
|
||||
information would be given to the kernel by "mountd" when the client
|
||||
mounted the filesystem, or by "exportfs" at system startup. exportfs
|
||||
would take information about active clients from /var/lib/nfs/rmtab.
|
||||
|
@ -272,11 +265,11 @@ which is not always easy, particularly when trying to implement
|
|||
fail-over. Even when the system is working well, rmtab suffers from
|
||||
getting lots of old entries that never get removed.
|
||||
|
||||
With 2.6 we have the option of having the kernel tell mountd when it
|
||||
gets a request from an unknown host, and mountd can give appropriate
|
||||
export information to the kernel. This removes the dependency on
|
||||
rmtab and means that the kernel only needs to know about currently
|
||||
active clients.
|
||||
With modern kernels we have the option of having the kernel tell mountd
|
||||
when it gets a request from an unknown host, and mountd can give
|
||||
appropriate export information to the kernel. This removes the
|
||||
dependency on rmtab and means that the kernel only needs to know about
|
||||
currently active clients.
|
||||
|
||||
To enable this new functionality, you need to:
|
||||
|
||||
|
|
|
@ -680,8 +680,8 @@ ones already enabled by DEBUG.
|
|||
Chapter 14: Allocating memory
|
||||
|
||||
The kernel provides the following general purpose memory allocators:
|
||||
kmalloc(), kzalloc(), kcalloc(), and vmalloc(). Please refer to the API
|
||||
documentation for further information about them.
|
||||
kmalloc(), kzalloc(), kcalloc(), vmalloc(), and vzalloc(). Please refer to
|
||||
the API documentation for further information about them.
|
||||
|
||||
The preferred form for passing a size of a struct is the following:
|
||||
|
||||
|
|
|
@ -402,8 +402,9 @@
|
|||
!Finclude/net/mac80211.h set_key_cmd
|
||||
!Finclude/net/mac80211.h ieee80211_key_conf
|
||||
!Finclude/net/mac80211.h ieee80211_key_flags
|
||||
!Finclude/net/mac80211.h ieee80211_tkip_key_type
|
||||
!Finclude/net/mac80211.h ieee80211_get_tkip_key
|
||||
!Finclude/net/mac80211.h ieee80211_get_tkip_p1k
|
||||
!Finclude/net/mac80211.h ieee80211_get_tkip_p1k_iv
|
||||
!Finclude/net/mac80211.h ieee80211_get_tkip_p2k
|
||||
!Finclude/net/mac80211.h ieee80211_key_removed
|
||||
</chapter>
|
||||
|
||||
|
|
|
@ -409,7 +409,7 @@ cond_resched(); /* Will sleep */
|
|||
|
||||
<para>
|
||||
You should always compile your kernel with
|
||||
<symbol>CONFIG_DEBUG_SPINLOCK_SLEEP</symbol> on, and it will warn
|
||||
<symbol>CONFIG_DEBUG_ATOMIC_SLEEP</symbol> on, and it will warn
|
||||
you if you break these rules. If you <emphasis>do</emphasis> break
|
||||
the rules, you will eventually lock up your box.
|
||||
</para>
|
||||
|
|
|
@ -1164,7 +1164,7 @@
|
|||
}
|
||||
chip->port = pci_resource_start(pci, 0);
|
||||
if (request_irq(pci->irq, snd_mychip_interrupt,
|
||||
IRQF_SHARED, "My Chip", chip)) {
|
||||
IRQF_SHARED, KBUILD_MODNAME, chip)) {
|
||||
printk(KERN_ERR "cannot grab irq %d\n", pci->irq);
|
||||
snd_mychip_free(chip);
|
||||
return -EBUSY;
|
||||
|
@ -1197,7 +1197,7 @@
|
|||
|
||||
/* pci_driver definition */
|
||||
static struct pci_driver driver = {
|
||||
.name = "My Own Chip",
|
||||
.name = KBUILD_MODNAME,
|
||||
.id_table = snd_mychip_ids,
|
||||
.probe = snd_mychip_probe,
|
||||
.remove = __devexit_p(snd_mychip_remove),
|
||||
|
@ -1340,7 +1340,7 @@
|
|||
<programlisting>
|
||||
<![CDATA[
|
||||
if (request_irq(pci->irq, snd_mychip_interrupt,
|
||||
IRQF_SHARED, "My Chip", chip)) {
|
||||
IRQF_SHARED, KBUILD_MODNAME, chip)) {
|
||||
printk(KERN_ERR "cannot grab irq %d\n", pci->irq);
|
||||
snd_mychip_free(chip);
|
||||
return -EBUSY;
|
||||
|
@ -1616,7 +1616,7 @@
|
|||
<programlisting>
|
||||
<![CDATA[
|
||||
static struct pci_driver driver = {
|
||||
.name = "My Own Chip",
|
||||
.name = KBUILD_MODNAME,
|
||||
.id_table = snd_mychip_ids,
|
||||
.probe = snd_mychip_probe,
|
||||
.remove = __devexit_p(snd_mychip_remove),
|
||||
|
@ -5816,7 +5816,7 @@ struct _snd_pcm_runtime {
|
|||
<programlisting>
|
||||
<![CDATA[
|
||||
static struct pci_driver driver = {
|
||||
.name = "My Chip",
|
||||
.name = KBUILD_MODNAME,
|
||||
.id_table = snd_my_ids,
|
||||
.probe = snd_my_probe,
|
||||
.remove = __devexit_p(snd_my_remove),
|
||||
|
|
|
@ -53,7 +53,7 @@ kernel patches.
|
|||
|
||||
12: Has been tested with CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT,
|
||||
CONFIG_DEBUG_SLAB, CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES,
|
||||
CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_SPINLOCK_SLEEP all simultaneously
|
||||
CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_ATOMIC_SLEEP all simultaneously
|
||||
enabled.
|
||||
|
||||
13: Has been build- and runtime tested with and without CONFIG_SMP and
|
||||
|
|
|
@ -21,7 +21,7 @@ information will not be available.
|
|||
To extract cgroup statistics a utility very similar to getdelays.c
|
||||
has been developed, the sample output of the utility is shown below
|
||||
|
||||
~/balbir/cgroupstats # ./getdelays -C "/cgroup/a"
|
||||
~/balbir/cgroupstats # ./getdelays -C "/sys/fs/cgroup/a"
|
||||
sleeping 1, blocked 0, running 1, stopped 0, uninterruptible 0
|
||||
~/balbir/cgroupstats # ./getdelays -C "/cgroup"
|
||||
~/balbir/cgroupstats # ./getdelays -C "/sys/fs/cgroup"
|
||||
sleeping 155, blocked 0, running 1, stopped 0, uninterruptible 2
|
||||
|
|
|
@ -164,3 +164,8 @@ In either case, the following conditions must be met:
|
|||
- The boot loader is expected to call the kernel image by jumping
|
||||
directly to the first instruction of the kernel image.
|
||||
|
||||
On CPUs supporting the ARM instruction set, the entry must be
|
||||
made in ARM state, even for a Thumb-2 kernel.
|
||||
|
||||
On CPUs supporting only the Thumb instruction set such as
|
||||
Cortex-M class CPUs, the entry must be made in Thumb state.
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
ROM-able zImage boot from eSD
|
||||
-----------------------------
|
||||
|
||||
A ROM-able zImage compiled with ZBOOT_ROM_SDHI may be written to eSD and
|
||||
SuperH Mobile ARM will boot directly from the SDHI hardware block.
|
||||
|
||||
This is achieved by the mask ROM loading the first portion of the image into
|
||||
MERAM and then jumping to it. This portion contains loader code which
|
||||
copies the entire image to SDRAM and jumps to it. From there the zImage
|
||||
boot code proceeds as normal, uncompressing the image into its final
|
||||
location and then jumping to it.
|
||||
|
||||
This code has been tested on a mackerel board using the developer 1A eSD
|
||||
boot mode which is configured using the following jumper settings.
|
||||
|
||||
8 7 6 5 4 3 2 1
|
||||
x|x|x|x| |x|x|
|
||||
S4 -+-+-+-+-+-+-+-
|
||||
| | | |x| | |x on
|
||||
|
||||
The eSD card needs to be present in SDHI slot 1 (CN7).
|
||||
As such S1 and S33 also need to be configured as per
|
||||
the notes in arch/arm/mach-shmobile/board-mackerel.c.
|
||||
|
||||
A partial zImage must be written to physical partition #1 (boot)
|
||||
of the eSD at sector 0 in vrl4 format. A utility vrl4 is supplied to
|
||||
accomplish this.
|
||||
|
||||
e.g.
|
||||
vrl4 < zImage | dd of=/dev/sdX bs=512 count=17
|
||||
|
||||
A full copy of _the same_ zImage should be written to physical partition #1
|
||||
(boot) of the eSD at sector 0. This should _not_ be in vrl4 format.
|
||||
|
||||
vrl4 < zImage | dd of=/dev/sdX bs=512
|
||||
|
||||
Note: The commands above assume that the physical partition has been
|
||||
switched. No such facility currently exists in the Linux Kernel.
|
||||
|
||||
Physical partitions are described in the eSD specification. At the time of
|
||||
writing they are not the same as partitions that are typically configured
|
||||
using fdisk and visible through /proc/partitions.
|
|
@ -0,0 +1,267 @@
|
|||
Kernel-provided User Helpers
|
||||
============================
|
||||
|
||||
These are segments of kernel-provided user code reachable from user space
|
||||
at a fixed address in kernel memory. This is used to provide user space
|
||||
with some operations which require kernel help because of unimplemented
|
||||
native features and/or instructions in many ARM CPUs. The idea is for this
|
||||
code to be executed directly in user mode for best efficiency but which is
|
||||
too intimate with the kernel counterpart to be left to user libraries.
|
||||
In fact this code might even differ from one CPU to another depending on
|
||||
the available instruction set, or whether it is an SMP system. In other
|
||||
words, the kernel reserves the right to change this code as needed without
|
||||
warning. Only the entry points and their results as documented here are
|
||||
guaranteed to be stable.
|
||||
|
||||
This is different from (but doesn't preclude) a full blown VDSO
|
||||
implementation, however a VDSO would prevent some assembly tricks with
|
||||
constants that allow for efficient branching to those code segments. And
|
||||
since those code segments only use a few cycles before returning to user
|
||||
code, the overhead of a VDSO indirect far call would add a measurable
|
||||
overhead to such minimalistic operations.
|
||||
|
||||
User space is expected to bypass those helpers and implement those things
|
||||
inline (either in the code emitted directly by the compiler, or part of
|
||||
the implementation of a library call) when optimizing for a recent enough
|
||||
processor that has the necessary native support, but only if resulting
|
||||
binaries are already to be incompatible with earlier ARM processors due to
|
||||
usage of similar native instructions for other things. In other words
|
||||
don't make binaries unable to run on earlier processors just for the sake
|
||||
of not using these kernel helpers if your compiled code is not going to
|
||||
use new instructions for other purposes.
|
||||
|
||||
New helpers may be added over time, so an older kernel may be missing some
|
||||
helpers present in a newer kernel. For this reason, programs must check
|
||||
the value of __kuser_helper_version (see below) before assuming that it is
|
||||
safe to call any particular helper. This check should ideally be
|
||||
performed only once at process startup time, and execution aborted early
|
||||
if the required helpers are not provided by the kernel version that
|
||||
process is running on.
|
||||
|
||||
kuser_helper_version
|
||||
--------------------
|
||||
|
||||
Location: 0xffff0ffc
|
||||
|
||||
Reference declaration:
|
||||
|
||||
extern int32_t __kuser_helper_version;
|
||||
|
||||
Definition:
|
||||
|
||||
This field contains the number of helpers being implemented by the
|
||||
running kernel. User space may read this to determine the availability
|
||||
of a particular helper.
|
||||
|
||||
Usage example:
|
||||
|
||||
#define __kuser_helper_version (*(int32_t *)0xffff0ffc)
|
||||
|
||||
void check_kuser_version(void)
|
||||
{
|
||||
if (__kuser_helper_version < 2) {
|
||||
fprintf(stderr, "can't do atomic operations, kernel too old\n");
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
Notes:
|
||||
|
||||
User space may assume that the value of this field never changes
|
||||
during the lifetime of any single process. This means that this
|
||||
field can be read once during the initialisation of a library or
|
||||
startup phase of a program.
|
||||
|
||||
kuser_get_tls
|
||||
-------------
|
||||
|
||||
Location: 0xffff0fe0
|
||||
|
||||
Reference prototype:
|
||||
|
||||
void * __kuser_get_tls(void);
|
||||
|
||||
Input:
|
||||
|
||||
lr = return address
|
||||
|
||||
Output:
|
||||
|
||||
r0 = TLS value
|
||||
|
||||
Clobbered registers:
|
||||
|
||||
none
|
||||
|
||||
Definition:
|
||||
|
||||
Get the TLS value as previously set via the __ARM_NR_set_tls syscall.
|
||||
|
||||
Usage example:
|
||||
|
||||
typedef void * (__kuser_get_tls_t)(void);
|
||||
#define __kuser_get_tls (*(__kuser_get_tls_t *)0xffff0fe0)
|
||||
|
||||
void foo()
|
||||
{
|
||||
void *tls = __kuser_get_tls();
|
||||
printf("TLS = %p\n", tls);
|
||||
}
|
||||
|
||||
Notes:
|
||||
|
||||
- Valid only if __kuser_helper_version >= 1 (from kernel version 2.6.12).
|
||||
|
||||
kuser_cmpxchg
|
||||
-------------
|
||||
|
||||
Location: 0xffff0fc0
|
||||
|
||||
Reference prototype:
|
||||
|
||||
int __kuser_cmpxchg(int32_t oldval, int32_t newval, volatile int32_t *ptr);
|
||||
|
||||
Input:
|
||||
|
||||
r0 = oldval
|
||||
r1 = newval
|
||||
r2 = ptr
|
||||
lr = return address
|
||||
|
||||
Output:
|
||||
|
||||
r0 = success code (zero or non-zero)
|
||||
C flag = set if r0 == 0, clear if r0 != 0
|
||||
|
||||
Clobbered registers:
|
||||
|
||||
r3, ip, flags
|
||||
|
||||
Definition:
|
||||
|
||||
Atomically store newval in *ptr only if *ptr is equal to oldval.
|
||||
Return zero if *ptr was changed or non-zero if no exchange happened.
|
||||
The C flag is also set if *ptr was changed to allow for assembly
|
||||
optimization in the calling code.
|
||||
|
||||
Usage example:
|
||||
|
||||
typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr);
|
||||
#define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0)
|
||||
|
||||
int atomic_add(volatile int *ptr, int val)
|
||||
{
|
||||
int old, new;
|
||||
|
||||
do {
|
||||
old = *ptr;
|
||||
new = old + val;
|
||||
} while(__kuser_cmpxchg(old, new, ptr));
|
||||
|
||||
return new;
|
||||
}
|
||||
|
||||
Notes:
|
||||
|
||||
- This routine already includes memory barriers as needed.
|
||||
|
||||
- Valid only if __kuser_helper_version >= 2 (from kernel version 2.6.12).
|
||||
|
||||
kuser_memory_barrier
|
||||
--------------------
|
||||
|
||||
Location: 0xffff0fa0
|
||||
|
||||
Reference prototype:
|
||||
|
||||
void __kuser_memory_barrier(void);
|
||||
|
||||
Input:
|
||||
|
||||
lr = return address
|
||||
|
||||
Output:
|
||||
|
||||
none
|
||||
|
||||
Clobbered registers:
|
||||
|
||||
none
|
||||
|
||||
Definition:
|
||||
|
||||
Apply any needed memory barrier to preserve consistency with data modified
|
||||
manually and __kuser_cmpxchg usage.
|
||||
|
||||
Usage example:
|
||||
|
||||
typedef void (__kuser_dmb_t)(void);
|
||||
#define __kuser_dmb (*(__kuser_dmb_t *)0xffff0fa0)
|
||||
|
||||
Notes:
|
||||
|
||||
- Valid only if __kuser_helper_version >= 3 (from kernel version 2.6.15).
|
||||
|
||||
kuser_cmpxchg64
|
||||
---------------
|
||||
|
||||
Location: 0xffff0f60
|
||||
|
||||
Reference prototype:
|
||||
|
||||
int __kuser_cmpxchg64(const int64_t *oldval,
|
||||
const int64_t *newval,
|
||||
volatile int64_t *ptr);
|
||||
|
||||
Input:
|
||||
|
||||
r0 = pointer to oldval
|
||||
r1 = pointer to newval
|
||||
r2 = pointer to target value
|
||||
lr = return address
|
||||
|
||||
Output:
|
||||
|
||||
r0 = success code (zero or non-zero)
|
||||
C flag = set if r0 == 0, clear if r0 != 0
|
||||
|
||||
Clobbered registers:
|
||||
|
||||
r3, lr, flags
|
||||
|
||||
Definition:
|
||||
|
||||
Atomically store the 64-bit value pointed by *newval in *ptr only if *ptr
|
||||
is equal to the 64-bit value pointed by *oldval. Return zero if *ptr was
|
||||
changed or non-zero if no exchange happened.
|
||||
|
||||
The C flag is also set if *ptr was changed to allow for assembly
|
||||
optimization in the calling code.
|
||||
|
||||
Usage example:
|
||||
|
||||
typedef int (__kuser_cmpxchg64_t)(const int64_t *oldval,
|
||||
const int64_t *newval,
|
||||
volatile int64_t *ptr);
|
||||
#define __kuser_cmpxchg64 (*(__kuser_cmpxchg64_t *)0xffff0f60)
|
||||
|
||||
int64_t atomic_add64(volatile int64_t *ptr, int64_t val)
|
||||
{
|
||||
int64_t old, new;
|
||||
|
||||
do {
|
||||
old = *ptr;
|
||||
new = old + val;
|
||||
} while(__kuser_cmpxchg64(&old, &new, ptr));
|
||||
|
||||
return new;
|
||||
}
|
||||
|
||||
Notes:
|
||||
|
||||
- This routine already includes memory barriers as needed.
|
||||
|
||||
- Due to the length of this sequence, this spans 2 conventional kuser
|
||||
"slots", therefore 0xffff0f80 is not used as a valid entry point.
|
||||
|
||||
- Valid only if __kuser_helper_version >= 5 (from kernel version 3.1).
|
|
@ -9,6 +9,8 @@ the entire SPI transfer. - And not just bits_per_word duration.
|
|||
In most cases you can utilize SPI MODE_3 instead of MODE_0 to work-around this
|
||||
behavior. If your SPI slave device in question requires SPI MODE_0 or MODE_2
|
||||
timing, you can utilize the GPIO controlled SPI Slave Select option instead.
|
||||
In this case, you should use GPIO based CS for all of your slaves and not just
|
||||
the ones using mode 0 or 2 in order to guarantee correct CS toggling behavior.
|
||||
|
||||
You can even use the same pin whose peripheral role is a SSEL,
|
||||
but use it as a GPIO instead.
|
||||
|
|
|
@ -28,16 +28,19 @@ cgroups. Here is what you can do.
|
|||
- Enable group scheduling in CFQ
|
||||
CONFIG_CFQ_GROUP_IOSCHED=y
|
||||
|
||||
- Compile and boot into kernel and mount IO controller (blkio).
|
||||
- Compile and boot into kernel and mount IO controller (blkio); see
|
||||
cgroups.txt, Why are cgroups needed?.
|
||||
|
||||
mount -t cgroup -o blkio none /cgroup
|
||||
mount -t tmpfs cgroup_root /sys/fs/cgroup
|
||||
mkdir /sys/fs/cgroup/blkio
|
||||
mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
|
||||
|
||||
- Create two cgroups
|
||||
mkdir -p /cgroup/test1/ /cgroup/test2
|
||||
mkdir -p /sys/fs/cgroup/blkio/test1/ /sys/fs/cgroup/blkio/test2
|
||||
|
||||
- Set weights of group test1 and test2
|
||||
echo 1000 > /cgroup/test1/blkio.weight
|
||||
echo 500 > /cgroup/test2/blkio.weight
|
||||
echo 1000 > /sys/fs/cgroup/blkio/test1/blkio.weight
|
||||
echo 500 > /sys/fs/cgroup/blkio/test2/blkio.weight
|
||||
|
||||
- Create two same size files (say 512MB each) on same disk (file1, file2) and
|
||||
launch two dd threads in different cgroup to read those files.
|
||||
|
@ -46,12 +49,12 @@ cgroups. Here is what you can do.
|
|||
echo 3 > /proc/sys/vm/drop_caches
|
||||
|
||||
dd if=/mnt/sdb/zerofile1 of=/dev/null &
|
||||
echo $! > /cgroup/test1/tasks
|
||||
cat /cgroup/test1/tasks
|
||||
echo $! > /sys/fs/cgroup/blkio/test1/tasks
|
||||
cat /sys/fs/cgroup/blkio/test1/tasks
|
||||
|
||||
dd if=/mnt/sdb/zerofile2 of=/dev/null &
|
||||
echo $! > /cgroup/test2/tasks
|
||||
cat /cgroup/test2/tasks
|
||||
echo $! > /sys/fs/cgroup/blkio/test2/tasks
|
||||
cat /sys/fs/cgroup/blkio/test2/tasks
|
||||
|
||||
- At macro level, first dd should finish first. To get more precise data, keep
|
||||
on looking at (with the help of script), at blkio.disk_time and
|
||||
|
@ -68,13 +71,13 @@ Throttling/Upper Limit policy
|
|||
- Enable throttling in block layer
|
||||
CONFIG_BLK_DEV_THROTTLING=y
|
||||
|
||||
- Mount blkio controller
|
||||
mount -t cgroup -o blkio none /cgroup/blkio
|
||||
- Mount blkio controller (see cgroups.txt, Why are cgroups needed?)
|
||||
mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
|
||||
|
||||
- Specify a bandwidth rate on particular device for root group. The format
|
||||
for policy is "<major>:<minor> <bytes_per_second>".
|
||||
|
||||
echo "8:16 1048576" > /cgroup/blkio/blkio.read_bps_device
|
||||
echo "8:16 1048576" > /sys/fs/cgroup/blkio/blkio.throttle.read_bps_device
|
||||
|
||||
Above will put a limit of 1MB/second on reads happening for root group
|
||||
on device having major/minor number 8:16.
|
||||
|
@ -87,7 +90,7 @@ Throttling/Upper Limit policy
|
|||
1024+0 records out
|
||||
4194304 bytes (4.2 MB) copied, 4.0001 s, 1.0 MB/s
|
||||
|
||||
Limits for writes can be put using blkio.write_bps_device file.
|
||||
Limits for writes can be put using blkio.throttle.write_bps_device file.
|
||||
|
||||
Hierarchical Cgroups
|
||||
====================
|
||||
|
@ -108,7 +111,7 @@ Hierarchical Cgroups
|
|||
CFQ and throttling will practically treat all groups at same level.
|
||||
|
||||
pivot
|
||||
/ | \ \
|
||||
/ / \ \
|
||||
root test1 test2 test3
|
||||
|
||||
Down the line we can implement hierarchical accounting/control support
|
||||
|
@ -149,7 +152,7 @@ Proportional weight policy files
|
|||
|
||||
Following is the format.
|
||||
|
||||
#echo dev_maj:dev_minor weight > /path/to/cgroup/blkio.weight_device
|
||||
# echo dev_maj:dev_minor weight > blkio.weight_device
|
||||
Configure weight=300 on /dev/sdb (8:16) in this cgroup
|
||||
# echo 8:16 300 > blkio.weight_device
|
||||
# cat blkio.weight_device
|
||||
|
@ -283,28 +286,28 @@ Throttling/Upper limit policy files
|
|||
specified in bytes per second. Rules are per device. Following is
|
||||
the format.
|
||||
|
||||
echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.read_bps_device
|
||||
echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.read_bps_device
|
||||
|
||||
- blkio.throttle.write_bps_device
|
||||
- Specifies upper limit on WRITE rate to the device. IO rate is
|
||||
specified in bytes per second. Rules are per device. Following is
|
||||
the format.
|
||||
|
||||
echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.write_bps_device
|
||||
echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.write_bps_device
|
||||
|
||||
- blkio.throttle.read_iops_device
|
||||
- Specifies upper limit on READ rate from the device. IO rate is
|
||||
specified in IO per second. Rules are per device. Following is
|
||||
the format.
|
||||
|
||||
echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.read_iops_device
|
||||
echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.read_iops_device
|
||||
|
||||
- blkio.throttle.write_iops_device
|
||||
- Specifies upper limit on WRITE rate to the device. IO rate is
|
||||
specified in io per second. Rules are per device. Following is
|
||||
the format.
|
||||
|
||||
echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.write_iops_device
|
||||
echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.write_iops_device
|
||||
|
||||
Note: If both BW and IOPS rules are specified for a device, then IO is
|
||||
subjected to both the constraints.
|
||||
|
|
|
@ -138,11 +138,11 @@ With the ability to classify tasks differently for different resources
|
|||
the admin can easily set up a script which receives exec notifications
|
||||
and depending on who is launching the browser he can
|
||||
|
||||
# echo browser_pid > /mnt/<restype>/<userclass>/tasks
|
||||
# echo browser_pid > /sys/fs/cgroup/<restype>/<userclass>/tasks
|
||||
|
||||
With only a single hierarchy, he now would potentially have to create
|
||||
a separate cgroup for every browser launched and associate it with
|
||||
approp network and other resource class. This may lead to
|
||||
appropriate network and other resource class. This may lead to
|
||||
proliferation of such cgroups.
|
||||
|
||||
Also let's say that the administrator would like to give enhanced network
|
||||
|
@ -153,9 +153,9 @@ apps enhanced CPU power,
|
|||
With ability to write pids directly to resource classes, it's just a
|
||||
matter of :
|
||||
|
||||
# echo pid > /mnt/network/<new_class>/tasks
|
||||
# echo pid > /sys/fs/cgroup/network/<new_class>/tasks
|
||||
(after some time)
|
||||
# echo pid > /mnt/network/<orig_class>/tasks
|
||||
# echo pid > /sys/fs/cgroup/network/<orig_class>/tasks
|
||||
|
||||
Without this ability, he would have to split the cgroup into
|
||||
multiple separate ones and then associate the new cgroups with the
|
||||
|
@ -310,21 +310,24 @@ subsystem, this is the case for the cpuset.
|
|||
To start a new job that is to be contained within a cgroup, using
|
||||
the "cpuset" cgroup subsystem, the steps are something like:
|
||||
|
||||
1) mkdir /dev/cgroup
|
||||
2) mount -t cgroup -ocpuset cpuset /dev/cgroup
|
||||
3) Create the new cgroup by doing mkdir's and write's (or echo's) in
|
||||
the /dev/cgroup virtual file system.
|
||||
4) Start a task that will be the "founding father" of the new job.
|
||||
5) Attach that task to the new cgroup by writing its pid to the
|
||||
/dev/cgroup tasks file for that cgroup.
|
||||
6) fork, exec or clone the job tasks from this founding father task.
|
||||
1) mount -t tmpfs cgroup_root /sys/fs/cgroup
|
||||
2) mkdir /sys/fs/cgroup/cpuset
|
||||
3) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
|
||||
4) Create the new cgroup by doing mkdir's and write's (or echo's) in
|
||||
the /sys/fs/cgroup virtual file system.
|
||||
5) Start a task that will be the "founding father" of the new job.
|
||||
6) Attach that task to the new cgroup by writing its pid to the
|
||||
/sys/fs/cgroup/cpuset/tasks file for that cgroup.
|
||||
7) fork, exec or clone the job tasks from this founding father task.
|
||||
|
||||
For example, the following sequence of commands will setup a cgroup
|
||||
named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
|
||||
and then start a subshell 'sh' in that cgroup:
|
||||
|
||||
mount -t cgroup cpuset -ocpuset /dev/cgroup
|
||||
cd /dev/cgroup
|
||||
mount -t tmpfs cgroup_root /sys/fs/cgroup
|
||||
mkdir /sys/fs/cgroup/cpuset
|
||||
mount -t cgroup cpuset -ocpuset /sys/fs/cgroup/cpuset
|
||||
cd /sys/fs/cgroup/cpuset
|
||||
mkdir Charlie
|
||||
cd Charlie
|
||||
/bin/echo 2-3 > cpuset.cpus
|
||||
|
@ -345,7 +348,7 @@ Creating, modifying, using the cgroups can be done through the cgroup
|
|||
virtual filesystem.
|
||||
|
||||
To mount a cgroup hierarchy with all available subsystems, type:
|
||||
# mount -t cgroup xxx /dev/cgroup
|
||||
# mount -t cgroup xxx /sys/fs/cgroup
|
||||
|
||||
The "xxx" is not interpreted by the cgroup code, but will appear in
|
||||
/proc/mounts so may be any useful identifying string that you like.
|
||||
|
@ -354,23 +357,32 @@ Note: Some subsystems do not work without some user input first. For instance,
|
|||
if cpusets are enabled the user will have to populate the cpus and mems files
|
||||
for each new cgroup created before that group can be used.
|
||||
|
||||
As explained in section `1.2 Why are cgroups needed?' you should create
|
||||
different hierarchies of cgroups for each single resource or group of
|
||||
resources you want to control. Therefore, you should mount a tmpfs on
|
||||
/sys/fs/cgroup and create directories for each cgroup resource or resource
|
||||
group.
|
||||
|
||||
# mount -t tmpfs cgroup_root /sys/fs/cgroup
|
||||
# mkdir /sys/fs/cgroup/rg1
|
||||
|
||||
To mount a cgroup hierarchy with just the cpuset and memory
|
||||
subsystems, type:
|
||||
# mount -t cgroup -o cpuset,memory hier1 /dev/cgroup
|
||||
# mount -t cgroup -o cpuset,memory hier1 /sys/fs/cgroup/rg1
|
||||
|
||||
To change the set of subsystems bound to a mounted hierarchy, just
|
||||
remount with different options:
|
||||
# mount -o remount,cpuset,blkio hier1 /dev/cgroup
|
||||
# mount -o remount,cpuset,blkio hier1 /sys/fs/cgroup/rg1
|
||||
|
||||
Now memory is removed from the hierarchy and blkio is added.
|
||||
|
||||
Note this will add blkio to the hierarchy but won't remove memory or
|
||||
cpuset, because the new options are appended to the old ones:
|
||||
# mount -o remount,blkio /dev/cgroup
|
||||
# mount -o remount,blkio /sys/fs/cgroup/rg1
|
||||
|
||||
To Specify a hierarchy's release_agent:
|
||||
# mount -t cgroup -o cpuset,release_agent="/sbin/cpuset_release_agent" \
|
||||
xxx /dev/cgroup
|
||||
xxx /sys/fs/cgroup/rg1
|
||||
|
||||
Note that specifying 'release_agent' more than once will return failure.
|
||||
|
||||
|
@ -379,17 +391,17 @@ when the hierarchy consists of a single (root) cgroup. Supporting
|
|||
the ability to arbitrarily bind/unbind subsystems from an existing
|
||||
cgroup hierarchy is intended to be implemented in the future.
|
||||
|
||||
Then under /dev/cgroup you can find a tree that corresponds to the
|
||||
tree of the cgroups in the system. For instance, /dev/cgroup
|
||||
Then under /sys/fs/cgroup/rg1 you can find a tree that corresponds to the
|
||||
tree of the cgroups in the system. For instance, /sys/fs/cgroup/rg1
|
||||
is the cgroup that holds the whole system.
|
||||
|
||||
If you want to change the value of release_agent:
|
||||
# echo "/sbin/new_release_agent" > /dev/cgroup/release_agent
|
||||
# echo "/sbin/new_release_agent" > /sys/fs/cgroup/rg1/release_agent
|
||||
|
||||
It can also be changed via remount.
|
||||
|
||||
If you want to create a new cgroup under /dev/cgroup:
|
||||
# cd /dev/cgroup
|
||||
If you want to create a new cgroup under /sys/fs/cgroup/rg1:
|
||||
# cd /sys/fs/cgroup/rg1
|
||||
# mkdir my_cgroup
|
||||
|
||||
Now you want to do something with this cgroup.
|
||||
|
|
|
@ -10,26 +10,25 @@ directly present in its group.
|
|||
|
||||
Accounting groups can be created by first mounting the cgroup filesystem.
|
||||
|
||||
# mkdir /cgroups
|
||||
# mount -t cgroup -ocpuacct none /cgroups
|
||||
# mount -t cgroup -ocpuacct none /sys/fs/cgroup
|
||||
|
||||
With the above step, the initial or the parent accounting group
|
||||
becomes visible at /cgroups. At bootup, this group includes all the
|
||||
tasks in the system. /cgroups/tasks lists the tasks in this cgroup.
|
||||
/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by
|
||||
this group which is essentially the CPU time obtained by all the tasks
|
||||
With the above step, the initial or the parent accounting group becomes
|
||||
visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in
|
||||
the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup.
|
||||
/sys/fs/cgroup/cpuacct.usage gives the CPU time (in nanoseconds) obtained
|
||||
by this group which is essentially the CPU time obtained by all the tasks
|
||||
in the system.
|
||||
|
||||
New accounting groups can be created under the parent group /cgroups.
|
||||
New accounting groups can be created under the parent group /sys/fs/cgroup.
|
||||
|
||||
# cd /cgroups
|
||||
# cd /sys/fs/cgroup
|
||||
# mkdir g1
|
||||
# echo $$ > g1
|
||||
# echo $$ > g1/tasks
|
||||
|
||||
The above steps create a new group g1 and move the current shell
|
||||
process (bash) into it. CPU time consumed by this bash and its children
|
||||
can be obtained from g1/cpuacct.usage and the same is accumulated in
|
||||
/cgroups/cpuacct.usage also.
|
||||
/sys/fs/cgroup/cpuacct.usage also.
|
||||
|
||||
cpuacct.stat file lists a few statistics which further divide the
|
||||
CPU time obtained by the cgroup into user and system times. Currently
|
||||
|
|
|
@ -180,7 +180,7 @@ files describing that cpuset:
|
|||
- cpuset.sched_load_balance flag: if set, load balance within CPUs on that cpuset
|
||||
- cpuset.sched_relax_domain_level: the searching range when migrating tasks
|
||||
|
||||
In addition, the root cpuset only has the following file:
|
||||
In addition, only the root cpuset has the following file:
|
||||
- cpuset.memory_pressure_enabled flag: compute memory_pressure?
|
||||
|
||||
New cpusets are created using the mkdir system call or shell
|
||||
|
@ -661,21 +661,21 @@ than stress the kernel.
|
|||
|
||||
To start a new job that is to be contained within a cpuset, the steps are:
|
||||
|
||||
1) mkdir /dev/cpuset
|
||||
2) mount -t cgroup -ocpuset cpuset /dev/cpuset
|
||||
1) mkdir /sys/fs/cgroup/cpuset
|
||||
2) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
|
||||
3) Create the new cpuset by doing mkdir's and write's (or echo's) in
|
||||
the /dev/cpuset virtual file system.
|
||||
the /sys/fs/cgroup/cpuset virtual file system.
|
||||
4) Start a task that will be the "founding father" of the new job.
|
||||
5) Attach that task to the new cpuset by writing its pid to the
|
||||
/dev/cpuset tasks file for that cpuset.
|
||||
/sys/fs/cgroup/cpuset tasks file for that cpuset.
|
||||
6) fork, exec or clone the job tasks from this founding father task.
|
||||
|
||||
For example, the following sequence of commands will setup a cpuset
|
||||
named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
|
||||
and then start a subshell 'sh' in that cpuset:
|
||||
|
||||
mount -t cgroup -ocpuset cpuset /dev/cpuset
|
||||
cd /dev/cpuset
|
||||
mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
|
||||
cd /sys/fs/cgroup/cpuset
|
||||
mkdir Charlie
|
||||
cd Charlie
|
||||
/bin/echo 2-3 > cpuset.cpus
|
||||
|
@ -710,14 +710,14 @@ Creating, modifying, using the cpusets can be done through the cpuset
|
|||
virtual filesystem.
|
||||
|
||||
To mount it, type:
|
||||
# mount -t cgroup -o cpuset cpuset /dev/cpuset
|
||||
# mount -t cgroup -o cpuset cpuset /sys/fs/cgroup/cpuset
|
||||
|
||||
Then under /dev/cpuset you can find a tree that corresponds to the
|
||||
tree of the cpusets in the system. For instance, /dev/cpuset
|
||||
Then under /sys/fs/cgroup/cpuset you can find a tree that corresponds to the
|
||||
tree of the cpusets in the system. For instance, /sys/fs/cgroup/cpuset
|
||||
is the cpuset that holds the whole system.
|
||||
|
||||
If you want to create a new cpuset under /dev/cpuset:
|
||||
# cd /dev/cpuset
|
||||
If you want to create a new cpuset under /sys/fs/cgroup/cpuset:
|
||||
# cd /sys/fs/cgroup/cpuset
|
||||
# mkdir my_cpuset
|
||||
|
||||
Now you want to do something with this cpuset.
|
||||
|
@ -765,12 +765,12 @@ wrapper around the cgroup filesystem.
|
|||
|
||||
The command
|
||||
|
||||
mount -t cpuset X /dev/cpuset
|
||||
mount -t cpuset X /sys/fs/cgroup/cpuset
|
||||
|
||||
is equivalent to
|
||||
|
||||
mount -t cgroup -ocpuset,noprefix X /dev/cpuset
|
||||
echo "/sbin/cpuset_release_agent" > /dev/cpuset/release_agent
|
||||
mount -t cgroup -ocpuset,noprefix X /sys/fs/cgroup/cpuset
|
||||
echo "/sbin/cpuset_release_agent" > /sys/fs/cgroup/cpuset/release_agent
|
||||
|
||||
2.2 Adding/removing cpus
|
||||
------------------------
|
||||
|
|
|
@ -22,16 +22,16 @@ removed from the child(ren).
|
|||
An entry is added using devices.allow, and removed using
|
||||
devices.deny. For instance
|
||||
|
||||
echo 'c 1:3 mr' > /cgroups/1/devices.allow
|
||||
echo 'c 1:3 mr' > /sys/fs/cgroup/1/devices.allow
|
||||
|
||||
allows cgroup 1 to read and mknod the device usually known as
|
||||
/dev/null. Doing
|
||||
|
||||
echo a > /cgroups/1/devices.deny
|
||||
echo a > /sys/fs/cgroup/1/devices.deny
|
||||
|
||||
will remove the default 'a *:* rwm' entry. Doing
|
||||
|
||||
echo a > /cgroups/1/devices.allow
|
||||
echo a > /sys/fs/cgroup/1/devices.allow
|
||||
|
||||
will add the 'a *:* rwm' entry to the whitelist.
|
||||
|
||||
|
|
|
@ -59,28 +59,28 @@ is non-freezable.
|
|||
|
||||
* Examples of usage :
|
||||
|
||||
# mkdir /containers
|
||||
# mount -t cgroup -ofreezer freezer /containers
|
||||
# mkdir /containers/0
|
||||
# echo $some_pid > /containers/0/tasks
|
||||
# mkdir /sys/fs/cgroup/freezer
|
||||
# mount -t cgroup -ofreezer freezer /sys/fs/cgroup/freezer
|
||||
# mkdir /sys/fs/cgroup/freezer/0
|
||||
# echo $some_pid > /sys/fs/cgroup/freezer/0/tasks
|
||||
|
||||
to get status of the freezer subsystem :
|
||||
|
||||
# cat /containers/0/freezer.state
|
||||
# cat /sys/fs/cgroup/freezer/0/freezer.state
|
||||
THAWED
|
||||
|
||||
to freeze all tasks in the container :
|
||||
|
||||
# echo FROZEN > /containers/0/freezer.state
|
||||
# cat /containers/0/freezer.state
|
||||
# echo FROZEN > /sys/fs/cgroup/freezer/0/freezer.state
|
||||
# cat /sys/fs/cgroup/freezer/0/freezer.state
|
||||
FREEZING
|
||||
# cat /containers/0/freezer.state
|
||||
# cat /sys/fs/cgroup/freezer/0/freezer.state
|
||||
FROZEN
|
||||
|
||||
to unfreeze all tasks in the container :
|
||||
|
||||
# echo THAWED > /containers/0/freezer.state
|
||||
# cat /containers/0/freezer.state
|
||||
# echo THAWED > /sys/fs/cgroup/freezer/0/freezer.state
|
||||
# cat /sys/fs/cgroup/freezer/0/freezer.state
|
||||
THAWED
|
||||
|
||||
This is the basic mechanism which should do the right thing for user space task
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
Memory Resource Controller
|
||||
|
||||
NOTE: The Memory Resource Controller has been generically been referred
|
||||
to as the memory controller in this document. Do not confuse memory
|
||||
controller used here with the memory controller that is used in hardware.
|
||||
NOTE: The Memory Resource Controller has generically been referred to as the
|
||||
memory controller in this document. Do not confuse memory controller
|
||||
used here with the memory controller that is used in hardware.
|
||||
|
||||
(For editors)
|
||||
In this document:
|
||||
|
@ -70,6 +70,7 @@ Brief summary of control files.
|
|||
(See sysctl's vm.swappiness)
|
||||
memory.move_charge_at_immigrate # set/show controls of moving charges
|
||||
memory.oom_control # set/show oom controls.
|
||||
memory.numa_stat # show the number of memory usage per numa node
|
||||
|
||||
1. History
|
||||
|
||||
|
@ -181,7 +182,7 @@ behind this approach is that a cgroup that aggressively uses a shared
|
|||
page will eventually get charged for it (once it is uncharged from
|
||||
the cgroup that brought it in -- this will happen on memory pressure).
|
||||
|
||||
Exception: If CONFIG_CGROUP_CGROUP_MEM_RES_CTLR_SWAP is not used..
|
||||
Exception: If CONFIG_CGROUP_CGROUP_MEM_RES_CTLR_SWAP is not used.
|
||||
When you do swapoff and make swapped-out pages of shmem(tmpfs) to
|
||||
be backed into memory in force, charges for pages are accounted against the
|
||||
caller of swapoff rather than the users of shmem.
|
||||
|
@ -213,7 +214,7 @@ affecting global LRU, memory+swap limit is better than just limiting swap from
|
|||
OS point of view.
|
||||
|
||||
* What happens when a cgroup hits memory.memsw.limit_in_bytes
|
||||
When a cgroup his memory.memsw.limit_in_bytes, it's useless to do swap-out
|
||||
When a cgroup hits memory.memsw.limit_in_bytes, it's useless to do swap-out
|
||||
in this cgroup. Then, swap-out will not be done by cgroup routine and file
|
||||
caches are dropped. But as mentioned above, global LRU can do swapout memory
|
||||
from it for sanity of the system's memory management state. You can't forbid
|
||||
|
@ -263,16 +264,17 @@ b. Enable CONFIG_RESOURCE_COUNTERS
|
|||
c. Enable CONFIG_CGROUP_MEM_RES_CTLR
|
||||
d. Enable CONFIG_CGROUP_MEM_RES_CTLR_SWAP (to use swap extension)
|
||||
|
||||
1. Prepare the cgroups
|
||||
# mkdir -p /cgroups
|
||||
# mount -t cgroup none /cgroups -o memory
|
||||
1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?)
|
||||
# mount -t tmpfs none /sys/fs/cgroup
|
||||
# mkdir /sys/fs/cgroup/memory
|
||||
# mount -t cgroup none /sys/fs/cgroup/memory -o memory
|
||||
|
||||
2. Make the new group and move bash into it
|
||||
# mkdir /cgroups/0
|
||||
# echo $$ > /cgroups/0/tasks
|
||||
# mkdir /sys/fs/cgroup/memory/0
|
||||
# echo $$ > /sys/fs/cgroup/memory/0/tasks
|
||||
|
||||
Since now we're in the 0 cgroup, we can alter the memory limit:
|
||||
# echo 4M > /cgroups/0/memory.limit_in_bytes
|
||||
# echo 4M > /sys/fs/cgroup/memory/0/memory.limit_in_bytes
|
||||
|
||||
NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
|
||||
mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes, Gibibytes.)
|
||||
|
@ -280,11 +282,11 @@ mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes, Gibibytes.)
|
|||
NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited).
|
||||
NOTE: We cannot set limits on the root cgroup any more.
|
||||
|
||||
# cat /cgroups/0/memory.limit_in_bytes
|
||||
# cat /sys/fs/cgroup/memory/0/memory.limit_in_bytes
|
||||
4194304
|
||||
|
||||
We can check the usage:
|
||||
# cat /cgroups/0/memory.usage_in_bytes
|
||||
# cat /sys/fs/cgroup/memory/0/memory.usage_in_bytes
|
||||
1216512
|
||||
|
||||
A successful write to this file does not guarantee a successful set of
|
||||
|
@ -464,6 +466,24 @@ value for efficient access. (Of course, when necessary, it's synchronized.)
|
|||
If you want to know more exact memory usage, you should use RSS+CACHE(+SWAP)
|
||||
value in memory.stat(see 5.2).
|
||||
|
||||
5.6 numa_stat
|
||||
|
||||
This is similar to numa_maps but operates on a per-memcg basis. This is
|
||||
useful for providing visibility into the numa locality information within
|
||||
a memcg since the pages are allowed to be allocated from any physical
|
||||
node. One of the usecases is evaluating application performance by
|
||||
combining this information with the application's cpu allocation.
|
||||
|
||||
We export "total", "file", "anon" and "unevictable" pages per-node for
|
||||
each memcg. The output format of memory.numa_stat is:
|
||||
|
||||
total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
unevictable=<total unevictable pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
|
||||
And we have total = file + anon + unevictable.
|
||||
|
||||
6. Hierarchy support
|
||||
|
||||
The memory controller supports a deep hierarchy and hierarchical accounting.
|
||||
|
@ -471,13 +491,13 @@ The hierarchy is created by creating the appropriate cgroups in the
|
|||
cgroup filesystem. Consider for example, the following cgroup filesystem
|
||||
hierarchy
|
||||
|
||||
root
|
||||
root
|
||||
/ | \
|
||||
/ | \
|
||||
a b c
|
||||
| \
|
||||
| \
|
||||
d e
|
||||
/ | \
|
||||
a b c
|
||||
| \
|
||||
| \
|
||||
d e
|
||||
|
||||
In the diagram above, with hierarchical accounting enabled, all memory
|
||||
usage of e, is accounted to its ancestors up until the root (i.e., c and root),
|
||||
|
|
|
@ -244,7 +244,7 @@ testing purposes. In particular, you should turn on:
|
|||
- DEBUG_SLAB can find a variety of memory allocation and use errors; it
|
||||
should be used on most development kernels.
|
||||
|
||||
- DEBUG_SPINLOCK, DEBUG_SPINLOCK_SLEEP, and DEBUG_MUTEXES will find a
|
||||
- DEBUG_SPINLOCK, DEBUG_ATOMIC_SLEEP, and DEBUG_MUTEXES will find a
|
||||
number of common locking errors.
|
||||
|
||||
There are quite a few other debugging options, some of which will be
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
* ARM Performance Monitor Units
|
||||
|
||||
ARM cores often have a PMU for counting cpu and cache events like cache misses
|
||||
and hits. The interface to the PMU is part of the ARM ARM. The ARM PMU
|
||||
representation in the device tree should be done as under:-
|
||||
|
||||
Required properties:
|
||||
|
||||
- compatible : should be one of
|
||||
"arm,cortex-a9-pmu"
|
||||
"arm,cortex-a8-pmu"
|
||||
"arm,arm1176-pmu"
|
||||
"arm,arm1136-pmu"
|
||||
- interrupts : 1 combined interrupt or 1 per core.
|
||||
|
||||
Example:
|
||||
|
||||
pmu {
|
||||
compatible = "arm,cortex-a9-pmu";
|
||||
interrupts = <100 101>;
|
||||
};
|
|
@ -0,0 +1,21 @@
|
|||
* ARM Primecell Peripherals
|
||||
|
||||
ARM, Ltd. Primecell peripherals have a standard id register that can be used to
|
||||
identify the peripheral type, vendor, and revision. This value can be used for
|
||||
driver matching.
|
||||
|
||||
Required properties:
|
||||
|
||||
- compatible : should be a specific value for peripheral and "arm,primecell"
|
||||
|
||||
Optional properties:
|
||||
|
||||
- arm,primecell-periphid : Value to override the h/w value with
|
||||
|
||||
Example:
|
||||
|
||||
serial@fff36000 {
|
||||
compatible = "arm,pl011", "arm,primecell";
|
||||
arm,primecell-periphid = <0x00341011>;
|
||||
};
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
Freescale SoC SEC Security Engines
|
||||
Freescale SoC SEC Security Engines versions 2.x-3.x
|
||||
|
||||
Required properties:
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
* Freescale i.MX/MXC GPIO controller
|
||||
|
||||
Required properties:
|
||||
- compatible : Should be "fsl,<soc>-gpio"
|
||||
- reg : Address and length of the register set for the device
|
||||
- interrupts : Should be the port interrupt shared by all 32 pins, if
|
||||
one number. If two numbers, the first one is the interrupt shared
|
||||
by low 16 pins and the second one is for high 16 pins.
|
||||
- gpio-controller : Marks the device node as a gpio controller.
|
||||
- #gpio-cells : Should be two. The first cell is the pin number and
|
||||
the second cell is used to specify optional parameters (currently
|
||||
unused).
|
||||
|
||||
Example:
|
||||
|
||||
gpio0: gpio@73f84000 {
|
||||
compatible = "fsl,imx51-gpio", "fsl,imx31-gpio";
|
||||
reg = <0x73f84000 0x4000>;
|
||||
interrupts = <50 51>;
|
||||
gpio-controller;
|
||||
#gpio-cells = <2>;
|
||||
};
|
|
@ -4,17 +4,45 @@ Specifying GPIO information for devices
|
|||
1) gpios property
|
||||
-----------------
|
||||
|
||||
Nodes that makes use of GPIOs should define them using `gpios' property,
|
||||
format of which is: <&gpio-controller1-phandle gpio1-specifier
|
||||
&gpio-controller2-phandle gpio2-specifier
|
||||
0 /* holes are permitted, means no GPIO 3 */
|
||||
&gpio-controller4-phandle gpio4-specifier
|
||||
...>;
|
||||
Nodes that make use of GPIOs should specify them using one or more
|
||||
properties, each containing a 'gpio-list':
|
||||
|
||||
Note that gpio-specifier length is controller dependent.
|
||||
gpio-list ::= <single-gpio> [gpio-list]
|
||||
single-gpio ::= <gpio-phandle> <gpio-specifier>
|
||||
gpio-phandle : phandle to gpio controller node
|
||||
gpio-specifier : Array of #gpio-cells specifying specific gpio
|
||||
(controller specific)
|
||||
|
||||
GPIO properties should be named "[<name>-]gpios". Exact
|
||||
meaning of each gpios property must be documented in the device tree
|
||||
binding for each device.
|
||||
|
||||
For example, the following could be used to describe gpios pins to use
|
||||
as chip select lines; with chip selects 0, 1 and 3 populated, and chip
|
||||
select 2 left empty:
|
||||
|
||||
gpio1: gpio1 {
|
||||
gpio-controller;
|
||||
#gpio-cells = <2>;
|
||||
};
|
||||
gpio2: gpio2 {
|
||||
gpio-controller;
|
||||
#gpio-cells = <1>;
|
||||
};
|
||||
[...]
|
||||
chipsel-gpios = <&gpio1 12 0>,
|
||||
<&gpio1 13 0>,
|
||||
<0>, /* holes are permitted, means no GPIO 2 */
|
||||
<&gpio2 2>;
|
||||
|
||||
Note that gpio-specifier length is controller dependent. In the
|
||||
above example, &gpio1 uses 2 cells to specify a gpio, while &gpio2
|
||||
only uses one.
|
||||
|
||||
gpio-specifier may encode: bank, pin position inside the bank,
|
||||
whether pin is open-drain and whether pin is logically inverted.
|
||||
Exact meaning of each specifier cell is controller specific, and must
|
||||
be documented in the device tree binding for the device.
|
||||
|
||||
Example of the node using GPIOs:
|
||||
|
||||
|
@ -28,8 +56,8 @@ and empty GPIO flags as accepted by the "qe_pio_e" gpio-controller.
|
|||
2) gpio-controller nodes
|
||||
------------------------
|
||||
|
||||
Every GPIO controller node must have #gpio-cells property defined,
|
||||
this information will be used to translate gpio-specifiers.
|
||||
Every GPIO controller node must both contain an empty "gpio-controller"
|
||||
property, and have #gpio-cells contain the size of the gpio-specifier.
|
||||
|
||||
Example of two SOC GPIO banks defined as gpio-controller nodes:
|
||||
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
NVIDIA Tegra 2 GPIO controller
|
||||
|
||||
Required properties:
|
||||
- compatible : "nvidia,tegra20-gpio"
|
||||
- #gpio-cells : Should be two. The first cell is the pin number and the
|
||||
second cell is used to specify optional parameters:
|
||||
- bit 0 specifies polarity (0 for normal, 1 for inverted)
|
||||
- gpio-controller : Marks the device node as a GPIO controller.
|
|
@ -0,0 +1,22 @@
|
|||
* Freescale (Enhanced) Configurable Serial Peripheral Interface
|
||||
(CSPI/eCSPI) for i.MX
|
||||
|
||||
Required properties:
|
||||
- compatible : Should be "fsl,<soc>-cspi" or "fsl,<soc>-ecspi"
|
||||
- reg : Offset and length of the register set for the device
|
||||
- interrupts : Should contain CSPI/eCSPI interrupt
|
||||
- fsl,spi-num-chipselects : Contains the number of chipselects
|
||||
- cs-gpios : Specifies the gpio pins to be used for chipselects.
|
||||
|
||||
Example:
|
||||
|
||||
ecspi@70010000 {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
compatible = "fsl,imx51-ecspi";
|
||||
reg = <0x70010000 0x4000>;
|
||||
interrupts = <36>;
|
||||
fsl,spi-num-chipselects = <2>;
|
||||
cs-gpios = <&gpio3 24 0>, /* GPIO4_24 */
|
||||
<&gpio3 25 0>; /* GPIO4_25 */
|
||||
};
|
|
@ -0,0 +1,5 @@
|
|||
NVIDIA Tegra 2 SPI device
|
||||
|
||||
Required properties:
|
||||
- compatible : should be "nvidia,tegra20-spi".
|
||||
- gpios : should specify GPIOs used for chipselect.
|
|
@ -0,0 +1,36 @@
|
|||
* UART (Universal Asynchronous Receiver/Transmitter)
|
||||
|
||||
Required properties:
|
||||
- compatible : one of:
|
||||
- "ns8250"
|
||||
- "ns16450"
|
||||
- "ns16550a"
|
||||
- "ns16550"
|
||||
- "ns16750"
|
||||
- "ns16850"
|
||||
- "nvidia,tegra20-uart"
|
||||
- "ibm,qpace-nwp-serial"
|
||||
- "serial" if the port type is unknown.
|
||||
- reg : offset and length of the register set for the device.
|
||||
- interrupts : should contain uart interrupt.
|
||||
- clock-frequency : the input clock frequency for the UART.
|
||||
|
||||
Optional properties:
|
||||
- current-speed : the current active speed of the UART.
|
||||
- reg-offset : offset to apply to the mapbase from the start of the registers.
|
||||
- reg-shift : quantity to shift the register offsets by.
|
||||
- reg-io-width : the size (in bytes) of the IO accesses that should be
|
||||
performed on the device. There are some systems that require 32-bit
|
||||
accesses to the UART (e.g. TI davinci).
|
||||
- used-by-rtas : set to indicate that the port is in use by the OpenFirmware
|
||||
RTAS and should not be registered.
|
||||
|
||||
Example:
|
||||
|
||||
uart@80230000 {
|
||||
compatible = "ns8250";
|
||||
reg = <0x80230000 0x100>;
|
||||
clock-frequency = <3686400>;
|
||||
interrupts = <10>;
|
||||
reg-shift = <2>;
|
||||
};
|
|
@ -481,23 +481,6 @@ Who: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
|
|||
|
||||
----------------------------
|
||||
|
||||
What: namespace cgroup (ns_cgroup)
|
||||
When: 2.6.38
|
||||
Why: The ns_cgroup leads to some problems:
|
||||
* cgroup creation is out-of-control
|
||||
* cgroup name can conflict when pids are looping
|
||||
* it is not possible to have a single process handling
|
||||
a lot of namespaces without falling in a exponential creation time
|
||||
* we may want to create a namespace without creating a cgroup
|
||||
|
||||
The ns_cgroup is replaced by a compatibility flag 'clone_children',
|
||||
where a newly created cgroup will copy the parent cgroup values.
|
||||
The userspace has to manually create a cgroup and add a task to
|
||||
the 'tasks' file.
|
||||
Who: Daniel Lezcano <daniel.lezcano@free.fr>
|
||||
|
||||
----------------------------
|
||||
|
||||
What: iwlwifi disable_hw_scan module parameters
|
||||
When: 2.6.40
|
||||
Why: Hardware scan is the preferred method for iwlwifi devices for
|
||||
|
@ -518,16 +501,6 @@ Who: NeilBrown <neilb@suse.de>
|
|||
|
||||
----------------------------
|
||||
|
||||
What: cancel_rearming_delayed_work[queue]()
|
||||
When: 2.6.39
|
||||
|
||||
Why: The functions have been superceded by cancel_delayed_work_sync()
|
||||
quite some time ago. The conversion is trivial and there is no
|
||||
in-kernel user left.
|
||||
Who: Tejun Heo <tj@kernel.org>
|
||||
|
||||
----------------------------
|
||||
|
||||
What: Legacy, non-standard chassis intrusion detection interface.
|
||||
When: June 2011
|
||||
Why: The adm9240, w83792d and w83793 hardware monitoring drivers have
|
||||
|
@ -600,3 +573,25 @@ Why: Superseded by the UVCIOC_CTRL_QUERY ioctl.
|
|||
Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
|
||||
|
||||
----------------------------
|
||||
|
||||
What: For VIDIOC_S_FREQUENCY the type field must match the device node's type.
|
||||
If not, return -EINVAL.
|
||||
When: 3.2
|
||||
Why: It makes no sense to switch the tuner to radio mode by calling
|
||||
VIDIOC_S_FREQUENCY on a video node, or to switch the tuner to tv mode by
|
||||
calling VIDIOC_S_FREQUENCY on a radio node. This is the first step of a
|
||||
move to more consistent handling of tv and radio tuners.
|
||||
Who: Hans Verkuil <hans.verkuil@cisco.com>
|
||||
|
||||
----------------------------
|
||||
|
||||
What: Opening a radio device node will no longer automatically switch the
|
||||
tuner mode from tv to radio.
|
||||
When: 3.3
|
||||
Why: Just opening a V4L device should not change the state of the hardware
|
||||
like that. It's very unexpected and against the V4L spec. Instead, you
|
||||
switch to radio mode by calling VIDIOC_S_FREQUENCY. This is the second
|
||||
and last step of the move to consistent handling of tv and radio tuners.
|
||||
Who: Hans Verkuil <hans.verkuil@cisco.com>
|
||||
|
||||
----------------------------
|
||||
|
|
|
@ -52,7 +52,7 @@ ata *);
|
|||
void (*put_link) (struct dentry *, struct nameidata *, void *);
|
||||
void (*truncate) (struct inode *);
|
||||
int (*permission) (struct inode *, int, unsigned int);
|
||||
int (*check_acl)(struct inode *, int, unsigned int);
|
||||
int (*check_acl)(struct inode *, int);
|
||||
int (*setattr) (struct dentry *, struct iattr *);
|
||||
int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *);
|
||||
int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
|
||||
|
@ -412,7 +412,7 @@ prototypes:
|
|||
int (*open) (struct inode *, struct file *);
|
||||
int (*flush) (struct file *);
|
||||
int (*release) (struct inode *, struct file *);
|
||||
int (*fsync) (struct file *, int datasync);
|
||||
int (*fsync) (struct file *, loff_t start, loff_t end, int datasync);
|
||||
int (*aio_fsync) (struct kiocb *, int datasync);
|
||||
int (*fasync) (int, struct file *, int);
|
||||
int (*lock) (struct file *, int, struct file_lock *);
|
||||
|
@ -438,9 +438,7 @@ prototypes:
|
|||
|
||||
locking rules:
|
||||
All may block except for ->setlease.
|
||||
No VFS locks held on entry except for ->fsync and ->setlease.
|
||||
|
||||
->fsync() has i_mutex on inode.
|
||||
No VFS locks held on entry except for ->setlease.
|
||||
|
||||
->setlease has the file_list_lock held and must not sleep.
|
||||
|
||||
|
|
|
@ -673,6 +673,22 @@ storage request to complete, or it may attempt to cancel the storage request -
|
|||
in which case the page will not be stored in the cache this time.
|
||||
|
||||
|
||||
BULK INODE PAGE UNCACHE
|
||||
-----------------------
|
||||
|
||||
A convenience routine is provided to perform an uncache on all the pages
|
||||
attached to an inode. This assumes that the pages on the inode correspond on a
|
||||
1:1 basis with the pages in the cache.
|
||||
|
||||
void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
|
||||
struct inode *inode);
|
||||
|
||||
This takes the netfs cookie that the pages were cached with and the inode that
|
||||
the pages are attached to. This function will wait for pages to finish being
|
||||
written to the cache and for the cache to finish with the page generally. No
|
||||
error is returned.
|
||||
|
||||
|
||||
==========================
|
||||
INDEX AND DATA FILE UPDATE
|
||||
==========================
|
||||
|
|
|
@ -40,7 +40,6 @@ Features which NILFS2 does not support yet:
|
|||
- POSIX ACLs
|
||||
- quotas
|
||||
- fsck
|
||||
- resize
|
||||
- defragmentation
|
||||
|
||||
Mount options
|
||||
|
|
|
@ -398,12 +398,33 @@ Currently you can only have FALLOC_FL_PUNCH_HOLE with FALLOC_FL_KEEP_SIZE set,
|
|||
so the i_size should not change when hole punching, even when punching the end of
|
||||
a file off.
|
||||
|
||||
--
|
||||
[mandatory]
|
||||
|
||||
--
|
||||
[mandatory]
|
||||
->get_sb() is gone. Switch to use of ->mount(). Typically it's just
|
||||
a matter of switching from calling get_sb_... to mount_... and changing the
|
||||
function type. If you were doing it manually, just switch from setting ->mnt_root
|
||||
to some pointer to returning that pointer. On errors return ERR_PTR(...).
|
||||
|
||||
--
|
||||
[mandatory]
|
||||
->permission(), generic_permission() and ->check_acl() have lost flags
|
||||
argument; instead of passing IPERM_FLAG_RCU we add MAY_NOT_BLOCK into mask.
|
||||
generic_permission() has also lost the check_acl argument; if you want
|
||||
non-NULL to be used for that inode, put it into ->i_op->check_acl.
|
||||
|
||||
--
|
||||
[mandatory]
|
||||
If you implement your own ->llseek() you must handle SEEK_HOLE and
|
||||
SEEK_DATA. You can handle this by returning -EINVAL, but it would be nicer to
|
||||
support it in some way. The generic handler assumes that the entire file is
|
||||
data and there is a virtual hole at the end of the file. So if the provided
|
||||
offset is less than i_size and SEEK_DATA is specified, return the same offset.
|
||||
If the above is true for the offset and you are given SEEK_HOLE, return the end
|
||||
of the file. If the offset is i_size or greater return -ENXIO in either case.
|
||||
|
||||
[mandatory]
|
||||
If you have your own ->fsync() you must make sure to call
|
||||
filemap_write_and_wait_range() so that all dirty pages are synced out properly.
|
||||
You must also keep in mind that ->fsync() is not called with i_mutex held
|
||||
anymore, so if you require i_mutex locking you must make sure to take it and
|
||||
release it yourself.
|
||||
|
|
|
@ -843,6 +843,7 @@ Provides counts of softirq handlers serviced since boot time, for each cpu.
|
|||
TASKLET: 0 0 0 290
|
||||
SCHED: 27035 26983 26971 26746
|
||||
HRTIMER: 0 0 0 0
|
||||
RCU: 1678 1769 2178 2250
|
||||
|
||||
|
||||
1.3 IDE devices in /proc/ide
|
||||
|
|
|
@ -111,34 +111,6 @@ The following is an example of the kernel boot arguments to attach mtd0
|
|||
to UBI and mount volume "rootfs":
|
||||
ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs
|
||||
|
||||
|
||||
Module Parameters for Debugging
|
||||
===============================
|
||||
|
||||
When UBIFS has been compiled with debugging enabled, there are 2 module
|
||||
parameters that are available to control aspects of testing and debugging.
|
||||
|
||||
debug_chks Selects extra checks that UBIFS can do while running:
|
||||
|
||||
Check Flag value
|
||||
|
||||
General checks 1
|
||||
Check Tree Node Cache (TNC) 2
|
||||
Check indexing tree size 4
|
||||
Check orphan area 8
|
||||
Check old indexing tree 16
|
||||
Check LEB properties (lprops) 32
|
||||
Check leaf nodes and inodes 64
|
||||
|
||||
debug_tsts Selects a mode of testing, as follows:
|
||||
|
||||
Test mode Flag value
|
||||
|
||||
Failure mode for recovery testing 4
|
||||
|
||||
For example, set debug_chks to 3 to enable general and TNC checks.
|
||||
|
||||
|
||||
References
|
||||
==========
|
||||
|
||||
|
|
|
@ -229,6 +229,8 @@ struct super_operations {
|
|||
|
||||
ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
|
||||
ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
|
||||
int (*nr_cached_objects)(struct super_block *);
|
||||
void (*free_cached_objects)(struct super_block *, int);
|
||||
};
|
||||
|
||||
All methods are called without any locks being held, unless otherwise
|
||||
|
@ -301,6 +303,26 @@ or bottom half).
|
|||
|
||||
quota_write: called by the VFS to write to filesystem quota file.
|
||||
|
||||
nr_cached_objects: called by the sb cache shrinking function for the
|
||||
filesystem to return the number of freeable cached objects it contains.
|
||||
Optional.
|
||||
|
||||
free_cached_objects: called by the sb cache shrinking function for the
|
||||
filesystem to scan the number of objects indicated to try to free them.
|
||||
Optional, but any filesystem implementing this method needs to also
|
||||
implement ->nr_cached_objects for it to be called correctly.
|
||||
|
||||
We can't do anything with any errors that the filesystem might have
|
||||
encountered, hence the void return type. This will never be called if
|
||||
the VM is trying to reclaim under GFP_NOFS conditions, hence this
|
||||
method does not need to handle that situation itself.
|
||||
|
||||
Implementations must include conditional reschedule calls inside any
|
||||
scanning loop that is done. This allows the VFS to determine
|
||||
appropriate scan batch sizes without having to worry about whether
|
||||
implementations will cause holdoff problems due to large scan batch
|
||||
sizes.
|
||||
|
||||
Whoever sets up the inode is responsible for filling in the "i_op" field. This
|
||||
is a pointer to a "struct inode_operations" which describes the methods that
|
||||
can be performed on individual inodes.
|
||||
|
@ -333,8 +355,8 @@ struct inode_operations {
|
|||
void * (*follow_link) (struct dentry *, struct nameidata *);
|
||||
void (*put_link) (struct dentry *, struct nameidata *, void *);
|
||||
void (*truncate) (struct inode *);
|
||||
int (*permission) (struct inode *, int, unsigned int);
|
||||
int (*check_acl)(struct inode *, int, unsigned int);
|
||||
int (*permission) (struct inode *, int);
|
||||
int (*check_acl)(struct inode *, int);
|
||||
int (*setattr) (struct dentry *, struct iattr *);
|
||||
int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
|
||||
int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
|
||||
|
@ -423,7 +445,7 @@ otherwise noted.
|
|||
permission: called by the VFS to check for access rights on a POSIX-like
|
||||
filesystem.
|
||||
|
||||
May be called in rcu-walk mode (flags & IPERM_FLAG_RCU). If in rcu-walk
|
||||
May be called in rcu-walk mode (mask & MAY_NOT_BLOCK). If in rcu-walk
|
||||
mode, the filesystem must check the permission without blocking or
|
||||
storing to the inode.
|
||||
|
||||
|
@ -755,7 +777,7 @@ struct file_operations {
|
|||
int (*open) (struct inode *, struct file *);
|
||||
int (*flush) (struct file *);
|
||||
int (*release) (struct inode *, struct file *);
|
||||
int (*fsync) (struct file *, int datasync);
|
||||
int (*fsync) (struct file *, loff_t, loff_t, int datasync);
|
||||
int (*aio_fsync) (struct kiocb *, int datasync);
|
||||
int (*fasync) (int, struct file *, int);
|
||||
int (*lock) (struct file *, int, struct file_lock *);
|
||||
|
|
|
@ -22,6 +22,10 @@ Supported chips:
|
|||
Prefix: 'f71869'
|
||||
Addresses scanned: none, address read from Super I/O config space
|
||||
Datasheet: Available from the Fintek website
|
||||
* Fintek F71869A
|
||||
Prefix: 'f71869a'
|
||||
Addresses scanned: none, address read from Super I/O config space
|
||||
Datasheet: Not public
|
||||
* Fintek F71882FG and F71883FG
|
||||
Prefix: 'f71882fg'
|
||||
Addresses scanned: none, address read from Super I/O config space
|
||||
|
|
|
@ -9,8 +9,8 @@ Supported chips:
|
|||
Socket S1G3: Athlon II, Sempron, Turion II
|
||||
* AMD Family 11h processors:
|
||||
Socket S1G2: Athlon (X2), Sempron (X2), Turion X2 (Ultra)
|
||||
* AMD Family 12h processors: "Llano"
|
||||
* AMD Family 14h processors: "Brazos" (C/E/G-Series)
|
||||
* AMD Family 12h processors: "Llano" (E2/A4/A6/A8-Series)
|
||||
* AMD Family 14h processors: "Brazos" (C/E/G/Z-Series)
|
||||
* AMD Family 15h processors: "Bulldozer"
|
||||
|
||||
Prefix: 'k10temp'
|
||||
|
@ -20,12 +20,16 @@ Supported chips:
|
|||
http://support.amd.com/us/Processor_TechDocs/31116.pdf
|
||||
BIOS and Kernel Developer's Guide (BKDG) for AMD Family 11h Processors:
|
||||
http://support.amd.com/us/Processor_TechDocs/41256.pdf
|
||||
BIOS and Kernel Developer's Guide (BKDG) for AMD Family 12h Processors:
|
||||
http://support.amd.com/us/Processor_TechDocs/41131.pdf
|
||||
BIOS and Kernel Developer's Guide (BKDG) for AMD Family 14h Models 00h-0Fh Processors:
|
||||
http://support.amd.com/us/Processor_TechDocs/43170.pdf
|
||||
Revision Guide for AMD Family 10h Processors:
|
||||
http://support.amd.com/us/Processor_TechDocs/41322.pdf
|
||||
Revision Guide for AMD Family 11h Processors:
|
||||
http://support.amd.com/us/Processor_TechDocs/41788.pdf
|
||||
Revision Guide for AMD Family 12h Processors:
|
||||
http://support.amd.com/us/Processor_TechDocs/44739.pdf
|
||||
Revision Guide for AMD Family 14h Models 00h-0Fh Processors:
|
||||
http://support.amd.com/us/Processor_TechDocs/47534.pdf
|
||||
AMD Family 11h Processor Power and Thermal Data Sheet for Notebooks:
|
||||
|
|
|
@ -68,7 +68,7 @@ Linux カーネルパッチ投稿者向けチェックリスト
|
|||
|
||||
12: CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT, CONFIG_DEBUG_SLAB,
|
||||
CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES, CONFIG_DEBUG_SPINLOCK,
|
||||
CONFIG_DEBUG_SPINLOCK_SLEEP これら全てを同時に有効にして動作確認を
|
||||
CONFIG_DEBUG_ATOMIC_SLEEP これら全てを同時に有効にして動作確認を
|
||||
行ってください。
|
||||
|
||||
13: CONFIG_SMP, CONFIG_PREEMPT を有効にした場合と無効にした場合の両方で
|
||||
|
|
|
@ -999,7 +999,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
|
|||
With this option on every unmap_single operation will
|
||||
result in a hardware IOTLB flush operation as opposed
|
||||
to batching them for performance.
|
||||
|
||||
sp_off [Default Off]
|
||||
By default, super page will be supported if Intel IOMMU
|
||||
has the capability. With this option, super page will
|
||||
not be supported.
|
||||
intremap= [X86-64, Intel-IOMMU]
|
||||
Format: { on (default) | off | nosid }
|
||||
on enable Interrupt Remapping (default)
|
||||
|
@ -1156,10 +1159,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
|
|||
for all guests.
|
||||
Default is 1 (enabled) if in 64bit or 32bit-PAE mode
|
||||
|
||||
kvm-intel.bypass_guest_pf=
|
||||
[KVM,Intel] Disables bypassing of guest page faults
|
||||
on Intel chips. Default is 1 (enabled)
|
||||
|
||||
kvm-intel.ept= [KVM,Intel] Disable extended page tables
|
||||
(virtualized MMU) support on capable Intel chips.
|
||||
Default is 1 (enabled)
|
||||
|
@ -1734,6 +1733,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
|
|||
no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page
|
||||
fault handling.
|
||||
|
||||
no-steal-acc [X86,KVM] Disable paravirtualized steal time accounting.
|
||||
steal time is computed, but won't influence scheduler
|
||||
behaviour
|
||||
|
||||
nolapic [X86-32,APIC] Do not enable or use the local APIC.
|
||||
|
||||
nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
|
||||
|
@ -2012,6 +2015,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
|
|||
the default.
|
||||
off: Turn ECRC off
|
||||
on: Turn ECRC on.
|
||||
realloc reallocate PCI resources if allocations done by BIOS
|
||||
are erroneous.
|
||||
|
||||
pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power
|
||||
Management.
|
||||
|
@ -2595,6 +2600,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
|
|||
unlock ejectable media);
|
||||
m = MAX_SECTORS_64 (don't transfer more
|
||||
than 64 sectors = 32 KB at a time);
|
||||
n = INITIAL_READ10 (force a retry of the
|
||||
initial READ(10) command);
|
||||
o = CAPACITY_OK (accept the capacity
|
||||
reported by the device);
|
||||
r = IGNORE_RESIDUE (the device reports
|
||||
|
|
|
@ -11,7 +11,9 @@ with the difference that the orphan objects are not freed but only
|
|||
reported via /sys/kernel/debug/kmemleak. A similar method is used by the
|
||||
Valgrind tool (memcheck --leak-check) to detect the memory leaks in
|
||||
user-space applications.
|
||||
Kmemleak is supported on x86, arm, powerpc, sparc, sh, microblaze and tile.
|
||||
|
||||
Please check DEBUG_KMEMLEAK dependencies in lib/Kconfig.debug for supported
|
||||
architectures.
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
|
|
@ -534,6 +534,8 @@ Events that are never propagated by the driver:
|
|||
0x2404 System is waking up from hibernation to undock
|
||||
0x2405 System is waking up from hibernation to eject bay
|
||||
0x5010 Brightness level changed/control event
|
||||
0x6000 KEYBOARD: Numlock key pressed
|
||||
0x6005 KEYBOARD: Fn key pressed (TO BE VERIFIED)
|
||||
|
||||
Events that are propagated by the driver to userspace:
|
||||
|
||||
|
@ -545,6 +547,8 @@ Events that are propagated by the driver to userspace:
|
|||
0x3006 Bay hotplug request (hint to power up SATA link when
|
||||
the optical drive tray is ejected)
|
||||
0x4003 Undocked (see 0x2x04), can sleep again
|
||||
0x4010 Docked into hotplug port replicator (non-ACPI dock)
|
||||
0x4011 Undocked from hotplug port replicator (non-ACPI dock)
|
||||
0x500B Tablet pen inserted into its storage bay
|
||||
0x500C Tablet pen removed from its storage bay
|
||||
0x6011 ALARM: battery is too hot
|
||||
|
@ -552,6 +556,7 @@ Events that are propagated by the driver to userspace:
|
|||
0x6021 ALARM: a sensor is too hot
|
||||
0x6022 ALARM: a sensor is extremely hot
|
||||
0x6030 System thermal table changed
|
||||
0x6040 Nvidia Optimus/AC adapter related (TO BE VERIFIED)
|
||||
|
||||
Battery nearly empty alarms are a last resort attempt to get the
|
||||
operating system to hibernate or shutdown cleanly (0x2313), or shutdown
|
||||
|
|
|
@ -555,7 +555,7 @@ also have
|
|||
sync_min
|
||||
sync_max
|
||||
The two values, given as numbers of sectors, indicate a range
|
||||
withing the array where 'check'/'repair' will operate. Must be
|
||||
within the array where 'check'/'repair' will operate. Must be
|
||||
a multiple of chunk_size. When it reaches "sync_max" it will
|
||||
pause, rather than complete.
|
||||
You can use 'select' or 'poll' on "sync_completed" to wait for
|
||||
|
|
|
@ -4,3 +4,5 @@ mmc-dev-attrs.txt
|
|||
- info on SD and MMC device attributes
|
||||
mmc-dev-parts.txt
|
||||
- info on SD and MMC device partitions
|
||||
mmc-async-req.txt
|
||||
- info on mmc asynchronous requests
|
||||
|
|
|
@ -0,0 +1,87 @@
|
|||
Rationale
|
||||
=========
|
||||
|
||||
How significant is the cache maintenance overhead?
|
||||
It depends. Fast eMMC and multiple cache levels with speculative cache
|
||||
pre-fetch makes the cache overhead relatively significant. If the DMA
|
||||
preparations for the next request are done in parallel with the current
|
||||
transfer, the DMA preparation overhead would not affect the MMC performance.
|
||||
The intention of non-blocking (asynchronous) MMC requests is to minimize the
|
||||
time between when an MMC request ends and another MMC request begins.
|
||||
Using mmc_wait_for_req(), the MMC controller is idle while dma_map_sg and
|
||||
dma_unmap_sg are processing. Using non-blocking MMC requests makes it
|
||||
possible to prepare the caches for the next job in parallel with an active
|
||||
MMC request.
|
||||
|
||||
MMC block driver
|
||||
================
|
||||
|
||||
The mmc_blk_issue_rw_rq() in the MMC block driver is made non-blocking.
|
||||
The increase in throughput is proportional to the time it takes to
|
||||
prepare (major part of preparations are dma_map_sg() and dma_unmap_sg())
|
||||
a request and how fast the memory is. The faster the MMC/SD is the
|
||||
more significant the prepare request time becomes. Roughly the expected
|
||||
performance gain is 5% for large writes and 10% on large reads on an L2 cache
|
||||
platform. In power save mode, when clocks run on a lower frequency, the DMA
|
||||
preparation may cost even more. As long as these slower preparations are run
|
||||
in parallel with the transfer, performance won't be affected.
|
||||
|
||||
Details on measurements from IOZone and mmc_test
|
||||
================================================
|
||||
|
||||
https://wiki.linaro.org/WorkingGroups/Kernel/Specs/StoragePerfMMC-async-req
|
||||
|
||||
MMC core API extension
|
||||
======================
|
||||
|
||||
There is one new public function mmc_start_req().
|
||||
It starts a new MMC command request for a host. The function isn't
|
||||
truly non-blocking. If there is an ongoing async request it waits
|
||||
for completion of that request and starts the new one and returns. It
|
||||
doesn't wait for the new request to complete. If there is no ongoing
|
||||
request it starts the new request and returns immediately.
|
||||
|
||||
MMC host extensions
|
||||
===================
|
||||
|
||||
There are two optional members in the mmc_host_ops -- pre_req() and
|
||||
post_req() -- that the host driver may implement in order to move work
|
||||
to before and after the actual mmc_host_ops.request() function is called.
|
||||
In the DMA case pre_req() may do dma_map_sg() and prepare the DMA
|
||||
descriptor, and post_req() runs the dma_unmap_sg().
|
||||
|
||||
Optimize for the first request
|
||||
==============================
|
||||
|
||||
The first request in a series of requests can't be prepared in parallel
|
||||
with the previous transfer, since there is no previous request.
|
||||
The argument is_first_req in pre_req() indicates that there is no previous
|
||||
request. The host driver may optimize for this scenario to minimize
|
||||
the performance loss. A way to optimize for this is to split the current
|
||||
request in two chunks, prepare the first chunk and start the request,
|
||||
and finally prepare the second chunk and start the transfer.
|
||||
|
||||
Pseudocode to handle is_first_req scenario with minimal prepare overhead:
|
||||
|
||||
if (is_first_req && req->size > threshold)
|
||||
/* start MMC transfer for the complete transfer size */
|
||||
mmc_start_command(MMC_CMD_TRANSFER_FULL_SIZE);
|
||||
|
||||
/*
|
||||
* Begin to prepare DMA while cmd is being processed by MMC.
|
||||
* The first chunk of the request should take the same time
|
||||
* to prepare as the "MMC process command time".
|
||||
* If prepare time exceeds MMC cmd time
|
||||
* the transfer is delayed, guesstimate max 4k as first chunk size.
|
||||
*/
|
||||
prepare_1st_chunk_for_dma(req);
|
||||
/* flush pending desc to the DMAC (dmaengine.h) */
|
||||
dma_issue_pending(req->dma_desc);
|
||||
|
||||
prepare_2nd_chunk_for_dma(req);
|
||||
/*
|
||||
* The second issue_pending should be called before MMC runs out
|
||||
* of the first chunk. If the MMC runs out of the first data chunk
|
||||
* before this call, the transfer is delayed.
|
||||
*/
|
||||
dma_issue_pending(req->dma_desc);
|
|
@ -260,7 +260,7 @@ int main(int argc, char *argv[])
|
|||
case 'V': opt_V++; exclusive++; break;
|
||||
|
||||
case '?':
|
||||
fprintf(stderr, usage_msg);
|
||||
fprintf(stderr, "%s", usage_msg);
|
||||
res = 2;
|
||||
goto out;
|
||||
}
|
||||
|
@ -268,13 +268,13 @@ int main(int argc, char *argv[])
|
|||
|
||||
/* options check */
|
||||
if (exclusive > 1) {
|
||||
fprintf(stderr, usage_msg);
|
||||
fprintf(stderr, "%s", usage_msg);
|
||||
res = 2;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (opt_v || opt_V) {
|
||||
printf(version);
|
||||
printf("%s", version);
|
||||
if (opt_V) {
|
||||
res = 0;
|
||||
goto out;
|
||||
|
@ -282,14 +282,14 @@ int main(int argc, char *argv[])
|
|||
}
|
||||
|
||||
if (opt_u) {
|
||||
printf(usage_msg);
|
||||
printf("%s", usage_msg);
|
||||
res = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (opt_h) {
|
||||
printf(usage_msg);
|
||||
printf(help_msg);
|
||||
printf("%s", usage_msg);
|
||||
printf("%s", help_msg);
|
||||
res = 0;
|
||||
goto out;
|
||||
}
|
||||
|
@ -309,7 +309,7 @@ int main(int argc, char *argv[])
|
|||
goto out;
|
||||
} else {
|
||||
/* Just show usage */
|
||||
fprintf(stderr, usage_msg);
|
||||
fprintf(stderr, "%s", usage_msg);
|
||||
res = 2;
|
||||
goto out;
|
||||
}
|
||||
|
@ -320,7 +320,7 @@ int main(int argc, char *argv[])
|
|||
master_ifname = *spp++;
|
||||
|
||||
if (master_ifname == NULL) {
|
||||
fprintf(stderr, usage_msg);
|
||||
fprintf(stderr, "%s", usage_msg);
|
||||
res = 2;
|
||||
goto out;
|
||||
}
|
||||
|
@ -339,7 +339,7 @@ int main(int argc, char *argv[])
|
|||
|
||||
if (slave_ifname == NULL) {
|
||||
if (opt_d || opt_c) {
|
||||
fprintf(stderr, usage_msg);
|
||||
fprintf(stderr, "%s", usage_msg);
|
||||
res = 2;
|
||||
goto out;
|
||||
}
|
||||
|
|
|
@ -106,16 +106,6 @@ inet_peer_maxttl - INTEGER
|
|||
when the number of entries in the pool is very small).
|
||||
Measured in seconds.
|
||||
|
||||
inet_peer_gc_mintime - INTEGER
|
||||
Minimum interval between garbage collection passes. This interval is
|
||||
in effect under high memory pressure on the pool.
|
||||
Measured in seconds.
|
||||
|
||||
inet_peer_gc_maxtime - INTEGER
|
||||
Minimum interval between garbage collection passes. This interval is
|
||||
in effect under low (or absent) memory pressure on the pool.
|
||||
Measured in seconds.
|
||||
|
||||
TCP variables:
|
||||
|
||||
somaxconn - INTEGER
|
||||
|
@ -346,7 +336,7 @@ tcp_orphan_retries - INTEGER
|
|||
when RTO retransmissions remain unacknowledged.
|
||||
See tcp_retries2 for more details.
|
||||
|
||||
The default value is 7.
|
||||
The default value is 8.
|
||||
If your machine is a loaded WEB server,
|
||||
you should think about lowering this value, such sockets
|
||||
may consume significant resources. Cf. tcp_max_orphans.
|
||||
|
@ -394,7 +384,7 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max
|
|||
min: Minimal size of receive buffer used by TCP sockets.
|
||||
It is guaranteed to each TCP socket, even under moderate memory
|
||||
pressure.
|
||||
Default: 8K
|
||||
Default: 1 page
|
||||
|
||||
default: initial size of receive buffer used by TCP sockets.
|
||||
This value overrides net.core.rmem_default used by other protocols.
|
||||
|
@ -483,7 +473,7 @@ tcp_window_scaling - BOOLEAN
|
|||
tcp_wmem - vector of 3 INTEGERs: min, default, max
|
||||
min: Amount of memory reserved for send buffers for TCP sockets.
|
||||
Each TCP socket has rights to use it due to fact of its birth.
|
||||
Default: 4K
|
||||
Default: 1 page
|
||||
|
||||
default: initial size of send buffer used by TCP sockets. This
|
||||
value overrides net.core.wmem_default used by other protocols.
|
||||
|
@ -553,13 +543,13 @@ udp_rmem_min - INTEGER
|
|||
Minimal size of receive buffer used by UDP sockets in moderation.
|
||||
Each UDP socket is able to use the size for receiving data, even if
|
||||
total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
|
||||
Default: 4096
|
||||
Default: 1 page
|
||||
|
||||
udp_wmem_min - INTEGER
|
||||
Minimal size of send buffer used by UDP sockets in moderation.
|
||||
Each UDP socket is able to use the size for sending data, even if
|
||||
total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
|
||||
Default: 4096
|
||||
Default: 1 page
|
||||
|
||||
CIPSOv4 Variables:
|
||||
|
||||
|
@ -1465,10 +1455,17 @@ sctp_mem - vector of 3 INTEGERs: min, pressure, max
|
|||
Default is calculated at boot time from amount of available memory.
|
||||
|
||||
sctp_rmem - vector of 3 INTEGERs: min, default, max
|
||||
See tcp_rmem for a description.
|
||||
Only the first value ("min") is used, "default" and "max" are
|
||||
ignored.
|
||||
|
||||
min: Minimal size of receive buffer used by SCTP socket.
|
||||
It is guaranteed to each SCTP socket (but not association) even
|
||||
under moderate memory pressure.
|
||||
|
||||
Default: 1 page
|
||||
|
||||
sctp_wmem - vector of 3 INTEGERs: min, default, max
|
||||
See tcp_wmem for a description.
|
||||
Currently this tunable has no effect.
|
||||
|
||||
addr_scope_policy - INTEGER
|
||||
Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00
|
||||
|
|
|
@ -0,0 +1,154 @@
|
|||
Netdev features mess and how to get out from it alive
|
||||
=====================================================
|
||||
|
||||
Author:
|
||||
Michał Mirosław <mirq-linux@rere.qmqm.pl>
|
||||
|
||||
|
||||
|
||||
Part I: Feature sets
|
||||
======================
|
||||
|
||||
Long gone are the days when a network card would just take and give packets
|
||||
verbatim. Today's devices add multiple features and bugs (read: offloads)
|
||||
that relieve an OS of various tasks like generating and checking checksums,
|
||||
splitting packets, classifying them. Those capabilities and their state
|
||||
are commonly referred to as netdev features in Linux kernel world.
|
||||
|
||||
There are currently three sets of features relevant to the driver, and
|
||||
one used internally by network core:
|
||||
|
||||
1. netdev->hw_features set contains features whose state may possibly
|
||||
be changed (enabled or disabled) for a particular device by user's
|
||||
request. This set should be initialized in ndo_init callback and not
|
||||
changed later.
|
||||
|
||||
2. netdev->features set contains features which are currently enabled
|
||||
for a device. This should be changed only by network core or in
|
||||
error paths of ndo_set_features callback.
|
||||
|
||||
3. netdev->vlan_features set contains features whose state is inherited
|
||||
by child VLAN devices (limits netdev->features set). This is currently
|
||||
used for all VLAN devices whether tags are stripped or inserted in
|
||||
hardware or software.
|
||||
|
||||
4. netdev->wanted_features set contains feature set requested by user.
|
||||
This set is filtered by ndo_fix_features callback whenever it or
|
||||
some device-specific conditions change. This set is internal to
|
||||
networking core and should not be referenced in drivers.
|
||||
|
||||
|
||||
|
||||
Part II: Controlling enabled features
|
||||
=======================================
|
||||
|
||||
When current feature set (netdev->features) is to be changed, new set
|
||||
is calculated and filtered by calling ndo_fix_features callback
|
||||
and netdev_fix_features(). If the resulting set differs from current
|
||||
set, it is passed to ndo_set_features callback and (if the callback
|
||||
returns success) replaces value stored in netdev->features.
|
||||
NETDEV_FEAT_CHANGE notification is issued after that whenever current
|
||||
set might have changed.
|
||||
|
||||
The following events trigger recalculation:
|
||||
1. device's registration, after ndo_init returned success
|
||||
2. user requested changes in features state
|
||||
3. netdev_update_features() is called
|
||||
|
||||
ndo_*_features callbacks are called with rtnl_lock held. Missing callbacks
|
||||
are treated as always returning success.
|
||||
|
||||
A driver that wants to trigger recalculation must do so by calling
|
||||
netdev_update_features() while holding rtnl_lock. This should not be done
|
||||
from ndo_*_features callbacks. netdev->features should not be modified by
|
||||
driver except by means of ndo_fix_features callback.
|
||||
|
||||
|
||||
|
||||
Part III: Implementation hints
|
||||
================================
|
||||
|
||||
* ndo_fix_features:
|
||||
|
||||
All dependencies between features should be resolved here. The resulting
|
||||
set can be reduced further by networking core imposed limitations (as coded
|
||||
in netdev_fix_features()). For this reason it is safer to disable a feature
|
||||
when its dependencies are not met instead of forcing the dependency on.
|
||||
|
||||
This callback should not modify hardware nor driver state (should be
|
||||
stateless). It can be called multiple times between successive
|
||||
ndo_set_features calls.
|
||||
|
||||
Callback must not alter features contained in NETIF_F_SOFT_FEATURES or
|
||||
NETIF_F_NEVER_CHANGE sets. The exception is NETIF_F_VLAN_CHALLENGED but
|
||||
care must be taken as the change won't affect already configured VLANs.
|
||||
|
||||
* ndo_set_features:
|
||||
|
||||
Hardware should be reconfigured to match passed feature set. The set
|
||||
should not be altered unless some error condition happens that can't
|
||||
be reliably detected in ndo_fix_features. In this case, the callback
|
||||
should update netdev->features to match resulting hardware state.
|
||||
Errors returned are not (and cannot be) propagated anywhere except dmesg.
|
||||
(Note: successful return is zero, >0 means silent error.)
|
||||
|
||||
|
||||
|
||||
Part IV: Features
|
||||
===================
|
||||
|
||||
For current list of features, see include/linux/netdev_features.h.
|
||||
This section describes semantics of some of them.
|
||||
|
||||
* Transmit checksumming
|
||||
|
||||
For complete description, see comments near the top of include/linux/skbuff.h.
|
||||
|
||||
Note: NETIF_F_HW_CSUM is a superset of NETIF_F_IP_CSUM + NETIF_F_IPV6_CSUM.
|
||||
It means that device can fill TCP/UDP-like checksum anywhere in the packets
|
||||
whatever headers there might be.
|
||||
|
||||
* Transmit TCP segmentation offload
|
||||
|
||||
NETIF_F_TSO_ECN means that hardware can properly split packets with CWR bit
|
||||
set, be it TCPv4 (when NETIF_F_TSO is enabled) or TCPv6 (NETIF_F_TSO6).
|
||||
|
||||
* Transmit DMA from high memory
|
||||
|
||||
On platforms where this is relevant, NETIF_F_HIGHDMA signals that
|
||||
ndo_start_xmit can handle skbs with frags in high memory.
|
||||
|
||||
* Transmit scatter-gather
|
||||
|
||||
Those features say that ndo_start_xmit can handle fragmented skbs:
|
||||
NETIF_F_SG --- paged skbs (skb_shinfo()->frags), NETIF_F_FRAGLIST ---
|
||||
chained skbs (skb->next/prev list).
|
||||
|
||||
* Software features
|
||||
|
||||
Features contained in NETIF_F_SOFT_FEATURES are features of networking
|
||||
stack. Driver should not change behaviour based on them.
|
||||
|
||||
* LLTX driver (deprecated for hardware drivers)
|
||||
|
||||
NETIF_F_LLTX should be set in drivers that implement their own locking in
|
||||
transmit path or don't need locking at all (e.g. software tunnels).
|
||||
In ndo_start_xmit, it is recommended to use a try_lock and return
|
||||
NETDEV_TX_LOCKED when the spin lock fails. The locking should also properly
|
||||
protect against other callbacks (the rules you need to find out).
|
||||
|
||||
Don't use it for new drivers.
|
||||
|
||||
* netns-local device
|
||||
|
||||
NETIF_F_NETNS_LOCAL is set for devices that are not allowed to move between
|
||||
network namespaces (e.g. loopback).
|
||||
|
||||
Don't use it in drivers.
|
||||
|
||||
* VLAN challenged
|
||||
|
||||
NETIF_F_VLAN_CHALLENGED should be set for devices which can't cope with VLAN
|
||||
headers. Some drivers set this because the cards can't handle the bigger MTU.
|
||||
[FIXME: Those cases could be fixed in VLAN code by allowing only reduced-MTU
|
||||
VLANs. This may not be useful, though.]
|
|
@ -0,0 +1,128 @@
|
|||
Linux NFC subsystem
|
||||
===================
|
||||
|
||||
The Near Field Communication (NFC) subsystem is required to standardize the
|
||||
NFC device driver development and to create a unified userspace interface.
|
||||
|
||||
This document covers the architecture overview, the device driver interface
|
||||
description and the userspace interface description.
|
||||
|
||||
Architecture overview
|
||||
---------------------
|
||||
|
||||
The NFC subsystem is responsible for:
|
||||
- NFC adapters management;
|
||||
- Polling for targets;
|
||||
- Low-level data exchange;
|
||||
|
||||
The subsystem is divided into several parts. The 'core' is responsible for
|
||||
providing the device driver interface. On the other side, it is also
|
||||
responsible for providing an interface to control operations and low-level
|
||||
data exchange.
|
||||
|
||||
The control operations are available to userspace via generic netlink.
|
||||
|
||||
The low-level data exchange interface is provided by the new socket family
|
||||
PF_NFC. The NFC_SOCKPROTO_RAW performs raw communication with NFC targets.
|
||||
|
||||
|
||||
+--------------------------------------+
|
||||
| USER SPACE |
|
||||
+--------------------------------------+
|
||||
^ ^
|
||||
| low-level | control
|
||||
| data exchange | operations
|
||||
| |
|
||||
| v
|
||||
| +-----------+
|
||||
| AF_NFC | netlink |
|
||||
| socket +-----------+
|
||||
| raw ^
|
||||
| |
|
||||
v v
|
||||
+---------+ +-----------+
|
||||
| rawsock | <--------> | core |
|
||||
+---------+ +-----------+
|
||||
^
|
||||
|
|
||||
v
|
||||
+-----------+
|
||||
| driver |
|
||||
+-----------+
|
||||
|
||||
Device Driver Interface
|
||||
-----------------------
|
||||
|
||||
When registering on the NFC subsystem, the device driver must inform the core
|
||||
of the set of supported NFC protocols and the set of ops callbacks. The ops
|
||||
callbacks that must be implemented are the following:
|
||||
|
||||
* start_poll - setup the device to poll for targets
|
||||
* stop_poll - stop an in-progress polling operation
|
||||
* activate_target - select and initialize one of the targets found
|
||||
* deactivate_target - deselect and deinitialize the selected target
|
||||
* data_exchange - send data and receive the response (transceive operation)
|
||||
|
||||
Userspace interface
|
||||
--------------------
|
||||
|
||||
The userspace interface is divided into control operations and low-level data
|
||||
exchange operation.
|
||||
|
||||
CONTROL OPERATIONS:
|
||||
|
||||
Generic netlink is used to implement the interface to the control operations.
|
||||
The operations are composed of commands and events, all listed below:
|
||||
|
||||
* NFC_CMD_GET_DEVICE - get specific device info or dump the device list
|
||||
* NFC_CMD_START_POLL - set up a specific device to poll for targets
|
||||
* NFC_CMD_STOP_POLL - stop the polling operation in a specific device
|
||||
* NFC_CMD_GET_TARGET - dump the list of targets found by a specific device
|
||||
|
||||
* NFC_EVENT_DEVICE_ADDED - reports an NFC device addition
|
||||
* NFC_EVENT_DEVICE_REMOVED - reports an NFC device removal
|
||||
* NFC_EVENT_TARGETS_FOUND - reports START_POLL results when 1 or more targets
|
||||
are found
|
||||
|
||||
The user must call START_POLL to poll for NFC targets, passing the desired NFC
|
||||
protocols through NFC_ATTR_PROTOCOLS attribute. The device remains in polling
|
||||
state until it finds any target. However, the user can stop the polling
|
||||
operation by calling STOP_POLL command. In this case, it will be checked if
|
||||
the requester of STOP_POLL is the same as the requester of START_POLL.
|
||||
|
||||
If the polling operation finds one or more targets, the event TARGETS_FOUND is
|
||||
sent (including the device id). The user must call GET_TARGET to get the list of
|
||||
all targets found by such device. Each reply message has target attributes with
|
||||
relevant information such as the supported NFC protocols.
|
||||
|
||||
All polling operations requested through one netlink socket are stopped when
|
||||
it's closed.
|
||||
|
||||
LOW-LEVEL DATA EXCHANGE:
|
||||
|
||||
The userspace must use PF_NFC sockets to perform any data communication with
|
||||
targets. All NFC sockets use AF_NFC:
|
||||
|
||||
struct sockaddr_nfc {
|
||||
sa_family_t sa_family;
|
||||
__u32 dev_idx;
|
||||
__u32 target_idx;
|
||||
__u32 nfc_protocol;
|
||||
};
|
||||
|
||||
To establish a connection with one target, the user must create an
|
||||
NFC_SOCKPROTO_RAW socket and call the 'connect' syscall with the sockaddr_nfc
|
||||
struct correctly filled. All information comes from NFC_EVENT_TARGETS_FOUND
|
||||
netlink event. As a target can support more than one NFC protocol, the user
|
||||
must inform which protocol it wants to use.
|
||||
|
||||
Internally, 'connect' will result in an activate_target call to the driver.
|
||||
When the socket is closed, the target is deactivated.
|
||||
|
||||
The data format exchanged through the sockets is NFC protocol dependent. For
|
||||
instance, when communicating with MIFARE tags, the data exchanged are MIFARE
|
||||
commands and their responses.
|
||||
|
||||
The first received packet is the response to the first sent packet and so
|
||||
on. In order to allow valid "empty" responses, every data received has a NULL
|
||||
header of 1 byte.
|
|
@ -7,7 +7,7 @@ This is the driver for the MAC 10/100/1000 on-chip Ethernet controllers
|
|||
(Synopsys IP blocks); it has been fully tested on STLinux platforms.
|
||||
|
||||
Currently this network device driver is for all STM embedded MAC/GMAC
|
||||
(7xxx SoCs). Other platforms start using it i.e. ARM SPEAr.
|
||||
(i.e. 7xxx/5xxx SoCs) and it is known to work on other platforms, e.g. ARM SPEAr.
|
||||
|
||||
DWC Ether MAC 10/100/1000 Universal version 3.41a and DWC Ether MAC 10/100
|
||||
Universal version 4.0 have been used for developing the first code
|
||||
|
@ -71,7 +71,7 @@ Several performance tests on STM platforms showed this optimisation allows to sp
|
|||
the CPU while having the maximum throughput.
|
||||
|
||||
4.4) WOL
|
||||
Wake up on Lan feature through Magic Frame is only supported for the GMAC
|
||||
The Wake-on-LAN feature through Magic and Unicast frames is supported for the GMAC
|
||||
core.
|
||||
|
||||
4.5) DMA descriptors
|
||||
|
@ -91,11 +91,15 @@ LRO is not supported.
|
|||
The driver is compatible with PAL to work with PHY and GPHY devices.
|
||||
|
||||
4.9) Platform information
|
||||
Several information came from the platform; please refer to the
|
||||
driver's Header file in include/linux directory.
|
||||
Several pieces of driver information can be passed through the platform.
|
||||
These are included in the include/linux/stmmac.h header file
|
||||
and detailed below as well:
|
||||
|
||||
struct plat_stmmacenet_data {
|
||||
struct plat_stmmacenet_data {
|
||||
int bus_id;
|
||||
int phy_addr;
|
||||
int interface;
|
||||
struct stmmac_mdio_bus_data *mdio_bus_data;
|
||||
int pbl;
|
||||
int clk_csr;
|
||||
int has_gmac;
|
||||
|
@ -103,67 +107,135 @@ struct plat_stmmacenet_data {
|
|||
int tx_coe;
|
||||
int bugged_jumbo;
|
||||
int pmt;
|
||||
void (*fix_mac_speed)(void *priv, unsigned int speed);
|
||||
void (*bus_setup)(unsigned long ioaddr);
|
||||
#ifdef CONFIG_STM_DRIVERS
|
||||
struct stm_pad_config *pad_config;
|
||||
#endif
|
||||
void *bsp_priv;
|
||||
};
|
||||
int force_sf_dma_mode;
|
||||
void (*fix_mac_speed)(void *priv, unsigned int speed);
|
||||
void (*bus_setup)(void __iomem *ioaddr);
|
||||
int (*init)(struct platform_device *pdev);
|
||||
void (*exit)(struct platform_device *pdev);
|
||||
void *bsp_priv;
|
||||
};
|
||||
|
||||
Where:
|
||||
- pbl (Programmable Burst Length) is maximum number of
|
||||
beats to be transferred in one DMA transaction.
|
||||
GMAC also enables the 4xPBL by default.
|
||||
- fix_mac_speed and bus_setup are used to configure internal target
|
||||
registers (on STM platforms);
|
||||
- has_gmac: GMAC core is on board (get it at run-time in the next step);
|
||||
- bus_id: bus identifier.
|
||||
- tx_coe: core is able to perform the tx csum in HW.
|
||||
- enh_desc: if sets the MAC will use the enhanced descriptor structure.
|
||||
- clk_csr: CSR Clock range selection.
|
||||
- bugged_jumbo: some HWs are not able to perform the csum in HW for
|
||||
over-sized frames due to limited buffer sizes. Setting this
|
||||
flag the csum will be done in SW on JUMBO frames.
|
||||
o bus_id: bus identifier.
|
||||
o phy_addr: the physical address can be passed from the platform.
|
||||
If it is set to -1 the driver will automatically
|
||||
detect it at run-time by probing all the 32 addresses.
|
||||
o interface: PHY device's interface.
|
||||
o mdio_bus_data: specific platform fields for the MDIO bus.
|
||||
o pbl: the Programmable Burst Length is maximum number of beats to
|
||||
be transferred in one DMA transaction.
|
||||
GMAC also enables the 4xPBL by default.
|
||||
o clk_csr: CSR Clock range selection.
|
||||
o has_gmac: uses the GMAC core.
|
||||
o enh_desc: if set, the MAC will use the enhanced descriptor structure.
|
||||
o tx_coe: core is able to perform the tx csum in HW.
|
||||
o bugged_jumbo: some HWs are not able to perform the csum in HW for
|
||||
over-sized frames due to limited buffer sizes.
|
||||
Setting this flag the csum will be done in SW on
|
||||
JUMBO frames.
|
||||
o pmt: core has the embedded power module (optional).
|
||||
o force_sf_dma_mode: force DMA to use the Store and Forward mode
|
||||
instead of the Threshold.
|
||||
o fix_mac_speed: this callback is used for modifying some syscfg registers
|
||||
(on ST SoCs) according to the link speed negotiated by the
|
||||
physical layer.
|
||||
o bus_setup: perform HW setup of the bus. For example, on some ST platforms
|
||||
this field is used to configure the AMBA bridge to generate more
|
||||
efficient STBus traffic.
|
||||
o init/exit: callbacks used for calling a custom initialisation;
|
||||
this is sometimes necessary on some platforms (e.g. ST boxes)
|
||||
where the HW needs to have set some PIO lines or system cfg
|
||||
registers.
|
||||
o custom_cfg: this is a custom configuration that can be passed while
|
||||
initialising the resources.
|
||||
|
||||
struct plat_stmmacphy_data {
|
||||
int bus_id;
|
||||
int phy_addr;
|
||||
unsigned int phy_mask;
|
||||
int interface;
|
||||
int (*phy_reset)(void *priv);
|
||||
void *priv;
|
||||
};
|
||||
Then we have:
|
||||
|
||||
struct stmmac_mdio_bus_data {
|
||||
int bus_id;
|
||||
int (*phy_reset)(void *priv);
|
||||
unsigned int phy_mask;
|
||||
int *irqs;
|
||||
int probed_phy_irq;
|
||||
};
|
||||
|
||||
Where:
|
||||
- bus_id: bus identifier;
|
||||
- phy_addr: physical address used for the attached phy device;
|
||||
set it to -1 to get it at run-time;
|
||||
- interface: physical MII interface mode;
|
||||
- phy_reset: hook to reset HW function.
|
||||
o bus_id: bus identifier;
|
||||
o phy_reset: hook to reset the phy device attached to the bus.
|
||||
o phy_mask: phy mask passed when registering the MDIO bus within the driver.
|
||||
o irqs: list of IRQs, one per PHY.
|
||||
o probed_phy_irq: if irqs is NULL, use this for probed PHY.
|
||||
|
||||
SOURCES:
|
||||
- Kconfig
|
||||
- Makefile
|
||||
- stmmac_main.c: main network device driver;
|
||||
- stmmac_mdio.c: mdio functions;
|
||||
- stmmac_ethtool.c: ethtool support;
|
||||
- stmmac_timer.[ch]: timer code used for mitigating the driver dma interrupts
|
||||
Only tested on ST40 platforms based.
|
||||
- stmmac.h: private driver structure;
|
||||
- common.h: common definitions and VFTs;
|
||||
- descs.h: descriptor structure definitions;
|
||||
- dwmac1000_core.c: GMAC core functions;
|
||||
- dwmac1000_dma.c: dma functions for the GMAC chip;
|
||||
- dwmac1000.h: specific header file for the GMAC;
|
||||
- dwmac100_core: MAC 100 core and dma code;
|
||||
- dwmac100_dma.c: dma funtions for the MAC chip;
|
||||
- dwmac1000.h: specific header file for the MAC;
|
||||
- dwmac_lib.c: generic DMA functions shared among chips
|
||||
- enh_desc.c: functions for handling enhanced descriptors
|
||||
- norm_desc.c: functions for handling normal descriptors
|
||||
Below is an example of how the structures above are used on ST platforms.
|
||||
|
||||
TODO:
|
||||
- XGMAC controller is not supported.
|
||||
- Review the timer optimisation code to use an embedded device that seems to be
|
||||
static struct plat_stmmacenet_data stxYYY_ethernet_platform_data = {
|
||||
.pbl = 32,
|
||||
.has_gmac = 0,
|
||||
.enh_desc = 0,
|
||||
.fix_mac_speed = stxYYY_ethernet_fix_mac_speed,
|
||||
|
|
||||
|-> to write an internal syscfg
|
||||
| on this platform when the
|
||||
| link speed changes from 10 to
|
||||
| 100 and vice versa
|
||||
.init = &stmmac_claim_resource,
|
||||
|
|
||||
|-> On ST SoC this calls own "PAD"
|
||||
| manager framework to claim
|
||||
| all the resources necessary
|
||||
| (GPIO ...). The .custom_cfg field
|
||||
| is used to pass a custom config.
|
||||
};
|
||||
|
||||
Below is the usage of the stmmac_mdio_bus_data: on this SoC, in fact,
|
||||
there are two MAC cores: one MAC is for MDIO Bus/PHY emulation
|
||||
with fixed_link support.
|
||||
|
||||
static struct stmmac_mdio_bus_data stmmac1_mdio_bus = {
|
||||
.bus_id = 1,
|
||||
|
|
||||
|-> phy device on the bus_id 1
|
||||
.phy_reset = phy_reset,
|
||||
|
|
||||
|-> function to provide the phy_reset on this board
|
||||
.phy_mask = 0,
|
||||
};
|
||||
|
||||
static struct fixed_phy_status stmmac0_fixed_phy_status = {
|
||||
.link = 1,
|
||||
.speed = 100,
|
||||
.duplex = 1,
|
||||
};
|
||||
|
||||
During the board's device_init we can configure the first
|
||||
MAC for fixed_link by calling:
|
||||
fixed_phy_add(PHY_POLL, 1, &stmmac0_fixed_phy_status);
|
||||
and the second one, with a real PHY device attached to the bus,
|
||||
by using the stmmac_mdio_bus_data structure (to provide the id, the
|
||||
reset procedure etc).
|
||||
|
||||
4.10) List of source files:
|
||||
o Kconfig
|
||||
o Makefile
|
||||
o stmmac_main.c: main network device driver;
|
||||
o stmmac_mdio.c: mdio functions;
|
||||
o stmmac_ethtool.c: ethtool support;
|
||||
o stmmac_timer.[ch]: timer code used for mitigating the driver dma interrupts
|
||||
Only tested on ST40-based platforms.
|
||||
o stmmac.h: private driver structure;
|
||||
o common.h: common definitions and VFTs;
|
||||
o descs.h: descriptor structure definitions;
|
||||
o dwmac1000_core.c: GMAC core functions;
|
||||
o dwmac1000_dma.c: dma functions for the GMAC chip;
|
||||
o dwmac1000.h: specific header file for the GMAC;
|
||||
o dwmac100_core: MAC 100 core and dma code;
|
||||
o dwmac100_dma.c: dma functions for the MAC chip;
|
||||
o dwmac100.h: specific header file for the MAC;
|
||||
o dwmac_lib.c: generic DMA functions shared among chips
|
||||
o enh_desc.c: functions for handling enhanced descriptors
|
||||
o norm_desc.c: functions for handling normal descriptors
|
||||
|
||||
5) TODO:
|
||||
o XGMAC is not supported.
|
||||
o Review the timer optimisation code to use an embedded device that will be
|
||||
available in new chip generations.
|
||||
|
|
|
@ -506,8 +506,8 @@ routines. Nevertheless, different callback pointers are used in case there is a
|
|||
situation where it actually matters.
|
||||
|
||||
|
||||
Device Power Domains
|
||||
--------------------
|
||||
Device Power Management Domains
|
||||
-------------------------------
|
||||
Sometimes devices share reference clocks or other power resources. In those
|
||||
cases it generally is not possible to put devices into low-power states
|
||||
individually. Instead, a set of devices sharing a power resource can be put
|
||||
|
@ -516,63 +516,24 @@ power resource. Of course, they also need to be put into the full-power state
|
|||
together, by turning the shared power resource on. A set of devices with this
|
||||
property is often referred to as a power domain.
|
||||
|
||||
Support for power domains is provided through the pwr_domain field of struct
|
||||
device. This field is a pointer to an object of type struct dev_power_domain,
|
||||
Support for power domains is provided through the pm_domain field of struct
|
||||
device. This field is a pointer to an object of type struct dev_pm_domain,
|
||||
defined in include/linux/pm.h, providing a set of power management callbacks
|
||||
analogous to the subsystem-level and device driver callbacks that are executed
|
||||
for the given device during all power transitions, in addition to the respective
|
||||
subsystem-level callbacks. Specifically, the power domain "suspend" callbacks
|
||||
(i.e. ->runtime_suspend(), ->suspend(), ->freeze(), ->poweroff(), etc.) are
|
||||
executed after the analogous subsystem-level callbacks, while the power domain
|
||||
"resume" callbacks (i.e. ->runtime_resume(), ->resume(), ->thaw(), ->restore,
|
||||
etc.) are executed before the analogous subsystem-level callbacks. Error codes
|
||||
returned by the "suspend" and "resume" power domain callbacks are ignored.
|
||||
for the given device during all power transitions, instead of the respective
|
||||
subsystem-level callbacks. Specifically, if a device's pm_domain pointer is
|
||||
not NULL, the ->suspend() callback from the object pointed to by it will be
|
||||
executed instead of its subsystem's (e.g. bus type's) ->suspend() callback and
|
||||
analogously for all of the remaining callbacks. In other words, power management
|
||||
domain callbacks, if defined for the given device, always take precedence over
|
||||
the callbacks provided by the device's subsystem (e.g. bus type).
|
||||
|
||||
Power domain ->runtime_idle() callback is executed before the subsystem-level
|
||||
->runtime_idle() callback and the result returned by it is not ignored. Namely,
|
||||
if it returns error code, the subsystem-level ->runtime_idle() callback will not
|
||||
be called and the helper function rpm_idle() executing it will return error
|
||||
code. This mechanism is intended to help platforms where saving device state
|
||||
is a time consuming operation and should only be carried out if all devices
|
||||
in the power domain are idle, before turning off the shared power resource(s).
|
||||
Namely, the power domain ->runtime_idle() callback may return error code until
|
||||
the pm_runtime_idle() helper (or its asychronous version) has been called for
|
||||
all devices in the power domain (it is recommended that the returned error code
|
||||
be -EBUSY in those cases), preventing the subsystem-level ->runtime_idle()
|
||||
callback from being run prematurely.
|
||||
|
||||
The support for device power domains is only relevant to platforms needing to
|
||||
use the same subsystem-level (e.g. platform bus type) and device driver power
|
||||
management callbacks in many different power domain configurations and wanting
|
||||
to avoid incorporating the support for power domains into the subsystem-level
|
||||
callbacks. The other platforms need not implement it or take it into account
|
||||
in any way.
|
||||
|
||||
|
||||
System Devices
|
||||
--------------
|
||||
System devices (sysdevs) follow a slightly different API, which can be found in
|
||||
|
||||
include/linux/sysdev.h
|
||||
drivers/base/sys.c
|
||||
|
||||
System devices will be suspended with interrupts disabled, and after all other
|
||||
devices have been suspended. On resume, they will be resumed before any other
|
||||
devices, and also with interrupts disabled. These things occur in special
|
||||
"sysdev_driver" phases, which affect only system devices.
|
||||
|
||||
Thus, after the suspend_noirq (or freeze_noirq or poweroff_noirq) phase, when
|
||||
the non-boot CPUs are all offline and IRQs are disabled on the remaining online
|
||||
CPU, then a sysdev_driver.suspend phase is carried out, and the system enters a
|
||||
sleep state (or a system image is created). During resume (or after the image
|
||||
has been created or loaded) a sysdev_driver.resume phase is carried out, IRQs
|
||||
are enabled on the only online CPU, the non-boot CPUs are enabled, and the
|
||||
resume_noirq (or thaw_noirq or restore_noirq) phase begins.
|
||||
|
||||
Code to actually enter and exit the system-wide low power state sometimes
|
||||
involves hardware details that are only known to the boot firmware, and
|
||||
may leave a CPU running software (from SRAM or flash memory) that monitors
|
||||
the system and manages its wakeup sequence.
|
||||
The support for device power management domains is only relevant to platforms
|
||||
needing to use the same device driver power management callbacks in many
|
||||
different power domain configurations and wanting to avoid incorporating the
|
||||
support for power domains into subsystem-level callbacks, for example by
|
||||
modifying the platform bus type. Other platforms need not implement it or take
|
||||
it into account in any way.
|
||||
|
||||
|
||||
Device Low Power (suspend) States
|
||||
|
@ -643,7 +604,7 @@ state temporarily, for example so that its system wakeup capability can be
|
|||
disabled. This all depends on the hardware and the design of the subsystem and
|
||||
device driver in question.
|
||||
|
||||
During system-wide resume from a sleep state it's best to put devices into the
|
||||
full-power state, as explained in Documentation/power/runtime_pm.txt. Refer to
|
||||
that document for more information regarding this particular issue as well as
|
||||
During system-wide resume from a sleep state it's easiest to put devices into
|
||||
the full-power state, as explained in Documentation/power/runtime_pm.txt. Refer
|
||||
to that document for more information regarding this particular issue as well as
|
||||
for information on the device runtime power management framework in general.
|
||||
|
|
|
@ -321,6 +321,8 @@ opp_init_cpufreq_table - cpufreq framework typically is initialized with
|
|||
addition to CONFIG_PM as power management feature is required to
|
||||
dynamically scale voltage and frequency in a system.
|
||||
|
||||
opp_free_cpufreq_table - Free up the table allocated by opp_init_cpufreq_table
|
||||
|
||||
7. Data Structures
|
||||
==================
|
||||
Typically an SoC contains multiple voltage domains which are variable. Each
|
||||
|
|
|
@ -1,39 +1,39 @@
|
|||
Run-time Power Management Framework for I/O Devices
|
||||
Runtime Power Management Framework for I/O Devices
|
||||
|
||||
(C) 2009-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
|
||||
(C) 2010 Alan Stern <stern@rowland.harvard.edu>
|
||||
|
||||
1. Introduction
|
||||
|
||||
Support for run-time power management (run-time PM) of I/O devices is provided
|
||||
Support for runtime power management (runtime PM) of I/O devices is provided
|
||||
at the power management core (PM core) level by means of:
|
||||
|
||||
* The power management workqueue pm_wq in which bus types and device drivers can
|
||||
put their PM-related work items. It is strongly recommended that pm_wq be
|
||||
used for queuing all work items related to run-time PM, because this allows
|
||||
used for queuing all work items related to runtime PM, because this allows
|
||||
them to be synchronized with system-wide power transitions (suspend to RAM,
|
||||
hibernation and resume from system sleep states). pm_wq is declared in
|
||||
include/linux/pm_runtime.h and defined in kernel/power/main.c.
|
||||
|
||||
* A number of run-time PM fields in the 'power' member of 'struct device' (which
|
||||
* A number of runtime PM fields in the 'power' member of 'struct device' (which
|
||||
is of the type 'struct dev_pm_info', defined in include/linux/pm.h) that can
|
||||
be used for synchronizing run-time PM operations with one another.
|
||||
be used for synchronizing runtime PM operations with one another.
|
||||
|
||||
* Three device run-time PM callbacks in 'struct dev_pm_ops' (defined in
|
||||
* Three device runtime PM callbacks in 'struct dev_pm_ops' (defined in
|
||||
include/linux/pm.h).
|
||||
|
||||
* A set of helper functions defined in drivers/base/power/runtime.c that can be
|
||||
used for carrying out run-time PM operations in such a way that the
|
||||
used for carrying out runtime PM operations in such a way that the
|
||||
synchronization between them is taken care of by the PM core. Bus types and
|
||||
device drivers are encouraged to use these functions.
|
||||
|
||||
The run-time PM callbacks present in 'struct dev_pm_ops', the device run-time PM
|
||||
The runtime PM callbacks present in 'struct dev_pm_ops', the device runtime PM
|
||||
fields of 'struct dev_pm_info' and the core helper functions provided for
|
||||
run-time PM are described below.
|
||||
runtime PM are described below.
|
||||
|
||||
2. Device Run-time PM Callbacks
|
||||
2. Device Runtime PM Callbacks
|
||||
|
||||
There are three device run-time PM callbacks defined in 'struct dev_pm_ops':
|
||||
There are three device runtime PM callbacks defined in 'struct dev_pm_ops':
|
||||
|
||||
struct dev_pm_ops {
|
||||
...
|
||||
|
@ -72,11 +72,11 @@ knows what to do to handle the device).
|
|||
not mean that the device has been put into a low power state. It is
|
||||
supposed to mean, however, that the device will not process data and will
|
||||
not communicate with the CPU(s) and RAM until the subsystem-level resume
|
||||
callback is executed for it. The run-time PM status of a device after
|
||||
callback is executed for it. The runtime PM status of a device after
|
||||
successful execution of the subsystem-level suspend callback is 'suspended'.
|
||||
|
||||
* If the subsystem-level suspend callback returns -EBUSY or -EAGAIN,
|
||||
the device's run-time PM status is 'active', which means that the device
|
||||
the device's runtime PM status is 'active', which means that the device
|
||||
_must_ be fully operational afterwards.
|
||||
|
||||
* If the subsystem-level suspend callback returns an error code different
|
||||
|
@ -104,7 +104,7 @@ the device).
|
|||
|
||||
* Once the subsystem-level resume callback has completed successfully, the PM
|
||||
core regards the device as fully operational, which means that the device
|
||||
_must_ be able to complete I/O operations as needed. The run-time PM status
|
||||
_must_ be able to complete I/O operations as needed. The runtime PM status
|
||||
of the device is then 'active'.
|
||||
|
||||
* If the subsystem-level resume callback returns an error code, the PM core
|
||||
|
@ -130,7 +130,7 @@ device in that case. The value returned by this callback is ignored by the PM
|
|||
core.
|
||||
|
||||
The helper functions provided by the PM core, described in Section 4, guarantee
|
||||
that the following constraints are met with respect to the bus type's run-time
|
||||
that the following constraints are met with respect to the bus type's runtime
|
||||
PM callbacks:
|
||||
|
||||
(1) The callbacks are mutually exclusive (e.g. it is forbidden to execute
|
||||
|
@ -142,7 +142,7 @@ PM callbacks:
|
|||
|
||||
(2) ->runtime_idle() and ->runtime_suspend() can only be executed for 'active'
|
||||
devices (i.e. the PM core will only execute ->runtime_idle() or
|
||||
->runtime_suspend() for the devices the run-time PM status of which is
|
||||
->runtime_suspend() for the devices the runtime PM status of which is
|
||||
'active').
|
||||
|
||||
(3) ->runtime_idle() and ->runtime_suspend() can only be executed for a device
|
||||
|
@ -151,7 +151,7 @@ PM callbacks:
|
|||
flag of which is set.
|
||||
|
||||
(4) ->runtime_resume() can only be executed for 'suspended' devices (i.e. the
|
||||
PM core will only execute ->runtime_resume() for the devices the run-time
|
||||
PM core will only execute ->runtime_resume() for the devices the runtime
|
||||
PM status of which is 'suspended').
|
||||
|
||||
Additionally, the helper functions provided by the PM core obey the following
|
||||
|
@ -171,9 +171,9 @@ rules:
|
|||
scheduled requests to execute the other callbacks for the same device,
|
||||
except for scheduled autosuspends.
|
||||
|
||||
3. Run-time PM Device Fields
|
||||
3. Runtime PM Device Fields
|
||||
|
||||
The following device run-time PM fields are present in 'struct dev_pm_info', as
|
||||
The following device runtime PM fields are present in 'struct dev_pm_info', as
|
||||
defined in include/linux/pm.h:
|
||||
|
||||
struct timer_list suspend_timer;
|
||||
|
@ -205,7 +205,7 @@ defined in include/linux/pm.h:
|
|||
|
||||
unsigned int disable_depth;
|
||||
- used for disabling the helper functions (they work normally if this is
|
||||
equal to zero); the initial value of it is 1 (i.e. run-time PM is
|
||||
equal to zero); the initial value of it is 1 (i.e. runtime PM is
|
||||
initially disabled for all devices)
|
||||
|
||||
unsigned int runtime_error;
|
||||
|
@ -229,10 +229,10 @@ defined in include/linux/pm.h:
|
|||
suspend to complete; means "start a resume as soon as you've suspended"
|
||||
|
||||
unsigned int run_wake;
|
||||
- set if the device is capable of generating run-time wake-up events
|
||||
- set if the device is capable of generating runtime wake-up events
|
||||
|
||||
enum rpm_status runtime_status;
|
||||
- the run-time PM status of the device; this field's initial value is
|
||||
- the runtime PM status of the device; this field's initial value is
|
||||
RPM_SUSPENDED, which means that each device is initially regarded by the
|
||||
PM core as 'suspended', regardless of its real hardware status
|
||||
|
||||
|
@ -243,7 +243,7 @@ defined in include/linux/pm.h:
|
|||
and pm_runtime_forbid() helper functions
|
||||
|
||||
unsigned int no_callbacks;
|
||||
- indicates that the device does not use the run-time PM callbacks (see
|
||||
- indicates that the device does not use the runtime PM callbacks (see
|
||||
Section 8); it may be modified only by the pm_runtime_no_callbacks()
|
||||
helper function
|
||||
|
||||
|
@ -270,16 +270,16 @@ defined in include/linux/pm.h:
|
|||
|
||||
All of the above fields are members of the 'power' member of 'struct device'.
|
||||
|
||||
4. Run-time PM Device Helper Functions
|
||||
4. Runtime PM Device Helper Functions
|
||||
|
||||
The following run-time PM helper functions are defined in
|
||||
The following runtime PM helper functions are defined in
|
||||
drivers/base/power/runtime.c and include/linux/pm_runtime.h:
|
||||
|
||||
void pm_runtime_init(struct device *dev);
|
||||
- initialize the device run-time PM fields in 'struct dev_pm_info'
|
||||
- initialize the device runtime PM fields in 'struct dev_pm_info'
|
||||
|
||||
void pm_runtime_remove(struct device *dev);
|
||||
- make sure that the run-time PM of the device will be disabled after
|
||||
- make sure that the runtime PM of the device will be disabled after
|
||||
removing the device from device hierarchy
|
||||
|
||||
int pm_runtime_idle(struct device *dev);
|
||||
|
@ -289,9 +289,10 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
|
|||
|
||||
int pm_runtime_suspend(struct device *dev);
|
||||
- execute the subsystem-level suspend callback for the device; returns 0 on
|
||||
success, 1 if the device's run-time PM status was already 'suspended', or
|
||||
success, 1 if the device's runtime PM status was already 'suspended', or
|
||||
error code on failure, where -EAGAIN or -EBUSY means it is safe to attempt
|
||||
to suspend the device again in future
|
||||
to suspend the device again in future and -EACCES means that
|
||||
'power.disable_depth' is different from 0
|
||||
|
||||
int pm_runtime_autosuspend(struct device *dev);
|
||||
- same as pm_runtime_suspend() except that the autosuspend delay is taken
|
||||
|
@ -301,10 +302,11 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
|
|||
|
||||
int pm_runtime_resume(struct device *dev);
|
||||
- execute the subsystem-level resume callback for the device; returns 0 on
|
||||
success, 1 if the device's run-time PM status was already 'active' or
|
||||
success, 1 if the device's runtime PM status was already 'active' or
|
||||
error code on failure, where -EAGAIN means it may be safe to attempt to
|
||||
resume the device again in future, but 'power.runtime_error' should be
|
||||
checked additionally
|
||||
checked additionally, and -EACCES means that 'power.disable_depth' is
|
||||
different from 0
|
||||
|
||||
int pm_request_idle(struct device *dev);
|
||||
- submit a request to execute the subsystem-level idle callback for the
|
||||
|
@ -321,7 +323,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
|
|||
device in future, where 'delay' is the time to wait before queuing up a
|
||||
suspend work item in pm_wq, in milliseconds (if 'delay' is zero, the work
|
||||
item is queued up immediately); returns 0 on success, 1 if the device's PM
|
||||
run-time status was already 'suspended', or error code if the request
|
||||
runtime status was already 'suspended', or error code if the request
|
||||
hasn't been scheduled (or queued up if 'delay' is 0); if the execution of
|
||||
->runtime_suspend() is already scheduled and not yet expired, the new
|
||||
value of 'delay' will be used as the time to wait
|
||||
|
@ -329,7 +331,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
|
|||
int pm_request_resume(struct device *dev);
|
||||
- submit a request to execute the subsystem-level resume callback for the
|
||||
device (the request is represented by a work item in pm_wq); returns 0 on
|
||||
success, 1 if the device's run-time PM status was already 'active', or
|
||||
success, 1 if the device's runtime PM status was already 'active', or
|
||||
error code if the request hasn't been queued up
|
||||
|
||||
void pm_runtime_get_noresume(struct device *dev);
|
||||
|
@ -367,22 +369,32 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
|
|||
pm_runtime_autosuspend(dev) and return its result
|
||||
|
||||
void pm_runtime_enable(struct device *dev);
|
||||
- enable the run-time PM helper functions to run the device bus type's
|
||||
run-time PM callbacks described in Section 2
|
||||
- decrement the device's 'power.disable_depth' field; if that field is equal
|
||||
to zero, the runtime PM helper functions can execute subsystem-level
|
||||
callbacks described in Section 2 for the device
|
||||
|
||||
int pm_runtime_disable(struct device *dev);
|
||||
- prevent the run-time PM helper functions from running subsystem-level
|
||||
run-time PM callbacks for the device, make sure that all of the pending
|
||||
run-time PM operations on the device are either completed or canceled;
|
||||
- increment the device's 'power.disable_depth' field (if the value of that
|
||||
field was previously zero, this prevents subsystem-level runtime PM
|
||||
callbacks from being run for the device), make sure that all of the pending
|
||||
runtime PM operations on the device are either completed or canceled;
|
||||
returns 1 if there was a resume request pending and it was necessary to
|
||||
execute the subsystem-level resume callback for the device to satisfy that
|
||||
request, otherwise 0 is returned
|
||||
|
||||
int pm_runtime_barrier(struct device *dev);
|
||||
- check if there's a resume request pending for the device and resume it
|
||||
(synchronously) in that case, cancel any other pending runtime PM requests
|
||||
regarding it and wait for all runtime PM operations on it in progress to
|
||||
complete; returns 1 if there was a resume request pending and it was
|
||||
necessary to execute the subsystem-level resume callback for the device to
|
||||
satisfy that request, otherwise 0 is returned
|
||||
|
||||
void pm_suspend_ignore_children(struct device *dev, bool enable);
|
||||
- set/unset the power.ignore_children flag of the device
|
||||
|
||||
int pm_runtime_set_active(struct device *dev);
|
||||
- clear the device's 'power.runtime_error' flag, set the device's run-time
|
||||
- clear the device's 'power.runtime_error' flag, set the device's runtime
|
||||
PM status to 'active' and update its parent's counter of 'active'
|
||||
children as appropriate (it is only valid to use this function if
|
||||
'power.runtime_error' is set or 'power.disable_depth' is greater than
|
||||
|
@ -390,7 +402,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
|
|||
which is not active and the 'power.ignore_children' flag of which is unset
|
||||
|
||||
void pm_runtime_set_suspended(struct device *dev);
|
||||
- clear the device's 'power.runtime_error' flag, set the device's run-time
|
||||
- clear the device's 'power.runtime_error' flag, set the device's runtime
|
||||
PM status to 'suspended' and update its parent's counter of 'active'
|
||||
children as appropriate (it is only valid to use this function if
|
||||
'power.runtime_error' is set or 'power.disable_depth' is greater than
|
||||
|
@ -400,6 +412,9 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
|
|||
- return true if the device's runtime PM status is 'suspended' and its
|
||||
'power.disable_depth' field is equal to zero, or false otherwise
|
||||
|
||||
bool pm_runtime_status_suspended(struct device *dev);
|
||||
- return true if the device's runtime PM status is 'suspended'
|
||||
|
||||
void pm_runtime_allow(struct device *dev);
|
||||
- set the power.runtime_auto flag for the device and decrease its usage
|
||||
counter (used by the /sys/devices/.../power/control interface to
|
||||
|
@ -411,7 +426,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
|
|||
effectively prevent the device from being power managed at run time)
|
||||
|
||||
void pm_runtime_no_callbacks(struct device *dev);
|
||||
- set the power.no_callbacks flag for the device and remove the run-time
|
||||
- set the power.no_callbacks flag for the device and remove the runtime
|
||||
PM attributes from /sys/devices/.../power (or prevent them from being
|
||||
added when the device is registered)
|
||||
|
||||
|
@ -431,7 +446,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
|
|||
|
||||
void pm_runtime_set_autosuspend_delay(struct device *dev, int delay);
|
||||
- set the power.autosuspend_delay value to 'delay' (expressed in
|
||||
milliseconds); if 'delay' is negative then run-time suspends are
|
||||
milliseconds); if 'delay' is negative then runtime suspends are
|
||||
prevented
|
||||
|
||||
unsigned long pm_runtime_autosuspend_expiration(struct device *dev);
|
||||
|
@ -470,76 +485,92 @@ pm_runtime_resume()
|
|||
pm_runtime_get_sync()
|
||||
pm_runtime_put_sync_suspend()
|
||||
|
||||
5. Run-time PM Initialization, Device Probing and Removal
|
||||
5. Runtime PM Initialization, Device Probing and Removal
|
||||
|
||||
Initially, the run-time PM is disabled for all devices, which means that the
|
||||
majority of the run-time PM helper funtions described in Section 4 will return
|
||||
Initially, the runtime PM is disabled for all devices, which means that the
|
||||
majority of the runtime PM helper functions described in Section 4 will return
|
||||
-EAGAIN until pm_runtime_enable() is called for the device.
|
||||
|
||||
In addition to that, the initial run-time PM status of all devices is
|
||||
In addition to that, the initial runtime PM status of all devices is
|
||||
'suspended', but it need not reflect the actual physical state of the device.
|
||||
Thus, if the device is initially active (i.e. it is able to process I/O), its
|
||||
run-time PM status must be changed to 'active', with the help of
|
||||
runtime PM status must be changed to 'active', with the help of
|
||||
pm_runtime_set_active(), before pm_runtime_enable() is called for the device.
|
||||
|
||||
However, if the device has a parent and the parent's run-time PM is enabled,
|
||||
However, if the device has a parent and the parent's runtime PM is enabled,
|
||||
calling pm_runtime_set_active() for the device will affect the parent, unless
|
||||
the parent's 'power.ignore_children' flag is set. Namely, in that case the
|
||||
parent won't be able to suspend at run time, using the PM core's helper
|
||||
functions, as long as the child's status is 'active', even if the child's
|
||||
run-time PM is still disabled (i.e. pm_runtime_enable() hasn't been called for
|
||||
runtime PM is still disabled (i.e. pm_runtime_enable() hasn't been called for
|
||||
the child yet or pm_runtime_disable() has been called for it). For this reason,
|
||||
once pm_runtime_set_active() has been called for the device, pm_runtime_enable()
|
||||
should be called for it too as soon as reasonably possible or its run-time PM
|
||||
should be called for it too as soon as reasonably possible or its runtime PM
|
||||
status should be changed back to 'suspended' with the help of
|
||||
pm_runtime_set_suspended().
|
||||
|
||||
If the default initial run-time PM status of the device (i.e. 'suspended')
|
||||
If the default initial runtime PM status of the device (i.e. 'suspended')
|
||||
reflects the actual state of the device, its bus type's or its driver's
|
||||
->probe() callback will likely need to wake it up using one of the PM core's
|
||||
helper functions described in Section 4. In that case, pm_runtime_resume()
|
||||
should be used. Of course, for this purpose the device's run-time PM has to be
|
||||
should be used. Of course, for this purpose the device's runtime PM has to be
|
||||
enabled earlier by calling pm_runtime_enable().
|
||||
|
||||
If the device bus type's or driver's ->probe() or ->remove() callback runs
|
||||
If the device bus type's or driver's ->probe() callback runs
|
||||
pm_runtime_suspend() or pm_runtime_idle() or their asynchronous counterparts,
|
||||
they will fail returning -EAGAIN, because the device's usage counter is
|
||||
incremented by the core before executing ->probe() and ->remove(). Still, it
|
||||
may be desirable to suspend the device as soon as ->probe() or ->remove() has
|
||||
finished, so the PM core uses pm_runtime_idle_sync() to invoke the
|
||||
subsystem-level idle callback for the device at that time.
|
||||
incremented by the driver core before executing ->probe(). Still, it may be
|
||||
desirable to suspend the device as soon as ->probe() has finished, so the driver
|
||||
core uses pm_runtime_put_sync() to invoke the subsystem-level idle callback for
|
||||
the device at that time.
|
||||
|
||||
Moreover, the driver core prevents runtime PM callbacks from racing with the bus
|
||||
notifier callback in __device_release_driver(), which is necessary, because the
|
||||
notifier is used by some subsystems to carry out operations affecting the
|
||||
runtime PM functionality. It does so by calling pm_runtime_get_sync() before
|
||||
driver_sysfs_remove() and the BUS_NOTIFY_UNBIND_DRIVER notifications. This
|
||||
resumes the device if it's in the suspended state and prevents it from
|
||||
being suspended again while those routines are being executed.
|
||||
|
||||
To allow bus types and drivers to put devices into the suspended state by
|
||||
calling pm_runtime_suspend() from their ->remove() routines, the driver core
|
||||
executes pm_runtime_put_sync() after running the BUS_NOTIFY_UNBIND_DRIVER
|
||||
notifications in __device_release_driver(). This requires bus types and
|
||||
drivers to make their ->remove() callbacks avoid races with runtime PM directly,
|
||||
but it also allows more flexibility in the handling of devices during the
|
||||
removal of their drivers.
|
||||
|
||||
The user space can effectively disallow the driver of the device to power manage
|
||||
it at run time by changing the value of its /sys/devices/.../power/control
|
||||
attribute to "on", which causes pm_runtime_forbid() to be called. In principle,
|
||||
this mechanism may also be used by the driver to effectively turn off the
|
||||
run-time power management of the device until the user space turns it on.
|
||||
Namely, during the initialization the driver can make sure that the run-time PM
|
||||
runtime power management of the device until the user space turns it on.
|
||||
Namely, during the initialization the driver can make sure that the runtime PM
|
||||
status of the device is 'active' and call pm_runtime_forbid(). It should be
|
||||
noted, however, that if the user space has already intentionally changed the
|
||||
value of /sys/devices/.../power/control to "auto" to allow the driver to power
|
||||
manage the device at run time, the driver may confuse it by using
|
||||
pm_runtime_forbid() this way.
|
||||
|
||||
6. Run-time PM and System Sleep
|
||||
6. Runtime PM and System Sleep
|
||||
|
||||
Run-time PM and system sleep (i.e., system suspend and hibernation, also known
|
||||
Runtime PM and system sleep (i.e., system suspend and hibernation, also known
|
||||
as suspend-to-RAM and suspend-to-disk) interact with each other in a couple of
|
||||
ways. If a device is active when a system sleep starts, everything is
|
||||
straightforward. But what should happen if the device is already suspended?
|
||||
|
||||
The device may have different wake-up settings for run-time PM and system sleep.
|
||||
For example, remote wake-up may be enabled for run-time suspend but disallowed
|
||||
The device may have different wake-up settings for runtime PM and system sleep.
|
||||
For example, remote wake-up may be enabled for runtime suspend but disallowed
|
||||
for system sleep (device_may_wakeup(dev) returns 'false'). When this happens,
|
||||
the subsystem-level system suspend callback is responsible for changing the
|
||||
device's wake-up setting (it may leave that to the device driver's system
|
||||
suspend routine). It may be necessary to resume the device and suspend it again
|
||||
in order to do so. The same is true if the driver uses different power levels
|
||||
or other settings for run-time suspend and system sleep.
|
||||
or other settings for runtime suspend and system sleep.
|
||||
|
||||
During system resume, devices generally should be brought back to full power,
|
||||
even if they were suspended before the system sleep began. There are several
|
||||
reasons for this, including:
|
||||
During system resume, the simplest approach is to bring all devices back to full
|
||||
power, even if they had been suspended before the system suspend began. There
|
||||
are several reasons for this, including:
|
||||
|
||||
* The device might need to switch power levels, wake-up settings, etc.
|
||||
|
||||
|
@ -554,22 +585,49 @@ reasons for this, including:
|
|||
* The device might need to be reset.
|
||||
|
||||
* Even though the device was suspended, if its usage counter was > 0 then most
|
||||
likely it would need a run-time resume in the near future anyway.
|
||||
likely it would need a runtime resume in the near future anyway.
|
||||
|
||||
* Always going back to full power is simplest.
|
||||
|
||||
If the device was suspended before the sleep began, then its run-time PM status
|
||||
will have to be updated to reflect the actual post-system sleep status. The way
|
||||
to do this is:
|
||||
If the device had been suspended before the system suspend began and it's
|
||||
brought back to full power during resume, then its runtime PM status will have
|
||||
to be updated to reflect the actual post-system sleep status. The way to do
|
||||
this is:
|
||||
|
||||
pm_runtime_disable(dev);
|
||||
pm_runtime_set_active(dev);
|
||||
pm_runtime_enable(dev);
|
||||
|
||||
The PM core always increments the run-time usage counter before calling the
|
||||
->prepare() callback and decrements it after calling the ->complete() callback.
|
||||
Hence disabling run-time PM temporarily like this will not cause any run-time
|
||||
suspend callbacks to be lost.
|
||||
The PM core always increments the runtime usage counter before calling the
|
||||
->suspend() callback and decrements it after calling the ->resume() callback.
|
||||
Hence disabling runtime PM temporarily like this will not cause any runtime
|
||||
suspend attempts to be permanently lost. If the usage count goes to zero
|
||||
following the return of the ->resume() callback, the ->runtime_idle() callback
|
||||
will be invoked as usual.
|
||||
|
||||
On some systems, however, system sleep is not entered through a global firmware
|
||||
or hardware operation. Instead, all hardware components are put into low-power
|
||||
states directly by the kernel in a coordinated way. Then, the system sleep
|
||||
state effectively follows from the states the hardware components end up in
|
||||
and the system is woken up from that state by a hardware interrupt or a similar
|
||||
mechanism entirely under the kernel's control. As a result, the kernel never
|
||||
gives control away and the states of all devices during resume are precisely
|
||||
known to it. If that is the case and none of the situations listed above takes
|
||||
place (in particular, if the system is not waking up from hibernation), it may
|
||||
be more efficient to leave the devices that had been suspended before the system
|
||||
suspend began in the suspended state.
|
||||
|
||||
The PM core does its best to reduce the probability of race conditions between
|
||||
the runtime PM and system suspend/resume (and hibernation) callbacks by carrying
|
||||
out the following operations:
|
||||
|
||||
* During system suspend it calls pm_runtime_get_noresume() and
|
||||
pm_runtime_barrier() for every device right before executing the
|
||||
subsystem-level .suspend() callback for it. In addition to that it calls
|
||||
pm_runtime_disable() for every device right after executing the
|
||||
subsystem-level .suspend() callback for it.
|
||||
|
||||
* During system resume it calls pm_runtime_enable() and pm_runtime_put_sync()
|
||||
for every device right before and right after executing the subsystem-level
|
||||
.resume() callback for it, respectively.
|
||||
|
||||
7. Generic subsystem callbacks
|
||||
|
||||
|
@ -595,40 +653,68 @@ driver/base/power/generic_ops.c:
|
|||
callback provided by its driver and return its result, or return 0 if not
|
||||
defined
|
||||
|
||||
int pm_generic_suspend_noirq(struct device *dev);
|
||||
- if pm_runtime_suspended(dev) returns "false", invoke the ->suspend_noirq()
|
||||
callback provided by the device's driver and return its result, or return
|
||||
0 if not defined
|
||||
|
||||
int pm_generic_resume(struct device *dev);
|
||||
- invoke the ->resume() callback provided by the driver of this device and,
|
||||
if successful, change the device's runtime PM status to 'active'
|
||||
|
||||
int pm_generic_resume_noirq(struct device *dev);
|
||||
- invoke the ->resume_noirq() callback provided by the driver of this device
|
||||
|
||||
int pm_generic_freeze(struct device *dev);
|
||||
- if the device has not been suspended at run time, invoke the ->freeze()
|
||||
callback provided by its driver and return its result, or return 0 if not
|
||||
defined
|
||||
|
||||
int pm_generic_freeze_noirq(struct device *dev);
|
||||
- if pm_runtime_suspended(dev) returns "false", invoke the ->freeze_noirq()
|
||||
callback provided by the device's driver and return its result, or return
|
||||
0 if not defined
|
||||
|
||||
int pm_generic_thaw(struct device *dev);
|
||||
- if the device has not been suspended at run time, invoke the ->thaw()
|
||||
callback provided by its driver and return its result, or return 0 if not
|
||||
defined
|
||||
|
||||
int pm_generic_thaw_noirq(struct device *dev);
|
||||
- if pm_runtime_suspended(dev) returns "false", invoke the ->thaw_noirq()
|
||||
callback provided by the device's driver and return its result, or return
|
||||
0 if not defined
|
||||
|
||||
int pm_generic_poweroff(struct device *dev);
|
||||
- if the device has not been suspended at run time, invoke the ->poweroff()
|
||||
callback provided by its driver and return its result, or return 0 if not
|
||||
defined
|
||||
|
||||
int pm_generic_poweroff_noirq(struct device *dev);
|
||||
- if pm_runtime_suspended(dev) returns "false", run the ->poweroff_noirq()
|
||||
callback provided by the device's driver and return its result, or return
|
||||
0 if not defined
|
||||
|
||||
int pm_generic_restore(struct device *dev);
|
||||
- invoke the ->restore() callback provided by the driver of this device and,
|
||||
if successful, change the device's runtime PM status to 'active'
|
||||
|
||||
int pm_generic_restore_noirq(struct device *dev);
|
||||
- invoke the ->restore_noirq() callback provided by the device's driver
|
||||
|
||||
These functions can be assigned to the ->runtime_idle(), ->runtime_suspend(),
|
||||
->runtime_resume(), ->suspend(), ->resume(), ->freeze(), ->thaw(), ->poweroff(),
|
||||
or ->restore() callback pointers in the subsystem-level dev_pm_ops structures.
|
||||
->runtime_resume(), ->suspend(), ->suspend_noirq(), ->resume(),
|
||||
->resume_noirq(), ->freeze(), ->freeze_noirq(), ->thaw(), ->thaw_noirq(),
|
||||
->poweroff(), ->poweroff_noirq(), ->restore(), or ->restore_noirq() callback
|
||||
pointers in the subsystem-level dev_pm_ops structures.
|
||||
|
||||
If a subsystem wishes to use all of them at the same time, it can simply assign
|
||||
the GENERIC_SUBSYS_PM_OPS macro, defined in include/linux/pm.h, to its
|
||||
dev_pm_ops structure pointer.
|
||||
|
||||
Device drivers that wish to use the same function as a system suspend, freeze,
|
||||
poweroff and run-time suspend callback, and similarly for system resume, thaw,
|
||||
restore, and run-time resume, can achieve this with the help of the
|
||||
poweroff and runtime suspend callback, and similarly for system resume, thaw,
|
||||
restore, and runtime resume, can achieve this with the help of the
|
||||
UNIVERSAL_DEV_PM_OPS macro defined in include/linux/pm.h (possibly setting its
|
||||
last argument to NULL).
|
||||
|
||||
|
@ -638,7 +724,7 @@ Some "devices" are only logical sub-devices of their parent and cannot be
|
|||
power-managed on their own. (The prototype example is a USB interface. Entire
|
||||
USB devices can go into low-power mode or send wake-up requests, but neither is
|
||||
possible for individual interfaces.) The drivers for these devices have no
|
||||
need of run-time PM callbacks; if the callbacks did exist, ->runtime_suspend()
|
||||
need of runtime PM callbacks; if the callbacks did exist, ->runtime_suspend()
|
||||
and ->runtime_resume() would always return 0 without doing anything else and
|
||||
->runtime_idle() would always call pm_runtime_suspend().
|
||||
|
||||
|
@ -646,7 +732,7 @@ Subsystems can tell the PM core about these devices by calling
|
|||
pm_runtime_no_callbacks(). This should be done after the device structure is
|
||||
initialized and before it is registered (although after device registration is
|
||||
also okay). The routine will set the device's power.no_callbacks flag and
|
||||
prevent the non-debugging run-time PM sysfs attributes from being created.
|
||||
prevent the non-debugging runtime PM sysfs attributes from being created.
|
||||
|
||||
When power.no_callbacks is set, the PM core will not invoke the
|
||||
->runtime_idle(), ->runtime_suspend(), or ->runtime_resume() callbacks.
|
||||
|
@ -654,7 +740,7 @@ Instead it will assume that suspends and resumes always succeed and that idle
|
|||
devices should be suspended.
|
||||
|
||||
As a consequence, the PM core will never directly inform the device's subsystem
|
||||
or driver about run-time power changes. Instead, the driver for the device's
|
||||
or driver about runtime power changes. Instead, the driver for the device's
|
||||
parent must take responsibility for telling the device's driver when the
|
||||
parent's power state changes.
|
||||
|
||||
|
@ -665,13 +751,13 @@ A device should be put in a low-power state only when there's some reason to
|
|||
think it will remain in that state for a substantial time. A common heuristic
|
||||
says that a device which hasn't been used for a while is liable to remain
|
||||
unused; following this advice, drivers should not allow devices to be suspended
|
||||
at run-time until they have been inactive for some minimum period. Even when
|
||||
at runtime until they have been inactive for some minimum period. Even when
|
||||
the heuristic ends up being non-optimal, it will still prevent devices from
|
||||
"bouncing" too rapidly between low-power and full-power states.
|
||||
|
||||
The term "autosuspend" is an historical remnant. It doesn't mean that the
|
||||
device is automatically suspended (the subsystem or driver still has to call
|
||||
the appropriate PM routines); rather it means that run-time suspends will
|
||||
the appropriate PM routines); rather it means that runtime suspends will
|
||||
automatically be delayed until the desired period of inactivity has elapsed.
|
||||
|
||||
Inactivity is determined based on the power.last_busy field. Drivers should
|
||||
|
|
|
@ -9,7 +9,121 @@ If variable is of Type, use printk format specifier:
|
|||
size_t %zu or %zx
|
||||
ssize_t %zd or %zx
|
||||
|
||||
Raw pointer value SHOULD be printed with %p.
|
||||
Raw pointer value SHOULD be printed with %p. The kernel supports
|
||||
the following extended format specifiers for pointer types:
|
||||
|
||||
Symbols/Function Pointers:
|
||||
|
||||
%pF versatile_init+0x0/0x110
|
||||
%pf versatile_init
|
||||
%pS versatile_init+0x0/0x110
|
||||
%ps versatile_init
|
||||
%pB prev_fn_of_versatile_init+0x88/0x88
|
||||
|
||||
For printing symbols and function pointers. The 'S' and 's' specifiers
|
||||
result in the symbol name with ('S') or without ('s') offsets. Where
|
||||
this is used on a kernel without KALLSYMS - the symbol address is
|
||||
printed instead.
|
||||
|
||||
The 'B' specifier results in the symbol name with offsets and should be
|
||||
used when printing stack backtraces. The specifier takes into
|
||||
consideration the effect of compiler optimisations which may occur
|
||||
when tail-calls are used and marked with the noreturn GCC attribute.
|
||||
|
||||
On ia64, ppc64 and parisc64 architectures function pointers are
|
||||
actually function descriptors which must first be resolved. The 'F' and
|
||||
'f' specifiers perform this resolution and then provide the same
|
||||
functionality as the 'S' and 's' specifiers.
|
||||
|
||||
Kernel Pointers:
|
||||
|
||||
%pK 0x01234567 or 0x0123456789abcdef
|
||||
|
||||
For printing kernel pointers which should be hidden from unprivileged
|
||||
users. The behaviour of %pK depends on the kptr_restrict sysctl - see
|
||||
Documentation/sysctl/kernel.txt for more details.
|
||||
|
||||
Struct Resources:
|
||||
|
||||
%pr [mem 0x60000000-0x6fffffff flags 0x2200] or
|
||||
[mem 0x0000000060000000-0x000000006fffffff flags 0x2200]
|
||||
%pR [mem 0x60000000-0x6fffffff pref] or
|
||||
[mem 0x0000000060000000-0x000000006fffffff pref]
|
||||
|
||||
For printing struct resources. The 'R' and 'r' specifiers result in a
|
||||
printed resource with ('R') or without ('r') a decoded flags member.
|
||||
|
||||
MAC/FDDI addresses:
|
||||
|
||||
%pM 00:01:02:03:04:05
|
||||
%pMF 00-01-02-03-04-05
|
||||
%pm 000102030405
|
||||
|
||||
For printing 6-byte MAC/FDDI addresses in hex notation. The 'M' and 'm'
|
||||
specifiers result in a printed address with ('M') or without ('m') byte
|
||||
separators. The default byte separator is the colon (':').
|
||||
|
||||
Where FDDI addresses are concerned the 'F' specifier can be used after
|
||||
the 'M' specifier to use dash ('-') separators instead of the default
|
||||
separator.
|
||||
|
||||
IPv4 addresses:
|
||||
|
||||
%pI4 1.2.3.4
|
||||
%pi4 001.002.003.004
|
||||
%p[Ii][hnbl]
|
||||
|
||||
For printing IPv4 dot-separated decimal addresses. The 'I4' and 'i4'
|
||||
specifiers result in a printed address with ('i4') or without ('I4')
|
||||
leading zeros.
|
||||
|
||||
The additional 'h', 'n', 'b', and 'l' specifiers are used to specify
|
||||
host, network, big or little endian order addresses respectively. Where
|
||||
no specifier is provided the default network/big endian order is used.
|
||||
|
||||
IPv6 addresses:
|
||||
|
||||
%pI6 0001:0002:0003:0004:0005:0006:0007:0008
|
||||
%pi6 00010002000300040005000600070008
|
||||
%pI6c 1:2:3:4:5:6:7:8
|
||||
|
||||
For printing IPv6 network-order 16-bit hex addresses. The 'I6' and 'i6'
|
||||
specifiers result in a printed address with ('I6') or without ('i6')
|
||||
colon-separators. Leading zeros are always used.
|
||||
|
||||
The additional 'c' specifier can be used with the 'I' specifier to
|
||||
print a compressed IPv6 address as described by
|
||||
http://tools.ietf.org/html/rfc5952
|
||||
|
||||
UUID/GUID addresses:
|
||||
|
||||
%pUb 00010203-0405-0607-0809-0a0b0c0d0e0f
|
||||
%pUB 00010203-0405-0607-0809-0A0B0C0D0E0F
|
||||
%pUl 03020100-0504-0706-0809-0a0b0c0d0e0f
|
||||
%pUL 03020100-0504-0706-0809-0A0B0C0D0E0F
|
||||
|
||||
For printing 16-byte UUID/GUID addresses. The additional 'l', 'L',
|
||||
'b' and 'B' specifiers are used to specify a little endian order in
|
||||
lower ('l') or upper case ('L') hex characters - and big endian order
|
||||
in lower ('b') or upper case ('B') hex characters.
|
||||
|
||||
Where no additional specifiers are used the default little endian
|
||||
order with lower case hex characters will be printed.
|
||||
|
||||
struct va_format:
|
||||
|
||||
%pV
|
||||
|
||||
For printing struct va_format structures. These contain a format string
|
||||
and va_list as follows:
|
||||
|
||||
struct va_format {
|
||||
const char *fmt;
|
||||
va_list *va;
|
||||
};
|
||||
|
||||
Do not use this feature without some mechanism to verify the
|
||||
correctness of the format string and va_list arguments.
|
||||
|
||||
u64 SHOULD be printed with %llu/%llx, (unsigned long long):
|
||||
|
||||
|
@ -32,4 +146,5 @@ Reminder: sizeof() result is of type size_t.
|
|||
Thank you for your cooperation and attention.
|
||||
|
||||
|
||||
By Randy Dunlap <rdunlap@xenotime.net>
|
||||
By Randy Dunlap <rdunlap@xenotime.net> and
|
||||
Andrew Murray <amurray@mpc-data.co.uk>
|
||||
|
|
|
@ -196,15 +196,20 @@ Support for Augmented rbtrees
|
|||
Augmented rbtree is an rbtree with "some" additional data stored in each node.
|
||||
This data can be used to add some new functionality to rbtree.
|
||||
Augmented rbtree is an optional feature built on top of basic rbtree
|
||||
infrastructure. rbtree user who wants this feature will have an augment
|
||||
callback function in rb_root initialized.
|
||||
infrastructure. An rbtree user who wants this feature will have to call the
|
||||
augmentation functions with the user provided augmentation callback
|
||||
when inserting and erasing nodes.
|
||||
|
||||
This callback function will be called from rbtree core routines whenever
|
||||
a node has a change in one or both of its children. It is the responsibility
|
||||
of the callback function to recalculate the additional data that is in the
|
||||
rb node using new children information. Note that if this new additional
|
||||
data affects the parent node's additional data, then callback function has
|
||||
to handle it and do the recursive updates.
|
||||
On insertion, the user must call rb_augment_insert() once the new node is in
|
||||
place. This will cause the augmentation function callback to be called for
|
||||
each node between the new node and the root which has been affected by the
|
||||
insertion.
|
||||
|
||||
When erasing a node, the user must call rb_augment_erase_begin() first to
|
||||
retrieve the deepest node on the rebalance path. Then, after erasing the
|
||||
original node, the user must call rb_augment_erase_end() with the deepest
|
||||
node found earlier. This will cause the augmentation function to be called
|
||||
for each affected node between the deepest node and the root.
|
||||
|
||||
|
||||
Interval tree is an example of augmented rb tree. Reference -
|
||||
|
|
|
@ -1,122 +0,0 @@
|
|||
Channel attached Tape device driver
|
||||
|
||||
-----------------------------WARNING-----------------------------------------
|
||||
This driver is considered to be EXPERIMENTAL. Do NOT use it in
|
||||
production environments. Feel free to test it and report problems back to us.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
The LINUX for zSeries tape device driver manages channel attached tape drives
|
||||
which are compatible to IBM 3480 or IBM 3490 magnetic tape subsystems. This
|
||||
includes various models of these devices (for example the 3490E).
|
||||
|
||||
|
||||
Tape driver features
|
||||
|
||||
The device driver supports a maximum of 128 tape devices.
|
||||
No official LINUX device major number is assigned to the zSeries tape device
|
||||
driver. It allocates major numbers dynamically and reports them on system
|
||||
startup.
|
||||
Typically it will get major number 254 for both the character device front-end
|
||||
and the block device front-end.
|
||||
|
||||
The tape device driver needs no kernel parameters. All supported devices
|
||||
present are detected on driver initialization at system startup or module load.
|
||||
The devices detected are ordered by their subchannel numbers. The device with
|
||||
the lowest subchannel number becomes device 0, the next one will be device 1
|
||||
and so on.
|
||||
|
||||
|
||||
Tape character device front-end
|
||||
|
||||
The usual way to read or write to the tape device is through the character
|
||||
device front-end. The zSeries tape device driver provides two character devices
|
||||
for each physical device -- the first of these will rewind automatically when
|
||||
it is closed, the second will not rewind automatically.
|
||||
|
||||
The character device nodes are named /dev/rtibm0 (rewinding) and /dev/ntibm0
|
||||
(non-rewinding) for the first device, /dev/rtibm1 and /dev/ntibm1 for the
|
||||
second, and so on.
|
||||
|
||||
The character device front-end can be used as any other LINUX tape device. You
|
||||
can write to it and read from it using LINUX facilities such as GNU tar. The
|
||||
tool mt can be used to perform control operations, such as rewinding the tape
|
||||
or skipping a file.
|
||||
|
||||
Most LINUX tape software should work with either tape character device.
|
||||
|
||||
|
||||
Tape block device front-end
|
||||
|
||||
The tape device may also be accessed as a block device in read-only mode.
|
||||
This could be used for software installation in the same way as it is used with
|
||||
other operating systems on the zSeries platform (and most LINUX
|
||||
distributions are shipped on compact disk using ISO9660 filesystems).
|
||||
|
||||
One block device node is provided for each physical device. These are named
|
||||
/dev/btibm0 for the first device, /dev/btibm1 for the second and so on.
|
||||
You should only use the ISO9660 filesystem on LINUX for zSeries tapes because
|
||||
the physical tape devices cannot perform fast seeks and the ISO9660 system is
|
||||
optimized for this situation.
|
||||
|
||||
|
||||
Tape block device example
|
||||
|
||||
In this example a tape with an ISO9660 filesystem is created using the first
|
||||
tape device. ISO9660 filesystem support must be built into your system kernel
|
||||
for this.
|
||||
The mt command is used to issue tape commands and the mkisofs command to
|
||||
create an ISO9660 filesystem:
|
||||
|
||||
- create a LINUX directory (somedir) with the contents of the filesystem
|
||||
mkdir somedir
|
||||
cp contents somedir
|
||||
|
||||
- insert a tape
|
||||
|
||||
- ensure the tape is at the beginning
|
||||
mt -f /dev/ntibm0 rewind
|
||||
|
||||
- set the blocksize of the character driver. The blocksize 2048 bytes
|
||||
is commonly used on ISO9660 CD-Roms
|
||||
mt -f /dev/ntibm0 setblk 2048
|
||||
|
||||
- write the filesystem to the character device driver
|
||||
mkisofs -o /dev/ntibm0 somedir
|
||||
|
||||
- rewind the tape again
|
||||
mt -f /dev/ntibm0 rewind
|
||||
|
||||
- Now you can mount your new filesystem as a block device:
|
||||
mount -t iso9660 -o ro,block=2048 /dev/btibm0 /mnt
|
||||
|
||||
TODO List
|
||||
|
||||
- Driver has to be stabilized still
|
||||
|
||||
BUGS
|
||||
|
||||
This driver is considered BETA, which means some weaknesses may still
|
||||
be in it.
|
||||
If an error occurs which cannot be handled by the code you will get a
|
||||
sense-data dump. In that case please do the following:
|
||||
|
||||
1. set the tape driver debug level to maximum:
|
||||
echo 6 >/proc/s390dbf/tape/level
|
||||
|
||||
2. re-perform the actions which produced the bug. (Hopefully the bug will
|
||||
reappear.)
|
||||
|
||||
3. get a snapshot from the debug-feature:
|
||||
cat /proc/s390dbf/tape/hex_ascii >somefile
|
||||
|
||||
4. Now put the snapshot together with a detailed description of the situation
|
||||
that led to the bug:
|
||||
- Which tool did you use?
|
||||
- Which hardware do you have?
|
||||
- Was your tape unit online?
|
||||
- Is it a shared tape unit?
|
||||
|
||||
5. Send an email with your bug report to:
|
||||
mailto:Linux390@de.ibm.com
|
||||
|
||||
|
|
@ -223,9 +223,10 @@ When CONFIG_FAIR_GROUP_SCHED is defined, a "cpu.shares" file is created for each
|
|||
group created using the pseudo filesystem. See example steps below to create
|
||||
task groups and modify their CPU share using the "cgroups" pseudo filesystem.
|
||||
|
||||
# mkdir /dev/cpuctl
|
||||
# mount -t cgroup -ocpu none /dev/cpuctl
|
||||
# cd /dev/cpuctl
|
||||
# mount -t tmpfs cgroup_root /sys/fs/cgroup
|
||||
# mkdir /sys/fs/cgroup/cpu
|
||||
# mount -t cgroup -ocpu none /sys/fs/cgroup/cpu
|
||||
# cd /sys/fs/cgroup/cpu
|
||||
|
||||
# mkdir multimedia # create "multimedia" group of tasks
|
||||
# mkdir browser # create "browser" group of tasks
|
||||
|
|
|
@ -129,9 +129,8 @@ priority!
|
|||
Enabling CONFIG_RT_GROUP_SCHED lets you explicitly allocate real
|
||||
CPU bandwidth to task groups.
|
||||
|
||||
This uses the /cgroup virtual file system and
|
||||
"/cgroup/<cgroup>/cpu.rt_runtime_us" to control the CPU time reserved for each
|
||||
control group.
|
||||
This uses the cgroup virtual file system and "<cgroup>/cpu.rt_runtime_us"
|
||||
to control the CPU time reserved for each control group.
|
||||
|
||||
For more information on working with control groups, you should read
|
||||
Documentation/cgroups/cgroups.txt as well.
|
||||
|
@ -150,7 +149,7 @@ For now, this can be simplified to just the following (but see Future plans):
|
|||
===============
|
||||
|
||||
There is work in progress to make the scheduling period for each group
|
||||
("/cgroup/<cgroup>/cpu.rt_period_us") configurable as well.
|
||||
("<cgroup>/cpu.rt_period_us") configurable as well.
|
||||
|
||||
The constraint on the period is that a subgroup must have a smaller or
|
||||
equal period to its parent. But realistically it's not very useful _yet_
|
||||
|
|
|
@ -0,0 +1,100 @@
|
|||
This file explains the codec-specific mixer controls.
|
||||
|
||||
Realtek codecs
|
||||
--------------
|
||||
|
||||
* Channel Mode
|
||||
This is an enum control to change the surround-channel setup,
|
||||
appears only when the surround channels are available.
|
||||
It gives the number of channels to be used, "2ch", "4ch", "6ch",
|
||||
and "8ch". According to the configuration, this also controls the
|
||||
jack-retasking of multi-I/O jacks.
|
||||
|
||||
* Auto-Mute Mode
|
||||
This is an enum control to change the auto-mute behavior of the
|
||||
headphone and line-out jacks. If built-in speakers and headphone
|
||||
and/or line-out jacks are available on a machine, this control
|
||||
appears.
|
||||
When there are only either headphones or line-out jacks, it gives
|
||||
"Disabled" and "Enabled" state. When enabled, the speaker is muted
|
||||
automatically when a jack is plugged.
|
||||
|
||||
When both headphone and line-out jacks are present, it gives
|
||||
"Disabled", "Speaker Only" and "Line-Out+Speaker". When
|
||||
speaker-only is chosen, plugging into a headphone or a line-out jack
|
||||
mutes the speakers, but not line-outs. When line-out+speaker is
|
||||
selected, plugging to a headphone jack mutes both speakers and
|
||||
line-outs.
|
||||
|
||||
|
||||
IDT/Sigmatel codecs
|
||||
-------------------
|
||||
|
||||
* Analog Loopback
|
||||
This control enables/disables the analog-loopback circuit. This
|
||||
appears only when "loopback" is set to true in a codec hint
|
||||
(see HD-Audio.txt). Note that on some codecs the analog-loopback
|
||||
and the normal PCM playback are exclusive, i.e. when this is on, you
|
||||
won't hear any PCM stream.
|
||||
|
||||
* Swap Center/LFE
|
||||
Swaps the center and LFE channel order. Normally, the left
|
||||
corresponds to the center and the right to the LFE. When this is
|
||||
ON, the left corresponds to the LFE and the right to the center.
|
||||
|
||||
* Headphone as Line Out
|
||||
When this control is ON, treat the headphone jacks as line-out
|
||||
jacks. That is, the headphone won't auto-mute the other line-outs,
|
||||
and no HP-amp is set to the pins.
|
||||
|
||||
* Mic Jack Mode, Line Jack Mode, etc
|
||||
These enum controls determine the direction and the bias of the input jack
|
||||
pins. Depending on the jack type, it can be set as "Mic In" and "Line
|
||||
In", for determining the input bias, or it can be set to "Line Out"
|
||||
when the pin is a multi-I/O jack for surround channels.
|
||||
|
||||
|
||||
VIA codecs
|
||||
----------
|
||||
|
||||
* Smart 5.1
|
||||
An enum control to re-task the multi-I/O jacks for surround outputs.
|
||||
When it's ON, the corresponding input jacks (usually a line-in and a
|
||||
mic-in) are switched as the surround and the CLFE output jacks.
|
||||
|
||||
* Independent HP
|
||||
When this enum control is enabled, the headphone output is routed
|
||||
from an individual stream (the third PCM such as hw:0,2) instead of
|
||||
the primary stream. In the case the headphone DAC is shared with a
|
||||
side or a CLFE-channel DAC, the DAC is switched to the headphone
|
||||
automatically.
|
||||
|
||||
* Loopback Mixing
|
||||
An enum control to determine whether the analog-loopback route is
|
||||
enabled or not. When it's enabled, the analog-loopback is mixed to
|
||||
the front-channel. Also, the same route is used for the headphone
|
||||
and speaker outputs. As a side-effect, when this mode is set, the
|
||||
individual volume controls will no longer be available for
|
||||
headphones and speakers because there is only one DAC connected to a
|
||||
mixer widget.
|
||||
|
||||
* Dynamic Power-Control
|
||||
This control determines whether the dynamic power-control per jack
|
||||
detection is enabled or not. When enabled, the widgets' power states
|
||||
(D0/D3) are changed dynamically depending on the jack plugging
|
||||
state to reduce power consumption. However, if your system
|
||||
doesn't provide a proper jack-detection, this won't work; in such a
|
||||
case, turn this control OFF.
|
||||
|
||||
* Jack Detect
|
||||
This control is provided only for VT1708 codec which gives no proper
|
||||
unsolicited event per jack plug. When this is on, the driver polls
|
||||
the jack detection so that the headphone auto-mute can work, while
|
||||
turning this off would reduce the power consumption.
|
||||
|
||||
|
||||
Conexant codecs
|
||||
---------------
|
||||
|
||||
* Auto-Mute Mode
|
||||
See Realtek codecs.
|
|
@ -88,6 +88,16 @@ static void __init ts72xx_init_machine(void)
|
|||
ARRAY_SIZE(ts72xx_spi_devices));
|
||||
}
|
||||
|
||||
The driver can use DMA for the transfers also. In this case ts72xx_spi_info
|
||||
becomes:
|
||||
|
||||
static struct ep93xx_spi_info ts72xx_spi_info = {
|
||||
.num_chipselect = ARRAY_SIZE(ts72xx_spi_devices),
|
||||
.use_dma = true,
|
||||
};
|
||||
|
||||
Note that CONFIG_EP93XX_DMA should be enabled as well.
|
||||
|
||||
Thanks to
|
||||
=========
|
||||
Martin Guy, H. Hartley Sweeten and others who helped me during development of
|
||||
|
|
|
@ -22,15 +22,11 @@ Typically a SPI master is defined in the arch/.../mach-*/board-*.c as a
|
|||
found in include/linux/spi/pxa2xx_spi.h:
|
||||
|
||||
struct pxa2xx_spi_master {
|
||||
enum pxa_ssp_type ssp_type;
|
||||
u32 clock_enable;
|
||||
u16 num_chipselect;
|
||||
u8 enable_dma;
|
||||
};
|
||||
|
||||
The "pxa2xx_spi_master.ssp_type" field must have a value between 1 and 3 and
|
||||
informs the driver which features a particular SSP supports.
|
||||
|
||||
The "pxa2xx_spi_master.clock_enable" field is used to enable/disable the
|
||||
corresponding SSP peripheral block in the "Clock Enable Register (CKEN"). See
|
||||
the "PXA2xx Developer Manual" section "Clocks and Power Management".
|
||||
|
@ -61,7 +57,6 @@ static struct resource pxa_spi_nssp_resources[] = {
|
|||
};
|
||||
|
||||
static struct pxa2xx_spi_master pxa_nssp_master_info = {
|
||||
.ssp_type = PXA25x_NSSP, /* Type of SSP */
|
||||
.clock_enable = CKEN_NSSP, /* NSSP Peripheral clock */
|
||||
.num_chipselect = 1, /* Matches the number of chips attached to NSSP */
|
||||
.enable_dma = 1, /* Enables NSSP DMA */
|
||||
|
|
|
@ -13,18 +13,8 @@ static DEFINE_SPINLOCK(xxx_lock);
|
|||
The above is always safe. It will disable interrupts _locally_, but the
|
||||
spinlock itself will guarantee the global lock, so it will guarantee that
|
||||
there is only one thread-of-control within the region(s) protected by that
|
||||
lock. This works well even under UP. The above sequence under UP
|
||||
essentially is just the same as doing
|
||||
|
||||
unsigned long flags;
|
||||
|
||||
save_flags(flags); cli();
|
||||
... critical section ...
|
||||
restore_flags(flags);
|
||||
|
||||
so the code does _not_ need to worry about UP vs SMP issues: the spinlocks
|
||||
work correctly under both (and spinlocks are actually more efficient on
|
||||
architectures that allow doing the "save_flags + cli" in one operation).
|
||||
lock. This works well even under UP, so the code does _not_ need to
|
||||
worry about UP vs SMP issues: the spinlocks work correctly under both.
|
||||
|
||||
NOTE! Implications of spin_locks for memory are further described in:
|
||||
|
||||
|
@ -36,27 +26,7 @@ The above is usually pretty simple (you usually need and want only one
|
|||
spinlock for most things - using more than one spinlock can make things a
|
||||
lot more complex and even slower and is usually worth it only for
|
||||
sequences that you _know_ need to be split up: avoid it at all cost if you
|
||||
aren't sure). HOWEVER, it _does_ mean that if you have some code that does
|
||||
|
||||
cli();
|
||||
.. critical section ..
|
||||
sti();
|
||||
|
||||
and another sequence that does
|
||||
|
||||
spin_lock_irqsave(flags);
|
||||
.. critical section ..
|
||||
spin_unlock_irqrestore(flags);
|
||||
|
||||
then they are NOT mutually exclusive, and the critical regions can happen
|
||||
at the same time on two different CPU's. That's fine per se, but the
|
||||
critical regions had better be critical for different things (ie they
|
||||
can't stomp on each other).
|
||||
|
||||
The above is a problem mainly if you end up mixing code - for example the
|
||||
routines in ll_rw_block() tend to use cli/sti to protect the atomicity of
|
||||
their actions, and if a driver uses spinlocks instead then you should
|
||||
think about issues like the above.
|
||||
aren't sure).
|
||||
|
||||
This is really the only really hard part about spinlocks: once you start
|
||||
using spinlocks they tend to expand to areas you might not have noticed
|
||||
|
@ -120,11 +90,10 @@ Lesson 3: spinlocks revisited.
|
|||
|
||||
The single spin-lock primitives above are by no means the only ones. They
|
||||
are the most safe ones, and the ones that work under all circumstances,
|
||||
but partly _because_ they are safe they are also fairly slow. They are
|
||||
much faster than a generic global cli/sti pair, but slower than they'd
|
||||
need to be, because they do have to disable interrupts (which is just a
|
||||
single instruction on a x86, but it's an expensive one - and on other
|
||||
architectures it can be worse).
|
||||
but partly _because_ they are safe they are also fairly slow. They are slower
|
||||
than they'd need to be, because they do have to disable interrupts
|
||||
(which is just a single instruction on an x86, but it's an expensive one -
|
||||
and on other architectures it can be worse).
|
||||
|
||||
If you have a case where you have to protect a data structure across
|
||||
several CPU's and you want to use spinlocks you can potentially use
|
||||
|
|
|
@ -17,23 +17,21 @@ before actually making adjustments.
|
|||
|
||||
Currently, these files might (depending on your configuration)
|
||||
show up in /proc/sys/kernel:
|
||||
- acpi_video_flags
|
||||
|
||||
- acct
|
||||
- acpi_video_flags
|
||||
- auto_msgmni
|
||||
- bootloader_type [ X86 only ]
|
||||
- bootloader_version [ X86 only ]
|
||||
- callhome [ S390 only ]
|
||||
- auto_msgmni
|
||||
- core_pattern
|
||||
- core_pipe_limit
|
||||
- core_uses_pid
|
||||
- ctrl-alt-del
|
||||
- dentry-state
|
||||
- dmesg_restrict
|
||||
- domainname
|
||||
- hostname
|
||||
- hotplug
|
||||
- java-appletviewer [ binfmt_java, obsolete ]
|
||||
- java-interpreter [ binfmt_java, obsolete ]
|
||||
- kptr_restrict
|
||||
- kstack_depth_to_print [ X86 only ]
|
||||
- l2cr [ PPC only ]
|
||||
|
@ -48,10 +46,14 @@ show up in /proc/sys/kernel:
|
|||
- overflowgid
|
||||
- overflowuid
|
||||
- panic
|
||||
- panic_on_oops
|
||||
- panic_on_unrecovered_nmi
|
||||
- pid_max
|
||||
- powersave-nap [ PPC only ]
|
||||
- panic_on_unrecovered_nmi
|
||||
- printk
|
||||
- printk_delay
|
||||
- printk_ratelimit
|
||||
- printk_ratelimit_burst
|
||||
- randomize_va_space
|
||||
- real-root-dev ==> Documentation/initrd.txt
|
||||
- reboot-cmd [ SPARC only ]
|
||||
|
@ -62,6 +64,7 @@ show up in /proc/sys/kernel:
|
|||
- shmall
|
||||
- shmmax [ sysv ipc ]
|
||||
- shmmni
|
||||
- softlockup_thresh
|
||||
- stop-a [ SPARC only ]
|
||||
- sysrq ==> Documentation/sysrq.txt
|
||||
- tainted
|
||||
|
@ -71,15 +74,6 @@ show up in /proc/sys/kernel:
|
|||
|
||||
==============================================================
|
||||
|
||||
acpi_video_flags:
|
||||
|
||||
flags
|
||||
|
||||
See Doc*/kernel/power/video.txt, it allows mode of video boot to be
|
||||
set during run time.
|
||||
|
||||
==============================================================
|
||||
|
||||
acct:
|
||||
|
||||
highwater lowwater frequency
|
||||
|
@ -95,6 +89,25 @@ That is, suspend accounting if there left <= 2% free; resume it
|
|||
if we got >=4%; consider information about amount of free space
|
||||
valid for 30 seconds.
|
||||
|
||||
==============================================================
|
||||
|
||||
acpi_video_flags:
|
||||
|
||||
flags
|
||||
|
||||
See Doc*/kernel/power/video.txt, it allows mode of video boot to be
|
||||
set during run time.
|
||||
|
||||
==============================================================
|
||||
|
||||
auto_msgmni:
|
||||
|
||||
Enables/Disables automatic recomputing of msgmni upon memory add/remove
|
||||
or upon ipc namespace creation/removal (see the msgmni description
|
||||
above). Echoing "1" into this file enables msgmni automatic recomputing.
|
||||
Echoing "0" turns it off. auto_msgmni default value is 1.
|
||||
|
||||
|
||||
==============================================================
|
||||
|
||||
bootloader_type:
|
||||
|
@ -172,22 +185,24 @@ core_pattern is used to specify a core dumpfile pattern name.
|
|||
|
||||
core_pipe_limit:
|
||||
|
||||
This sysctl is only applicable when core_pattern is configured to pipe core
|
||||
files to a user space helper (when the first character of core_pattern is a '|',
|
||||
see above). When collecting cores via a pipe to an application, it is
|
||||
occasionally useful for the collecting application to gather data about the
|
||||
crashing process from its /proc/pid directory. In order to do this safely, the
|
||||
kernel must wait for the collecting process to exit, so as not to remove the
|
||||
crashing processes proc files prematurely. This in turn creates the possibility
|
||||
that a misbehaving userspace collecting process can block the reaping of a
|
||||
crashed process simply by never exiting. This sysctl defends against that. It
|
||||
defines how many concurrent crashing processes may be piped to user space
|
||||
applications in parallel. If this value is exceeded, then those crashing
|
||||
processes above that value are noted via the kernel log and their cores are
|
||||
skipped. 0 is a special value, indicating that unlimited processes may be
|
||||
captured in parallel, but that no waiting will take place (i.e. the collecting
|
||||
process is not guaranteed access to /proc/<crashing pid>/). This value defaults
|
||||
to 0.
|
||||
This sysctl is only applicable when core_pattern is configured to pipe
|
||||
core files to a user space helper (when the first character of
|
||||
core_pattern is a '|', see above). When collecting cores via a pipe
|
||||
to an application, it is occasionally useful for the collecting
|
||||
application to gather data about the crashing process from its
|
||||
/proc/pid directory. In order to do this safely, the kernel must wait
|
||||
for the collecting process to exit, so as not to remove the crashing
|
||||
process's proc files prematurely. This in turn creates the
|
||||
possibility that a misbehaving userspace collecting process can block
|
||||
the reaping of a crashed process simply by never exiting. This sysctl
|
||||
defends against that. It defines how many concurrent crashing
|
||||
processes may be piped to user space applications in parallel. If
|
||||
this value is exceeded, then those crashing processes above that value
|
||||
are noted via the kernel log and their cores are skipped. 0 is a
|
||||
special value, indicating that unlimited processes may be captured in
|
||||
parallel, but that no waiting will take place (i.e. the collecting
|
||||
process is not guaranteed access to /proc/<crashing pid>/). This
|
||||
value defaults to 0.
|
||||
|
||||
==============================================================
|
||||
|
||||
|
@ -218,14 +233,14 @@ to decide what to do with it.
|
|||
|
||||
dmesg_restrict:
|
||||
|
||||
This toggle indicates whether unprivileged users are prevented from using
|
||||
dmesg(8) to view messages from the kernel's log buffer. When
|
||||
dmesg_restrict is set to (0) there are no restrictions. When
|
||||
This toggle indicates whether unprivileged users are prevented
|
||||
from using dmesg(8) to view messages from the kernel's log buffer.
|
||||
When dmesg_restrict is set to (0) there are no restrictions. When
|
||||
dmesg_restrict is set to (1), users must have CAP_SYSLOG to use
|
||||
dmesg(8).
|
||||
|
||||
The kernel config option CONFIG_SECURITY_DMESG_RESTRICT sets the default
|
||||
value of dmesg_restrict.
|
||||
The kernel config option CONFIG_SECURITY_DMESG_RESTRICT sets the
|
||||
default value of dmesg_restrict.
|
||||
|
||||
==============================================================
|
||||
|
||||
|
@ -256,13 +271,6 @@ Default value is "/sbin/hotplug".
|
|||
|
||||
==============================================================
|
||||
|
||||
l2cr: (PPC only)
|
||||
|
||||
This flag controls the L2 cache of G3 processor boards. If
|
||||
0, the cache is disabled. Enabled if nonzero.
|
||||
|
||||
==============================================================
|
||||
|
||||
kptr_restrict:
|
||||
|
||||
This toggle indicates whether restrictions are placed on
|
||||
|
@ -283,6 +291,13 @@ kernel stack.
|
|||
|
||||
==============================================================
|
||||
|
||||
l2cr: (PPC only)
|
||||
|
||||
This flag controls the L2 cache of G3 processor boards. If
|
||||
0, the cache is disabled. Enabled if nonzero.
|
||||
|
||||
==============================================================
|
||||
|
||||
modules_disabled:
|
||||
|
||||
A toggle value indicating if modules are allowed to be loaded
|
||||
|
@ -293,6 +308,21 @@ to false.
|
|||
|
||||
==============================================================
|
||||
|
||||
nmi_watchdog:
|
||||
|
||||
Enables/Disables the NMI watchdog on x86 systems. When the value is
|
||||
non-zero the NMI watchdog is enabled and will continuously test all
|
||||
online cpus to determine whether or not they are still functioning
|
||||
properly. Currently, passing "nmi_watchdog=" parameter at boot time is
|
||||
required for this function to work.
|
||||
|
||||
If LAPIC NMI watchdog method is in use (nmi_watchdog=2 kernel
|
||||
parameter), the NMI watchdog shares registers with oprofile. By
|
||||
disabling the NMI watchdog, oprofile may have more registers to
|
||||
utilize.
|
||||
|
||||
==============================================================
|
||||
|
||||
osrelease, ostype & version:
|
||||
|
||||
# cat osrelease
|
||||
|
@ -312,10 +342,10 @@ The only way to tune these values is to rebuild the kernel :-)
|
|||
|
||||
overflowgid & overflowuid:
|
||||
|
||||
if your architecture did not always support 32-bit UIDs (i.e. arm, i386,
|
||||
m68k, sh, and sparc32), a fixed UID and GID will be returned to
|
||||
applications that use the old 16-bit UID/GID system calls, if the actual
|
||||
UID or GID would exceed 65535.
|
||||
If your architecture did not always support 32-bit UIDs (i.e. arm,
|
||||
i386, m68k, sh, and sparc32), a fixed UID and GID will be returned to
|
||||
applications that use the old 16-bit UID/GID system calls, if the
|
||||
actual UID or GID would exceed 65535.
|
||||
|
||||
These sysctls allow you to change the value of the fixed UID and GID.
|
||||
The default is 65534.
|
||||
|
@ -324,9 +354,22 @@ The default is 65534.
|
|||
|
||||
panic:
|
||||
|
||||
The value in this file represents the number of seconds the
|
||||
kernel waits before rebooting on a panic. When you use the
|
||||
software watchdog, the recommended setting is 60.
|
||||
The value in this file represents the number of seconds the kernel
|
||||
waits before rebooting on a panic. When you use the software watchdog,
|
||||
the recommended setting is 60.
|
||||
|
||||
==============================================================
|
||||
|
||||
panic_on_unrecovered_nmi:
|
||||
|
||||
The default Linux behaviour on an NMI of either memory or unknown is
|
||||
to continue operation. For many environments such as scientific
|
||||
computing it is preferable that the box is taken out and the error
|
||||
dealt with than an uncorrected parity/ECC error get propagated.
|
||||
|
||||
A small number of systems do generate NMI's for bizarre random reasons
|
||||
such as power management so the default is off. That sysctl works like
|
||||
the existing panic controls already in that directory.
|
||||
|
||||
==============================================================
|
||||
|
||||
|
@ -376,6 +419,14 @@ the different loglevels.
|
|||
|
||||
==============================================================
|
||||
|
||||
printk_delay:
|
||||
|
||||
Delay each printk message by printk_delay milliseconds
|
||||
|
||||
Value from 0 - 10000 is allowed.
|
||||
|
||||
==============================================================
|
||||
|
||||
printk_ratelimit:
|
||||
|
||||
Some warning messages are rate limited. printk_ratelimit specifies
|
||||
|
@ -395,15 +446,7 @@ send before ratelimiting kicks in.
|
|||
|
||||
==============================================================
|
||||
|
||||
printk_delay:
|
||||
|
||||
Delay each printk message in printk_delay milliseconds
|
||||
|
||||
Value from 0 - 10000 is allowed.
|
||||
|
||||
==============================================================
|
||||
|
||||
randomize-va-space:
|
||||
randomize_va_space:
|
||||
|
||||
This option can be used to select the type of process address
|
||||
space randomization that is used in the system, for architectures
|
||||
|
@ -466,11 +509,11 @@ are doing anyway :)
|
|||
|
||||
==============================================================
|
||||
|
||||
shmmax:
|
||||
shmmax:
|
||||
|
||||
This value can be used to query and set the run time limit
|
||||
on the maximum shared memory segment size that can be created.
|
||||
Shared memory segments up to 1Gb are now supported in the
|
||||
Shared memory segments up to 1Gb are now supported in the
|
||||
kernel. This value defaults to SHMMAX.
|
||||
|
||||
==============================================================
|
||||
|
@ -484,7 +527,7 @@ tunable to zero will disable the softlockup detection altogether.
|
|||
|
||||
==============================================================
|
||||
|
||||
tainted:
|
||||
tainted:
|
||||
|
||||
Non-zero if the kernel has been tainted. Numeric values, which
|
||||
can be ORed together:
|
||||
|
@ -509,49 +552,11 @@ can be ORed together:
|
|||
|
||||
==============================================================
|
||||
|
||||
auto_msgmni:
|
||||
|
||||
Enables/Disables automatic recomputing of msgmni upon memory add/remove or
|
||||
upon ipc namespace creation/removal (see the msgmni description above).
|
||||
Echoing "1" into this file enables msgmni automatic recomputing.
|
||||
Echoing "0" turns it off.
|
||||
auto_msgmni default value is 1.
|
||||
|
||||
==============================================================
|
||||
|
||||
nmi_watchdog:
|
||||
|
||||
Enables/Disables the NMI watchdog on x86 systems. When the value is non-zero
|
||||
the NMI watchdog is enabled and will continuously test all online cpus to
|
||||
determine whether or not they are still functioning properly. Currently,
|
||||
passing "nmi_watchdog=" parameter at boot time is required for this function
|
||||
to work.
|
||||
|
||||
If LAPIC NMI watchdog method is in use (nmi_watchdog=2 kernel parameter), the
|
||||
NMI watchdog shares registers with oprofile. By disabling the NMI watchdog,
|
||||
oprofile may have more registers to utilize.
|
||||
|
||||
==============================================================
|
||||
|
||||
unknown_nmi_panic:
|
||||
|
||||
The value in this file affects behavior of handling NMI. When the value is
|
||||
non-zero, unknown NMI is trapped and then panic occurs. At that time, kernel
|
||||
debugging information is displayed on console.
|
||||
|
||||
NMI switch that most IA32 servers have fires unknown NMI up, for example.
|
||||
If a system hangs up, try pressing the NMI switch.
|
||||
|
||||
==============================================================
|
||||
|
||||
panic_on_unrecovered_nmi:
|
||||
|
||||
The default Linux behaviour on an NMI of either memory or unknown is to continue
|
||||
operation. For many environments such as scientific computing it is preferable
|
||||
that the box is taken out and the error dealt with than an uncorrected
|
||||
parity/ECC error get propogated.
|
||||
|
||||
A small number of systems do generate NMI's for bizarre random reasons such as
|
||||
power management so the default is off. That sysctl works like the existing
|
||||
panic controls already in that directory.
|
||||
The value in this file affects behavior of handling NMI. When the
|
||||
value is non-zero, unknown NMI is trapped and then panic occurs. At
|
||||
that time, kernel debugging information is displayed on console.
|
||||
|
||||
NMI switch that most IA32 servers have fires unknown NMI up, for
|
||||
example. If a system hangs up, try pressing the NMI switch.
|
||||
|
|
|
@ -22,14 +22,15 @@ current_tracer. Instead of that, add probe points via
|
|||
|
||||
Synopsis of kprobe_events
|
||||
-------------------------
|
||||
p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe
|
||||
r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe
|
||||
p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe
|
||||
r[:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe
|
||||
-:[GRP/]EVENT : Clear a probe
|
||||
|
||||
GRP : Group name. If omitted, use "kprobes" for it.
|
||||
EVENT : Event name. If omitted, the event name is generated
|
||||
based on SYMBOL+offs or MEMADDR.
|
||||
SYMBOL[+offs] : Symbol+offset where the probe is inserted.
|
||||
based on SYM+offs or MEMADDR.
|
||||
MOD : Module name which has given SYM.
|
||||
SYM[+offs] : Symbol+offset where the probe is inserted.
|
||||
MEMADDR : Address where the probe is inserted.
|
||||
|
||||
FETCHARGS : Arguments. Each probe can have up to 128 args.
|
||||
|
|
|
@ -76,6 +76,13 @@ A transfer's actual_length may be positive even when an error has been
|
|||
reported. That's because transfers often involve several packets, so that
|
||||
one or more packets could finish before an error stops further endpoint I/O.
|
||||
|
||||
For isochronous URBs, the urb status value is non-zero only if the URB is
|
||||
unlinked, the device is removed, the host controller is disabled, or the total
|
||||
transferred length is less than the requested length and the URB_SHORT_NOT_OK
|
||||
flag is set. Completion handlers for isochronous URBs should only see
|
||||
urb->status set to zero, -ENOENT, -ECONNRESET, -ESHUTDOWN, or -EREMOTEIO.
|
||||
Individual frame descriptor status fields may report more status codes.
|
||||
|
||||
|
||||
0 Transfer completed successfully
|
||||
|
||||
|
@ -132,7 +139,7 @@ one or more packets could finish before an error stops further endpoint I/O.
|
|||
device removal events immediately.
|
||||
|
||||
-EXDEV ISO transfer only partially completed
|
||||
look at individual frame status for details
|
||||
(only set in iso_frame_desc[n].status, not urb->status)
|
||||
|
||||
-EINVAL ISO madness, if this happens: Log off and go home
|
||||
|
||||
|
|
|
@ -0,0 +1,256 @@
|
|||
/*
|
||||
* parse_vdso.c: Linux reference vDSO parser
|
||||
* Written by Andrew Lutomirski, 2011.
|
||||
*
|
||||
* This code is meant to be linked in to various programs that run on Linux.
|
||||
* As such, it is available with as few restrictions as possible. This file
|
||||
* is licensed under the Creative Commons Zero License, version 1.0,
|
||||
* available at http://creativecommons.org/publicdomain/zero/1.0/legalcode
|
||||
*
|
||||
* The vDSO is a regular ELF DSO that the kernel maps into user space when
|
||||
* it starts a program. It works equally well in statically and dynamically
|
||||
* linked binaries.
|
||||
*
|
||||
* This code is tested on x86_64. In principle it should work on any 64-bit
|
||||
* architecture that has a vDSO.
|
||||
*/
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <elf.h>
|
||||
|
||||
/*
|
||||
* To use this vDSO parser, first call one of the vdso_init_* functions.
|
||||
* If you've already parsed auxv, then pass the value of AT_SYSINFO_EHDR
|
||||
* to vdso_init_from_sysinfo_ehdr. Otherwise pass auxv to vdso_init_from_auxv.
|
||||
* Then call vdso_sym for each symbol you want. For example, to look up
|
||||
* gettimeofday on x86_64, use:
|
||||
*
|
||||
* <some pointer> = vdso_sym("LINUX_2.6", "gettimeofday");
|
||||
* or
|
||||
* <some pointer> = vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
|
||||
*
|
||||
* vdso_sym will return 0 if the symbol doesn't exist or if the init function
|
||||
* failed or was not called. vdso_sym is a little slow, so its return value
|
||||
* should be cached.
|
||||
*
|
||||
* vdso_sym is threadsafe; the init functions are not.
|
||||
*
|
||||
* These are the prototypes:
|
||||
*/
|
||||
extern void vdso_init_from_auxv(void *auxv);
|
||||
extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
|
||||
extern void *vdso_sym(const char *version, const char *name);
|
||||
|
||||
|
||||
/* And here's the code. */
|
||||
|
||||
#ifndef __x86_64__
|
||||
# error Not yet ported to non-x86_64 architectures
|
||||
#endif
|
||||
|
||||
/*
 * Parser state.  It is filled in by vdso_init_from_sysinfo_ehdr() and
 * consulted by vdso_sym(), which refuses to look anything up unless
 * 'valid' is true.
 */
static struct vdso_info
{
	bool valid;

	/* Load information */
	uintptr_t load_addr;
	uintptr_t load_offset;	/* load_addr - recorded vaddr */

	/* Symbol table and its string table */
	Elf64_Sym *symtab;
	const char *symstrings;

	/* DT_HASH table: bucket heads, chain links, and their counts */
	Elf64_Word *bucket;
	Elf64_Word *chain;
	Elf64_Word nbucket;
	Elf64_Word nchain;

	/* Version table */
	Elf64_Versym *versym;
	Elf64_Verdef *verdef;
} vdso_info;
|
||||
|
||||
/* Straight from the ELF specification. */
|
||||
/*
 * ELF symbol hash, following the algorithm in the ELF specification.
 *
 * The reference code does its arithmetic in unsigned long.  When that
 * type is 64 bits wide, the addition below can carry into bit 32, which
 * the 0xf0000000 mask never clears; the result then no longer fits in
 * the 32-bit words used by the hash and version tables, and reducing it
 * modulo nbucket selects the wrong bucket.  Computing in uint32_t keeps
 * the value within 32 bits regardless of the width of unsigned long.
 *
 * name: NUL-terminated string to hash.
 * Returns the 32-bit hash value, widened to unsigned long.
 */
static unsigned long elf_hash(const unsigned char *name)
{
	uint32_t h = 0, g;

	while (*name)
	{
		h = (h << 4) + *name++;
		g = h & 0xf0000000;
		if (g)
			h ^= g >> 24;
		h &= ~g;
	}
	return h;
}
|
||||
|
||||
void vdso_init_from_sysinfo_ehdr(uintptr_t base)
|
||||
{
|
||||
size_t i;
|
||||
bool found_vaddr = false;
|
||||
|
||||
vdso_info.valid = false;
|
||||
|
||||
vdso_info.load_addr = base;
|
||||
|
||||
Elf64_Ehdr *hdr = (Elf64_Ehdr*)base;
|
||||
Elf64_Phdr *pt = (Elf64_Phdr*)(vdso_info.load_addr + hdr->e_phoff);
|
||||
Elf64_Dyn *dyn = 0;
|
||||
|
||||
/*
|
||||
* We need two things from the segment table: the load offset
|
||||
* and the dynamic table.
|
||||
*/
|
||||
for (i = 0; i < hdr->e_phnum; i++)
|
||||
{
|
||||
if (pt[i].p_type == PT_LOAD && !found_vaddr) {
|
||||
found_vaddr = true;
|
||||
vdso_info.load_offset = base
|
||||
+ (uintptr_t)pt[i].p_offset
|
||||
- (uintptr_t)pt[i].p_vaddr;
|
||||
} else if (pt[i].p_type == PT_DYNAMIC) {
|
||||
dyn = (Elf64_Dyn*)(base + pt[i].p_offset);
|
||||
}
|
||||
}
|
||||
|
||||
if (!found_vaddr || !dyn)
|
||||
return; /* Failed */
|
||||
|
||||
/*
|
||||
* Fish out the useful bits of the dynamic table.
|
||||
*/
|
||||
Elf64_Word *hash = 0;
|
||||
vdso_info.symstrings = 0;
|
||||
vdso_info.symtab = 0;
|
||||
vdso_info.versym = 0;
|
||||
vdso_info.verdef = 0;
|
||||
for (i = 0; dyn[i].d_tag != DT_NULL; i++) {
|
||||
switch (dyn[i].d_tag) {
|
||||
case DT_STRTAB:
|
||||
vdso_info.symstrings = (const char *)
|
||||
((uintptr_t)dyn[i].d_un.d_ptr
|
||||
+ vdso_info.load_offset);
|
||||
break;
|
||||
case DT_SYMTAB:
|
||||
vdso_info.symtab = (Elf64_Sym *)
|
||||
((uintptr_t)dyn[i].d_un.d_ptr
|
||||
+ vdso_info.load_offset);
|
||||
break;
|
||||
case DT_HASH:
|
||||
hash = (Elf64_Word *)
|
||||
((uintptr_t)dyn[i].d_un.d_ptr
|
||||
+ vdso_info.load_offset);
|
||||
break;
|
||||
case DT_VERSYM:
|
||||
vdso_info.versym = (Elf64_Versym *)
|
||||
((uintptr_t)dyn[i].d_un.d_ptr
|
||||
+ vdso_info.load_offset);
|
||||
break;
|
||||
case DT_VERDEF:
|
||||
vdso_info.verdef = (Elf64_Verdef *)
|
||||
((uintptr_t)dyn[i].d_un.d_ptr
|
||||
+ vdso_info.load_offset);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!vdso_info.symstrings || !vdso_info.symtab || !hash)
|
||||
return; /* Failed */
|
||||
|
||||
if (!vdso_info.verdef)
|
||||
vdso_info.versym = 0;
|
||||
|
||||
/* Parse the hash table header. */
|
||||
vdso_info.nbucket = hash[0];
|
||||
vdso_info.nchain = hash[1];
|
||||
vdso_info.bucket = &hash[2];
|
||||
vdso_info.chain = &hash[vdso_info.nbucket + 2];
|
||||
|
||||
/* That's all we need. */
|
||||
vdso_info.valid = true;
|
||||
}
|
||||
|
||||
static bool vdso_match_version(Elf64_Versym ver,
|
||||
const char *name, Elf64_Word hash)
|
||||
{
|
||||
/*
|
||||
* This is a helper function to check if the version indexed by
|
||||
* ver matches name (which hashes to hash).
|
||||
*
|
||||
* The version definition table is a mess, and I don't know how
|
||||
* to do this in better than linear time without allocating memory
|
||||
* to build an index. I also don't know why the table has
|
||||
* variable size entries in the first place.
|
||||
*
|
||||
* For added fun, I can't find a comprehensible specification of how
|
||||
* to parse all the weird flags in the table.
|
||||
*
|
||||
* So I just parse the whole table every time.
|
||||
*/
|
||||
|
||||
/* First step: find the version definition */
|
||||
ver &= 0x7fff; /* Apparently bit 15 means "hidden" */
|
||||
Elf64_Verdef *def = vdso_info.verdef;
|
||||
while(true) {
|
||||
if ((def->vd_flags & VER_FLG_BASE) == 0
|
||||
&& (def->vd_ndx & 0x7fff) == ver)
|
||||
break;
|
||||
|
||||
if (def->vd_next == 0)
|
||||
return false; /* No definition. */
|
||||
|
||||
def = (Elf64_Verdef *)((char *)def + def->vd_next);
|
||||
}
|
||||
|
||||
/* Now figure out whether it matches. */
|
||||
Elf64_Verdaux *aux = (Elf64_Verdaux*)((char *)def + def->vd_aux);
|
||||
return def->vd_hash == hash
|
||||
&& !strcmp(name, vdso_info.symstrings + aux->vda_name);
|
||||
}
|
||||
|
||||
void *vdso_sym(const char *version, const char *name)
|
||||
{
|
||||
unsigned long ver_hash;
|
||||
if (!vdso_info.valid)
|
||||
return 0;
|
||||
|
||||
ver_hash = elf_hash(version);
|
||||
Elf64_Word chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket];
|
||||
|
||||
for (; chain != STN_UNDEF; chain = vdso_info.chain[chain]) {
|
||||
Elf64_Sym *sym = &vdso_info.symtab[chain];
|
||||
|
||||
/* Check for a defined global or weak function w/ right name. */
|
||||
if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
|
||||
continue;
|
||||
if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
|
||||
ELF64_ST_BIND(sym->st_info) != STB_WEAK)
|
||||
continue;
|
||||
if (sym->st_shndx == SHN_UNDEF)
|
||||
continue;
|
||||
if (strcmp(name, vdso_info.symstrings + sym->st_name))
|
||||
continue;
|
||||
|
||||
/* Check symbol version. */
|
||||
if (vdso_info.versym
|
||||
&& !vdso_match_version(vdso_info.versym[chain],
|
||||
version, ver_hash))
|
||||
continue;
|
||||
|
||||
return (void *)(vdso_info.load_offset + sym->st_value);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void vdso_init_from_auxv(void *auxv)
|
||||
{
|
||||
Elf64_auxv_t *elf_auxv = auxv;
|
||||
for (int i = 0; elf_auxv[i].a_type != AT_NULL; i++)
|
||||
{
|
||||
if (elf_auxv[i].a_type == AT_SYSINFO_EHDR) {
|
||||
vdso_init_from_sysinfo_ehdr(elf_auxv[i].a_un.a_val);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
vdso_info.valid = false;
|
||||
}
|
|
@ -0,0 +1,111 @@
|
|||
/*
|
||||
* vdso_test.c: Sample code to test parse_vdso.c on x86_64
|
||||
* Copyright (c) 2011 Andy Lutomirski
|
||||
* Subject to the GNU General Public License, version 2
|
||||
*
|
||||
* You can amuse yourself by compiling with:
|
||||
* gcc -std=gnu99 -nostdlib
|
||||
* -Os -fno-asynchronous-unwind-tables -flto
|
||||
* vdso_test.c parse_vdso.c -o vdso_test
|
||||
* to generate a small binary with no dependencies at all.
|
||||
*/
|
||||
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/time.h>
|
||||
#include <unistd.h>
|
||||
#include <stdint.h>
|
||||
|
||||
extern void *vdso_sym(const char *version, const char *name);
|
||||
extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
|
||||
extern void vdso_init_from_auxv(void *auxv);
|
||||
|
||||
/* We need a libc function... */
|
||||
int strcmp(const char *a, const char *b)
{
	/*
	 * Minimal stand-in for the libc routine, needed because this test
	 * is meant to be buildable with -nostdlib.
	 *
	 * Returns 0 if the strings are equal; otherwise the difference of
	 * the first pair of differing bytes, compared as unsigned char, so
	 * the sign reflects the ordering of the two strings.
	 */
	while (*a && *a == *b) {
		a++;
		b++;
	}

	return (int)(unsigned char)*a - (int)(unsigned char)*b;
}
|
||||
|
||||
/* ...and two syscalls. This is x86_64-specific. */
|
||||
static inline long linux_write(int fd, const void *data, size_t len)
|
||||
{
|
||||
|
||||
long ret;
|
||||
asm volatile ("syscall" : "=a" (ret) : "a" (__NR_write),
|
||||
"D" (fd), "S" (data), "d" (len) :
|
||||
"cc", "memory", "rcx",
|
||||
"r8", "r9", "r10", "r11" );
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void linux_exit(int code)
|
||||
{
|
||||
asm volatile ("syscall" : : "a" (__NR_exit), "D" (code));
|
||||
}
|
||||
|
||||
/*
 * Write the decimal digits of n right-aligned, ending at *lastdig and
 * growing towards lower addresses.  No terminator is written, and for
 * n == 0 nothing is written at all, so the caller's buffer must already
 * hold whatever padding it wants.
 */
void to_base10(char *lastdig, uint64_t n)
{
	for (; n != 0; n /= 10)
		*lastdig-- = (char)('0' + n % 10);
}
|
||||
|
||||
/*
 * C entry point, reached from _start with a pointer to the initial
 * stack.  It walks past argc, argv and the environment to find auxv,
 * initializes the vDSO parser, looks up __vdso_gettimeofday and prints
 * the current time.  The process exits with 1 if the symbol is not
 * found, with gettimeofday's return value if the call fails, and with 0
 * otherwise.
 */
__attribute__((externally_visible)) void c_main(void **stack)
{
	/* Parse the stack: argc, then argc argv pointers and a NULL. */
	long argc = (long)*stack;
	stack += argc + 2;

	/* Now we're pointing at the environment.  Skip it. */
	while(*stack)
		stack++;
	stack++;

	/* Now we're pointing at auxv.  Initialize the vDSO parser. */
	vdso_init_from_auxv((void *)stack);

	/* Find gettimeofday. */
	typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
	gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday");

	if (!gtod)
		linux_exit(1);

	struct timeval tv;
	long ret = gtod(&tv, 0);

	if (ret == 0) {
		/*
		 * Layout: 12 bytes of label, 20 blank columns for the
		 * seconds (a uint64_t has at most 20 digits), '.', six
		 * microsecond digits and a newline.  The seconds therefore
		 * end at index 31 and the microseconds at index 38; both
		 * writes must stay inside the buffer.
		 */
		char buf[] = "The time is "
			     "          "	/* 10 spaces */
			     "          "	/* 10 spaces */
			     ".000000\n";
		/* Fail the build if the padding above is ever miscounted. */
		typedef char buf_layout_check[(sizeof(buf) == 41) ? 1 : -1];

		to_base10(buf + 31, tv.tv_sec);
		to_base10(buf + 38, tv.tv_usec);
		linux_write(1, buf, sizeof(buf) - 1);
	} else {
		linux_exit(ret);
	}

	linux_exit(0);
}
|
||||
|
||||
/*
|
||||
* This is the real entry point. It passes the initial stack into
|
||||
* the C entry point.
|
||||
*/
|
||||
asm (
|
||||
".text\n"
|
||||
".global _start\n"
|
||||
".type _start,@function\n"
|
||||
"_start:\n\t"
|
||||
"mov %rsp,%rdi\n\t"
|
||||
"jmp c_main"
|
||||
);
|
|
@ -180,6 +180,19 @@ KVM_CHECK_EXTENSION ioctl() to determine the value for max_vcpus at run-time.
|
|||
If the KVM_CAP_NR_VCPUS does not exist, you should assume that max_vcpus is 4
|
||||
cpus max.
|
||||
|
||||
On powerpc using book3s_hv mode, the vcpus are mapped onto virtual
|
||||
threads in one or more virtual CPU cores. (This is because the
|
||||
hardware requires all the hardware threads in a CPU core to be in the
|
||||
same partition.) The KVM_CAP_PPC_SMT capability indicates the number
|
||||
of vcpus per virtual core (vcore). The vcore id is obtained by
|
||||
dividing the vcpu id by the number of vcpus per vcore. The vcpus in a
|
||||
given vcore will always be in the same physical core as each other
|
||||
(though that might be a different physical core from time to time).
|
||||
Userspace can control the threading (SMT) mode of the guest by its
|
||||
allocation of vcpu ids. For example, if userspace wants
|
||||
single-threaded guest vcpus, it should make all vcpu ids be a multiple
|
||||
of the number of vcpus per vcore.
|
||||
|
||||
4.8 KVM_GET_DIRTY_LOG (vm ioctl)
|
||||
|
||||
Capability: basic
|
||||
|
@ -1143,15 +1156,10 @@ Assigns an IRQ to a passed-through device.
|
|||
|
||||
struct kvm_assigned_irq {
|
||||
__u32 assigned_dev_id;
|
||||
__u32 host_irq;
|
||||
__u32 host_irq; /* ignored (legacy field) */
|
||||
__u32 guest_irq;
|
||||
__u32 flags;
|
||||
union {
|
||||
struct {
|
||||
__u32 addr_lo;
|
||||
__u32 addr_hi;
|
||||
__u32 data;
|
||||
} guest_msi;
|
||||
__u32 reserved[12];
|
||||
};
|
||||
};
|
||||
|
@ -1239,8 +1247,10 @@ Type: vm ioctl
|
|||
Parameters: struct kvm_assigned_msix_nr (in)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
Set the number of MSI-X interrupts for an assigned device. This service can
|
||||
only be called once in the lifetime of an assigned device.
|
||||
Set the number of MSI-X interrupts for an assigned device. The number is
|
||||
reset again by terminating the MSI-X assignment of the device via
|
||||
KVM_DEASSIGN_DEV_IRQ. Calling this service more than once at any earlier
|
||||
point will fail.
|
||||
|
||||
struct kvm_assigned_msix_nr {
|
||||
__u32 assigned_dev_id;
|
||||
|
@ -1291,6 +1301,135 @@ Returns the tsc frequency of the guest. The unit of the return value is
|
|||
KHz. If the host has unstable tsc this ioctl returns -EIO instead as an
|
||||
error.
|
||||
|
||||
4.56 KVM_GET_LAPIC
|
||||
|
||||
Capability: KVM_CAP_IRQCHIP
|
||||
Architectures: x86
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_lapic_state (out)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
#define KVM_APIC_REG_SIZE 0x400
|
||||
struct kvm_lapic_state {
|
||||
char regs[KVM_APIC_REG_SIZE];
|
||||
};
|
||||
|
||||
Reads the Local APIC registers and copies them into the input argument. The
|
||||
data format and layout are the same as documented in the architecture manual.
|
||||
|
||||
4.57 KVM_SET_LAPIC
|
||||
|
||||
Capability: KVM_CAP_IRQCHIP
|
||||
Architectures: x86
|
||||
Type: vcpu ioctl
|
||||
Parameters: struct kvm_lapic_state (in)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
||||
#define KVM_APIC_REG_SIZE 0x400
|
||||
struct kvm_lapic_state {
|
||||
char regs[KVM_APIC_REG_SIZE];
|
||||
};
|
||||
|
||||
Copies the input argument into the Local APIC registers. The data format
|
||||
and layout are the same as documented in the architecture manual.
|
||||
|
||||
4.58 KVM_IOEVENTFD
|
||||
|
||||
Capability: KVM_CAP_IOEVENTFD
|
||||
Architectures: all
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_ioeventfd (in)
|
||||
Returns: 0 on success, !0 on error
|
||||
|
||||
This ioctl attaches or detaches an ioeventfd to a legal pio/mmio address
|
||||
within the guest. A guest write in the registered address will signal the
|
||||
provided event instead of triggering an exit.
|
||||
|
||||
struct kvm_ioeventfd {
|
||||
__u64 datamatch;
|
||||
__u64 addr; /* legal pio/mmio address */
|
||||
__u32 len; /* 1, 2, 4, or 8 bytes */
|
||||
__s32 fd;
|
||||
__u32 flags;
|
||||
__u8 pad[36];
|
||||
};
|
||||
|
||||
The following flags are defined:
|
||||
|
||||
#define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
|
||||
#define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio)
|
||||
#define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign)
|
||||
|
||||
If datamatch flag is set, the event will be signaled only if the written value
|
||||
to the registered address is equal to datamatch in struct kvm_ioeventfd.
|
||||
|
||||
4.62 KVM_CREATE_SPAPR_TCE
|
||||
|
||||
Capability: KVM_CAP_SPAPR_TCE
|
||||
Architectures: powerpc
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_create_spapr_tce (in)
|
||||
Returns: file descriptor for manipulating the created TCE table
|
||||
|
||||
This creates a virtual TCE (translation control entry) table, which
|
||||
is an IOMMU for PAPR-style virtual I/O. It is used to translate
|
||||
logical addresses used in virtual I/O into guest physical addresses,
|
||||
and provides a scatter/gather capability for PAPR virtual I/O.
|
||||
|
||||
/* for KVM_CAP_SPAPR_TCE */
|
||||
struct kvm_create_spapr_tce {
|
||||
__u64 liobn;
|
||||
__u32 window_size;
|
||||
};
|
||||
|
||||
The liobn field gives the logical IO bus number for which to create a
|
||||
TCE table. The window_size field specifies the size of the DMA window
|
||||
which this TCE table will translate - the table will contain one 64
|
||||
bit TCE entry for every 4kiB of the DMA window.
|
||||
|
||||
When the guest issues an H_PUT_TCE hcall on a liobn for which a TCE
|
||||
table has been created using this ioctl(), the kernel will handle it
|
||||
in real mode, updating the TCE table. H_PUT_TCE calls for other
|
||||
liobns will cause a vm exit and must be handled by userspace.
|
||||
|
||||
The return value is a file descriptor which can be passed to mmap(2)
|
||||
to map the created TCE table into userspace. This lets userspace read
|
||||
the entries written by kernel-handled H_PUT_TCE calls, and also lets
|
||||
userspace update the TCE table directly which is useful in some
|
||||
circumstances.
|
||||
|
||||
4.63 KVM_ALLOCATE_RMA
|
||||
|
||||
Capability: KVM_CAP_PPC_RMA
|
||||
Architectures: powerpc
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_allocate_rma (out)
|
||||
Returns: file descriptor for mapping the allocated RMA
|
||||
|
||||
This allocates a Real Mode Area (RMA) from the pool allocated at boot
|
||||
time by the kernel. An RMA is a physically-contiguous, aligned region
|
||||
of memory used on older POWER processors to provide the memory which
|
||||
will be accessed by real-mode (MMU off) accesses in a KVM guest.
|
||||
POWER processors support a set of sizes for the RMA that usually
|
||||
includes 64MB, 128MB, 256MB and some larger powers of two.
|
||||
|
||||
/* for KVM_ALLOCATE_RMA */
|
||||
struct kvm_allocate_rma {
|
||||
__u64 rma_size;
|
||||
};
|
||||
|
||||
The return value is a file descriptor which can be passed to mmap(2)
|
||||
to map the allocated RMA into userspace. The mapped area can then be
|
||||
passed to the KVM_SET_USER_MEMORY_REGION ioctl to establish it as the
|
||||
RMA for a virtual machine. The size of the RMA in bytes (which is
|
||||
fixed at host kernel boot time) is returned in the rma_size field of
|
||||
the argument structure.
|
||||
|
||||
The KVM_CAP_PPC_RMA capability is 1 or 2 if the KVM_ALLOCATE_RMA ioctl
|
||||
is supported; 2 if the processor requires all virtual machines to have
|
||||
an RMA, or 1 if the processor can use an RMA but doesn't require it,
|
||||
because it supports the Virtual RMA (VRMA) facility.
|
||||
|
||||
5. The kvm_run structure
|
||||
|
||||
Application code obtains a pointer to the kvm_run structure by
|
||||
|
@ -1473,6 +1612,23 @@ Userspace can now handle the hypercall and when it's done modify the gprs as
|
|||
necessary. Upon guest entry all guest GPRs will then be replaced by the values
|
||||
in this struct.
|
||||
|
||||
/* KVM_EXIT_PAPR_HCALL */
|
||||
struct {
|
||||
__u64 nr;
|
||||
__u64 ret;
|
||||
__u64 args[9];
|
||||
} papr_hcall;
|
||||
|
||||
This is used on 64-bit PowerPC when emulating a pSeries partition,
|
||||
e.g. with the 'pseries' machine type in qemu. It occurs when the
|
||||
guest does a hypercall using the 'sc 1' instruction. The 'nr' field
|
||||
contains the hypercall number (from the guest R3), and 'args' contains
|
||||
the arguments (from the guest R4 - R12). Userspace should put the
|
||||
return code in 'ret' and any extra returned values in args[].
|
||||
The possible hypercalls are defined in the Power Architecture Platform
|
||||
Requirements (PAPR) document available from www.power.org (free
|
||||
developer registration required to access it).
|
||||
|
||||
/* Fix the size of the union. */
|
||||
char padding[256];
|
||||
};
|
||||
|
|
|
@ -165,6 +165,10 @@ Shadow pages contain the following information:
|
|||
Contains the value of efer.nxe for which the page is valid.
|
||||
role.cr0_wp:
|
||||
Contains the value of cr0.wp for which the page is valid.
|
||||
role.smep_andnot_wp:
|
||||
Contains the value of cr4.smep && !cr0.wp for which the page is valid
|
||||
(pages for which this is true are different from other pages; see the
|
||||
treatment of cr0.wp=0 below).
|
||||
gfn:
|
||||
Either the guest page table containing the translations shadowed by this
|
||||
page, or the base page frame for linear translations. See role.direct.
|
||||
|
@ -317,6 +321,20 @@ on fault type:
|
|||
|
||||
(user write faults generate a #PF)
|
||||
|
||||
In the first case there is an additional complication if CR4.SMEP is
|
||||
enabled: since we've turned the page into a kernel page, the kernel may now
|
||||
execute it. We handle this by also setting spte.nx. If we get a user
|
||||
fetch or read fault, we'll change spte.u=1 and spte.nx=gpte.nx back.
|
||||
|
||||
To prevent an spte that was converted into a kernel page with cr0.wp=0
|
||||
from being written by the kernel after cr0.wp has changed to 1, we make
|
||||
the value of cr0.wp part of the page role. This means that an spte created
|
||||
with one value of cr0.wp cannot be used when cr0.wp has a different value -
|
||||
it will simply be missed by the shadow page lookup code. A similar issue
|
||||
exists when an spte created with cr0.wp=0 and cr4.smep=0 is used after
|
||||
changing cr4.smep to 1. To avoid this, the value of !cr0.wp && cr4.smep
|
||||
is also made a part of the page role.
|
||||
|
||||
Large pages
|
||||
===========
|
||||
|
||||
|
|
|
@ -185,3 +185,37 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02
|
|||
|
||||
Currently type 2 APF will be always delivered on the same vcpu as
|
||||
type 1 was, but guest should not rely on that.
|
||||
|
||||
MSR_KVM_STEAL_TIME: 0x4b564d03
|
||||
|
||||
data: 64-byte aligned physical address of a memory area which must be
|
||||
in guest RAM, plus an enable bit in bit 0. This memory is expected to
|
||||
hold a copy of the following structure:
|
||||
|
||||
struct kvm_steal_time {
|
||||
__u64 steal;
|
||||
__u32 version;
|
||||
__u32 flags;
|
||||
__u32 pad[12];
|
||||
}
|
||||
|
||||
whose data will be filled in by the hypervisor periodically. Only one
|
||||
write, or registration, is needed for each VCPU. The interval between
|
||||
updates of this structure is arbitrary and implementation-dependent.
|
||||
The hypervisor may update this structure at any time it sees fit until
|
||||
anything with bit0 == 0 is written to it. Guest is required to make sure
|
||||
this structure is initialized to zero.
|
||||
|
||||
Fields have the following meanings:
|
||||
|
||||
version: a sequence counter. In other words, guest has to check
|
||||
this field before and after grabbing time information and make
|
||||
sure they are both equal and even. An odd version indicates an
|
||||
in-progress update.
|
||||
|
||||
flags: At this point, always zero. May be used to indicate
|
||||
changes in this structure in the future.
|
||||
|
||||
steal: the amount of time in which this vCPU did not run, in
|
||||
nanoseconds. Time during which the vcpu is idle will not be
|
||||
reported as steal time.
|
||||
|
|
|
@ -0,0 +1,251 @@
|
|||
Nested VMX
|
||||
==========
|
||||
|
||||
Overview
|
||||
---------
|
||||
|
||||
On Intel processors, KVM uses Intel's VMX (Virtual-Machine eXtensions)
|
||||
to easily and efficiently run guest operating systems. Normally, these guests
|
||||
*cannot* themselves be hypervisors running their own guests, because in VMX,
|
||||
guests cannot use VMX instructions.
|
||||
|
||||
The "Nested VMX" feature adds this missing capability - of running guest
|
||||
hypervisors (which use VMX) with their own nested guests. It does so by
|
||||
allowing a guest to use VMX instructions, and correctly and efficiently
|
||||
emulating them using the single level of VMX available in the hardware.
|
||||
|
||||
We describe in much greater detail the theory behind the nested VMX feature,
|
||||
its implementation and its performance characteristics, in the OSDI 2010 paper
|
||||
"The Turtles Project: Design and Implementation of Nested Virtualization",
|
||||
available at:
|
||||
|
||||
http://www.usenix.org/events/osdi10/tech/full_papers/Ben-Yehuda.pdf
|
||||
|
||||
|
||||
Terminology
|
||||
-----------
|
||||
|
||||
Single-level virtualization has two levels - the host (KVM) and the guests.
|
||||
In nested virtualization, we have three levels: The host (KVM), which we call
|
||||
L0, the guest hypervisor, which we call L1, and its nested guest, which we
|
||||
call L2.
|
||||
|
||||
|
||||
Known limitations
|
||||
-----------------
|
||||
|
||||
The current code supports running Linux guests under KVM guests.
|
||||
Only 64-bit guest hypervisors are supported.
|
||||
|
||||
Additional patches for running Windows under guest KVM, and Linux under
|
||||
guest VMware server, and support for nested EPT, are currently running in
|
||||
the lab, and will be sent as follow-on patchsets.
|
||||
|
||||
|
||||
Running nested VMX
|
||||
------------------
|
||||
|
||||
The nested VMX feature is disabled by default. It can be enabled by giving
|
||||
the "nested=1" option to the kvm-intel module.
|
||||
|
||||
No modifications are required to user space (qemu). However, qemu's default
|
||||
emulated CPU type (qemu64) does not list the "VMX" CPU feature, so it must be
|
||||
explicitly enabled, by giving qemu one of the following options:
|
||||
|
||||
-cpu host (emulated CPU has all features of the real CPU)
|
||||
|
||||
-cpu qemu64,+vmx (add just the vmx feature to a named CPU type)
|
||||
|
||||
|
||||
ABIs
|
||||
----
|
||||
|
||||
Nested VMX aims to present a standard and (eventually) fully-functional VMX
|
||||
implementation for a guest hypervisor to use. As such, the official
|
||||
specification of the ABI that it provides is Intel's VMX specification,
|
||||
namely volume 3B of their "Intel 64 and IA-32 Architectures Software
|
||||
Developer's Manual". Not all of VMX's features are currently fully supported,
|
||||
but the goal is to eventually support them all, starting with the VMX features
|
||||
which are used in practice by popular hypervisors (KVM and others).
|
||||
|
||||
As a VMX implementation, nested VMX presents a VMCS structure to L1.
|
||||
As mandated by the spec, other than the two fields revision_id and abort,
|
||||
this structure is *opaque* to its user, who is not supposed to know or care
|
||||
about its internal structure. Rather, the structure is accessed through the
|
||||
VMREAD and VMWRITE instructions.
|
||||
Still, for debugging purposes, KVM developers might be interested to know the
|
||||
internals of this structure; this is struct vmcs12 from arch/x86/kvm/vmx.c.
|
||||
|
||||
The name "vmcs12" refers to the VMCS that L1 builds for L2. In the code we
|
||||
also have "vmcs01", the VMCS that L0 built for L1, and "vmcs02" is the VMCS
|
||||
which L0 builds to actually run L2 - how this is done is explained in the
|
||||
aforementioned paper.
|
||||
|
||||
For convenience, we repeat the content of struct vmcs12 here. If the internals
|
||||
of this structure change, this can break live migration across KVM versions.
|
||||
VMCS12_REVISION (from vmx.c) should be changed if struct vmcs12 or its inner
|
||||
struct shadow_vmcs is ever changed.
|
||||
|
||||
typedef u64 natural_width;
|
||||
struct __packed vmcs12 {
|
||||
/* According to the Intel spec, a VMCS region must start with
|
||||
* these two user-visible fields */
|
||||
u32 revision_id;
|
||||
u32 abort;
|
||||
|
||||
u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
|
||||
u32 padding[7]; /* room for future expansion */
|
||||
|
||||
u64 io_bitmap_a;
|
||||
u64 io_bitmap_b;
|
||||
u64 msr_bitmap;
|
||||
u64 vm_exit_msr_store_addr;
|
||||
u64 vm_exit_msr_load_addr;
|
||||
u64 vm_entry_msr_load_addr;
|
||||
u64 tsc_offset;
|
||||
u64 virtual_apic_page_addr;
|
||||
u64 apic_access_addr;
|
||||
u64 ept_pointer;
|
||||
u64 guest_physical_address;
|
||||
u64 vmcs_link_pointer;
|
||||
u64 guest_ia32_debugctl;
|
||||
u64 guest_ia32_pat;
|
||||
u64 guest_ia32_efer;
|
||||
u64 guest_pdptr0;
|
||||
u64 guest_pdptr1;
|
||||
u64 guest_pdptr2;
|
||||
u64 guest_pdptr3;
|
||||
u64 host_ia32_pat;
|
||||
u64 host_ia32_efer;
|
||||
u64 padding64[8]; /* room for future expansion */
|
||||
natural_width cr0_guest_host_mask;
|
||||
natural_width cr4_guest_host_mask;
|
||||
natural_width cr0_read_shadow;
|
||||
natural_width cr4_read_shadow;
|
||||
natural_width cr3_target_value0;
|
||||
natural_width cr3_target_value1;
|
||||
natural_width cr3_target_value2;
|
||||
natural_width cr3_target_value3;
|
||||
natural_width exit_qualification;
|
||||
natural_width guest_linear_address;
|
||||
natural_width guest_cr0;
|
||||
natural_width guest_cr3;
|
||||
natural_width guest_cr4;
|
||||
natural_width guest_es_base;
|
||||
natural_width guest_cs_base;
|
||||
natural_width guest_ss_base;
|
||||
natural_width guest_ds_base;
|
||||
natural_width guest_fs_base;
|
||||
natural_width guest_gs_base;
|
||||
natural_width guest_ldtr_base;
|
||||
natural_width guest_tr_base;
|
||||
natural_width guest_gdtr_base;
|
||||
natural_width guest_idtr_base;
|
||||
natural_width guest_dr7;
|
||||
natural_width guest_rsp;
|
||||
natural_width guest_rip;
|
||||
natural_width guest_rflags;
|
||||
natural_width guest_pending_dbg_exceptions;
|
||||
natural_width guest_sysenter_esp;
|
||||
natural_width guest_sysenter_eip;
|
||||
natural_width host_cr0;
|
||||
natural_width host_cr3;
|
||||
natural_width host_cr4;
|
||||
natural_width host_fs_base;
|
||||
natural_width host_gs_base;
|
||||
natural_width host_tr_base;
|
||||
natural_width host_gdtr_base;
|
||||
natural_width host_idtr_base;
|
||||
natural_width host_ia32_sysenter_esp;
|
||||
natural_width host_ia32_sysenter_eip;
|
||||
natural_width host_rsp;
|
||||
natural_width host_rip;
|
||||
natural_width paddingl[8]; /* room for future expansion */
|
||||
u32 pin_based_vm_exec_control;
|
||||
u32 cpu_based_vm_exec_control;
|
||||
u32 exception_bitmap;
|
||||
u32 page_fault_error_code_mask;
|
||||
u32 page_fault_error_code_match;
|
||||
u32 cr3_target_count;
|
||||
u32 vm_exit_controls;
|
||||
u32 vm_exit_msr_store_count;
|
||||
u32 vm_exit_msr_load_count;
|
||||
u32 vm_entry_controls;
|
||||
u32 vm_entry_msr_load_count;
|
||||
u32 vm_entry_intr_info_field;
|
||||
u32 vm_entry_exception_error_code;
|
||||
u32 vm_entry_instruction_len;
|
||||
u32 tpr_threshold;
|
||||
u32 secondary_vm_exec_control;
|
||||
u32 vm_instruction_error;
|
||||
u32 vm_exit_reason;
|
||||
u32 vm_exit_intr_info;
|
||||
u32 vm_exit_intr_error_code;
|
||||
u32 idt_vectoring_info_field;
|
||||
u32 idt_vectoring_error_code;
|
||||
u32 vm_exit_instruction_len;
|
||||
u32 vmx_instruction_info;
|
||||
u32 guest_es_limit;
|
||||
u32 guest_cs_limit;
|
||||
u32 guest_ss_limit;
|
||||
u32 guest_ds_limit;
|
||||
u32 guest_fs_limit;
|
||||
u32 guest_gs_limit;
|
||||
u32 guest_ldtr_limit;
|
||||
u32 guest_tr_limit;
|
||||
u32 guest_gdtr_limit;
|
||||
u32 guest_idtr_limit;
|
||||
u32 guest_es_ar_bytes;
|
||||
u32 guest_cs_ar_bytes;
|
||||
u32 guest_ss_ar_bytes;
|
||||
u32 guest_ds_ar_bytes;
|
||||
u32 guest_fs_ar_bytes;
|
||||
u32 guest_gs_ar_bytes;
|
||||
u32 guest_ldtr_ar_bytes;
|
||||
u32 guest_tr_ar_bytes;
|
||||
u32 guest_interruptibility_info;
|
||||
u32 guest_activity_state;
|
||||
u32 guest_sysenter_cs;
|
||||
u32 host_ia32_sysenter_cs;
|
||||
u32 padding32[8]; /* room for future expansion */
|
||||
u16 virtual_processor_id;
|
||||
u16 guest_es_selector;
|
||||
u16 guest_cs_selector;
|
||||
u16 guest_ss_selector;
|
||||
u16 guest_ds_selector;
|
||||
u16 guest_fs_selector;
|
||||
u16 guest_gs_selector;
|
||||
u16 guest_ldtr_selector;
|
||||
u16 guest_tr_selector;
|
||||
u16 host_es_selector;
|
||||
u16 host_cs_selector;
|
||||
u16 host_ss_selector;
|
||||
u16 host_ds_selector;
|
||||
u16 host_fs_selector;
|
||||
u16 host_gs_selector;
|
||||
u16 host_tr_selector;
|
||||
};
|
||||
|
||||
|
||||
Authors
|
||||
-------
|
||||
|
||||
These patches were written by:
|
||||
Abel Gordon, abelg <at> il.ibm.com
|
||||
Nadav Har'El, nyh <at> il.ibm.com
|
||||
Orit Wasserman, oritw <at> il.ibm.com
|
||||
Ben-Ami Yassor, benami <at> il.ibm.com
|
||||
Muli Ben-Yehuda, muli <at> il.ibm.com
|
||||
|
||||
With contributions by:
|
||||
Anthony Liguori, aliguori <at> us.ibm.com
|
||||
Mike Day, mdday <at> us.ibm.com
|
||||
Michael Factor, factor <at> il.ibm.com
|
||||
Zvi Dubitzky, dubi <at> il.ibm.com
|
||||
|
||||
And valuable reviews by:
|
||||
Avi Kivity, avi <at> redhat.com
|
||||
Gleb Natapov, gleb <at> redhat.com
|
||||
Marcelo Tosatti, mtosatti <at> redhat.com
|
||||
Kevin Tian, kevin.tian <at> intel.com
|
||||
and others.
|
|
@ -68,9 +68,11 @@ page that contains parts of supervisor visible register state. The guest can
|
|||
map this shared page using the KVM hypercall KVM_HC_PPC_MAP_MAGIC_PAGE.
|
||||
|
||||
With this hypercall issued the guest always gets the magic page mapped at the
|
||||
desired location in effective and physical address space. For now, we always
|
||||
map the page to -4096. This way we can access it using absolute load and store
|
||||
functions. The following instruction reads the first field of the magic page:
|
||||
desired location. The first parameter indicates the effective address when the
|
||||
MMU is enabled. The second parameter indicates the address in real mode, if
|
||||
applicable to the target. For now, we always map the page to -4096. This way we
|
||||
can access it using absolute load and store functions. The following
|
||||
instruction reads the first field of the magic page:
|
||||
|
||||
ld rX, -4096(0)
|
||||
|
||||
|
|
|
@ -51,7 +51,7 @@
|
|||
#include <asm/bootparam.h>
|
||||
#include "../../../include/linux/lguest_launcher.h"
|
||||
/*L:110
|
||||
* We can ignore the 42 include files we need for this program, but I do want
|
||||
* We can ignore the 43 include files we need for this program, but I do want
|
||||
* to draw attention to the use of kernel-style types.
|
||||
*
|
||||
* As Linus said, "C is a Spartan language, and so should your naming be." I
|
||||
|
@ -65,7 +65,6 @@ typedef uint16_t u16;
|
|||
typedef uint8_t u8;
|
||||
/*:*/
|
||||
|
||||
#define PAGE_PRESENT 0x7 /* Present, RW, Execute */
|
||||
#define BRIDGE_PFX "bridge:"
|
||||
#ifndef SIOCBRADDIF
|
||||
#define SIOCBRADDIF 0x89a2 /* add interface to bridge */
|
||||
|
@ -861,8 +860,10 @@ static void console_output(struct virtqueue *vq)
|
|||
/* writev can return a partial write, so we loop here. */
|
||||
while (!iov_empty(iov, out)) {
|
||||
int len = writev(STDOUT_FILENO, iov, out);
|
||||
if (len <= 0)
|
||||
err(1, "Write to stdout gave %i", len);
|
||||
if (len <= 0) {
|
||||
warn("Write to stdout gave %i (%d)", len, errno);
|
||||
break;
|
||||
}
|
||||
iov_consume(iov, out, len);
|
||||
}
|
||||
|
||||
|
@ -898,7 +899,7 @@ static void net_output(struct virtqueue *vq)
|
|||
* same format: what a coincidence!
|
||||
*/
|
||||
if (writev(net_info->tunfd, iov, out) < 0)
|
||||
errx(1, "Write to tun failed?");
|
||||
warnx("Write to tun failed (%d)?", errno);
|
||||
|
||||
/*
|
||||
* Done with that one; wait_for_vq_desc() will send the interrupt if
|
||||
|
@ -955,7 +956,7 @@ static void net_input(struct virtqueue *vq)
|
|||
*/
|
||||
len = readv(net_info->tunfd, iov, in);
|
||||
if (len <= 0)
|
||||
err(1, "Failed to read from tun.");
|
||||
warn("Failed to read from tun (%d).", errno);
|
||||
|
||||
/*
|
||||
* Mark that packet buffer as used, but don't interrupt here. We want
|
||||
|
@ -1093,9 +1094,10 @@ static void update_device_status(struct device *dev)
|
|||
warnx("Device %s configuration FAILED", dev->name);
|
||||
if (dev->running)
|
||||
reset_device(dev);
|
||||
} else if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
|
||||
if (!dev->running)
|
||||
start_device(dev);
|
||||
} else {
|
||||
if (dev->running)
|
||||
err(1, "Device %s features finalized twice", dev->name);
|
||||
start_device(dev);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1120,25 +1122,11 @@ static void handle_output(unsigned long addr)
|
|||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Devices *can* be used before status is set to DRIVER_OK.
|
||||
* The original plan was that they would never do this: they
|
||||
* would always finish setting up their status bits before
|
||||
* actually touching the virtqueues. In practice, we allowed
|
||||
* them to, and they do (eg. the disk probes for partition
|
||||
* tables as part of initialization).
|
||||
*
|
||||
* If we see this, we start the device: once it's running, we
|
||||
* expect the device to catch all the notifications.
|
||||
*/
|
||||
/* Devices should not be used before features are finalized. */
|
||||
for (vq = i->vq; vq; vq = vq->next) {
|
||||
if (addr != vq->config.pfn*getpagesize())
|
||||
continue;
|
||||
if (i->running)
|
||||
errx(1, "Notification on running %s", i->name);
|
||||
/* This just calls create_thread() for each virtqueue */
|
||||
start_device(i);
|
||||
return;
|
||||
errx(1, "Notification on %s before setup!", i->name);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1370,7 +1358,7 @@ static void setup_console(void)
|
|||
* --sharenet=<name> option which opens or creates a named pipe. This can be
|
||||
* used to send packets to another guest in a 1:1 manner.
|
||||
*
|
||||
* More sopisticated is to use one of the tools developed for project like UML
|
||||
* More sophisticated is to use one of the tools developed for project like UML
|
||||
* to do networking.
|
||||
*
|
||||
* Faster is to do virtio bonding in kernel. Doing this 1:1 would be
|
||||
|
@ -1380,7 +1368,7 @@ static void setup_console(void)
|
|||
* multiple inter-guest channels behind one interface, although it would
|
||||
* require some manner of hotplugging new virtio channels.
|
||||
*
|
||||
* Finally, we could implement a virtio network switch in the kernel.
|
||||
* Finally, we could use a virtio network switch in the kernel, ie. vhost.
|
||||
:*/
|
||||
|
||||
static u32 str2ip(const char *ipaddr)
|
||||
|
@ -2017,10 +2005,7 @@ int main(int argc, char *argv[])
|
|||
/* Tell the entry path not to try to reload segment registers. */
|
||||
boot->hdr.loadflags |= KEEP_SEGMENTS;
|
||||
|
||||
/*
|
||||
* We tell the kernel to initialize the Guest: this returns the open
|
||||
* /dev/lguest file descriptor.
|
||||
*/
|
||||
/* We tell the kernel to initialize the Guest. */
|
||||
tell_kernel(start);
|
||||
|
||||
/* Ensure that we terminate if a device-servicing child dies. */
|
||||
|
|
|
@ -129,12 +129,12 @@ Limit injection to pages owned by memgroup. Specified by inode number
|
|||
of the memcg.
|
||||
|
||||
Example:
|
||||
mkdir /cgroup/hwpoison
|
||||
mkdir /sys/fs/cgroup/mem/hwpoison
|
||||
|
||||
usemem -m 100 -s 1000 &
|
||||
echo `jobs -p` > /cgroup/hwpoison/tasks
|
||||
echo `jobs -p` > /sys/fs/cgroup/mem/hwpoison/tasks
|
||||
|
||||
memcg_ino=$(ls -id /cgroup/hwpoison | cut -f1 -d' ')
|
||||
memcg_ino=$(ls -id /sys/fs/cgroup/mem/hwpoison | cut -f1 -d' ')
|
||||
echo $memcg_ino > /debug/hwpoison/corrupt-filter-memcg
|
||||
|
||||
page-types -p `pidof init` --hwpoison # shall do nothing
|
||||
|
|
|
@ -674,7 +674,7 @@ Protocol: 2.10+
|
|||
|
||||
Field name: init_size
|
||||
Type: read
|
||||
Offset/size: 0x25c/4
|
||||
Offset/size: 0x260/4
|
||||
|
||||
This field indicates the amount of linear contiguous memory starting
|
||||
at the kernel runtime start address that the kernel needs before it
|
||||
|
|
|
@ -0,0 +1,98 @@
|
|||
This file documents some of the kernel entries in
|
||||
arch/x86/kernel/entry_64.S. A lot of this explanation is adapted from
|
||||
an email from Ingo Molnar:
|
||||
|
||||
http://lkml.kernel.org/r/<20110529191055.GC9835%40elte.hu>
|
||||
|
||||
The x86 architecture has quite a few different ways to jump into
|
||||
kernel code. Most of these entry points are registered in
|
||||
arch/x86/kernel/traps.c and implemented in arch/x86/kernel/entry_64.S
|
||||
and arch/x86/ia32/ia32entry.S.
|
||||
|
||||
The IDT vector assignments are listed in arch/x86/include/asm/irq_vectors.h.
|
||||
|
||||
Some of these entries are:
|
||||
|
||||
- system_call: syscall instruction from 64-bit code.
|
||||
|
||||
- ia32_syscall: int 0x80 from 32-bit or 64-bit code; compat syscall
|
||||
either way.
|
||||
|
||||
- ia32_cstar_target, ia32_sysenter_target: syscall and sysenter from 32-bit
|
||||
code
|
||||
|
||||
- interrupt: An array of entries. Every IDT vector that doesn't
|
||||
explicitly point somewhere else gets set to the corresponding
|
||||
value in interrupts. These point to a whole array of
|
||||
magically-generated functions that make their way to do_IRQ with
|
||||
the interrupt number as a parameter.
|
||||
|
||||
- emulate_vsyscall: int 0xcc, a special non-ABI entry used by
|
||||
vsyscall emulation.
|
||||
|
||||
- APIC interrupts: Various special-purpose interrupts for things
|
||||
like TLB shootdown.
|
||||
|
||||
- Architecturally-defined exceptions like divide_error.
|
||||
|
||||
There are a few complexities here. The different x86-64 entries
|
||||
have different calling conventions. The syscall and sysenter
|
||||
instructions have their own peculiar calling conventions. Some of
|
||||
the IDT entries push an error code onto the stack; others don't.
|
||||
IDT entries using the IST alternative stack mechanism need their own
|
||||
magic to get the stack frames right. (You can find some
|
||||
documentation in the AMD APM, Volume 2, Chapter 8 and the Intel SDM,
|
||||
Volume 3, Chapter 6.)
|
||||
|
||||
Dealing with the swapgs instruction is especially tricky. Swapgs
|
||||
toggles whether gs is the kernel gs or the user gs. The swapgs
|
||||
instruction is rather fragile: it must nest perfectly and only in
|
||||
single depth, it should only be used if entering from user mode to
|
||||
kernel mode and then when returning to user-space, and precisely
|
||||
so. If we mess that up even slightly, we crash.
|
||||
|
||||
So when we have a secondary entry, already in kernel mode, we *must
|
||||
not* use SWAPGS blindly - nor must we forget doing a SWAPGS when it's
|
||||
not switched/swapped yet.
|
||||
|
||||
Now, there's a secondary complication: there's a cheap way to test
|
||||
which mode the CPU is in and an expensive way.
|
||||
|
||||
The cheap way is to pick this info off the entry frame on the kernel
|
||||
stack, from the CS of the ptregs area of the kernel stack:
|
||||
|
||||
xorl %ebx,%ebx
|
||||
testl $3,CS+8(%rsp)
|
||||
je error_kernelspace
|
||||
SWAPGS
|
||||
|
||||
The expensive (paranoid) way is to read back the MSR_GS_BASE value
|
||||
(which is what SWAPGS modifies):
|
||||
|
||||
movl $1,%ebx
|
||||
movl $MSR_GS_BASE,%ecx
|
||||
rdmsr
|
||||
testl %edx,%edx
|
||||
js 1f /* negative -> in kernel */
|
||||
SWAPGS
|
||||
xorl %ebx,%ebx
|
||||
1: ret
|
||||
|
||||
and the whole paranoid non-paranoid macro complexity is about whether
|
||||
to suffer that RDMSR cost.
|
||||
|
||||
If we are at an interrupt or user-trap/gate-alike boundary then we can
|
||||
use the faster check: the stack will be a reliable indicator of
|
||||
whether SWAPGS was already done: if we see that we are a secondary
|
||||
entry interrupting kernel mode execution, then we know that the GS
|
||||
base has already been switched. If it says that we interrupted
|
||||
user-space execution then we must do the SWAPGS.
|
||||
|
||||
But if we are in an NMI/MCE/DEBUG/whatever super-atomic entry context,
|
||||
which might have triggered right after a normal entry wrote CS to the
|
||||
stack but before we executed SWAPGS, then the only safe way to check
|
||||
for GS is the slower method: the RDMSR.
|
||||
|
||||
So we try only to mark those entry methods 'paranoid' that absolutely
|
||||
need the more expensive check for the GS base - and we generate all
|
||||
'normal' entry points with the regular (faster) entry macros.
|
|
@ -67,7 +67,7 @@ Linux
|
|||
|
||||
12:已经通过CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT,
|
||||
CONFIG_DEBUG_SLAB, CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES,
|
||||
CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_SPINLOCK_SLEEP测试,并且同时都
|
||||
CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_ATOMIC_SLEEP测试,并且同时都
|
||||
使能。
|
||||
|
||||
13:已经都构建并且使用或者不使用 CONFIG_SMP 和 CONFIG_PREEMPT测试执行时间。
|
||||
|
|
142
MAINTAINERS
142
MAINTAINERS
|
@ -1,4 +1,5 @@
|
|||
|
||||
|
||||
List of maintainers and how to submit kernel changes
|
||||
|
||||
Please try to follow the guidelines below. This will make things
|
||||
|
@ -533,6 +534,8 @@ L: device-drivers-devel@blackfin.uclinux.org
|
|||
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
|
||||
W: http://wiki.analog.com/
|
||||
S: Supported
|
||||
F: sound/soc/codecs/adau*
|
||||
F: sound/soc/codecs/adav*
|
||||
F: sound/soc/codecs/ad1*
|
||||
F: sound/soc/codecs/ssm*
|
||||
|
||||
|
@ -594,6 +597,16 @@ S: Maintained
|
|||
F: arch/arm/lib/floppydma.S
|
||||
F: arch/arm/include/asm/floppy.h
|
||||
|
||||
ARM PMU PROFILING AND DEBUGGING
|
||||
M: Will Deacon <will.deacon@arm.com>
|
||||
S: Maintained
|
||||
F: arch/arm/kernel/perf_event*
|
||||
F: arch/arm/oprofile/common.c
|
||||
F: arch/arm/kernel/pmu.c
|
||||
F: arch/arm/include/asm/pmu.h
|
||||
F: arch/arm/kernel/hw_breakpoint.c
|
||||
F: arch/arm/include/asm/hw_breakpoint.h
|
||||
|
||||
ARM PORT
|
||||
M: Russell King <linux@arm.linux.org.uk>
|
||||
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
|
||||
|
@ -1345,16 +1358,18 @@ F: drivers/auxdisplay/
|
|||
F: include/linux/cfag12864b.h
|
||||
|
||||
AVR32 ARCHITECTURE
|
||||
M: Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>
|
||||
M: Haavard Skinnemoen <hskinnemoen@gmail.com>
|
||||
M: Hans-Christian Egtvedt <egtvedt@samfundet.no>
|
||||
W: http://www.atmel.com/products/AVR32/
|
||||
W: http://avr32linux.org/
|
||||
W: http://avrfreaks.net/
|
||||
S: Supported
|
||||
S: Maintained
|
||||
F: arch/avr32/
|
||||
|
||||
AVR32/AT32AP MACHINE SUPPORT
|
||||
M: Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>
|
||||
S: Supported
|
||||
M: Haavard Skinnemoen <hskinnemoen@gmail.com>
|
||||
M: Hans-Christian Egtvedt <egtvedt@samfundet.no>
|
||||
S: Maintained
|
||||
F: arch/avr32/mach-at32ap/
|
||||
|
||||
AX.25 NETWORK LAYER
|
||||
|
@ -1390,7 +1405,6 @@ F: include/linux/backlight.h
|
|||
BATMAN ADVANCED
|
||||
M: Marek Lindner <lindner_marek@yahoo.de>
|
||||
M: Simon Wunderlich <siwu@hrz.tu-chemnitz.de>
|
||||
M: Sven Eckelmann <sven@narfation.org>
|
||||
L: b.a.t.m.a.n@lists.open-mesh.org
|
||||
W: http://www.open-mesh.org/
|
||||
S: Maintained
|
||||
|
@ -1423,7 +1437,6 @@ S: Supported
|
|||
F: arch/blackfin/
|
||||
|
||||
BLACKFIN EMAC DRIVER
|
||||
M: Michael Hennerich <michael.hennerich@analog.com>
|
||||
L: uclinux-dist-devel@blackfin.uclinux.org
|
||||
W: http://blackfin.uclinux.org
|
||||
S: Supported
|
||||
|
@ -1540,6 +1553,12 @@ L: linux-wireless@vger.kernel.org
|
|||
S: Supported
|
||||
F: drivers/staging/brcm80211/
|
||||
|
||||
BROADCOM BNX2FC 10 GIGABIT FCOE DRIVER
|
||||
M: Bhanu Prakash Gollapudi <bprakash@broadcom.com>
|
||||
L: linux-scsi@vger.kernel.org
|
||||
S: Supported
|
||||
F: drivers/scsi/bnx2fc/
|
||||
|
||||
BROCADE BFA FC SCSI DRIVER
|
||||
M: Jing Huang <huangj@brocade.com>
|
||||
L: linux-scsi@vger.kernel.org
|
||||
|
@ -1639,7 +1658,7 @@ CAN NETWORK LAYER
|
|||
M: Oliver Hartkopp <socketcan@hartkopp.net>
|
||||
M: Oliver Hartkopp <oliver.hartkopp@volkswagen.de>
|
||||
M: Urs Thuermann <urs.thuermann@volkswagen.de>
|
||||
L: socketcan-core@lists.berlios.de
|
||||
L: socketcan-core@lists.berlios.de (subscribers-only)
|
||||
L: netdev@vger.kernel.org
|
||||
W: http://developer.berlios.de/projects/socketcan/
|
||||
S: Maintained
|
||||
|
@ -1651,7 +1670,7 @@ F: include/linux/can/raw.h
|
|||
|
||||
CAN NETWORK DRIVERS
|
||||
M: Wolfgang Grandegger <wg@grandegger.com>
|
||||
L: socketcan-core@lists.berlios.de
|
||||
L: socketcan-core@lists.berlios.de (subscribers-only)
|
||||
L: netdev@vger.kernel.org
|
||||
W: http://developer.berlios.de/projects/socketcan/
|
||||
S: Maintained
|
||||
|
@ -1739,7 +1758,7 @@ S: Supported
|
|||
F: drivers/net/enic/
|
||||
|
||||
CIRRUS LOGIC EP93XX ETHERNET DRIVER
|
||||
M: Lennert Buytenhek <kernel@wantstofly.org>
|
||||
M: Hartley Sweeten <hsweeten@visionengravers.com>
|
||||
L: netdev@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/net/arm/ep93xx_eth.c
|
||||
|
@ -1762,7 +1781,8 @@ F: include/linux/clk.h
|
|||
|
||||
CISCO FCOE HBA DRIVER
|
||||
M: Abhijeet Joglekar <abjoglek@cisco.com>
|
||||
M: Joe Eykholt <jeykholt@cisco.com>
|
||||
M: Venkata Siva Vijayendra Bhamidipati <vbhamidi@cisco.com>
|
||||
M: Brian Uchino <buchino@cisco.com>
|
||||
L: linux-scsi@vger.kernel.org
|
||||
S: Supported
|
||||
F: drivers/scsi/fnic/
|
||||
|
@ -1889,7 +1909,6 @@ L: cpufreq@vger.kernel.org
|
|||
W: http://www.codemonkey.org.uk/projects/cpufreq/
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/davej/cpufreq.git
|
||||
S: Maintained
|
||||
F: arch/x86/kernel/cpu/cpufreq/
|
||||
F: drivers/cpufreq/
|
||||
F: include/linux/cpufreq.h
|
||||
|
||||
|
@ -2198,7 +2217,7 @@ F: drivers/acpi/dock.c
|
|||
DOCUMENTATION
|
||||
M: Randy Dunlap <rdunlap@xenotime.net>
|
||||
L: linux-doc@vger.kernel.org
|
||||
T: quilt oss.oracle.com/~rdunlap/kernel-doc-patches/current/
|
||||
T: quilt http://userweb.kernel.org/~rdunlap/kernel-doc-patches/current/
|
||||
S: Maintained
|
||||
F: Documentation/
|
||||
|
||||
|
@ -2292,8 +2311,7 @@ F: drivers/scsi/eata_pio.*
|
|||
|
||||
EBTABLES
|
||||
M: Bart De Schuymer <bart.de.schuymer@pandora.be>
|
||||
L: ebtables-user@lists.sourceforge.net
|
||||
L: ebtables-devel@lists.sourceforge.net
|
||||
L: netfilter-devel@vger.kernel.org
|
||||
W: http://ebtables.sourceforge.net/
|
||||
S: Maintained
|
||||
F: include/linux/netfilter_bridge/ebt_*.h
|
||||
|
@ -3417,10 +3435,9 @@ S: Maintained
|
|||
F: drivers/net/ipg.*
|
||||
|
||||
IPATH DRIVER
|
||||
M: Ralph Campbell <infinipath@qlogic.com>
|
||||
M: Mike Marciniszyn <infinipath@qlogic.com>
|
||||
L: linux-rdma@vger.kernel.org
|
||||
T: git git://git.qlogic.com/ipath-linux-2.6
|
||||
S: Supported
|
||||
S: Maintained
|
||||
F: drivers/infiniband/hw/ipath/
|
||||
|
||||
IPMI SUBSYSTEM
|
||||
|
@ -3820,6 +3837,12 @@ S: Maintained
|
|||
F: drivers/leds/
|
||||
F: include/linux/leds.h
|
||||
|
||||
LEGACY EEPROM DRIVER
|
||||
M: Jean Delvare <khali@linux-fr.org>
|
||||
S: Maintained
|
||||
F: Documentation/misc-devices/eeprom
|
||||
F: drivers/misc/eeprom/eeprom.c
|
||||
|
||||
LEGO USB Tower driver
|
||||
M: Juergen Stuber <starblue@users.sourceforge.net>
|
||||
L: legousb-devel@lists.sourceforge.net
|
||||
|
@ -4145,7 +4168,7 @@ F: include/linux/mm.h
|
|||
F: mm/
|
||||
|
||||
MEMORY RESOURCE CONTROLLER
|
||||
M: Balbir Singh <balbir@linux.vnet.ibm.com>
|
||||
M: Balbir Singh <bsingharora@gmail.com>
|
||||
M: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
|
||||
M: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
|
||||
L: linux-mm@kvack.org
|
||||
|
@ -4252,8 +4275,7 @@ F: drivers/mmc/
|
|||
F: include/linux/mmc/
|
||||
|
||||
MULTIMEDIA CARD (MMC) ETC. OVER SPI
|
||||
M: David Brownell <dbrownell@users.sourceforge.net>
|
||||
S: Odd Fixes
|
||||
S: Orphan
|
||||
F: drivers/mmc/host/mmc_spi.c
|
||||
F: include/linux/spi/mmc_spi.h
|
||||
|
||||
|
@ -4276,8 +4298,8 @@ S: Maintained
|
|||
F: drivers/usb/musb/
|
||||
|
||||
MYRICOM MYRI-10G 10GbE DRIVER (MYRI10GE)
|
||||
M: Jon Mason <mason@myri.com>
|
||||
M: Andrew Gallatin <gallatin@myri.com>
|
||||
M: Brice Goglin <brice@myri.com>
|
||||
L: netdev@vger.kernel.org
|
||||
W: http://www.myri.com/scs/download-Myri10GE.html
|
||||
S: Supported
|
||||
|
@ -4571,9 +4593,8 @@ S: Maintained
|
|||
F: drivers/mmc/host/omap.c
|
||||
|
||||
OMAP HS MMC SUPPORT
|
||||
M: Madhusudhan Chikkature <madhu.cr@ti.com>
|
||||
L: linux-omap@vger.kernel.org
|
||||
S: Maintained
|
||||
S: Orphan
|
||||
F: drivers/mmc/host/omap_hsmmc.c
|
||||
|
||||
OMAP RANDOM NUMBER GENERATOR SUPPORT
|
||||
|
@ -4603,7 +4624,6 @@ F: drivers/media/video/omap3isp/*
|
|||
|
||||
OMAP USB SUPPORT
|
||||
M: Felipe Balbi <balbi@ti.com>
|
||||
M: David Brownell <dbrownell@users.sourceforge.net>
|
||||
L: linux-usb@vger.kernel.org
|
||||
L: linux-omap@vger.kernel.org
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
|
||||
|
@ -4668,6 +4688,14 @@ F: drivers/of
|
|||
F: include/linux/of*.h
|
||||
K: of_get_property
|
||||
|
||||
OPENRISC ARCHITECTURE
|
||||
M: Jonas Bonn <jonas@southpole.se>
|
||||
W: http://openrisc.net
|
||||
L: linux@lists.openrisc.net
|
||||
S: Maintained
|
||||
T: git git://openrisc.net/~jonas/linux
|
||||
F: arch/openrisc
|
||||
|
||||
OPL4 DRIVER
|
||||
M: Clemens Ladisch <clemens@ladisch.de>
|
||||
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
|
||||
|
@ -4892,7 +4920,7 @@ F: mm/percpu*.c
|
|||
F: arch/*/include/asm/percpu.h
|
||||
|
||||
PER-TASK DELAY ACCOUNTING
|
||||
M: Balbir Singh <balbir@linux.vnet.ibm.com>
|
||||
M: Balbir Singh <bsingharora@gmail.com>
|
||||
S: Maintained
|
||||
F: include/linux/delayacct.h
|
||||
F: kernel/delayacct.c
|
||||
|
@ -4947,6 +4975,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/epip/linux-2.6-unicore32.gi
|
|||
F: drivers/input/serio/i8042-unicore32io.h
|
||||
F: drivers/i2c/busses/i2c-puv3.c
|
||||
F: drivers/video/fb-puv3.c
|
||||
F: drivers/rtc/rtc-puv3.c
|
||||
|
||||
PMC SIERRA MaxRAID DRIVER
|
||||
M: Anil Ravindranath <anil_ravindranath@pmc-sierra.com>
|
||||
|
@ -4979,7 +5008,7 @@ F: drivers/power/power_supply*
|
|||
|
||||
PNP SUPPORT
|
||||
M: Adam Belay <abelay@mit.edu>
|
||||
M: Bjorn Helgaas <bjorn.helgaas@hp.com>
|
||||
M: Bjorn Helgaas <bhelgaas@google.com>
|
||||
S: Maintained
|
||||
F: drivers/pnp/
|
||||
|
||||
|
@ -5139,6 +5168,12 @@ M: Robert Jarzmik <robert.jarzmik@free.fr>
|
|||
L: rtc-linux@googlegroups.com
|
||||
S: Maintained
|
||||
|
||||
QIB DRIVER
|
||||
M: Mike Marciniszyn <infinipath@qlogic.com>
|
||||
L: linux-rdma@vger.kernel.org
|
||||
S: Supported
|
||||
F: drivers/infiniband/hw/qib/
|
||||
|
||||
QLOGIC QLA1280 SCSI DRIVER
|
||||
M: Michael Reed <mdr@sgi.com>
|
||||
L: linux-scsi@vger.kernel.org
|
||||
|
@ -5178,6 +5213,7 @@ S: Supported
|
|||
F: drivers/net/qlcnic/
|
||||
|
||||
QLOGIC QLGE 10Gb ETHERNET DRIVER
|
||||
M: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>
|
||||
M: Ron Mercer <ron.mercer@qlogic.com>
|
||||
M: linux-driver@qlogic.com
|
||||
L: netdev@vger.kernel.org
|
||||
|
@ -5299,6 +5335,13 @@ L: reiserfs-devel@vger.kernel.org
|
|||
S: Supported
|
||||
F: fs/reiserfs/
|
||||
|
||||
REGISTER MAP ABSTRACTION
|
||||
M: Mark Brown <broonie@opensource.wolfsonmicro.com>
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regmap.git
|
||||
S: Supported
|
||||
F: drivers/base/regmap/
|
||||
F: include/linux/regmap.h
|
||||
|
||||
RFKILL
|
||||
M: Johannes Berg <johannes@sipsolutions.net>
|
||||
L: linux-wireless@vger.kernel.org
|
||||
|
@ -5984,7 +6027,6 @@ F: Documentation/serial/specialix.txt
|
|||
F: drivers/staging/tty/specialix*
|
||||
|
||||
SPI SUBSYSTEM
|
||||
M: David Brownell <dbrownell@users.sourceforge.net>
|
||||
M: Grant Likely <grant.likely@secretlab.ca>
|
||||
L: spi-devel-general@lists.sourceforge.net
|
||||
Q: http://patchwork.kernel.org/project/spi-devel-general/list/
|
||||
|
@ -6100,7 +6142,7 @@ F: include/target/
|
|||
F: Documentation/target/
|
||||
|
||||
TASKSTATS STATISTICS INTERFACE
|
||||
M: Balbir Singh <balbir@linux.vnet.ibm.com>
|
||||
M: Balbir Singh <bsingharora@gmail.com>
|
||||
S: Maintained
|
||||
F: Documentation/accounting/taskstats*
|
||||
F: include/linux/taskstats*
|
||||
|
@ -6229,9 +6271,14 @@ F: drivers/char/toshiba.c
|
|||
F: include/linux/toshiba.h
|
||||
|
||||
TMIO MMC DRIVER
|
||||
M: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
|
||||
M: Ian Molton <ian@mnementh.co.uk>
|
||||
L: linux-mmc@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/mmc/host/tmio_mmc.*
|
||||
F: drivers/mmc/host/tmio_mmc*
|
||||
F: drivers/mmc/host/sh_mobile_sdhi.c
|
||||
F: include/linux/mmc/tmio.h
|
||||
F: include/linux/mmc/sh_mobile_sdhi.h
|
||||
|
||||
TMPFS (SHMEM FILESYSTEM)
|
||||
M: Hugh Dickins <hughd@google.com>
|
||||
|
@ -6308,7 +6355,7 @@ F: drivers/scsi/u14-34f.c
|
|||
|
||||
UBI FILE SYSTEM (UBIFS)
|
||||
M: Artem Bityutskiy <dedekind1@gmail.com>
|
||||
M: Adrian Hunter <adrian.hunter@nokia.com>
|
||||
M: Adrian Hunter <adrian.hunter@intel.com>
|
||||
L: linux-mtd@lists.infradead.org
|
||||
T: git git://git.infradead.org/ubifs-2.6.git
|
||||
W: http://www.linux-mtd.infradead.org/doc/ubifs.html
|
||||
|
@ -6432,9 +6479,9 @@ S: Maintained
|
|||
F: drivers/usb/misc/rio500*
|
||||
|
||||
USB EHCI DRIVER
|
||||
M: David Brownell <dbrownell@users.sourceforge.net>
|
||||
M: Alan Stern <stern@rowland.harvard.edu>
|
||||
L: linux-usb@vger.kernel.org
|
||||
S: Odd Fixes
|
||||
S: Maintained
|
||||
F: Documentation/usb/ehci.txt
|
||||
F: drivers/usb/host/ehci*
|
||||
|
||||
|
@ -6448,9 +6495,10 @@ S: Maintained
|
|||
F: drivers/media/video/et61x251/
|
||||
|
||||
USB GADGET/PERIPHERAL SUBSYSTEM
|
||||
M: David Brownell <dbrownell@users.sourceforge.net>
|
||||
M: Felipe Balbi <balbi@ti.com>
|
||||
L: linux-usb@vger.kernel.org
|
||||
W: http://www.linux-usb.org/gadget
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
|
||||
S: Maintained
|
||||
F: drivers/usb/gadget/
|
||||
F: include/linux/usb/gadget*
|
||||
|
@ -6460,9 +6508,15 @@ M: Jiri Kosina <jkosina@suse.cz>
|
|||
L: linux-usb@vger.kernel.org
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid.git
|
||||
S: Maintained
|
||||
F: Documentation/usb/hiddev.txt
|
||||
F: Documentation/hid/hiddev.txt
|
||||
F: drivers/hid/usbhid/
|
||||
|
||||
USB/IP DRIVERS
|
||||
M: Matt Mooney <mfm@muteddisk.com>
|
||||
L: linux-usb@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/staging/usbip/
|
||||
|
||||
USB ISP116X DRIVER
|
||||
M: Olav Kongas <ok@artecdesign.ee>
|
||||
L: linux-usb@vger.kernel.org
|
||||
|
@ -6492,9 +6546,9 @@ S: Maintained
|
|||
F: sound/usb/midi.*
|
||||
|
||||
USB OHCI DRIVER
|
||||
M: David Brownell <dbrownell@users.sourceforge.net>
|
||||
M: Alan Stern <stern@rowland.harvard.edu>
|
||||
L: linux-usb@vger.kernel.org
|
||||
S: Odd Fixes
|
||||
S: Maintained
|
||||
F: Documentation/usb/ohci.txt
|
||||
F: drivers/usb/host/ohci*
|
||||
|
||||
|
@ -6720,6 +6774,15 @@ S: Maintained
|
|||
F: Documentation/filesystems/vfat.txt
|
||||
F: fs/fat/
|
||||
|
||||
VIDEOBUF2 FRAMEWORK
|
||||
M: Pawel Osciak <pawel@osciak.com>
|
||||
M: Marek Szyprowski <m.szyprowski@samsung.com>
|
||||
M: Kyungmin Park <kyungmin.park@samsung.com>
|
||||
L: linux-media@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/media/video/videobuf2-*
|
||||
F: include/media/videobuf2-*
|
||||
|
||||
VIRTIO CONSOLE DRIVER
|
||||
M: Amit Shah <amit.shah@redhat.com>
|
||||
L: virtualization@lists.linux-foundation.org
|
||||
|
@ -6997,6 +7060,13 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/mjg59/platform-drivers-x86.
|
|||
S: Maintained
|
||||
F: drivers/platform/x86
|
||||
|
||||
X86 MCE INFRASTRUCTURE
|
||||
M: Tony Luck <tony.luck@intel.com>
|
||||
M: Borislav Petkov <bp@amd64.org>
|
||||
L: linux-edac@vger.kernel.org
|
||||
S: Maintained
|
||||
F: arch/x86/kernel/cpu/mcheck/*
|
||||
|
||||
XEN HYPERVISOR INTERFACE
|
||||
M: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
|
||||
M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
||||
|
|
20
Makefile
20
Makefile
|
@ -1,7 +1,7 @@
|
|||
VERSION = 3
|
||||
PATCHLEVEL = 0
|
||||
SUBLEVEL = 0
|
||||
EXTRAVERSION = -rc1
|
||||
EXTRAVERSION =
|
||||
NAME = Sneaky Weasel
|
||||
|
||||
# *DOCUMENTATION*
|
||||
|
@ -378,7 +378,7 @@ KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds
|
|||
|
||||
# Read KERNELRELEASE from include/config/kernel.release (if it exists)
|
||||
KERNELRELEASE = $(shell cat include/config/kernel.release 2> /dev/null)
|
||||
KERNELVERSION = $(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
|
||||
KERNELVERSION = $(VERSION)$(if $(PATCHLEVEL),.$(PATCHLEVEL)$(if $(SUBLEVEL),.$(SUBLEVEL)))$(EXTRAVERSION)
|
||||
|
||||
export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION
|
||||
export ARCH SRCARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC
|
||||
|
@ -1005,7 +1005,7 @@ endef
|
|||
|
||||
define filechk_version.h
|
||||
(echo \#define LINUX_VERSION_CODE $(shell \
|
||||
expr $(VERSION) \* 65536 + $(PATCHLEVEL) \* 256 + $(SUBLEVEL)); \
|
||||
expr $(VERSION) \* 65536 + 0$(PATCHLEVEL) \* 256 + 0$(SUBLEVEL)); \
|
||||
echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))';)
|
||||
endef
|
||||
|
||||
|
@ -1110,11 +1110,6 @@ modules_install: _modinst_ _modinst_post
|
|||
|
||||
PHONY += _modinst_
|
||||
_modinst_:
|
||||
@if [ -z "`$(DEPMOD) -V 2>/dev/null | grep module-init-tools`" ]; then \
|
||||
echo "Warning: you may need to install module-init-tools"; \
|
||||
echo "See http://www.codemonkey.org.uk/docs/post-halloween-2.6.txt";\
|
||||
sleep 1; \
|
||||
fi
|
||||
@rm -rf $(MODLIB)/kernel
|
||||
@rm -f $(MODLIB)/source
|
||||
@mkdir -p $(MODLIB)/kernel
|
||||
|
@ -1295,6 +1290,7 @@ help:
|
|||
@echo ' make O=dir [targets] Locate all output files in "dir", including .config'
|
||||
@echo ' make C=1 [targets] Check all c source with $$CHECK (sparse by default)'
|
||||
@echo ' make C=2 [targets] Force check of all c source with $$CHECK'
|
||||
@echo ' make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections'
|
||||
@echo ' make W=n [targets] Enable extra gcc checks, n=1,2,3 where'
|
||||
@echo ' 1: warnings which may be relevant and do not occur too often'
|
||||
@echo ' 2: warnings which occur quite often but may still be relevant'
|
||||
|
@ -1531,12 +1527,8 @@ quiet_cmd_rmfiles = $(if $(wildcard $(rm-files)),CLEAN $(wildcard $(rm-files))
|
|||
|
||||
# Run depmod only if we have System.map and depmod is executable
|
||||
quiet_cmd_depmod = DEPMOD $(KERNELRELEASE)
|
||||
cmd_depmod = \
|
||||
if [ -r System.map -a -x $(DEPMOD) ]; then \
|
||||
$(DEPMOD) -ae -F System.map \
|
||||
$(if $(strip $(INSTALL_MOD_PATH)), -b $(INSTALL_MOD_PATH) ) \
|
||||
$(KERNELRELEASE); \
|
||||
fi
|
||||
cmd_depmod = $(CONFIG_SHELL) $(srctree)/scripts/depmod.sh $(DEPMOD) \
|
||||
$(KERNELRELEASE)
|
||||
|
||||
# Create temporary dir for module support files
|
||||
# clean it up only when building all modules
|
||||
|
|
42
README
42
README
|
@ -1,6 +1,6 @@
|
|||
Linux kernel release 2.6.xx <http://kernel.org/>
|
||||
Linux kernel release 3.x <http://kernel.org/>
|
||||
|
||||
These are the release notes for Linux version 2.6. Read them carefully,
|
||||
These are the release notes for Linux version 3. Read them carefully,
|
||||
as they tell you what this is all about, explain how to install the
|
||||
kernel, and what to do if something goes wrong.
|
||||
|
||||
|
@ -62,10 +62,10 @@ INSTALLING the kernel source:
|
|||
directory where you have permissions (eg. your home directory) and
|
||||
unpack it:
|
||||
|
||||
gzip -cd linux-2.6.XX.tar.gz | tar xvf -
|
||||
gzip -cd linux-3.X.tar.gz | tar xvf -
|
||||
|
||||
or
|
||||
bzip2 -dc linux-2.6.XX.tar.bz2 | tar xvf -
|
||||
bzip2 -dc linux-3.X.tar.bz2 | tar xvf -
|
||||
|
||||
|
||||
Replace "XX" with the version number of the latest kernel.
|
||||
|
@ -75,15 +75,15 @@ INSTALLING the kernel source:
|
|||
files. They should match the library, and not get messed up by
|
||||
whatever the kernel-du-jour happens to be.
|
||||
|
||||
- You can also upgrade between 2.6.xx releases by patching. Patches are
|
||||
- You can also upgrade between 3.x releases by patching. Patches are
|
||||
distributed in the traditional gzip and the newer bzip2 format. To
|
||||
install by patching, get all the newer patch files, enter the
|
||||
top level directory of the kernel source (linux-2.6.xx) and execute:
|
||||
top level directory of the kernel source (linux-3.x) and execute:
|
||||
|
||||
gzip -cd ../patch-2.6.xx.gz | patch -p1
|
||||
gzip -cd ../patch-3.x.gz | patch -p1
|
||||
|
||||
or
|
||||
bzip2 -dc ../patch-2.6.xx.bz2 | patch -p1
|
||||
bzip2 -dc ../patch-3.x.bz2 | patch -p1
|
||||
|
||||
(repeat xx for all versions bigger than the version of your current
|
||||
source tree, _in_order_) and you should be ok. You may want to remove
|
||||
|
@ -91,9 +91,9 @@ INSTALLING the kernel source:
|
|||
failed patches (xxx# or xxx.rej). If there are, either you or me has
|
||||
made a mistake.
|
||||
|
||||
Unlike patches for the 2.6.x kernels, patches for the 2.6.x.y kernels
|
||||
Unlike patches for the 3.x kernels, patches for the 3.x.y kernels
|
||||
(also known as the -stable kernels) are not incremental but instead apply
|
||||
directly to the base 2.6.x kernel. Please read
|
||||
directly to the base 3.x kernel. Please read
|
||||
Documentation/applying-patches.txt for more information.
|
||||
|
||||
Alternatively, the script patch-kernel can be used to automate this
|
||||
|
@ -107,14 +107,14 @@ INSTALLING the kernel source:
|
|||
an alternative directory can be specified as the second argument.
|
||||
|
||||
- If you are upgrading between releases using the stable series patches
|
||||
(for example, patch-2.6.xx.y), note that these "dot-releases" are
|
||||
not incremental and must be applied to the 2.6.xx base tree. For
|
||||
example, if your base kernel is 2.6.12 and you want to apply the
|
||||
2.6.12.3 patch, you do not and indeed must not first apply the
|
||||
2.6.12.1 and 2.6.12.2 patches. Similarly, if you are running kernel
|
||||
version 2.6.12.2 and want to jump to 2.6.12.3, you must first
|
||||
reverse the 2.6.12.2 patch (that is, patch -R) _before_ applying
|
||||
the 2.6.12.3 patch.
|
||||
(for example, patch-3.x.y), note that these "dot-releases" are
|
||||
not incremental and must be applied to the 3.x base tree. For
|
||||
example, if your base kernel is 3.0 and you want to apply the
|
||||
3.0.3 patch, you do not and indeed must not first apply the
|
||||
3.0.1 and 3.0.2 patches. Similarly, if you are running kernel
|
||||
version 3.0.2 and want to jump to 3.0.3, you must first
|
||||
reverse the 3.0.2 patch (that is, patch -R) _before_ applying
|
||||
the 3.0.3 patch.
|
||||
You can read more on this in Documentation/applying-patches.txt
|
||||
|
||||
- Make sure you have no stale .o files and dependencies lying around:
|
||||
|
@ -126,7 +126,7 @@ INSTALLING the kernel source:
|
|||
|
||||
SOFTWARE REQUIREMENTS
|
||||
|
||||
Compiling and running the 2.6.xx kernels requires up-to-date
|
||||
Compiling and running the 3.x kernels requires up-to-date
|
||||
versions of various software packages. Consult
|
||||
Documentation/Changes for the minimum version numbers required
|
||||
and how to get updates for these packages. Beware that using
|
||||
|
@ -142,11 +142,11 @@ BUILD directory for the kernel:
|
|||
Using the option "make O=output/dir" allows you to specify an alternate
|
||||
place for the output files (including .config).
|
||||
Example:
|
||||
kernel source code: /usr/src/linux-2.6.N
|
||||
kernel source code: /usr/src/linux-3.N
|
||||
build directory: /home/name/build/kernel
|
||||
|
||||
To configure and build the kernel use:
|
||||
cd /usr/src/linux-2.6.N
|
||||
cd /usr/src/linux-3.N
|
||||
make O=/home/name/build/kernel menuconfig
|
||||
make O=/home/name/build/kernel
|
||||
sudo make O=/home/name/build/kernel modules_install install
|
||||
|
|
|
@ -6,6 +6,7 @@ config ALPHA
|
|||
select HAVE_OPROFILE
|
||||
select HAVE_SYSCALL_WRAPPERS
|
||||
select HAVE_IRQ_WORK
|
||||
select HAVE_PCSPKR_PLATFORM
|
||||
select HAVE_PERF_EVENTS
|
||||
select HAVE_DMA_ATTRS
|
||||
select HAVE_GENERIC_HARDIRQS
|
||||
|
|
|
@ -1,3 +0,0 @@
|
|||
/*
|
||||
* 8253/8254 Programmable Interval Timer
|
||||
*/
|
|
@ -56,7 +56,6 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n)
|
|||
* Given a kernel address, find the home node of the underlying memory.
|
||||
*/
|
||||
#define kvaddr_to_nid(kaddr) pa_to_nid(__pa(kaddr))
|
||||
#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
|
||||
|
||||
/*
|
||||
* Given a kaddr, LOCAL_BASE_ADDR finds the owning node of the memory
|
||||
|
|
|
@ -29,20 +29,6 @@
|
|||
#define DEBUGP(fmt...)
|
||||
#endif
|
||||
|
||||
void *
|
||||
module_alloc(unsigned long size)
|
||||
{
|
||||
if (size == 0)
|
||||
return NULL;
|
||||
return vmalloc(size);
|
||||
}
|
||||
|
||||
void
|
||||
module_free(struct module *mod, void *module_region)
|
||||
{
|
||||
vfree(module_region);
|
||||
}
|
||||
|
||||
/* Allocate the GOT at the end of the core sections. */
|
||||
|
||||
struct got_entry {
|
||||
|
@ -155,14 +141,6 @@ module_frob_arch_sections(Elf64_Ehdr *hdr, Elf64_Shdr *sechdrs,
|
|||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
apply_relocate(Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex,
|
||||
unsigned int relsec, struct module *me)
|
||||
{
|
||||
printk(KERN_ERR "module %s: REL relocation unsupported\n", me->name);
|
||||
return -ENOEXEC;
|
||||
}
|
||||
|
||||
int
|
||||
apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab,
|
||||
unsigned int symindex, unsigned int relsec,
|
||||
|
@ -302,15 +280,3 @@ apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab,
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
|
||||
struct module *me)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
module_arch_cleanup(struct module *mod)
|
||||
{
|
||||
}
|
||||
|
|
|
@ -409,7 +409,7 @@ SYSCALL_DEFINE2(osf_getdomainname, char __user *, name, int, namelen)
|
|||
return -EFAULT;
|
||||
|
||||
len = namelen;
|
||||
if (namelen > 32)
|
||||
if (len > 32)
|
||||
len = 32;
|
||||
|
||||
down_read(&uts_sem);
|
||||
|
@ -594,7 +594,7 @@ SYSCALL_DEFINE3(osf_sysinfo, int, command, char __user *, buf, long, count)
|
|||
down_read(&uts_sem);
|
||||
res = sysinfo_table[offset];
|
||||
len = strlen(res)+1;
|
||||
if (len > count)
|
||||
if ((unsigned long)len > (unsigned long)count)
|
||||
len = count;
|
||||
if (copy_to_user(buf, res, len))
|
||||
err = -EFAULT;
|
||||
|
@ -649,7 +649,7 @@ SYSCALL_DEFINE5(osf_getsysinfo, unsigned long, op, void __user *, buffer,
|
|||
return 1;
|
||||
|
||||
case GSI_GET_HWRPB:
|
||||
if (nbytes < sizeof(*hwrpb))
|
||||
if (nbytes > sizeof(*hwrpb))
|
||||
return -EINVAL;
|
||||
if (copy_to_user(buffer, hwrpb, nbytes) != 0)
|
||||
return -EFAULT;
|
||||
|
@ -1008,6 +1008,7 @@ SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options,
|
|||
{
|
||||
struct rusage r;
|
||||
long ret, err;
|
||||
unsigned int status = 0;
|
||||
mm_segment_t old_fs;
|
||||
|
||||
if (!ur)
|
||||
|
@ -1016,13 +1017,15 @@ SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options,
|
|||
old_fs = get_fs();
|
||||
|
||||
set_fs (KERNEL_DS);
|
||||
ret = sys_wait4(pid, ustatus, options, (struct rusage __user *) &r);
|
||||
ret = sys_wait4(pid, (unsigned int __user *) &status, options,
|
||||
(struct rusage __user *) &r);
|
||||
set_fs (old_fs);
|
||||
|
||||
if (!access_ok(VERIFY_WRITE, ur, sizeof(*ur)))
|
||||
return -EFAULT;
|
||||
|
||||
err = 0;
|
||||
err |= put_user(status, ustatus);
|
||||
err |= __put_user(r.ru_utime.tv_sec, &ur->ru_utime.tv_sec);
|
||||
err |= __put_user(r.ru_utime.tv_usec, &ur->ru_utime.tv_usec);
|
||||
err |= __put_user(r.ru_stime.tv_sec, &ur->ru_stime.tv_sec);
|
||||
|
|
|
@ -847,7 +847,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
|
|||
data.period = event->hw.last_period;
|
||||
|
||||
if (alpha_perf_event_set_period(event, hwc, idx)) {
|
||||
if (perf_event_overflow(event, 1, &data, regs)) {
|
||||
if (perf_event_overflow(event, &data, regs)) {
|
||||
/* Interrupts coming too quickly; "throttle" the
|
||||
* counter, i.e., disable it for a little while.
|
||||
*/
|
||||
|
|
|
@ -26,7 +26,6 @@
|
|||
#include <asm/pgtable.h>
|
||||
#include <asm/core_cia.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/8253pit.h>
|
||||
|
||||
#include "proto.h"
|
||||
#include "irq_impl.h"
|
||||
|
|
|
@ -46,7 +46,6 @@
|
|||
#include <asm/uaccess.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/hwrpb.h>
|
||||
#include <asm/8253pit.h>
|
||||
#include <asm/rtc.h>
|
||||
|
||||
#include <linux/mc146818rtc.h>
|
||||
|
@ -91,7 +90,7 @@ DEFINE_PER_CPU(u8, irq_work_pending);
|
|||
#define test_irq_work_pending() __get_cpu_var(irq_work_pending)
|
||||
#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0
|
||||
|
||||
void set_irq_work_pending(void)
|
||||
void arch_irq_work_raise(void)
|
||||
{
|
||||
set_irq_work_pending_flag();
|
||||
}
|
||||
|
|
|
@ -10,7 +10,7 @@ config ARM
|
|||
select GENERIC_ATOMIC64 if (CPU_V6 || !CPU_32v6K || !AEABI)
|
||||
select HAVE_OPROFILE if (HAVE_PERF_EVENTS)
|
||||
select HAVE_ARCH_KGDB
|
||||
select HAVE_KPROBES if (!XIP_KERNEL && !THUMB2_KERNEL)
|
||||
select HAVE_KPROBES if !XIP_KERNEL
|
||||
select HAVE_KRETPROBES if (HAVE_KPROBES)
|
||||
select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
|
||||
select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL)
|
||||
|
@ -37,6 +37,9 @@ config ARM
|
|||
Europe. There is an ARM Linux project with a web page at
|
||||
<http://www.arm.linux.org.uk/>.
|
||||
|
||||
config ARM_HAS_SG_CHAIN
|
||||
bool
|
||||
|
||||
config HAVE_PWM
|
||||
bool
|
||||
|
||||
|
@ -642,6 +645,7 @@ config ARCH_SHMOBILE
|
|||
select NO_IOPORT
|
||||
select SPARSE_IRQ
|
||||
select MULTI_IRQ_HANDLER
|
||||
select PM_GENERIC_DOMAINS if PM
|
||||
help
|
||||
Support for Renesas's SH-Mobile and R-Mobile ARM platforms.
|
||||
|
||||
|
@ -1346,7 +1350,6 @@ config SMP_ON_UP
|
|||
|
||||
config HAVE_ARM_SCU
|
||||
bool
|
||||
depends on SMP
|
||||
help
|
||||
This option enables support for the ARM system coherency unit
|
||||
|
||||
|
@ -1715,17 +1718,34 @@ config ZBOOT_ROM
|
|||
Say Y here if you intend to execute your compressed kernel image
|
||||
(zImage) directly from ROM or flash. If unsure, say N.
|
||||
|
||||
choice
|
||||
prompt "Include SD/MMC loader in zImage (EXPERIMENTAL)"
|
||||
depends on ZBOOT_ROM && ARCH_SH7372 && EXPERIMENTAL
|
||||
default ZBOOT_ROM_NONE
|
||||
help
|
||||
Include experimental SD/MMC loading code in the ROM-able zImage.
|
||||
With this enabled it is possible to write the ROM-able zImage
|
||||
kernel image to an MMC or SD card and boot the kernel straight
|
||||
from the reset vector. At reset the processor Mask ROM will load
|
||||
the first part of the ROM-able zImage which in turn loads the
|
||||
rest of the kernel image to RAM.
|
||||
|
||||
config ZBOOT_ROM_NONE
|
||||
bool "No SD/MMC loader in zImage (EXPERIMENTAL)"
|
||||
help
|
||||
Do not load image from SD or MMC
|
||||
|
||||
config ZBOOT_ROM_MMCIF
|
||||
bool "Include MMCIF loader in zImage (EXPERIMENTAL)"
|
||||
depends on ZBOOT_ROM && ARCH_SH7372 && EXPERIMENTAL
|
||||
help
|
||||
Say Y here to include experimental MMCIF loading code in the
|
||||
ROM-able zImage. With this enabled it is possible to write the
|
||||
ROM-able zImage kernel image to an MMC card and boot the
|
||||
kernel straight from the reset vector. At reset the processor
|
||||
Mask ROM will load the first part of the ROM-able zImage
|
||||
which in turn loads the rest of the kernel image to RAM using the
|
||||
MMCIF hardware block.
|
||||
Load image from MMCIF hardware block.
|
||||
|
||||
config ZBOOT_ROM_SH_MOBILE_SDHI
|
||||
bool "Include SuperH Mobile SDHI loader in zImage (EXPERIMENTAL)"
|
||||
help
|
||||
Load image from SDHI hardware block
|
||||
|
||||
endchoice
|
||||
|
||||
config CMDLINE
|
||||
string "Default kernel command string"
|
||||
|
|
|
@ -6,13 +6,19 @@
|
|||
|
||||
OBJS =
|
||||
|
||||
# Ensure that mmcif loader code appears early in the image
|
||||
# Ensure that MMCIF loader code appears early in the image
|
||||
# to minimise the number of blocks that have to be read in
|
||||
# order to load it.
|
||||
ifeq ($(CONFIG_ZBOOT_ROM_MMCIF),y)
|
||||
ifeq ($(CONFIG_ARCH_SH7372),y)
|
||||
OBJS += mmcif-sh7372.o
|
||||
endif
|
||||
|
||||
# Ensure that SDHI loader code appears early in the image
|
||||
# to minimise the number of blocks that have to be read in
|
||||
# order to load it.
|
||||
ifeq ($(CONFIG_ZBOOT_ROM_SH_MOBILE_SDHI),y)
|
||||
OBJS += sdhi-shmobile.o
|
||||
OBJS += sdhi-sh7372.o
|
||||
endif
|
||||
|
||||
AFLAGS_head.o += -DTEXT_OFFSET=$(TEXT_OFFSET)
|
||||
|
|
|
@ -25,14 +25,14 @@
|
|||
/* load board-specific initialization code */
|
||||
#include <mach/zboot.h>
|
||||
|
||||
#ifdef CONFIG_ZBOOT_ROM_MMCIF
|
||||
/* Load image from MMC */
|
||||
adr sp, __tmp_stack + 128
|
||||
#if defined(CONFIG_ZBOOT_ROM_MMCIF) || defined(CONFIG_ZBOOT_ROM_SH_MOBILE_SDHI)
|
||||
/* Load image from MMC/SD */
|
||||
adr sp, __tmp_stack + 256
|
||||
ldr r0, __image_start
|
||||
ldr r1, __image_end
|
||||
subs r1, r1, r0
|
||||
ldr r0, __load_base
|
||||
bl mmcif_loader
|
||||
bl mmc_loader
|
||||
|
||||
/* Jump to loaded code */
|
||||
ldr r0, __loaded
|
||||
|
@ -51,9 +51,9 @@ __loaded:
|
|||
.long __continue
|
||||
.align
|
||||
__tmp_stack:
|
||||
.space 128
|
||||
.space 256
|
||||
__continue:
|
||||
#endif /* CONFIG_ZBOOT_ROM_MMCIF */
|
||||
#endif /* CONFIG_ZBOOT_ROM_MMCIF || CONFIG_ZBOOT_ROM_SH_MOBILE_SDHI */
|
||||
|
||||
b 1f
|
||||
__atags:@ tag #1
|
||||
|
|
|
@ -353,7 +353,8 @@ not_relocated: mov r0, #0
|
|||
mov r0, #0 @ must be zero
|
||||
mov r1, r7 @ restore architecture number
|
||||
mov r2, r8 @ restore atags pointer
|
||||
mov pc, r4 @ call kernel
|
||||
ARM( mov pc, r4 ) @ call kernel
|
||||
THUMB( bx r4 ) @ entry point is always ARM
|
||||
|
||||
.align 2
|
||||
.type LC0, #object
|
||||
|
@ -597,6 +598,8 @@ __common_mmu_cache_on:
|
|||
sub pc, lr, r0, lsr #32 @ properly flush pipeline
|
||||
#endif
|
||||
|
||||
#define PROC_ENTRY_SIZE (4*5)
|
||||
|
||||
/*
|
||||
* Here follow the relocatable cache support functions for the
|
||||
* various processors. This is a generic hook for locating an
|
||||
|
@ -624,7 +627,7 @@ call_cache_fn: adr r12, proc_types
|
|||
ARM( addeq pc, r12, r3 ) @ call cache function
|
||||
THUMB( addeq r12, r3 )
|
||||
THUMB( moveq pc, r12 ) @ call cache function
|
||||
add r12, r12, #4*5
|
||||
add r12, r12, #PROC_ENTRY_SIZE
|
||||
b 1b
|
||||
|
||||
/*
|
||||
|
@ -691,9 +694,9 @@ proc_types:
|
|||
|
||||
.word 0x41069260 @ ARM926EJ-S (v5TEJ)
|
||||
.word 0xff0ffff0
|
||||
b __arm926ejs_mmu_cache_on
|
||||
b __armv4_mmu_cache_off
|
||||
b __armv5tej_mmu_cache_flush
|
||||
W(b) __arm926ejs_mmu_cache_on
|
||||
W(b) __armv4_mmu_cache_off
|
||||
W(b) __armv5tej_mmu_cache_flush
|
||||
|
||||
.word 0x00007000 @ ARM7 IDs
|
||||
.word 0x0000f000
|
||||
|
@ -794,6 +797,16 @@ proc_types:
|
|||
|
||||
.size proc_types, . - proc_types
|
||||
|
||||
/*
|
||||
* If you get a "non-constant expression in ".if" statement"
|
||||
* error from the assembler on this line, check that you have
|
||||
* not accidentally written a "b" instruction where you should
|
||||
* have written W(b).
|
||||
*/
|
||||
.if (. - proc_types) % PROC_ENTRY_SIZE != 0
|
||||
.error "The size of one or more proc_types entries is wrong."
|
||||
.endif
|
||||
|
||||
/*
|
||||
* Turn off the Cache and MMU. ARMv3 does not support
|
||||
* reading the control register, but ARMv4 does.
|
||||
|
|
|
@ -40,7 +40,7 @@
|
|||
* to an MMC card
|
||||
* # dd if=vrl4.out of=/dev/sdx bs=512 seek=1
|
||||
*/
|
||||
asmlinkage void mmcif_loader(unsigned char *buf, unsigned long len)
|
||||
asmlinkage void mmc_loader(unsigned char *buf, unsigned long len)
|
||||
{
|
||||
mmc_init_progress();
|
||||
mmc_update_progress(MMC_PROGRESS_ENTER);
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue