diff --git a/CREDITS b/CREDITS
index 5b1edf3a38a2..7fb4c73e0228 100644
--- a/CREDITS
+++ b/CREDITS
@@ -3642,11 +3642,9 @@ S: Beaverton, OR 97005
S: USA
N: Michal Wronski
-E: wrona@mat.uni.torun.pl
-W: http://www.mat.uni.torun.pl/~wrona
+E: Michal.Wronski@motorola.com
D: POSIX message queues fs (with K. Benedyczak)
-S: ul. Teczowa 23/12
-S: 80-680 Gdansk-Sobieszewo
+S: Krakow
S: Poland
N: Frank Xia
diff --git a/Documentation/Changes b/Documentation/Changes
index 783ddc3ce4e8..86b86399d61d 100644
--- a/Documentation/Changes
+++ b/Documentation/Changes
@@ -139,9 +139,14 @@ You'll probably want to upgrade.
Ksymoops
--------
-If the unthinkable happens and your kernel oopses, you'll need a 2.4
-version of ksymoops to decode the report; see REPORTING-BUGS in the
-root of the Linux source for more information.
+If the unthinkable happens and your kernel oopses, you may need the
+ksymoops tool to decode it, but in most cases you don't.
+In the 2.6 kernel it is generally preferred to build the kernel with
+CONFIG_KALLSYMS so that it produces readable dumps that can be used as-is
+(this also produces better output than ksymoops).
+If for some reason your kernel is not built with CONFIG_KALLSYMS and
+you have no way to rebuild and reproduce the Oops with that option, then
+you can still decode that Oops with ksymoops.
Module-Init-Tools
-----------------
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index fa3e29ad8a46..7018f5c6a447 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -10,7 +10,7 @@ DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \
kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
procfs-guide.xml writing_usb_driver.xml \
sis900.xml kernel-api.xml journal-api.xml lsm.xml usb.xml \
- gadget.xml libata.xml mtdnand.xml librs.xml
+ gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml
###
# The build process is as follows (targets):
diff --git a/Documentation/DocBook/journal-api.tmpl b/Documentation/DocBook/journal-api.tmpl
index 341aaa4ce481..2077f9a28c19 100644
--- a/Documentation/DocBook/journal-api.tmpl
+++ b/Documentation/DocBook/journal-api.tmpl
@@ -306,7 +306,7 @@ an example.
Journal Level
!Efs/jbd/journal.c
-!Efs/jbd/recovery.c
+!Ifs/jbd/recovery.c
Transaction Level
!Efs/jbd/transaction.c
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index ec474e5a25ed..a8316b1a3e3d 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -118,7 +118,7 @@ X!Ilib/string.c
User Space Memory Access
!Iinclude/asm-i386/uaccess.h
-!Iarch/i386/lib/usercopy.c
+!Earch/i386/lib/usercopy.c
More Memory Management Functions
!Iinclude/linux/rmap.h
@@ -174,7 +174,6 @@ X!Ilib/string.c
The Linux VFS
The Filesystem types
!Iinclude/linux/fs.h
-!Einclude/linux/fs.h
The Directory Cache
!Efs/dcache.c
@@ -266,7 +265,7 @@ X!Ekernel/module.c
Hardware Interfaces
Interrupt Handling
-!Ikernel/irq/manage.c
+!Ekernel/irq/manage.c
Resources Management
@@ -501,7 +500,7 @@ KAO -->
!Edrivers/video/modedb.c
Frame Buffer Macintosh Video Mode Database
-!Idrivers/video/macmodes.c
+!Edrivers/video/macmodes.c
Frame Buffer Fonts
diff --git a/Documentation/DocBook/rapidio.tmpl b/Documentation/DocBook/rapidio.tmpl
new file mode 100644
index 000000000000..1becf27ba27e
--- /dev/null
+++ b/Documentation/DocBook/rapidio.tmpl
@@ -0,0 +1,160 @@
+
+
+
+
+
+ RapidIO Subsystem Guide
+
+
+
+ Matt
+ Porter
+
+
+ mporter@kernel.crashing.org
+ mporter@mvista.com
+
+
+
+
+
+
+ 2005
+ MontaVista Software, Inc.
+
+
+
+
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License version 2 as published by the Free Software Foundation.
+
+
+
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+
+
+
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+
+
+
+ For more details see the file COPYING in the source
+ distribution of Linux.
+
+
+
+
+
+
+
+ Introduction
+
+ RapidIO is a high speed switched fabric interconnect with
+ features aimed at the embedded market. RapidIO provides
+ support for memory-mapped I/O as well as message-based
+ transactions over the switched fabric network. RapidIO has
+ a standardized discovery mechanism not unlike the PCI bus
+ standard that allows simple detection of devices in a
+ network.
+
+
+ This documentation is provided for developers intending
+ to support RapidIO on new architectures, write new drivers,
+ or to understand the subsystem internals.
+
+
+
+
+ Known Bugs and Limitations
+
+
+ Bugs
+ None. ;)
+
+
+ Limitations
+
+
+ Access/management of RapidIO memory regions is not supported
+ Multiple host enumeration is not supported
+
+
+
+
+
+
+ RapidIO driver interface
+
+ Drivers are provided a set of calls in order
+ to interface with the subsystem to gather info
+ on devices, request/map memory region resources,
+ and manage mailboxes/doorbells.
+
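+ As a hedged illustration (hypothetical driver, not part of the
+ original template; the IDs are invented), a driver typically
+ describes the devices it handles and registers itself with the
+ subsystem:
+
+	static struct rio_device_id mydrv_id_table[] = {
+		{ .did = 0x1234, .vid = 0x5678 },	/* invented IDs */
+		{ 0, }					/* terminating entry */
+	};
+
+	static struct rio_driver mydrv_driver = {
+		.name     = "mydrv",
+		.id_table = mydrv_id_table,
+		.probe    = mydrv_probe,	/* called per matching device */
+		.remove   = mydrv_remove,
+	};
+
+	/* in the module init path */
+	rio_register_driver(&mydrv_driver);
+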
+
+ Functions
+!Iinclude/linux/rio_drv.h
+!Edrivers/rapidio/rio-driver.c
+!Edrivers/rapidio/rio.c
+
+
+
+
+ Internals
+
+
+ This chapter contains the autogenerated documentation of the RapidIO
+ subsystem.
+
+
+ Structures
+!Iinclude/linux/rio.h
+
+ Enumeration and Discovery
+!Idrivers/rapidio/rio-scan.c
+
+ Driver functionality
+!Idrivers/rapidio/rio.c
+!Idrivers/rapidio/rio-access.c
+
+ Device model support
+!Idrivers/rapidio/rio-driver.c
+
+ Sysfs support
+!Idrivers/rapidio/rio-sysfs.c
+
+ PPC32 support
+!Iarch/ppc/kernel/rio.c
+!Earch/ppc/syslib/ppc85xx_rio.c
+!Iarch/ppc/syslib/ppc85xx_rio.c
+
+
+
+
+ Credits
+
+ The following people have contributed to the RapidIO
+ subsystem directly or indirectly:
+
+ Matt Porter mporter@kernel.crashing.org
+ Randy Vinson rvinson@mvista.com
+ Dan Malek dan@embeddedalley.com
+
+
+
+ The following people have contributed to this document:
+
+ Matt Porter mporter@kernel.crashing.org
+
+
+
+
diff --git a/Documentation/MSI-HOWTO.txt b/Documentation/MSI-HOWTO.txt
index 63edc5f847c4..3ec6c720b016 100644
--- a/Documentation/MSI-HOWTO.txt
+++ b/Documentation/MSI-HOWTO.txt
@@ -10,14 +10,22 @@
This guide describes the basics of Message Signaled Interrupts (MSI),
the advantages of using MSI over traditional interrupt mechanisms,
and how to enable your driver to use MSI or MSI-X. Also included is
-a Frequently Asked Questions.
+a Frequently Asked Questions (FAQ) section.
+
+1.1 Terminology
+
+PCI devices can be single-function or multi-function. In either case,
+when this text talks about enabling or disabling MSI on a "device
+function," it is referring to one specific PCI device and function and
+not to all functions on a PCI device (unless the PCI device has only
+one function).
2. Copyright 2003 Intel Corporation
3. What is MSI/MSI-X?
Message Signaled Interrupt (MSI), as described in the PCI Local Bus
-Specification Revision 2.3 or latest, is an optional feature, and a
+Specification Revision 2.3 or later, is an optional feature, and a
required feature for PCI Express devices. MSI enables a device function
to request service by sending an Inbound Memory Write on its PCI bus to
the FSB as a Message Signal Interrupt transaction. Because MSI is
@@ -27,7 +35,7 @@ supported.
A PCI device that supports MSI must also support pin IRQ assertion
interrupt mechanism to provide backward compatibility for systems that
-do not support MSI. In Systems, which support MSI, the bus driver is
+do not support MSI. In systems which support MSI, the bus driver is
responsible for initializing the message address and message data of
the device function's MSI/MSI-X capability structure during device
initial configuration.
@@ -61,17 +69,17 @@ over the MSI capability structure as described below.
- MSI and MSI-X both support per-vector masking. Per-vector
masking is an optional extension of MSI but a required
- feature for MSI-X. Per-vector masking provides the kernel
- the ability to mask/unmask MSI when servicing its software
- interrupt service routing handler. If per-vector masking is
+ feature for MSI-X. Per-vector masking provides the kernel the
+ ability to mask/unmask a single MSI while running its
+ interrupt service routine. If per-vector masking is
not supported, then the device driver should provide the
hardware/software synchronization to ensure that the device
generates MSI when the driver wants it to do so.
4. Why use MSI?
-As a benefit the simplification of board design, MSI allows board
-designers to remove out of band interrupt routing. MSI is another
+By simplifying board design, MSI allows board
+designers to remove out-of-band interrupt routing. MSI is another
step towards a legacy-free environment.
Due to increasing pressure on chipset and processor packages to
@@ -87,7 +95,7 @@ support. As a result, the PCI Express technology requires MSI
support for better interrupt performance.
Using MSI enables the device functions to support two or more
-vectors, which can be configured to target different CPU's to
+vectors, which can be configured to target different CPUs to
increase scalability.
5. Configuring a driver to use MSI/MSI-X
@@ -119,13 +127,13 @@ pci_enable_msi() explicitly.
int pci_enable_msi(struct pci_dev *dev)
-With this new API, any existing device driver, which like to have
-MSI enabled on its device function, must call this API to enable MSI
+With this new API, a device driver that wants to have MSI
+enabled on its device function must call this API to enable MSI.
A successful call will initialize the MSI capability structure
with ONE vector, regardless of whether a device function is
capable of supporting multiple messages. This vector replaces the
-pre-assigned dev->irq with a new MSI vector. To avoid the conflict
-of new assigned vector with existing pre-assigned vector requires
+pre-assigned dev->irq with a new MSI vector. To avoid a conflict
+of the newly assigned vector with the existing pre-assigned vector,
a device driver must call this API before calling request_irq().
5.2.2 API pci_disable_msi
@@ -137,14 +145,14 @@ when a device driver is unloading. This API restores dev->irq with
the pre-assigned IOAPIC vector and switches a device's interrupt
mode to PCI pin-irq assertion/INTx emulation mode.
-Note that a device driver should always call free_irq() on MSI vector
-it has done request_irq() on before calling this API. Failure to do
-so results a BUG_ON() and a device will be left with MSI enabled and
+Note that a device driver should always call free_irq() on the MSI vector
+that it has done request_irq() on before calling this API. Failure to do
+so results in a BUG_ON() and the device will be left with MSI enabled,
leaking its vector.
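+
+As a hedged illustration (hypothetical driver code, names invented,
+error handling trimmed), the call ordering described above looks like:
+
+	/* enable MSI before request_irq() */
+	if (pci_enable_msi(dev) == 0)
+		printk(KERN_INFO "mydev: MSI mode enabled\n");
+	/* dev->irq now holds the MSI vector (or the original
+	 * IOAPIC vector if pci_enable_msi() failed) */
+	if (request_irq(dev->irq, mydev_isr, SA_SHIRQ, "mydev", dev))
+		goto fail;
+
+	/* teardown: free_irq() strictly before pci_disable_msi(),
+	 * otherwise the BUG_ON() described above fires */
+	free_irq(dev->irq, dev);
+	pci_disable_msi(dev);
+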
5.2.3 MSI mode vs. legacy mode diagram
-The below diagram shows the events, which switches the interrupt
+The below diagram shows the events which switch the interrupt
mode on the MSI-capable device function between MSI mode and
PIN-IRQ assertion mode.
@@ -155,9 +163,9 @@ PIN-IRQ assertion mode.
------------ pci_disable_msi ------------------------
-Figure 1.0 MSI Mode vs. Legacy Mode
+Figure 1. MSI Mode vs. Legacy Mode
-In Figure 1.0, a device operates by default in legacy mode. Legacy
+In Figure 1, a device operates by default in legacy mode. Legacy
in this context means PCI pin-irq assertion or PCI-Express INTx
emulation. A successful MSI request (using pci_enable_msi()) switches
a device's interrupt mode to MSI mode. A pre-assigned IOAPIC vector
@@ -166,11 +174,11 @@ assigned MSI vector will replace dev->irq.
To return back to its default mode, a device driver should always call
pci_disable_msi() to undo the effect of pci_enable_msi(). Note that a
-device driver should always call free_irq() on MSI vector it has done
-request_irq() on before calling pci_disable_msi(). Failure to do so
-results a BUG_ON() and a device will be left with MSI enabled and
+device driver should always call free_irq() on the MSI vector it has
+done request_irq() on before calling pci_disable_msi(). Failure to do
+so results in a BUG_ON() and the device will be left with MSI enabled,
leaking its vector. Otherwise, the PCI subsystem restores a device's
-dev->irq with a pre-assigned IOAPIC vector and marks released
+dev->irq with a pre-assigned IOAPIC vector and marks the released
MSI vector as unused.
Once being marked as unused, there is no guarantee that the PCI
@@ -178,8 +186,8 @@ subsystem will reserve this MSI vector for a device. Depending on
the availability of current PCI vector resources and the number of
MSI/MSI-X requests from other drivers, this MSI may be re-assigned.
-For the case where the PCI subsystem re-assigned this MSI vector
-another driver, a request to switching back to MSI mode may result
+For the case where the PCI subsystem re-assigns this MSI vector to
+another driver, a request to switch back to MSI mode may result
in being assigned a different MSI vector or a failure if no more
vectors are available.
@@ -208,12 +216,12 @@ Unlike the function pci_enable_msi(), the function pci_enable_msix()
does not replace the pre-assigned IOAPIC dev->irq with a new MSI
vector because the PCI subsystem writes the 1:1 vector-to-entry mapping
into the field vector of each element contained in a second argument.
-Note that the pre-assigned IO-APIC dev->irq is valid only if the device
-operates in PIN-IRQ assertion mode. In MSI-X mode, any attempt of
+Note that the pre-assigned IOAPIC dev->irq is valid only if the device
+operates in PIN-IRQ assertion mode. In MSI-X mode, any attempt at
using dev->irq by the device driver to request interrupt service
may result in unpredictable behavior.
-For each MSI-X vector granted, a device driver is responsible to call
+For each MSI-X vector granted, a device driver is responsible for calling
other functions like request_irq(), enable_irq(), etc. to enable
this vector with its corresponding interrupt service handler. It is
a device driver's choice to assign all vectors with the same
@@ -224,13 +232,13 @@ service handler.
The PCI 3.0 specification has implementation notes that MMIO address
space for a device's MSI-X structure should be isolated so that the
-software system can set different page for controlling accesses to
-the MSI-X structure. The implementation of MSI patch requires the PCI
+software system can set different pages for controlling accesses to the
+MSI-X structure. The implementation of MSI support requires the PCI
subsystem, not a device driver, to maintain full control of the MSI-X
-table/MSI-X PBA and MMIO address space of the MSI-X table/MSI-X PBA.
-A device driver is prohibited from requesting the MMIO address space
-of the MSI-X table/MSI-X PBA. Otherwise, the PCI subsystem will fail
-enabling MSI-X on its hardware device when it calls the function
+table/MSI-X PBA (Pending Bit Array) and MMIO address space of the MSI-X
+table/MSI-X PBA. A device driver is prohibited from requesting the MMIO
+address space of the MSI-X table/MSI-X PBA. Otherwise, the PCI subsystem
+will fail enabling MSI-X on its hardware device when it calls the function
pci_enable_msix().
5.3.2 Handling MSI-X allocation
@@ -274,9 +282,9 @@ For the case where fewer MSI-X vectors are allocated to a function
than requested, the function pci_enable_msix() will return the
maximum number of MSI-X vectors available to the caller. A device
driver may re-send its request with fewer or equal vectors indicated
-in a return. For example, if a device driver requests 5 vectors, but
-the number of available vectors is 3 vectors, a value of 3 will be a
-return as a result of pci_enable_msix() call. A function could be
+in the return. For example, if a device driver requests 5 vectors, but
+the number of available vectors is 3, a value of 3 will be
+returned as a result of the pci_enable_msix() call. A function could be
designed for its driver to use only 3 MSI-X table entries in
different combinations such as ABC--, A-B-C, A--CB, etc. Note that this
implementation does not support multiple entries with the same vector. Entries
such as ABBCC, AABCC, BCCBA, etc. will result in a failure from the function
pci_enable_msix(). Below are the reasons why supporting multiple
entries with the same vector is an undesirable solution.
- - The PCI subsystem can not determine which entry, which
- generated the message, to mask/unmask MSI while handling
+ - The PCI subsystem cannot determine the entry that
+ generated the message to mask/unmask MSI while handling
software driver ISR. Attempting to walk through all MSI-X
table entries (2048 max) to mask/unmask any match vector
is an undesirable solution.
- - Walk through all MSI-X table entries (2048 max) to handle
+ - Walking through all MSI-X table entries (2048 max) to handle
SMP affinity of any match vector is an undesirable solution.
5.3.4 API pci_enable_msix
-int pci_enable_msix(struct pci_dev *dev, u32 *entries, int nvec)
+int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
This API enables a device driver to request the PCI subsystem
-for enabling MSI-X messages on its hardware device. Depending on
+to enable MSI-X messages on its hardware device. Depending on
the availability of PCI vectors resources, the PCI subsystem enables
-either all or nothing.
+either all or none of the requested vectors.
-Argument dev points to the device (pci_dev) structure.
+Argument 'dev' points to the device (pci_dev) structure.
-Argument entries is a pointer of unsigned integer type. The number of
-elements is indicated in argument nvec. The content of each element
-will be mapped to the following struct defined in /driver/pci/msi.h.
+Argument 'entries' is a pointer to an array of msix_entry structs.
+The number of entries is indicated in argument 'nvec'.
+struct msix_entry is defined in drivers/pci/msi.h:
struct msix_entry {
u16 vector; /* kernel uses to write alloc vector */
u16 entry; /* driver uses to specify entry */
};
-A device driver is responsible for initializing the field entry of
-each element with unique entry supported by MSI-X table. Otherwise,
+A device driver is responsible for initializing the field 'entry' of
+each element with a unique entry supported by the MSI-X table. Otherwise,
-EINVAL will be returned as a result. A successful return of zero
-indicates the PCI subsystem completes initializing each of requested
+indicates the PCI subsystem completed initializing each of the requested
entries of the MSI-X table with message address and message data.
Last but not least, the PCI subsystem will write the 1:1
-vector-to-entry mapping into the field vector of each element. A
-device driver is responsible of keeping track of allocated MSI-X
+vector-to-entry mapping into the field 'vector' of each element. A
+device driver is responsible for keeping track of allocated MSI-X
vectors in its internal data structure.
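+
+For illustration, a hedged sketch (driver names hypothetical) of
+requesting three MSI-X vectors as described above:
+
+	#define MYDEV_NVEC 3
+	static struct msix_entry mydev_entries[MYDEV_NVEC];
+	int i, status;
+
+	/* each 'entry' must be a unique MSI-X table index */
+	for (i = 0; i < MYDEV_NVEC; i++)
+		mydev_entries[i].entry = i;
+
+	status = pci_enable_msix(dev, mydev_entries, MYDEV_NVEC);
+	if (status == 0) {
+		/* the PCI subsystem wrote the allocated vector into
+		 * each element; register a handler on every one */
+		for (i = 0; i < MYDEV_NVEC; i++)
+			request_irq(mydev_entries[i].vector, mydev_isr,
+				    SA_SHIRQ, "mydev", dev);
+	} else if (status > 0) {
+		/* vector shortage: 'status' is the maximum available,
+		 * so the driver may retry with fewer entries */
+	}
+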
-Argument nvec is an integer indicating the number of messages
-requested.
-
-A return of zero indicates that the number of MSI-X vectors is
+A return of zero indicates that the number of MSI-X vectors was
successfully allocated. A return of greater than zero indicates
MSI-X vector shortage. A return of less than zero indicates
a failure. This failure may be a result of duplicate entries
@@ -341,12 +346,12 @@ void pci_disable_msix(struct pci_dev *dev)
This API should always be used to undo the effect of pci_enable_msix()
when a device driver is unloading. Note that a device driver should
always call free_irq() on all MSI-X vectors it has done request_irq()
-on before calling this API. Failure to do so results a BUG_ON() and
+on before calling this API. Failure to do so results in a BUG_ON() and
a device will be left with MSI-X enabled, leaking its vectors.
5.3.6 MSI-X mode vs. legacy mode diagram
-The below diagram shows the events, which switches the interrupt
+The below diagram shows the events which switch the interrupt
mode on the MSI-X capable device function between MSI-X mode and
PIN-IRQ assertion mode (legacy).
@@ -356,22 +361,22 @@ PIN-IRQ assertion mode (legacy).
| | ===============> | |
------------ pci_disable_msix ------------------------
-Figure 2.0 MSI-X Mode vs. Legacy Mode
+Figure 2. MSI-X Mode vs. Legacy Mode
-In Figure 2.0, a device operates by default in legacy mode. A
+In Figure 2, a device operates by default in legacy mode. A
successful MSI-X request (using pci_enable_msix()) switches a
device's interrupt mode to MSI-X mode. A pre-assigned IOAPIC vector
stored in dev->irq will be saved by the PCI subsystem; however,
unlike MSI mode, the PCI subsystem will not replace dev->irq with
assigned MSI-X vector because the PCI subsystem already writes the 1:1
-vector-to-entry mapping into the field vector of each element
+vector-to-entry mapping into the field 'vector' of each element
specified in second argument.
To return back to its default mode, a device driver should always call
pci_disable_msix() to undo the effect of pci_enable_msix(). Note that
a device driver should always call free_irq() on all MSI-X vectors it
has done request_irq() on before calling pci_disable_msix(). Failure
-to do so results a BUG_ON() and a device will be left with MSI-X
+to do so results in a BUG_ON() and a device will be left with MSI-X
enabled, leaking its vectors. Otherwise, the PCI subsystem switches a
device function's interrupt mode from MSI-X mode to legacy mode and
marks all allocated MSI-X vectors as unused.
@@ -383,53 +388,56 @@ MSI/MSI-X requests from other drivers, these MSI-X vectors may be
re-assigned.
For the case where the PCI subsystem re-assigned these MSI-X vectors
-to other driver, a request to switching back to MSI-X mode may result
+to other drivers, a request to switch back to MSI-X mode may result
in being assigned another set of MSI-X vectors or a failure if no
more vectors are available.
-5.4 Handling function implementng both MSI and MSI-X capabilities
+5.4 Handling function implementing both MSI and MSI-X capabilities
For the case where a function implements both MSI and MSI-X
capabilities, the PCI subsystem enables a device to run either in MSI
mode or MSI-X mode but not both. A device driver determines whether it
wants MSI or MSI-X enabled on its hardware device. Once a device
-driver requests for MSI, for example, it is prohibited to request for
+driver requests for MSI, for example, it is prohibited from requesting
MSI-X; in other words, a device driver is not permitted to ping-pong
between MSI mode and MSI-X mode at run-time.
5.5 Hardware requirements for MSI/MSI-X support
+
MSI/MSI-X support requires support from both system hardware and
individual hardware device functions.
5.5.1 System hardware support
+
Since the target of MSI address is the local APIC CPU, enabling
-MSI/MSI-X support in Linux kernel is dependent on whether existing
-system hardware supports local APIC. Users should verify their
-system whether it runs when CONFIG_X86_LOCAL_APIC=y.
+MSI/MSI-X support in the Linux kernel is dependent on whether existing
+system hardware supports local APIC. Users should verify that their
+system supports local APIC operation by testing that it runs when
+CONFIG_X86_LOCAL_APIC=y.
In an SMP environment, CONFIG_X86_LOCAL_APIC is automatically set;
however, in a UP environment, users must manually set
CONFIG_X86_LOCAL_APIC. Once CONFIG_X86_LOCAL_APIC=y, setting
-CONFIG_PCI_MSI enables the VECTOR based scheme and
-the option for MSI-capable device drivers to selectively enable
-MSI/MSI-X.
+CONFIG_PCI_MSI enables the VECTOR based scheme and the option for
+MSI-capable device drivers to selectively enable MSI/MSI-X.
Note that the CONFIG_X86_IO_APIC setting is irrelevant because the MSI/MSI-X
vector is allocated anew at runtime and MSI/MSI-X support does not
depend on BIOS support. This key independence enables MSI/MSI-X
-support on future IOxAPIC free platform.
+support on future IOxAPIC free platforms.
5.5.2 Device hardware support
+
The hardware device function supports MSI by indicating the
MSI/MSI-X capability structure on its PCI capability list. By
default, this capability structure will not be initialized by
the kernel to enable MSI during the system boot. In other words,
the device function is running on its default pin assertion mode.
Note that in many cases the hardware supporting MSI has bugs,
-which may result in system hang. The software driver of specific
-MSI-capable hardware is responsible for whether calling
+which may result in system hangs. The software driver of specific
+MSI-capable hardware is responsible for deciding whether to call
pci_enable_msi or not. A return of zero indicates the kernel
-successfully initializes the MSI/MSI-X capability structure of the
+successfully initialized the MSI/MSI-X capability structure of the
device function. The device function is now running on MSI/MSI-X mode.
5.6 How to tell whether MSI/MSI-X is enabled on device function
@@ -439,10 +447,10 @@ pci_enable_msi()/pci_enable_msix() indicates to a device driver that
its device function is initialized successfully and ready to run in
MSI/MSI-X mode.
-At the user level, users can use command 'cat /proc/interrupts'
-to display the vector allocated for a device and its interrupt
-MSI/MSI-X mode ("PCI MSI"/"PCI MSIX"). Below shows below MSI mode is
-enabled on a SCSI Adaptec 39320D Ultra320.
+At the user level, users can use the command 'cat /proc/interrupts'
+to display the vectors allocated for devices and their interrupt
+MSI/MSI-X modes ("PCI-MSI"/"PCI-MSI-X"). Below shows MSI mode is
+enabled on a SCSI Adaptec 39320D Ultra320 controller.
CPU0 CPU1
0: 324639 0 IO-APIC-edge timer
@@ -453,8 +461,8 @@ enabled on a SCSI Adaptec 39320D Ultra320.
15: 1 0 IO-APIC-edge ide1
169: 0 0 IO-APIC-level uhci-hcd
185: 0 0 IO-APIC-level uhci-hcd
-193: 138 10 PCI MSI aic79xx
-201: 30 0 PCI MSI aic79xx
+193: 138 10 PCI-MSI aic79xx
+201: 30 0 PCI-MSI aic79xx
225: 30 0 IO-APIC-level aic7xxx
233: 30 0 IO-APIC-level aic7xxx
NMI: 0 0
@@ -490,8 +498,8 @@ target address set as 0xfeexxxxx, as conformed to PCI
specification 2.3 or later, then it should work.
Q4. From the driver point of view, if the MSI is lost because
-of the errors occur during inbound memory write, then it may
-wait for ever. Is there a mechanism for it to recover?
+of errors occurring during inbound memory write, then it may
+wait forever. Is there a mechanism for it to recover?
A4. Since the target of the transaction is an inbound memory
write, all transaction termination conditions (Retry,
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 354d89c78377..15da16861fa3 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -772,8 +772,6 @@ RCU pointer/list traversal:
list_for_each_entry_rcu
list_for_each_continue_rcu (to be deprecated in favor of new
list_for_each_entry_continue_rcu)
- hlist_for_each_rcu (to be deprecated in favor of
- hlist_for_each_entry_rcu)
hlist_for_each_entry_rcu
RCU pointer update:
diff --git a/Documentation/arm/README b/Documentation/arm/README
index a6f718e90a86..5ed6f3530b86 100644
--- a/Documentation/arm/README
+++ b/Documentation/arm/README
@@ -8,10 +8,9 @@ Compilation of kernel
---------------------
In order to compile ARM Linux, you will need a compiler capable of
- generating ARM ELF code with GNU extensions. GCC 2.95.1, EGCS
- 1.1.2, and GCC 3.3 are known to be good compilers. Fortunately, you
- needn't guess. The kernel will report an error if your compiler is
- a recognized offender.
+ generating ARM ELF code with GNU extensions. GCC 3.3 is known to be
+ a good compiler. Fortunately, you needn't guess. The kernel will report
+ an error if your compiler is a recognized offender.
To build ARM Linux natively, you shouldn't have to alter the ARCH = line
in the top level Makefile. However, if you don't have the ARM Linux ELF
diff --git a/Documentation/connector/cn_test.c b/Documentation/connector/cn_test.c
index b7de82e9c0e0..3e73231695b3 100644
--- a/Documentation/connector/cn_test.c
+++ b/Documentation/connector/cn_test.c
@@ -25,7 +25,7 @@
#include
#include
-#include "connector.h"
+#include <linux/connector.h>
static struct cb_id cn_test_id = { 0x123, 0x456 };
static char cn_test_name[] = "cn_test";
@@ -104,7 +104,7 @@ static int cn_test_want_notify(void)
req->first = cn_test_id.val + 20;
req->range = 10;
- NETLINK_CB(skb).dst_groups = ctl->group;
+ NETLINK_CB(skb).dst_group = ctl->group;
//netlink_broadcast(nls, skb, 0, ctl->group, GFP_ATOMIC);
netlink_unicast(nls, skb, 0, 0);
diff --git a/Documentation/device-mapper/snapshot.txt b/Documentation/device-mapper/snapshot.txt
index dca274ff4005..a5009c8300f3 100644
--- a/Documentation/device-mapper/snapshot.txt
+++ b/Documentation/device-mapper/snapshot.txt
@@ -19,7 +19,6 @@ There are two dm targets available: snapshot and snapshot-origin.
*) snapshot-origin
which will normally have one or more snapshots based on it.
-You must create the snapshot-origin device before you can create snapshots.
Reads will be mapped directly to the backing device. For each write, the
original data will be saved in the <COW device> of each snapshot to keep
its visible content unchanged, at least until the <COW device> fills up.
@@ -27,7 +26,7 @@ its visible content unchanged, at least until the fills up.
*) snapshot
-A snapshot is created of the <origin> block device. Changed chunks of
+A snapshot of the <origin> block device is created. Changed chunks of
<granularity> sectors will be stored on the <COW device>. Writes will
only go to the <COW device>. Reads will come from the <COW device> or
from <origin> for unchanged data. <COW device> will often be
@@ -37,6 +36,8 @@ the amount of free space and expand the <COW device> before it fills up.
<persistent?> is P (Persistent) or N (Not persistent - will not survive
after reboot).
+The difference is that for transient snapshots less metadata must be
+saved on disk - they can be kept in memory by the kernel.
How this is used by LVM2
diff --git a/Documentation/fb/vesafb.txt b/Documentation/fb/vesafb.txt
index 62db6758d1c1..ee277dd204b0 100644
--- a/Documentation/fb/vesafb.txt
+++ b/Documentation/fb/vesafb.txt
@@ -146,10 +146,10 @@ pmipal Use the protected mode interface for palette changes.
mtrr:n setup memory type range registers for the vesafb framebuffer
where n:
- 0 - disabled (equivalent to nomtrr)
+ 0 - disabled (equivalent to nomtrr) (default)
1 - uncachable
2 - write-back
- 3 - write-combining (default)
+ 3 - write-combining
4 - write-through
If you see the following in dmesg, choose the type that matches the
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index b67189a8d8d4..decdf9917e0d 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -69,6 +69,22 @@ Who: Grant Coady
---------------------------
+What: remove EXPORT_SYMBOL(panic_timeout)
+When: April 2006
+Files: kernel/panic.c
+Why: No modular usage in the kernel.
+Who: Adrian Bunk
+
+---------------------------
+
+What: remove EXPORT_SYMBOL(insert_resource)
+When: April 2006
+Files: kernel/resource.c
+Why: No modular usage in the kernel.
+Who: Adrian Bunk
+
+---------------------------
+
What: PCMCIA control ioctl (needed for pcmcia-cs [cardmgr, cardctl])
When: November 2005
Files: drivers/pcmcia/: pcmcia_ioctl.c
diff --git a/Documentation/filesystems/dentry-locking.txt b/Documentation/filesystems/dentry-locking.txt
new file mode 100644
index 000000000000..4c0c575a4012
--- /dev/null
+++ b/Documentation/filesystems/dentry-locking.txt
@@ -0,0 +1,173 @@
+RCU-based dcache locking model
+==============================
+
+On many workloads, the most common operation on dcache is to look up a
+dentry, given a parent dentry and the name of the child. Typically,
+for every open(), stat() etc., the dentry corresponding to the
+pathname will be looked up by walking the tree starting with the first
+component of the pathname and using that dentry along with the next
+component to look up the next level and so on. Since it is a frequent
+operation for workloads like multiuser environments and web servers,
+it is important to optimize this path.
+
+Prior to 2.5.10, dcache_lock was acquired in d_lookup and thus in
+every component during path look-up. Since 2.5.10 onwards, fast-walk
+algorithm changed this by holding the dcache_lock at the beginning and
+walking as many cached path component dentries as possible. This
+significantly decreases the number of acquisitions of
+dcache_lock. However, it also increases the lock hold time
+significantly and affects performance in large SMP machines. Since
+2.5.62 kernel, dcache has been using a new locking model that uses RCU
+to make dcache look-up lock-free.
+
+The current dcache locking model is not very different from the
+previous dcache locking model. Prior to the 2.5.62 kernel, dcache_lock
+protected the hash chain, d_child, d_alias, d_lru lists as well as
+d_inode and several other things like mount look-up. RCU-based changes
+affect only the way the hash chain is protected. For everything else
+the dcache_lock must be taken for both traversing as well as
+updating. The hash chain updates also take the dcache_lock. The
+significant change is the way d_lookup traverses the hash chain: it
+doesn't acquire the dcache_lock for this and relies on RCU to ensure
+that the dentry has not been *freed*.
+
+
+Dcache locking details
+======================
+
+For many multi-user workloads, open() and stat() on files are very
+frequently occurring operations. Both involve walking of path names to
+find the dentry corresponding to the concerned file. In 2.4 kernel,
+dcache_lock was held during look-up of each path component. Contention
+and cache-line bouncing of this global lock caused significant
+scalability problems. With the introduction of RCU in Linux kernel,
+this was worked around by making the look-up of path components during
+path walking lock-free.
+
+
+Safe lock-free look-up of dcache hash table
+===========================================
+
+Dcache is a complex data structure with the hash table entries also
+linked together in other lists. In 2.4 kernel, dcache_lock protected
+all the lists. We applied RCU only on hash chain walking. The rest of
+the lists are still protected by dcache_lock. Some of the important
+changes are:
+
+1. The deletion from hash chain is done using hlist_del_rcu() macro
+ which doesn't initialize next pointer of the deleted dentry and
+ this allows us to walk safely lock-free while a deletion is
+ happening.
+
+2. Insertion of a dentry into the hash table is done using
+ hlist_add_head_rcu() which takes care of ordering the writes - the
+ writes to the dentry must be visible before the dentry is
+ inserted. This works in conjunction with hlist_for_each_rcu() while
+ walking the hash chain. The only requirement is that all
+ initialization to the dentry must be done before
+ hlist_add_head_rcu() since we don't have dcache_lock protection
+ while traversing the hash chain. This isn't different from the
+ existing code.
+
+3. A dentry looked up without holding dcache_lock cannot be
+ returned for walking if it is unhashed. It then may have a NULL
+ d_inode or other bogosity since RCU doesn't protect the other
+ fields in the dentry. We therefore use a flag DCACHE_UNHASHED to
+ indicate unhashed dentries and use this in conjunction with a
+ per-dentry lock (d_lock). Once looked up without the dcache_lock,
+ we acquire the per-dentry lock (d_lock) and check if the dentry is
+ unhashed. If so, the look-up fails. If not, the reference count
+ of the dentry is increased and the dentry is returned (see the
+ sketch after this list).
+
+4. Once a dentry is looked up, it must be ensured that it doesn't go
+ away during the path walk for that component. In pre-2.5.10 code,
+ this was done by holding a reference to the dentry. dcache_rcu does
+ the same. In some sense, dcache_rcu path walking looks like the
+ pre-2.5.10 version.
+
+5. All dentry hash chain updates must take the dcache_lock as well as
+ the per-dentry lock in that order. dput() does this to ensure that
+ a dentry that has just been looked up in another CPU doesn't get
+ deleted before dget() can be done on it.
+
+6. There are several ways to do reference counting of RCU protected
+ objects. One such example is in ipv4 route cache where deferred
+ freeing (using call_rcu()) is done as soon as the reference count
+ goes to zero. This cannot be done in the case of dentries because
+ tearing down of dentries requires blocking (dentry_iput()) which
+ isn't supported from RCU callbacks. Instead, tearing down of
+ dentries happens synchronously in dput(), but actual freeing happens
+ later when RCU grace period is over. This allows safe lock-free
+ walking of the hash chains, but a matched dentry may have been
+ partially torn down. The checking of DCACHE_UNHASHED flag with
+ d_lock held detects such dentries and prevents them from being
+ returned from look-up.
+
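+The checks described in steps 3 and 6 follow a pattern along these
+lines (a simplified sketch of the logic in __d_lookup(), not the
+exact kernel code):
+
+	/* candidate found on the hash chain, walked lock-free */
+	spin_lock(&dentry->d_lock);
+	if (d_unhashed(dentry)) {	/* tests DCACHE_UNHASHED */
+		/* may be partially torn down - fail this look-up */
+		spin_unlock(&dentry->d_lock);
+		return NULL;
+	}
+	atomic_inc(&dentry->d_count);	/* pin it before dropping d_lock */
+	spin_unlock(&dentry->d_lock);
+	return dentry;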
+
+Maintaining POSIX rename semantics
+==================================
+
+Since look-up of dentries is lock-free, it can race against a
+concurrent rename operation. For example, during rename of file A to
+B, look-up of either A or B must succeed. So, if look-up of B happens
+after A has been removed from the hash chain but not added to the new
+hash chain, it may fail. Also, a comparison while the name is being
+written concurrently by a rename may result in false positive matches
+violating rename semantics. Issues related to race with rename are
+handled as described below :
+
+1. Look-up can be done in two ways - d_lookup() which is safe from
+ simultaneous renames and __d_lookup() which is not. If
+ __d_lookup() fails, it must be followed up by a d_lookup() to
+ correctly determine whether a dentry is in the hash table or
+ not. d_lookup() protects look-ups using a sequence lock
+ (rename_lock); see the sketch after this list.
+
+2. The name associated with a dentry (d_name) may be changed if a
+ rename is allowed to happen simultaneously. To avoid memcmp() in
+ __d_lookup() going out of bounds due to a rename and false positive
+ comparison, the name comparison is done while holding the
+ per-dentry lock. This prevents concurrent renames during this
+ operation.
+
+3. Hash table walking during look-up may move to a different bucket as
+ the current dentry is moved to a different bucket due to rename.
+ But we use hlists in dcache hash table and they are
+ null-terminated. So, even if a dentry moves to a different bucket,
+ hash chain walk will terminate. [with a list_head list, it may not
+ since termination is when the list_head in the original bucket is
+ reached]. Since we redo the d_parent check and compare name while
+ holding d_lock, lock-free look-up will not race against d_move().
+
+4. There can be a theoretical race when a dentry keeps coming back to
+ original bucket due to double moves. Due to this, look-up may
+ consider that it has never moved and can end up in an infinite loop.
+ But this is no worse than the theoretical livelocks we already
+ have in the kernel.
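+
+A hedged sketch of how d_lookup() wraps __d_lookup() in the
+rename_lock sequence lock (simplified, not the exact kernel code):
+
+	struct dentry *dentry;
+	unsigned seq;
+
+	do {
+		seq = read_seqbegin(&rename_lock);
+		dentry = __d_lookup(parent, name);
+		if (dentry)
+			break;	/* a positive result is already safe */
+	} while (read_seqretry(&rename_lock, seq));
+	return dentry;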
+
+
+Important guidelines for filesystem developers related to dcache_rcu
+====================================================================
+
+1. Existing dcache interfaces (pre-2.5.62) exported to filesystem
+ don't change. Only dcache internal implementation changes. However
+ filesystems *must not* delete from the dentry hash chains directly
+ using the list macros as was allowed earlier. They must use dcache
+ APIs like d_drop() or __d_drop() depending on the situation.
+
+2. d_flags is now protected by a per-dentry lock (d_lock). All access
+ to d_flags must be protected by it.
+
+3. For a hashed dentry, checking of d_count needs to be protected by
+ d_lock.
+
+
+Papers and other documentation on dcache locking
+================================================
+
+1. Scaling dcache with RCU (http://linuxjournal.com/article.php?sid=7124).
+
+2. http://lse.sourceforge.net/locking/dcache/dcache.html
+
+
+
diff --git a/Documentation/filesystems/devfs/README b/Documentation/filesystems/devfs/README
index 54366ecc241f..aabfba24bc2e 100644
--- a/Documentation/filesystems/devfs/README
+++ b/Documentation/filesystems/devfs/README
@@ -1812,11 +1812,6 @@ it may overflow the messages buffer, but try to get as much of it as
you can
-if you get an Oops, run ksymoops to decode it so that the
-names of the offending functions are provided. A non-decoded Oops is
-pretty useless
-
-
send a copy of your devfsd configuration file(s)
send the bug report to me first.
diff --git a/Documentation/filesystems/ramfs-rootfs-initramfs.txt b/Documentation/filesystems/ramfs-rootfs-initramfs.txt
new file mode 100644
index 000000000000..b3404a032596
--- /dev/null
+++ b/Documentation/filesystems/ramfs-rootfs-initramfs.txt
@@ -0,0 +1,195 @@
+ramfs, rootfs and initramfs
+October 17, 2005
+Rob Landley
+=============================
+
+What is ramfs?
+--------------
+
+Ramfs is a very simple filesystem that exports Linux's disk caching
+mechanisms (the page cache and dentry cache) as a dynamically resizable
+ram-based filesystem.
+
+Normally all files are cached in memory by Linux. Pages of data read from
+backing store (usually the block device the filesystem is mounted on) are kept
+around in case it's needed again, but marked as clean (freeable) in case the
+Virtual Memory system needs the memory for something else. Similarly, data
+written to files is marked clean as soon as it has been written to backing
+store, but kept around for caching purposes until the VM reallocates the
+memory. A similar mechanism (the dentry cache) greatly speeds up access to
+directories.
+
+With ramfs, there is no backing store. Files written into ramfs allocate
+dentries and page cache as usual, but there's nowhere to write them to.
+This means the pages are never marked clean, so they can't be freed by the
+VM when it's looking to recycle memory.
+
+The amount of code required to implement ramfs is tiny, because all the
+work is done by the existing Linux caching infrastructure. Basically,
+you're mounting the disk cache as a filesystem. Because of this, ramfs is not
+an optional component removable via menuconfig, since there would be negligible
+space savings.
+
+ramfs and ramdisk:
+------------------
+
+The older "ram disk" mechanism created a synthetic block device out of
+an area of ram and used it as backing store for a filesystem. This block
+device was of fixed size, so the filesystem mounted on it was of fixed
+size. Using a ram disk also required unnecessarily copying memory from the
+fake block device into the page cache (and copying changes back out), as well
+as creating and destroying dentries. Plus it needed a filesystem driver
+(such as ext2) to format and interpret this data.
+
+Compared to ramfs, this wastes memory (and memory bus bandwidth), creates
+unnecessary work for the CPU, and pollutes the CPU caches. (There are tricks
+to avoid this copying by playing with the page tables, but they're unpleasantly
+complicated and turn out to be about as expensive as the copying anyway.)
+More to the point, all the work ramfs is doing has to happen _anyway_,
+since all file access goes through the page and dentry caches. The ram
+disk is simply unnecessary, ramfs is internally much simpler.
+
+Another reason ramdisks are semi-obsolete is that the introduction of
+loopback devices offered a more flexible and convenient way to create
+synthetic block devices, now from files instead of from chunks of memory.
+See losetup (8) for details.
+
+ramfs and tmpfs:
+----------------
+
+One downside of ramfs is you can keep writing data into it until you fill
+up all memory, and the VM can't free it because the VM thinks that files
+should get written to backing store (rather than swap space), but ramfs hasn't
+got any backing store. Because of this, only root (or a trusted user) should
+be allowed write access to a ramfs mount.
+
+A ramfs derivative called tmpfs was created to add size limits, and the ability
+to write the data to swap space. Normal users can be allowed write access to
+tmpfs mounts. See Documentation/filesystems/tmpfs.txt for more information.
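+
+As an illustration (not part of the original text), a program can
+request such a mount directly via mount(2); the mount point /mnt/tmp
+is assumed to exist:
+
+	#include <stdio.h>
+	#include <sys/mount.h>
+
+	int main(void)
+	{
+		/* tmpfs honors a size ceiling; ramfs would ignore one */
+		if (mount("none", "/mnt/tmp", "tmpfs", 0, "size=16m") < 0)
+			perror("mount");
+		return 0;
+	}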
+
+What is rootfs?
+---------------
+
+Rootfs is a special instance of ramfs, which is always present in 2.6 systems.
+(It's used internally as the starting and stopping point for searches of the
+kernel's doubly-linked list of mount points.)
+
+Most systems just mount another filesystem over it and ignore it. The
+amount of space an empty instance of ramfs takes up is tiny.
+
+What is initramfs?
+------------------
+
+All 2.6 Linux kernels contain a gzipped "cpio" format archive, which is
+extracted into rootfs when the kernel boots up. After extracting, the kernel
+checks to see if rootfs contains a file "init", and if so it executes it as PID
+1. This init process is then responsible for bringing the system the
+rest of the way up, including locating and mounting the real root device (if
+any). If rootfs does not contain an init program after the embedded cpio
+archive is extracted into it, the kernel will fall through to the older code
+to locate and mount a root partition, then exec some variant of /sbin/init
+out of that.
+
+All this differs from the old initrd in several ways:
+
+ - The old initrd was a separate file, while the initramfs archive is linked
+ into the linux kernel image. (The directory linux-*/usr is devoted to
+ generating this archive during the build.)
+
+ - The old initrd file was a gzipped filesystem image (in some file format,
+ such as ext2, that had to be built into the kernel), while the new
+ initramfs archive is a gzipped cpio archive (like tar only simpler,
+ see cpio(1) and Documentation/early-userspace/buffer-format.txt).
+
+ - The program run by the old initrd (which was called /initrd, not /init) did
+ some setup and then returned to the kernel, while the init program from
+ initramfs is not expected to return to the kernel. (If /init needs to hand
+ off control it can overmount / with a new root device and exec another init
+ program. See the switch_root utility, below.)
+
+ - When switching to another root device, initrd would pivot_root and then
+ umount the ramdisk. But initramfs is rootfs: you can neither pivot_root
+ rootfs, nor unmount it. Instead delete everything out of rootfs to
+ free up the space (find -xdev / -exec rm '{}' ';'), overmount rootfs
+ with the new root (cd /newmount; mount --move . /; chroot .), attach
+ stdin/stdout/stderr to the new /dev/console, and exec the new init.
+
+ Since this is a remarkably persnickety process (and involves deleting
+ commands before you can run them), the klibc package introduced a helper
+ program (utils/run_init.c) to do all this for you. Most other packages
+ (such as busybox) have named this command "switch_root".
+
+Populating initramfs:
+---------------------
+
+The 2.6 kernel build process always creates a gzipped cpio format initramfs
+archive and links it into the resulting kernel binary. By default, this
+archive is empty (consuming 134 bytes on x86). The config option
+CONFIG_INITRAMFS_SOURCE (for some reason buried under devices->block devices
+in menuconfig, and living in usr/Kconfig) can be used to specify a source for
+the initramfs archive, which will automatically be incorporated into the
+resulting binary. This option can point to an existing gzipped cpio archive, a
+directory containing files to be archived, or a text file specification such
+as the following example:
+
+ dir /dev 755 0 0
+ nod /dev/console 644 0 0 c 5 1
+ nod /dev/loop0 644 0 0 b 7 0
+ dir /bin 755 1000 1000
+ slink /bin/sh busybox 777 0 0
+ file /bin/busybox initramfs/busybox 755 0 0
+ dir /proc 755 0 0
+ dir /sys 755 0 0
+ dir /mnt 755 0 0
+ file /init initramfs/init.sh 755 0 0
+
+One advantage of the text file is that root access is not required to
+set permissions or create device nodes in the new archive. (Note that those
+two example "file" entries expect to find files named "init.sh" and "busybox" in
+a directory called "initramfs", under the linux-2.6.* directory. See
+Documentation/early-userspace/README for more details.)
+
+If you don't already understand what shared libraries, devices, and paths
+you need to get a minimal root filesystem up and running, here are some
+references:
+http://www.tldp.org/HOWTO/Bootdisk-HOWTO/
+http://www.tldp.org/HOWTO/From-PowerUp-To-Bash-Prompt-HOWTO.html
+http://www.linuxfromscratch.org/lfs/view/stable/
+
+The "klibc" package (http://www.kernel.org/pub/linux/libs/klibc) is
+designed to be a tiny C library to statically link early userspace
+code against, along with some related utilities. It is BSD licensed.
+
+I use uClibc (http://www.uclibc.org) and busybox (http://www.busybox.net)
+myself. These are LGPL and GPL, respectively.
+
+In theory you could use glibc, but that's not well suited for small embedded
+uses like this. (A "hello world" program statically linked against glibc is
+over 400k. With uClibc it's 7k. Also note that glibc dlopens libnss to do
+name lookups, even when otherwise statically linked.)
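+
+As a hedged illustration of the size comparison above, the "hello
+world" in question can double as a minimal /init; note that PID 1
+must never return to the kernel:
+
+	#include <stdio.h>
+	#include <unistd.h>
+
+	int main(void)
+	{
+		printf("Hello world, from initramfs!\n");
+		for (;;)		/* never return to the kernel */
+			pause();
+	}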
+
+Future directions:
+------------------
+
+Today (2.6.14), initramfs is always compiled in, but not always used. The
+kernel falls back to legacy boot code that is reached only if initramfs does
+not contain an /init program. The fallback is legacy code, there to ensure a
+smooth transition and allowing early boot functionality to gradually move to
+"early userspace" (I.E. initramfs).
+
+The move to early userspace is necessary because finding and mounting the real
+root device is complex. Root partitions can span multiple devices (raid or
+separate journal). They can be out on the network (requiring dhcp, setting a
+specific mac address, logging into a server, etc). They can live on removable
+media, with dynamically allocated major/minor numbers and persistent naming
+issues requiring a full udev implementation to sort out. They can be
+compressed, encrypted, copy-on-write, loopback mounted, strangely partitioned,
+and so on.
+
+This kind of complexity (which inevitably includes policy) is rightly handled
+in userspace. Both klibc and busybox/uClibc are working on simple initramfs
+packages to drop into a kernel build, and when standard solutions are ready
+and widely deployed, the kernel's legacy early boot code will become obsolete
+and a candidate for the feature removal schedule.
+
+But that's a while off yet.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index f042c12e0ed2..ee4c0a8b8db7 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -3,7 +3,7 @@
Original author: Richard Gooch
- Last updated on August 25, 2005
+ Last updated on October 28, 2005
Copyright (C) 1999 Richard Gooch
Copyright (C) 2005 Pekka Enberg
@@ -11,62 +11,61 @@
This file is released under the GPLv2.
-What is it?
-===========
+Introduction
+============
-The Virtual File System (otherwise known as the Virtual Filesystem
-Switch) is the software layer in the kernel that provides the
-filesystem interface to userspace programs. It also provides an
-abstraction within the kernel which allows different filesystem
-implementations to coexist.
+The Virtual File System (also known as the Virtual Filesystem Switch)
+is the software layer in the kernel that provides the filesystem
+interface to userspace programs. It also provides an abstraction
+within the kernel which allows different filesystem implementations to
+coexist.
+
+VFS system calls open(2), stat(2), read(2), write(2), chmod(2) and so
+on are called from a process context. Filesystem locking is described
+in the document Documentation/filesystems/Locking.
-A Quick Look At How It Works
-============================
+Directory Entry Cache (dcache)
+------------------------------
-In this section I'll briefly describe how things work, before
-launching into the details. I'll start with describing what happens
-when user programs open and manipulate files, and then look from the
-other view which is how a filesystem is supported and subsequently
-mounted.
+The VFS implements the open(2), stat(2), chmod(2), and similar system
+calls. The pathname argument that is passed to them is used by the VFS
+to search through the directory entry cache (also known as the dentry
+cache or dcache). This provides a very fast look-up mechanism to
+translate a pathname (filename) into a specific dentry. Dentries live
+in RAM and are never saved to disc: they exist only for performance.
+
+The dentry cache is meant to be a view into your entire filespace. As
+most computers cannot fit all dentries in the RAM at the same time,
+some bits of the cache are missing. In order to resolve your pathname
+into a dentry, the VFS may have to resort to creating dentries along
+the way, and then loading the inode. This is done by looking up the
+inode.
-Opening a File
---------------
+The Inode Object
+----------------
-The VFS implements the open(2), stat(2), chmod(2) and similar system
-calls. The pathname argument is used by the VFS to search through the
-directory entry cache (dentry cache or "dcache"). This provides a very
-fast look-up mechanism to translate a pathname (filename) into a
-specific dentry.
+An individual dentry usually has a pointer to an inode. Inodes are
+filesystem objects such as regular files, directories, FIFOs and other
+beasts. They live either on the disc (for block device filesystems)
+or in the memory (for pseudo filesystems). Inodes that live on the
+disc are copied into the memory when required and changes to the inode
+are written back to disc. A single inode can be pointed to by multiple
+dentries (hard links, for example, do this).
-An individual dentry usually has a pointer to an inode. Inodes are the
-things that live on disc drives, and can be regular files (you know:
-those things that you write data into), directories, FIFOs and other
-beasts. Dentries live in RAM and are never saved to disc: they exist
-only for performance. Inodes live on disc and are copied into memory
-when required. Later any changes are written back to disc. The inode
-that lives in RAM is a VFS inode, and it is this which the dentry
-points to. A single inode can be pointed to by multiple dentries
-(think about hardlinks).
+To look up an inode requires that the VFS calls the lookup() method of
+the parent directory inode. This method is installed by the specific
+filesystem implementation that the inode lives in. Once the VFS has
+the required dentry (and hence the inode), we can do all those boring
+things like open(2) the file, or stat(2) it to peek at the inode
+data. The stat(2) operation is fairly simple: once the VFS has the
+dentry, it peeks at the inode data and passes some of it back to
+userspace.
-The dcache is meant to be a view into your entire filespace. Unlike
-Linus, most of us losers can't fit enough dentries into RAM to cover
-all of our filespace, so the dcache has bits missing. In order to
-resolve your pathname into a dentry, the VFS may have to resort to
-creating dentries along the way, and then loading the inode. This is
-done by looking up the inode.
-To look up an inode (usually read from disc) requires that the VFS
-calls the lookup() method of the parent directory inode. This method
-is installed by the specific filesystem implementation that the inode
-lives in. There will be more on this later.
-
-Once the VFS has the required dentry (and hence the inode), we can do
-all those boring things like open(2) the file, or stat(2) it to peek
-at the inode data. The stat(2) operation is fairly simple: once the
-VFS has the dentry, it peeks at the inode data and passes some of it
-back to userspace.
+The File Object
+---------------
Opening a file requires another operation: allocation of a file
structure (this is the kernel-side implementation of file
@@ -74,51 +73,39 @@ descriptors). The freshly allocated file structure is initialized with
a pointer to the dentry and a set of file operation member functions.
These are taken from the inode data. The open() file method is then
called so the specific filesystem implementation can do its work. You
-can see that this is another switch performed by the VFS.
-
-The file structure is placed into the file descriptor table for the
-process.
+can see that this is another switch performed by the VFS. The file
+structure is placed into the file descriptor table for the process.
Reading, writing and closing files (and other assorted VFS operations)
is done by using the userspace file descriptor to grab the appropriate
-file structure, and then calling the required file structure method
-function to do whatever is required.
-
-For as long as the file is open, it keeps the dentry "open" (in use),
-which in turn means that the VFS inode is still in use.
-
-All VFS system calls (i.e. open(2), stat(2), read(2), write(2),
-chmod(2) and so on) are called from a process context. You should
-assume that these calls are made without any kernel locks being
-held. This means that the processes may be executing the same piece of
-filesystem or driver code at the same time, on different
-processors. You should ensure that access to shared resources is
-protected by appropriate locks.
+file structure, and then calling the required file structure method to
+do whatever is required. For as long as the file is open, it keeps the
+dentry in use, which in turn means that the VFS inode is still in use.
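+
+As a rough sketch of the dispatch just described (illustrative only,
+not the actual sys_read() implementation):
+
+  ssize_t read_dispatch_sketch(int fd, char __user *buf, size_t count)
+  {
+          struct file *file = fget(fd);   /* fd -> struct file */
+          ssize_t ret = -EBADF;
+
+          if (file) {
+                  /* call the method installed at open() time */
+                  ret = file->f_op->read(file, buf, count,
+                                         &file->f_pos);
+                  fput(file);
+          }
+          return ret;
+  }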
Registering and Mounting a Filesystem
--------------------------------------
+=====================================
-If you want to support a new kind of filesystem in the kernel, all you
-need to do is call register_filesystem(). You pass a structure
-describing the filesystem implementation (struct file_system_type)
-which is then added to an internal table of supported filesystems. You
-can do:
+To register and unregister a filesystem, use the following API
+functions:
-% cat /proc/filesystems
+ #include <linux/fs.h>
-to see what filesystems are currently available on your system.
+ extern int register_filesystem(struct file_system_type *);
+ extern int unregister_filesystem(struct file_system_type *);
-When a request is made to mount a block device onto a directory in
-your filespace the VFS will call the appropriate method for the
-specific filesystem. The dentry for the mount point will then be
-updated to point to the root inode for the new filesystem.
+The passed struct file_system_type describes your filesystem. When a
+request is made to mount a device onto a directory in your filespace,
+the VFS will call the appropriate get_sb() method for the specific
+filesystem. The dentry for the mount point will then be updated to
+point to the root inode for the new filesystem.
-It's now time to look at things in more detail.
+You can see all filesystems that are registered to the kernel in the
+file /proc/filesystems.
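+
+As a sketch, a minimal module could register a trivial filesystem as
+below (2.6.13-era get_sb() API; the "myfs" names and the magic number
+are made up for illustration):
+
+  #include <linux/fs.h>
+  #include <linux/init.h>
+  #include <linux/module.h>
+
+  static int myfs_fill_super(struct super_block *sb, void *data,
+                             int silent)
+  {
+          static struct tree_descr files[] = {{""}};
+
+          /* libfs helper: sets up a root inode and dentry for us;
+             0x20050905 is a made-up magic number */
+          return simple_fill_super(sb, 0x20050905, files);
+  }
+
+  static struct super_block *myfs_get_sb(struct file_system_type *fs_type,
+          int flags, const char *dev_name, void *data)
+  {
+          return get_sb_single(fs_type, flags, data, myfs_fill_super);
+  }
+
+  static struct file_system_type myfs_fs_type = {
+          .owner   = THIS_MODULE,
+          .name    = "myfs",
+          .get_sb  = myfs_get_sb,
+          .kill_sb = kill_litter_super,
+  };
+
+  static int __init myfs_init(void)
+  {
+          return register_filesystem(&myfs_fs_type);
+  }
+
+  static void __exit myfs_exit(void)
+  {
+          unregister_filesystem(&myfs_fs_type);
+  }
+
+  module_init(myfs_init);
+  module_exit(myfs_exit);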
struct file_system_type
-=======================
+-----------------------
This describes the filesystem. As of kernel 2.6.13, the following
members are defined:
@@ -197,8 +184,14 @@ A fill_super() method implementation has the following arguments:
int silent: whether or not to be silent on error
+The Superblock Object
+=====================
+
+A superblock object represents a mounted filesystem.
+
+
struct super_operations
-=======================
+-----------------------
This describes how the VFS can manipulate the superblock of your
filesystem. As of kernel 2.6.13, the following members are defined:
@@ -286,9 +279,9 @@ or bottom half).
a superblock. The second parameter indicates whether the method
should wait until the write out has been completed. Optional.
- write_super_lockfs: called when VFS is locking a filesystem and forcing
- it into a consistent state. This function is currently used by the
- Logical Volume Manager (LVM).
+ write_super_lockfs: called when VFS is locking a filesystem and
+ forcing it into a consistent state. This method is currently
+ used by the Logical Volume Manager (LVM).
unlockfs: called when VFS is unlocking a filesystem and making it writable
again.
@@ -317,8 +310,14 @@ field. This is a pointer to a "struct inode_operations" which
describes the methods that can be performed on individual inodes.
+The Inode Object
+================
+
+An inode object represents an object within the filesystem.
+
+
struct inode_operations
-=======================
+-----------------------
This describes how the VFS can manipulate an inode in your
filesystem. As of kernel 2.6.13, the following members are defined:
@@ -394,51 +393,62 @@ otherwise noted.
will probably need to call d_instantiate() just as you would
in the create() method
+ rename: called by the rename(2) system call to rename the object to
+ have the parent and name given by the second inode and dentry.
+
readlink: called by the readlink(2) system call. Only required if
you want to support reading symbolic links
follow_link: called by the VFS to follow a symbolic link to the
inode it points to. Only required if you want to support
- symbolic links. This function returns a void pointer cookie
+ symbolic links. This method returns a void pointer cookie
that is passed to put_link().
put_link: called by the VFS to release resources allocated by
- follow_link(). The cookie returned by follow_link() is passed to
- to this function as the last parameter. It is used by filesystems
- such as NFS where page cache is not stable (i.e. page that was
- installed when the symbolic link walk started might not be in the
- page cache at the end of the walk).
+ follow_link(). The cookie returned by follow_link() is passed
+ to this method as the last parameter. It is used by
+ filesystems such as NFS where the page cache is not stable
+ (i.e. a page that was installed when the symbolic link walk
+ started might not be in the page cache at the end of the
+ walk). See the sketch after this list for the
+ follow_link()/put_link() pairing.
- truncate: called by the VFS to change the size of a file. The i_size
- field of the inode is set to the desired size by the VFS before
- this function is called. This function is called by the truncate(2)
- system call and related functionality.
+ truncate: called by the VFS to change the size of a file. The
+ i_size field of the inode is set to the desired size by the
+ VFS before this method is called. This method is called by
+ the truncate(2) system call and related functionality.
permission: called by the VFS to check for access rights on a POSIX-like
filesystem.
- setattr: called by the VFS to set attributes for a file. This function is
- called by chmod(2) and related system calls.
+ setattr: called by the VFS to set attributes for a file. This method
+ is called by chmod(2) and related system calls.
- getattr: called by the VFS to get attributes of a file. This function is
- called by stat(2) and related system calls.
+ getattr: called by the VFS to get attributes of a file. This method
+ is called by stat(2) and related system calls.
setxattr: called by the VFS to set an extended attribute for a file.
- Extended attribute is a name:value pair associated with an inode. This
- function is called by setxattr(2) system call.
+ An extended attribute is a name:value pair associated with an
+ inode. This method is called by the setxattr(2) system call.
- getxattr: called by the VFS to retrieve the value of an extended attribute
- name. This function is called by getxattr(2) function call.
+ getxattr: called by the VFS to retrieve the value of an extended
+ attribute name. This method is called by the getxattr(2) system
+ call.
- listxattr: called by the VFS to list all extended attributes for a given
- file. This function is called by listxattr(2) system call.
+ listxattr: called by the VFS to list all extended attributes for a
+ given file. This method is called by the listxattr(2) system call.
- removexattr: called by the VFS to remove an extended attribute from a file.
- This function is called by removexattr(2) system call.
+ removexattr: called by the VFS to remove an extended attribute from
+ a file. This method is called by the removexattr(2) system call.
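+
+ As referenced in the put_link entry above, here is a sketch of the
+ follow_link()/put_link() cookie pairing (2.6.13-era signatures;
+ myfs_read_symlink() and the kmalloc-based scheme are illustrative
+ assumptions, and many real filesystems use the
+ page_follow_link_light()/page_put_link() helpers instead):
+
+  static void *myfs_follow_link(struct dentry *dentry,
+                                struct nameidata *nd)
+  {
+          char *path = kmalloc(PATH_MAX, GFP_KERNEL);
+
+          if (!path)
+                  return ERR_PTR(-ENOMEM);
+          /* hypothetical helper that reads the link target */
+          myfs_read_symlink(dentry->d_inode, path);
+          nd_set_link(nd, path);
+          return path;            /* cookie handed to put_link() */
+  }
+
+  static void myfs_put_link(struct dentry *dentry, struct nameidata *nd,
+                            void *cookie)
+  {
+          kfree(cookie);          /* free what follow_link() allocated */
+  }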
+
+
+The Address Space Object
+========================
+
+The address space object is used to identify pages in the page cache.
struct address_space_operations
-===============================
+-------------------------------
This describes how the VFS can manipulate mapping of a file to page cache in
your filesystem. As of kernel 2.6.13, the following members are defined:
@@ -502,8 +512,14 @@ struct address_space_operations {
it. An example implementation can be found in fs/ext2/xip.c.
+The File Object
+===============
+
+A file object represents a file opened by a process.
+
+
struct file_operations
-======================
+----------------------
This describes how the VFS can manipulate an open file. As of kernel
2.6.13, the following members are defined:
@@ -661,7 +677,7 @@ of child dentries. Child dentries are basically like files in a
directory.
-Directory Entry Cache APIs
+Directory Entry Cache API
--------------------------
There are a number of functions defined which permit a filesystem to
@@ -705,178 +721,24 @@ manipulate dentries:
and the dentry is returned. The caller must use dput()
to free the dentry when it finishes using it.
-
-RCU-based dcache locking model
-------------------------------
-
-On many workloads, the most common operation on dcache is
-to look up a dentry, given a parent dentry and the name
-of the child. Typically, for every open(), stat() etc.,
-the dentry corresponding to the pathname will be looked
-up by walking the tree starting with the first component
-of the pathname and using that dentry along with the next
-component to look up the next level and so on. Since it
-is a frequent operation for workloads like multiuser
-environments and web servers, it is important to optimize
-this path.
-
-Prior to 2.5.10, dcache_lock was acquired in d_lookup and thus
-in every component during path look-up. Since 2.5.10 onwards,
-fast-walk algorithm changed this by holding the dcache_lock
-at the beginning and walking as many cached path component
-dentries as possible. This significantly decreases the number
-of acquisition of dcache_lock. However it also increases the
-lock hold time significantly and affects performance in large
-SMP machines. Since 2.5.62 kernel, dcache has been using
-a new locking model that uses RCU to make dcache look-up
-lock-free.
-
-The current dcache locking model is not very different from the existing
-dcache locking model. Prior to 2.5.62 kernel, dcache_lock
-protected the hash chain, d_child, d_alias, d_lru lists as well
-as d_inode and several other things like mount look-up. RCU-based
-changes affect only the way the hash chain is protected. For everything
-else the dcache_lock must be taken for both traversing as well as
-updating. The hash chain updates too take the dcache_lock.
-The significant change is the way d_lookup traverses the hash chain,
-it doesn't acquire the dcache_lock for this and rely on RCU to
-ensure that the dentry has not been *freed*.
+For further information on dentry locking, please refer to the document
+Documentation/filesystems/dentry-locking.txt.
-Dcache locking details
-----------------------
+Resources
+=========
-For many multi-user workloads, open() and stat() on files are
-very frequently occurring operations. Both involve walking
-of path names to find the dentry corresponding to the
-concerned file. In 2.4 kernel, dcache_lock was held
-during look-up of each path component. Contention and
-cache-line bouncing of this global lock caused significant
-scalability problems. With the introduction of RCU
-in Linux kernel, this was worked around by making
-the look-up of path components during path walking lock-free.
+(Note some of these resources are not up-to-date with the latest kernel
+ version.)
+Creating Linux virtual filesystems. 2002
+    <http://lwn.net/Articles/13325/>
-Safe lock-free look-up of dcache hash table
-===========================================
+The Linux Virtual File-system Layer by Neil Brown. 1999
+    <http://www.cse.unsw.edu.au/~neilb/oss/linux-commentary/vfs.html>
-Dcache is a complex data structure with the hash table entries
-also linked together in other lists. In 2.4 kernel, dcache_lock
-protected all the lists. We applied RCU only on hash chain
-walking. The rest of the lists are still protected by dcache_lock.
-Some of the important changes are :
+A tour of the Linux VFS by Michael K. Johnson. 1996
+    <http://www.tldp.org/LDP/khg/HyperNews/get/fs/vfstour.html>
-1. The deletion from hash chain is done using hlist_del_rcu() macro which
- doesn't initialize next pointer of the deleted dentry and this
- allows us to walk safely lock-free while a deletion is happening.
-
-2. Insertion of a dentry into the hash table is done using
- hlist_add_head_rcu() which take care of ordering the writes -
- the writes to the dentry must be visible before the dentry
- is inserted. This works in conjunction with hlist_for_each_rcu()
- while walking the hash chain. The only requirement is that
- all initialization to the dentry must be done before hlist_add_head_rcu()
- since we don't have dcache_lock protection while traversing
- the hash chain. This isn't different from the existing code.
-
-3. The dentry looked up without holding dcache_lock by cannot be
- returned for walking if it is unhashed. It then may have a NULL
- d_inode or other bogosity since RCU doesn't protect the other
- fields in the dentry. We therefore use a flag DCACHE_UNHASHED to
- indicate unhashed dentries and use this in conjunction with a
- per-dentry lock (d_lock). Once looked up without the dcache_lock,
- we acquire the per-dentry lock (d_lock) and check if the
- dentry is unhashed. If so, the look-up is failed. If not, the
- reference count of the dentry is increased and the dentry is returned.
-
-4. Once a dentry is looked up, it must be ensured during the path
- walk for that component it doesn't go away. In pre-2.5.10 code,
- this was done holding a reference to the dentry. dcache_rcu does
- the same. In some sense, dcache_rcu path walking looks like
- the pre-2.5.10 version.
-
-5. All dentry hash chain updates must take the dcache_lock as well as
- the per-dentry lock in that order. dput() does this to ensure
- that a dentry that has just been looked up in another CPU
- doesn't get deleted before dget() can be done on it.
-
-6. There are several ways to do reference counting of RCU protected
- objects. One such example is in ipv4 route cache where
- deferred freeing (using call_rcu()) is done as soon as
- the reference count goes to zero. This cannot be done in
- the case of dentries because tearing down of dentries
- require blocking (dentry_iput()) which isn't supported from
- RCU callbacks. Instead, tearing down of dentries happen
- synchronously in dput(), but actual freeing happens later
- when RCU grace period is over. This allows safe lock-free
- walking of the hash chains, but a matched dentry may have
- been partially torn down. The checking of DCACHE_UNHASHED
- flag with d_lock held detects such dentries and prevents
- them from being returned from look-up.
-
-
-Maintaining POSIX rename semantics
-==================================
-
-Since look-up of dentries is lock-free, it can race against
-a concurrent rename operation. For example, during rename
-of file A to B, look-up of either A or B must succeed.
-So, if look-up of B happens after A has been removed from the
-hash chain but not added to the new hash chain, it may fail.
-Also, a comparison while the name is being written concurrently
-by a rename may result in false positive matches violating
-rename semantics. Issues related to race with rename are
-handled as described below :
-
-1. Look-up can be done in two ways - d_lookup() which is safe
- from simultaneous renames and __d_lookup() which is not.
- If __d_lookup() fails, it must be followed up by a d_lookup()
- to correctly determine whether a dentry is in the hash table
- or not. d_lookup() protects look-ups using a sequence
- lock (rename_lock).
-
-2. The name associated with a dentry (d_name) may be changed if
- a rename is allowed to happen simultaneously. To avoid memcmp()
- in __d_lookup() go out of bounds due to a rename and false
- positive comparison, the name comparison is done while holding the
- per-dentry lock. This prevents concurrent renames during this
- operation.
-
-3. Hash table walking during look-up may move to a different bucket as
- the current dentry is moved to a different bucket due to rename.
- But we use hlists in dcache hash table and they are null-terminated.
- So, even if a dentry moves to a different bucket, hash chain
- walk will terminate. [with a list_head list, it may not since
- termination is when the list_head in the original bucket is reached].
- Since we redo the d_parent check and compare name while holding
- d_lock, lock-free look-up will not race against d_move().
-
-4. There can be a theoretical race when a dentry keeps coming back
- to original bucket due to double moves. Due to this look-up may
- consider that it has never moved and can end up in a infinite loop.
- But this is not any worse that theoretical livelocks we already
- have in the kernel.
-
-
-Important guidelines for filesystem developers related to dcache_rcu
-====================================================================
-
-1. Existing dcache interfaces (pre-2.5.62) exported to filesystem
- don't change. Only dcache internal implementation changes. However
- filesystems *must not* delete from the dentry hash chains directly
- using the list macros like allowed earlier. They must use dcache
- APIs like d_drop() or __d_drop() depending on the situation.
-
-2. d_flags is now protected by a per-dentry lock (d_lock). All
- access to d_flags must be protected by it.
-
-3. For a hashed dentry, checking of d_count needs to be protected
- by d_lock.
-
-
-Papers and other documentation on dcache locking
-================================================
-
-1. Scaling dcache with RCU (http://linuxjournal.com/article.php?sid=7124).
-
-2. http://lse.sourceforge.net/locking/dcache/dcache.html
+A small trail through the Linux kernel by Andries Brouwer. 2001
+    <http://www.win.tue.nl/~aeb/linux/vfs/trail.html>
diff --git a/Documentation/hpet.txt b/Documentation/hpet.txt
index 4e7cc8d3359b..e52457581f47 100644
--- a/Documentation/hpet.txt
+++ b/Documentation/hpet.txt
@@ -1,18 +1,21 @@
High Precision Event Timer Driver for Linux
-The High Precision Event Timer (HPET) hardware is the future replacement for the 8254 and Real
-Time Clock (RTC) periodic timer functionality. Each HPET can have up two 32 timers. It is possible
-to configure the first two timers as legacy replacements for 8254 and RTC periodic. A specification
-done by INTEL and Microsoft can be found at http://www.intel.com/labs/platcomp/hpet/hpetspec.htm.
+The High Precision Event Timer (HPET) hardware is the future replacement
+for the 8254 and Real Time Clock (RTC) periodic timer functionality.
+Each HPET can have up to 32 timers. It is possible to configure the
+first two timers as legacy replacements for 8254 and RTC periodic timers.
+A specification done by Intel and Microsoft can be found at
+<http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>.
-The driver supports detection of HPET driver allocation and initialization of the HPET before the
-driver module_init routine is called. This enables platform code which uses timer 0 or 1 as the
-main timer to intercept HPET initialization. An example of this initialization can be found in
+The driver supports detection of HPET driver allocation and initialization
+of the HPET before the driver module_init routine is called. This enables
+platform code which uses timer 0 or 1 as the main timer to intercept HPET
+initialization. An example of this initialization can be found in
arch/i386/kernel/time_hpet.c.
-The driver provides two APIs which are very similar to the API found in the rtc.c driver.
-There is a user space API and a kernel space API. An example user space program is provided
-below.
+The driver provides two APIs which are very similar to the API found in
+the rtc.c driver. There is a user space API and a kernel space API.
+An example user space program is provided below.
#include
#include
@@ -290,9 +293,8 @@ The kernel API has three interfaces exported from the driver:
hpet_unregister(struct hpet_task *tp)
hpet_control(struct hpet_task *tp, unsigned int cmd, unsigned long arg)
-The kernel module using this interface fills in the ht_func and ht_data members of the
-hpet_task structure before calling hpet_register. hpet_control simply vectors to the hpet_ioctl
-routine and has the same commands and respective arguments as the user API. hpet_unregister
+The kernel module using this interface fills in the ht_func and ht_data
+members of the hpet_task structure before calling hpet_register.
+hpet_control simply vectors to the hpet_ioctl routine and has the same
+commands and respective arguments as the user API. hpet_unregister
is used to terminate usage of the HPET timer reserved by hpet_register.
-
-
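+
+As a sketch, a module might use the kernel API as below (the exact
+hpet_register() arguments and the ht_func callback signature are
+assumptions based on the description above, not taken from this
+document):
+
+  #include <linux/hpet.h>
+
+  static void my_hpet_func(void *data)
+  {
+          /* invoked for the registered HPET timer; 'data' is the
+             ht_data cookie set below */
+  }
+
+  static struct hpet_task my_task;
+
+  static int my_hpet_setup(void)
+  {
+          my_task.ht_func = my_hpet_func;
+          my_task.ht_data = &my_task;     /* arbitrary cookie */
+          return hpet_register(&my_task, 1);  /* 1: periodic (assumed) */
+  }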
diff --git a/Documentation/ioctl-number.txt b/Documentation/ioctl-number.txt
index 769f925c8526..87f4d052e39c 100644
--- a/Documentation/ioctl-number.txt
+++ b/Documentation/ioctl-number.txt
@@ -130,8 +130,6 @@ Code Seq# Include File Comments
'i' 00-3F linux/i2o.h
'j' 00-3F linux/joystick.h
-'k' all asm-sparc/kbio.h
- asm-sparc64/kbio.h
'l' 00-3F linux/tcfs_fs.h transparent cryptographic file system
'l' 40-7F linux/udf_fs_i.h in development:
diff --git a/Documentation/magic-number.txt b/Documentation/magic-number.txt
index bd8eefa17587..af67faccf4de 100644
--- a/Documentation/magic-number.txt
+++ b/Documentation/magic-number.txt
@@ -120,7 +120,7 @@ ISDN_NET_MAGIC 0x49344C02 isdn_net_local_s drivers/isdn/i4l/isdn_net_li
SAVEKMSG_MAGIC2 0x4B4D5347 savekmsg arch/*/amiga/config.c
STLI_BOARDMAGIC 0x4bc6c825 stlibrd include/linux/istallion.h
CS_STATE_MAGIC 0x4c4f4749 cs_state sound/oss/cs46xx.c
-SLAB_C_MAGIC 0x4f17a36d kmem_cache_s mm/slab.c
+SLAB_C_MAGIC 0x4f17a36d kmem_cache mm/slab.c
COW_MAGIC 0x4f4f4f4d cow_header_v1 arch/um/drivers/ubd_user.c
I810_CARD_MAGIC 0x5072696E i810_card sound/oss/i810_audio.c
TRIDENT_CARD_MAGIC 0x5072696E trident_card sound/oss/trident.c
diff --git a/Documentation/networking/decnet.txt b/Documentation/networking/decnet.txt
index c6bd25f5d61d..e6c39c5831f5 100644
--- a/Documentation/networking/decnet.txt
+++ b/Documentation/networking/decnet.txt
@@ -176,8 +176,6 @@ information (_most_ of which _is_ _essential_) includes:
- Which client caused the problem ?
- How much data was being transferred ?
- Was the network congested ?
- - If there was a kernel panic, please run the output through ksymoops
- before sending it to me, otherwise its _useless_.
- How can the problem be reproduced ?
- Can you use tcpdump to get a trace ? (N.B. Most (all?) versions of
tcpdump don't understand how to dump DECnet properly, so including
diff --git a/Documentation/oops-tracing.txt b/Documentation/oops-tracing.txt
index 66eaaab7773d..c563842ed805 100644
--- a/Documentation/oops-tracing.txt
+++ b/Documentation/oops-tracing.txt
@@ -1,6 +1,6 @@
NOTE: ksymoops is useless on 2.6. Please use the Oops in its original format
(from dmesg, etc). Ignore any references in this or other docs to "decoding
-the Oops" or "running it through ksymoops". If you post an Oops fron 2.6 that
+the Oops" or "running it through ksymoops". If you post an Oops from 2.6 that
has been run through ksymoops, people will just tell you to repost it.
Quick Summary
diff --git a/Documentation/power/video.txt b/Documentation/power/video.txt
index 526d6dd267ea..912bed87c758 100644
--- a/Documentation/power/video.txt
+++ b/Documentation/power/video.txt
@@ -11,9 +11,9 @@ boot video card. (Kernel usually does not even contain video card
driver -- vesafb and vgacon are widely used).
This is not problem for swsusp, because during swsusp resume, BIOS is
-run normally so video card is normally initialized. S3 has absolutely
-no chance of working with SMP/HT. Be sure it to turn it off before
-testing (swsusp should work ok, OTOH).
+run normally so video card is normally initialized. It should not be
+a problem for S1 standby, because hardware should retain its state
+over that.
There are a few types of systems where video works after S3 resume:
@@ -64,7 +64,7 @@ your video card (good luck getting docs :-(). Maybe suspending from X
(proper X, knowing your hardware, not XF68_FBcon) might have better
chance of working.
-Table of known working systems:
+Table of known working notebooks:
Model hack (or "how to do it")
------------------------------------------------------------------------------
@@ -73,7 +73,7 @@ Acer TM 242FX vbetool (6)
Acer TM C110 video_post (8)
Acer TM C300 vga=normal (only suspend on console, not in X), vbetool (6) or video_post (8)
Acer TM 4052LCi s3_bios (2)
-Acer TM 636Lci s3_bios vga=normal (2)
+Acer TM 636Lci s3_bios,s3_mode (4)
Acer TM 650 (Radeon M7) vga=normal plus boot-radeon (5) gets text console back
Acer TM 660 ??? (*)
Acer TM 800 vga=normal, X patches, see webpage (5) or vbetool (6)
@@ -137,6 +137,13 @@ Toshiba Satellite P10-554 s3_bios,s3_mode (4)(****)
Toshiba M30 (2) xor X with nvidia driver using internal AGP
Uniwill 244IIO ??? (*)
+Known working desktop systems
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Mainboard Graphics card hack (or "how to do it")
+------------------------------------------------------------------------------
+Asus A7V8X nVidia RIVA TNT2 model 64 s3_bios,s3_mode (4)
+
(*) from http://www.ubuntulinux.org/wiki/HoaryPMResults, not sure
which options to use. If you know, please tell me.
diff --git a/Documentation/s390/driver-model.txt b/Documentation/s390/driver-model.txt
index 19461958e2bd..df09758bf3fe 100644
--- a/Documentation/s390/driver-model.txt
+++ b/Documentation/s390/driver-model.txt
@@ -8,11 +8,10 @@ All devices which can be addressed by means of ccws are called 'CCW devices' -
even if they aren't actually driven by ccws.
All ccw devices are accessed via a subchannel, this is reflected in the
-structures under root/:
+structures under devices/:
-root/
- - sys
- - legacy
+devices/
+ - system/
- css0/
- 0.0.0000/0.0.0815/
- 0.0.0001/0.0.4711/
@@ -36,7 +35,7 @@ availability: Can be 'good' or 'boxed'; 'no path' or 'no device' for
online: An interface to set the device online and offline.
In the special case of the device being disconnected (see the
- notify function under 1.2), piping 0 to online will focibly delete
+ notify function under 1.2), piping 0 to online will forcibly delete
the device.
The device drivers can add entries to export per-device data and interfaces.
@@ -222,7 +221,7 @@ and are called 'chp0.'. They have no driver and do not belong to any bus.
Please note, that unlike /proc/chpids in 2.4, the channel path objects reflect
only the logical state and not the physical state, since we cannot track the
latter consistently due to lacking machine support (we don't need to be aware
-of anyway).
+of it anyway).
status - Can be 'online' or 'offline'.
Piping 'on' or 'off' sets the chpid logically online/offline.
@@ -235,12 +234,16 @@ status - Can be 'online' or 'offline'.
3. System devices
-----------------
-Note: cpus may yet be added here.
-
3.1 xpram
---------
-xpram shows up under sys/ as 'xpram'.
+xpram shows up under devices/system/ as 'xpram'.
+
+3.2 cpus
+--------
+
+For each cpu, a directory is created under devices/system/cpu/. Each cpu has an
+attribute 'online' which can be 0 or 1.
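+
+For example, to set a cpu logically offline and online again (assuming
+sysfs is mounted at /sys):
+
+# echo 0 > /sys/devices/system/cpu/cpu1/online
+# echo 1 > /sys/devices/system/cpu/cpu1/online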
4. Other devices
diff --git a/Documentation/sharedsubtree.txt b/Documentation/sharedsubtree.txt
new file mode 100644
index 000000000000..2d8f403eb6eb
--- /dev/null
+++ b/Documentation/sharedsubtree.txt
@@ -0,0 +1,1060 @@
+Shared Subtrees
+---------------
+
+Contents:
+ 1) Overview
+ 2) Features
+ 3) smount command
+ 4) Use-case
+ 5) Detailed semantics
+ 6) Quiz
+ 7) FAQ
+ 8) Implementation
+
+
+1) Overview
+-----------
+
+Consider the following situation:
+
+A process wants to clone its own namespace, but still wants to access the CD
+that got mounted recently. Shared subtree semantics provide the necessary
+mechanism to accomplish the above.
+
+They also provide the necessary building blocks for features like
+per-user-namespace and versioned filesystems.
+
+2) Features
+-----------
+
+Shared subtrees provide four different flavors of mounts (struct
+vfsmount, to be precise):
+
+ a. shared mount
+ b. slave mount
+ c. private mount
+ d. unbindable mount
+
+
+2a) A shared mount can be replicated to as many mountpoints as
+desired, and all the replicas continue to be exactly the same.
+
+ Here is an example:
+
+ Let's say /mnt has a mount that is shared.
+ mount --make-shared /mnt
+
+ note: the mount command does not yet support the --make-shared flag.
+ I have included a small C program (see section 3) which does the
+ same when executing 'smount /mnt shared'
+
+ #mount --bind /mnt /tmp
+ The above command replicates the mount at /mnt to the mountpoint /tmp
+ and the contents of both the mounts remain identical.
+
+ #ls /mnt
+ a b c
+
+ #ls /tmp
+ a b c
+
+ Now let's say we mount a device at /tmp/a
+ #mount /dev/sd0 /tmp/a
+
+ #ls /tmp/a
+ t1 t2 t3
+
+ #ls /mnt/a
+ t1 t2 t3
+
+ Note that the mount has propagated to the mount at /mnt as well.
+
+ And the same is true even when /dev/sd0 is mounted on /mnt/a. The
+ contents will be visible under /tmp/a too.
+
+
+2b) A slave mount is like a shared mount except that mount and umount events
+ only propagate towards it.
+
+ All slave mounts have a master mount, which is a shared mount.
+
+ Here is an example:
+
+ Let's say /mnt has a mount which is shared.
+ #mount --make-shared /mnt
+
+ Let's bind mount /mnt to /tmp
+ #mount --bind /mnt /tmp
+
+ the new mount at /tmp becomes a shared mount and it is a replica of
+ the mount at /mnt.
+
+ Now let's make the mount at /tmp a slave of /mnt
+ #mount --make-slave /tmp
+ [or smount /tmp slave]
+
+ let's mount /dev/sd0 on /mnt/a
+ #mount /dev/sd0 /mnt/a
+
+ #ls /mnt/a
+ t1 t2 t3
+
+ #ls /tmp/a
+ t1 t2 t3
+
+ Note the mount event has propagated to the mount at /tmp
+
+ However, let's see what happens if we mount something on the mount at /tmp
+
+ #mount /dev/sd1 /tmp/b
+
+ #ls /tmp/b
+ s1 s2 s3
+
+ #ls /mnt/b
+
+ Note how the mount event has not propagated to the mount at
+ /mnt
+
+
+2c) A private mount does not forward or receive propagation.
+
+ This is the mount we are familiar with. It's the default type.
+
+
+2d) An unbindable mount is an unbindable private mount
+
+ let's say we have a mount at /mnt and we make it unbindable
+
+ #mount --make-unbindable /mnt
+ [ smount /mnt unbindable ]
+
+ Let's try to bind mount this mount somewhere else.
+ # mount --bind /mnt /tmp
+ mount: wrong fs type, bad option, bad superblock on /mnt,
+ or too many mounted file systems
+
+ Binding an unbindable mount is an invalid operation.
+
+
+3) smount command
+
+ Currently the mount command is not aware of shared subtree features.
+ Work is in progress to add the support to mount (util-linux package).
+ Until then, use the following program.
+
+ ------------------------------------------------------------------------
+ //
+ //this code was developed by Miklos Szeredi
+ //and modified by Ram Pai
+ // sample usage:
+ // smount /tmp shared
+ //
+ #include <stdio.h>
+ #include <string.h>
+ #include <unistd.h>
+ #include <sys/mount.h>
+ #include <sys/fsuid.h>
+
+ #ifndef MS_REC
+ #define MS_REC 0x4000 /* 16384: Recursive loopback */
+ #endif
+
+ #ifndef MS_SHARED
+ #define MS_SHARED 1<<20 /* Shared */
+ #endif
+
+ #ifndef MS_PRIVATE
+ #define MS_PRIVATE 1<<18 /* Private */
+ #endif
+
+ #ifndef MS_SLAVE
+ #define MS_SLAVE 1<<19 /* Slave */
+ #endif
+
+ #ifndef MS_UNBINDABLE
+ #define MS_UNBINDABLE 1<<17 /* Unbindable */
+ #endif
+
+ int main(int argc, char *argv[])
+ {
+ int type;
+ if(argc != 3) {
+ fprintf(stderr, "usage: %s dir "
+ "\n" , argv[0]);
+ return 1;
+ }
+
+ fprintf(stdout, "%s %s %s\n", argv[0], argv[1], argv[2]);
+
+ if (strcmp(argv[2],"rshared")==0)
+ type=(MS_SHARED|MS_REC);
+ else if (strcmp(argv[2],"rslave")==0)
+ type=(MS_SLAVE|MS_REC);
+ else if (strcmp(argv[2],"rprivate")==0)
+ type=(MS_PRIVATE|MS_REC);
+ else if (strcmp(argv[2],"runbindable")==0)
+ type=(MS_UNBINDABLE|MS_REC);
+ else if (strcmp(argv[2],"shared")==0)
+ type=MS_SHARED;
+ else if (strcmp(argv[2],"slave")==0)
+ type=MS_SLAVE;
+ else if (strcmp(argv[2],"private")==0)
+ type=MS_PRIVATE;
+ else if (strcmp(argv[2],"unbindable")==0)
+ type=MS_UNBINDABLE;
+ else {
+ fprintf(stderr, "invalid operation: %s\n", argv[2]);
+ return 1;
+ }
+ setfsuid(getuid());
+
+ if(mount("", argv[1], "dontcare", type, "") == -1) {
+ perror("mount");
+ return 1;
+ }
+ return 0;
+ }
+ -----------------------------------------------------------------------
+
+ Copy the above code snippet into smount.c
+ gcc -o smount smount.c
+
+
+ (i) To mark all the mounts under /mnt as shared execute the following
+ command:
+
+ smount /mnt rshared
+ the corresponding syntax planned for mount command is
+ mount --make-rshared /mnt
+
+ just to mark a mount /mnt as shared, execute the following
+ command:
+ smount /mnt shared
+ the corresponding syntax planned for mount command is
+ mount --make-shared /mnt
+
+ (ii) To mark all the shared mounts under /mnt as slave execute the
+ following command:
+ smount /mnt rslave
+ the corresponding syntax planned for mount command is
+ mount --make-rslave /mnt
+
+ just to mark a mount /mnt as slave, execute the following
+ command:
+ smount /mnt slave
+ the corresponding syntax planned for mount command is
+ mount --make-slave /mnt
+
+ (iii) To mark all the mounts under /mnt as private execute the
+ following command:
+
+ smount /mnt rprivate
+ the corresponding syntax planned for mount command is
+ mount --make-rprivate /mnt
+
+ just to mark a mount /mnt as private, execute the following
+ command:
+ smount /mnt private
+ the corresponding syntax planned for mount command is
+ mount --make-private /mnt
+
+ NOTE: by default all the mounts are created as private. But if
+ you want to change some shared/slave/unbindable mount to
+ private at a later point in time, this command can help.
+
+ (iv) To mark all the mounts under /mnt as unbindable execute the
+ following command:
+ smount /mnt runbindable
+ the corresponding syntax planned for mount command is
+ mount --make-runbindable /mnt
+
+ just to mark a mount /mnt as unbindable, execute the following
+ command:
+ smount /mnt unbindable
+ the corresponding syntax planned for mount command is
+ mount --make-unbindable /mnt
+
+
+4) Use cases
+------------
+
+ A) A process wants to clone its own namespace, but still wants to
+ access the CD that got mounted recently.
+
+ Solution:
+
+ The system administrator can make the mount at /cdrom shared
+ mount --bind /cdrom /cdrom
+ mount --make-shared /cdrom
+
+ Now any process that clones off a new namespace will have a
+ mount at /cdrom which is a replica of the same mount in the
+ parent namespace.
+
+ So when a CD is inserted and mounted at /cdrom that mount gets
+ propagated to the other mount at /cdrom in all the other clone
+ namespaces.
+
+ B) A process wants its mounts invisible to any other process, but
+ still be able to see the other system mounts.
+
+ Solution:
+
+ To begin with, the administrator can mark the entire mount tree
+ as shareable.
+
+ mount --make-rshared /
+
+ A new process can clone off a new namespace and mark some part
+ of its namespace as slave
+
+ mount --make-rslave /myprivatetree
+
+ Henceforth any mounts within the /myprivatetree done by the
+ process will not show up in any other namespace. However, mounts
+ done in the parent namespace under /myprivatetree still show
+ up in the process's namespace.
+
+
+ Apart from the above semantics this feature provides the
+ building blocks to solve the following problems:
+
+ C) Per-user namespace
+
+ The above semantics allow a way to share mounts across
+ namespaces. But namespaces are associated with processes. If
+ namespaces are made first class objects with user API to
+ associate/disassociate a namespace with userid, then each user
+ could have his/her own namespace and tailor it to his/her
+ requirements. Of course this needs support from PAM.
+
+ D) Versioned files
+
+ If the entire mount tree is visible at multiple locations, then
+ an underlying versioning file system can return a different
+ version of the file depending on the path used to access that
+ file.
+
+ An example is:
+
+ mount --make-shared /
+ mount --rbind / /view/v1
+ mount --rbind / /view/v2
+ mount --rbind / /view/v3
+ mount --rbind / /view/v4
+
+ and if /usr has a versioning filesystem mounted, then that
+ mount appears at /view/v1/usr, /view/v2/usr, /view/v3/usr and
+ /view/v4/usr too
+
+ A user can request the v3 version of the file /usr/fs/namespace.c
+ by accessing /view/v3/usr/fs/namespace.c. The underlying
+ versioning filesystem can then decipher that the v3 version of the
+ file is being requested and return the corresponding
+ inode.
+
+5) Detailed semantics
+---------------------
+ The section below explains the detailed semantics of
+ bind, rbind, move, mount, umount and clone-namespace operations.
+
+ Note: the word 'vfsmount' and the noun 'mount' have been used
+ to mean the same thing throughout this document.
+
+5a) Mount states
+
+ A given mount can be in one of the following states
+ 1) shared
+ 2) slave
+ 3) shared and slave
+ 4) private
+ 5) unbindable
+
+ A 'propagation event' is defined as an event generated on a vfsmount
+ that leads to mount or unmount actions in other vfsmounts.
+
+ A 'peer group' is defined as a group of vfsmounts that propagate
+ events to each other.
+
+ (1) Shared mounts
+
+ A 'shared mount' is defined as a vfsmount that belongs to a
+ 'peer group'.
+
+ For example:
+ mount --make-shared /mnt
+ mount --bind /mnt /tmp
+
+ The mount at /mnt and that at /tmp are both shared and belong
+ to the same peer group. Anything mounted or unmounted under
+ /mnt or /tmp is reflected in all the other mounts of its peer
+ group.
+
+
+ (2) Slave mounts
+
+ A 'slave mount' is defined as a vfsmount that receives
+ propagation events and does not forward propagation events.
+
+ A slave mount, as the name implies, has a master mount from which
+ mount/unmount events are received. Events do not propagate from
+ the slave mount to the master. Only a shared mount can be made
+ a slave by executing the following command
+
+ mount --make-slave mount
+
+ A shared mount that is made a slave is no longer shared, unless
+ it is modified to become shared again.
+
+ (3) Shared and Slave
+
+ A vfsmount can be both shared as well as slave. This state
+ indicates that the mount is a slave of some vfsmount, and
+ has its own peer group too. This vfsmount receives propagation
+ events from its master vfsmount, and also forwards propagation
+ events to its 'peer group' and to its slave vfsmounts.
+
+ Strictly speaking, the vfsmount is shared having its own
+ peer group, and this peer-group is a slave of some other
+ peer group.
+
+ Only a slave vfsmount can be made 'shared and slave' by
+ either executing the following command
+ mount --make-shared mount
+ or by moving the slave vfsmount under a shared vfsmount.
+
+ (4) Private mount
+
+ A 'private mount' is defined as a vfsmount that does not
+ receive or forward any propagation events.
+
+ (5) Unbindable mount
+
+ An 'unbindable mount' is defined as a vfsmount that does not
+ receive or forward any propagation events and cannot
+ be bind mounted.
+
+
+ State diagram:
+ The state diagram below explains the state transition of a mount,
+ in response to various commands.
+ ------------------------------------------------------------------------
+ | |make-shared | make-slave | make-private |make-unbindab|
+ --------------|------------|--------------|--------------|-------------|
+ |shared |shared |*slave/private| private | unbindable |
+ | | | | | |
+ |-------------|------------|--------------|--------------|-------------|
+ |slave |shared | **slave | private | unbindable |
+ | |and slave | | | |
+ |-------------|------------|--------------|--------------|-------------|
+ |shared |shared | slave | private | unbindable |
+ |and slave |and slave | | | |
+ |-------------|------------|--------------|--------------|-------------|
+ |private |shared | **private | private | unbindable |
+ |-------------|------------|--------------|--------------|-------------|
+ |unbindable |shared |**unbindable | private | unbindable |
+ ------------------------------------------------------------------------
+
+ * if the shared mount is the only mount in its peer group, making it
+ a slave makes it private automatically, since there is no master to
+ which it can be slaved.
+
+ ** slaving a non-shared mount has no effect on the mount.
+
+ Apart from the commands listed above, the 'move' operation also changes
+ the state of a mount depending on the type of the destination mount.
+ It is explained in section 5d.
+
+5b) Bind semantics
+
+ Consider the following command
+
+ mount --bind A/a B/b
+
+ where 'A' is the source mount, 'a' is the dentry in the mount 'A', 'B'
+ is the destination mount and 'b' is the dentry in the destination mount.
+
+ The outcome depends on the type of mount of 'A' and 'B'. The table
+ below contains a quick reference.
+ ---------------------------------------------------------------------------
+ | BIND MOUNT OPERATION |
+ |**************************************************************************
+ |source(A)->| shared | private | slave | unbindable |
+ | dest(B) | | | | |
+ | | | | | | |
+ | v | | | | |
+ |**************************************************************************
+ | shared | shared | shared | shared & slave | invalid |
+ | | | | | |
+ |non-shared| shared | private | slave | invalid |
+ ***************************************************************************
+
+ Details:
+
+ 1. 'A' is a shared mount and 'B' is a shared mount. A new mount 'C'
+ which is a clone of 'A', is created. Its root dentry is 'a'. 'C' is
+ mounted on mount 'B' at dentry 'b'. Also new mount 'C1', 'C2', 'C3' ...
+ are created and mounted at the dentry 'b' on all mounts where 'B'
+ propagates to. A new propagation tree containing 'C1',..,'Cn' is
+ created. This propagation tree is identical to the propagation tree of
+ 'B'. And finally the peer-group of 'C' is merged with the peer group
+ of 'A'.
+
+ 2. 'A' is a private mount and 'B' is a shared mount. A new mount 'C'
+ which is a clone of 'A', is created. Its root dentry is 'a'. 'C' is
+ mounted on mount 'B' at dentry 'b'. Also new mount 'C1', 'C2', 'C3' ...
+ are created and mounted at the dentry 'b' on all mounts where 'B'
+ propagates to. A new propagation tree is set containing all new mounts
+ 'C', 'C1', .., 'Cn' with exactly the same configuration as the
+ propagation tree for 'B'.
+
+ 3. 'A' is a slave mount of mount 'Z' and 'B' is a shared mount. A new
+ mount 'C' which is a clone of 'A', is created. Its root dentry is 'a'.
+ 'C' is mounted on mount 'B' at dentry 'b'. Also new mounts 'C1', 'C2',
+ 'C3' ... are created and mounted at the dentry 'b' on all mounts where
+ 'B' propagates to. A new propagation tree containing the new mounts
+ 'C','C1',.. 'Cn' is created. This propagation tree is identical to the
+ propagation tree for 'B'. And finally the mount 'C' and its peer group
+ is made the slave of mount 'Z'. In other words, mount 'C' is in the
+ state 'slave and shared'.
+
+ 4. 'A' is an unbindable mount and 'B' is a shared mount. This is an
+ invalid operation.
+
+ 5. 'A' is a private mount and 'B' is a non-shared (private or slave or
+ unbindable) mount. A new mount 'C' which is a clone of 'A', is created.
+ Its root dentry is 'a'. 'C' is mounted on mount 'B' at dentry 'b'.
+
+ 6. 'A' is a shared mount and 'B' is a non-shared mount. A new mount 'C'
+ which is a clone of 'A' is created. Its root dentry is 'a'. 'C' is
+ mounted on mount 'B' at dentry 'b'. 'C' is made a member of the
+ peer-group of 'A'.
+
+ 7. 'A' is a slave mount of mount 'Z' and 'B' is a non-shared mount. A
+ new mount 'C' which is a clone of 'A' is created. Its root dentry is
+ 'a'. 'C' is mounted on mount 'B' at dentry 'b'. Also 'C' is set as a
+ slave mount of 'Z'. In other words 'A' and 'C' are both slave mounts of
+ 'Z'. All mount/unmount events on 'Z' propagates to 'A' and 'C'. But
+ mount/unmount on 'A' do not propagate anywhere else. Similarly
+ mount/unmount on 'C' do not propagate anywhere else.
+
+ 8. 'A' is an unbindable mount and 'B' is a non-shared mount. This is an
+ invalid operation. An unbindable mount cannot be bind mounted.
+
+5c) Rbind semantics
+
+ rbind is the same as bind. Bind replicates the specified mount. Rbind
+ replicates all the mounts in the tree belonging to the specified mount.
+ Rbind mount is bind mount applied to all the mounts in the tree.
+
+ If the source tree that is rbound has some unbindable mounts,
+ then the subtree under the unbindable mount is pruned in the new
+ location.
+
+ e.g. let's say we have the following mount tree.
+
+ A
+ / \
+ B C
+ / \ / \
+ D E F G
+
+ Let's say all the mounts in the tree except mount C are
+ of a type other than unbindable.
+
+ If this tree is rbound to, say, Z
+
+ We will have the following tree at the new location.
+
+ Z
+ |
+ A'
+ /
+ B' Note how the tree under C is pruned
+ / \ in the new location.
+ D' E'
+
+
+
+5d) Move semantics
+
+ Consider the following command
+
+ mount --move A B/b
+
+ where 'A' is the source mount, 'B' is the destination mount and 'b' is
+ the dentry in the destination mount.
+
+ The outcome depends on the type of the mount of 'A' and 'B'. The table
+ below is a quick reference.
+ ---------------------------------------------------------------------------
+ | MOVE MOUNT OPERATION |
+ |**************************************************************************
+ | source(A)->| shared | private | slave | unbindable |
+ | dest(B) | | | | |
+ | | | | | | |
+ | v | | | | |
+ |**************************************************************************
+ | shared | shared | shared |shared and slave| invalid |
+ | | | | | |
+ |non-shared| shared | private | slave | unbindable |
+ ***************************************************************************
+ NOTE: moving a mount residing under a shared mount is invalid.
+
+ Details follow:
+
+ 1. 'A' is a shared mount and 'B' is a shared mount. The mount 'A' is
+ mounted on mount 'B' at dentry 'b'. Also new mounts 'A1', 'A2'...'An'
+ are created and mounted at dentry 'b' on all mounts that receive
+ propagation from mount 'B'. A new propagation tree is created in the
+ exact same configuration as that of 'B'. This new propagation tree
+ contains all the new mounts 'A1', 'A2'... 'An'. And this new
+ propagation tree is appended to the already existing propagation tree
+ of 'A'.
+
+ 2. 'A' is a private mount and 'B' is a shared mount. The mount 'A' is
+ mounted on mount 'B' at dentry 'b'. Also new mounts 'A1', 'A2'... 'An'
+ are created and mounted at dentry 'b' on all mounts that receive
+ propagation from mount 'B'. The mount 'A' becomes a shared mount and a
+ propagation tree is created which is identical to that of
+ 'B'. This new propagation tree contains all the new mounts 'A1',
+ 'A2'... 'An'.
+
+ 3. 'A' is a slave mount of mount 'Z' and 'B' is a shared mount. The
+ mount 'A' is mounted on mount 'B' at dentry 'b'. Also new mounts 'A1',
+ 'A2'... 'An' are created and mounted at dentry 'b' on all mounts that
+ receive propagation from mount 'B'. A new propagation tree is created
+ in the exact same configuration as that of 'B'. This new propagation
+ tree contains all the new mounts 'A1', 'A2'... 'An'. And this new
+ propagation tree is appended to the already existing propagation tree of
+ 'A'. Mount 'A' continues to be the slave mount of 'Z' but it also
+ becomes 'shared'.
+
+ 4. 'A' is an unbindable mount and 'B' is a shared mount. The operation
+ is invalid, because mounting anything on the shared mount 'B' can
+ create new mounts that get mounted on the mounts that receive
+ propagation from 'B', and since the mount 'A' is unbindable, cloning
+ it to mount at other mountpoints is not possible.
+
+ 5. 'A' is a private mount and 'B' is a non-shared (private or slave or
+ unbindable) mount. The mount 'A' is mounted on mount 'B' at dentry 'b'.
+
+ 6. 'A' is a shared mount and 'B' is a non-shared mount. The mount 'A'
+ is mounted on mount 'B' at dentry 'b'. Mount 'A' continues to be a
+ shared mount.
+
+ 7. 'A' is a slave mount of mount 'Z' and 'B' is a non-shared mount.
+ The mount 'A' is mounted on mount 'B' at dentry 'b'. Mount 'A'
+ continues to be a slave mount of mount 'Z'.
+
+ 8. 'A' is an unbindable mount and 'B' is a non-shared mount. The mount
+ 'A' is mounted on mount 'B' at dentry 'b'. Mount 'A' continues to be an
+ unbindable mount.
+
+5e) Mount semantics
+
+ Consider the following command
+
+ mount device B/b
+
+ 'B' is the destination mount and 'b' is the dentry in the destination
+ mount.
+
+ The above operation is the same as bind operation with the exception
+ that the source mount is always a private mount.
+
+
+5f) Unmount semantics
+
+ Consider the following command
+
+ umount A
+
+ where 'A' is a mount mounted on mount 'B' at dentry 'b'.
+
+ If mount 'B' is shared, then all most-recently-mounted mounts at dentry
+ 'b' on mounts that receive propagation from mount 'B' and do not have
+ sub-mounts within them are unmounted.
+
+ Example: Let's say 'B1', 'B2', 'B3' are shared mounts that propagate to
+ each other.
+
+ let's say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount
+ 'B1', 'B2' and 'B3' respectively.
+
+ let's say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on
+ mount 'B1', 'B2' and 'B3' respectively.
+
+ if 'C1' is unmounted, all the mounts that are most-recently-mounted on
+ 'B1' and on the mounts that 'B1' propagates-to are unmounted.
+
+ 'B1' propagates to 'B2' and 'B3'. And the most recently mounted mount
+ on 'B2' at dentry 'b' is 'C2', and that of mount 'B3' is 'C3'.
+
+ So all 'C1', 'C2' and 'C3' should be unmounted.
+
+ If any of 'C2' or 'C3' has some child mounts, then that mount is not
+ unmounted, but all other mounts are unmounted. However, if 'C1' is told
+ to be unmounted and 'C1' has some sub-mounts, the umount operation
+ fails entirely.
+
+5g) Clone Namespace
+
+ A cloned namespace contains the same mounts as the parent
+ namespace.
+
+ Let's say 'A' and 'B' are the corresponding mounts in the parent and the
+ child namespace.
+
+ If 'A' is shared, then 'B' is also shared and 'A' and 'B' propagate to
+ each other.
+
+ If 'A' is a slave mount of 'Z', then 'B' is also the slave mount of
+ 'Z'.
+
+ If 'A' is a private mount, then 'B' is a private mount too.
+
+ If 'A' is an unbindable mount, then 'B' is an unbindable mount too.
+
+
+6) Quiz
+
+ A. What is the result of the following command sequence?
+
+ mount --bind /mnt /mnt
+ mount --make-shared /mnt
+ mount --bind /mnt /tmp
+ mount --move /tmp /mnt/1
+
+ what should the contents of /mnt, /mnt/1 and /mnt/1/1 be?
+ Should they all be identical? Or should /mnt and /mnt/1 be
+ identical only?
+
+
+ B. What is the result of the following command sequence?
+
+ mount --make-rshared /
+ mkdir -p /v/1
+ mount --rbind / /v/1
+
+ what should the content of /v/1/v/1 be?
+
+
+ C. What is the result of the following command sequence?
+
+ mount --bind /mnt /mnt
+ mount --make-shared /mnt
+ mkdir -p /mnt/1/2/3 /mnt/1/test
+ mount --bind /mnt/1 /tmp
+ mount --make-slave /mnt
+ mount --make-shared /mnt
+ mount --bind /mnt/1/2 /tmp1
+ mount --make-slave /mnt
+
+ At this point we have the first mount at /tmp and
+ its root dentry is 1. Let's call this mount 'A'.
+ And then we have a second mount at /tmp1 with root
+ dentry 2. Let's call this mount 'B'.
+ Next we have a third mount at /mnt with root dentry
+ mnt. Let's call this mount 'C'.
+
+ 'B' is the slave of 'A' and 'C' is a slave of 'B'
+ A -> B -> C
+
+ at this point if we execute the following command
+
+ mount --bind /bin /tmp/test
+
+ The mount is attempted on 'A'
+
+ will the mount propagate to 'B' and 'C' ?
+
+ what would the contents of
+ /mnt/1/test be?
+
+7) FAQ
+
+ Q1. Why is bind mount needed? How is it different from symbolic links?
+ Symbolic links can get stale if the destination mount gets
+ unmounted or moved. Bind mounts continue to exist even if the
+ other mount is unmounted or moved.
+
+ Q2. Why can't the shared subtree be implemented using exportfs?
+
+ exportfs is a heavyweight way of accomplishing part of what
+ shared subtree can do. I cannot imagine a way to implement the
+ semantics of slave mounts using exportfs.
+
+ Q3. Why is unbindable mount needed?
+
+ Let's say we want to replicate the mount tree at multiple
+ locations within the same subtree.
+
+ if one rbind mounts a tree within the same subtree 'n' times,
+ the number of mounts created grows factorially with 'n' (see
+ the arithmetic at the end of this example). Having unbindable
+ mounts can help prune the unneeded bind mounts. Here is an example.
+
+ step 1:
+ let's say the root tree has just two directories with
+ one vfsmount.
+ root
+ / \
+ tmp usr
+
+ And we want to replicate the tree at multiple
+ mountpoints under /root/tmp
+
+ step2:
+ mount --make-shared /root
+
+ mkdir -p /tmp/m1
+
+ mount --rbind /root /tmp/m1
+
+ the new tree now looks like this:
+
+ root
+ / \
+ tmp usr
+ /
+ m1
+ / \
+ tmp usr
+ /
+ m1
+
+ it has two vfsmounts
+
+ step3:
+ mkdir -p /tmp/m2
+ mount --rbind /root /tmp/m2
+
+ the new tree now looks like this:
+
+ root
+ / \
+ tmp usr
+ / \
+ m1 m2
+ / \ / \
+ tmp usr tmp usr
+ / \ /
+ m1 m2 m1
+ / \ / \
+ tmp usr tmp usr
+ / / \
+ m1 m1 m2
+ / \
+ tmp usr
+ / \
+ m1 m2
+
+ it has 6 vfsmounts
+
+ step 4:
+ mkdir -p /tmp/m3
+ mount --rbind /root /tmp/m3
+
+ I won't draw the tree... but it has 24 vfsmounts
+
+
+ at step i the number of vfsmounts is V[i] = i*V[i-1], with
+ V[1] = 1; that is, V[i] = i! (1, 2, 6, 24, ...), which grows
+ even faster than an exponential function. And this tree has way
+ more mounts than what we really needed in the first place.
+
+ One could use a series of umount at each step to prune
+ out the unneeded mounts. But there is a better solution.
+ Unbindable mounts come in handy here.
+
+ step 1:
+ let's say the root tree has just two directories with
+ one vfsmount.
+ root
+ / \
+ tmp usr
+
+ How do we set up the same tree at multiple locations under
+ /root/tmp
+
+ step2:
+ mount --bind /root/tmp /root/tmp
+
+ mount --make-rshared /root
+ mount --make-unbindable /root/tmp
+
+ mkdir -p /tmp/m1
+
+ mount --rbind /root /tmp/m1
+
+ the new tree now looks like this:
+
+ root
+ / \
+ tmp usr
+ /
+ m1
+ / \
+ tmp usr
+
+ step3:
+ mkdir -p /tmp/m2
+ mount --rbind /root /tmp/m2
+
+ the new tree now looks like this:
+
+ root
+ / \
+ tmp usr
+ / \
+ m1 m2
+ / \ / \
+ tmp usr tmp usr
+
+ step4:
+
+ mkdir -p /tmp/m3
+ mount --rbind /root /tmp/m3
+
+ the new tree now looks like this:
+
+ root
+ / \
+ tmp usr
+ / \ \
+ m1 m2 m3
+ / \ / \ / \
+ tmp usr tmp usr tmp usr
+
+8) Implementation
+
+8A) Datastructure
+
+ 4 new fields are introduced to struct vfsmount
+ ->mnt_share
+ ->mnt_slave_list
+ ->mnt_slave
+ ->mnt_master
+
+ ->mnt_share links together all the mounts to/from which this vfsmount
+ sends/receives propagation events.
+
+ ->mnt_slave_list links all the mounts to which this vfsmount
+ propagates.
+
+ ->mnt_slave links together all the slaves that its master vfsmount
+ propagates to.
+
+ ->mnt_master points to the master vfsmount from which this vfsmount
+ receives propagation.
+
+ ->mnt_flags takes two more flags to indicate the propagation status of
+ the vfsmount. MNT_SHARED indicates that the vfsmount is a shared
+ vfsmount. MNT_UNBINDABLE indicates that the vfsmount cannot be
+ replicated.
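+
+ As a sketch, these fields sit in struct vfsmount roughly as below
+ (the list_head member types are inferred from the cyclic-list
+ description that follows, not quoted from the header):
+
+ struct vfsmount {
+         ...
+         struct list_head mnt_share;      /* peer-group list entry */
+         struct list_head mnt_slave_list; /* list of our slaves */
+         struct list_head mnt_slave;      /* slave list entry */
+         struct vfsmount *mnt_master;     /* whose slave we are */
+         ...
+ };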
+
+ All the shared vfsmounts in a peer group form a cyclic list through
+ ->mnt_share.
+
+ All vfsmounts with the same ->mnt_master form a cyclic list anchored
+ in ->mnt_master->mnt_slave_list and going through ->mnt_slave.
+
+ ->mnt_master can point to arbitrary (and possibly different) members
+ of master peer group. To find all immediate slaves of a peer group
+ you need to go through _all_ ->mnt_slave_list of its members.
+ Conceptually it's just a single set - distribution among the
+ individual lists does not affect propagation or the way propagation
+ tree is modified by operations.
+
+ An example propagation tree looks as shown in the figure below.
+ [ NOTE: Though it looks like a forest, if we consider all the shared
+ mounts as a conceptual entity called 'pnode', it becomes a tree]
+
+
+ A <--> B <--> C <---> D
+ /|\ /| |\
+ / F G J K H I
+ /
+ E<-->K
+ /|\
+ M L N
+
+ In the above figure A, B, C and D are all shared and propagate to each
+ other. 'A' has got 3 slave mounts 'E', 'F' and 'G'. 'C' has got 2 slave
+ mounts 'J' and 'K', and 'D' has got two slave mounts 'H' and 'I'.
+ 'E' is also shared with 'K' and they propagate to each other. And
+ 'K' has 3 slaves 'M', 'L' and 'N'.
+
+ A's ->mnt_share links with the ->mnt_share of 'B' 'C' and 'D'
+
+ A's ->mnt_slave_list links with ->mnt_slave of 'E', 'K', 'F' and 'G'
+
+ E's ->mnt_share links with ->mnt_share of K
+ 'E', 'K', 'F', 'G' have their ->mnt_master point to struct
+ vfsmount of 'A'
+ 'M', 'L', 'N' have their ->mnt_master point to struct vfsmount of 'K'
+ K's ->mnt_slave_list links with ->mnt_slave of 'M', 'L' and 'N'
+
+ C's ->mnt_slave_list links with ->mnt_slave of 'J' and 'K'
+ J and K's ->mnt_master points to struct vfsmount of C
+ and finally D's ->mnt_slave_list links with ->mnt_slave of 'H' and 'I'
+ 'H' and 'I' have their ->mnt_master pointing to struct vfsmount of 'D'.
+
+
+ NOTE: The propagation tree is orthogonal to the mount tree.
+
+
+8B) Algorithm:
+
+ The crux of the implementation resides in rbind/move operation.
+
+ The overall algorithm breaks the operation into 3 phases: (look at
+ attach_recursive_mnt() and propagate_mnt())
+
+ 1. prepare phase.
+ 2. commit phase.
+ 3. abort phase.
+
+ Prepare phase:
+
+ for each mount in the source tree:
+ a) Create the necessary number of mount trees to
+ be attached to each of the mounts that receive
+ propagation from the destination mount.
+ b) Do not attach any of the trees to its destination.
+ However, note down its ->mnt_parent and ->mnt_mountpoint.
+ c) Link all the new mounts to form a propagation tree that
+ is identical to the propagation tree of the destination
+ mount.
+
+ If this phase is successful, there should be 'n' new
+ propagation trees, where 'n' is the number of mounts in the
+ source tree.
+
+ There should also be 'm' new mount trees, where 'm' is the
+ number of mounts to which the destination mount propagates.
+
+ If all allocations succeed, go to the commit phase; if any
+ memory allocation fails, go to the abort phase.
+
+ Commit phase:
+ attach each of the mount trees to its corresponding
+ destination mount.
+
+ Abort phase:
+ delete all the newly created trees.
+
+ NOTE: all the propagation-related functionality resides in the file
+ pnode.c
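+
+ As a sketch of this three-phase shape (the helper names below are
+ made up for illustration; the real code is attach_recursive_mnt() in
+ fs/namespace.c and propagate_mnt() in fs/pnode.c):
+
+     /* illustrative only -- not the actual kernel functions */
+     static int attach_with_propagation(struct vfsmount *source_tree,
+                                        struct vfsmount *dest)
+     {
+             /* 1. prepare: clone one tree per mount that receives
+              *    propagation from 'dest', noting each clone's
+              *    ->mnt_parent/->mnt_mountpoint without attaching it */
+             if (prepare_propagation_trees(source_tree, dest))
+                     goto abort;
+
+             /* 2. commit: attach every cloned tree at the parent and
+              *    mountpoint recorded during the prepare phase */
+             commit_propagation_trees();
+             return 0;
+
+     abort:
+             /* 3. abort: an allocation failed; free every tree that
+              *    the prepare phase managed to build */
+             abort_propagation_trees();
+             return -ENOMEM;
+     }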
+
+
+------------------------------------------------------------------------
+
+version 0.1 (created the initial document, Ram Pai linuxram@us.ibm.com)
+version 0.2 (Incorporated comments from Al Viro)
diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 13cba955cb5a..2f27f391c7cc 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -167,7 +167,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
spdif - Support SPDIF I/O
- Default: disabled
- Module supports autoprobe and multiple chips (max 8).
+ This module supports one chip and autoprobe.
The power-management is supported.
@@ -206,7 +206,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
See "AC97 Quirk Option" section below.
spdif_aclink - S/PDIF transfer over AC-link (default = 1)
- This module supports up to 8 cards and autoprobe.
+ This module supports one card and autoprobe.
ATI IXP has two different methods to control SPDIF output. One is
over AC-link and another is over the "direct" SPDIF output. The
@@ -218,7 +218,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
Module for ATI IXP 150/200/250 AC97 modem controllers.
- Module supports up to 8 cards.
+ This module supports one card and autoprobe.
Note: The default index value of this module is -2, i.e. the first
slot is excluded.
@@ -637,7 +637,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
model - force the model name
position_fix - Fix DMA pointer (0 = auto, 1 = none, 2 = POSBUF, 3 = FIFO size)
- Module supports up to 8 cards.
+ This module supports one card and autoprobe.
Each codec may have a model table for different configurations.
If your machine isn't listed there, the default (usually minimal)
@@ -663,6 +663,10 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
adjusted. Appearing only when compiled with
$CONFIG_SND_DEBUG=y
+ ALC260
+ hp HP machines
+ fujitsu Fujitsu S7020
+
CMI9880
minimal 3-jack in back
min_fp 3-jack in back, 2-jack in front
@@ -811,7 +815,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
semaphores (e.g. on some ASUS laptops)
(default off)
- Module supports autoprobe and multiple bus-master chips (max 8).
+ This module supports one chip and autoprobe.
Note: the latest driver supports auto-detection of chip clock.
if you still encounter too fast playback, specify the clock
@@ -830,7 +834,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
ac97_clock - AC'97 codec clock base (0 = auto-detect)
- This module supports up to 8 cards and autoprobe.
+ This module supports one card and autoprobe.
Note: The default index value of this module is -2, i.e. the first
slot is excluded.
@@ -950,8 +954,10 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
use_cache - 0 or 1 (disabled by default)
vaio_hack - alias buffer_top=0x25a800
reset_workaround - enable AC97 RESET workaround for some laptops
+ reset_workaround2 - enable extended AC97 RESET workaround for some
+ other laptops
- Module supports autoprobe and multiple chips (max 8).
+ This module supports one chip and autoprobe.
The power-management is supported.
@@ -980,6 +986,11 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
workaround is enabled automatically. For other laptops with a
hard freeze, you can try reset_workaround=1 option.
+ Note: Dell Latitude CSx laptops have another problem regarding
+ AC97 RESET. On these laptops, the reset_workaround2 option is
+ turned on by default. This option is worth trying if the
+ previous reset_workaround option doesn't help.
+
Note: This driver is really crappy. It's a porting from the
OSS driver, which is a result of black-magic reverse engineering.
The detection of codec will fail if the driver is loaded *after*
@@ -1310,7 +1321,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
ac97_quirk - AC'97 workaround for strange hardware
See "AC97 Quirk Option" section below.
- Module supports autoprobe and multiple bus-master chips (max 8).
+ This module supports one chip and autoprobe.
Note: on some SMP motherboards like MSI 694D the interrupts might
not be generated properly. In such a case, please try to
@@ -1352,7 +1363,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
ac97_clock - AC'97 codec clock base (default 48000Hz)
- Module supports up to 8 cards.
+ This module supports one card and autoprobe.
Note: The default index value of this module is -2, i.e. the first
slot is excluded.
diff --git a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
index 24e85520890b..260334c98d95 100644
--- a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
+++ b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
@@ -18,8 +18,8 @@
- March 6, 2005
- 0.3.4
+ October 6, 2005
+ 0.3.5
@@ -30,7 +30,7 @@
- Copyright (c) 2002-2004 Takashi Iwai tiwai@suse.de
+ Copyright (c) 2002-2005 Takashi Iwai tiwai@suse.de
@@ -1433,25 +1433,10 @@
res_port) {
- release_resource(chip->res_port);
- kfree_nocheck(chip->res_port);
- }
+ release_and_free_resource(chip->res_port);
]]>
-
- As you can see, the resource pointer is also to be freed
- via kfree_nocheck() after
- release_resource() is called. You
- cannot use kfree() here, because on ALSA,
- kfree() may be a wrapper to its own
- allocator with the memory debugging. Since the resource pointer
- is allocated externally outside the ALSA, it must be released
- via the native
- kfree().
- kfree_nocheck() is used for that; it calls
- the native kfree() without wrapper.
@@ -2190,8 +2175,7 @@ struct _snd_pcm_runtime {
unsigned int rate_den;
/* -- SW params -- */
- int tstamp_timespec; /* use timeval (0) or timespec (1) */
- snd_pcm_tstamp_t tstamp_mode; /* mmap timestamp is updated */
+ struct timespec tstamp_mode; /* mmap timestamp is updated */
unsigned int period_step;
unsigned int sleep_min; /* min ticks to sleep */
snd_pcm_uframes_t xfer_align; /* xfer size need to be a multiple */
@@ -3709,8 +3693,7 @@ struct _snd_pcm_runtime {
Here, the chip instance is retrieved via
snd_kcontrol_chip() macro. This macro
- converts from kcontrol->private_data to the type defined by
- chip_t. The
+ just accesses to kcontrol->private_data. The
kcontrol->private_data field is
given as the argument of snd_ctl_new()
(see the later subsection
@@ -5998,32 +5981,23 @@ struct _snd_pcm_runtime {
The first argument is the expression to evaluate, and the
second argument is the action if it fails. When
CONFIG_SND_DEBUG, is set, it will show an
- error message such as BUG? (xxx) (called from
- yyy). When no debug flag is set, this is
- ignored.
+ error message such as BUG? (xxx)
+ together with stack trace.
-
-
-
- snd_runtime_check()
- This macro is quite similar with
- snd_assert(). Unlike
- snd_assert(), the expression is always
- evaluated regardless of
- CONFIG_SND_DEBUG. When
- CONFIG_SND_DEBUG is set, the macro will
- show a message like ERROR (xx) (called from
- yyy).
+ When no debug flag is set, this macro is ignored.
snd_BUG()
- It calls snd_assert(0,) -- that is, just
- prints the error message at the point. It's useful to show that
- a fatal error happens there.
+ It shows BUG? message and
+ stack trace as well as snd_assert at the point.
+ It's useful to show that a fatal error happens there.
+
+
+ When no debug flag is set, this macro is ignored.
diff --git a/Documentation/sparse.txt b/Documentation/sparse.txt
index 1829009db771..3f1c5464b1c9 100644
--- a/Documentation/sparse.txt
+++ b/Documentation/sparse.txt
@@ -41,9 +41,9 @@ sure that bitwise types don't get mixed up (little-endian vs big-endian
vs cpu-endian vs whatever), and there the constant "0" really _is_
special.
-Modify top-level Makefile to say
+Use
-CHECK = sparse -Wbitwise
+ make C=[12] CF=-Wbitwise
or you don't get any checking at all.
diff --git a/Documentation/video4linux/bttv/README.freeze b/Documentation/video4linux/bttv/README.freeze
index 51f8d4379a94..4259dccc8287 100644
--- a/Documentation/video4linux/bttv/README.freeze
+++ b/Documentation/video4linux/bttv/README.freeze
@@ -27,9 +27,9 @@ information out of a register+stack dump printed by the kernel on
protection faults (so-called "kernel oops").
If you run into some kind of deadlock, you can try to dump a call trace
-for each process using sysrq-t (see Documentation/sysrq.txt). ksymoops
-will translate these dumps into kernel symbols too. This way it is
-possible to figure where *exactly* some process in "D" state is stuck.
+for each process using sysrq-t (see Documentation/sysrq.txt).
+This way it is possible to figure where *exactly* some process in "D"
+state is stuck.
I've seen reports that bttv 0.7.x crashes whereas 0.8.x works rock solid
for some people. Thus probably a small buglet left somewhere in bttv
diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt
index 1b9bcd1fe98b..1ad9af1ca4d0 100644
--- a/Documentation/vm/hugetlbpage.txt
+++ b/Documentation/vm/hugetlbpage.txt
@@ -13,12 +13,13 @@ This optimization is more critical now as bigger and bigger physical memories
Users can use the huge page support in Linux kernel by either using the mmap
system call or standard SYSv shared memory system calls (shmget, shmat).
-First the Linux kernel needs to be built with CONFIG_HUGETLB_PAGE (present
-under Processor types and feature) and CONFIG_HUGETLBFS (present under file
-system option on config menu) config options.
+First the Linux kernel needs to be built with the CONFIG_HUGETLBFS
+(present under "File systems") and CONFIG_HUGETLB_PAGE (selected
+automatically when CONFIG_HUGETLBFS is selected) configuration
+options.
The kernel built with hugepage support should show the number of configured
-hugepages in the system by running the "cat /proc/meminfo" command.
+hugepages in the system by running the "cat /proc/meminfo" command.
/proc/meminfo also provides information about the total number of hugetlb
pages configured in the kernel. It also displays information about the
@@ -38,19 +39,19 @@ in the kernel.
/proc/sys/vm/nr_hugepages indicates the current number of configured hugetlb
pages in the kernel. Super user can dynamically request more (or free some
-pre-configured) hugepages.
-The allocation( or deallocation) of hugetlb pages is posible only if there are
+pre-configured) hugepages.
+The allocation (or deallocation) of hugetlb pages is possible only if there are
enough physically contiguous free pages in system (freeing of hugepages is
-possible only if there are enough hugetlb pages free that can be transfered
+possible only if there are enough hugetlb pages free that can be transferred
back to regular memory pool).
Pages that are used as hugetlb pages are reserved inside the kernel and can
-not be used for other purposes.
+not be used for other purposes.
Once the kernel with Hugetlb page support is built and running, a user can
use either the mmap system call or shared memory system calls to start using
the huge pages. It is required that the system administrator preallocate
-enough memory for huge page purposes.
+enough memory for huge page purposes.
Use the following command to dynamically allocate/deallocate hugepages:
@@ -80,9 +81,9 @@ memory (huge pages) allowed for that filesystem (/mnt/huge). The size is
rounded down to HPAGE_SIZE. The option nr_inode sets the maximum number of
inodes that /mnt/huge can use. If the size or nr_inode options are not
provided on command line then no limits are set. For size and nr_inodes
-options, you can use [G|g]/[M|m]/[K|k] to represent giga/mega/kilo. For
-example, size=2K has the same meaning as size=2048. An example is given at
-the end of this document.
+options, you can use [G|g]/[M|m]/[K|k] to represent giga/mega/kilo. For
+example, size=2K has the same meaning as size=2048. An example is given at
+the end of this document.
read and write system calls are not supported on files that reside on hugetlb
file systems.
diff --git a/MAINTAINERS b/MAINTAINERS
index f08a1434b217..995bfd8e6e7f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -297,6 +297,11 @@ P: Richard Purdie
M: rpurdie@rpsys.net
S: Maintained
+ARM/TOSA MACHINE SUPPORT
+P: Dirk Opfer
+M: dirk@opfer-online.de
+S: Maintained
+
ARM/PLEB SUPPORT
P: Peter Chubb
M: pleb@gelato.unsw.edu.au
@@ -1072,6 +1077,26 @@ P: Jaroslav Kysela
M: perex@suse.cz
S: Maintained
+HPET: High Precision Event Timers driver (hpet.c)
+P: Clemens Ladisch
+M: clemens@ladisch.de
+S: Maintained
+
+HPET: i386
+P: Venkatesh Pallipadi (Venki)
+M: venkatesh.pallipadi@intel.com
+S: Maintained
+
+HPET: x86_64
+P: Andi Kleen and Vojtech Pavlik
+M: ak@muc.de and vojtech@suse.cz
+S: Maintained
+
+HPET: ACPI hpet.c
+P: Bob Picco
+M: bob.picco@hp.com
+S: Maintained
+
HPFS FILESYSTEM
P: Mikulas Patocka
M: mikulas@artax.karlin.mff.cuni.cz
@@ -2046,6 +2071,12 @@ P: Matt Mackall
M: mpm@selenic.com
S: Maintained
+RAPIDIO SUBSYSTEM
+P: Matt Porter
+M: mporter@kernel.crashing.org
+L: linux-kernel@vger.kernel.org
+S: Maintained
+
REAL TIME CLOCK DRIVER
P: Paul Gortmaker
M: p_gortmaker@yahoo.com
@@ -2450,10 +2481,10 @@ L: linux-kernel@vger.kernel.org
S: Maintained
TRIVIAL PATCHES
-P: Rusty Russell
-M: trivial@rustcorp.com.au
+P: Adrian Bunk
+M: trivial@kernel.org
L: linux-kernel@vger.kernel.org
-W: http://www.kernel.org/pub/linux/kernel/people/rusty/trivial/
+W: http://www.kernel.org/pub/linux/kernel/people/bunk/trivial/
S: Maintained
TMS380 TOKEN-RING NETWORK DRIVER
diff --git a/Makefile b/Makefile
index 79601320ac3e..ea96da1572d5 100644
--- a/Makefile
+++ b/Makefile
@@ -346,7 +346,8 @@ AFLAGS_KERNEL =
# Use LINUXINCLUDE when you must reference the include/ directory.
# Needed to be compatible with the O= option
LINUXINCLUDE := -Iinclude \
- $(if $(KBUILD_SRC),-Iinclude2 -I$(srctree)/include)
+ $(if $(KBUILD_SRC),-Iinclude2 -I$(srctree)/include) \
+ -imacros include/linux/autoconf.h
CPPFLAGS := -D__KERNEL__ $(LINUXINCLUDE)
@@ -582,7 +583,7 @@ export MODLIB
ifeq ($(KBUILD_EXTMOD),)
-core-y += kernel/ mm/ fs/ ipc/ security/ crypto/
+core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/
vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
$(core-y) $(core-m) $(drivers-y) $(drivers-m) \
@@ -1249,11 +1250,6 @@ tags: FORCE
# Scripts to check various things for consistency
# ---------------------------------------------------------------------------
-configcheck:
- find * $(RCS_FIND_IGNORE) \
- -name '*.[hcS]' -type f -print | sort \
- | xargs $(PERL) -w scripts/checkconfig.pl
-
includecheck:
find * $(RCS_FIND_IGNORE) \
-name '*.[hcS]' -type f -print | sort \
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 296bc03d1cf1..3bfef0934c9d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -324,7 +324,7 @@ menu "Kernel Features"
config SMP
bool "Symmetric Multi-Processing (EXPERIMENTAL)"
- depends on EXPERIMENTAL && BROKEN #&& n
+ depends on EXPERIMENTAL && REALVIEW_MPCORE
help
This enables support for systems with more than one CPU. If you have
a system with only one CPU, like most personal computers, say N. If
@@ -356,6 +356,16 @@ config HOTPLUG_CPU
Say Y here to experiment with turning CPUs off and on. CPUs
can be controlled through /sys/devices/system/cpu.
+config LOCAL_TIMERS
+ bool "Use local timer interrupts"
+ depends on SMP && n
+ default y
+ help
+ Enable support for local timers on SMP platforms, rather than the
+ legacy IPI broadcast method. Local timers allow the system
+ accounting to be spread across the timer interval, preventing a
+ "thundering herd" at every timer tick.
+
config PREEMPT
bool "Preemptible Kernel (EXPERIMENTAL)"
depends on EXPERIMENTAL
@@ -585,7 +595,7 @@ config FPE_NWFPE
config FPE_NWFPE_XP
bool "Support extended precision"
- depends on FPE_NWFPE && !CPU_BIG_ENDIAN
+ depends on FPE_NWFPE
help
Say Y to include 80-bit support in the kernel floating-point
emulator. Otherwise, only 32 and 64-bit support is compiled in.
diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c
index 50f13eec6cd7..5ab94584baee 100644
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -283,8 +283,14 @@ void flush_window(void)
putstr(".");
}
+#ifndef arch_error
+#define arch_error(x)
+#endif
+
static void error(char *x)
{
+ arch_error(x);
+
putstr("\n\n");
putstr(x);
putstr("\n\n -- System halted");
diff --git a/arch/arm/common/scoop.c b/arch/arm/common/scoop.c
index bb4eff614413..c7fdf390cef9 100644
--- a/arch/arm/common/scoop.c
+++ b/arch/arm/common/scoop.c
@@ -19,12 +19,6 @@
#define SCOOP_REG(d,adr) (*(volatile unsigned short*)(d +(adr)))
-/* PCMCIA to Scoop linkage structures for pxa2xx_sharpsl.c
- There is no easy way to link multiple scoop devices into one
- single entity for the pxa2xx_pcmcia device */
-int scoop_num;
-struct scoop_pcmcia_dev *scoop_devs;
-
struct scoop_dev {
void *base;
spinlock_t scoop_lock;
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 7b17a87a3311..7a3261f0bf79 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -9,6 +9,7 @@
*/
#include
#include
+#include
#include
#include
#include
@@ -126,6 +127,9 @@ EXPORT_SYMBOL(__put_user_2);
EXPORT_SYMBOL(__put_user_4);
EXPORT_SYMBOL(__put_user_8);
+ /* crypto hash */
+EXPORT_SYMBOL(sha_transform);
+
/* gcc lib functions */
EXPORT_SYMBOL(__ashldi3);
EXPORT_SYMBOL(__ashrdi3);
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index be439cab92c6..d9fb819bf7cc 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -47,6 +47,13 @@
movne r0, sp
adrne lr, 1b
bne do_IPI
+
+#ifdef CONFIG_LOCAL_TIMERS
+ test_for_ltirq r0, r6, r5, lr
+ movne r0, sp
+ adrne lr, 1b
+ bne do_local_timer
+#endif
#endif
.endm
@@ -785,7 +792,7 @@ __kuser_helper_end:
* SP points to a minimal amount of processor-private memory, the address
* of which is copied into r0 for the mode specific abort handler.
*/
- .macro vector_stub, name, correction=0
+ .macro vector_stub, name, mode, correction=0
.align 5
vector_\name:
@@ -805,15 +812,14 @@ vector_\name:
@ Prepare for SVC32 mode. IRQs remain disabled.
@
mrs r0, cpsr
- bic r0, r0, #MODE_MASK
- orr r0, r0, #SVC_MODE
+ eor r0, r0, #(\mode ^ SVC_MODE)
msr spsr_cxsf, r0
@
@ the branch table must immediately follow this code
@
- mov r0, sp
and lr, lr, #0x0f
+ mov r0, sp
ldr lr, [pc, lr, lsl #2]
movs pc, lr @ branch to handler in SVC mode
.endm
@@ -823,7 +829,7 @@ __stubs_start:
/*
* Interrupt dispatcher
*/
- vector_stub irq, 4
+ vector_stub irq, IRQ_MODE, 4
.long __irq_usr @ 0 (USR_26 / USR_32)
.long __irq_invalid @ 1 (FIQ_26 / FIQ_32)
@@ -846,7 +852,7 @@ __stubs_start:
* Data abort dispatcher
* Enter in ABT mode, spsr = USR CPSR, lr = USR PC
*/
- vector_stub dabt, 8
+ vector_stub dabt, ABT_MODE, 8
.long __dabt_usr @ 0 (USR_26 / USR_32)
.long __dabt_invalid @ 1 (FIQ_26 / FIQ_32)
@@ -869,7 +875,7 @@ __stubs_start:
* Prefetch abort dispatcher
* Enter in ABT mode, spsr = USR CPSR, lr = USR PC
*/
- vector_stub pabt, 4
+ vector_stub pabt, ABT_MODE, 4
.long __pabt_usr @ 0 (USR_26 / USR_32)
.long __pabt_invalid @ 1 (FIQ_26 / FIQ_32)
@@ -892,7 +898,7 @@ __stubs_start:
* Undef instr entry dispatcher
* Enter in UND mode, spsr = SVC/USR CPSR, lr = SVC/USR PC
*/
- vector_stub und
+ vector_stub und, UND_MODE
.long __und_usr @ 0 (USR_26 / USR_32)
.long __und_invalid @ 1 (FIQ_26 / FIQ_32)
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 9def4404e1f2..d7099dbbb879 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -264,6 +264,7 @@ int show_interrupts(struct seq_file *p, void *v)
#endif
#ifdef CONFIG_SMP
show_ipi_list(p);
+ show_local_irqs(p);
#endif
seq_printf(p, "Err: %10lu\n", irq_err_count);
}
@@ -995,7 +996,7 @@ void __init init_irq_proc(void)
struct proc_dir_entry *dir;
int irq;
- dir = proc_mkdir("irq", 0);
+ dir = proc_mkdir("irq", NULL);
if (!dir)
return;
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 9bd8609a2926..9a340e790da5 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -648,7 +648,7 @@ static int ptrace_setwmmxregs(struct task_struct *tsk, void __user *ufp)
#endif
-static int do_ptrace(int request, struct task_struct *child, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
unsigned long tmp;
int ret;
@@ -782,53 +782,6 @@ static int do_ptrace(int request, struct task_struct *child, long addr, long dat
return ret;
}
-asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
-{
- struct task_struct *child;
- int ret;
-
- lock_kernel();
- ret = -EPERM;
- if (request == PTRACE_TRACEME) {
- /* are we already being traced? */
- if (current->ptrace & PT_PTRACED)
- goto out;
- ret = security_ptrace(current->parent, current);
- if (ret)
- goto out;
- /* set the ptrace bit in the process flags. */
- current->ptrace |= PT_PTRACED;
- ret = 0;
- goto out;
- }
- ret = -ESRCH;
- read_lock(&tasklist_lock);
- child = find_task_by_pid(pid);
- if (child)
- get_task_struct(child);
- read_unlock(&tasklist_lock);
- if (!child)
- goto out;
-
- ret = -EPERM;
- if (pid == 1) /* you may not mess with init */
- goto out_tsk;
-
- if (request == PTRACE_ATTACH) {
- ret = ptrace_attach(child);
- goto out_tsk;
- }
- ret = ptrace_check_attach(child, request == PTRACE_KILL);
- if (ret == 0)
- ret = do_ptrace(request, child, addr, data);
-
-out_tsk:
- put_task_struct(child);
-out:
- unlock_kernel();
- return ret;
-}
-
asmlinkage void syscall_trace(int why, struct pt_regs *regs)
{
unsigned long ip;
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index c9b69771f92e..85774165e9fd 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -338,7 +338,8 @@ void cpu_init(void)
BUG();
}
- dump_cpu_info(cpu);
+ if (system_state == SYSTEM_BOOTING)
+ dump_cpu_info(cpu);
/*
* setup stacks for re-entrant exception handlers
@@ -838,7 +839,12 @@ static int c_show(struct seq_file *m, void *v)
#if defined(CONFIG_SMP)
for_each_online_cpu(i) {
- seq_printf(m, "Processor\t: %d\n", i);
+ /*
+ * glibc reads /proc/cpuinfo to determine the number of
+ * online processors, looking for lines beginning with
+ * "processor". Give glibc what it expects.
+ */
+ seq_printf(m, "processor\t: %d\n", i);
seq_printf(m, "BogoMIPS\t: %lu.%02lu\n\n",
per_cpu(cpu_data, i).loops_per_jiffy / (500000UL/HZ),
(per_cpu(cpu_data, i).loops_per_jiffy / (5000UL/HZ)) % 100);
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index edb5a406922f..77e2e9ca89fa 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -142,7 +142,7 @@ int __cpuinit __cpu_up(unsigned int cpu)
ret = -EIO;
}
- secondary_data.stack = 0;
+ secondary_data.stack = NULL;
secondary_data.pgdir = 0;
*pmd_offset(pgd, PHYS_OFFSET) = __pmd(0);
@@ -184,6 +184,11 @@ int __cpuexit __cpu_disable(void)
*/
migrate_irqs();
+ /*
+ * Stop the local timer for this CPU.
+ */
+ local_timer_stop(cpu);
+
/*
* Flush user cache and TLB mappings, and then remove this CPU
* from the vm mask set of all processes.
@@ -289,6 +294,11 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
*/
cpu_set(cpu, cpu_online_map);
+ /*
+ * Setup local timer for this CPU.
+ */
+ local_timer_setup(cpu);
+
/*
* OK, it's off to the idle thread for us
*/
@@ -359,8 +369,8 @@ static void send_ipi_message(cpumask_t callmap, enum ipi_msg_type msg)
* You must not call this function with disabled interrupts, from a
* hardware interrupt handler, nor from a bottom half handler.
*/
-int smp_call_function_on_cpu(void (*func)(void *info), void *info, int retry,
- int wait, cpumask_t callmap)
+static int smp_call_function_on_cpu(void (*func)(void *info), void *info,
+ int retry, int wait, cpumask_t callmap)
{
struct smp_call_struct data;
unsigned long timeout;
@@ -454,6 +464,18 @@ void show_ipi_list(struct seq_file *p)
seq_putc(p, '\n');
}
+void show_local_irqs(struct seq_file *p)
+{
+ unsigned int cpu;
+
+ seq_printf(p, "LOC: ");
+
+ for_each_present_cpu(cpu)
+ seq_printf(p, "%10u ", irq_stat[cpu].local_timer_irqs);
+
+ seq_putc(p, '\n');
+}
+
static void ipi_timer(struct pt_regs *regs)
{
int user = user_mode(regs);
@@ -464,6 +486,18 @@ static void ipi_timer(struct pt_regs *regs)
irq_exit();
}
+#ifdef CONFIG_LOCAL_TIMERS
+asmlinkage void do_local_timer(struct pt_regs *regs)
+{
+ int cpu = smp_processor_id();
+
+ if (local_timer_ack()) {
+ irq_stat[cpu].local_timer_irqs++;
+ ipi_timer(regs);
+ }
+}
+#endif
+
/*
* ipi_call_function - handle IPI from smp_call_function()
*
@@ -515,7 +549,7 @@ static void ipi_cpu_stop(unsigned int cpu)
*
* Bit 0 - Inter-processor function call
*/
-void do_IPI(struct pt_regs *regs)
+asmlinkage void do_IPI(struct pt_regs *regs)
{
unsigned int cpu = smp_processor_id();
struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
diff --git a/arch/arm/mach-aaec2000/clock.c b/arch/arm/mach-aaec2000/clock.c
index 99e019169dda..0340ddc4824e 100644
--- a/arch/arm/mach-aaec2000/clock.c
+++ b/arch/arm/mach-aaec2000/clock.c
@@ -14,6 +14,7 @@
#include
#include
#include
+#include
#include
#include
diff --git a/arch/arm/mach-epxa10db/mm.c b/arch/arm/mach-epxa10db/mm.c
index e8832d0910ee..cfd0d2182d44 100644
--- a/arch/arm/mach-epxa10db/mm.c
+++ b/arch/arm/mach-epxa10db/mm.c
@@ -25,6 +25,7 @@
#include
#include
#include
+#include
#include
diff --git a/arch/arm/mach-integrator/impd1.c b/arch/arm/mach-integrator/impd1.c
index a1b153d1626c..a4bafee77a06 100644
--- a/arch/arm/mach-integrator/impd1.c
+++ b/arch/arm/mach-integrator/impd1.c
@@ -420,8 +420,7 @@ static int impd1_probe(struct lm_device *dev)
free_impd1:
if (impd1 && impd1->base)
iounmap(impd1->base);
- if (impd1)
- kfree(impd1);
+ kfree(impd1);
release_lm:
release_mem_region(dev->resource.start, SZ_4K);
return ret;
diff --git a/arch/arm/mach-ixp2000/core.c b/arch/arm/mach-ixp2000/core.c
index df140962bb0f..6851abaf5524 100644
--- a/arch/arm/mach-ixp2000/core.c
+++ b/arch/arm/mach-ixp2000/core.c
@@ -84,63 +84,54 @@ static struct map_desc ixp2000_io_desc[] __initdata = {
.virtual = IXP2000_CAP_VIRT_BASE,
.pfn = __phys_to_pfn(IXP2000_CAP_PHYS_BASE),
.length = IXP2000_CAP_SIZE,
- .type = MT_DEVICE
+ .type = MT_IXP2000_DEVICE,
}, {
.virtual = IXP2000_INTCTL_VIRT_BASE,
.pfn = __phys_to_pfn(IXP2000_INTCTL_PHYS_BASE),
.length = IXP2000_INTCTL_SIZE,
- .type = MT_DEVICE
+ .type = MT_IXP2000_DEVICE,
}, {
.virtual = IXP2000_PCI_CREG_VIRT_BASE,
.pfn = __phys_to_pfn(IXP2000_PCI_CREG_PHYS_BASE),
.length = IXP2000_PCI_CREG_SIZE,
- .type = MT_DEVICE
+ .type = MT_IXP2000_DEVICE,
}, {
.virtual = IXP2000_PCI_CSR_VIRT_BASE,
.pfn = __phys_to_pfn(IXP2000_PCI_CSR_PHYS_BASE),
.length = IXP2000_PCI_CSR_SIZE,
- .type = MT_DEVICE
+ .type = MT_IXP2000_DEVICE,
}, {
.virtual = IXP2000_MSF_VIRT_BASE,
.pfn = __phys_to_pfn(IXP2000_MSF_PHYS_BASE),
.length = IXP2000_MSF_SIZE,
- .type = MT_DEVICE
+ .type = MT_IXP2000_DEVICE,
}, {
.virtual = IXP2000_PCI_IO_VIRT_BASE,
.pfn = __phys_to_pfn(IXP2000_PCI_IO_PHYS_BASE),
.length = IXP2000_PCI_IO_SIZE,
- .type = MT_DEVICE
+ .type = MT_IXP2000_DEVICE,
}, {
.virtual = IXP2000_PCI_CFG0_VIRT_BASE,
.pfn = __phys_to_pfn(IXP2000_PCI_CFG0_PHYS_BASE),
.length = IXP2000_PCI_CFG0_SIZE,
- .type = MT_DEVICE
+ .type = MT_IXP2000_DEVICE,
}, {
.virtual = IXP2000_PCI_CFG1_VIRT_BASE,
.pfn = __phys_to_pfn(IXP2000_PCI_CFG1_PHYS_BASE),
.length = IXP2000_PCI_CFG1_SIZE,
- .type = MT_DEVICE
+ .type = MT_IXP2000_DEVICE,
}
};
void __init ixp2000_map_io(void)
{
- extern unsigned int processor_id;
-
/*
- * On IXP2400 CPUs we need to use MT_IXP2000_DEVICE for
- * tweaking the PMDs so XCB=101. On IXP2800s we use the normal
- * PMD flags.
+ * On IXP2400 CPUs we need to use MT_IXP2000_DEVICE so that
+ * XCB=101 (to avoid triggering erratum #66), and given that
+ * this mode speeds up I/O accesses and we have write buffer
+ * flushes in the right places anyway, it doesn't hurt to use
+ * XCB=101 for all IXP2000s.
*/
- if ((processor_id & 0xfffffff0) == 0x69054190) {
- int i;
-
- printk(KERN_INFO "Enabling IXP2400 erratum #66 workaround\n");
-
- for(i=0;i
#include
#include
+#include
#include
#include
#include
diff --git a/arch/arm/mach-pxa/pm.c b/arch/arm/mach-pxa/pm.c
index ac4dd4336160..f74b9af112dc 100644
--- a/arch/arm/mach-pxa/pm.c
+++ b/arch/arm/mach-pxa/pm.c
@@ -12,6 +12,7 @@
*/
#include
#include
+#include
#include
#include
#include
@@ -19,6 +20,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -72,7 +74,7 @@ enum { SLEEP_SAVE_START = 0,
};
-static int pxa_pm_enter(suspend_state_t state)
+int pxa_pm_enter(suspend_state_t state)
{
unsigned long sleep_save[SLEEP_SAVE_SIZE];
unsigned long checksum = 0;
@@ -191,6 +193,8 @@ static int pxa_pm_enter(suspend_state_t state)
return 0;
}
+EXPORT_SYMBOL_GPL(pxa_pm_enter);
+
unsigned long sleep_phys_sp(void *sp)
{
return virt_to_phys(sp);
@@ -199,21 +203,25 @@ unsigned long sleep_phys_sp(void *sp)
/*
* Called after processes are frozen, but before we shut down devices.
*/
-static int pxa_pm_prepare(suspend_state_t state)
+int pxa_pm_prepare(suspend_state_t state)
{
extern int pxa_cpu_pm_prepare(suspend_state_t state);
return pxa_cpu_pm_prepare(state);
}
+EXPORT_SYMBOL_GPL(pxa_pm_prepare);
+
/*
* Called after devices are re-setup, but before processes are thawed.
*/
-static int pxa_pm_finish(suspend_state_t state)
+int pxa_pm_finish(suspend_state_t state)
{
return 0;
}
+EXPORT_SYMBOL_GPL(pxa_pm_finish);
+
/*
* Set to PM_DISK_FIRMWARE so we can quickly veto suspend-to-disk.
*/
@@ -230,4 +238,4 @@ static int __init pxa_pm_init(void)
return 0;
}
-late_initcall(pxa_pm_init);
+device_initcall(pxa_pm_init);
diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c
index ad6a13f95a62..eef3de26ad37 100644
--- a/arch/arm/mach-pxa/poodle.c
+++ b/arch/arm/mach-pxa/poodle.c
@@ -65,6 +65,27 @@ struct platform_device poodle_scoop_device = {
.resource = poodle_scoop_resources,
};
+static void poodle_pcmcia_init(void)
+{
+ /* Setup default state of GPIO outputs
+ before we enable them as outputs. */
+ GPSR(GPIO48_nPOE) = GPIO_bit(GPIO48_nPOE) |
+ GPIO_bit(GPIO49_nPWE) | GPIO_bit(GPIO50_nPIOR) |
+ GPIO_bit(GPIO51_nPIOW) | GPIO_bit(GPIO52_nPCE_1) |
+ GPIO_bit(GPIO53_nPCE_2);
+
+ pxa_gpio_mode(GPIO48_nPOE_MD);
+ pxa_gpio_mode(GPIO49_nPWE_MD);
+ pxa_gpio_mode(GPIO50_nPIOR_MD);
+ pxa_gpio_mode(GPIO51_nPIOW_MD);
+ pxa_gpio_mode(GPIO55_nPREG_MD);
+ pxa_gpio_mode(GPIO56_nPWAIT_MD);
+ pxa_gpio_mode(GPIO57_nIOIS16_MD);
+ pxa_gpio_mode(GPIO52_nPCE_1_MD);
+ pxa_gpio_mode(GPIO53_nPCE_2_MD);
+ pxa_gpio_mode(GPIO54_pSKTSEL_MD);
+}
+
static struct scoop_pcmcia_dev poodle_pcmcia_scoop[] = {
{
.dev = &poodle_scoop_device.dev,
@@ -74,6 +95,14 @@ static struct scoop_pcmcia_dev poodle_pcmcia_scoop[] = {
},
};
+static struct scoop_pcmcia_config poodle_pcmcia_config = {
+ .devs = &poodle_pcmcia_scoop[0],
+ .num_devs = 1,
+ .pcmcia_init = poodle_pcmcia_init,
+};
+
+EXPORT_SYMBOL(poodle_scoop_device);
+
/* LoCoMo device */
static struct resource locomo_resources[] = {
@@ -268,8 +297,7 @@ static void __init poodle_init(void)
pxa_set_mci_info(&poodle_mci_platform_data);
pxa_set_ficp_info(&poodle_ficp_platform_data);
- scoop_num = 1;
- scoop_devs = &poodle_pcmcia_scoop[0];
+ platform_scoop_config = &poodle_pcmcia_config;
ret = platform_add_devices(devices, ARRAY_SIZE(devices));
if (ret) {
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index 6c6878cd2207..4e9a699ee428 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -104,6 +104,66 @@ struct platform_device spitzscoop2_device = {
.resource = spitz_scoop2_resources,
};
+#define SPITZ_PWR_SD 0x01
+#define SPITZ_PWR_CF 0x02
+
+/* Power control is shared between one of the CF slots and SD */
+static void spitz_card_pwr_ctrl(int device, unsigned short new_cpr)
+{
+ unsigned short cpr = read_scoop_reg(&spitzscoop_device.dev, SCOOP_CPR);
+
+ if (new_cpr & 0x0007) {
+ set_scoop_gpio(&spitzscoop_device.dev, SPITZ_SCP_CF_POWER);
+ if (!(cpr & 0x0002) && !(cpr & 0x0004))
+ mdelay(5);
+ if (device == SPITZ_PWR_CF)
+ cpr |= 0x0002;
+ if (device == SPITZ_PWR_SD)
+ cpr |= 0x0004;
+ write_scoop_reg(&spitzscoop_device.dev, SCOOP_CPR, cpr | new_cpr);
+ } else {
+ if (device == SPITZ_PWR_CF)
+ cpr &= ~0x0002;
+ if (device == SPITZ_PWR_SD)
+ cpr &= ~0x0004;
+ write_scoop_reg(&spitzscoop_device.dev, SCOOP_CPR, cpr | new_cpr);
+ if (!(cpr & 0x0002) && !(cpr & 0x0004)) {
+ mdelay(1);
+ reset_scoop_gpio(&spitzscoop_device.dev, SPITZ_SCP_CF_POWER);
+ }
+ }
+}
+
+static void spitz_pcmcia_init(void)
+{
+ /* Setup default state of GPIO outputs
+ before we enable them as outputs. */
+ GPSR(GPIO48_nPOE) = GPIO_bit(GPIO48_nPOE) |
+ GPIO_bit(GPIO49_nPWE) | GPIO_bit(GPIO50_nPIOR) |
+ GPIO_bit(GPIO51_nPIOW) | GPIO_bit(GPIO54_nPCE_2);
+ GPSR(GPIO85_nPCE_1) = GPIO_bit(GPIO85_nPCE_1);
+
+ pxa_gpio_mode(GPIO48_nPOE_MD);
+ pxa_gpio_mode(GPIO49_nPWE_MD);
+ pxa_gpio_mode(GPIO50_nPIOR_MD);
+ pxa_gpio_mode(GPIO51_nPIOW_MD);
+ pxa_gpio_mode(GPIO55_nPREG_MD);
+ pxa_gpio_mode(GPIO56_nPWAIT_MD);
+ pxa_gpio_mode(GPIO57_nIOIS16_MD);
+ pxa_gpio_mode(GPIO85_nPCE_1_MD);
+ pxa_gpio_mode(GPIO54_nPCE_2_MD);
+ pxa_gpio_mode(GPIO104_pSKTSEL_MD);
+}
+
+static void spitz_pcmcia_pwr(struct device *scoop, unsigned short cpr, int nr)
+{
+ /* Only need to override behaviour for slot 0 */
+ if (nr == 0)
+ spitz_card_pwr_ctrl(SPITZ_PWR_CF, cpr);
+ else
+ write_scoop_reg(scoop, SCOOP_CPR, cpr);
+}
+
static struct scoop_pcmcia_dev spitz_pcmcia_scoop[] = {
{
.dev = &spitzscoop_device.dev,
@@ -117,6 +177,16 @@ static struct scoop_pcmcia_dev spitz_pcmcia_scoop[] = {
},
};
+static struct scoop_pcmcia_config spitz_pcmcia_config = {
+ .devs = &spitz_pcmcia_scoop[0],
+ .num_devs = 2,
+ .pcmcia_init = spitz_pcmcia_init,
+ .power_ctrl = spitz_pcmcia_pwr,
+};
+
+EXPORT_SYMBOL(spitzscoop_device);
+EXPORT_SYMBOL(spitzscoop2_device);
+
/*
* Spitz SSP Device
@@ -235,27 +305,14 @@ static int spitz_mci_init(struct device *dev, irqreturn_t (*spitz_detect_int)(in
return 0;
}
-/* Power control is shared with one of the CF slots so we have a mess */
static void spitz_mci_setpower(struct device *dev, unsigned int vdd)
{
struct pxamci_platform_data* p_d = dev->platform_data;
- unsigned short cpr = read_scoop_reg(&spitzscoop_device.dev, SCOOP_CPR);
-
- if (( 1 << vdd) & p_d->ocr_mask) {
- /* printk(KERN_DEBUG "%s: on\n", __FUNCTION__); */
- set_scoop_gpio(&spitzscoop_device.dev, SPITZ_SCP_CF_POWER);
- mdelay(2);
- write_scoop_reg(&spitzscoop_device.dev, SCOOP_CPR, cpr | 0x04);
- } else {
- /* printk(KERN_DEBUG "%s: off\n", __FUNCTION__); */
- write_scoop_reg(&spitzscoop_device.dev, SCOOP_CPR, cpr & ~0x04);
-
- if (!(cpr | 0x02)) {
- mdelay(1);
- reset_scoop_gpio(&spitzscoop_device.dev, SPITZ_SCP_CF_POWER);
- }
- }
+ if (( 1 << vdd) & p_d->ocr_mask)
+ spitz_card_pwr_ctrl(SPITZ_PWR_SD, 0x0004);
+ else
+ spitz_card_pwr_ctrl(SPITZ_PWR_SD, 0x0000);
}
static int spitz_mci_get_ro(struct device *dev)
@@ -351,8 +408,8 @@ static void __init common_init(void)
static void __init spitz_init(void)
{
- scoop_num = 2;
- scoop_devs = &spitz_pcmcia_scoop[0];
+ platform_scoop_config = &spitz_pcmcia_config;
+
spitz_bl_machinfo.set_bl_intensity = spitz_bl_set_intensity;
common_init();
diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c
index 7dad3f1465e0..b9b2057349eb 100644
--- a/arch/arm/mach-pxa/time.c
+++ b/arch/arm/mach-pxa/time.c
@@ -132,11 +132,13 @@ static void __init pxa_timer_init(void)
tv.tv_sec = pxa_get_rtc_time();
do_settimeofday(&tv);
- OSMR0 = 0; /* set initial match at 0 */
+ OIER = 0; /* disable any timer interrupts */
+ OSCR = LATCH*2; /* push OSCR out of the way */
+ OSMR0 = LATCH; /* set initial match */
OSSR = 0xf; /* clear status on all timers */
setup_irq(IRQ_OST0, &pxa_timer_irq);
- OIER |= OIER_E0; /* enable match on timer 0 to cause interrupts */
- OSCR = 0; /* initialize free-running timer, force first match */
+ OIER = OIER_E0; /* enable match on timer 0 to cause interrupts */
+ OSCR = 0; /* initialize free-running timer */
}
#ifdef CONFIG_NO_IDLE_HZ
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
new file mode 100644
index 000000000000..c312054dfb88
--- /dev/null
+++ b/arch/arm/mach-pxa/tosa.c
@@ -0,0 +1,306 @@
+/*
+ * Support for Sharp SL-C6000x PDAs
+ * Model: (Tosa)
+ *
+ * Copyright (c) 2005 Dirk Opfer
+ *
+ * Based on code written by Sharp/Lineo for 2.4 kernels
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+#include
+
+#include "generic.h"
+
+
+/*
+ * SCOOP Device
+ */
+static struct resource tosa_scoop_resources[] = {
+ [0] = {
+ .start = TOSA_CF_PHYS,
+ .end = TOSA_CF_PHYS + 0xfff,
+ .flags = IORESOURCE_MEM,
+ },
+};
+
+static struct scoop_config tosa_scoop_setup = {
+ .io_dir = TOSA_SCOOP_IO_DIR,
+ .io_out = TOSA_SCOOP_IO_OUT,
+
+};
+
+struct platform_device tosascoop_device = {
+ .name = "sharp-scoop",
+ .id = 0,
+ .dev = {
+ .platform_data = &tosa_scoop_setup,
+ },
+ .num_resources = ARRAY_SIZE(tosa_scoop_resources),
+ .resource = tosa_scoop_resources,
+};
+
+
+/*
+ * SCOOP Device Jacket
+ */
+static struct resource tosa_scoop_jc_resources[] = {
+ [0] = {
+ .start = TOSA_SCOOP_PHYS + 0x40,
+ .end = TOSA_SCOOP_PHYS + 0xfff,
+ .flags = IORESOURCE_MEM,
+ },
+};
+
+static struct scoop_config tosa_scoop_jc_setup = {
+ .io_dir = TOSA_SCOOP_JC_IO_DIR,
+ .io_out = TOSA_SCOOP_JC_IO_OUT,
+};
+
+struct platform_device tosascoop_jc_device = {
+ .name = "sharp-scoop",
+ .id = 1,
+ .dev = {
+ .platform_data = &tosa_scoop_jc_setup,
+ .parent = &tosascoop_device.dev,
+ },
+ .num_resources = ARRAY_SIZE(tosa_scoop_jc_resources),
+ .resource = tosa_scoop_jc_resources,
+};
+
+/*
+ * PCMCIA
+ */
+static struct scoop_pcmcia_dev tosa_pcmcia_scoop[] = {
+{
+ .dev = &tosascoop_device.dev,
+ .irq = TOSA_IRQ_GPIO_CF_IRQ,
+ .cd_irq = TOSA_IRQ_GPIO_CF_CD,
+ .cd_irq_str = "PCMCIA0 CD",
+},{
+ .dev = &tosascoop_jc_device.dev,
+ .irq = TOSA_IRQ_GPIO_JC_CF_IRQ,
+ .cd_irq = -1,
+},
+};
+
+static void tosa_pcmcia_init(void)
+{
+ /* Setup default state of GPIO outputs
+ before we enable them as outputs. */
+ GPSR(GPIO48_nPOE) = GPIO_bit(GPIO48_nPOE) |
+ GPIO_bit(GPIO49_nPWE) | GPIO_bit(GPIO50_nPIOR) |
+ GPIO_bit(GPIO51_nPIOW) | GPIO_bit(GPIO52_nPCE_1) |
+ GPIO_bit(GPIO53_nPCE_2);
+
+ pxa_gpio_mode(GPIO48_nPOE_MD);
+ pxa_gpio_mode(GPIO49_nPWE_MD);
+ pxa_gpio_mode(GPIO50_nPIOR_MD);
+ pxa_gpio_mode(GPIO51_nPIOW_MD);
+ pxa_gpio_mode(GPIO55_nPREG_MD);
+ pxa_gpio_mode(GPIO56_nPWAIT_MD);
+ pxa_gpio_mode(GPIO57_nIOIS16_MD);
+ pxa_gpio_mode(GPIO52_nPCE_1_MD);
+ pxa_gpio_mode(GPIO53_nPCE_2_MD);
+ pxa_gpio_mode(GPIO54_pSKTSEL_MD);
+}
+
+static struct scoop_pcmcia_config tosa_pcmcia_config = {
+ .devs = &tosa_pcmcia_scoop[0],
+ .num_devs = 2,
+ .pcmcia_init = tosa_pcmcia_init,
+};
+
+/*
+ * USB Device Controller
+ */
+static void tosa_udc_command(int cmd)
+{
+ switch(cmd) {
+ case PXA2XX_UDC_CMD_CONNECT:
+ set_scoop_gpio(&tosascoop_jc_device.dev,TOSA_SCOOP_JC_USB_PULLUP);
+ break;
+ case PXA2XX_UDC_CMD_DISCONNECT:
+ reset_scoop_gpio(&tosascoop_jc_device.dev,TOSA_SCOOP_JC_USB_PULLUP);
+ break;
+ }
+}
+
+static int tosa_udc_is_connected(void)
+{
+ return ((GPLR(TOSA_GPIO_USB_IN) & GPIO_bit(TOSA_GPIO_USB_IN)) == 0);
+}
+
+
+static struct pxa2xx_udc_mach_info udc_info __initdata = {
+ .udc_command = tosa_udc_command,
+ .udc_is_connected = tosa_udc_is_connected,
+};
+
+/*
+ * MMC/SD Device
+ */
+static struct pxamci_platform_data tosa_mci_platform_data;
+
+static int tosa_mci_init(struct device *dev, irqreturn_t (*tosa_detect_int)(int, void *, struct pt_regs *), void *data)
+{
+ int err;
+
+ /* setup GPIO for PXA25x MMC controller */
+ pxa_gpio_mode(GPIO6_MMCCLK_MD);
+ pxa_gpio_mode(GPIO8_MMCCS0_MD);
+ pxa_gpio_mode(TOSA_GPIO_nSD_DETECT | GPIO_IN);
+
+ tosa_mci_platform_data.detect_delay = msecs_to_jiffies(250);
+
+ err = request_irq(TOSA_IRQ_GPIO_nSD_DETECT, tosa_detect_int, SA_INTERRUPT,
+ "MMC/SD card detect", data);
+ if (err) {
+ printk(KERN_ERR "tosa_mci_init: MMC/SD: can't request MMC card detect IRQ\n");
+ return -1;
+ }
+
+ set_irq_type(TOSA_IRQ_GPIO_nSD_DETECT, IRQT_BOTHEDGE);
+
+ return 0;
+}
+
+static void tosa_mci_setpower(struct device *dev, unsigned int vdd)
+{
+ struct pxamci_platform_data* p_d = dev->platform_data;
+
+ if (( 1 << vdd) & p_d->ocr_mask) {
+ set_scoop_gpio(&tosascoop_device.dev,TOSA_SCOOP_PWR_ON);
+ } else {
+ reset_scoop_gpio(&tosascoop_device.dev,TOSA_SCOOP_PWR_ON);
+ }
+}
+
+static int tosa_mci_get_ro(struct device *dev)
+{
+ return (read_scoop_reg(&tosascoop_device.dev, SCOOP_GPWR)&TOSA_SCOOP_SD_WP);
+}
+
+static void tosa_mci_exit(struct device *dev, void *data)
+{
+ free_irq(TOSA_IRQ_GPIO_nSD_DETECT, data);
+}
+
+static struct pxamci_platform_data tosa_mci_platform_data = {
+ .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
+ .init = tosa_mci_init,
+ .get_ro = tosa_mci_get_ro,
+ .setpower = tosa_mci_setpower,
+ .exit = tosa_mci_exit,
+};
+
+/*
+ * Irda
+ */
+static void tosa_irda_transceiver_mode(struct device *dev, int mode)
+{
+ if (mode & IR_OFF) {
+ reset_scoop_gpio(&tosascoop_device.dev,TOSA_SCOOP_IR_POWERDWN);
+ pxa_gpio_mode(GPIO47_STTXD|GPIO_DFLT_LOW);
+ pxa_gpio_mode(GPIO47_STTXD|GPIO_OUT);
+ } else {
+ pxa_gpio_mode(GPIO47_STTXD_MD);
+ set_scoop_gpio(&tosascoop_device.dev,TOSA_SCOOP_IR_POWERDWN);
+ }
+}
+
+static struct pxaficp_platform_data tosa_ficp_platform_data = {
+ .transceiver_cap = IR_SIRMODE | IR_OFF,
+ .transceiver_mode = tosa_irda_transceiver_mode,
+};
+
+/*
+ * Tosa Keyboard
+ */
+static struct platform_device tosakbd_device = {
+ .name = "tosa-keyboard",
+ .id = -1,
+};
+
+static struct platform_device *devices[] __initdata = {
+ &tosascoop_device,
+ &tosascoop_jc_device,
+ &tosakbd_device,
+};
+
+static void __init tosa_init(void)
+{
+ pxa_gpio_mode(TOSA_GPIO_ON_RESET | GPIO_IN);
+ pxa_gpio_mode(TOSA_GPIO_TC6393_INT | GPIO_IN);
+ pxa_gpio_mode(TOSA_GPIO_USB_IN | GPIO_IN);
+
+ /* setup sleep mode values */
+ PWER = 0x00000002;
+ PFER = 0x00000000;
+ PRER = 0x00000002;
+ PGSR0 = 0x00000000;
+ PGSR1 = 0x00FF0002;
+ PGSR2 = 0x00014000;
+ PCFR |= PCFR_OPDE;
+
+ /* enable batt_fault */
+ PMCR = 0x01;
+
+ pxa_set_mci_info(&tosa_mci_platform_data);
+ pxa_set_udc_info(&udc_info);
+ pxa_set_ficp_info(&tosa_ficp_platform_data);
+ platform_scoop_config = &tosa_pcmcia_config;
+
+ platform_add_devices(devices, ARRAY_SIZE(devices));
+}
+
+static void __init fixup_tosa(struct machine_desc *desc,
+ struct tag *tags, char **cmdline, struct meminfo *mi)
+{
+ sharpsl_save_param();
+ mi->nr_banks=1;
+ mi->bank[0].start = 0xa0000000;
+ mi->bank[0].node = 0;
+ mi->bank[0].size = (64*1024*1024);
+}
+
+MACHINE_START(TOSA, "SHARP Tosa")
+ .phys_ram = 0xa0000000,
+ .phys_io = 0x40000000,
+ .io_pg_offst = (io_p2v(0x40000000) >> 18) & 0xfffc,
+ .fixup = fixup_tosa,
+ .map_io = pxa_map_io,
+ .init_irq = pxa_init_irq,
+ .init_machine = tosa_init,
+ .timer = &pxa_timer,
+MACHINE_END
diff --git a/arch/arm/mach-realview/Kconfig b/arch/arm/mach-realview/Kconfig
index 4b63dc9eabfe..129976866d47 100644
--- a/arch/arm/mach-realview/Kconfig
+++ b/arch/arm/mach-realview/Kconfig
@@ -8,4 +8,13 @@ config MACH_REALVIEW_EB
help
Include support for the ARM(R) RealView Emulation Baseboard platform.
+config REALVIEW_MPCORE
+ bool "Support MPcore tile"
+ depends on MACH_REALVIEW_EB
+ help
+ Enable support for the MPCore tile on the Realview platform.
+ Since there are device address and interrupt differences, a
+ kernel built with this option enabled is not compatible with
+ other tiles.
+
endmenu
diff --git a/arch/arm/mach-realview/Makefile b/arch/arm/mach-realview/Makefile
index 8d37ea1605fd..011a85c10627 100644
--- a/arch/arm/mach-realview/Makefile
+++ b/arch/arm/mach-realview/Makefile
@@ -4,3 +4,4 @@
obj-y := core.o clock.o
obj-$(CONFIG_MACH_REALVIEW_EB) += realview_eb.o
+obj-$(CONFIG_SMP) += platsmp.o headsmp.o
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index 482eb512ebe8..4ea60d8b6e36 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -550,6 +550,11 @@ static irqreturn_t realview_timer_interrupt(int irq, void *dev_id, struct pt_reg
timer_tick(regs);
+#ifdef CONFIG_SMP
+ smp_send_timer();
+ update_process_times(user_mode(regs));
+#endif
+
write_sequnlock(&xtime_lock);
return IRQ_HANDLED;
diff --git a/arch/arm/mach-realview/core.h b/arch/arm/mach-realview/core.h
index 575599db74db..d83e8bad2038 100644
--- a/arch/arm/mach-realview/core.h
+++ b/arch/arm/mach-realview/core.h
@@ -23,6 +23,7 @@
#define __ASM_ARCH_REALVIEW_H
#include
+#include
#include
#define __io_address(n) __io(IO_ADDRESS(n))
diff --git a/arch/arm/mach-realview/headsmp.S b/arch/arm/mach-realview/headsmp.S
new file mode 100644
index 000000000000..4075473cf68a
--- /dev/null
+++ b/arch/arm/mach-realview/headsmp.S
@@ -0,0 +1,39 @@
+/*
+ * linux/arch/arm/mach-realview/headsmp.S
+ *
+ * Copyright (c) 2003 ARM Limited
+ * All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include
+#include
+
+ __INIT
+
+/*
+ * Realview specific entry point for secondary CPUs. This provides
+ * a "holding pen" into which all secondary cores are held until we're
+ * ready for them to initialise.
+ */
+ENTRY(realview_secondary_startup)
+ mrc p15, 0, r0, c0, c0, 5
+ and r0, r0, #15
+ adr r4, 1f
+ ldmia r4, {r5, r6}
+ sub r4, r4, r5
+ add r6, r6, r4
+pen: ldr r7, [r6]
+ cmp r7, r0
+ bne pen
+
+ /*
+ * we've been released from the holding pen: secondary_stack
+ * should now contain the SVC stack for this core
+ */
+ b secondary_startup
+
+1: .long .
+ .long pen_release
diff --git a/arch/arm/mach-realview/platsmp.c b/arch/arm/mach-realview/platsmp.c
new file mode 100644
index 000000000000..09b35f62247a
--- /dev/null
+++ b/arch/arm/mach-realview/platsmp.c
@@ -0,0 +1,195 @@
+/*
+ * linux/arch/arm/mach-realview/platsmp.c
+ *
+ * Copyright (C) 2002 ARM Ltd.
+ * All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "core.h"
+
+extern void realview_secondary_startup(void);
+
+/*
+ * control for which core is the next to come out of the secondary
+ * boot "holding pen"
+ */
+volatile int __cpuinitdata pen_release = -1;
+
+static unsigned int __init get_core_count(void)
+{
+ unsigned int ncores;
+
+ ncores = __raw_readl(__io_address(REALVIEW_MPCORE_SCU_BASE) + SCU_CONFIG);
+
+ return (ncores & 0x03) + 1;
+}
+
+static DEFINE_SPINLOCK(boot_lock);
+
+void __cpuinit platform_secondary_init(unsigned int cpu)
+{
+ /*
+ * the primary core may have used a "cross call" soft interrupt
+ * to get this processor out of WFI in the BootMonitor - make
+ * sure that we are no longer being sent this soft interrupt
+ */
+ smp_cross_call_done(cpumask_of_cpu(cpu));
+
+ /*
+ * if any interrupts are already enabled for the primary
+ * core (e.g. timer irq), then they will not have been enabled
+ * for us: do so
+ */
+ gic_cpu_init(__io_address(REALVIEW_GIC_CPU_BASE));
+
+ /*
+ * let the primary processor know we're out of the
+ * pen, then head off into the C entry point
+ */
+ pen_release = -1;
+
+ /*
+ * Synchronise with the boot thread.
+ */
+ spin_lock(&boot_lock);
+ spin_unlock(&boot_lock);
+}
+
+int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
+{
+ unsigned long timeout;
+
+ /*
+ * set synchronisation state between this boot processor
+ * and the secondary one
+ */
+ spin_lock(&boot_lock);
+
+ /*
+ * The secondary processor is waiting to be released from
+ * the holding pen - release it, then wait for it to flag
+ * that it has been released by resetting pen_release.
+ *
+ * Note that "pen_release" is the hardware CPU ID, whereas
+ * "cpu" is Linux's internal ID.
+ */
+ pen_release = cpu;
+ flush_cache_all();
+
+ /*
+ * XXX
+ *
+ * This is a later addition to the booting protocol: the
+ * bootMonitor now puts secondary cores into WFI, so
+ * poke_milo() no longer gets the cores moving; we need
+ * to send a soft interrupt to wake the secondary core.
+ * Use smp_cross_call() for this, since there's little
+ * point duplicating the code here
+ */
+ smp_cross_call(cpumask_of_cpu(cpu));
+
+ timeout = jiffies + (1 * HZ);
+ while (time_before(jiffies, timeout)) {
+ if (pen_release == -1)
+ break;
+
+ udelay(10);
+ }
+
+ /*
+ * now the secondary core is starting up let it run its
+ * calibrations, then wait for it to finish
+ */
+ spin_unlock(&boot_lock);
+
+ return pen_release != -1 ? -ENOSYS : 0;
+}
+
+static void __init poke_milo(void)
+{
+ extern void secondary_startup(void);
+
+ /* nobody is to be released from the pen yet */
+ pen_release = -1;
+
+ /*
+ * write the address of secondary startup into the system-wide
+ * flags register, then clear the bottom two bits, which is what
+ * BootMonitor is waiting for
+ */
+#if 1
+#define REALVIEW_SYS_FLAGSS_OFFSET 0x30
+ __raw_writel(virt_to_phys(realview_secondary_startup),
+ __io_address(REALVIEW_SYS_BASE) +
+ REALVIEW_SYS_FLAGSS_OFFSET);
+#define REALVIEW_SYS_FLAGSC_OFFSET 0x34
+ __raw_writel(3,
+ __io_address(REALVIEW_SYS_BASE) +
+ REALVIEW_SYS_FLAGSC_OFFSET);
+#endif
+
+ mb();
+}
+
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+ unsigned int ncores = get_core_count();
+ unsigned int cpu = smp_processor_id();
+ int i;
+
+ /* sanity check */
+ if (ncores == 0) {
+ printk(KERN_ERR
+ "Realview: strange CM count of 0? Default to 1\n");
+
+ ncores = 1;
+ }
+
+ if (ncores > NR_CPUS) {
+ printk(KERN_WARNING
+ "Realview: no. of cores (%d) greater than configured "
+ "maximum of %d - clipping\n",
+ ncores, NR_CPUS);
+ ncores = NR_CPUS;
+ }
+
+ smp_store_cpu_info(cpu);
+
+ /*
+ * are we trying to boot more cores than exist?
+ */
+ if (max_cpus > ncores)
+ max_cpus = ncores;
+
+ /*
+ * Initialise the possible/present maps.
+ * cpu_possible_map describes the set of CPUs which may be present
+ * cpu_present_map describes the set of CPUs populated
+ */
+ for (i = 0; i < max_cpus; i++) {
+ cpu_set(i, cpu_possible_map);
+ cpu_set(i, cpu_present_map);
+ }
+
+ /*
+ * Do we need any more CPUs? If so, then let them know where
+ * to start. Note that, on modern versions of MILO, the "poke"
+ * doesn't actually do anything until each individual core is
+ * sent a soft interrupt to get it out of WFI
+ */
+ if (max_cpus > 1)
+ poke_milo();
+}
diff --git a/arch/arm/mach-realview/realview_eb.c b/arch/arm/mach-realview/realview_eb.c
index 267bb07e39b7..7dc32503fdf2 100644
--- a/arch/arm/mach-realview/realview_eb.c
+++ b/arch/arm/mach-realview/realview_eb.c
@@ -136,6 +136,11 @@ static struct amba_device *amba_devs[] __initdata = {
static void __init gic_init_irq(void)
{
+#ifdef CONFIG_REALVIEW_MPCORE
+ writel(0x0000a05f, __io_address(REALVIEW_SYS_LOCK));
+ writel(0x008003c0, __io_address(REALVIEW_SYS_BASE) + 0xd8);
+ writel(0x00000000, __io_address(REALVIEW_SYS_LOCK));
+#endif
gic_dist_init(__io_address(REALVIEW_GIC_DIST_BASE));
gic_cpu_init(__io_address(REALVIEW_GIC_CPU_BASE));
}
diff --git a/arch/arm/mach-s3c2410/Kconfig b/arch/arm/mach-s3c2410/Kconfig
index c796bcdd6158..0b9d7ca49ec1 100644
--- a/arch/arm/mach-s3c2410/Kconfig
+++ b/arch/arm/mach-s3c2410/Kconfig
@@ -121,6 +121,14 @@ config S3C2410_BOOT_WATCHDOG
system resets depends on the value of PCLK. The timeout on an
200MHz s3c2410 should be about 30 seconds.
+config S3C2410_BOOT_ERROR_RESET
+ bool "S3C2410 Reboot on decompression error"
+ depends on ARCH_S3C2410
+ help
+ Say y here to use the watchdog to reset the system if the
+ kernel decompressor detects an error during decompression.
+
+
comment "S3C2410 Setup"
config S3C2410_DMA
diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c
index 0b71c896bbd1..1be2567a7486 100644
--- a/arch/arm/mach-s3c2410/mach-bast.c
+++ b/arch/arm/mach-s3c2410/mach-bast.c
@@ -89,32 +89,63 @@
/* macros to modify the physical addresses for io space */
-#define PA_CS2(item) ((item) + S3C2410_CS2)
-#define PA_CS3(item) ((item) + S3C2410_CS3)
-#define PA_CS4(item) ((item) + S3C2410_CS4)
-#define PA_CS5(item) ((item) + S3C2410_CS5)
+#define PA_CS2(item) (__phys_to_pfn((item) + S3C2410_CS2))
+#define PA_CS3(item) (__phys_to_pfn((item) + S3C2410_CS3))
+#define PA_CS4(item) (__phys_to_pfn((item) + S3C2410_CS4))
+#define PA_CS5(item) (__phys_to_pfn((item) + S3C2410_CS5))
static struct map_desc bast_iodesc[] __initdata = {
/* ISA IO areas */
-
- { (u32)S3C24XX_VA_ISA_BYTE, PA_CS2(BAST_PA_ISAIO), SZ_16M, MT_DEVICE },
- { (u32)S3C24XX_VA_ISA_WORD, PA_CS3(BAST_PA_ISAIO), SZ_16M, MT_DEVICE },
-
- /* we could possibly compress the next set down into a set of smaller tables
- * pagetables, but that would mean using an L2 section, and it still means
- * we cannot actually feed the same register to an LDR due to 16K spacing
- */
-
+ {
+ .virtual = (u32)S3C24XX_VA_ISA_BYTE,
+ .pfn = PA_CS2(BAST_PA_ISAIO),
+ .length = SZ_16M,
+ .type = MT_DEVICE,
+ }, {
+ .virtual = (u32)S3C24XX_VA_ISA_WORD,
+ .pfn = PA_CS3(BAST_PA_ISAIO),
+ .length = SZ_16M,
+ .type = MT_DEVICE,
+ },
/* bast CPLD control registers, and external interrupt controls */
- { (u32)BAST_VA_CTRL1, BAST_PA_CTRL1, SZ_1M, MT_DEVICE },
- { (u32)BAST_VA_CTRL2, BAST_PA_CTRL2, SZ_1M, MT_DEVICE },
- { (u32)BAST_VA_CTRL3, BAST_PA_CTRL3, SZ_1M, MT_DEVICE },
- { (u32)BAST_VA_CTRL4, BAST_PA_CTRL4, SZ_1M, MT_DEVICE },
-
+ {
+ .virtual = (u32)BAST_VA_CTRL1,
+ .pfn = __phys_to_pfn(BAST_PA_CTRL1),
+ .length = SZ_1M,
+ .type = MT_DEVICE,
+ }, {
+ .virtual = (u32)BAST_VA_CTRL2,
+ .pfn = __phys_to_pfn(BAST_PA_CTRL2),
+ .length = SZ_1M,
+ .type = MT_DEVICE,
+ }, {
+ .virtual = (u32)BAST_VA_CTRL3,
+ .pfn = __phys_to_pfn(BAST_PA_CTRL3),
+ .length = SZ_1M,
+ .type = MT_DEVICE,
+ }, {
+ .virtual = (u32)BAST_VA_CTRL4,
+ .pfn = __phys_to_pfn(BAST_PA_CTRL4),
+ .length = SZ_1M,
+ .type = MT_DEVICE,
+ },
/* PC104 IRQ mux */
- { (u32)BAST_VA_PC104_IRQREQ, BAST_PA_PC104_IRQREQ, SZ_1M, MT_DEVICE },
- { (u32)BAST_VA_PC104_IRQRAW, BAST_PA_PC104_IRQRAW, SZ_1M, MT_DEVICE },
- { (u32)BAST_VA_PC104_IRQMASK, BAST_PA_PC104_IRQMASK, SZ_1M, MT_DEVICE },
+ {
+ .virtual = (u32)BAST_VA_PC104_IRQREQ,
+ .pfn = __phys_to_pfn(BAST_PA_PC104_IRQREQ),
+ .length = SZ_1M,
+ .type = MT_DEVICE,
+ }, {
+ .virtual = (u32)BAST_VA_PC104_IRQRAW,
+ .pfn = __phys_to_pfn(BAST_PA_PC104_IRQRAW),
+ .length = SZ_1M,
+ .type = MT_DEVICE,
+ }, {
+ .virtual = (u32)BAST_VA_PC104_IRQMASK,
+ .pfn = __phys_to_pfn(BAST_PA_PC104_IRQMASK),
+ .length = SZ_1M,
+ .type = MT_DEVICE,
+ },
/* peripheral space... one for each of fast/slow/byte/16bit */
/* note, ide is only decoded in word space, even though some registers
diff --git a/arch/arm/mach-s3c2410/mach-vr1000.c b/arch/arm/mach-s3c2410/mach-vr1000.c
index 46b259673c18..ae7e099bf6c8 100644
--- a/arch/arm/mach-s3c2410/mach-vr1000.c
+++ b/arch/arm/mach-s3c2410/mach-vr1000.c
@@ -74,27 +74,47 @@
/* macros to modify the physical addresses for io space */
-#define PA_CS2(item) ((item) + S3C2410_CS2)
-#define PA_CS3(item) ((item) + S3C2410_CS3)
-#define PA_CS4(item) ((item) + S3C2410_CS4)
-#define PA_CS5(item) ((item) + S3C2410_CS5)
+#define PA_CS2(item) (__phys_to_pfn((item) + S3C2410_CS2))
+#define PA_CS3(item) (__phys_to_pfn((item) + S3C2410_CS3))
+#define PA_CS4(item) (__phys_to_pfn((item) + S3C2410_CS4))
+#define PA_CS5(item) (__phys_to_pfn((item) + S3C2410_CS5))
static struct map_desc vr1000_iodesc[] __initdata = {
/* ISA IO areas */
- { (u32)S3C24XX_VA_ISA_BYTE, PA_CS2(BAST_PA_ISAIO), SZ_16M, MT_DEVICE },
- { (u32)S3C24XX_VA_ISA_WORD, PA_CS3(BAST_PA_ISAIO), SZ_16M, MT_DEVICE },
-
- /* we could possibly compress the next set down into a set of smaller tables
- * pagetables, but that would mean using an L2 section, and it still means
- * we cannot actually feed the same register to an LDR due to 16K spacing
- */
-
+ {
+ .virtual = (u32)S3C24XX_VA_ISA_BYTE,
+ .pfn = PA_CS2(BAST_PA_ISAIO),
+ .length = SZ_16M,
+ .type = MT_DEVICE,
+ }, {
+ .virtual = (u32)S3C24XX_VA_ISA_WORD,
+ .pfn = PA_CS3(BAST_PA_ISAIO),
+ .length = SZ_16M,
+ .type = MT_DEVICE,
+ },
- /* bast CPLD control registers, and external interrupt controls */
- { (u32)VR1000_VA_CTRL1, VR1000_PA_CTRL1, SZ_1M, MT_DEVICE },
- { (u32)VR1000_VA_CTRL2, VR1000_PA_CTRL2, SZ_1M, MT_DEVICE },
- { (u32)VR1000_VA_CTRL3, VR1000_PA_CTRL3, SZ_1M, MT_DEVICE },
- { (u32)VR1000_VA_CTRL4, VR1000_PA_CTRL4, SZ_1M, MT_DEVICE },
+ /* CPLD control registers, and external interrupt controls */
+ {
+ .virtual = (u32)VR1000_VA_CTRL1,
+ .pfn = __phys_to_pfn(VR1000_PA_CTRL1),
+ .length = SZ_1M,
+ .type = MT_DEVICE,
+ }, {
+ .virtual = (u32)VR1000_VA_CTRL2,
+ .pfn = __phys_to_pfn(VR1000_PA_CTRL2),
+ .length = SZ_1M,
+ .type = MT_DEVICE,
+ }, {
+ .virtual = (u32)VR1000_VA_CTRL3,
+ .pfn = __phys_to_pfn(VR1000_PA_CTRL3),
+ .length = SZ_1M,
+ .type = MT_DEVICE,
+ }, {
+ .virtual = (u32)VR1000_VA_CTRL4,
+ .pfn = __phys_to_pfn(VR1000_PA_CTRL4),
+ .length = SZ_1M,
+ .type = MT_DEVICE,
+ },
/* peripheral space... one for each of fast/slow/byte/16bit */
/* note, ide is only decoded in word space, even though some registers
diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c
index 47e0420623fc..e4b435e634e4 100644
--- a/arch/arm/mach-sa1100/time.c
+++ b/arch/arm/mach-sa1100/time.c
@@ -124,11 +124,13 @@ static void __init sa1100_timer_init(void)
tv.tv_sec = sa1100_get_rtc_time();
do_settimeofday(&tv);
- OSMR0 = 0; /* set initial match at 0 */
+ OIER = 0; /* disable any timer interrupts */
+ OSCR = LATCH*2; /* push OSCR out of the way */
+ OSMR0 = LATCH; /* set initial match */
OSSR = 0xf; /* clear status on all timers */
setup_irq(IRQ_OST0, &sa1100_timer_irq);
- OIER |= OIER_E0; /* enable match on timer 0 to cause interrupts */
- OSCR = 0; /* initialize free-running timer, force first match */
+ OIER = OIER_E0; /* enable match on timer 0 to cause interrupts */
+ OSCR = 0; /* initialize free-running timer */
}
#ifdef CONFIG_NO_IDLE_HZ
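
The ordering above matters because the SA1100 OS timer only interrupts on an exact OSCR == OSMR0 compare: if the match register is programmed while the free-running counter is already past it, the first tick is silently lost, hence OSCR is parked beyond OSMR0 before match 0 is unmasked. For context, the per-tick rearm on this hardware looks roughly like the sketch below (a paraphrase of the sa1100 handler, not part of this patch):

	static irqreturn_t
	sa1100_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
	{
		unsigned int next_match;

		do {
			timer_tick(regs);		/* generic tick work */
			OSSR = OSSR_M0;			/* acknowledge match 0 */
			next_match = (OSMR0 += LATCH);	/* rearm one tick ahead */
		} while ((signed long)(next_match - OSCR) <= 0); /* catch up if late */

		return IRQ_HANDLED;
	}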
diff --git a/arch/arm/mm/mm-armv.c b/arch/arm/mm/mm-armv.c
index fb5b40289de2..9e50127be635 100644
--- a/arch/arm/mm/mm-armv.c
+++ b/arch/arm/mm/mm-armv.c
@@ -354,7 +354,7 @@ void __init build_mem_type_table(void)
{
struct cachepolicy *cp;
unsigned int cr = get_cr();
- unsigned int user_pgprot;
+ unsigned int user_pgprot, kern_pgprot;
int cpu_arch = cpu_architecture();
int i;
@@ -381,7 +381,7 @@ void __init build_mem_type_table(void)
}
cp = &cache_policies[cachepolicy];
- user_pgprot = cp->pte;
+ kern_pgprot = user_pgprot = cp->pte;
/*
* ARMv6 and above have extended page tables.
@@ -393,6 +393,7 @@ void __init build_mem_type_table(void)
*/
mem_types[MT_MEMORY].prot_sect &= ~PMD_BIT4;
mem_types[MT_ROM].prot_sect &= ~PMD_BIT4;
+
/*
* Mark cache clean areas and XIP ROM read only
* from SVC mode and no access from userspace.
@@ -412,32 +413,47 @@ void __init build_mem_type_table(void)
* (iow, non-global)
*/
user_pgprot |= L_PTE_ASID;
+
+#ifdef CONFIG_SMP
+ /*
+ * Mark memory with the "shared" attribute for SMP systems
+ */
+ user_pgprot |= L_PTE_SHARED;
+ kern_pgprot |= L_PTE_SHARED;
+ mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
+#endif
}
+ for (i = 0; i < 16; i++) {
+ unsigned long v = pgprot_val(protection_map[i]);
+ v = (v & ~(L_PTE_BUFFERABLE|L_PTE_CACHEABLE)) | user_pgprot;
+ protection_map[i] = __pgprot(v);
+ }
+
+ mem_types[MT_LOW_VECTORS].prot_pte |= kern_pgprot;
+ mem_types[MT_HIGH_VECTORS].prot_pte |= kern_pgprot;
+
if (cpu_arch >= CPU_ARCH_ARMv5) {
- mem_types[MT_LOW_VECTORS].prot_pte |= cp->pte & PTE_CACHEABLE;
- mem_types[MT_HIGH_VECTORS].prot_pte |= cp->pte & PTE_CACHEABLE;
+#ifndef CONFIG_SMP
+ /*
+ * Only use write-through for non-SMP systems
+ */
+ mem_types[MT_LOW_VECTORS].prot_pte &= ~L_PTE_BUFFERABLE;
+ mem_types[MT_HIGH_VECTORS].prot_pte &= ~L_PTE_BUFFERABLE;
+#endif
} else {
- mem_types[MT_LOW_VECTORS].prot_pte |= cp->pte;
- mem_types[MT_HIGH_VECTORS].prot_pte |= cp->pte;
mem_types[MT_MINICLEAN].prot_sect &= ~PMD_SECT_TEX(1);
}
+ pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
+ L_PTE_DIRTY | L_PTE_WRITE |
+ L_PTE_EXEC | kern_pgprot);
+
mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
mem_types[MT_ROM].prot_sect |= cp->pmd;
- for (i = 0; i < 16; i++) {
- unsigned long v = pgprot_val(protection_map[i]);
- v = (v & ~(PTE_BUFFERABLE|PTE_CACHEABLE)) | user_pgprot;
- protection_map[i] = __pgprot(v);
- }
-
- pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
- L_PTE_DIRTY | L_PTE_WRITE |
- L_PTE_EXEC | cp->pte);
-
switch (cp->pmd) {
case PMD_SECT_WT:
mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 9bb5fff406fb..92f3ca31b7b9 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -12,6 +12,7 @@
#include
#include
#include
+#include
#include
#include
@@ -112,6 +113,9 @@ ENTRY(cpu_v6_dcache_clean_area)
ENTRY(cpu_v6_switch_mm)
mov r2, #0
ldr r1, [r1, #MM_CONTEXT_ID] @ get mm->context.id
+#ifdef CONFIG_SMP
+ orr r0, r0, #2 @ set shared pgtable
+#endif
mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB
mcr p15, 0, r2, c7, c10, 4 @ drain write buffer
mcr p15, 0, r0, c2, c0, 0 @ set TTB 0
@@ -140,7 +144,7 @@ ENTRY(cpu_v6_switch_mm)
ENTRY(cpu_v6_set_pte)
str r1, [r0], #-2048 @ linux version
- bic r2, r1, #0x000007f0
+ bic r2, r1, #0x000003f0
bic r2, r2, #0x00000003
orr r2, r2, #PTE_EXT_AP0 | 2
@@ -191,6 +195,23 @@ cpu_v6_name:
* - cache type register is implemented
*/
__v6_setup:
+#ifdef CONFIG_SMP
+ /* Set up the SCU on core 0 only */
+ mrc p15, 0, r0, c0, c0, 5 @ CPU core number
+ ands r0, r0, #15
+ moveq r0, #0x10000000 @ SCU_BASE
+ orreq r0, r0, #0x00100000
+ ldreq r5, [r0, #SCU_CTRL]
+ orreq r5, r5, #1
+ streq r5, [r0, #SCU_CTRL]
+
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+ mrc p15, 0, r0, c1, c0, 1 @ Enable SMP/nAMP mode
+ orr r0, r0, #0x20
+ mcr p15, 0, r0, c1, c0, 1
+#endif
+#endif
+
mov r0, #0
mcr p15, 0, r0, c7, c14, 0 @ clean+invalidate D cache
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
@@ -198,6 +219,9 @@ __v6_setup:
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
mcr p15, 0, r0, c8, c7, 0 @ invalidate I + D TLBs
mcr p15, 0, r0, c2, c0, 2 @ TTB control register
+#ifdef CONFIG_SMP
+ orr r4, r4, #2 @ set shared pgtable
+#endif
mcr p15, 0, r4, c2, c0, 1 @ load TTB1
#ifdef CONFIG_VFP
mrc p15, 0, r0, c1, c0, 2
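
The __v6_setup additions above are easier to follow in C; roughly (the helper names here are illustrative assumptions, the real code runs in pre-MMU assembly using the SCU base and SCU_CTRL offset shown in the patch):

	/* Illustrative C rendering only -- not kernel source. */
	if (read_cpu_number() == 0) {			/* CP15 c0,c0,5 low bits */
		u32 ctrl = scu_read(SCU_BASE + SCU_CTRL);
		scu_write(SCU_BASE + SCU_CTRL, ctrl | 1); /* bit 0: SCU enable */
	}
	#ifndef CONFIG_CPU_DCACHE_DISABLE
	aux_ctrl_write(aux_ctrl_read() | 0x20);		/* bit 5: SMP/nAMP mode */
	#endif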
diff --git a/arch/arm/nwfpe/fpa11.h b/arch/arm/nwfpe/fpa11.h
index 9677ae8448e8..da4c616b6c49 100644
--- a/arch/arm/nwfpe/fpa11.h
+++ b/arch/arm/nwfpe/fpa11.h
@@ -60,7 +60,7 @@ typedef union tagFPREG {
#ifdef CONFIG_FPE_NWFPE_XP
floatx80 fExtended;
#else
- int padding[3];
+ u32 padding[3];
#endif
} FPREG;
diff --git a/arch/arm/nwfpe/fpa11_cpdt.c b/arch/arm/nwfpe/fpa11_cpdt.c
index b0db5cbcc3b1..32859fa8dcfc 100644
--- a/arch/arm/nwfpe/fpa11_cpdt.c
+++ b/arch/arm/nwfpe/fpa11_cpdt.c
@@ -59,8 +59,13 @@ static inline void loadExtended(const unsigned int Fn, const unsigned int __user
p = (unsigned int *) &fpa11->fpreg[Fn].fExtended;
fpa11->fType[Fn] = typeExtended;
get_user(p[0], &pMem[0]); /* sign & exponent */
+#ifdef __ARMEB__
+ get_user(p[1], &pMem[1]); /* ms bits */
+ get_user(p[2], &pMem[2]); /* ls bits */
+#else
get_user(p[1], &pMem[2]); /* ls bits */
get_user(p[2], &pMem[1]); /* ms bits */
+#endif
}
#endif
@@ -177,8 +182,13 @@ static inline void storeExtended(const unsigned int Fn, unsigned int __user *pMe
}
put_user(val.i[0], &pMem[0]); /* sign & exp */
+#ifdef __ARMEB__
+ put_user(val.i[1], &pMem[1]); /* msw */
+ put_user(val.i[2], &pMem[2]);
+#else
put_user(val.i[1], &pMem[2]);
put_user(val.i[2], &pMem[1]); /* msw */
+#endif
}
#endif
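
Both hunks encode the same fixed user-memory image; what changes with host byte order is only which kernel word holds which half of the u64 significand. As a sketch of the mapping assumed by the patch:

	/*
	 * User image of an FPA extended value (ascending addresses):
	 *   pMem[0] = sign/exponent, pMem[1] = ms 32 bits, pMem[2] = ls 32 bits
	 *
	 * Kernel u64 significand, accessed as 32-bit words p[1], p[2]:
	 *   little-endian: p[1] is the ls half  ->  p[1]=pMem[2], p[2]=pMem[1]
	 *   big-endian:    p[1] is the ms half  ->  p[1]=pMem[1], p[2]=pMem[2]
	 */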
diff --git a/arch/arm/nwfpe/fpopcode.c b/arch/arm/nwfpe/fpopcode.c
index 4c9f5703148c..67ff2ab08ea0 100644
--- a/arch/arm/nwfpe/fpopcode.c
+++ b/arch/arm/nwfpe/fpopcode.c
@@ -29,14 +29,14 @@
#ifdef CONFIG_FPE_NWFPE_XP
const floatx80 floatx80Constant[] = {
- {0x0000, 0x0000000000000000ULL}, /* extended 0.0 */
- {0x3fff, 0x8000000000000000ULL}, /* extended 1.0 */
- {0x4000, 0x8000000000000000ULL}, /* extended 2.0 */
- {0x4000, 0xc000000000000000ULL}, /* extended 3.0 */
- {0x4001, 0x8000000000000000ULL}, /* extended 4.0 */
- {0x4001, 0xa000000000000000ULL}, /* extended 5.0 */
- {0x3ffe, 0x8000000000000000ULL}, /* extended 0.5 */
- {0x4002, 0xa000000000000000ULL} /* extended 10.0 */
+ { .high = 0x0000, .low = 0x0000000000000000ULL},/* extended 0.0 */
+ { .high = 0x3fff, .low = 0x8000000000000000ULL},/* extended 1.0 */
+ { .high = 0x4000, .low = 0x8000000000000000ULL},/* extended 2.0 */
+ { .high = 0x4000, .low = 0xc000000000000000ULL},/* extended 3.0 */
+ { .high = 0x4001, .low = 0x8000000000000000ULL},/* extended 4.0 */
+ { .high = 0x4001, .low = 0xa000000000000000ULL},/* extended 5.0 */
+ { .high = 0x3ffe, .low = 0x8000000000000000ULL},/* extended 0.5 */
+ { .high = 0x4002, .low = 0xa000000000000000ULL},/* extended 10.0 */
};
#endif
diff --git a/arch/arm/nwfpe/softfloat-specialize b/arch/arm/nwfpe/softfloat-specialize
index acf409144763..d4a4c8e06635 100644
--- a/arch/arm/nwfpe/softfloat-specialize
+++ b/arch/arm/nwfpe/softfloat-specialize
@@ -332,6 +332,7 @@ static floatx80 commonNaNToFloatx80( commonNaNT a )
z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );
z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;
+ z.__padding = 0;
return z;
}
diff --git a/arch/arm/nwfpe/softfloat.c b/arch/arm/nwfpe/softfloat.c
index f9f049132a17..0f9656e482ba 100644
--- a/arch/arm/nwfpe/softfloat.c
+++ b/arch/arm/nwfpe/softfloat.c
@@ -531,6 +531,7 @@ INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
z.low = zSig;
z.high = ( ( (bits16) zSign )<<15 ) + zExp;
+ z.__padding = 0;
return z;
}
@@ -2831,6 +2832,7 @@ static floatx80 subFloatx80Sigs( struct roundingData *roundData, floatx80 a, flo
roundData->exception |= float_flag_invalid;
z.low = floatx80_default_nan_low;
z.high = floatx80_default_nan_high;
+ z.__padding = 0;
return z;
}
if ( aExp == 0 ) {
@@ -2950,6 +2952,7 @@ floatx80 floatx80_mul( struct roundingData *roundData, floatx80 a, floatx80 b )
roundData->exception |= float_flag_invalid;
z.low = floatx80_default_nan_low;
z.high = floatx80_default_nan_high;
+ z.__padding = 0;
return z;
}
return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
@@ -3015,6 +3018,7 @@ floatx80 floatx80_div( struct roundingData *roundData, floatx80 a, floatx80 b )
roundData->exception |= float_flag_invalid;
z.low = floatx80_default_nan_low;
z.high = floatx80_default_nan_high;
+ z.__padding = 0;
return z;
}
roundData->exception |= float_flag_divbyzero;
@@ -3093,6 +3097,7 @@ floatx80 floatx80_rem( struct roundingData *roundData, floatx80 a, floatx80 b )
roundData->exception |= float_flag_invalid;
z.low = floatx80_default_nan_low;
z.high = floatx80_default_nan_high;
+ z.__padding = 0;
return z;
}
normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
@@ -3184,6 +3189,7 @@ floatx80 floatx80_sqrt( struct roundingData *roundData, floatx80 a )
roundData->exception |= float_flag_invalid;
z.low = floatx80_default_nan_low;
z.high = floatx80_default_nan_high;
+ z.__padding = 0;
return z;
}
if ( aExp == 0 ) {
diff --git a/arch/arm/nwfpe/softfloat.h b/arch/arm/nwfpe/softfloat.h
index 14151700b6b2..978c699673c6 100644
--- a/arch/arm/nwfpe/softfloat.h
+++ b/arch/arm/nwfpe/softfloat.h
@@ -51,11 +51,17 @@ input or output the `floatx80' type will be defined.
Software IEC/IEEE floating-point types.
-------------------------------------------------------------------------------
*/
-typedef unsigned long int float32;
-typedef unsigned long long float64;
+typedef u32 float32;
+typedef u64 float64;
typedef struct {
- unsigned short high;
- unsigned long long low;
+#ifdef __ARMEB__
+ u16 __padding;
+ u16 high;
+#else
+ u16 high;
+ u16 __padding;
+#endif
+ u64 low;
} floatx80;
/*
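
The named pad also explains the scattered z.__padding = 0; stores added to softfloat.c above: the compiler would insert those two bytes anyway, but unnamed padding stays uninitialized, so zeroing it keeps every produced floatx80 a deterministic byte image (safe to copy to user space or compare wholesale). A minimal illustration:

	floatx80 z;
	z.low = zSig;
	z.high = ((bits16) zSign << 15) + zExp;
	z.__padding = 0;	/* no stale stack bytes in the pad */
	return z;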
diff --git a/arch/arm26/kernel/ptrace.c b/arch/arm26/kernel/ptrace.c
index cf7e977d18c8..4e6b7356a722 100644
--- a/arch/arm26/kernel/ptrace.c
+++ b/arch/arm26/kernel/ptrace.c
@@ -546,7 +546,7 @@ static int ptrace_setfpregs(struct task_struct *tsk, void *ufp)
sizeof(struct user_fp)) ? -EFAULT : 0;
}
-static int do_ptrace(int request, struct task_struct *child, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
unsigned long tmp;
int ret;
@@ -665,53 +665,6 @@ static int do_ptrace(int request, struct task_struct *child, long addr, long dat
return ret;
}
-asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
-{
- struct task_struct *child;
- int ret;
-
- lock_kernel();
- ret = -EPERM;
- if (request == PTRACE_TRACEME) {
- /* are we already being traced? */
- if (current->ptrace & PT_PTRACED)
- goto out;
- ret = security_ptrace(current->parent, current);
- if (ret)
- goto out;
- /* set the ptrace bit in the process flags. */
- current->ptrace |= PT_PTRACED;
- ret = 0;
- goto out;
- }
- ret = -ESRCH;
- read_lock(&tasklist_lock);
- child = find_task_by_pid(pid);
- if (child)
- get_task_struct(child);
- read_unlock(&tasklist_lock);
- if (!child)
- goto out;
-
- ret = -EPERM;
- if (pid == 1) /* you may not mess with init */
- goto out_tsk;
-
- if (request == PTRACE_ATTACH) {
- ret = ptrace_attach(child);
- goto out_tsk;
- }
- ret = ptrace_check_attach(child, request == PTRACE_KILL);
- if (ret == 0)
- ret = do_ptrace(request, child, addr, data);
-
-out_tsk:
- put_task_struct(child);
-out:
- unlock_kernel();
- return ret;
-}
-
asmlinkage void syscall_trace(int why, struct pt_regs *regs)
{
unsigned long ip;
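
This deletion repeats nearly verbatim for cris, frv, h8300 and i386 below: each architecture keeps only its request switch as arch_ptrace(), while the TRACEME handling, child lookup, attach and state check move to a single generic entry point. A sketch of that common wrapper (kernel/ptrace.c; abbreviated, error handling as in the removed copies):

	asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
	{
		struct task_struct *child;
		long ret;

		lock_kernel();
		if (request == PTRACE_TRACEME) {
			ret = ptrace_traceme();
			goto out;
		}
		child = ptrace_get_task_struct(pid);	/* find + get, rejects pid 1 */
		if (IS_ERR(child)) {
			ret = PTR_ERR(child);
			goto out;
		}
		if (request == PTRACE_ATTACH) {
			ret = ptrace_attach(child);
			goto out_put;
		}
		ret = ptrace_check_attach(child, request == PTRACE_KILL);
		if (!ret)
			ret = arch_ptrace(child, request, addr, data);
	out_put:
		put_task_struct(child);
	out:
		unlock_kernel();
		return ret;
	}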
diff --git a/arch/cris/arch-v10/README.mm b/arch/cris/arch-v10/README.mm
index 6f08903f3139..517d1f027fe8 100644
--- a/arch/cris/arch-v10/README.mm
+++ b/arch/cris/arch-v10/README.mm
@@ -177,7 +177,7 @@ The example address is 0xd004000c; in binary this is:
Given the top-level Page Directory, the offset in that directory is calculated
using the upper 8 bits:
-extern inline pgd_t * pgd_offset(struct mm_struct * mm, unsigned long address)
+static inline pgd_t * pgd_offset(struct mm_struct * mm, unsigned long address)
{
return mm->pgd + (address >> PGDIR_SHIFT);
}
@@ -190,14 +190,14 @@ The pgd_t from our example will therefore be the 208'th (0xd0) entry in mm->pgd.
Since the Middle Directory does not exist, it is a unity mapping:
-extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
+static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
{
return (pmd_t *) dir;
}
The Page Table provides the final lookup by using bits 13 to 23 as index:
-extern inline pte_t * pte_offset(pmd_t * dir, unsigned long address)
+static inline pte_t * pte_offset(pmd_t * dir, unsigned long address)
{
return (pte_t *) pmd_page(*dir) + ((address >> PAGE_SHIFT) &
(PTRS_PER_PTE - 1));
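
Worked through for the example address (PAGE_SHIFT is 13 on this CPU, so the bit-13-to-23 index gives PTRS_PER_PTE = 2048):

	0xd004000c >> 24            = 0xd0 = 208   /* pgd entry, as stated above */
	pmd_offset(dir, addr)       = dir          /* unity mapping              */
	(0xd004000c >> 13) & 0x7ff  = 0x20 = 32    /* pte entry in that table    */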
diff --git a/arch/cris/arch-v10/kernel/ptrace.c b/arch/cris/arch-v10/kernel/ptrace.c
index 130dd214e41d..6cbd34a27b90 100644
--- a/arch/cris/arch-v10/kernel/ptrace.c
+++ b/arch/cris/arch-v10/kernel/ptrace.c
@@ -76,55 +76,11 @@ ptrace_disable(struct task_struct *child)
* (in user space) where the result of the ptrace call is written (instead of
* being returned).
*/
-asmlinkage int
-sys_ptrace(long request, long pid, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
- struct task_struct *child;
int ret;
unsigned long __user *datap = (unsigned long __user *)data;
- lock_kernel();
- ret = -EPERM;
-
- if (request == PTRACE_TRACEME) {
- /* are we already being traced? */
- if (current->ptrace & PT_PTRACED)
- goto out;
- ret = security_ptrace(current->parent, current);
- if (ret)
- goto out;
- /* set the ptrace bit in the process flags. */
- current->ptrace |= PT_PTRACED;
- ret = 0;
- goto out;
- }
-
- ret = -ESRCH;
- read_lock(&tasklist_lock);
- child = find_task_by_pid(pid);
-
- if (child)
- get_task_struct(child);
-
- read_unlock(&tasklist_lock);
-
- if (!child)
- goto out;
-
- ret = -EPERM;
-
- if (pid == 1) /* Leave the init process alone! */
- goto out_tsk;
-
- if (request == PTRACE_ATTACH) {
- ret = ptrace_attach(child);
- goto out_tsk;
- }
-
- ret = ptrace_check_attach(child, request == PTRACE_KILL);
- if (ret < 0)
- goto out_tsk;
-
switch (request) {
/* Read word at location address. */
case PTRACE_PEEKTEXT:
@@ -289,10 +245,7 @@ sys_ptrace(long request, long pid, long addr, long data)
ret = ptrace_request(child, request, addr, data);
break;
}
-out_tsk:
- put_task_struct(child);
-out:
- unlock_kernel();
+
return ret;
}
diff --git a/arch/cris/arch-v10/kernel/signal.c b/arch/cris/arch-v10/kernel/signal.c
index 693771961f85..19bcad05716f 100644
--- a/arch/cris/arch-v10/kernel/signal.c
+++ b/arch/cris/arch-v10/kernel/signal.c
@@ -476,7 +476,7 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
* OK, we're invoking a handler
*/
-extern inline void
+static inline void
handle_signal(int canrestart, unsigned long sig,
siginfo_t *info, struct k_sigaction *ka,
sigset_t *oldset, struct pt_regs * regs)
diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c
index ca72076c630a..501fa52d8d3a 100644
--- a/arch/cris/arch-v32/drivers/cryptocop.c
+++ b/arch/cris/arch-v32/drivers/cryptocop.c
@@ -277,7 +277,7 @@ struct file_operations cryptocop_fops = {
static void free_cdesc(struct cryptocop_dma_desc *cdesc)
{
DEBUG(printk("free_cdesc: cdesc 0x%p, from_pool=%d\n", cdesc, cdesc->from_pool));
- if (cdesc->free_buf) kfree(cdesc->free_buf);
+ kfree(cdesc->free_buf);
if (cdesc->from_pool) {
unsigned long int flags;
@@ -2950,15 +2950,15 @@ static int cryptocop_ioctl_process(struct inode *inode, struct file *filp, unsig
put_page(outpages[i]);
}
- if (digest_result) kfree(digest_result);
- if (inpages) kfree(inpages);
- if (outpages) kfree(outpages);
+ kfree(digest_result);
+ kfree(inpages);
+ kfree(outpages);
if (cop){
- if (cop->tfrm_op.indata) kfree(cop->tfrm_op.indata);
- if (cop->tfrm_op.outdata) kfree(cop->tfrm_op.outdata);
+ kfree(cop->tfrm_op.indata);
+ kfree(cop->tfrm_op.outdata);
kfree(cop);
}
- if (jc) kfree(jc);
+ kfree(jc);
DEBUG(print_lock_status());
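
These conversions (and the identical ones above) are safe because kfree() is defined to ignore a NULL pointer, so the guard adds nothing:

	if (ptr)		/* redundant test ...          */
		kfree(ptr);
	/* ... is exactly equivalent to */
	kfree(ptr);		/* kfree(NULL) is a no-op      */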
diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c
index 208489da2a87..5528b83a622b 100644
--- a/arch/cris/arch-v32/kernel/ptrace.c
+++ b/arch/cris/arch-v32/kernel/ptrace.c
@@ -99,55 +99,11 @@ ptrace_disable(struct task_struct *child)
}
-asmlinkage int
-sys_ptrace(long request, long pid, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
- struct task_struct *child;
int ret;
unsigned long __user *datap = (unsigned long __user *)data;
- lock_kernel();
- ret = -EPERM;
-
- if (request == PTRACE_TRACEME) {
- /* are we already being traced? */
- if (current->ptrace & PT_PTRACED)
- goto out;
- ret = security_ptrace(current->parent, current);
- if (ret)
- goto out;
- /* set the ptrace bit in the process flags. */
- current->ptrace |= PT_PTRACED;
- ret = 0;
- goto out;
- }
-
- ret = -ESRCH;
- read_lock(&tasklist_lock);
- child = find_task_by_pid(pid);
-
- if (child)
- get_task_struct(child);
-
- read_unlock(&tasklist_lock);
-
- if (!child)
- goto out;
-
- ret = -EPERM;
-
- if (pid == 1) /* Leave the init process alone! */
- goto out_tsk;
-
- if (request == PTRACE_ATTACH) {
- ret = ptrace_attach(child);
- goto out_tsk;
- }
-
- ret = ptrace_check_attach(child, request == PTRACE_KILL);
- if (ret < 0)
- goto out_tsk;
-
switch (request) {
/* Read word at location address. */
case PTRACE_PEEKTEXT:
@@ -347,10 +303,7 @@ sys_ptrace(long request, long pid, long addr, long data)
ret = ptrace_request(child, request, addr, data);
break;
}
-out_tsk:
- put_task_struct(child);
-out:
- unlock_kernel();
+
return ret;
}
diff --git a/arch/cris/arch-v32/kernel/signal.c b/arch/cris/arch-v32/kernel/signal.c
index 0a3614dab887..99e59b3eacf8 100644
--- a/arch/cris/arch-v32/kernel/signal.c
+++ b/arch/cris/arch-v32/kernel/signal.c
@@ -513,7 +513,7 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
}
/* Invoke a signal handler to, well, handle the signal. */

-extern inline void
+static inline void
handle_signal(int canrestart, unsigned long sig,
siginfo_t *info, struct k_sigaction *ka,
sigset_t *oldset, struct pt_regs * regs)
diff --git a/arch/cris/mm/ioremap.c b/arch/cris/mm/ioremap.c
index a92ac9877582..1780df3ed9e5 100644
--- a/arch/cris/mm/ioremap.c
+++ b/arch/cris/mm/ioremap.c
@@ -16,7 +16,7 @@
#include
#include
-extern inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
+static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
unsigned long phys_addr, pgprot_t prot)
{
unsigned long end;
diff --git a/arch/frv/kernel/ptrace.c b/arch/frv/kernel/ptrace.c
index cb335a14a315..f953484e7d59 100644
--- a/arch/frv/kernel/ptrace.c
+++ b/arch/frv/kernel/ptrace.c
@@ -106,48 +106,11 @@ void ptrace_enable(struct task_struct *child)
child->thread.frame0->__status |= REG__STATUS_STEP;
}
-asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
- struct task_struct *child;
unsigned long tmp;
int ret;
- lock_kernel();
- ret = -EPERM;
- if (request == PTRACE_TRACEME) {
- /* are we already being traced? */
- if (current->ptrace & PT_PTRACED)
- goto out;
- ret = security_ptrace(current->parent, current);
- if (ret)
- goto out;
- /* set the ptrace bit in the process flags. */
- current->ptrace |= PT_PTRACED;
- ret = 0;
- goto out;
- }
- ret = -ESRCH;
- read_lock(&tasklist_lock);
- child = find_task_by_pid(pid);
- if (child)
- get_task_struct(child);
- read_unlock(&tasklist_lock);
- if (!child)
- goto out;
-
- ret = -EPERM;
- if (pid == 1) /* you may not mess with init */
- goto out_tsk;
-
- if (request == PTRACE_ATTACH) {
- ret = ptrace_attach(child);
- goto out_tsk;
- }
-
- ret = ptrace_check_attach(child, request == PTRACE_KILL);
- if (ret < 0)
- goto out_tsk;
-
switch (request) {
/* when I and D space are separate, these will need to be fixed. */
case PTRACE_PEEKTEXT: /* read word at location addr. */
@@ -351,10 +314,6 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
ret = -EIO;
break;
}
-out_tsk:
- put_task_struct(child);
-out:
- unlock_kernel();
return ret;
}
diff --git a/arch/h8300/kernel/ptrace.c b/arch/h8300/kernel/ptrace.c
index a569fe4aa284..0ff6f79b0fed 100644
--- a/arch/h8300/kernel/ptrace.c
+++ b/arch/h8300/kernel/ptrace.c
@@ -57,43 +57,10 @@ void ptrace_disable(struct task_struct *child)
h8300_disable_trace(child);
}
-asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
- struct task_struct *child;
int ret;
- lock_kernel();
- ret = -EPERM;
- if (request == PTRACE_TRACEME) {
- /* are we already being traced? */
- if (current->ptrace & PT_PTRACED)
- goto out;
- /* set the ptrace bit in the process flags. */
- current->ptrace |= PT_PTRACED;
- ret = 0;
- goto out;
- }
- ret = -ESRCH;
- read_lock(&tasklist_lock);
- child = find_task_by_pid(pid);
- if (child)
- get_task_struct(child);
- read_unlock(&tasklist_lock);
- if (!child)
- goto out;
-
- ret = -EPERM;
- if (pid == 1) /* you may not mess with init */
- goto out_tsk;
-
- if (request == PTRACE_ATTACH) {
- ret = ptrace_attach(child);
- goto out_tsk;
- }
- ret = ptrace_check_attach(child, request == PTRACE_KILL);
- if (ret < 0)
- goto out_tsk;
-
switch (request) {
case PTRACE_PEEKTEXT: /* read word at location addr. */
case PTRACE_PEEKDATA: {
@@ -251,10 +218,6 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
ret = -EIO;
break;
}
-out_tsk:
- put_task_struct(child);
-out:
- unlock_kernel();
return ret;
}
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index bac0da731ee3..dbf90ad6eac3 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -997,8 +997,21 @@ source "drivers/Kconfig"
source "fs/Kconfig"
+menu "Instrumentation Support"
+ depends on EXPERIMENTAL
+
source "arch/i386/oprofile/Kconfig"
+config KPROBES
+ bool "Kprobes (EXPERIMENTAL)"
+ help
+ Kprobes allows you to trap at almost any kernel address and
+ execute a callback function. register_kprobe() establishes
+ a probepoint and specifies the callback. Kprobes is useful
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+endmenu
+
source "arch/i386/Kconfig.debug"
source "security/Kconfig"
diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug
index 5228c40a6fb2..c48b424dd640 100644
--- a/arch/i386/Kconfig.debug
+++ b/arch/i386/Kconfig.debug
@@ -22,16 +22,6 @@ config DEBUG_STACKOVERFLOW
This option will cause messages to be printed if free stack space
drops below a certain limit.
-config KPROBES
- bool "Kprobes"
- depends on DEBUG_KERNEL
- help
- Kprobes allows you to trap at almost any kernel address and
- execute a callback function. register_kprobe() establishes
- a probepoint and specifies the callback. Kprobes is useful
- for kernel debugging, non-intrusive instrumentation and testing.
- If in doubt, say "N".
-
config DEBUG_STACK_USAGE
bool "Stack utilization instrumentation"
depends on DEBUG_KERNEL
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 7c724ffa08bb..496a2c9909fe 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -559,14 +559,20 @@ void __devinit setup_local_APIC(void)
* If Linux enabled the LAPIC against the BIOS default
* disable it down before re-entering the BIOS on shutdown.
* Otherwise the BIOS may get confused and not power-off.
+ * Additionally clear all LVT entries before disable_local_APIC
+ * for the case where Linux didn't enable the LAPIC.
*/
void lapic_shutdown(void)
{
- if (!cpu_has_apic || !enabled_via_apicbase)
+ if (!cpu_has_apic)
return;
local_irq_disable();
- disable_local_APIC();
+ clear_local_APIC();
+
+ if (enabled_via_apicbase)
+ disable_local_APIC();
+
local_irq_enable();
}
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index d2ef0c2aa93e..86e80c551478 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -447,8 +447,7 @@ static char * apm_event_name[] = {
"system standby resume",
"capabilities change"
};
-#define NR_APM_EVENT_NAME \
- (sizeof(apm_event_name) / sizeof(apm_event_name[0]))
+#define NR_APM_EVENT_NAME ARRAY_SIZE(apm_event_name)
typedef struct lookup_t {
int key;
@@ -479,7 +478,7 @@ static const lookup_t error_table[] = {
{ APM_NO_ERROR, "BIOS did not set a return code" },
{ APM_NOT_PRESENT, "No APM present" }
};
-#define ERROR_COUNT (sizeof(error_table)/sizeof(lookup_t))
+#define ERROR_COUNT ARRAY_SIZE(error_table)
/**
* apm_error - display an APM error
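
The same substitution recurs below in mca.c, reboot_fixups.c, smpboot.c and speedstep-centrino.c; ARRAY_SIZE() (from linux/kernel.h) just names the open-coded idiom:

	#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

	#define ERROR_COUNT ARRAY_SIZE(error_table)	/* number of entries */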
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 74145a33cb0f..c145fb30002e 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -30,8 +30,6 @@ static int disable_x86_serial_nr __devinitdata = 1;
struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
-extern void mcheck_init(struct cpuinfo_x86 *c);
-
extern int disable_pse;
static void default_init(struct cpuinfo_x86 * c)
@@ -429,9 +427,8 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
}
/* Init Machine Check Exception if available. */
-#ifdef CONFIG_X86_MCE
mcheck_init(c);
-#endif
+
if (c == &boot_cpu_data)
sysenter_setup();
enable_sep_cpu();
diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
index caa9f7711343..871366b83b3f 100644
--- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -377,10 +377,9 @@ acpi_cpufreq_cpu_init (
arg0.buffer.length = 12;
arg0.buffer.pointer = (u8 *) arg0_buf;
- data = kmalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL);
+ data = kzalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL);
if (!data)
return (-ENOMEM);
- memset(data, 0, sizeof(struct cpufreq_acpi_io));
acpi_io_data[cpu] = data;
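
This is the first of several identical conversions in this series (powernow-k7, powernow-k8 and speedstep-centrino below): kzalloc() folds the allocate-then-clear pair into a single call:

	p = kmalloc(size, GFP_KERNEL);		/* old: two steps           */
	if (!p)
		return -ENOMEM;
	memset(p, 0, size);

	p = kzalloc(size, GFP_KERNEL);		/* new: zeroed on return    */
	if (!p)
		return -ENOMEM;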
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
index 73a5dc5b26b8..edcd626001da 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
@@ -171,10 +171,9 @@ static int get_ranges (unsigned char *pst)
unsigned int speed;
u8 fid, vid;
- powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL);
+ powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL);
if (!powernow_table)
return -ENOMEM;
- memset(powernow_table, 0, (sizeof(struct cpufreq_frequency_table) * (number_scales + 1)));
for (j=0 ; j < number_scales; j++) {
fid = *pst++;
@@ -305,16 +304,13 @@ static int powernow_acpi_init(void)
goto err0;
}
- acpi_processor_perf = kmalloc(sizeof(struct acpi_processor_performance),
+ acpi_processor_perf = kzalloc(sizeof(struct acpi_processor_performance),
GFP_KERNEL);
-
if (!acpi_processor_perf) {
retval = -ENOMEM;
goto err0;
}
- memset(acpi_processor_perf, 0, sizeof(struct acpi_processor_performance));
-
if (acpi_processor_register_performance(acpi_processor_perf, 0)) {
retval = -EIO;
goto err1;
@@ -337,14 +333,12 @@ static int powernow_acpi_init(void)
goto err2;
}
- powernow_table = kmalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL);
+ powernow_table = kzalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL);
if (!powernow_table) {
retval = -ENOMEM;
goto err2;
}
- memset(powernow_table, 0, ((number_scales + 1) * sizeof(struct cpufreq_frequency_table)));
-
pc.val = (unsigned long) acpi_processor_perf->states[0].control;
for (i = 0; i < number_scales; i++) {
u8 fid, vid;
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
index 2d5c9adba0cd..68a1fc87f4ca 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
@@ -462,7 +462,6 @@ static int check_supported_cpu(unsigned int cpu)
oldmask = current->cpus_allowed;
set_cpus_allowed(current, cpumask_of_cpu(cpu));
- schedule();
if (smp_processor_id() != cpu) {
printk(KERN_ERR "limiting to cpu %u failed\n", cpu);
@@ -497,9 +496,7 @@ static int check_supported_cpu(unsigned int cpu)
out:
set_cpus_allowed(current, oldmask);
- schedule();
return rc;
-
}
static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid)
@@ -913,7 +910,6 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
/* only run on specific CPU from here on */
oldmask = current->cpus_allowed;
set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
- schedule();
if (smp_processor_id() != pol->cpu) {
printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu);
@@ -968,8 +964,6 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
err_out:
set_cpus_allowed(current, oldmask);
- schedule();
-
return ret;
}
@@ -991,12 +985,11 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol)
if (!check_supported_cpu(pol->cpu))
return -ENODEV;
- data = kmalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
+ data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
if (!data) {
printk(KERN_ERR PFX "unable to alloc powernow_k8_data");
return -ENOMEM;
}
- memset(data,0,sizeof(struct powernow_k8_data));
data->cpu = pol->cpu;
@@ -1026,7 +1019,6 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol)
/* only run on specific CPU from here on */
oldmask = current->cpus_allowed;
set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
- schedule();
if (smp_processor_id() != pol->cpu) {
printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu);
@@ -1045,7 +1037,6 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol)
/* run on any CPU again */
set_cpus_allowed(current, oldmask);
- schedule();
pol->governor = CPUFREQ_DEFAULT_GOVERNOR;
pol->cpus = cpu_core_map[pol->cpu];
@@ -1080,7 +1071,6 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol)
err_out:
set_cpus_allowed(current, oldmask);
- schedule();
powernow_k8_cpu_exit_acpi(data);
kfree(data);
@@ -1116,17 +1106,14 @@ static unsigned int powernowk8_get (unsigned int cpu)
set_cpus_allowed(current, oldmask);
return 0;
}
- preempt_disable();
-
+
if (query_current_values_with_pending_wait(data))
goto out;
khz = find_khz_freq_from_fid(data->currfid);
- out:
- preempt_enable_no_resched();
+out:
set_cpus_allowed(current, oldmask);
-
return khz;
}
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
index 1465974256c9..edb9873e27e3 100644
--- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -67,7 +67,7 @@ static const struct cpu_id cpu_ids[] = {
[CPU_MP4HT_D0] = {15, 3, 4 },
[CPU_MP4HT_E0] = {15, 4, 1 },
};
-#define N_IDS (sizeof(cpu_ids)/sizeof(cpu_ids[0]))
+#define N_IDS ARRAY_SIZE(cpu_ids)
struct cpu_model
{
@@ -423,12 +423,11 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy)
}
}
- centrino_model[cpu] = kmalloc(sizeof(struct cpu_model), GFP_KERNEL);
+ centrino_model[cpu] = kzalloc(sizeof(struct cpu_model), GFP_KERNEL);
if (!centrino_model[cpu]) {
result = -ENOMEM;
goto err_unreg;
}
- memset(centrino_model[cpu], 0, sizeof(struct cpu_model));
centrino_model[cpu]->model_name=NULL;
centrino_model[cpu]->max_freq = p.states[0].core_frequency * 1000;
diff --git a/arch/i386/kernel/cpu/mcheck/k7.c b/arch/i386/kernel/cpu/mcheck/k7.c
index 7c6b9c73522f..fc5d5215e23d 100644
--- a/arch/i386/kernel/cpu/mcheck/k7.c
+++ b/arch/i386/kernel/cpu/mcheck/k7.c
@@ -68,7 +68,7 @@ static fastcall void k7_machine_check(struct pt_regs * regs, long error_code)
/* AMD K7 machine check is Intel like */
-void __devinit amd_mcheck_init(struct cpuinfo_x86 *c)
+void amd_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;
diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c
index 2cf25d2ba0f1..6170af3c271a 100644
--- a/arch/i386/kernel/cpu/mcheck/mce.c
+++ b/arch/i386/kernel/cpu/mcheck/mce.c
@@ -16,7 +16,7 @@
#include "mce.h"
-int mce_disabled __devinitdata = 0;
+int mce_disabled = 0;
int nr_mce_banks;
EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
@@ -31,7 +31,7 @@ static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_
void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
/* This has to be run for each processor */
-void __devinit mcheck_init(struct cpuinfo_x86 *c)
+void mcheck_init(struct cpuinfo_x86 *c)
{
if (mce_disabled==1)
return;
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c
index 1d1e885f500a..fd2c459a31ef 100644
--- a/arch/i386/kernel/cpu/mcheck/p4.c
+++ b/arch/i386/kernel/cpu/mcheck/p4.c
@@ -77,7 +77,7 @@ fastcall void smp_thermal_interrupt(struct pt_regs *regs)
}
/* P4/Xeon Thermal regulation detect and init */
-static void __devinit intel_init_thermal(struct cpuinfo_x86 *c)
+static void intel_init_thermal(struct cpuinfo_x86 *c)
{
u32 l, h;
unsigned int cpu = smp_processor_id();
@@ -231,7 +231,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
}
-void __devinit intel_p4_mcheck_init(struct cpuinfo_x86 *c)
+void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;
diff --git a/arch/i386/kernel/cpu/mcheck/p5.c b/arch/i386/kernel/cpu/mcheck/p5.c
index 3a2e24baddc7..94bc43d950cf 100644
--- a/arch/i386/kernel/cpu/mcheck/p5.c
+++ b/arch/i386/kernel/cpu/mcheck/p5.c
@@ -28,7 +28,7 @@ static fastcall void pentium_machine_check(struct pt_regs * regs, long error_cod
}
/* Set up machine check reporting for processors with Intel style MCE */
-void __devinit intel_p5_mcheck_init(struct cpuinfo_x86 *c)
+void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
diff --git a/arch/i386/kernel/cpu/mcheck/p6.c b/arch/i386/kernel/cpu/mcheck/p6.c
index 979b18bc95c1..deeae42ce199 100644
--- a/arch/i386/kernel/cpu/mcheck/p6.c
+++ b/arch/i386/kernel/cpu/mcheck/p6.c
@@ -79,7 +79,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
}
/* Set up machine check reporting for processors with Intel style MCE */
-void __devinit intel_p6_mcheck_init(struct cpuinfo_x86 *c)
+void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;
diff --git a/arch/i386/kernel/cpu/mcheck/winchip.c b/arch/i386/kernel/cpu/mcheck/winchip.c
index 5b9d2dd411d3..9e424b6c293d 100644
--- a/arch/i386/kernel/cpu/mcheck/winchip.c
+++ b/arch/i386/kernel/cpu/mcheck/winchip.c
@@ -22,7 +22,7 @@ static fastcall void winchip_machine_check(struct pt_regs * regs, long error_cod
}
/* Set up machine check reporting on the Winchip C6 series */
-void __devinit winchip_mcheck_init(struct cpuinfo_x86 *c)
+void winchip_mcheck_init(struct cpuinfo_x86 *c)
{
u32 lo, hi;
machine_check_vector = winchip_machine_check;
diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c
index f2b37654777f..b59a34dbe262 100644
--- a/arch/i386/kernel/ioport.c
+++ b/arch/i386/kernel/ioport.c
@@ -108,8 +108,11 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
/*
* Sets the lazy trigger so that the next I/O operation will
* reload the correct bitmap.
+ * Reset the owner so that a process switch will not set
+ * tss->io_bitmap_base to IO_BITMAP_OFFSET.
*/
tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
+ tss->io_bitmap_owner = NULL;
put_cpu();
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
index 6345b430b105..32b0c24ab9a6 100644
--- a/arch/i386/kernel/kprobes.c
+++ b/arch/i386/kernel/kprobes.c
@@ -31,22 +31,16 @@
#include
#include
#include
-#include
#include
#include
#include
#include
-static struct kprobe *current_kprobe;
-static unsigned long kprobe_status, kprobe_old_eflags, kprobe_saved_eflags;
-static struct kprobe *kprobe_prev;
-static unsigned long kprobe_status_prev, kprobe_old_eflags_prev, kprobe_saved_eflags_prev;
-static struct pt_regs jprobe_saved_regs;
-static long *jprobe_saved_esp;
-/* copy of the kernel stack at the probe fire time */
-static kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
void jprobe_return_end(void);
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
/*
* returns non-zero if opcode modifies the interrupt flag.
*/
@@ -91,29 +85,30 @@ void __kprobes arch_remove_kprobe(struct kprobe *p)
{
}
-static inline void save_previous_kprobe(void)
+static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
{
- kprobe_prev = current_kprobe;
- kprobe_status_prev = kprobe_status;
- kprobe_old_eflags_prev = kprobe_old_eflags;
- kprobe_saved_eflags_prev = kprobe_saved_eflags;
+ kcb->prev_kprobe.kp = kprobe_running();
+ kcb->prev_kprobe.status = kcb->kprobe_status;
+ kcb->prev_kprobe.old_eflags = kcb->kprobe_old_eflags;
+ kcb->prev_kprobe.saved_eflags = kcb->kprobe_saved_eflags;
}
-static inline void restore_previous_kprobe(void)
+static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
{
- current_kprobe = kprobe_prev;
- kprobe_status = kprobe_status_prev;
- kprobe_old_eflags = kprobe_old_eflags_prev;
- kprobe_saved_eflags = kprobe_saved_eflags_prev;
+ __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+ kcb->kprobe_status = kcb->prev_kprobe.status;
+ kcb->kprobe_old_eflags = kcb->prev_kprobe.old_eflags;
+ kcb->kprobe_saved_eflags = kcb->prev_kprobe.saved_eflags;
}
-static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs)
+static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
{
- current_kprobe = p;
- kprobe_saved_eflags = kprobe_old_eflags
+ __get_cpu_var(current_kprobe) = p;
+ kcb->kprobe_saved_eflags = kcb->kprobe_old_eflags
= (regs->eflags & (TF_MASK | IF_MASK));
if (is_IF_modifier(p->opcode))
- kprobe_saved_eflags &= ~IF_MASK;
+ kcb->kprobe_saved_eflags &= ~IF_MASK;
}
static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
@@ -127,6 +122,7 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
regs->eip = (unsigned long)&p->ainsn.insn;
}
+/* Called with kretprobe_lock held */
void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
struct pt_regs *regs)
{
@@ -157,9 +153,15 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
int ret = 0;
kprobe_opcode_t *addr = NULL;
unsigned long *lp;
+ struct kprobe_ctlblk *kcb;
- /* We're in an interrupt, but this is clear and BUG()-safe. */
+ /*
+ * We don't want to be preempted for the entire
+ * duration of kprobe processing
+ */
preempt_disable();
+ kcb = get_kprobe_ctlblk();
+
/* Check if the application is using LDT entry for its code segment and
* calculate the address by reading the base address from the LDT entry.
*/
@@ -173,15 +175,12 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
}
/* Check we're not actually recursing */
if (kprobe_running()) {
- /* We *are* holding lock here, so this is safe.
- Disarm the probe we just hit, and ignore it. */
p = get_kprobe(addr);
if (p) {
- if (kprobe_status == KPROBE_HIT_SS &&
+ if (kcb->kprobe_status == KPROBE_HIT_SS &&
*p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
regs->eflags &= ~TF_MASK;
- regs->eflags |= kprobe_saved_eflags;
- unlock_kprobes();
+ regs->eflags |= kcb->kprobe_saved_eflags;
goto no_kprobe;
}
/* We have reentered the kprobe_handler(), since
@@ -190,26 +189,23 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
* just single step on the instruction of the new probe
* without calling any user handlers.
*/
- save_previous_kprobe();
- set_current_kprobe(p, regs);
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p, regs, kcb);
p->nmissed++;
prepare_singlestep(p, regs);
- kprobe_status = KPROBE_REENTER;
+ kcb->kprobe_status = KPROBE_REENTER;
return 1;
} else {
- p = current_kprobe;
+ p = __get_cpu_var(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) {
goto ss_probe;
}
}
- /* If it's not ours, can't be delete race, (we hold lock). */
goto no_kprobe;
}
- lock_kprobes();
p = get_kprobe(addr);
if (!p) {
- unlock_kprobes();
if (regs->eflags & VM_MASK) {
/* We are in virtual-8086 mode. Return 0 */
goto no_kprobe;
@@ -232,8 +228,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
goto no_kprobe;
}
- kprobe_status = KPROBE_HIT_ACTIVE;
- set_current_kprobe(p, regs);
+ set_current_kprobe(p, regs, kcb);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
if (p->pre_handler && p->pre_handler(p, regs))
/* handler has already set things up, so skip ss setup */
@@ -241,7 +237,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
ss_probe:
prepare_singlestep(p, regs);
- kprobe_status = KPROBE_HIT_SS;
+ kcb->kprobe_status = KPROBE_HIT_SS;
return 1;
no_kprobe:
@@ -269,9 +265,10 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
struct kretprobe_instance *ri = NULL;
struct hlist_head *head;
struct hlist_node *node, *tmp;
- unsigned long orig_ret_address = 0;
+ unsigned long flags, orig_ret_address = 0;
unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
+ spin_lock_irqsave(&kretprobe_lock, flags);
head = kretprobe_inst_table_head(current);
/*
@@ -310,14 +307,15 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
regs->eip = orig_ret_address;
- unlock_kprobes();
+ reset_current_kprobe();
+ spin_unlock_irqrestore(&kretprobe_lock, flags);
preempt_enable_no_resched();
- /*
- * By returning a non-zero value, we are telling
- * kprobe_handler() that we have handled unlocking
- * and re-enabling preemption.
- */
+ /*
+ * By returning a non-zero value, we are telling
+ * kprobe_handler() that we don't want the post_handler
+ * to run (and have re-enabled preemption)
+ */
return 1;
}
@@ -343,7 +341,8 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
* that is atop the stack is the address following the copied instruction.
* We need to make it the address following the original instruction.
*/
-static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes resume_execution(struct kprobe *p,
+ struct pt_regs *regs, struct kprobe_ctlblk *kcb)
{
unsigned long *tos = (unsigned long *)&regs->esp;
unsigned long next_eip = 0;
@@ -353,7 +352,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
switch (p->ainsn.insn[0]) {
case 0x9c: /* pushfl */
*tos &= ~(TF_MASK | IF_MASK);
- *tos |= kprobe_old_eflags;
+ *tos |= kcb->kprobe_old_eflags;
break;
case 0xc3: /* ret/lret */
case 0xcb:
@@ -394,27 +393,30 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
/*
* Interrupts are disabled on entry as trap1 is an interrupt gate and they
- * remain disabled thoroughout this function. And we hold kprobe lock.
+ * remain disabled throughout this function.
*/
static inline int post_kprobe_handler(struct pt_regs *regs)
{
- if (!kprobe_running())
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if (!cur)
return 0;
- if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) {
- kprobe_status = KPROBE_HIT_SSDONE;
- current_kprobe->post_handler(current_kprobe, regs, 0);
+ if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ cur->post_handler(cur, regs, 0);
}
- resume_execution(current_kprobe, regs);
- regs->eflags |= kprobe_saved_eflags;
+ resume_execution(cur, regs, kcb);
+ regs->eflags |= kcb->kprobe_saved_eflags;
/*Restore back the original saved kprobes variables and continue. */
- if (kprobe_status == KPROBE_REENTER) {
- restore_previous_kprobe();
+ if (kcb->kprobe_status == KPROBE_REENTER) {
+ restore_previous_kprobe(kcb);
goto out;
}
- unlock_kprobes();
+ reset_current_kprobe();
out:
preempt_enable_no_resched();
@@ -429,18 +431,19 @@ static inline int post_kprobe_handler(struct pt_regs *regs)
return 1;
}
-/* Interrupts disabled, kprobe_lock held. */
static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
{
- if (current_kprobe->fault_handler
- && current_kprobe->fault_handler(current_kprobe, regs, trapnr))
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
return 1;
- if (kprobe_status & KPROBE_HIT_SS) {
- resume_execution(current_kprobe, regs);
- regs->eflags |= kprobe_old_eflags;
+ if (kcb->kprobe_status & KPROBE_HIT_SS) {
+ resume_execution(cur, regs, kcb);
+ regs->eflags |= kcb->kprobe_old_eflags;
- unlock_kprobes();
+ reset_current_kprobe();
preempt_enable_no_resched();
}
return 0;
@@ -453,39 +456,41 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data)
{
struct die_args *args = (struct die_args *)data;
+ int ret = NOTIFY_DONE;
+
switch (val) {
case DIE_INT3:
if (kprobe_handler(args->regs))
- return NOTIFY_STOP;
+ ret = NOTIFY_STOP;
break;
case DIE_DEBUG:
if (post_kprobe_handler(args->regs))
- return NOTIFY_STOP;
+ ret = NOTIFY_STOP;
break;
case DIE_GPF:
- if (kprobe_running() &&
- kprobe_fault_handler(args->regs, args->trapnr))
- return NOTIFY_STOP;
- break;
case DIE_PAGE_FAULT:
+ /* kprobe_running() needs smp_processor_id() */
+ preempt_disable();
if (kprobe_running() &&
kprobe_fault_handler(args->regs, args->trapnr))
- return NOTIFY_STOP;
+ ret = NOTIFY_STOP;
+ preempt_enable();
break;
default:
break;
}
- return NOTIFY_DONE;
+ return ret;
}
int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
struct jprobe *jp = container_of(p, struct jprobe, kp);
unsigned long addr;
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- jprobe_saved_regs = *regs;
- jprobe_saved_esp = &regs->esp;
- addr = (unsigned long)jprobe_saved_esp;
+ kcb->jprobe_saved_regs = *regs;
+ kcb->jprobe_saved_esp = &regs->esp;
+ addr = (unsigned long)(kcb->jprobe_saved_esp);
/*
* TBD: As Linus pointed out, gcc assumes that the callee
@@ -494,7 +499,8 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
* we also save and restore enough stack bytes to cover
* the argument area.
*/
- memcpy(jprobes_stack, (kprobe_opcode_t *) addr, MIN_STACK_SIZE(addr));
+ memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
+ MIN_STACK_SIZE(addr));
regs->eflags &= ~IF_MASK;
regs->eip = (unsigned long)(jp->entry);
return 1;
@@ -502,36 +508,40 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
void __kprobes jprobe_return(void)
{
- preempt_enable_no_resched();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
asm volatile (" xchgl %%ebx,%%esp \n"
" int3 \n"
" .globl jprobe_return_end \n"
" jprobe_return_end: \n"
" nop \n"::"b"
- (jprobe_saved_esp):"memory");
+ (kcb->jprobe_saved_esp):"memory");
}
int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
u8 *addr = (u8 *) (regs->eip - 1);
- unsigned long stack_addr = (unsigned long)jprobe_saved_esp;
+ unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_esp);
struct jprobe *jp = container_of(p, struct jprobe, kp);
if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) {
- if (&regs->esp != jprobe_saved_esp) {
+ if (&regs->esp != kcb->jprobe_saved_esp) {
struct pt_regs *saved_regs =
- container_of(jprobe_saved_esp, struct pt_regs, esp);
+ container_of(kcb->jprobe_saved_esp,
+ struct pt_regs, esp);
printk("current esp %p does not match saved esp %p\n",
- &regs->esp, jprobe_saved_esp);
+ &regs->esp, kcb->jprobe_saved_esp);
printk("Saved registers for jprobe %p\n", jp);
show_registers(saved_regs);
printk("Current registers\n");
show_registers(regs);
BUG();
}
- *regs = jprobe_saved_regs;
- memcpy((kprobe_opcode_t *) stack_addr, jprobes_stack,
+ *regs = kcb->jprobe_saved_regs;
+ memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack,
MIN_STACK_SIZE(stack_addr));
+ preempt_enable_no_resched();
return 1;
}
return 0;
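
The thread running through this whole kprobes.c conversion: probe state that used to live in file-scope globals under a global kprobe lock now lives in per-CPU data, valid to touch only with preemption disabled. A sketch of the shape the patch assumes (the real definitions live in the asm and linux kprobes headers):

	struct prev_kprobe {			/* one level of reentrancy */
		struct kprobe *kp;
		unsigned long status;
		unsigned long old_eflags;
		unsigned long saved_eflags;
	};

	struct kprobe_ctlblk {
		unsigned long kprobe_status;
		unsigned long kprobe_old_eflags;
		unsigned long kprobe_saved_eflags;
		struct prev_kprobe prev_kprobe;
		/* jprobe save area omitted */
	};

	DEFINE_PER_CPU(struct kprobe *, current_kprobe);
	DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);

	/* both valid only with preemption disabled: */
	#define kprobe_running()	(__get_cpu_var(current_kprobe))
	#define get_kprobe_ctlblk()	(&__get_cpu_var(kprobe_ctlblk))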
diff --git a/arch/i386/kernel/ldt.c b/arch/i386/kernel/ldt.c
index fe1ffa55587d..983f95707e11 100644
--- a/arch/i386/kernel/ldt.c
+++ b/arch/i386/kernel/ldt.c
@@ -18,6 +18,7 @@
#include
#include
#include
+#include
#ifdef CONFIG_SMP /* avoids "defined but not used" warning */
static void flush_ldt(void *null)
diff --git a/arch/i386/kernel/mca.c b/arch/i386/kernel/mca.c
index 8600faeea29d..558bb207720f 100644
--- a/arch/i386/kernel/mca.c
+++ b/arch/i386/kernel/mca.c
@@ -132,7 +132,7 @@ static struct resource mca_standard_resources[] = {
{ .start = 0x100, .end = 0x107, .name = "POS (MCA)" }
};
-#define MCA_STANDARD_RESOURCES (sizeof(mca_standard_resources)/sizeof(struct resource))
+#define MCA_STANDARD_RESOURCES ARRAY_SIZE(mca_standard_resources)
/**
* mca_read_and_store_pos - read the POS registers into a memory buffer
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
index efd11f09c996..5ffbb4b7ad05 100644
--- a/arch/i386/kernel/ptrace.c
+++ b/arch/i386/kernel/ptrace.c
@@ -354,49 +354,12 @@ ptrace_set_thread_area(struct task_struct *child,
return 0;
}
-asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
- struct task_struct *child;
struct user * dummy = NULL;
int i, ret;
unsigned long __user *datap = (unsigned long __user *)data;
- lock_kernel();
- ret = -EPERM;
- if (request == PTRACE_TRACEME) {
- /* are we already being traced? */
- if (current->ptrace & PT_PTRACED)
- goto out;
- ret = security_ptrace(current->parent, current);
- if (ret)
- goto out;
- /* set the ptrace bit in the process flags. */
- current->ptrace |= PT_PTRACED;
- ret = 0;
- goto out;
- }
- ret = -ESRCH;
- read_lock(&tasklist_lock);
- child = find_task_by_pid(pid);
- if (child)
- get_task_struct(child);
- read_unlock(&tasklist_lock);
- if (!child)
- goto out;
-
- ret = -EPERM;
- if (pid == 1) /* you may not mess with init */
- goto out_tsk;
-
- if (request == PTRACE_ATTACH) {
- ret = ptrace_attach(child);
- goto out_tsk;
- }
-
- ret = ptrace_check_attach(child, request == PTRACE_KILL);
- if (ret < 0)
- goto out_tsk;
-
switch (request) {
/* when I and D space are separate, these will need to be fixed. */
case PTRACE_PEEKTEXT: /* read word at location addr. */
@@ -663,10 +626,7 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
ret = ptrace_request(child, request, addr, data);
break;
}
-out_tsk:
- put_task_struct(child);
-out:
- unlock_kernel();
+ out_tsk:
return ret;
}
diff --git a/arch/i386/kernel/reboot_fixups.c b/arch/i386/kernel/reboot_fixups.c
index c9b87330aeea..10e21a4773dd 100644
--- a/arch/i386/kernel/reboot_fixups.c
+++ b/arch/i386/kernel/reboot_fixups.c
@@ -10,6 +10,7 @@
#include
#include
+#include
static void cs5530a_warm_reset(struct pci_dev *dev)
{
@@ -42,7 +43,7 @@ void mach_reboot_fixups(void)
struct pci_dev *dev;
int i;
- for (i=0; i < (sizeof(fixups_table)/sizeof(fixups_table[0])); i++) {
+ for (i=0; i < ARRAY_SIZE(fixups_table); i++) {
cur = &(fixups_table[i]);
dev = pci_get_device(cur->vendor, cur->device, NULL);
if (!dev)
diff --git a/arch/i386/kernel/scx200.c b/arch/i386/kernel/scx200.c
index 69e203a0d330..9c968ae67c43 100644
--- a/arch/i386/kernel/scx200.c
+++ b/arch/i386/kernel/scx200.c
@@ -12,6 +12,7 @@
#include
#include
+#include
/* Verify that the configuration block really is there */
#define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base))
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 01b618e73ecd..47ec76794d02 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -68,11 +68,9 @@ EXPORT_SYMBOL(smp_num_siblings);
/* Package ID of each logical CPU */
int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
-EXPORT_SYMBOL(phys_proc_id);
/* Core ID of each logical CPU */
int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
-EXPORT_SYMBOL(cpu_core_id);
cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(cpu_sibling_map);
@@ -612,7 +610,7 @@ static inline void __inquire_remote_apic(int apicid)
printk("Inquiring remote APIC #%d...\n", apicid);
- for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
+ for (i = 0; i < ARRAY_SIZE(regs); i++) {
printk("... APIC #%d %s: ", apicid, names[i]);
/*
diff --git a/arch/i386/oprofile/Kconfig b/arch/i386/oprofile/Kconfig
index 5ade19801b97..d8a84088471a 100644
--- a/arch/i386/oprofile/Kconfig
+++ b/arch/i386/oprofile/Kconfig
@@ -1,7 +1,3 @@
-
-menu "Profiling support"
- depends on EXPERIMENTAL
-
config PROFILING
bool "Profiling support (EXPERIMENTAL)"
help
@@ -19,5 +15,3 @@ config OPROFILE
If unsure, say N.
-endmenu
-
diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c
index 1f1572692e0b..50a0bef8c85f 100644
--- a/arch/i386/power/cpu.c
+++ b/arch/i386/power/cpu.c
@@ -118,6 +118,7 @@ void __restore_processor_state(struct saved_context *ctxt)
fix_processor_context();
do_fpu_end();
mtrr_ap_init();
+ mcheck_init(&boot_cpu_data);
}
void restore_processor_state(void)
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 3b4248cff9a7..d4de8a4814be 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -191,6 +191,7 @@ config IOSAPIC
config IA64_SGI_SN_XP
tristate "Support communication between SGI SSIs"
+ depends on IA64_GENERIC || IA64_SGI_SN2
select IA64_UNCACHED_ALLOCATOR
help
An SGI machine can be divided into multiple Single System
@@ -426,8 +427,21 @@ config GENERIC_PENDING_IRQ
source "arch/ia64/hp/sim/Kconfig"
+menu "Instrumentation Support"
+ depends on EXPERIMENTAL
+
source "arch/ia64/oprofile/Kconfig"
+config KPROBES
+ bool "Kprobes (EXPERIMENTAL)"
+ help
+ Kprobes allows you to trap at almost any kernel address and
+ execute a callback function. register_kprobe() establishes
+ a probepoint and specifies the callback. Kprobes is useful
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+endmenu
+
source "arch/ia64/Kconfig.debug"
source "security/Kconfig"
diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug
index fda67ac993d7..de9d507ba0fd 100644
--- a/arch/ia64/Kconfig.debug
+++ b/arch/ia64/Kconfig.debug
@@ -2,17 +2,6 @@ menu "Kernel hacking"
source "lib/Kconfig.debug"
-config KPROBES
- bool "Kprobes"
- depends on DEBUG_KERNEL
- help
- Kprobes allows you to trap at almost any kernel address and
- execute a callback function. register_kprobe() establishes
- a probepoint and specifies the callback. Kprobes is useful
- for kernel debugging, non-intrusive instrumentation and testing.
- If in doubt, say "N".
-
-
choice
prompt "Physical memory granularity"
default IA64_GRANULE_64MB
diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
index b42ec37be51c..19ee635eeb70 100644
--- a/arch/ia64/hp/sim/simserial.c
+++ b/arch/ia64/hp/sim/simserial.c
@@ -642,10 +642,8 @@ static void rs_close(struct tty_struct *tty, struct file * filp)
info->event = 0;
info->tty = 0;
if (info->blocked_open) {
- if (info->close_delay) {
- current->state = TASK_INTERRUPTIBLE;
- schedule_timeout(info->close_delay);
- }
+ if (info->close_delay)
+ schedule_timeout_interruptible(info->close_delay);
wake_up_interruptible(&info->open_wait);
}
info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
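
This hunk, like the m68k and MIPS ones later in the series, collapses the two-line sleep pattern into schedule_timeout_interruptible(). The helper is essentially (sketch of kernel/timer.c):

signed long __sched schedule_timeout_interruptible(signed long timeout)
{
	/* Setting the state inside the helper means callers can no
	 * longer forget the set_current_state() step. */
	__set_current_state(TASK_INTERRUPTIBLE);
	return schedule_timeout(timeout);
}
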
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 471086b808a4..96736a119c91 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -26,7 +26,6 @@
#include
#include
#include
-#include
#include
#include
#include
@@ -38,13 +37,8 @@
extern void jprobe_inst_return(void);
-/* kprobe_status settings */
-#define KPROBE_HIT_ACTIVE 0x00000001
-#define KPROBE_HIT_SS 0x00000002
-
-static struct kprobe *current_kprobe, *kprobe_prev;
-static unsigned long kprobe_status, kprobe_status_prev;
-static struct pt_regs jprobe_saved_regs;
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
enum instruction_type {A, I, M, F, B, L, X, u};
static enum instruction_type bundle_encoding[32][3] = {
@@ -313,21 +307,22 @@ static int __kprobes valid_kprobe_addr(int template, int slot,
return 0;
}
-static inline void save_previous_kprobe(void)
+static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
{
- kprobe_prev = current_kprobe;
- kprobe_status_prev = kprobe_status;
+ kcb->prev_kprobe.kp = kprobe_running();
+ kcb->prev_kprobe.status = kcb->kprobe_status;
}
-static inline void restore_previous_kprobe(void)
+static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
{
- current_kprobe = kprobe_prev;
- kprobe_status = kprobe_status_prev;
+ __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+ kcb->kprobe_status = kcb->prev_kprobe.status;
}
-static inline void set_current_kprobe(struct kprobe *p)
+static inline void set_current_kprobe(struct kprobe *p,
+ struct kprobe_ctlblk *kcb)
{
- current_kprobe = p;
+ __get_cpu_var(current_kprobe) = p;
}
static void kretprobe_trampoline(void)
@@ -347,10 +342,11 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
struct kretprobe_instance *ri = NULL;
struct hlist_head *head;
struct hlist_node *node, *tmp;
- unsigned long orig_ret_address = 0;
+ unsigned long flags, orig_ret_address = 0;
unsigned long trampoline_address =
((struct fnptr *)kretprobe_trampoline)->ip;
+ spin_lock_irqsave(&kretprobe_lock, flags);
head = kretprobe_inst_table_head(current);
/*
@@ -389,17 +385,19 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
regs->cr_iip = orig_ret_address;
- unlock_kprobes();
+ reset_current_kprobe();
+ spin_unlock_irqrestore(&kretprobe_lock, flags);
preempt_enable_no_resched();
- /*
- * By returning a non-zero value, we are telling
- * kprobe_handler() that we have handled unlocking
- * and re-enabling preemption.
- */
+ /*
+ * By returning a non-zero value, we are telling
+ * kprobe_handler() that we don't want the post_handler
+ * to run (and have re-enabled preemption)
+ */
return 1;
}
+/* Called with kretprobe_lock held */
void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
struct pt_regs *regs)
{
@@ -606,17 +604,22 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
int ret = 0;
struct pt_regs *regs = args->regs;
kprobe_opcode_t *addr = (kprobe_opcode_t *)instruction_pointer(regs);
+ struct kprobe_ctlblk *kcb;
+ /*
+ * We don't want to be preempted for the entire
+ * duration of kprobe processing
+ */
preempt_disable();
+ kcb = get_kprobe_ctlblk();
/* Handle recursion cases */
if (kprobe_running()) {
p = get_kprobe(addr);
if (p) {
- if ( (kprobe_status == KPROBE_HIT_SS) &&
+ if ((kcb->kprobe_status == KPROBE_HIT_SS) &&
(p->ainsn.inst_flag == INST_FLAG_BREAK_INST)) {
ia64_psr(regs)->ss = 0;
- unlock_kprobes();
goto no_kprobe;
}
/* We have reentered the pre_kprobe_handler(), since
@@ -625,17 +628,17 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
* just single step on the instruction of the new probe
* without calling any user handlers.
*/
- save_previous_kprobe();
- set_current_kprobe(p);
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p, kcb);
p->nmissed++;
prepare_ss(p, regs);
- kprobe_status = KPROBE_REENTER;
+ kcb->kprobe_status = KPROBE_REENTER;
return 1;
} else if (args->err == __IA64_BREAK_JPROBE) {
/*
* jprobe instrumented function just completed
*/
- p = current_kprobe;
+ p = __get_cpu_var(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) {
goto ss_probe;
}
@@ -645,10 +648,8 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
}
}
- lock_kprobes();
p = get_kprobe(addr);
if (!p) {
- unlock_kprobes();
if (!is_ia64_break_inst(regs)) {
/*
* The breakpoint instruction was removed right
@@ -665,8 +666,8 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
goto no_kprobe;
}
- kprobe_status = KPROBE_HIT_ACTIVE;
- set_current_kprobe(p);
+ set_current_kprobe(p, kcb);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
if (p->pre_handler && p->pre_handler(p, regs))
/*
@@ -678,7 +679,7 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
ss_probe:
prepare_ss(p, regs);
- kprobe_status = KPROBE_HIT_SS;
+ kcb->kprobe_status = KPROBE_HIT_SS;
return 1;
no_kprobe:
@@ -688,23 +689,25 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
static int __kprobes post_kprobes_handler(struct pt_regs *regs)
{
- if (!kprobe_running())
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if (!cur)
return 0;
- if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) {
- kprobe_status = KPROBE_HIT_SSDONE;
- current_kprobe->post_handler(current_kprobe, regs, 0);
+ if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ cur->post_handler(cur, regs, 0);
}
- resume_execution(current_kprobe, regs);
+ resume_execution(cur, regs);
/*Restore back the original saved kprobes variables and continue. */
- if (kprobe_status == KPROBE_REENTER) {
- restore_previous_kprobe();
+ if (kcb->kprobe_status == KPROBE_REENTER) {
+ restore_previous_kprobe(kcb);
goto out;
}
-
- unlock_kprobes();
+ reset_current_kprobe();
out:
preempt_enable_no_resched();
@@ -713,16 +716,15 @@ static int __kprobes post_kprobes_handler(struct pt_regs *regs)
static int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr)
{
- if (!kprobe_running())
- return 0;
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- if (current_kprobe->fault_handler &&
- current_kprobe->fault_handler(current_kprobe, regs, trapnr))
+ if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
return 1;
- if (kprobe_status & KPROBE_HIT_SS) {
- resume_execution(current_kprobe, regs);
- unlock_kprobes();
+ if (kcb->kprobe_status & KPROBE_HIT_SS) {
+ resume_execution(cur, regs);
+ reset_current_kprobe();
preempt_enable_no_resched();
}
@@ -733,31 +735,38 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data)
{
struct die_args *args = (struct die_args *)data;
+ int ret = NOTIFY_DONE;
+
switch(val) {
case DIE_BREAK:
if (pre_kprobes_handler(args))
- return NOTIFY_STOP;
+ ret = NOTIFY_STOP;
break;
case DIE_SS:
if (post_kprobes_handler(args->regs))
- return NOTIFY_STOP;
+ ret = NOTIFY_STOP;
break;
case DIE_PAGE_FAULT:
- if (kprobes_fault_handler(args->regs, args->trapnr))
- return NOTIFY_STOP;
+ /* kprobe_running() needs smp_processor_id() */
+ preempt_disable();
+ if (kprobe_running() &&
+ kprobes_fault_handler(args->regs, args->trapnr))
+ ret = NOTIFY_STOP;
+ preempt_enable();
default:
break;
}
- return NOTIFY_DONE;
+ return ret;
}
int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
struct jprobe *jp = container_of(p, struct jprobe, kp);
unsigned long addr = ((struct fnptr *)(jp->entry))->ip;
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
/* save architectural state */
- jprobe_saved_regs = *regs;
+ kcb->jprobe_saved_regs = *regs;
/* after rfi, execute the jprobe instrumented function */
regs->cr_iip = addr & ~0xFULL;
@@ -775,7 +784,10 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
{
- *regs = jprobe_saved_regs;
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ *regs = kcb->jprobe_saved_regs;
+ preempt_enable_no_resched();
return 1;
}
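
The conversion above moves the kprobe bookkeeping from file-static variables into per-CPU data, which is why every path now runs with preemption disabled. The access pattern, reduced to a sketch:

#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/kprobes.h>

DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;

static void set_cpu_probe(struct kprobe *p)
{
	/* Preemption must stay off while touching the per-CPU slot,
	 * or the task could migrate and update another CPU's state. */
	preempt_disable();
	__get_cpu_var(current_kprobe) = p;
	preempt_enable();
}
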
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index f7dfc107cb7b..410d4804fa6e 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -4940,7 +4940,7 @@ sys_perfmonctl (int fd, int cmd, void __user *arg, int count)
if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT;
error_args:
- if (args_k) kfree(args_k);
+ kfree(args_k);
DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret));
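
The perfmon cleanup relies on kfree(NULL) being defined as a no-op, so the guard was dead weight:

#include <linux/slab.h>

static void release_args(void *args_k)
{
	/* No NULL check needed: kfree(NULL) simply returns. */
	kfree(args_k);
}
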
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index fc56ca2da358..3af6de36a482 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -92,6 +92,13 @@ extern void efi_initialize_iomem_resources(struct resource *,
extern char _text[], _end[], _etext[];
unsigned long ia64_max_cacheline_size;
+
+int dma_get_cache_alignment(void)
+{
+ return ia64_max_cacheline_size;
+}
+EXPORT_SYMBOL(dma_get_cache_alignment);
+
unsigned long ia64_iobase; /* virtual address for I/O accesses */
EXPORT_SYMBOL(ia64_iobase);
struct io_space io_space[MAX_IO_SPACES];
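
A hypothetical caller of the new export, padding an allocation so a DMA target never shares a cacheline with neighbouring data (function name and usage are illustrative):

#include <linux/kernel.h>	/* ALIGN() */
#include <linux/slab.h>
#include <linux/dma-mapping.h>

static void *alloc_dma_buffer(size_t len)
{
	int align = dma_get_cache_alignment();

	/* Round the size up to a whole number of cachelines. */
	return kmalloc(ALIGN(len, align), GFP_KERNEL);
}
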
diff --git a/arch/ia64/oprofile/Kconfig b/arch/ia64/oprofile/Kconfig
index 56e6f614b04a..97271ab484dc 100644
--- a/arch/ia64/oprofile/Kconfig
+++ b/arch/ia64/oprofile/Kconfig
@@ -1,7 +1,3 @@
-
-menu "Profiling support"
- depends on EXPERIMENTAL
-
config PROFILING
bool "Profiling support (EXPERIMENTAL)"
help
@@ -22,5 +18,3 @@ config OPROFILE
If unsure, say N.
-endmenu
-
diff --git a/arch/m68k/atari/time.c b/arch/m68k/atari/time.c
index 6df7fb60dfea..e79bbc94216d 100644
--- a/arch/m68k/atari/time.c
+++ b/arch/m68k/atari/time.c
@@ -212,10 +212,8 @@ int atari_tt_hwclk( int op, struct rtc_time *t )
* additionally the RTC_SET bit is set to prevent an update cycle.
*/
- while( RTC_READ(RTC_FREQ_SELECT) & RTC_UIP ) {
- current->state = TASK_INTERRUPTIBLE;
- schedule_timeout(HWCLK_POLL_INTERVAL);
- }
+ while( RTC_READ(RTC_FREQ_SELECT) & RTC_UIP )
+ schedule_timeout_interruptible(HWCLK_POLL_INTERVAL);
local_irq_save(flags);
RTC_WRITE( RTC_CONTROL, ctrl | RTC_SET );
diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c
index f7f1d2e5b90b..7e54422685cf 100644
--- a/arch/m68k/kernel/ptrace.c
+++ b/arch/m68k/kernel/ptrace.c
@@ -121,48 +121,11 @@ void ptrace_disable(struct task_struct *child)
child->thread.work.syscall_trace = 0;
}
-asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
- struct task_struct *child;
unsigned long tmp;
int i, ret = 0;
- lock_kernel();
- if (request == PTRACE_TRACEME) {
- /* are we already being traced? */
- if (current->ptrace & PT_PTRACED) {
- ret = -EPERM;
- goto out;
- }
- /* set the ptrace bit in the process flags. */
- current->ptrace |= PT_PTRACED;
- goto out;
- }
- read_lock(&tasklist_lock);
- child = find_task_by_pid(pid);
- if (child)
- get_task_struct(child);
- read_unlock(&tasklist_lock);
- if (unlikely(!child)) {
- ret = -ESRCH;
- goto out;
- }
-
- /* you may not mess with init */
- if (unlikely(pid == 1)) {
- ret = -EPERM;
- goto out_tsk;
- }
-
- if (request == PTRACE_ATTACH) {
- ret = ptrace_attach(child);
- goto out_tsk;
- }
-
- ret = ptrace_check_attach(child, request == PTRACE_KILL);
- if (ret)
- goto out_tsk;
-
switch (request) {
/* when I and D space are separate, these will need to be fixed. */
case PTRACE_PEEKTEXT: /* read word at location addr. */
@@ -317,14 +280,10 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
ret = ptrace_request(child, request, addr, data);
break;
}
-out_tsk:
- put_task_struct(child);
-out:
- unlock_kernel();
+
return ret;
out_eio:
- ret = -EIO;
- goto out_tsk;
+ return -EIO;
}
asmlinkage void syscall_trace(void)
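
This is one piece of a tree-wide cleanup: the task lookup, PTRACE_TRACEME/PTRACE_ATTACH handling and the attach check move into a common sys_ptrace() in kernel/ptrace.c, which then hands architecture-specific requests to arch_ptrace(). In sketch:

#include <linux/err.h>
#include <linux/ptrace.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>

/* kernel/ptrace.c (sketch of the common entry point) */
asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
{
	struct task_struct *child;
	long ret;

	lock_kernel();
	if (request == PTRACE_TRACEME) {
		ret = ptrace_traceme();
		goto out;
	}

	child = ptrace_get_task_struct(pid);	/* lookup + refcount */
	if (IS_ERR(child)) {
		ret = PTR_ERR(child);
		goto out;
	}

	if (request == PTRACE_ATTACH) {
		ret = ptrace_attach(child);
	} else {
		ret = ptrace_check_attach(child, request == PTRACE_KILL);
		if (!ret)
			ret = arch_ptrace(child, request, addr, data);
	}

	put_task_struct(child);
out:
	unlock_kernel();
	return ret;
}
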
diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig
index 8520df9cee6d..b96498120fe9 100644
--- a/arch/m68knommu/Kconfig
+++ b/arch/m68knommu/Kconfig
@@ -71,6 +71,11 @@ config M5206e
help
Motorola ColdFire 5206e processor support.
+config M520x
+ bool "MCF520x"
+ help
+ Freescale ColdFire 5207/5208 processor support.
+
config M523x
bool "MCF523x"
help
@@ -120,7 +125,7 @@ config M527x
config COLDFIRE
bool
- depends on (M5206 || M5206e || M523x || M5249 || M527x || M5272 || M528x || M5307 || M5407)
+ depends on (M5206 || M5206e || M520x || M523x || M5249 || M527x || M5272 || M528x || M5307 || M5407)
default y
choice
@@ -322,6 +327,12 @@ config ELITE
help
Support for the Motorola M5206eLITE board.
+config M5208EVB
+ bool "Freescale M5208EVB board support"
+ depends on M520x
+ help
+ Support for the Freescale ColdFire M5208EVB.
+
config M5235EVB
bool "Freescale M5235EVB support"
depends on M523x
@@ -465,10 +476,10 @@ config ARNEWSH
default y
depends on (ARN5206 || ARN5307)
-config MOTOROLA
+config FREESCALE
bool
default y
- depends on (M5206eC3 || M5235EVB || M5249C3 || M5271EVB || M5272C3 || M5275EVB || M5282EVB || M5307C3 || M5407C3)
+ depends on (M5206eC3 || M5208EVB || M5235EVB || M5249C3 || M5271EVB || M5272C3 || M5275EVB || M5282EVB || M5307C3 || M5407C3)
config HW_FEITH
bool
diff --git a/arch/m68knommu/Makefile b/arch/m68knommu/Makefile
index b8fdf191b8f6..b6b5c14e55fd 100644
--- a/arch/m68knommu/Makefile
+++ b/arch/m68knommu/Makefile
@@ -14,6 +14,7 @@ platform-$(CONFIG_M68VZ328) := 68VZ328
platform-$(CONFIG_M68360) := 68360
platform-$(CONFIG_M5206) := 5206
platform-$(CONFIG_M5206e) := 5206e
+platform-$(CONFIG_M520x) := 520x
platform-$(CONFIG_M523x) := 523x
platform-$(CONFIG_M5249) := 5249
platform-$(CONFIG_M527x) := 527x
@@ -29,7 +30,7 @@ board-$(CONFIG_UCDIMM) := ucdimm
board-$(CONFIG_UCQUICC) := uCquicc
board-$(CONFIG_DRAGEN2) := de2
board-$(CONFIG_ARNEWSH) := ARNEWSH
-board-$(CONFIG_MOTOROLA) := MOTOROLA
+board-$(CONFIG_FREESCALE) := FREESCALE
board-$(CONFIG_M5235EVB) := M5235EVB
board-$(CONFIG_M5271EVB) := M5271EVB
board-$(CONFIG_M5275EVB) := M5275EVB
@@ -41,6 +42,7 @@ board-$(CONFIG_SECUREEDGEMP3) := MP3
board-$(CONFIG_CLEOPATRA) := CLEOPATRA
board-$(CONFIG_senTec) := senTec
board-$(CONFIG_SNEHA) := SNEHA
+board-$(CONFIG_M5208EVB) := M5208EVB
board-$(CONFIG_MOD5272) := MOD5272
BOARD := $(board-y)
@@ -56,6 +58,7 @@ MODEL := $(model-y)
#
cpuclass-$(CONFIG_M5206) := 5307
cpuclass-$(CONFIG_M5206e) := 5307
+cpuclass-$(CONFIG_M520x) := 5307
cpuclass-$(CONFIG_M523x) := 5307
cpuclass-$(CONFIG_M5249) := 5307
cpuclass-$(CONFIG_M527x) := 5307
@@ -80,6 +83,7 @@ export PLATFORM BOARD MODEL CPUCLASS
#
cflags-$(CONFIG_M5206) := -m5200 -Wa,-S -Wa,-m5200
cflags-$(CONFIG_M5206e) := -m5200 -Wa,-S -Wa,-m5200
+cflags-$(CONFIG_M520x) := -m5307 -Wa,-S -Wa,-m5307
cflags-$(CONFIG_M523x) := -m5307 -Wa,-S -Wa,-m5307
cflags-$(CONFIG_M5249) := -m5200 -Wa,-S -Wa,-m5200
cflags-$(CONFIG_M527x) := -m5307 -Wa,-S -Wa,-m5307
@@ -95,7 +99,6 @@ cflags-$(CONFIG_M68360) := -m68332
AFLAGS += $(cflags-y)
CFLAGS += $(cflags-y)
-CFLAGS += -fno-builtin
CFLAGS += -O1 -g
CFLAGS += -D__linux__
CFLAGS += -DUTS_SYSNAME=\"uClinux\"
diff --git a/arch/m68knommu/kernel/asm-offsets.c b/arch/m68knommu/kernel/asm-offsets.c
index cd3ffe12653e..b988c7bdc6e4 100644
--- a/arch/m68knommu/kernel/asm-offsets.c
+++ b/arch/m68knommu/kernel/asm-offsets.c
@@ -15,6 +15,7 @@
#include
#include
#include
+#include
#include
#define DEFINE(sym, val) \
diff --git a/arch/m68knommu/kernel/ptrace.c b/arch/m68knommu/kernel/ptrace.c
index 621d7b91ccfe..262ab8c72e5f 100644
--- a/arch/m68knommu/kernel/ptrace.c
+++ b/arch/m68knommu/kernel/ptrace.c
@@ -101,43 +101,10 @@ void ptrace_disable(struct task_struct *child)
put_reg(child, PT_SR, tmp);
}
-asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
- struct task_struct *child;
int ret;
- lock_kernel();
- ret = -EPERM;
- if (request == PTRACE_TRACEME) {
- /* are we already being traced? */
- if (current->ptrace & PT_PTRACED)
- goto out;
- /* set the ptrace bit in the process flags. */
- current->ptrace |= PT_PTRACED;
- ret = 0;
- goto out;
- }
- ret = -ESRCH;
- read_lock(&tasklist_lock);
- child = find_task_by_pid(pid);
- if (child)
- get_task_struct(child);
- read_unlock(&tasklist_lock);
- if (!child)
- goto out;
-
- ret = -EPERM;
- if (pid == 1) /* you may not mess with init */
- goto out_tsk;
-
- if (request == PTRACE_ATTACH) {
- ret = ptrace_attach(child);
- goto out_tsk;
- }
- ret = ptrace_check_attach(child, request == PTRACE_KILL);
- if (ret < 0)
- goto out_tsk;
-
switch (request) {
/* when I and D space are separate, these will need to be fixed. */
case PTRACE_PEEKTEXT: /* read word at location addr. */
@@ -357,10 +324,6 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
ret = -EIO;
break;
}
-out_tsk:
- put_task_struct(child);
-out:
- unlock_kernel();
return ret;
}
diff --git a/arch/m68knommu/kernel/setup.c b/arch/m68knommu/kernel/setup.c
index a220345e9746..abb80fa2b940 100644
--- a/arch/m68knommu/kernel/setup.c
+++ b/arch/m68knommu/kernel/setup.c
@@ -107,6 +107,9 @@ void (*mach_power_off)( void ) = NULL;
#if defined(CONFIG_M5206e)
#define CPU "COLDFIRE(m5206e)"
#endif
+#if defined(CONFIG_M520x)
+ #define CPU "COLDFIRE(m520x)"
+#endif
#if defined(CONFIG_M523x)
#define CPU "COLDFIRE(m523x)"
#endif
@@ -132,7 +135,7 @@ void (*mach_power_off)( void ) = NULL;
#define CPU "COLDFIRE(m5407)"
#endif
#ifndef CPU
- #define CPU "UNKOWN"
+ #define CPU "UNKNOWN"
#endif
/* (es) */
diff --git a/arch/m68knommu/kernel/vmlinux.lds.S b/arch/m68knommu/kernel/vmlinux.lds.S
index 47f06787190d..0eab92ca4b97 100644
--- a/arch/m68knommu/kernel/vmlinux.lds.S
+++ b/arch/m68knommu/kernel/vmlinux.lds.S
@@ -124,6 +124,14 @@
#define RAM_LENGTH 0x3e0000
#endif
+/*
+ * The Freescale 5208EVB board has 32MB of RAM.
+ */
+#if defined(CONFIG_M5208EVB)
+#define RAM_START 0x40020000
+#define RAM_LENGTH 0x01e00000
+#endif
+
/*
* The senTec COBRA5272 board has nearly the same memory layout as
* the M5272C3. We assume 16MiB ram.
@@ -275,6 +283,7 @@ SECTIONS {
*(__ksymtab_strings)
/* Built-in module parameters */
+ . = ALIGN(4) ;
__start___param = .;
*(__param)
__stop___param = .;
diff --git a/arch/m68knommu/platform/520x/Makefile b/arch/m68knommu/platform/520x/Makefile
new file mode 100644
index 000000000000..e861b05106bc
--- /dev/null
+++ b/arch/m68knommu/platform/520x/Makefile
@@ -0,0 +1,19 @@
+#
+# Makefile for the M5208 specific file.
+#
+
+#
+# If you want to play with the HW breakpoints then you will
+# need to define this, which will give you a stack backtrace
+# on the console port whenever a DBG interrupt occurs. You have to
+# set up your HW breakpoints to trigger a DBG interrupt:
+#
+# EXTRA_CFLAGS += -DTRAP_DBG_INTERRUPT
+# EXTRA_AFLAGS += -DTRAP_DBG_INTERRUPT
+#
+
+ifdef CONFIG_FULLDEBUG
+AFLAGS += -DDEBUGGER_COMPATIBLE_CACHE=1
+endif
+
+obj-y := config.o
diff --git a/arch/m68knommu/platform/520x/config.c b/arch/m68knommu/platform/520x/config.c
new file mode 100644
index 000000000000..71dea2e0f452
--- /dev/null
+++ b/arch/m68knommu/platform/520x/config.c
@@ -0,0 +1,65 @@
+/***************************************************************************/
+
+/*
+ * linux/arch/m68knommu/platform/520x/config.c
+ *
+ * Copyright (C) 2005, Freescale (www.freescale.com)
+ * Copyright (C) 2005, Intec Automation (mike@steroidmicros.com)
+ * Copyright (C) 1999-2003, Greg Ungerer (gerg@snapgear.com)
+ * Copyright (C) 2001-2003, SnapGear Inc. (www.snapgear.com)
+ */
+
+/***************************************************************************/
+
+#include
+#include
+#include
+#include
+#include
+
+/***************************************************************************/
+
+/*
+ * DMA channel base address table.
+ */
+unsigned int dma_base_addr[MAX_M68K_DMA_CHANNELS];
+unsigned int dma_device_address[MAX_M68K_DMA_CHANNELS];
+
+/***************************************************************************/
+
+void coldfire_pit_tick(void);
+void coldfire_pit_init(irqreturn_t (*handler)(int, void *, struct pt_regs *));
+unsigned long coldfire_pit_offset(void);
+void coldfire_trap_init(void);
+void coldfire_reset(void);
+
+/***************************************************************************/
+
+/*
+ * Program the vector to be auto-vectored.
+ */
+
+void mcf_autovector(unsigned int vec)
+{
+ /* Everything is auto-vectored on the 520x devices */
+}
+
+/***************************************************************************/
+
+void config_BSP(char *commandp, int size)
+{
+#ifdef CONFIG_BOOTPARAM
+ strncpy(commandp, CONFIG_BOOTPARAM_STRING, size);
+ commandp[size-1] = 0;
+#else
+ memset(commandp, 0, size);
+#endif
+
+ mach_sched_init = coldfire_pit_init;
+ mach_tick = coldfire_pit_tick;
+ mach_gettimeoffset = coldfire_pit_offset;
+ mach_trap_init = coldfire_trap_init;
+ mach_reset = coldfire_reset;
+}
+
+/***************************************************************************/
diff --git a/arch/m68knommu/platform/5307/Makefile b/arch/m68knommu/platform/5307/Makefile
index 6fe5a2b8fb08..8d1619dc1ea6 100644
--- a/arch/m68knommu/platform/5307/Makefile
+++ b/arch/m68knommu/platform/5307/Makefile
@@ -19,6 +19,7 @@ endif
obj-$(CONFIG_COLDFIRE) += entry.o vectors.o ints.o
obj-$(CONFIG_M5206) += timers.o
obj-$(CONFIG_M5206e) += timers.o
+obj-$(CONFIG_M520x) += pit.o
obj-$(CONFIG_M523x) += pit.o
obj-$(CONFIG_M5249) += timers.o
obj-$(CONFIG_M527x) += pit.o
diff --git a/arch/m68knommu/platform/5307/head.S b/arch/m68knommu/platform/5307/head.S
index 7f4ba837901f..c30c462b99b1 100644
--- a/arch/m68knommu/platform/5307/head.S
+++ b/arch/m68knommu/platform/5307/head.S
@@ -113,6 +113,9 @@
#define MEM_BASE 0x02000000
#define VBR_BASE 0x20000000 /* vectors in SRAM */
#endif
+#if defined(CONFIG_M5208EVB)
+#define MEM_BASE 0x40000000
+#endif
#ifndef MEM_BASE
#define MEM_BASE 0x00000000 /* memory base at address 0 */
diff --git a/arch/m68knommu/platform/5307/ints.c b/arch/m68knommu/platform/5307/ints.c
index 0117754d44f3..a134fb2f0566 100644
--- a/arch/m68knommu/platform/5307/ints.c
+++ b/arch/m68knommu/platform/5307/ints.c
@@ -26,6 +26,7 @@
#include
#include
+#include
#include
#include
#include
diff --git a/arch/m68knommu/platform/5307/pit.c b/arch/m68knommu/platform/5307/pit.c
index a9b2c2e7e280..323f2677e49d 100644
--- a/arch/m68knommu/platform/5307/pit.c
+++ b/arch/m68knommu/platform/5307/pit.c
@@ -3,7 +3,7 @@
/*
* pit.c -- Motorola ColdFire PIT timer. Currently this type of
* hardware timer only exists in the Motorola ColdFire
- * 5270/5271 and 5282 CPUs.
+ * 5270/5271, 5282 and other CPUs.
*
* Copyright (C) 1999-2004, Greg Ungerer (gerg@snapgear.com)
* Copyright (C) 2001-2004, SnapGear Inc. (www.snapgear.com)
@@ -47,10 +47,10 @@ void coldfire_pit_init(irqreturn_t (*handler)(int, void *, struct pt_regs *))
icrp = (volatile unsigned char *) (MCF_IPSBAR + MCFICM_INTC0 +
MCFINTC_ICR0 + MCFINT_PIT1);
- *icrp = 0x2b; /* PIT1 with level 5, priority 3 */
+ *icrp = ICR_INTRCONF;
- imrp = (volatile unsigned long *) (MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IMRH);
- *imrp &= ~(1 << (MCFINT_PIT1 - 32));
+ imrp = (volatile unsigned long *) (MCF_IPSBAR + MCFICM_INTC0 + MCFPIT_IMR);
+ *imrp &= ~MCFPIT_IMR_IBIT;
/* Set up PIT timer 1 as poll clock */
tp = (volatile struct mcfpit *) (MCF_IPSBAR + MCFPIT_BASE1);
@@ -70,7 +70,7 @@ unsigned long coldfire_pit_offset(void)
unsigned long pmr, pcntr, offset;
tp = (volatile struct mcfpit *) (MCF_IPSBAR + MCFPIT_BASE1);
- ipr = (volatile unsigned long *) (MCF_IPSBAR + MCFICM_INTC0 + MCFINTC_IPRH);
+ ipr = (volatile unsigned long *) (MCF_IPSBAR + MCFICM_INTC0 + MCFPIT_IMR);
pmr = *(&tp->pmr);
pcntr = *(&tp->pcntr);
@@ -80,7 +80,7 @@ unsigned long coldfire_pit_offset(void)
 * timer interrupt is pending, then add on a tick's worth of time.
*/
offset = ((pmr - pcntr) * (1000000 / HZ)) / pmr;
- if ((offset < (1000000 / HZ / 2)) && (*ipr & (1 << (MCFINT_PIT1 - 32))))
+ if ((offset < (1000000 / HZ / 2)) && (*ipr & MCFPIT_IMR_IBIT))
offset += 1000000 / HZ;
return offset;
}
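
For reference, the offset computation in coldfire_pit_offset() scales the elapsed fraction of the PIT period into microseconds. A worked sketch with hypothetical register values:

#include <linux/param.h>	/* HZ */

static unsigned long pit_offset_usecs(unsigned long pmr, unsigned long pcntr)
{
	/* e.g. pmr = 25000, pcntr = 12500, HZ = 100:
	 * ((25000 - 12500) * (1000000 / 100)) / 25000 = 5000 us,
	 * i.e. half a tick has elapsed since the last interrupt. */
	return ((pmr - pcntr) * (1000000 / HZ)) / pmr;
}
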
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 0097a0d53b3b..e380a8322a94 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -958,7 +958,7 @@ config SOC_PNX8550
bool
select DMA_NONCOHERENT
select HW_HAS_PCI
- select SYS_HAS_CPU_R4X00
+ select SYS_HAS_CPU_MIPS32_R1
select SYS_SUPPORTS_32BIT_KERNEL
config SWAP_IO_SPACE
diff --git a/arch/mips/boot/.gitignore b/arch/mips/boot/.gitignore
new file mode 100644
index 000000000000..ba63401c6e10
--- /dev/null
+++ b/arch/mips/boot/.gitignore
@@ -0,0 +1,4 @@
+mkboot
+elf2ecoff
+zImage
+zImage.tmp
diff --git a/arch/mips/configs/pnx8550-jbs_defconfig b/arch/mips/configs/pnx8550-jbs_defconfig
index 95f84d711912..555837e4c06f 100644
--- a/arch/mips/configs/pnx8550-jbs_defconfig
+++ b/arch/mips/configs/pnx8550-jbs_defconfig
@@ -129,7 +129,7 @@ CONFIG_MIPS_L1_CACHE_SHIFT=5
#
# CPU selection
#
-# CONFIG_CPU_MIPS32_R1 is not set
+CONFIG_CPU_MIPS32_R1=y
# CONFIG_CPU_MIPS32_R2 is not set
# CONFIG_CPU_MIPS64_R1 is not set
# CONFIG_CPU_MIPS64_R2 is not set
@@ -137,7 +137,7 @@ CONFIG_MIPS_L1_CACHE_SHIFT=5
# CONFIG_CPU_TX39XX is not set
# CONFIG_CPU_VR41XX is not set
# CONFIG_CPU_R4300 is not set
-CONFIG_CPU_R4X00=y
+# CONFIG_CPU_R4X00 is not set
# CONFIG_CPU_TX49XX is not set
# CONFIG_CPU_R5000 is not set
# CONFIG_CPU_R5432 is not set
@@ -148,10 +148,11 @@ CONFIG_CPU_R4X00=y
# CONFIG_CPU_RM7000 is not set
# CONFIG_CPU_RM9000 is not set
# CONFIG_CPU_SB1 is not set
-CONFIG_SYS_HAS_CPU_R4X00=y
+CONFIG_SYS_HAS_CPU_MIPS32_R1=y
+CONFIG_CPU_MIPS32=y
+CONFIG_CPU_MIPSR1=y
CONFIG_SYS_SUPPORTS_32BIT_KERNEL=y
CONFIG_CPU_SUPPORTS_32BIT_KERNEL=y
-CONFIG_CPU_SUPPORTS_64BIT_KERNEL=y
#
# Kernel type
@@ -162,11 +163,11 @@ CONFIG_PAGE_SIZE_4KB=y
# CONFIG_PAGE_SIZE_8KB is not set
# CONFIG_PAGE_SIZE_16KB is not set
# CONFIG_PAGE_SIZE_64KB is not set
+CONFIG_CPU_HAS_PREFETCH=y
# CONFIG_MIPS_MT is not set
# CONFIG_64BIT_PHYS_ADDR is not set
# CONFIG_CPU_ADVANCED is not set
CONFIG_CPU_HAS_LLSC=y
-CONFIG_CPU_HAS_LLDSCD=y
CONFIG_CPU_HAS_SYNC=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
diff --git a/arch/mips/configs/pnx8550-v2pci_defconfig b/arch/mips/configs/pnx8550-v2pci_defconfig
index deb24c29ac0a..05e65206a7b4 100644
--- a/arch/mips/configs/pnx8550-v2pci_defconfig
+++ b/arch/mips/configs/pnx8550-v2pci_defconfig
@@ -128,7 +128,7 @@ CONFIG_MIPS_L1_CACHE_SHIFT=5
#
# CPU selection
#
-# CONFIG_CPU_MIPS32_R1 is not set
+CONFIG_CPU_MIPS32_R1=y
# CONFIG_CPU_MIPS32_R2 is not set
# CONFIG_CPU_MIPS64_R1 is not set
# CONFIG_CPU_MIPS64_R2 is not set
@@ -136,7 +136,7 @@ CONFIG_MIPS_L1_CACHE_SHIFT=5
# CONFIG_CPU_TX39XX is not set
# CONFIG_CPU_VR41XX is not set
# CONFIG_CPU_R4300 is not set
-CONFIG_CPU_R4X00=y
+# CONFIG_CPU_R4X00 is not set
# CONFIG_CPU_TX49XX is not set
# CONFIG_CPU_R5000 is not set
# CONFIG_CPU_R5432 is not set
@@ -147,10 +147,11 @@ CONFIG_CPU_R4X00=y
# CONFIG_CPU_RM7000 is not set
# CONFIG_CPU_RM9000 is not set
# CONFIG_CPU_SB1 is not set
-CONFIG_SYS_HAS_CPU_R4X00=y
+CONFIG_SYS_HAS_CPU_MIPS32_R1=y
+CONFIG_CPU_MIPS32=y
+CONFIG_CPU_MIPSR1=y
CONFIG_SYS_SUPPORTS_32BIT_KERNEL=y
CONFIG_CPU_SUPPORTS_32BIT_KERNEL=y
-CONFIG_CPU_SUPPORTS_64BIT_KERNEL=y
#
# Kernel type
@@ -161,6 +162,7 @@ CONFIG_PAGE_SIZE_4KB=y
# CONFIG_PAGE_SIZE_8KB is not set
# CONFIG_PAGE_SIZE_16KB is not set
# CONFIG_PAGE_SIZE_64KB is not set
+CONFIG_CPU_HAS_PREFETCH=y
# CONFIG_MIPS_MT is not set
# CONFIG_64BIT_PHYS_ADDR is not set
CONFIG_CPU_ADVANCED=y
diff --git a/arch/mips/ddb5xxx/common/rtc_ds1386.c b/arch/mips/ddb5xxx/common/rtc_ds1386.c
index f5b11508ff2f..995896ac0e39 100644
--- a/arch/mips/ddb5xxx/common/rtc_ds1386.c
+++ b/arch/mips/ddb5xxx/common/rtc_ds1386.c
@@ -41,7 +41,9 @@ rtc_ds1386_get_time(void)
u8 byte;
u8 temp;
unsigned int year, month, day, hour, minute, second;
+ unsigned long flags;
+ spin_lock_irqsave(&rtc_lock, flags);
/* let us freeze external registers */
byte = READ_RTC(0xB);
byte &= 0x3f;
@@ -60,6 +62,7 @@ rtc_ds1386_get_time(void)
/* enable time transfer */
byte |= 0x80;
WRITE_RTC(0xB, byte);
+ spin_unlock_irqrestore(&rtc_lock, flags);
/* calc hour */
if (temp & 0x40) {
@@ -81,7 +84,9 @@ rtc_ds1386_set_time(unsigned long t)
u8 byte;
u8 temp;
u8 year, month, day, hour, minute, second;
+ unsigned long flags;
+ spin_lock_irqsave(&rtc_lock, flags);
/* let us freeze external registers */
byte = READ_RTC(0xB);
byte &= 0x3f;
@@ -133,6 +138,7 @@ rtc_ds1386_set_time(unsigned long t)
if (second != READ_RTC(0x1)) {
WRITE_RTC(0x1, second);
}
+ spin_unlock_irqrestore(&rtc_lock, flags);
return 0;
}
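
The same locking pattern repeats through the RTC hunks that follow: every multi-register read or write of the clock chip is bracketed by rtc_lock with interrupts off, so nothing can interleave with a half-read or half-written time. Reduced to a sketch:

#include <linux/spinlock.h>
#include <linux/mc146818rtc.h>	/* rtc_lock, CMOS_READ() */

static unsigned int read_rtc_seconds(void)
{
	unsigned long flags;
	unsigned int sec;

	spin_lock_irqsave(&rtc_lock, flags);
	sec = CMOS_READ(RTC_SECONDS);
	spin_unlock_irqrestore(&rtc_lock, flags);

	return sec;
}
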
diff --git a/arch/mips/dec/time.c b/arch/mips/dec/time.c
index dc7091caa7aa..174822344131 100644
--- a/arch/mips/dec/time.c
+++ b/arch/mips/dec/time.c
@@ -37,10 +37,25 @@
#include
+/*
+ * Returns true if a clock update is in progress
+ */
+static inline unsigned char dec_rtc_is_updating(void)
+{
+ unsigned char uip;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+ uip = (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ return uip;
+}
+
static unsigned long dec_rtc_get_time(void)
{
unsigned int year, mon, day, hour, min, sec, real_year;
int i;
+ unsigned long flags;
/* The Linux interpretation of the DS1287 clock register contents:
* When the Update-In-Progress (UIP) flag goes from 1 to 0, the
@@ -49,11 +64,12 @@ static unsigned long dec_rtc_get_time(void)
*/
/* read RTC exactly on falling edge of update flag */
for (i = 0; i < 1000000; i++) /* may take up to 1 second... */
- if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)
+ if (dec_rtc_is_updating())
break;
for (i = 0; i < 1000000; i++) /* must try at least 2.228 ms */
- if (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP))
+ if (!dec_rtc_is_updating())
break;
+ spin_lock_irqsave(&rtc_lock, flags);
/* Isn't this overkill? UIP above should guarantee consistency */
do {
sec = CMOS_READ(RTC_SECONDS);
@@ -77,6 +93,7 @@ static unsigned long dec_rtc_get_time(void)
* of unused BBU RAM locations.
*/
real_year = CMOS_READ(RTC_DEC_YEAR);
+ spin_unlock_irqrestore(&rtc_lock, flags);
year += real_year - 72 + 2000;
return mktime(year, mon, day, hour, min, sec);
@@ -95,6 +112,8 @@ static int dec_rtc_set_mmss(unsigned long nowtime)
int real_seconds, real_minutes, cmos_minutes;
unsigned char save_control, save_freq_select;
+ /* irq are locally disabled here */
+ spin_lock(&rtc_lock);
/* tell the clock it's being set */
save_control = CMOS_READ(RTC_CONTROL);
CMOS_WRITE((save_control | RTC_SET), RTC_CONTROL);
@@ -141,6 +160,7 @@ static int dec_rtc_set_mmss(unsigned long nowtime)
*/
CMOS_WRITE(save_control, RTC_CONTROL);
CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+ spin_unlock(&rtc_lock);
return retval;
}
diff --git a/arch/mips/jmr3927/common/rtc_ds1742.c b/arch/mips/jmr3927/common/rtc_ds1742.c
index 1ae4318e1358..8b407d7dc460 100644
--- a/arch/mips/jmr3927/common/rtc_ds1742.c
+++ b/arch/mips/jmr3927/common/rtc_ds1742.c
@@ -57,7 +57,9 @@ rtc_ds1742_get_time(void)
{
unsigned int year, month, day, hour, minute, second;
unsigned int century;
+ unsigned long flags;
+ spin_lock_irqsave(&rtc_lock, flags);
CMOS_WRITE(RTC_READ, RTC_CONTROL);
second = BCD2BIN(CMOS_READ(RTC_SECONDS) & RTC_SECONDS_MASK);
minute = BCD2BIN(CMOS_READ(RTC_MINUTES));
@@ -67,6 +69,7 @@ rtc_ds1742_get_time(void)
year = BCD2BIN(CMOS_READ(RTC_YEAR));
century = BCD2BIN(CMOS_READ(RTC_CENTURY) & RTC_CENTURY_MASK);
CMOS_WRITE(0, RTC_CONTROL);
+ spin_unlock_irqrestore(&rtc_lock, flags);
year += century * 100;
@@ -81,7 +84,9 @@ rtc_ds1742_set_time(unsigned long t)
u8 year, month, day, hour, minute, second;
u8 cmos_year, cmos_month, cmos_day, cmos_hour, cmos_minute, cmos_second;
int cmos_century;
+ unsigned long flags;
+ spin_lock_irqsave(&rtc_lock, flags);
CMOS_WRITE(RTC_READ, RTC_CONTROL);
cmos_second = (u8)(CMOS_READ(RTC_SECONDS) & RTC_SECONDS_MASK);
cmos_minute = (u8)CMOS_READ(RTC_MINUTES);
@@ -139,6 +144,7 @@ rtc_ds1742_set_time(unsigned long t)
/* RTC_CENTURY and RTC_CONTROL share same address... */
CMOS_WRITE(cmos_century, RTC_CONTROL);
+ spin_unlock_irqrestore(&rtc_lock, flags);
return 0;
}
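
The BCD2BIN()/BIN2BCD() helpers used above come from linux/bcd.h; RTC chips store each field as binary-coded decimal, so the value 59 lives in a register as the byte 0x59:

#include <linux/bcd.h>

static void bcd_demo(void)
{
	unsigned int sec = BCD2BIN(0x59);	/* 0x59 -> 59 */
	unsigned int raw = BIN2BCD(59);		/* 59 -> 0x59 */

	(void)sec;
	(void)raw;
}
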
diff --git a/arch/mips/kernel/irixsig.c b/arch/mips/kernel/irixsig.c
index 908e63684208..dd118c60bcd0 100644
--- a/arch/mips/kernel/irixsig.c
+++ b/arch/mips/kernel/irixsig.c
@@ -502,8 +502,7 @@ asmlinkage int irix_sigpoll_sys(unsigned long __user *set,
while(1) {
long tmp = 0;
- current->state = TASK_INTERRUPTIBLE;
- expire = schedule_timeout(expire);
+ expire = schedule_timeout_interruptible(expire);
for (i=0; i<=4; i++)
tmp |= (current->pending.signal.sig[i] & kset.sig[i]);
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index f1b0f3e1f95b..510da5fda567 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -174,51 +174,10 @@ int ptrace_setfpregs (struct task_struct *child, __u32 __user *data)
return 0;
}
-asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
- struct task_struct *child;
int ret;
-#if 0
- printk("ptrace(r=%d,pid=%d,addr=%08lx,data=%08lx)\n",
- (int) request, (int) pid, (unsigned long) addr,
- (unsigned long) data);
-#endif
- lock_kernel();
- ret = -EPERM;
- if (request == PTRACE_TRACEME) {
- /* are we already being traced? */
- if (current->ptrace & PT_PTRACED)
- goto out;
- if ((ret = security_ptrace(current->parent, current)))
- goto out;
- /* set the ptrace bit in the process flags. */
- current->ptrace |= PT_PTRACED;
- ret = 0;
- goto out;
- }
- ret = -ESRCH;
- read_lock(&tasklist_lock);
- child = find_task_by_pid(pid);
- if (child)
- get_task_struct(child);
- read_unlock(&tasklist_lock);
- if (!child)
- goto out;
-
- ret = -EPERM;
- if (pid == 1) /* you may not mess with init */
- goto out_tsk;
-
- if (request == PTRACE_ATTACH) {
- ret = ptrace_attach(child);
- goto out_tsk;
- }
-
- ret = ptrace_check_attach(child, request == PTRACE_KILL);
- if (ret < 0)
- goto out_tsk;
-
switch (request) {
/* when I and D space are separate, these will need to be fixed. */
case PTRACE_PEEKTEXT: /* read word at location addr. */
@@ -319,7 +278,7 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
if (!cpu_has_dsp) {
tmp = 0;
ret = -EIO;
- goto out_tsk;
+ goto out;
}
if (child->thread.dsp.used_dsp) {
dregs = __get_dsp_regs(child);
@@ -333,14 +292,14 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
if (!cpu_has_dsp) {
tmp = 0;
ret = -EIO;
- goto out_tsk;
+ goto out;
}
tmp = child->thread.dsp.dspcontrol;
break;
default:
tmp = 0;
ret = -EIO;
- goto out_tsk;
+ goto out;
}
ret = put_user(tmp, (unsigned long __user *) data);
break;
@@ -495,11 +454,7 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
ret = ptrace_request(child, request, addr, data);
break;
}
-
-out_tsk:
- put_task_struct(child);
-out:
- unlock_kernel();
+ out:
return ret;
}
diff --git a/arch/mips/kernel/rtlx.c b/arch/mips/kernel/rtlx.c
index 8c81f3cb4e2d..1d855112bac2 100644
--- a/arch/mips/kernel/rtlx.c
+++ b/arch/mips/kernel/rtlx.c
@@ -20,42 +20,42 @@
#include
#include
#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
#include
#include
#include
#include
-#include
-#include
+#include
#include
#include
-#include
#include
+#include
-#define RTLX_MAJOR 64
#define RTLX_TARG_VPE 1
-struct rtlx_info *rtlx;
+static struct rtlx_info *rtlx;
static int major;
static char module_name[] = "rtlx";
-static inline int spacefree(int read, int write, int size);
+static struct irqaction irq;
+static int irq_num;
+
+static inline int spacefree(int read, int write, int size)
+{
+ if (read == write) {
+ /*
+ * never fill the buffer completely, so indexes are always
+ * equal if empty and only empty, or !equal if data available
+ */
+ return size - 1;
+ }
+
+ return ((read + size - write) % size) - 1;
+}
static struct chan_waitqueues {
wait_queue_head_t rt_queue;
wait_queue_head_t lx_queue;
} channel_wqs[RTLX_CHANNELS];
-static struct irqaction irq;
-static int irq_num;
-
extern void *vpe_get_shared(int index);
static void rtlx_dispatch(struct pt_regs *regs)
@@ -63,9 +63,8 @@ static void rtlx_dispatch(struct pt_regs *regs)
do_IRQ(MIPSCPU_INT_BASE + MIPS_CPU_RTLX_IRQ, regs);
}
-irqreturn_t rtlx_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+static irqreturn_t rtlx_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
- irqreturn_t r = IRQ_HANDLED;
int i;
for (i = 0; i < RTLX_CHANNELS; i++) {
@@ -75,30 +74,7 @@ irqreturn_t rtlx_interrupt(int irq, void *dev_id, struct pt_regs *regs)
wake_up_interruptible(&channel_wqs[i].lx_queue);
}
- return r;
-}
-
-void dump_rtlx(void)
-{
- int i;
-
- printk("id 0x%lx state %d\n", rtlx->id, rtlx->state);
-
- for (i = 0; i < RTLX_CHANNELS; i++) {
- struct rtlx_channel *chan = &rtlx->channel[i];
-
- printk(" rt_state %d lx_state %d buffer_size %d\n",
- chan->rt_state, chan->lx_state, chan->buffer_size);
-
- printk(" rt_read %d rt_write %d\n",
- chan->rt_read, chan->rt_write);
-
- printk(" lx_read %d lx_write %d\n",
- chan->lx_read, chan->lx_write);
-
- printk(" rt_buffer <%s>\n", chan->rt_buffer);
- printk(" lx_buffer <%s>\n", chan->lx_buffer);
- }
+ return IRQ_HANDLED;
}
/* call when we have the address of the shared structure from the SP side. */
@@ -108,7 +84,7 @@ static int rtlx_init(struct rtlx_info *rtlxi)
if (rtlxi->id != RTLX_ID) {
printk(KERN_WARNING "no valid RTLX id at 0x%p\n", rtlxi);
- return (-ENOEXEC);
+ return -ENOEXEC;
}
/* initialise the wait queues */
@@ -120,9 +96,8 @@ static int rtlx_init(struct rtlx_info *rtlxi)
/* set up for interrupt handling */
memset(&irq, 0, sizeof(struct irqaction));
- if (cpu_has_vint) {
+ if (cpu_has_vint)
set_vi_handler(MIPS_CPU_RTLX_IRQ, rtlx_dispatch);
- }
irq_num = MIPSCPU_INT_BASE + MIPS_CPU_RTLX_IRQ;
irq.handler = rtlx_interrupt;
@@ -132,7 +107,8 @@ static int rtlx_init(struct rtlx_info *rtlxi)
setup_irq(irq_num, &irq);
rtlx = rtlxi;
- return (0);
+
+ return 0;
}
/* only allow one open process at a time to open each channel */
@@ -147,36 +123,36 @@ static int rtlx_open(struct inode *inode, struct file *filp)
if (rtlx == NULL) {
struct rtlx_info **p;
if( (p = vpe_get_shared(RTLX_TARG_VPE)) == NULL) {
- printk(" vpe_get_shared is NULL. Has an SP program been loaded?\n");
- return (-EFAULT);
+ printk(KERN_ERR "vpe_get_shared is NULL. "
+ "Has an SP program been loaded?\n");
+ return -EFAULT;
}
if (*p == NULL) {
- printk(" vpe_shared %p %p\n", p, *p);
- return (-EFAULT);
+ printk(KERN_ERR "vpe_shared %p %p\n", p, *p);
+ return -EFAULT;
}
if ((ret = rtlx_init(*p)) < 0)
- return (ret);
+ return ret;
}
chan = &rtlx->channel[minor];
- /* already open? */
- if (chan->lx_state == RTLX_STATE_OPENED)
- return (-EBUSY);
+ if (test_and_set_bit(RTLX_STATE_OPENED, &chan->lx_state))
+ return -EBUSY;
- chan->lx_state = RTLX_STATE_OPENED;
- return (0);
+ return 0;
}
static int rtlx_release(struct inode *inode, struct file *filp)
{
- int minor;
+ int minor = MINOR(inode->i_rdev);
- minor = MINOR(inode->i_rdev);
- rtlx->channel[minor].lx_state = RTLX_STATE_UNUSED;
- return (0);
+ clear_bit(RTLX_STATE_OPENED, &rtlx->channel[minor].lx_state);
+ smp_mb__after_clear_bit();
+
+ return 0;
}
static unsigned int rtlx_poll(struct file *file, poll_table * wait)
@@ -199,12 +175,13 @@ static unsigned int rtlx_poll(struct file *file, poll_table * wait)
if (spacefree(chan->rt_read, chan->rt_write, chan->buffer_size))
mask |= POLLOUT | POLLWRNORM;
- return (mask);
+ return mask;
}
static ssize_t rtlx_read(struct file *file, char __user * buffer, size_t count,
loff_t * ppos)
{
+ unsigned long failed;
size_t fl = 0L;
int minor;
struct rtlx_channel *lx;
@@ -216,7 +193,7 @@ static ssize_t rtlx_read(struct file *file, char __user * buffer, size_t count,
/* data available? */
if (lx->lx_write == lx->lx_read) {
if (file->f_flags & O_NONBLOCK)
- return (0); // -EAGAIN makes cat whinge
+ return 0; /* -EAGAIN makes cat whinge */
/* go to sleep */
add_wait_queue(&channel_wqs[minor].lx_queue, &wait);
@@ -232,39 +209,39 @@ static ssize_t rtlx_read(struct file *file, char __user * buffer, size_t count,
}
/* find out how much in total */
- count = min( count,
- (size_t)(lx->lx_write + lx->buffer_size - lx->lx_read) % lx->buffer_size);
+ count = min(count,
+ (size_t)(lx->lx_write + lx->buffer_size - lx->lx_read) % lx->buffer_size);
/* then how much from the read pointer onwards */
- fl = min( count, (size_t)lx->buffer_size - lx->lx_read);
+ fl = min(count, (size_t)lx->buffer_size - lx->lx_read);
- copy_to_user (buffer, &lx->lx_buffer[lx->lx_read], fl);
+ failed = copy_to_user (buffer, &lx->lx_buffer[lx->lx_read], fl);
+ if (failed) {
+ count = fl - failed;
+ goto out;
+ }
/* and if there is anything left at the beginning of the buffer */
- if ( count - fl )
- copy_to_user (buffer + fl, lx->lx_buffer, count - fl);
+ if (count - fl) {
+ failed = copy_to_user (buffer + fl, lx->lx_buffer, count - fl);
+ if (failed) {
+ count -= failed;
+ goto out;
+ }
+ }
+out:
/* update the index */
lx->lx_read += count;
lx->lx_read %= lx->buffer_size;
- return (count);
-}
-
-static inline int spacefree(int read, int write, int size)
-{
- if (read == write) {
- /* never fill the buffer completely, so indexes are always equal if empty
- and only empty, or !equal if data available */
- return (size - 1);
- }
-
- return ((read + size - write) % size) - 1;
+ return count;
}
static ssize_t rtlx_write(struct file *file, const char __user * buffer,
size_t count, loff_t * ppos)
{
+ unsigned long failed;
int minor;
struct rtlx_channel *rt;
size_t fl;
@@ -277,7 +254,7 @@ static ssize_t rtlx_write(struct file *file, const char __user * buffer,
if (!spacefree(rt->rt_read, rt->rt_write, rt->buffer_size)) {
if (file->f_flags & O_NONBLOCK)
- return (-EAGAIN);
+ return -EAGAIN;
add_wait_queue(&channel_wqs[minor].rt_queue, &wait);
set_current_state(TASK_INTERRUPTIBLE);
@@ -290,52 +267,64 @@ static ssize_t rtlx_write(struct file *file, const char __user * buffer,
}
/* total number of bytes to copy */
- count = min( count, (size_t)spacefree(rt->rt_read, rt->rt_write, rt->buffer_size) );
+ count = min(count, (size_t)spacefree(rt->rt_read, rt->rt_write, rt->buffer_size));
/* first bit from write pointer to the end of the buffer, or count */
fl = min(count, (size_t) rt->buffer_size - rt->rt_write);
- copy_from_user(&rt->rt_buffer[rt->rt_write], buffer, fl);
+ failed = copy_from_user(&rt->rt_buffer[rt->rt_write], buffer, fl);
+ if (failed) {
+ count = fl - failed;
+ goto out;
+ }
/* if there's any left copy to the beginning of the buffer */
- if( count - fl )
- copy_from_user(rt->rt_buffer, buffer + fl, count - fl);
+ if (count - fl) {
+ failed = copy_from_user(rt->rt_buffer, buffer + fl, count - fl);
+ if (failed) {
+ count -= failed;
+ goto out;
+ }
+ }
+out:
rt->rt_write += count;
rt->rt_write %= rt->buffer_size;
- return(count);
+ return count;
}
static struct file_operations rtlx_fops = {
- .owner = THIS_MODULE,
- .open = rtlx_open,
- .release = rtlx_release,
- .write = rtlx_write,
- .read = rtlx_read,
- .poll = rtlx_poll
+ .owner = THIS_MODULE,
+ .open = rtlx_open,
+ .release = rtlx_release,
+ .write = rtlx_write,
+ .read = rtlx_read,
+ .poll = rtlx_poll
};
-static int rtlx_module_init(void)
+static char register_chrdev_failed[] __initdata =
+ KERN_ERR "rtlx_module_init: unable to register device\n";
+
+static int __init rtlx_module_init(void)
{
- if ((major = register_chrdev(RTLX_MAJOR, module_name, &rtlx_fops)) < 0) {
- printk("rtlx_module_init: unable to register device\n");
- return (-EBUSY);
+ major = register_chrdev(0, module_name, &rtlx_fops);
+ if (major < 0) {
+ printk(register_chrdev_failed);
+ return major;
}
- if (major == 0)
- major = RTLX_MAJOR;
-
- return (0);
+ return 0;
}
-static void rtlx_module_exit(void)
+static void __exit rtlx_module_exit(void)
{
unregister_chrdev(major, module_name);
}
module_init(rtlx_module_init);
module_exit(rtlx_module_exit);
+
MODULE_DESCRIPTION("MIPS RTLX");
-MODULE_AUTHOR("Elizabeth Clarke, MIPS Technologies, Inc");
+MODULE_AUTHOR("Elizabeth Clarke, MIPS Technologies, Inc.");
MODULE_LICENSE("GPL");
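
The key correctness fix in the rtlx read/write paths is that copy_to_user()/copy_from_user() return the number of bytes they failed to copy, and that value must not be ignored. A sketch of the corrected pattern, with hypothetical names:

#include <asm/uaccess.h>

static ssize_t copy_chunk(char __user *buf, const char *src, size_t len)
{
	/* Returns how many bytes were NOT copied; 0 means full success. */
	unsigned long failed = copy_to_user(buf, src, len);

	/* Report the short count so the ring indexes stay honest. */
	return len - failed;
}
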
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 9202a17db8f7..05e09eedabff 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -384,9 +384,6 @@ int setup_rt_frame(struct k_sigaction * ka, struct pt_regs *regs,
return 0;
}
-extern void setup_rt_frame_n32(struct k_sigaction * ka,
- struct pt_regs *regs, int signr, sigset_t *set, siginfo_t *info);
-
static inline int handle_signal(unsigned long sig, siginfo_t *info,
struct k_sigaction *ka, sigset_t *oldset, struct pt_regs *regs)
{
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index dbe821303125..e315d3f6aa6e 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -647,8 +647,8 @@ static inline void *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
return (void *)((sp - frame_size) & ALMASK);
}
-void setup_frame_32(struct k_sigaction * ka, struct pt_regs *regs,
- int signr, sigset_t *set)
+int setup_frame_32(struct k_sigaction * ka, struct pt_regs *regs,
+ int signr, sigset_t *set)
{
struct sigframe *frame;
int err = 0;
@@ -694,13 +694,15 @@ void setup_frame_32(struct k_sigaction * ka, struct pt_regs *regs,
current->comm, current->pid,
frame, regs->cp0_epc, frame->sf_code);
#endif
- return;
+ return 1;
give_sigsegv:
force_sigsegv(signr, current);
+ return 0;
}
-void setup_rt_frame_32(struct k_sigaction * ka, struct pt_regs *regs, int signr, sigset_t *set, siginfo_t *info)
+int setup_rt_frame_32(struct k_sigaction * ka, struct pt_regs *regs,
+ int signr, sigset_t *set, siginfo_t *info)
{
struct rt_sigframe32 *frame;
int err = 0;
@@ -763,10 +765,11 @@ void setup_rt_frame_32(struct k_sigaction * ka, struct pt_regs *regs, int signr,
current->comm, current->pid,
frame, regs->cp0_epc, frame->rs_code);
#endif
- return;
+ return 1;
give_sigsegv:
force_sigsegv(signr, current);
+ return 0;
}
static inline int handle_signal(unsigned long sig, siginfo_t *info,
diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index 97fefcc9dbe7..06be405be399 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -58,10 +58,6 @@
typedef void *vpe_handle;
-// defined here because the kernel module loader doesn't have
-// anything to do with it.
-#define SHN_MIPS_SCOMMON 0xff03
-
#ifndef ARCH_SHF_SMALL
#define ARCH_SHF_SMALL 0
#endif
@@ -69,11 +65,8 @@ typedef void *vpe_handle;
/* If this is set, the section belongs in the init part of the module */
#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
-// temp number,
-#define VPE_MAJOR 63
-
static char module_name[] = "vpe";
-static int major = 0;
+static int major;
/* grab the likely amount of memory we will need. */
#ifdef CONFIG_MIPS_VPE_LOADER_TOM
@@ -98,22 +91,7 @@ enum tc_state {
TC_STATE_DYNAMIC
};
-struct vpe;
-typedef struct tc {
- enum tc_state state;
- int index;
-
- /* parent VPE */
- struct vpe *pvpe;
-
- /* The list of TC's with this VPE */
- struct list_head tc;
-
- /* The global list of tc's */
- struct list_head list;
-} tc_t;
-
-typedef struct vpe {
+struct vpe {
enum vpe_state state;
/* (device) minor associated with this vpe */
@@ -135,7 +113,21 @@ typedef struct vpe {
/* shared symbol address */
void *shared_ptr;
-} vpe_t;
+};
+
+struct tc {
+ enum tc_state state;
+ int index;
+
+ /* parent VPE */
+ struct vpe *pvpe;
+
+ /* The list of TC's with this VPE */
+ struct list_head tc;
+
+ /* The global list of tc's */
+ struct list_head list;
+};
struct vpecontrol_ {
/* Virtual processing elements */
@@ -146,7 +138,7 @@ struct vpecontrol_ {
} vpecontrol;
static void release_progmem(void *ptr);
-static void dump_vpe(vpe_t * v);
+static void dump_vpe(struct vpe * v);
extern void save_gp_address(unsigned int secbase, unsigned int rel);
/* get the vpe associated with this minor */
@@ -197,13 +189,11 @@ struct vpe *alloc_vpe(int minor)
{
struct vpe *v;
- if ((v = kmalloc(sizeof(struct vpe), GFP_KERNEL)) == NULL) {
+ if ((v = kzalloc(sizeof(struct vpe), GFP_KERNEL)) == NULL) {
printk(KERN_WARNING "VPE: alloc_vpe no mem\n");
return NULL;
}
- memset(v, 0, sizeof(struct vpe));
-
INIT_LIST_HEAD(&v->tc);
list_add_tail(&v->list, &vpecontrol.vpe_list);
@@ -216,13 +206,11 @@ struct tc *alloc_tc(int index)
{
struct tc *t;
- if ((t = kmalloc(sizeof(struct tc), GFP_KERNEL)) == NULL) {
+ if ((t = kzalloc(sizeof(struct tc), GFP_KERNEL)) == NULL) {
printk(KERN_WARNING "VPE: alloc_tc no mem\n");
return NULL;
}
- memset(t, 0, sizeof(struct tc));
-
INIT_LIST_HEAD(&t->tc);
list_add_tail(&t->list, &vpecontrol.tc_list);
@@ -412,16 +400,17 @@ static int apply_r_mips_26(struct module *me, uint32_t *location,
return -ENOEXEC;
}
-/* Not desperately convinced this is a good check of an overflow condition
- anyway. But it gets in the way of handling undefined weak symbols which
- we want to set to zero.
- if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) {
- printk(KERN_ERR
- "module %s: relocation overflow\n",
- me->name);
- return -ENOEXEC;
- }
-*/
+/*
+ * Not desperately convinced this is a good check of an overflow condition
+ * anyway. But it gets in the way of handling undefined weak symbols which
+ * we want to set to zero.
+ * if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) {
+ * printk(KERN_ERR
+ * "module %s: relocation overflow\n",
+ * me->name);
+ * return -ENOEXEC;
+ * }
+ */
*location = (*location & ~0x03ffffff) |
((*location + (v >> 2)) & 0x03ffffff);
@@ -681,7 +670,7 @@ static void dump_tclist(void)
}
/* We are prepared so configure and start the VPE... */
-int vpe_run(vpe_t * v)
+int vpe_run(struct vpe * v)
{
unsigned long val;
struct tc *t;
@@ -772,7 +761,7 @@ int vpe_run(vpe_t * v)
return 0;
}
-static unsigned long find_vpe_symbols(vpe_t * v, Elf_Shdr * sechdrs,
+static unsigned long find_vpe_symbols(struct vpe * v, Elf_Shdr * sechdrs,
unsigned int symindex, const char *strtab,
struct module *mod)
{
@@ -792,10 +781,12 @@ static unsigned long find_vpe_symbols(vpe_t * v, Elf_Shdr * sechdrs,
return 0;
}
-/* Allocates a VPE with some program code space(the load address), copies the contents
- of the program (p)buffer performing relocatations/etc, free's it when finished.
+/*
+ * Allocates a VPE with some program code space (the load address), copies
+ * the contents of the program (p)buffer performing relocations/etc,
+ * frees it when finished.
*/
-int vpe_elfload(vpe_t * v)
+int vpe_elfload(struct vpe * v)
{
Elf_Ehdr *hdr;
Elf_Shdr *sechdrs;
@@ -931,7 +922,7 @@ int vpe_elfload(vpe_t * v)
return err;
}
-static void dump_vpe(vpe_t * v)
+static void dump_vpe(struct vpe * v)
{
struct tc *t;
@@ -947,7 +938,7 @@ static void dump_vpe(vpe_t * v)
static int vpe_open(struct inode *inode, struct file *filp)
{
int minor;
- vpe_t *v;
+ struct vpe *v;
/* assume only 1 device at the mo. */
if ((minor = MINOR(inode->i_rdev)) != 1) {
@@ -1001,7 +992,7 @@ static int vpe_open(struct inode *inode, struct file *filp)
static int vpe_release(struct inode *inode, struct file *filp)
{
int minor, ret = 0;
- vpe_t *v;
+ struct vpe *v;
Elf_Ehdr *hdr;
minor = MINOR(inode->i_rdev);
@@ -1035,7 +1026,7 @@ static ssize_t vpe_write(struct file *file, const char __user * buffer,
{
int minor;
size_t ret = count;
- vpe_t *v;
+ struct vpe *v;
minor = MINOR(file->f_dentry->d_inode->i_rdev);
if ((v = get_vpe(minor)) == NULL)
@@ -1180,14 +1171,11 @@ static int __init vpe_module_init(void)
return -ENODEV;
}
- if ((major = register_chrdev(VPE_MAJOR, module_name, &vpe_fops) < 0)) {
+ if ((major = register_chrdev(0, module_name, &vpe_fops)) < 0) {
printk("VPE loader: unable to register character device\n");
- return -EBUSY;
+ return major;
}
- if (major == 0)
- major = VPE_MAJOR;
-
dmt();
dvpe();
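
Both rtlx and the VPE loader drop their hard-coded experimental majors: passing 0 to register_chrdev() asks the kernel to pick a free major, which is returned on success, so the assignment to major must wrap the whole call (see the parenthesization above). A minimal sketch with placeholder names:

#include <linux/fs.h>
#include <linux/module.h>

static int major;
static struct file_operations example_fops;	/* hypothetical fops */

static int __init example_init(void)
{
	major = register_chrdev(0, "example", &example_fops);
	if (major < 0)
		return major;	/* propagate the real errno, not -EBUSY */
	return 0;
}

static void __exit example_exit(void)
{
	unregister_chrdev(major, "example");
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
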
diff --git a/arch/mips/lasat/ds1603.c b/arch/mips/lasat/ds1603.c
index 9d7812e03dcd..7dced67c55eb 100644
--- a/arch/mips/lasat/ds1603.c
+++ b/arch/mips/lasat/ds1603.c
@@ -8,6 +8,7 @@
#include
#include
#include
+#include
#include "ds1603.h"
@@ -138,19 +139,27 @@ static void rtc_end_op(void)
unsigned long ds1603_read(void)
{
unsigned long word;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rtc_lock, flags);
rtc_init_op();
rtc_write_byte(READ_TIME_CMD);
word = rtc_read_word();
rtc_end_op();
+ spin_unlock_irqrestore(&rtc_lock, flags);
return word;
}
int ds1603_set(unsigned long time)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&rtc_lock, flags);
rtc_init_op();
rtc_write_byte(SET_TIME_CMD);
rtc_write_word(time);
rtc_end_op();
+ spin_unlock_irqrestore(&rtc_lock, flags);
return 0;
}
diff --git a/arch/mips/momentum/jaguar_atx/setup.c b/arch/mips/momentum/jaguar_atx/setup.c
index 768bf4406452..bab192ddc185 100644
--- a/arch/mips/momentum/jaguar_atx/setup.c
+++ b/arch/mips/momentum/jaguar_atx/setup.c
@@ -149,7 +149,9 @@ arch_initcall(per_cpu_mappings);
unsigned long m48t37y_get_time(void)
{
unsigned int year, month, day, hour, min, sec;
+ unsigned long flags;
+ spin_lock_irqsave(&rtc_lock, flags);
/* stop the update */
rtc_base[0x7ff8] = 0x40;
@@ -166,6 +168,7 @@ unsigned long m48t37y_get_time(void)
/* start the update */
rtc_base[0x7ff8] = 0x00;
+ spin_unlock_irqrestore(&rtc_lock, flags);
return mktime(year, month, day, hour, min, sec);
}
@@ -173,11 +176,13 @@ unsigned long m48t37y_get_time(void)
int m48t37y_set_time(unsigned long sec)
{
struct rtc_time tm;
+ unsigned long flags;
/* convert to a more useful format -- note months count from 0 */
to_tm(sec, &tm);
tm.tm_mon += 1;
+ spin_lock_irqsave(&rtc_lock, flags);
/* enable writing */
rtc_base[0x7ff8] = 0x80;
@@ -201,6 +206,7 @@ int m48t37y_set_time(unsigned long sec)
/* disable writing */
rtc_base[0x7ff8] = 0x00;
+ spin_unlock_irqrestore(&rtc_lock, flags);
return 0;
}
diff --git a/arch/mips/momentum/ocelot_3/setup.c b/arch/mips/momentum/ocelot_3/setup.c
index a7803e08f9db..c9b7ff8148ec 100644
--- a/arch/mips/momentum/ocelot_3/setup.c
+++ b/arch/mips/momentum/ocelot_3/setup.c
@@ -135,7 +135,9 @@ void setup_wired_tlb_entries(void)
unsigned long m48t37y_get_time(void)
{
unsigned int year, month, day, hour, min, sec;
+ unsigned long flags;
+ spin_lock_irqsave(&rtc_lock, flags);
/* stop the update */
rtc_base[0x7ff8] = 0x40;
@@ -152,6 +154,7 @@ unsigned long m48t37y_get_time(void)
/* start the update */
rtc_base[0x7ff8] = 0x00;
+ spin_unlock_irqrestore(&rtc_lock, flags);
return mktime(year, month, day, hour, min, sec);
}
@@ -159,11 +162,13 @@ unsigned long m48t37y_get_time(void)
int m48t37y_set_time(unsigned long sec)
{
struct rtc_time tm;
+ unsigned long flags;
/* convert to a more useful format -- note months count from 0 */
to_tm(sec, &tm);
tm.tm_mon += 1;
+ spin_lock_irqsave(&rtc_lock, flags);
/* enable writing */
rtc_base[0x7ff8] = 0x80;
@@ -187,6 +192,7 @@ int m48t37y_set_time(unsigned long sec)
/* disable writing */
rtc_base[0x7ff8] = 0x00;
+ spin_unlock_irqrestore(&rtc_lock, flags);
return 0;
}
diff --git a/arch/mips/momentum/ocelot_c/setup.c b/arch/mips/momentum/ocelot_c/setup.c
index ce70fc96f160..2755c1547473 100644
--- a/arch/mips/momentum/ocelot_c/setup.c
+++ b/arch/mips/momentum/ocelot_c/setup.c
@@ -140,7 +140,9 @@ unsigned long m48t37y_get_time(void)
unsigned char* rtc_base = (unsigned char*)0xfc800000;
#endif
unsigned int year, month, day, hour, min, sec;
+ unsigned long flags;
+ spin_lock_irqsave(&rtc_lock, flags);
/* stop the update */
rtc_base[0x7ff8] = 0x40;
@@ -157,6 +159,7 @@ unsigned long m48t37y_get_time(void)
/* start the update */
rtc_base[0x7ff8] = 0x00;
+ spin_unlock_irqrestore(&rtc_lock, flags);
return mktime(year, month, day, hour, min, sec);
}
@@ -169,11 +172,13 @@ int m48t37y_set_time(unsigned long sec)
unsigned char* rtc_base = (unsigned char*)0xfc800000;
#endif
struct rtc_time tm;
+ unsigned long flags;
/* convert to a more useful format -- note months count from 0 */
to_tm(sec, &tm);
tm.tm_mon += 1;
+ spin_lock_irqsave(&rtc_lock, flags);
/* enable writing */
rtc_base[0x7ff8] = 0x80;
@@ -197,6 +202,7 @@ int m48t37y_set_time(unsigned long sec)
/* disable writing */
rtc_base[0x7ff8] = 0x00;
+ spin_unlock_irqrestore(&rtc_lock, flags);
return 0;
}
diff --git a/arch/mips/pmc-sierra/yosemite/setup.c b/arch/mips/pmc-sierra/yosemite/setup.c
index bdc2ab55bed6..059755b5ed57 100644
--- a/arch/mips/pmc-sierra/yosemite/setup.c
+++ b/arch/mips/pmc-sierra/yosemite/setup.c
@@ -73,7 +73,9 @@ void __init bus_error_init(void)
unsigned long m48t37y_get_time(void)
{
unsigned int year, month, day, hour, min, sec;
+ unsigned long flags;
+ spin_lock_irqsave(&rtc_lock, flags);
/* Stop the update to the time */
m48t37_base->control = 0x40;
@@ -88,6 +90,7 @@ unsigned long m48t37y_get_time(void)
/* Start the update to the time again */
m48t37_base->control = 0x00;
+ spin_unlock_irqrestore(&rtc_lock, flags);
return mktime(year, month, day, hour, min, sec);
}
@@ -95,11 +98,13 @@ unsigned long m48t37y_get_time(void)
int m48t37y_set_time(unsigned long sec)
{
struct rtc_time tm;
+ unsigned long flags;
/* convert to a more useful format -- note months count from 0 */
to_tm(sec, &tm);
tm.tm_mon += 1;
+ spin_lock_irqsave(&rtc_lock, flags);
/* enable writing */
m48t37_base->control = 0x80;
@@ -123,6 +128,7 @@ int m48t37y_set_time(unsigned long sec)
/* disable writing */
m48t37_base->control = 0x00;
+ spin_unlock_irqrestore(&rtc_lock, flags);
return 0;
}
diff --git a/arch/mips/sgi-ip22/ip22-time.c b/arch/mips/sgi-ip22/ip22-time.c
index df9b5694328a..b7300cc5c5ad 100644
--- a/arch/mips/sgi-ip22/ip22-time.c
+++ b/arch/mips/sgi-ip22/ip22-time.c
@@ -35,7 +35,9 @@ static unsigned long indy_rtc_get_time(void)
{
unsigned int yrs, mon, day, hrs, min, sec;
unsigned int save_control;
+ unsigned long flags;
+ spin_lock_irqsave(&rtc_lock, flags);
save_control = hpc3c0->rtcregs[RTC_CMD] & 0xff;
hpc3c0->rtcregs[RTC_CMD] = save_control | RTC_TE;
@@ -47,6 +49,7 @@ static unsigned long indy_rtc_get_time(void)
yrs = BCD2BIN(hpc3c0->rtcregs[RTC_YEAR] & 0xff);
hpc3c0->rtcregs[RTC_CMD] = save_control;
+ spin_unlock_irqrestore(&rtc_lock, flags);
if (yrs < 45)
yrs += 30;
@@ -60,6 +63,7 @@ static int indy_rtc_set_time(unsigned long tim)
{
struct rtc_time tm;
unsigned int save_control;
+ unsigned long flags;
to_tm(tim, &tm);
@@ -68,6 +72,7 @@ static int indy_rtc_set_time(unsigned long tim)
if (tm.tm_year >= 100)
tm.tm_year -= 100;
+ spin_lock_irqsave(&rtc_lock, flags);
save_control = hpc3c0->rtcregs[RTC_CMD] & 0xff;
hpc3c0->rtcregs[RTC_CMD] = save_control | RTC_TE;
@@ -80,6 +85,7 @@ static int indy_rtc_set_time(unsigned long tim)
hpc3c0->rtcregs[RTC_HUNDREDTH_SECOND] = 0;
hpc3c0->rtcregs[RTC_CMD] = save_control;
+ spin_unlock_irqrestore(&rtc_lock, flags);
return 0;
}
diff --git a/arch/mips/sibyte/swarm/rtc_m41t81.c b/arch/mips/sibyte/swarm/rtc_m41t81.c
index 5b4fc26c1b36..c13914bdda59 100644
--- a/arch/mips/sibyte/swarm/rtc_m41t81.c
+++ b/arch/mips/sibyte/swarm/rtc_m41t81.c
@@ -144,6 +144,7 @@ static int m41t81_write(uint8_t addr, int b)
int m41t81_set_time(unsigned long t)
{
struct rtc_time tm;
+ unsigned long flags;
to_tm(t, &tm);
@@ -153,6 +154,7 @@ int m41t81_set_time(unsigned long t)
* believe we should finish writing min within a second.
*/
+ spin_lock_irqsave(&rtc_lock, flags);
tm.tm_sec = BIN2BCD(tm.tm_sec);
m41t81_write(M41T81REG_SC, tm.tm_sec);
@@ -180,6 +182,7 @@ int m41t81_set_time(unsigned long t)
tm.tm_year %= 100;
tm.tm_year = BIN2BCD(tm.tm_year);
m41t81_write(M41T81REG_YR, tm.tm_year);
+ spin_unlock_irqrestore(&rtc_lock, flags);
return 0;
}
@@ -187,19 +190,23 @@ int m41t81_set_time(unsigned long t)
unsigned long m41t81_get_time(void)
{
unsigned int year, mon, day, hour, min, sec;
+ unsigned long flags;
/*
* min is valid if two reads of sec are the same.
*/
for (;;) {
+ spin_lock_irqsave(&rtc_lock, flags);
sec = m41t81_read(M41T81REG_SC);
min = m41t81_read(M41T81REG_MN);
if (sec == m41t81_read(M41T81REG_SC)) break;
+ spin_unlock_irqrestore(&rtc_lock, flags);
}
hour = m41t81_read(M41T81REG_HR) & 0x3f;
day = m41t81_read(M41T81REG_DT);
mon = m41t81_read(M41T81REG_MO);
year = m41t81_read(M41T81REG_YR);
+ spin_unlock_irqrestore(&rtc_lock, flags);
sec = BCD2BIN(sec);
min = BCD2BIN(min);
diff --git a/arch/mips/sibyte/swarm/rtc_xicor1241.c b/arch/mips/sibyte/swarm/rtc_xicor1241.c
index d9ff9323f24e..f4a178836415 100644
--- a/arch/mips/sibyte/swarm/rtc_xicor1241.c
+++ b/arch/mips/sibyte/swarm/rtc_xicor1241.c
@@ -113,9 +113,11 @@ int xicor_set_time(unsigned long t)
{
struct rtc_time tm;
int tmp;
+ unsigned long flags;
to_tm(t, &tm);
+ spin_lock_irqsave(&rtc_lock, flags);
/* unlock writes to the CCR */
xicor_write(X1241REG_SR, X1241REG_SR_WEL);
xicor_write(X1241REG_SR, X1241REG_SR_WEL | X1241REG_SR_RWEL);
@@ -160,6 +162,7 @@ int xicor_set_time(unsigned long t)
xicor_write(X1241REG_HR, tmp);
xicor_write(X1241REG_SR, 0);
+ spin_unlock_irqrestore(&rtc_lock, flags);
return 0;
}
@@ -167,7 +170,9 @@ int xicor_set_time(unsigned long t)
unsigned long xicor_get_time(void)
{
unsigned int year, mon, day, hour, min, sec, y2k;
+ unsigned long flags;
+ spin_lock_irqsave(&rtc_lock, flags);
sec = xicor_read(X1241REG_SC);
min = xicor_read(X1241REG_MN);
hour = xicor_read(X1241REG_HR);
@@ -183,6 +188,7 @@ unsigned long xicor_get_time(void)
mon = xicor_read(X1241REG_MO);
year = xicor_read(X1241REG_YR);
y2k = xicor_read(X1241REG_Y2K);
+ spin_unlock_irqrestore(&rtc_lock, flags);
sec = BCD2BIN(sec);
min = BCD2BIN(min);
diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index 18130c3748f3..b6fe202a620d 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c
@@ -78,52 +78,13 @@ void ptrace_disable(struct task_struct *child)
pa_psw(child)->l = 0;
}
-long sys_ptrace(long request, long pid, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
- struct task_struct *child;
long ret;
#ifdef DEBUG_PTRACE
long oaddr=addr, odata=data;
#endif
- lock_kernel();
- ret = -EPERM;
- if (request == PTRACE_TRACEME) {
- /* are we already being traced? */
- if (current->ptrace & PT_PTRACED)
- goto out;
-
- ret = security_ptrace(current->parent, current);
- if (ret)
- goto out;
-
- /* set the ptrace bit in the process flags. */
- current->ptrace |= PT_PTRACED;
- ret = 0;
- goto out;
- }
-
- ret = -ESRCH;
- read_lock(&tasklist_lock);
- child = find_task_by_pid(pid);
- if (child)
- get_task_struct(child);
- read_unlock(&tasklist_lock);
- if (!child)
- goto out;
- ret = -EPERM;
- if (pid == 1) /* no messing around with init! */
- goto out_tsk;
-
- if (request == PTRACE_ATTACH) {
- ret = ptrace_attach(child);
- goto out_tsk;
- }
-
- ret = ptrace_check_attach(child, request == PTRACE_KILL);
- if (ret < 0)
- goto out_tsk;
-
switch (request) {
case PTRACE_PEEKTEXT: /* read word at location addr. */
case PTRACE_PEEKDATA: {
@@ -383,11 +344,11 @@ long sys_ptrace(long request, long pid, long addr, long data)
case PTRACE_GETEVENTMSG:
ret = put_user(child->ptrace_message, (unsigned int __user *) data);
- goto out_tsk;
+ goto out;
default:
ret = ptrace_request(child, request, addr, data);
- goto out_tsk;
+ goto out;
}
out_wake_notrap:
@@ -396,10 +357,7 @@ long sys_ptrace(long request, long pid, long addr, long data)
wake_up_process(child);
ret = 0;
out_tsk:
- put_task_struct(child);
-out:
- unlock_kernel();
- DBG("sys_ptrace(%ld, %d, %lx, %lx) returning %ld\n",
+ DBG("arch_ptrace(%ld, %d, %lx, %lx) returning %ld\n",
request, pid, oaddr, odata, ret);
return ret;
}
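Each arch_ptrace() conversion in this series deletes the same boilerplate. A hedged sketch of the generic entry point the architectures now rely on, reconstructed from the code deleted above rather than copied from kernel/ptrace.c (helper names such as ptrace_traceme() and ptrace_get_task_struct() are assumptions):

#include <linux/err.h>
#include <linux/errno.h>
#include <linux/ptrace.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>

asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
{
	struct task_struct *child;
	long ret;

	lock_kernel();
	if (request == PTRACE_TRACEME) {
		ret = ptrace_traceme();		/* sets PT_PTRACED on current */
		goto out;
	}

	child = ptrace_get_task_struct(pid);	/* lookup + get_task_struct */
	if (IS_ERR(child)) {
		ret = PTR_ERR(child);
		goto out;
	}

	if (request == PTRACE_ATTACH) {
		ret = ptrace_attach(child);
		goto out_put;
	}

	ret = ptrace_check_attach(child, request == PTRACE_KILL);
	if (!ret)
		ret = arch_ptrace(child, request, addr, data);

out_put:
	put_task_struct(child);
out:
	unlock_kernel();
	return ret;
}

With this split, each architecture only implements the request switch itself, which is exactly what remains in the converted files.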
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index f4e25c648fbb..1493c7896fe3 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -404,6 +404,14 @@ config CPU_FREQ_PMAC
this currently includes some models of iBook & Titanium
PowerBook.
+config CPU_FREQ_PMAC64
+ bool "Support for some Apple G5s"
+ depends on CPU_FREQ && PMAC_SMU && PPC64
+ select CPU_FREQ_TABLE
+ help
+ This adds support for frequency switching on Apple iMac G5,
+ and some of the more recent desktop G5 machines as well.
+
config PPC601_SYNC_FIX
bool "Workarounds for PPC601 bugs"
depends on 6xx && (PPC_PREP || PPC_PMAC)
@@ -484,6 +492,7 @@ source "fs/Kconfig.binfmt"
config FORCE_MAX_ZONEORDER
int
depends on PPC64
+ default "9" if PPC_64K_PAGES
default "13"
config MATH_EMULATION
@@ -603,6 +612,16 @@ config NODES_SPAN_OTHER_NODES
def_bool y
depends on NEED_MULTIPLE_NODES
+config PPC_64K_PAGES
+ bool "64k page size"
+ depends on PPC64
+ help
+ This option changes the kernel logical page size to 64k. On machines
+ without processor support for 64k pages, the kernel will simulate
+ them by loading each individual 4k page on demand transparently,
+ while on hardware with such support, it will be used to map
+ normal application pages.
+
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
depends on PPC64 && SMP
@@ -907,8 +926,21 @@ source "arch/powerpc/platforms/iseries/Kconfig"
source "lib/Kconfig"
+menu "Instrumentation Support"
+ depends on EXPERIMENTAL
+
source "arch/powerpc/oprofile/Kconfig"
+config KPROBES
+ bool "Kprobes (EXPERIMENTAL)"
+ help
+ Kprobes allows you to trap at almost any kernel address and
+ execute a callback function. register_kprobe() establishes
+ a probepoint and specifies the callback. Kprobes is useful
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+endmenu
+
source "arch/powerpc/Kconfig.debug"
source "security/Kconfig"
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 0baf64ec80d0..30a30bf559ea 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -9,16 +9,6 @@ config DEBUG_STACKOVERFLOW
This option will cause messages to be printed if free stack space
drops below a certain limit.
-config KPROBES
- bool "Kprobes"
- depends on DEBUG_KERNEL && PPC64
- help
- Kprobes allows you to trap at almost any kernel address and
- execute a callback function. register_kprobe() establishes
- a probepoint and specifies the callback. Kprobes is useful
- for kernel debugging, non-intrusive instrumentation and testing.
- If in doubt, say "N".
-
config DEBUG_STACK_USAGE
bool "Stack utilization instrumentation"
depends on DEBUG_KERNEL && PPC64
diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig
index 6323065fbf2c..e76854f8c121 100644
--- a/arch/powerpc/configs/g5_defconfig
+++ b/arch/powerpc/configs/g5_defconfig
@@ -1,18 +1,32 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.14-rc4
-# Thu Oct 20 08:30:23 2005
+# Linux kernel version: 2.6.14
+# Mon Nov 7 13:37:59 2005
#
+CONFIG_PPC64=y
CONFIG_64BIT=y
+CONFIG_PPC_MERGE=y
CONFIG_MMU=y
+CONFIG_GENERIC_HARDIRQS=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_GENERIC_ISA_DMA=y
+CONFIG_PPC=y
CONFIG_EARLY_PRINTK=y
CONFIG_COMPAT=y
+CONFIG_SYSVIPC_COMPAT=y
CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
-CONFIG_FORCE_MAX_ZONEORDER=13
+
+#
+# Processor support
+#
+CONFIG_POWER4_ONLY=y
+CONFIG_POWER4=y
+CONFIG_PPC_FPU=y
+CONFIG_ALTIVEC=y
+CONFIG_PPC_STD_MMU=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
#
# Code maturity level options
@@ -67,30 +81,60 @@ CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_KMOD=y
CONFIG_STOP_MACHINE=y
-CONFIG_SYSVIPC_COMPAT=y
#
# Platform support
#
-# CONFIG_PPC_ISERIES is not set
CONFIG_PPC_MULTIPLATFORM=y
+# CONFIG_PPC_ISERIES is not set
+# CONFIG_EMBEDDED6xx is not set
+# CONFIG_APUS is not set
# CONFIG_PPC_PSERIES is not set
-# CONFIG_PPC_BPA is not set
CONFIG_PPC_PMAC=y
-# CONFIG_PPC_MAPLE is not set
-CONFIG_PPC=y
-CONFIG_PPC64=y
-CONFIG_PPC_OF=y
-CONFIG_MPIC=y
-CONFIG_ALTIVEC=y
-CONFIG_KEXEC=y
-CONFIG_U3_DART=y
CONFIG_PPC_PMAC64=y
-CONFIG_BOOTX_TEXT=y
-CONFIG_POWER4_ONLY=y
+# CONFIG_PPC_MAPLE is not set
+# CONFIG_PPC_CELL is not set
+CONFIG_PPC_OF=y
+CONFIG_U3_DART=y
+CONFIG_MPIC=y
+# CONFIG_PPC_RTAS is not set
+# CONFIG_MMIO_NVRAM is not set
+# CONFIG_PPC_MPC106 is not set
+CONFIG_GENERIC_TBSYNC=y
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_TABLE=y
+# CONFIG_CPU_FREQ_DEBUG is not set
+CONFIG_CPU_FREQ_STAT=y
+# CONFIG_CPU_FREQ_STAT_DETAILS is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+# CONFIG_CPU_FREQ_GOV_ONDEMAND is not set
+# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
+CONFIG_CPU_FREQ_PMAC64=y
+# CONFIG_WANT_EARLY_SERIAL is not set
+
+#
+# Kernel options
+#
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+# CONFIG_PREEMPT_BKL is not set
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_MISC is not set
+CONFIG_FORCE_MAX_ZONEORDER=13
CONFIG_IOMMU_VMERGE=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=2
+# CONFIG_HOTPLUG_CPU is not set
+CONFIG_KEXEC=y
+CONFIG_IRQ_ALL_CPUS=y
+# CONFIG_NUMA is not set
CONFIG_ARCH_SELECT_MEMORY_MODEL=y
CONFIG_ARCH_FLATMEM_ENABLE=y
CONFIG_SELECT_MEMORY_MODEL=y
@@ -100,28 +144,21 @@ CONFIG_FLATMEM_MANUAL=y
CONFIG_FLATMEM=y
CONFIG_FLAT_NODE_MEM_MAP=y
# CONFIG_SPARSEMEM_STATIC is not set
-# CONFIG_NUMA is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_PPC_64K_PAGES is not set
# CONFIG_SCHED_SMT is not set
-CONFIG_PREEMPT_NONE=y
-# CONFIG_PREEMPT_VOLUNTARY is not set
-# CONFIG_PREEMPT is not set
-# CONFIG_PREEMPT_BKL is not set
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
-# CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
-CONFIG_GENERIC_HARDIRQS=y
-CONFIG_SECCOMP=y
-CONFIG_BINFMT_ELF=y
-# CONFIG_BINFMT_MISC is not set
-# CONFIG_HOTPLUG_CPU is not set
CONFIG_PROC_DEVICETREE=y
# CONFIG_CMDLINE_BOOL is not set
+# CONFIG_PM is not set
+CONFIG_SECCOMP=y
CONFIG_ISA_DMA_API=y
#
-# Bus Options
+# Bus options
#
+CONFIG_GENERIC_ISA_DMA=y
+# CONFIG_PPC_I8259 is not set
+# CONFIG_PPC_INDIRECT_PCI is not set
CONFIG_PCI=y
CONFIG_PCI_DOMAINS=y
CONFIG_PCI_LEGACY_PROC=y
@@ -136,6 +173,7 @@ CONFIG_PCI_LEGACY_PROC=y
# PCI Hotplug Support
#
# CONFIG_HOTPLUG_PCI is not set
+CONFIG_KERNEL_START=0xc000000000000000
#
# Networking
@@ -276,6 +314,10 @@ CONFIG_LLC=y
# CONFIG_NET_DIVERT is not set
# CONFIG_ECONET is not set
# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
# CONFIG_NET_SCHED is not set
CONFIG_NET_CLS_ROUTE=y
@@ -348,6 +390,11 @@ CONFIG_IOSCHED_NOOP=y
CONFIG_IOSCHED_AS=y
CONFIG_IOSCHED_DEADLINE=y
CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
# CONFIG_ATA_OVER_ETH is not set
#
@@ -449,6 +496,7 @@ CONFIG_SCSI_SPI_ATTRS=y
#
# SCSI low-level drivers
#
+# CONFIG_ISCSI_TCP is not set
# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
# CONFIG_SCSI_3W_9XXX is not set
# CONFIG_SCSI_ACARD is not set
@@ -465,10 +513,12 @@ CONFIG_SCSI_SATA_SVW=y
# CONFIG_SCSI_ATA_PIIX is not set
# CONFIG_SCSI_SATA_MV is not set
# CONFIG_SCSI_SATA_NV is not set
-# CONFIG_SCSI_SATA_PROMISE is not set
+# CONFIG_SCSI_PDC_ADMA is not set
# CONFIG_SCSI_SATA_QSTOR is not set
+# CONFIG_SCSI_SATA_PROMISE is not set
# CONFIG_SCSI_SATA_SX4 is not set
# CONFIG_SCSI_SATA_SIL is not set
+# CONFIG_SCSI_SATA_SIL24 is not set
# CONFIG_SCSI_SATA_SIS is not set
# CONFIG_SCSI_SATA_ULI is not set
# CONFIG_SCSI_SATA_VIA is not set
@@ -567,6 +617,9 @@ CONFIG_IEEE1394_RAWIO=y
CONFIG_ADB_PMU=y
CONFIG_PMAC_SMU=y
CONFIG_THERM_PM72=y
+CONFIG_WINDFARM=y
+CONFIG_WINDFARM_PM81=y
+CONFIG_WINDFARM_PM91=y
#
# Network device support
@@ -603,6 +656,7 @@ CONFIG_SUNGEM=y
# CONFIG_NET_TULIP is not set
# CONFIG_HP100 is not set
# CONFIG_NET_PCI is not set
+# CONFIG_FEC_8XX is not set
#
# Ethernet (1000 Mbit)
@@ -768,6 +822,7 @@ CONFIG_MAX_RAW_DEVS=256
# TPM devices
#
# CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
#
# I2C support
@@ -820,6 +875,7 @@ CONFIG_I2C_PMAC_SMU=y
# CONFIG_SENSORS_PCF8591 is not set
# CONFIG_SENSORS_RTC8564 is not set
# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_RTC_X1205_I2C is not set
# CONFIG_I2C_DEBUG_CORE is not set
# CONFIG_I2C_DEBUG_ALGO is not set
# CONFIG_I2C_DEBUG_BUS is not set
@@ -876,10 +932,9 @@ CONFIG_FB_OF=y
# CONFIG_FB_ASILIANT is not set
# CONFIG_FB_IMSTT is not set
# CONFIG_FB_VGA16 is not set
-# CONFIG_FB_NVIDIA is not set
-CONFIG_FB_RIVA=y
-# CONFIG_FB_RIVA_I2C is not set
-# CONFIG_FB_RIVA_DEBUG is not set
+CONFIG_FB_NVIDIA=y
+CONFIG_FB_NVIDIA_I2C=y
+# CONFIG_FB_RIVA is not set
# CONFIG_FB_MATROX is not set
# CONFIG_FB_RADEON_OLD is not set
CONFIG_FB_RADEON=y
@@ -924,7 +979,96 @@ CONFIG_LCD_DEVICE=y
#
# Sound
#
-# CONFIG_SOUND is not set
+CONFIG_SOUND=m
+
+#
+# Advanced Linux Sound Architecture
+#
+CONFIG_SND=m
+CONFIG_SND_TIMER=m
+CONFIG_SND_PCM=m
+CONFIG_SND_HWDEP=m
+CONFIG_SND_RAWMIDI=m
+CONFIG_SND_SEQUENCER=m
+# CONFIG_SND_SEQ_DUMMY is not set
+CONFIG_SND_OSSEMUL=y
+CONFIG_SND_MIXER_OSS=m
+CONFIG_SND_PCM_OSS=m
+CONFIG_SND_SEQUENCER_OSS=y
+# CONFIG_SND_VERBOSE_PRINTK is not set
+# CONFIG_SND_DEBUG is not set
+CONFIG_SND_GENERIC_DRIVER=y
+
+#
+# Generic devices
+#
+# CONFIG_SND_DUMMY is not set
+# CONFIG_SND_VIRMIDI is not set
+# CONFIG_SND_MTPAV is not set
+# CONFIG_SND_SERIAL_U16550 is not set
+# CONFIG_SND_MPU401 is not set
+
+#
+# PCI devices
+#
+# CONFIG_SND_ALI5451 is not set
+# CONFIG_SND_ATIIXP is not set
+# CONFIG_SND_ATIIXP_MODEM is not set
+# CONFIG_SND_AU8810 is not set
+# CONFIG_SND_AU8820 is not set
+# CONFIG_SND_AU8830 is not set
+# CONFIG_SND_AZT3328 is not set
+# CONFIG_SND_BT87X is not set
+# CONFIG_SND_CS46XX is not set
+# CONFIG_SND_CS4281 is not set
+# CONFIG_SND_EMU10K1 is not set
+# CONFIG_SND_EMU10K1X is not set
+# CONFIG_SND_CA0106 is not set
+# CONFIG_SND_KORG1212 is not set
+# CONFIG_SND_MIXART is not set
+# CONFIG_SND_NM256 is not set
+# CONFIG_SND_RME32 is not set
+# CONFIG_SND_RME96 is not set
+# CONFIG_SND_RME9652 is not set
+# CONFIG_SND_HDSP is not set
+# CONFIG_SND_HDSPM is not set
+# CONFIG_SND_TRIDENT is not set
+# CONFIG_SND_YMFPCI is not set
+# CONFIG_SND_AD1889 is not set
+# CONFIG_SND_ALS4000 is not set
+# CONFIG_SND_CMIPCI is not set
+# CONFIG_SND_ENS1370 is not set
+# CONFIG_SND_ENS1371 is not set
+# CONFIG_SND_ES1938 is not set
+# CONFIG_SND_ES1968 is not set
+# CONFIG_SND_MAESTRO3 is not set
+# CONFIG_SND_FM801 is not set
+# CONFIG_SND_ICE1712 is not set
+# CONFIG_SND_ICE1724 is not set
+# CONFIG_SND_INTEL8X0 is not set
+# CONFIG_SND_INTEL8X0M is not set
+# CONFIG_SND_SONICVIBES is not set
+# CONFIG_SND_VIA82XX is not set
+# CONFIG_SND_VIA82XX_MODEM is not set
+# CONFIG_SND_VX222 is not set
+# CONFIG_SND_HDA_INTEL is not set
+
+#
+# ALSA PowerMac devices
+#
+CONFIG_SND_POWERMAC=m
+CONFIG_SND_POWERMAC_AUTO_DRC=y
+
+#
+# USB devices
+#
+CONFIG_SND_USB_AUDIO=m
+# CONFIG_SND_USB_USX2Y is not set
+
+#
+# Open Sound System
+#
+# CONFIG_SOUND_PRIME is not set
#
# USB support
@@ -958,12 +1102,16 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y
#
# USB Device Class drivers
#
-# CONFIG_USB_BLUETOOTH_TTY is not set
+# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
CONFIG_USB_ACM=m
CONFIG_USB_PRINTER=y
#
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+# may also be needed; see USB_STORAGE Help for more information
#
CONFIG_USB_STORAGE=y
# CONFIG_USB_STORAGE_DEBUG is not set
@@ -1074,6 +1222,7 @@ CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y
CONFIG_USB_SERIAL_KLSI=m
CONFIG_USB_SERIAL_KOBIL_SCT=m
CONFIG_USB_SERIAL_MCT_U232=m
+# CONFIG_USB_SERIAL_NOKIA_DKU2 is not set
CONFIG_USB_SERIAL_PL2303=m
# CONFIG_USB_SERIAL_HP4X is not set
CONFIG_USB_SERIAL_SAFE=m
@@ -1310,6 +1459,20 @@ CONFIG_NLS_ISO8859_15=y
# CONFIG_NLS_KOI8_U is not set
CONFIG_NLS_UTF8=y
+#
+# Library routines
+#
+CONFIG_CRC_CCITT=m
+# CONFIG_CRC16 is not set
+CONFIG_CRC32=y
+CONFIG_LIBCRC32C=m
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=m
+CONFIG_TEXTSEARCH=y
+CONFIG_TEXTSEARCH_KMP=m
+CONFIG_TEXTSEARCH_BM=m
+CONFIG_TEXTSEARCH_FSM=m
+
#
# Profiling support
#
@@ -1331,12 +1494,14 @@ CONFIG_DETECT_SOFTLOCKUP=y
# CONFIG_DEBUG_KOBJECT is not set
# CONFIG_DEBUG_INFO is not set
CONFIG_DEBUG_FS=y
+# CONFIG_DEBUG_VM is not set
+# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_DEBUG_STACKOVERFLOW is not set
# CONFIG_KPROBES is not set
# CONFIG_DEBUG_STACK_USAGE is not set
# CONFIG_DEBUGGER is not set
-# CONFIG_PPCDBG is not set
CONFIG_IRQSTACKS=y
+CONFIG_BOOTX_TEXT=y
#
# Security options
@@ -1376,17 +1541,3 @@ CONFIG_CRYPTO_TEST=m
#
# Hardware crypto devices
#
-
-#
-# Library routines
-#
-CONFIG_CRC_CCITT=m
-# CONFIG_CRC16 is not set
-CONFIG_CRC32=y
-CONFIG_LIBCRC32C=m
-CONFIG_ZLIB_INFLATE=y
-CONFIG_ZLIB_DEFLATE=m
-CONFIG_TEXTSEARCH=y
-CONFIG_TEXTSEARCH_KMP=m
-CONFIG_TEXTSEARCH_BM=m
-CONFIG_TEXTSEARCH_FSM=m
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index bc5a3689cc05..b75757251994 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -125,6 +125,9 @@ int main(void)
DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
+#ifdef CONFIG_PPC_64K_PAGES
+ DEFINE(PACAPGDIR, offsetof(struct paca_struct, pgdir));
+#endif
#ifdef CONFIG_HUGETLB_PAGE
DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas));
DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas));
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index b91345fa0805..cc4e9eb1c13f 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -240,7 +240,7 @@ struct cpu_spec cpu_specs[] = {
.oprofile_model = &op_model_power4,
#endif
},
- { /* Power5 */
+ { /* Power5 GR */
.pvr_mask = 0xffff0000,
.pvr_value = 0x003a0000,
.cpu_name = "POWER5 (gr)",
@@ -255,7 +255,7 @@ struct cpu_spec cpu_specs[] = {
.oprofile_model = &op_model_power4,
#endif
},
- { /* Power5 */
+ { /* Power5 GS */
.pvr_mask = 0xffff0000,
.pvr_value = 0x003b0000,
.cpu_name = "POWER5 (gs)",
@@ -929,6 +929,16 @@ struct cpu_spec cpu_specs[] = {
.icache_bsize = 32,
.dcache_bsize = 32,
},
+ { /* 440SPe Rev. A */
+ .pvr_mask = 0xff000fff,
+ .pvr_value = 0x53000890,
+ .cpu_name = "440SPe Rev. A",
+ .cpu_features = CPU_FTR_SPLIT_ID_CACHE |
+ CPU_FTR_USE_TB,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ },
#endif /* CONFIG_44x */
#ifdef CONFIG_FSL_BOOKE
{ /* e200z5 */
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 45d81976987f..16ab40daa738 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -195,11 +195,11 @@ exception_marker:
#define EX_R12 24
#define EX_R13 32
#define EX_SRR0 40
-#define EX_R3 40 /* SLB miss saves R3, but not SRR0 */
#define EX_DAR 48
-#define EX_LR 48 /* SLB miss saves LR, but not DAR */
#define EX_DSISR 56
#define EX_CCR 60
+#define EX_R3 64
+#define EX_LR 72
#define EXCEPTION_PROLOG_PSERIES(area, label) \
mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \
@@ -419,17 +419,22 @@ data_access_slb_pSeries:
mtspr SPRN_SPRG1,r13
RUNLATCH_ON(r13)
mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
+ std r3,PACA_EXSLB+EX_R3(r13)
+ mfspr r3,SPRN_DAR
std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
+ mfcr r9
+#ifdef __DISABLED__
+ /* Keep that around for when we re-implement dynamic VSIDs */
+ cmpdi r3,0
+ bge slb_miss_user_pseries
+#endif /* __DISABLED__ */
std r10,PACA_EXSLB+EX_R10(r13)
std r11,PACA_EXSLB+EX_R11(r13)
std r12,PACA_EXSLB+EX_R12(r13)
- std r3,PACA_EXSLB+EX_R3(r13)
- mfspr r9,SPRN_SPRG1
- std r9,PACA_EXSLB+EX_R13(r13)
- mfcr r9
+ mfspr r10,SPRN_SPRG1
+ std r10,PACA_EXSLB+EX_R13(r13)
mfspr r12,SPRN_SRR1 /* and SRR1 */
- mfspr r3,SPRN_DAR
- b .do_slb_miss /* Rel. branch works in real mode */
+ b .slb_miss_realmode /* Rel. branch works in real mode */
STD_EXCEPTION_PSERIES(0x400, instruction_access)
@@ -440,17 +445,22 @@ instruction_access_slb_pSeries:
mtspr SPRN_SPRG1,r13
RUNLATCH_ON(r13)
mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
+ std r3,PACA_EXSLB+EX_R3(r13)
+ mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
+ mfcr r9
+#ifdef __DISABLED__
+ /* Keep that around for when we re-implement dynamic VSIDs */
+ cmpdi r3,0
+ bge slb_miss_user_pseries
+#endif /* __DISABLED__ */
std r10,PACA_EXSLB+EX_R10(r13)
std r11,PACA_EXSLB+EX_R11(r13)
std r12,PACA_EXSLB+EX_R12(r13)
- std r3,PACA_EXSLB+EX_R3(r13)
- mfspr r9,SPRN_SPRG1
- std r9,PACA_EXSLB+EX_R13(r13)
- mfcr r9
+ mfspr r10,SPRN_SPRG1
+ std r10,PACA_EXSLB+EX_R13(r13)
mfspr r12,SPRN_SRR1 /* and SRR1 */
- mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
- b .do_slb_miss /* Rel. branch works in real mode */
+ b .slb_miss_realmode /* Rel. branch works in real mode */
STD_EXCEPTION_PSERIES(0x500, hardware_interrupt)
STD_EXCEPTION_PSERIES(0x600, alignment)
@@ -508,6 +518,38 @@ _GLOBAL(do_stab_bolted_pSeries)
mfspr r12,SPRN_SPRG2
EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted)
+/*
+ * We have some room here; we use it to put the pSeries SLB miss user
+ * trampoline code, keeping it reasonably far away from
+ * slb_miss_user_common to avoid problems with rfid.
+ *
+ * This is used when the SLB miss handler has to go virtual, which
+ * doesn't happen anymore for now, but will again once we re-implement
+ * dynamic VSIDs for shared page tables.
+ */
+#ifdef __DISABLED__
+slb_miss_user_pseries:
+ std r10,PACA_EXGEN+EX_R10(r13)
+ std r11,PACA_EXGEN+EX_R11(r13)
+ std r12,PACA_EXGEN+EX_R12(r13)
+ mfspr r10,SPRG1
+ ld r11,PACA_EXSLB+EX_R9(r13)
+ ld r12,PACA_EXSLB+EX_R3(r13)
+ std r10,PACA_EXGEN+EX_R13(r13)
+ std r11,PACA_EXGEN+EX_R9(r13)
+ std r12,PACA_EXGEN+EX_R3(r13)
+ clrrdi r12,r13,32
+ mfmsr r10
+ mfspr r11,SRR0 /* save SRR0 */
+ ori r12,r12,slb_miss_user_common@l /* virt addr of handler */
+ ori r10,r10,MSR_IR|MSR_DR|MSR_RI
+ mtspr SRR0,r12
+ mfspr r12,SRR1 /* and SRR1 */
+ mtspr SRR1,r10
+ rfid
+ b . /* prevent spec. execution */
+#endif /* __DISABLED__ */
+
/*
* Vectors for the FWNMI option. Share common code.
*/
@@ -559,22 +601,59 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
.globl data_access_slb_iSeries
data_access_slb_iSeries:
mtspr SPRN_SPRG1,r13 /* save r13 */
- EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
+ mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
std r3,PACA_EXSLB+EX_R3(r13)
- ld r12,PACALPPACA+LPPACASRR1(r13)
mfspr r3,SPRN_DAR
- b .do_slb_miss
+ std r9,PACA_EXSLB+EX_R9(r13)
+ mfcr r9
+#ifdef __DISABLED__
+ cmpdi r3,0
+ bge slb_miss_user_iseries
+#endif
+ std r10,PACA_EXSLB+EX_R10(r13)
+ std r11,PACA_EXSLB+EX_R11(r13)
+ std r12,PACA_EXSLB+EX_R12(r13)
+ mfspr r10,SPRN_SPRG1
+ std r10,PACA_EXSLB+EX_R13(r13)
+ ld r12,PACALPPACA+LPPACASRR1(r13)
+ b .slb_miss_realmode
STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN)
.globl instruction_access_slb_iSeries
instruction_access_slb_iSeries:
mtspr SPRN_SPRG1,r13 /* save r13 */
- EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
+ mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
std r3,PACA_EXSLB+EX_R3(r13)
- ld r12,PACALPPACA+LPPACASRR1(r13)
- ld r3,PACALPPACA+LPPACASRR0(r13)
- b .do_slb_miss
+ ld r3,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
+ std r9,PACA_EXSLB+EX_R9(r13)
+ mfcr r9
+#ifdef __DISABLED__
+ cmpdi r3,0
+ bge slb_miss_user_iseries
+#endif
+ std r10,PACA_EXSLB+EX_R10(r13)
+ std r11,PACA_EXSLB+EX_R11(r13)
+ std r12,PACA_EXSLB+EX_R12(r13)
+ mfspr r10,SPRN_SPRG1
+ std r10,PACA_EXSLB+EX_R13(r13)
+ ld r12,PACALPPACA+LPPACASRR1(r13)
+ b .slb_miss_realmode
+
+#ifdef __DISABLED__
+slb_miss_user_iseries:
+ std r10,PACA_EXGEN+EX_R10(r13)
+ std r11,PACA_EXGEN+EX_R11(r13)
+ std r12,PACA_EXGEN+EX_R12(r13)
+ mfspr r10,SPRG1
+ ld r11,PACA_EXSLB+EX_R9(r13)
+ ld r12,PACA_EXSLB+EX_R3(r13)
+ std r10,PACA_EXGEN+EX_R13(r13)
+ std r11,PACA_EXGEN+EX_R9(r13)
+ std r12,PACA_EXGEN+EX_R3(r13)
+ EXCEPTION_PROLOG_ISERIES_2
+ b slb_miss_user_common
+#endif
MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt)
STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN)
@@ -809,6 +888,126 @@ instruction_access_common:
li r5,0x400
b .do_hash_page /* Try to handle as hpte fault */
+/*
+ * Here is the common SLB miss user handler that is used when going
+ * to virtual mode for SLB misses; it is currently unused.
+ */
+#ifdef __DISABLED__
+ .align 7
+ .globl slb_miss_user_common
+slb_miss_user_common:
+ mflr r10
+ std r3,PACA_EXGEN+EX_DAR(r13)
+ stw r9,PACA_EXGEN+EX_CCR(r13)
+ std r10,PACA_EXGEN+EX_LR(r13)
+ std r11,PACA_EXGEN+EX_SRR0(r13)
+ bl .slb_allocate_user
+
+ ld r10,PACA_EXGEN+EX_LR(r13)
+ ld r3,PACA_EXGEN+EX_R3(r13)
+ lwz r9,PACA_EXGEN+EX_CCR(r13)
+ ld r11,PACA_EXGEN+EX_SRR0(r13)
+ mtlr r10
+ beq- slb_miss_fault
+
+ andi. r10,r12,MSR_RI /* check for unrecoverable exception */
+ beq- unrecov_user_slb
+ mfmsr r10
+
+.machine push
+.machine "power4"
+ mtcrf 0x80,r9
+.machine pop
+
+ clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */
+ mtmsrd r10,1
+
+ mtspr SRR0,r11
+ mtspr SRR1,r12
+
+ ld r9,PACA_EXGEN+EX_R9(r13)
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ ld r11,PACA_EXGEN+EX_R11(r13)
+ ld r12,PACA_EXGEN+EX_R12(r13)
+ ld r13,PACA_EXGEN+EX_R13(r13)
+ rfid
+ b .
+
+slb_miss_fault:
+ EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN)
+ ld r4,PACA_EXGEN+EX_DAR(r13)
+ li r5,0
+ std r4,_DAR(r1)
+ std r5,_DSISR(r1)
+ b .handle_page_fault
+
+unrecov_user_slb:
+ EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN)
+ DISABLE_INTS
+ bl .save_nvgprs
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .unrecoverable_exception
+ b 1b
+
+#endif /* __DISABLED__ */
+
+
+/*
+ * r13 points to the PACA, r9 contains the saved CR,
+ * r12 contains the saved SRR1, SRR0 is still ready for return
+ * r3 has the faulting address
+ * r9 - r13 are saved in paca->exslb.
+ * r3 is saved in paca->slb_r3
+ * We assume we aren't going to take any exceptions during this procedure.
+ */
+_GLOBAL(slb_miss_realmode)
+ mflr r10
+
+ stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
+ std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
+
+ bl .slb_allocate_realmode
+
+ /* All done -- return from exception. */
+
+ ld r10,PACA_EXSLB+EX_LR(r13)
+ ld r3,PACA_EXSLB+EX_R3(r13)
+ lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
+#ifdef CONFIG_PPC_ISERIES
+ ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
+#endif /* CONFIG_PPC_ISERIES */
+
+ mtlr r10
+
+ andi. r10,r12,MSR_RI /* check for unrecoverable exception */
+ beq- unrecov_slb
+
+.machine push
+.machine "power4"
+ mtcrf 0x80,r9
+ mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
+.machine pop
+
+#ifdef CONFIG_PPC_ISERIES
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
+#endif /* CONFIG_PPC_ISERIES */
+ ld r9,PACA_EXSLB+EX_R9(r13)
+ ld r10,PACA_EXSLB+EX_R10(r13)
+ ld r11,PACA_EXSLB+EX_R11(r13)
+ ld r12,PACA_EXSLB+EX_R12(r13)
+ ld r13,PACA_EXSLB+EX_R13(r13)
+ rfid
+ b . /* prevent speculative execution */
+
+unrecov_slb:
+ EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
+ DISABLE_INTS
+ bl .save_nvgprs
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .unrecoverable_exception
+ b 1b
+
.align 7
.globl hardware_interrupt_common
.globl hardware_interrupt_entry
@@ -1138,62 +1337,6 @@ _GLOBAL(do_stab_bolted)
rfid
b . /* prevent speculative execution */
-/*
- * r13 points to the PACA, r9 contains the saved CR,
- * r11 and r12 contain the saved SRR0 and SRR1.
- * r3 has the faulting address
- * r9 - r13 are saved in paca->exslb.
- * r3 is saved in paca->slb_r3
- * We assume we aren't going to take any exceptions during this procedure.
- */
-_GLOBAL(do_slb_miss)
- mflr r10
-
- stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
- std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
-
- bl .slb_allocate /* handle it */
-
- /* All done -- return from exception. */
-
- ld r10,PACA_EXSLB+EX_LR(r13)
- ld r3,PACA_EXSLB+EX_R3(r13)
- lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
-#ifdef CONFIG_PPC_ISERIES
- ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */
-#endif /* CONFIG_PPC_ISERIES */
-
- mtlr r10
-
- andi. r10,r12,MSR_RI /* check for unrecoverable exception */
- beq- unrecov_slb
-
-.machine push
-.machine "power4"
- mtcrf 0x80,r9
- mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
-.machine pop
-
-#ifdef CONFIG_PPC_ISERIES
- mtspr SPRN_SRR0,r11
- mtspr SPRN_SRR1,r12
-#endif /* CONFIG_PPC_ISERIES */
- ld r9,PACA_EXSLB+EX_R9(r13)
- ld r10,PACA_EXSLB+EX_R10(r13)
- ld r11,PACA_EXSLB+EX_R11(r13)
- ld r12,PACA_EXSLB+EX_R12(r13)
- ld r13,PACA_EXSLB+EX_R13(r13)
- rfid
- b . /* prevent speculative execution */
-
-unrecov_slb:
- EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
- DISABLE_INTS
- bl .save_nvgprs
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl .unrecoverable_exception
- b 1b
-
/*
* Space for CPU0's segment table.
*
@@ -1569,7 +1712,10 @@ _GLOBAL(__secondary_start)
#endif
/* Initialize the first segment table (or SLB) entry */
ld r3,PACASTABVIRT(r13) /* get addr of segment table */
+BEGIN_FTR_SECTION
bl .stab_initialize
+END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
+ bl .slb_initialize
/* Initialize the kernel stack. Just a repeat for iSeries. */
LOADADDR(r3,current_set)
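For reference, the EX_* offsets redefined near the top of this file describe the per-exception save area in the PACA. A hedged C rendering of that layout (the struct itself and the 0/8/16 offsets for EX_R9..EX_R11 are inferred from the visible 24..72 values, which this patch does not show):

struct ex_save {                 /* illustrative; the kernel uses raw offsets */
	u64 r9;                  /* EX_R9    =  0 (inferred) */
	u64 r10;                 /* EX_R10   =  8 (inferred) */
	u64 r11;                 /* EX_R11   = 16 (inferred) */
	u64 r12;                 /* EX_R12   = 24 */
	u64 r13;                 /* EX_R13   = 32 */
	u64 srr0;                /* EX_SRR0  = 40 */
	u64 dar;                 /* EX_DAR   = 48 */
	u32 dsisr;               /* EX_DSISR = 56 */
	u32 ccr;                 /* EX_CCR   = 60 */
	u64 r3;                  /* EX_R3    = 64: no longer aliases EX_SRR0 */
	u64 lr;                  /* EX_LR    = 72: no longer aliases EX_DAR */
};

Giving R3 and LR their own slots is what allows slb_miss_realmode above to save CR and LR while SRR0 and DAR remain intact in their original positions.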
diff --git a/arch/powerpc/kernel/lparmap.c b/arch/powerpc/kernel/lparmap.c
index eded971d1bf9..5a05a797485f 100644
--- a/arch/powerpc/kernel/lparmap.c
+++ b/arch/powerpc/kernel/lparmap.c
@@ -25,7 +25,7 @@ const struct LparMap __attribute__((__section__(".text"))) xLparMap = {
.xRanges = {
{ .xPages = HvPagesToMap,
.xOffset = 0,
- .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - PAGE_SHIFT),
+ .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - HW_PAGE_SHIFT),
},
},
};
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index b3e95ff0dba0..ae1433da09b2 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -603,6 +603,76 @@ _GLOBAL(real_writeb)
blr
#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */
+/*
+ * SCOM access functions for 970 (FX only for now)
+ *
+ * unsigned long scom970_read(unsigned int address);
+ * void scom970_write(unsigned int address, unsigned long value);
+ *
+ * The address passed in is the 24-bit register address. This code
+ * is 970-specific and will not check the status bits, so you should
+ * know what you are doing.
+ */
+_GLOBAL(scom970_read)
+ /* interrupts off */
+ mfmsr r4
+ ori r0,r4,MSR_EE
+ xori r0,r0,MSR_EE
+ mtmsrd r0,1
+
+ /* rotate the 24-bit SCOM address 8 bits left and mask out its low 8
+ * bits (including parity). On current CPUs they must be 0'd,
+ * and finally OR in the RW bit
+ */
+ rlwinm r3,r3,8,0,15
+ ori r3,r3,0x8000
+
+ /* do the actual scom read */
+ sync
+ mtspr SPRN_SCOMC,r3
+ isync
+ mfspr r3,SPRN_SCOMD
+ isync
+ mfspr r0,SPRN_SCOMC
+ isync
+
+ /* XXX: fix up the result on some buggy 970s (ouch! we lost a bit,
+ * but that's the best we can do). Not implemented yet, as we don't
+ * use the SCOM on any of the buggy CPUs yet, but it may ultimately
+ * have to be done
+ */
+
+ /* restore interrupts */
+ mtmsrd r4,1
+ blr
+
+
+_GLOBAL(scom970_write)
+ /* interrupts off */
+ mfmsr r5
+ ori r0,r5,MSR_EE
+ xori r0,r0,MSR_EE
+ mtmsrd r0,1
+
+ /* rotate the 24-bit SCOM address 8 bits left and mask out its low 8 bits
+ * (including parity). On current CPUs they must be 0'd.
+ */
+
+ rlwinm r3,r3,8,0,15
+
+ sync
+ mtspr SPRN_SCOMD,r4 /* write data */
+ isync
+ mtspr SPRN_SCOMC,r3 /* write command */
+ isync
+ mfspr r3,SPRN_SCOMC
+ isync
+
+ /* restore interrupts */
+ mtmsrd r5,1
+ blr
+
+
/*
* Create a kernel thread
* kernel_thread(fn, arg, flags)
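A hedged usage sketch for the two SCOM accessors added above, matching the C prototypes given in their comment block; the register number is a placeholder, not a documented 970 SCOM address:

extern unsigned long scom970_read(unsigned int address);
extern void scom970_write(unsigned int address, unsigned long value);

#define MY_SCOM_REG	0x123456	/* hypothetical 24-bit SCOM address */

static void scom_set_low_bit(void)
{
	unsigned long val;

	val = scom970_read(MY_SCOM_REG);	/* IRQs handled internally */
	scom970_write(MY_SCOM_REG, val | 1UL);	/* illustrative modification */
}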
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 96843211cc5c..de69fb37c731 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -46,10 +46,10 @@
#include
#include
#include
+#include
#ifdef CONFIG_PPC64
#include
#include
-#include
#endif
extern unsigned long _get_SP(void);
@@ -203,10 +203,8 @@ int dump_spe(struct pt_regs *regs, elf_vrregset_t *evrregs)
int set_dabr(unsigned long dabr)
{
-#ifdef CONFIG_PPC64
if (ppc_md.set_dabr)
return ppc_md.set_dabr(dabr);
-#endif
mtspr(SPRN_DABR, dabr);
return 0;
@@ -554,12 +552,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
#ifdef CONFIG_PPC64
if (cpu_has_feature(CPU_FTR_SLB)) {
unsigned long sp_vsid = get_kernel_vsid(sp);
+ unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
sp_vsid <<= SLB_VSID_SHIFT;
- sp_vsid |= SLB_VSID_KERNEL;
- if (cpu_has_feature(CPU_FTR_16M_PAGE))
- sp_vsid |= SLB_VSID_L;
-
+ sp_vsid |= SLB_VSID_KERNEL | llp;
p->thread.ksp_vsid = sp_vsid;
}
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index eec2da695508..f645adb57534 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -724,10 +724,10 @@ static inline char *find_flat_dt_string(u32 offset)
* used to extract the memory informations at boot before we can
* unflatten the tree
*/
-static int __init scan_flat_dt(int (*it)(unsigned long node,
- const char *uname, int depth,
- void *data),
- void *data)
+int __init of_scan_flat_dt(int (*it)(unsigned long node,
+ const char *uname, int depth,
+ void *data),
+ void *data)
{
unsigned long p = ((unsigned long)initial_boot_params) +
initial_boot_params->off_dt_struct;
@@ -784,8 +784,8 @@ static int __init scan_flat_dt(int (*it)(unsigned long node,
* This function can be used within scan_flattened_dt callback to get
* access to properties
*/
-static void* __init get_flat_dt_prop(unsigned long node, const char *name,
- unsigned long *size)
+void* __init of_get_flat_dt_prop(unsigned long node, const char *name,
+ unsigned long *size)
{
unsigned long p = node;
@@ -1087,7 +1087,7 @@ void __init unflatten_device_tree(void)
static int __init early_init_dt_scan_cpus(unsigned long node,
const char *uname, int depth, void *data)
{
- char *type = get_flat_dt_prop(node, "device_type", NULL);
+ char *type = of_get_flat_dt_prop(node, "device_type", NULL);
u32 *prop;
unsigned long size = 0;
@@ -1095,19 +1095,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
if (type == NULL || strcmp(type, "cpu") != 0)
return 0;
-#ifdef CONFIG_PPC_PSERIES
- /* On LPAR, look for the first ibm,pft-size property for the hash table size
- */
- if (systemcfg->platform == PLATFORM_PSERIES_LPAR && ppc64_pft_size == 0) {
- u32 *pft_size;
- pft_size = get_flat_dt_prop(node, "ibm,pft-size", NULL);
- if (pft_size != NULL) {
- /* pft_size[0] is the NUMA CEC cookie */
- ppc64_pft_size = pft_size[1];
- }
- }
-#endif
-
boot_cpuid = 0;
boot_cpuid_phys = 0;
if (initial_boot_params && initial_boot_params->version >= 2) {
@@ -1117,8 +1104,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
boot_cpuid_phys = initial_boot_params->boot_cpuid_phys;
} else {
/* Check if it's the boot-cpu, set it's hw index now */
- if (get_flat_dt_prop(node, "linux,boot-cpu", NULL) != NULL) {
- prop = get_flat_dt_prop(node, "reg", NULL);
+ if (of_get_flat_dt_prop(node,
+ "linux,boot-cpu", NULL) != NULL) {
+ prop = of_get_flat_dt_prop(node, "reg", NULL);
if (prop != NULL)
boot_cpuid_phys = *prop;
}
@@ -1127,14 +1115,14 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
#ifdef CONFIG_ALTIVEC
/* Check if we have a VMX and eventually update CPU features */
- prop = (u32 *)get_flat_dt_prop(node, "ibm,vmx", &size);
+ prop = (u32 *)of_get_flat_dt_prop(node, "ibm,vmx", &size);
if (prop && (*prop) > 0) {
cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
}
/* Same goes for Apple's "altivec" property */
- prop = (u32 *)get_flat_dt_prop(node, "altivec", NULL);
+ prop = (u32 *)of_get_flat_dt_prop(node, "altivec", NULL);
if (prop) {
cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
@@ -1147,7 +1135,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
* this by looking at the size of the ibm,ppc-interrupt-server#s
* property
*/
- prop = (u32 *)get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s",
+ prop = (u32 *)of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s",
&size);
cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
if (prop && ((size / sizeof(u32)) > 1))
@@ -1170,7 +1158,7 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
return 0;
/* get platform type */
- prop = (u32 *)get_flat_dt_prop(node, "linux,platform", NULL);
+ prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL);
if (prop == NULL)
return 0;
#ifdef CONFIG_PPC64
@@ -1183,21 +1171,21 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
#ifdef CONFIG_PPC64
/* check if iommu is forced on or off */
- if (get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
+ if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL)
iommu_is_off = 1;
- if (get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL)
+ if (of_get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL)
iommu_force_on = 1;
#endif
- lprop = get_flat_dt_prop(node, "linux,memory-limit", NULL);
+ lprop = of_get_flat_dt_prop(node, "linux,memory-limit", NULL);
if (lprop)
memory_limit = *lprop;
#ifdef CONFIG_PPC64
- lprop = get_flat_dt_prop(node, "linux,tce-alloc-start", NULL);
+ lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-start", NULL);
if (lprop)
tce_alloc_start = *lprop;
- lprop = get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
+ lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
if (lprop)
tce_alloc_end = *lprop;
#endif
@@ -1209,9 +1197,9 @@ static int __init early_init_dt_scan_chosen(unsigned long node,
{
u64 *basep, *entryp;
- basep = get_flat_dt_prop(node, "linux,rtas-base", NULL);
- entryp = get_flat_dt_prop(node, "linux,rtas-entry", NULL);
- prop = get_flat_dt_prop(node, "linux,rtas-size", NULL);
+ basep = of_get_flat_dt_prop(node, "linux,rtas-base", NULL);
+ entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL);
+ prop = of_get_flat_dt_prop(node, "linux,rtas-size", NULL);
if (basep && entryp && prop) {
rtas.base = *basep;
rtas.entry = *entryp;
@@ -1232,11 +1220,11 @@ static int __init early_init_dt_scan_root(unsigned long node,
if (depth != 0)
return 0;
- prop = get_flat_dt_prop(node, "#size-cells", NULL);
+ prop = of_get_flat_dt_prop(node, "#size-cells", NULL);
dt_root_size_cells = (prop == NULL) ? 1 : *prop;
DBG("dt_root_size_cells = %x\n", dt_root_size_cells);
- prop = get_flat_dt_prop(node, "#address-cells", NULL);
+ prop = of_get_flat_dt_prop(node, "#address-cells", NULL);
dt_root_addr_cells = (prop == NULL) ? 2 : *prop;
DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells);
@@ -1271,7 +1259,7 @@ static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp)
static int __init early_init_dt_scan_memory(unsigned long node,
const char *uname, int depth, void *data)
{
- char *type = get_flat_dt_prop(node, "device_type", NULL);
+ char *type = of_get_flat_dt_prop(node, "device_type", NULL);
cell_t *reg, *endp;
unsigned long l;
@@ -1279,7 +1267,7 @@ static int __init early_init_dt_scan_memory(unsigned long node,
if (type == NULL || strcmp(type, "memory") != 0)
return 0;
- reg = (cell_t *)get_flat_dt_prop(node, "reg", &l);
+ reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l);
if (reg == NULL)
return 0;
@@ -1343,12 +1331,12 @@ void __init early_init_devtree(void *params)
* device-tree, including the platform type, initrd location and
* size, TCE reserve, and more ...
*/
- scan_flat_dt(early_init_dt_scan_chosen, NULL);
+ of_scan_flat_dt(early_init_dt_scan_chosen, NULL);
/* Scan memory nodes and rebuild LMBs */
lmb_init();
- scan_flat_dt(early_init_dt_scan_root, NULL);
- scan_flat_dt(early_init_dt_scan_memory, NULL);
+ of_scan_flat_dt(early_init_dt_scan_root, NULL);
+ of_scan_flat_dt(early_init_dt_scan_memory, NULL);
lmb_enforce_memory_limit(memory_limit);
lmb_analyze();
#ifdef CONFIG_PPC64
@@ -1363,10 +1351,10 @@ void __init early_init_devtree(void *params)
DBG("Scanning CPUs ...\n");
- /* Retreive hash table size from flattened tree plus other
- * CPU related informations (altivec support, boot CPU ID, ...)
+ /* Retrieve CPU-related information from the flat tree
+ * (altivec support, boot CPU ID, ...)
*/
- scan_flat_dt(early_init_dt_scan_cpus, NULL);
+ of_scan_flat_dt(early_init_dt_scan_cpus, NULL);
DBG(" <- early_init_devtree()\n");
}
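Now that the scanner is exported under the of_ prefix, callers outside this file can walk the flattened tree the same way the early_init_dt_scan_* helpers above do. A hedged sketch mirroring those callers (node type and the return convention follow the code visible in this hunk):

static int __init count_memory_nodes(unsigned long node, const char *uname,
				     int depth, void *data)
{
	char *type = of_get_flat_dt_prop(node, "device_type", NULL);

	if (type != NULL && strcmp(type, "memory") == 0)
		(*(int *)data)++;
	return 0;		/* 0 = keep walking the whole tree */
}

static void __init example_scan(void)
{
	int nr_memory = 0;

	of_scan_flat_dt(count_memory_nodes, &nr_memory);
}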
@@ -1986,14 +1974,29 @@ EXPORT_SYMBOL(get_property);
/*
* Add a property to a node
*/
-void prom_add_property(struct device_node* np, struct property* prop)
+int prom_add_property(struct device_node* np, struct property* prop)
{
- struct property **next = &np->properties;
+ struct property **next;
prop->next = NULL;
- while (*next)
+ write_lock(&devtree_lock);
+ next = &np->properties;
+ while (*next) {
+ if (strcmp(prop->name, (*next)->name) == 0) {
+ /* duplicate! don't insert it */
+ write_unlock(&devtree_lock);
+ return -1;
+ }
next = &(*next)->next;
+ }
*next = prop;
+ write_unlock(&devtree_lock);
+
+ /* try to add to proc as well if it was initialized */
+ if (np->pde)
+ proc_device_tree_add_prop(np->pde, prop);
+
+ return 0;
}
/* I quickly hacked that one, check against spec ! */
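prom_add_property() now returns an error instead of silently chaining a duplicate, and mirrors the new property into /proc when the node already has a proc entry. A hedged usage sketch (the property name and value are illustrative):

static struct property example_prop = {
	.name   = "linux,example",	/* hypothetical property name */
	.length = sizeof("hello"),
	.value  = "hello",
};

static void add_example(struct device_node *np)
{
	if (prom_add_property(np, &example_prop) < 0)
		printk(KERN_DEBUG "%s: property %s already present\n",
		       np->full_name, example_prop.name);
}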
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index c758b6624d7b..6dc33d19fc2a 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -403,19 +403,19 @@ static int __init prom_next_node(phandle *nodep)
}
}
-static int __init prom_getprop(phandle node, const char *pname,
+static inline int prom_getprop(phandle node, const char *pname,
void *value, size_t valuelen)
{
return call_prom("getprop", 4, 1, node, ADDR(pname),
(u32)(unsigned long) value, (u32) valuelen);
}
-static int __init prom_getproplen(phandle node, const char *pname)
+static inline int prom_getproplen(phandle node, const char *pname)
{
return call_prom("getproplen", 2, 1, node, ADDR(pname));
}
-static int __init prom_setprop(phandle node, const char *pname,
+static inline int prom_setprop(phandle node, const char *pname,
void *value, size_t valuelen)
{
return call_prom("setprop", 4, 1, node, ADDR(pname),
@@ -1408,8 +1408,9 @@ static int __init prom_find_machine_type(void)
struct prom_t *_prom = &RELOC(prom);
char compat[256];
int len, i = 0;
+#ifdef CONFIG_PPC64
phandle rtas;
-
+#endif
len = prom_getprop(_prom->root, "compatible",
compat, sizeof(compat)-1);
if (len > 0) {
@@ -1872,7 +1873,7 @@ static void __init fixup_device_tree(void)
if (prom_getprop(u3, "device-rev", &u3_rev, sizeof(u3_rev))
== PROM_ERROR)
return;
- if (u3_rev != 0x35 && u3_rev != 0x37)
+ if (u3_rev < 0x35 || u3_rev > 0x39)
return;
/* does it need fixup ? */
if (prom_getproplen(i2c, "interrupts") > 0)
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 568ea335d616..3d2abd95c7ae 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -248,46 +248,10 @@ void ptrace_disable(struct task_struct *child)
clear_single_step(child);
}
-long sys_ptrace(long request, long pid, long addr, long data)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
- struct task_struct *child;
int ret = -EPERM;
- lock_kernel();
- if (request == PTRACE_TRACEME) {
- /* are we already being traced? */
- if (current->ptrace & PT_PTRACED)
- goto out;
- ret = security_ptrace(current->parent, current);
- if (ret)
- goto out;
- /* set the ptrace bit in the process flags. */
- current->ptrace |= PT_PTRACED;
- ret = 0;
- goto out;
- }
- ret = -ESRCH;
- read_lock(&tasklist_lock);
- child = find_task_by_pid(pid);
- if (child)
- get_task_struct(child);
- read_unlock(&tasklist_lock);
- if (!child)
- goto out;
-
- ret = -EPERM;
- if (pid == 1) /* you may not mess with init */
- goto out_tsk;
-
- if (request == PTRACE_ATTACH) {
- ret = ptrace_attach(child);
- goto out_tsk;
- }
-
- ret = ptrace_check_attach(child, request == PTRACE_KILL);
- if (ret < 0)
- goto out_tsk;
-
switch (request) {
/* when I and D space are separate, these will need to be fixed. */
case PTRACE_PEEKTEXT: /* read word at location addr. */
@@ -540,10 +504,7 @@ long sys_ptrace(long request, long pid, long addr, long data)
ret = ptrace_request(child, request, addr, data);
break;
}
-out_tsk:
- put_task_struct(child);
-out:
- unlock_kernel();
+
return ret;
}
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index b7fc2d884950..9d4e07f6f1ec 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -17,6 +17,7 @@
#include
#include
#include
+#include
#include
#include
@@ -83,7 +84,7 @@ void call_rtas_display_status_delay(unsigned char c)
while (width-- > 0)
call_rtas_display_status(' ');
width = 16;
- udelay(500000);
+ mdelay(500);
pending_newline = 1;
} else {
if (pending_newline) {
@@ -608,7 +609,6 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
return 0;
}
-#ifdef CONFIG_SMP
/* This version can't take the spinlock, because it never returns */
struct rtas_args rtas_stop_self_args = {
@@ -633,7 +633,6 @@ void rtas_stop_self(void)
panic("Alas, I survived.\n");
}
-#endif
/*
* Call early during boot, before mem init or bootmem, to retreive the RTAS
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index d43fa8c0e5ac..e22856ecb5a0 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -405,6 +405,46 @@ static int __init set_preferred_console(void)
console_initcall(set_preferred_console);
#endif /* CONFIG_PPC_MULTIPLATFORM */
+void __init check_for_initrd(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+ unsigned long *prop;
+
+ DBG(" -> check_for_initrd()\n");
+
+ if (of_chosen) {
+ prop = (unsigned long *)get_property(of_chosen,
+ "linux,initrd-start", NULL);
+ if (prop != NULL) {
+ initrd_start = (unsigned long)__va(*prop);
+ prop = (unsigned long *)get_property(of_chosen,
+ "linux,initrd-end", NULL);
+ if (prop != NULL) {
+ initrd_end = (unsigned long)__va(*prop);
+ initrd_below_start_ok = 1;
+ } else
+ initrd_start = 0;
+ }
+ }
+
+ /* If we were passed an initrd, set the ROOT_DEV properly if the values
+ * look sensible. If not, clear initrd reference.
+ */
+ if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE &&
+ initrd_end > initrd_start)
+ ROOT_DEV = Root_RAM0;
+ else {
+ printk("Bogus initrd %08lx %08lx\n", initrd_start, initrd_end);
+ initrd_start = initrd_end = 0;
+ }
+
+ if (initrd_start)
+ printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end);
+
+ DBG(" <- check_for_initrd()\n");
+#endif /* CONFIG_BLK_DEV_INITRD */
+}
+
#ifdef CONFIG_SMP
/**
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index b45eedbb4b3a..3af2631e3fab 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -286,6 +286,7 @@ void __init setup_arch(char **cmdline_p)
loops_per_jiffy = 500000000 / HZ;
unflatten_device_tree();
+ check_for_initrd();
finish_device_tree();
smp_setup_cpu_maps();
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 6b52cce872be..0471e843b6c5 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -41,7 +41,6 @@
#include
#include
#include
-#include
#include
#include
#include
@@ -60,6 +59,7 @@
#include
#include
#include
+#include
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
@@ -243,12 +243,6 @@ void __init early_setup(unsigned long dt_ptr)
DBG(" -> early_setup()\n");
- /*
- * Fill the default DBG level (do we want to keep
- * that old mecanism around forever ?)
- */
- ppcdbg_initialize();
-
/*
* Do early initializations using the flattened device
* tree, like retreiving the physical memory map or
@@ -276,18 +270,23 @@ void __init early_setup(unsigned long dt_ptr)
DBG("Found, Initializing memory management...\n");
- /*
- * Initialize stab / SLB management
- */
- if (!firmware_has_feature(FW_FEATURE_ISERIES))
- stab_initialize(lpaca->stab_real);
-
/*
* Initialize the MMU Hash table and create the linear mapping
- * of memory
+ * of memory. Has to be done before stab/slb initialization as
+ * this is currently where the page size encoding is obtained
*/
htab_initialize();
+ /*
+ * Initialize stab / SLB management except on iSeries
+ */
+ if (!firmware_has_feature(FW_FEATURE_ISERIES)) {
+ if (cpu_has_feature(CPU_FTR_SLB))
+ slb_initialize();
+ else
+ stab_initialize(lpaca->stab_real);
+ }
+
DBG(" <- early_setup()\n");
}
@@ -396,43 +395,6 @@ static void __init initialize_cache_info(void)
DBG(" <- initialize_cache_info()\n");
}
-static void __init check_for_initrd(void)
-{
-#ifdef CONFIG_BLK_DEV_INITRD
- u64 *prop;
-
- DBG(" -> check_for_initrd()\n");
-
- if (of_chosen) {
- prop = (u64 *)get_property(of_chosen,
- "linux,initrd-start", NULL);
- if (prop != NULL) {
- initrd_start = (unsigned long)__va(*prop);
- prop = (u64 *)get_property(of_chosen,
- "linux,initrd-end", NULL);
- if (prop != NULL) {
- initrd_end = (unsigned long)__va(*prop);
- initrd_below_start_ok = 1;
- } else
- initrd_start = 0;
- }
- }
-
- /* If we were passed an initrd, set the ROOT_DEV properly if the values
- * look sensible. If not, clear initrd reference.
- */
- if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE &&
- initrd_end > initrd_start)
- ROOT_DEV = Root_RAM0;
- else
- initrd_start = initrd_end = 0;
-
- if (initrd_start)
- printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end);
-
- DBG(" <- check_for_initrd()\n");
-#endif /* CONFIG_BLK_DEV_INITRD */
-}
/*
* Do some initial setup of the system. The parameters are those which
@@ -516,7 +478,6 @@ void __init setup_system(void)
printk("-----------------------------------------------------\n");
printk("ppc64_pft_size = 0x%lx\n", ppc64_pft_size);
- printk("ppc64_debug_switch = 0x%lx\n", ppc64_debug_switch);
printk("ppc64_interrupt_controller = 0x%ld\n", ppc64_interrupt_controller);
printk("systemcfg = 0x%p\n", systemcfg);
printk("systemcfg->platform = 0x%x\n", systemcfg->platform);
@@ -552,10 +513,12 @@ static void __init irqstack_early_init(void)
* SLB misses on them.
*/
for_each_cpu(i) {
- softirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE,
- THREAD_SIZE, 0x10000000));
- hardirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE,
- THREAD_SIZE, 0x10000000));
+ softirq_ctx[i] = (struct thread_info *)
+ __va(lmb_alloc_base(THREAD_SIZE,
+ THREAD_SIZE, 0x10000000));
+ hardirq_ctx[i] = (struct thread_info *)
+ __va(lmb_alloc_base(THREAD_SIZE,
+ THREAD_SIZE, 0x10000000));
}
}
#else
@@ -583,8 +546,8 @@ static void __init emergency_stack_init(void)
limit = min(0x10000000UL, lmb.rmo_size);
for_each_cpu(i)
- paca[i].emergency_sp = __va(lmb_alloc_base(PAGE_SIZE, 128,
- limit)) + PAGE_SIZE;
+ paca[i].emergency_sp =
+ __va(lmb_alloc_base(HW_PAGE_SIZE, 128, limit)) + HW_PAGE_SIZE;
}
/*
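Both irqstack_early_init() and emergency_stack_init() above repeat the lmb_alloc_base()-plus-__va() expression. A sketch of a helper that would make the intent explicit; this is an editorial suggestion, not part of the patch:

static struct thread_info * __init alloc_irq_stack(void)
{
	/* physical block below 256MB (0x10000000), THREAD_SIZE aligned,
	 * kept in the bottom segment so the stack never takes SLB misses,
	 * per the comment above irqstack_early_init() */
	return (struct thread_info *)
		__va(lmb_alloc_base(THREAD_SIZE, THREAD_SIZE, 0x10000000));
}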
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 876c57c11365..081d931eae48 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -44,7 +44,6 @@
#include
#ifdef CONFIG_PPC64
#include "ppc32.h"
-#include
#include
#include
#else
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index ec9d0984b6a0..58194e150711 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -33,7 +33,6 @@
#include
#include
#include
-#include
#include
#include
#include
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 1794a694a928..5c330c3366e4 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -40,7 +40,6 @@
#include
#include
#include
-#include
#include
#include
#include
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 6996a593dcb3..a6282b625b44 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -61,6 +61,7 @@
#include
#include
#include
+#include
#ifdef CONFIG_PPC64
#include
#include
@@ -69,6 +70,7 @@
#include
#include
#endif
+#include
/* keep track of when we need to update the rtc */
time_t last_rtc_update;
@@ -118,10 +120,6 @@ static unsigned adjusting_time = 0;
unsigned long ppc_proc_freq;
unsigned long ppc_tb_freq;
-#ifdef CONFIG_PPC32 /* XXX for now */
-#define boot_cpuid 0
-#endif
-
u64 tb_last_jiffy __cacheline_aligned_in_smp;
unsigned long tb_last_stamp;
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 07e5ee40b870..32f215825e8d 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -39,7 +39,6 @@
#include
#include
#include
-#include
#include
#ifdef CONFIG_PPC32
#include
@@ -748,22 +747,12 @@ static int check_bug_trap(struct pt_regs *regs)
return 0;
if (bug->line & BUG_WARNING_TRAP) {
/* this is a WARN_ON rather than BUG/BUG_ON */
-#ifdef CONFIG_XMON
- xmon_printf(KERN_ERR "Badness in %s at %s:%ld\n",
- bug->function, bug->file,
- bug->line & ~BUG_WARNING_TRAP);
-#endif /* CONFIG_XMON */
printk(KERN_ERR "Badness in %s at %s:%ld\n",
bug->function, bug->file,
bug->line & ~BUG_WARNING_TRAP);
dump_stack();
return 1;
}
-#ifdef CONFIG_XMON
- xmon_printf(KERN_CRIT "kernel BUG in %s at %s:%ld!\n",
- bug->function, bug->file, bug->line);
- xmon(regs);
-#endif /* CONFIG_XMON */
printk(KERN_CRIT "kernel BUG in %s at %s:%ld!\n",
bug->function, bug->file, bug->line);
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 733d61618bbf..40523b140109 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -11,7 +11,7 @@
#include
#include
-_GLOBAL(copy_page)
+_GLOBAL(copy_4K_page)
std r31,-8(1)
std r30,-16(1)
std r29,-24(1)
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index a0b3fbbd6fb1..6d69ef39b7df 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -24,7 +24,7 @@ _GLOBAL(__copy_tofrom_user)
std r4,-16(r1)
std r5,-8(r1)
dcbt 0,r4
- beq .Lcopy_page
+ beq .Lcopy_page_4K
andi. r6,r6,7
mtcrf 0x01,r5
blt cr1,.Lshort_copy
@@ -366,7 +366,7 @@ _GLOBAL(__copy_tofrom_user)
* above (following the .Ldst_aligned label) but it runs slightly
* slower on POWER3.
*/
-.Lcopy_page:
+.Lcopy_page_4K:
std r31,-32(1)
std r30,-40(1)
std r29,-48(1)
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 2a912f411eb4..35bd03c41dd1 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -23,6 +23,7 @@
#if defined(CONFIG_PPC_SPLPAR) || defined(CONFIG_PPC_ISERIES)
#include
#include
+#include
void __spin_yield(raw_spinlock_t *lock)
{
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 841d8b6323a8..93d4fbfdb724 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -389,5 +389,22 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
}
/* kernel has accessed a bad area */
+
+ printk(KERN_ALERT "Unable to handle kernel paging request for ");
+ switch (regs->trap) {
+ case 0x300:
+ case 0x380:
+ printk("data at address 0x%08lx\n", regs->dar);
+ break;
+ case 0x400:
+ case 0x480:
+ printk("instruction fetch\n");
+ break;
+ default:
+ printk("unknown fault\n");
+ }
+ printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n",
+ regs->nip);
+
die("Kernel access of bad area", regs, sig);
}
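
With this hunk, a bad kernel-mode access is described before die() runs.
Going by the format strings just added, the console report would look
roughly like this (the addresses here are made up for illustration):

	Unable to handle kernel paging request for data at address 0x00000008
	Faulting instruction address: 0xc00000000004a7f0
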
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index d6ed9102eeea..e0d02c4a2615 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -1,7 +1,7 @@
/*
* ppc64 MMU hashtable management routines
*
- * (c) Copyright IBM Corp. 2003
+ * (c) Copyright IBM Corp. 2003, 2005
*
* Maintained by: Benjamin Herrenschmidt
*
@@ -10,6 +10,7 @@
* described in the kernel's COPYING file.
*/
+#include
#include
#include
#include
@@ -42,14 +43,24 @@
/* Save non-volatile offsets */
#define STK_REG(i) (112 + ((i)-14)*8)
+
+#ifndef CONFIG_PPC_64K_PAGES
+
+/*****************************************************************************
+ * *
+ * 4K SW & 4K HW pages implementation *
+ * *
+ *****************************************************************************/
+
+
/*
- * _hash_page(unsigned long ea, unsigned long access, unsigned long vsid,
- * pte_t *ptep, unsigned long trap, int local)
+ * _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
+ * pte_t *ptep, unsigned long trap, int local)
*
- * Adds a page to the hash table. This is the non-LPAR version for now
+ * Adds a 4K page to the hash table in a segment of 4K pages only
*/
-_GLOBAL(__hash_page)
+_GLOBAL(__hash_page_4K)
mflr r0
std r0,16(r1)
stdu r1,-STACKFRAMESIZE(r1)
@@ -88,7 +99,8 @@ _GLOBAL(__hash_page)
/* If so, just bail out and refault if needed. Someone else
* is changing this PTE anyway and might hash it.
*/
- bne- bail_ok
+ bne- htab_bail_ok
+
/* Prepare new PTE value (turn access RW into DIRTY, then
* add BUSY,HASHPTE and ACCESSED)
*/
@@ -118,10 +130,10 @@ _GLOBAL(__hash_page)
/* Convert linux PTE bits into HW equivalents */
andi. r3,r30,0x1fe /* Get basic set of flags */
- xori r3,r3,HW_NO_EXEC /* _PAGE_EXEC -> NOEXEC */
+ xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
- and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY -> r0 bit 30 */
+ and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
andc r0,r30,r0 /* r0 = pte & ~r0 */
rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
@@ -158,19 +170,21 @@ htab_insert_pte:
andc r30,r30,r0
ori r30,r30,_PAGE_HASHPTE
- /* page number in r5 */
- rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT
+ /* physical address r5 */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT
/* Calculate primary group hash */
and r0,r28,r27
- rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+ rldicr r3,r0,3,63-3 /* r3 = (hash & mask) << 3 */
/* Call ppc_md.hpte_insert */
- ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */
+ ld r6,STK_PARM(r4)(r1) /* Retrieve new pp bits */
mr r4,r29 /* Retreive va */
- li r6,0 /* no vflags */
+ li r7,0 /* !bolted, !secondary */
+ li r8,MMU_PAGE_4K /* page size */
_GLOBAL(htab_call_hpte_insert1)
- bl . /* Will be patched by htab_finish_init() */
+ bl . /* Patched by htab_finish_init() */
cmpdi 0,r3,0
bge htab_pte_insert_ok /* Insertion successful */
cmpdi 0,r3,-2 /* Critical failure */
@@ -178,19 +192,21 @@ _GLOBAL(htab_call_hpte_insert1)
/* Now try secondary slot */
- /* page number in r5 */
- rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT
+ /* physical address r5 */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT
/* Calculate secondary group hash */
andc r0,r27,r28
rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
/* Call ppc_md.hpte_insert */
- ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */
+ ld r6,STK_PARM(r4)(r1) /* Retrieve new pp bits */
mr r4,r29 /* Retreive va */
- li r6,HPTE_V_SECONDARY@l /* secondary slot */
+ li r7,HPTE_V_SECONDARY /* !bolted, secondary */
+ li r8,MMU_PAGE_4K /* page size */
_GLOBAL(htab_call_hpte_insert2)
- bl . /* Will be patched by htab_finish_init() */
+ bl . /* Patched by htab_finish_init() */
cmpdi 0,r3,0
bge+ htab_pte_insert_ok /* Insertion successful */
cmpdi 0,r3,-2 /* Critical failure */
@@ -207,14 +223,14 @@ _GLOBAL(htab_call_hpte_insert2)
rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
/* Call ppc_md.hpte_remove */
_GLOBAL(htab_call_hpte_remove)
- bl . /* Will be patched by htab_finish_init() */
+ bl . /* Patched by htab_finish_init() */
/* Try all again */
b htab_insert_pte
-bail_ok:
+htab_bail_ok:
li r3,0
- b bail
+ b htab_bail
htab_pte_insert_ok:
/* Insert slot number & secondary bit in PTE */
@@ -227,7 +243,7 @@ htab_write_out_pte:
ld r6,STK_PARM(r6)(r1)
std r30,0(r6)
li r3, 0
-bail:
+htab_bail:
ld r27,STK_REG(r27)(r1)
ld r28,STK_REG(r28)(r1)
ld r29,STK_REG(r29)(r1)
@@ -256,10 +272,10 @@ htab_modify_pte:
/* Call ppc_md.hpte_updatepp */
mr r5,r29 /* va */
- li r6,0 /* large is 0 */
+ li r6,MMU_PAGE_4K /* page size */
ld r7,STK_PARM(r8)(r1) /* get "local" param */
_GLOBAL(htab_call_hpte_updatepp)
- bl . /* Will be patched by htab_finish_init() */
+ bl . /* Patched by htab_finish_init() */
/* if we failed because typically the HPTE wasn't really here
* we try an insertion.
@@ -276,13 +292,556 @@ htab_wrong_access:
/* Bail out clearing reservation */
stdcx. r31,0,r6
li r3,1
- b bail
+ b htab_bail
htab_pte_insert_failure:
/* Bail out restoring old PTE */
ld r6,STK_PARM(r6)(r1)
std r31,0(r6)
li r3,-1
- b bail
+ b htab_bail
+#else /* CONFIG_PPC_64K_PAGES */
+
+
+/*****************************************************************************
+ * *
+ * 64K SW & 4K or 64K HW in a 4K segment pages implementation *
+ * *
+ *****************************************************************************/
+
+/* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
+ * pte_t *ptep, unsigned long trap, int local)
+ */
+
+/*
+ * For now, we do NOT implement Admixed pages
+ */
+_GLOBAL(__hash_page_4K)
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-STACKFRAMESIZE(r1)
+ /* Save all params that we need after a function call */
+ std r6,STK_PARM(r6)(r1)
+ std r8,STK_PARM(r8)(r1)
+
+ /* Add _PAGE_PRESENT to access */
+ ori r4,r4,_PAGE_PRESENT
+
+ /* Save non-volatile registers.
+ * r31 will hold "old PTE"
+ * r30 is "new PTE"
+ * r29 is "va"
+ * r28 is a hash value
+ * r27 is hashtab mask (maybe dynamically patched instead ?)
+ * r26 is the hidx mask
+ * r25 is the index in combo page
+ */
+ std r25,STK_REG(r25)(r1)
+ std r26,STK_REG(r26)(r1)
+ std r27,STK_REG(r27)(r1)
+ std r28,STK_REG(r28)(r1)
+ std r29,STK_REG(r29)(r1)
+ std r30,STK_REG(r30)(r1)
+ std r31,STK_REG(r31)(r1)
+
+ /* Step 1:
+ *
+ * Check permissions, atomically mark the linux PTE busy
+ * and hashed.
+ */
+1:
+ ldarx r31,0,r6
+ /* Check access rights (access & ~(pte_val(*ptep))) */
+ andc. r0,r4,r31
+ bne- htab_wrong_access
+ /* Check if PTE is busy */
+ andi. r0,r31,_PAGE_BUSY
+ /* If so, just bail out and refault if needed. Someone else
+ * is changing this PTE anyway and might hash it.
+ */
+ bne- htab_bail_ok
+ /* Prepare new PTE value (turn access RW into DIRTY, then
+ * add BUSY and ACCESSED)
+ */
+ rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
+ or r30,r30,r31
+ ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
+ /* Write the linux PTE atomically (setting busy) */
+ stdcx. r30,0,r6
+ bne- 1b
+ isync
+
+ /* Step 2:
+ *
+ * Insert/Update the HPTE in the hash table. At this point,
+ * r4 (access) is reusable; we use it for the new HPTE flags
+ */
+
+ /* Load the hidx index */
+ rldicl r25,r3,64-12,60
+
+ /* Calc va and put it in r29 */
+ rldicr r29,r5,28,63-28 /* r29 = (vsid << 28) */
+ rldicl r3,r3,0,36 /* r3 = (ea & 0x0fffffff) */
+ or r29,r3,r29 /* r29 = va */
+
+ /* Calculate hash value for primary slot and store it in r28 */
+ rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
+ rldicl r0,r3,64-12,48 /* (ea >> 12) & 0xffff */
+ xor r28,r5,r0
+
+ /* Convert linux PTE bits into HW equivalents */
+ andi. r3,r30,0x1fe /* Get basic set of flags */
+ xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
+ rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
+ rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
+ and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
+ andc r0,r30,r0 /* r0 = pte & ~r0 */
+ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
+
+ /* We eventually do the icache sync here (maybe inline that
+ * code rather than call a C function...)
+ */
+BEGIN_FTR_SECTION
+ mr r4,r30
+ mr r5,r7
+ bl .hash_page_do_lazy_icache
+END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
+
+ /* At this point, r3 contains new PP bits, save them in
+ * place of "access" in the param area (sic)
+ */
+ std r3,STK_PARM(r4)(r1)
+
+ /* Get htab_hash_mask */
+ ld r4,htab_hash_mask@got(2)
+ ld r27,0(r4) /* htab_hash_mask -> r27 */
+
+ /* Check if we may already be in the hashtable; in this case, we
+ * go to out-of-line code to try to modify the HPTE. We look for
+ * the bit at (1 >> (index + 32))
+ */
+ andi. r0,r31,_PAGE_HASHPTE
+ li r26,0 /* Default hidx */
+ beq htab_insert_pte
+ ld r6,STK_PARM(r6)(r1)
+ ori r26,r6,0x8000 /* Load the hidx mask */
+ ld r26,0(r26)
+ addi r5,r25,36 /* Check actual HPTE_SUB bit, this */
+ rldcr. r0,r31,r5,0 /* must match pgtable.h definition */
+ bne htab_modify_pte
+
+htab_insert_pte:
+ /* real page number in r5, PTE RPN value + index */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
+ add r5,r5,r25
+ sldi r5,r5,HW_PAGE_SHIFT
+
+ /* Calculate primary group hash */
+ and r0,r28,r27
+ rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+
+ /* Call ppc_md.hpte_insert */
+ ld r6,STK_PARM(r4)(r1) /* Retrieve new pp bits */
+ mr r4,r29 /* Retrieve va */
+ li r7,0 /* !bolted, !secondary */
+ li r8,MMU_PAGE_4K /* page size */
+_GLOBAL(htab_call_hpte_insert1)
+ bl . /* patched by htab_finish_init() */
+ cmpdi 0,r3,0
+ bge htab_pte_insert_ok /* Insertion successful */
+ cmpdi 0,r3,-2 /* Critical failure */
+ beq- htab_pte_insert_failure
+
+ /* Now try secondary slot */
+
+ /* real page number in r5, PTE RPN value + index */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
+ add r5,r5,r25
+ sldi r5,r5,HW_PAGE_SHIFT
+
+ /* Calculate secondary group hash */
+ andc r0,r27,r28
+ rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
+
+ /* Call ppc_md.hpte_insert */
+ ld r6,STK_PARM(r4)(r1) /* Retrieve new pp bits */
+ mr r4,r29 /* Retrieve va */
+ li r7,HPTE_V_SECONDARY /* !bolted, secondary */
+ li r8,MMU_PAGE_4K /* page size */
+_GLOBAL(htab_call_hpte_insert2)
+ bl . /* patched by htab_finish_init() */
+ cmpdi 0,r3,0
+ bge+ htab_pte_insert_ok /* Insertion successful */
+ cmpdi 0,r3,-2 /* Critical failure */
+ beq- htab_pte_insert_failure
+
+ /* Both are full, we need to evict something */
+ mftb r0
+ /* Pick a random group based on TB */
+ andi. r0,r0,1
+ mr r5,r28
+ bne 2f
+ not r5,r5
+2: and r0,r5,r27
+ rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+ /* Call ppc_md.hpte_remove */
+_GLOBAL(htab_call_hpte_remove)
+ bl . /* patched by htab_finish_init() */
+
+ /* Try all again */
+ b htab_insert_pte
+
+htab_bail_ok:
+ li r3,0
+ b htab_bail
+
+htab_pte_insert_ok:
+ /* Insert slot number & secondary bit in PTE second half,
+ * clear _PAGE_BUSY and set appropriate HPTE slot bit
+ */
+ ld r6,STK_PARM(r6)(r1)
+ li r0,_PAGE_BUSY
+ andc r30,r30,r0
+ /* HPTE SUB bit */
+ li r0,1
+ subfic r5,r25,27 /* Must match bit position in */
+ sld r0,r0,r5 /* pgtable.h */
+ or r30,r30,r0
+ /* hidx */
+ sldi r5,r25,2
+ sld r3,r3,r5
+ li r4,0xf
+ sld r4,r4,r5
+ andc r26,r26,r4
+ or r26,r26,r3
+ ori r5,r6,0x8000
+ std r26,0(r5)
+ lwsync
+ std r30,0(r6)
+ li r3, 0
+htab_bail:
+ ld r25,STK_REG(r25)(r1)
+ ld r26,STK_REG(r26)(r1)
+ ld r27,STK_REG(r27)(r1)
+ ld r28,STK_REG(r28)(r1)
+ ld r29,STK_REG(r29)(r1)
+ ld r30,STK_REG(r30)(r1)
+ ld r31,STK_REG(r31)(r1)
+ addi r1,r1,STACKFRAMESIZE
+ ld r0,16(r1)
+ mtlr r0
+ blr
+
+htab_modify_pte:
+ /* Keep PP bits in r4 and slot idx from the PTE around in r3 */
+ mr r4,r3
+ sldi r5,r25,2
+ srd r3,r26,r5
+
+ /* Secondary group ? if yes, get an inverted hash value */
+ mr r5,r28
+ andi. r0,r3,0x8 /* page secondary ? */
+ beq 1f
+ not r5,r5
+1: andi. r3,r3,0x7 /* extract idx alone */
+
+ /* Calculate proper slot value for ppc_md.hpte_updatepp */
+ and r0,r5,r27
+ rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+ add r3,r0,r3 /* add slot idx */
+
+ /* Call ppc_md.hpte_updatepp */
+ mr r5,r29 /* va */
+ li r6,MMU_PAGE_4K /* page size */
+ ld r7,STK_PARM(r8)(r1) /* get "local" param */
+_GLOBAL(htab_call_hpte_updatepp)
+ bl . /* patched by htab_finish_init() */
+
+ /* if we failed because typically the HPTE wasn't really here
+ * we try an insertion.
+ */
+ cmpdi 0,r3,-1
+ beq- htab_insert_pte
+
+ /* Clear the BUSY bit and Write out the PTE */
+ li r0,_PAGE_BUSY
+ andc r30,r30,r0
+ ld r6,STK_PARM(r6)(r1)
+ std r30,0(r6)
+ li r3,0
+ b htab_bail
+
+htab_wrong_access:
+ /* Bail out clearing reservation */
+ stdcx. r31,0,r6
+ li r3,1
+ b htab_bail
+
+htab_pte_insert_failure:
+ /* Bail out restoring old PTE */
+ ld r6,STK_PARM(r6)(r1)
+ std r31,0(r6)
+ li r3,-1
+ b htab_bail
+
+
+/*****************************************************************************
+ * *
+ * 64K SW & 64K HW in a 64K segment pages implementation *
+ * *
+ *****************************************************************************/
+
+_GLOBAL(__hash_page_64K)
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-STACKFRAMESIZE(r1)
+ /* Save all params that we need after a function call */
+ std r6,STK_PARM(r6)(r1)
+ std r8,STK_PARM(r8)(r1)
+
+ /* Add _PAGE_PRESENT to access */
+ ori r4,r4,_PAGE_PRESENT
+
+ /* Save non-volatile registers.
+ * r31 will hold "old PTE"
+ * r30 is "new PTE"
+ * r29 is "va"
+ * r28 is a hash value
+ * r27 is hashtab mask (maybe dynamically patched instead ?)
+ */
+ std r27,STK_REG(r27)(r1)
+ std r28,STK_REG(r28)(r1)
+ std r29,STK_REG(r29)(r1)
+ std r30,STK_REG(r30)(r1)
+ std r31,STK_REG(r31)(r1)
+
+ /* Step 1:
+ *
+ * Check permissions, atomically mark the linux PTE busy
+ * and hashed.
+ */
+1:
+ ldarx r31,0,r6
+ /* Check access rights (access & ~(pte_val(*ptep))) */
+ andc. r0,r4,r31
+ bne- ht64_wrong_access
+ /* Check if PTE is busy */
+ andi. r0,r31,_PAGE_BUSY
+ /* If so, just bail out and refault if needed. Someone else
+ * is changing this PTE anyway and might hash it.
+ */
+ bne- ht64_bail_ok
+ /* Prepare new PTE value (turn access RW into DIRTY, then
+ * add BUSY,HASHPTE and ACCESSED)
+ */
+ rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
+ or r30,r30,r31
+ ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
+ /* Write the linux PTE atomically (setting busy) */
+ stdcx. r30,0,r6
+ bne- 1b
+ isync
+
+ /* Step 2:
+ *
+ * Insert/Update the HPTE in the hash table. At this point,
+ * r4 (access) is reusable; we use it for the new HPTE flags
+ */
+
+ /* Calc va and put it in r29 */
+ rldicr r29,r5,28,63-28
+ rldicl r3,r3,0,36
+ or r29,r3,r29
+
+ /* Calculate hash value for primary slot and store it in r28 */
+ rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */
+ rldicl r0,r3,64-16,52 /* (ea >> 16) & 0xfff */
+ xor r28,r5,r0
+
+ /* Convert linux PTE bits into HW equivalents */
+ andi. r3,r30,0x1fe /* Get basic set of flags */
+ xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
+ rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
+ rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
+ and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
+ andc r0,r30,r0 /* r0 = pte & ~r0 */
+ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
+
+ /* We eventually do the icache sync here (maybe inline that
+ * code rather than call a C function...)
+ */
+BEGIN_FTR_SECTION
+ mr r4,r30
+ mr r5,r7
+ bl .hash_page_do_lazy_icache
+END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
+
+ /* At this point, r3 contains new PP bits, save them in
+ * place of "access" in the param area (sic)
+ */
+ std r3,STK_PARM(r4)(r1)
+
+ /* Get htab_hash_mask */
+ ld r4,htab_hash_mask@got(2)
+ ld r27,0(r4) /* htab_hash_mask -> r27 */
+
+ /* Check if we may already be in the hashtable; in this case, we
+ * go to out-of-line code to try to modify the HPTE
+ */
+ andi. r0,r31,_PAGE_HASHPTE
+ bne ht64_modify_pte
+
+ht64_insert_pte:
+ /* Clear hpte bits in new pte (we also clear BUSY btw) and
+ * add _PAGE_HASHPTE
+ */
+ lis r0,_PAGE_HPTEFLAGS@h
+ ori r0,r0,_PAGE_HPTEFLAGS@l
+ andc r30,r30,r0
+ ori r30,r30,_PAGE_HASHPTE
+
+ /* Physical address in r5 */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT
+
+ /* Calculate primary group hash */
+ and r0,r28,r27
+ rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+
+ /* Call ppc_md.hpte_insert */
+ ld r6,STK_PARM(r4)(r1) /* Retrieve new pp bits */
+ mr r4,r29 /* Retrieve va */
+ li r7,0 /* !bolted, !secondary */
+ li r8,MMU_PAGE_64K
+_GLOBAL(ht64_call_hpte_insert1)
+ bl . /* patched by htab_finish_init() */
+ cmpdi 0,r3,0
+ bge ht64_pte_insert_ok /* Insertion successful */
+ cmpdi 0,r3,-2 /* Critical failure */
+ beq- ht64_pte_insert_failure
+
+ /* Now try secondary slot */
+
+ /* Physical address in r5 */
+ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
+ sldi r5,r5,PAGE_SHIFT
+
+ /* Calculate secondary group hash */
+ andc r0,r27,r28
+ rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
+
+ /* Call ppc_md.hpte_insert */
+ ld r6,STK_PARM(r4)(r1) /* Retrieve new pp bits */
+ mr r4,r29 /* Retrieve va */
+ li r7,HPTE_V_SECONDARY /* !bolted, secondary */
+ li r8,MMU_PAGE_64K
+_GLOBAL(ht64_call_hpte_insert2)
+ bl . /* patched by htab_finish_init() */
+ cmpdi 0,r3,0
+ bge+ ht64_pte_insert_ok /* Insertion successful */
+ cmpdi 0,r3,-2 /* Critical failure */
+ beq- ht64_pte_insert_failure
+
+ /* Both are full, we need to evict something */
+ mftb r0
+ /* Pick a random group based on TB */
+ andi. r0,r0,1
+ mr r5,r28
+ bne 2f
+ not r5,r5
+2: and r0,r5,r27
+ rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+ /* Call ppc_md.hpte_remove */
+_GLOBAL(ht64_call_hpte_remove)
+ bl . /* patched by htab_finish_init() */
+
+ /* Try all again */
+ b ht64_insert_pte
+
+ht64_bail_ok:
+ li r3,0
+ b ht64_bail
+
+ht64_pte_insert_ok:
+ /* Insert slot number & secondary bit in PTE */
+ rldimi r30,r3,12,63-15
+
+ /* Write out the PTE with a normal write
+ * (maybe adding an eieio would still be good ?)
+ */
+ht64_write_out_pte:
+ ld r6,STK_PARM(r6)(r1)
+ std r30,0(r6)
+ li r3, 0
+ht64_bail:
+ ld r27,STK_REG(r27)(r1)
+ ld r28,STK_REG(r28)(r1)
+ ld r29,STK_REG(r29)(r1)
+ ld r30,STK_REG(r30)(r1)
+ ld r31,STK_REG(r31)(r1)
+ addi r1,r1,STACKFRAMESIZE
+ ld r0,16(r1)
+ mtlr r0
+ blr
+
+ht64_modify_pte:
+ /* Keep PP bits in r4 and slot idx from the PTE around in r3 */
+ mr r4,r3
+ rlwinm r3,r31,32-12,29,31
+
+ /* Secondary group ? if yes, get an inverted hash value */
+ mr r5,r28
+ andi. r0,r31,_PAGE_F_SECOND
+ beq 1f
+ not r5,r5
+1:
+ /* Calculate proper slot value for ppc_md.hpte_updatepp */
+ and r0,r5,r27
+ rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */
+ add r3,r0,r3 /* add slot idx */
+
+ /* Call ppc_md.hpte_updatepp */
+ mr r5,r29 /* va */
+ li r6,MMU_PAGE_64K
+ ld r7,STK_PARM(r8)(r1) /* get "local" param */
+_GLOBAL(ht64_call_hpte_updatepp)
+ bl . /* patched by htab_finish_init() */
+
+ /* if we failed because typically the HPTE wasn't really here
+ * we try an insertion.
+ */
+ cmpdi 0,r3,-1
+ beq- ht64_insert_pte
+
+ /* Clear the BUSY bit and Write out the PTE */
+ li r0,_PAGE_BUSY
+ andc r30,r30,r0
+ b ht64_write_out_pte
+
+ht64_wrong_access:
+ /* Bail out clearing reservation */
+ stdcx. r31,0,r6
+ li r3,1
+ b ht64_bail
+
+ht64_pte_insert_failure:
+ /* Bail out restoring old PTE */
+ ld r6,STK_PARM(r6)(r1)
+ std r31,0(r6)
+ li r3,-1
+ b ht64_bail
+
+
+#endif /* CONFIG_PPC_64K_PAGES */
+
+
+/*****************************************************************************
+ * *
+ * Huge pages implementation is in hugetlbpage.c *
+ * *
+ *****************************************************************************/
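
In the 4K-subpages-in-a-64K-page case above, each Linux PTE has a companion
word (reached via the ori r26,r6,0x8000 offset trick) that stores a 4-bit
hash slot index, the "hidx", per 4K hardware subpage. A C model of the
packing done in htab_pte_insert_ok, offered as a sketch only (the helper
name is invented):

	/* hypothetical helper mirroring the sldi/sld/andc/or sequence in
	 * htab_pte_insert_ok: 'index' is the subpage number kept in r25,
	 * 'slot' the group slot plus secondary bit from hpte_insert */
	static inline unsigned long pack_hidx(unsigned long hidxes,
					      int index, unsigned long slot)
	{
		unsigned long shift = index << 2;	 /* sldi r5,r25,2 */

		hidxes &= ~(0xfUL << shift);		 /* li r4,0xf; sld; andc */
		return hidxes | ((slot & 0xf) << shift); /* sld r3; or */
	}

htab_modify_pte reverses this with the srd/andi. pair to locate the slot it
needs to update.
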
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 174d14576c28..d96bcfe4c6f6 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -9,6 +9,9 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+
+#undef DEBUG_LOW
+
#include
#include
#include
@@ -22,11 +25,84 @@
#include
#include
#include
+#include
+
+#ifdef DEBUG_LOW
+#define DBG_LOW(fmt...) udbg_printf(fmt)
+#else
+#define DBG_LOW(fmt...)
+#endif
#define HPTE_LOCK_BIT 3
static DEFINE_SPINLOCK(native_tlbie_lock);
+static inline void __tlbie(unsigned long va, unsigned int psize)
+{
+ unsigned int penc;
+
+ /* clear top 16 bits, non SLS segment */
+ va &= ~(0xffffULL << 48);
+
+ switch (psize) {
+ case MMU_PAGE_4K:
+ va &= ~0xffful;
+ asm volatile("tlbie %0,0" : : "r" (va) : "memory");
+ break;
+ default:
+ penc = mmu_psize_defs[psize].penc;
+ va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
+ va |= (0x7f >> (8 - penc)) << 12;
+ asm volatile("tlbie %0,1" : : "r" (va) : "memory");
+ break;
+ }
+}
+
+static inline void __tlbiel(unsigned long va, unsigned int psize)
+{
+ unsigned int penc;
+
+ /* clear top 16 bits, non SLS segment */
+ va &= ~(0xffffULL << 48);
+
+ switch (psize) {
+ case MMU_PAGE_4K:
+ va &= ~0xffful;
+ asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
+ : : "r"(va) : "memory");
+ break;
+ default:
+ penc = mmu_psize_defs[psize].penc;
+ va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
+ va |= (0x7f >> (8 - penc)) << 12;
+ asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
+ : : "r"(va) : "memory");
+ break;
+ }
+
+}
+
+static inline void tlbie(unsigned long va, int psize, int local)
+{
+ unsigned int use_local = local && cpu_has_feature(CPU_FTR_TLBIEL);
+ int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
+
+ if (use_local)
+ use_local = mmu_psize_defs[psize].tlbiel;
+ if (lock_tlbie && !use_local)
+ spin_lock(&native_tlbie_lock);
+ asm volatile("ptesync": : :"memory");
+ if (use_local) {
+ __tlbiel(va, psize);
+ asm volatile("ptesync": : :"memory");
+ } else {
+ __tlbie(va, psize);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+ if (lock_tlbie && !use_local)
+ spin_unlock(&native_tlbie_lock);
+}
+
static inline void native_lock_hpte(hpte_t *hptep)
{
unsigned long *word = &hptep->v;
@@ -48,13 +124,19 @@ static inline void native_unlock_hpte(hpte_t *hptep)
}
long native_hpte_insert(unsigned long hpte_group, unsigned long va,
- unsigned long prpn, unsigned long vflags,
- unsigned long rflags)
+ unsigned long pa, unsigned long rflags,
+ unsigned long vflags, int psize)
{
hpte_t *hptep = htab_address + hpte_group;
unsigned long hpte_v, hpte_r;
int i;
+ if (!(vflags & HPTE_V_BOLTED)) {
+ DBG_LOW(" insert(group=%lx, va=%016lx, pa=%016lx,"
+ " rflags=%lx, vflags=%lx, psize=%d)\n",
+ hpte_group, va, pa, rflags, vflags, psize);
+ }
+
for (i = 0; i < HPTES_PER_GROUP; i++) {
if (! (hptep->v & HPTE_V_VALID)) {
/* retry with lock held */
@@ -70,10 +152,13 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va,
if (i == HPTES_PER_GROUP)
return -1;
- hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID;
- if (vflags & HPTE_V_LARGE)
- va &= ~(1UL << HPTE_V_AVPN_SHIFT);
- hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags;
+ hpte_v = hpte_encode_v(va, psize) | vflags | HPTE_V_VALID;
+ hpte_r = hpte_encode_r(pa, psize) | rflags;
+
+ if (!(vflags & HPTE_V_BOLTED)) {
+ DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
+ i, hpte_v, hpte_r);
+ }
hptep->r = hpte_r;
/* Guarantee the second dword is visible before the valid bit */
@@ -96,6 +181,8 @@ static long native_hpte_remove(unsigned long hpte_group)
int slot_offset;
unsigned long hpte_v;
+ DBG_LOW(" remove(group=%lx)\n", hpte_group);
+
/* pick a random entry to start at */
slot_offset = mftb() & 0x7;
@@ -126,34 +213,51 @@ static long native_hpte_remove(unsigned long hpte_group)
return i;
}
-static inline void set_pp_bit(unsigned long pp, hpte_t *addr)
+static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
+ unsigned long va, int psize, int local)
{
- unsigned long old;
- unsigned long *p = &addr->r;
+ hpte_t *hptep = htab_address + slot;
+ unsigned long hpte_v, want_v;
+ int ret = 0;
- __asm__ __volatile__(
- "1: ldarx %0,0,%3\n\
- rldimi %0,%2,0,61\n\
- stdcx. %0,0,%3\n\
- bne 1b"
- : "=&r" (old), "=m" (*p)
- : "r" (pp), "r" (p), "m" (*p)
- : "cc");
+ want_v = hpte_encode_v(va, psize);
+
+ DBG_LOW(" update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)",
+ va, want_v & HPTE_V_AVPN, slot, newpp);
+
+ native_lock_hpte(hptep);
+
+ hpte_v = hptep->v;
+
+ /* Even if we miss, we need to invalidate the TLB */
+ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
+ DBG_LOW(" -> miss\n");
+ native_unlock_hpte(hptep);
+ ret = -1;
+ } else {
+ DBG_LOW(" -> hit\n");
+ /* Update the HPTE */
+ hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
+ (newpp & (HPTE_R_PP | HPTE_R_N));
+ native_unlock_hpte(hptep);
+ }
+
+ /* Ensure it is out of the tlb too. */
+ tlbie(va, psize, local);
+
+ return ret;
}
-/*
- * Only works on small pages. Yes its ugly to have to check each slot in
- * the group but we only use this during bootup.
- */
-static long native_hpte_find(unsigned long vpn)
+static long native_hpte_find(unsigned long va, int psize)
{
hpte_t *hptep;
unsigned long hash;
unsigned long i, j;
long slot;
- unsigned long hpte_v;
+ unsigned long want_v, hpte_v;
- hash = hpt_hash(vpn, 0);
+ hash = hpt_hash(va, mmu_psize_defs[psize].shift);
+ want_v = hpte_encode_v(va, psize);
for (j = 0; j < 2; j++) {
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -161,7 +265,7 @@ static long native_hpte_find(unsigned long vpn)
hptep = htab_address + slot;
hpte_v = hptep->v;
- if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11))
+ if (HPTE_V_COMPARE(hpte_v, want_v)
&& (hpte_v & HPTE_V_VALID)
&& ( !!(hpte_v & HPTE_V_SECONDARY) == j)) {
/* HPTE matches */
@@ -177,118 +281,90 @@ static long native_hpte_find(unsigned long vpn)
return -1;
}
-static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
- unsigned long va, int large, int local)
-{
- hpte_t *hptep = htab_address + slot;
- unsigned long hpte_v;
- unsigned long avpn = va >> 23;
- int ret = 0;
-
- if (large)
- avpn &= ~1;
-
- native_lock_hpte(hptep);
-
- hpte_v = hptep->v;
-
- /* Even if we miss, we need to invalidate the TLB */
- if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
- || !(hpte_v & HPTE_V_VALID)) {
- native_unlock_hpte(hptep);
- ret = -1;
- } else {
- set_pp_bit(newpp, hptep);
- native_unlock_hpte(hptep);
- }
-
- /* Ensure it is out of the tlb too */
- if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) {
- tlbiel(va);
- } else {
- int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
-
- if (lock_tlbie)
- spin_lock(&native_tlbie_lock);
- tlbie(va, large);
- if (lock_tlbie)
- spin_unlock(&native_tlbie_lock);
- }
-
- return ret;
-}
-
/*
* Update the page protection bits. Intended to be used to create
* guard pages for kernel data structures on pages which are bolted
* in the HPT. Assumes pages being operated on will not be stolen.
- * Does not work on large pages.
*
* No need to lock here because we should be the only user.
*/
-static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea)
+static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
+ int psize)
{
- unsigned long vsid, va, vpn, flags = 0;
+ unsigned long vsid, va;
long slot;
hpte_t *hptep;
- int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
vsid = get_kernel_vsid(ea);
va = (vsid << 28) | (ea & 0x0fffffff);
- vpn = va >> PAGE_SHIFT;
- slot = native_hpte_find(vpn);
+ slot = native_hpte_find(va, psize);
if (slot == -1)
panic("could not find page to bolt\n");
hptep = htab_address + slot;
- set_pp_bit(newpp, hptep);
+ /* Update the HPTE */
+ hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
+ (newpp & (HPTE_R_PP | HPTE_R_N));
- /* Ensure it is out of the tlb too */
- if (lock_tlbie)
- spin_lock_irqsave(&native_tlbie_lock, flags);
- tlbie(va, 0);
- if (lock_tlbie)
- spin_unlock_irqrestore(&native_tlbie_lock, flags);
+ /* Ensure it is out of the tlb too. */
+ tlbie(va, psize, 0);
}
static void native_hpte_invalidate(unsigned long slot, unsigned long va,
- int large, int local)
+ int psize, int local)
{
hpte_t *hptep = htab_address + slot;
unsigned long hpte_v;
- unsigned long avpn = va >> 23;
+ unsigned long want_v;
unsigned long flags;
- int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
-
- if (large)
- avpn &= ~1;
local_irq_save(flags);
- native_lock_hpte(hptep);
+ DBG_LOW(" invalidate(va=%016lx, hash: %x)\n", va, slot);
+
+ want_v = hpte_encode_v(va, psize);
+ native_lock_hpte(hptep);
hpte_v = hptep->v;
/* Even if we miss, we need to invalidate the TLB */
- if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
- || !(hpte_v & HPTE_V_VALID)) {
+ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
native_unlock_hpte(hptep);
- } else {
+ else
/* Invalidate the hpte. NOTE: this also unlocks it */
hptep->v = 0;
+
+ /* Invalidate the TLB */
+ tlbie(va, psize, local);
+
+ local_irq_restore(flags);
+}
+
+/*
+ * XXX This needs fixing based on page size. It's only used by
+ * native_hpte_clear() for now, which needs fixing too, so they
+ * make a good pair...
+ */
+static unsigned long slot2va(unsigned long hpte_v, unsigned long slot)
+{
+ unsigned long avpn = HPTE_V_AVPN_VAL(hpte_v);
+ unsigned long va;
+
+ va = avpn << 23;
+
+ if (! (hpte_v & HPTE_V_LARGE)) {
+ unsigned long vpi, pteg;
+
+ pteg = slot / HPTES_PER_GROUP;
+ if (hpte_v & HPTE_V_SECONDARY)
+ pteg = ~pteg;
+
+ vpi = ((va >> 28) ^ pteg) & htab_hash_mask;
+
+ va |= vpi << PAGE_SHIFT;
}
- /* Invalidate the tlb */
- if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) {
- tlbiel(va);
- } else {
- if (lock_tlbie)
- spin_lock(&native_tlbie_lock);
- tlbie(va, large);
- if (lock_tlbie)
- spin_unlock(&native_tlbie_lock);
- }
- local_irq_restore(flags);
+ return va;
}
/*
@@ -298,6 +374,8 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va,
*
* TODO: add batching support when enabled. remember, no dynamic memory here,
* athough there is the control page available...
+ *
+ * XXX FIXME: 4k only for now !
*/
static void native_hpte_clear(void)
{
@@ -327,7 +405,7 @@ static void native_hpte_clear(void)
if (hpte_v & HPTE_V_VALID) {
hptep->v = 0;
- tlbie(slot2va(hpte_v, slot), hpte_v & HPTE_V_LARGE);
+ tlbie(slot2va(hpte_v, slot), MMU_PAGE_4K, 0);
}
}
@@ -335,59 +413,59 @@ static void native_hpte_clear(void)
local_irq_restore(flags);
}
+/*
+ * Batched hash table flush, we batch the tlbie's to avoid taking/releasing
+ * the lock all the time
+ */
static void native_flush_hash_range(unsigned long number, int local)
{
- unsigned long va, vpn, hash, secondary, slot, flags, avpn;
- int i, j;
+ unsigned long va, hash, index, hidx, shift, slot;
hpte_t *hptep;
unsigned long hpte_v;
+ unsigned long want_v;
+ unsigned long flags;
+ real_pte_t pte;
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
- unsigned long large = batch->large;
+ unsigned long psize = batch->psize;
+ int i;
local_irq_save(flags);
- j = 0;
for (i = 0; i < number; i++) {
- va = batch->vaddr[j];
- if (large)
- vpn = va >> HPAGE_SHIFT;
- else
- vpn = va >> PAGE_SHIFT;
- hash = hpt_hash(vpn, large);
- secondary = (pte_val(batch->pte[i]) & _PAGE_SECONDARY) >> 15;
- if (secondary)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (pte_val(batch->pte[i]) & _PAGE_GROUP_IX) >> 12;
+ va = batch->vaddr[i];
+ pte = batch->pte[i];
- hptep = htab_address + slot;
-
- avpn = va >> 23;
- if (large)
- avpn &= ~0x1UL;
-
- native_lock_hpte(hptep);
-
- hpte_v = hptep->v;
-
- /* Even if we miss, we need to invalidate the TLB */
- if ((HPTE_V_AVPN_VAL(hpte_v) != avpn)
- || !(hpte_v & HPTE_V_VALID)) {
- native_unlock_hpte(hptep);
- } else {
- /* Invalidate the hpte. NOTE: this also unlocks it */
- hptep->v = 0;
- }
-
- j++;
+ pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
+ hash = hpt_hash(va, shift);
+ hidx = __rpte_to_hidx(pte, index);
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot += hidx & _PTEIDX_GROUP_IX;
+ hptep = htab_address + slot;
+ want_v = hpte_encode_v(va, psize);
+ native_lock_hpte(hptep);
+ hpte_v = hptep->v;
+ if (!HPTE_V_COMPARE(hpte_v, want_v) ||
+ !(hpte_v & HPTE_V_VALID))
+ native_unlock_hpte(hptep);
+ else
+ hptep->v = 0;
+ } pte_iterate_hashed_end();
}
- if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) {
+ if (cpu_has_feature(CPU_FTR_TLBIEL) &&
+ mmu_psize_defs[psize].tlbiel && local) {
asm volatile("ptesync":::"memory");
+ for (i = 0; i < number; i++) {
+ va = batch->vaddr[i];
+ pte = batch->pte[i];
- for (i = 0; i < j; i++)
- __tlbiel(batch->vaddr[i]);
-
+ pte_iterate_hashed_subpages(pte, psize, va, index,
+ shift) {
+ __tlbiel(va, psize);
+ } pte_iterate_hashed_end();
+ }
asm volatile("ptesync":::"memory");
} else {
int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
@@ -396,10 +474,15 @@ static void native_flush_hash_range(unsigned long number, int local)
spin_lock(&native_tlbie_lock);
asm volatile("ptesync":::"memory");
+ for (i = 0; i < number; i++) {
+ va = batch->vaddr[i];
+ pte = batch->pte[i];
- for (i = 0; i < j; i++)
- __tlbie(batch->vaddr[i], large);
-
+ pte_iterate_hashed_subpages(pte, psize, va, index,
+ shift) {
+ __tlbie(va, psize);
+ } pte_iterate_hashed_end();
+ }
asm volatile("eieio; tlbsync; ptesync":::"memory");
if (lock_tlbie)
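
The native hooks above establish the new calling convention: hpte_insert()
takes a physical address with separate rflags/vflags, plus an explicit page
size index, and all TLB invalidation now funnels through
tlbie(va, psize, local). A sketch of a caller under that convention (values
and error handling are illustrative only; compare htab_bolt_mapping() in
hash_utils_64.c below):

	long slot = native_hpte_insert(hpteg, va, pa,
				       mode_rw,		/* rflags: PP/WIMG bits */
				       HPTE_V_BOLTED,	/* vflags */
				       MMU_PAGE_4K);	/* psize */
	if (slot == -1) {
		/* primary group full: retry with the secondary hash and
		 * HPTE_V_SECONDARY, then fall back to hpte_remove() */
	}
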
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 6e9e05cce02c..22e474876133 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -19,6 +19,7 @@
*/
#undef DEBUG
+#undef DEBUG_LOW
#include
#include
@@ -32,7 +33,6 @@
#include
#include
-#include
#include
#include
#include
@@ -59,6 +59,15 @@
#define DBG(fmt...)
#endif
+#ifdef DEBUG_LOW
+#define DBG_LOW(fmt...) udbg_printf(fmt)
+#else
+#define DBG_LOW(fmt...)
+#endif
+
+#define KB (1024)
+#define MB (1024*KB)
+
/*
* Note: pte --> Linux PTE
* HPTE --> PowerPC Hashed Page Table Entry
@@ -77,91 +86,292 @@ extern unsigned long dart_tablebase;
hpte_t *htab_address;
unsigned long htab_hash_mask;
-
unsigned long _SDR1;
+struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+int mmu_linear_psize = MMU_PAGE_4K;
+int mmu_virtual_psize = MMU_PAGE_4K;
+#ifdef CONFIG_HUGETLB_PAGE
+int mmu_huge_psize = MMU_PAGE_16M;
+unsigned int HPAGE_SHIFT;
+#endif
-#define KB (1024)
-#define MB (1024*KB)
+/* These are definitions of page size arrays to be used when none
+ * is provided by the firmware.
+ */
-static inline void loop_forever(void)
+/* Pre-POWER4 CPUs (4k pages only)
+ */
+struct mmu_psize_def mmu_psize_defaults_old[] = {
+ [MMU_PAGE_4K] = {
+ .shift = 12,
+ .sllp = 0,
+ .penc = 0,
+ .avpnm = 0,
+ .tlbiel = 0,
+ },
+};
+
+/* POWER4, GPUL, POWER5
+ *
+ * Support for 16Mb large pages
+ */
+struct mmu_psize_def mmu_psize_defaults_gp[] = {
+ [MMU_PAGE_4K] = {
+ .shift = 12,
+ .sllp = 0,
+ .penc = 0,
+ .avpnm = 0,
+ .tlbiel = 1,
+ },
+ [MMU_PAGE_16M] = {
+ .shift = 24,
+ .sllp = SLB_VSID_L,
+ .penc = 0,
+ .avpnm = 0x1UL,
+ .tlbiel = 0,
+ },
+};
+
+
+int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
+ unsigned long pstart, unsigned long mode, int psize)
{
- volatile unsigned long x = 1;
- for(;x;x|=1)
- ;
-}
-
-static inline void create_pte_mapping(unsigned long start, unsigned long end,
- unsigned long mode, int large)
-{
- unsigned long addr;
- unsigned int step;
+ unsigned long vaddr, paddr;
+ unsigned int step, shift;
unsigned long tmp_mode;
- unsigned long vflags;
+ int ret = 0;
- if (large) {
- step = 16*MB;
- vflags = HPTE_V_BOLTED | HPTE_V_LARGE;
- } else {
- step = 4*KB;
- vflags = HPTE_V_BOLTED;
- }
+ shift = mmu_psize_defs[psize].shift;
+ step = 1 << shift;
- for (addr = start; addr < end; addr += step) {
+ for (vaddr = vstart, paddr = pstart; vaddr < vend;
+ vaddr += step, paddr += step) {
unsigned long vpn, hash, hpteg;
- unsigned long vsid = get_kernel_vsid(addr);
- unsigned long va = (vsid << 28) | (addr & 0xfffffff);
- int ret = -1;
-
- if (large)
- vpn = va >> HPAGE_SHIFT;
- else
- vpn = va >> PAGE_SHIFT;
-
+ unsigned long vsid = get_kernel_vsid(vaddr);
+ unsigned long va = (vsid << 28) | (vaddr & 0x0fffffff);
+ vpn = va >> shift;
tmp_mode = mode;
/* Make non-kernel text non-executable */
- if (!in_kernel_text(addr))
- tmp_mode = mode | HW_NO_EXEC;
-
- hash = hpt_hash(vpn, large);
+ if (!in_kernel_text(vaddr))
+ tmp_mode = mode | HPTE_R_N;
+ hash = hpt_hash(va, shift);
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
+ /* The crap below can be cleaned up once ppc_md.probe() can
+ * set up the hash callbacks, thus we can just use the
+ * normal insert callback here.
+ */
#ifdef CONFIG_PPC_ISERIES
- if (systemcfg->platform & PLATFORM_ISERIES_LPAR)
- ret = iSeries_hpte_bolt_or_insert(hpteg, va,
- virt_to_abs(addr) >> PAGE_SHIFT,
- vflags, tmp_mode);
+ if (systemcfg->platform == PLATFORM_ISERIES_LPAR)
+ ret = iSeries_hpte_insert(hpteg, va,
+ virt_to_abs(paddr),
+ tmp_mode,
+ HPTE_V_BOLTED,
+ psize);
else
#endif
#ifdef CONFIG_PPC_PSERIES
if (systemcfg->platform & PLATFORM_LPAR)
ret = pSeries_lpar_hpte_insert(hpteg, va,
- virt_to_abs(addr) >> PAGE_SHIFT,
- vflags, tmp_mode);
+ virt_to_abs(paddr),
+ tmp_mode,
+ HPTE_V_BOLTED,
+ psize);
else
#endif
#ifdef CONFIG_PPC_MULTIPLATFORM
ret = native_hpte_insert(hpteg, va,
- virt_to_abs(addr) >> PAGE_SHIFT,
- vflags, tmp_mode);
+ virt_to_abs(paddr),
+ tmp_mode, HPTE_V_BOLTED,
+ psize);
#endif
-
- if (ret == -1) {
- ppc64_terminate_msg(0x20, "create_pte_mapping");
- loop_forever();
- }
+ if (ret < 0)
+ break;
}
+ return ret < 0 ? ret : 0;
}
-static unsigned long get_hashtable_size(void)
+static int __init htab_dt_scan_page_sizes(unsigned long node,
+ const char *uname, int depth,
+ void *data)
+{
+ char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ u32 *prop;
+ unsigned long size = 0;
+
+ /* We are scanning "cpu" nodes only */
+ if (type == NULL || strcmp(type, "cpu") != 0)
+ return 0;
+
+ prop = (u32 *)of_get_flat_dt_prop(node,
+ "ibm,segment-page-sizes", &size);
+ if (prop != NULL) {
+ DBG("Page sizes from device-tree:\n");
+ size /= 4;
+ cur_cpu_spec->cpu_features &= ~(CPU_FTR_16M_PAGE);
+ while(size > 0) {
+ unsigned int shift = prop[0];
+ unsigned int slbenc = prop[1];
+ unsigned int lpnum = prop[2];
+ unsigned int lpenc = 0;
+ struct mmu_psize_def *def;
+ int idx = -1;
+
+ size -= 3; prop += 3;
+ while(size > 0 && lpnum) {
+ if (prop[0] == shift)
+ lpenc = prop[1];
+ prop += 2; size -= 2;
+ lpnum--;
+ }
+ switch(shift) {
+ case 0xc:
+ idx = MMU_PAGE_4K;
+ break;
+ case 0x10:
+ idx = MMU_PAGE_64K;
+ break;
+ case 0x14:
+ idx = MMU_PAGE_1M;
+ break;
+ case 0x18:
+ idx = MMU_PAGE_16M;
+ cur_cpu_spec->cpu_features |= CPU_FTR_16M_PAGE;
+ break;
+ case 0x22:
+ idx = MMU_PAGE_16G;
+ break;
+ }
+ if (idx < 0)
+ continue;
+ def = &mmu_psize_defs[idx];
+ def->shift = shift;
+ if (shift <= 23)
+ def->avpnm = 0;
+ else
+ def->avpnm = (1 << (shift - 23)) - 1;
+ def->sllp = slbenc;
+ def->penc = lpenc;
+ /* We don't know for sure what's up with tlbiel, so
+ * for now we only set it for 4K and 64K pages
+ */
+ if (idx == MMU_PAGE_4K || idx == MMU_PAGE_64K)
+ def->tlbiel = 1;
+ else
+ def->tlbiel = 0;
+
+ DBG(" %d: shift=%02x, sllp=%04x, avpnm=%08x, "
+ "tlbiel=%d, penc=%d\n",
+ idx, shift, def->sllp, def->avpnm, def->tlbiel,
+ def->penc);
+ }
+ return 1;
+ }
+ return 0;
+}
+
+
+static void __init htab_init_page_sizes(void)
+{
+ int rc;
+
+ /* Default to 4K pages only */
+ memcpy(mmu_psize_defs, mmu_psize_defaults_old,
+ sizeof(mmu_psize_defaults_old));
+
+ /*
+ * Try to find the available page sizes in the device-tree
+ */
+ rc = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL);
+ if (rc != 0) /* Found */
+ goto found;
+
+ /*
+ * Not in the device-tree, let's fallback on known size
+ * list for 16M capable GP & GR
+ */
+ if ((systemcfg->platform != PLATFORM_ISERIES_LPAR) &&
+ cpu_has_feature(CPU_FTR_16M_PAGE))
+ memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
+ sizeof(mmu_psize_defaults_gp));
+ found:
+ /*
+ * Pick a size for the linear mapping. Currently, we only support
+ * 16M, 1M and 4K which is the default
+ */
+ if (mmu_psize_defs[MMU_PAGE_16M].shift)
+ mmu_linear_psize = MMU_PAGE_16M;
+ else if (mmu_psize_defs[MMU_PAGE_1M].shift)
+ mmu_linear_psize = MMU_PAGE_1M;
+
+ /*
+ * Pick a size for the ordinary pages. Default is 4K, we support
+ * 64K if cache inhibited large pages are supported by the
+ * processor
+ */
+#ifdef CONFIG_PPC_64K_PAGES
+ if (mmu_psize_defs[MMU_PAGE_64K].shift &&
+ cpu_has_feature(CPU_FTR_CI_LARGE_PAGE))
+ mmu_virtual_psize = MMU_PAGE_64K;
+#endif
+
+ printk(KERN_INFO "Page orders: linear mapping = %d, others = %d\n",
+ mmu_psize_defs[mmu_linear_psize].shift,
+ mmu_psize_defs[mmu_virtual_psize].shift);
+
+#ifdef CONFIG_HUGETLB_PAGE
+ /* Init large page size. Currently, we pick 16M or 1M depending
+ * on what is available
+ */
+ if (mmu_psize_defs[MMU_PAGE_16M].shift)
+ mmu_huge_psize = MMU_PAGE_16M;
+ /* With 4k/4level pagetables, we can't (for now) cope with a
+ * huge page size < PMD_SIZE */
+ else if (mmu_psize_defs[MMU_PAGE_1M].shift)
+ mmu_huge_psize = MMU_PAGE_1M;
+
+ /* Calculate HPAGE_SHIFT and sanity check it */
+ if (mmu_psize_defs[mmu_huge_psize].shift > MIN_HUGEPTE_SHIFT &&
+ mmu_psize_defs[mmu_huge_psize].shift < SID_SHIFT)
+ HPAGE_SHIFT = mmu_psize_defs[mmu_huge_psize].shift;
+ else
+ HPAGE_SHIFT = 0; /* No huge pages dude ! */
+#endif /* CONFIG_HUGETLB_PAGE */
+}
+
+static int __init htab_dt_scan_pftsize(unsigned long node,
+ const char *uname, int depth,
+ void *data)
+{
+ char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ u32 *prop;
+
+ /* We are scanning "cpu" nodes only */
+ if (type == NULL || strcmp(type, "cpu") != 0)
+ return 0;
+
+ prop = (u32 *)of_get_flat_dt_prop(node, "ibm,pft-size", NULL);
+ if (prop != NULL) {
+ /* pft_size[0] is the NUMA CEC cookie */
+ ppc64_pft_size = prop[1];
+ return 1;
+ }
+ return 0;
+}
+
+static unsigned long __init htab_get_table_size(void)
{
unsigned long rnd_mem_size, pteg_count;
- /* If hash size wasn't obtained in prom.c, we calculate it now based on
- * the total RAM size
+ /* If hash size isn't already provided by the platform, we try to
+ * retrieve it from the device-tree. If it's not there either, we
+ * calculate it now based on the total RAM size
*/
+ if (ppc64_pft_size == 0)
+ of_scan_flat_dt(htab_dt_scan_pftsize, NULL);
if (ppc64_pft_size)
return 1UL << ppc64_pft_size;
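
The fallback sizing keeps the old rule of one PTEG per two real pages, each
PTEG being 128 bytes (hence the >> 7 a few lines down). A worked example
under those assumptions:

	rnd_mem_size = 2^31  (2GB of RAM)
	pages        = 2^31 / 2^12      = 2^19  (4K pages)
	pteg_count   = pages / 2        = 2^18
	htab_size    = pteg_count * 128 = 2^25  = 32MB
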
@@ -181,25 +391,23 @@ void __init htab_initialize(void)
unsigned long table, htab_size_bytes;
unsigned long pteg_count;
unsigned long mode_rw;
- int i, use_largepages = 0;
unsigned long base = 0, size = 0;
+ int i;
+
extern unsigned long tce_alloc_start, tce_alloc_end;
DBG(" -> htab_initialize()\n");
+ /* Initialize page sizes */
+ htab_init_page_sizes();
+
/*
* Calculate the required size of the htab. We want the number of
* PTEGs to equal one half the number of real pages.
*/
- htab_size_bytes = get_hashtable_size();
+ htab_size_bytes = htab_get_table_size();
pteg_count = htab_size_bytes >> 7;
- /* For debug, make the HTAB 1/8 as big as it normally would be. */
- ifppcdebug(PPCDBG_HTABSIZE) {
- pteg_count >>= 3;
- htab_size_bytes = pteg_count << 7;
- }
-
htab_hash_mask = pteg_count - 1;
if (systemcfg->platform & PLATFORM_LPAR) {
@@ -211,14 +419,11 @@ void __init htab_initialize(void)
* the absolute address space.
*/
table = lmb_alloc(htab_size_bytes, htab_size_bytes);
+ BUG_ON(table == 0);
DBG("Hash table allocated at %lx, size: %lx\n", table,
htab_size_bytes);
- if ( !table ) {
- ppc64_terminate_msg(0x20, "hpt space");
- loop_forever();
- }
htab_address = abs_to_virt(table);
/* htab absolute addr + encoded htabsize */
@@ -234,8 +439,6 @@ void __init htab_initialize(void)
* _NOT_ map it to avoid cache paradoxes as it's remapped non
* cacheable later on
*/
- if (cpu_has_feature(CPU_FTR_16M_PAGE))
- use_largepages = 1;
/* create bolted the linear mapping in the hash table */
for (i=0; i < lmb.memory.cnt; i++) {
@@ -246,27 +449,32 @@ void __init htab_initialize(void)
#ifdef CONFIG_U3_DART
/* Do not map the DART space. Fortunately, it will be aligned
- * in such a way that it will not cross two lmb regions and will
- * fit within a single 16Mb page.
- * The DART space is assumed to be a full 16Mb region even if we
- * only use 2Mb of that space. We will use more of it later for
- * AGP GART. We have to use a full 16Mb large page.
+ * in such a way that it will not cross two lmb regions and
+ * will fit within a single 16Mb page.
+ * The DART space is assumed to be a full 16Mb region even if
+ * we only use 2Mb of that space. We will use more of it later
+ * for AGP GART. We have to use a full 16Mb large page.
*/
DBG("DART base: %lx\n", dart_tablebase);
if (dart_tablebase != 0 && dart_tablebase >= base
&& dart_tablebase < (base + size)) {
if (base != dart_tablebase)
- create_pte_mapping(base, dart_tablebase, mode_rw,
- use_largepages);
+ BUG_ON(htab_bolt_mapping(base, dart_tablebase,
+ base, mode_rw,
+ mmu_linear_psize));
if ((base + size) > (dart_tablebase + 16*MB))
- create_pte_mapping(dart_tablebase + 16*MB, base + size,
- mode_rw, use_largepages);
+ BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB,
+ base + size,
+ dart_tablebase+16*MB,
+ mode_rw,
+ mmu_linear_psize));
continue;
}
#endif /* CONFIG_U3_DART */
- create_pte_mapping(base, base + size, mode_rw, use_largepages);
- }
+ BUG_ON(htab_bolt_mapping(base, base + size, base,
+ mode_rw, mmu_linear_psize));
+ }
/*
* If we have a memory_limit and we've allocated TCEs then we need to
@@ -282,8 +490,9 @@ void __init htab_initialize(void)
if (base + size >= tce_alloc_start)
tce_alloc_start = base + size + 1;
- create_pte_mapping(tce_alloc_start, tce_alloc_end,
- mode_rw, use_largepages);
+ BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end,
+ tce_alloc_start, mode_rw,
+ mmu_linear_psize));
}
DBG(" <- htab_initialize()\n");
@@ -309,7 +518,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
__flush_dcache_icache(page_address(page));
set_bit(PG_arch_1, &page->flags);
} else
- pp |= HW_NO_EXEC;
+ pp |= HPTE_R_N;
}
return pp;
}
@@ -325,94 +534,169 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
unsigned long vsid;
struct mm_struct *mm;
pte_t *ptep;
- int ret;
- int user_region = 0;
- int local = 0;
cpumask_t tmp;
+ int rc, user_region = 0, local = 0;
- if ((ea & ~REGION_MASK) >= PGTABLE_RANGE)
- return 1;
+ DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
+ ea, access, trap);
+ if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) {
+ DBG_LOW(" out of pgtable range !\n");
+ return 1;
+ }
+
+ /* Get region & vsid */
switch (REGION_ID(ea)) {
case USER_REGION_ID:
user_region = 1;
mm = current->mm;
- if (! mm)
+ if (! mm) {
+ DBG_LOW(" user region with no mm !\n");
return 1;
-
+ }
vsid = get_vsid(mm->context.id, ea);
break;
case VMALLOC_REGION_ID:
mm = &init_mm;
vsid = get_kernel_vsid(ea);
break;
-#if 0
- case KERNEL_REGION_ID:
- /*
- * Should never get here - entire 0xC0... region is bolted.
- * Send the problem up to do_page_fault
- */
-#endif
default:
/* Not a valid range
* Send the problem up to do_page_fault
*/
return 1;
- break;
}
+ DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid);
+ /* Get pgdir */
pgdir = mm->pgd;
-
if (pgdir == NULL)
return 1;
+ /* Check CPU locality */
tmp = cpumask_of_cpu(smp_processor_id());
if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
local = 1;
- /* Is this a huge page ? */
- if (unlikely(in_hugepage_area(mm->context, ea)))
- ret = hash_huge_page(mm, access, ea, vsid, local);
- else {
- ptep = find_linux_pte(pgdir, ea);
- if (ptep == NULL)
- return 1;
- ret = __hash_page(ea, access, vsid, ptep, trap, local);
+ /* Handle hugepage regions */
+ if (unlikely(in_hugepage_area(mm->context, ea))) {
+ DBG_LOW(" -> huge page !\n");
+ return hash_huge_page(mm, access, ea, vsid, local);
}
- return ret;
+ /* Get PTE and page size from page tables */
+ ptep = find_linux_pte(pgdir, ea);
+ if (ptep == NULL || !pte_present(*ptep)) {
+ DBG_LOW(" no PTE !\n");
+ return 1;
+ }
+
+#ifndef CONFIG_PPC_64K_PAGES
+ DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
+#else
+ DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep),
+ pte_val(*(ptep + PTRS_PER_PTE)));
+#endif
+ /* Pre-check access permissions (will be re-checked atomically
+ * in __hash_page_XX but this pre-check is a fast path
+ */
+ if (access & ~pte_val(*ptep)) {
+ DBG_LOW(" no access !\n");
+ return 1;
+ }
+
+ /* Do actual hashing */
+#ifndef CONFIG_PPC_64K_PAGES
+ rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
+#else
+ if (mmu_virtual_psize == MMU_PAGE_64K)
+ rc = __hash_page_64K(ea, access, vsid, ptep, trap, local);
+ else
+ rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
+#endif /* CONFIG_PPC_64K_PAGES */
+
+#ifndef CONFIG_PPC_64K_PAGES
+ DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
+#else
+ DBG_LOW(" o-pte: %016lx %016lx\n", pte_val(*ptep),
+ pte_val(*(ptep + PTRS_PER_PTE)));
+#endif
+ DBG_LOW(" -> rc=%d\n", rc);
+ return rc;
}
-void flush_hash_page(unsigned long va, pte_t pte, int local)
+void hash_preload(struct mm_struct *mm, unsigned long ea,
+ unsigned long access, unsigned long trap)
{
- unsigned long vpn, hash, secondary, slot;
- unsigned long huge = pte_huge(pte);
+ unsigned long vsid;
+ void *pgdir;
+ pte_t *ptep;
+ cpumask_t mask;
+ unsigned long flags;
+ int local = 0;
- if (huge)
- vpn = va >> HPAGE_SHIFT;
+ /* We don't want huge pages prefaulted for now
+ */
+ if (unlikely(in_hugepage_area(mm->context, ea)))
+ return;
+
+ DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
+ " trap=%lx\n", mm, mm->pgd, ea, access, trap);
+
+ /* Get PTE, VSID, access mask */
+ pgdir = mm->pgd;
+ if (pgdir == NULL)
+ return;
+ ptep = find_linux_pte(pgdir, ea);
+ if (!ptep)
+ return;
+ vsid = get_vsid(mm->context.id, ea);
+
+ /* Hash it in */
+ local_irq_save(flags);
+ mask = cpumask_of_cpu(smp_processor_id());
+ if (cpus_equal(mm->cpu_vm_mask, mask))
+ local = 1;
+#ifndef CONFIG_PPC_64K_PAGES
+ __hash_page_4K(ea, access, vsid, ptep, trap, local);
+#else
+ if (mmu_virtual_psize == MMU_PAGE_64K)
+ __hash_page_64K(ea, access, vsid, ptep, trap, local);
else
- vpn = va >> PAGE_SHIFT;
- hash = hpt_hash(vpn, huge);
- secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15;
- if (secondary)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12;
+ __hash_page_4K(ea, access, vsid, ptep, trap, local);
+#endif /* CONFIG_PPC_64K_PAGES */
+ local_irq_restore(flags);
+}
- ppc_md.hpte_invalidate(slot, va, huge, local);
+void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int local)
+{
+ unsigned long hash, index, shift, hidx, slot;
+
+ DBG_LOW("flush_hash_page(va=%016x)\n", va);
+ pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
+ hash = hpt_hash(va, shift);
+ hidx = __rpte_to_hidx(pte, index);
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot += hidx & _PTEIDX_GROUP_IX;
+ DBG_LOW(" sub %d: hash=%x, hidx=%x\n", index, slot, hidx);
+ ppc_md.hpte_invalidate(slot, va, psize, local);
+ } pte_iterate_hashed_end();
}
void flush_hash_range(unsigned long number, int local)
{
- if (ppc_md.flush_hash_range) {
+ if (ppc_md.flush_hash_range)
ppc_md.flush_hash_range(number, local);
- } else {
+ else {
int i;
struct ppc64_tlb_batch *batch =
&__get_cpu_var(ppc64_tlb_batch);
for (i = 0; i < number; i++)
- flush_hash_page(batch->vaddr[i], batch->pte[i], local);
+ flush_hash_page(batch->vaddr[i], batch->pte[i],
+ batch->psize, local);
}
}
@@ -452,6 +736,18 @@ void __init htab_finish_init(void)
extern unsigned int *htab_call_hpte_remove;
extern unsigned int *htab_call_hpte_updatepp;
+#ifdef CONFIG_PPC_64K_PAGES
+ extern unsigned int *ht64_call_hpte_insert1;
+ extern unsigned int *ht64_call_hpte_insert2;
+ extern unsigned int *ht64_call_hpte_remove;
+ extern unsigned int *ht64_call_hpte_updatepp;
+
+ make_bl(ht64_call_hpte_insert1, ppc_md.hpte_insert);
+ make_bl(ht64_call_hpte_insert2, ppc_md.hpte_insert);
+ make_bl(ht64_call_hpte_remove, ppc_md.hpte_remove);
+ make_bl(ht64_call_hpte_updatepp, ppc_md.hpte_updatepp);
+#endif /* CONFIG_PPC_64K_PAGES */
+
make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert);
make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert);
make_bl(htab_call_hpte_remove, ppc_md.hpte_remove);
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0ea0994ed974..426c269e552e 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -47,10 +47,25 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
pu = pud_offset(pg, addr);
if (!pud_none(*pu)) {
pm = pmd_offset(pu, addr);
+#ifdef CONFIG_PPC_64K_PAGES
+ /* Currently, we use the normal PTE offset within full
+ * size PTE pages, thus our huge PTEs are scattered in
+ * the PTE page and we do waste some. We may change
+ * that in the future, but the current mecanism keeps
+ * things much simpler
+ */
+ if (!pmd_none(*pm)) {
+ /* Note: pte_offset_* are all equivalent on
+ * ppc64 as we don't have HIGHMEM
+ */
+ pt = pte_offset_kernel(pm, addr);
+ return pt;
+ }
+#else /* CONFIG_PPC_64K_PAGES */
+ /* On 4k pages, we put huge PTEs in the PMD page */
pt = (pte_t *)pm;
- BUG_ON(!pmd_none(*pm)
- && !(pte_present(*pt) && pte_huge(*pt)));
return pt;
+#endif /* CONFIG_PPC_64K_PAGES */
}
}
@@ -74,9 +89,16 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
if (pu) {
pm = pmd_alloc(mm, pu, addr);
if (pm) {
+#ifdef CONFIG_PPC_64K_PAGES
+ /* See comment in huge_pte_offset. Note that if we ever
+ * want to put the page size in the PMD, we would have
+ * to open code our own pte_alloc* function in order
+ * to populate and set the size atomically
+ */
+ pt = pte_alloc_map(mm, pm, addr);
+#else /* CONFIG_PPC_64K_PAGES */
pt = (pte_t *)pm;
- BUG_ON(!pmd_none(*pm)
- && !(pte_present(*pt) && pte_huge(*pt)));
+#endif /* CONFIG_PPC_64K_PAGES */
return pt;
}
}
@@ -84,35 +106,29 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
return NULL;
}
-#define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE)
-
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
- int i;
-
if (pte_present(*ptep)) {
- pte_clear(mm, addr, ptep);
+ /* We open-code pte_clear because we need to pass the right
+ * argument to hpte_update (huge / !huge)
+ */
+ unsigned long old = pte_update(ptep, ~0UL);
+ if (old & _PAGE_HASHPTE)
+ hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
flush_tlb_pending();
}
-
- for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
- *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
- ptep++;
- }
+ *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
}
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
unsigned long old = pte_update(ptep, ~0UL);
- int i;
if (old & _PAGE_HASHPTE)
- hpte_update(mm, addr, old, 0);
-
- for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
- ptep[i] = __pte(0);
+ hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
+ *ptep = __pte(0);
return __pte(old);
}
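
The huge PTE helpers above, and hash_huge_page() further down, share the
patch's _PAGE_BUSY discipline: a set busy bit means another CPU currently
owns the PTE for hashing, so fault paths simply back off and refault. A
standalone sketch of the idiom (the helper name is invented;
hash_huge_page() open-codes the same loop with __cmpxchg_u64, and the
__hash_page_* asm uses ldarx/stdcx.):

	/* sketch only: try to take the PTE "lock" by setting _PAGE_BUSY */
	static int pte_try_lock_busy(pte_t *ptep)
	{
		unsigned long old_pte, new_pte;

		do {
			old_pte = pte_val(*ptep);
			if (old_pte & _PAGE_BUSY)
				return 0;	/* busy: caller bails and refaults */
			new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED |
				  _PAGE_HASHPTE;
		} while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
						  old_pte, new_pte));
		return 1;			/* the PTE is now ours to hash */
	}
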
@@ -196,6 +212,12 @@ static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
BUG_ON(area >= NUM_HIGH_AREAS);
+ /* Hack, so that each address is controlled by exactly one
+ * of the high or low area bitmaps, the first high area starts
+ * at 4GB, not 0 */
+ if (start == 0)
+ start = 0x100000000UL;
+
/* Check no VMAs are in the region */
vma = find_vma(mm, start);
if (vma && (vma->vm_start < end))
@@ -563,6 +585,8 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
int lastshift;
u16 areamask, curareas;
+ if (HPAGE_SHIFT == 0)
+ return -EINVAL;
if (len & ~HPAGE_MASK)
return -EINVAL;
@@ -619,19 +643,15 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
unsigned long ea, unsigned long vsid, int local)
{
pte_t *ptep;
- unsigned long va, vpn;
- pte_t old_pte, new_pte;
- unsigned long rflags, prpn;
+ unsigned long old_pte, new_pte;
+ unsigned long va, rflags, pa;
long slot;
int err = 1;
- spin_lock(&mm->page_table_lock);
-
ptep = huge_pte_offset(mm, ea);
/* Search the Linux page table for a match with va */
va = (vsid << 28) | (ea & 0x0fffffff);
- vpn = va >> HPAGE_SHIFT;
/*
* If no pte found or not present, send the problem up to
@@ -640,8 +660,6 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
if (unlikely(!ptep || pte_none(*ptep)))
goto out;
-/* BUG_ON(pte_bad(*ptep)); */
-
/*
* Check the user's access rights to the page. If access should be
* prevented then send the problem up to do_page_fault.
@@ -661,58 +679,64 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
*/
- old_pte = *ptep;
- new_pte = old_pte;
+ do {
+ old_pte = pte_val(*ptep);
+ if (old_pte & _PAGE_BUSY)
+ goto out;
+ new_pte = old_pte | _PAGE_BUSY |
+ _PAGE_ACCESSED | _PAGE_HASHPTE;
+ } while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
+ old_pte, new_pte));
- rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW));
+ rflags = 0x2 | (!(new_pte & _PAGE_RW));
/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
- rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC);
+ rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
/* Check if pte already has an hpte (case 2) */
- if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) {
+ if (unlikely(old_pte & _PAGE_HASHPTE)) {
/* There MIGHT be an HPTE for this pte */
unsigned long hash, slot;
- hash = hpt_hash(vpn, 1);
- if (pte_val(old_pte) & _PAGE_SECONDARY)
+ hash = hpt_hash(va, HPAGE_SHIFT);
+ if (old_pte & _PAGE_F_SECOND)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12;
+ slot += (old_pte & _PAGE_F_GIX) >> 12;
if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1)
- pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
+ old_pte &= ~_PAGE_HPTEFLAGS;
}
- if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) {
- unsigned long hash = hpt_hash(vpn, 1);
+ if (likely(!(old_pte & _PAGE_HASHPTE))) {
+ unsigned long hash = hpt_hash(va, HPAGE_SHIFT);
unsigned long hpte_group;
- prpn = pte_pfn(old_pte);
+ pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
repeat:
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- /* Update the linux pte with the HPTE slot */
- pte_val(new_pte) &= ~_PAGE_HPTEFLAGS;
- pte_val(new_pte) |= _PAGE_HASHPTE;
+ /* clear HPTE slot information in the new PTE */
+ new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
/* Add in WIMG bits */
/* XXX We should store these in the pte */
+ /* --BenH: I think they are ... */
rflags |= _PAGE_COHERENT;
- slot = ppc_md.hpte_insert(hpte_group, va, prpn,
- HPTE_V_LARGE, rflags);
+ /* Insert into the hash table, primary slot */
+ slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
+ mmu_huge_psize);
/* Primary is full, try the secondary */
if (unlikely(slot == -1)) {
- pte_val(new_pte) |= _PAGE_SECONDARY;
+ new_pte |= _PAGE_F_SECOND;
hpte_group = ((~hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, va, prpn,
- HPTE_V_LARGE |
+ slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
HPTE_V_SECONDARY,
- rflags);
+ mmu_huge_psize);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_hash_mask) *
@@ -726,20 +750,18 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
if (unlikely(slot == -2))
panic("hash_huge_page: pte_insert failed\n");
- pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX;
-
- /*
- * No need to use ldarx/stdcx here because all who
- * might be updating the pte will hold the
- * page_table_lock
- */
- *ptep = new_pte;
+ new_pte |= (slot << 12) & _PAGE_F_GIX;
}
+ /*
+ * No need to use ldarx/stdcx here: we hold the _PAGE_BUSY
+ * bit, so any other updater of this pte will back off
+ * until we store the final value below
+ */
+ *ptep = __pte(new_pte & ~_PAGE_BUSY);
+
err = 0;
out:
- spin_unlock(&mm->page_table_lock);
-
return err;
}
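
The hash_huge_page() rewrite above drops page_table_lock in favour of
claiming a _PAGE_BUSY-style bit with a compare-and-swap retry loop. A
minimal userspace sketch of that locking pattern follows; the bit names
are placeholders and GCC's __sync_val_compare_and_swap stands in for
__cmpxchg_u64:

#include <stdint.h>
#include <stdio.h>

#define PTE_BUSY     (1UL << 0)  /* placeholder for _PAGE_BUSY */
#define PTE_ACCESSED (1UL << 1)  /* placeholder for _PAGE_ACCESSED */

/* Returns 0 once the busy bit is claimed, -1 if another CPU holds it. */
static int pte_try_lock(uint64_t *ptep)
{
	uint64_t old, new;

	do {
		old = *ptep;
		if (old & PTE_BUSY)
			return -1;	/* back off, like the "goto out" above */
		new = old | PTE_BUSY | PTE_ACCESSED;
	} while (__sync_val_compare_and_swap(ptep, old, new) != old);

	return 0;
}

int main(void)
{
	uint64_t pte = 0;

	printf("first claim: %d, second claim: %d\n",
	       pte_try_lock(&pte), pte_try_lock(&pte));
	return 0;
}
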
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index b0fc822ec29f..ce974c83d88a 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -57,7 +57,6 @@
#include
#include
#include
-#include
#include
#include
#include
@@ -188,12 +187,21 @@ static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
memset(addr, 0, kmem_cache_size(cache));
}
+#ifdef CONFIG_PPC_64K_PAGES
+static const int pgtable_cache_size[2] = {
+ PTE_TABLE_SIZE, PGD_TABLE_SIZE
+};
+static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
+ "pte_pmd_cache", "pgd_cache",
+};
+#else
static const int pgtable_cache_size[2] = {
PTE_TABLE_SIZE, PMD_TABLE_SIZE
};
static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
"pgd_pte_cache", "pud_pmd_cache",
};
+#endif /* CONFIG_PPC_64K_PAGES */
kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
@@ -201,19 +209,14 @@ void pgtable_cache_init(void)
{
int i;
- BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]);
- BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]);
- BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]);
- BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]);
-
for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) {
int size = pgtable_cache_size[i];
const char *name = pgtable_cache_name[i];
pgtable_cache[i] = kmem_cache_create(name,
size, size,
- SLAB_HWCACHE_ALIGN
- | SLAB_MUST_HWCACHE_ALIGN,
+ SLAB_HWCACHE_ALIGN |
+ SLAB_MUST_HWCACHE_ALIGN,
zero_ctor,
NULL);
if (! pgtable_cache[i])
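
The init_64.c hunk above selects a different pair of page-table caches
when CONFIG_PPC_64K_PAGES is set: the PMD and PUD levels fold away,
leaving only PTE and PGD tables. A standalone sketch of the same
compile-time parallel-array pattern; the sizes below are invented for
illustration, not the real table sizes:

#include <stdio.h>

#define DEMO_64K_PAGES 1	/* stands in for CONFIG_PPC_64K_PAGES */

#if DEMO_64K_PAGES
static const int   cache_size[2] = { 65536, 4096 };	/* made-up sizes */
static const char *cache_name[2] = { "pte_pmd_cache", "pgd_cache" };
#else
static const int   cache_size[2] = { 4096, 4096 };
static const char *cache_name[2] = { "pgd_pte_cache", "pud_pmd_cache" };
#endif

int main(void)
{
	unsigned int i;

	/* sizes and names travel in two arrays indexed the same way */
	for (i = 0; i < sizeof(cache_size) / sizeof(cache_size[0]); i++)
		printf("%s: %d bytes\n", cache_name[i], cache_size[i]);
	return 0;
}
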
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 117b00012e14..6f55efd9be95 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -61,6 +61,9 @@ int init_bootmem_done;
int mem_init_done;
unsigned long memory_limit;
+extern void hash_preload(struct mm_struct *mm, unsigned long ea,
+ unsigned long access, unsigned long trap);
+
/*
* This is called by /dev/mem to know if a given address has to
* be mapped non-cacheable or not
@@ -355,7 +358,7 @@ void __init mem_init(void)
}
codesize = (unsigned long)&_sdata - (unsigned long)&_stext;
- datasize = (unsigned long)&__init_begin - (unsigned long)&_sdata;
+ datasize = (unsigned long)&_edata - (unsigned long)&_sdata;
initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin;
bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start;
@@ -493,18 +496,10 @@ EXPORT_SYMBOL(flush_icache_user_range);
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
pte_t pte)
{
- /* handle i-cache coherency */
- unsigned long pfn = pte_pfn(pte);
-#ifdef CONFIG_PPC32
- pmd_t *pmd;
-#else
- unsigned long vsid;
- void *pgdir;
- pte_t *ptep;
- int local = 0;
- cpumask_t tmp;
- unsigned long flags;
+#ifdef CONFIG_PPC_STD_MMU
+ unsigned long access = 0, trap;
#endif
+ unsigned long pfn = pte_pfn(pte);
/* handle i-cache coherency */
if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
@@ -535,30 +530,21 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
if (!pte_young(pte) || address >= TASK_SIZE)
return;
-#ifdef CONFIG_PPC32
- if (Hash == 0)
+
+ /* We try to figure out if we are coming from an instruction
+ * access fault and pass that down to __hash_page so we avoid
+ * double-faulting on execution of fresh text. We have to test
+ * for regs NULL since init will get here first thing at boot
+ *
+ * We also avoid filling the hash if not coming from a fault
+ */
+ if (current->thread.regs == NULL)
return;
- pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address);
- if (!pmd_none(*pmd))
- add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd));
-#else
- pgdir = vma->vm_mm->pgd;
- if (pgdir == NULL)
+ trap = TRAP(current->thread.regs);
+ if (trap == 0x400)
+ access |= _PAGE_EXEC;
+ else if (trap != 0x300)
return;
-
- ptep = find_linux_pte(pgdir, address);
- if (!ptep)
- return;
-
- vsid = get_vsid(vma->vm_mm->context.id, address);
-
- local_irq_save(flags);
- tmp = cpumask_of_cpu(smp_processor_id());
- if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp))
- local = 1;
-
- __hash_page(address, 0, vsid, ptep, 0x300, local);
- local_irq_restore(flags);
-#endif
-#endif
+ hash_preload(vma->vm_mm, address, access, trap);
+#endif /* CONFIG_PPC_STD_MMU */
}
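
update_mmu_cache() now classifies the fault from the exception vector
before preloading the hash: 0x400 is an instruction storage interrupt
(so an _PAGE_EXEC access is requested), 0x300 a data storage interrupt,
and anything else skips the preload. A runnable sketch of just that
dispatch, with a placeholder constant standing in for _PAGE_EXEC:

#include <stdio.h>

#define PAGE_EXEC_DEMO 0x4UL	/* placeholder for _PAGE_EXEC */

/*
 * Map an exception vector to the access flags we would preload with.
 * Returns -1 for traps that should not trigger a hash preload.
 */
static int access_for_trap(unsigned long trap, unsigned long *access)
{
	*access = 0;
	if (trap == 0x400)
		*access |= PAGE_EXEC_DEMO;	/* instruction fetch fault */
	else if (trap != 0x300)
		return -1;			/* not a data storage fault */
	return 0;
}

int main(void)
{
	unsigned long access;

	if (access_for_trap(0x400, &access) == 0)
		printf("instruction fault, access=%#lx\n", access);
	if (access_for_trap(0x700, &access) != 0)
		printf("trap 0x700: no hash preload\n");
	return 0;
}
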
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 4035cad8d7f1..da09ba03c424 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -21,6 +21,7 @@
#include
#include
#include
+#include
static int numa_enabled = 1;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index b79a78206135..900842451bd3 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -59,7 +59,6 @@
#include
#include
#include
-#include
#include
#include
#include
@@ -101,7 +100,6 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
- unsigned long vsid;
if (mem_init_done) {
pgdp = pgd_offset_k(ea);
@@ -117,28 +115,15 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
__pgprot(flags)));
} else {
- unsigned long va, vpn, hash, hpteg;
-
/*
* If the mm subsystem is not fully up, we cannot create a
* linux page table entry for this mapping. Simply bolt an
* entry in the hardware page table.
*/
- vsid = get_kernel_vsid(ea);
- va = (vsid << 28) | (ea & 0xFFFFFFF);
- vpn = va >> PAGE_SHIFT;
-
- hash = hpt_hash(vpn, 0);
-
- hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
-
- /* Panic if a pte grpup is full */
- if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT,
- HPTE_V_BOLTED,
- _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX)
- == -1) {
- panic("map_io_page: could not insert mapping");
- }
+ if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
+ mmu_virtual_psize))
+ panic("Can't map bolted IO mapping");
}
return 0;
}
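
map_io_page() above collapses the open-coded hash computation and
insert into a single htab_bolt_mapping() call: before mem_init_done
there are no Linux page tables to walk, so the translation is bolted
straight into the hash table. The control flow, modelled with stand-in
stubs (the helper names below are illustrative, not kernel APIs):

#include <stdio.h>

#define DEMO_PAGE_SIZE 4096UL

static int mem_init_done;	/* flips to 1 once the mm is fully up */

/* stub for the pgd/pud/pmd/pte walk ending in set_pte_at() */
static int map_via_linux_pte(unsigned long ea)
{
	printf("set_pte_at() for ea=%#lx\n", ea);
	return 0;
}

/* stub for htab_bolt_mapping() */
static int bolt_hash_entry(unsigned long ea)
{
	printf("bolted hash entry for ea=%#lx\n", ea);
	return 0;
}

static int map_io_page_demo(unsigned long ea)
{
	if (mem_init_done)
		return map_via_linux_pte(ea);
	/* too early for page tables: bolt straight into the hash table */
	return bolt_hash_entry(ea);
}

int main(void)
{
	map_io_page_demo(0xd0000000UL);
	mem_init_done = 1;
	map_io_page_demo(0xd0000000UL + DEMO_PAGE_SIZE);
	return 0;
}
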
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index cef9e83cc7e9..ed7fcfe5fd37 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -178,6 +178,21 @@ void __init setbat(int index, unsigned long virt, unsigned long phys,
bat_addrs[index].phys = phys;
}
+/*
+ * Preload a translation in the hash table
+ */
+void hash_preload(struct mm_struct *mm, unsigned long ea,
+ unsigned long access, unsigned long trap)
+{
+ pmd_t *pmd;
+
+ if (Hash == 0)
+ return;
+ pmd = pmd_offset(pgd_offset(mm, ea), ea);
+ if (!pmd_none(*pmd))
+ add_hash_page(mm->context, ea, pmd_val(*pmd));
+}
+
/*
* Initialize the hash table and patch the instructions in hashtable.S.
*/
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 0473953f6a37..60e852f2f8e5 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -14,14 +14,32 @@
* 2 of the License, or (at your option) any later version.
*/
+#undef DEBUG
+
#include
#include