diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 index e8698afcd952..f7e32f218f73 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 @@ -43,6 +43,13 @@ Description: read only This sysfs interface exposes the number of cores per chip present in the system. +What: /sys/devices/hv_24x7/interface/cpumask +Date: July 2020 +Contact: Linux on PowerPC Developer List +Description: read only + This sysfs file exposes the cpumask which is designated to make + HCALLs to retrieve hv-24x7 pmu event counter data. + What: /sys/bus/event_source/devices/hv_24x7/event_descs/ Date: February 2014 Contact: Linux on PowerPC Developer List diff --git a/Documentation/ABI/testing/sysfs-bus-papr-pmem b/Documentation/ABI/testing/sysfs-bus-papr-pmem index 5b10d036a8d4..c1a67275c43f 100644 --- a/Documentation/ABI/testing/sysfs-bus-papr-pmem +++ b/Documentation/ABI/testing/sysfs-bus-papr-pmem @@ -25,3 +25,30 @@ Description: NVDIMM have been scrubbed. * "locked" : Indicating that NVDIMM contents cant be modified until next power cycle. + +What: /sys/bus/nd/devices/nmemX/papr/perf_stats +Date: May, 2020 +KernelVersion: v5.9 +Contact: linuxppc-dev , linux-nvdimm@lists.01.org, +Description: + (RO) Report various performance stats related to papr-scm NVDIMM + device. Each stat is reported on a new line with each line + composed of a stat-identifier followed by it value. Below are + currently known dimm performance stats which are reported: + + * "CtlResCt" : Controller Reset Count + * "CtlResTm" : Controller Reset Elapsed Time + * "PonSecs " : Power-on Seconds + * "MemLife " : Life Remaining + * "CritRscU" : Critical Resource Utilization + * "HostLCnt" : Host Load Count + * "HostSCnt" : Host Store Count + * "HostSDur" : Host Store Duration + * "HostLDur" : Host Load Duration + * "MedRCnt " : Media Read Count + * "MedWCnt " : Media Write Count + * "MedRDur " : Media Read Duration + * "MedWDur " : Media Write Duration + * "CchRHCnt" : Cache Read Hit Count + * "CchWHCnt" : Cache Write Hit Count + * "FastWCnt" : Fast Write Count \ No newline at end of file diff --git a/Documentation/ABI/testing/sysfs-class-ocxl b/Documentation/ABI/testing/sysfs-class-ocxl index b5b1fa197592..ae1276efa45a 100644 --- a/Documentation/ABI/testing/sysfs-class-ocxl +++ b/Documentation/ABI/testing/sysfs-class-ocxl @@ -33,3 +33,14 @@ Date: January 2018 Contact: linuxppc-dev@lists.ozlabs.org Description: read/write Give access the global mmio area for the AFU + +What: /sys/class/ocxl//reload_on_reset +Date: February 2020 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read/write + Control whether the FPGA is reloaded on a link reset. Enabled + through a vendor-specific logic block on the FPGA. + 0 Do not reload FPGA image from flash + 1 Reload FPGA image from flash + unavailable + The device does not support this capability diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ef66b3c45ba2..00b993aa9365 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -916,6 +916,10 @@ disable_radix [PPC] Disable RADIX MMU mode on POWER9 + radix_hcall_invalidate=on [PPC/PSERIES] + Disable RADIX GTSE feature and use hcall for TLB + invalidate. + disable_tlbie [PPC] Disable TLBIE instruction. Currently does not work with KVM, with HASH MMU, or with coherent accelerators. diff --git a/Documentation/core-api/cpu_hotplug.rst b/Documentation/core-api/cpu_hotplug.rst index f64668759b6a..298c9c8bea9a 100644 --- a/Documentation/core-api/cpu_hotplug.rst +++ b/Documentation/core-api/cpu_hotplug.rst @@ -50,13 +50,6 @@ Command Line Switches This option is limited to the X86 and S390 architecture. -``cede_offline={"off","on"}`` - Use this option to disable/enable putting offlined processors to an extended - ``H_CEDE`` state on supported pseries platforms. If nothing is specified, - ``cede_offline`` is set to "on". - - This option is limited to the PowerPC architecture. - ``cpu0_hotplug`` Allow to shutdown CPU0. diff --git a/Documentation/features/sched/membarrier-sync-core/arch-support.txt b/Documentation/features/sched/membarrier-sync-core/arch-support.txt index 68658a6f8c5b..47e6903f47a5 100644 --- a/Documentation/features/sched/membarrier-sync-core/arch-support.txt +++ b/Documentation/features/sched/membarrier-sync-core/arch-support.txt @@ -5,7 +5,7 @@ # # Architecture requirements # -# * arm/arm64 +# * arm/arm64/powerpc # # Rely on implicit context synchronization as a result of exception return # when returning from IPI handler, and when returning to user-space. @@ -45,7 +45,7 @@ | nios2: | TODO | | openrisc: | TODO | | parisc: | TODO | - | powerpc: | TODO | + | powerpc: | ok | | riscv: | TODO | | s390: | TODO | | sh: | TODO | diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 4e55aba3eb4a..96186332e5f4 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1935,6 +1935,20 @@ There are some more advanced barrier functions: relaxed I/O accessors and the Documentation/DMA-API.txt file for more information on consistent memory. + (*) pmem_wmb(); + + This is for use with persistent memory to ensure that stores for which + modifications are written to persistent storage reached a platform + durability domain. + + For example, after a non-temporal write to pmem region, we use pmem_wmb() + to ensure that stores have reached a platform durability domain. This ensures + that stores have updated persistent storage before any data access or + data transfer caused by subsequent instructions is initiated. This is + in addition to the ordering done by wmb(). + + For load from persistent memory, existing read memory barriers are sufficient + to ensure read ordering. =============================== IMPLICIT KERNEL MEMORY BARRIERS diff --git a/Documentation/powerpc/cpu_families.rst b/Documentation/powerpc/cpu_families.rst index 1e063c5440c3..9b84e045e713 100644 --- a/Documentation/powerpc/cpu_families.rst +++ b/Documentation/powerpc/cpu_families.rst @@ -9,7 +9,9 @@ and are supported by arch/powerpc. Book3S (aka sPAPR) ------------------ -- Hash MMU +- Hash MMU (except 603 and e300) +- Software loaded TLB (603 and e300) +- Selectable Software loaded TLB in addition to hash MMU (755, 7450, e600) - Mix of 32 & 64 bit:: +--------------+ +----------------+ @@ -24,9 +26,9 @@ Book3S (aka sPAPR) | | | | v v - +--------------+ +----------------+ +-------+ - | 604 | | 750 (G3) | ---> | 750CX | - +--------------+ +----------------+ +-------+ + +--------------+ +-----+ +----------------+ +-------+ + | 604 | | 755 | <--- | 750 (G3) | ---> | 750CX | + +--------------+ +-----+ +----------------+ +-------+ | | | | | | v v v diff --git a/Documentation/powerpc/mpc52xx.rst b/Documentation/powerpc/mpc52xx.rst index 8676ac63e077..30260707c3fe 100644 --- a/Documentation/powerpc/mpc52xx.rst +++ b/Documentation/powerpc/mpc52xx.rst @@ -2,7 +2,7 @@ Linux 2.6.x on MPC52xx family ============================= -For the latest info, go to http://www.246tNt.com/mpc52xx/ +For the latest info, go to https://www.246tNt.com/mpc52xx/ To compile/use : diff --git a/Documentation/powerpc/syscall64-abi.rst b/Documentation/powerpc/syscall64-abi.rst index e49f69f941b9..46caaadbb029 100644 --- a/Documentation/powerpc/syscall64-abi.rst +++ b/Documentation/powerpc/syscall64-abi.rst @@ -5,6 +5,15 @@ Power Architecture 64-bit Linux system call ABI syscall ======= +Invocation +---------- +The syscall is made with the sc instruction, and returns with execution +continuing at the instruction following the sc instruction. + +If PPC_FEATURE2_SCV appears in the AT_HWCAP2 ELF auxiliary vector, the +scv 0 instruction is an alternative that may provide better performance, +with some differences to calling sequence. + syscall calling sequence\ [1]_ matches the Power Architecture 64-bit ELF ABI specification C function calling sequence, including register preservation rules, with the following differences. @@ -12,16 +21,23 @@ rules, with the following differences. .. [1] Some syscalls (typically low-level management functions) may have different calling sequences (e.g., rt_sigreturn). -Parameters and return value ---------------------------- +Parameters +---------- The system call number is specified in r0. There is a maximum of 6 integer parameters to a syscall, passed in r3-r8. -Both a return value and a return error code are returned. cr0.SO is the return -error code, and r3 is the return value or error code. When cr0.SO is clear, -the syscall succeeded and r3 is the return value. When cr0.SO is set, the -syscall failed and r3 is the error code that generally corresponds to errno. +Return value +------------ +- For the sc instruction, both a value and an error condition are returned. + cr0.SO is the error condition, and r3 is the return value. When cr0.SO is + clear, the syscall succeeded and r3 is the return value. When cr0.SO is set, + the syscall failed and r3 is the error value (that normally corresponds to + errno). + +- For the scv 0 instruction, the return value indicates failure if it is + -4095..-1 (i.e., it is >= -MAX_ERRNO (-4095) as an unsigned comparison), + in which case the error value is the negated return value. Stack ----- @@ -34,22 +50,23 @@ Register preservation rules match the ELF ABI calling sequence with the following differences: =========== ============= ======================================== +--- For the sc instruction, differences with the ELF ABI --- r0 Volatile (System call number.) r3 Volatile (Parameter 1, and return value.) r4-r8 Volatile (Parameters 2-6.) -cr0 Volatile (cr0.SO is the return error condition) +cr0 Volatile (cr0.SO is the return error condition.) cr1, cr5-7 Nonvolatile lr Nonvolatile + +--- For the scv 0 instruction, differences with the ELF ABI --- +r0 Volatile (System call number.) +r3 Volatile (Parameter 1, and return value.) +r4-r8 Volatile (Parameters 2-6.) =========== ============= ======================================== All floating point and vector data registers as well as control and status registers are nonvolatile. -Invocation ----------- -The syscall is performed with the sc instruction, and returns with execution -continuing at the instruction following the sc instruction. - Transactional Memory -------------------- Syscall behavior can change if the processor is in transactional or suspended @@ -75,6 +92,7 @@ auxiliary vector. returning to the caller. This case is not well defined or supported, so this behavior should not be relied upon. +scv 0 syscalls will always behave as PPC_FEATURE2_HTM_NOSC. vsyscall ======== diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index cdfd98155311..eb3a1316f03e 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -2160,9 +2160,12 @@ registers, find a list below: PPC KVM_REG_PPC_MMCRA 64 PPC KVM_REG_PPC_MMCR2 64 PPC KVM_REG_PPC_MMCRS 64 + PPC KVM_REG_PPC_MMCR3 64 PPC KVM_REG_PPC_SIAR 64 PPC KVM_REG_PPC_SDAR 64 PPC KVM_REG_PPC_SIER 64 + PPC KVM_REG_PPC_SIER2 64 + PPC KVM_REG_PPC_SIER3 64 PPC KVM_REG_PPC_PMC1 32 PPC KVM_REG_PPC_PMC2 32 PPC KVM_REG_PPC_PMC3 32 diff --git a/MAINTAINERS b/MAINTAINERS index a45fe1a6251e..9590b98ad26d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13308,7 +13308,6 @@ F: tools/pci/ PCI ENHANCED ERROR HANDLING (EEH) FOR POWERPC M: Russell Currey -M: Sam Bobroff M: Oliver O'Halloran L: linuxppc-dev@lists.ozlabs.org S: Supported diff --git a/arch/m68k/include/asm/adb_iop.h b/arch/m68k/include/asm/adb_iop.h index 195d7fb1268c..6aecd020e2fc 100644 --- a/arch/m68k/include/asm/adb_iop.h +++ b/arch/m68k/include/asm/adb_iop.h @@ -29,6 +29,7 @@ #define ADB_IOP_EXPLICIT 0x80 /* nonzero if explicit command */ #define ADB_IOP_AUTOPOLL 0x40 /* auto/SRQ polling enabled */ +#define ADB_IOP_SET_AUTOPOLL 0x20 /* set autopoll device list */ #define ADB_IOP_SRQ 0x04 /* SRQ detected */ #define ADB_IOP_TIMEOUT 0x02 /* nonzero if timeout */ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 29a057908173..1f48bbfb3ce9 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -131,6 +131,7 @@ config PPC select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64 select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_MEMBARRIER_CALLBACKS + select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST @@ -145,6 +146,8 @@ config PPC select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if PPC64 + select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS + select ARCH_USE_QUEUED_SPINLOCKS if PPC_QUEUED_SPINLOCKS select ARCH_WANT_IPC_PARSE_VERSION select ARCH_WEAK_RELEASE_ACQUIRE select BINFMT_ELF @@ -491,6 +494,19 @@ config HOTPLUG_CPU Say N if you are unsure. +config PPC_QUEUED_SPINLOCKS + bool "Queued spinlocks" + depends on SMP + help + Say Y here to use queued spinlocks which give better scalability and + fairness on large SMP and NUMA systems without harming single threaded + performance. + + This option is currently experimental, the code is more complex and + less tested so it defaults to "N" for the moment. + + If unsure, say "N". + config ARCH_CPU_PROBE_RELEASE def_bool y depends on HOTPLUG_CPU @@ -834,13 +850,16 @@ config FORCE_MAX_ZONEORDER this in mind when choosing a value for this option. config PPC_SUBPAGE_PROT - bool "Support setting protections for 4k subpages" + bool "Support setting protections for 4k subpages (subpage_prot syscall)" + default n depends on PPC_BOOK3S_64 && PPC_64K_PAGES help - This option adds support for a system call to allow user programs + This option adds support for system call to allow user programs to set access permissions (read/write, readonly, or no access) on the 4k subpages of each 64k page. + If unsure, say N here. + config PPC_COPRO_BASE bool @@ -860,12 +879,8 @@ config PPC_DENORMALISATION Add support for handling denormalisation of single precision values. Useful for bare metal only. If unsure say Y here. -config CMDLINE_BOOL - bool "Default bootloader kernel arguments" - config CMDLINE - string "Initial kernel command string" if CMDLINE_BOOL - default "console=ttyS0,9600 console=tty0 root=/dev/sda2" if CMDLINE_BOOL + string "Initial kernel command string" default "" help On some platforms, there is currently no way for the boot loader to @@ -1199,6 +1214,7 @@ config TASK_SIZE_BOOL config TASK_SIZE hex "Size of user task space" if TASK_SIZE_BOOL default "0x80000000" if PPC_8xx + default "0xb0000000" if PPC_BOOK3S_32 && STRICT_KERNEL_RWX default "0xc0000000" endmenu diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index f310c32e88a4..3e8da9cf2eb9 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -246,7 +246,8 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # often slow when they are implemented at all KBUILD_CFLAGS += $(call cc-option,-mno-string) -cpu-as-$(CONFIG_4xx) += -Wa,-m405 +cpu-as-$(CONFIG_40x) += -Wa,-m405 +cpu-as-$(CONFIG_44x) += -Wa,-m440 cpu-as-$(CONFIG_ALTIVEC) += $(call as-option,-Wa$(comma)-maltivec) cpu-as-$(CONFIG_E200) += -Wa,-me200 cpu-as-$(CONFIG_E500) += -Wa,-me500 diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 63d7456b9518..44af71543380 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -70,10 +70,10 @@ BOOTCFLAGS += -I$(objtree)/$(obj) -I$(srctree)/$(obj) DTC_FLAGS ?= -p 1024 $(obj)/4xx.o: BOOTCFLAGS += -mcpu=405 -$(obj)/ebony.o: BOOTCFLAGS += -mcpu=405 +$(obj)/ebony.o: BOOTCFLAGS += -mcpu=440 $(obj)/cuboot-hotfoot.o: BOOTCFLAGS += -mcpu=405 -$(obj)/cuboot-taishan.o: BOOTCFLAGS += -mcpu=405 -$(obj)/cuboot-katmai.o: BOOTCFLAGS += -mcpu=405 +$(obj)/cuboot-taishan.o: BOOTCFLAGS += -mcpu=440 +$(obj)/cuboot-katmai.o: BOOTCFLAGS += -mcpu=440 $(obj)/cuboot-acadia.o: BOOTCFLAGS += -mcpu=405 $(obj)/treeboot-iss4xx.o: BOOTCFLAGS += -mcpu=405 $(obj)/treeboot-currituck.o: BOOTCFLAGS += -mcpu=405 @@ -117,7 +117,7 @@ src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \ elf_util.c $(zlib-y) devtree.c stdlib.c \ oflib.c ofconsole.c cuboot.c -src-wlib-$(CONFIG_PPC_MPC52XX) += mpc52xx-psc.c +src-wlib-$(CONFIG_PPC_MPC52xx) += mpc52xx-psc.c src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S opal.c ifndef CONFIG_PPC64_BOOT_WRAPPER src-wlib-y += crtsavres.S diff --git a/arch/powerpc/boot/dts/akebono.dts b/arch/powerpc/boot/dts/akebono.dts index cd9d66041a3f..df18f8dc4642 100644 --- a/arch/powerpc/boot/dts/akebono.dts +++ b/arch/powerpc/boot/dts/akebono.dts @@ -248,7 +248,7 @@ FPGA0: fpga@ebc00000 { }; }; - PCIE0: pciex@10100000000 { + PCIE0: pcie@10100000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -288,7 +288,7 @@ PCIE0: pciex@10100000000 { 0x0 0x0 0x0 0x4 &MPIC 48 0x2 /* int D */>; }; - PCIE1: pciex@20100000000 { + PCIE1: pcie@20100000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -328,7 +328,7 @@ PCIE1: pciex@20100000000 { 0x0 0x0 0x0 0x4 &MPIC 56 0x2 /* int D */>; }; - PCIE2: pciex@18100000000 { + PCIE2: pcie@18100000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -368,7 +368,7 @@ PCIE2: pciex@18100000000 { 0x0 0x0 0x0 0x4 &MPIC 64 0x2 /* int D */>; }; - PCIE3: pciex@28100000000 { + PCIE3: pcie@28100000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/bluestone.dts b/arch/powerpc/boot/dts/bluestone.dts index cc965a1816b6..aa1ae94cd776 100644 --- a/arch/powerpc/boot/dts/bluestone.dts +++ b/arch/powerpc/boot/dts/bluestone.dts @@ -325,7 +325,7 @@ EMAC0: ethernet@ef600c00 { }; }; - PCIE0: pciex@d00000000 { + PCIE0: pcie@d00000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/canyonlands.dts b/arch/powerpc/boot/dts/canyonlands.dts index 0d6ac92d0f5e..c5fbb08e0a6e 100644 --- a/arch/powerpc/boot/dts/canyonlands.dts +++ b/arch/powerpc/boot/dts/canyonlands.dts @@ -461,7 +461,7 @@ PCIX0: pci@c0ec00000 { interrupt-map = < 0x0 0x0 0x0 0x0 &UIC1 0x0 0x8 >; }; - PCIE0: pciex@d00000000 { + PCIE0: pcie@d00000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -503,7 +503,7 @@ PCIE0: pciex@d00000000 { 0x0 0x0 0x0 0x4 &UIC3 0xf 0x4 /* swizzled int D */>; }; - PCIE1: pciex@d20000000 { + PCIE1: pcie@d20000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/currituck.dts b/arch/powerpc/boot/dts/currituck.dts index b6d87b9c2cef..aea8af810106 100644 --- a/arch/powerpc/boot/dts/currituck.dts +++ b/arch/powerpc/boot/dts/currituck.dts @@ -122,7 +122,7 @@ rtc@68 { }; }; - PCIE0: pciex@10100000000 { // 4xGBIF1 + PCIE0: pcie@10100000000 { // 4xGBIF1 device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -160,7 +160,7 @@ PCIE0: pciex@10100000000 { // 4xGBIF1 0x0 0x0 0x0 0x4 &MPIC 49 0x2 /* int D */>; }; - PCIE1: pciex@30100000000 { // 4xGBIF0 + PCIE1: pcie@30100000000 { // 4xGBIF0 device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -197,7 +197,7 @@ PCIE1: pciex@30100000000 { // 4xGBIF0 0x0 0x0 0x0 0x4 &MPIC 41 0x2 /* int D */>; }; - PCIE2: pciex@38100000000 { // 2xGBIF0 + PCIE2: pcie@38100000000 { // 2xGBIF0 device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/fsl/p4080ds.dts b/arch/powerpc/boot/dts/fsl/p4080ds.dts index 65e20152e22f..969b32c4f2d5 100644 --- a/arch/powerpc/boot/dts/fsl/p4080ds.dts +++ b/arch/powerpc/boot/dts/fsl/p4080ds.dts @@ -125,11 +125,11 @@ partition@fs { i2c@118100 { eeprom@51 { - compatible = "atmel,24c256"; + compatible = "atmel,spd"; reg = <0x51>; }; eeprom@52 { - compatible = "atmel,24c256"; + compatible = "atmel,spd"; reg = <0x52>; }; rtc@68 { @@ -143,6 +143,45 @@ adt7461@4c { }; }; + i2c@118000 { + zl2006@21 { + compatible = "zl2006"; + reg = <0x21>; + }; + zl2006@22 { + compatible = "zl2006"; + reg = <0x22>; + }; + zl2006@23 { + compatible = "zl2006"; + reg = <0x23>; + }; + zl2006@24 { + compatible = "zl2006"; + reg = <0x24>; + }; + eeprom@50 { + compatible = "atmel,24c64"; + reg = <0x50>; + }; + eeprom@55 { + compatible = "atmel,24c64"; + reg = <0x55>; + }; + eeprom@56 { + compatible = "atmel,24c64"; + reg = <0x56>; + }; + eeprom@57 { + compatible = "atmel,24c02"; + reg = <0x57>; + }; + }; + + i2c@119100 { + /* 0x6E: ICS9FG108 */ + }; + usb0: usb@210000 { phy_type = "ulpi"; }; diff --git a/arch/powerpc/boot/dts/glacier.dts b/arch/powerpc/boot/dts/glacier.dts index a7a802f4ffdd..e84ff1afb58c 100644 --- a/arch/powerpc/boot/dts/glacier.dts +++ b/arch/powerpc/boot/dts/glacier.dts @@ -489,7 +489,7 @@ PCIX0: pci@c0ec00000 { interrupt-map = < 0x0 0x0 0x0 0x0 &UIC1 0x0 0x8 >; }; - PCIE0: pciex@d00000000 { + PCIE0: pcie@d00000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -531,7 +531,7 @@ PCIE0: pciex@d00000000 { 0x0 0x0 0x0 0x4 &UIC3 0xf 0x4 /* swizzled int D */>; }; - PCIE1: pciex@d20000000 { + PCIE1: pcie@d20000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/haleakala.dts b/arch/powerpc/boot/dts/haleakala.dts index cb16dad43c92..f81ce8786d59 100644 --- a/arch/powerpc/boot/dts/haleakala.dts +++ b/arch/powerpc/boot/dts/haleakala.dts @@ -237,7 +237,7 @@ EMAC0: ethernet@ef600900 { }; }; - PCIE0: pciex@a0000000 { + PCIE0: pcie@a0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/icon.dts b/arch/powerpc/boot/dts/icon.dts index 2e6e3a7b2604..fbaa60b8f87a 100644 --- a/arch/powerpc/boot/dts/icon.dts +++ b/arch/powerpc/boot/dts/icon.dts @@ -315,7 +315,7 @@ PCIX0: pci@c0ec00000 { interrupt-map = <0x0 0x0 0x0 0x0 &UIC1 19 0x8>; }; - PCIE0: pciex@d00000000 { + PCIE0: pcie@d00000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -356,7 +356,7 @@ PCIE0: pciex@d00000000 { 0x0 0x0 0x0 0x4 &UIC3 0x3 0x4 /* swizzled int D */>; }; - PCIE1: pciex@d20000000 { + PCIE1: pcie@d20000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/katmai.dts b/arch/powerpc/boot/dts/katmai.dts index 02629e119b87..a8f353229fb7 100644 --- a/arch/powerpc/boot/dts/katmai.dts +++ b/arch/powerpc/boot/dts/katmai.dts @@ -319,7 +319,7 @@ PCIX0: pci@c0ec00000 { >; }; - PCIE0: pciex@d00000000 { + PCIE0: pcie@d00000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -360,7 +360,7 @@ PCIE0: pciex@d00000000 { 0x0 0x0 0x0 0x4 &UIC3 0x3 0x4 /* swizzled int D */>; }; - PCIE1: pciex@d20000000 { + PCIE1: pcie@d20000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -401,7 +401,7 @@ PCIE1: pciex@d20000000 { 0x0 0x0 0x0 0x4 &UIC3 0x7 0x4 /* swizzled int D */>; }; - PCIE2: pciex@d40000000 { + PCIE2: pcie@d40000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/kilauea.dts b/arch/powerpc/boot/dts/kilauea.dts index 2a3413221cc1..a709fb47a180 100644 --- a/arch/powerpc/boot/dts/kilauea.dts +++ b/arch/powerpc/boot/dts/kilauea.dts @@ -322,7 +322,7 @@ EMAC1: ethernet@ef600a00 { }; }; - PCIE0: pciex@a0000000 { + PCIE0: pcie@a0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -363,7 +363,7 @@ PCIE0: pciex@a0000000 { 0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>; }; - PCIE1: pciex@c0000000 { + PCIE1: pcie@c0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/makalu.dts b/arch/powerpc/boot/dts/makalu.dts index bf8fe1629392..c473cd911bca 100644 --- a/arch/powerpc/boot/dts/makalu.dts +++ b/arch/powerpc/boot/dts/makalu.dts @@ -268,7 +268,7 @@ EMAC1: ethernet@ef600a00 { }; }; - PCIE0: pciex@a0000000 { + PCIE0: pcie@a0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -309,7 +309,7 @@ PCIE0: pciex@a0000000 { 0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>; }; - PCIE1: pciex@c0000000 { + PCIE1: pcie@c0000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/dts/redwood.dts b/arch/powerpc/boot/dts/redwood.dts index f3e046fb49e2..f38035a1f4a1 100644 --- a/arch/powerpc/boot/dts/redwood.dts +++ b/arch/powerpc/boot/dts/redwood.dts @@ -235,7 +235,7 @@ EMAC0: ethernet@ef600a00 { has-new-stacr-staopc; }; }; - PCIE0: pciex@d00000000 { + PCIE0: pcie@d00000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -276,7 +276,7 @@ PCIE0: pciex@d00000000 { 0x0 0x0 0x0 0x4 &UIC3 0x3 0x4 /* swizzled int D */>; }; - PCIE1: pciex@d20000000 { + PCIE1: pcie@d20000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; @@ -317,7 +317,7 @@ PCIE1: pciex@d20000000 { 0x0 0x0 0x0 0x4 &UIC3 0x7 0x4 /* swizzled int D */>; }; - PCIE2: pciex@d40000000 { + PCIE2: pcie@d40000000 { device_type = "pci"; #interrupt-cells = <1>; #size-cells = <2>; diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c index a9d209135975..cae31a6e8f02 100644 --- a/arch/powerpc/boot/main.c +++ b/arch/powerpc/boot/main.c @@ -104,7 +104,7 @@ static struct addr_range prep_initrd(struct addr_range vmlinux, void *chosen, { /* If we have an image attached to us, it overrides anything * supplied by the loader. */ - if (_initrd_end > _initrd_start) { + if (&_initrd_end > &_initrd_start) { printf("Attached initrd image at 0x%p-0x%p\n\r", _initrd_start, _initrd_end); initrd_addr = (unsigned long)_initrd_start; @@ -152,7 +152,7 @@ static void prep_esm_blob(struct addr_range vmlinux, void *chosen) unsigned long esm_blob_addr, esm_blob_size; /* Do we have an ESM (Enter Secure Mode) blob? */ - if (_esm_blob_end <= _esm_blob_start) + if (&_esm_blob_end <= &_esm_blob_start) return; printf("Attached ESM blob at 0x%p-0x%p\n\r", diff --git a/arch/powerpc/boot/ps3.c b/arch/powerpc/boot/ps3.c index c52552a681c5..6e4efbdb6b7c 100644 --- a/arch/powerpc/boot/ps3.c +++ b/arch/powerpc/boot/ps3.c @@ -127,7 +127,7 @@ void platform_init(void) ps3_repository_read_rm_size(&rm_size); dt_fixup_memory(0, rm_size); - if (_initrd_end > _initrd_start) { + if (&_initrd_end > &_initrd_start) { setprop_val(chosen, "linux,initrd-start", (u32)(_initrd_start)); setprop_val(chosen, "linux,initrd-end", (u32)(_initrd_end)); } diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c index 0bfa7e87e546..9a19e5905485 100644 --- a/arch/powerpc/boot/serial.c +++ b/arch/powerpc/boot/serial.c @@ -128,7 +128,7 @@ int serial_console_init(void) dt_is_compatible(devp, "fsl,cpm2-smc-uart")) rc = cpm_console_init(devp, &serial_cd); #endif -#ifdef CONFIG_PPC_MPC52XX +#ifdef CONFIG_PPC_MPC52xx else if (dt_is_compatible(devp, "fsl,mpc5200-psc-uart")) rc = mpc5200_psc_console_init(devp, &serial_cd); #endif diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig index 7705a5c3f4ea..3894ba8f8ffc 100644 --- a/arch/powerpc/configs/44x/akebono_defconfig +++ b/arch/powerpc/configs/44x/akebono_defconfig @@ -19,8 +19,6 @@ CONFIG_HIGHMEM=y CONFIG_HZ_100=y CONFIG_IRQ_ALL_CPUS=y # CONFIG_COMPACTION is not set -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" # CONFIG_SUSPEND is not set CONFIG_NET=y CONFIG_PACKET=y @@ -58,7 +56,6 @@ CONFIG_BLK_DEV_SD=y # CONFIG_NET_VENDOR_DEC is not set # CONFIG_NET_VENDOR_DLINK is not set # CONFIG_NET_VENDOR_EMULEX is not set -# CONFIG_NET_VENDOR_EXAR is not set CONFIG_IBM_EMAC=y # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MELLANOX is not set diff --git a/arch/powerpc/configs/44x/arches_defconfig b/arch/powerpc/configs/44x/arches_defconfig index 82c6f49b8dcb..41d04e70d4fb 100644 --- a/arch/powerpc/configs/44x/arches_defconfig +++ b/arch/powerpc/configs/44x/arches_defconfig @@ -11,8 +11,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_EBONY is not set CONFIG_ARCHES=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/bamboo_defconfig b/arch/powerpc/configs/44x/bamboo_defconfig index 679213214a75..acbce718eaa8 100644 --- a/arch/powerpc/configs/44x/bamboo_defconfig +++ b/arch/powerpc/configs/44x/bamboo_defconfig @@ -9,8 +9,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_BAMBOO=y # CONFIG_EBONY is not set -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/bluestone_defconfig b/arch/powerpc/configs/44x/bluestone_defconfig index 8006a5728afd..37088f250c9e 100644 --- a/arch/powerpc/configs/44x/bluestone_defconfig +++ b/arch/powerpc/configs/44x/bluestone_defconfig @@ -11,8 +11,6 @@ CONFIG_EXPERT=y # CONFIG_COMPAT_BRK is not set CONFIG_BLUESTONE=y # CONFIG_EBONY is not set -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/canyonlands_defconfig b/arch/powerpc/configs/44x/canyonlands_defconfig index ccc14eb7a2f1..61776ade572b 100644 --- a/arch/powerpc/configs/44x/canyonlands_defconfig +++ b/arch/powerpc/configs/44x/canyonlands_defconfig @@ -11,8 +11,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_EBONY is not set CONFIG_CANYONLANDS=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/currituck_defconfig b/arch/powerpc/configs/44x/currituck_defconfig index be76e066df01..34c86b3abecb 100644 --- a/arch/powerpc/configs/44x/currituck_defconfig +++ b/arch/powerpc/configs/44x/currituck_defconfig @@ -17,8 +17,6 @@ CONFIG_HIGHMEM=y CONFIG_HZ_100=y CONFIG_MATH_EMULATION=y CONFIG_IRQ_ALL_CPUS=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" # CONFIG_SUSPEND is not set CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/powerpc/configs/44x/eiger_defconfig b/arch/powerpc/configs/44x/eiger_defconfig index 1abaa63e067f..509300f400e2 100644 --- a/arch/powerpc/configs/44x/eiger_defconfig +++ b/arch/powerpc/configs/44x/eiger_defconfig @@ -10,8 +10,6 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_EBONY is not set CONFIG_EIGER=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_PCIEPORTBUS=y # CONFIG_PCIEASPM is not set CONFIG_NET=y diff --git a/arch/powerpc/configs/44x/fsp2_defconfig b/arch/powerpc/configs/44x/fsp2_defconfig index e67fc041ca3e..30845ce0885a 100644 --- a/arch/powerpc/configs/44x/fsp2_defconfig +++ b/arch/powerpc/configs/44x/fsp2_defconfig @@ -28,7 +28,6 @@ CONFIG_476FPE_ERR46=y CONFIG_SWIOTLB=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y -CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="ip=on rw" # CONFIG_SUSPEND is not set # CONFIG_PCI is not set diff --git a/arch/powerpc/configs/44x/icon_defconfig b/arch/powerpc/configs/44x/icon_defconfig index 7d7ff84c8200..930948a1da76 100644 --- a/arch/powerpc/configs/44x/icon_defconfig +++ b/arch/powerpc/configs/44x/icon_defconfig @@ -9,8 +9,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_EBONY is not set CONFIG_ICON=y CONFIG_HIGHMEM=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_PCIEPORTBUS=y # CONFIG_PCIEASPM is not set CONFIG_NET=y diff --git a/arch/powerpc/configs/44x/iss476-smp_defconfig b/arch/powerpc/configs/44x/iss476-smp_defconfig index fb5c73a29bf4..2c3834eebca3 100644 --- a/arch/powerpc/configs/44x/iss476-smp_defconfig +++ b/arch/powerpc/configs/44x/iss476-smp_defconfig @@ -17,7 +17,6 @@ CONFIG_ISS4xx=y CONFIG_HZ_100=y CONFIG_MATH_EMULATION=y CONFIG_IRQ_ALL_CPUS=y -CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="root=/dev/issblk0" # CONFIG_PCI is not set CONFIG_ADVANCED_OPTIONS=y diff --git a/arch/powerpc/configs/44x/katmai_defconfig b/arch/powerpc/configs/44x/katmai_defconfig index c6dc1445fc04..1a0f1c3e0ee9 100644 --- a/arch/powerpc/configs/44x/katmai_defconfig +++ b/arch/powerpc/configs/44x/katmai_defconfig @@ -9,8 +9,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_EBONY is not set CONFIG_KATMAI=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/rainier_defconfig b/arch/powerpc/configs/44x/rainier_defconfig index c83ad03182df..6dd67de06a0b 100644 --- a/arch/powerpc/configs/44x/rainier_defconfig +++ b/arch/powerpc/configs/44x/rainier_defconfig @@ -10,8 +10,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_EBONY is not set CONFIG_RAINIER=y CONFIG_MATH_EMULATION=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/redwood_defconfig b/arch/powerpc/configs/44x/redwood_defconfig index 640fe1d5af28..e28d76416537 100644 --- a/arch/powerpc/configs/44x/redwood_defconfig +++ b/arch/powerpc/configs/44x/redwood_defconfig @@ -10,8 +10,6 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_EBONY is not set CONFIG_REDWOOD=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_PCIEPORTBUS=y # CONFIG_PCIEASPM is not set CONFIG_NET=y diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig index 22dc0dadf576..ef09786d49b9 100644 --- a/arch/powerpc/configs/44x/sam440ep_defconfig +++ b/arch/powerpc/configs/44x/sam440ep_defconfig @@ -12,8 +12,6 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_AMIGA_PARTITION=y # CONFIG_EBONY is not set CONFIG_SAM440EP=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/sequoia_defconfig b/arch/powerpc/configs/44x/sequoia_defconfig index 2c0973db8837..b4984eab43eb 100644 --- a/arch/powerpc/configs/44x/sequoia_defconfig +++ b/arch/powerpc/configs/44x/sequoia_defconfig @@ -11,8 +11,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_EBONY is not set CONFIG_SEQUOIA=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/taishan_defconfig b/arch/powerpc/configs/44x/taishan_defconfig index a2d355ca62b2..3ea5932ab852 100644 --- a/arch/powerpc/configs/44x/taishan_defconfig +++ b/arch/powerpc/configs/44x/taishan_defconfig @@ -9,8 +9,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_EBONY is not set CONFIG_TAISHAN=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig index af66c69c49fe..47252c2d7669 100644 --- a/arch/powerpc/configs/44x/warp_defconfig +++ b/arch/powerpc/configs/44x/warp_defconfig @@ -14,7 +14,6 @@ CONFIG_MODULE_UNLOAD=y CONFIG_WARP=y CONFIG_PPC4xx_GPIO=y CONFIG_HZ_1000=y -CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="ip=on" # CONFIG_PCI is not set CONFIG_NET=y diff --git a/arch/powerpc/configs/85xx-hw.config b/arch/powerpc/configs/85xx-hw.config index b507df6ac69f..524db76f47b7 100644 --- a/arch/powerpc/configs/85xx-hw.config +++ b/arch/powerpc/configs/85xx-hw.config @@ -67,7 +67,6 @@ CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_M25P80=y CONFIG_MTD_NAND_FSL_ELBC=y CONFIG_MTD_NAND_FSL_IFC=y CONFIG_MTD_RAW_NAND=y diff --git a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig index d50aca608736..3a6381aa9fdc 100644 --- a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig +++ b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig @@ -51,9 +51,6 @@ CONFIG_NET_IPIP=y CONFIG_IP_MROUTE=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set CONFIG_MTD=y CONFIG_MTD_REDBOOT_PARTS=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/86xx-hw.config b/arch/powerpc/configs/86xx-hw.config index 151164cf8cb3..0cb24b33c88e 100644 --- a/arch/powerpc/configs/86xx-hw.config +++ b/arch/powerpc/configs/86xx-hw.config @@ -32,8 +32,6 @@ CONFIG_HW_RANDOM=y CONFIG_HZ_1000=y CONFIG_I2C_MPC=y CONFIG_I2C=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSEDEV is not set diff --git a/arch/powerpc/configs/fsl-emb-nonhw.config b/arch/powerpc/configs/fsl-emb-nonhw.config index 3c7dad19a691..df37efed0aec 100644 --- a/arch/powerpc/configs/fsl-emb-nonhw.config +++ b/arch/powerpc/configs/fsl-emb-nonhw.config @@ -56,7 +56,6 @@ CONFIG_IKCONFIG=y CONFIG_INET_AH=y CONFIG_INET_ESP=y CONFIG_INET_IPCOMP=y -# CONFIG_INET_XFRM_MODE_BEET is not set CONFIG_INET=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MROUTE=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index a68c7f3af10e..1c674c4c1d86 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -51,7 +51,6 @@ CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_IRC=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CONNTRACK_IPV4=m CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_BLK_DEV_LOOP=y diff --git a/arch/powerpc/configs/holly_defconfig b/arch/powerpc/configs/holly_defconfig index 067f433c8f5e..271daff47d1d 100644 --- a/arch/powerpc/configs/holly_defconfig +++ b/arch/powerpc/configs/holly_defconfig @@ -13,7 +13,6 @@ CONFIG_EMBEDDED6xx=y CONFIG_PPC_HOLLY=y CONFIG_GEN_RTC=y CONFIG_BINFMT_MISC=y -CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyS0,115200" # CONFIG_SECCOMP is not set CONFIG_NET=y diff --git a/arch/powerpc/configs/linkstation_defconfig b/arch/powerpc/configs/linkstation_defconfig index ea59f3d146df..d4be64f190ff 100644 --- a/arch/powerpc/configs/linkstation_defconfig +++ b/arch/powerpc/configs/linkstation_defconfig @@ -37,7 +37,6 @@ CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NETFILTER_XT_MATCH_MAC=m CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NF_CONNTRACK_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig index e39346b3dc3b..e75d3f3060c9 100644 --- a/arch/powerpc/configs/mpc512x_defconfig +++ b/arch/powerpc/configs/mpc512x_defconfig @@ -47,7 +47,6 @@ CONFIG_MTD_UBI=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=1 CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_BLK_DEV_RAM_DAX=y CONFIG_EEPROM_AT24=y CONFIG_EEPROM_AT25=y CONFIG_SCSI=y diff --git a/arch/powerpc/configs/mpc83xx_defconfig b/arch/powerpc/configs/mpc83xx_defconfig index be125729635c..95d43f8a3869 100644 --- a/arch/powerpc/configs/mpc83xx_defconfig +++ b/arch/powerpc/configs/mpc83xx_defconfig @@ -19,7 +19,6 @@ CONFIG_MPC836x_MDS=y CONFIG_MPC836x_RDK=y CONFIG_MPC837x_MDS=y CONFIG_MPC837x_RDB=y -CONFIG_SBC834x=y CONFIG_ASP834x=y CONFIG_QE_GPIO=y CONFIG_MATH_EMULATION=y diff --git a/arch/powerpc/configs/mvme5100_defconfig b/arch/powerpc/configs/mvme5100_defconfig index 0a0d046fc445..1fed6be95d53 100644 --- a/arch/powerpc/configs/mvme5100_defconfig +++ b/arch/powerpc/configs/mvme5100_defconfig @@ -20,10 +20,9 @@ CONFIG_EMBEDDED6xx=y CONFIG_MVME5100=y CONFIG_KVM_GUEST=y CONFIG_HZ_100=y +CONFIG_CMDLINE="console=ttyS0,9600 ip=dhcp root=/dev/nfs" # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_COMPACTION is not set -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0,9600 ip=dhcp root=/dev/nfs" CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -46,7 +45,6 @@ CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NETFILTER_XT_MATCH_MAC=m CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NF_CONNTRACK_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index 08b7f4cef243..af9af03059e4 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -58,7 +58,6 @@ CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_EEPROM_LEGACY=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y -CONFIG_CHR_DEV_OSST=y CONFIG_BLK_DEV_SR=y CONFIG_CHR_DEV_SG=y CONFIG_CHR_DEV_SCH=y diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 05e325ca3fbd..665a8d7cded0 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -75,7 +75,6 @@ CONFIG_NETFILTER_XT_MATCH_STRING=m CONFIG_NETFILTER_XT_MATCH_TCPMSS=m CONFIG_NETFILTER_XT_MATCH_TIME=m CONFIG_NETFILTER_XT_MATCH_U32=m -CONFIG_NF_CONNTRACK_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -90,13 +89,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_IP_DCCP=m -CONFIG_IRDA=m -CONFIG_IRLAN=m -CONFIG_IRNET=m -CONFIG_IRCOMM=m -CONFIG_IRDA_CACHE_LAST_LSAP=y -CONFIG_IRDA_FAST_RR=y -CONFIG_IRTTY_SIR=m CONFIG_BT=m CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 2de9aadf0f50..cf30fc24413b 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -64,7 +64,6 @@ CONFIG_HWPOISON_INJECT=m CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_PPC_64K_PAGES=y -CONFIG_PPC_SUBPAGE_PROT=y CONFIG_SCHED_SMT=y CONFIG_PM=y CONFIG_HOTPLUG_PCI=y @@ -246,7 +245,6 @@ CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_MAD=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_INFINIBAND_MTHCA=m -CONFIG_INFINIBAND_CXGB3=m CONFIG_INFINIBAND_CXGB4=m CONFIG_MLX4_INFINIBAND=m CONFIG_INFINIBAND_IPOIB=m diff --git a/arch/powerpc/configs/ppc40x_defconfig b/arch/powerpc/configs/ppc40x_defconfig index 25f6c91e843a..7e48693775f4 100644 --- a/arch/powerpc/configs/ppc40x_defconfig +++ b/arch/powerpc/configs/ppc40x_defconfig @@ -20,9 +20,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 8d7e3e98856d..48759656a067 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -268,7 +268,6 @@ CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_MAD=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_INFINIBAND_MTHCA=m -CONFIG_INFINIBAND_CXGB3=m CONFIG_INFINIBAND_CXGB4=m CONFIG_MLX4_INFINIBAND=m CONFIG_INFINIBAND_IPOIB=m diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index feb5d47d8d1e..5e6f92ba3210 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -53,7 +53,6 @@ CONFIG_MPC836x_MDS=y CONFIG_MPC836x_RDK=y CONFIG_MPC837x_MDS=y CONFIG_MPC837x_RDB=y -CONFIG_SBC834x=y CONFIG_ASP834x=y CONFIG_PPC_86xx=y CONFIG_MPC8641_HPCN=y @@ -187,7 +186,6 @@ CONFIG_NETFILTER_XT_MATCH_STRING=m CONFIG_NETFILTER_XT_MATCH_TCPMSS=m CONFIG_NETFILTER_XT_MATCH_TIME=m CONFIG_NETFILTER_XT_MATCH_U32=m -CONFIG_NF_CONNTRACK_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -203,7 +201,6 @@ CONFIG_IP_NF_SECURITY=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NF_CONNTRACK_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -241,7 +238,6 @@ CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m CONFIG_BRIDGE_EBT_NFLOG=m CONFIG_IP_DCCP=m -CONFIG_NET_DCCPPROBE=m CONFIG_TIPC=m CONFIG_ATM=m CONFIG_ATM_CLIP=m @@ -251,7 +247,6 @@ CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m CONFIG_DECNET=m CONFIG_DECNET_ROUTER=y -CONFIG_IPX=m CONFIG_ATALK=m CONFIG_DEV_APPLETALK=m CONFIG_IPDDP=m @@ -297,26 +292,6 @@ CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m -CONFIG_IRDA=m -CONFIG_IRLAN=m -CONFIG_IRNET=m -CONFIG_IRCOMM=m -CONFIG_IRDA_CACHE_LAST_LSAP=y -CONFIG_IRDA_FAST_RR=y -CONFIG_IRTTY_SIR=m -CONFIG_KINGSUN_DONGLE=m -CONFIG_KSDAZZLE_DONGLE=m -CONFIG_KS959_DONGLE=m -CONFIG_USB_IRDA=m -CONFIG_SIGMATEL_FIR=m -CONFIG_NSC_FIR=m -CONFIG_WINBOND_FIR=m -CONFIG_TOSHIBA_FIR=m -CONFIG_SMC_IRCC_FIR=m -CONFIG_ALI_FIR=m -CONFIG_VLSI_FIR=m -CONFIG_VIA_FIR=m -CONFIG_MCS_FIR=m CONFIG_BT=m CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y @@ -332,7 +307,6 @@ CONFIG_BT_HCIBFUSB=m CONFIG_BT_HCIDTL1=m CONFIG_BT_HCIBT3C=m CONFIG_BT_HCIBLUECARD=m -CONFIG_BT_HCIBTUART=m CONFIG_BT_HCIVHCI=m CONFIG_CFG80211=m CONFIG_MAC80211=m @@ -366,7 +340,6 @@ CONFIG_EEPROM_93CX6=m CONFIG_RAID_ATTRS=m CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m CONFIG_BLK_DEV_SR=m CONFIG_CHR_DEV_SG=y CONFIG_CHR_DEV_SCH=m @@ -663,7 +636,6 @@ CONFIG_I2C_MPC=m CONFIG_I2C_PCA_PLATFORM=m CONFIG_I2C_SIMTEC=m CONFIG_I2C_PARPORT=m -CONFIG_I2C_PARPORT_LIGHT=m CONFIG_I2C_TINY_USB=m CONFIG_I2C_PCA_ISA=m CONFIG_I2C_STUB=m @@ -676,7 +648,6 @@ CONFIG_W1_SLAVE_THERM=m CONFIG_W1_SLAVE_SMEM=m CONFIG_W1_SLAVE_DS2433=m CONFIG_W1_SLAVE_DS2433_CRC=y -CONFIG_W1_SLAVE_DS2760=m CONFIG_APM_POWER=m CONFIG_BATTERY_PMU=m CONFIG_HWMON=m @@ -1065,15 +1036,6 @@ CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y CONFIG_CIFS_DFS_UPCALL=y -CONFIG_NCP_FS=m -CONFIG_NCPFS_PACKET_SIGNING=y -CONFIG_NCPFS_IOCTL_LOCKING=y -CONFIG_NCPFS_STRONG=y -CONFIG_NCPFS_NFS_NS=y -CONFIG_NCPFS_OS2_NS=y -CONFIG_NCPFS_SMALLDOS=y -CONFIG_NCPFS_NLS=y -CONFIG_NCPFS_EXTRAS=y CONFIG_CODA_FS=m CONFIG_9P_FS=m CONFIG_NLS_DEFAULT="utf8" @@ -1117,7 +1079,6 @@ CONFIG_NLS_KOI8_U=m CONFIG_DEBUG_INFO=y CONFIG_UNUSED_SYMBOLS=y CONFIG_HEADERS_INSTALL=y -CONFIG_HEADERS_CHECK=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_OBJECTS=y diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 81b55c880fc3..142f1321fa58 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -34,8 +34,6 @@ CONFIG_KEXEC=y # CONFIG_SPARSEMEM_VMEMMAP is not set # CONFIG_COMPACTION is not set CONFIG_SCHED_SMT=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" CONFIG_PM=y CONFIG_PM_DEBUG=y # CONFIG_SECCOMP is not set diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index dfa4a726333b..d5dece981c02 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -57,7 +57,6 @@ CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_PPC_64K_PAGES=y -CONFIG_PPC_SUBPAGE_PROT=y CONFIG_SCHED_SMT=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_RPA=m @@ -94,6 +93,7 @@ CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_VIRTIO_BLK=m +CONFIG_BLK_DEV_NVME=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y @@ -224,7 +224,6 @@ CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_MAD=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_INFINIBAND_MTHCA=m -CONFIG_INFINIBAND_CXGB3=m CONFIG_INFINIBAND_CXGB4=m CONFIG_MLX4_INFINIBAND=m CONFIG_INFINIBAND_IPOIB=m diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index ad6739ac63dc..b806a5d3a695 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -45,7 +45,6 @@ CONFIG_IRQ_ALL_CPUS=y CONFIG_NUMA=y CONFIG_PPC_64K_PAGES=y CONFIG_SCHED_SMT=y -CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=tty0 console=hvc0 ipr.fast_reboot=1 quiet" # CONFIG_SECCOMP is not set # CONFIG_PPC_MEM_KEYS is not set diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig index b964084e4056..47dcfaddc1ac 100644 --- a/arch/powerpc/configs/storcenter_defconfig +++ b/arch/powerpc/configs/storcenter_defconfig @@ -12,7 +12,6 @@ CONFIG_EMBEDDED6xx=y CONFIG_STORCENTER=y CONFIG_HZ_100=y CONFIG_BINFMT_MISC=y -CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyS0,115200" # CONFIG_SECCOMP is not set CONFIG_NET=y diff --git a/arch/powerpc/crypto/crc32-vpmsum_core.S b/arch/powerpc/crypto/crc32-vpmsum_core.S index c3524eba4d0d..a16a717c809c 100644 --- a/arch/powerpc/crypto/crc32-vpmsum_core.S +++ b/arch/powerpc/crypto/crc32-vpmsum_core.S @@ -19,7 +19,7 @@ * We then use fixed point Barrett reduction to compute a mod n over GF(2) * for n = CRC using POWER8 instructions. We use x = 32. * - * http://en.wikipedia.org/wiki/Barrett_reduction + * https://en.wikipedia.org/wiki/Barrett_reduction * * Copyright (C) 2015 Anton Blanchard , IBM */ diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 2d444d09b553..90cd5c53af66 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -7,5 +7,6 @@ generic-y += export.h generic-y += kvm_types.h generic-y += local64.h generic-y += mcs_spinlock.h +generic-y += qrwlock.h generic-y += vtime.h generic-y += early_ioremap.h diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 7d81e86a1e5d..de14b1a34d56 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -98,7 +98,7 @@ unsigned long __init early_init(unsigned long dt_ptr); void __init machine_init(u64 dt_ptr); #endif long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs); -notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs); +notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv); notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr); notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr); @@ -144,13 +144,13 @@ void _kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr); void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr); /* Patch sites */ -extern s32 patch__call_flush_count_cache; +extern s32 patch__call_flush_branch_caches; extern s32 patch__flush_count_cache_return; extern s32 patch__flush_link_stack_return; extern s32 patch__call_kvm_flush_link_stack; extern s32 patch__memset_nocache, patch__memcpy_nocache; -extern long flush_count_cache; +extern long flush_branch_caches; extern long kvm_flush_link_stack; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 0311c3c42960..8a55eb8cc97b 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -191,6 +191,34 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v) #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) #define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) +/* + * Don't want to override the generic atomic_try_cmpxchg_acquire, because + * we add a lock hint to the lwarx, which may not be wanted for the + * _acquire case (and is not used by the other _acquire variants so it + * would be a surprise). + */ +static __always_inline bool +atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new) +{ + int r, o = *old; + + __asm__ __volatile__ ( +"1:\t" PPC_LWARX(%0,0,%2,1) " # atomic_try_cmpxchg_acquire \n" +" cmpw 0,%0,%3 \n" +" bne- 2f \n" +" stwcx. %4,0,%2 \n" +" bne- 1b \n" +"\t" PPC_ACQUIRE_BARRIER " \n" +"2: \n" + : "=&r" (r), "+m" (v->counter) + : "r" (&v->counter), "r" (o), "r" (new) + : "cr0", "memory"); + + if (unlikely(r != o)) + *old = r; + return likely(r == o); +} + /** * atomic_fetch_add_unless - add unless the number is a given value * @v: pointer of type atomic_t diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index 123adcefd40f..f53c42380832 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -7,6 +7,10 @@ #include +#ifndef __ASSEMBLY__ +#include +#endif + /* * Memory barrier. * The sync instruction guarantees that all memory accesses initiated @@ -76,6 +80,22 @@ do { \ ___p1; \ }) +#ifdef CONFIG_PPC64 +#define smp_cond_load_relaxed(ptr, cond_expr) ({ \ + typeof(ptr) __PTR = (ptr); \ + __unqual_scalar_typeof(*ptr) VAL; \ + VAL = READ_ONCE(*__PTR); \ + if (unlikely(!(cond_expr))) { \ + spin_begin(); \ + do { \ + VAL = READ_ONCE(*__PTR); \ + } while (!(cond_expr)); \ + spin_end(); \ + } \ + (typeof(*ptr))VAL; \ +}) +#endif + #ifdef CONFIG_PPC_BOOK3S_64 #define NOSPEC_BARRIER_SLOT nop #elif defined(CONFIG_PPC_FSL_BOOK3E) @@ -97,6 +117,15 @@ do { \ #define barrier_nospec() #endif /* CONFIG_PPC_BARRIER_NOSPEC */ +/* + * pmem_wmb() ensures that all stores for which the modification + * are written to persistent storage by preceding dcbfps/dcbstps + * instructions have updated persistent storage before any data + * access or data transfer caused by subsequent instructions is + * initiated. + */ +#define pmem_wmb() __asm__ __volatile__(PPC_PHWSYNC ::: "memory") + #include #endif /* _ASM_POWERPC_BARRIER_H */ diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 224912432821..36443cda8dcf 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -184,17 +184,7 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); */ #define VMALLOC_OFFSET (0x1000000) /* 16M */ -/* - * With CONFIG_STRICT_KERNEL_RWX, kernel segments are set NX. But when modules - * are used, NX cannot be set on VMALLOC space. So vmalloc VM space and linear - * memory shall not share segments. - */ -#if defined(CONFIG_STRICT_KERNEL_RWX) && defined(CONFIG_MODULES) -#define VMALLOC_START ((ALIGN((long)high_memory, 256L << 20) + VMALLOC_OFFSET) & \ - ~(VMALLOC_OFFSET - 1)) -#else #define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))) -#endif #ifdef CONFIG_KASAN_VMALLOC #define VMALLOC_END ALIGN_DOWN(ioremap_bot, PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) @@ -202,6 +192,11 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); #define VMALLOC_END ioremap_bot #endif +#ifdef CONFIG_STRICT_KERNEL_RWX +#define MODULES_END ALIGN_DOWN(PAGE_OFFSET, SZ_256M) +#define MODULES_VADDR (MODULES_END - SZ_256M) +#endif + #ifndef __ASSEMBLY__ #include #include diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 3f9ae3585ab9..082b98808701 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -34,11 +34,11 @@ #define H_PUD_TABLE_SIZE (sizeof(pud_t) << H_PUD_INDEX_SIZE) #define H_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE) -#define H_PAGE_F_GIX_SHIFT 53 -#define H_PAGE_F_SECOND _RPAGE_RPN44 /* HPTE is in 2ndary HPTEG */ -#define H_PAGE_F_GIX (_RPAGE_RPN43 | _RPAGE_RPN42 | _RPAGE_RPN41) -#define H_PAGE_BUSY _RPAGE_RSV1 /* software: PTE & hash are busy */ -#define H_PAGE_HASHPTE _RPAGE_RSV2 /* software: PTE & hash are busy */ +#define H_PAGE_F_GIX_SHIFT _PAGE_PA_MAX +#define H_PAGE_F_SECOND _RPAGE_PKEY_BIT0 /* HPTE is in 2ndary HPTEG */ +#define H_PAGE_F_GIX (_RPAGE_RPN43 | _RPAGE_RPN42 | _RPAGE_RPN41) +#define H_PAGE_BUSY _RPAGE_RSV1 +#define H_PAGE_HASHPTE _RPAGE_PKEY_BIT4 /* PTE flags to conserve for HPTE identification */ #define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | \ @@ -57,11 +57,12 @@ #define H_PMD_FRAG_NR (PAGE_SIZE >> H_PMD_FRAG_SIZE_SHIFT) /* memory key bits, only 8 keys supported */ -#define H_PTE_PKEY_BIT0 0 -#define H_PTE_PKEY_BIT1 0 -#define H_PTE_PKEY_BIT2 _RPAGE_RSV3 -#define H_PTE_PKEY_BIT3 _RPAGE_RSV4 -#define H_PTE_PKEY_BIT4 _RPAGE_RSV5 +#define H_PTE_PKEY_BIT4 0 +#define H_PTE_PKEY_BIT3 0 +#define H_PTE_PKEY_BIT2 _RPAGE_PKEY_BIT3 +#define H_PTE_PKEY_BIT1 _RPAGE_PKEY_BIT2 +#define H_PTE_PKEY_BIT0 _RPAGE_PKEY_BIT1 + /* * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 0729c034e56f..f20de1149ebe 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -32,15 +32,15 @@ */ #define H_PAGE_COMBO _RPAGE_RPN0 /* this is a combo 4k page */ #define H_PAGE_4K_PFN _RPAGE_RPN1 /* PFN is for a single 4k page */ -#define H_PAGE_BUSY _RPAGE_RPN44 /* software: PTE & hash are busy */ +#define H_PAGE_BUSY _RPAGE_RSV1 /* software: PTE & hash are busy */ #define H_PAGE_HASHPTE _RPAGE_RPN43 /* PTE has associated HPTE */ /* memory key bits. */ -#define H_PTE_PKEY_BIT0 _RPAGE_RSV1 -#define H_PTE_PKEY_BIT1 _RPAGE_RSV2 -#define H_PTE_PKEY_BIT2 _RPAGE_RSV3 -#define H_PTE_PKEY_BIT3 _RPAGE_RSV4 -#define H_PTE_PKEY_BIT4 _RPAGE_RSV5 +#define H_PTE_PKEY_BIT4 _RPAGE_PKEY_BIT4 +#define H_PTE_PKEY_BIT3 _RPAGE_PKEY_BIT3 +#define H_PTE_PKEY_BIT2 _RPAGE_PKEY_BIT2 +#define H_PTE_PKEY_BIT1 _RPAGE_PKEY_BIT1 +#define H_PTE_PKEY_BIT0 _RPAGE_PKEY_BIT0 /* * We need to differentiate between explicit huge page and THP huge diff --git a/arch/powerpc/include/asm/book3s/64/hash-pkey.h b/arch/powerpc/include/asm/book3s/64/hash-pkey.h new file mode 100644 index 000000000000..795010897e5d --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/hash-pkey.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_BOOK3S_64_HASH_PKEY_H +#define _ASM_POWERPC_BOOK3S_64_HASH_PKEY_H + +static inline u64 hash__vmflag_to_pte_pkey_bits(u64 vm_flags) +{ + return (((vm_flags & VM_PKEY_BIT0) ? H_PTE_PKEY_BIT0 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT1) ? H_PTE_PKEY_BIT1 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT2) ? H_PTE_PKEY_BIT2 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT3) ? H_PTE_PKEY_BIT3 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT4) ? H_PTE_PKEY_BIT4 : 0x0UL)); +} + +static inline u64 pte_to_hpte_pkey_bits(u64 pteflags) +{ + return (((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL)); +} + +static inline u16 hash__pte_to_pkey_bits(u64 pteflags) +{ + return (((pteflags & H_PTE_PKEY_BIT4) ? 0x10 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT3) ? 0x8 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT2) ? 0x4 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT1) ? 0x2 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT0) ? 0x1 : 0x0UL)); +} + +#endif diff --git a/arch/powerpc/include/asm/book3s/64/kexec.h b/arch/powerpc/include/asm/book3s/64/kexec.h new file mode 100644 index 000000000000..6b5c3a248ba2 --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/kexec.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_POWERPC_BOOK3S_64_KEXEC_H_ +#define _ASM_POWERPC_BOOK3S_64_KEXEC_H_ + + +#define reset_sprs reset_sprs +static inline void reset_sprs(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_206)) { + mtspr(SPRN_AMR, 0); + mtspr(SPRN_UAMOR, 0); + } + + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + mtspr(SPRN_IAMR, 0); + } + + /* Do we need isync()? We are going via a kexec reset */ + isync(); +} + +#endif diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 3fa1b962dc27..93d18da5e7ec 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -86,8 +86,8 @@ #define HPTE_R_PP0 ASM_CONST(0x8000000000000000) #define HPTE_R_TS ASM_CONST(0x4000000000000000) #define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000) -#define HPTE_R_KEY_BIT0 ASM_CONST(0x2000000000000000) -#define HPTE_R_KEY_BIT1 ASM_CONST(0x1000000000000000) +#define HPTE_R_KEY_BIT4 ASM_CONST(0x2000000000000000) +#define HPTE_R_KEY_BIT3 ASM_CONST(0x1000000000000000) #define HPTE_R_RPN_SHIFT 12 #define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000) #define HPTE_R_RPN_3_0 ASM_CONST(0x01fffffffffff000) @@ -103,8 +103,8 @@ #define HPTE_R_R ASM_CONST(0x0000000000000100) #define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00) #define HPTE_R_KEY_BIT2 ASM_CONST(0x0000000000000800) -#define HPTE_R_KEY_BIT3 ASM_CONST(0x0000000000000400) -#define HPTE_R_KEY_BIT4 ASM_CONST(0x0000000000000200) +#define HPTE_R_KEY_BIT1 ASM_CONST(0x0000000000000400) +#define HPTE_R_KEY_BIT0 ASM_CONST(0x0000000000000200) #define HPTE_R_KEY (HPTE_R_KEY_LO | HPTE_R_KEY_HI) #define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000) @@ -793,7 +793,7 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea, } /* - * For kernel space, we use context ids as below + * For kernel space, we use context ids as * below. Range is 512TB per context. * * 0x00001 - [ 0xc000000000000000 - 0xc001ffffffffffff] diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 5393a535240c..55442d45c597 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -82,6 +82,11 @@ extern unsigned int mmu_pid_bits; /* Base PID to allocate from */ extern unsigned int mmu_base_pid; +/* + * memory block size used with radix translation. + */ +extern unsigned int __ro_after_init radix_mem_block_size; + #define PRTB_SIZE_SHIFT (mmu_pid_bits + 4) #define PRTB_ENTRIES (1ul << mmu_pid_bits) @@ -209,6 +214,12 @@ extern int mmu_io_psize; void mmu_early_init_devtree(void); void hash__early_init_devtree(void); void radix__early_init_devtree(void); +#ifdef CONFIG_PPC_MEM_KEYS +void pkey_early_init_devtree(void); +#else +static inline void pkey_early_init_devtree(void) {} +#endif + extern void hash__early_init_mmu(void); extern void radix__early_init_mmu(void); static inline void __init early_init_mmu(void) diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 69c5b051734f..e1af0b394ceb 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -107,9 +107,23 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) return pud; } +static inline void __pud_free(pud_t *pud) +{ + struct page *page = virt_to_page(pud); + + /* + * Early pud pages allocated via memblock allocator + * can't be directly freed to slab + */ + if (PageReserved(page)) + free_reserved_page(page); + else + kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud); +} + static inline void pud_free(struct mm_struct *mm, pud_t *pud) { - kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud); + return __pud_free(pud); } static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 25c3cb8272c0..6de56c3b33c4 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -20,9 +20,13 @@ #define _PAGE_RW (_PAGE_READ | _PAGE_WRITE) #define _PAGE_RWX (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC) #define _PAGE_PRIVILEGED 0x00008 /* kernel access only */ -#define _PAGE_SAO 0x00010 /* Strong access order */ + +#define _PAGE_CACHE_CTL 0x00030 /* Bits for the folowing cache modes */ + /* No bits set is normal cacheable memory */ + /* 0x00010 unused, is SAO bit on radix POWER9 */ #define _PAGE_NON_IDEMPOTENT 0x00020 /* non idempotent memory */ #define _PAGE_TOLERANT 0x00030 /* tolerant memory, cache inhibited */ + #define _PAGE_DIRTY 0x00080 /* C: page changed */ #define _PAGE_ACCESSED 0x00100 /* R: page referenced */ /* @@ -32,11 +36,13 @@ #define _RPAGE_SW1 0x00800 #define _RPAGE_SW2 0x00400 #define _RPAGE_SW3 0x00200 -#define _RPAGE_RSV1 0x1000000000000000UL -#define _RPAGE_RSV2 0x0800000000000000UL -#define _RPAGE_RSV3 0x0400000000000000UL -#define _RPAGE_RSV4 0x0200000000000000UL -#define _RPAGE_RSV5 0x00040UL +#define _RPAGE_RSV1 0x00040UL + +#define _RPAGE_PKEY_BIT4 0x1000000000000000UL +#define _RPAGE_PKEY_BIT3 0x0800000000000000UL +#define _RPAGE_PKEY_BIT2 0x0400000000000000UL +#define _RPAGE_PKEY_BIT1 0x0200000000000000UL +#define _RPAGE_PKEY_BIT0 0x0100000000000000UL #define _PAGE_PTE 0x4000000000000000UL /* distinguishes PTEs from pointers */ #define _PAGE_PRESENT 0x8000000000000000UL /* pte contains a translation */ @@ -58,13 +64,12 @@ */ #define _RPAGE_RPN0 0x01000 #define _RPAGE_RPN1 0x02000 -#define _RPAGE_RPN44 0x0100000000000000UL #define _RPAGE_RPN43 0x0080000000000000UL #define _RPAGE_RPN42 0x0040000000000000UL #define _RPAGE_RPN41 0x0020000000000000UL /* Max physical address bit as per radix table */ -#define _RPAGE_PA_MAX 57 +#define _RPAGE_PA_MAX 56 /* * Max physical address bit we will use for now. @@ -125,8 +130,6 @@ _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \ _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) -#define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \ - H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4) /* * We define 2 sets of base prot bits, one for basic pages (ie, * cacheable kernel and user pages) and one for non cacheable @@ -825,8 +828,6 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, return hash__set_pte_at(mm, addr, ptep, pte, percpu); } -#define _PAGE_CACHE_CTL (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT) - #define pgprot_noncached pgprot_noncached static inline pgprot_t pgprot_noncached(pgprot_t prot) { diff --git a/arch/powerpc/include/asm/book3s/64/pkeys.h b/arch/powerpc/include/asm/book3s/64/pkeys.h new file mode 100644 index 000000000000..b7d9f4267bcd --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/pkeys.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef _ASM_POWERPC_BOOK3S_64_PKEYS_H +#define _ASM_POWERPC_BOOK3S_64_PKEYS_H + +#include + +extern u64 __ro_after_init default_uamor; + +static inline u64 vmflag_to_pte_pkey_bits(u64 vm_flags) +{ + if (!mmu_has_feature(MMU_FTR_PKEY)) + return 0x0UL; + + if (radix_enabled()) + BUG(); + return hash__vmflag_to_pte_pkey_bits(vm_flags); +} + +static inline u16 pte_to_pkey_bits(u64 pteflags) +{ + if (radix_enabled()) + BUG(); + return hash__pte_to_pkey_bits(pteflags); +} + +#endif /*_ASM_POWERPC_KEYS_H */ diff --git a/arch/powerpc/include/asm/book3s/64/radix-4k.h b/arch/powerpc/include/asm/book3s/64/radix-4k.h index d5f5ab73dc7f..035ceecd6d67 100644 --- a/arch/powerpc/include/asm/book3s/64/radix-4k.h +++ b/arch/powerpc/include/asm/book3s/64/radix-4k.h @@ -11,7 +11,7 @@ #define RADIX_PGD_INDEX_SIZE 13 // size: 8B << 13 = 64KB, maps 2^13 x 512GB = 4PB /* - * One fragment per per page + * One fragment per page */ #define RADIX_PTE_FRAG_SIZE_SHIFT (RADIX_PTE_INDEX_SIZE + 3) #define RADIX_PTE_FRAG_NR (PAGE_SIZE >> RADIX_PTE_FRAG_SIZE_SHIFT) diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index ca8db193ae38..94439e0cefc9 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -2,10 +2,25 @@ #ifndef _ASM_POWERPC_TLBFLUSH_RADIX_H #define _ASM_POWERPC_TLBFLUSH_RADIX_H +#include + struct vm_area_struct; struct mm_struct; struct mmu_gather; +static inline u64 psize_to_rpti_pgsize(unsigned long psize) +{ + if (psize == MMU_PAGE_4K) + return H_RPTI_PAGE_4K; + if (psize == MMU_PAGE_64K) + return H_RPTI_PAGE_64K; + if (psize == MMU_PAGE_2M) + return H_RPTI_PAGE_2M; + if (psize == MMU_PAGE_1G) + return H_RPTI_PAGE_1G; + return H_RPTI_PAGE_ALL; +} + static inline int mmu_get_ap(int psize) { return mmu_psize_defs[psize].ap; diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index de600b915a3c..54764c6e922d 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -6,6 +6,7 @@ #include #include +#include #ifdef CONFIG_PPC_BOOK3S_64 /* diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index bac2252c839e..fdddb822d564 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -191,14 +191,14 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_SPURR LONG_ASM_CONST(0x0000000001000000) #define CPU_FTR_DSCR LONG_ASM_CONST(0x0000000002000000) #define CPU_FTR_VSX LONG_ASM_CONST(0x0000000004000000) -#define CPU_FTR_SAO LONG_ASM_CONST(0x0000000008000000) +// Free LONG_ASM_CONST(0x0000000008000000) #define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0000000010000000) #define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0000000020000000) #define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0000000040000000) #define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0000000080000000) #define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0000000100000000) #define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0000000200000000) -#define CPU_FTR_PKEY LONG_ASM_CONST(0x0000000400000000) +/* LONG_ASM_CONST(0x0000000400000000) Free */ #define CPU_FTR_VMX_COPY LONG_ASM_CONST(0x0000000800000000) #define CPU_FTR_TM LONG_ASM_CONST(0x0000001000000000) #define CPU_FTR_CFAR LONG_ASM_CONST(0x0000002000000000) @@ -214,6 +214,7 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_P9_TLBIE_ERAT_BUG LONG_ASM_CONST(0x0001000000000000) #define CPU_FTR_P9_RADIX_PREFETCH_BUG LONG_ASM_CONST(0x0002000000000000) #define CPU_FTR_ARCH_31 LONG_ASM_CONST(0x0004000000000000) +#define CPU_FTR_DAWR1 LONG_ASM_CONST(0x0008000000000000) #ifndef __ASSEMBLY__ @@ -435,32 +436,32 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \ + CPU_FTR_DSCR | CPU_FTR_ASYM_SMT | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | \ - CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX | CPU_FTR_PKEY) + CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX ) #define CPU_FTRS_POWER8 (CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | CPU_FTR_SAO | \ + CPU_FTR_DSCR | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ - CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_PKEY) + CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP ) #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG) #define CPU_FTRS_POWER9 (CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | CPU_FTR_SAO | \ + CPU_FTR_DSCR | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \ - CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \ - CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TLBIE_ERAT_BUG | CPU_FTR_P9_TIDR) + CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_P9_TLBIE_STQ_BUG | \ + CPU_FTR_P9_TLBIE_ERAT_BUG | CPU_FTR_P9_TIDR) #define CPU_FTRS_POWER9_DD2_0 (CPU_FTRS_POWER9 | CPU_FTR_P9_RADIX_PREFETCH_BUG) #define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | \ CPU_FTR_P9_RADIX_PREFETCH_BUG | \ @@ -473,12 +474,12 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | CPU_FTR_SAO | \ + CPU_FTR_DSCR | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \ - CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \ - CPU_FTR_ARCH_31) + CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_ARCH_31 | \ + CPU_FTR_DAWR | CPU_FTR_DAWR1) #define CPU_FTRS_CELL (CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ @@ -628,9 +629,10 @@ enum { /* * Maximum number of hw breakpoint supported on powerpc. Number of - * breakpoints supported by actual hw might be less than this. + * breakpoints supported by actual hw might be less than this, which + * is decided at run time in nr_wp_slots(). */ -#define HBP_NUM_MAX 1 +#define HBP_NUM_MAX 2 #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 0fccd5ea1e9a..ed75d1c318e3 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -36,6 +36,8 @@ static inline unsigned long cputime_to_usecs(const cputime_t ct) return mulhdu((__force u64) ct, __cputime_usec_factor); } +#define cputime_to_nsecs(cputime) tb_to_ns((__force u64)cputime) + /* * PPC64 uses PACA which is task independent for storing accounting data while * PPC32 uses struct thread_info, therefore at task switch the accounting data @@ -65,7 +67,7 @@ static inline void arch_vtime_task_switch(struct task_struct *prev) /* * account_cpu_user_entry/exit runs "unreconciled", so can't trace, - * can't use use get_paca() + * can't use get_paca() */ static notrace inline void account_cpu_user_entry(void) { diff --git a/arch/powerpc/include/asm/crashdump-ppc64.h b/arch/powerpc/include/asm/crashdump-ppc64.h new file mode 100644 index 000000000000..68d9717cc5ee --- /dev/null +++ b/arch/powerpc/include/asm/crashdump-ppc64.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_POWERPC_CRASHDUMP_PPC64_H +#define _ASM_POWERPC_CRASHDUMP_PPC64_H + +/* + * Backup region - first 64KB of System RAM + * + * If ever the below macros are to be changed, please be judicious. + * The implicit assumptions are: + * - start, end & size are less than UINT32_MAX. + * - start & size are at least 8 byte aligned. + * + * For implementation details: arch/powerpc/purgatory/trampoline_64.S + */ +#define BACKUP_SRC_START 0 +#define BACKUP_SRC_END 0xffff +#define BACKUP_SRC_SIZE (BACKUP_SRC_END - BACKUP_SRC_START + 1) + +#endif /* __ASM_POWERPC_CRASHDUMP_PPC64_H */ diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h index 4ce6808deed3..3e9da22a2779 100644 --- a/arch/powerpc/include/asm/dbell.h +++ b/arch/powerpc/include/asm/dbell.h @@ -11,8 +11,10 @@ #include #include +#include #include #include +#include #define PPC_DBELL_MSG_BRDCAST (0x04000000) #define PPC_DBELL_TYPE(x) (((x) & 0xf) << (63-36)) @@ -87,9 +89,6 @@ static inline void ppc_msgsync(void) #endif /* CONFIG_PPC_BOOK3S */ -extern void doorbell_global_ipi(int cpu); -extern void doorbell_core_ipi(int cpu); -extern int doorbell_try_core_ipi(int cpu); extern void doorbell_exception(struct pt_regs *regs); static inline void ppc_msgsnd(enum ppc_dbell type, u32 flags, u32 tag) @@ -100,4 +99,63 @@ static inline void ppc_msgsnd(enum ppc_dbell type, u32 flags, u32 tag) _ppc_msgsnd(msg); } +#ifdef CONFIG_SMP + +/* + * Doorbells must only be used if CPU_FTR_DBELL is available. + * msgsnd is used in HV, and msgsndp is used in !HV. + * + * These should be used by platform code that is aware of restrictions. + * Other arch code should use ->cause_ipi. + * + * doorbell_global_ipi() sends a dbell to any target CPU. + * Must be used only by architectures that address msgsnd target + * by PIR/get_hard_smp_processor_id. + */ +static inline void doorbell_global_ipi(int cpu) +{ + u32 tag = get_hard_smp_processor_id(cpu); + + kvmppc_set_host_ipi(cpu); + /* Order previous accesses vs. msgsnd, which is treated as a store */ + ppc_msgsnd_sync(); + ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag); +} + +/* + * doorbell_core_ipi() sends a dbell to a target CPU in the same core. + * Must be used only by architectures that address msgsnd target + * by TIR/cpu_thread_in_core. + */ +static inline void doorbell_core_ipi(int cpu) +{ + u32 tag = cpu_thread_in_core(cpu); + + kvmppc_set_host_ipi(cpu); + /* Order previous accesses vs. msgsnd, which is treated as a store */ + ppc_msgsnd_sync(); + ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag); +} + +/* + * Attempt to cause a core doorbell if destination is on the same core. + * Returns 1 on success, 0 on failure. + */ +static inline int doorbell_try_core_ipi(int cpu) +{ + int this_cpu = get_cpu(); + int ret = 0; + + if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) { + doorbell_core_ipi(cpu); + ret = 1; + } + + put_cpu(); + + return ret; +} + +#endif /* CONFIG_SMP */ + #endif /* _ASM_POWERPC_DBELL_H */ diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index 452402215e12..d8a0729cf754 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -44,6 +44,9 @@ struct dev_archdata { #ifdef CONFIG_CXL_BASE struct cxl_context *cxl_ctx; #endif +#ifdef CONFIG_PCI_IOV + void *iov_data; +#endif }; struct pdev_archdata { diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 414d209f45bb..17ccc6474ab6 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -90,13 +90,14 @@ static inline bool drmem_lmb_reserved(struct drmem_lmb *lmb) } u64 drmem_lmb_memory_max(void); -void __init walk_drmem_lmbs(struct device_node *dn, - void (*func)(struct drmem_lmb *, const __be32 **)); +int walk_drmem_lmbs(struct device_node *dn, void *data, + int (*func)(struct drmem_lmb *, const __be32 **, void *)); int drmem_update_dt(void); #ifdef CONFIG_PPC_PSERIES -void __init walk_drmem_lmbs_early(unsigned long node, - void (*func)(struct drmem_lmb *, const __be32 **)); +int __init +walk_drmem_lmbs_early(unsigned long node, void *data, + int (*func)(struct drmem_lmb *, const __be32 **, void *)); #endif static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 964a54292b36..d5f369bcd130 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -133,7 +133,6 @@ static inline bool eeh_pe_passed(struct eeh_pe *pe) struct eeh_dev { int mode; /* EEH mode */ - int class_code; /* Class code of the device */ int bdfn; /* bdfn of device (for cfg ops) */ struct pci_controller *controller; int pe_config_addr; /* PE config address */ @@ -148,7 +147,10 @@ struct eeh_dev { struct pci_dn *pdn; /* Associated PCI device node */ struct pci_dev *pdev; /* Associated PCI device */ bool in_error; /* Error flag for edev */ + + /* VF specific properties */ struct pci_dev *physfn; /* Associated SRIOV PF */ + int vf_index; /* Index of this VF */ }; /* "fmt" must be a simple literal string */ @@ -217,18 +219,17 @@ struct eeh_ops { int (*init)(void); struct eeh_dev *(*probe)(struct pci_dev *pdev); int (*set_option)(struct eeh_pe *pe, int option); - int (*get_pe_addr)(struct eeh_pe *pe); int (*get_state)(struct eeh_pe *pe, int *delay); int (*reset)(struct eeh_pe *pe, int option); int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len); int (*configure_bridge)(struct eeh_pe *pe); int (*err_inject)(struct eeh_pe *pe, int type, int func, unsigned long addr, unsigned long mask); - int (*read_config)(struct pci_dn *pdn, int where, int size, u32 *val); - int (*write_config)(struct pci_dn *pdn, int where, int size, u32 val); + int (*read_config)(struct eeh_dev *edev, int where, int size, u32 *val); + int (*write_config)(struct eeh_dev *edev, int where, int size, u32 val); int (*next_error)(struct eeh_pe **pe); - int (*restore_config)(struct pci_dn *pdn); - int (*notify_resume)(struct pci_dn *pdn); + int (*restore_config)(struct eeh_dev *edev); + int (*notify_resume)(struct eeh_dev *edev); }; extern int eeh_subsystem_flags; @@ -282,8 +283,8 @@ struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb); struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root); struct eeh_pe *eeh_pe_get(struct pci_controller *phb, int pe_no, int config_addr); -int eeh_add_to_parent_pe(struct eeh_dev *edev); -int eeh_rmv_from_parent_pe(struct eeh_dev *edev); +int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent); +int eeh_pe_tree_remove(struct eeh_dev *edev); void eeh_pe_update_time_stamp(struct eeh_pe *pe); void *eeh_pe_traverse(struct eeh_pe *root, eeh_pe_traverse_func fn, void *flag); @@ -293,8 +294,6 @@ void eeh_pe_restore_bars(struct eeh_pe *pe); const char *eeh_pe_loc_get(struct eeh_pe *pe); struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe); -struct eeh_dev *eeh_dev_init(struct pci_dn *pdn); -void eeh_dev_phb_init_dynamic(struct pci_controller *phb); void eeh_show_enabled(void); int __init eeh_ops_register(struct eeh_ops *ops); int __exit eeh_ops_unregister(const char *name); @@ -314,7 +313,6 @@ int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed); int eeh_pe_configure(struct eeh_pe *pe); int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, unsigned long addr, unsigned long mask); -int eeh_restore_vf_config(struct pci_dn *pdn); /** * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure. @@ -340,11 +338,6 @@ static inline bool eeh_enabled(void) static inline void eeh_show_enabled(void) { } -static inline void *eeh_dev_init(struct pci_dn *pdn, void *data) -{ - return NULL; -} - static inline void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { } static inline int eeh_check_failure(const volatile void __iomem *token) @@ -362,6 +355,7 @@ static inline void eeh_remove_device(struct pci_dev *dev) { } #define EEH_POSSIBLE_ERROR(val, type) (0) #define EEH_IO_ERROR_VALUE(size) (-1UL) +static inline int eeh_phb_pe_create(struct pci_controller *phb) { return 0; } #endif /* CONFIG_EEH */ #if defined(CONFIG_PPC_PSERIES) && defined(CONFIG_EEH) diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index d3a7e36f1402..c99ba08a408d 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -37,7 +37,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* A "hypercall" is an "sc 1" instruction. This header file file provides C +/* A "hypercall" is an "sc 1" instruction. This header file provides C * wrapper functions for the ePAPR hypervisor interface. It is inteded * for use by Linux device drivers and other operating systems. * @@ -246,7 +246,7 @@ static inline unsigned int ev_int_get_mask(unsigned int interrupt, * ev_int_eoi - signal the end of interrupt processing * @interrupt: the interrupt number * - * This function signals the end of processing for the the specified + * This function signals the end of processing for the specified * interrupt, which must be the interrupt currently in service. By * definition, this is also the highest-priority interrupt. * diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h index 54a98ef7d7fe..40cdcb2fb057 100644 --- a/arch/powerpc/include/asm/exception-64e.h +++ b/arch/powerpc/include/asm/exception-64e.h @@ -66,14 +66,7 @@ #define EX_TLB_SRR0 (10 * 8) #define EX_TLB_SRR1 (11 * 8) #define EX_TLB_R7 (12 * 8) -#ifdef CONFIG_BOOK3E_MMU_TLB_STATS -#define EX_TLB_R8 (13 * 8) -#define EX_TLB_R9 (14 * 8) -#define EX_TLB_LR (15 * 8) -#define EX_TLB_SIZE (16 * 8) -#else #define EX_TLB_SIZE (13 * 8) -#endif #define START_EXCEPTION(label) \ .globl exc_##label##_book3e; \ @@ -110,8 +103,7 @@ exc_##label##_book3e: std r11,EX_TLB_R12(r12); \ mtspr SPRN_SPRG_TLB_EXFRAME,r14; \ std r15,EX_TLB_SRR1(r12); \ - std r16,EX_TLB_SRR0(r12); \ - TLB_MISS_PROLOG_STATS + std r16,EX_TLB_SRR0(r12); /* And these are the matching epilogs that restores things * @@ -143,7 +135,6 @@ exc_##label##_book3e: mtspr SPRN_SRR0,r15; \ ld r15,EX_TLB_R15(r12); \ mtspr SPRN_SRR1,r16; \ - TLB_MISS_RESTORE_STATS \ ld r16,EX_TLB_R16(r12); \ ld r12,EX_TLB_R12(r12); \ @@ -158,53 +149,15 @@ exc_##label##_book3e: addi r11,r13,PACA_EXTLB; \ TLB_MISS_RESTORE(r11) -#ifdef CONFIG_BOOK3E_MMU_TLB_STATS -#define TLB_MISS_PROLOG_STATS \ - mflr r10; \ - std r8,EX_TLB_R8(r12); \ - std r9,EX_TLB_R9(r12); \ - std r10,EX_TLB_LR(r12); -#define TLB_MISS_RESTORE_STATS \ - ld r16,EX_TLB_LR(r12); \ - ld r9,EX_TLB_R9(r12); \ - ld r8,EX_TLB_R8(r12); \ - mtlr r16; -#define TLB_MISS_STATS_D(name) \ - addi r9,r13,MMSTAT_DSTATS+name; \ - bl tlb_stat_inc; -#define TLB_MISS_STATS_I(name) \ - addi r9,r13,MMSTAT_ISTATS+name; \ - bl tlb_stat_inc; -#define TLB_MISS_STATS_X(name) \ - ld r8,PACA_EXTLB+EX_TLB_ESR(r13); \ - cmpdi cr2,r8,-1; \ - beq cr2,61f; \ - addi r9,r13,MMSTAT_DSTATS+name; \ - b 62f; \ -61: addi r9,r13,MMSTAT_ISTATS+name; \ -62: bl tlb_stat_inc; -#define TLB_MISS_STATS_SAVE_INFO \ - std r14,EX_TLB_ESR(r12); /* save ESR */ -#define TLB_MISS_STATS_SAVE_INFO_BOLTED \ - std r14,PACA_EXTLB+EX_TLB_ESR(r13); /* save ESR */ -#else -#define TLB_MISS_PROLOG_STATS -#define TLB_MISS_RESTORE_STATS -#define TLB_MISS_PROLOG_STATS_BOLTED -#define TLB_MISS_RESTORE_STATS_BOLTED -#define TLB_MISS_STATS_D(name) -#define TLB_MISS_STATS_I(name) -#define TLB_MISS_STATS_X(name) -#define TLB_MISS_STATS_Y(name) -#define TLB_MISS_STATS_SAVE_INFO -#define TLB_MISS_STATS_SAVE_INFO_BOLTED -#endif - #define SET_IVOR(vector_number, vector_offset) \ LOAD_REG_ADDR(r3,interrupt_base_book3e);\ ori r3,r3,vector_offset@l; \ mtspr SPRN_IVOR##vector_number,r3; - +/* + * powerpc relies on return from interrupt/syscall being context synchronising + * (which rfi is) to support ARCH_HAS_MEMBARRIER_SYNC_CORE without additional + * synchronisation instructions. + */ #define RFI_TO_KERNEL \ rfi diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 47bd4ea0837d..ebe95aa04d53 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -68,6 +68,14 @@ * * The nop instructions allow us to insert one or more instructions to flush the * L1-D cache when returning to userspace or a guest. + * + * powerpc relies on return from interrupt/syscall being context synchronising + * (which hrfid, rfid, and rfscv are) to support ARCH_HAS_MEMBARRIER_SYNC_CORE + * without additional synchronisation instructions. + * + * soft-masked interrupt replay does not include a context-synchronising rfid, + * but those always return to kernel, the sync is only required when returning + * to user. */ #define RFI_FLUSH_SLOT \ RFI_FLUSH_FIXUP_SECTION; \ @@ -123,6 +131,12 @@ hrfid; \ b hrfi_flush_fallback +#define RFSCV_TO_USER \ + STF_EXIT_BARRIER_SLOT; \ + RFI_FLUSH_SLOT; \ + RFSCV; \ + b rfscv_flush_fallback + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_EXCEPTION_H */ diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 6003c2e533a0..0b295bdb201e 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -52,6 +52,7 @@ #define FW_FEATURE_PAPR_SCM ASM_CONST(0x0000002000000000) #define FW_FEATURE_ULTRAVISOR ASM_CONST(0x0000004000000000) #define FW_FEATURE_STUFF_TCE ASM_CONST(0x0000008000000000) +#define FW_FEATURE_RPT_INVALIDATE ASM_CONST(0x0000010000000000) #ifndef __ASSEMBLY__ @@ -71,7 +72,8 @@ enum { FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN | FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 | FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE | - FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR, + FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR | + FW_FEATURE_RPT_INVALIDATE, FW_FEATURE_PSERIES_ALWAYS = 0, FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR, FW_FEATURE_POWERNV_ALWAYS = 0, @@ -132,6 +134,12 @@ extern int ibm_nmi_interlock_token; extern unsigned int __start___fw_ftr_fixup, __stop___fw_ftr_fixup; +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) +bool is_kvm_guest(void); +#else +static inline bool is_kvm_guest(void) { return false; } +#endif + #ifdef CONFIG_PPC_PSERIES void pseries_probe_fw_features(void); #else diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 29188810ba30..925cf89cbf4b 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -52,7 +52,7 @@ enum fixed_addresses { FIX_HOLE, /* reserve the top 128K for early debugging purposes */ FIX_EARLY_DEBUG_TOP = FIX_HOLE, - FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+((128*1024)/PAGE_SIZE)-1, + FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128, PAGE_SIZE)/PAGE_SIZE)-1, #ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h index f1e9067bd5ac..f133b5930ae1 100644 --- a/arch/powerpc/include/asm/hardirq.h +++ b/arch/powerpc/include/asm/hardirq.h @@ -13,7 +13,6 @@ typedef struct { unsigned int pmu_irqs; unsigned int mce_exceptions; unsigned int spurious_irqs; - unsigned int hmi_exceptions; unsigned int sreset_irqs; #ifdef CONFIG_PPC_WATCHDOG unsigned int soft_nmi_irqs; diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index 2dabcf668292..4cb9efa2eb21 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -128,7 +128,7 @@ end_##sname: .if ((start) % (size) != 0); \ .error "Fixed section exception vector misalignment"; \ .endif; \ - .if ((size) != 0x20) && ((size) != 0x80) && ((size) != 0x100); \ + .if ((size) != 0x20) && ((size) != 0x80) && ((size) != 0x100) && ((size) != 0x1000); \ .error "Fixed section exception vector bad size"; \ .endif; \ .if (start) < sname##_start; \ diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 551a9d4d3958..013165e62618 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -57,6 +57,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty); +void gigantic_hugetlb_cma_reserve(void) __init; #include #else /* ! CONFIG_HUGETLB_PAGE */ @@ -71,6 +72,12 @@ static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, { return NULL; } + + +static inline void __init gigantic_hugetlb_cma_reserve(void) +{ +} + #endif /* CONFIG_HUGETLB_PAGE */ #endif /* _ASM_POWERPC_HUGETLB_H */ diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index e90c073e437e..fbb377055471 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -237,7 +237,7 @@ #define H_CREATE_RPT 0x1A4 #define H_REMOVE_RPT 0x1A8 #define H_REGISTER_RPAGES 0x1AC -#define H_DISABLE_AND_GETC 0x1B0 +#define H_DISABLE_AND_GET 0x1B0 #define H_ERROR_DATA 0x1B4 #define H_GET_HCA_INFO 0x1B8 #define H_GET_PERF_COUNT 0x1BC @@ -305,7 +305,8 @@ #define H_SCM_UNBIND_ALL 0x3FC #define H_SCM_HEALTH 0x400 #define H_SCM_PERFORMANCE_STATS 0x418 -#define MAX_HCALL_OPCODE H_SCM_PERFORMANCE_STATS +#define H_RPT_INVALIDATE 0x448 +#define MAX_HCALL_OPCODE H_RPT_INVALIDATE /* Scope args for H_SCM_UNBIND_ALL */ #define H_UNBIND_SCOPE_ALL (0x1) @@ -354,9 +355,10 @@ /* Values for 2nd argument to H_SET_MODE */ #define H_SET_MODE_RESOURCE_SET_CIABR 1 -#define H_SET_MODE_RESOURCE_SET_DAWR 2 +#define H_SET_MODE_RESOURCE_SET_DAWR0 2 #define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3 #define H_SET_MODE_RESOURCE_LE 4 +#define H_SET_MODE_RESOURCE_SET_DAWR1 5 /* Values for argument to H_SIGNAL_SYS_RESET */ #define H_SIGNAL_SYS_RESET_ALL -1 @@ -389,6 +391,37 @@ #define PROC_TABLE_RADIX 0x04 #define PROC_TABLE_GTSE 0x01 +/* + * Defines for + * H_RPT_INVALIDATE - Invalidate RPT translation lookaside information. + */ + +/* Type of translation to invalidate (type) */ +#define H_RPTI_TYPE_NESTED 0x0001 /* Invalidate nested guest partition-scope */ +#define H_RPTI_TYPE_TLB 0x0002 /* Invalidate TLB */ +#define H_RPTI_TYPE_PWC 0x0004 /* Invalidate Page Walk Cache */ +/* Invalidate Process Table Entries if H_RPTI_TYPE_NESTED is clear */ +#define H_RPTI_TYPE_PRT 0x0008 +/* Invalidate Partition Table Entries if H_RPTI_TYPE_NESTED is set */ +#define H_RPTI_TYPE_PAT 0x0008 +#define H_RPTI_TYPE_ALL (H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | \ + H_RPTI_TYPE_PRT) +#define H_RPTI_TYPE_NESTED_ALL (H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | \ + H_RPTI_TYPE_PAT) + +/* Invalidation targets (target) */ +#define H_RPTI_TARGET_CMMU 0x01 /* All virtual processors in the partition */ +#define H_RPTI_TARGET_CMMU_LOCAL 0x02 /* Current virtual processor */ +/* All nest/accelerator agents in use by the partition */ +#define H_RPTI_TARGET_NMMU 0x04 + +/* Page size mask (page sizes) */ +#define H_RPTI_PAGE_4K 0x01 +#define H_RPTI_PAGE_64K 0x02 +#define H_RPTI_PAGE_2M 0x04 +#define H_RPTI_PAGE_1G 0x08 +#define H_RPTI_PAGE_ALL (-1UL) + #ifndef __ASSEMBLY__ #include diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index cb424799da0d..db206a7f38e2 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -9,6 +9,8 @@ #ifndef _PPC_BOOK3S_64_HW_BREAKPOINT_H #define _PPC_BOOK3S_64_HW_BREAKPOINT_H +#include + #ifdef __KERNEL__ struct arch_hw_breakpoint { unsigned long address; @@ -17,7 +19,7 @@ struct arch_hw_breakpoint { u16 hw_len; /* length programmed in hw */ }; -/* Note: Don't change the the first 6 bits below as they are in the same order +/* Note: Don't change the first 6 bits below as they are in the same order * as the dabr and dabrx. */ #define HW_BRK_TYPE_READ 0x01 @@ -46,7 +48,7 @@ struct arch_hw_breakpoint { static inline int nr_wp_slots(void) { - return HBP_NUM_MAX; + return cpu_has_feature(CPU_FTR_DAWR1) ? 2 : 1; } #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/powerpc/include/asm/hydra.h b/arch/powerpc/include/asm/hydra.h index b3b0f2d020f0..ae02eb53d6ef 100644 --- a/arch/powerpc/include/asm/hydra.h +++ b/arch/powerpc/include/asm/hydra.h @@ -10,7 +10,7 @@ * * © Copyright 1995 Apple Computer, Inc. All rights reserved. * - * It's available online from http://www.cpu.lu/~mlan/ftp/MacTech.pdf + * It's available online from https://www.cpu.lu/~mlan/ftp/MacTech.pdf * You can obtain paper copies of this book from computer bookstores or by * writing Morgan Kaufmann Publishers, Inc., 340 Pine Street, Sixth Floor, San * Francisco, CA 94104. Reference ISBN 1-55860-393-X. diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h index 4da4fcba0684..4f897993b710 100644 --- a/arch/powerpc/include/asm/imc-pmu.h +++ b/arch/powerpc/include/asm/imc-pmu.h @@ -99,6 +99,11 @@ struct trace_imc_data { */ #define IMC_TRACE_RECORD_TB1_MASK 0x3ffffffffffULL +/* + * Bit 0:1 in third DW of IMC trace record + * specifies the MSR[HV PR] values. + */ +#define IMC_TRACE_RECORD_VAL_HVPR(x) ((x) >> 62) /* * Device tree parser code detects IMC pmu support and diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index 45f3ec868258..cc73c1267572 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -122,6 +122,25 @@ static inline u64 ppc_inst_as_u64(struct ppc_inst x) #endif } +#define PPC_INST_STR_LEN sizeof("00000000 00000000") + +static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_inst x) +{ + if (ppc_inst_prefixed(x)) + sprintf(str, "%08x %08x", ppc_inst_val(x), ppc_inst_suffix(x)); + else + sprintf(str, "%08x", ppc_inst_val(x)); + + return str; +} + +#define ppc_inst_as_str(x) \ +({ \ + char __str[PPC_INST_STR_LEN]; \ + __ppc_inst_as_str(__str, x); \ + __str; \ +}) + int probe_user_read_inst(struct ppc_inst *inst, struct ppc_inst __user *nip); diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index be85c7005fb1..d635b96c7ea6 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -27,10 +27,12 @@ #ifdef CONFIG_KASAN void kasan_early_init(void); +void kasan_mmu_init(void); void kasan_init(void); void kasan_late_init(void); #else static inline void kasan_init(void) { } +static inline void kasan_mmu_init(void) { } static inline void kasan_late_init(void) { } #endif diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index c68476818753..55d6ede30c19 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -100,15 +100,26 @@ void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_co #ifdef CONFIG_KEXEC_FILE extern const struct kexec_file_ops kexec_elf64_ops; -#ifdef CONFIG_IMA_KEXEC #define ARCH_HAS_KIMAGE_ARCH struct kimage_arch { + struct crash_mem *exclude_ranges; + + unsigned long backup_start; + void *backup_buf; + + unsigned long elfcorehdr_addr; + unsigned long elf_headers_sz; + void *elf_headers; + +#ifdef CONFIG_IMA_KEXEC phys_addr_t ima_buffer_addr; size_t ima_buffer_size; -}; #endif +}; +char *setup_kdump_cmdline(struct kimage *image, char *cmdline, + unsigned long cmdline_len); int setup_purgatory(struct kimage *image, const void *slave_code, const void *fdt, unsigned long kernel_load_addr, unsigned long fdt_load_addr); @@ -116,6 +127,20 @@ int setup_new_fdt(const struct kimage *image, void *fdt, unsigned long initrd_load_addr, unsigned long initrd_len, const char *cmdline); int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size); + +#ifdef CONFIG_PPC64 +struct kexec_buf; + +int load_crashdump_segments_ppc64(struct kimage *image, + struct kexec_buf *kbuf); +int setup_purgatory_ppc64(struct kimage *image, const void *slave_code, + const void *fdt, unsigned long kernel_load_addr, + unsigned long fdt_load_addr); +int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, + unsigned long initrd_load_addr, + unsigned long initrd_len, const char *cmdline); +#endif /* CONFIG_PPC64 */ + #endif /* CONFIG_KEXEC_FILE */ #else /* !CONFIG_KEXEC_CORE */ @@ -150,6 +175,18 @@ static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) } #endif /* CONFIG_KEXEC_CORE */ + +#ifdef CONFIG_PPC_BOOK3S_64 +#include +#endif + +#ifndef reset_sprs +#define reset_sprs reset_sprs +static inline void reset_sprs(void) +{ +} +#endif + #endif /* ! __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_KEXEC_H */ diff --git a/arch/powerpc/include/asm/kexec_ranges.h b/arch/powerpc/include/asm/kexec_ranges.h new file mode 100644 index 000000000000..7a90000f8d15 --- /dev/null +++ b/arch/powerpc/include/asm/kexec_ranges.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_POWERPC_KEXEC_RANGES_H +#define _ASM_POWERPC_KEXEC_RANGES_H + +#define MEM_RANGE_CHUNK_SZ 2048 /* Memory ranges size chunk */ + +void sort_memory_ranges(struct crash_mem *mrngs, bool merge); +struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges); +int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size); +int add_tce_mem_ranges(struct crash_mem **mem_ranges); +int add_initrd_mem_range(struct crash_mem **mem_ranges); +#ifdef CONFIG_PPC_BOOK3S_64 +int add_htab_mem_range(struct crash_mem **mem_ranges); +#else +static inline int add_htab_mem_range(struct crash_mem **mem_ranges) +{ + return 0; +} +#endif +int add_kernel_mem_range(struct crash_mem **mem_ranges); +int add_rtas_mem_range(struct crash_mem **mem_ranges); +int add_opal_mem_range(struct crash_mem **mem_ranges); +int add_reserved_mem_ranges(struct crash_mem **mem_ranges); + +#endif /* _ASM_POWERPC_KEXEC_RANGES_H */ diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 45704f2716e2..078f4648ea27 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -119,7 +119,7 @@ struct kvmppc_host_state { void __iomem *xive_tima_virt; u32 saved_xirr; u64 dabr; - u64 host_mmcr[7]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */ + u64 host_mmcr[10]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER, MMCR3, SIER2/3 */ u32 host_pmc[8]; u64 host_purr; u64 host_spurr; diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index 310ba48d13f0..0c3401b2e19e 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -89,10 +89,12 @@ static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu) return vcpu->arch.regs.nip; } +#ifdef CONFIG_BOOKE static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) { return vcpu->arch.fault_dear; } +#endif static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu) { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 7e2d061d0445..e020d269416d 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -637,12 +637,14 @@ struct kvm_vcpu_arch { u32 ccr1; u32 dbsr; - u64 mmcr[5]; + u64 mmcr[4]; /* MMCR0, MMCR1, MMCR2, MMCR3 */ + u64 mmcra; + u64 mmcrs; u32 pmc[8]; u32 spmc[2]; u64 siar; u64 sdar; - u64 sier; + u64 sier[3]; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM u64 tfhar; u64 texasr; diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 9c1f6b4b9bbf..744612054c94 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -8,35 +8,15 @@ #ifndef __POWERPC_KVM_PARA_H__ #define __POWERPC_KVM_PARA_H__ +#include + #include -#ifdef CONFIG_KVM_GUEST - -#include - static inline int kvm_para_available(void) { - struct device_node *hyper_node; - - hyper_node = of_find_node_by_path("/hypervisor"); - if (!hyper_node) - return 0; - - if (!of_device_is_compatible(hyper_node, "linux,kvm")) - return 0; - - return 1; + return IS_ENABLED(CONFIG_KVM_GUEST) && is_kvm_guest(); } -#else - -static inline int kvm_para_available(void) -{ - return 0; -} - -#endif - static inline unsigned int kvm_arch_para_features(void) { unsigned long r; diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 7bcb64444a39..a90b892f0bfe 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -131,7 +131,7 @@ struct machdep_calls { unsigned long dabrx); /* Set DAWR for this platform, leave empty for default implementation */ - int (*set_dawr)(unsigned long dawr, + int (*set_dawr)(int nr, unsigned long dawr, unsigned long dawrx); #ifdef CONFIG_PPC32 /* XXX for now */ diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 376a395daf32..adf2cda67f9a 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -86,6 +86,7 @@ enum MCE_TlbErrorType { enum MCE_UserErrorType { MCE_USER_ERROR_INDETERMINATE = 0, MCE_USER_ERROR_TLBIE = 1, + MCE_USER_ERROR_SCV = 2, }; enum MCE_RaErrorType { @@ -220,6 +221,8 @@ extern void machine_check_print_event_info(struct machine_check_event *evt, unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr); extern void mce_common_process_ue(struct pt_regs *regs, struct mce_error_info *mce_err); +int mce_register_notifier(struct notifier_block *nb); +int mce_unregister_notifier(struct notifier_block *nb); #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h index d610c2e07b28..7c07728af300 100644 --- a/arch/powerpc/include/asm/mman.h +++ b/arch/powerpc/include/asm/mman.h @@ -13,42 +13,20 @@ #include #include -/* - * This file is included by linux/mman.h, so we can't use cacl_vm_prot_bits() - * here. How important is the optimization? - */ +#ifdef CONFIG_PPC_MEM_KEYS static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, unsigned long pkey) { -#ifdef CONFIG_PPC_MEM_KEYS - return (((prot & PROT_SAO) ? VM_SAO : 0) | pkey_to_vmflag_bits(pkey)); -#else - return ((prot & PROT_SAO) ? VM_SAO : 0); -#endif + return pkey_to_vmflag_bits(pkey); } #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags) { -#ifdef CONFIG_PPC_MEM_KEYS - return (vm_flags & VM_SAO) ? - __pgprot(_PAGE_SAO | vmflag_to_pte_pkey_bits(vm_flags)) : - __pgprot(0 | vmflag_to_pte_pkey_bits(vm_flags)); -#else - return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0); -#endif + return __pgprot(vmflag_to_pte_pkey_bits(vm_flags)); } #define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags) - -static inline bool arch_validate_prot(unsigned long prot, unsigned long addr) -{ - if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM | PROT_SAO)) - return false; - if ((prot & PROT_SAO) && !cpu_has_feature(CPU_FTR_SAO)) - return false; - return true; -} -#define arch_validate_prot arch_validate_prot +#endif #endif /* CONFIG_PPC64 */ #endif /* _ASM_POWERPC_MMAN_H */ diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index f4ac25d4df05..255a1837e9f7 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -28,6 +28,19 @@ * Individual features below. */ +/* + * Support for KUEP feature. + */ +#define MMU_FTR_KUEP ASM_CONST(0x00000400) + +/* + * Support for memory protection keys. + */ +#define MMU_FTR_PKEY ASM_CONST(0x00000800) + +/* Guest Translation Shootdown Enable */ +#define MMU_FTR_GTSE ASM_CONST(0x00001000) + /* * Support for 68 bit VA space. We added that from ISA 2.05 */ @@ -173,10 +186,18 @@ enum { #endif #ifdef CONFIG_PPC_RADIX_MMU MMU_FTR_TYPE_RADIX | + MMU_FTR_GTSE | #ifdef CONFIG_PPC_KUAP MMU_FTR_RADIX_KUAP | #endif /* CONFIG_PPC_KUAP */ #endif /* CONFIG_PPC_RADIX_MMU */ +#ifdef CONFIG_PPC_MEM_KEYS + MMU_FTR_PKEY | +#endif +#ifdef CONFIG_PPC_KUEP + MMU_FTR_KUEP | +#endif /* CONFIG_PPC_KUAP */ + 0, }; @@ -356,6 +377,8 @@ extern void setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size); static inline void mmu_early_init_devtree(void) { } +static inline void pkey_early_init_devtree(void) {} + extern void *abatron_pteptrs[2]; #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 1a474f6b1992..7f3658a97384 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -218,8 +218,6 @@ static inline void inc_mm_active_cpus(struct mm_struct *mm) { } static inline void dec_mm_active_cpus(struct mm_struct *mm) { } static inline void mm_context_add_copro(struct mm_struct *mm) { } static inline void mm_context_remove_copro(struct mm_struct *mm) { } -static inline void mm_context_add_vas_windows(struct mm_struct *mm) { } -static inline void mm_context_remove_vas_windows(struct mm_struct *mm) { } #endif diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index b0afbdd07740..b9e134d0f03a 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -249,6 +249,18 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p return old; } + +#ifdef CONFIG_PPC_16K_PAGES +#define __HAVE_ARCH_PTEP_GET +static inline pte_t ptep_get(pte_t *ptep) +{ + pte_basic_t val = READ_ONCE(ptep->pte); + pte_t pte = {val, val, val, val}; + + return pte; +} +#endif /* CONFIG_PPC_16K_PAGES */ + #else static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, unsigned long clr, unsigned long set, int huge) @@ -284,16 +296,6 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, return __pte(pte_update(mm, addr, ptep, ~0, 0, 0)); } -#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES) -#define __HAVE_ARCH_PTEP_GET -static inline pte_t ptep_get(pte_t *ptep) -{ - pte_t pte = {READ_ONCE(ptep->pte), 0, 0, 0}; - - return pte; -} -#endif - #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 6cb8aa357191..59ee9fa4ae09 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -82,8 +82,6 @@ */ #include -#define _PAGE_SAO 0 - #define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1)) /* diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 84b2564cf5a4..9454d29ff4b4 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -225,6 +225,7 @@ struct paca_struct { u16 in_mce; u8 hmi_event_available; /* HMI event is available */ u8 hmi_p9_special_emu; /* HMI P9 special emulation */ + u32 hmi_irqs; /* HMI irq stat */ #endif u8 ftrace_enabled; /* Hard disable ftrace */ diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index a63fe6f3a0ff..254687258f42 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -255,8 +255,10 @@ static inline bool pfn_valid(unsigned long pfn) */ #ifdef CONFIG_PPC_BOOK3E_64 #define is_kernel_addr(x) ((x) >= 0x8000000000000000ul) -#else +#elif defined(CONFIG_PPC_BOOK3S_64) #define is_kernel_addr(x) ((x) >= PAGE_OFFSET) +#else +#define is_kernel_addr(x) ((x) >= TASK_SIZE) #endif #ifndef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h new file mode 100644 index 000000000000..9362c94fe3aa --- /dev/null +++ b/arch/powerpc/include/asm/paravirt.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_POWERPC_PARAVIRT_H +#define _ASM_POWERPC_PARAVIRT_H + +#include +#include +#ifdef CONFIG_PPC64 +#include +#include +#endif + +#ifdef CONFIG_PPC_SPLPAR +DECLARE_STATIC_KEY_FALSE(shared_processor); + +static inline bool is_shared_processor(void) +{ + return static_branch_unlikely(&shared_processor); +} + +/* If bit 0 is set, the cpu has been preempted */ +static inline u32 yield_count_of(int cpu) +{ + __be32 yield_count = READ_ONCE(lppaca_of(cpu).yield_count); + return be32_to_cpu(yield_count); +} + +static inline void yield_to_preempted(int cpu, u32 yield_count) +{ + plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(cpu), yield_count); +} + +static inline void prod_cpu(int cpu) +{ + plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); +} + +static inline void yield_to_any(void) +{ + plpar_hcall_norets(H_CONFER, -1, 0); +} +#else +static inline bool is_shared_processor(void) +{ + return false; +} + +static inline u32 yield_count_of(int cpu) +{ + return 0; +} + +extern void ___bad_yield_to_preempted(void); +static inline void yield_to_preempted(int cpu, u32 yield_count) +{ + ___bad_yield_to_preempted(); /* This would be a bug */ +} + +extern void ___bad_yield_to_any(void); +static inline void yield_to_any(void) +{ + ___bad_yield_to_any(); /* This would be a bug */ +} + +extern void ___bad_prod_cpu(void); +static inline void prod_cpu(int cpu) +{ + ___bad_prod_cpu(); /* This would be a bug */ +} + +#endif + +#define vcpu_is_preempted vcpu_is_preempted +static inline bool vcpu_is_preempted(int cpu) +{ + if (!is_shared_processor()) + return false; + if (yield_count_of(cpu) & 1) + return true; + return false; +} + +static inline bool pv_is_native_spin_unlock(void) +{ + return !is_shared_processor(); +} + +#endif /* _ASM_POWERPC_PARAVIRT_H */ diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index b92e81b256e5..d2a2a14e56f9 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -202,7 +202,6 @@ struct pci_dn { #define IODA_INVALID_PE 0xFFFFFFFF unsigned int pe_number; #ifdef CONFIG_PCI_IOV - int vf_index; /* VF index in the PF */ u16 vfs_expanded; /* number of VFs IOV BAR expanded */ u16 num_vfs; /* number of VFs enabled*/ unsigned int *pe_num_map; /* PE# for the first VF PE or array */ diff --git a/arch/powerpc/include/asm/percpu.h b/arch/powerpc/include/asm/percpu.h index dce863a7635c..8e5b7d0b851c 100644 --- a/arch/powerpc/include/asm/percpu.h +++ b/arch/powerpc/include/asm/percpu.h @@ -10,8 +10,6 @@ #ifdef CONFIG_SMP -#include - #define __my_cpu_offset local_paca->data_offset #endif /* CONFIG_SMP */ @@ -19,4 +17,6 @@ #include +#include + #endif /* _ASM_POWERPC_PERCPU_H_ */ diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h index eed3954082fa..1e8b2e1ec1db 100644 --- a/arch/powerpc/include/asm/perf_event.h +++ b/arch/powerpc/include/asm/perf_event.h @@ -12,6 +12,8 @@ #ifdef CONFIG_PPC_PERF_CTRS #include +#else +static inline bool is_sier_available(void) { return false; } #endif #ifdef CONFIG_FSL_EMB_PERF_EVENT diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 3e9703f44c7c..86c9eb064b22 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -17,6 +17,13 @@ struct perf_event; +struct mmcr_regs { + unsigned long mmcr0; + unsigned long mmcr1; + unsigned long mmcr2; + unsigned long mmcra; + unsigned long mmcr3; +}; /* * This struct provides the constants and functions needed to * describe the PMU on a particular POWER-family CPU. @@ -28,7 +35,7 @@ struct power_pmu { unsigned long add_fields; unsigned long test_adder; int (*compute_mmcr)(u64 events[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], + unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[]); int (*get_constraint)(u64 event_id, unsigned long *mskp, unsigned long *valp); @@ -41,13 +48,13 @@ struct power_pmu { unsigned long group_constraint_val; u64 (*bhrb_filter_map)(u64 branch_sample_type); void (*config_bhrb)(u64 pmu_bhrb_filter); - void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]); + void (*disable_pmc)(unsigned int pmc, struct mmcr_regs *mmcr); int (*limited_pmc_event)(u64 event_id); u32 flags; const struct attribute_group **attr_groups; int n_generic; int *generic_events; - int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] + u64 (*cache_events)[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; @@ -69,6 +76,7 @@ struct power_pmu { #define PPMU_HAS_SIER 0x00000040 /* Has SIER */ #define PPMU_ARCH_207S 0x00000080 /* PMC is architecture v2.07S */ #define PPMU_NO_SIAR 0x00000100 /* Do not use SIAR */ +#define PPMU_ARCH_31 0x00000200 /* Has MMCR3, SIER2 and SIER3 */ /* * Values for flags to get_alternatives() diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h index 2fe6cae14d10..a7951049e129 100644 --- a/arch/powerpc/include/asm/pkeys.h +++ b/arch/powerpc/include/asm/pkeys.h @@ -11,9 +11,7 @@ #include #include -DECLARE_STATIC_KEY_TRUE(pkey_disabled); -extern int pkeys_total; /* total pkeys as per device tree */ -extern u32 initial_allocation_mask; /* bits set for the initially allocated keys */ +extern int num_pkey; extern u32 reserved_allocation_mask; /* bits set for reserved keys */ #define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | \ @@ -25,48 +23,28 @@ extern u32 reserved_allocation_mask; /* bits set for reserved keys */ PKEY_DISABLE_WRITE | \ PKEY_DISABLE_EXECUTE) +#ifdef CONFIG_PPC_BOOK3S_64 +#include +#else +#error "Not supported" +#endif + + static inline u64 pkey_to_vmflag_bits(u16 pkey) { return (((u64)pkey << VM_PKEY_SHIFT) & ARCH_VM_PKEY_FLAGS); } -static inline u64 vmflag_to_pte_pkey_bits(u64 vm_flags) -{ - if (static_branch_likely(&pkey_disabled)) - return 0x0UL; - - return (((vm_flags & VM_PKEY_BIT0) ? H_PTE_PKEY_BIT4 : 0x0UL) | - ((vm_flags & VM_PKEY_BIT1) ? H_PTE_PKEY_BIT3 : 0x0UL) | - ((vm_flags & VM_PKEY_BIT2) ? H_PTE_PKEY_BIT2 : 0x0UL) | - ((vm_flags & VM_PKEY_BIT3) ? H_PTE_PKEY_BIT1 : 0x0UL) | - ((vm_flags & VM_PKEY_BIT4) ? H_PTE_PKEY_BIT0 : 0x0UL)); -} - static inline int vma_pkey(struct vm_area_struct *vma) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return 0; return (vma->vm_flags & ARCH_VM_PKEY_FLAGS) >> VM_PKEY_SHIFT; } -#define arch_max_pkey() pkeys_total - -static inline u64 pte_to_hpte_pkey_bits(u64 pteflags) +static inline int arch_max_pkey(void) { - return (((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL)); -} - -static inline u16 pte_to_pkey_bits(u64 pteflags) -{ - return (((pteflags & H_PTE_PKEY_BIT0) ? 0x10 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT1) ? 0x8 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT2) ? 0x4 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT3) ? 0x2 : 0x0UL) | - ((pteflags & H_PTE_PKEY_BIT4) ? 0x1 : 0x0UL)); + return num_pkey; } #define pkey_alloc_mask(pkey) (0x1 << pkey) @@ -114,9 +92,8 @@ static inline int mm_pkey_alloc(struct mm_struct *mm) u32 all_pkeys_mask = (u32)(~(0x0)); int ret; - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return -1; - /* * Are we out of pkeys? We must handle this specially because ffz() * behavior is undefined if there are no zeros. @@ -132,7 +109,7 @@ static inline int mm_pkey_alloc(struct mm_struct *mm) static inline int mm_pkey_free(struct mm_struct *mm, int pkey) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return -1; if (!mm_pkey_is_allocated(mm, pkey)) @@ -147,21 +124,13 @@ static inline int mm_pkey_free(struct mm_struct *mm, int pkey) * Try to dedicate one of the protection keys to be used as an * execute-only protection key. */ -extern int __execute_only_pkey(struct mm_struct *mm); -static inline int execute_only_pkey(struct mm_struct *mm) -{ - if (static_branch_likely(&pkey_disabled)) - return -1; - - return __execute_only_pkey(mm); -} - +extern int execute_only_pkey(struct mm_struct *mm); extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey); static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return 0; /* @@ -179,7 +148,7 @@ extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return -EINVAL; /* @@ -196,7 +165,7 @@ static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, static inline bool arch_pkeys_enabled(void) { - return !static_branch_likely(&pkey_disabled); + return mmu_has_feature(MMU_FTR_PKEY); } extern void pkey_mm_init(struct mm_struct *mm); diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 4497c8afb573..ece84a430701 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -312,7 +312,12 @@ static inline long plpar_set_ciabr(unsigned long ciabr) static inline long plpar_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0) { - return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0); + return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR0, dawr0, dawrx0); +} + +static inline long plpar_set_watchpoint1(unsigned long dawr1, unsigned long dawrx1) +{ + return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR1, dawr1, dawrx1); } static inline long plpar_signal_sys_reset(long cpu) @@ -334,6 +339,51 @@ static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p) return rc; } +/* + * Wrapper to H_RPT_INVALIDATE hcall that handles return values appropriately + * + * - Returns H_SUCCESS on success + * - For H_BUSY return value, we retry the hcall. + * - For any other hcall failures, attempt a full flush once before + * resorting to BUG(). + * + * Note: This hcall is expected to fail only very rarely. The correct + * error recovery of killing the process/guest will be eventually + * needed. + */ +static inline long pseries_rpt_invalidate(u32 pid, u64 target, u64 type, + u64 page_sizes, u64 start, u64 end) +{ + long rc; + unsigned long all; + + while (true) { + rc = plpar_hcall_norets(H_RPT_INVALIDATE, pid, target, type, + page_sizes, start, end); + if (rc == H_BUSY) { + cpu_relax(); + continue; + } else if (rc == H_SUCCESS) + return rc; + + /* Flush request failed, try with a full flush once */ + if (type & H_RPTI_TYPE_NESTED) + all = H_RPTI_TYPE_NESTED | H_RPTI_TYPE_NESTED_ALL; + else + all = H_RPTI_TYPE_ALL; +retry: + rc = plpar_hcall_norets(H_RPT_INVALIDATE, pid, target, + all, page_sizes, 0, -1UL); + if (rc == H_BUSY) { + cpu_relax(); + goto retry; + } else if (rc == H_SUCCESS) + return rc; + + BUG(); + } +} + #else /* !CONFIG_PPC_PSERIES */ static inline long plpar_set_ciabr(unsigned long ciabr) @@ -346,6 +396,13 @@ static inline long plpar_pte_read_4(unsigned long flags, unsigned long ptex, { return 0; } + +static inline long pseries_rpt_invalidate(u32 pid, u64 target, u64 type, + u64 page_sizes, u64 start, u64 end) +{ + return 0; +} + #endif /* CONFIG_PPC_PSERIES */ #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h index 7de82647e761..ee79d2cd9fb6 100644 --- a/arch/powerpc/include/asm/pnv-ocxl.h +++ b/arch/powerpc/include/asm/pnv-ocxl.h @@ -9,28 +9,26 @@ #define PNV_OCXL_TL_BITS_PER_RATE 4 #define PNV_OCXL_TL_RATE_BUF_SIZE ((PNV_OCXL_TL_MAX_TEMPLATE+1) * PNV_OCXL_TL_BITS_PER_RATE / 8) -extern int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, - u16 *supported); -extern int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count); +int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, u16 *supported); +int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count); -extern int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap, +int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap, char *rate_buf, int rate_buf_size); -extern int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap, - uint64_t rate_buf_phys, int rate_buf_size); +int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap, + uint64_t rate_buf_phys, int rate_buf_size); -extern int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq); -extern void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar, - void __iomem *tfc, void __iomem *pe_handle); -extern int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr, - void __iomem **dar, void __iomem **tfc, - void __iomem **pe_handle); +int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq); +void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar, + void __iomem *tfc, void __iomem *pe_handle); +int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr, + void __iomem **dar, void __iomem **tfc, + void __iomem **pe_handle); -extern int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, - void **platform_data); -extern void pnv_ocxl_spa_release(void *platform_data); -extern int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle); +int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, void **platform_data); +void pnv_ocxl_spa_release(void *platform_data); +int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle); -extern int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr); -extern void pnv_ocxl_free_xive_irq(u32 irq); +int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr); +void pnv_ocxl_free_xive_irq(u32 irq); #endif /* _ASM_PNV_OCXL_H */ diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 2a39c716c343..a6e3700c4566 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -76,6 +76,19 @@ #define __REGA0_R30 30 #define __REGA0_R31 31 +#define IMM_L(i) ((uintptr_t)(i) & 0xffff) +#define IMM_DS(i) ((uintptr_t)(i) & 0xfffc) + +/* + * 16-bit immediate helper macros: HA() is for use with sign-extending instrs + * (e.g. LD, ADDI). If the bottom 16 bits is "-ve", add another bit into the + * top half to negate the effect (i.e. 0xffff + 1 = 0x(1)0000). + */ +#define IMM_H(i) ((uintptr_t)(i)>>16) +#define IMM_HA(i) (((uintptr_t)(i)>>16) + \ + (((uintptr_t)(i) & 0x8000) >> 15)) + + /* opcode and xopcode for instructions */ #define OP_TRAP 3 #define OP_TRAP_64 2 @@ -195,56 +208,28 @@ #define OP_LQ 56 /* sorted alphabetically */ -#define PPC_INST_BHRBE 0x7c00025c -#define PPC_INST_CLRBHRB 0x7c00035c +#define PPC_INST_BCCTR_FLUSH 0x4c400420 #define PPC_INST_COPY 0x7c20060c -#define PPC_INST_CP_ABORT 0x7c00068c -#define PPC_INST_DARN 0x7c0005e6 #define PPC_INST_DCBA 0x7c0005ec #define PPC_INST_DCBA_MASK 0xfc0007fe -#define PPC_INST_DCBAL 0x7c2005ec -#define PPC_INST_DCBZL 0x7c2007ec -#define PPC_INST_ICBT 0x7c00002c -#define PPC_INST_ICSWX 0x7c00032d -#define PPC_INST_ICSWEPX 0x7c00076d #define PPC_INST_ISEL 0x7c00001e #define PPC_INST_ISEL_MASK 0xfc00003e -#define PPC_INST_LDARX 0x7c0000a8 -#define PPC_INST_STDCX 0x7c0001ad -#define PPC_INST_LQARX 0x7c000228 -#define PPC_INST_STQCX 0x7c00016d #define PPC_INST_LSWI 0x7c0004aa #define PPC_INST_LSWX 0x7c00042a -#define PPC_INST_LWARX 0x7c000028 -#define PPC_INST_STWCX 0x7c00012d #define PPC_INST_LWSYNC 0x7c2004ac #define PPC_INST_SYNC 0x7c0004ac #define PPC_INST_SYNC_MASK 0xfc0007fe #define PPC_INST_ISYNC 0x4c00012c -#define PPC_INST_LXVD2X 0x7c000698 #define PPC_INST_MCRXR 0x7c000400 #define PPC_INST_MCRXR_MASK 0xfc0007fe #define PPC_INST_MFSPR_PVR 0x7c1f42a6 #define PPC_INST_MFSPR_PVR_MASK 0xfc1ffffe -#define PPC_INST_MFTMR 0x7c0002dc -#define PPC_INST_MSGSND 0x7c00019c -#define PPC_INST_MSGCLR 0x7c0001dc -#define PPC_INST_MSGSYNC 0x7c0006ec -#define PPC_INST_MSGSNDP 0x7c00011c -#define PPC_INST_MSGCLRP 0x7c00015c #define PPC_INST_MTMSRD 0x7c000164 -#define PPC_INST_MTTMR 0x7c0003dc #define PPC_INST_NOP 0x60000000 -#define PPC_INST_PASTE 0x7c20070d #define PPC_INST_POPCNTB 0x7c0000f4 #define PPC_INST_POPCNTB_MASK 0xfc0007fe -#define PPC_INST_POPCNTD 0x7c0003f4 -#define PPC_INST_POPCNTW 0x7c0002f4 #define PPC_INST_RFEBB 0x4c000124 -#define PPC_INST_RFCI 0x4c000066 -#define PPC_INST_RFDI 0x4c00004e #define PPC_INST_RFID 0x4c000024 -#define PPC_INST_RFMCI 0x4c00004c #define PPC_INST_MFSPR 0x7c0002a6 #define PPC_INST_MFSPR_DSCR 0x7c1102a6 #define PPC_INST_MFSPR_DSCR_MASK 0xfc1ffffe @@ -254,131 +239,43 @@ #define PPC_INST_MFSPR_DSCR_USER_MASK 0xfc1ffffe #define PPC_INST_MTSPR_DSCR_USER 0x7c0303a6 #define PPC_INST_MTSPR_DSCR_USER_MASK 0xfc1ffffe -#define PPC_INST_MFVSRD 0x7c000066 -#define PPC_INST_MTVSRD 0x7c000166 #define PPC_INST_SC 0x44000002 -#define PPC_INST_SLBFEE 0x7c0007a7 -#define PPC_INST_SLBIA 0x7c0003e4 - #define PPC_INST_STRING 0x7c00042a #define PPC_INST_STRING_MASK 0xfc0007fe #define PPC_INST_STRING_GEN_MASK 0xfc00067e - #define PPC_INST_STSWI 0x7c0005aa #define PPC_INST_STSWX 0x7c00052a -#define PPC_INST_STXVD2X 0x7c000798 -#define PPC_INST_TLBIE 0x7c000264 -#define PPC_INST_TLBIEL 0x7c000224 -#define PPC_INST_TLBILX 0x7c000024 -#define PPC_INST_WAIT 0x7c00007c -#define PPC_INST_TLBIVAX 0x7c000624 -#define PPC_INST_TLBSRX_DOT 0x7c0006a5 -#define PPC_INST_VPMSUMW 0x10000488 -#define PPC_INST_VPMSUMD 0x100004c8 -#define PPC_INST_VPERMXOR 0x1000002d -#define PPC_INST_XXLOR 0xf0000490 -#define PPC_INST_XXSWAPD 0xf0000250 -#define PPC_INST_XVCPSGNDP 0xf0000780 #define PPC_INST_TRECHKPT 0x7c0007dd #define PPC_INST_TRECLAIM 0x7c00075d -#define PPC_INST_TABORT 0x7c00071d #define PPC_INST_TSR 0x7c0005dd - -#define PPC_INST_NAP 0x4c000364 -#define PPC_INST_SLEEP 0x4c0003a4 -#define PPC_INST_WINKLE 0x4c0003e4 - -#define PPC_INST_STOP 0x4c0002e4 - -/* A2 specific instructions */ -#define PPC_INST_ERATWE 0x7c0001a6 -#define PPC_INST_ERATRE 0x7c000166 -#define PPC_INST_ERATILX 0x7c000066 -#define PPC_INST_ERATIVAX 0x7c000666 -#define PPC_INST_ERATSX 0x7c000126 -#define PPC_INST_ERATSX_DOT 0x7c000127 - -/* Misc instructions for BPF compiler */ -#define PPC_INST_LBZ 0x88000000 #define PPC_INST_LD 0xe8000000 -#define PPC_INST_LDX 0x7c00002a -#define PPC_INST_LHZ 0xa0000000 -#define PPC_INST_LWZ 0x80000000 -#define PPC_INST_LHBRX 0x7c00062c -#define PPC_INST_LDBRX 0x7c000428 -#define PPC_INST_STB 0x98000000 -#define PPC_INST_STH 0xb0000000 #define PPC_INST_STD 0xf8000000 -#define PPC_INST_STDX 0x7c00012a -#define PPC_INST_STDU 0xf8000001 -#define PPC_INST_STW 0x90000000 -#define PPC_INST_STWU 0x94000000 #define PPC_INST_MFLR 0x7c0802a6 -#define PPC_INST_MTLR 0x7c0803a6 #define PPC_INST_MTCTR 0x7c0903a6 -#define PPC_INST_CMPWI 0x2c000000 -#define PPC_INST_CMPDI 0x2c200000 -#define PPC_INST_CMPW 0x7c000000 -#define PPC_INST_CMPD 0x7c200000 -#define PPC_INST_CMPLW 0x7c000040 -#define PPC_INST_CMPLD 0x7c200040 -#define PPC_INST_CMPLWI 0x28000000 -#define PPC_INST_CMPLDI 0x28200000 #define PPC_INST_ADDI 0x38000000 #define PPC_INST_ADDIS 0x3c000000 #define PPC_INST_ADD 0x7c000214 -#define PPC_INST_ADDC 0x7c000014 -#define PPC_INST_SUB 0x7c000050 #define PPC_INST_BLR 0x4e800020 -#define PPC_INST_BLRL 0x4e800021 #define PPC_INST_BCTR 0x4e800420 -#define PPC_INST_MULLD 0x7c0001d2 -#define PPC_INST_MULLW 0x7c0001d6 -#define PPC_INST_MULHWU 0x7c000016 -#define PPC_INST_MULLI 0x1c000000 -#define PPC_INST_MADDHD 0x10000030 -#define PPC_INST_MADDHDU 0x10000031 -#define PPC_INST_MADDLD 0x10000033 -#define PPC_INST_DIVWU 0x7c000396 +#define PPC_INST_BCTRL 0x4e800421 #define PPC_INST_DIVD 0x7c0003d2 -#define PPC_INST_DIVDU 0x7c000392 -#define PPC_INST_RLWINM 0x54000000 -#define PPC_INST_RLWINM_DOT 0x54000001 -#define PPC_INST_RLWIMI 0x50000000 -#define PPC_INST_RLDICL 0x78000000 #define PPC_INST_RLDICR 0x78000004 -#define PPC_INST_SLW 0x7c000030 -#define PPC_INST_SLD 0x7c000036 -#define PPC_INST_SRW 0x7c000430 -#define PPC_INST_SRAW 0x7c000630 -#define PPC_INST_SRAWI 0x7c000670 -#define PPC_INST_SRD 0x7c000436 -#define PPC_INST_SRAD 0x7c000634 -#define PPC_INST_SRADI 0x7c000674 -#define PPC_INST_AND 0x7c000038 -#define PPC_INST_ANDDOT 0x7c000039 -#define PPC_INST_OR 0x7c000378 -#define PPC_INST_XOR 0x7c000278 -#define PPC_INST_ANDI 0x70000000 #define PPC_INST_ORI 0x60000000 #define PPC_INST_ORIS 0x64000000 -#define PPC_INST_XORI 0x68000000 -#define PPC_INST_XORIS 0x6c000000 -#define PPC_INST_NEG 0x7c0000d0 -#define PPC_INST_EXTSW 0x7c0007b4 #define PPC_INST_BRANCH 0x48000000 #define PPC_INST_BRANCH_COND 0x40800000 -#define PPC_INST_LBZCIX 0x7c0006aa -#define PPC_INST_STBCIX 0x7c0007aa -#define PPC_INST_LWZX 0x7c00002e -#define PPC_INST_LFSX 0x7c00042e -#define PPC_INST_STFSX 0x7c00052e -#define PPC_INST_LFDX 0x7c0004ae -#define PPC_INST_STFDX 0x7c0005ae -#define PPC_INST_LVX 0x7c0000ce -#define PPC_INST_STVX 0x7c0001ce -#define PPC_INST_VCMPEQUD 0x100000c7 -#define PPC_INST_VCMPEQUB 0x10000006 + +/* Prefixes */ +#define PPC_INST_LFS 0xc0000000 +#define PPC_INST_STFS 0xd0000000 +#define PPC_INST_LFD 0xc8000000 +#define PPC_INST_STFD 0xd8000000 +#define PPC_PREFIX_MLS 0x06000000 +#define PPC_PREFIX_8LS 0x04000000 + +/* Prefixed instructions */ +#define PPC_INST_PLD 0xe4000000 +#define PPC_INST_PSTD 0xf4000000 /* macros to insert fields into opcodes */ #define ___PPC_RA(a) (((a) & 0x1f) << 16) @@ -411,6 +308,7 @@ #define __PPC_CT(t) (((t) & 0x0f) << 21) #define __PPC_SPR(r) ((((r) & 0x1f) << 16) | ((((r) >> 5) & 0x1f) << 11)) #define __PPC_RC21 (0x1 << 10) +#define __PPC_PRFX_R(r) (((r) & 0x1) << 20) /* * Both low and high 16 bits are added as SIGNED additions, so if low 16 bits @@ -431,175 +329,278 @@ #define __PPC_EH(eh) 0 #endif -/* Deal with instructions that older assemblers aren't aware of */ -#define PPC_CP_ABORT stringify_in_c(.long PPC_INST_CP_ABORT) -#define PPC_COPY(a, b) stringify_in_c(.long PPC_INST_COPY | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_DARN(t, l) stringify_in_c(.long PPC_INST_DARN | \ - ___PPC_RT(t) | \ - (((l) & 0x3) << 16)) -#define PPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \ - __PPC_RA(a) | __PPC_RB(b)) -#define PPC_DCBZL(a, b) stringify_in_c(.long PPC_INST_DCBZL | \ - __PPC_RA(a) | __PPC_RB(b)) -#define PPC_LQARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LQARX | \ - ___PPC_RT(t) | ___PPC_RA(a) | \ - ___PPC_RB(b) | __PPC_EH(eh)) -#define PPC_LDARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LDARX | \ - ___PPC_RT(t) | ___PPC_RA(a) | \ - ___PPC_RB(b) | __PPC_EH(eh)) -#define PPC_LWARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LWARX | \ - ___PPC_RT(t) | ___PPC_RA(a) | \ - ___PPC_RB(b) | __PPC_EH(eh)) -#define PPC_STQCX(t, a, b) stringify_in_c(.long PPC_INST_STQCX | \ - ___PPC_RT(t) | ___PPC_RA(a) | \ - ___PPC_RB(b)) -#define PPC_MADDHD(t, a, b, c) stringify_in_c(.long PPC_INST_MADDHD | \ - ___PPC_RT(t) | ___PPC_RA(a) | \ - ___PPC_RB(b) | ___PPC_RC(c)) -#define PPC_MADDHDU(t, a, b, c) stringify_in_c(.long PPC_INST_MADDHDU | \ - ___PPC_RT(t) | ___PPC_RA(a) | \ - ___PPC_RB(b) | ___PPC_RC(c)) -#define PPC_MADDLD(t, a, b, c) stringify_in_c(.long PPC_INST_MADDLD | \ - ___PPC_RT(t) | ___PPC_RA(a) | \ - ___PPC_RB(b) | ___PPC_RC(c)) -#define PPC_MSGSND(b) stringify_in_c(.long PPC_INST_MSGSND | \ - ___PPC_RB(b)) -#define PPC_MSGSYNC stringify_in_c(.long PPC_INST_MSGSYNC) -#define PPC_MSGCLR(b) stringify_in_c(.long PPC_INST_MSGCLR | \ - ___PPC_RB(b)) -#define PPC_MSGSNDP(b) stringify_in_c(.long PPC_INST_MSGSNDP | \ - ___PPC_RB(b)) -#define PPC_MSGCLRP(b) stringify_in_c(.long PPC_INST_MSGCLRP | \ - ___PPC_RB(b)) -#define PPC_PASTE(a, b) stringify_in_c(.long PPC_INST_PASTE | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_POPCNTB(a, s) stringify_in_c(.long PPC_INST_POPCNTB | \ - __PPC_RA(a) | __PPC_RS(s)) -#define PPC_POPCNTD(a, s) stringify_in_c(.long PPC_INST_POPCNTD | \ - __PPC_RA(a) | __PPC_RS(s)) -#define PPC_POPCNTW(a, s) stringify_in_c(.long PPC_INST_POPCNTW | \ - __PPC_RA(a) | __PPC_RS(s)) -#define PPC_RFCI stringify_in_c(.long PPC_INST_RFCI) -#define PPC_RFDI stringify_in_c(.long PPC_INST_RFDI) -#define PPC_RFMCI stringify_in_c(.long PPC_INST_RFMCI) -#define PPC_TLBILX(t, a, b) stringify_in_c(.long PPC_INST_TLBILX | \ - __PPC_T_TLB(t) | __PPC_RA0(a) | __PPC_RB(b)) -#define PPC_TLBILX_ALL(a, b) PPC_TLBILX(0, a, b) -#define PPC_TLBILX_PID(a, b) PPC_TLBILX(1, a, b) -#define PPC_TLBILX_VA(a, b) PPC_TLBILX(3, a, b) -#define PPC_WAIT(w) stringify_in_c(.long PPC_INST_WAIT | \ - __PPC_WC(w)) -#define PPC_TLBIE(lp,a) stringify_in_c(.long PPC_INST_TLBIE | \ - ___PPC_RB(a) | ___PPC_RS(lp)) -#define PPC_TLBIE_5(rb,rs,ric,prs,r) \ - stringify_in_c(.long PPC_INST_TLBIE | \ - ___PPC_RB(rb) | ___PPC_RS(rs) | \ - ___PPC_RIC(ric) | ___PPC_PRS(prs) | \ - ___PPC_R(r)) -#define PPC_TLBIEL(rb,rs,ric,prs,r) \ - stringify_in_c(.long PPC_INST_TLBIEL | \ - ___PPC_RB(rb) | ___PPC_RS(rs) | \ - ___PPC_RIC(ric) | ___PPC_PRS(prs) | \ - ___PPC_R(r)) -#define PPC_TLBSRX_DOT(a,b) stringify_in_c(.long PPC_INST_TLBSRX_DOT | \ - __PPC_RA0(a) | __PPC_RB(b)) -#define PPC_TLBIVAX(a,b) stringify_in_c(.long PPC_INST_TLBIVAX | \ - __PPC_RA0(a) | __PPC_RB(b)) - -#define PPC_ERATWE(s, a, w) stringify_in_c(.long PPC_INST_ERATWE | \ - __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w)) -#define PPC_ERATRE(s, a, w) stringify_in_c(.long PPC_INST_ERATRE | \ - __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w)) -#define PPC_ERATILX(t, a, b) stringify_in_c(.long PPC_INST_ERATILX | \ - __PPC_T_TLB(t) | __PPC_RA0(a) | \ - __PPC_RB(b)) -#define PPC_ERATIVAX(s, a, b) stringify_in_c(.long PPC_INST_ERATIVAX | \ - __PPC_RS(s) | __PPC_RA0(a) | __PPC_RB(b)) -#define PPC_ERATSX(t, a, w) stringify_in_c(.long PPC_INST_ERATSX | \ - __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b)) -#define PPC_ERATSX_DOT(t, a, w) stringify_in_c(.long PPC_INST_ERATSX_DOT | \ - __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b)) -#define PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_INST_SLBFEE | \ - __PPC_RT(t) | __PPC_RB(b)) -#define __PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_INST_SLBFEE | \ - ___PPC_RT(t) | ___PPC_RB(b)) -#define PPC_ICBT(c,a,b) stringify_in_c(.long PPC_INST_ICBT | \ - __PPC_CT(c) | __PPC_RA0(a) | __PPC_RB(b)) -/* PASemi instructions */ -#define LBZCIX(t,a,b) stringify_in_c(.long PPC_INST_LBZCIX | \ - __PPC_RT(t) | __PPC_RA(a) | __PPC_RB(b)) -#define STBCIX(s,a,b) stringify_in_c(.long PPC_INST_STBCIX | \ - __PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b)) - +/* Base instruction encoding */ +#define PPC_RAW_CP_ABORT (0x7c00068c) +#define PPC_RAW_COPY(a, b) (PPC_INST_COPY | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_DARN(t, l) (0x7c0005e6 | ___PPC_RT(t) | (((l) & 0x3) << 16)) +#define PPC_RAW_DCBAL(a, b) (0x7c2005ec | __PPC_RA(a) | __PPC_RB(b)) +#define PPC_RAW_DCBZL(a, b) (0x7c2007ec | __PPC_RA(a) | __PPC_RB(b)) +#define PPC_RAW_LQARX(t, a, b, eh) (0x7c000228 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | __PPC_EH(eh)) +#define PPC_RAW_LDARX(t, a, b, eh) (0x7c0000a8 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | __PPC_EH(eh)) +#define PPC_RAW_LWARX(t, a, b, eh) (0x7c000028 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | __PPC_EH(eh)) +#define PPC_RAW_PHWSYNC (0x7c8004ac) +#define PPC_RAW_PLWSYNC (0x7ca004ac) +#define PPC_RAW_STQCX(t, a, b) (0x7c00016d | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_MADDHD(t, a, b, c) (0x10000030 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | ___PPC_RC(c)) +#define PPC_RAW_MADDHDU(t, a, b, c) (0x10000031 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | ___PPC_RC(c)) +#define PPC_RAW_MADDLD(t, a, b, c) (0x10000033 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | ___PPC_RC(c)) +#define PPC_RAW_MSGSND(b) (0x7c00019c | ___PPC_RB(b)) +#define PPC_RAW_MSGSYNC (0x7c0006ec) +#define PPC_RAW_MSGCLR(b) (0x7c0001dc | ___PPC_RB(b)) +#define PPC_RAW_MSGSNDP(b) (0x7c00011c | ___PPC_RB(b)) +#define PPC_RAW_MSGCLRP(b) (0x7c00015c | ___PPC_RB(b)) +#define PPC_RAW_PASTE(a, b) (0x7c20070d | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_POPCNTB(a, s) (PPC_INST_POPCNTB | __PPC_RA(a) | __PPC_RS(s)) +#define PPC_RAW_POPCNTD(a, s) (0x7c0003f4 | __PPC_RA(a) | __PPC_RS(s)) +#define PPC_RAW_POPCNTW(a, s) (0x7c0002f4 | __PPC_RA(a) | __PPC_RS(s)) +#define PPC_RAW_RFCI (0x4c000066) +#define PPC_RAW_RFDI (0x4c00004e) +#define PPC_RAW_RFMCI (0x4c00004c) +#define PPC_RAW_TLBILX(t, a, b) (0x7c000024 | __PPC_T_TLB(t) | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_RAW_WAIT(w) (0x7c00007c | __PPC_WC(w)) +#define PPC_RAW_TLBIE(lp, a) (0x7c000264 | ___PPC_RB(a) | ___PPC_RS(lp)) +#define PPC_RAW_TLBIE_5(rb, rs, ric, prs, r) \ + (0x7c000264 | ___PPC_RB(rb) | ___PPC_RS(rs) | ___PPC_RIC(ric) | ___PPC_PRS(prs) | ___PPC_R(r)) +#define PPC_RAW_TLBIEL(rb, rs, ric, prs, r) \ + (0x7c000224 | ___PPC_RB(rb) | ___PPC_RS(rs) | ___PPC_RIC(ric) | ___PPC_PRS(prs) | ___PPC_R(r)) +#define PPC_RAW_TLBSRX_DOT(a, b) (0x7c0006a5 | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_RAW_TLBIVAX(a, b) (0x7c000624 | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_RAW_ERATWE(s, a, w) (0x7c0001a6 | __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w)) +#define PPC_RAW_ERATRE(s, a, w) (0x7c000166 | __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w)) +#define PPC_RAW_ERATILX(t, a, b) (0x7c000066 | __PPC_T_TLB(t) | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_RAW_ERATIVAX(s, a, b) (0x7c000666 | __PPC_RS(s) | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_RAW_ERATSX(t, a, w) (0x7c000126 | __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_RAW_ERATSX_DOT(t, a, w) (0x7c000127 | __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_RAW_SLBFEE_DOT(t, b) (0x7c0007a7 | __PPC_RT(t) | __PPC_RB(b)) +#define __PPC_RAW_SLBFEE_DOT(t, b) (0x7c0007a7 | ___PPC_RT(t) | ___PPC_RB(b)) +#define PPC_RAW_ICBT(c, a, b) (0x7c00002c | __PPC_CT(c) | __PPC_RA0(a) | __PPC_RB(b)) +#define PPC_RAW_LBZCIX(t, a, b) (0x7c0006aa | __PPC_RT(t) | __PPC_RA(a) | __PPC_RB(b)) +#define PPC_RAW_STBCIX(s, a, b) (0x7c0007aa | __PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b)) +#define PPC_RAW_DCBFPS(a, b) (0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (4 << 21)) +#define PPC_RAW_DCBSTPS(a, b) (0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (6 << 21)) /* * Define what the VSX XX1 form instructions will look like, then add * the 128 bit load store instructions based on that. */ -#define VSX_XX1(s, a, b) (__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b)) -#define VSX_XX3(t, a, b) (__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b)) -#define STXVD2X(s, a, b) stringify_in_c(.long PPC_INST_STXVD2X | \ - VSX_XX1((s), a, b)) -#define LXVD2X(s, a, b) stringify_in_c(.long PPC_INST_LXVD2X | \ - VSX_XX1((s), a, b)) -#define MFVRD(a, t) stringify_in_c(.long PPC_INST_MFVSRD | \ - VSX_XX1((t)+32, a, R0)) -#define MTVRD(t, a) stringify_in_c(.long PPC_INST_MTVSRD | \ - VSX_XX1((t)+32, a, R0)) -#define VPMSUMW(t, a, b) stringify_in_c(.long PPC_INST_VPMSUMW | \ - VSX_XX3((t), a, b)) -#define VPMSUMD(t, a, b) stringify_in_c(.long PPC_INST_VPMSUMD | \ - VSX_XX3((t), a, b)) -#define XXLOR(t, a, b) stringify_in_c(.long PPC_INST_XXLOR | \ - VSX_XX3((t), a, b)) -#define XXSWAPD(t, a) stringify_in_c(.long PPC_INST_XXSWAPD | \ - VSX_XX3((t), a, a)) -#define XVCPSGNDP(t, a, b) stringify_in_c(.long (PPC_INST_XVCPSGNDP | \ - VSX_XX3((t), (a), (b)))) +#define VSX_XX1(s, a, b) (__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b)) +#define VSX_XX3(t, a, b) (__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b)) +#define PPC_RAW_STXVD2X(s, a, b) (0x7c000798 | VSX_XX1((s), a, b)) +#define PPC_RAW_LXVD2X(s, a, b) (0x7c000698 | VSX_XX1((s), a, b)) +#define PPC_RAW_MFVRD(a, t) (0x7c000066 | VSX_XX1((t) + 32, a, R0)) +#define PPC_RAW_MTVRD(t, a) (0x7c000166 | VSX_XX1((t) + 32, a, R0)) +#define PPC_RAW_VPMSUMW(t, a, b) (0x10000488 | VSX_XX3((t), a, b)) +#define PPC_RAW_VPMSUMD(t, a, b) (0x100004c8 | VSX_XX3((t), a, b)) +#define PPC_RAW_XXLOR(t, a, b) (0xf0000490 | VSX_XX3((t), a, b)) +#define PPC_RAW_XXSWAPD(t, a) (0xf0000250 | VSX_XX3((t), a, a)) +#define PPC_RAW_XVCPSGNDP(t, a, b) ((0xf0000780 | VSX_XX3((t), (a), (b)))) +#define PPC_RAW_VPERMXOR(vrt, vra, vrb, vrc) \ + ((0x1000002d | ___PPC_RT(vrt) | ___PPC_RA(vra) | ___PPC_RB(vrb) | (((vrc) & 0x1f) << 6))) +#define PPC_RAW_NAP (0x4c000364) +#define PPC_RAW_SLEEP (0x4c0003a4) +#define PPC_RAW_WINKLE (0x4c0003e4) +#define PPC_RAW_STOP (0x4c0002e4) +#define PPC_RAW_CLRBHRB (0x7c00035c) +#define PPC_RAW_MFBHRBE(r, n) (0x7c00025c | __PPC_RT(r) | (((n) & 0x3ff) << 11)) +#define PPC_RAW_TRECHKPT (PPC_INST_TRECHKPT) +#define PPC_RAW_TRECLAIM(r) (PPC_INST_TRECLAIM | __PPC_RA(r)) +#define PPC_RAW_TABORT(r) (0x7c00071d | __PPC_RA(r)) +#define TMRN(x) ((((x) & 0x1f) << 16) | (((x) & 0x3e0) << 6)) +#define PPC_RAW_MTTMR(tmr, r) (0x7c0003dc | TMRN(tmr) | ___PPC_RS(r)) +#define PPC_RAW_MFTMR(tmr, r) (0x7c0002dc | TMRN(tmr) | ___PPC_RT(r)) +#define PPC_RAW_ICSWX(s, a, b) (0x7c00032d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_ICSWEPX(s, a, b) (0x7c00076d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_SLBIA(IH) (0x7c0003e4 | (((IH) & 0x7) << 21)) +#define PPC_RAW_VCMPEQUD_RC(vrt, vra, vrb) \ + (0x100000c7 | ___PPC_RT(vrt) | ___PPC_RA(vra) | ___PPC_RB(vrb) | __PPC_RC21) +#define PPC_RAW_VCMPEQUB_RC(vrt, vra, vrb) \ + (0x10000006 | ___PPC_RT(vrt) | ___PPC_RA(vra) | ___PPC_RB(vrb) | __PPC_RC21) +#define PPC_RAW_LD(r, base, i) (PPC_INST_LD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_DS(i)) +#define PPC_RAW_LWZ(r, base, i) (0x80000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) +#define PPC_RAW_LWZX(t, a, b) (0x7c00002e | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_STD(r, base, i) (PPC_INST_STD | ___PPC_RS(r) | ___PPC_RA(base) | IMM_DS(i)) +#define PPC_RAW_STDCX(s, a, b) (0x7c0001ad | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_LFSX(t, a, b) (0x7c00042e | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_STFSX(s, a, b) (0x7c00052e | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_LFDX(t, a, b) (0x7c0004ae | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_STFDX(s, a, b) (0x7c0005ae | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_LVX(t, a, b) (0x7c0000ce | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_STVX(s, a, b) (0x7c0001ce | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_ADD(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_ADD_DOT(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) +#define PPC_RAW_ADDC(t, a, b) (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_ADDC_DOT(t, a, b) (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) +#define PPC_RAW_NOP() (PPC_INST_NOP) +#define PPC_RAW_BLR() (PPC_INST_BLR) +#define PPC_RAW_BLRL() (0x4e800021) +#define PPC_RAW_MTLR(r) (0x7c0803a6 | ___PPC_RT(r)) +#define PPC_RAW_BCTR() (PPC_INST_BCTR) +#define PPC_RAW_MTCTR(r) (PPC_INST_MTCTR | ___PPC_RT(r)) +#define PPC_RAW_ADDI(d, a, i) (PPC_INST_ADDI | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_LI(r, i) PPC_RAW_ADDI(r, 0, i) +#define PPC_RAW_ADDIS(d, a, i) (PPC_INST_ADDIS | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_LIS(r, i) PPC_RAW_ADDIS(r, 0, i) +#define PPC_RAW_STDX(r, base, b) (0x7c00012a | ___PPC_RS(r) | ___PPC_RA(base) | ___PPC_RB(b)) +#define PPC_RAW_STDU(r, base, i) (0xf8000001 | ___PPC_RS(r) | ___PPC_RA(base) | ((i) & 0xfffc)) +#define PPC_RAW_STW(r, base, i) (0x90000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i)) +#define PPC_RAW_STWU(r, base, i) (0x94000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i)) +#define PPC_RAW_STH(r, base, i) (0xb0000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i)) +#define PPC_RAW_STB(r, base, i) (0x98000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i)) +#define PPC_RAW_LBZ(r, base, i) (0x88000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) +#define PPC_RAW_LDX(r, base, b) (0x7c00002a | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) +#define PPC_RAW_LHZ(r, base, i) (0xa0000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) +#define PPC_RAW_LHBRX(r, base, b) (0x7c00062c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) +#define PPC_RAW_LDBRX(r, base, b) (0x7c000428 | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) +#define PPC_RAW_STWCX(s, a, b) (0x7c00012d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_CMPWI(a, i) (0x2c000000 | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_CMPDI(a, i) (0x2c200000 | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_CMPW(a, b) (0x7c000000 | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_CMPD(a, b) (0x7c200000 | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_CMPLWI(a, i) (0x28000000 | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_CMPLDI(a, i) (0x28200000 | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_CMPLW(a, b) (0x7c000040 | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_CMPLD(a, b) (0x7c200040 | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_SUB(d, a, b) (0x7c000050 | ___PPC_RT(d) | ___PPC_RB(a) | ___PPC_RA(b)) +#define PPC_RAW_MULD(d, a, b) (0x7c0001d2 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_MULW(d, a, b) (0x7c0001d6 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_MULHWU(d, a, b) (0x7c000016 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_MULI(d, a, i) (0x1c000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_DIVWU(d, a, b) (0x7c000396 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_DIVDU(d, a, b) (0x7c000392 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_DIVDE(t, a, b) (0x7c000352 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_DIVDE_DOT(t, a, b) (0x7c000352 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) +#define PPC_RAW_DIVDEU(t, a, b) (0x7c000312 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_RAW_DIVDEU_DOT(t, a, b) (0x7c000312 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) +#define PPC_RAW_AND(d, a, b) (0x7c000038 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) +#define PPC_RAW_ANDI(d, a, i) (0x70000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_AND_DOT(d, a, b) (0x7c000039 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) +#define PPC_RAW_OR(d, a, b) (0x7c000378 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) +#define PPC_RAW_MR(d, a) PPC_RAW_OR(d, a, a) +#define PPC_RAW_ORI(d, a, i) (PPC_INST_ORI | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_ORIS(d, a, i) (PPC_INST_ORIS | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_XOR(d, a, b) (0x7c000278 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) +#define PPC_RAW_XORI(d, a, i) (0x68000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_XORIS(d, a, i) (0x6c000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_EXTSW(d, a) (0x7c0007b4 | ___PPC_RA(d) | ___PPC_RS(a)) +#define PPC_RAW_SLW(d, a, s) (0x7c000030 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s)) +#define PPC_RAW_SLD(d, a, s) (0x7c000036 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s)) +#define PPC_RAW_SRW(d, a, s) (0x7c000430 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s)) +#define PPC_RAW_SRAW(d, a, s) (0x7c000630 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s)) +#define PPC_RAW_SRAWI(d, a, i) (0x7c000670 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i)) +#define PPC_RAW_SRD(d, a, s) (0x7c000436 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s)) +#define PPC_RAW_SRAD(d, a, s) (0x7c000634 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s)) +#define PPC_RAW_SRADI(d, a, i) (0x7c000674 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i)) +#define PPC_RAW_RLWINM(d, a, i, mb, me) (0x54000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me)) +#define PPC_RAW_RLWINM_DOT(d, a, i, mb, me) \ + (0x54000001 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me)) +#define PPC_RAW_RLWIMI(d, a, i, mb, me) (0x50000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me)) +#define PPC_RAW_RLDICL(d, a, i, mb) (0x78000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_MB64(mb)) +#define PPC_RAW_RLDICR(d, a, i, me) (PPC_INST_RLDICR | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_ME64(me)) + +/* slwi = rlwinm Rx, Ry, n, 0, 31-n */ +#define PPC_RAW_SLWI(d, a, i) PPC_RAW_RLWINM(d, a, i, 0, 31-(i)) +/* srwi = rlwinm Rx, Ry, 32-n, n, 31 */ +#define PPC_RAW_SRWI(d, a, i) PPC_RAW_RLWINM(d, a, 32-(i), i, 31) +/* sldi = rldicr Rx, Ry, n, 63-n */ +#define PPC_RAW_SLDI(d, a, i) PPC_RAW_RLDICR(d, a, i, 63-(i)) +/* sldi = rldicl Rx, Ry, 64-n, n */ +#define PPC_RAW_SRDI(d, a, i) PPC_RAW_RLDICL(d, a, 64-(i), i) + +#define PPC_RAW_NEG(d, a) (0x7c0000d0 | ___PPC_RT(d) | ___PPC_RA(a)) + +/* Deal with instructions that older assemblers aren't aware of */ +#define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH) +#define PPC_CP_ABORT stringify_in_c(.long PPC_RAW_CP_ABORT) +#define PPC_COPY(a, b) stringify_in_c(.long PPC_RAW_COPY(a, b)) +#define PPC_DARN(t, l) stringify_in_c(.long PPC_RAW_DARN(t, l)) +#define PPC_DCBAL(a, b) stringify_in_c(.long PPC_RAW_DCBAL(a, b)) +#define PPC_DCBZL(a, b) stringify_in_c(.long PPC_RAW_DCBZL(a, b)) +#define PPC_DIVDE(t, a, b) stringify_in_c(.long PPC_RAW_DIVDE(t, a, b)) +#define PPC_DIVDEU(t, a, b) stringify_in_c(.long PPC_RAW_DIVDEU(t, a, b)) +#define PPC_LQARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LQARX(t, a, b, eh)) +#define PPC_LDARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LDARX(t, a, b, eh)) +#define PPC_LWARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LWARX(t, a, b, eh)) +#define PPC_STQCX(t, a, b) stringify_in_c(.long PPC_RAW_STQCX(t, a, b)) +#define PPC_MADDHD(t, a, b, c) stringify_in_c(.long PPC_RAW_MADDHD(t, a, b, c)) +#define PPC_MADDHDU(t, a, b, c) stringify_in_c(.long PPC_RAW_MADDHDU(t, a, b, c)) +#define PPC_MADDLD(t, a, b, c) stringify_in_c(.long PPC_RAW_MADDLD(t, a, b, c)) +#define PPC_MSGSND(b) stringify_in_c(.long PPC_RAW_MSGSND(b)) +#define PPC_MSGSYNC stringify_in_c(.long PPC_RAW_MSGSYNC) +#define PPC_MSGCLR(b) stringify_in_c(.long PPC_RAW_MSGCLR(b)) +#define PPC_MSGSNDP(b) stringify_in_c(.long PPC_RAW_MSGSNDP(b)) +#define PPC_MSGCLRP(b) stringify_in_c(.long PPC_RAW_MSGCLRP(b)) +#define PPC_PASTE(a, b) stringify_in_c(.long PPC_RAW_PASTE(a, b)) +#define PPC_POPCNTB(a, s) stringify_in_c(.long PPC_RAW_POPCNTB(a, s)) +#define PPC_POPCNTD(a, s) stringify_in_c(.long PPC_RAW_POPCNTD(a, s)) +#define PPC_POPCNTW(a, s) stringify_in_c(.long PPC_RAW_POPCNTW(a, s)) +#define PPC_RFCI stringify_in_c(.long PPC_RAW_RFCI) +#define PPC_RFDI stringify_in_c(.long PPC_RAW_RFDI) +#define PPC_RFMCI stringify_in_c(.long PPC_RAW_RFMCI) +#define PPC_TLBILX(t, a, b) stringify_in_c(.long PPC_RAW_TLBILX(t, a, b)) +#define PPC_TLBILX_ALL(a, b) PPC_TLBILX(0, a, b) +#define PPC_TLBILX_PID(a, b) PPC_TLBILX(1, a, b) +#define PPC_TLBILX_VA(a, b) PPC_TLBILX(3, a, b) +#define PPC_WAIT(w) stringify_in_c(.long PPC_RAW_WAIT(w)) +#define PPC_TLBIE(lp, a) stringify_in_c(.long PPC_RAW_TLBIE(lp, a)) +#define PPC_TLBIE_5(rb, rs, ric, prs, r) \ + stringify_in_c(.long PPC_RAW_TLBIE_5(rb, rs, ric, prs, r)) +#define PPC_TLBIEL(rb,rs,ric,prs,r) \ + stringify_in_c(.long PPC_RAW_TLBIEL(rb, rs, ric, prs, r)) +#define PPC_TLBSRX_DOT(a, b) stringify_in_c(.long PPC_RAW_TLBSRX_DOT(a, b)) +#define PPC_TLBIVAX(a, b) stringify_in_c(.long PPC_RAW_TLBIVAX(a, b)) + +#define PPC_ERATWE(s, a, w) stringify_in_c(.long PPC_RAW_ERATWE(s, a, w)) +#define PPC_ERATRE(s, a, w) stringify_in_c(.long PPC_RAW_ERATRE(a, a, w)) +#define PPC_ERATILX(t, a, b) stringify_in_c(.long PPC_RAW_ERATILX(t, a, b)) +#define PPC_ERATIVAX(s, a, b) stringify_in_c(.long PPC_RAW_ERATIVAX(s, a, b)) +#define PPC_ERATSX(t, a, w) stringify_in_c(.long PPC_RAW_ERATSX(t, a, w)) +#define PPC_ERATSX_DOT(t, a, w) stringify_in_c(.long PPC_RAW_ERATSX_DOT(t, a, w)) +#define PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_RAW_SLBFEE_DOT(t, b)) +#define __PPC_SLBFEE_DOT(t, b) stringify_in_c(.long __PPC_RAW_SLBFEE_DOT(t, b)) +#define PPC_ICBT(c, a, b) stringify_in_c(.long PPC_RAW_ICBT(c, a, b)) +/* PASemi instructions */ +#define LBZCIX(t, a, b) stringify_in_c(.long PPC_RAW_LBZCIX(t, a, b)) +#define STBCIX(s, a, b) stringify_in_c(.long PPC_RAW_STBCIX(s, a, b)) +#define PPC_DCBFPS(a, b) stringify_in_c(.long PPC_RAW_DCBFPS(a, b)) +#define PPC_DCBSTPS(a, b) stringify_in_c(.long PPC_RAW_DCBSTPS(a, b)) +#define PPC_PHWSYNC stringify_in_c(.long PPC_RAW_PHWSYNC) +#define PPC_PLWSYNC stringify_in_c(.long PPC_RAW_PLWSYNC) +#define STXVD2X(s, a, b) stringify_in_c(.long PPC_RAW_STXVD2X(s, a, b)) +#define LXVD2X(s, a, b) stringify_in_c(.long PPC_RAW_LXVD2X(s, a, b)) +#define MFVRD(a, t) stringify_in_c(.long PPC_RAW_MFVRD(a, t)) +#define MTVRD(t, a) stringify_in_c(.long PPC_RAW_MTVRD(t, a)) +#define VPMSUMW(t, a, b) stringify_in_c(.long PPC_RAW_VPMSUMW(t, a, b)) +#define VPMSUMD(t, a, b) stringify_in_c(.long PPC_RAW_VPMSUMD(t, a, b)) +#define XXLOR(t, a, b) stringify_in_c(.long PPC_RAW_XXLOR(t, a, b)) +#define XXSWAPD(t, a) stringify_in_c(.long PPC_RAW_XXSWAPD(t, a)) +#define XVCPSGNDP(t, a, b) stringify_in_c(.long (PPC_RAW_XVCPSGNDP(t, a, b))) #define VPERMXOR(vrt, vra, vrb, vrc) \ - stringify_in_c(.long (PPC_INST_VPERMXOR | \ - ___PPC_RT(vrt) | ___PPC_RA(vra) | \ - ___PPC_RB(vrb) | (((vrc) & 0x1f) << 6))) + stringify_in_c(.long (PPC_RAW_VPERMXOR(vrt, vra, vrb, vrc))) -#define PPC_NAP stringify_in_c(.long PPC_INST_NAP) -#define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP) -#define PPC_WINKLE stringify_in_c(.long PPC_INST_WINKLE) +#define PPC_NAP stringify_in_c(.long PPC_RAW_NAP) +#define PPC_SLEEP stringify_in_c(.long PPC_RAW_SLEEP) +#define PPC_WINKLE stringify_in_c(.long PPC_RAW_WINKLE) -#define PPC_STOP stringify_in_c(.long PPC_INST_STOP) +#define PPC_STOP stringify_in_c(.long PPC_RAW_STOP) /* BHRB instructions */ -#define PPC_CLRBHRB stringify_in_c(.long PPC_INST_CLRBHRB) -#define PPC_MFBHRBE(r, n) stringify_in_c(.long PPC_INST_BHRBE | \ - __PPC_RT(r) | \ - (((n) & 0x3ff) << 11)) +#define PPC_CLRBHRB stringify_in_c(.long PPC_RAW_CLRBHRB) +#define PPC_MFBHRBE(r, n) stringify_in_c(.long PPC_RAW_MFBHRBE(r, n)) /* Transactional memory instructions */ -#define TRECHKPT stringify_in_c(.long PPC_INST_TRECHKPT) -#define TRECLAIM(r) stringify_in_c(.long PPC_INST_TRECLAIM \ - | __PPC_RA(r)) -#define TABORT(r) stringify_in_c(.long PPC_INST_TABORT \ - | __PPC_RA(r)) +#define TRECHKPT stringify_in_c(.long PPC_RAW_TRECHKPT) +#define TRECLAIM(r) stringify_in_c(.long PPC_RAW_TRECLAIM(r)) +#define TABORT(r) stringify_in_c(.long PPC_RAW_TABORT(r)) /* book3e thread control instructions */ -#define TMRN(x) ((((x) & 0x1f) << 16) | (((x) & 0x3e0) << 6)) -#define MTTMR(tmr, r) stringify_in_c(.long PPC_INST_MTTMR | \ - TMRN(tmr) | ___PPC_RS(r)) -#define MFTMR(tmr, r) stringify_in_c(.long PPC_INST_MFTMR | \ - TMRN(tmr) | ___PPC_RT(r)) +#define MTTMR(tmr, r) stringify_in_c(.long PPC_RAW_MTTMR(tmr, r)) +#define MFTMR(tmr, r) stringify_in_c(.long PPC_RAW_MFTMR(tmr, r)) /* Coprocessor instructions */ -#define PPC_ICSWX(s, a, b) stringify_in_c(.long PPC_INST_ICSWX | \ - ___PPC_RS(s) | \ - ___PPC_RA(a) | \ - ___PPC_RB(b)) -#define PPC_ICSWEPX(s, a, b) stringify_in_c(.long PPC_INST_ICSWEPX | \ - ___PPC_RS(s) | \ - ___PPC_RA(a) | \ - ___PPC_RB(b)) +#define PPC_ICSWX(s, a, b) stringify_in_c(.long PPC_RAW_ICSWX(s, a, b)) +#define PPC_ICSWEPX(s, a, b) stringify_in_c(.long PPC_RAW_ICSWEPX(s, a, b)) -#define PPC_SLBIA(IH) stringify_in_c(.long PPC_INST_SLBIA | \ - ((IH & 0x7) << 21)) +#define PPC_SLBIA(IH) stringify_in_c(.long PPC_RAW_SLBIA(IH)) /* * These may only be used on ISA v3.0 or later (aka. CPU_FTR_ARCH_300, radix @@ -611,12 +612,8 @@ #define PPC_RADIX_INVALIDATE_ERAT_USER PPC_SLBIA(3) #define PPC_RADIX_INVALIDATE_ERAT_GUEST PPC_SLBIA(6) -#define VCMPEQUD_RC(vrt, vra, vrb) stringify_in_c(.long PPC_INST_VCMPEQUD | \ - ___PPC_RT(vrt) | ___PPC_RA(vra) | \ - ___PPC_RB(vrb) | __PPC_RC21) +#define VCMPEQUD_RC(vrt, vra, vrb) stringify_in_c(.long PPC_RAW_VCMPEQUD_RC(vrt, vra, vrb)) -#define VCMPEQUB_RC(vrt, vra, vrb) stringify_in_c(.long PPC_INST_VCMPEQUB | \ - ___PPC_RT(vrt) | ___PPC_RA(vra) | \ - ___PPC_RB(vrb) | __PPC_RC21) +#define VCMPEQUB_RC(vrt, vra, vrb) stringify_in_c(.long PPC_RAW_VCMPEQUB_RC(vrt, vra, vrb)) #endif /* _ASM_POWERPC_PPC_OPCODE_H */ diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 6b03dff61a05..b4cc6608131c 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -755,6 +755,8 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) #define N_SLINE 68 #define N_SO 100 +#define RFSCV .long 0x4c0000a4 + /* * Create an endian fixup trampoline * @@ -774,7 +776,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) #define FIXUP_ENDIAN #else /* - * This version may be used in in HV or non-HV context. + * This version may be used in HV or non-HV context. * MSR[EE] must be disabled. */ #define FIXUP_ENDIAN \ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 52a67835057a..ed0d633ab5aa 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -237,7 +237,6 @@ struct thread_struct { #ifdef CONFIG_PPC_MEM_KEYS unsigned long amr; unsigned long iamr; - unsigned long uamor; #endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER void* kvm_shadow_vcpu; /* KVM internal data */ @@ -272,6 +271,10 @@ struct thread_struct { unsigned mmcr0; unsigned used_ebb; + unsigned long mmcr3; + unsigned long sier2; + unsigned long sier3; + #endif }; diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index ac3970fff0d5..155a197c0aa1 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -222,9 +222,14 @@ static inline void set_trap(struct pt_regs *regs, unsigned long val) regs->trap = (regs->trap & TRAP_FLAGS_MASK) | (val & ~TRAP_FLAGS_MASK); } +static inline bool trap_is_scv(struct pt_regs *regs) +{ + return (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x3000); +} + static inline bool trap_is_syscall(struct pt_regs *regs) { - return TRAP(regs) == 0xc00; + return (trap_is_scv(regs) || TRAP(regs) == 0xc00); } static inline bool trap_norestart(struct pt_regs *regs) @@ -238,7 +243,7 @@ static inline void set_trap_norestart(struct pt_regs *regs) } #define arch_has_single_step() (1) -#ifndef CONFIG_BOOK3S_601 +#ifndef CONFIG_PPC_BOOK3S_601 #define arch_has_block_step() (true) #else #define arch_has_block_step() (false) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h new file mode 100644 index 000000000000..b752d34517b3 --- /dev/null +++ b/arch/powerpc/include/asm/qspinlock.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_QSPINLOCK_H +#define _ASM_POWERPC_QSPINLOCK_H + +#include +#include + +#define _Q_PENDING_LOOPS (1 << 9) /* not tuned */ + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __pv_queued_spin_unlock(struct qspinlock *lock); + +static __always_inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +{ + if (!is_shared_processor()) + native_queued_spin_lock_slowpath(lock, val); + else + __pv_queued_spin_lock_slowpath(lock, val); +} + +#define queued_spin_unlock queued_spin_unlock +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + if (!is_shared_processor()) + smp_store_release(&lock->locked, 0); + else + __pv_queued_spin_unlock(lock); +} + +#else +extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +#endif + +static __always_inline void queued_spin_lock(struct qspinlock *lock) +{ + u32 val = 0; + + if (likely(atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL))) + return; + + queued_spin_lock_slowpath(lock, val); +} +#define queued_spin_lock queued_spin_lock + +#define smp_mb__after_spinlock() smp_mb() + +static __always_inline int queued_spin_is_locked(struct qspinlock *lock) +{ + /* + * This barrier was added to simple spinlocks by commit 51d7d5205d338, + * but it should now be possible to remove it, asm arm64 has done with + * commit c6f5d02b6a0f. + */ + smp_mb(); + return atomic_read(&lock->val); +} +#define queued_spin_is_locked queued_spin_is_locked + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +#define SPIN_THRESHOLD (1<<15) /* not tuned */ + +static __always_inline void pv_wait(u8 *ptr, u8 val) +{ + if (*ptr != val) + return; + yield_to_any(); + /* + * We could pass in a CPU here if waiting in the queue and yield to + * the previous CPU in the queue. + */ +} + +static __always_inline void pv_kick(int cpu) +{ + prod_cpu(cpu); +} + +extern void __pv_init_lock_hash(void); + +static inline void pv_spinlocks_init(void) +{ + __pv_init_lock_hash(); +} + +#endif + +#include + +#endif /* _ASM_POWERPC_QSPINLOCK_H */ diff --git a/arch/powerpc/include/asm/qspinlock_paravirt.h b/arch/powerpc/include/asm/qspinlock_paravirt.h new file mode 100644 index 000000000000..6b60e7736a47 --- /dev/null +++ b/arch/powerpc/include/asm/qspinlock_paravirt.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_POWERPC_QSPINLOCK_PARAVIRT_H +#define _ASM_POWERPC_QSPINLOCK_PARAVIRT_H + +EXPORT_SYMBOL(__pv_queued_spin_unlock); + +#endif /* _ASM_POWERPC_QSPINLOCK_PARAVIRT_H */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 88e6c78100d9..41419f1fc00f 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -12,6 +12,7 @@ #ifdef __KERNEL__ #include +#include #include #include #include @@ -876,7 +877,9 @@ #define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */ #define SPRN_MMCR1 798 #define SPRN_MMCR2 785 +#define SPRN_MMCR3 754 #define SPRN_UMMCR2 769 +#define SPRN_UMMCR3 738 #define SPRN_MMCRA 0x312 #define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */ #define MMCRA_SDAR_DCACHE_MISS 0x40000000UL @@ -886,6 +889,7 @@ #define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */ #define MMCRA_SLOT_SHIFT 24 #define MMCRA_SAMPLE_ENABLE 0x00000001UL /* enable sampling */ +#define MMCRA_BHRB_DISABLE _UL(0x2000000000) // BHRB disable bit for ISA v3.1 #define POWER6_MMCRA_SDSYNC 0x0000080000000000ULL /* SDAR/SIAR synced */ #define POWER6_MMCRA_SIHV 0x0000040000000000ULL #define POWER6_MMCRA_SIPR 0x0000020000000000ULL @@ -918,6 +922,10 @@ #define SIER_SIHV 0x1000000 /* Sampled MSR_HV */ #define SIER_SIAR_VALID 0x0400000 /* SIAR contents valid */ #define SIER_SDAR_VALID 0x0200000 /* SDAR contents valid */ +#define SPRN_SIER2 752 +#define SPRN_SIER3 753 +#define SPRN_USIER2 736 +#define SPRN_USIER3 737 #define SPRN_SIAR 796 #define SPRN_SDAR 797 #define SPRN_TACR 888 @@ -1472,7 +1480,7 @@ static inline void update_power8_hid0(unsigned long hid0) { /* * The HID0 update on Power8 should at the very least be - * preceded by a a SYNC instruction followed by an ISYNC + * preceded by a SYNC instruction followed by an ISYNC * instruction */ asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0)); diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 014968f25f7e..55f9a154c95d 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -215,7 +215,6 @@ inline uint16_t pseries_errorlog_length(struct pseries_errorlog *sect) #define PSERIES_HP_ELOG_ACTION_ADD 1 #define PSERIES_HP_ELOG_ACTION_REMOVE 2 -#define PSERIES_HP_ELOG_ACTION_READD 3 #define PSERIES_HP_ELOG_ID_DRC_NAME 1 #define PSERIES_HP_ELOG_ID_DRC_INDEX 2 @@ -253,8 +252,6 @@ extern int rtas_set_indicator_fast(int indicator, int index, int new_value); extern void rtas_progress(char *s, unsigned short hex); extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data); extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data); -extern int rtas_online_cpus_mask(cpumask_var_t cpus); -extern int rtas_offline_cpus_mask(cpumask_var_t cpus); extern int rtas_ibm_suspend_me(u64 handle); struct rtc_time; diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index 7c05e95a5c44..fbb8fa32150f 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -63,6 +63,8 @@ static inline bool security_ftr_enabled(u64 feature) // bcctr 2,0,0 triggers a hardware assisted count cache flush #define SEC_FTR_BCCTR_FLUSH_ASSIST 0x0000000000000800ull +// bcctr 2,0,0 triggers a hardware assisted link stack flush +#define SEC_FTR_BCCTR_LINK_FLUSH_ASSIST 0x0000000000002000ull // Features indicating need for Spectre/Meltdown mitigations diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 65676e2325b8..9efbddee2bca 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -30,12 +30,12 @@ void setup_panic(void); #define ARCH_PANIC_TIMEOUT 180 #ifdef CONFIG_PPC_PSERIES -extern void pseries_enable_reloc_on_exc(void); +extern bool pseries_enable_reloc_on_exc(void); extern void pseries_disable_reloc_on_exc(void); extern void pseries_big_endian_exceptions(void); extern void pseries_little_endian_exceptions(void); #else -static inline void pseries_enable_reloc_on_exc(void) {} +static inline bool pseries_enable_reloc_on_exc(void) { return false; } static inline void pseries_disable_reloc_on_exc(void) {} static inline void pseries_big_endian_exceptions(void) {} static inline void pseries_little_endian_exceptions(void) {} diff --git a/arch/powerpc/include/asm/simple_spinlock.h b/arch/powerpc/include/asm/simple_spinlock.h new file mode 100644 index 000000000000..9c3c30534333 --- /dev/null +++ b/arch/powerpc/include/asm/simple_spinlock.h @@ -0,0 +1,288 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_POWERPC_SIMPLE_SPINLOCK_H +#define _ASM_POWERPC_SIMPLE_SPINLOCK_H + +/* + * Simple spin lock operations. + * + * Copyright (C) 2001-2004 Paul Mackerras , IBM + * Copyright (C) 2001 Anton Blanchard , IBM + * Copyright (C) 2002 Dave Engebretsen , IBM + * Rework to support virtual processors + * + * Type of int is used as a full 64b word is not necessary. + * + * (the type definitions are in asm/simple_spinlock_types.h) + */ +#include +#include +#include +#include +#include + +#ifdef CONFIG_PPC64 +/* use 0x800000yy when locked, where yy == CPU number */ +#ifdef __BIG_ENDIAN__ +#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) +#else +#define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index)) +#endif +#else +#define LOCK_TOKEN 1 +#endif + +static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return lock.slock == 0; +} + +static inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + smp_mb(); + return !arch_spin_value_unlocked(*lock); +} + +/* + * This returns the old value in the lock, so we succeeded + * in getting the lock if the return value is 0. + */ +static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock) +{ + unsigned long tmp, token; + + token = LOCK_TOKEN; + __asm__ __volatile__( +"1: " PPC_LWARX(%0,0,%2,1) "\n\ + cmpwi 0,%0,0\n\ + bne- 2f\n\ + stwcx. %1,0,%2\n\ + bne- 1b\n" + PPC_ACQUIRE_BARRIER +"2:" + : "=&r" (tmp) + : "r" (token), "r" (&lock->slock) + : "cr0", "memory"); + + return tmp; +} + +static inline int arch_spin_trylock(arch_spinlock_t *lock) +{ + return __arch_spin_trylock(lock) == 0; +} + +/* + * On a system with shared processors (that is, where a physical + * processor is multiplexed between several virtual processors), + * there is no point spinning on a lock if the holder of the lock + * isn't currently scheduled on a physical processor. Instead + * we detect this situation and ask the hypervisor to give the + * rest of our timeslice to the lock holder. + * + * So that we can tell which virtual processor is holding a lock, + * we put 0x80000000 | smp_processor_id() in the lock when it is + * held. Conveniently, we have a word in the paca that holds this + * value. + */ + +#if defined(CONFIG_PPC_SPLPAR) +/* We only yield to the hypervisor if we are in shared processor mode */ +void splpar_spin_yield(arch_spinlock_t *lock); +void splpar_rw_yield(arch_rwlock_t *lock); +#else /* SPLPAR */ +static inline void splpar_spin_yield(arch_spinlock_t *lock) {}; +static inline void splpar_rw_yield(arch_rwlock_t *lock) {}; +#endif + +static inline void spin_yield(arch_spinlock_t *lock) +{ + if (is_shared_processor()) + splpar_spin_yield(lock); + else + barrier(); +} + +static inline void rw_yield(arch_rwlock_t *lock) +{ + if (is_shared_processor()) + splpar_rw_yield(lock); + else + barrier(); +} + +static inline void arch_spin_lock(arch_spinlock_t *lock) +{ + while (1) { + if (likely(__arch_spin_trylock(lock) == 0)) + break; + do { + HMT_low(); + if (is_shared_processor()) + splpar_spin_yield(lock); + } while (unlikely(lock->slock != 0)); + HMT_medium(); + } +} + +static inline +void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) +{ + unsigned long flags_dis; + + while (1) { + if (likely(__arch_spin_trylock(lock) == 0)) + break; + local_save_flags(flags_dis); + local_irq_restore(flags); + do { + HMT_low(); + if (is_shared_processor()) + splpar_spin_yield(lock); + } while (unlikely(lock->slock != 0)); + HMT_medium(); + local_irq_restore(flags_dis); + } +} +#define arch_spin_lock_flags arch_spin_lock_flags + +static inline void arch_spin_unlock(arch_spinlock_t *lock) +{ + __asm__ __volatile__("# arch_spin_unlock\n\t" + PPC_RELEASE_BARRIER: : :"memory"); + lock->slock = 0; +} + +/* + * Read-write spinlocks, allowing multiple readers + * but only one writer. + * + * NOTE! it is quite common to have readers in interrupts + * but no interrupt writers. For those circumstances we + * can "mix" irq-safe locks - any writer needs to get a + * irq-safe write-lock, but readers can get non-irqsafe + * read-locks. + */ + +#ifdef CONFIG_PPC64 +#define __DO_SIGN_EXTEND "extsw %0,%0\n" +#define WRLOCK_TOKEN LOCK_TOKEN /* it's negative */ +#else +#define __DO_SIGN_EXTEND +#define WRLOCK_TOKEN (-1) +#endif + +/* + * This returns the old value in the lock + 1, + * so we got a read lock if the return value is > 0. + */ +static inline long __arch_read_trylock(arch_rwlock_t *rw) +{ + long tmp; + + __asm__ __volatile__( +"1: " PPC_LWARX(%0,0,%1,1) "\n" + __DO_SIGN_EXTEND +" addic. %0,%0,1\n\ + ble- 2f\n" +" stwcx. %0,0,%1\n\ + bne- 1b\n" + PPC_ACQUIRE_BARRIER +"2:" : "=&r" (tmp) + : "r" (&rw->lock) + : "cr0", "xer", "memory"); + + return tmp; +} + +/* + * This returns the old value in the lock, + * so we got the write lock if the return value is 0. + */ +static inline long __arch_write_trylock(arch_rwlock_t *rw) +{ + long tmp, token; + + token = WRLOCK_TOKEN; + __asm__ __volatile__( +"1: " PPC_LWARX(%0,0,%2,1) "\n\ + cmpwi 0,%0,0\n\ + bne- 2f\n" +" stwcx. %1,0,%2\n\ + bne- 1b\n" + PPC_ACQUIRE_BARRIER +"2:" : "=&r" (tmp) + : "r" (token), "r" (&rw->lock) + : "cr0", "memory"); + + return tmp; +} + +static inline void arch_read_lock(arch_rwlock_t *rw) +{ + while (1) { + if (likely(__arch_read_trylock(rw) > 0)) + break; + do { + HMT_low(); + if (is_shared_processor()) + splpar_rw_yield(rw); + } while (unlikely(rw->lock < 0)); + HMT_medium(); + } +} + +static inline void arch_write_lock(arch_rwlock_t *rw) +{ + while (1) { + if (likely(__arch_write_trylock(rw) == 0)) + break; + do { + HMT_low(); + if (is_shared_processor()) + splpar_rw_yield(rw); + } while (unlikely(rw->lock != 0)); + HMT_medium(); + } +} + +static inline int arch_read_trylock(arch_rwlock_t *rw) +{ + return __arch_read_trylock(rw) > 0; +} + +static inline int arch_write_trylock(arch_rwlock_t *rw) +{ + return __arch_write_trylock(rw) == 0; +} + +static inline void arch_read_unlock(arch_rwlock_t *rw) +{ + long tmp; + + __asm__ __volatile__( + "# read_unlock\n\t" + PPC_RELEASE_BARRIER +"1: lwarx %0,0,%1\n\ + addic %0,%0,-1\n" +" stwcx. %0,0,%1\n\ + bne- 1b" + : "=&r"(tmp) + : "r"(&rw->lock) + : "cr0", "xer", "memory"); +} + +static inline void arch_write_unlock(arch_rwlock_t *rw) +{ + __asm__ __volatile__("# write_unlock\n\t" + PPC_RELEASE_BARRIER: : :"memory"); + rw->lock = 0; +} + +#define arch_spin_relax(lock) spin_yield(lock) +#define arch_read_relax(lock) rw_yield(lock) +#define arch_write_relax(lock) rw_yield(lock) + +/* See include/linux/spinlock.h */ +#define smp_mb__after_spinlock() smp_mb() + +#endif /* _ASM_POWERPC_SIMPLE_SPINLOCK_H */ diff --git a/arch/powerpc/include/asm/simple_spinlock_types.h b/arch/powerpc/include/asm/simple_spinlock_types.h new file mode 100644 index 000000000000..0f3cdd8faa95 --- /dev/null +++ b/arch/powerpc/include/asm/simple_spinlock_types.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H +#define _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H + +#ifndef __LINUX_SPINLOCK_TYPES_H +# error "please don't include this file directly" +#endif + +typedef struct { + volatile unsigned int slock; +} arch_spinlock_t; + +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } + +typedef struct { + volatile signed int lock; +} arch_rwlock_t; + +#define __ARCH_RW_LOCK_UNLOCKED { 0 } + +#endif /* _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H */ diff --git a/arch/powerpc/include/asm/smu.h b/arch/powerpc/include/asm/smu.h index 8dff086c0cab..4b30a0205c93 100644 --- a/arch/powerpc/include/asm/smu.h +++ b/arch/powerpc/include/asm/smu.h @@ -108,7 +108,7 @@ /* * i2c commands * - * To issue an i2c command, first is to send a parameter block to the + * To issue an i2c command, first is to send a parameter block to * the SMU. This is a command of type 0x9a with 9 bytes of header * eventually followed by data for a write: * diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h index c89b32443cff..1e6fa371cc38 100644 --- a/arch/powerpc/include/asm/sparsemem.h +++ b/arch/powerpc/include/asm/sparsemem.h @@ -17,12 +17,6 @@ extern int create_section_mapping(unsigned long start, unsigned long end, int nid, pgprot_t prot); extern int remove_section_mapping(unsigned long start, unsigned long end); -#ifdef CONFIG_PPC_BOOK3S_64 -extern int resize_hpt_for_hotplug(unsigned long new_mem_size); -#else -static inline int resize_hpt_for_hotplug(unsigned long new_mem_size) { return 0; } -#endif - #ifdef CONFIG_NUMA extern int hot_add_scn_to_nid(unsigned long scn_addr); #else diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 2d620896cdae..6ec72282888d 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -3,312 +3,16 @@ #define __ASM_SPINLOCK_H #ifdef __KERNEL__ -/* - * Simple spin lock operations. - * - * Copyright (C) 2001-2004 Paul Mackerras , IBM - * Copyright (C) 2001 Anton Blanchard , IBM - * Copyright (C) 2002 Dave Engebretsen , IBM - * Rework to support virtual processors - * - * Type of int is used as a full 64b word is not necessary. - * - * (the type definitions are in asm/spinlock_types.h) - */ -#include -#include -#ifdef CONFIG_PPC64 -#include -#include -#endif -#include -#include - -#ifdef CONFIG_PPC64 -/* use 0x800000yy when locked, where yy == CPU number */ -#ifdef __BIG_ENDIAN__ -#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) +#ifdef CONFIG_PPC_QUEUED_SPINLOCKS +#include +#include #else -#define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index)) -#endif -#else -#define LOCK_TOKEN 1 +#include #endif -#ifdef CONFIG_PPC_PSERIES -DECLARE_STATIC_KEY_FALSE(shared_processor); - -#define vcpu_is_preempted vcpu_is_preempted -static inline bool vcpu_is_preempted(int cpu) -{ - if (!static_branch_unlikely(&shared_processor)) - return false; - return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1); -} +#ifndef CONFIG_PARAVIRT_SPINLOCKS +static inline void pv_spinlocks_init(void) { } #endif -static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) -{ - return lock.slock == 0; -} - -static inline int arch_spin_is_locked(arch_spinlock_t *lock) -{ - smp_mb(); - return !arch_spin_value_unlocked(*lock); -} - -/* - * This returns the old value in the lock, so we succeeded - * in getting the lock if the return value is 0. - */ -static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock) -{ - unsigned long tmp, token; - - token = LOCK_TOKEN; - __asm__ __volatile__( -"1: " PPC_LWARX(%0,0,%2,1) "\n\ - cmpwi 0,%0,0\n\ - bne- 2f\n\ - stwcx. %1,0,%2\n\ - bne- 1b\n" - PPC_ACQUIRE_BARRIER -"2:" - : "=&r" (tmp) - : "r" (token), "r" (&lock->slock) - : "cr0", "memory"); - - return tmp; -} - -static inline int arch_spin_trylock(arch_spinlock_t *lock) -{ - return __arch_spin_trylock(lock) == 0; -} - -/* - * On a system with shared processors (that is, where a physical - * processor is multiplexed between several virtual processors), - * there is no point spinning on a lock if the holder of the lock - * isn't currently scheduled on a physical processor. Instead - * we detect this situation and ask the hypervisor to give the - * rest of our timeslice to the lock holder. - * - * So that we can tell which virtual processor is holding a lock, - * we put 0x80000000 | smp_processor_id() in the lock when it is - * held. Conveniently, we have a word in the paca that holds this - * value. - */ - -#if defined(CONFIG_PPC_SPLPAR) -/* We only yield to the hypervisor if we are in shared processor mode */ -void splpar_spin_yield(arch_spinlock_t *lock); -void splpar_rw_yield(arch_rwlock_t *lock); -#else /* SPLPAR */ -static inline void splpar_spin_yield(arch_spinlock_t *lock) {}; -static inline void splpar_rw_yield(arch_rwlock_t *lock) {}; -#endif - -static inline bool is_shared_processor(void) -{ -#ifdef CONFIG_PPC_SPLPAR - return static_branch_unlikely(&shared_processor); -#else - return false; -#endif -} - -static inline void spin_yield(arch_spinlock_t *lock) -{ - if (is_shared_processor()) - splpar_spin_yield(lock); - else - barrier(); -} - -static inline void rw_yield(arch_rwlock_t *lock) -{ - if (is_shared_processor()) - splpar_rw_yield(lock); - else - barrier(); -} - -static inline void arch_spin_lock(arch_spinlock_t *lock) -{ - while (1) { - if (likely(__arch_spin_trylock(lock) == 0)) - break; - do { - HMT_low(); - if (is_shared_processor()) - splpar_spin_yield(lock); - } while (unlikely(lock->slock != 0)); - HMT_medium(); - } -} - -static inline -void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) -{ - unsigned long flags_dis; - - while (1) { - if (likely(__arch_spin_trylock(lock) == 0)) - break; - local_save_flags(flags_dis); - local_irq_restore(flags); - do { - HMT_low(); - if (is_shared_processor()) - splpar_spin_yield(lock); - } while (unlikely(lock->slock != 0)); - HMT_medium(); - local_irq_restore(flags_dis); - } -} -#define arch_spin_lock_flags arch_spin_lock_flags - -static inline void arch_spin_unlock(arch_spinlock_t *lock) -{ - __asm__ __volatile__("# arch_spin_unlock\n\t" - PPC_RELEASE_BARRIER: : :"memory"); - lock->slock = 0; -} - -/* - * Read-write spinlocks, allowing multiple readers - * but only one writer. - * - * NOTE! it is quite common to have readers in interrupts - * but no interrupt writers. For those circumstances we - * can "mix" irq-safe locks - any writer needs to get a - * irq-safe write-lock, but readers can get non-irqsafe - * read-locks. - */ - -#ifdef CONFIG_PPC64 -#define __DO_SIGN_EXTEND "extsw %0,%0\n" -#define WRLOCK_TOKEN LOCK_TOKEN /* it's negative */ -#else -#define __DO_SIGN_EXTEND -#define WRLOCK_TOKEN (-1) -#endif - -/* - * This returns the old value in the lock + 1, - * so we got a read lock if the return value is > 0. - */ -static inline long __arch_read_trylock(arch_rwlock_t *rw) -{ - long tmp; - - __asm__ __volatile__( -"1: " PPC_LWARX(%0,0,%1,1) "\n" - __DO_SIGN_EXTEND -" addic. %0,%0,1\n\ - ble- 2f\n" -" stwcx. %0,0,%1\n\ - bne- 1b\n" - PPC_ACQUIRE_BARRIER -"2:" : "=&r" (tmp) - : "r" (&rw->lock) - : "cr0", "xer", "memory"); - - return tmp; -} - -/* - * This returns the old value in the lock, - * so we got the write lock if the return value is 0. - */ -static inline long __arch_write_trylock(arch_rwlock_t *rw) -{ - long tmp, token; - - token = WRLOCK_TOKEN; - __asm__ __volatile__( -"1: " PPC_LWARX(%0,0,%2,1) "\n\ - cmpwi 0,%0,0\n\ - bne- 2f\n" -" stwcx. %1,0,%2\n\ - bne- 1b\n" - PPC_ACQUIRE_BARRIER -"2:" : "=&r" (tmp) - : "r" (token), "r" (&rw->lock) - : "cr0", "memory"); - - return tmp; -} - -static inline void arch_read_lock(arch_rwlock_t *rw) -{ - while (1) { - if (likely(__arch_read_trylock(rw) > 0)) - break; - do { - HMT_low(); - if (is_shared_processor()) - splpar_rw_yield(rw); - } while (unlikely(rw->lock < 0)); - HMT_medium(); - } -} - -static inline void arch_write_lock(arch_rwlock_t *rw) -{ - while (1) { - if (likely(__arch_write_trylock(rw) == 0)) - break; - do { - HMT_low(); - if (is_shared_processor()) - splpar_rw_yield(rw); - } while (unlikely(rw->lock != 0)); - HMT_medium(); - } -} - -static inline int arch_read_trylock(arch_rwlock_t *rw) -{ - return __arch_read_trylock(rw) > 0; -} - -static inline int arch_write_trylock(arch_rwlock_t *rw) -{ - return __arch_write_trylock(rw) == 0; -} - -static inline void arch_read_unlock(arch_rwlock_t *rw) -{ - long tmp; - - __asm__ __volatile__( - "# read_unlock\n\t" - PPC_RELEASE_BARRIER -"1: lwarx %0,0,%1\n\ - addic %0,%0,-1\n" -" stwcx. %0,0,%1\n\ - bne- 1b" - : "=&r"(tmp) - : "r"(&rw->lock) - : "cr0", "xer", "memory"); -} - -static inline void arch_write_unlock(arch_rwlock_t *rw) -{ - __asm__ __volatile__("# write_unlock\n\t" - PPC_RELEASE_BARRIER: : :"memory"); - rw->lock = 0; -} - -#define arch_spin_relax(lock) spin_yield(lock) -#define arch_read_relax(lock) rw_yield(lock) -#define arch_write_relax(lock) rw_yield(lock) - -/* See include/linux/spinlock.h */ -#define smp_mb__after_spinlock() smp_mb() - #endif /* __KERNEL__ */ #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h index 87adaf13b7e8..c5d742f18021 100644 --- a/arch/powerpc/include/asm/spinlock_types.h +++ b/arch/powerpc/include/asm/spinlock_types.h @@ -6,16 +6,11 @@ # error "please don't include this file directly" #endif -typedef struct { - volatile unsigned int slock; -} arch_spinlock_t; - -#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } - -typedef struct { - volatile signed int lock; -} arch_rwlock_t; - -#define __ARCH_RW_LOCK_UNLOCKED { 0 } +#ifdef CONFIG_PPC_QUEUED_SPINLOCKS +#include +#include +#else +#include +#endif #endif diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 3b01c69a44aa..972ed0df154d 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -40,6 +40,7 @@ enum instruction_type { CACHEOP, BARRIER, SYSCALL, + SYSCALL_VECTORED_0, MFMSR, MTMSR, RFI, @@ -104,6 +105,12 @@ enum instruction_type { #define MKOP(t, f, s) ((t) | (f) | SIZE(s)) +/* Prefix instruction operands */ +#define GET_PREFIX_RA(i) (((i) >> 16) & 0x1f) +#define GET_PREFIX_R(i) ((i) & (1ul << 20)) + +extern s32 patch__exec_instr; + struct instruction_op { int type; int reg; diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h index b72692702f35..283552cd0e58 100644 --- a/arch/powerpc/include/asm/string.h +++ b/arch/powerpc/include/asm/string.h @@ -28,7 +28,7 @@ extern void * memcpy(void *,const void *,__kernel_size_t); extern void * memmove(void *,const void *,__kernel_size_t); extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); -extern void * memcpy_flushcache(void *,const void *,__kernel_size_t); +void memcpy_flushcache(void *dest, const void *src, size_t size); void *__memset(void *s, int c, __kernel_size_t count); void *__memcpy(void *to, const void *from, __kernel_size_t n); diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index b287cfc2dd85..cb326720a8a1 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -39,7 +39,6 @@ struct div_result { }; /* Accessor functions for the timebase (RTC on 601) registers. */ -/* If one day CONFIG_POWER is added just define __USE_RTC as 1 */ #define __USE_RTC() (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h index d2d2c4bd8435..6047402b0a4d 100644 --- a/arch/powerpc/include/asm/timex.h +++ b/arch/powerpc/include/asm/timex.h @@ -17,7 +17,7 @@ typedef unsigned long cycles_t; static inline cycles_t get_cycles(void) { - if (IS_ENABLED(CONFIG_BOOK3S_601)) + if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) return 0; return mftb(); diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 2db7ba789720..f0b6300e7dd3 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -43,7 +43,6 @@ extern void __init dump_numa_cpu_topology(void); extern int sysfs_add_device_to_node(struct device *dev, int nid); extern void sysfs_remove_device_from_node(struct device *dev, int nid); -extern int numa_update_cpu_topology(bool cpus_locked); static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) { @@ -78,11 +77,6 @@ static inline void sysfs_remove_device_from_node(struct device *dev, { } -static inline int numa_update_cpu_topology(bool cpus_locked) -{ - return 0; -} - static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {} static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) @@ -93,32 +87,12 @@ static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) #endif /* CONFIG_NUMA */ #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) -extern int start_topology_update(void); -extern int stop_topology_update(void); -extern int prrn_is_enabled(void); extern int find_and_online_cpu_nid(int cpu); -extern int timed_topology_update(int nsecs); #else -static inline int start_topology_update(void) -{ - return 0; -} -static inline int stop_topology_update(void) -{ - return 0; -} -static inline int prrn_is_enabled(void) -{ - return 0; -} static inline int find_and_online_cpu_nid(int cpu) { return 0; } -static inline int timed_topology_update(int nsecs) -{ - return 0; -} #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */ @@ -141,7 +115,6 @@ int get_physical_package_id(int cpu); #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_core_id(cpu) (cpu_to_core_id(cpu)) -int dlpar_cpu_readd(int cpu); #endif #endif diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index d08ea11b271c..309b4d65b74f 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -155,7 +155,6 @@ static inline void xive_smp_probe(void) { } static inline int xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; } static inline void xive_smp_setup_cpu(void) { } static inline void xive_smp_disable_cpu(void) { } -static inline void xive_kexec_teardown_cpu(int secondary) { } static inline void xive_shutdown(void) { } static inline void xive_flush_interrupt(void) { } diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 264e266a85bf..c3af3f324c5a 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -640,6 +640,11 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf) #define KVM_REG_PPC_PTCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0) +/* POWER10 registers */ +#define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1) +#define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2) +#define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) + /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs */ diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h index c0c737215b00..3a700351feca 100644 --- a/arch/powerpc/include/uapi/asm/mman.h +++ b/arch/powerpc/include/uapi/asm/mman.h @@ -11,7 +11,7 @@ #include -#define PROT_SAO 0x10 /* Strong Access Ordering */ +#define PROT_SAO 0x10 /* Unsupported since v5.9 */ #define MAP_RENAME MAP_ANONYMOUS /* In SunOS terminology */ #define MAP_NORESERVE 0x40 /* don't reserve swap pages */ diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h index 9ccecc1d6840..50ef95e2f5b1 100644 --- a/arch/powerpc/include/uapi/asm/papr_pdsm.h +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h @@ -72,6 +72,11 @@ #define PAPR_PDSM_DIMM_CRITICAL 2 #define PAPR_PDSM_DIMM_FATAL 3 +/* struct nd_papr_pdsm_health.extension_flags field flags */ + +/* Indicate that the 'dimm_fuel_gauge' field is valid */ +#define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1 + /* * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH * Various flags indicate the health status of the dimm. @@ -84,6 +89,7 @@ * dimm_locked : Contents of the dimm cant be modified until CEC reboot * dimm_encrypted : Contents of dimm are encrypted. * dimm_health : Dimm health indicator. One of PAPR_PDSM_DIMM_XXXX + * dimm_fuel_gauge : Life remaining of DIMM as a percentage from 0-100 */ struct nd_papr_pdsm_health { union { @@ -96,6 +102,9 @@ struct nd_papr_pdsm_health { __u8 dimm_locked; __u8 dimm_encrypted; __u16 dimm_health; + + /* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */ + __u16 dimm_fuel_gauge; }; __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; }; diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 244542ae2a91..d4d5946224f8 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -45,11 +45,10 @@ obj-y := cputable.o syscalls.o \ signal.o sysfs.o cacheinfo.o time.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o misc_$(BITS).o \ - of_platform.o prom_parse.o + of_platform.o prom_parse.o firmware.o obj-y += ptrace/ obj-$(CONFIG_PPC64) += setup_64.o \ - paca.o nvram_64.o firmware.o note.o \ - syscall_64.o + paca.o nvram_64.o note.o syscall_64.o obj-$(CONFIG_COMPAT) += sys_ppc32.o signal_32.o obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o @@ -71,7 +70,7 @@ obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o obj-$(CONFIG_RTAS_PROC) += rtas-proc.o obj-$(CONFIG_PPC_DT_CPU_FTRS) += dt_cpu_ftrs.o -obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ +obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_cache.o \ eeh_driver.o eeh_event.o eeh_sysfs.o obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 1f1ce8b86d5b..c7797eb958c7 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -178,11 +178,11 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, ret |= __get_user_inatomic(temp.v[1], p++); ret |= __get_user_inatomic(temp.v[2], p++); ret |= __get_user_inatomic(temp.v[3], p++); - /* fall through */ + fallthrough; case 4: ret |= __get_user_inatomic(temp.v[4], p++); ret |= __get_user_inatomic(temp.v[5], p++); - /* fall through */ + fallthrough; case 2: ret |= __get_user_inatomic(temp.v[6], p++); ret |= __get_user_inatomic(temp.v[7], p++); @@ -263,11 +263,11 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, ret |= __put_user_inatomic(data.v[1], p++); ret |= __put_user_inatomic(data.v[2], p++); ret |= __put_user_inatomic(data.v[3], p++); - /* fall through */ + fallthrough; case 4: ret |= __put_user_inatomic(data.v[4], p++); ret |= __put_user_inatomic(data.v[5], p++); - /* fall through */ + fallthrough; case 2: ret |= __put_user_inatomic(data.v[6], p++); ret |= __put_user_inatomic(data.v[7], p++); diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 6657dc6b2336..8711c2164b45 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -559,6 +559,8 @@ int main(void) OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending); OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request); OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr); + OFFSET(VCPU_MMCRA, kvm_vcpu, arch.mmcra); + OFFSET(VCPU_MMCRS, kvm_vcpu, arch.mmcrs); OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc); OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc); OFFSET(VCPU_SIAR, kvm_vcpu, arch.siar); @@ -696,6 +698,9 @@ int main(void) HSTATE_FIELD(HSTATE_SDAR, host_mmcr[4]); HSTATE_FIELD(HSTATE_MMCR2, host_mmcr[5]); HSTATE_FIELD(HSTATE_SIER, host_mmcr[6]); + HSTATE_FIELD(HSTATE_MMCR3, host_mmcr[7]); + HSTATE_FIELD(HSTATE_SIER2, host_mmcr[8]); + HSTATE_FIELD(HSTATE_SIER3, host_mmcr[9]); HSTATE_FIELD(HSTATE_PMC1, host_pmc[0]); HSTATE_FIELD(HSTATE_PMC2, host_pmc[1]); HSTATE_FIELD(HSTATE_PMC3, host_pmc[2]); diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 470336277c67..65ab9fcebd31 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -7,6 +7,8 @@ * Author: Nathan Lynch */ +#define pr_fmt(fmt) "cacheinfo: " fmt + #include #include #include @@ -166,7 +168,7 @@ static void release_cache_debugcheck(struct cache *cache) list_for_each_entry(iter, &cache_list, list) WARN_ONCE(iter->next_local == cache, - "cache for %pOF(%s) refers to cache for %pOF(%s)\n", + "cache for %pOFP(%s) refers to cache for %pOFP(%s)\n", iter->ofnode, cache_type_string(iter), cache->ofnode, @@ -178,7 +180,7 @@ static void release_cache(struct cache *cache) if (!cache) return; - pr_debug("freeing L%d %s cache for %pOF\n", cache->level, + pr_debug("freeing L%d %s cache for %pOFP\n", cache->level, cache_type_string(cache), cache->ofnode); release_cache_debugcheck(cache); @@ -193,7 +195,7 @@ static void cache_cpu_set(struct cache *cache, int cpu) while (next) { WARN_ONCE(cpumask_test_cpu(cpu, &next->shared_cpu_map), - "CPU %i already accounted in %pOF(%s)\n", + "CPU %i already accounted in %pOFP(%s)\n", cpu, next->ofnode, cache_type_string(next)); cpumask_set_cpu(cpu, &next->shared_cpu_map); @@ -352,7 +354,7 @@ static int cache_is_unified_d(const struct device_node *np) static struct cache *cache_do_one_devnode_unified(struct device_node *node, int level) { - pr_debug("creating L%d ucache for %pOF\n", level, node); + pr_debug("creating L%d ucache for %pOFP\n", level, node); return new_cache(cache_is_unified_d(node), level, node); } @@ -362,7 +364,7 @@ static struct cache *cache_do_one_devnode_split(struct device_node *node, { struct cache *dcache, *icache; - pr_debug("creating L%d dcache and icache for %pOF\n", level, + pr_debug("creating L%d dcache and icache for %pOFP\n", level, node); dcache = new_cache(CACHE_TYPE_DATA, level, node); @@ -418,12 +420,27 @@ static void link_cache_lists(struct cache *smaller, struct cache *bigger) } smaller->next_local = bigger; + + /* + * The cache->next_local list sorts by level ascending: + * L1d -> L1i -> L2 -> L3 ... + */ + WARN_ONCE((smaller->level == 1 && bigger->level > 2) || + (smaller->level > 1 && bigger->level != smaller->level + 1), + "linking L%i cache %pOFP to L%i cache %pOFP; skipped a level?\n", + smaller->level, smaller->ofnode, bigger->level, bigger->ofnode); } static void do_subsidiary_caches_debugcheck(struct cache *cache) { - WARN_ON_ONCE(cache->level != 1); - WARN_ON_ONCE(!of_node_is_type(cache->ofnode, "cpu")); + WARN_ONCE(cache->level != 1, + "instantiating cache chain from L%d %s cache for " + "%pOFP instead of an L1\n", cache->level, + cache_type_string(cache), cache->ofnode); + WARN_ONCE(!of_node_is_type(cache->ofnode, "cpu"), + "instantiating cache chain from node %pOFP of type '%s' " + "instead of a cpu node\n", cache->ofnode, + of_node_get_device_type(cache->ofnode)); } static void do_subsidiary_caches(struct cache *cache) @@ -647,12 +664,13 @@ static const struct cpumask *get_big_core_shared_cpu_map(int cpu, struct cache * return &cache->shared_cpu_map; } -static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +static ssize_t +show_shared_cpumap(struct kobject *k, struct kobj_attribute *attr, char *buf, bool list) { struct cache_index_dir *index; struct cache *cache; const struct cpumask *mask; - int ret, cpu; + int cpu; index = kobj_to_cache_index_dir(k); cache = index->cache; @@ -664,16 +682,25 @@ static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *att mask = &cache->shared_cpu_map; } - ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb\n", - cpumask_pr_args(mask)); - buf[ret++] = '\n'; - buf[ret] = '\0'; - return ret; + return cpumap_print_to_pagebuf(list, buf, mask); +} + +static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +{ + return show_shared_cpumap(k, attr, buf, false); +} + +static ssize_t shared_cpu_list_show(struct kobject *k, struct kobj_attribute *attr, char *buf) +{ + return show_shared_cpumap(k, attr, buf, true); } static struct kobj_attribute cache_shared_cpu_map_attr = __ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL); +static struct kobj_attribute cache_shared_cpu_list_attr = + __ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL); + /* Attributes which should always be created -- the kobject/sysfs core * does this automatically via kobj_type->default_attrs. This is the * minimum data required to uniquely identify a cache. @@ -682,6 +709,7 @@ static struct attribute *cache_index_default_attrs[] = { &cache_type_attr.attr, &cache_level_attr.attr, &cache_shared_cpu_map_attr.attr, + &cache_shared_cpu_list_attr.attr, NULL, }; @@ -733,13 +761,13 @@ static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir) rc = attr->show(&dir->kobj, attr, buf); if (rc <= 0) { pr_debug("not creating %s attribute for " - "%pOF(%s) (rc = %zd)\n", + "%pOFP(%s) (rc = %zd)\n", attr->attr.name, cache->ofnode, cache_type, rc); continue; } if (sysfs_create_file(&dir->kobj, &attr->attr)) - pr_debug("could not create %s attribute for %pOF(%s)\n", + pr_debug("could not create %s attribute for %pOFP(%s)\n", attr->attr.name, cache->ofnode, cache_type); } @@ -855,7 +883,7 @@ static void cache_cpu_clear(struct cache *cache, int cpu) struct cache *next = cache->next_local; WARN_ONCE(!cpumask_test_cpu(cpu, &cache->shared_cpu_map), - "CPU %i not accounted in %pOF(%s)\n", + "CPU %i not accounted in %pOFP(%s)\n", cpu, cache->ofnode, cache_type_string(cache)); diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index efdcfa714106..704e8b9501ee 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -94,13 +94,15 @@ _GLOBAL(__restore_cpu_power8) _GLOBAL(__setup_cpu_power10) mflr r11 bl __init_FSCR_power10 + bl __init_PMU + bl __init_PMU_ISA31 b 1f _GLOBAL(__setup_cpu_power9) mflr r11 - bl __init_FSCR -1: bl __init_PMU - bl __init_hvmode_206 + bl __init_FSCR_power9 + bl __init_PMU +1: bl __init_hvmode_206 mtlr r11 beqlr li r0,0 @@ -124,13 +126,15 @@ _GLOBAL(__setup_cpu_power9) _GLOBAL(__restore_cpu_power10) mflr r11 bl __init_FSCR_power10 + bl __init_PMU + bl __init_PMU_ISA31 b 1f _GLOBAL(__restore_cpu_power9) mflr r11 - bl __init_FSCR -1: bl __init_PMU - mfmsr r3 + bl __init_FSCR_power9 + bl __init_PMU +1: mfmsr r3 rldicl. r0,r3,4,63 mtlr r11 beqlr @@ -198,6 +202,12 @@ __init_FSCR_power10: mtspr SPRN_FSCR, r3 // fall through +__init_FSCR_power9: + mfspr r3, SPRN_FSCR + ori r3, r3, FSCR_SCV + mtspr SPRN_FSCR, r3 + // fall through + __init_FSCR: mfspr r3,SPRN_FSCR ori r3,r3,FSCR_TAR|FSCR_EBB @@ -233,3 +243,10 @@ __init_PMU_ISA207: li r5,0 mtspr SPRN_MMCRS,r5 blr + +__init_PMU_ISA31: + li r5,0 + mtspr SPRN_MMCR3,r5 + LOAD_REG_IMMEDIATE(r5, MMCRA_BHRB_DISABLE) + mtspr SPRN_MMCRA,r5 + blr diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index b4066354f073..3d406a9626e8 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -120,7 +120,8 @@ extern void __restore_cpu_e6500(void); #define COMMON_USER2_POWER9 (COMMON_USER2_POWER8 | \ PPC_FEATURE2_ARCH_3_00 | \ PPC_FEATURE2_HAS_IEEE128 | \ - PPC_FEATURE2_DARN ) + PPC_FEATURE2_DARN | \ + PPC_FEATURE2_SCV) #define COMMON_USER_POWER10 COMMON_USER_POWER9 #define COMMON_USER2_POWER10 (COMMON_USER2_POWER9 | \ PPC_FEATURE2_ARCH_3_1 | \ diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c index 500f52fa4711..cdc2dccb987d 100644 --- a/arch/powerpc/kernel/dawr.c +++ b/arch/powerpc/kernel/dawr.c @@ -37,7 +37,7 @@ int set_dawr(int nr, struct arch_hw_breakpoint *brk) dawrx |= (mrd & 0x3f) << (63 - 53); if (ppc_md.set_dawr) - return ppc_md.set_dawr(dawr, dawrx); + return ppc_md.set_dawr(nr, dawr, dawrx); if (nr == 0) { mtspr(SPRN_DAWR0, dawr); diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index f17ff1200eaa..52680cf07c9d 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -18,61 +18,6 @@ #ifdef CONFIG_SMP -/* - * Doorbells must only be used if CPU_FTR_DBELL is available. - * msgsnd is used in HV, and msgsndp is used in !HV. - * - * These should be used by platform code that is aware of restrictions. - * Other arch code should use ->cause_ipi. - * - * doorbell_global_ipi() sends a dbell to any target CPU. - * Must be used only by architectures that address msgsnd target - * by PIR/get_hard_smp_processor_id. - */ -void doorbell_global_ipi(int cpu) -{ - u32 tag = get_hard_smp_processor_id(cpu); - - kvmppc_set_host_ipi(cpu); - /* Order previous accesses vs. msgsnd, which is treated as a store */ - ppc_msgsnd_sync(); - ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag); -} - -/* - * doorbell_core_ipi() sends a dbell to a target CPU in the same core. - * Must be used only by architectures that address msgsnd target - * by TIR/cpu_thread_in_core. - */ -void doorbell_core_ipi(int cpu) -{ - u32 tag = cpu_thread_in_core(cpu); - - kvmppc_set_host_ipi(cpu); - /* Order previous accesses vs. msgsnd, which is treated as a store */ - ppc_msgsnd_sync(); - ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag); -} - -/* - * Attempt to cause a core doorbell if destination is on the same core. - * Returns 1 on success, 0 on failure. - */ -int doorbell_try_core_ipi(int cpu) -{ - int this_cpu = get_cpu(); - int ret = 0; - - if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) { - doorbell_core_ipi(cpu); - ret = 1; - } - - put_cpu(); - - return ret; -} - void doorbell_exception(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 3a409517c031..6f8c0c6b937a 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -24,7 +24,6 @@ /* Device-tree visible constants follow */ -#define ISA_V2_07B 2070 #define ISA_V3_0B 3000 #define ISA_V3_1 3100 @@ -67,6 +66,7 @@ struct dt_cpu_feature { extern long __machine_check_early_realmode_p8(struct pt_regs *regs); extern long __machine_check_early_realmode_p9(struct pt_regs *regs); +extern long __machine_check_early_realmode_p10(struct pt_regs *regs); static int hv_mode; @@ -337,6 +337,7 @@ static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f) #ifdef CONFIG_PPC_RADIX_MMU cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX; cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE; + cur_cpu_spec->mmu_features |= MMU_FTR_GTSE; cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU; return 1; @@ -450,6 +451,39 @@ static int __init feat_enable_pmu_power9(struct dt_cpu_feature *f) return 1; } +static void init_pmu_power10(void) +{ + init_pmu_power9(); + + mtspr(SPRN_MMCR3, 0); + mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); +} + +static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f) +{ + hfscr_pmu_enable(); + + init_pmu_power10(); + init_pmu_registers = init_pmu_power10; + + cur_cpu_spec->cpu_features |= CPU_FTR_MMCRA; + cur_cpu_spec->cpu_user_features |= PPC_FEATURE_PSERIES_PERFMON_COMPAT; + + cur_cpu_spec->num_pmcs = 6; + cur_cpu_spec->pmc_type = PPC_PMC_IBM; + cur_cpu_spec->oprofile_cpu_type = "ppc64/power10"; + + return 1; +} + +static int __init feat_enable_mce_power10(struct dt_cpu_feature *f) +{ + cur_cpu_spec->platform = "power10"; + cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p10; + + return 1; +} + static int __init feat_enable_tm(struct dt_cpu_feature *f) { #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -587,6 +621,7 @@ static struct dt_cpu_feature_match __initdata {"little-endian", feat_enable_le, CPU_FTR_REAL_LE}, {"smt", feat_enable_smt, 0}, {"interrupt-facilities", feat_enable, 0}, + {"system-call-vectored", feat_enable, 0}, {"timer-facilities", feat_enable, 0}, {"timer-facilities-v3", feat_enable, 0}, {"debug-facilities", feat_enable, 0}, @@ -622,7 +657,7 @@ static struct dt_cpu_feature_match __initdata {"processor-control-facility-v3", feat_enable_dbell, CPU_FTR_DBELL}, {"processor-utilization-of-resources-register", feat_enable_purr, 0}, {"no-execute", feat_enable, 0}, - {"strong-access-ordering", feat_enable, CPU_FTR_SAO}, + /* strong-access-ordering is unused */ {"cache-inhibited-large-page", feat_enable_large_ci, 0}, {"coprocessor-icswx", feat_enable, 0}, {"hypervisor-virtualization-interrupt", feat_enable_hvi, 0}, @@ -638,7 +673,9 @@ static struct dt_cpu_feature_match __initdata {"group-start-register", feat_enable, 0}, {"pc-relative-addressing", feat_enable, 0}, {"machine-check-power9", feat_enable_mce_power9, 0}, + {"machine-check-power10", feat_enable_mce_power10, 0}, {"performance-monitor-power9", feat_enable_pmu_power9, 0}, + {"performance-monitor-power10", feat_enable_pmu_power10, 0}, {"event-based-branch-v3", feat_enable, 0}, {"random-number-generator", feat_enable, 0}, {"system-call-vectored", feat_disable, 0}, @@ -649,6 +686,7 @@ static struct dt_cpu_feature_match __initdata {"wait-v3", feat_enable, 0}, {"prefix-instructions", feat_enable, 0}, {"matrix-multiply-assist", feat_enable_mma, 0}, + {"debug-facilities-v31", feat_enable, CPU_FTR_DAWR1}, }; static bool __initdata using_dt_cpu_ftrs; @@ -674,12 +712,12 @@ static void __init cpufeatures_setup_start(u32 isa) { pr_info("setup for ISA %d\n", isa); - if (isa >= 3000) { + if (isa >= ISA_V3_0B) { cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_300; cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_00; } - if (isa >= 3100) { + if (isa >= ISA_V3_1) { cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_31; cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_1; } @@ -776,12 +814,6 @@ static __init void cpufeatures_cpu_quirks(void) } update_tlbie_feature_flag(version); - /* - * PKEY was not in the initial base or feature node - * specification, but it should become optional in the next - * cpu feature version sequence. - */ - cur_cpu_spec->cpu_features |= CPU_FTR_PKEY; } static void __init cpufeatures_setup_finished(void) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index d407981dec76..94682382fc8c 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -167,39 +167,33 @@ void eeh_show_enabled(void) */ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); u32 cfg; int cap, i; int n = 0, l = 0; char buffer[128]; - if (!pdn) { - pr_warn("EEH: Note: No error log for absent device.\n"); - return 0; - } - n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n", - pdn->phb->global_number, pdn->busno, - PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); + edev->pe->phb->global_number, edev->bdfn >> 8, + PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn)); pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n", - pdn->phb->global_number, pdn->busno, - PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); + edev->pe->phb->global_number, edev->bdfn >> 8, + PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn)); - eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg); + eeh_ops->read_config(edev, PCI_VENDOR_ID, 4, &cfg); n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); pr_warn("EEH: PCI device/vendor: %08x\n", cfg); - eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg); + eeh_ops->read_config(edev, PCI_COMMAND, 4, &cfg); n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); pr_warn("EEH: PCI cmd/status register: %08x\n", cfg); /* Gather bridge-specific registers */ if (edev->mode & EEH_DEV_BRIDGE) { - eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg); + eeh_ops->read_config(edev, PCI_SEC_STATUS, 2, &cfg); n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); pr_warn("EEH: Bridge secondary status: %04x\n", cfg); - eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg); + eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &cfg); n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); pr_warn("EEH: Bridge control: %04x\n", cfg); } @@ -207,11 +201,11 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) /* Dump out the PCI-X command and status regs */ cap = edev->pcix_cap; if (cap) { - eeh_ops->read_config(pdn, cap, 4, &cfg); + eeh_ops->read_config(edev, cap, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); pr_warn("EEH: PCI-X cmd: %08x\n", cfg); - eeh_ops->read_config(pdn, cap+4, 4, &cfg); + eeh_ops->read_config(edev, cap+4, 4, &cfg); n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); pr_warn("EEH: PCI-X status: %08x\n", cfg); } @@ -223,7 +217,7 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) pr_warn("EEH: PCI-E capabilities and status follow:\n"); for (i=0; i<=8; i++) { - eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); + eeh_ops->read_config(edev, cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); if ((i % 4) == 0) { @@ -250,7 +244,7 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) pr_warn("EEH: PCI-E AER capability register set follows:\n"); for (i=0; i<=13; i++) { - eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); + eeh_ops->read_config(edev, cap+4*i, 4, &cfg); n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); if ((i % 4) == 0) { @@ -726,7 +720,6 @@ static void eeh_disable_and_save_dev_state(struct eeh_dev *edev, static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); struct pci_dev *dev = userdata; @@ -734,73 +727,14 @@ static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata) return; /* Apply customization from firmware */ - if (pdn && eeh_ops->restore_config) - eeh_ops->restore_config(pdn); + if (eeh_ops->restore_config) + eeh_ops->restore_config(edev); /* The caller should restore state for the specified device */ if (pdev != dev) pci_restore_state(pdev); } -int eeh_restore_vf_config(struct pci_dn *pdn) -{ - struct eeh_dev *edev = pdn_to_eeh_dev(pdn); - u32 devctl, cmd, cap2, aer_capctl; - int old_mps; - - if (edev->pcie_cap) { - /* Restore MPS */ - old_mps = (ffs(pdn->mps) - 8) << 5; - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, &devctl); - devctl &= ~PCI_EXP_DEVCTL_PAYLOAD; - devctl |= old_mps; - eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, devctl); - - /* Disable Completion Timeout if possible */ - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2, - 4, &cap2); - if (cap2 & PCI_EXP_DEVCAP2_COMP_TMOUT_DIS) { - eeh_ops->read_config(pdn, - edev->pcie_cap + PCI_EXP_DEVCTL2, - 4, &cap2); - cap2 |= PCI_EXP_DEVCTL2_COMP_TMOUT_DIS; - eeh_ops->write_config(pdn, - edev->pcie_cap + PCI_EXP_DEVCTL2, - 4, cap2); - } - } - - /* Enable SERR and parity checking */ - eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd); - cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR); - eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd); - - /* Enable report various errors */ - if (edev->pcie_cap) { - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, &devctl); - devctl &= ~PCI_EXP_DEVCTL_CERE; - devctl |= (PCI_EXP_DEVCTL_NFERE | - PCI_EXP_DEVCTL_FERE | - PCI_EXP_DEVCTL_URRE); - eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, - 2, devctl); - } - - /* Enable ECRC generation and check */ - if (edev->pcie_cap && edev->aer_cap) { - eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP, - 4, &aer_capctl); - aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); - eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP, - 4, aer_capctl); - } - - return 0; -} - /** * pcibios_set_pcie_reset_state - Set PCI-E reset state * @dev: pci device struct @@ -977,15 +911,13 @@ int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed) */ void eeh_save_bars(struct eeh_dev *edev) { - struct pci_dn *pdn; int i; - pdn = eeh_dev_to_pdn(edev); - if (!pdn) + if (!edev) return; for (i = 0; i < 16; i++) - eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]); + eeh_ops->read_config(edev, i * 4, 4, &edev->config_space[i]); /* * For PCI bridges including root port, we need enable bus @@ -1096,7 +1028,7 @@ static int eeh_init(void) /* Initialize PHB PEs */ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) - eeh_dev_phb_init_dynamic(hose); + eeh_phb_pe_create(hose); eeh_addr_cache_init(); @@ -1175,7 +1107,7 @@ void eeh_probe_device(struct pci_dev *dev) * FIXME: HEY MA, LOOK AT ME, NO LOCKING! */ if (edev->pdev && edev->pdev != dev) { - eeh_rmv_from_parent_pe(edev); + eeh_pe_tree_remove(edev); eeh_addr_cache_rmv_dev(edev->pdev); eeh_sysfs_remove_device(edev->pdev); @@ -1254,7 +1186,7 @@ void eeh_remove_device(struct pci_dev *dev) edev->in_error = false; dev->dev.archdata.edev = NULL; if (!(edev->pe->state & EEH_PE_KEEP)) - eeh_rmv_from_parent_pe(edev); + eeh_pe_tree_remove(edev); else edev->mode |= EEH_DEV_DISCONNECTED; } diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c deleted file mode 100644 index 7370185c7a05..000000000000 --- a/arch/powerpc/kernel/eeh_dev.c +++ /dev/null @@ -1,67 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The file intends to implement dynamic creation of EEH device, which will - * be bound with OF node and PCI device simutaneously. The EEH devices would - * be foundamental information for EEH core components to work proerly. Besides, - * We have to support multiple situations where dynamic creation of EEH device - * is required: - * - * 1) Before PCI emunation starts, we need create EEH devices according to the - * PCI sensitive OF nodes. - * 2) When PCI emunation is done, we need do the binding between PCI device and - * the associated EEH device. - * 3) DR (Dynamic Reconfiguration) would create PCI sensitive OF node. EEH device - * will be created while PCI sensitive OF node is detected from DR. - * 4) PCI hotplug needs redoing the binding between PCI device and EEH device. If - * PHB is newly inserted, we also need create EEH devices accordingly. - * - * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include - -/** - * eeh_dev_init - Create EEH device according to OF node - * @pdn: PCI device node - * - * It will create EEH device according to the given OF node. The function - * might be called by PCI emunation, DR, PHB hotplug. - */ -struct eeh_dev *eeh_dev_init(struct pci_dn *pdn) -{ - struct eeh_dev *edev; - - /* Allocate EEH device */ - edev = kzalloc(sizeof(*edev), GFP_KERNEL); - if (!edev) - return NULL; - - /* Associate EEH device with OF node */ - pdn->edev = edev; - edev->pdn = pdn; - edev->bdfn = (pdn->busno << 8) | pdn->devfn; - edev->controller = pdn->phb; - - return edev; -} - -/** - * eeh_dev_phb_init_dynamic - Create EEH devices for devices included in PHB - * @phb: PHB - * - * Scan the PHB OF node and its child association, then create the - * EEH devices accordingly - */ -void eeh_dev_phb_init_dynamic(struct pci_controller *phb) -{ - /* EEH PE for PHB */ - eeh_phb_pe_create(phb); -} diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 7b048cee767c..4197e4559f65 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -425,8 +425,8 @@ static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev, pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED); #ifdef CONFIG_PCI_IOV - if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev)) - eeh_ops->notify_resume(eeh_dev_to_pdn(edev)); + if (eeh_ops->notify_resume) + eeh_ops->notify_resume(edev); #endif return PCI_ERS_RESULT_NONE; } @@ -477,7 +477,7 @@ static void *eeh_add_virt_device(struct eeh_dev *edev) } #ifdef CONFIG_PCI_IOV - pci_iov_add_virtfn(edev->physfn, eeh_dev_to_pdn(edev)->vf_index); + pci_iov_add_virtfn(edev->physfn, edev->vf_index); #endif return NULL; } @@ -521,9 +521,7 @@ static void eeh_rmv_device(struct eeh_dev *edev, void *userdata) if (edev->physfn) { #ifdef CONFIG_PCI_IOV - struct pci_dn *pdn = eeh_dev_to_pdn(edev); - - pci_iov_remove_virtfn(edev->physfn, pdn->vf_index); + pci_iov_remove_virtfn(edev->physfn, edev->vf_index); edev->pdev = NULL; #endif if (rmv_data) @@ -544,7 +542,7 @@ static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata) continue; edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED); - eeh_rmv_from_parent_pe(edev); + eeh_pe_tree_remove(edev); } return NULL; diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 177852e39a25..d2aaaa73fdd5 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -319,56 +319,22 @@ struct eeh_pe *eeh_pe_get(struct pci_controller *phb, } /** - * eeh_pe_get_parent - Retrieve the parent PE + * eeh_pe_tree_insert - Add EEH device to parent PE * @edev: EEH device + * @new_pe_parent: PE to create additional PEs under * - * The whole PEs existing in the system are organized as hierarchy - * tree. The function is used to retrieve the parent PE according - * to the parent EEH device. - */ -static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev) -{ - struct eeh_dev *parent; - struct pci_dn *pdn = eeh_dev_to_pdn(edev); - - /* - * It might have the case for the indirect parent - * EEH device already having associated PE, but - * the direct parent EEH device doesn't have yet. - */ - if (edev->physfn) - pdn = pci_get_pdn(edev->physfn); - else - pdn = pdn ? pdn->parent : NULL; - while (pdn) { - /* We're poking out of PCI territory */ - parent = pdn_to_eeh_dev(pdn); - if (!parent) - return NULL; - - if (parent->pe) - return parent->pe; - - pdn = pdn->parent; - } - - return NULL; -} - -/** - * eeh_add_to_parent_pe - Add EEH device to parent PE - * @edev: EEH device + * Add EEH device to the PE in edev->pe_config_addr. If a PE already + * exists with that address then @edev is added to that PE. Otherwise + * a new PE is created and inserted into the PE tree as a child of + * @new_pe_parent. * - * Add EEH device to the parent PE. If the parent PE already - * exists, the PE type will be changed to EEH_PE_BUS. Otherwise, - * we have to create new PE to hold the EEH device and the new - * PE will be linked to its parent PE as well. + * If @new_pe_parent is NULL then the new PE will be inserted under + * directly under the the PHB. */ -int eeh_add_to_parent_pe(struct eeh_dev *edev) +int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent) { + struct pci_controller *hose = edev->controller; struct eeh_pe *pe, *parent; - struct pci_dn *pdn = eeh_dev_to_pdn(edev); - int config_addr = (pdn->busno << 8) | (pdn->devfn); /* Check if the PE number is valid */ if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) { @@ -382,7 +348,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) * PE should be composed of PCI bus and its subordinate * components. */ - pe = eeh_pe_get(pdn->phb, edev->pe_config_addr, config_addr); + pe = eeh_pe_get(hose, edev->pe_config_addr, edev->bdfn); if (pe) { if (pe->type & EEH_PE_INVALID) { list_add_tail(&edev->entry, &pe->edevs); @@ -399,8 +365,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) parent = parent->parent; } - eeh_edev_dbg(edev, - "Added to device PE (parent: PE#%x)\n", + eeh_edev_dbg(edev, "Added to existing PE (parent: PE#%x)\n", pe->parent->addr); } else { /* Mark the PE as type of PCI bus */ @@ -416,15 +381,15 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) /* Create a new EEH PE */ if (edev->physfn) - pe = eeh_pe_alloc(pdn->phb, EEH_PE_VF); + pe = eeh_pe_alloc(hose, EEH_PE_VF); else - pe = eeh_pe_alloc(pdn->phb, EEH_PE_DEVICE); + pe = eeh_pe_alloc(hose, EEH_PE_DEVICE); if (!pe) { pr_err("%s: out of memory!\n", __func__); return -ENOMEM; } pe->addr = edev->pe_config_addr; - pe->config_addr = config_addr; + pe->config_addr = edev->bdfn; /* * Put the new EEH PE into hierarchy tree. If the parent @@ -432,34 +397,35 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) * to PHB directly. Otherwise, we have to associate the * PE with its parent. */ - parent = eeh_pe_get_parent(edev); - if (!parent) { - parent = eeh_phb_pe_get(pdn->phb); - if (!parent) { + if (!new_pe_parent) { + new_pe_parent = eeh_phb_pe_get(hose); + if (!new_pe_parent) { pr_err("%s: No PHB PE is found (PHB Domain=%d)\n", - __func__, pdn->phb->global_number); + __func__, hose->global_number); edev->pe = NULL; kfree(pe); return -EEXIST; } } - pe->parent = parent; + + /* link new PE into the tree */ + pe->parent = new_pe_parent; + list_add_tail(&pe->child, &new_pe_parent->child_list); /* * Put the newly created PE into the child list and * link the EEH device accordingly. */ - list_add_tail(&pe->child, &parent->child_list); list_add_tail(&edev->entry, &pe->edevs); edev->pe = pe; - eeh_edev_dbg(edev, "Added to device PE (parent: PE#%x)\n", - pe->parent->addr); + eeh_edev_dbg(edev, "Added to new (parent: PE#%x)\n", + new_pe_parent->addr); return 0; } /** - * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE + * eeh_pe_tree_remove - Remove one EEH device from the associated PE * @edev: EEH device * * The PE hierarchy tree might be changed when doing PCI hotplug. @@ -467,7 +433,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) * during EEH recovery. So we have to call the function remove the * corresponding PE accordingly if necessary. */ -int eeh_rmv_from_parent_pe(struct eeh_dev *edev) +int eeh_pe_tree_remove(struct eeh_dev *edev) { struct eeh_pe *pe, *parent, *child; bool keep, recover; @@ -698,7 +664,6 @@ void eeh_pe_state_clear(struct eeh_pe *root, int state, bool include_passed) */ static void eeh_bridge_check_link(struct eeh_dev *edev) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); int cap; uint32_t val; int timeout = 0; @@ -714,32 +679,32 @@ static void eeh_bridge_check_link(struct eeh_dev *edev) /* Check slot status */ cap = edev->pcie_cap; - eeh_ops->read_config(pdn, cap + PCI_EXP_SLTSTA, 2, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_SLTSTA, 2, &val); if (!(val & PCI_EXP_SLTSTA_PDS)) { eeh_edev_dbg(edev, "No card in the slot (0x%04x) !\n", val); return; } /* Check power status if we have the capability */ - eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCAP, 2, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_SLTCAP, 2, &val); if (val & PCI_EXP_SLTCAP_PCP) { - eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCTL, 2, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_SLTCTL, 2, &val); if (val & PCI_EXP_SLTCTL_PCC) { eeh_edev_dbg(edev, "In power-off state, power it on ...\n"); val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC); val |= (0x0100 & PCI_EXP_SLTCTL_PIC); - eeh_ops->write_config(pdn, cap + PCI_EXP_SLTCTL, 2, val); + eeh_ops->write_config(edev, cap + PCI_EXP_SLTCTL, 2, val); msleep(2 * 1000); } } /* Enable link */ - eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCTL, 2, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_LNKCTL, 2, &val); val &= ~PCI_EXP_LNKCTL_LD; - eeh_ops->write_config(pdn, cap + PCI_EXP_LNKCTL, 2, val); + eeh_ops->write_config(edev, cap + PCI_EXP_LNKCTL, 2, val); /* Check link */ - eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCAP, 4, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_LNKCAP, 4, &val); if (!(val & PCI_EXP_LNKCAP_DLLLARC)) { eeh_edev_dbg(edev, "No link reporting capability (0x%08x) \n", val); msleep(1000); @@ -752,7 +717,7 @@ static void eeh_bridge_check_link(struct eeh_dev *edev) msleep(20); timeout += 20; - eeh_ops->read_config(pdn, cap + PCI_EXP_LNKSTA, 2, &val); + eeh_ops->read_config(edev, cap + PCI_EXP_LNKSTA, 2, &val); if (val & PCI_EXP_LNKSTA_DLLLA) break; } @@ -769,7 +734,6 @@ static void eeh_bridge_check_link(struct eeh_dev *edev) static void eeh_restore_bridge_bars(struct eeh_dev *edev) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); int i; /* @@ -777,20 +741,20 @@ static void eeh_restore_bridge_bars(struct eeh_dev *edev) * Bus numbers and windows: 0x18 - 0x30 */ for (i = 4; i < 13; i++) - eeh_ops->write_config(pdn, i*4, 4, edev->config_space[i]); + eeh_ops->write_config(edev, i*4, 4, edev->config_space[i]); /* Rom: 0x38 */ - eeh_ops->write_config(pdn, 14*4, 4, edev->config_space[14]); + eeh_ops->write_config(edev, 14*4, 4, edev->config_space[14]); /* Cache line & Latency timer: 0xC 0xD */ - eeh_ops->write_config(pdn, PCI_CACHE_LINE_SIZE, 1, + eeh_ops->write_config(edev, PCI_CACHE_LINE_SIZE, 1, SAVED_BYTE(PCI_CACHE_LINE_SIZE)); - eeh_ops->write_config(pdn, PCI_LATENCY_TIMER, 1, - SAVED_BYTE(PCI_LATENCY_TIMER)); + eeh_ops->write_config(edev, PCI_LATENCY_TIMER, 1, + SAVED_BYTE(PCI_LATENCY_TIMER)); /* Max latency, min grant, interrupt ping and line: 0x3C */ - eeh_ops->write_config(pdn, 15*4, 4, edev->config_space[15]); + eeh_ops->write_config(edev, 15*4, 4, edev->config_space[15]); /* PCI Command: 0x4 */ - eeh_ops->write_config(pdn, PCI_COMMAND, 4, edev->config_space[1] | + eeh_ops->write_config(edev, PCI_COMMAND, 4, edev->config_space[1] | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); /* Check the PCIe link is ready */ @@ -799,28 +763,27 @@ static void eeh_restore_bridge_bars(struct eeh_dev *edev) static void eeh_restore_device_bars(struct eeh_dev *edev) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); int i; u32 cmd; for (i = 4; i < 10; i++) - eeh_ops->write_config(pdn, i*4, 4, edev->config_space[i]); + eeh_ops->write_config(edev, i*4, 4, edev->config_space[i]); /* 12 == Expansion ROM Address */ - eeh_ops->write_config(pdn, 12*4, 4, edev->config_space[12]); + eeh_ops->write_config(edev, 12*4, 4, edev->config_space[12]); - eeh_ops->write_config(pdn, PCI_CACHE_LINE_SIZE, 1, + eeh_ops->write_config(edev, PCI_CACHE_LINE_SIZE, 1, SAVED_BYTE(PCI_CACHE_LINE_SIZE)); - eeh_ops->write_config(pdn, PCI_LATENCY_TIMER, 1, + eeh_ops->write_config(edev, PCI_LATENCY_TIMER, 1, SAVED_BYTE(PCI_LATENCY_TIMER)); /* max latency, min grant, interrupt pin and line */ - eeh_ops->write_config(pdn, 15*4, 4, edev->config_space[15]); + eeh_ops->write_config(edev, 15*4, 4, edev->config_space[15]); /* * Restore PERR & SERR bits, some devices require it, * don't touch the other command bits */ - eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cmd); + eeh_ops->read_config(edev, PCI_COMMAND, 4, &cmd); if (edev->config_space[1] & PCI_COMMAND_PARITY) cmd |= PCI_COMMAND_PARITY; else @@ -829,7 +792,7 @@ static void eeh_restore_device_bars(struct eeh_dev *edev) cmd |= PCI_COMMAND_SERR; else cmd &= ~PCI_COMMAND_SERR; - eeh_ops->write_config(pdn, PCI_COMMAND, 4, cmd); + eeh_ops->write_config(edev, PCI_COMMAND, 4, cmd); } /** @@ -843,16 +806,14 @@ static void eeh_restore_device_bars(struct eeh_dev *edev) */ static void eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag) { - struct pci_dn *pdn = eeh_dev_to_pdn(edev); - /* Do special restore for bridges */ if (edev->mode & EEH_DEV_BRIDGE) eeh_restore_bridge_bars(edev); else eeh_restore_device_bars(edev); - if (eeh_ops->restore_config && pdn) - eeh_ops->restore_config(pdn); + if (eeh_ops->restore_config) + eeh_ops->restore_config(edev); } /** diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c index 4fb0f1e1017a..429620da73ba 100644 --- a/arch/powerpc/kernel/eeh_sysfs.c +++ b/arch/powerpc/kernel/eeh_sysfs.c @@ -99,7 +99,7 @@ static ssize_t eeh_notify_resume_store(struct device *dev, if (!edev || !edev->pe || !eeh_ops->notify_resume) return -ENODEV; - if (eeh_ops->notify_resume(pci_get_pdn(pdev))) + if (eeh_ops->notify_resume(edev)) return -EIO; return count; diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 217ebdf5b00b..f4d0af8e1136 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -35,6 +35,12 @@ #include "head_32.h" +/* + * powerpc relies on return from interrupt/syscall being context synchronising + * (which rfi is) to support ARCH_HAS_MEMBARRIER_SYNC_CORE without additional + * synchronisation instructions. + */ + /* * Align to 4k in order to ensure that all functions modyfing srr0/srr1 * fit into one page in order to not encounter a TLB miss between the diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 9d49338e0c85..33a42e42c56f 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -64,15 +64,173 @@ exception_marker: .section ".text" .align 7 - .globl system_call_common -system_call_common: +#ifdef CONFIG_PPC_BOOK3S +.macro system_call_vectored name trapnr + .globl system_call_vectored_\name +system_call_vectored_\name: +_ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM BEGIN_FTR_SECTION extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ bne .Ltabort_syscall END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif + INTERRUPT_TO_KERNEL + mr r10,r1 + ld r1,PACAKSAVE(r13) + std r10,0(r1) + std r11,_NIP(r1) + std r12,_MSR(r1) + std r0,GPR0(r1) + std r10,GPR1(r1) + std r2,GPR2(r1) + ld r2,PACATOC(r13) + mfcr r12 + li r11,0 + /* Can we avoid saving r3-r8 in common case? */ + std r3,GPR3(r1) + std r4,GPR4(r1) + std r5,GPR5(r1) + std r6,GPR6(r1) + std r7,GPR7(r1) + std r8,GPR8(r1) + /* Zero r9-r12, this should only be required when restoring all GPRs */ + std r11,GPR9(r1) + std r11,GPR10(r1) + std r11,GPR11(r1) + std r11,GPR12(r1) + std r9,GPR13(r1) + SAVE_NVGPRS(r1) + std r11,_XER(r1) + std r11,_LINK(r1) + std r11,_CTR(r1) + + li r11,\trapnr + std r11,_TRAP(r1) + std r12,_CCR(r1) + std r3,ORIG_GPR3(r1) + addi r10,r1,STACK_FRAME_OVERHEAD + ld r11,exception_marker@toc(r2) + std r11,-16(r10) /* "regshere" marker */ + + /* + * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which + * would clobber syscall parameters. Also we always enter with IRQs + * enabled and nothing pending. system_call_exception() will call + * trace_hardirqs_off(). + * + * scv enters with MSR[EE]=1, so don't set PACA_IRQ_HARD_DIS. The + * entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED. + */ + + /* Calling convention has r9 = orig r0, r10 = regs */ + mr r9,r0 + bl system_call_exception + +.Lsyscall_vectored_\name\()_exit: + addi r4,r1,STACK_FRAME_OVERHEAD + li r5,1 /* scv */ + bl syscall_exit_prepare + + ld r2,_CCR(r1) + ld r4,_NIP(r1) + ld r5,_MSR(r1) + +BEGIN_FTR_SECTION + stdcx. r0,0,r1 /* to clear the reservation */ +END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + +BEGIN_FTR_SECTION + HMT_MEDIUM_LOW +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + + cmpdi r3,0 + bne .Lsyscall_vectored_\name\()_restore_regs + + /* rfscv returns with LR->NIA and CTR->MSR */ + mtlr r4 + mtctr r5 + + /* Could zero these as per ABI, but we may consider a stricter ABI + * which preserves these if libc implementations can benefit, so + * restore them for now until further measurement is done. */ + ld r0,GPR0(r1) + ld r4,GPR4(r1) + ld r5,GPR5(r1) + ld r6,GPR6(r1) + ld r7,GPR7(r1) + ld r8,GPR8(r1) + /* Zero volatile regs that may contain sensitive kernel data */ + li r9,0 + li r10,0 + li r11,0 + li r12,0 + mtspr SPRN_XER,r0 + + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * The value of AMR only matters while we're in the kernel. + */ + mtcr r2 + ld r2,GPR2(r1) + ld r3,GPR3(r1) + ld r13,GPR13(r1) + ld r1,GPR1(r1) + RFSCV_TO_USER + b . /* prevent speculative execution */ + +.Lsyscall_vectored_\name\()_restore_regs: + li r3,0 + mtmsrd r3,1 + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r5 + + ld r3,_CTR(r1) + ld r4,_LINK(r1) + ld r5,_XER(r1) + + REST_NVGPRS(r1) + ld r0,GPR0(r1) + mtcr r2 + mtctr r3 + mtlr r4 + mtspr SPRN_XER,r5 + REST_10GPRS(2, r1) + REST_2GPRS(12, r1) + ld r1,GPR1(r1) + RFI_TO_USER +.endm + +system_call_vectored common 0x3000 +/* + * We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0 + * which is tested by system_call_exception when r0 is -1 (as set by vector + * entry code). + */ +system_call_vectored sigill 0x7ff0 + + +/* + * Entered via kernel return set up by kernel/sstep.c, must match entry regs + */ + .globl system_call_vectored_emulate +system_call_vectored_emulate: +_ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate) + li r10,IRQS_ALL_DISABLED + stb r10,PACAIRQSOFTMASK(r13) + b system_call_vectored_common +#endif + + .balign IFETCH_ALIGN_BYTES + .globl system_call_common +system_call_common: _ASM_NOKPROBE_SYMBOL(system_call_common) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +BEGIN_FTR_SECTION + extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ + bne .Ltabort_syscall +END_FTR_SECTION_IFSET(CPU_FTR_TM) +#endif mr r10,r1 ld r1,PACAKSAVE(r13) std r10,0(r1) @@ -138,6 +296,7 @@ END_BTB_FLUSH_SECTION .Lsyscall_exit: addi r4,r1,STACK_FRAME_OVERHEAD + li r5,0 /* !scv */ bl syscall_exit_prepare ld r2,_CCR(r1) @@ -224,21 +383,29 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) b . /* prevent speculative execution */ #endif +#ifdef CONFIG_PPC_BOOK3S +_GLOBAL(ret_from_fork_scv) + bl schedule_tail + REST_NVGPRS(r1) + li r3,0 /* fork() return value */ + b .Lsyscall_vectored_common_exit +#endif + _GLOBAL(ret_from_fork) bl schedule_tail REST_NVGPRS(r1) - li r3,0 + li r3,0 /* fork() return value */ b .Lsyscall_exit _GLOBAL(ret_from_kernel_thread) bl schedule_tail REST_NVGPRS(r1) - mtlr r14 + mtctr r14 mr r3,r15 #ifdef PPC64_ELF_ABI_v2 mr r12,r14 #endif - blrl + bctrl li r3,0 b .Lsyscall_exit @@ -259,10 +426,7 @@ _ASM_NOKPROBE_SYMBOL(save_nvgprs); #define FLUSH_COUNT_CACHE \ 1: nop; \ - patch_site 1b, patch__call_flush_count_cache - - -#define BCCTR_FLUSH .long 0x4c400420 + patch_site 1b, patch__call_flush_branch_caches .macro nops number .rept \number @@ -271,8 +435,8 @@ _ASM_NOKPROBE_SYMBOL(save_nvgprs); .endm .balign 32 -.global flush_count_cache -flush_count_cache: +.global flush_branch_caches +flush_branch_caches: /* Save LR into r9 */ mflr r9 @@ -294,7 +458,7 @@ flush_count_cache: li r9,0x7fff mtctr r9 - BCCTR_FLUSH + PPC_BCCTR_FLUSH 2: nop patch_site 2b patch__flush_count_cache_return @@ -303,7 +467,7 @@ flush_count_cache: .rept 278 .balign 32 - BCCTR_FLUSH + PPC_BCCTR_FLUSH nops 7 .endr @@ -357,7 +521,7 @@ _GLOBAL(_switch) * kernel/sched/core.c). * * Uncacheable stores in the case of involuntary preemption must - * be taken care of. The smp_mb__before_spin_lock() in __schedule() + * be taken care of. The smp_mb__after_spinlock() in __schedule() * is implemented as hwsync on powerpc, which orders MMIO too. So * long as there is an hwsync in the context switch path, it will * be executed on the source CPU after the task has performed diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 446e54c3f71e..f7d748b88705 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -508,8 +508,24 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) .macro __GEN_COMMON_BODY name .if IMASK + .if ! ISTACK + .error "No support for masked interrupt to use custom stack" + .endif + + /* If coming from user, skip soft-mask tests. */ + andi. r10,r12,MSR_PR + bne 2f + + /* Kernel code running below __end_interrupts is implicitly + * soft-masked */ + LOAD_HANDLER(r10, __end_interrupts) + cmpld r11,r10 + li r10,IMASK + blt- 1f + + /* Test the soft mask state against our interrupt's bit */ lbz r10,PACAIRQSOFTMASK(r13) - andi. r10,r10,IMASK +1: andi. r10,r10,IMASK /* Associate vector numbers with bits in paca->irq_happened */ .if IVEC == 0x500 || IVEC == 0xea0 li r10,PACA_IRQ_EE @@ -540,7 +556,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) .if ISTACK andi. r10,r12,MSR_PR /* See if coming from user */ - mr r10,r1 /* Save r1 */ +2: mr r10,r1 /* Save r1 */ subi r1,r1,INT_FRAME_SIZE /* alloc frame on kernel stack */ beq- 100f ld r1,PACAKSAVE(r13) /* kernel stack to use */ @@ -740,6 +756,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) * guarantee they will be delivered virtually. Some conditions (see the ISA) * cause exceptions to be delivered in real mode. * + * The scv instructions are a special case. They get a 0x3000 offset applied. + * scv exceptions have unique reentrancy properties, see below. + * * It's impossible to receive interrupts below 0x300 via AIL. * * KVM: None of the virtual exceptions are from the guest. Anything that @@ -749,8 +768,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) * We layout physical memory as follows: * 0x0000 - 0x00ff : Secondary processor spin code * 0x0100 - 0x18ff : Real mode pSeries interrupt vectors - * 0x1900 - 0x3fff : Real mode trampolines - * 0x4000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors + * 0x1900 - 0x2fff : Real mode trampolines + * 0x3000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors * 0x5900 - 0x6fff : Relon mode trampolines * 0x7000 - 0x7fff : FWNMI data area * 0x8000 - .... : Common interrupt handlers, remaining early @@ -761,8 +780,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) * vectors there. */ OPEN_FIXED_SECTION(real_vectors, 0x0100, 0x1900) -OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x4000) -OPEN_FIXED_SECTION(virt_vectors, 0x4000, 0x5900) +OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x3000) +OPEN_FIXED_SECTION(virt_vectors, 0x3000, 0x5900) OPEN_FIXED_SECTION(virt_trampolines, 0x5900, 0x7000) #ifdef CONFIG_PPC_POWERNV @@ -798,6 +817,77 @@ USE_FIXED_SECTION(real_vectors) .globl __start_interrupts __start_interrupts: +/** + * Interrupt 0x3000 - System Call Vectored Interrupt (syscall). + * This is a synchronous interrupt invoked with the "scv" instruction. The + * system call does not alter the HV bit, so it is directed to the OS. + * + * Handling: + * scv instructions enter the kernel without changing EE, RI, ME, or HV. + * In particular, this means we can take a maskable interrupt at any point + * in the scv handler, which is unlike any other interrupt. This is solved + * by treating the instruction addresses below __end_interrupts as being + * soft-masked. + * + * AIL-0 mode scv exceptions go to 0x17000-0x17fff, but we set AIL-3 and + * ensure scv is never executed with relocation off, which means AIL-0 + * should never happen. + * + * Before leaving the below __end_interrupts text, at least of the following + * must be true: + * - MSR[PR]=1 (i.e., return to userspace) + * - MSR_EE|MSR_RI is set (no reentrant exceptions) + * - Standard kernel environment is set up (stack, paca, etc) + * + * Call convention: + * + * syscall register convention is in Documentation/powerpc/syscall64-abi.rst + */ +EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000) + /* SCV 0 */ + mr r9,r13 + GET_PACA(r13) + mflr r11 + mfctr r12 + li r10,IRQS_ALL_DISABLED + stb r10,PACAIRQSOFTMASK(r13) +#ifdef CONFIG_RELOCATABLE + b system_call_vectored_tramp +#else + b system_call_vectored_common +#endif + nop + + /* SCV 1 - 127 */ + .rept 127 + mr r9,r13 + GET_PACA(r13) + mflr r11 + mfctr r12 + li r10,IRQS_ALL_DISABLED + stb r10,PACAIRQSOFTMASK(r13) + li r0,-1 /* cause failure */ +#ifdef CONFIG_RELOCATABLE + b system_call_vectored_sigill_tramp +#else + b system_call_vectored_sigill +#endif + .endr +EXC_VIRT_END(system_call_vectored, 0x3000, 0x1000) + +#ifdef CONFIG_RELOCATABLE +TRAMP_VIRT_BEGIN(system_call_vectored_tramp) + __LOAD_HANDLER(r10, system_call_vectored_common) + mtctr r10 + bctr + +TRAMP_VIRT_BEGIN(system_call_vectored_sigill_tramp) + __LOAD_HANDLER(r10, system_call_vectored_sigill) + mtctr r10 + bctr +#endif + + /* No virt vectors corresponding with 0x0..0x100 */ EXC_VIRT_NONE(0x4000, 0x100) @@ -2838,7 +2928,8 @@ masked_interrupt: ld r10,PACA_EXGEN+EX_R10(r13) ld r11,PACA_EXGEN+EX_R11(r13) ld r12,PACA_EXGEN+EX_R12(r13) - /* returns to kernel where r13 must be set up, so don't restore it */ + ld r13,PACA_EXGEN+EX_R13(r13) + /* May return to masked low address where r13 is not set up */ .if \hsrr HRFI_TO_KERNEL .else @@ -2946,6 +3037,47 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) GET_SCRATCH0(r13); hrfid +TRAMP_REAL_BEGIN(rfscv_flush_fallback) + /* system call volatile */ + mr r7,r13 + GET_PACA(r13); + mr r8,r1 + ld r1,PACAKSAVE(r13) + mfctr r9 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) + ld r11,PACA_L1D_FLUSH_SIZE(r13) + srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ + mtctr r11 + DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + + /* order ld/st prior to dcbt stop all streams with flushing */ + sync + + /* + * The load adresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ +1: + ld r11,(0x80 + 8)*0(r10) + ld r11,(0x80 + 8)*1(r10) + ld r11,(0x80 + 8)*2(r10) + ld r11,(0x80 + 8)*3(r10) + ld r11,(0x80 + 8)*4(r10) + ld r11,(0x80 + 8)*5(r10) + ld r11,(0x80 + 8)*6(r10) + ld r11,(0x80 + 8)*7(r10) + addi r10,r10,0x80*8 + bdnz 1b + + mtctr r9 + li r9,0 + li r10,0 + li r11,0 + mr r1,r8 + mr r13,r7 + RFSCV + USE_TEXT_SECTION() MASKED_INTERRUPT MASKED_INTERRUPT hsrr=1 @@ -2997,6 +3129,10 @@ EXC_COMMON_BEGIN(ppc64_runlatch_on_trampoline) USE_FIXED_SECTION(virt_trampolines) /* + * All code below __end_interrupts is treated as soft-masked. If + * any code runs here with MSR[EE]=1, it must then cope with pending + * soft interrupt being raised (i.e., by ensuring it is replayed). + * * The __end_interrupts marker must be past the out-of-line (OOL) * handlers, so that they are copied to real address 0x100 when running * a relocatable kernel. This ensures they can be reached from the short diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 78ab9a6ee6ac..10ebb4bf71ad 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -32,6 +32,14 @@ #include #include +/* + * The CPU who acquired the lock to trigger the fadump crash should + * wait for other CPUs to enter. + * + * The timeout is in milliseconds. + */ +#define CRASH_TIMEOUT 500 + static struct fw_dump fw_dump; static void __init fadump_reserve_crash_area(u64 base); @@ -39,7 +47,10 @@ static void __init fadump_reserve_crash_area(u64 base); struct kobject *fadump_kobj; #ifndef CONFIG_PRESERVE_FA_DUMP + +static atomic_t cpus_in_fadump; static DEFINE_MUTEX(fadump_mutex); + struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false }; #define RESERVED_RNGS_SZ 16384 /* 16K - 128 entries */ @@ -668,8 +679,11 @@ early_param("fadump_reserve_mem", early_fadump_reserve_mem); void crash_fadump(struct pt_regs *regs, const char *str) { + unsigned int msecs; struct fadump_crash_info_header *fdh = NULL; int old_cpu, this_cpu; + /* Do not include first CPU */ + unsigned int ncpus = num_online_cpus() - 1; if (!should_fadump_crash()) return; @@ -685,6 +699,8 @@ void crash_fadump(struct pt_regs *regs, const char *str) old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu); if (old_cpu != -1) { + atomic_inc(&cpus_in_fadump); + /* * We can't loop here indefinitely. Wait as long as fadump * is in force. If we race with fadump un-registration this @@ -708,6 +724,16 @@ void crash_fadump(struct pt_regs *regs, const char *str) fdh->online_mask = *cpu_online_mask; + /* + * If we came in via system reset, wait a while for the secondary + * CPUs to enter. + */ + if (TRAP(&(fdh->regs)) == 0x100) { + msecs = CRASH_TIMEOUT; + while ((atomic_read(&cpus_in_fadump) < ncpus) && (--msecs > 0)) + mdelay(1); + } + fw_dump.ops->fadump_trigger(fdh, str); } diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index cc4a5e3f51f1..fe48d319d490 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -11,8 +11,27 @@ #include #include +#include #include +#ifdef CONFIG_PPC64 unsigned long powerpc_firmware_features __read_mostly; EXPORT_SYMBOL_GPL(powerpc_firmware_features); +#endif + +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) +bool is_kvm_guest(void) +{ + struct device_node *hyper_node; + + hyper_node = of_find_node_by_path("/hypervisor"); + if (!hyper_node) + return 0; + + if (!of_device_is_compatible(hyper_node, "linux,kvm")) + return 0; + + return 1; +} +#endif diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index cac22cb97a8c..4ae39db70044 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -107,9 +107,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) or r12,r12,r4 std r12,_MSR(r1) #endif - /* Don't care if r4 overflows, this is desired behaviour */ - lbz r4,THREAD_LOAD_FP(r5) - addi r4,r4,1 + li r4,1 stb r4,THREAD_LOAD_FP(r5) addi r10,r5,THREAD_FPSTATE lfd fr0,FPSTATE_FPSCR(r10) diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 705c042309d8..f3ab94d73936 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -474,7 +474,7 @@ InstructionTLBMiss: /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_IMISS #if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) - lis r1,PAGE_OFFSET@h /* check if kernel address */ + lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 #endif mfspr r2, SPRN_SPRG_PGDIR @@ -484,7 +484,7 @@ InstructionTLBMiss: li r1,_PAGE_PRESENT | _PAGE_EXEC #endif #if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) - bge- 112f + bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ #endif @@ -541,7 +541,7 @@ DataLoadTLBMiss: */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_DMISS - lis r1,PAGE_OFFSET@h /* check if kernel address */ + lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SPRG_PGDIR #ifdef CONFIG_SWAP @@ -549,7 +549,7 @@ DataLoadTLBMiss: #else li r1, _PAGE_PRESENT #endif - bge- 112f + bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ @@ -621,7 +621,7 @@ DataStoreTLBMiss: */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_DMISS - lis r1,PAGE_OFFSET@h /* check if kernel address */ + lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SPRG_PGDIR #ifdef CONFIG_SWAP @@ -629,7 +629,7 @@ DataStoreTLBMiss: #else li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT #endif - bge- 112f + bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ @@ -671,6 +671,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) #ifndef CONFIG_ALTIVEC #define altivec_assist_exception unknown_exception +#endif + +#ifndef CONFIG_TAU_INT +#define TAUException unknown_exception #endif EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 926bfa73586a..5b282d9965a5 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -620,7 +620,7 @@ start_here: ori r6, r6, swapper_pg_dir@l lis r5, abatron_pteptrs@h ori r5, r5, abatron_pteptrs@l - stw r5, 0xf0(r0) /* Must match your Abatron config file */ + stw r5, 0xf0(0) /* Must match your Abatron config file */ tophys(r5,r5) stw r6, 0(r5) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 0000daf0e1da..1f4a1efa0074 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -418,8 +418,9 @@ static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw) if (dawr_enabled()) { max_len = DAWR_MAX_LEN; - /* DAWR region can't cross 512 bytes boundary */ - if (ALIGN(start_addr, SZ_512M) != ALIGN(end_addr - 1, SZ_512M)) + /* DAWR region can't cross 512 bytes boundary on p10 predecessors */ + if (!cpu_has_feature(CPU_FTR_ARCH_31) && + (ALIGN_DOWN(start_addr, SZ_512) != ALIGN_DOWN(end_addr - 1, SZ_512))) return -EINVAL; } else if (IS_ENABLED(CONFIG_PPC_8xx)) { /* 8xx can setup a range without limitation */ @@ -498,11 +499,11 @@ static bool dar_in_user_range(unsigned long dar, struct arch_hw_breakpoint *info return ((info->address <= dar) && (dar - info->address < info->len)); } -static bool dar_user_range_overlaps(unsigned long dar, int size, - struct arch_hw_breakpoint *info) +static bool ea_user_range_overlaps(unsigned long ea, int size, + struct arch_hw_breakpoint *info) { - return ((dar < info->address + info->len) && - (dar + size > info->address)); + return ((ea < info->address + info->len) && + (ea + size > info->address)); } static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info) @@ -515,20 +516,22 @@ static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info) return ((hw_start_addr <= dar) && (hw_end_addr > dar)); } -static bool dar_hw_range_overlaps(unsigned long dar, int size, - struct arch_hw_breakpoint *info) +static bool ea_hw_range_overlaps(unsigned long ea, int size, + struct arch_hw_breakpoint *info) { unsigned long hw_start_addr, hw_end_addr; hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE); hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE); - return ((dar < hw_end_addr) && (dar + size > hw_start_addr)); + return ((ea < hw_end_addr) && (ea + size > hw_start_addr)); } /* * If hw has multiple DAWR registers, we also need to check all * dawrx constraint bits to confirm this is _really_ a valid event. + * If type is UNKNOWN, but privilege level matches, consider it as + * a positive match. */ static bool check_dawrx_constraints(struct pt_regs *regs, int type, struct arch_hw_breakpoint *info) @@ -536,7 +539,12 @@ static bool check_dawrx_constraints(struct pt_regs *regs, int type, if (OP_IS_LOAD(type) && !(info->type & HW_BRK_TYPE_READ)) return false; - if (OP_IS_STORE(type) && !(info->type & HW_BRK_TYPE_WRITE)) + /* + * The Cache Management instructions other than dcbz never + * cause a match. i.e. if type is CACHEOP, the instruction + * is dcbz, and dcbz is treated as Store. + */ + if ((OP_IS_STORE(type) || type == CACHEOP) && !(info->type & HW_BRK_TYPE_WRITE)) return false; if (is_kernel_addr(regs->nip) && !(info->type & HW_BRK_TYPE_KERNEL)) @@ -553,7 +561,8 @@ static bool check_dawrx_constraints(struct pt_regs *regs, int type, * including extraneous exception. Otherwise return false. */ static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr, - int type, int size, struct arch_hw_breakpoint *info) + unsigned long ea, int type, int size, + struct arch_hw_breakpoint *info) { bool in_user_range = dar_in_user_range(regs->dar, info); bool dawrx_constraints; @@ -569,22 +578,27 @@ static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr, } if (unlikely(ppc_inst_equal(instr, ppc_inst(0)))) { - if (in_user_range) - return true; + if (cpu_has_feature(CPU_FTR_ARCH_31) && + !dar_in_hw_range(regs->dar, info)) + return false; - if (dar_in_hw_range(regs->dar, info)) { - info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; - return true; - } - return false; + return true; } dawrx_constraints = check_dawrx_constraints(regs, type, info); - if (dar_user_range_overlaps(regs->dar, size, info)) + if (type == UNKNOWN) { + if (cpu_has_feature(CPU_FTR_ARCH_31) && + !dar_in_hw_range(regs->dar, info)) + return false; + + return dawrx_constraints; + } + + if (ea_user_range_overlaps(ea, size, info)) return dawrx_constraints; - if (dar_hw_range_overlaps(regs->dar, size, info)) { + if (ea_hw_range_overlaps(ea, size, info)) { if (dawrx_constraints) { info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; return true; @@ -593,8 +607,17 @@ static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr, return false; } +static int cache_op_size(void) +{ +#ifdef __powerpc64__ + return ppc64_caches.l1d.block_size; +#else + return L1_CACHE_BYTES; +#endif +} + static void get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, - int *type, int *size, bool *larx_stcx) + int *type, int *size, unsigned long *ea) { struct instruction_op op; @@ -602,16 +625,23 @@ static void get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, return; analyse_instr(&op, regs, *instr); - - /* - * Set size = 8 if analyse_instr() fails. If it's a userspace - * watchpoint(valid or extraneous), we can notify user about it. - * If it's a kernel watchpoint, instruction emulation will fail - * in stepping_handler() and watchpoint will be disabled. - */ *type = GETTYPE(op.type); - *size = !(*type == UNKNOWN) ? GETSIZE(op.type) : 8; - *larx_stcx = (*type == LARX || *type == STCX); + *ea = op.ea; +#ifdef __powerpc64__ + if (!(regs->msr & MSR_64BIT)) + *ea &= 0xffffffffUL; +#endif + + *size = GETSIZE(op.type); + if (*type == CACHEOP) { + *size = cache_op_size(); + *ea &= ~(*size - 1); + } +} + +static bool is_larx_stcx_instr(int type) +{ + return type == LARX || type == STCX; } /* @@ -678,7 +708,7 @@ int hw_breakpoint_handler(struct die_args *args) struct ppc_inst instr = ppc_inst(0); int type = 0; int size = 0; - bool larx_stcx = false; + unsigned long ea; /* Disable breakpoints during exception handling */ hw_breakpoint_disable(); @@ -692,7 +722,7 @@ int hw_breakpoint_handler(struct die_args *args) rcu_read_lock(); if (!IS_ENABLED(CONFIG_PPC_8xx)) - get_instr_detail(regs, &instr, &type, &size, &larx_stcx); + get_instr_detail(regs, &instr, &type, &size, &ea); for (i = 0; i < nr_wp_slots(); i++) { bp[i] = __this_cpu_read(bp_per_reg[i]); @@ -702,7 +732,7 @@ int hw_breakpoint_handler(struct die_args *args) info[i] = counter_arch_bp(bp[i]); info[i]->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; - if (check_constraints(regs, instr, type, size, info[i])) { + if (check_constraints(regs, instr, ea, type, size, info[i])) { if (!IS_ENABLED(CONFIG_PPC_8xx) && ppc_inst_equal(instr, ppc_inst(0))) { handler_error(bp[i], info[i]); @@ -744,7 +774,7 @@ int hw_breakpoint_handler(struct die_args *args) } if (!IS_ENABLED(CONFIG_PPC_8xx)) { - if (larx_stcx) { + if (is_larx_stcx_instr(type)) { for (i = 0; i < nr_wp_slots(); i++) { if (!hit[i]) continue; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 05b1cc0e009e..bf21ebd36190 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -621,13 +621,14 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions); seq_printf(p, " Machine check exceptions\n"); +#ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_HVMODE)) { seq_printf(p, "%*s: ", prec, "HMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat, j).hmi_exceptions); + seq_printf(p, "%10u ", paca_ptrs[j]->hmi_irqs); seq_printf(p, " Hypervisor Maintenance Interrupts\n"); } +#endif seq_printf(p, "%*s: ", prec, "NMI"); for_each_online_cpu(j) @@ -665,7 +666,9 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += per_cpu(irq_stat, cpu).mce_exceptions; sum += per_cpu(irq_stat, cpu).spurious_irqs; sum += per_cpu(irq_stat, cpu).timer_irqs_others; - sum += per_cpu(irq_stat, cpu).hmi_exceptions; +#ifdef CONFIG_PPC_BOOK3S_64 + sum += paca_ptrs[cpu]->hmi_irqs; +#endif sum += per_cpu(irq_stat, cpu).sreset_irqs; #ifdef CONFIG_PPC_WATCHDOG sum += per_cpu(irq_stat, cpu).soft_nmi_irqs; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 9cc792a3a6a9..6ab9b4d037c3 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -244,7 +244,7 @@ static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) * So, we should never get here... but, its still * good to catch them, just in case... */ - printk("Can't step on instruction %x\n", ppc_inst_val(insn)); + printk("Can't step on instruction %s\n", ppc_inst_as_str(insn)); BUG(); } else { /* diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index fd90c0eda229..ada59f6c4298 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -49,6 +49,20 @@ static struct irq_work mce_ue_event_irq_work = { DECLARE_WORK(mce_ue_event_work, machine_process_ue_event); +static BLOCKING_NOTIFIER_HEAD(mce_notifier_list); + +int mce_register_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&mce_notifier_list, nb); +} +EXPORT_SYMBOL_GPL(mce_register_notifier); + +int mce_unregister_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&mce_notifier_list, nb); +} +EXPORT_SYMBOL_GPL(mce_unregister_notifier); + static void mce_set_error_info(struct machine_check_event *mce, struct mce_error_info *mce_err) { @@ -278,6 +292,7 @@ static void machine_process_ue_event(struct work_struct *work) while (__this_cpu_read(mce_ue_count) > 0) { index = __this_cpu_read(mce_ue_count) - 1; evt = this_cpu_ptr(&mce_ue_event_queue[index]); + blocking_notifier_call_chain(&mce_notifier_list, 0, evt); #ifdef CONFIG_MEMORY_FAILURE /* * This should probably queued elsewhere, but @@ -370,6 +385,7 @@ void machine_check_print_event_info(struct machine_check_event *evt, static const char *mc_user_types[] = { "Indeterminate", "tlbie(l) invalid", + "scv invalid", }; static const char *mc_ra_types[] = { "Indeterminate", @@ -711,7 +727,7 @@ long hmi_exception_realmode(struct pt_regs *regs) { int ret; - __this_cpu_inc(irq_stat.hmi_exceptions); + local_paca->hmi_irqs++; ret = hmi_handle_debugtrig(regs); if (ret >= 0) diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index c3b522bff9b4..b7e173754a2e 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -243,6 +243,45 @@ static const struct mce_ierror_table mce_p9_ierror_table[] = { MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0, 0, 0, 0, 0, 0, 0 } }; +static const struct mce_ierror_table mce_p10_ierror_table[] = { +{ 0x00000000081c0000, 0x0000000000040000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x0000000000080000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x00000000000c0000, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000000081c0000, 0x0000000000100000, true, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000000081c0000, 0x0000000000140000, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000000081c0000, 0x0000000000180000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x00000000001c0000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x0000000008080000, true, + MCE_ERROR_TYPE_USER,MCE_USER_ERROR_SCV, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000000081c0000, 0x00000000080c0000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x0000000008100000, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000000081c0000, 0x0000000008140000, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */ +{ 0x00000000081c0000, 0x00000000081c0000, true, MCE_ECLASS_HARDWARE, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, 0, 0, 0, 0, 0, 0 } }; + struct mce_derror_table { unsigned long dsisr_value; bool dar_valid; /* dar is a valid indicator of faulting address */ @@ -361,6 +400,46 @@ static const struct mce_derror_table mce_p9_derror_table[] = { MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0, false, 0, 0, 0, 0, 0 } }; +static const struct mce_derror_table mce_p10_derror_table[] = { +{ 0x00008000, false, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00004000, true, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000800, true, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000400, true, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000200, false, + MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000080, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */ + MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0x00000100, true, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000040, true, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000020, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000010, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0x00000008, false, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, false, 0, 0, 0, 0, 0 } }; + static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, uint64_t *phys_addr) { @@ -657,3 +736,8 @@ long __machine_check_early_realmode_p9(struct pt_regs *regs) return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table); } + +long __machine_check_early_realmode_p10(struct pt_regs *regs) +{ + return mce_handle_error(regs, mce_p10_derror_table, mce_p10_ierror_table); +} diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 1864605eca29..7bb46ad98207 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -413,20 +413,6 @@ _GLOBAL(kexec_sequence) li r0,0 std r0,16(r1) -BEGIN_FTR_SECTION - /* - * This is the best time to turn AMR/IAMR off. - * key 0 is used in radix for supervisor<->user - * protection, but on hash key 0 is reserved - * ideally we want to enter with a clean state. - * NOTE, we rely on r0 being 0 from above. - */ - mtspr SPRN_IAMR,r0 -BEGIN_FTR_SECTION_NESTED(42) - mtspr SPRN_AMOR,r0 -END_FTR_SECTION_NESTED_IFSET(CPU_FTR_HVMODE, 42) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - /* save regs for local vars on new stack. * yes, we won't go back, but ... */ diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index df649acb5631..a211b0253cdb 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -86,3 +86,14 @@ int module_finalize(const Elf_Ehdr *hdr, return 0; } + +#ifdef MODULES_VADDR +void *module_alloc(unsigned long size) +{ + BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); + + return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL, + PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, + __builtin_return_address(0)); +} +#endif diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index 71a3f97dc988..f89376ff633e 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -62,8 +62,8 @@ static int of_pci_phb_probe(struct platform_device *dev) /* Init pci_dn data structures */ pci_devs_phb_init_dynamic(phb); - /* Create EEH PEs for the PHB */ - eeh_dev_phb_init_dynamic(phb); + /* Create EEH PE for the PHB */ + eeh_phb_pe_create(phb); /* Scan the bus */ pcibios_scan_phb(phb); diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 74da65aacbc9..0ad15768d762 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -57,8 +57,8 @@ static void *__init alloc_paca_data(unsigned long size, unsigned long align, #define LPPACA_SIZE 0x400 -static void *__init alloc_shared_lppaca(unsigned long size, unsigned long align, - unsigned long limit, int cpu) +static void *__init alloc_shared_lppaca(unsigned long size, unsigned long limit, + int cpu) { size_t shared_lppaca_total_size = PAGE_ALIGN(nr_cpu_ids * LPPACA_SIZE); static unsigned long shared_lppaca_size; @@ -68,6 +68,13 @@ static void *__init alloc_shared_lppaca(unsigned long size, unsigned long align, if (!shared_lppaca) { memblock_set_bottom_up(true); + /* + * See Documentation/powerpc/ultravisor.rst for more details. + * + * UV/HV data sharing is in PAGE_SIZE granularity. In order to + * minimize the number of pages shared, align the allocation to + * PAGE_SIZE. + */ shared_lppaca = memblock_alloc_try_nid(shared_lppaca_total_size, PAGE_SIZE, MEMBLOCK_LOW_LIMIT, @@ -122,7 +129,7 @@ static struct lppaca * __init new_lppaca(int cpu, unsigned long limit) return NULL; if (is_secure_guest()) - lp = alloc_shared_lppaca(LPPACA_SIZE, 0x400, limit, cpu); + lp = alloc_shared_lppaca(LPPACA_SIZE, limit, cpu); else lp = alloc_paca_data(LPPACA_SIZE, 0x400, limit, cpu); diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 4e654df55969..e99b7c547d7e 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -124,9 +124,28 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev) return NULL; } +#ifdef CONFIG_EEH +static struct eeh_dev *eeh_dev_init(struct pci_dn *pdn) +{ + struct eeh_dev *edev; + + /* Allocate EEH device */ + edev = kzalloc(sizeof(*edev), GFP_KERNEL); + if (!edev) + return NULL; + + /* Associate EEH device with OF node */ + pdn->edev = edev; + edev->pdn = pdn; + edev->bdfn = (pdn->busno << 8) | pdn->devfn; + edev->controller = pdn->phb; + + return edev; +} +#endif /* CONFIG_EEH */ + #ifdef CONFIG_PCI_IOV static struct pci_dn *add_one_sriov_vf_pdn(struct pci_dn *parent, - int vf_index, int busno, int devfn) { struct pci_dn *pdn; @@ -143,7 +162,6 @@ static struct pci_dn *add_one_sriov_vf_pdn(struct pci_dn *parent, pdn->parent = parent; pdn->busno = busno; pdn->devfn = devfn; - pdn->vf_index = vf_index; pdn->pe_number = IODA_INVALID_PE; INIT_LIST_HEAD(&pdn->child_list); INIT_LIST_HEAD(&pdn->list); @@ -174,7 +192,7 @@ struct pci_dn *add_sriov_vf_pdns(struct pci_dev *pdev) for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) { struct eeh_dev *edev __maybe_unused; - pdn = add_one_sriov_vf_pdn(parent, i, + pdn = add_one_sriov_vf_pdn(parent, pci_iov_virtfn_bus(pdev, i), pci_iov_virtfn_devfn(pdev, i)); if (!pdn) { @@ -187,7 +205,10 @@ struct pci_dn *add_sriov_vf_pdns(struct pci_dev *pdev) /* Create the EEH device for the VF */ edev = eeh_dev_init(pdn); BUG_ON(!edev); + + /* FIXME: these should probably be populated by the EEH probe */ edev->physfn = pdev; + edev->vf_index = i; #endif /* CONFIG_EEH */ } return pci_get_pdn(pdev); @@ -242,7 +263,7 @@ void remove_sriov_vf_pdns(struct pci_dev *pdev) * have a configured PE. */ if (edev->pe) - eeh_rmv_from_parent_pe(edev); + eeh_pe_tree_remove(edev); pdn->edev = NULL; kfree(edev); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 794b754deec2..016bd831908e 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -471,49 +471,58 @@ EXPORT_SYMBOL(giveup_all); #ifdef CONFIG_PPC_BOOK3S_64 #ifdef CONFIG_PPC_FPU -static int restore_fp(struct task_struct *tsk) +static bool should_restore_fp(void) { - if (tsk->thread.load_fp) { - load_fp_state(¤t->thread.fp_state); + if (current->thread.load_fp) { current->thread.load_fp++; - return 1; + return true; } - return 0; + return false; +} + +static void do_restore_fp(void) +{ + load_fp_state(¤t->thread.fp_state); } #else -static int restore_fp(struct task_struct *tsk) { return 0; } +static bool should_restore_fp(void) { return false; } +static void do_restore_fp(void) { } #endif /* CONFIG_PPC_FPU */ #ifdef CONFIG_ALTIVEC -#define loadvec(thr) ((thr).load_vec) -static int restore_altivec(struct task_struct *tsk) +static bool should_restore_altivec(void) { - if (cpu_has_feature(CPU_FTR_ALTIVEC) && (tsk->thread.load_vec)) { - load_vr_state(&tsk->thread.vr_state); - tsk->thread.used_vr = 1; - tsk->thread.load_vec++; - - return 1; + if (cpu_has_feature(CPU_FTR_ALTIVEC) && (current->thread.load_vec)) { + current->thread.load_vec++; + return true; } - return 0; + return false; +} + +static void do_restore_altivec(void) +{ + load_vr_state(¤t->thread.vr_state); + current->thread.used_vr = 1; } #else -#define loadvec(thr) 0 -static inline int restore_altivec(struct task_struct *tsk) { return 0; } +static bool should_restore_altivec(void) { return false; } +static void do_restore_altivec(void) { } #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX -static int restore_vsx(struct task_struct *tsk) +static bool should_restore_vsx(void) { - if (cpu_has_feature(CPU_FTR_VSX)) { - tsk->thread.used_vsr = 1; - return 1; - } - - return 0; + if (cpu_has_feature(CPU_FTR_VSX)) + return true; + return false; +} +static void do_restore_vsx(void) +{ + current->thread.used_vsr = 1; } #else -static inline int restore_vsx(struct task_struct *tsk) { return 0; } +static bool should_restore_vsx(void) { return false; } +static void do_restore_vsx(void) { } #endif /* CONFIG_VSX */ /* @@ -529,32 +538,42 @@ static inline int restore_vsx(struct task_struct *tsk) { return 0; } void notrace restore_math(struct pt_regs *regs) { unsigned long msr; - - if (!MSR_TM_ACTIVE(regs->msr) && - !current->thread.load_fp && !loadvec(current->thread)) - return; + unsigned long new_msr = 0; msr = regs->msr; - msr_check_and_set(msr_all_available); /* - * Only reload if the bit is not set in the user MSR, the bit BEING set - * indicates that the registers are hot + * new_msr tracks the facilities that are to be restored. Only reload + * if the bit is not set in the user MSR (if it is set, the registers + * are live for the user thread). */ - if ((!(msr & MSR_FP)) && restore_fp(current)) - msr |= MSR_FP | current->thread.fpexc_mode; + if ((!(msr & MSR_FP)) && should_restore_fp()) + new_msr |= MSR_FP | current->thread.fpexc_mode; - if ((!(msr & MSR_VEC)) && restore_altivec(current)) - msr |= MSR_VEC; + if ((!(msr & MSR_VEC)) && should_restore_altivec()) + new_msr |= MSR_VEC; - if ((msr & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC) && - restore_vsx(current)) { - msr |= MSR_VSX; + if ((!(msr & MSR_VSX)) && should_restore_vsx()) { + if (((msr | new_msr) & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC)) + new_msr |= MSR_VSX; } - msr_check_and_clear(msr_all_available); + if (new_msr) { + msr_check_and_set(new_msr); - regs->msr = msr; + if (new_msr & MSR_FP) + do_restore_fp(); + + if (new_msr & MSR_VEC) + do_restore_altivec(); + + if (new_msr & MSR_VSX) + do_restore_vsx(); + + msr_check_and_clear(new_msr); + + regs->msr |= new_msr; + } } #endif @@ -1599,6 +1618,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, { struct pt_regs *childregs, *kregs; extern void ret_from_fork(void); + extern void ret_from_fork_scv(void); extern void ret_from_kernel_thread(void); void (*f)(void); unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE; @@ -1635,7 +1655,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, if (usp) childregs->gpr[1] = usp; p->thread.regs = childregs; - childregs->gpr[3] = 0; /* Result from fork() */ + /* 64s sets this in ret_from_fork */ + if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + childregs->gpr[3] = 0; /* Result from fork() */ if (clone_flags & CLONE_SETTLS) { if (!is_32bit_task()) childregs->gpr[13] = tls; @@ -1643,7 +1665,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, childregs->gpr[2] = tls; } - f = ret_from_fork; + if (trap_is_scv(regs)) + f = ret_from_fork_scv; + else + f = ret_from_fork; } childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX); sp -= STACK_FRAME_OVERHEAD; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 9cc49f265c86..d8a2fb87ba0c 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -163,7 +163,7 @@ static struct ibm_pa_feature { { .pabyte = 0, .pabit = 6, .cpu_features = CPU_FTR_NOEXECUTE }, { .pabyte = 1, .pabit = 2, .mmu_features = MMU_FTR_CI_LARGE_PAGE }, #ifdef CONFIG_PPC_RADIX_MMU - { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX }, + { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX | MMU_FTR_GTSE }, #endif { .pabyte = 1, .pabit = 1, .invert = 1, .cpu_features = CPU_FTR_NODSISRALIGN }, { .pabyte = 5, .pabit = 0, .cpu_features = CPU_FTR_REAL_LE, @@ -175,6 +175,8 @@ static struct ibm_pa_feature { */ { .pabyte = 22, .pabit = 0, .cpu_features = CPU_FTR_TM_COMP, .cpu_user_ftrs2 = PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_HTM_NOSC_COMP }, + + { .pabyte = 64, .pabit = 0, .cpu_features = CPU_FTR_DAWR1 }, }; static void __init scan_features(unsigned long node, const unsigned char *ftrs, @@ -468,8 +470,9 @@ static bool validate_mem_limit(u64 base, u64 *size) * This contains a list of memory blocks along with NUMA affinity * information. */ -static void __init early_init_drmem_lmb(struct drmem_lmb *lmb, - const __be32 **usm) +static int __init early_init_drmem_lmb(struct drmem_lmb *lmb, + const __be32 **usm, + void *data) { u64 base, size; int is_kexec_kdump = 0, rngs; @@ -484,7 +487,7 @@ static void __init early_init_drmem_lmb(struct drmem_lmb *lmb, */ if ((lmb->flags & DRCONF_MEM_RESERVED) || !(lmb->flags & DRCONF_MEM_ASSIGNED)) - return; + return 0; if (*usm) is_kexec_kdump = 1; @@ -499,7 +502,7 @@ static void __init early_init_drmem_lmb(struct drmem_lmb *lmb, */ rngs = dt_mem_next_cell(dt_root_size_cells, usm); if (!rngs) /* there are no (base, size) duple */ - return; + return 0; } do { @@ -524,6 +527,8 @@ static void __init early_init_drmem_lmb(struct drmem_lmb *lmb, if (lmb->flags & DRCONF_MEM_HOTREMOVABLE) memblock_mark_hotplug(base, size); } while (--rngs); + + return 0; } #endif /* CONFIG_PPC_PSERIES */ @@ -534,7 +539,7 @@ static int __init early_init_dt_scan_memory_ppc(unsigned long node, #ifdef CONFIG_PPC_PSERIES if (depth == 1 && strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) { - walk_drmem_lmbs_early(node, early_init_drmem_lmb); + walk_drmem_lmbs_early(node, NULL, early_init_drmem_lmb); return 0; } #endif @@ -815,6 +820,11 @@ void __init early_init_devtree(void *params) /* Now try to figure out if we are running on LPAR and so on */ pseries_probe_fw_features(); + /* + * Initialize pkey features and default AMR/IAMR values + */ + pkey_early_init_devtree(); + #ifdef CONFIG_PPC_PS3 /* Identify PS3 firmware */ if (of_flat_dt_is_compatible(of_get_flat_dt_root(), "sony,ps3")) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 90c604d00b7d..ae7ec9903191 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -169,6 +169,7 @@ static unsigned long __prombss prom_tce_alloc_end; #ifdef CONFIG_PPC_PSERIES static bool __prombss prom_radix_disable; +static bool __prombss prom_radix_gtse_disable; static bool __prombss prom_xive_disable; #endif @@ -823,6 +824,12 @@ static void __init early_cmdline_parse(void) if (prom_radix_disable) prom_debug("Radix disabled from cmdline\n"); + opt = prom_strstr(prom_cmd_line, "radix_hcall_invalidate=on"); + if (opt) { + prom_radix_gtse_disable = true; + prom_debug("Radix GTSE disabled from cmdline\n"); + } + opt = prom_strstr(prom_cmd_line, "xive=off"); if (opt) { prom_xive_disable = true; @@ -1285,10 +1292,8 @@ static void __init prom_parse_platform_support(u8 index, u8 val, prom_parse_mmu_model(val & OV5_FEAT(OV5_MMU_SUPPORT), support); break; case OV5_INDX(OV5_RADIX_GTSE): /* Radix Extensions */ - if (val & OV5_FEAT(OV5_RADIX_GTSE)) { - prom_debug("Radix - GTSE supported\n"); - support->radix_gtse = true; - } + if (val & OV5_FEAT(OV5_RADIX_GTSE)) + support->radix_gtse = !prom_radix_gtse_disable; break; case OV5_INDX(OV5_XIVE_SUPPORT): /* Interrupt mode */ prom_parse_xive_model(val & OV5_FEAT(OV5_XIVE_SUPPORT), @@ -1336,12 +1341,15 @@ static void __init prom_check_platform_support(void) } } - if (supported.radix_mmu && supported.radix_gtse && - IS_ENABLED(CONFIG_PPC_RADIX_MMU)) { - /* Radix preferred - but we require GTSE for now */ - prom_debug("Asking for radix with GTSE\n"); + if (supported.radix_mmu && IS_ENABLED(CONFIG_PPC_RADIX_MMU)) { + /* Radix preferred - Check if GTSE is also supported */ + prom_debug("Asking for radix\n"); ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX); - ibm_architecture_vec.vec5.radix_ext = OV5_FEAT(OV5_RADIX_GTSE); + if (supported.radix_gtse) + ibm_architecture_vec.vec5.radix_ext = + OV5_FEAT(OV5_RADIX_GTSE); + else + prom_debug("Radix GTSE isn't supported\n"); } else if (supported.hash_mmu) { /* Default to hash mmu (if we can) */ prom_debug("Asking for hash\n"); @@ -3262,7 +3270,7 @@ static int enter_secure_mode(unsigned long kbase, unsigned long fdt) /* * Call the Ultravisor to transfer us to secure memory if we have an ESM blob. */ -static void setup_secure_guest(unsigned long kbase, unsigned long fdt) +static void __init setup_secure_guest(unsigned long kbase, unsigned long fdt) { int ret; @@ -3292,7 +3300,7 @@ static void setup_secure_guest(unsigned long kbase, unsigned long fdt) } } #else -static void setup_secure_guest(unsigned long kbase, unsigned long fdt) +static void __init setup_secure_guest(unsigned long kbase, unsigned long fdt) { } #endif /* CONFIG_PPC_SVM */ diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 13208a9a02ca..19823a250aa0 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -470,13 +470,15 @@ static int pkey_active(struct task_struct *target, const struct user_regset *reg static int pkey_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { + int ret; + BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr)); - BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor)); if (!arch_pkeys_enabled()) return -ENODEV; - return membuf_write(&to, &target->thread.amr, ELF_NPKEY * sizeof(unsigned long)); + membuf_write(&to, &target->thread.amr, 2 * sizeof(unsigned long)); + return membuf_store(&to, default_uamor); } static int pkey_set(struct task_struct *target, const struct user_regset *regset, @@ -498,9 +500,17 @@ static int pkey_set(struct task_struct *target, const struct user_regset *regset if (ret) return ret; - /* UAMOR determines which bits of the AMR can be set from userspace. */ - target->thread.amr = (new_amr & target->thread.uamor) | - (target->thread.amr & ~target->thread.uamor); + /* + * UAMOR determines which bits of the AMR can be set from userspace. + * UAMOR value 0b11 indicates that the AMR value can be modified + * from userspace. If the kernel is using a specific key, we avoid + * userspace modifying the AMR value for that key by masking them + * via UAMOR 0b00. + * + * Pick the AMR values for the keys that kernel is using. This + * will be indicated by the ~default_uamor bits. + */ + target->thread.amr = (new_amr & default_uamor) | (target->thread.amr & ~default_uamor); return 0; } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index a09eba03f180..806d554ce357 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -843,96 +843,6 @@ static void rtas_percpu_suspend_me(void *info) __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1); } -enum rtas_cpu_state { - DOWN, - UP, -}; - -#ifndef CONFIG_SMP -static int rtas_cpu_state_change_mask(enum rtas_cpu_state state, - cpumask_var_t cpus) -{ - if (!cpumask_empty(cpus)) { - cpumask_clear(cpus); - return -EINVAL; - } else - return 0; -} -#else -/* On return cpumask will be altered to indicate CPUs changed. - * CPUs with states changed will be set in the mask, - * CPUs with status unchanged will be unset in the mask. */ -static int rtas_cpu_state_change_mask(enum rtas_cpu_state state, - cpumask_var_t cpus) -{ - int cpu; - int cpuret = 0; - int ret = 0; - - if (cpumask_empty(cpus)) - return 0; - - for_each_cpu(cpu, cpus) { - struct device *dev = get_cpu_device(cpu); - - switch (state) { - case DOWN: - cpuret = device_offline(dev); - break; - case UP: - cpuret = device_online(dev); - break; - } - if (cpuret < 0) { - pr_debug("%s: cpu_%s for cpu#%d returned %d.\n", - __func__, - ((state == UP) ? "up" : "down"), - cpu, cpuret); - if (!ret) - ret = cpuret; - if (state == UP) { - /* clear bits for unchanged cpus, return */ - cpumask_shift_right(cpus, cpus, cpu); - cpumask_shift_left(cpus, cpus, cpu); - break; - } else { - /* clear bit for unchanged cpu, continue */ - cpumask_clear_cpu(cpu, cpus); - } - } - cond_resched(); - } - - return ret; -} -#endif - -int rtas_online_cpus_mask(cpumask_var_t cpus) -{ - int ret; - - ret = rtas_cpu_state_change_mask(UP, cpus); - - if (ret) { - cpumask_var_t tmp_mask; - - if (!alloc_cpumask_var(&tmp_mask, GFP_KERNEL)) - return ret; - - /* Use tmp_mask to preserve cpus mask from first failure */ - cpumask_copy(tmp_mask, cpus); - rtas_offline_cpus_mask(tmp_mask); - free_cpumask_var(tmp_mask); - } - - return ret; -} - -int rtas_offline_cpus_mask(cpumask_var_t cpus) -{ - return rtas_cpu_state_change_mask(DOWN, cpus); -} - int rtas_ibm_suspend_me(u64 handle) { long state; @@ -940,8 +850,6 @@ int rtas_ibm_suspend_me(u64 handle) unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; struct rtas_suspend_me_data data; DECLARE_COMPLETION_ONSTACK(done); - cpumask_var_t offline_mask; - int cpuret; if (!rtas_service_present("ibm,suspend-me")) return -ENOSYS; @@ -962,9 +870,6 @@ int rtas_ibm_suspend_me(u64 handle) return -EIO; } - if (!alloc_cpumask_var(&offline_mask, GFP_KERNEL)) - return -ENOMEM; - atomic_set(&data.working, 0); atomic_set(&data.done, 0); atomic_set(&data.error, 0); @@ -973,24 +878,8 @@ int rtas_ibm_suspend_me(u64 handle) lock_device_hotplug(); - /* All present CPUs must be online */ - cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask); - cpuret = rtas_online_cpus_mask(offline_mask); - if (cpuret) { - pr_err("%s: Could not bring present CPUs online.\n", __func__); - atomic_set(&data.error, cpuret); - goto out; - } - cpu_hotplug_disable(); - /* Check if we raced with a CPU-Offline Operation */ - if (!cpumask_equal(cpu_present_mask, cpu_online_mask)) { - pr_info("%s: Raced against a concurrent CPU-Offline\n", __func__); - atomic_set(&data.error, -EAGAIN); - goto out_hotplug_enable; - } - /* Call function on all CPUs. One of us will make the * rtas call */ @@ -1001,18 +890,11 @@ int rtas_ibm_suspend_me(u64 handle) if (atomic_read(&data.error) != 0) printk(KERN_ERR "Error doing global join\n"); -out_hotplug_enable: + cpu_hotplug_enable(); - /* Take down CPUs not online prior to suspend */ - cpuret = rtas_offline_cpus_mask(offline_mask); - if (cpuret) - pr_warn("%s: Could not restore CPUs to offline state.\n", - __func__); - -out: unlock_device_hotplug(); - free_cpumask_var(offline_mask); + return atomic_read(&data.error); } diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 89b798f8f656..8561dfb33f24 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -273,37 +273,15 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal) } } -#ifdef CONFIG_PPC_PSERIES -static void handle_prrn_event(s32 scope) -{ - /* - * For PRRN, we must pass the negative of the scope value in - * the RTAS event. - */ - pseries_devicetree_update(-scope); - numa_update_cpu_topology(false); -} - static void handle_rtas_event(const struct rtas_error_log *log) { - if (rtas_error_type(log) != RTAS_TYPE_PRRN || !prrn_is_enabled()) + if (!machine_is(pseries)) return; - /* For PRRN Events the extended log length is used to denote - * the scope for calling rtas update-nodes. - */ - handle_prrn_event(rtas_error_extended_log_length(log)); + if (rtas_error_type(log) == RTAS_TYPE_PRRN) + pr_info_ratelimited("Platform resource reassignment ignored.\n"); } -#else - -static void handle_rtas_event(const struct rtas_error_log *log) -{ - return; -} - -#endif - static int rtas_log_open(struct inode * inode, struct file * file) { return 0; diff --git a/arch/powerpc/kernel/secure_boot.c b/arch/powerpc/kernel/secure_boot.c index 4b982324d368..f9af305d9579 100644 --- a/arch/powerpc/kernel/secure_boot.c +++ b/arch/powerpc/kernel/secure_boot.c @@ -23,12 +23,19 @@ bool is_ppc_secureboot_enabled(void) { struct device_node *node; bool enabled = false; + u32 secureboot; node = get_ppc_fw_sb_node(); enabled = of_property_read_bool(node, "os-secureboot-enforcing"); - of_node_put(node); + if (enabled) + goto out; + + if (!of_property_read_u32(of_root, "ibm,secure-boot", &secureboot)) + enabled = (secureboot > 1); + +out: pr_info("Secure boot mode %s\n", enabled ? "enabled" : "disabled"); return enabled; @@ -38,12 +45,19 @@ bool is_ppc_trustedboot_enabled(void) { struct device_node *node; bool enabled = false; + u32 trustedboot; node = get_ppc_fw_sb_node(); enabled = of_property_read_bool(node, "trusted-enabled"); - of_node_put(node); + if (enabled) + goto out; + + if (!of_property_read_u32(of_root, "ibm,trusted-boot", &trustedboot)) + enabled = (trustedboot > 0); + +out: pr_info("Trusted boot mode %s\n", enabled ? "enabled" : "disabled"); return enabled; diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index d86701ce116b..c9876aab3142 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -21,13 +21,13 @@ u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; -enum count_cache_flush_type { - COUNT_CACHE_FLUSH_NONE = 0x1, - COUNT_CACHE_FLUSH_SW = 0x2, - COUNT_CACHE_FLUSH_HW = 0x4, +enum branch_cache_flush_type { + BRANCH_CACHE_FLUSH_NONE = 0x1, + BRANCH_CACHE_FLUSH_SW = 0x2, + BRANCH_CACHE_FLUSH_HW = 0x4, }; -static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NONE; -static bool link_stack_flush_enabled; +static enum branch_cache_flush_type count_cache_flush_type = BRANCH_CACHE_FLUSH_NONE; +static enum branch_cache_flush_type link_stack_flush_type = BRANCH_CACHE_FLUSH_NONE; bool barrier_nospec_enabled; static bool no_nospec; @@ -219,24 +219,25 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c if (ccd) seq_buf_printf(&s, "Indirect branch cache disabled"); - if (link_stack_flush_enabled) - seq_buf_printf(&s, ", Software link stack flush"); - - } else if (count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) { + } else if (count_cache_flush_type != BRANCH_CACHE_FLUSH_NONE) { seq_buf_printf(&s, "Mitigation: Software count cache flush"); - if (count_cache_flush_type == COUNT_CACHE_FLUSH_HW) + if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW) seq_buf_printf(&s, " (hardware accelerated)"); - if (link_stack_flush_enabled) - seq_buf_printf(&s, ", Software link stack flush"); - } else if (btb_flush_enabled) { seq_buf_printf(&s, "Mitigation: Branch predictor state flush"); } else { seq_buf_printf(&s, "Vulnerable"); } + if (bcs || ccd || count_cache_flush_type != BRANCH_CACHE_FLUSH_NONE) { + if (link_stack_flush_type != BRANCH_CACHE_FLUSH_NONE) + seq_buf_printf(&s, ", Software link stack flush"); + if (link_stack_flush_type == BRANCH_CACHE_FLUSH_HW) + seq_buf_printf(&s, " (hardware accelerated)"); + } + seq_buf_printf(&s, "\n"); return s.len; @@ -427,61 +428,79 @@ static __init int stf_barrier_debugfs_init(void) device_initcall(stf_barrier_debugfs_init); #endif /* CONFIG_DEBUG_FS */ -static void no_count_cache_flush(void) +static void update_branch_cache_flush(void) { - count_cache_flush_type = COUNT_CACHE_FLUSH_NONE; - pr_info("count-cache-flush: software flush disabled.\n"); -} - -static void toggle_count_cache_flush(bool enable) -{ - if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE) && - !security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK)) - enable = false; - - if (!enable) { - patch_instruction_site(&patch__call_flush_count_cache, - ppc_inst(PPC_INST_NOP)); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + // This controls the branch from guest_exit_cont to kvm_flush_link_stack + if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) { patch_instruction_site(&patch__call_kvm_flush_link_stack, ppc_inst(PPC_INST_NOP)); -#endif - pr_info("link-stack-flush: software flush disabled.\n"); - link_stack_flush_enabled = false; - no_count_cache_flush(); - return; + } else { + // Could use HW flush, but that could also flush count cache + patch_branch_site(&patch__call_kvm_flush_link_stack, + (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); } - - // This enables the branch from _switch to flush_count_cache - patch_branch_site(&patch__call_flush_count_cache, - (u64)&flush_count_cache, BRANCH_SET_LINK); - -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - // This enables the branch from guest_exit_cont to kvm_flush_link_stack - patch_branch_site(&patch__call_kvm_flush_link_stack, - (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); #endif - pr_info("link-stack-flush: software flush enabled.\n"); - link_stack_flush_enabled = true; + // This controls the branch from _switch to flush_branch_caches + if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE && + link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) { + patch_instruction_site(&patch__call_flush_branch_caches, + ppc_inst(PPC_INST_NOP)); + } else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW && + link_stack_flush_type == BRANCH_CACHE_FLUSH_HW) { + patch_instruction_site(&patch__call_flush_branch_caches, + ppc_inst(PPC_INST_BCCTR_FLUSH)); + } else { + patch_branch_site(&patch__call_flush_branch_caches, + (u64)&flush_branch_caches, BRANCH_SET_LINK); - // If we just need to flush the link stack, patch an early return - if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) { - patch_instruction_site(&patch__flush_link_stack_return, - ppc_inst(PPC_INST_BLR)); - no_count_cache_flush(); - return; + // If we just need to flush the link stack, early return + if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) { + patch_instruction_site(&patch__flush_link_stack_return, + ppc_inst(PPC_INST_BLR)); + + // If we have flush instruction, early return + } else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW) { + patch_instruction_site(&patch__flush_count_cache_return, + ppc_inst(PPC_INST_BLR)); + } + } +} + +static void toggle_branch_cache_flush(bool enable) +{ + if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) { + if (count_cache_flush_type != BRANCH_CACHE_FLUSH_NONE) + count_cache_flush_type = BRANCH_CACHE_FLUSH_NONE; + + pr_info("count-cache-flush: flush disabled.\n"); + } else { + if (security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) { + count_cache_flush_type = BRANCH_CACHE_FLUSH_HW; + pr_info("count-cache-flush: hardware flush enabled.\n"); + } else { + count_cache_flush_type = BRANCH_CACHE_FLUSH_SW; + pr_info("count-cache-flush: software flush enabled.\n"); + } } - if (!security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) { - count_cache_flush_type = COUNT_CACHE_FLUSH_SW; - pr_info("count-cache-flush: full software flush sequence enabled.\n"); - return; + if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK)) { + if (link_stack_flush_type != BRANCH_CACHE_FLUSH_NONE) + link_stack_flush_type = BRANCH_CACHE_FLUSH_NONE; + + pr_info("link-stack-flush: flush disabled.\n"); + } else { + if (security_ftr_enabled(SEC_FTR_BCCTR_LINK_FLUSH_ASSIST)) { + link_stack_flush_type = BRANCH_CACHE_FLUSH_HW; + pr_info("link-stack-flush: hardware flush enabled.\n"); + } else { + link_stack_flush_type = BRANCH_CACHE_FLUSH_SW; + pr_info("link-stack-flush: software flush enabled.\n"); + } } - patch_instruction_site(&patch__flush_count_cache_return, ppc_inst(PPC_INST_BLR)); - count_cache_flush_type = COUNT_CACHE_FLUSH_HW; - pr_info("count-cache-flush: hardware assisted flush sequence enabled\n"); + update_branch_cache_flush(); } void setup_count_cache_flush(void) @@ -505,7 +524,7 @@ void setup_count_cache_flush(void) security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) security_ftr_set(SEC_FTR_FLUSH_LINK_STACK); - toggle_count_cache_flush(enable); + toggle_branch_cache_flush(enable); } #ifdef CONFIG_DEBUG_FS @@ -520,14 +539,14 @@ static int count_cache_flush_set(void *data, u64 val) else return -EINVAL; - toggle_count_cache_flush(enable); + toggle_branch_cache_flush(enable); return 0; } static int count_cache_flush_get(void *data, u64 *val) { - if (count_cache_flush_type == COUNT_CACHE_FLUSH_NONE) + if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) *val = 0; else *val = 1; diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 9d3faac53295..b198b0ff25bc 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -928,6 +928,9 @@ void __init setup_arch(char **cmdline_p) /* Reserve large chunks of memory for use by CMA for KVM. */ kvm_cma_reserve(); + /* Reserve large chunks of memory for us by CMA for hugetlb */ + gigantic_hugetlb_cma_reserve(); + klp_init_thread_info(&init_task); init_mm.start_code = (unsigned long)_stext; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 0ba1ed77dc68..6be430107c6f 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -196,7 +196,10 @@ static void __init configure_exceptions(void) /* Under a PAPR hypervisor, we need hypercalls */ if (firmware_has_feature(FW_FEATURE_SET_MODE)) { /* Enable AIL if possible */ - pseries_enable_reloc_on_exc(); + if (!pseries_enable_reloc_on_exc()) { + init_task.thread.fscr &= ~FSCR_SCV; + cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV; + } /* * Tell the hypervisor that we want our exceptions to diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index b4143b6ff093..d15a98c758b8 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -205,8 +205,14 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, return; /* error signalled ? */ - if (!(regs->ccr & 0x10000000)) + if (trap_is_scv(regs)) { + /* 32-bit compat mode sign extend? */ + if (!IS_ERR_VALUE(ret)) + return; + ret = -ret; + } else if (!(regs->ccr & 0x10000000)) { return; + } switch (ret) { case ERESTART_RESTARTBLOCK: @@ -239,9 +245,14 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, regs->nip -= 4; regs->result = 0; } else { - regs->result = -EINTR; - regs->gpr[3] = EINTR; - regs->ccr |= 0x10000000; + if (trap_is_scv(regs)) { + regs->result = -EINTR; + regs->gpr[3] = -EINTR; + } else { + regs->result = -EINTR; + regs->gpr[3] = EINTR; + regs->ccr |= 0x10000000; + } } } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 1415c16ab628..96950f189b5a 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -102,22 +102,18 @@ static inline int save_general_regs(struct pt_regs *regs, struct mcontext __user *frame) { elf_greg_t64 *gregs = (elf_greg_t64 *)regs; - int i; - /* Force usr to alway see softe as 1 (interrupts enabled) */ - elf_greg_t64 softe = 0x1; + int val, i; WARN_ON(!FULL_REGS(regs)); for (i = 0; i <= PT_RESULT; i ++) { - if (i == 14 && !FULL_REGS(regs)) - i = 32; - if ( i == PT_SOFTE) { - if(__put_user((unsigned int)softe, &frame->mc_gregs[i])) - return -EFAULT; - else - continue; - } - if (__put_user((unsigned int)gregs[i], &frame->mc_gregs[i])) + /* Force usr to alway see softe as 1 (interrupts enabled) */ + if (i == PT_SOFTE) + val = 1; + else + val = gregs[i]; + + if (__put_user(val, &frame->mc_gregs[i])) return -EFAULT; } return 0; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 55e5f76554da..bfc939360bad 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -39,8 +40,8 @@ #define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs)) #define FP_REGS_SIZE sizeof(elf_fpregset_t) -#define TRAMP_TRACEBACK 3 -#define TRAMP_SIZE 6 +#define TRAMP_TRACEBACK 4 +#define TRAMP_SIZE 7 /* * When we have signals to deliver, we set up on the user stack, @@ -600,13 +601,15 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp) int i; long err = 0; + /* bctrl # call the handler */ + err |= __put_user(PPC_INST_BCTRL, &tramp[0]); /* addi r1, r1, __SIGNAL_FRAMESIZE # Pop the dummy stackframe */ err |= __put_user(PPC_INST_ADDI | __PPC_RT(R1) | __PPC_RA(R1) | - (__SIGNAL_FRAMESIZE & 0xffff), &tramp[0]); + (__SIGNAL_FRAMESIZE & 0xffff), &tramp[1]); /* li r0, __NR_[rt_]sigreturn| */ - err |= __put_user(PPC_INST_ADDI | (syscall & 0xffff), &tramp[1]); + err |= __put_user(PPC_INST_ADDI | (syscall & 0xffff), &tramp[2]); /* sc */ - err |= __put_user(PPC_INST_SC, &tramp[2]); + err |= __put_user(PPC_INST_SC, &tramp[3]); /* Minimal traceback info */ for (i=TRAMP_TRACEBACK; i < TRAMP_SIZE ;i++) @@ -632,7 +635,6 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp) SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, struct ucontext __user *, new_ctx, long, ctx_size) { - unsigned char tmp; sigset_t set; unsigned long new_msr = 0; int ctx_has_vsx_region = 0; @@ -667,9 +669,8 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, } if (new_ctx == NULL) return 0; - if (!access_ok(new_ctx, ctx_size) - || __get_user(tmp, (u8 __user *) new_ctx) - || __get_user(tmp, (u8 __user *) new_ctx + ctx_size - 1)) + if (!access_ok(new_ctx, ctx_size) || + fault_in_pages_readable((u8 __user *)new_ctx, ctx_size)) return -EFAULT; /* @@ -864,12 +865,12 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, /* Set up to return from userspace. */ if (vdso64_rt_sigtramp && tsk->mm->context.vdso_base) { - regs->link = tsk->mm->context.vdso_base + vdso64_rt_sigtramp; + regs->nip = tsk->mm->context.vdso_base + vdso64_rt_sigtramp; } else { err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); if (err) goto badframe; - regs->link = (unsigned long) &frame->tramp[0]; + regs->nip = (unsigned long) &frame->tramp[0]; } /* Allocate a dummy caller frame for the signal handler. */ @@ -878,8 +879,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, /* Set up "regs" so we "return" to the signal handler. */ if (is_elf2_task()) { - regs->nip = (unsigned long) ksig->ka.sa.sa_handler; - regs->gpr[12] = regs->nip; + regs->ctr = (unsigned long) ksig->ka.sa.sa_handler; + regs->gpr[12] = regs->ctr; } else { /* Handler is *really* a pointer to the function descriptor for * the signal routine. The first entry in the function @@ -889,7 +890,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, func_descr_t __user *funct_desc_ptr = (func_descr_t __user *) ksig->ka.sa.sa_handler; - err |= get_user(regs->nip, &funct_desc_ptr->entry); + err |= get_user(regs->ctr, &funct_desc_ptr->entry); err |= get_user(regs->gpr[2], &funct_desc_ptr->toc); } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 73199470c265..8261999c7d52 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -59,6 +59,7 @@ #include #include #include +#include #ifdef DEBUG #include diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index 79edba3ab312..8e50818aa50b 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -60,6 +60,11 @@ notrace long system_call_exception(long r3, long r4, long r5, local_irq_enable(); if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) { + if (unlikely(regs->trap == 0x7ff0)) { + /* Unsupported scv vector */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return regs->gpr[3]; + } /* * We use the return value of do_syscall_trace_enter() as the * syscall number. If the syscall was rejected for any reason @@ -78,6 +83,11 @@ notrace long system_call_exception(long r3, long r4, long r5, r8 = regs->gpr[8]; } else if (unlikely(r0 >= NR_syscalls)) { + if (unlikely(regs->trap == 0x7ff0)) { + /* Unsupported scv vector */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return regs->gpr[3]; + } return -ENOSYS; } @@ -105,16 +115,20 @@ notrace long system_call_exception(long r3, long r4, long r5, * local irqs must be disabled. Returns false if the caller must re-enable * them, check for new work, and try again. */ -static notrace inline bool prep_irq_for_enabled_exit(void) +static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri) { /* This must be done with RI=1 because tracing may touch vmaps */ trace_hardirqs_on(); /* This pattern matches prep_irq_for_idle */ - __hard_EE_RI_disable(); + if (clear_ri) + __hard_EE_RI_disable(); + else + __hard_irq_disable(); if (unlikely(lazy_irq_pending_nocheck())) { /* Took an interrupt, may have more exit work to do. */ - __hard_RI_enable(); + if (clear_ri) + __hard_RI_enable(); trace_hardirqs_off(); local_paca->irq_happened |= PACA_IRQ_HARD_DIS; @@ -136,7 +150,8 @@ static notrace inline bool prep_irq_for_enabled_exit(void) * because RI=0 and soft mask state is "unreconciled", so it is marked notrace. */ notrace unsigned long syscall_exit_prepare(unsigned long r3, - struct pt_regs *regs) + struct pt_regs *regs, + long scv) { unsigned long *ti_flagsp = ¤t_thread_info()->flags; unsigned long ti_flags; @@ -151,7 +166,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, ti_flags = *ti_flagsp; - if (unlikely(r3 >= (unsigned long)-MAX_ERRNO)) { + if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && !scv) { if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) { r3 = -r3; regs->ccr |= 0x10000000; /* Set SO bit in CR */ @@ -206,12 +221,20 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, else if (cpu_has_feature(CPU_FTR_ALTIVEC)) mathflags |= MSR_VEC; + /* + * If userspace MSR has all available FP bits set, + * then they are live and no need to restore. If not, + * it means the regs were given up and restore_math + * may decide to restore them (to avoid taking an FP + * fault). + */ if ((regs->msr & mathflags) != mathflags) restore_math(regs); } } - if (unlikely(!prep_irq_for_enabled_exit())) { + /* scv need not set RI=0 because SRRs are not used */ + if (unlikely(!prep_irq_for_enabled_exit(!scv))) { local_irq_enable(); goto again; } @@ -277,12 +300,13 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned else if (cpu_has_feature(CPU_FTR_ALTIVEC)) mathflags |= MSR_VEC; + /* See above restore_math comment */ if ((regs->msr & mathflags) != mathflags) restore_math(regs); } } - if (unlikely(!prep_irq_for_enabled_exit())) { + if (unlikely(!prep_irq_for_enabled_exit(true))) { local_irq_enable(); local_irq_disable(); goto again; @@ -345,7 +369,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign } } - if (unlikely(!prep_irq_for_enabled_exit())) { + if (unlikely(!prep_irq_for_enabled_exit(true))) { /* * Can't local_irq_restore to replay if we were in * interrupt context. Must replay directly. diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 571b3259697e..46b4ebc33db7 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -622,8 +622,10 @@ SYSFS_PMCSETUP(pmc7, SPRN_PMC7); SYSFS_PMCSETUP(pmc8, SPRN_PMC8); SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); +SYSFS_PMCSETUP(mmcr3, SPRN_MMCR3); static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra); +static DEVICE_ATTR(mmcr3, 0600, show_mmcr3, store_mmcr3); #endif /* HAS_PPC_PMC56 */ @@ -886,6 +888,9 @@ static int register_cpu_online(unsigned int cpu) #ifdef CONFIG_PMU_SYSFS if (cpu_has_feature(CPU_FTR_MMCRA)) device_create_file(s, &dev_attr_mmcra); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) + device_create_file(s, &dev_attr_mmcr3); #endif /* CONFIG_PMU_SYSFS */ if (cpu_has_feature(CPU_FTR_PURR)) { @@ -980,6 +985,9 @@ static int unregister_cpu_online(unsigned int cpu) #ifdef CONFIG_PMU_SYSFS if (cpu_has_feature(CPU_FTR_MMCRA)) device_remove_file(s, &dev_attr_mmcra); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) + device_remove_file(s, &dev_attr_mmcr3); #endif /* CONFIG_PMU_SYSFS */ if (cpu_has_feature(CPU_FTR_PURR)) { diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index c1fede6ec934..42761ebec9f7 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -73,8 +73,8 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new) /* Make sure it is what we expect it to be */ if (!ppc_inst_equal(replaced, old)) { - pr_err("%p: replaced (%#x) != old (%#x)", - (void *)ip, ppc_inst_val(replaced), ppc_inst_val(old)); + pr_err("%p: replaced (%s) != old (%s)", + (void *)ip, ppc_inst_as_str(replaced), ppc_inst_as_str(old)); return -EINVAL; } @@ -137,7 +137,7 @@ __ftrace_make_nop(struct module *mod, /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op)); + pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); return -EINVAL; } @@ -172,8 +172,8 @@ __ftrace_make_nop(struct module *mod, /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ if (!ppc_inst_equal(op, ppc_inst(PPC_INST_MFLR)) && !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) { - pr_err("Unexpected instruction %08x around bl _mcount\n", - ppc_inst_val(op)); + pr_err("Unexpected instruction %s around bl _mcount\n", + ppc_inst_as_str(op)); return -EINVAL; } #else @@ -203,7 +203,7 @@ __ftrace_make_nop(struct module *mod, } if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) { - pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, ppc_inst_val(op)); + pr_err("Expected %08x found %s\n", PPC_INST_LD_TOC, ppc_inst_as_str(op)); return -EINVAL; } #endif /* CONFIG_MPROFILE_KERNEL */ @@ -231,7 +231,7 @@ __ftrace_make_nop(struct module *mod, /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op)); + pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); return -EINVAL; } @@ -406,7 +406,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op)); + pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); return -EINVAL; } @@ -533,8 +533,8 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return -EFAULT; if (!expected_nop_sequence(ip, op[0], op[1])) { - pr_err("Unexpected call sequence at %p: %x %x\n", - ip, ppc_inst_val(op[0]), ppc_inst_val(op[1])); + pr_err("Unexpected call sequence at %p: %s %s\n", + ip, ppc_inst_as_str(op[0]), ppc_inst_as_str(op[1])); return -EINVAL; } @@ -597,7 +597,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) /* It should be pointing to a nop */ if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) { - pr_err("Expected NOP but have %x\n", ppc_inst_val(op)); + pr_err("Expected NOP but have %s\n", ppc_inst_as_str(op)); return -EINVAL; } @@ -654,7 +654,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) } if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) { - pr_err("Unexpected call sequence at %p: %x\n", ip, ppc_inst_val(op)); + pr_err("Unexpected call sequence at %p: %s\n", ip, ppc_inst_as_str(op)); return -EINVAL; } @@ -733,7 +733,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op)); + pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); return -EINVAL; } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 97413a385720..d1ebe152f210 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -2060,14 +2060,6 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status) NOKPROBE_SYMBOL(DebugException); #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ -#if !defined(CONFIG_TAU_INT) -void TAUException(struct pt_regs *regs) -{ - printk("TAU trap at PC: %lx, MSR: %lx, vector=%lx %s\n", - regs->nip, regs->msr, regs->trap, print_tainted()); -} -#endif /* CONFIG_INT_TAU */ - #ifdef CONFIG_ALTIVEC void altivec_assist_exception(struct pt_regs *regs) { diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index e0f4ba45b6cc..8dad44262e75 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -677,7 +677,7 @@ int vdso_getcpu_init(void) node = cpu_to_node(cpu); WARN_ON_ONCE(node > 0xffff); - val = (cpu & 0xfff) | ((node & 0xffff) << 16); + val = (cpu & 0xffff) | ((node & 0xffff) << 16); mtspr(SPRN_SPRG_VDSO_WRITE, val); get_paca()->sprg_vdso = val; diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index e147bbdc12cd..87ab1152d5ce 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -50,7 +50,7 @@ $(obj-vdso32): %.o: %.S FORCE # actual build commands quiet_cmd_vdso32ld = VDSO32L $@ - cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) + cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ $(call cc-ldoption, -Wl$(comma)--orphan-handling=warn) -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $< diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 5206c2eb2a1d..4c985467a668 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -111,6 +111,7 @@ SECTIONS *(.note.GNU-stack) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) + *(.glink .iplt .plt .rela*) } } diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index 32ebb3522ea1..38c317f25141 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -34,7 +34,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE # actual build commands quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) + cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) $(call cc-ldoption, -Wl$(comma)--orphan-handling=warn) # install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S index 526f5ba2593e..cab14324242b 100644 --- a/arch/powerpc/kernel/vdso64/cacheflush.S +++ b/arch/powerpc/kernel/vdso64/cacheflush.S @@ -8,6 +8,7 @@ #include #include #include +#include #include .text @@ -24,14 +25,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) .cfi_startproc mflr r12 .cfi_register lr,r12 - mr r11,r3 - bl V_LOCAL_FUNC(__get_datapage) + get_datapage r10, r0 mtlr r12 - mr r10,r3 lwz r7,CFG_DCACHE_BLOCKSZ(r10) addi r5,r7,-1 - andc r6,r11,r5 /* round low to line bdy */ + andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10) @@ -48,7 +47,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) lwz r7,CFG_ICACHE_BLOCKSZ(r10) addi r5,r7,-1 - andc r6,r11,r5 /* round low to line bdy */ + andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10) diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S index dc84f5ae3802..067247d3efb9 100644 --- a/arch/powerpc/kernel/vdso64/datapage.S +++ b/arch/powerpc/kernel/vdso64/datapage.S @@ -10,35 +10,13 @@ #include #include #include +#include .text .global __kernel_datapage_offset; __kernel_datapage_offset: .long 0 -V_FUNCTION_BEGIN(__get_datapage) - .cfi_startproc - /* We don't want that exposed or overridable as we want other objects - * to be able to bl directly to here - */ - .protected __get_datapage - .hidden __get_datapage - - mflr r0 - .cfi_register lr,r0 - - bcl 20,31,data_page_branch -data_page_branch: - mflr r3 - mtlr r0 - addi r3, r3, __kernel_datapage_offset-data_page_branch - lwz r0,0(r3) - .cfi_restore lr - add r3,r0,r3 - blr - .cfi_endproc -V_FUNCTION_END(__get_datapage) - /* * void *__kernel_get_syscall_map(unsigned int *syscall_count) ; * @@ -53,7 +31,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) mflr r12 .cfi_register lr,r12 mr r4,r3 - bl V_LOCAL_FUNC(__get_datapage) + get_datapage r3, r0 mtlr r12 addi r3,r3,CFG_SYSCALL_MAP64 cmpldi cr0,r4,0 @@ -75,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) .cfi_startproc mflr r12 .cfi_register lr,r12 - bl V_LOCAL_FUNC(__get_datapage) + get_datapage r3, r0 ld r3,CFG_TB_TICKS_PER_SEC(r3) mtlr r12 crclr cr0*4+so diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index 1c9a04703250..20f8be40c653 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -26,7 +27,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday) mr r11,r3 /* r11 holds tv */ mr r10,r4 /* r10 holds tz */ - bl V_LOCAL_FUNC(__get_datapage) /* get data page */ + get_datapage r3, r0 cmpldi r11,0 /* check if tv is NULL */ beq 2f lis r7,1000000@ha /* load up USEC_PER_SEC */ @@ -71,7 +72,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) mflr r12 /* r12 saves lr */ .cfi_register lr,r12 mr r11,r4 /* r11 saves tp */ - bl V_LOCAL_FUNC(__get_datapage) /* get data page */ + get_datapage r3, r0 lis r7,NSEC_PER_SEC@h /* want nanoseconds */ ori r7,r7,NSEC_PER_SEC@l beq cr5,70f @@ -188,7 +189,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres) mflr r12 .cfi_register lr,r12 - bl V_LOCAL_FUNC(__get_datapage) + get_datapage r3, r0 lwz r5, CLOCK_HRTIMER_RES(r3) mtlr r12 li r3,0 @@ -221,7 +222,7 @@ V_FUNCTION_BEGIN(__kernel_time) .cfi_register lr,r12 mr r11,r3 /* r11 holds t */ - bl V_LOCAL_FUNC(__get_datapage) + get_datapage r3, r0 ld r4,STAMP_XTIME_SEC(r3) diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso64/sigtramp.S index a8cc0409d7d2..bbf68cd01088 100644 --- a/arch/powerpc/kernel/vdso64/sigtramp.S +++ b/arch/powerpc/kernel/vdso64/sigtramp.S @@ -6,6 +6,7 @@ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp. * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp. */ +#include /* IFETCH_ALIGN_BYTES */ #include #include #include @@ -14,21 +15,17 @@ .text -/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from - the return address to get an address in the middle of the presumed - call instruction. Since we don't have a call here, we artificially - extend the range covered by the unwind info by padding before the - real start. */ - nop .balign 8 + .balign IFETCH_ALIGN_BYTES V_FUNCTION_BEGIN(__kernel_sigtramp_rt64) -.Lsigrt_start = . - 4 +.Lsigrt_start: + bctrl /* call the handler */ addi r1, r1, __SIGNAL_FRAMESIZE li r0,__NR_rt_sigreturn sc .Lsigrt_end: V_FUNCTION_END(__kernel_sigtramp_rt64) -/* The ".balign 8" above and the following zeros mimic the old stack +/* The .balign 8 above and the following zeros mimic the old stack trampoline layout. The last magic value is the ucontext pointer, chosen in such a way that older libgcc unwind code returns a zero for a sigcontext pointer. */ diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 256fb9720298..4e3a8d4ee614 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -30,7 +30,7 @@ SECTIONS . = ALIGN(16); .text : { *(.text .stub .text.* .gnu.linkonce.t.* __ftr_alt_*) - *(.sfpr .glink) + *(.sfpr) } :text PROVIDE(__etext = .); PROVIDE(_etext = .); @@ -111,6 +111,7 @@ SECTIONS *(.branch_lt) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) + *(.glink .iplt .plt .rela*) } } diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index efc5b52f95d2..801dc28fdcca 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -76,9 +76,7 @@ _GLOBAL(load_up_altivec) oris r12,r12,MSR_VEC@h std r12,_MSR(r1) #endif - /* Don't care if r4 overflows, this is desired behaviour */ - lbz r4,THREAD_LOAD_VEC(r5) - addi r4,r4,1 + li r4,1 stb r4,THREAD_LOAD_VEC(r5) addi r6,r5,THREAD_VRSTATE li r4,1 diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile index 86380c69f5ce..4aff6846c772 100644 --- a/arch/powerpc/kexec/Makefile +++ b/arch/powerpc/kexec/Makefile @@ -7,7 +7,7 @@ obj-y += core.o crash.o core_$(BITS).o obj-$(CONFIG_PPC32) += relocate_32.o -obj-$(CONFIG_KEXEC_FILE) += file_load.o elf_$(BITS).o +obj-$(CONFIG_KEXEC_FILE) += file_load.o ranges.o file_load_$(BITS).o elf_$(BITS).o ifdef CONFIG_HAVE_IMA_KEXEC ifdef CONFIG_IMA diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index b4184092172a..8a449b2d8715 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -152,6 +152,8 @@ static void kexec_smp_down(void *arg) if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(0, 1); + reset_sprs(); + kexec_smp_wait(); /* NOTREACHED */ } diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c index 3072fd6dbe94..d0e459bb2f05 100644 --- a/arch/powerpc/kexec/elf_64.c +++ b/arch/powerpc/kexec/elf_64.c @@ -35,6 +35,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, void *fdt; const void *slave_code; struct elfhdr ehdr; + char *modified_cmdline = NULL; struct kexec_elf_info elf_info; struct kexec_buf kbuf = { .image = image, .buf_min = 0, .buf_max = ppc64_rma_size }; @@ -46,6 +47,14 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, if (ret) goto out; + if (image->type == KEXEC_TYPE_CRASH) { + /* min & max buffer values for kdump case */ + kbuf.buf_min = pbuf.buf_min = crashk_res.start; + kbuf.buf_max = pbuf.buf_max = + ((crashk_res.end < ppc64_rma_size) ? + crashk_res.end : (ppc64_rma_size - 1)); + } + ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr); if (ret) goto out; @@ -60,6 +69,25 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem); + /* Load additional segments needed for panic kernel */ + if (image->type == KEXEC_TYPE_CRASH) { + ret = load_crashdump_segments_ppc64(image, &kbuf); + if (ret) { + pr_err("Failed to load kdump kernel segments\n"); + goto out; + } + + /* Setup cmdline for kdump kernel case */ + modified_cmdline = setup_kdump_cmdline(image, cmdline, + cmdline_len); + if (!modified_cmdline) { + pr_err("Setting up cmdline for kdump kernel failed\n"); + ret = -EINVAL; + goto out; + } + cmdline = modified_cmdline; + } + if (initrd != NULL) { kbuf.buffer = initrd; kbuf.bufsz = kbuf.memsz = initrd_len; @@ -88,7 +116,8 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, goto out; } - ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline); + ret = setup_new_fdt_ppc64(image, fdt, initrd_load_addr, + initrd_len, cmdline); if (ret) goto out; @@ -107,12 +136,13 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr); slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset; - ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr, - fdt_load_addr); + ret = setup_purgatory_ppc64(image, slave_code, fdt, kernel_load_addr, + fdt_load_addr); if (ret) pr_err("Error setting up the purgatory.\n"); out: + kfree(modified_cmdline); kexec_free_elf_info(&elf_info); /* Make kimage_file_post_load_cleanup free the fdt buffer for us. */ diff --git a/arch/powerpc/kexec/file_load.c b/arch/powerpc/kexec/file_load.c index 143c91724617..9a232bc36c8f 100644 --- a/arch/powerpc/kexec/file_load.c +++ b/arch/powerpc/kexec/file_load.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * ppc64 code to implement the kexec_file_load syscall + * powerpc code to implement the kexec_file_load syscall * * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) * Copyright (C) 2004 IBM Corp. @@ -18,23 +18,45 @@ #include #include #include +#include #include -#define SLAVE_CODE_SIZE 256 +#define SLAVE_CODE_SIZE 256 /* First 0x100 bytes */ -const struct kexec_file_ops * const kexec_file_loaders[] = { - &kexec_elf64_ops, - NULL -}; - -int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, - unsigned long buf_len) +/** + * setup_kdump_cmdline - Prepend "elfcorehdr= " to command line + * of kdump kernel for exporting the core. + * @image: Kexec image + * @cmdline: Command line parameters to update. + * @cmdline_len: Length of the cmdline parameters. + * + * kdump segment must be setup before calling this function. + * + * Returns new cmdline buffer for kdump kernel on success, NULL otherwise. + */ +char *setup_kdump_cmdline(struct kimage *image, char *cmdline, + unsigned long cmdline_len) { - /* We don't support crash kernels yet. */ - if (image->type == KEXEC_TYPE_CRASH) - return -EOPNOTSUPP; + int elfcorehdr_strlen; + char *cmdline_ptr; - return kexec_image_probe_default(image, buf, buf_len); + cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL); + if (!cmdline_ptr) + return NULL; + + elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ", + image->arch.elfcorehdr_addr); + + if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) { + pr_err("Appending elfcorehdr= exceeds cmdline size\n"); + kfree(cmdline_ptr); + return NULL; + } + + memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len); + // Ensure it's nul terminated + cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0'; + return cmdline_ptr; } /** @@ -236,6 +258,20 @@ int setup_new_fdt(const struct kimage *image, void *fdt, } } + if (image->type == KEXEC_TYPE_CRASH) { + /* + * Avoid elfcorehdr from being stomped on in kdump kernel by + * setting up memory reserve map. + */ + ret = fdt_add_mem_rsv(fdt, image->arch.elfcorehdr_addr, + image->arch.elf_headers_sz); + if (ret) { + pr_err("Error reserving elfcorehdr memory: %s\n", + fdt_strerror(ret)); + goto err; + } + } + ret = setup_ima_buffer(image, fdt, chosen_node); if (ret) { pr_err("Error setting up the new device tree.\n"); diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c new file mode 100644 index 000000000000..53bb71e3a2e1 --- /dev/null +++ b/arch/powerpc/kexec/file_load_64.c @@ -0,0 +1,1119 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ppc64 code to implement the kexec_file_load syscall + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) + * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) + * Copyright (C) 2020 IBM Corporation + * + * Based on kexec-tools' kexec-ppc64.c, kexec-elf-rel-ppc64.c, fs2dt.c. + * Heavily modified for the kernel by + * Hari Bathini, IBM Corporation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct umem_info { + u64 *buf; /* data buffer for usable-memory property */ + u32 size; /* size allocated for the data buffer */ + u32 max_entries; /* maximum no. of entries */ + u32 idx; /* index of current entry */ + + /* usable memory ranges to look up */ + unsigned int nr_ranges; + const struct crash_mem_range *ranges; +}; + +const struct kexec_file_ops * const kexec_file_loaders[] = { + &kexec_elf64_ops, + NULL +}; + +/** + * get_exclude_memory_ranges - Get exclude memory ranges. This list includes + * regions like opal/rtas, tce-table, initrd, + * kernel, htab which should be avoided while + * setting up kexec load segments. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +static int get_exclude_memory_ranges(struct crash_mem **mem_ranges) +{ + int ret; + + ret = add_tce_mem_ranges(mem_ranges); + if (ret) + goto out; + + ret = add_initrd_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_htab_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_kernel_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_opal_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_reserved_mem_ranges(mem_ranges); + if (ret) + goto out; + + /* exclude memory ranges should be sorted for easy lookup */ + sort_memory_ranges(*mem_ranges, true); +out: + if (ret) + pr_err("Failed to setup exclude memory ranges\n"); + return ret; +} + +/** + * get_usable_memory_ranges - Get usable memory ranges. This list includes + * regions like crashkernel, opal/rtas & tce-table, + * that kdump kernel could use. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +static int get_usable_memory_ranges(struct crash_mem **mem_ranges) +{ + int ret; + + /* + * Early boot failure observed on guests when low memory (first memory + * block?) is not added to usable memory. So, add [0, crashk_res.end] + * instead of [crashk_res.start, crashk_res.end] to workaround it. + * Also, crashed kernel's memory must be added to reserve map to + * avoid kdump kernel from using it. + */ + ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1); + if (ret) + goto out; + + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_opal_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_tce_mem_ranges(mem_ranges); +out: + if (ret) + pr_err("Failed to setup usable memory ranges\n"); + return ret; +} + +/** + * get_crash_memory_ranges - Get crash memory ranges. This list includes + * first/crashing kernel's memory regions that + * would be exported via an elfcore. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +static int get_crash_memory_ranges(struct crash_mem **mem_ranges) +{ + struct memblock_region *reg; + struct crash_mem *tmem; + int ret; + + for_each_memblock(memory, reg) { + u64 base, size; + + base = (u64)reg->base; + size = (u64)reg->size; + + /* Skip backup memory region, which needs a separate entry */ + if (base == BACKUP_SRC_START) { + if (size > BACKUP_SRC_SIZE) { + base = BACKUP_SRC_END + 1; + size -= BACKUP_SRC_SIZE; + } else + continue; + } + + ret = add_mem_range(mem_ranges, base, size); + if (ret) + goto out; + + /* Try merging adjacent ranges before reallocation attempt */ + if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges) + sort_memory_ranges(*mem_ranges, true); + } + + /* Reallocate memory ranges if there is no space to split ranges */ + tmem = *mem_ranges; + if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) { + tmem = realloc_mem_ranges(mem_ranges); + if (!tmem) + goto out; + } + + /* Exclude crashkernel region */ + ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end); + if (ret) + goto out; + + /* + * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL + * regions are exported to save their context at the time of + * crash, they should actually be backed up just like the + * first 64K bytes of memory. + */ + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_opal_mem_range(mem_ranges); + if (ret) + goto out; + + /* create a separate program header for the backup region */ + ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE); + if (ret) + goto out; + + sort_memory_ranges(*mem_ranges, false); +out: + if (ret) + pr_err("Failed to setup crash memory ranges\n"); + return ret; +} + +/** + * get_reserved_memory_ranges - Get reserve memory ranges. This list includes + * memory regions that should be added to the + * memory reserve map to ensure the region is + * protected from any mischief. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +static int get_reserved_memory_ranges(struct crash_mem **mem_ranges) +{ + int ret; + + ret = add_rtas_mem_range(mem_ranges); + if (ret) + goto out; + + ret = add_tce_mem_ranges(mem_ranges); + if (ret) + goto out; + + ret = add_reserved_mem_ranges(mem_ranges); +out: + if (ret) + pr_err("Failed to setup reserved memory ranges\n"); + return ret; +} + +/** + * __locate_mem_hole_top_down - Looks top down for a large enough memory hole + * in the memory regions between buf_min & buf_max + * for the buffer. If found, sets kbuf->mem. + * @kbuf: Buffer contents and memory parameters. + * @buf_min: Minimum address for the buffer. + * @buf_max: Maximum address for the buffer. + * + * Returns 0 on success, negative errno on error. + */ +static int __locate_mem_hole_top_down(struct kexec_buf *kbuf, + u64 buf_min, u64 buf_max) +{ + int ret = -EADDRNOTAVAIL; + phys_addr_t start, end; + u64 i; + + for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, + MEMBLOCK_NONE, &start, &end, NULL) { + /* + * memblock uses [start, end) convention while it is + * [start, end] here. Fix the off-by-one to have the + * same convention. + */ + end -= 1; + + if (start > buf_max) + continue; + + /* Memory hole not found */ + if (end < buf_min) + break; + + /* Adjust memory region based on the given range */ + if (start < buf_min) + start = buf_min; + if (end > buf_max) + end = buf_max; + + start = ALIGN(start, kbuf->buf_align); + if (start < end && (end - start + 1) >= kbuf->memsz) { + /* Suitable memory range found. Set kbuf->mem */ + kbuf->mem = ALIGN_DOWN(end - kbuf->memsz + 1, + kbuf->buf_align); + ret = 0; + break; + } + } + + return ret; +} + +/** + * locate_mem_hole_top_down_ppc64 - Skip special memory regions to find a + * suitable buffer with top down approach. + * @kbuf: Buffer contents and memory parameters. + * @buf_min: Minimum address for the buffer. + * @buf_max: Maximum address for the buffer. + * @emem: Exclude memory ranges. + * + * Returns 0 on success, negative errno on error. + */ +static int locate_mem_hole_top_down_ppc64(struct kexec_buf *kbuf, + u64 buf_min, u64 buf_max, + const struct crash_mem *emem) +{ + int i, ret = 0, err = -EADDRNOTAVAIL; + u64 start, end, tmin, tmax; + + tmax = buf_max; + for (i = (emem->nr_ranges - 1); i >= 0; i--) { + start = emem->ranges[i].start; + end = emem->ranges[i].end; + + if (start > tmax) + continue; + + if (end < tmax) { + tmin = (end < buf_min ? buf_min : end + 1); + ret = __locate_mem_hole_top_down(kbuf, tmin, tmax); + if (!ret) + return 0; + } + + tmax = start - 1; + + if (tmax < buf_min) { + ret = err; + break; + } + ret = 0; + } + + if (!ret) { + tmin = buf_min; + ret = __locate_mem_hole_top_down(kbuf, tmin, tmax); + } + return ret; +} + +/** + * __locate_mem_hole_bottom_up - Looks bottom up for a large enough memory hole + * in the memory regions between buf_min & buf_max + * for the buffer. If found, sets kbuf->mem. + * @kbuf: Buffer contents and memory parameters. + * @buf_min: Minimum address for the buffer. + * @buf_max: Maximum address for the buffer. + * + * Returns 0 on success, negative errno on error. + */ +static int __locate_mem_hole_bottom_up(struct kexec_buf *kbuf, + u64 buf_min, u64 buf_max) +{ + int ret = -EADDRNOTAVAIL; + phys_addr_t start, end; + u64 i; + + for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, + MEMBLOCK_NONE, &start, &end, NULL) { + /* + * memblock uses [start, end) convention while it is + * [start, end] here. Fix the off-by-one to have the + * same convention. + */ + end -= 1; + + if (end < buf_min) + continue; + + /* Memory hole not found */ + if (start > buf_max) + break; + + /* Adjust memory region based on the given range */ + if (start < buf_min) + start = buf_min; + if (end > buf_max) + end = buf_max; + + start = ALIGN(start, kbuf->buf_align); + if (start < end && (end - start + 1) >= kbuf->memsz) { + /* Suitable memory range found. Set kbuf->mem */ + kbuf->mem = start; + ret = 0; + break; + } + } + + return ret; +} + +/** + * locate_mem_hole_bottom_up_ppc64 - Skip special memory regions to find a + * suitable buffer with bottom up approach. + * @kbuf: Buffer contents and memory parameters. + * @buf_min: Minimum address for the buffer. + * @buf_max: Maximum address for the buffer. + * @emem: Exclude memory ranges. + * + * Returns 0 on success, negative errno on error. + */ +static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf, + u64 buf_min, u64 buf_max, + const struct crash_mem *emem) +{ + int i, ret = 0, err = -EADDRNOTAVAIL; + u64 start, end, tmin, tmax; + + tmin = buf_min; + for (i = 0; i < emem->nr_ranges; i++) { + start = emem->ranges[i].start; + end = emem->ranges[i].end; + + if (end < tmin) + continue; + + if (start > tmin) { + tmax = (start > buf_max ? buf_max : start - 1); + ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax); + if (!ret) + return 0; + } + + tmin = end + 1; + + if (tmin > buf_max) { + ret = err; + break; + } + ret = 0; + } + + if (!ret) { + tmax = buf_max; + ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax); + } + return ret; +} + +/** + * check_realloc_usable_mem - Reallocate buffer if it can't accommodate entries + * @um_info: Usable memory buffer and ranges info. + * @cnt: No. of entries to accommodate. + * + * Frees up the old buffer if memory reallocation fails. + * + * Returns buffer on success, NULL on error. + */ +static u64 *check_realloc_usable_mem(struct umem_info *um_info, int cnt) +{ + u32 new_size; + u64 *tbuf; + + if ((um_info->idx + cnt) <= um_info->max_entries) + return um_info->buf; + + new_size = um_info->size + MEM_RANGE_CHUNK_SZ; + tbuf = krealloc(um_info->buf, new_size, GFP_KERNEL); + if (tbuf) { + um_info->buf = tbuf; + um_info->size = new_size; + um_info->max_entries = (um_info->size / sizeof(u64)); + } + + return tbuf; +} + +/** + * add_usable_mem - Add the usable memory ranges within the given memory range + * to the buffer + * @um_info: Usable memory buffer and ranges info. + * @base: Base address of memory range to look for. + * @end: End address of memory range to look for. + * + * Returns 0 on success, negative errno on error. + */ +static int add_usable_mem(struct umem_info *um_info, u64 base, u64 end) +{ + u64 loc_base, loc_end; + bool add; + int i; + + for (i = 0; i < um_info->nr_ranges; i++) { + add = false; + loc_base = um_info->ranges[i].start; + loc_end = um_info->ranges[i].end; + if (loc_base >= base && loc_end <= end) + add = true; + else if (base < loc_end && end > loc_base) { + if (loc_base < base) + loc_base = base; + if (loc_end > end) + loc_end = end; + add = true; + } + + if (add) { + if (!check_realloc_usable_mem(um_info, 2)) + return -ENOMEM; + + um_info->buf[um_info->idx++] = cpu_to_be64(loc_base); + um_info->buf[um_info->idx++] = + cpu_to_be64(loc_end - loc_base + 1); + } + } + + return 0; +} + +/** + * kdump_setup_usable_lmb - This is a callback function that gets called by + * walk_drmem_lmbs for every LMB to set its + * usable memory ranges. + * @lmb: LMB info. + * @usm: linux,drconf-usable-memory property value. + * @data: Pointer to usable memory buffer and ranges info. + * + * Returns 0 on success, negative errno on error. + */ +static int kdump_setup_usable_lmb(struct drmem_lmb *lmb, const __be32 **usm, + void *data) +{ + struct umem_info *um_info; + int tmp_idx, ret; + u64 base, end; + + /* + * kdump load isn't supported on kernels already booted with + * linux,drconf-usable-memory property. + */ + if (*usm) { + pr_err("linux,drconf-usable-memory property already exists!"); + return -EINVAL; + } + + um_info = data; + tmp_idx = um_info->idx; + if (!check_realloc_usable_mem(um_info, 1)) + return -ENOMEM; + + um_info->idx++; + base = lmb->base_addr; + end = base + drmem_lmb_size() - 1; + ret = add_usable_mem(um_info, base, end); + if (!ret) { + /* + * Update the no. of ranges added. Two entries (base & size) + * for every range added. + */ + um_info->buf[tmp_idx] = + cpu_to_be64((um_info->idx - tmp_idx - 1) / 2); + } + + return ret; +} + +#define NODE_PATH_LEN 256 +/** + * add_usable_mem_property - Add usable memory property for the given + * memory node. + * @fdt: Flattened device tree for the kdump kernel. + * @dn: Memory node. + * @um_info: Usable memory buffer and ranges info. + * + * Returns 0 on success, negative errno on error. + */ +static int add_usable_mem_property(void *fdt, struct device_node *dn, + struct umem_info *um_info) +{ + int n_mem_addr_cells, n_mem_size_cells, node; + char path[NODE_PATH_LEN]; + int i, len, ranges, ret; + const __be32 *prop; + u64 base, end; + + of_node_get(dn); + + if (snprintf(path, NODE_PATH_LEN, "%pOF", dn) > (NODE_PATH_LEN - 1)) { + pr_err("Buffer (%d) too small for memory node: %pOF\n", + NODE_PATH_LEN, dn); + return -EOVERFLOW; + } + pr_debug("Memory node path: %s\n", path); + + /* Now that we know the path, find its offset in kdump kernel's fdt */ + node = fdt_path_offset(fdt, path); + if (node < 0) { + pr_err("Malformed device tree: error reading %s\n", path); + ret = -EINVAL; + goto out; + } + + /* Get the address & size cells */ + n_mem_addr_cells = of_n_addr_cells(dn); + n_mem_size_cells = of_n_size_cells(dn); + pr_debug("address cells: %d, size cells: %d\n", n_mem_addr_cells, + n_mem_size_cells); + + um_info->idx = 0; + if (!check_realloc_usable_mem(um_info, 2)) { + ret = -ENOMEM; + goto out; + } + + prop = of_get_property(dn, "reg", &len); + if (!prop || len <= 0) { + ret = 0; + goto out; + } + + /* + * "reg" property represents sequence of (addr,size) tuples + * each representing a memory range. + */ + ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); + + for (i = 0; i < ranges; i++) { + base = of_read_number(prop, n_mem_addr_cells); + prop += n_mem_addr_cells; + end = base + of_read_number(prop, n_mem_size_cells) - 1; + prop += n_mem_size_cells; + + ret = add_usable_mem(um_info, base, end); + if (ret) + goto out; + } + + /* + * No kdump kernel usable memory found in this memory node. + * Write (0,0) tuple in linux,usable-memory property for + * this region to be ignored. + */ + if (um_info->idx == 0) { + um_info->buf[0] = 0; + um_info->buf[1] = 0; + um_info->idx = 2; + } + + ret = fdt_setprop(fdt, node, "linux,usable-memory", um_info->buf, + (um_info->idx * sizeof(u64))); + +out: + of_node_put(dn); + return ret; +} + + +/** + * update_usable_mem_fdt - Updates kdump kernel's fdt with linux,usable-memory + * and linux,drconf-usable-memory DT properties as + * appropriate to restrict its memory usage. + * @fdt: Flattened device tree for the kdump kernel. + * @usable_mem: Usable memory ranges for kdump kernel. + * + * Returns 0 on success, negative errno on error. + */ +static int update_usable_mem_fdt(void *fdt, struct crash_mem *usable_mem) +{ + struct umem_info um_info; + struct device_node *dn; + int node, ret = 0; + + if (!usable_mem) { + pr_err("Usable memory ranges for kdump kernel not found\n"); + return -ENOENT; + } + + node = fdt_path_offset(fdt, "/ibm,dynamic-reconfiguration-memory"); + if (node == -FDT_ERR_NOTFOUND) + pr_debug("No dynamic reconfiguration memory found\n"); + else if (node < 0) { + pr_err("Malformed device tree: error reading /ibm,dynamic-reconfiguration-memory.\n"); + return -EINVAL; + } + + um_info.buf = NULL; + um_info.size = 0; + um_info.max_entries = 0; + um_info.idx = 0; + /* Memory ranges to look up */ + um_info.ranges = &(usable_mem->ranges[0]); + um_info.nr_ranges = usable_mem->nr_ranges; + + dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); + if (dn) { + ret = walk_drmem_lmbs(dn, &um_info, kdump_setup_usable_lmb); + of_node_put(dn); + + if (ret) { + pr_err("Could not setup linux,drconf-usable-memory property for kdump\n"); + goto out; + } + + ret = fdt_setprop(fdt, node, "linux,drconf-usable-memory", + um_info.buf, (um_info.idx * sizeof(u64))); + if (ret) { + pr_err("Failed to update fdt with linux,drconf-usable-memory property"); + goto out; + } + } + + /* + * Walk through each memory node and set linux,usable-memory property + * for the corresponding node in kdump kernel's fdt. + */ + for_each_node_by_type(dn, "memory") { + ret = add_usable_mem_property(fdt, dn, &um_info); + if (ret) { + pr_err("Failed to set linux,usable-memory property for %s node", + dn->full_name); + goto out; + } + } + +out: + kfree(um_info.buf); + return ret; +} + +/** + * load_backup_segment - Locate a memory hole to place the backup region. + * @image: Kexec image. + * @kbuf: Buffer contents and memory parameters. + * + * Returns 0 on success, negative errno on error. + */ +static int load_backup_segment(struct kimage *image, struct kexec_buf *kbuf) +{ + void *buf; + int ret; + + /* + * Setup a source buffer for backup segment. + * + * A source buffer has no meaning for backup region as data will + * be copied from backup source, after crash, in the purgatory. + * But as load segment code doesn't recognize such segments, + * setup a dummy source buffer to keep it happy for now. + */ + buf = vzalloc(BACKUP_SRC_SIZE); + if (!buf) + return -ENOMEM; + + kbuf->buffer = buf; + kbuf->mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf->bufsz = kbuf->memsz = BACKUP_SRC_SIZE; + kbuf->top_down = false; + + ret = kexec_add_buffer(kbuf); + if (ret) { + vfree(buf); + return ret; + } + + image->arch.backup_buf = buf; + image->arch.backup_start = kbuf->mem; + return 0; +} + +/** + * update_backup_region_phdr - Update backup region's offset for the core to + * export the region appropriately. + * @image: Kexec image. + * @ehdr: ELF core header. + * + * Assumes an exclusive program header is setup for the backup region + * in the ELF headers + * + * Returns nothing. + */ +static void update_backup_region_phdr(struct kimage *image, Elf64_Ehdr *ehdr) +{ + Elf64_Phdr *phdr; + unsigned int i; + + phdr = (Elf64_Phdr *)(ehdr + 1); + for (i = 0; i < ehdr->e_phnum; i++) { + if (phdr->p_paddr == BACKUP_SRC_START) { + phdr->p_offset = image->arch.backup_start; + pr_debug("Backup region offset updated to 0x%lx\n", + image->arch.backup_start); + return; + } + } +} + +/** + * load_elfcorehdr_segment - Setup crash memory ranges and initialize elfcorehdr + * segment needed to load kdump kernel. + * @image: Kexec image. + * @kbuf: Buffer contents and memory parameters. + * + * Returns 0 on success, negative errno on error. + */ +static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf) +{ + struct crash_mem *cmem = NULL; + unsigned long headers_sz; + void *headers = NULL; + int ret; + + ret = get_crash_memory_ranges(&cmem); + if (ret) + goto out; + + /* Setup elfcorehdr segment */ + ret = crash_prepare_elf64_headers(cmem, false, &headers, &headers_sz); + if (ret) { + pr_err("Failed to prepare elf headers for the core\n"); + goto out; + } + + /* Fix the offset for backup region in the ELF header */ + update_backup_region_phdr(image, headers); + + kbuf->buffer = headers; + kbuf->mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf->bufsz = kbuf->memsz = headers_sz; + kbuf->top_down = false; + + ret = kexec_add_buffer(kbuf); + if (ret) { + vfree(headers); + goto out; + } + + image->arch.elfcorehdr_addr = kbuf->mem; + image->arch.elf_headers_sz = headers_sz; + image->arch.elf_headers = headers; +out: + kfree(cmem); + return ret; +} + +/** + * load_crashdump_segments_ppc64 - Initialize the additional segements needed + * to load kdump kernel. + * @image: Kexec image. + * @kbuf: Buffer contents and memory parameters. + * + * Returns 0 on success, negative errno on error. + */ +int load_crashdump_segments_ppc64(struct kimage *image, + struct kexec_buf *kbuf) +{ + int ret; + + /* Load backup segment - first 64K bytes of the crashing kernel */ + ret = load_backup_segment(image, kbuf); + if (ret) { + pr_err("Failed to load backup segment\n"); + return ret; + } + pr_debug("Loaded the backup region at 0x%lx\n", kbuf->mem); + + /* Load elfcorehdr segment - to export crashing kernel's vmcore */ + ret = load_elfcorehdr_segment(image, kbuf); + if (ret) { + pr_err("Failed to load elfcorehdr segment\n"); + return ret; + } + pr_debug("Loaded elf core header at 0x%lx, bufsz=0x%lx memsz=0x%lx\n", + image->arch.elfcorehdr_addr, kbuf->bufsz, kbuf->memsz); + + return 0; +} + +/** + * setup_purgatory_ppc64 - initialize PPC64 specific purgatory's global + * variables and call setup_purgatory() to initialize + * common global variable. + * @image: kexec image. + * @slave_code: Slave code for the purgatory. + * @fdt: Flattened device tree for the next kernel. + * @kernel_load_addr: Address where the kernel is loaded. + * @fdt_load_addr: Address where the flattened device tree is loaded. + * + * Returns 0 on success, negative errno on error. + */ +int setup_purgatory_ppc64(struct kimage *image, const void *slave_code, + const void *fdt, unsigned long kernel_load_addr, + unsigned long fdt_load_addr) +{ + struct device_node *dn = NULL; + int ret; + + ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr, + fdt_load_addr); + if (ret) + goto out; + + if (image->type == KEXEC_TYPE_CRASH) { + u32 my_run_at_load = 1; + + /* + * Tell relocatable kernel to run at load address + * via the word meant for that at 0x5c. + */ + ret = kexec_purgatory_get_set_symbol(image, "run_at_load", + &my_run_at_load, + sizeof(my_run_at_load), + false); + if (ret) + goto out; + } + + /* Tell purgatory where to look for backup region */ + ret = kexec_purgatory_get_set_symbol(image, "backup_start", + &image->arch.backup_start, + sizeof(image->arch.backup_start), + false); + if (ret) + goto out; + + /* Setup OPAL base & entry values */ + dn = of_find_node_by_path("/ibm,opal"); + if (dn) { + u64 val; + + of_property_read_u64(dn, "opal-base-address", &val); + ret = kexec_purgatory_get_set_symbol(image, "opal_base", &val, + sizeof(val), false); + if (ret) + goto out; + + of_property_read_u64(dn, "opal-entry-address", &val); + ret = kexec_purgatory_get_set_symbol(image, "opal_entry", &val, + sizeof(val), false); + } +out: + if (ret) + pr_err("Failed to setup purgatory symbols"); + of_node_put(dn); + return ret; +} + +/** + * setup_new_fdt_ppc64 - Update the flattend device-tree of the kernel + * being loaded. + * @image: kexec image being loaded. + * @fdt: Flattened device tree for the next kernel. + * @initrd_load_addr: Address where the next initrd will be loaded. + * @initrd_len: Size of the next initrd, or 0 if there will be none. + * @cmdline: Command line for the next kernel, or NULL if there will + * be none. + * + * Returns 0 on success, negative errno on error. + */ +int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, + unsigned long initrd_load_addr, + unsigned long initrd_len, const char *cmdline) +{ + struct crash_mem *umem = NULL, *rmem = NULL; + int i, nr_ranges, ret; + + ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline); + if (ret) + goto out; + + /* + * Restrict memory usage for kdump kernel by setting up + * usable memory ranges and memory reserve map. + */ + if (image->type == KEXEC_TYPE_CRASH) { + ret = get_usable_memory_ranges(&umem); + if (ret) + goto out; + + ret = update_usable_mem_fdt(fdt, umem); + if (ret) { + pr_err("Error setting up usable-memory property for kdump kernel\n"); + goto out; + } + + /* + * Ensure we don't touch crashed kernel's memory except the + * first 64K of RAM, which will be backed up. + */ + ret = fdt_add_mem_rsv(fdt, BACKUP_SRC_END + 1, + crashk_res.start - BACKUP_SRC_SIZE); + if (ret) { + pr_err("Error reserving crash memory: %s\n", + fdt_strerror(ret)); + goto out; + } + + /* Ensure backup region is not used by kdump/capture kernel */ + ret = fdt_add_mem_rsv(fdt, image->arch.backup_start, + BACKUP_SRC_SIZE); + if (ret) { + pr_err("Error reserving memory for backup: %s\n", + fdt_strerror(ret)); + goto out; + } + } + + /* Update memory reserve map */ + ret = get_reserved_memory_ranges(&rmem); + if (ret) + goto out; + + nr_ranges = rmem ? rmem->nr_ranges : 0; + for (i = 0; i < nr_ranges; i++) { + u64 base, size; + + base = rmem->ranges[i].start; + size = rmem->ranges[i].end - base + 1; + ret = fdt_add_mem_rsv(fdt, base, size); + if (ret) { + pr_err("Error updating memory reserve map: %s\n", + fdt_strerror(ret)); + goto out; + } + } + +out: + kfree(rmem); + kfree(umem); + return ret; +} + +/** + * arch_kexec_locate_mem_hole - Skip special memory regions like rtas, opal, + * tce-table, reserved-ranges & such (exclude + * memory ranges) as they can't be used for kexec + * segment buffer. Sets kbuf->mem when a suitable + * memory hole is found. + * @kbuf: Buffer contents and memory parameters. + * + * Assumes minimum of PAGE_SIZE alignment for kbuf->memsz & kbuf->buf_align. + * + * Returns 0 on success, negative errno on error. + */ +int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf) +{ + struct crash_mem **emem; + u64 buf_min, buf_max; + int ret; + + /* Look up the exclude ranges list while locating the memory hole */ + emem = &(kbuf->image->arch.exclude_ranges); + if (!(*emem) || ((*emem)->nr_ranges == 0)) { + pr_warn("No exclude range list. Using the default locate mem hole method\n"); + return kexec_locate_mem_hole(kbuf); + } + + buf_min = kbuf->buf_min; + buf_max = kbuf->buf_max; + /* Segments for kdump kernel should be within crashkernel region */ + if (kbuf->image->type == KEXEC_TYPE_CRASH) { + buf_min = (buf_min < crashk_res.start ? + crashk_res.start : buf_min); + buf_max = (buf_max > crashk_res.end ? + crashk_res.end : buf_max); + } + + if (buf_min > buf_max) { + pr_err("Invalid buffer min and/or max values\n"); + return -EINVAL; + } + + if (kbuf->top_down) + ret = locate_mem_hole_top_down_ppc64(kbuf, buf_min, buf_max, + *emem); + else + ret = locate_mem_hole_bottom_up_ppc64(kbuf, buf_min, buf_max, + *emem); + + /* Add the buffer allocated to the exclude list for the next lookup */ + if (!ret) { + add_mem_range(emem, kbuf->mem, kbuf->memsz); + sort_memory_ranges(*emem, true); + } else { + pr_err("Failed to locate memory buffer of size %lu\n", + kbuf->memsz); + } + return ret; +} + +/** + * arch_kexec_kernel_image_probe - Does additional handling needed to setup + * kexec segments. + * @image: kexec image being loaded. + * @buf: Buffer pointing to elf data. + * @buf_len: Length of the buffer. + * + * Returns 0 on success, negative errno on error. + */ +int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, + unsigned long buf_len) +{ + int ret; + + /* Get exclude memory ranges needed for setting up kexec segments */ + ret = get_exclude_memory_ranges(&(image->arch.exclude_ranges)); + if (ret) { + pr_err("Failed to setup exclude memory ranges for buffer lookup\n"); + return ret; + } + + return kexec_image_probe_default(image, buf, buf_len); +} + +/** + * arch_kimage_file_post_load_cleanup - Frees up all the allocations done + * while loading the image. + * @image: kexec image being loaded. + * + * Returns 0 on success, negative errno on error. + */ +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + kfree(image->arch.exclude_ranges); + image->arch.exclude_ranges = NULL; + + vfree(image->arch.backup_buf); + image->arch.backup_buf = NULL; + + vfree(image->arch.elf_headers); + image->arch.elf_headers = NULL; + image->arch.elf_headers_sz = 0; + + return kexec_image_post_load_cleanup_default(image); +} diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c new file mode 100644 index 000000000000..6b81c852feab --- /dev/null +++ b/arch/powerpc/kexec/ranges.c @@ -0,0 +1,412 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * powerpc code to implement the kexec_file_load syscall + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) + * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) + * Copyright (C) 2020 IBM Corporation + * + * Based on kexec-tools' kexec-ppc64.c, fs2dt.c. + * Heavily modified for the kernel by + * Hari Bathini, IBM Corporation. + */ + +#define pr_fmt(fmt) "kexec ranges: " fmt + +#include +#include +#include +#include +#include +#include + +/** + * get_max_nr_ranges - Get the max no. of ranges crash_mem structure + * could hold, given the size allocated for it. + * @size: Allocation size of crash_mem structure. + * + * Returns the maximum no. of ranges. + */ +static inline unsigned int get_max_nr_ranges(size_t size) +{ + return ((size - sizeof(struct crash_mem)) / + sizeof(struct crash_mem_range)); +} + +/** + * get_mem_rngs_size - Get the allocated size of mem_rngs based on + * max_nr_ranges and chunk size. + * @mem_rngs: Memory ranges. + * + * Returns the maximum size of @mem_rngs. + */ +static inline size_t get_mem_rngs_size(struct crash_mem *mem_rngs) +{ + size_t size; + + if (!mem_rngs) + return 0; + + size = (sizeof(struct crash_mem) + + (mem_rngs->max_nr_ranges * sizeof(struct crash_mem_range))); + + /* + * Memory is allocated in size multiple of MEM_RANGE_CHUNK_SZ. + * So, align to get the actual length. + */ + return ALIGN(size, MEM_RANGE_CHUNK_SZ); +} + +/** + * __add_mem_range - add a memory range to memory ranges list. + * @mem_ranges: Range list to add the memory range to. + * @base: Base address of the range to add. + * @size: Size of the memory range to add. + * + * (Re)allocates memory, if needed. + * + * Returns 0 on success, negative errno on error. + */ +static int __add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) +{ + struct crash_mem *mem_rngs = *mem_ranges; + + if (!mem_rngs || (mem_rngs->nr_ranges == mem_rngs->max_nr_ranges)) { + mem_rngs = realloc_mem_ranges(mem_ranges); + if (!mem_rngs) + return -ENOMEM; + } + + mem_rngs->ranges[mem_rngs->nr_ranges].start = base; + mem_rngs->ranges[mem_rngs->nr_ranges].end = base + size - 1; + pr_debug("Added memory range [%#016llx - %#016llx] at index %d\n", + base, base + size - 1, mem_rngs->nr_ranges); + mem_rngs->nr_ranges++; + return 0; +} + +/** + * __merge_memory_ranges - Merges the given memory ranges list. + * @mem_rngs: Range list to merge. + * + * Assumes a sorted range list. + * + * Returns nothing. + */ +static void __merge_memory_ranges(struct crash_mem *mem_rngs) +{ + struct crash_mem_range *ranges; + int i, idx; + + if (!mem_rngs) + return; + + idx = 0; + ranges = &(mem_rngs->ranges[0]); + for (i = 1; i < mem_rngs->nr_ranges; i++) { + if (ranges[i].start <= (ranges[i-1].end + 1)) + ranges[idx].end = ranges[i].end; + else { + idx++; + if (i == idx) + continue; + + ranges[idx] = ranges[i]; + } + } + mem_rngs->nr_ranges = idx + 1; +} + +/* cmp_func_t callback to sort ranges with sort() */ +static int rngcmp(const void *_x, const void *_y) +{ + const struct crash_mem_range *x = _x, *y = _y; + + if (x->start > y->start) + return 1; + if (x->start < y->start) + return -1; + return 0; +} + +/** + * sort_memory_ranges - Sorts the given memory ranges list. + * @mem_rngs: Range list to sort. + * @merge: If true, merge the list after sorting. + * + * Returns nothing. + */ +void sort_memory_ranges(struct crash_mem *mem_rngs, bool merge) +{ + int i; + + if (!mem_rngs) + return; + + /* Sort the ranges in-place */ + sort(&(mem_rngs->ranges[0]), mem_rngs->nr_ranges, + sizeof(mem_rngs->ranges[0]), rngcmp, NULL); + + if (merge) + __merge_memory_ranges(mem_rngs); + + /* For debugging purpose */ + pr_debug("Memory ranges:\n"); + for (i = 0; i < mem_rngs->nr_ranges; i++) { + pr_debug("\t[%03d][%#016llx - %#016llx]\n", i, + mem_rngs->ranges[i].start, + mem_rngs->ranges[i].end); + } +} + +/** + * realloc_mem_ranges - reallocate mem_ranges with size incremented + * by MEM_RANGE_CHUNK_SZ. Frees up the old memory, + * if memory allocation fails. + * @mem_ranges: Memory ranges to reallocate. + * + * Returns pointer to reallocated memory on success, NULL otherwise. + */ +struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges) +{ + struct crash_mem *mem_rngs = *mem_ranges; + unsigned int nr_ranges; + size_t size; + + size = get_mem_rngs_size(mem_rngs); + nr_ranges = mem_rngs ? mem_rngs->nr_ranges : 0; + + size += MEM_RANGE_CHUNK_SZ; + mem_rngs = krealloc(*mem_ranges, size, GFP_KERNEL); + if (!mem_rngs) { + kfree(*mem_ranges); + *mem_ranges = NULL; + return NULL; + } + + mem_rngs->nr_ranges = nr_ranges; + mem_rngs->max_nr_ranges = get_max_nr_ranges(size); + *mem_ranges = mem_rngs; + + return mem_rngs; +} + +/** + * add_mem_range - Updates existing memory range, if there is an overlap. + * Else, adds a new memory range. + * @mem_ranges: Range list to add the memory range to. + * @base: Base address of the range to add. + * @size: Size of the memory range to add. + * + * (Re)allocates memory, if needed. + * + * Returns 0 on success, negative errno on error. + */ +int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) +{ + struct crash_mem *mem_rngs = *mem_ranges; + u64 mstart, mend, end; + unsigned int i; + + if (!size) + return 0; + + end = base + size - 1; + + if (!mem_rngs || !(mem_rngs->nr_ranges)) + return __add_mem_range(mem_ranges, base, size); + + for (i = 0; i < mem_rngs->nr_ranges; i++) { + mstart = mem_rngs->ranges[i].start; + mend = mem_rngs->ranges[i].end; + if (base < mend && end > mstart) { + if (base < mstart) + mem_rngs->ranges[i].start = base; + if (end > mend) + mem_rngs->ranges[i].end = end; + return 0; + } + } + + return __add_mem_range(mem_ranges, base, size); +} + +/** + * add_tce_mem_ranges - Adds tce-table range to the given memory ranges list. + * @mem_ranges: Range list to add the memory range(s) to. + * + * Returns 0 on success, negative errno on error. + */ +int add_tce_mem_ranges(struct crash_mem **mem_ranges) +{ + struct device_node *dn = NULL; + int ret = 0; + + for_each_node_by_type(dn, "pci") { + u64 base; + u32 size; + + ret = of_property_read_u64(dn, "linux,tce-base", &base); + ret |= of_property_read_u32(dn, "linux,tce-size", &size); + if (ret) { + /* + * It is ok to have pci nodes without tce. So, ignore + * property does not exist error. + */ + if (ret == -EINVAL) { + ret = 0; + continue; + } + break; + } + + ret = add_mem_range(mem_ranges, base, size); + if (ret) + break; + } + + of_node_put(dn); + return ret; +} + +/** + * add_initrd_mem_range - Adds initrd range to the given memory ranges list, + * if the initrd was retained. + * @mem_ranges: Range list to add the memory range to. + * + * Returns 0 on success, negative errno on error. + */ +int add_initrd_mem_range(struct crash_mem **mem_ranges) +{ + u64 base, end; + int ret; + + /* This range means something, only if initrd was retained */ + if (!strstr(saved_command_line, "retain_initrd")) + return 0; + + ret = of_property_read_u64(of_chosen, "linux,initrd-start", &base); + ret |= of_property_read_u64(of_chosen, "linux,initrd-end", &end); + if (!ret) + ret = add_mem_range(mem_ranges, base, end - base + 1); + + return ret; +} + +#ifdef CONFIG_PPC_BOOK3S_64 +/** + * add_htab_mem_range - Adds htab range to the given memory ranges list, + * if it exists + * @mem_ranges: Range list to add the memory range to. + * + * Returns 0 on success, negative errno on error. + */ +int add_htab_mem_range(struct crash_mem **mem_ranges) +{ + if (!htab_address) + return 0; + + return add_mem_range(mem_ranges, __pa(htab_address), htab_size_bytes); +} +#endif + +/** + * add_kernel_mem_range - Adds kernel text region to the given + * memory ranges list. + * @mem_ranges: Range list to add the memory range to. + * + * Returns 0 on success, negative errno on error. + */ +int add_kernel_mem_range(struct crash_mem **mem_ranges) +{ + return add_mem_range(mem_ranges, 0, __pa(_end)); +} + +/** + * add_rtas_mem_range - Adds RTAS region to the given memory ranges list. + * @mem_ranges: Range list to add the memory range to. + * + * Returns 0 on success, negative errno on error. + */ +int add_rtas_mem_range(struct crash_mem **mem_ranges) +{ + struct device_node *dn; + u32 base, size; + int ret = 0; + + dn = of_find_node_by_path("/rtas"); + if (!dn) + return 0; + + ret = of_property_read_u32(dn, "linux,rtas-base", &base); + ret |= of_property_read_u32(dn, "rtas-size", &size); + if (!ret) + ret = add_mem_range(mem_ranges, base, size); + + of_node_put(dn); + return ret; +} + +/** + * add_opal_mem_range - Adds OPAL region to the given memory ranges list. + * @mem_ranges: Range list to add the memory range to. + * + * Returns 0 on success, negative errno on error. + */ +int add_opal_mem_range(struct crash_mem **mem_ranges) +{ + struct device_node *dn; + u64 base, size; + int ret; + + dn = of_find_node_by_path("/ibm,opal"); + if (!dn) + return 0; + + ret = of_property_read_u64(dn, "opal-base-address", &base); + ret |= of_property_read_u64(dn, "opal-runtime-size", &size); + if (!ret) + ret = add_mem_range(mem_ranges, base, size); + + of_node_put(dn); + return ret; +} + +/** + * add_reserved_mem_ranges - Adds "/reserved-ranges" regions exported by f/w + * to the given memory ranges list. + * @mem_ranges: Range list to add the memory ranges to. + * + * Returns 0 on success, negative errno on error. + */ +int add_reserved_mem_ranges(struct crash_mem **mem_ranges) +{ + int n_mem_addr_cells, n_mem_size_cells, i, len, cells, ret = 0; + const __be32 *prop; + + prop = of_get_property(of_root, "reserved-ranges", &len); + if (!prop) + return 0; + + n_mem_addr_cells = of_n_addr_cells(of_root); + n_mem_size_cells = of_n_size_cells(of_root); + cells = n_mem_addr_cells + n_mem_size_cells; + + /* Each reserved range is an (address,size) pair */ + for (i = 0; i < (len / (sizeof(u32) * cells)); i++) { + u64 base, size; + + base = of_read_number(prop + (i * cells), n_mem_addr_cells); + size = of_read_number(prop + (i * cells) + n_mem_addr_cells, + n_mem_size_cells); + + ret = add_mem_range(mem_ranges, base, size); + if (ret) + break; + } + + return ret; +} diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index ebb04f331ad3..0f83f39a2bd2 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -765,7 +765,7 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, return H_P3; vcpu->arch.ciabr = value1; return H_SUCCESS; - case H_SET_MODE_RESOURCE_SET_DAWR: + case H_SET_MODE_RESOURCE_SET_DAWR0: if (!kvmppc_power8_compatible(vcpu)) return H_P2; if (!ppc_breakpoint_available()) @@ -1680,10 +1680,22 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_UAMOR: *val = get_reg_val(id, vcpu->arch.uamor); break; - case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS: + case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCR1: i = id - KVM_REG_PPC_MMCR0; *val = get_reg_val(id, vcpu->arch.mmcr[i]); break; + case KVM_REG_PPC_MMCR2: + *val = get_reg_val(id, vcpu->arch.mmcr[2]); + break; + case KVM_REG_PPC_MMCRA: + *val = get_reg_val(id, vcpu->arch.mmcra); + break; + case KVM_REG_PPC_MMCRS: + *val = get_reg_val(id, vcpu->arch.mmcrs); + break; + case KVM_REG_PPC_MMCR3: + *val = get_reg_val(id, vcpu->arch.mmcr[3]); + break; case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8: i = id - KVM_REG_PPC_PMC1; *val = get_reg_val(id, vcpu->arch.pmc[i]); @@ -1699,7 +1711,13 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, vcpu->arch.sdar); break; case KVM_REG_PPC_SIER: - *val = get_reg_val(id, vcpu->arch.sier); + *val = get_reg_val(id, vcpu->arch.sier[0]); + break; + case KVM_REG_PPC_SIER2: + *val = get_reg_val(id, vcpu->arch.sier[1]); + break; + case KVM_REG_PPC_SIER3: + *val = get_reg_val(id, vcpu->arch.sier[2]); break; case KVM_REG_PPC_IAMR: *val = get_reg_val(id, vcpu->arch.iamr); @@ -1901,10 +1919,22 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_UAMOR: vcpu->arch.uamor = set_reg_val(id, *val); break; - case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS: + case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCR1: i = id - KVM_REG_PPC_MMCR0; vcpu->arch.mmcr[i] = set_reg_val(id, *val); break; + case KVM_REG_PPC_MMCR2: + vcpu->arch.mmcr[2] = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MMCRA: + vcpu->arch.mmcra = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MMCRS: + vcpu->arch.mmcrs = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MMCR3: + *val = get_reg_val(id, vcpu->arch.mmcr[3]); + break; case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8: i = id - KVM_REG_PPC_PMC1; vcpu->arch.pmc[i] = set_reg_val(id, *val); @@ -1920,7 +1950,13 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, vcpu->arch.sdar = set_reg_val(id, *val); break; case KVM_REG_PPC_SIER: - vcpu->arch.sier = set_reg_val(id, *val); + vcpu->arch.sier[0] = set_reg_val(id, *val); + break; + case KVM_REG_PPC_SIER2: + vcpu->arch.sier[1] = set_reg_val(id, *val); + break; + case KVM_REG_PPC_SIER3: + vcpu->arch.sier[2] = set_reg_val(id, *val); break; case KVM_REG_PPC_IAMR: vcpu->arch.iamr = set_reg_val(id, *val); diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7cd3cf3d366b..073617ce83e0 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -113,7 +113,7 @@ void __init kvm_cma_reserve(void) selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT; if (selected_size) { - pr_debug("%s: reserving %ld MiB for global area\n", __func__, + pr_info("%s: reserving %ld MiB for global area\n", __func__, (unsigned long)selected_size / SZ_1M); align_size = HPT_ALIGN_PAGES << PAGE_SHIFT; cma_declare_contiguous(0, selected_size, 0, align_size, diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 63fd81f3039d..59822cba454d 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -140,6 +140,14 @@ BEGIN_FTR_SECTION std r8, HSTATE_MMCR2(r13) std r9, HSTATE_SIER(r13) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +BEGIN_FTR_SECTION + mfspr r5, SPRN_MMCR3 + mfspr r6, SPRN_SIER2 + mfspr r7, SPRN_SIER3 + std r5, HSTATE_MMCR3(r13) + std r6, HSTATE_SIER2(r13) + std r7, HSTATE_SIER3(r13) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) mfspr r3, SPRN_PMC1 mfspr r5, SPRN_PMC2 mfspr r6, SPRN_PMC3 diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 79f7d07ef674..6028628ea3ac 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -244,7 +244,7 @@ long kvmppc_realmode_hmi_handler(void) { bool resync_req; - __this_cpu_inc(irq_stat.hmi_exceptions); + local_paca->hmi_irqs++; if (hmi_handle_debugtrig(NULL) >= 0) return 1; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 71943892c81c..799d6d0f4ead 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -3428,7 +3428,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) mtspr SPRN_PMC6, r9 ld r3, VCPU_MMCR(r4) ld r5, VCPU_MMCR + 8(r4) - ld r6, VCPU_MMCR + 16(r4) + ld r6, VCPU_MMCRA(r4) ld r7, VCPU_SIAR(r4) ld r8, VCPU_SDAR(r4) mtspr SPRN_MMCR1, r5 @@ -3436,14 +3436,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) mtspr SPRN_SIAR, r7 mtspr SPRN_SDAR, r8 BEGIN_FTR_SECTION - ld r5, VCPU_MMCR + 24(r4) + ld r5, VCPU_MMCR + 24(r4) + ld r6, VCPU_SIER + 8(r4) + ld r7, VCPU_SIER + 16(r4) + mtspr SPRN_MMCR3, r5 + mtspr SPRN_SIER2, r6 + mtspr SPRN_SIER3, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) +BEGIN_FTR_SECTION + ld r5, VCPU_MMCR + 16(r4) ld r6, VCPU_SIER(r4) mtspr SPRN_MMCR2, r5 mtspr SPRN_SIER, r6 BEGIN_FTR_SECTION_NESTED(96) lwz r7, VCPU_PMC + 24(r4) lwz r8, VCPU_PMC + 28(r4) - ld r9, VCPU_MMCR + 32(r4) + ld r9, VCPU_MMCRS(r4) mtspr SPRN_SPMC1, r7 mtspr SPRN_SPMC2, r8 mtspr SPRN_MMCRS, r9 @@ -3496,6 +3504,14 @@ BEGIN_FTR_SECTION mtspr SPRN_MMCR2, r8 mtspr SPRN_SIER, r9 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +BEGIN_FTR_SECTION + ld r5, HSTATE_MMCR3(r13) + ld r6, HSTATE_SIER2(r13) + ld r7, HSTATE_SIER3(r13) + mtspr SPRN_MMCR3, r5 + mtspr SPRN_SIER2, r6 + mtspr SPRN_SIER3, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) mtspr SPRN_MMCR0, r3 isync mtlr r0 @@ -3551,10 +3567,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfspr r8, SPRN_SDAR std r4, VCPU_MMCR(r9) std r5, VCPU_MMCR + 8(r9) - std r6, VCPU_MMCR + 16(r9) + std r6, VCPU_MMCRA(r9) BEGIN_FTR_SECTION - std r10, VCPU_MMCR + 24(r9) + std r10, VCPU_MMCR + 16(r9) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +BEGIN_FTR_SECTION + mfspr r5, SPRN_MMCR3 + mfspr r6, SPRN_SIER2 + mfspr r7, SPRN_SIER3 + std r5, VCPU_MMCR + 24(r9) + std r6, VCPU_SIER + 8(r9) + std r7, VCPU_SIER + 16(r9) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) std r7, VCPU_SIAR(r9) std r8, VCPU_SDAR(r9) mfspr r3, SPRN_PMC1 @@ -3578,7 +3602,7 @@ BEGIN_FTR_SECTION_NESTED(96) mfspr r8, SPRN_MMCRS stw r6, VCPU_PMC + 24(r9) stw r7, VCPU_PMC + 28(r9) - std r8, VCPU_MMCR + 32(r9) + std r8, VCPU_MMCRS(r9) lis r4, 0x8000 mtspr SPRN_MMCRS, r4 END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96) diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index f7ad99d972ce..607a9b99c334 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -26,7 +26,7 @@ #define FUNC(name) name #define GET_SHADOW_VCPU(reg) lwz reg, (THREAD + THREAD_KVM_SVCPU)(r2) -#endif /* CONFIG_PPC_BOOK3S_XX */ +#endif /* CONFIG_PPC_BOOK3S_64 */ #define VCPU_LOAD_NVGPRS(vcpu) \ PPC_LL r14, VCPU_GPR(R14)(vcpu); \ diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index 4a61a971c34e..830a126e095d 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -89,7 +89,7 @@ {H_CREATE_RPT, "H_CREATE_RPT"}, \ {H_REMOVE_RPT, "H_REMOVE_RPT"}, \ {H_REGISTER_RPAGES, "H_REGISTER_RPAGES"}, \ - {H_DISABLE_AND_GETC, "H_DISABLE_AND_GETC"}, \ + {H_DISABLE_AND_GET, "H_DISABLE_AND_GET"}, \ {H_ERROR_DATA, "H_ERROR_DATA"}, \ {H_GET_HCA_INFO, "H_GET_HCA_INFO"}, \ {H_GET_PERF_COUNT, "H_GET_PERF_COUNT"}, \ diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 5e994cda8e40..d66a645503eb 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -41,7 +41,10 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \ obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ memcpy_64.o memcpy_mcsafe_64.o +ifndef CONFIG_PPC_QUEUED_SPINLOCKS obj64-$(CONFIG_SMP) += locks.o +endif + obj64-$(CONFIG_ALTIVEC) += vmx-helper.o obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o \ test_emulate_step_exec_instr.o diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 0a051dfeb177..8c3934ea6220 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -93,7 +93,7 @@ static int map_patch_area(void *addr, unsigned long text_poke_addr) unsigned long pfn; int err; - if (is_vmalloc_addr(addr)) + if (is_vmalloc_or_module_addr(addr)) pfn = vmalloc_to_pfn(addr); else pfn = __pa_symbol(addr) >> PAGE_SHIFT; diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index 6440d5943c00..04165b7a163f 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -27,14 +27,14 @@ void splpar_spin_yield(arch_spinlock_t *lock) return; holder_cpu = lock_value & 0xffff; BUG_ON(holder_cpu >= NR_CPUS); - yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count); + + yield_count = yield_count_of(holder_cpu); if ((yield_count & 1) == 0) return; /* virtual cpu is currently running */ rmb(); if (lock->slock != lock_value) return; /* something has changed */ - plpar_hcall_norets(H_CONFER, - get_hard_smp_processor_id(holder_cpu), yield_count); + yield_to_preempted(holder_cpu, yield_count); } EXPORT_SYMBOL_GPL(splpar_spin_yield); @@ -53,13 +53,13 @@ void splpar_rw_yield(arch_rwlock_t *rw) return; /* no write lock at present */ holder_cpu = lock_value & 0xffff; BUG_ON(holder_cpu >= NR_CPUS); - yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count); + + yield_count = yield_count_of(holder_cpu); if ((yield_count & 1) == 0) return; /* virtual cpu is currently running */ rmb(); if (rw->lock != lock_value) return; /* something has changed */ - plpar_hcall_norets(H_CONFER, - get_hard_smp_processor_id(holder_cpu), yield_count); + yield_to_preempted(holder_cpu, yield_count); } #endif diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c index 0666a8d29596..1550e0d2513a 100644 --- a/arch/powerpc/lib/pmem.c +++ b/arch/powerpc/lib/pmem.c @@ -9,20 +9,56 @@ #include +static inline void __clean_pmem_range(unsigned long start, unsigned long stop) +{ + unsigned long shift = l1_dcache_shift(); + unsigned long bytes = l1_dcache_bytes(); + void *addr = (void *)(start & ~(bytes - 1)); + unsigned long size = stop - (unsigned long)addr + (bytes - 1); + unsigned long i; + + for (i = 0; i < size >> shift; i++, addr += bytes) + asm volatile(PPC_DCBSTPS(%0, %1): :"i"(0), "r"(addr): "memory"); +} + +static inline void __flush_pmem_range(unsigned long start, unsigned long stop) +{ + unsigned long shift = l1_dcache_shift(); + unsigned long bytes = l1_dcache_bytes(); + void *addr = (void *)(start & ~(bytes - 1)); + unsigned long size = stop - (unsigned long)addr + (bytes - 1); + unsigned long i; + + for (i = 0; i < size >> shift; i++, addr += bytes) + asm volatile(PPC_DCBFPS(%0, %1): :"i"(0), "r"(addr): "memory"); +} + +static inline void clean_pmem_range(unsigned long start, unsigned long stop) +{ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return __clean_pmem_range(start, stop); +} + +static inline void flush_pmem_range(unsigned long start, unsigned long stop) +{ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return __flush_pmem_range(start, stop); +} + /* * CONFIG_ARCH_HAS_PMEM_API symbols */ void arch_wb_cache_pmem(void *addr, size_t size) { unsigned long start = (unsigned long) addr; - flush_dcache_range(start, start + size); + clean_pmem_range(start, start + size); } EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); void arch_invalidate_pmem(void *addr, size_t size) { unsigned long start = (unsigned long) addr; - flush_dcache_range(start, start + size); + flush_pmem_range(start, start + size); } EXPORT_SYMBOL_GPL(arch_invalidate_pmem); @@ -35,19 +71,17 @@ long __copy_from_user_flushcache(void *dest, const void __user *src, unsigned long copied, start = (unsigned long) dest; copied = __copy_from_user(dest, src, size); - flush_dcache_range(start, start + size); + clean_pmem_range(start, start + size); return copied; } -void *memcpy_flushcache(void *dest, const void *src, size_t size) +void memcpy_flushcache(void *dest, const void *src, size_t size) { unsigned long start = (unsigned long) dest; memcpy(dest, src, size); - flush_dcache_range(start, start + size); - - return dest; + clean_pmem_range(start, start + size); } EXPORT_SYMBOL(memcpy_flushcache); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 5abe98216dc2..caee8cc77e19 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -16,6 +16,7 @@ #include extern char system_call_common[]; +extern char system_call_vectored_emulate[]; #ifdef CONFIG_PPC64 /* Bits in SRR1 that are copied from MSR */ @@ -200,8 +201,8 @@ static nokprobe_inline unsigned long mlsd_8lsd_ea(unsigned int instr, unsigned int dd; unsigned long ea, d0, d1, d; - prefix_r = instr & (1ul << 20); - ra = (suffix >> 16) & 0x1f; + prefix_r = GET_PREFIX_R(instr); + ra = GET_PREFIX_RA(suffix); d0 = instr & 0x3ffff; d1 = suffix & 0xffff; @@ -1236,6 +1237,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 17: /* sc */ if ((word & 0xfe2) == 2) op->type = SYSCALL; + else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && + (word & 0xfe3) == 1) + op->type = SYSCALL_VECTORED_0; else op->type = UNKNOWN; return 0; @@ -1339,8 +1343,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, switch (opcode) { #ifdef __powerpc64__ case 1: - prefix_r = word & (1ul << 20); - ra = (suffix >> 16) & 0x1f; + prefix_r = GET_PREFIX_R(word); + ra = GET_PREFIX_RA(suffix); rd = (suffix >> 21) & 0x1f; op->reg = rd; op->val = regs->gpr[rd]; @@ -1802,7 +1806,18 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->val = (int) regs->gpr[ra] / (int) regs->gpr[rb]; goto arith_done; - +#ifdef __powerpc64__ + case 425: /* divde[.] */ + asm volatile(PPC_DIVDE(%0, %1, %2) : + "=r" (op->val) : "r" (regs->gpr[ra]), + "r" (regs->gpr[rb])); + goto arith_done; + case 393: /* divdeu[.] */ + asm volatile(PPC_DIVDEU(%0, %1, %2) : + "=r" (op->val) : "r" (regs->gpr[ra]), + "r" (regs->gpr[rb])); + goto arith_done; +#endif case 755: /* darn */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) return -1; @@ -2715,8 +2730,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, } break; case 1: /* Prefixed instructions */ - prefix_r = word & (1ul << 20); - ra = (suffix >> 16) & 0x1f; + prefix_r = GET_PREFIX_R(word); + ra = GET_PREFIX_RA(suffix); op->update_reg = ra; rd = (suffix >> 21) & 0x1f; op->reg = rd; @@ -3378,6 +3393,18 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr) regs->msr = MSR_KERNEL; return 1; +#ifdef CONFIG_PPC_BOOK3S_64 + case SYSCALL_VECTORED_0: /* scv 0 */ + regs->gpr[9] = regs->gpr[13]; + regs->gpr[10] = MSR_KERNEL; + regs->gpr[11] = regs->nip + 4; + regs->gpr[12] = regs->msr & MSR_MASK; + regs->gpr[13] = (unsigned long) get_paca(); + regs->nip = (unsigned long) &system_call_vectored_emulate; + regs->msr = MSR_KERNEL; + return 1; +#endif + case RFI: return -1; #endif diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 46af80279ebc..0a201b771477 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -8,59 +8,51 @@ #define pr_fmt(fmt) "emulate_step_test: " fmt #include +#include #include #include #include #include -#define IMM_L(i) ((uintptr_t)(i) & 0xffff) -#define IMM_DS(i) ((uintptr_t)(i) & 0xfffc) - -/* - * Defined with TEST_ prefix so it does not conflict with other - * definitions. - */ -#define TEST_LD(r, base, i) ppc_inst(PPC_INST_LD | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_DS(i)) -#define TEST_LWZ(r, base, i) ppc_inst(PPC_INST_LWZ | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define TEST_LWZX(t, a, b) ppc_inst(PPC_INST_LWZX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STD(r, base, i) ppc_inst(PPC_INST_STD | ___PPC_RS(r) | \ - ___PPC_RA(base) | IMM_DS(i)) -#define TEST_LDARX(t, a, b, eh) ppc_inst(PPC_INST_LDARX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b) | \ - __PPC_EH(eh)) -#define TEST_STDCX(s, a, b) ppc_inst(PPC_INST_STDCX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LFSX(t, a, b) ppc_inst(PPC_INST_LFSX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STFSX(s, a, b) ppc_inst(PPC_INST_STFSX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LFDX(t, a, b) ppc_inst(PPC_INST_LFDX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STFDX(s, a, b) ppc_inst(PPC_INST_STFDX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LVX(t, a, b) ppc_inst(PPC_INST_LVX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_STVX(s, a, b) ppc_inst(PPC_INST_STVX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_LXVD2X(s, a, b) ppc_inst(PPC_INST_LXVD2X | VSX_XX1((s), R##a, R##b)) -#define TEST_STXVD2X(s, a, b) ppc_inst(PPC_INST_STXVD2X | VSX_XX1((s), R##a, R##b)) -#define TEST_ADD(t, a, b) ppc_inst(PPC_INST_ADD | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_ADD_DOT(t, a, b) ppc_inst(PPC_INST_ADD | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b) | 0x1) -#define TEST_ADDC(t, a, b) ppc_inst(PPC_INST_ADDC | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define TEST_ADDC_DOT(t, a, b) ppc_inst(PPC_INST_ADDC | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b) | 0x1) - #define MAX_SUBTESTS 16 #define IGNORE_GPR(n) (0x1UL << (n)) #define IGNORE_XER (0x1UL << 32) #define IGNORE_CCR (0x1UL << 33) +#define NEGATIVE_TEST (0x1UL << 63) + +#define TEST_PLD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_PLD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PLWZ(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_RAW_LWZ(r, base, i)) + +#define TEST_PSTD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_PSTD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PLFS(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_LFS | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PSTFS(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_STFS | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PLFD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_LFD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PSTFD(r, base, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_INST_STFD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + +#define TEST_PADDI(t, a, i, pr) \ + ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ + PPC_RAW_ADDI(t, a, i)) + static void __init init_pt_regs(struct pt_regs *regs) { @@ -105,7 +97,7 @@ static void __init test_ld(void) regs.gpr[3] = (unsigned long) &a; /* ld r5, 0(r3) */ - stepped = emulate_step(®s, TEST_LD(5, 3, 0)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LD(5, 3, 0))); if (stepped == 1 && regs.gpr[5] == a) show_result("ld", "PASS"); @@ -113,6 +105,29 @@ static void __init test_ld(void) show_result("ld", "FAIL"); } +static void __init test_pld(void) +{ + struct pt_regs regs; + unsigned long a = 0x23; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pld", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long)&a; + + /* pld r5, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PLD(5, 3, 0, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("pld", "PASS"); + else + show_result("pld", "FAIL"); +} + static void __init test_lwz(void) { struct pt_regs regs; @@ -123,7 +138,7 @@ static void __init test_lwz(void) regs.gpr[3] = (unsigned long) &a; /* lwz r5, 0(r3) */ - stepped = emulate_step(®s, TEST_LWZ(5, 3, 0)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LWZ(5, 3, 0))); if (stepped == 1 && regs.gpr[5] == a) show_result("lwz", "PASS"); @@ -131,6 +146,30 @@ static void __init test_lwz(void) show_result("lwz", "FAIL"); } +static void __init test_plwz(void) +{ + struct pt_regs regs; + unsigned int a = 0x4545; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("plwz", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long)&a; + + /* plwz r5, 0(r3), 0 */ + + stepped = emulate_step(®s, TEST_PLWZ(5, 3, 0, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("plwz", "PASS"); + else + show_result("plwz", "FAIL"); +} + static void __init test_lwzx(void) { struct pt_regs regs; @@ -143,7 +182,7 @@ static void __init test_lwzx(void) regs.gpr[5] = 0x8765; /* lwzx r5, r3, r4 */ - stepped = emulate_step(®s, TEST_LWZX(5, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LWZX(5, 3, 4))); if (stepped == 1 && regs.gpr[5] == a[2]) show_result("lwzx", "PASS"); else @@ -161,13 +200,36 @@ static void __init test_std(void) regs.gpr[5] = 0x5678; /* std r5, 0(r3) */ - stepped = emulate_step(®s, TEST_STD(5, 3, 0)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STD(5, 3, 0))); if (stepped == 1 && regs.gpr[5] == a) show_result("std", "PASS"); else show_result("std", "FAIL"); } +static void __init test_pstd(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pstd", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long)&a; + regs.gpr[5] = 0x5678; + + /* pstd r5, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PSTD(5, 3, 0, 0)); + if (stepped == 1 || regs.gpr[5] == a) + show_result("pstd", "PASS"); + else + show_result("pstd", "FAIL"); +} + static void __init test_ldarx_stdcx(void) { struct pt_regs regs; @@ -186,7 +248,7 @@ static void __init test_ldarx_stdcx(void) regs.gpr[5] = 0x5678; /* ldarx r5, r3, r4, 0 */ - stepped = emulate_step(®s, TEST_LDARX(5, 3, 4, 0)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LDARX(5, 3, 4, 0))); /* * Don't touch 'a' here. Touching 'a' can do Load/store @@ -204,7 +266,7 @@ static void __init test_ldarx_stdcx(void) regs.gpr[5] = 0x9ABC; /* stdcx. r5, r3, r4 */ - stepped = emulate_step(®s, TEST_STDCX(5, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STDCX(5, 3, 4))); /* * Two possible scenarios that indicates successful emulation @@ -244,7 +306,7 @@ static void __init test_lfsx_stfsx(void) regs.gpr[4] = 0; /* lfsx frt10, r3, r4 */ - stepped = emulate_step(®s, TEST_LFSX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LFSX(10, 3, 4))); if (stepped == 1) show_result("lfsx", "PASS"); @@ -257,7 +319,7 @@ static void __init test_lfsx_stfsx(void) c.a = 678.91; /* stfsx frs10, r3, r4 */ - stepped = emulate_step(®s, TEST_STFSX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STFSX(10, 3, 4))); if (stepped == 1 && c.b == cached_b) show_result("stfsx", "PASS"); @@ -265,6 +327,53 @@ static void __init test_lfsx_stfsx(void) show_result("stfsx", "FAIL"); } +static void __init test_plfs_pstfs(void) +{ + struct pt_regs regs; + union { + float a; + int b; + } c; + int cached_b; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pld", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + + /*** plfs ***/ + + c.a = 123.45; + cached_b = c.b; + + regs.gpr[3] = (unsigned long)&c.a; + + /* plfs frt10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PLFS(10, 3, 0, 0)); + + if (stepped == 1) + show_result("plfs", "PASS"); + else + show_result("plfs", "FAIL"); + + + /*** pstfs ***/ + + c.a = 678.91; + + /* pstfs frs10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PSTFS(10, 3, 0, 0)); + + if (stepped == 1 && c.b == cached_b) + show_result("pstfs", "PASS"); + else + show_result("pstfs", "FAIL"); +} + static void __init test_lfdx_stfdx(void) { struct pt_regs regs; @@ -287,7 +396,7 @@ static void __init test_lfdx_stfdx(void) regs.gpr[4] = 0; /* lfdx frt10, r3, r4 */ - stepped = emulate_step(®s, TEST_LFDX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LFDX(10, 3, 4))); if (stepped == 1) show_result("lfdx", "PASS"); @@ -300,13 +409,60 @@ static void __init test_lfdx_stfdx(void) c.a = 987654.32; /* stfdx frs10, r3, r4 */ - stepped = emulate_step(®s, TEST_STFDX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STFDX(10, 3, 4))); if (stepped == 1 && c.b == cached_b) show_result("stfdx", "PASS"); else show_result("stfdx", "FAIL"); } + +static void __init test_plfd_pstfd(void) +{ + struct pt_regs regs; + union { + double a; + long b; + } c; + long cached_b; + int stepped = -1; + + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + show_result("pld", "SKIP (!CPU_FTR_ARCH_31)"); + return; + } + + init_pt_regs(®s); + + + /*** plfd ***/ + + c.a = 123456.78; + cached_b = c.b; + + regs.gpr[3] = (unsigned long)&c.a; + + /* plfd frt10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PLFD(10, 3, 0, 0)); + + if (stepped == 1) + show_result("plfd", "PASS"); + else + show_result("plfd", "FAIL"); + + + /*** pstfd ***/ + + c.a = 987654.32; + + /* pstfd frs10, 0(r3), 0 */ + stepped = emulate_step(®s, TEST_PSTFD(10, 3, 0, 0)); + + if (stepped == 1 && c.b == cached_b) + show_result("pstfd", "PASS"); + else + show_result("pstfd", "FAIL"); +} #else static void __init test_lfsx_stfsx(void) { @@ -314,11 +470,23 @@ static void __init test_lfsx_stfsx(void) show_result("stfsx", "SKIP (CONFIG_PPC_FPU is not set)"); } +static void __init test_plfs_pstfs(void) +{ + show_result("plfs", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("pstfs", "SKIP (CONFIG_PPC_FPU is not set)"); +} + static void __init test_lfdx_stfdx(void) { show_result("lfdx", "SKIP (CONFIG_PPC_FPU is not set)"); show_result("stfdx", "SKIP (CONFIG_PPC_FPU is not set)"); } + +static void __init test_plfd_pstfd(void) +{ + show_result("plfd", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("pstfd", "SKIP (CONFIG_PPC_FPU is not set)"); +} #endif /* CONFIG_PPC_FPU */ #ifdef CONFIG_ALTIVEC @@ -346,7 +514,7 @@ static void __init test_lvx_stvx(void) regs.gpr[4] = 0; /* lvx vrt10, r3, r4 */ - stepped = emulate_step(®s, TEST_LVX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LVX(10, 3, 4))); if (stepped == 1) show_result("lvx", "PASS"); @@ -362,7 +530,7 @@ static void __init test_lvx_stvx(void) c.b[3] = 498532; /* stvx vrs10, r3, r4 */ - stepped = emulate_step(®s, TEST_STVX(10, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STVX(10, 3, 4))); if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && cached_b[2] == c.b[2] && cached_b[3] == c.b[3]) @@ -403,7 +571,7 @@ static void __init test_lxvd2x_stxvd2x(void) regs.gpr[4] = 0; /* lxvd2x vsr39, r3, r4 */ - stepped = emulate_step(®s, TEST_LXVD2X(39, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_LXVD2X(39, R3, R4))); if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { show_result("lxvd2x", "PASS"); @@ -423,7 +591,7 @@ static void __init test_lxvd2x_stxvd2x(void) c.b[3] = 4; /* stxvd2x vsr39, r3, r4 */ - stepped = emulate_step(®s, TEST_STXVD2X(39, 3, 4)); + stepped = emulate_step(®s, ppc_inst(PPC_RAW_STXVD2X(39, R3, R4))); if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && cached_b[2] == c.b[2] && cached_b[3] == c.b[3] && @@ -447,18 +615,24 @@ static void __init test_lxvd2x_stxvd2x(void) static void __init run_tests_load_store(void) { test_ld(); + test_pld(); test_lwz(); + test_plwz(); test_lwzx(); test_std(); + test_pstd(); test_ldarx_stdcx(); test_lfsx_stfsx(); + test_plfs_pstfs(); test_lfdx_stfdx(); + test_plfd_pstfd(); test_lvx_stvx(); test_lxvd2x_stxvd2x(); } struct compute_test { char *mnemonic; + unsigned long cpu_feature; struct { char *descr; unsigned long flags; @@ -467,6 +641,11 @@ struct compute_test { } subtests[MAX_SUBTESTS + 1]; }; +/* Extreme values for si0||si1 (the MLS:D-form 34 bit immediate field) */ +#define SI_MIN BIT(33) +#define SI_MAX (BIT(33) - 1) +#define SI_UMAX (BIT(34) - 1) + static struct compute_test compute_tests[] = { { .mnemonic = "nop", @@ -485,7 +664,7 @@ static struct compute_test compute_tests[] = { .subtests = { { .descr = "RA = LONG_MIN, RB = LONG_MIN", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MIN, @@ -493,7 +672,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN, RB = LONG_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MAX, @@ -501,7 +680,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MAX, RB = LONG_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = LONG_MAX, .gpr[22] = LONG_MAX, @@ -509,7 +688,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = ULONG_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = ULONG_MAX, @@ -517,7 +696,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = 0x1", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = 0x1, @@ -525,7 +704,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MIN", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MIN, @@ -533,7 +712,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MAX, @@ -541,7 +720,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MAX, RB = INT_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = INT_MAX, .gpr[22] = INT_MAX, @@ -549,7 +728,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = UINT_MAX", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = UINT_MAX, @@ -557,7 +736,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = 0x1", - .instr = TEST_ADD(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = 0x1, @@ -571,7 +750,7 @@ static struct compute_test compute_tests[] = { { .descr = "RA = LONG_MIN, RB = LONG_MIN", .flags = IGNORE_CCR, - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MIN, @@ -579,7 +758,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN, RB = LONG_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MAX, @@ -588,7 +767,7 @@ static struct compute_test compute_tests[] = { { .descr = "RA = LONG_MAX, RB = LONG_MAX", .flags = IGNORE_CCR, - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MAX, .gpr[22] = LONG_MAX, @@ -596,7 +775,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = ULONG_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = ULONG_MAX, @@ -604,7 +783,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = 0x1", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = 0x1, @@ -612,7 +791,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MIN", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MIN, @@ -620,7 +799,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MAX, @@ -628,7 +807,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MAX, RB = INT_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MAX, .gpr[22] = INT_MAX, @@ -636,7 +815,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = UINT_MAX", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = UINT_MAX, @@ -644,7 +823,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = 0x1", - .instr = TEST_ADD_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = 0x1, @@ -657,7 +836,7 @@ static struct compute_test compute_tests[] = { .subtests = { { .descr = "RA = LONG_MIN, RB = LONG_MIN", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MIN, @@ -665,7 +844,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN, RB = LONG_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MAX, @@ -673,7 +852,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MAX, RB = LONG_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = LONG_MAX, .gpr[22] = LONG_MAX, @@ -681,7 +860,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = ULONG_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = ULONG_MAX, @@ -689,7 +868,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = 0x1", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = 0x1, @@ -697,7 +876,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MIN", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MIN, @@ -705,7 +884,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MAX, @@ -713,7 +892,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MAX, RB = INT_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = INT_MAX, .gpr[22] = INT_MAX, @@ -721,7 +900,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = UINT_MAX", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = UINT_MAX, @@ -729,7 +908,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = 0x1", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = 0x1, @@ -737,7 +916,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN", - .instr = TEST_ADDC(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN | (uint)INT_MIN, .gpr[22] = LONG_MIN | (uint)INT_MIN, @@ -751,7 +930,7 @@ static struct compute_test compute_tests[] = { { .descr = "RA = LONG_MIN, RB = LONG_MIN", .flags = IGNORE_CCR, - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MIN, @@ -759,7 +938,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN, RB = LONG_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN, .gpr[22] = LONG_MAX, @@ -768,7 +947,7 @@ static struct compute_test compute_tests[] = { { .descr = "RA = LONG_MAX, RB = LONG_MAX", .flags = IGNORE_CCR, - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MAX, .gpr[22] = LONG_MAX, @@ -776,7 +955,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = ULONG_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = ULONG_MAX, @@ -784,7 +963,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = ULONG_MAX, RB = 0x1", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = ULONG_MAX, .gpr[22] = 0x1, @@ -792,7 +971,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MIN", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MIN, @@ -800,7 +979,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MIN, RB = INT_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MIN, .gpr[22] = INT_MAX, @@ -808,7 +987,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = INT_MAX, RB = INT_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = INT_MAX, .gpr[22] = INT_MAX, @@ -816,7 +995,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = UINT_MAX", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = UINT_MAX, @@ -824,7 +1003,7 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = UINT_MAX, RB = 0x1", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = UINT_MAX, .gpr[22] = 0x1, @@ -832,31 +1011,320 @@ static struct compute_test compute_tests[] = { }, { .descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN", - .instr = TEST_ADDC_DOT(20, 21, 22), + .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)), .regs = { .gpr[21] = LONG_MIN | (uint)INT_MIN, .gpr[22] = LONG_MIN | (uint)INT_MIN, } } } + }, + { + .mnemonic = "divde", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + } + } + }, + { + .mnemonic = "divde.", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + } + } + }, + { + .mnemonic = "divdeu", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MAX - 1, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MAX - 1, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MIN + 1, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN + 1, + .gpr[22] = LONG_MIN, + } + } + } + }, + { + .mnemonic = "divdeu.", + .subtests = { + { + .descr = "RA = LONG_MIN, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = 1L, RB = 0", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = 1L, + .gpr[22] = 0, + } + }, + { + .descr = "RA = LONG_MIN, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MIN, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MAX - 1, RB = LONG_MAX", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .regs = { + .gpr[21] = LONG_MAX - 1, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = LONG_MIN + 1, RB = LONG_MIN", + .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)), + .flags = IGNORE_GPR(20), + .regs = { + .gpr[21] = LONG_MIN + 1, + .gpr[22] = LONG_MIN, + } + } + } + }, + { + .mnemonic = "paddi", + .cpu_feature = CPU_FTR_ARCH_31, + .subtests = { + { + .descr = "RA = LONG_MIN, SI = SI_MIN, R = 0", + .instr = TEST_PADDI(21, 22, SI_MIN, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = LONG_MIN, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = LONG_MIN, + } + }, + { + .descr = "RA = LONG_MAX, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = LONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, SI = SI_UMAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_UMAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = ULONG_MAX, + } + }, + { + .descr = "RA = ULONG_MAX, SI = 0x1, R = 0", + .instr = TEST_PADDI(21, 22, 0x1, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = ULONG_MAX, + } + }, + { + .descr = "RA = INT_MIN, SI = SI_MIN, R = 0", + .instr = TEST_PADDI(21, 22, SI_MIN, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = INT_MIN, + } + }, + { + .descr = "RA = INT_MIN, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = INT_MIN, + } + }, + { + .descr = "RA = INT_MAX, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = INT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, SI = 0x1, R = 0", + .instr = TEST_PADDI(21, 22, 0x1, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = UINT_MAX, + } + }, + { + .descr = "RA = UINT_MAX, SI = SI_MAX, R = 0", + .instr = TEST_PADDI(21, 22, SI_MAX, 0), + .regs = { + .gpr[21] = 0, + .gpr[22] = UINT_MAX, + } + }, + { + .descr = "RA is r0, SI = SI_MIN, R = 0", + .instr = TEST_PADDI(21, 0, SI_MIN, 0), + .regs = { + .gpr[21] = 0x0, + } + }, + { + .descr = "RA = 0, SI = SI_MIN, R = 0", + .instr = TEST_PADDI(21, 22, SI_MIN, 0), + .regs = { + .gpr[21] = 0x0, + .gpr[22] = 0x0, + } + }, + { + .descr = "RA is r0, SI = 0, R = 1", + .instr = TEST_PADDI(21, 0, 0, 1), + .regs = { + .gpr[21] = 0, + } + }, + { + .descr = "RA is r0, SI = SI_MIN, R = 1", + .instr = TEST_PADDI(21, 0, SI_MIN, 1), + .regs = { + .gpr[21] = 0, + } + }, + /* Invalid instruction form with R = 1 and RA != 0 */ + { + .descr = "RA = R22(0), SI = 0, R = 1", + .instr = TEST_PADDI(21, 22, 0, 1), + .flags = NEGATIVE_TEST, + .regs = { + .gpr[21] = 0, + .gpr[22] = 0, + } + } + } } }; static int __init emulate_compute_instr(struct pt_regs *regs, - struct ppc_inst instr) + struct ppc_inst instr, + bool negative) { + int analysed; struct instruction_op op; if (!regs || !ppc_inst_val(instr)) return -EINVAL; - if (analyse_instr(&op, regs, instr) != 1 || - GETTYPE(op.type) != COMPUTE) { - pr_info("emulation failed, instruction = 0x%08x\n", ppc_inst_val(instr)); + regs->nip = patch_site_addr(&patch__exec_instr); + + analysed = analyse_instr(&op, regs, instr); + if (analysed != 1 || GETTYPE(op.type) != COMPUTE) { + if (negative) + return -EFAULT; + pr_info("emulation failed, instruction = %s\n", ppc_inst_as_str(instr)); return -EFAULT; } - - emulate_update_regs(regs, &op); + if (analysed == 1 && negative) + pr_info("negative test failed, instruction = %s\n", ppc_inst_as_str(instr)); + if (!negative) + emulate_update_regs(regs, &op); return 0; } @@ -864,7 +1332,6 @@ static int __init execute_compute_instr(struct pt_regs *regs, struct ppc_inst instr) { extern int exec_instr(struct pt_regs *regs); - extern s32 patch__exec_instr; if (!regs || !ppc_inst_val(instr)) return -EINVAL; @@ -872,7 +1339,7 @@ static int __init execute_compute_instr(struct pt_regs *regs, /* Patch the NOP with the actual instruction */ patch_instruction_site(&patch__exec_instr, instr); if (exec_instr(regs)) { - pr_info("execution failed, instruction = 0x%08x\n", ppc_inst_val(instr)); + pr_info("execution failed, instruction = %s\n", ppc_inst_as_str(instr)); return -EFAULT; } @@ -894,15 +1361,21 @@ static void __init run_tests_compute(void) struct pt_regs *regs, exp, got; unsigned int i, j, k; struct ppc_inst instr; - bool ignore_gpr, ignore_xer, ignore_ccr, passed; + bool ignore_gpr, ignore_xer, ignore_ccr, passed, rc, negative; for (i = 0; i < ARRAY_SIZE(compute_tests); i++) { test = &compute_tests[i]; + if (test->cpu_feature && !early_cpu_has_feature(test->cpu_feature)) { + show_result(test->mnemonic, "SKIP (!CPU_FTR)"); + continue; + } + for (j = 0; j < MAX_SUBTESTS && test->subtests[j].descr; j++) { instr = test->subtests[j].instr; flags = test->subtests[j].flags; regs = &test->subtests[j].regs; + negative = flags & NEGATIVE_TEST; ignore_xer = flags & IGNORE_XER; ignore_ccr = flags & IGNORE_CCR; passed = true; @@ -917,8 +1390,12 @@ static void __init run_tests_compute(void) exp.msr = MSR_KERNEL; got.msr = MSR_KERNEL; - if (emulate_compute_instr(&got, instr) || - execute_compute_instr(&exp, instr)) { + rc = emulate_compute_instr(&got, instr, negative) != 0; + if (negative) { + /* skip executing instruction */ + passed = rc; + goto print; + } else if (rc || execute_compute_instr(&exp, instr)) { passed = false; goto print; } diff --git a/arch/powerpc/lib/test_emulate_step_exec_instr.S b/arch/powerpc/lib/test_emulate_step_exec_instr.S index 1580f34f4f4f..9ef941d958d8 100644 --- a/arch/powerpc/lib/test_emulate_step_exec_instr.S +++ b/arch/powerpc/lib/test_emulate_step_exec_instr.S @@ -80,7 +80,9 @@ _GLOBAL(exec_instr) REST_NVGPRS(r31) /* Placeholder for the test instruction */ + .balign 64 1: nop + nop patch_site 1b patch__exec_instr /* diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index 923ad8f374eb..1690d369688b 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -62,7 +62,7 @@ _GLOBAL(hash_page) isync #endif /* Get PTE (linux-style) and check access */ - lis r0,KERNELBASE@h /* check if kernel address */ + lis r0, TASK_SIZE@h /* check if kernel address */ cmplw 0,r4,r0 ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ mfspr r5, SPRN_SPRG_PGDIR /* phys page-table root */ diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 03b6ba54460e..c0162911f6cb 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -187,6 +187,17 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) return __mmu_mapin_ram(border, top); } +static bool is_module_segment(unsigned long addr) +{ + if (!IS_ENABLED(CONFIG_MODULES)) + return false; + if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M)) + return false; + if (addr >= ALIGN(VMALLOC_END, SZ_256M)) + return false; + return true; +} + void mmu_mark_initmem_nx(void) { int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; @@ -223,9 +234,9 @@ void mmu_mark_initmem_nx(void) for (i = TASK_SIZE >> 28; i < 16; i++) { /* Do not set NX on VM space for modules */ - if (IS_ENABLED(CONFIG_MODULES) && - (VMALLOC_START & 0xf0000000) == i << 28) - break; + if (is_module_segment(i << 28)) + continue; + mtsrin(mfsrin(i << 28) | 0x10000000, i << 28); } } diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 9b9f92ad0e7a..1478fceeb683 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -232,8 +232,6 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) rflags |= HPTE_R_I; else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT) rflags |= (HPTE_R_I | HPTE_R_G); - else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO) - rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M); else /* * Add memory coherence if cache inhibited is not set @@ -596,7 +594,7 @@ static void __init htab_scan_page_sizes(void) } #ifdef CONFIG_HUGETLB_PAGE - if (!hugetlb_disabled) { + if (!hugetlb_disabled && !early_radix_enabled() ) { /* Reserve 16G huge page memory sections for huge pages */ of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL); } @@ -663,11 +661,10 @@ static void __init htab_init_page_sizes(void) * Pick a size for the linear mapping. Currently, we only * support 16M, 1M and 4K which is the default */ - if (IS_ENABLED(STRICT_KERNEL_RWX) && + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && (unsigned long)_stext % 0x1000000) { if (mmu_psize_defs[MMU_PAGE_16M].shift) - pr_warn("Kernel not 16M aligned, " - "disabling 16M linear map alignment"); + pr_warn("Kernel not 16M aligned, disabling 16M linear map alignment\n"); aligned = false; } @@ -788,7 +785,7 @@ static unsigned long __init htab_get_table_size(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int resize_hpt_for_hotplug(unsigned long new_mem_size) +static int resize_hpt_for_hotplug(unsigned long new_mem_size) { unsigned target_hpt_shift; @@ -822,6 +819,8 @@ int hash__create_section_mapping(unsigned long start, unsigned long end, return -1; } + resize_hpt_for_hotplug(memblock_phys_mem_size()); + rc = htab_bolt_mapping(start, end, __pa(start), pgprot_val(prot), mmu_linear_psize, mmu_kernel_ssize); @@ -839,6 +838,10 @@ int hash__remove_section_mapping(unsigned long start, unsigned long end) int rc = htab_remove_mapping(start, end, mmu_linear_psize, mmu_kernel_ssize); WARN_ON(rc < 0); + + if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC) + pr_warn("Hash collision while resizing HPT\n"); + return rc; } #endif /* CONFIG_MEMORY_HOTPLUG */ @@ -1111,6 +1114,10 @@ void hash__early_init_mmu_secondary(void) if (cpu_has_feature(CPU_FTR_ARCH_206) && cpu_has_feature(CPU_FTR_HVMODE)) tlbiel_all(); + +#ifdef CONFIG_PPC_MEM_KEYS + mtspr(SPRN_UAMOR, default_uamor); +#endif } #endif /* CONFIG_SMP */ @@ -1731,10 +1738,6 @@ unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift, return gslot; } -/* - * WARNING: This is called from hash_low_64.S, if you change this prototype, - * do not forget to update the assembly call site ! - */ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, unsigned long flags) { diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index c58ad1049909..e18ae50a275c 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -165,6 +166,8 @@ void mmu_cleanup_all(void) radix__mmu_cleanup_all(); else if (mmu_hash_ops.hpte_clear_all) mmu_hash_ops.hpte_clear_all(); + + reset_sprs(); } #ifdef CONFIG_MEMORY_HOTPLUG @@ -339,6 +342,9 @@ void pmd_fragment_free(unsigned long *pmd) { struct page *page = virt_to_page(pmd); + if (PageReserved(page)) + return free_reserved_page(page); + BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0); if (atomic_dec_and_test(&page->pt_frag_refcount)) { pgtable_pmd_page_dtor(page); @@ -356,7 +362,7 @@ static inline void pgtable_free(void *table, int index) pmd_fragment_free(table); break; case PUD_INDEX: - kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), table); + __pud_free(table); break; #if defined(CONFIG_PPC_4K_PAGES) && defined(CONFIG_HUGETLB_PAGE) /* 16M hugepd directory at pud level */ diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index d174106bab67..69a6b87f2bb4 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -10,58 +10,103 @@ #include #include #include -#include +#include -DEFINE_STATIC_KEY_TRUE(pkey_disabled); -int pkeys_total; /* Total pkeys as per device tree */ -u32 initial_allocation_mask; /* Bits set for the initially allocated keys */ -u32 reserved_allocation_mask; /* Bits set for reserved keys */ -static bool pkey_execute_disable_supported; -static bool pkeys_devtree_defined; /* property exported by device tree */ -static u64 pkey_amr_mask; /* Bits in AMR not to be touched */ -static u64 pkey_iamr_mask; /* Bits in AMR not to be touched */ -static u64 pkey_uamor_mask; /* Bits in UMOR not to be touched */ +int num_pkey; /* Max number of pkeys supported */ +/* + * Keys marked in the reservation list cannot be allocated by userspace + */ +u32 reserved_allocation_mask __ro_after_init; + +/* Bits set for the initially allocated keys */ +static u32 initial_allocation_mask __ro_after_init; + +/* + * Even if we allocate keys with sys_pkey_alloc(), we need to make sure + * other thread still find the access denied using the same keys. + */ +static u64 default_amr = ~0x0UL; +static u64 default_iamr = 0x5555555555555555UL; +u64 default_uamor __ro_after_init; +/* + * Key used to implement PROT_EXEC mmap. Denies READ/WRITE + * We pick key 2 because 0 is special key and 1 is reserved as per ISA. + */ static int execute_only_key = 2; +static bool pkey_execute_disable_supported; + #define AMR_BITS_PER_PKEY 2 #define AMR_RD_BIT 0x1UL #define AMR_WR_BIT 0x2UL #define IAMR_EX_BIT 0x1UL -#define PKEY_REG_BITS (sizeof(u64)*8) +#define PKEY_REG_BITS (sizeof(u64) * 8) #define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey+1) * AMR_BITS_PER_PKEY)) -static void scan_pkey_feature(void) +static int __init dt_scan_storage_keys(unsigned long node, + const char *uname, int depth, + void *data) { - u32 vals[2]; - struct device_node *cpu; + const char *type = of_get_flat_dt_prop(node, "device_type", NULL); + const __be32 *prop; + int *pkeys_total = (int *) data; - cpu = of_find_node_by_type(NULL, "cpu"); - if (!cpu) - return; + /* We are scanning "cpu" nodes only */ + if (type == NULL || strcmp(type, "cpu") != 0) + return 0; - if (of_property_read_u32_array(cpu, - "ibm,processor-storage-keys", vals, 2)) - return; + prop = of_get_flat_dt_prop(node, "ibm,processor-storage-keys", NULL); + if (!prop) + return 0; + *pkeys_total = be32_to_cpu(prop[0]); + return 1; +} + +static int scan_pkey_feature(void) +{ + int ret; + int pkeys_total = 0; /* - * Since any pkey can be used for data or execute, we will just treat - * all keys as equal and track them as one entity. + * Pkey is not supported with Radix translation. */ - pkeys_total = vals[0]; - pkeys_devtree_defined = true; + if (early_radix_enabled()) + return 0; + + /* + * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1 + */ + if (!early_cpu_has_feature(CPU_FTR_ARCH_206)) + return 0; + + ret = of_scan_flat_dt(dt_scan_storage_keys, &pkeys_total); + if (ret == 0) { + /* + * Let's assume 32 pkeys on P8/P9 bare metal, if its not defined by device + * tree. We make this exception since some version of skiboot forgot to + * expose this property on power8/9. + */ + if (!firmware_has_feature(FW_FEATURE_LPAR)) { + unsigned long pvr = mfspr(SPRN_PVR); + + if (PVR_VER(pvr) == PVR_POWER8 || PVR_VER(pvr) == PVR_POWER8E || + PVR_VER(pvr) == PVR_POWER8NVL || PVR_VER(pvr) == PVR_POWER9) + pkeys_total = 32; + } + } + + /* + * Adjust the upper limit, based on the number of bits supported by + * arch-neutral code. + */ + pkeys_total = min_t(int, pkeys_total, + ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + 1)); + return pkeys_total; } -static inline bool pkey_mmu_enabled(void) +void __init pkey_early_init_devtree(void) { - if (firmware_has_feature(FW_FEATURE_LPAR)) - return pkeys_total; - else - return cpu_has_feature(CPU_FTR_PKEY); -} - -static int pkey_initialize(void) -{ - int os_reserved, i; + int pkeys_total, i; /* * We define PKEY_DISABLE_EXECUTE in addition to the arch-neutral @@ -80,31 +125,14 @@ static int pkey_initialize(void) != (sizeof(u64) * BITS_PER_BYTE)); /* scan the device tree for pkey feature */ - scan_pkey_feature(); + pkeys_total = scan_pkey_feature(); + if (!pkeys_total) + goto out; - /* - * Let's assume 32 pkeys on P8 bare metal, if its not defined by device - * tree. We make this exception since skiboot forgot to expose this - * property on power8. - */ - if (!pkeys_devtree_defined && !firmware_has_feature(FW_FEATURE_LPAR) && - cpu_has_feature(CPU_FTRS_POWER8)) - pkeys_total = 32; + /* Allow all keys to be modified by default */ + default_uamor = ~0x0UL; - /* - * Adjust the upper limit, based on the number of bits supported by - * arch-neutral code. - */ - pkeys_total = min_t(int, pkeys_total, - ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)+1)); - - if (!pkey_mmu_enabled() || radix_enabled() || !pkeys_total) - static_branch_enable(&pkey_disabled); - else - static_branch_disable(&pkey_disabled); - - if (static_branch_likely(&pkey_disabled)) - return 0; + cur_cpu_spec->mmu_features |= MMU_FTR_PKEY; /* * The device tree cannot be relied to indicate support for @@ -118,53 +146,86 @@ static int pkey_initialize(void) #ifdef CONFIG_PPC_4K_PAGES /* * The OS can manage only 8 pkeys due to its inability to represent them - * in the Linux 4K PTE. + * in the Linux 4K PTE. Mark all other keys reserved. */ - os_reserved = pkeys_total - 8; + num_pkey = min(8, pkeys_total); #else - os_reserved = 0; + num_pkey = pkeys_total; #endif - /* Bits are in LE format. */ - reserved_allocation_mask = (0x1 << 1) | (0x1 << execute_only_key); - /* register mask is in BE format */ - pkey_amr_mask = ~0x0ul; - pkey_amr_mask &= ~(0x3ul << pkeyshift(0)); - - pkey_iamr_mask = ~0x0ul; - pkey_iamr_mask &= ~(0x3ul << pkeyshift(0)); - pkey_iamr_mask &= ~(0x3ul << pkeyshift(execute_only_key)); - - pkey_uamor_mask = ~0x0ul; - pkey_uamor_mask &= ~(0x3ul << pkeyshift(0)); - pkey_uamor_mask &= ~(0x3ul << pkeyshift(execute_only_key)); - - /* mark the rest of the keys as reserved and hence unavailable */ - for (i = (pkeys_total - os_reserved); i < pkeys_total; i++) { - reserved_allocation_mask |= (0x1 << i); - pkey_uamor_mask &= ~(0x3ul << pkeyshift(i)); - } - initial_allocation_mask = reserved_allocation_mask | (0x1 << 0); - - if (unlikely((pkeys_total - os_reserved) <= execute_only_key)) { + if (unlikely(num_pkey <= execute_only_key) || !pkey_execute_disable_supported) { /* * Insufficient number of keys to support * execute only key. Mark it unavailable. - * Any AMR, UAMOR, IAMR bit set for - * this key is irrelevant since this key - * can never be allocated. */ execute_only_key = -1; + } else { + /* + * Mark the execute_only_pkey as not available for + * user allocation via pkey_alloc. + */ + reserved_allocation_mask |= (0x1 << execute_only_key); + + /* + * Deny READ/WRITE for execute_only_key. + * Allow execute in IAMR. + */ + default_amr |= (0x3ul << pkeyshift(execute_only_key)); + default_iamr &= ~(0x1ul << pkeyshift(execute_only_key)); + + /* + * Clear the uamor bits for this key. + */ + default_uamor &= ~(0x3ul << pkeyshift(execute_only_key)); } - return 0; -} + /* + * Allow access for only key 0. And prevent any other modification. + */ + default_amr &= ~(0x3ul << pkeyshift(0)); + default_iamr &= ~(0x1ul << pkeyshift(0)); + default_uamor &= ~(0x3ul << pkeyshift(0)); + /* + * key 0 is special in that we want to consider it an allocated + * key which is preallocated. We don't allow changing AMR bits + * w.r.t key 0. But one can pkey_free(key0) + */ + initial_allocation_mask |= (0x1 << 0); -arch_initcall(pkey_initialize); + /* + * key 1 is recommended not to be used. PowerISA(3.0) page 1015, + * programming note. + */ + reserved_allocation_mask |= (0x1 << 1); + default_uamor &= ~(0x3ul << pkeyshift(1)); + + /* + * Prevent the usage of OS reserved keys. Update UAMOR + * for those keys. Also mark the rest of the bits in the + * 32 bit mask as reserved. + */ + for (i = num_pkey; i < 32 ; i++) { + reserved_allocation_mask |= (0x1 << i); + default_uamor &= ~(0x3ul << pkeyshift(i)); + } + /* + * Prevent the allocation of reserved keys too. + */ + initial_allocation_mask |= reserved_allocation_mask; + + pr_info("Enabling pkeys with max key count %d\n", num_pkey); +out: + /* + * Setup uamor on boot cpu + */ + mtspr(SPRN_UAMOR, default_uamor); + + return; +} void pkey_mm_init(struct mm_struct *mm) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return; mm_pkey_allocation_map(mm) = initial_allocation_mask; mm->context.execute_only_pkey = execute_only_key; @@ -196,30 +257,6 @@ static inline void write_iamr(u64 value) mtspr(SPRN_IAMR, value); } -static inline u64 read_uamor(void) -{ - return mfspr(SPRN_UAMOR); -} - -static inline void write_uamor(u64 value) -{ - mtspr(SPRN_UAMOR, value); -} - -static bool is_pkey_enabled(int pkey) -{ - u64 uamor = read_uamor(); - u64 pkey_bits = 0x3ul << pkeyshift(pkey); - u64 uamor_pkey_bits = (uamor & pkey_bits); - - /* - * Both the bits in UAMOR corresponding to the key should be set or - * reset. - */ - WARN_ON(uamor_pkey_bits && (uamor_pkey_bits != pkey_bits)); - return !!(uamor_pkey_bits); -} - static inline void init_amr(int pkey, u8 init_bits) { u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey)); @@ -245,8 +282,18 @@ int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, { u64 new_amr_bits = 0x0ul; u64 new_iamr_bits = 0x0ul; + u64 pkey_bits, uamor_pkey_bits; - if (!is_pkey_enabled(pkey)) + /* + * Check whether the key is disabled by UAMOR. + */ + pkey_bits = 0x3ul << pkeyshift(pkey); + uamor_pkey_bits = (default_uamor & pkey_bits); + + /* + * Both the bits in UAMOR corresponding to the key should be set + */ + if (uamor_pkey_bits != pkey_bits) return -EINVAL; if (init_val & PKEY_DISABLE_EXECUTE) { @@ -268,7 +315,7 @@ int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, void thread_pkey_regs_save(struct thread_struct *thread) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return; /* @@ -276,38 +323,33 @@ void thread_pkey_regs_save(struct thread_struct *thread) */ thread->amr = read_amr(); thread->iamr = read_iamr(); - thread->uamor = read_uamor(); } void thread_pkey_regs_restore(struct thread_struct *new_thread, struct thread_struct *old_thread) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return; if (old_thread->amr != new_thread->amr) write_amr(new_thread->amr); if (old_thread->iamr != new_thread->iamr) write_iamr(new_thread->iamr); - if (old_thread->uamor != new_thread->uamor) - write_uamor(new_thread->uamor); } void thread_pkey_regs_init(struct thread_struct *thread) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return; - thread->amr = pkey_amr_mask; - thread->iamr = pkey_iamr_mask; - thread->uamor = pkey_uamor_mask; + thread->amr = default_amr; + thread->iamr = default_iamr; - write_uamor(pkey_uamor_mask); - write_amr(pkey_amr_mask); - write_iamr(pkey_iamr_mask); + write_amr(default_amr); + write_iamr(default_iamr); } -int __execute_only_pkey(struct mm_struct *mm) +int execute_only_pkey(struct mm_struct *mm) { return mm->context.execute_only_pkey; } @@ -366,7 +408,7 @@ static bool pkey_access_permitted(int pkey, bool write, bool execute) bool arch_pte_access_permitted(u64 pte, bool write, bool execute) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return true; return pkey_access_permitted(pte_to_pkey_bits(pte), write, execute); @@ -383,7 +425,7 @@ bool arch_pte_access_permitted(u64 pte, bool write, bool execute) bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, bool execute, bool foreign) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return true; /* * Do not enforce our key-permissions on a foreign vma. @@ -396,7 +438,7 @@ bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm) { - if (static_branch_likely(&pkey_disabled)) + if (!mmu_has_feature(MMU_FTR_PKEY)) return; /* Duplicate the oldmm pkey state in mm: */ diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index bb00e0cba119..28c784976bed 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include @@ -34,6 +34,7 @@ unsigned int mmu_pid_bits; unsigned int mmu_base_pid; +unsigned int radix_mem_block_size __ro_after_init; static __ref void *early_alloc_pgtable(unsigned long size, int nid, unsigned long region_start, unsigned long region_end) @@ -56,6 +57,13 @@ static __ref void *early_alloc_pgtable(unsigned long size, int nid, return ptr; } +/* + * When allocating pud or pmd pointers, we allocate a complete page + * of PAGE_SIZE rather than PUD_TABLE_SIZE or PMD_TABLE_SIZE. This + * is to ensure that the page obtained from the memblock allocator + * can be completely used as page table page and can be freed + * correctly when the page table entries are removed. + */ static int early_map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t flags, unsigned int map_page_size, @@ -72,8 +80,8 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa, pgdp = pgd_offset_k(ea); p4dp = p4d_offset(pgdp, ea); if (p4d_none(*p4dp)) { - pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid, - region_start, region_end); + pudp = early_alloc_pgtable(PAGE_SIZE, nid, + region_start, region_end); p4d_populate(&init_mm, p4dp, pudp); } pudp = pud_offset(p4dp, ea); @@ -82,8 +90,8 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa, goto set_the_pte; } if (pud_none(*pudp)) { - pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid, - region_start, region_end); + pmdp = early_alloc_pgtable(PAGE_SIZE, nid, region_start, + region_end); pud_populate(&init_mm, pudp, pmdp); } pmdp = pmd_offset(pudp, ea); @@ -259,6 +267,7 @@ static unsigned long next_boundary(unsigned long addr, unsigned long end) static int __meminit create_physical_mapping(unsigned long start, unsigned long end, + unsigned long max_mapping_size, int nid, pgprot_t _prot) { unsigned long vaddr, addr, mapping_size = 0; @@ -272,6 +281,8 @@ static int __meminit create_physical_mapping(unsigned long start, int rc; gap = next_boundary(addr, end) - addr; + if (gap > max_mapping_size) + gap = max_mapping_size; previous_size = mapping_size; prev_exec = exec; @@ -322,8 +333,9 @@ static void __init radix_init_pgtable(void) /* We don't support slb for radix */ mmu_slb_size = 0; + /* - * Create the linear mapping, using standard page size for now + * Create the linear mapping */ for_each_memblock(memory, reg) { /* @@ -339,6 +351,7 @@ static void __init radix_init_pgtable(void) WARN_ON(create_physical_mapping(reg->base, reg->base + reg->size, + radix_mem_block_size, -1, PAGE_KERNEL)); } @@ -479,6 +492,57 @@ static int __init radix_dt_scan_page_sizes(unsigned long node, return 1; } +#ifdef CONFIG_MEMORY_HOTPLUG +static int __init probe_memory_block_size(unsigned long node, const char *uname, int + depth, void *data) +{ + unsigned long *mem_block_size = (unsigned long *)data; + const __be64 *prop; + int len; + + if (depth != 1) + return 0; + + if (strcmp(uname, "ibm,dynamic-reconfiguration-memory")) + return 0; + + prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len); + if (!prop || len < sizeof(__be64)) + /* + * Nothing in the device tree + */ + *mem_block_size = MIN_MEMORY_BLOCK_SIZE; + else + *mem_block_size = be64_to_cpup(prop); + return 1; +} + +static unsigned long radix_memory_block_size(void) +{ + unsigned long mem_block_size = MIN_MEMORY_BLOCK_SIZE; + + /* + * OPAL firmware feature is set by now. Hence we are ok + * to test OPAL feature. + */ + if (firmware_has_feature(FW_FEATURE_OPAL)) + mem_block_size = 1UL * 1024 * 1024 * 1024; + else + of_scan_flat_dt(probe_memory_block_size, &mem_block_size); + + return mem_block_size; +} + +#else /* CONFIG_MEMORY_HOTPLUG */ + +static unsigned long radix_memory_block_size(void) +{ + return 1UL * 1024 * 1024 * 1024; +} + +#endif /* CONFIG_MEMORY_HOTPLUG */ + + void __init radix__early_init_devtree(void) { int rc; @@ -487,17 +551,27 @@ void __init radix__early_init_devtree(void) * Try to find the available page sizes in the device-tree */ rc = of_scan_flat_dt(radix_dt_scan_page_sizes, NULL); - if (rc != 0) /* Found */ - goto found; - /* - * let's assume we have page 4k and 64k support - */ - mmu_psize_defs[MMU_PAGE_4K].shift = 12; - mmu_psize_defs[MMU_PAGE_4K].ap = 0x0; + if (!rc) { + /* + * No page size details found in device tree. + * Let's assume we have page 4k and 64k support + */ + mmu_psize_defs[MMU_PAGE_4K].shift = 12; + mmu_psize_defs[MMU_PAGE_4K].ap = 0x0; - mmu_psize_defs[MMU_PAGE_64K].shift = 16; - mmu_psize_defs[MMU_PAGE_64K].ap = 0x5; -found: + mmu_psize_defs[MMU_PAGE_64K].shift = 16; + mmu_psize_defs[MMU_PAGE_64K].ap = 0x5; + } + + /* + * Max mapping size used when mapping pages. We don't use + * ppc_md.memory_block_size() here because this get called + * early and we don't have machine probe called yet. Also + * the pseries implementation only check for ibm,lmb-size. + * All hypervisor supporting radix do expose that device + * tree node. + */ + radix_mem_block_size = radix_memory_block_size(); return; } @@ -519,8 +593,10 @@ void setup_kuep(bool disabled) if (disabled || !early_radix_enabled()) return; - if (smp_processor_id() == boot_cpuid) + if (smp_processor_id() == boot_cpuid) { pr_info("Activating Kernel Userspace Execution Prevention\n"); + cur_cpu_spec->mmu_features |= MMU_FTR_KUEP; + } /* * Radix always uses key0 of the IAMR to determine if an access is @@ -544,6 +620,10 @@ void setup_kuap(bool disabled) /* Make sure userspace can't change the AMR */ mtspr(SPRN_UAMOR, 0); + + /* + * Set the default kernel AMR values on all cpus. + */ mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); isync(); } @@ -700,30 +780,19 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud) pud_clear(pud); } -struct change_mapping_params { - pte_t *pte; - unsigned long start; - unsigned long end; - unsigned long aligned_start; - unsigned long aligned_end; -}; - -static int __meminit stop_machine_change_mapping(void *data) +static void free_pud_table(pud_t *pud_start, p4d_t *p4d) { - struct change_mapping_params *params = - (struct change_mapping_params *)data; + pud_t *pud; + int i; - if (!data) - return -1; + for (i = 0; i < PTRS_PER_PUD; i++) { + pud = pud_start + i; + if (!pud_none(*pud)) + return; + } - spin_unlock(&init_mm.page_table_lock); - pte_clear(&init_mm, params->aligned_start, params->pte); - create_physical_mapping(__pa(params->aligned_start), - __pa(params->start), -1, PAGE_KERNEL); - create_physical_mapping(__pa(params->end), __pa(params->aligned_end), - -1, PAGE_KERNEL); - spin_lock(&init_mm.page_table_lock); - return 0; + pud_free(&init_mm, pud_start); + p4d_clear(p4d); } static void remove_pte_table(pte_t *pte_start, unsigned long addr, @@ -754,53 +823,7 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr, } } -/* - * clear the pte and potentially split the mapping helper - */ -static void __meminit split_kernel_mapping(unsigned long addr, unsigned long end, - unsigned long size, pte_t *pte) -{ - unsigned long mask = ~(size - 1); - unsigned long aligned_start = addr & mask; - unsigned long aligned_end = addr + size; - struct change_mapping_params params; - bool split_region = false; - - if ((end - addr) < size) { - /* - * We're going to clear the PTE, but not flushed - * the mapping, time to remap and flush. The - * effects if visible outside the processor or - * if we are running in code close to the - * mapping we cleared, we are in trouble. - */ - if (overlaps_kernel_text(aligned_start, addr) || - overlaps_kernel_text(end, aligned_end)) { - /* - * Hack, just return, don't pte_clear - */ - WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel " - "text, not splitting\n", addr, end); - return; - } - split_region = true; - } - - if (split_region) { - params.pte = pte; - params.start = addr; - params.end = end; - params.aligned_start = addr & ~(size - 1); - params.aligned_end = min_t(unsigned long, aligned_end, - (unsigned long)__va(memblock_end_of_DRAM())); - stop_machine(stop_machine_change_mapping, ¶ms, NULL); - return; - } - - pte_clear(&init_mm, addr, pte); -} - -static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, +static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end) { unsigned long next; @@ -815,7 +838,12 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, continue; if (pmd_is_leaf(*pmd)) { - split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd); + if (!IS_ALIGNED(addr, PMD_SIZE) || + !IS_ALIGNED(next, PMD_SIZE)) { + WARN_ONCE(1, "%s: unaligned range\n", __func__); + continue; + } + pte_clear(&init_mm, addr, (pte_t *)pmd); continue; } @@ -825,7 +853,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, } } -static void remove_pud_table(pud_t *pud_start, unsigned long addr, +static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end) { unsigned long next; @@ -840,7 +868,12 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr, continue; if (pud_is_leaf(*pud)) { - split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud); + if (!IS_ALIGNED(addr, PUD_SIZE) || + !IS_ALIGNED(next, PUD_SIZE)) { + WARN_ONCE(1, "%s: unaligned range\n", __func__); + continue; + } + pte_clear(&init_mm, addr, (pte_t *)pud); continue; } @@ -868,12 +901,19 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end) continue; if (p4d_is_leaf(*p4d)) { - split_kernel_mapping(addr, end, P4D_SIZE, (pte_t *)p4d); + if (!IS_ALIGNED(addr, P4D_SIZE) || + !IS_ALIGNED(next, P4D_SIZE)) { + WARN_ONCE(1, "%s: unaligned range\n", __func__); + continue; + } + + pte_clear(&init_mm, addr, (pte_t *)pgd); continue; } pud_base = (pud_t *)p4d_page_vaddr(*p4d); remove_pud_table(pud_base, addr, next); + free_pud_table(pud_base, p4d); } spin_unlock(&init_mm.page_table_lock); @@ -889,7 +929,8 @@ int __meminit radix__create_section_mapping(unsigned long start, return -1; } - return create_physical_mapping(__pa(start), __pa(end), nid, prot); + return create_physical_mapping(__pa(start), __pa(end), + radix_mem_block_size, nid, prot); } int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end) diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index b5cc9b23cf02..0d233763441f 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -16,6 +16,7 @@ #include #include #include +#include #define RIC_FLUSH_TLB 0 #define RIC_FLUSH_PWC 1 @@ -694,7 +695,14 @@ void radix__flush_tlb_mm(struct mm_struct *mm) goto local; } - if (cputlb_use_tlbie()) { + if (!mmu_has_feature(MMU_FTR_GTSE)) { + unsigned long tgt = H_RPTI_TARGET_CMMU; + + if (atomic_read(&mm->context.copros) > 0) + tgt |= H_RPTI_TARGET_NMMU; + pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, + H_RPTI_PAGE_ALL, 0, -1UL); + } else if (cputlb_use_tlbie()) { if (mm_needs_flush_escalation(mm)) _tlbie_pid(pid, RIC_FLUSH_ALL); else @@ -727,7 +735,16 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm) goto local; } } - if (cputlb_use_tlbie()) + if (!mmu_has_feature(MMU_FTR_GTSE)) { + unsigned long tgt = H_RPTI_TARGET_CMMU; + unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | + H_RPTI_TYPE_PRT; + + if (atomic_read(&mm->context.copros) > 0) + tgt |= H_RPTI_TARGET_NMMU; + pseries_rpt_invalidate(pid, tgt, type, + H_RPTI_PAGE_ALL, 0, -1UL); + } else if (cputlb_use_tlbie()) _tlbie_pid(pid, RIC_FLUSH_ALL); else _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL); @@ -760,7 +777,19 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, exit_flush_lazy_tlbs(mm); goto local; } - if (cputlb_use_tlbie()) + if (!mmu_has_feature(MMU_FTR_GTSE)) { + unsigned long tgt, pg_sizes, size; + + tgt = H_RPTI_TARGET_CMMU; + pg_sizes = psize_to_rpti_pgsize(psize); + size = 1UL << mmu_psize_to_shift(psize); + + if (atomic_read(&mm->context.copros) > 0) + tgt |= H_RPTI_TARGET_NMMU; + pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, + pg_sizes, vmaddr, + vmaddr + size); + } else if (cputlb_use_tlbie()) _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB); else _tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB); @@ -810,7 +839,14 @@ static inline void _tlbiel_kernel_broadcast(void) */ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) { - if (cputlb_use_tlbie()) + if (!mmu_has_feature(MMU_FTR_GTSE)) { + unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU; + unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | + H_RPTI_TYPE_PRT; + + pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL, + start, end); + } else if (cputlb_use_tlbie()) _tlbie_pid(0, RIC_FLUSH_ALL); else _tlbiel_kernel_broadcast(); @@ -864,7 +900,17 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm, nr_pages > tlb_local_single_page_flush_ceiling); } - if (full) { + if (!mmu_has_feature(MMU_FTR_GTSE) && !local) { + unsigned long tgt = H_RPTI_TARGET_CMMU; + unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize); + + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) + pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M); + if (atomic_read(&mm->context.copros) > 0) + tgt |= H_RPTI_TARGET_NMMU; + pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes, + start, end); + } else if (full) { if (local) { _tlbiel_pid(pid, RIC_FLUSH_TLB); } else { @@ -1046,7 +1092,17 @@ static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, nr_pages > tlb_local_single_page_flush_ceiling); } - if (full) { + if (!mmu_has_feature(MMU_FTR_GTSE) && !local) { + unsigned long tgt = H_RPTI_TARGET_CMMU; + unsigned long type = H_RPTI_TYPE_TLB; + unsigned long pg_sizes = psize_to_rpti_pgsize(psize); + + if (also_pwc) + type |= H_RPTI_TYPE_PWC; + if (atomic_read(&mm->context.copros) > 0) + tgt |= H_RPTI_TARGET_NMMU; + pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end); + } else if (full) { if (local) { _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); } else { @@ -1111,7 +1167,19 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) exit_flush_lazy_tlbs(mm); goto local; } - if (cputlb_use_tlbie()) + if (!mmu_has_feature(MMU_FTR_GTSE)) { + unsigned long tgt, type, pg_sizes; + + tgt = H_RPTI_TARGET_CMMU; + type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | + H_RPTI_TYPE_PRT; + pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize); + + if (atomic_read(&mm->context.copros) > 0) + tgt |= H_RPTI_TARGET_NMMU; + pseries_rpt_invalidate(pid, tgt, type, pg_sizes, + addr, end); + } else if (cputlb_use_tlbie()) _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); else _tlbiel_va_range_multicast(mm, diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index 59327cefbc6a..b2eeea39684c 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -14,6 +14,8 @@ #include #include +static int n_root_addr_cells, n_root_size_cells; + static struct drmem_lmb_info __drmem_info; struct drmem_lmb_info *drmem_info = &__drmem_info; @@ -189,12 +191,13 @@ int drmem_update_dt(void) return rc; } -static void __init read_drconf_v1_cell(struct drmem_lmb *lmb, +static void read_drconf_v1_cell(struct drmem_lmb *lmb, const __be32 **prop) { const __be32 *p = *prop; - lmb->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p); + lmb->base_addr = of_read_number(p, n_root_addr_cells); + p += n_root_addr_cells; lmb->drc_index = of_read_number(p++, 1); p++; /* skip reserved field */ @@ -205,29 +208,33 @@ static void __init read_drconf_v1_cell(struct drmem_lmb *lmb, *prop = p; } -static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm, - void (*func)(struct drmem_lmb *, const __be32 **)) +static int +__walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm, void *data, + int (*func)(struct drmem_lmb *, const __be32 **, void *)) { struct drmem_lmb lmb; u32 i, n_lmbs; + int ret = 0; n_lmbs = of_read_number(prop++, 1); - if (n_lmbs == 0) - return; - for (i = 0; i < n_lmbs; i++) { read_drconf_v1_cell(&lmb, &prop); - func(&lmb, &usm); + ret = func(&lmb, &usm, data); + if (ret) + break; } + + return ret; } -static void __init read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell, +static void read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell, const __be32 **prop) { const __be32 *p = *prop; dr_cell->seq_lmbs = of_read_number(p++, 1); - dr_cell->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p); + dr_cell->base_addr = of_read_number(p, n_root_addr_cells); + p += n_root_addr_cells; dr_cell->drc_index = of_read_number(p++, 1); dr_cell->aa_index = of_read_number(p++, 1); dr_cell->flags = of_read_number(p++, 1); @@ -235,17 +242,16 @@ static void __init read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell, *prop = p; } -static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm, - void (*func)(struct drmem_lmb *, const __be32 **)) +static int +__walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm, void *data, + int (*func)(struct drmem_lmb *, const __be32 **, void *)) { struct of_drconf_cell_v2 dr_cell; struct drmem_lmb lmb; u32 i, j, lmb_sets; + int ret = 0; lmb_sets = of_read_number(prop++, 1); - if (lmb_sets == 0) - return; - for (i = 0; i < lmb_sets; i++) { read_drconf_v2_cell(&dr_cell, &prop); @@ -259,21 +265,29 @@ static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm, lmb.aa_index = dr_cell.aa_index; lmb.flags = dr_cell.flags; - func(&lmb, &usm); + ret = func(&lmb, &usm, data); + if (ret) + break; } } + + return ret; } #ifdef CONFIG_PPC_PSERIES -void __init walk_drmem_lmbs_early(unsigned long node, - void (*func)(struct drmem_lmb *, const __be32 **)) +int __init walk_drmem_lmbs_early(unsigned long node, void *data, + int (*func)(struct drmem_lmb *, const __be32 **, void *)) { const __be32 *prop, *usm; - int len; + int len, ret = -ENODEV; prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len); if (!prop || len < dt_root_size_cells * sizeof(__be32)) - return; + return ret; + + /* Get the address & size cells */ + n_root_addr_cells = dt_root_addr_cells; + n_root_size_cells = dt_root_size_cells; drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop); @@ -281,20 +295,21 @@ void __init walk_drmem_lmbs_early(unsigned long node, prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &len); if (prop) { - __walk_drmem_v1_lmbs(prop, usm, func); + ret = __walk_drmem_v1_lmbs(prop, usm, data, func); } else { prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory-v2", &len); if (prop) - __walk_drmem_v2_lmbs(prop, usm, func); + ret = __walk_drmem_v2_lmbs(prop, usm, data, func); } memblock_dump_all(); + return ret; } #endif -static int __init init_drmem_lmb_size(struct device_node *dn) +static int init_drmem_lmb_size(struct device_node *dn) { const __be32 *prop; int len; @@ -303,12 +318,12 @@ static int __init init_drmem_lmb_size(struct device_node *dn) return 0; prop = of_get_property(dn, "ibm,lmb-size", &len); - if (!prop || len < dt_root_size_cells * sizeof(__be32)) { + if (!prop || len < n_root_size_cells * sizeof(__be32)) { pr_info("Could not determine LMB size\n"); return -1; } - drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop); + drmem_info->lmb_size = of_read_number(prop, n_root_size_cells); return 0; } @@ -329,24 +344,36 @@ static const __be32 *of_get_usable_memory(struct device_node *dn) return prop; } -void __init walk_drmem_lmbs(struct device_node *dn, - void (*func)(struct drmem_lmb *, const __be32 **)) +int walk_drmem_lmbs(struct device_node *dn, void *data, + int (*func)(struct drmem_lmb *, const __be32 **, void *)) { const __be32 *prop, *usm; + int ret = -ENODEV; + + if (!of_root) + return ret; + + /* Get the address & size cells */ + of_node_get(of_root); + n_root_addr_cells = of_n_addr_cells(of_root); + n_root_size_cells = of_n_size_cells(of_root); + of_node_put(of_root); if (init_drmem_lmb_size(dn)) - return; + return ret; usm = of_get_usable_memory(dn); prop = of_get_property(dn, "ibm,dynamic-memory", NULL); if (prop) { - __walk_drmem_v1_lmbs(prop, usm, func); + ret = __walk_drmem_v1_lmbs(prop, usm, data, func); } else { prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL); if (prop) - __walk_drmem_v2_lmbs(prop, usm, func); + ret = __walk_drmem_v2_lmbs(prop, usm, data, func); } + + return ret; } static void __init init_drmem_v1_lmbs(const __be32 *prop) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 641fc5f3d7dd..925a7231abb3 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -42,39 +42,7 @@ #include #include -/* - * Check whether the instruction inst is a store using - * an update addressing form which will update r1. - */ -static bool store_updates_sp(struct ppc_inst inst) -{ - /* check for 1 in the rA field */ - if (((ppc_inst_val(inst) >> 16) & 0x1f) != 1) - return false; - /* check major opcode */ - switch (ppc_inst_primary_opcode(inst)) { - case OP_STWU: - case OP_STBU: - case OP_STHU: - case OP_STFSU: - case OP_STFDU: - return true; - case OP_STD: /* std or stdu */ - return (ppc_inst_val(inst) & 3) == 1; - case OP_31: - /* check minor opcode */ - switch ((ppc_inst_val(inst) >> 1) & 0x3ff) { - case OP_31_XOP_STDUX: - case OP_31_XOP_STWUX: - case OP_31_XOP_STBUX: - case OP_31_XOP_STHUX: - case OP_31_XOP_STFSUX: - case OP_31_XOP_STFDUX: - return true; - } - } - return false; -} + /* * do_page_fault error handling helpers */ @@ -267,54 +235,6 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, return false; } -static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, - struct vm_area_struct *vma, unsigned int flags, - bool *must_retry) -{ - /* - * N.B. The POWER/Open ABI allows programs to access up to - * 288 bytes below the stack pointer. - * The kernel signal delivery code writes up to about 1.5kB - * below the stack pointer (r1) before decrementing it. - * The exec code can write slightly over 640kB to the stack - * before setting the user r1. Thus we allow the stack to - * expand to 1MB without further checks. - */ - if (address + 0x100000 < vma->vm_end) { - struct ppc_inst __user *nip = (struct ppc_inst __user *)regs->nip; - /* get user regs even if this fault is in kernel mode */ - struct pt_regs *uregs = current->thread.regs; - if (uregs == NULL) - return true; - - /* - * A user-mode access to an address a long way below - * the stack pointer is only valid if the instruction - * is one which would update the stack pointer to the - * address accessed if the instruction completed, - * i.e. either stwu rs,n(r1) or stwux rs,r1,rb - * (or the byte, halfword, float or double forms). - * - * If we don't check this then any write to the area - * between the last mapped region and the stack will - * expand the stack rather than segfaulting. - */ - if (address + 2048 >= uregs->gpr[1]) - return false; - - if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) && - access_ok(nip, sizeof(*nip))) { - struct ppc_inst inst; - - if (!probe_user_read_inst(&inst, nip)) - return !store_updates_sp(inst); - *must_retry = true; - } - return true; - } - return false; -} - #ifdef CONFIG_PPC_MEM_KEYS static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey, struct vm_area_struct *vma) @@ -480,7 +400,6 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, int is_user = user_mode(regs); int is_write = page_fault_is_write(error_code); vm_fault_t fault, major = 0; - bool must_retry = false; bool kprobe_fault = kprobe_page_fault(regs, 11); if (unlikely(debugger_fault_handler(regs) || kprobe_fault)) @@ -569,30 +488,15 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, vma = find_vma(mm, address); if (unlikely(!vma)) return bad_area(regs, address); - if (likely(vma->vm_start <= address)) - goto good_area; - if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) - return bad_area(regs, address); - /* The stack is being expanded, check if it's valid */ - if (unlikely(bad_stack_expansion(regs, address, vma, flags, - &must_retry))) { - if (!must_retry) + if (unlikely(vma->vm_start > address)) { + if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) return bad_area(regs, address); - mmap_read_unlock(mm); - if (fault_in_pages_readable((const char __user *)regs->nip, - sizeof(unsigned int))) - return bad_area_nosemaphore(regs, address); - goto retry; + if (unlikely(expand_stack(vma, address))) + return bad_area(regs, address); } - /* Try to expand it */ - if (unlikely(expand_stack(vma, address))) - return bad_area(regs, address); - -good_area: - #ifdef CONFIG_PPC_MEM_KEYS if (unlikely(access_pkey_error(is_write, is_exec, (error_code & DSISR_KEYFAULT), vma))) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index e9bfbccd975d..26292544630f 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -684,3 +684,21 @@ void flush_dcache_icache_hugepage(struct page *page) } } } + +void __init gigantic_hugetlb_cma_reserve(void) +{ + unsigned long order = 0; + + if (radix_enabled()) + order = PUD_SHIFT - PAGE_SHIFT; + else if (!firmware_has_feature(FW_FEATURE_LPAR) && mmu_psize_defs[MMU_PAGE_16G].shift) + /* + * For pseries we do use ibm,expected#pages for reserving 16G pages. + */ + order = mmu_psize_to_shift(MMU_PAGE_16G) - PAGE_SHIFT; + + if (order) { + VM_WARN_ON(order < MAX_ORDER); + hugetlb_cma_reserve(order); + } +} diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 5a5469eb3174..bf1717f8d5f4 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -171,6 +171,8 @@ void __init MMU_init(void) btext_unmap(); #endif + kasan_mmu_init(); + setup_kup(); /* Shortly after that, the entire linear mapping will be available */ diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index bc73abf0bc25..4ae5fc0ceb30 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -406,13 +406,15 @@ static void __init early_check_vec5(void) } if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] & OV5_FEAT(OV5_RADIX_GTSE))) { - pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n"); - } + cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE; + } else + cur_cpu_spec->mmu_features |= MMU_FTR_GTSE; /* Do radix anyway - the hypervisor said we had to */ cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX; } else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) { /* Hypervisor only supports hash - disable radix */ cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE; } } diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index 0760e1e754e4..fb294046e00e 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -115,24 +115,17 @@ static void __init kasan_unmap_early_shadow_vmalloc(void) unsigned long k_end = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_END); kasan_update_early_region(k_start, k_end, __pte(0)); + +#ifdef MODULES_VADDR + k_start = (unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR); + k_end = (unsigned long)kasan_mem_to_shadow((void *)MODULES_END); + kasan_update_early_region(k_start, k_end, __pte(0)); +#endif } -static void __init kasan_mmu_init(void) +void __init kasan_mmu_init(void) { int ret; - struct memblock_region *reg; - - for_each_memblock(memory, reg) { - phys_addr_t base = reg->base; - phys_addr_t top = min(base + reg->size, total_lowmem); - - if (base >= top) - continue; - - ret = kasan_init_region(__va(base), top - base); - if (ret) - panic("kasan: kasan_init_region() failed"); - } if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) || IS_ENABLED(CONFIG_KASAN_VMALLOC)) { @@ -141,12 +134,24 @@ static void __init kasan_mmu_init(void) if (ret) panic("kasan: kasan_init_shadow_page_tables() failed"); } - } void __init kasan_init(void) { - kasan_mmu_init(); + struct memblock_region *reg; + + for_each_memblock(memory, reg) { + phys_addr_t base = reg->base; + phys_addr_t top = min(base + reg->size, total_lowmem); + int ret; + + if (base >= top) + continue; + + ret = kasan_init_region(__va(base), top - base); + if (ret) + panic("kasan: kasan_init_region() failed"); + } kasan_remap_early_shadow_ro(); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index c2c11eb8dcfc..9dafc636588f 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -127,8 +127,6 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, unsigned long nr_pages = size >> PAGE_SHIFT; int rc; - resize_hpt_for_hotplug(memblock_phys_mem_size()); - start = (unsigned long)__va(start); rc = create_section_mapping(start, start + size, nid, params->pgprot); @@ -161,9 +159,6 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, * hit that section of memory */ vm_unmap_aliases(); - - if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC) - pr_warn("Hash collision while resizing HPT\n"); } #endif diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S index d5e2704d0096..bf24451f3e71 100644 --- a/arch/powerpc/mm/nohash/tlb_low_64e.S +++ b/arch/powerpc/mm/nohash/tlb_low_64e.S @@ -71,7 +71,6 @@ START_BTB_FLUSH_SECTION END_BTB_FLUSH_SECTION std r7,EX_TLB_R7(r12) #endif - TLB_MISS_PROLOG_STATS .endm .macro tlb_epilog_bolted @@ -85,7 +84,6 @@ END_BTB_FLUSH_SECTION mtcr r14 ld r14,EX_TLB_R14(r12) ld r15,EX_TLB_R15(r12) - TLB_MISS_RESTORE_STATS ld r16,EX_TLB_R16(r12) mfspr r12,SPRN_SPRG_GEN_SCRATCH .endm @@ -128,7 +126,6 @@ END_BTB_FLUSH_SECTION ori r10,r10,_PAGE_PRESENT oris r11,r10,_PAGE_ACCESSED@h - TLB_MISS_STATS_SAVE_INFO_BOLTED bne tlb_miss_kernel_bolted tlb_miss_common_bolted: @@ -209,7 +206,6 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV) tlbwe tlb_miss_done_bolted: - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK) tlb_epilog_bolted rfi @@ -229,11 +225,9 @@ tlb_miss_fault_bolted: andi. r10,r11,_PAGE_EXEC|_PAGE_BAP_SX bne itlb_miss_fault_bolted dtlb_miss_fault_bolted: - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) tlb_epilog_bolted b exc_data_storage_book3e itlb_miss_fault_bolted: - TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) tlb_epilog_bolted b exc_instruction_storage_book3e @@ -243,7 +237,6 @@ itlb_miss_fault_bolted: rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4 srdi r15,r16,60 /* get region */ - TLB_MISS_STATS_SAVE_INFO_BOLTED bne- itlb_miss_fault_bolted li r11,_PAGE_PRESENT|_PAGE_EXEC /* Base perm */ @@ -276,7 +269,6 @@ itlb_miss_fault_bolted: srdi. r15,r16,60 /* get region */ ori r16,r16,1 - TLB_MISS_STATS_SAVE_INFO_BOLTED bne tlb_miss_kernel_e6500 /* user/kernel test */ b tlb_miss_common_e6500 @@ -288,7 +280,6 @@ itlb_miss_fault_bolted: srdi. r15,r16,60 /* get region */ rldicr r16,r16,0,62 - TLB_MISS_STATS_SAVE_INFO_BOLTED bne tlb_miss_kernel_e6500 /* user vs kernel check */ /* @@ -460,7 +451,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT) .endm tlb_unlock_e6500 - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK) tlb_epilog_bolted rfi @@ -519,11 +509,9 @@ tlb_miss_fault_e6500: andi. r16,r16,1 bne itlb_miss_fault_e6500 dtlb_miss_fault_e6500: - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) tlb_epilog_bolted b exc_data_storage_book3e itlb_miss_fault_e6500: - TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) tlb_epilog_bolted b exc_instruction_storage_book3e #endif /* CONFIG_PPC_FSL_BOOK3E */ @@ -548,7 +536,6 @@ itlb_miss_fault_e6500: mfspr r16,SPRN_DEAR /* get faulting address */ srdi r15,r16,60 /* get region */ cmpldi cr0,r15,0xc /* linear mapping ? */ - TLB_MISS_STATS_SAVE_INFO beq tlb_load_linear /* yes -> go to linear map load */ /* The page tables are mapped virtually linear. At this point, though, @@ -600,7 +587,6 @@ itlb_miss_fault_e6500: /* We got a crappy address, just fault with whatever DEAR and ESR * are here */ - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) TLB_MISS_EPILOG_ERROR b exc_data_storage_book3e @@ -624,7 +610,6 @@ itlb_miss_fault_e6500: */ srdi r15,r16,60 /* get region */ cmpldi cr0,r15,0xc /* linear mapping ? */ - TLB_MISS_STATS_SAVE_INFO beq tlb_load_linear /* yes -> go to linear map load */ /* We do the user/kernel test for the PID here along with the RW test @@ -646,7 +631,6 @@ itlb_miss_fault_e6500: beq+ normal_tlb_miss /* We got a crappy address, just fault */ - TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) TLB_MISS_EPILOG_ERROR b exc_instruction_storage_book3e @@ -745,7 +729,6 @@ normal_tlb_miss_done: * level 0 and just going back to userland. They are only needed * if you are going to take an access fault */ - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK) TLB_MISS_EPILOG_SUCCESS rfi @@ -757,11 +740,9 @@ normal_tlb_miss_access_fault: ld r15,EX_TLB_ESR(r12) mtspr SPRN_DEAR,r14 mtspr SPRN_ESR,r15 - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) TLB_MISS_EPILOG_ERROR b exc_data_storage_book3e -1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) - TLB_MISS_EPILOG_ERROR +1: TLB_MISS_EPILOG_ERROR b exc_instruction_storage_book3e @@ -899,7 +880,6 @@ virt_page_table_tlb_miss_done: 1: END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) /* Return to caller, normal case */ - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK); TLB_MISS_EPILOG_SUCCESS rfi @@ -935,18 +915,15 @@ virt_page_table_tlb_miss_fault: beq 1f mtspr SPRN_DEAR,r15 mtspr SPRN_ESR,r16 - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT); TLB_MISS_EPILOG_ERROR b exc_data_storage_book3e -1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT); - TLB_MISS_EPILOG_ERROR +1: TLB_MISS_EPILOG_ERROR b exc_instruction_storage_book3e virt_page_table_tlb_miss_whacko_fault: /* The linear fault will restart everything so ESR and DEAR will * not have been clobbered, let's just fault with what we have */ - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_FAULT); TLB_MISS_EPILOG_ERROR b exc_data_storage_book3e @@ -971,7 +948,6 @@ virt_page_table_tlb_miss_whacko_fault: mfspr r16,SPRN_DEAR /* get faulting address */ srdi r11,r16,60 /* get region */ cmpldi cr0,r11,0xc /* linear mapping ? */ - TLB_MISS_STATS_SAVE_INFO beq tlb_load_linear /* yes -> go to linear map load */ /* We do the user/kernel test for the PID here along with the RW test @@ -991,7 +967,6 @@ virt_page_table_tlb_miss_whacko_fault: /* We got a crappy address, just fault with whatever DEAR and ESR * are here */ - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT) TLB_MISS_EPILOG_ERROR b exc_data_storage_book3e @@ -1015,7 +990,6 @@ virt_page_table_tlb_miss_whacko_fault: */ srdi r11,r16,60 /* get region */ cmpldi cr0,r11,0xc /* linear mapping ? */ - TLB_MISS_STATS_SAVE_INFO beq tlb_load_linear /* yes -> go to linear map load */ /* We do the user/kernel test for the PID here along with the RW test @@ -1033,7 +1007,6 @@ virt_page_table_tlb_miss_whacko_fault: beq+ htw_tlb_miss /* We got a crappy address, just fault */ - TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT) TLB_MISS_EPILOG_ERROR b exc_instruction_storage_book3e @@ -1130,7 +1103,6 @@ htw_tlb_miss_done: * level 0 and just going back to userland. They are only needed * if you are going to take an access fault */ - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK) TLB_MISS_EPILOG_SUCCESS rfi @@ -1142,11 +1114,9 @@ htw_tlb_miss_fault: beq 1f mtspr SPRN_DEAR,r16 mtspr SPRN_ESR,r14 - TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT) TLB_MISS_EPILOG_ERROR b exc_data_storage_book3e -1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT) - TLB_MISS_EPILOG_ERROR +1: TLB_MISS_EPILOG_ERROR b exc_instruction_storage_book3e /* @@ -1221,7 +1191,6 @@ tlb_load_linear_done: * We do that because we can't resume a fault within a TLB * miss handler, due to MAS and TLB reservation being clobbered. */ - TLB_MISS_STATS_X(MMSTAT_TLB_MISS_LINEAR) TLB_MISS_EPILOG_ERROR rfi @@ -1233,13 +1202,3 @@ tlb_load_linear_fault: b exc_data_storage_book3e 1: TLB_MISS_EPILOG_ERROR_SPECIAL b exc_instruction_storage_book3e - - -#ifdef CONFIG_BOOK3E_MMU_TLB_STATS -.tlb_stat_inc: -1: ldarx r8,0,r9 - addi r8,r8,1 - stdcx. r8,0,r9 - bne- 1b - blr -#endif diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 9fcf2d195830..058fee9a0835 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -221,7 +221,8 @@ static void initialize_distance_lookup_table(int nid, } } -/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa +/* + * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA * info is found. */ static int associativity_to_nid(const __be32 *associativity) @@ -235,7 +236,7 @@ static int associativity_to_nid(const __be32 *associativity) nid = of_read_number(&associativity[min_common_depth], 1); /* POWER4 LPAR uses 0xffff as invalid node */ - if (nid == 0xffff || nid >= MAX_NUMNODES) + if (nid == 0xffff || nid >= nr_node_ids) nid = NUMA_NO_NODE; if (nid > 0 && @@ -448,7 +449,7 @@ static int of_drconf_to_nid_single(struct drmem_lmb *lmb) index = lmb->aa_index * aa.array_sz + min_common_depth - 1; nid = of_read_number(&aa.arrays[index], 1); - if (nid == 0xffff || nid >= MAX_NUMNODES) + if (nid == 0xffff || nid >= nr_node_ids) nid = default_nid; if (nid > 0) { @@ -644,8 +645,9 @@ static inline int __init read_usm_ranges(const __be32 **usm) * Extract NUMA information from the ibm,dynamic-reconfiguration-memory * node. This assumes n_mem_{addr,size}_cells have been set. */ -static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb, - const __be32 **usm) +static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb, + const __be32 **usm, + void *data) { unsigned int ranges, is_kexec_kdump = 0; unsigned long base, size, sz; @@ -657,7 +659,7 @@ static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb, */ if ((lmb->flags & DRCONF_MEM_RESERVED) || !(lmb->flags & DRCONF_MEM_ASSIGNED)) - return; + return 0; if (*usm) is_kexec_kdump = 1; @@ -669,7 +671,7 @@ static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb, if (is_kexec_kdump) { ranges = read_usm_ranges(usm); if (!ranges) /* there are no (base, size) duple */ - return; + return 0; } do { @@ -686,6 +688,8 @@ static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb, if (sz) memblock_set_node(base, sz, &memblock.memory, nid); } while (--ranges); + + return 0; } static int __init parse_numa_properties(void) @@ -787,7 +791,7 @@ static int __init parse_numa_properties(void) */ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); if (memory) { - walk_drmem_lmbs(memory, numa_setup_drmem_lmb); + walk_drmem_lmbs(memory, NULL, numa_setup_drmem_lmb); of_node_put(memory); } @@ -984,28 +988,6 @@ static int __init early_numa(char *p) } early_param("numa", early_numa); -/* - * The platform can inform us through one of several mechanisms - * (post-migration device tree updates, PRRN or VPHN) that the NUMA - * assignment of a resource has changed. This controls whether we act - * on that. Disabled by default. - */ -static bool topology_updates_enabled; - -static int __init early_topology_updates(char *p) -{ - if (!p) - return 0; - - if (!strcmp(p, "on")) { - pr_warn("Caution: enabling topology updates\n"); - topology_updates_enabled = true; - } - - return 0; -} -early_param("topology_updates", early_topology_updates); - #ifdef CONFIG_MEMORY_HOTPLUG /* * Find the node associated with a hot added memory section for @@ -1144,97 +1126,8 @@ u64 memory_hotplug_max(void) /* Virtual Processor Home Node (VPHN) support */ #ifdef CONFIG_PPC_SPLPAR -struct topology_update_data { - struct topology_update_data *next; - unsigned int cpu; - int old_nid; - int new_nid; -}; - -#define TOPOLOGY_DEF_TIMER_SECS 60 - -static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS]; -static cpumask_t cpu_associativity_changes_mask; -static int vphn_enabled; -static int prrn_enabled; -static void reset_topology_timer(void); -static int topology_timer_secs = 1; static int topology_inited; -/* - * Change polling interval for associativity changes. - */ -int timed_topology_update(int nsecs) -{ - if (vphn_enabled) { - if (nsecs > 0) - topology_timer_secs = nsecs; - else - topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS; - - reset_topology_timer(); - } - - return 0; -} - -/* - * Store the current values of the associativity change counters in the - * hypervisor. - */ -static void setup_cpu_associativity_change_counters(void) -{ - int cpu; - - /* The VPHN feature supports a maximum of 8 reference points */ - BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8); - - for_each_possible_cpu(cpu) { - int i; - u8 *counts = vphn_cpu_change_counts[cpu]; - volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts; - - for (i = 0; i < distance_ref_points_depth; i++) - counts[i] = hypervisor_counts[i]; - } -} - -/* - * The hypervisor maintains a set of 8 associativity change counters in - * the VPA of each cpu that correspond to the associativity levels in the - * ibm,associativity-reference-points property. When an associativity - * level changes, the corresponding counter is incremented. - * - * Set a bit in cpu_associativity_changes_mask for each cpu whose home - * node associativity levels have changed. - * - * Returns the number of cpus with unhandled associativity changes. - */ -static int update_cpu_associativity_changes_mask(void) -{ - int cpu; - cpumask_t *changes = &cpu_associativity_changes_mask; - - for_each_possible_cpu(cpu) { - int i, changed = 0; - u8 *counts = vphn_cpu_change_counts[cpu]; - volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts; - - for (i = 0; i < distance_ref_points_depth; i++) { - if (hypervisor_counts[i] != counts[i]) { - counts[i] = hypervisor_counts[i]; - changed = 1; - } - } - if (changed) { - cpumask_or(changes, changes, cpu_sibling_mask(cpu)); - cpu = cpu_last_thread_sibling(cpu); - } - } - - return cpumask_weight(changes); -} - /* * Retrieve the new associativity information for a virtual processor's * home node. @@ -1250,7 +1143,6 @@ static long vphn_get_associativity(unsigned long cpu, switch (rc) { case H_SUCCESS: dbg("VPHN hcall succeeded. Reset polling...\n"); - timed_topology_update(0); goto out; case H_FUNCTION: @@ -1269,8 +1161,6 @@ static long vphn_get_associativity(unsigned long cpu, , rc); break; } - - stop_topology_update(); out: return rc; } @@ -1314,380 +1204,8 @@ int find_and_online_cpu_nid(int cpu) return new_nid; } -/* - * Update the CPU maps and sysfs entries for a single CPU when its NUMA - * characteristics change. This function doesn't perform any locking and is - * only safe to call from stop_machine(). - */ -static int update_cpu_topology(void *data) -{ - struct topology_update_data *update; - unsigned long cpu; - - if (!data) - return -EINVAL; - - cpu = smp_processor_id(); - - for (update = data; update; update = update->next) { - int new_nid = update->new_nid; - if (cpu != update->cpu) - continue; - - unmap_cpu_from_node(cpu); - map_cpu_to_node(cpu, new_nid); - set_cpu_numa_node(cpu, new_nid); - set_cpu_numa_mem(cpu, local_memory_node(new_nid)); - vdso_getcpu_init(); - } - - return 0; -} - -static int update_lookup_table(void *data) -{ - struct topology_update_data *update; - - if (!data) - return -EINVAL; - - /* - * Upon topology update, the numa-cpu lookup table needs to be updated - * for all threads in the core, including offline CPUs, to ensure that - * future hotplug operations respect the cpu-to-node associativity - * properly. - */ - for (update = data; update; update = update->next) { - int nid, base, j; - - nid = update->new_nid; - base = cpu_first_thread_sibling(update->cpu); - - for (j = 0; j < threads_per_core; j++) { - update_numa_cpu_lookup_table(base + j, nid); - } - } - - return 0; -} - -/* - * Update the node maps and sysfs entries for each cpu whose home node - * has changed. Returns 1 when the topology has changed, and 0 otherwise. - * - * cpus_locked says whether we already hold cpu_hotplug_lock. - */ -int numa_update_cpu_topology(bool cpus_locked) -{ - unsigned int cpu, sibling, changed = 0; - struct topology_update_data *updates, *ud; - cpumask_t updated_cpus; - struct device *dev; - int weight, new_nid, i = 0; - - if (!prrn_enabled && !vphn_enabled && topology_inited) - return 0; - - weight = cpumask_weight(&cpu_associativity_changes_mask); - if (!weight) - return 0; - - updates = kcalloc(weight, sizeof(*updates), GFP_KERNEL); - if (!updates) - return 0; - - cpumask_clear(&updated_cpus); - - for_each_cpu(cpu, &cpu_associativity_changes_mask) { - /* - * If siblings aren't flagged for changes, updates list - * will be too short. Skip on this update and set for next - * update. - */ - if (!cpumask_subset(cpu_sibling_mask(cpu), - &cpu_associativity_changes_mask)) { - pr_info("Sibling bits not set for associativity " - "change, cpu%d\n", cpu); - cpumask_or(&cpu_associativity_changes_mask, - &cpu_associativity_changes_mask, - cpu_sibling_mask(cpu)); - cpu = cpu_last_thread_sibling(cpu); - continue; - } - - new_nid = find_and_online_cpu_nid(cpu); - - if (new_nid == numa_cpu_lookup_table[cpu]) { - cpumask_andnot(&cpu_associativity_changes_mask, - &cpu_associativity_changes_mask, - cpu_sibling_mask(cpu)); - dbg("Assoc chg gives same node %d for cpu%d\n", - new_nid, cpu); - cpu = cpu_last_thread_sibling(cpu); - continue; - } - - for_each_cpu(sibling, cpu_sibling_mask(cpu)) { - ud = &updates[i++]; - ud->next = &updates[i]; - ud->cpu = sibling; - ud->new_nid = new_nid; - ud->old_nid = numa_cpu_lookup_table[sibling]; - cpumask_set_cpu(sibling, &updated_cpus); - } - cpu = cpu_last_thread_sibling(cpu); - } - - /* - * Prevent processing of 'updates' from overflowing array - * where last entry filled in a 'next' pointer. - */ - if (i) - updates[i-1].next = NULL; - - pr_debug("Topology update for the following CPUs:\n"); - if (cpumask_weight(&updated_cpus)) { - for (ud = &updates[0]; ud; ud = ud->next) { - pr_debug("cpu %d moving from node %d " - "to %d\n", ud->cpu, - ud->old_nid, ud->new_nid); - } - } - - /* - * In cases where we have nothing to update (because the updates list - * is too short or because the new topology is same as the old one), - * skip invoking update_cpu_topology() via stop-machine(). This is - * necessary (and not just a fast-path optimization) since stop-machine - * can end up electing a random CPU to run update_cpu_topology(), and - * thus trick us into setting up incorrect cpu-node mappings (since - * 'updates' is kzalloc()'ed). - * - * And for the similar reason, we will skip all the following updating. - */ - if (!cpumask_weight(&updated_cpus)) - goto out; - - if (cpus_locked) - stop_machine_cpuslocked(update_cpu_topology, &updates[0], - &updated_cpus); - else - stop_machine(update_cpu_topology, &updates[0], &updated_cpus); - - /* - * Update the numa-cpu lookup table with the new mappings, even for - * offline CPUs. It is best to perform this update from the stop- - * machine context. - */ - if (cpus_locked) - stop_machine_cpuslocked(update_lookup_table, &updates[0], - cpumask_of(raw_smp_processor_id())); - else - stop_machine(update_lookup_table, &updates[0], - cpumask_of(raw_smp_processor_id())); - - for (ud = &updates[0]; ud; ud = ud->next) { - unregister_cpu_under_node(ud->cpu, ud->old_nid); - register_cpu_under_node(ud->cpu, ud->new_nid); - - dev = get_cpu_device(ud->cpu); - if (dev) - kobject_uevent(&dev->kobj, KOBJ_CHANGE); - cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask); - changed = 1; - } - -out: - kfree(updates); - return changed; -} - -int arch_update_cpu_topology(void) -{ - return numa_update_cpu_topology(true); -} - -static void topology_work_fn(struct work_struct *work) -{ - rebuild_sched_domains(); -} -static DECLARE_WORK(topology_work, topology_work_fn); - -static void topology_schedule_update(void) -{ - schedule_work(&topology_work); -} - -static void topology_timer_fn(struct timer_list *unused) -{ - if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask)) - topology_schedule_update(); - else if (vphn_enabled) { - if (update_cpu_associativity_changes_mask() > 0) - topology_schedule_update(); - reset_topology_timer(); - } -} -static struct timer_list topology_timer; - -static void reset_topology_timer(void) -{ - if (vphn_enabled) - mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ); -} - -#ifdef CONFIG_SMP - -static int dt_update_callback(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct of_reconfig_data *update = data; - int rc = NOTIFY_DONE; - - switch (action) { - case OF_RECONFIG_UPDATE_PROPERTY: - if (of_node_is_type(update->dn, "cpu") && - !of_prop_cmp(update->prop->name, "ibm,associativity")) { - u32 core_id; - of_property_read_u32(update->dn, "reg", &core_id); - rc = dlpar_cpu_readd(core_id); - rc = NOTIFY_OK; - } - break; - } - - return rc; -} - -static struct notifier_block dt_update_nb = { - .notifier_call = dt_update_callback, -}; - -#endif - -/* - * Start polling for associativity changes. - */ -int start_topology_update(void) -{ - int rc = 0; - - if (!topology_updates_enabled) - return 0; - - if (firmware_has_feature(FW_FEATURE_PRRN)) { - if (!prrn_enabled) { - prrn_enabled = 1; -#ifdef CONFIG_SMP - rc = of_reconfig_notifier_register(&dt_update_nb); -#endif - } - } - if (firmware_has_feature(FW_FEATURE_VPHN) && - lppaca_shared_proc(get_lppaca())) { - if (!vphn_enabled) { - vphn_enabled = 1; - setup_cpu_associativity_change_counters(); - timer_setup(&topology_timer, topology_timer_fn, - TIMER_DEFERRABLE); - reset_topology_timer(); - } - } - - pr_info("Starting topology update%s%s\n", - (prrn_enabled ? " prrn_enabled" : ""), - (vphn_enabled ? " vphn_enabled" : "")); - - return rc; -} - -/* - * Disable polling for VPHN associativity changes. - */ -int stop_topology_update(void) -{ - int rc = 0; - - if (!topology_updates_enabled) - return 0; - - if (prrn_enabled) { - prrn_enabled = 0; -#ifdef CONFIG_SMP - rc = of_reconfig_notifier_unregister(&dt_update_nb); -#endif - } - if (vphn_enabled) { - vphn_enabled = 0; - rc = del_timer_sync(&topology_timer); - } - - pr_info("Stopping topology update\n"); - - return rc; -} - -int prrn_is_enabled(void) -{ - return prrn_enabled; -} - -static int topology_read(struct seq_file *file, void *v) -{ - if (vphn_enabled || prrn_enabled) - seq_puts(file, "on\n"); - else - seq_puts(file, "off\n"); - - return 0; -} - -static int topology_open(struct inode *inode, struct file *file) -{ - return single_open(file, topology_read, NULL); -} - -static ssize_t topology_write(struct file *file, const char __user *buf, - size_t count, loff_t *off) -{ - char kbuf[4]; /* "on" or "off" plus null. */ - int read_len; - - read_len = count < 3 ? count : 3; - if (copy_from_user(kbuf, buf, read_len)) - return -EINVAL; - - kbuf[read_len] = '\0'; - - if (!strncmp(kbuf, "on", 2)) { - topology_updates_enabled = true; - start_topology_update(); - } else if (!strncmp(kbuf, "off", 3)) { - stop_topology_update(); - topology_updates_enabled = false; - } else - return -EINVAL; - - return count; -} - -static const struct proc_ops topology_proc_ops = { - .proc_read = seq_read, - .proc_write = topology_write, - .proc_open = topology_open, - .proc_release = single_release, -}; - static int topology_update_init(void) { - start_topology_update(); - - if (vphn_enabled) - topology_schedule_update(); - - if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_proc_ops)) - return -ENOMEM; - topology_inited = 1; return 0; } diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c index ee4bd6d38602..97ae4935da79 100644 --- a/arch/powerpc/mm/pgtable-frag.c +++ b/arch/powerpc/mm/pgtable-frag.c @@ -110,6 +110,9 @@ void pte_fragment_free(unsigned long *table, int kernel) { struct page *page = virt_to_page(table); + if (PageReserved(page)) + return free_reserved_page(page); + BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0); if (atomic_dec_and_test(&page->pt_frag_refcount)) { if (!kernel) diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c index a2c33efc7ce8..5b8bd34cd3a1 100644 --- a/arch/powerpc/mm/ptdump/hashpagetable.c +++ b/arch/powerpc/mm/ptdump/hashpagetable.c @@ -258,7 +258,7 @@ static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 * for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) { lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes); - if (lpar_rc != H_SUCCESS) + if (lpar_rc) continue; for (j = 0; j < 4; j++) { if (HPTE_V_COMPARE(ptes[j].v, want_v) && diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index de6e05ef871c..c911cd757f7d 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -74,6 +74,10 @@ struct addr_marker { static struct addr_marker address_markers[] = { { 0, "Start of kernel VM" }, +#ifdef MODULES_VADDR + { 0, "modules start" }, + { 0, "modules end" }, +#endif { 0, "vmalloc() Area" }, { 0, "vmalloc() End" }, #ifdef CONFIG_PPC64 @@ -195,6 +199,24 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) st->wx_pages += (addr - st->start_address) / PAGE_SIZE; } +static void note_page_update_state(struct pg_state *st, unsigned long addr, + unsigned int level, u64 val, unsigned long page_size) +{ + u64 flag = val & pg_level[level].mask; + u64 pa = val & PTE_RPN_MASK; + + st->level = level; + st->current_flags = flag; + st->start_address = addr; + st->start_pa = pa; + st->page_size = page_size; + + while (addr >= st->marker[1].start_address) { + st->marker++; + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } +} + static void note_page(struct pg_state *st, unsigned long addr, unsigned int level, u64 val, unsigned long page_size) { @@ -203,13 +225,8 @@ static void note_page(struct pg_state *st, unsigned long addr, /* At first no level is set */ if (!st->level) { - st->level = level; - st->current_flags = flag; - st->start_address = addr; - st->start_pa = pa; - st->last_pa = pa; - st->page_size = page_size; pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + note_page_update_state(st, addr, level, val, page_size); /* * Dump the section of virtual memory when: * - the PTE flags from one entry to the next differs. @@ -241,19 +258,9 @@ static void note_page(struct pg_state *st, unsigned long addr, * Address indicates we have passed the end of the * current section of virtual memory */ - while (addr >= st->marker[1].start_address) { - st->marker++; - pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); - } - st->start_address = addr; - st->start_pa = pa; - st->last_pa = pa; - st->page_size = page_size; - st->current_flags = flag; - st->level = level; - } else { - st->last_pa = pa; + note_page_update_state(st, addr, level, val, page_size); } + st->last_pa = pa; } static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) @@ -348,7 +355,15 @@ static void populate_markers(void) { int i = 0; +#ifdef CONFIG_PPC64 address_markers[i++].start_address = PAGE_OFFSET; +#else + address_markers[i++].start_address = TASK_SIZE; +#endif +#ifdef MODULES_VADDR + address_markers[i++].start_address = MODULES_VADDR; + address_markers[i++].start_address = MODULES_END; +#endif address_markers[i++].start_address = VMALLOC_START; address_markers[i++].start_address = VMALLOC_END; #ifdef CONFIG_PPC64 @@ -385,7 +400,7 @@ static int ptdump_show(struct seq_file *m, void *v) struct pg_state st = { .seq = m, .marker = address_markers, - .start_address = PAGE_OFFSET, + .start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE, }; #ifdef CONFIG_PPC64 @@ -429,7 +444,7 @@ void ptdump_check_wx(void) .seq = NULL, .marker = address_markers, .check_wx = true, - .start_address = PAGE_OFFSET, + .start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE, }; #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 55d4377ccfae..d0a67a1bbaf1 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -11,6 +11,7 @@ #ifndef __ASSEMBLY__ #include +#include #ifdef PPC64_ELF_ABI_v1 #define FUNCTION_DESCR_SIZE 24 @@ -18,167 +19,10 @@ #define FUNCTION_DESCR_SIZE 0 #endif -/* - * 16-bit immediate helper macros: HA() is for use with sign-extending instrs - * (e.g. LD, ADDI). If the bottom 16 bits is "-ve", add another bit into the - * top half to negate the effect (i.e. 0xffff + 1 = 0x(1)0000). - */ -#define IMM_H(i) ((uintptr_t)(i)>>16) -#define IMM_HA(i) (((uintptr_t)(i)>>16) + \ - (((uintptr_t)(i) & 0x8000) >> 15)) -#define IMM_L(i) ((uintptr_t)(i) & 0xffff) - #define PLANT_INSTR(d, idx, instr) \ do { if (d) { (d)[idx] = instr; } idx++; } while (0) #define EMIT(instr) PLANT_INSTR(image, ctx->idx, instr) -#define PPC_NOP() EMIT(PPC_INST_NOP) -#define PPC_BLR() EMIT(PPC_INST_BLR) -#define PPC_BLRL() EMIT(PPC_INST_BLRL) -#define PPC_MTLR(r) EMIT(PPC_INST_MTLR | ___PPC_RT(r)) -#define PPC_BCTR() EMIT(PPC_INST_BCTR) -#define PPC_MTCTR(r) EMIT(PPC_INST_MTCTR | ___PPC_RT(r)) -#define PPC_ADDI(d, a, i) EMIT(PPC_INST_ADDI | ___PPC_RT(d) | \ - ___PPC_RA(a) | IMM_L(i)) -#define PPC_MR(d, a) PPC_OR(d, a, a) -#define PPC_LI(r, i) PPC_ADDI(r, 0, i) -#define PPC_ADDIS(d, a, i) EMIT(PPC_INST_ADDIS | \ - ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) -#define PPC_LIS(r, i) PPC_ADDIS(r, 0, i) -#define PPC_STD(r, base, i) EMIT(PPC_INST_STD | ___PPC_RS(r) | \ - ___PPC_RA(base) | ((i) & 0xfffc)) -#define PPC_STDX(r, base, b) EMIT(PPC_INST_STDX | ___PPC_RS(r) | \ - ___PPC_RA(base) | ___PPC_RB(b)) -#define PPC_STDU(r, base, i) EMIT(PPC_INST_STDU | ___PPC_RS(r) | \ - ___PPC_RA(base) | ((i) & 0xfffc)) -#define PPC_STW(r, base, i) EMIT(PPC_INST_STW | ___PPC_RS(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define PPC_STWU(r, base, i) EMIT(PPC_INST_STWU | ___PPC_RS(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define PPC_STH(r, base, i) EMIT(PPC_INST_STH | ___PPC_RS(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define PPC_STB(r, base, i) EMIT(PPC_INST_STB | ___PPC_RS(r) | \ - ___PPC_RA(base) | IMM_L(i)) - -#define PPC_LBZ(r, base, i) EMIT(PPC_INST_LBZ | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define PPC_LD(r, base, i) EMIT(PPC_INST_LD | ___PPC_RT(r) | \ - ___PPC_RA(base) | ((i) & 0xfffc)) -#define PPC_LDX(r, base, b) EMIT(PPC_INST_LDX | ___PPC_RT(r) | \ - ___PPC_RA(base) | ___PPC_RB(b)) -#define PPC_LWZ(r, base, i) EMIT(PPC_INST_LWZ | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define PPC_LHZ(r, base, i) EMIT(PPC_INST_LHZ | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_L(i)) -#define PPC_LHBRX(r, base, b) EMIT(PPC_INST_LHBRX | ___PPC_RT(r) | \ - ___PPC_RA(base) | ___PPC_RB(b)) -#define PPC_LDBRX(r, base, b) EMIT(PPC_INST_LDBRX | ___PPC_RT(r) | \ - ___PPC_RA(base) | ___PPC_RB(b)) - -#define PPC_BPF_LDARX(t, a, b, eh) EMIT(PPC_INST_LDARX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b) | \ - __PPC_EH(eh)) -#define PPC_BPF_LWARX(t, a, b, eh) EMIT(PPC_INST_LWARX | ___PPC_RT(t) | \ - ___PPC_RA(a) | ___PPC_RB(b) | \ - __PPC_EH(eh)) -#define PPC_BPF_STWCX(s, a, b) EMIT(PPC_INST_STWCX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_BPF_STDCX(s, a, b) EMIT(PPC_INST_STDCX | ___PPC_RS(s) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_CMPWI(a, i) EMIT(PPC_INST_CMPWI | ___PPC_RA(a) | IMM_L(i)) -#define PPC_CMPDI(a, i) EMIT(PPC_INST_CMPDI | ___PPC_RA(a) | IMM_L(i)) -#define PPC_CMPW(a, b) EMIT(PPC_INST_CMPW | ___PPC_RA(a) | \ - ___PPC_RB(b)) -#define PPC_CMPD(a, b) EMIT(PPC_INST_CMPD | ___PPC_RA(a) | \ - ___PPC_RB(b)) -#define PPC_CMPLWI(a, i) EMIT(PPC_INST_CMPLWI | ___PPC_RA(a) | IMM_L(i)) -#define PPC_CMPLDI(a, i) EMIT(PPC_INST_CMPLDI | ___PPC_RA(a) | IMM_L(i)) -#define PPC_CMPLW(a, b) EMIT(PPC_INST_CMPLW | ___PPC_RA(a) | \ - ___PPC_RB(b)) -#define PPC_CMPLD(a, b) EMIT(PPC_INST_CMPLD | ___PPC_RA(a) | \ - ___PPC_RB(b)) - -#define PPC_SUB(d, a, b) EMIT(PPC_INST_SUB | ___PPC_RT(d) | \ - ___PPC_RB(a) | ___PPC_RA(b)) -#define PPC_ADD(d, a, b) EMIT(PPC_INST_ADD | ___PPC_RT(d) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_MULD(d, a, b) EMIT(PPC_INST_MULLD | ___PPC_RT(d) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_MULW(d, a, b) EMIT(PPC_INST_MULLW | ___PPC_RT(d) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_MULHWU(d, a, b) EMIT(PPC_INST_MULHWU | ___PPC_RT(d) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_MULI(d, a, i) EMIT(PPC_INST_MULLI | ___PPC_RT(d) | \ - ___PPC_RA(a) | IMM_L(i)) -#define PPC_DIVWU(d, a, b) EMIT(PPC_INST_DIVWU | ___PPC_RT(d) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_DIVDU(d, a, b) EMIT(PPC_INST_DIVDU | ___PPC_RT(d) | \ - ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_AND(d, a, b) EMIT(PPC_INST_AND | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(b)) -#define PPC_ANDI(d, a, i) EMIT(PPC_INST_ANDI | ___PPC_RA(d) | \ - ___PPC_RS(a) | IMM_L(i)) -#define PPC_AND_DOT(d, a, b) EMIT(PPC_INST_ANDDOT | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(b)) -#define PPC_OR(d, a, b) EMIT(PPC_INST_OR | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(b)) -#define PPC_MR(d, a) PPC_OR(d, a, a) -#define PPC_ORI(d, a, i) EMIT(PPC_INST_ORI | ___PPC_RA(d) | \ - ___PPC_RS(a) | IMM_L(i)) -#define PPC_ORIS(d, a, i) EMIT(PPC_INST_ORIS | ___PPC_RA(d) | \ - ___PPC_RS(a) | IMM_L(i)) -#define PPC_XOR(d, a, b) EMIT(PPC_INST_XOR | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(b)) -#define PPC_XORI(d, a, i) EMIT(PPC_INST_XORI | ___PPC_RA(d) | \ - ___PPC_RS(a) | IMM_L(i)) -#define PPC_XORIS(d, a, i) EMIT(PPC_INST_XORIS | ___PPC_RA(d) | \ - ___PPC_RS(a) | IMM_L(i)) -#define PPC_EXTSW(d, a) EMIT(PPC_INST_EXTSW | ___PPC_RA(d) | \ - ___PPC_RS(a)) -#define PPC_SLW(d, a, s) EMIT(PPC_INST_SLW | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(s)) -#define PPC_SLD(d, a, s) EMIT(PPC_INST_SLD | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(s)) -#define PPC_SRW(d, a, s) EMIT(PPC_INST_SRW | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(s)) -#define PPC_SRAW(d, a, s) EMIT(PPC_INST_SRAW | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(s)) -#define PPC_SRAWI(d, a, i) EMIT(PPC_INST_SRAWI | ___PPC_RA(d) | \ - ___PPC_RS(a) | __PPC_SH(i)) -#define PPC_SRD(d, a, s) EMIT(PPC_INST_SRD | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(s)) -#define PPC_SRAD(d, a, s) EMIT(PPC_INST_SRAD | ___PPC_RA(d) | \ - ___PPC_RS(a) | ___PPC_RB(s)) -#define PPC_SRADI(d, a, i) EMIT(PPC_INST_SRADI | ___PPC_RA(d) | \ - ___PPC_RS(a) | __PPC_SH64(i)) -#define PPC_RLWINM(d, a, i, mb, me) EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \ - ___PPC_RS(a) | __PPC_SH(i) | \ - __PPC_MB(mb) | __PPC_ME(me)) -#define PPC_RLWINM_DOT(d, a, i, mb, me) EMIT(PPC_INST_RLWINM_DOT | \ - ___PPC_RA(d) | ___PPC_RS(a) | \ - __PPC_SH(i) | __PPC_MB(mb) | \ - __PPC_ME(me)) -#define PPC_RLWIMI(d, a, i, mb, me) EMIT(PPC_INST_RLWIMI | ___PPC_RA(d) | \ - ___PPC_RS(a) | __PPC_SH(i) | \ - __PPC_MB(mb) | __PPC_ME(me)) -#define PPC_RLDICL(d, a, i, mb) EMIT(PPC_INST_RLDICL | ___PPC_RA(d) | \ - ___PPC_RS(a) | __PPC_SH64(i) | \ - __PPC_MB64(mb)) -#define PPC_RLDICR(d, a, i, me) EMIT(PPC_INST_RLDICR | ___PPC_RA(d) | \ - ___PPC_RS(a) | __PPC_SH64(i) | \ - __PPC_ME64(me)) - -/* slwi = rlwinm Rx, Ry, n, 0, 31-n */ -#define PPC_SLWI(d, a, i) PPC_RLWINM(d, a, i, 0, 31-(i)) -/* srwi = rlwinm Rx, Ry, 32-n, n, 31 */ -#define PPC_SRWI(d, a, i) PPC_RLWINM(d, a, 32-(i), i, 31) -/* sldi = rldicr Rx, Ry, n, 63-n */ -#define PPC_SLDI(d, a, i) PPC_RLDICR(d, a, i, 63-(i)) -/* sldi = rldicl Rx, Ry, 64-n, n */ -#define PPC_SRDI(d, a, i) PPC_RLDICL(d, a, 64-(i), i) - -#define PPC_NEG(d, a) EMIT(PPC_INST_NEG | ___PPC_RT(d) | ___PPC_RA(a)) - /* Long jump; (unconditional 'branch') */ #define PPC_JMP(dest) EMIT(PPC_INST_BRANCH | \ (((dest) - (ctx->idx * 4)) & 0x03fffffc)) @@ -191,11 +35,11 @@ #define PPC_LI32(d, i) do { \ if ((int)(uintptr_t)(i) >= -32768 && \ (int)(uintptr_t)(i) < 32768) \ - PPC_LI(d, i); \ + EMIT(PPC_RAW_LI(d, i)); \ else { \ - PPC_LIS(d, IMM_H(i)); \ + EMIT(PPC_RAW_LIS(d, IMM_H(i))); \ if (IMM_L(i)) \ - PPC_ORI(d, d, IMM_L(i)); \ + EMIT(PPC_RAW_ORI(d, d, IMM_L(i))); \ } } while(0) #define PPC_LI64(d, i) do { \ @@ -204,19 +48,21 @@ PPC_LI32(d, i); \ else { \ if (!((uintptr_t)(i) & 0xffff800000000000ULL)) \ - PPC_LI(d, ((uintptr_t)(i) >> 32) & 0xffff); \ + EMIT(PPC_RAW_LI(d, ((uintptr_t)(i) >> 32) & \ + 0xffff)); \ else { \ - PPC_LIS(d, ((uintptr_t)(i) >> 48)); \ + EMIT(PPC_RAW_LIS(d, ((uintptr_t)(i) >> 48))); \ if ((uintptr_t)(i) & 0x0000ffff00000000ULL) \ - PPC_ORI(d, d, \ - ((uintptr_t)(i) >> 32) & 0xffff); \ + EMIT(PPC_RAW_ORI(d, d, \ + ((uintptr_t)(i) >> 32) & 0xffff)); \ } \ - PPC_SLDI(d, d, 32); \ + EMIT(PPC_RAW_SLDI(d, d, 32)); \ if ((uintptr_t)(i) & 0x00000000ffff0000ULL) \ - PPC_ORIS(d, d, \ - ((uintptr_t)(i) >> 16) & 0xffff); \ + EMIT(PPC_RAW_ORIS(d, d, \ + ((uintptr_t)(i) >> 16) & 0xffff)); \ if ((uintptr_t)(i) & 0x000000000000ffffULL) \ - PPC_ORI(d, d, (uintptr_t)(i) & 0xffff); \ + EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) & \ + 0xffff)); \ } } while (0) #ifdef CONFIG_PPC64 @@ -240,7 +86,7 @@ static inline bool is_nearbranch(int offset) #define PPC_BCC(cond, dest) do { \ if (is_nearbranch((dest) - (ctx->idx * 4))) { \ PPC_BCC_SHORT(cond, dest); \ - PPC_NOP(); \ + EMIT(PPC_RAW_NOP()); \ } else { \ /* Flip the 'T or F' bit to invert comparison */ \ PPC_BCC_SHORT(cond ^ COND_CMP_TRUE, (ctx->idx+2)*4); \ diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h index 4ec2a9f14f84..448dfd4d98e1 100644 --- a/arch/powerpc/net/bpf_jit32.h +++ b/arch/powerpc/net/bpf_jit32.h @@ -72,21 +72,21 @@ DECLARE_LOAD_FUNC(sk_load_half); DECLARE_LOAD_FUNC(sk_load_byte); DECLARE_LOAD_FUNC(sk_load_byte_msh); -#define PPC_LBZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LBZ(r, base, i); \ - else { PPC_ADDIS(r, base, IMM_HA(i)); \ - PPC_LBZ(r, r, IMM_L(i)); } } while(0) +#define PPC_LBZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LBZ(r, base, i)); \ + else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ + EMIT(PPC_RAW_LBZ(r, r, IMM_L(i))); } } while(0) -#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) PPC_LD(r, base, i); \ - else { PPC_ADDIS(r, base, IMM_HA(i)); \ - PPC_LD(r, r, IMM_L(i)); } } while(0) +#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LD(r, base, i)); \ + else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ + EMIT(PPC_RAW_LD(r, r, IMM_L(i))); } } while(0) -#define PPC_LWZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LWZ(r, base, i); \ - else { PPC_ADDIS(r, base, IMM_HA(i)); \ - PPC_LWZ(r, r, IMM_L(i)); } } while(0) +#define PPC_LWZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LWZ(r, base, i)); \ + else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ + EMIT(PPC_RAW_LWZ(r, r, IMM_L(i))); } } while(0) -#define PPC_LHZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LHZ(r, base, i); \ - else { PPC_ADDIS(r, base, IMM_HA(i)); \ - PPC_LHZ(r, r, IMM_L(i)); } } while(0) +#define PPC_LHZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LHZ(r, base, i)); \ + else { EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i))); \ + EMIT(PPC_RAW_LHZ(r, r, IMM_L(i))); } } while(0) #ifdef CONFIG_PPC64 #define PPC_LL_OFFS(r, base, i) do { PPC_LD_OFFS(r, base, i); } while(0) @@ -107,20 +107,20 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh); } while(0) #endif #else -#define PPC_BPF_LOAD_CPU(r) do { PPC_LI(r, 0); } while(0) +#define PPC_BPF_LOAD_CPU(r) do { EMIT(PPC_RAW_LI(r, 0)); } while(0) #endif #define PPC_LHBRX_OFFS(r, base, i) \ - do { PPC_LI32(r, i); PPC_LHBRX(r, r, base); } while(0) + do { PPC_LI32(r, i); EMIT(PPC_RAW_LHBRX(r, r, base)); } while(0) #ifdef __LITTLE_ENDIAN__ #define PPC_NTOHS_OFFS(r, base, i) PPC_LHBRX_OFFS(r, base, i) #else #define PPC_NTOHS_OFFS(r, base, i) PPC_LHZ_OFFS(r, base, i) #endif -#define PPC_BPF_LL(r, base, i) do { PPC_LWZ(r, base, i); } while(0) -#define PPC_BPF_STL(r, base, i) do { PPC_STW(r, base, i); } while(0) -#define PPC_BPF_STLU(r, base, i) do { PPC_STWU(r, base, i); } while(0) +#define PPC_BPF_LL(r, base, i) do { EMIT(PPC_RAW_LWZ(r, base, i)); } while(0) +#define PPC_BPF_STL(r, base, i) do { EMIT(PPC_RAW_STW(r, base, i)); } while(0) +#define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STWU(r, base, i)); } while(0) #define SEEN_DATAREF 0x10000 /* might call external helpers */ #define SEEN_XREG 0x20000 /* X reg is used */ diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h index cf3a7e337f02..2e33c6673ff9 100644 --- a/arch/powerpc/net/bpf_jit64.h +++ b/arch/powerpc/net/bpf_jit64.h @@ -70,19 +70,21 @@ static const int b2p[] = { */ #define PPC_BPF_LL(r, base, i) do { \ if ((i) % 4) { \ - PPC_LI(b2p[TMP_REG_2], (i)); \ - PPC_LDX(r, base, b2p[TMP_REG_2]); \ + EMIT(PPC_RAW_LI(b2p[TMP_REG_2], (i)));\ + EMIT(PPC_RAW_LDX(r, base, \ + b2p[TMP_REG_2])); \ } else \ - PPC_LD(r, base, i); \ + EMIT(PPC_RAW_LD(r, base, i)); \ } while(0) #define PPC_BPF_STL(r, base, i) do { \ if ((i) % 4) { \ - PPC_LI(b2p[TMP_REG_2], (i)); \ - PPC_STDX(r, base, b2p[TMP_REG_2]); \ + EMIT(PPC_RAW_LI(b2p[TMP_REG_2], (i)));\ + EMIT(PPC_RAW_STDX(r, base, \ + b2p[TMP_REG_2])); \ } else \ - PPC_STD(r, base, i); \ + EMIT(PPC_RAW_STD(r, base, i)); \ } while(0) -#define PPC_BPF_STLU(r, base, i) do { PPC_STDU(r, base, i); } while(0) +#define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STDU(r, base, i)); } while(0) #define SEEN_FUNC 0x1000 /* might call external helpers */ #define SEEN_STACK 0x2000 /* uses BPF stack */ diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 0acc9d5fb19e..16d09b36fe06 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -61,7 +61,7 @@ static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image, PPC_LWZ_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, data_len)); PPC_LWZ_OFFS(r_HL, r_skb, offsetof(struct sk_buff, len)); - PPC_SUB(r_HL, r_HL, r_scratch1); + EMIT(PPC_RAW_SUB(r_HL, r_HL, r_scratch1)); PPC_LL_OFFS(r_D, r_skb, offsetof(struct sk_buff, data)); } @@ -70,12 +70,12 @@ static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image, * TODO: Could also detect whether first instr. sets X and * avoid this (as below, with A). */ - PPC_LI(r_X, 0); + EMIT(PPC_RAW_LI(r_X, 0)); } /* make sure we dont leak kernel information to user */ if (bpf_needs_clear_a(&filter[0])) - PPC_LI(r_A, 0); + EMIT(PPC_RAW_LI(r_A, 0)); } static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) @@ -83,10 +83,10 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) int i; if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) { - PPC_ADDI(1, 1, BPF_PPC_STACKFRAME); + EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME)); if (ctx->seen & SEEN_DATAREF) { PPC_BPF_LL(0, 1, PPC_LR_STKOFF); - PPC_MTLR(0); + EMIT(PPC_RAW_MTLR(0)); PPC_BPF_LL(r_D, 1, -(REG_SZ*(32-r_D))); PPC_BPF_LL(r_HL, 1, -(REG_SZ*(32-r_HL))); } @@ -100,7 +100,7 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) } /* The RETs have left a return value in R3. */ - PPC_BLR(); + EMIT(PPC_RAW_BLR()); } #define CHOOSE_LOAD_FUNC(K, func) \ @@ -134,124 +134,124 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, /*** ALU ops ***/ case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */ ctx->seen |= SEEN_XREG; - PPC_ADD(r_A, r_A, r_X); + EMIT(PPC_RAW_ADD(r_A, r_A, r_X)); break; case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */ if (!K) break; - PPC_ADDI(r_A, r_A, IMM_L(K)); + EMIT(PPC_RAW_ADDI(r_A, r_A, IMM_L(K))); if (K >= 32768) - PPC_ADDIS(r_A, r_A, IMM_HA(K)); + EMIT(PPC_RAW_ADDIS(r_A, r_A, IMM_HA(K))); break; case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */ ctx->seen |= SEEN_XREG; - PPC_SUB(r_A, r_A, r_X); + EMIT(PPC_RAW_SUB(r_A, r_A, r_X)); break; case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ if (!K) break; - PPC_ADDI(r_A, r_A, IMM_L(-K)); + EMIT(PPC_RAW_ADDI(r_A, r_A, IMM_L(-K))); if (K >= 32768) - PPC_ADDIS(r_A, r_A, IMM_HA(-K)); + EMIT(PPC_RAW_ADDIS(r_A, r_A, IMM_HA(-K))); break; case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */ ctx->seen |= SEEN_XREG; - PPC_MULW(r_A, r_A, r_X); + EMIT(PPC_RAW_MULW(r_A, r_A, r_X)); break; case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ if (K < 32768) - PPC_MULI(r_A, r_A, K); + EMIT(PPC_RAW_MULI(r_A, r_A, K)); else { PPC_LI32(r_scratch1, K); - PPC_MULW(r_A, r_A, r_scratch1); + EMIT(PPC_RAW_MULW(r_A, r_A, r_scratch1)); } break; case BPF_ALU | BPF_MOD | BPF_X: /* A %= X; */ case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */ ctx->seen |= SEEN_XREG; - PPC_CMPWI(r_X, 0); + EMIT(PPC_RAW_CMPWI(r_X, 0)); if (ctx->pc_ret0 != -1) { PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]); } else { PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12); - PPC_LI(r_ret, 0); + EMIT(PPC_RAW_LI(r_ret, 0)); PPC_JMP(exit_addr); } if (code == (BPF_ALU | BPF_MOD | BPF_X)) { - PPC_DIVWU(r_scratch1, r_A, r_X); - PPC_MULW(r_scratch1, r_X, r_scratch1); - PPC_SUB(r_A, r_A, r_scratch1); + EMIT(PPC_RAW_DIVWU(r_scratch1, r_A, r_X)); + EMIT(PPC_RAW_MULW(r_scratch1, r_X, r_scratch1)); + EMIT(PPC_RAW_SUB(r_A, r_A, r_scratch1)); } else { - PPC_DIVWU(r_A, r_A, r_X); + EMIT(PPC_RAW_DIVWU(r_A, r_A, r_X)); } break; case BPF_ALU | BPF_MOD | BPF_K: /* A %= K; */ PPC_LI32(r_scratch2, K); - PPC_DIVWU(r_scratch1, r_A, r_scratch2); - PPC_MULW(r_scratch1, r_scratch2, r_scratch1); - PPC_SUB(r_A, r_A, r_scratch1); + EMIT(PPC_RAW_DIVWU(r_scratch1, r_A, r_scratch2)); + EMIT(PPC_RAW_MULW(r_scratch1, r_scratch2, r_scratch1)); + EMIT(PPC_RAW_SUB(r_A, r_A, r_scratch1)); break; case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */ if (K == 1) break; PPC_LI32(r_scratch1, K); - PPC_DIVWU(r_A, r_A, r_scratch1); + EMIT(PPC_RAW_DIVWU(r_A, r_A, r_scratch1)); break; case BPF_ALU | BPF_AND | BPF_X: ctx->seen |= SEEN_XREG; - PPC_AND(r_A, r_A, r_X); + EMIT(PPC_RAW_AND(r_A, r_A, r_X)); break; case BPF_ALU | BPF_AND | BPF_K: if (!IMM_H(K)) - PPC_ANDI(r_A, r_A, K); + EMIT(PPC_RAW_ANDI(r_A, r_A, K)); else { PPC_LI32(r_scratch1, K); - PPC_AND(r_A, r_A, r_scratch1); + EMIT(PPC_RAW_AND(r_A, r_A, r_scratch1)); } break; case BPF_ALU | BPF_OR | BPF_X: ctx->seen |= SEEN_XREG; - PPC_OR(r_A, r_A, r_X); + EMIT(PPC_RAW_OR(r_A, r_A, r_X)); break; case BPF_ALU | BPF_OR | BPF_K: if (IMM_L(K)) - PPC_ORI(r_A, r_A, IMM_L(K)); + EMIT(PPC_RAW_ORI(r_A, r_A, IMM_L(K))); if (K >= 65536) - PPC_ORIS(r_A, r_A, IMM_H(K)); + EMIT(PPC_RAW_ORIS(r_A, r_A, IMM_H(K))); break; case BPF_ANC | SKF_AD_ALU_XOR_X: case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */ ctx->seen |= SEEN_XREG; - PPC_XOR(r_A, r_A, r_X); + EMIT(PPC_RAW_XOR(r_A, r_A, r_X)); break; case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */ if (IMM_L(K)) - PPC_XORI(r_A, r_A, IMM_L(K)); + EMIT(PPC_RAW_XORI(r_A, r_A, IMM_L(K))); if (K >= 65536) - PPC_XORIS(r_A, r_A, IMM_H(K)); + EMIT(PPC_RAW_XORIS(r_A, r_A, IMM_H(K))); break; case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */ ctx->seen |= SEEN_XREG; - PPC_SLW(r_A, r_A, r_X); + EMIT(PPC_RAW_SLW(r_A, r_A, r_X)); break; case BPF_ALU | BPF_LSH | BPF_K: if (K == 0) break; else - PPC_SLWI(r_A, r_A, K); + EMIT(PPC_RAW_SLWI(r_A, r_A, K)); break; case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */ ctx->seen |= SEEN_XREG; - PPC_SRW(r_A, r_A, r_X); + EMIT(PPC_RAW_SRW(r_A, r_A, r_X)); break; case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */ if (K == 0) break; else - PPC_SRWI(r_A, r_A, K); + EMIT(PPC_RAW_SRWI(r_A, r_A, K)); break; case BPF_ALU | BPF_NEG: - PPC_NEG(r_A, r_A); + EMIT(PPC_RAW_NEG(r_A, r_A)); break; case BPF_RET | BPF_K: PPC_LI32(r_ret, K); @@ -277,24 +277,24 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, if (ctx->seen) PPC_JMP(exit_addr); else - PPC_BLR(); + EMIT(PPC_RAW_BLR()); } break; case BPF_RET | BPF_A: - PPC_MR(r_ret, r_A); + EMIT(PPC_RAW_MR(r_ret, r_A)); if (i != flen - 1) { if (ctx->seen) PPC_JMP(exit_addr); else - PPC_BLR(); + EMIT(PPC_RAW_BLR()); } break; case BPF_MISC | BPF_TAX: /* X = A */ - PPC_MR(r_X, r_A); + EMIT(PPC_RAW_MR(r_X, r_A)); break; case BPF_MISC | BPF_TXA: /* A = X */ ctx->seen |= SEEN_XREG; - PPC_MR(r_A, r_X); + EMIT(PPC_RAW_MR(r_A, r_X)); break; /*** Constant loads/M[] access ***/ @@ -305,19 +305,19 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, PPC_LI32(r_X, K); break; case BPF_LD | BPF_MEM: /* A = mem[K] */ - PPC_MR(r_A, r_M + (K & 0xf)); + EMIT(PPC_RAW_MR(r_A, r_M + (K & 0xf))); ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); break; case BPF_LDX | BPF_MEM: /* X = mem[K] */ - PPC_MR(r_X, r_M + (K & 0xf)); + EMIT(PPC_RAW_MR(r_X, r_M + (K & 0xf))); ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); break; case BPF_ST: /* mem[K] = A */ - PPC_MR(r_M + (K & 0xf), r_A); + EMIT(PPC_RAW_MR(r_M + (K & 0xf), r_A)); ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); break; case BPF_STX: /* mem[K] = X */ - PPC_MR(r_M + (K & 0xf), r_X); + EMIT(PPC_RAW_MR(r_M + (K & 0xf), r_X)); ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf)); break; case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */ @@ -346,13 +346,13 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, type) != 2); PPC_LL_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, dev)); - PPC_CMPDI(r_scratch1, 0); + EMIT(PPC_RAW_CMPDI(r_scratch1, 0)); if (ctx->pc_ret0 != -1) { PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]); } else { /* Exit, returning 0; first pass hits here. */ PPC_BCC_SHORT(COND_NE, ctx->idx * 4 + 12); - PPC_LI(r_ret, 0); + EMIT(PPC_RAW_LI(r_ret, 0)); PPC_JMP(exit_addr); } if (code == (BPF_ANC | SKF_AD_IFINDEX)) { @@ -383,9 +383,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: PPC_LBZ_OFFS(r_A, r_skb, PKT_VLAN_PRESENT_OFFSET()); if (PKT_VLAN_PRESENT_BIT) - PPC_SRWI(r_A, r_A, PKT_VLAN_PRESENT_BIT); + EMIT(PPC_RAW_SRWI(r_A, r_A, PKT_VLAN_PRESENT_BIT)); if (PKT_VLAN_PRESENT_BIT < 7) - PPC_ANDI(r_A, r_A, 1); + EMIT(PPC_RAW_ANDI(r_A, r_A, 1)); break; case BPF_ANC | SKF_AD_QUEUE: BUILD_BUG_ON(sizeof_field(struct sk_buff, @@ -395,8 +395,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, break; case BPF_ANC | SKF_AD_PKTTYPE: PPC_LBZ_OFFS(r_A, r_skb, PKT_TYPE_OFFSET()); - PPC_ANDI(r_A, r_A, PKT_TYPE_MAX); - PPC_SRWI(r_A, r_A, 5); + EMIT(PPC_RAW_ANDI(r_A, r_A, PKT_TYPE_MAX)); + EMIT(PPC_RAW_SRWI(r_A, r_A, 5)); break; case BPF_ANC | SKF_AD_CPU: PPC_BPF_LOAD_CPU(r_A); @@ -414,9 +414,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, /* Load from [K]. */ ctx->seen |= SEEN_DATAREF; PPC_FUNC_ADDR(r_scratch1, func); - PPC_MTLR(r_scratch1); + EMIT(PPC_RAW_MTLR(r_scratch1)); PPC_LI32(r_addr, K); - PPC_BLRL(); + EMIT(PPC_RAW_BLRL()); /* * Helper returns 'lt' condition on error, and an * appropriate return value in r3 @@ -440,11 +440,11 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, */ ctx->seen |= SEEN_DATAREF | SEEN_XREG; PPC_FUNC_ADDR(r_scratch1, func); - PPC_MTLR(r_scratch1); - PPC_ADDI(r_addr, r_X, IMM_L(K)); + EMIT(PPC_RAW_MTLR(r_scratch1)); + EMIT(PPC_RAW_ADDI(r_addr, r_X, IMM_L(K))); if (K >= 32768) - PPC_ADDIS(r_addr, r_addr, IMM_HA(K)); - PPC_BLRL(); + EMIT(PPC_RAW_ADDIS(r_addr, r_addr, IMM_HA(K))); + EMIT(PPC_RAW_BLRL()); /* If error, cr0.LT set */ PPC_BCC(COND_LT, exit_addr); break; @@ -489,30 +489,30 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_JMP | BPF_JGE | BPF_X: case BPF_JMP | BPF_JEQ | BPF_X: ctx->seen |= SEEN_XREG; - PPC_CMPLW(r_A, r_X); + EMIT(PPC_RAW_CMPLW(r_A, r_X)); break; case BPF_JMP | BPF_JSET | BPF_X: ctx->seen |= SEEN_XREG; - PPC_AND_DOT(r_scratch1, r_A, r_X); + EMIT(PPC_RAW_AND_DOT(r_scratch1, r_A, r_X)); break; case BPF_JMP | BPF_JEQ | BPF_K: case BPF_JMP | BPF_JGT | BPF_K: case BPF_JMP | BPF_JGE | BPF_K: if (K < 32768) - PPC_CMPLWI(r_A, K); + EMIT(PPC_RAW_CMPLWI(r_A, K)); else { PPC_LI32(r_scratch1, K); - PPC_CMPLW(r_A, r_scratch1); + EMIT(PPC_RAW_CMPLW(r_A, r_scratch1)); } break; case BPF_JMP | BPF_JSET | BPF_K: if (K < 32768) /* PPC_ANDI is /only/ dot-form */ - PPC_ANDI(r_scratch1, r_A, K); + EMIT(PPC_RAW_ANDI(r_scratch1, r_A, K)); else { PPC_LI32(r_scratch1, K); - PPC_AND_DOT(r_scratch1, r_A, - r_scratch1); + EMIT(PPC_RAW_AND_DOT(r_scratch1, r_A, + r_scratch1)); } break; } diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index be3517ef0574..022103c6a201 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -95,12 +95,12 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * invoked through a tail call. */ if (ctx->seen & SEEN_TAILCALL) { - PPC_LI(b2p[TMP_REG_1], 0); + EMIT(PPC_RAW_LI(b2p[TMP_REG_1], 0)); /* this goes in the redzone */ PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8)); } else { - PPC_NOP(); - PPC_NOP(); + EMIT(PPC_RAW_NOP()); + EMIT(PPC_RAW_NOP()); } #define BPF_TAILCALL_PROLOGUE_SIZE 8 @@ -129,8 +129,8 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) /* Setup frame pointer to point to the bpf stack area */ if (bpf_is_seen_register(ctx, BPF_REG_FP)) - PPC_ADDI(b2p[BPF_REG_FP], 1, - STACK_FRAME_MIN_SIZE + ctx->stack_size); + EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], 1, + STACK_FRAME_MIN_SIZE + ctx->stack_size)); } static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx) @@ -144,10 +144,10 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx /* Tear down our stack frame */ if (bpf_has_stack_frame(ctx)) { - PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size); + EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size)); if (ctx->seen & SEEN_FUNC) { PPC_BPF_LL(0, 1, PPC_LR_STKOFF); - PPC_MTLR(0); + EMIT(PPC_RAW_MTLR(0)); } } } @@ -157,9 +157,9 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) bpf_jit_emit_common_epilogue(image, ctx); /* Move result to r3 */ - PPC_MR(3, b2p[BPF_REG_0]); + EMIT(PPC_RAW_MR(3, b2p[BPF_REG_0])); - PPC_BLR(); + EMIT(PPC_RAW_BLR()); } static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, @@ -171,7 +171,7 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, /* Load actual entry point from function descriptor */ PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0); /* ... and move it to LR */ - PPC_MTLR(b2p[TMP_REG_1]); + EMIT(PPC_RAW_MTLR(b2p[TMP_REG_1])); /* * Load TOC from function descriptor at offset 8. * We can clobber r2 since we get called through a @@ -182,9 +182,9 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, #else /* We can clobber r12 */ PPC_FUNC_ADDR(12, func); - PPC_MTLR(12); + EMIT(PPC_RAW_MTLR(12)); #endif - PPC_BLRL(); + EMIT(PPC_RAW_BLRL()); } static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, @@ -206,7 +206,7 @@ static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, * that PPC_LI64() can emit. */ for (i = ctx->idx - ctx_idx; i < 5; i++) - PPC_NOP(); + EMIT(PPC_RAW_NOP()); #ifdef PPC64_ELF_ABI_v1 /* @@ -220,8 +220,8 @@ static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, PPC_BPF_LL(12, 12, 0); #endif - PPC_MTLR(12); - PPC_BLRL(); + EMIT(PPC_RAW_MTLR(12)); + EMIT(PPC_RAW_BLRL()); } static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) @@ -239,9 +239,9 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 * if (index >= array->map.max_entries) * goto out; */ - PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)); - PPC_RLWINM(b2p_index, b2p_index, 0, 0, 31); - PPC_CMPLW(b2p_index, b2p[TMP_REG_1]); + EMIT(PPC_RAW_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); + EMIT(PPC_RAW_RLWINM(b2p_index, b2p_index, 0, 0, 31)); + EMIT(PPC_RAW_CMPLW(b2p_index, b2p[TMP_REG_1])); PPC_BCC(COND_GE, out); /* @@ -249,42 +249,42 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 * goto out; */ PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); - PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT); + EMIT(PPC_RAW_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT)); PPC_BCC(COND_GT, out); /* * tail_call_cnt++; */ - PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1); + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1)); PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); /* prog = array->ptrs[index]; */ - PPC_MULI(b2p[TMP_REG_1], b2p_index, 8); - PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array); + EMIT(PPC_RAW_MULI(b2p[TMP_REG_1], b2p_index, 8)); + EMIT(PPC_RAW_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array)); PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs)); /* * if (prog == NULL) * goto out; */ - PPC_CMPLDI(b2p[TMP_REG_1], 0); + EMIT(PPC_RAW_CMPLDI(b2p[TMP_REG_1], 0)); PPC_BCC(COND_EQ, out); /* goto *(prog->bpf_func + prologue_size); */ PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func)); #ifdef PPC64_ELF_ABI_v1 /* skip past the function descriptor */ - PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], - FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE); + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], + FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE)); #else - PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE); + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE)); #endif - PPC_MTCTR(b2p[TMP_REG_1]); + EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1])); /* tear down stack, restore NVRs, ... */ bpf_jit_emit_common_epilogue(image, ctx); - PPC_BCTR(); + EMIT(PPC_RAW_BCTR()); /* out: */ } @@ -340,11 +340,11 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, */ case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */ case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */ - PPC_ADD(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_ADD(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */ case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */ - PPC_SUB(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */ case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ @@ -354,53 +354,53 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, imm = -imm; if (imm) { if (imm >= -32768 && imm < 32768) - PPC_ADDI(dst_reg, dst_reg, IMM_L(imm)); + EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm))); else { PPC_LI32(b2p[TMP_REG_1], imm); - PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_ADD(dst_reg, dst_reg, b2p[TMP_REG_1])); } } goto bpf_alu32_trunc; case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */ if (BPF_CLASS(code) == BPF_ALU) - PPC_MULW(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg)); else - PPC_MULD(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_MULD(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */ case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */ if (imm >= -32768 && imm < 32768) - PPC_MULI(dst_reg, dst_reg, IMM_L(imm)); + EMIT(PPC_RAW_MULI(dst_reg, dst_reg, IMM_L(imm))); else { PPC_LI32(b2p[TMP_REG_1], imm); if (BPF_CLASS(code) == BPF_ALU) - PPC_MULW(dst_reg, dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, + b2p[TMP_REG_1])); else - PPC_MULD(dst_reg, dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_MULD(dst_reg, dst_reg, + b2p[TMP_REG_1])); } goto bpf_alu32_trunc; case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */ case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */ if (BPF_OP(code) == BPF_MOD) { - PPC_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg); - PPC_MULW(b2p[TMP_REG_1], src_reg, - b2p[TMP_REG_1]); - PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg)); + EMIT(PPC_RAW_MULW(b2p[TMP_REG_1], src_reg, + b2p[TMP_REG_1])); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1])); } else - PPC_DIVWU(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */ if (BPF_OP(code) == BPF_MOD) { - PPC_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg); - PPC_MULD(b2p[TMP_REG_1], src_reg, - b2p[TMP_REG_1]); - PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg)); + EMIT(PPC_RAW_MULD(b2p[TMP_REG_1], src_reg, + b2p[TMP_REG_1])); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1])); } else - PPC_DIVDU(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, src_reg)); break; case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */ case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */ @@ -415,35 +415,37 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, switch (BPF_CLASS(code)) { case BPF_ALU: if (BPF_OP(code) == BPF_MOD) { - PPC_DIVWU(b2p[TMP_REG_2], dst_reg, - b2p[TMP_REG_1]); - PPC_MULW(b2p[TMP_REG_1], + EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_2], + dst_reg, + b2p[TMP_REG_1])); + EMIT(PPC_RAW_MULW(b2p[TMP_REG_1], b2p[TMP_REG_1], - b2p[TMP_REG_2]); - PPC_SUB(dst_reg, dst_reg, - b2p[TMP_REG_1]); + b2p[TMP_REG_2])); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, + b2p[TMP_REG_1])); } else - PPC_DIVWU(dst_reg, dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, + b2p[TMP_REG_1])); break; case BPF_ALU64: if (BPF_OP(code) == BPF_MOD) { - PPC_DIVDU(b2p[TMP_REG_2], dst_reg, - b2p[TMP_REG_1]); - PPC_MULD(b2p[TMP_REG_1], + EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_2], + dst_reg, + b2p[TMP_REG_1])); + EMIT(PPC_RAW_MULD(b2p[TMP_REG_1], b2p[TMP_REG_1], - b2p[TMP_REG_2]); - PPC_SUB(dst_reg, dst_reg, - b2p[TMP_REG_1]); + b2p[TMP_REG_2])); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, + b2p[TMP_REG_1])); } else - PPC_DIVDU(dst_reg, dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, + b2p[TMP_REG_1])); break; } goto bpf_alu32_trunc; case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */ case BPF_ALU64 | BPF_NEG: /* dst = -dst */ - PPC_NEG(dst_reg, dst_reg); + EMIT(PPC_RAW_NEG(dst_reg, dst_reg)); goto bpf_alu32_trunc; /* @@ -451,101 +453,101 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, */ case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */ case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */ - PPC_AND(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */ case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */ if (!IMM_H(imm)) - PPC_ANDI(dst_reg, dst_reg, IMM_L(imm)); + EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm))); else { /* Sign-extended */ PPC_LI32(b2p[TMP_REG_1], imm); - PPC_AND(dst_reg, dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_AND(dst_reg, dst_reg, b2p[TMP_REG_1])); } goto bpf_alu32_trunc; case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */ case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */ - PPC_OR(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */ case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */ if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) { /* Sign-extended */ PPC_LI32(b2p[TMP_REG_1], imm); - PPC_OR(dst_reg, dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_1])); } else { if (IMM_L(imm)) - PPC_ORI(dst_reg, dst_reg, IMM_L(imm)); + EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm))); if (IMM_H(imm)) - PPC_ORIS(dst_reg, dst_reg, IMM_H(imm)); + EMIT(PPC_RAW_ORIS(dst_reg, dst_reg, IMM_H(imm))); } goto bpf_alu32_trunc; case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */ case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */ - PPC_XOR(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */ case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */ if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) { /* Sign-extended */ PPC_LI32(b2p[TMP_REG_1], imm); - PPC_XOR(dst_reg, dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_XOR(dst_reg, dst_reg, b2p[TMP_REG_1])); } else { if (IMM_L(imm)) - PPC_XORI(dst_reg, dst_reg, IMM_L(imm)); + EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm))); if (IMM_H(imm)) - PPC_XORIS(dst_reg, dst_reg, IMM_H(imm)); + EMIT(PPC_RAW_XORIS(dst_reg, dst_reg, IMM_H(imm))); } goto bpf_alu32_trunc; case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */ /* slw clears top 32 bits */ - PPC_SLW(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg)); /* skip zero extension move, but set address map. */ if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */ - PPC_SLD(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_SLD(dst_reg, dst_reg, src_reg)); break; case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<== (u32) imm */ /* with imm 0, we still need to clear top 32 bits */ - PPC_SLWI(dst_reg, dst_reg, imm); + EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, imm)); if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<== imm */ if (imm != 0) - PPC_SLDI(dst_reg, dst_reg, imm); + EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, imm)); break; case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */ - PPC_SRW(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */ - PPC_SRD(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_SRD(dst_reg, dst_reg, src_reg)); break; case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */ - PPC_SRWI(dst_reg, dst_reg, imm); + EMIT(PPC_RAW_SRWI(dst_reg, dst_reg, imm)); if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */ if (imm != 0) - PPC_SRDI(dst_reg, dst_reg, imm); + EMIT(PPC_RAW_SRDI(dst_reg, dst_reg, imm)); break; case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */ - PPC_SRAW(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_SRAW(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */ - PPC_SRAD(dst_reg, dst_reg, src_reg); + EMIT(PPC_RAW_SRAD(dst_reg, dst_reg, src_reg)); break; case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */ - PPC_SRAWI(dst_reg, dst_reg, imm); + EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg, imm)); goto bpf_alu32_trunc; case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */ if (imm != 0) - PPC_SRADI(dst_reg, dst_reg, imm); + EMIT(PPC_RAW_SRADI(dst_reg, dst_reg, imm)); break; /* @@ -555,10 +557,10 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */ if (imm == 1) { /* special mov32 for zext */ - PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31); + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31)); break; } - PPC_MR(dst_reg, src_reg); + EMIT(PPC_RAW_MR(dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */ case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */ @@ -572,7 +574,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, bpf_alu32_trunc: /* Truncate to 32-bits */ if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext) - PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31); + EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31)); break; /* @@ -590,11 +592,11 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, switch (imm) { case 16: /* Rotate 8 bits left & mask with 0x0000ff00 */ - PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23); + EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23)); /* Rotate 8 bits right & insert LSB to reg */ - PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31); + EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31)); /* Move result back to dst_reg */ - PPC_MR(dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1])); break; case 32: /* @@ -602,12 +604,12 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, * 2 bytes are already in their final position * -- byte 2 and 4 (of bytes 1, 2, 3 and 4) */ - PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31); + EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31)); /* Rotate 24 bits and insert byte 1 */ - PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7); + EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7)); /* Rotate 24 bits and insert byte 3 */ - PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23); - PPC_MR(dst_reg, b2p[TMP_REG_1]); + EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1])); break; case 64: /* @@ -619,8 +621,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, * same across all passes */ PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx)); - PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx)); - PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]); + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx))); + EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); break; } break; @@ -629,14 +631,14 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, switch (imm) { case 16: /* zero-extend 16 bits into 64 bits */ - PPC_RLDICL(dst_reg, dst_reg, 0, 48); + EMIT(PPC_RAW_RLDICL(dst_reg, dst_reg, 0, 48)); if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; case 32: if (!fp->aux->verifier_zext) /* zero-extend 32 bits into 64 bits */ - PPC_RLDICL(dst_reg, dst_reg, 0, 32); + EMIT(PPC_RAW_RLDICL(dst_reg, dst_reg, 0, 32)); break; case 64: /* nop */ @@ -650,18 +652,18 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - PPC_LI(b2p[TMP_REG_1], imm); + EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm)); src_reg = b2p[TMP_REG_1]; } - PPC_STB(src_reg, dst_reg, off); + EMIT(PPC_RAW_STB(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - PPC_LI(b2p[TMP_REG_1], imm); + EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm)); src_reg = b2p[TMP_REG_1]; } - PPC_STH(src_reg, dst_reg, off); + EMIT(PPC_RAW_STH(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */ @@ -669,7 +671,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, PPC_LI32(b2p[TMP_REG_1], imm); src_reg = b2p[TMP_REG_1]; } - PPC_STW(src_reg, dst_reg, off); + EMIT(PPC_RAW_STW(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */ @@ -686,24 +688,24 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, /* *(u32 *)(dst + off) += src */ case BPF_STX | BPF_XADD | BPF_W: /* Get EA into TMP_REG_1 */ - PPC_ADDI(b2p[TMP_REG_1], dst_reg, off); + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off)); tmp_idx = ctx->idx * 4; /* load value from memory into TMP_REG_2 */ - PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); + EMIT(PPC_RAW_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0)); /* add value from src_reg into this */ - PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); + EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg)); /* store result back */ - PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); + EMIT(PPC_RAW_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1])); /* we're done if this succeeded */ PPC_BCC_SHORT(COND_NE, tmp_idx); break; /* *(u64 *)(dst + off) += src */ case BPF_STX | BPF_XADD | BPF_DW: - PPC_ADDI(b2p[TMP_REG_1], dst_reg, off); + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off)); tmp_idx = ctx->idx * 4; - PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); - PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); - PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); + EMIT(PPC_RAW_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0)); + EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg)); + EMIT(PPC_RAW_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1])); PPC_BCC_SHORT(COND_NE, tmp_idx); break; @@ -712,19 +714,19 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, */ /* dst = *(u8 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_B: - PPC_LBZ(dst_reg, src_reg, off); + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; /* dst = *(u16 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_H: - PPC_LHZ(dst_reg, src_reg, off); + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; /* dst = *(u32 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_W: - PPC_LWZ(dst_reg, src_reg, off); + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); if (insn_is_zext(&insn[i + 1])) addrs[++i] = ctx->idx * 4; break; @@ -775,7 +777,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, else bpf_jit_emit_func_call_rel(image, ctx, func_addr); /* move return value from r3 to BPF_REG_0 */ - PPC_MR(b2p[BPF_REG_0], 3); + EMIT(PPC_RAW_MR(b2p[BPF_REG_0], 3)); break; /* @@ -860,9 +862,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_JMP32 | BPF_JNE | BPF_X: /* unsigned comparison */ if (BPF_CLASS(code) == BPF_JMP32) - PPC_CMPLW(dst_reg, src_reg); + EMIT(PPC_RAW_CMPLW(dst_reg, src_reg)); else - PPC_CMPLD(dst_reg, src_reg); + EMIT(PPC_RAW_CMPLD(dst_reg, src_reg)); break; case BPF_JMP | BPF_JSGT | BPF_X: case BPF_JMP | BPF_JSLT | BPF_X: @@ -874,21 +876,21 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_JMP32 | BPF_JSLE | BPF_X: /* signed comparison */ if (BPF_CLASS(code) == BPF_JMP32) - PPC_CMPW(dst_reg, src_reg); + EMIT(PPC_RAW_CMPW(dst_reg, src_reg)); else - PPC_CMPD(dst_reg, src_reg); + EMIT(PPC_RAW_CMPD(dst_reg, src_reg)); break; case BPF_JMP | BPF_JSET | BPF_X: case BPF_JMP32 | BPF_JSET | BPF_X: if (BPF_CLASS(code) == BPF_JMP) { - PPC_AND_DOT(b2p[TMP_REG_1], dst_reg, - src_reg); + EMIT(PPC_RAW_AND_DOT(b2p[TMP_REG_1], dst_reg, + src_reg)); } else { int tmp_reg = b2p[TMP_REG_1]; - PPC_AND(tmp_reg, dst_reg, src_reg); - PPC_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0, - 31); + EMIT(PPC_RAW_AND(tmp_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0, + 31)); } break; case BPF_JMP | BPF_JNE | BPF_K: @@ -912,19 +914,19 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, */ if (imm >= 0 && imm < 32768) { if (is_jmp32) - PPC_CMPLWI(dst_reg, imm); + EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); else - PPC_CMPLDI(dst_reg, imm); + EMIT(PPC_RAW_CMPLDI(dst_reg, imm)); } else { /* sign-extending load */ PPC_LI32(b2p[TMP_REG_1], imm); /* ... but unsigned comparison */ if (is_jmp32) - PPC_CMPLW(dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_CMPLW(dst_reg, + b2p[TMP_REG_1])); else - PPC_CMPLD(dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_CMPLD(dst_reg, + b2p[TMP_REG_1])); } break; } @@ -945,17 +947,17 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, */ if (imm >= -32768 && imm < 32768) { if (is_jmp32) - PPC_CMPWI(dst_reg, imm); + EMIT(PPC_RAW_CMPWI(dst_reg, imm)); else - PPC_CMPDI(dst_reg, imm); + EMIT(PPC_RAW_CMPDI(dst_reg, imm)); } else { PPC_LI32(b2p[TMP_REG_1], imm); if (is_jmp32) - PPC_CMPW(dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_CMPW(dst_reg, + b2p[TMP_REG_1])); else - PPC_CMPD(dst_reg, - b2p[TMP_REG_1]); + EMIT(PPC_RAW_CMPD(dst_reg, + b2p[TMP_REG_1])); } break; } @@ -964,19 +966,19 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, /* andi does not sign-extend the immediate */ if (imm >= 0 && imm < 32768) /* PPC_ANDI is _only/always_ dot-form */ - PPC_ANDI(b2p[TMP_REG_1], dst_reg, imm); + EMIT(PPC_RAW_ANDI(b2p[TMP_REG_1], dst_reg, imm)); else { int tmp_reg = b2p[TMP_REG_1]; PPC_LI32(tmp_reg, imm); if (BPF_CLASS(code) == BPF_JMP) { - PPC_AND_DOT(tmp_reg, dst_reg, - tmp_reg); + EMIT(PPC_RAW_AND_DOT(tmp_reg, dst_reg, + tmp_reg)); } else { - PPC_AND(tmp_reg, dst_reg, - tmp_reg); - PPC_RLWINM_DOT(tmp_reg, tmp_reg, - 0, 0, 31); + EMIT(PPC_RAW_AND(tmp_reg, dst_reg, + tmp_reg)); + EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg, + 0, 0, 31)); } } break; diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index 53d614e98537..c02854dea2b2 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o \ isa207-common.o power8-pmu.o power9-pmu.o \ - generic-compat-pmu.o + generic-compat-pmu.o power10-pmu.o obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o obj-$(CONFIG_PPC_POWERNV) += imc-pmu.o diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h index 7a2cb9e1181a..ae24d4a00da6 100644 --- a/arch/powerpc/perf/callchain.h +++ b/arch/powerpc/perf/callchain.h @@ -2,7 +2,7 @@ #ifndef _POWERPC_PERF_CALLCHAIN_H #define _POWERPC_PERF_CALLCHAIN_H -int read_user_stack_slow(void __user *ptr, void *buf, int nb); +int read_user_stack_slow(const void __user *ptr, void *buf, int nb); void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, @@ -16,4 +16,27 @@ static inline bool invalid_user_sp(unsigned long sp) return (!sp || (sp & mask) || (sp > top)); } +/* + * On 32-bit we just access the address and let hash_page create a + * HPTE if necessary, so there is no need to fall back to reading + * the page tables. Since this is called at interrupt level, + * do_page_fault() won't treat a DSI as a page fault. + */ +static inline int __read_user_stack(const void __user *ptr, void *ret, + size_t size) +{ + unsigned long addr = (unsigned long)ptr; + int rc; + + if (addr > TASK_SIZE - size || (addr & (size - 1))) + return -EFAULT; + + rc = copy_from_user_nofault(ret, ptr, size); + + if (IS_ENABLED(CONFIG_PPC64) && rc) + return read_user_stack_slow(ptr, ret, size); + + return rc; +} + #endif /* _POWERPC_PERF_CALLCHAIN_H */ diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c index 542e68b8eae0..64e4013d8060 100644 --- a/arch/powerpc/perf/callchain_32.c +++ b/arch/powerpc/perf/callchain_32.c @@ -30,26 +30,9 @@ #endif /* CONFIG_PPC64 */ -/* - * On 32-bit we just access the address and let hash_page create a - * HPTE if necessary, so there is no need to fall back to reading - * the page tables. Since this is called at interrupt level, - * do_page_fault() won't treat a DSI as a page fault. - */ -static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) +static int read_user_stack_32(const unsigned int __user *ptr, unsigned int *ret) { - int rc; - - if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || - ((unsigned long)ptr & 3)) - return -EFAULT; - - rc = copy_from_user_nofault(ret, ptr, sizeof(*ret)); - - if (IS_ENABLED(CONFIG_PPC64) && rc) - return read_user_stack_slow(ptr, ret, 4); - - return rc; + return __read_user_stack(ptr, ret, sizeof(*ret)); } /* diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c index fa2a1b83b9b0..fed90e827f3a 100644 --- a/arch/powerpc/perf/callchain_64.c +++ b/arch/powerpc/perf/callchain_64.c @@ -23,7 +23,7 @@ * interrupt context, so if the access faults, we read the page tables * to find which page (if any) is mapped and access it directly. */ -int read_user_stack_slow(void __user *ptr, void *buf, int nb) +int read_user_stack_slow(const void __user *ptr, void *buf, int nb) { unsigned long addr = (unsigned long) ptr; @@ -44,16 +44,9 @@ int read_user_stack_slow(void __user *ptr, void *buf, int nb) return -EFAULT; } -static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) +static int read_user_stack_64(const unsigned long __user *ptr, unsigned long *ret) { - if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) || - ((unsigned long)ptr & 7)) - return -EFAULT; - - if (!copy_from_user_nofault(ret, ptr, sizeof(*ret))) - return 0; - - return read_user_stack_slow(ptr, ret, 8); + return __read_user_stack(ptr, ret, sizeof(*ret)); } /* diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 01d70280d287..78fe34986594 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -37,12 +37,7 @@ struct cpu_hw_events { struct perf_event *event[MAX_HWEVENTS]; u64 events[MAX_HWEVENTS]; unsigned int flags[MAX_HWEVENTS]; - /* - * The order of the MMCR array is: - * - 64-bit, MMCR0, MMCR1, MMCRA, MMCR2 - * - 32-bit, MMCR0, MMCR1, MMCR2 - */ - unsigned long mmcr[4]; + struct mmcr_regs mmcr; struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS]; u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; @@ -77,6 +72,11 @@ static unsigned int freeze_events_kernel = MMCR0_FCS; /* * 32-bit doesn't have MMCRA but does have an MMCR2, * and a few other names are different. + * Also 32-bit doesn't have MMCR3, SIER2 and SIER3. + * Define them as zero knowing that any code path accessing + * these registers (via mtspr/mfspr) are done under ppmu flag + * check for PPMU_ARCH_31 and we will not enter that code path + * for 32-bit. */ #ifdef CONFIG_PPC32 @@ -90,7 +90,11 @@ static unsigned int freeze_events_kernel = MMCR0_FCS; #define MMCR0_PMCC_U6 0 #define SPRN_MMCRA SPRN_MMCR2 +#define SPRN_MMCR3 0 +#define SPRN_SIER2 0 +#define SPRN_SIER3 0 #define MMCRA_SAMPLE_ENABLE 0 +#define MMCRA_BHRB_DISABLE 0 static inline unsigned long perf_ip_adjust(struct pt_regs *regs) { @@ -121,7 +125,7 @@ static void ebb_event_add(struct perf_event *event) { } static void ebb_switch_out(unsigned long mmcr0) { } static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) { - return cpuhw->mmcr[0]; + return cpuhw->mmcr.mmcr0; } static inline void power_pmu_bhrb_enable(struct perf_event *event) {} @@ -466,8 +470,11 @@ static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events * * addresses at this point. Check the privileges before * exporting it to userspace (avoid exposure of regions * where we could have speculative execution) + * Incase of ISA v3.1, BHRB will capture only user-space + * addresses, hence include a check before filtering code */ - if (is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0) + if (!(ppmu->flags & PPMU_ARCH_31) && + is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0) continue; /* Branches are read most recent first (ie. mfbhrb 0 is @@ -586,11 +593,16 @@ static void ebb_switch_out(unsigned long mmcr0) current->thread.sdar = mfspr(SPRN_SDAR); current->thread.mmcr0 = mmcr0 & MMCR0_USER_MASK; current->thread.mmcr2 = mfspr(SPRN_MMCR2) & MMCR2_USER_MASK; + if (ppmu->flags & PPMU_ARCH_31) { + current->thread.mmcr3 = mfspr(SPRN_MMCR3); + current->thread.sier2 = mfspr(SPRN_SIER2); + current->thread.sier3 = mfspr(SPRN_SIER3); + } } static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) { - unsigned long mmcr0 = cpuhw->mmcr[0]; + unsigned long mmcr0 = cpuhw->mmcr.mmcr0; if (!ebb) goto out; @@ -624,7 +636,13 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) * unfreeze counters, it should not set exclude_xxx in its events and * instead manage the MMCR2 entirely by itself. */ - mtspr(SPRN_MMCR2, cpuhw->mmcr[3] | current->thread.mmcr2); + mtspr(SPRN_MMCR2, cpuhw->mmcr.mmcr2 | current->thread.mmcr2); + + if (ppmu->flags & PPMU_ARCH_31) { + mtspr(SPRN_MMCR3, current->thread.mmcr3); + mtspr(SPRN_SIER2, current->thread.sier2); + mtspr(SPRN_SIER3, current->thread.sier3); + } out: return mmcr0; } @@ -845,6 +863,11 @@ void perf_event_print_debug(void) pr_info("EBBRR: %016lx BESCR: %016lx\n", mfspr(SPRN_EBBRR), mfspr(SPRN_BESCR)); } + + if (ppmu->flags & PPMU_ARCH_31) { + pr_info("MMCR3: %016lx SIER2: %016lx SIER3: %016lx\n", + mfspr(SPRN_MMCR3), mfspr(SPRN_SIER2), mfspr(SPRN_SIER3)); + } #endif pr_info("SIAR: %016lx SDAR: %016lx SIER: %016lx\n", mfspr(SPRN_SIAR), sdar, sier); @@ -1196,7 +1219,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) static void power_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; - unsigned long flags, mmcr0, val; + unsigned long flags, mmcr0, val, mmcra; if (!ppmu) return; @@ -1229,12 +1252,24 @@ static void power_pmu_disable(struct pmu *pmu) mb(); isync(); + val = mmcra = cpuhw->mmcr.mmcra; + /* * Disable instruction sampling if it was enabled */ - if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { - mtspr(SPRN_MMCRA, - cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); + if (cpuhw->mmcr.mmcra & MMCRA_SAMPLE_ENABLE) + val &= ~MMCRA_SAMPLE_ENABLE; + + /* Disable BHRB via mmcra (BHRBRD) for p10 */ + if (ppmu->flags & PPMU_ARCH_31) + val |= MMCRA_BHRB_DISABLE; + + /* + * Write SPRN_MMCRA if mmcra has either disabled + * instruction sampling or BHRB. + */ + if (val != mmcra) { + mtspr(SPRN_MMCRA, mmcra); mb(); isync(); } @@ -1308,18 +1343,20 @@ static void power_pmu_enable(struct pmu *pmu) * (possibly updated for removal of events). */ if (!cpuhw->n_added) { - mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); - mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); + mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE); + mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1); + if (ppmu->flags & PPMU_ARCH_31) + mtspr(SPRN_MMCR3, cpuhw->mmcr.mmcr3); goto out_enable; } /* * Clear all MMCR settings and recompute them for the new set of events. */ - memset(cpuhw->mmcr, 0, sizeof(cpuhw->mmcr)); + memset(&cpuhw->mmcr, 0, sizeof(cpuhw->mmcr)); if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index, - cpuhw->mmcr, cpuhw->event)) { + &cpuhw->mmcr, cpuhw->event)) { /* shouldn't ever get here */ printk(KERN_ERR "oops compute_mmcr failed\n"); goto out; @@ -1333,11 +1370,11 @@ static void power_pmu_enable(struct pmu *pmu) */ event = cpuhw->event[0]; if (event->attr.exclude_user) - cpuhw->mmcr[0] |= MMCR0_FCP; + cpuhw->mmcr.mmcr0 |= MMCR0_FCP; if (event->attr.exclude_kernel) - cpuhw->mmcr[0] |= freeze_events_kernel; + cpuhw->mmcr.mmcr0 |= freeze_events_kernel; if (event->attr.exclude_hv) - cpuhw->mmcr[0] |= MMCR0_FCHV; + cpuhw->mmcr.mmcr0 |= MMCR0_FCHV; } /* @@ -1346,12 +1383,15 @@ static void power_pmu_enable(struct pmu *pmu) * Then unfreeze the events. */ ppc_set_pmu_inuse(1); - mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); - mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); - mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) + mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE); + mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1); + mtspr(SPRN_MMCR0, (cpuhw->mmcr.mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) | MMCR0_FC); if (ppmu->flags & PPMU_ARCH_207S) - mtspr(SPRN_MMCR2, cpuhw->mmcr[3]); + mtspr(SPRN_MMCR2, cpuhw->mmcr.mmcr2); + + if (ppmu->flags & PPMU_ARCH_31) + mtspr(SPRN_MMCR3, cpuhw->mmcr.mmcr3); /* * Read off any pre-existing events that need to move @@ -1402,7 +1442,7 @@ static void power_pmu_enable(struct pmu *pmu) perf_event_update_userpage(event); } cpuhw->n_limited = n_lim; - cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; + cpuhw->mmcr.mmcr0 |= MMCR0_PMXE | MMCR0_FCECE; out_enable: pmao_restore_workaround(ebb); @@ -1418,9 +1458,9 @@ static void power_pmu_enable(struct pmu *pmu) /* * Enable instruction sampling if necessary */ - if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { + if (cpuhw->mmcr.mmcra & MMCRA_SAMPLE_ENABLE) { mb(); - mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); + mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra); } out: @@ -1550,7 +1590,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags) cpuhw->flags[i-1] = cpuhw->flags[i]; } --cpuhw->n_events; - ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr); + ppmu->disable_pmc(event->hw.idx - 1, &cpuhw->mmcr); if (event->hw.idx) { write_pmc(event->hw.idx, 0); event->hw.idx = 0; @@ -1571,7 +1611,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags) } if (cpuhw->n_events == 0) { /* disable exceptions if no events are running */ - cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); + cpuhw->mmcr.mmcr0 &= ~(MMCR0_PMXE | MMCR0_FCECE); } if (has_branch_stack(event)) @@ -1795,7 +1835,7 @@ static void hw_perf_event_destroy(struct perf_event *event) static int hw_perf_cache_event(u64 config, u64 *eventp) { unsigned long type, op, result; - int ev; + u64 ev; if (!ppmu->cache_events) return -EINVAL; @@ -2246,7 +2286,7 @@ static void __perf_event_interrupt(struct pt_regs *regs) * XXX might want to use MSR.PM to keep the events frozen until * we get back out of this interrupt. */ - write_mmcr0(cpuhw, cpuhw->mmcr[0]); + write_mmcr0(cpuhw, cpuhw->mmcr.mmcr0); if (nmi) nmi_exit(); @@ -2268,7 +2308,7 @@ static int power_pmu_prepare_cpu(unsigned int cpu) if (ppmu) { memset(cpuhw, 0, sizeof(*cpuhw)); - cpuhw->mmcr[0] = MMCR0_FC; + cpuhw->mmcr.mmcr0 = MMCR0_FC; } return 0; } @@ -2314,6 +2354,8 @@ static int __init init_ppc64_pmu(void) return 0; else if (!init_power9_pmu()) return 0; + else if (!init_power10_pmu()) + return 0; else if (!init_ppc970_pmu()) return 0; else diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c index 5e5a54d5588e..eb8a6aaf4cc1 100644 --- a/arch/powerpc/perf/generic-compat-pmu.c +++ b/arch/powerpc/perf/generic-compat-pmu.c @@ -101,7 +101,7 @@ static int compat_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(L1D) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0, diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index db213eb7cb02..cdb7bfbd157e 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -31,6 +31,8 @@ static int interface_version; /* Whether we have to aggregate result data for some domains. */ static bool aggregate_result_elements; +static cpumask_t hv_24x7_cpumask; + static bool domain_is_valid(unsigned domain) { switch (domain) { @@ -446,6 +448,12 @@ static ssize_t device_show_string(struct device *dev, return sprintf(buf, "%s\n", (char *)d->var); } +static ssize_t cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &hv_24x7_cpumask); +} + static ssize_t sockets_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1113,6 +1121,7 @@ static DEVICE_ATTR_RO(domains); static DEVICE_ATTR_RO(sockets); static DEVICE_ATTR_RO(chipspersocket); static DEVICE_ATTR_RO(coresperchip); +static DEVICE_ATTR_RO(cpumask); static struct bin_attribute *if_bin_attrs[] = { &bin_attr_catalog, @@ -1126,6 +1135,7 @@ static struct attribute *if_attrs[] = { &dev_attr_sockets.attr, &dev_attr_chipspersocket.attr, &dev_attr_coresperchip.attr, + &dev_attr_cpumask.attr, NULL, }; @@ -1641,6 +1651,45 @@ static struct pmu h_24x7_pmu = { .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; +static int ppc_hv_24x7_cpu_online(unsigned int cpu) +{ + if (cpumask_empty(&hv_24x7_cpumask)) + cpumask_set_cpu(cpu, &hv_24x7_cpumask); + + return 0; +} + +static int ppc_hv_24x7_cpu_offline(unsigned int cpu) +{ + int target; + + /* Check if exiting cpu is used for collecting 24x7 events */ + if (!cpumask_test_and_clear_cpu(cpu, &hv_24x7_cpumask)) + return 0; + + /* Find a new cpu to collect 24x7 events */ + target = cpumask_last(cpu_active_mask); + + if (target < 0 || target >= nr_cpu_ids) { + pr_err("hv_24x7: CPU hotplug init failed\n"); + return -1; + } + + /* Migrate 24x7 events to the new target */ + cpumask_set_cpu(target, &hv_24x7_cpumask); + perf_pmu_migrate_context(&h_24x7_pmu, cpu, target); + + return 0; +} + +static int hv_24x7_cpu_hotplug_init(void) +{ + return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE, + "perf/powerpc/hv_24x7:online", + ppc_hv_24x7_cpu_online, + ppc_hv_24x7_cpu_offline); +} + static int hv_24x7_init(void) { int r; @@ -1685,6 +1734,11 @@ static int hv_24x7_init(void) if (r) return r; + /* init cpuhotplug */ + r = hv_24x7_cpu_hotplug_init(); + if (r) + return r; + r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1); if (r) return r; diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 0edcfd0b491d..a45d694a5d5d 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1288,11 +1288,30 @@ static int trace_imc_prepare_sample(struct trace_imc_data *mem, header->size = sizeof(*header) + event->header_size; header->misc = 0; - if (is_kernel_addr(data->ip)) - header->misc |= PERF_RECORD_MISC_KERNEL; - else - header->misc |= PERF_RECORD_MISC_USER; - + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + switch (IMC_TRACE_RECORD_VAL_HVPR(mem->val)) { + case 0:/* when MSR HV and PR not set in the trace-record */ + header->misc |= PERF_RECORD_MISC_GUEST_KERNEL; + break; + case 1: /* MSR HV is 0 and PR is 1 */ + header->misc |= PERF_RECORD_MISC_GUEST_USER; + break; + case 2: /* MSR HV is 1 and PR is 0 */ + header->misc |= PERF_RECORD_MISC_HYPERVISOR; + break; + case 3: /* MSR HV is 1 and PR is 1 */ + header->misc |= PERF_RECORD_MISC_USER; + break; + default: + pr_info("IMC: Unable to set the flag based on MSR bits\n"); + break; + } + } else { + if (is_kernel_addr(data->ip)) + header->misc |= PERF_RECORD_MISC_KERNEL; + else + header->misc |= PERF_RECORD_MISC_USER; + } perf_event_header__init_id(header, data, event); return 0; diff --git a/arch/powerpc/perf/internal.h b/arch/powerpc/perf/internal.h index f755c64da137..80bbf72bfec2 100644 --- a/arch/powerpc/perf/internal.h +++ b/arch/powerpc/perf/internal.h @@ -9,4 +9,5 @@ extern int init_power6_pmu(void); extern int init_power7_pmu(void); extern int init_power8_pmu(void); extern int init_power9_pmu(void); +extern int init_power10_pmu(void); extern int init_generic_compat_pmu(void); diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 4c86da5eb28a..964437adec18 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -55,7 +55,9 @@ static bool is_event_valid(u64 event) { u64 valid_mask = EVENT_VALID_MASK; - if (cpu_has_feature(CPU_FTR_ARCH_300)) + if (cpu_has_feature(CPU_FTR_ARCH_31)) + valid_mask = p10_EVENT_VALID_MASK; + else if (cpu_has_feature(CPU_FTR_ARCH_300)) valid_mask = p9_EVENT_VALID_MASK; return !(event & ~valid_mask); @@ -69,6 +71,14 @@ static inline bool is_event_marked(u64 event) return false; } +static unsigned long sdar_mod_val(u64 event) +{ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + return p10_SDAR_MODE(event); + + return p9_SDAR_MODE(event); +} + static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) { /* @@ -79,7 +89,7 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) * MMCRA[SDAR_MODE] will be programmed as "0b01" for continous sampling * mode and will be un-changed when setting MMCRA[63] (Marked events). * - * Incase of Power9: + * Incase of Power9/power10: * Marked event: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'), * or if group already have any marked events. * For rest @@ -90,8 +100,8 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) if (cpu_has_feature(CPU_FTR_ARCH_300)) { if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE)) *mmcra &= MMCRA_SDAR_MODE_NO_UPDATES; - else if (p9_SDAR_MODE(event)) - *mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT; + else if (sdar_mod_val(event)) + *mmcra |= sdar_mod_val(event) << MMCRA_SDAR_MODE_SHIFT; else *mmcra |= MMCRA_SDAR_MODE_DCACHE; } else @@ -134,7 +144,11 @@ static bool is_thresh_cmp_valid(u64 event) /* * Check the mantissa upper two bits are not zero, unless the * exponent is also zero. See the THRESH_CMP_MANTISSA doc. + * Power10: thresh_cmp is replaced by l2_l3 event select. */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + return false; + cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; exp = cmp >> 7; @@ -251,7 +265,12 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK; - cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK; + if (cpu_has_feature(CPU_FTR_ARCH_31)) + cache = (event >> EVENT_CACHE_SEL_SHIFT) & + p10_EVENT_CACHE_SEL_MASK; + else + cache = (event >> EVENT_CACHE_SEL_SHIFT) & + EVENT_CACHE_SEL_MASK; ebb = (event >> EVENT_EBB_SHIFT) & EVENT_EBB_MASK; if (pmc) { @@ -283,7 +302,10 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) } if (unit >= 6 && unit <= 9) { - if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) { + mask |= CNST_L2L3_GROUP_MASK; + value |= CNST_L2L3_GROUP_VAL(event >> p10_L2L3_EVENT_SHIFT); + } else if (cpu_has_feature(CPU_FTR_ARCH_300)) { mask |= CNST_CACHE_GROUP_MASK; value |= CNST_CACHE_GROUP_VAL(event & 0xff); @@ -363,10 +385,11 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) } int isa207_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], + unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[]) { unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val; + unsigned long mmcr3; unsigned int pmc, pmc_inuse; int i; @@ -379,7 +402,14 @@ int isa207_compute_mmcr(u64 event[], int n_ev, pmc_inuse |= 1 << pmc; } - mmcra = mmcr1 = mmcr2 = 0; + mmcra = mmcr1 = mmcr2 = mmcr3 = 0; + + /* + * Disable bhrb unless explicitly requested + * by setting MMCRA (BHRBRD) bit. + */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + mmcra |= MMCRA_BHRB_DISABLE; /* Second pass: assign PMCs, set all MMCR1 fields */ for (i = 0; i < n_ev; ++i) { @@ -438,8 +468,17 @@ int isa207_compute_mmcr(u64 event[], int n_ev, mmcra |= val << MMCRA_THR_CTL_SHIFT; val = (event[i] >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK; mmcra |= val << MMCRA_THR_SEL_SHIFT; - val = (event[i] >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; - mmcra |= thresh_cmp_val(val); + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + val = (event[i] >> EVENT_THR_CMP_SHIFT) & + EVENT_THR_CMP_MASK; + mmcra |= thresh_cmp_val(val); + } + } + + if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) { + val = (event[i] >> p10_L2L3_EVENT_SHIFT) & + p10_EVENT_L2L3_SEL_MASK; + mmcr2 |= val << p10_L2L3_SEL_SHIFT; } if (event[i] & EVENT_WANTS_BHRB) { @@ -447,6 +486,11 @@ int isa207_compute_mmcr(u64 event[], int n_ev, mmcra |= val << MMCRA_IFM_SHIFT; } + /* set MMCRA (BHRBRD) to 0 if there is user request for BHRB */ + if (cpu_has_feature(CPU_FTR_ARCH_31) && + (has_branch_stack(pevents[i]) || (event[i] & EVENT_WANTS_BHRB))) + mmcra &= ~MMCRA_BHRB_DISABLE; + if (pevents[i]->attr.exclude_user) mmcr2 |= MMCR2_FCP(pmc); @@ -460,34 +504,43 @@ int isa207_compute_mmcr(u64 event[], int n_ev, mmcr2 |= MMCR2_FCS(pmc); } + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + if (pmc <= 4) { + val = (event[i] >> p10_EVENT_MMCR3_SHIFT) & + p10_EVENT_MMCR3_MASK; + mmcr3 |= val << MMCR3_SHIFT(pmc); + } + } + hwc[i] = pmc - 1; } /* Return MMCRx values */ - mmcr[0] = 0; + mmcr->mmcr0 = 0; /* pmc_inuse is 1-based */ if (pmc_inuse & 2) - mmcr[0] = MMCR0_PMC1CE; + mmcr->mmcr0 = MMCR0_PMC1CE; if (pmc_inuse & 0x7c) - mmcr[0] |= MMCR0_PMCjCE; + mmcr->mmcr0 |= MMCR0_PMCjCE; /* If we're not using PMC 5 or 6, freeze them */ if (!(pmc_inuse & 0x60)) - mmcr[0] |= MMCR0_FC56; + mmcr->mmcr0 |= MMCR0_FC56; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; - mmcr[3] = mmcr2; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; + mmcr->mmcr2 = mmcr2; + mmcr->mmcr3 = mmcr3; return 0; } -void isa207_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +void isa207_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { if (pmc <= 3) - mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1)); + mmcr->mmcr1 &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1)); } static int find_alternative(u64 event, const unsigned int ev_alt[][MAX_ALT], int size) diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 63fd4f3f6013..044de65e96b9 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -87,6 +87,31 @@ EVENT_LINUX_MASK | \ EVENT_PSEL_MASK)) +/* Contants to support power10 raw encoding format */ +#define p10_SDAR_MODE_SHIFT 22 +#define p10_SDAR_MODE_MASK 0x3ull +#define p10_SDAR_MODE(v) (((v) >> p10_SDAR_MODE_SHIFT) & \ + p10_SDAR_MODE_MASK) +#define p10_EVENT_L2L3_SEL_MASK 0x1f +#define p10_L2L3_SEL_SHIFT 3 +#define p10_L2L3_EVENT_SHIFT 40 +#define p10_EVENT_THRESH_MASK 0xffffull +#define p10_EVENT_CACHE_SEL_MASK 0x3ull +#define p10_EVENT_MMCR3_MASK 0x7fffull +#define p10_EVENT_MMCR3_SHIFT 45 + +#define p10_EVENT_VALID_MASK \ + ((p10_SDAR_MODE_MASK << p10_SDAR_MODE_SHIFT | \ + (p10_EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \ + (EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \ + (p10_EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT) | \ + (EVENT_PMC_MASK << EVENT_PMC_SHIFT) | \ + (EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \ + (p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT) | \ + (p10_EVENT_MMCR3_MASK << p10_EVENT_MMCR3_SHIFT) | \ + (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \ + EVENT_LINUX_MASK | \ + EVENT_PSEL_MASK)) /* * Layout of constraint bits: * @@ -135,6 +160,9 @@ #define CNST_CACHE_PMC4_VAL (1ull << 54) #define CNST_CACHE_PMC4_MASK CNST_CACHE_PMC4_VAL +#define CNST_L2L3_GROUP_VAL(v) (((v) & 0x1full) << 55) +#define CNST_L2L3_GROUP_MASK CNST_L2L3_GROUP_VAL(0x1f) + /* * For NC we are counting up to 4 events. This requires three bits, and we need * the fifth event to overflow and set the 4th bit. To achieve that we bias the @@ -191,7 +219,7 @@ #define MMCRA_THR_CTR_EXP(v) (((v) >> MMCRA_THR_CTR_EXP_SHIFT) &\ MMCRA_THR_CTR_EXP_MASK) -/* MMCR1 Threshold Compare bit constant for power9 */ +/* MMCRA Threshold Compare bit constant for power9 */ #define p9_MMCRA_THR_CMP_SHIFT 45 /* Bits in MMCR2 for PowerISA v2.07 */ @@ -202,6 +230,9 @@ #define MAX_ALT 2 #define MAX_PMU_COUNTERS 6 +/* Bits in MMCR3 for PowerISA v3.10 */ +#define MMCR3_SHIFT(pmc) (49 - (15 * ((pmc) - 1))) + #define ISA207_SIER_TYPE_SHIFT 15 #define ISA207_SIER_TYPE_MASK (0x7ull << ISA207_SIER_TYPE_SHIFT) @@ -217,9 +248,9 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp); int isa207_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], + unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[]); -void isa207_disable_pmc(unsigned int pmc, unsigned long mmcr[]); +void isa207_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr); int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags, const unsigned int ev_alt[][MAX_ALT]); void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags, diff --git a/arch/powerpc/perf/mpc7450-pmu.c b/arch/powerpc/perf/mpc7450-pmu.c index 4d5ef92511d1..1919e9df9165 100644 --- a/arch/powerpc/perf/mpc7450-pmu.c +++ b/arch/powerpc/perf/mpc7450-pmu.c @@ -257,7 +257,7 @@ static const u32 pmcsel_mask[N_COUNTER] = { * Compute MMCR0/1/2 values for a set of events. */ static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], - unsigned long mmcr[], + struct mmcr_regs *mmcr, struct perf_event *pevents[]) { u8 event_index[N_CLASSES][N_COUNTER]; @@ -321,9 +321,16 @@ static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], mmcr0 |= MMCR0_PMCnCE; /* Return MMCRx values */ - mmcr[0] = mmcr0; - mmcr[1] = mmcr1; - mmcr[2] = mmcr2; + mmcr->mmcr0 = mmcr0; + mmcr->mmcr1 = mmcr1; + mmcr->mmcr2 = mmcr2; + /* + * 32-bit doesn't have an MMCRA and uses SPRN_MMCR2 to define + * SPRN_MMCRA. So assign mmcra of cpu_hw_events with `mmcr2` + * value to ensure that any write to this SPRN_MMCRA will + * use mmcr2 value. + */ + mmcr->mmcra = mmcr2; return 0; } @@ -331,12 +338,12 @@ static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], * Disable counting by a PMC. * Note that the pmc argument is 0-based here, not 1-based. */ -static void mpc7450_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void mpc7450_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { if (pmc <= 1) - mmcr[0] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); + mmcr->mmcr0 &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); else - mmcr[1] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); + mmcr->mmcr1 &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); } static int mpc7450_generic_events[] = { @@ -354,7 +361,7 @@ static int mpc7450_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0, 0x225 }, [C(OP_WRITE)] = { 0, 0x227 }, diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h new file mode 100644 index 000000000000..60c1b8111082 --- /dev/null +++ b/arch/powerpc/perf/power10-events-list.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Performance counter support for POWER10 processors. + * + * Copyright 2020 Madhavan Srinivasan, IBM Corporation. + * Copyright 2020 Athira Rajeev, IBM Corporation. + */ + +/* + * Power10 event codes. + */ +EVENT(PM_RUN_CYC, 0x600f4); +EVENT(PM_DISP_STALL_CYC, 0x100f8); +EVENT(PM_EXEC_STALL, 0x30008); +EVENT(PM_RUN_INST_CMPL, 0x500fa); +EVENT(PM_BR_CMPL, 0x4d05e); +EVENT(PM_BR_MPRED_CMPL, 0x400f6); + +/* All L1 D cache load references counted at finish, gated by reject */ +EVENT(PM_LD_REF_L1, 0x100fc); +/* Load Missed L1 */ +EVENT(PM_LD_MISS_L1, 0x3e054); +/* Store Missed L1 */ +EVENT(PM_ST_MISS_L1, 0x300f0); +/* L1 cache data prefetches */ +EVENT(PM_LD_PREFETCH_CACHE_LINE_MISS, 0x1002c); +/* Demand iCache Miss */ +EVENT(PM_L1_ICACHE_MISS, 0x200fc); +/* Instruction fetches from L1 */ +EVENT(PM_INST_FROM_L1, 0x04080); +/* Instruction Demand sectors wriittent into IL1 */ +EVENT(PM_INST_FROM_L1MISS, 0x03f00000001c040); +/* Instruction prefetch written into IL1 */ +EVENT(PM_IC_PREF_REQ, 0x040a0); +/* The data cache was reloaded from local core's L3 due to a demand load */ +EVENT(PM_DATA_FROM_L3, 0x01340000001c040); +/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */ +EVENT(PM_DATA_FROM_L3MISS, 0x300fe); +/* Data PTEG reload */ +EVENT(PM_DTLB_MISS, 0x300fc); +/* ITLB Reloaded */ +EVENT(PM_ITLB_MISS, 0x400fc); + +EVENT(PM_RUN_CYC_ALT, 0x0001e); +EVENT(PM_RUN_INST_CMPL_ALT, 0x00002); + +/* + * Memory Access Events + * + * Primary PMU event used here is PM_MRK_INST_CMPL (0x401e0) + * To enable capturing of memory profiling, these MMCRA bits + * needs to be programmed and corresponding raw event format + * encoding. + * + * MMCRA bits encoding needed are + * SM (Sampling Mode) + * EM (Eligibility for Random Sampling) + * TECE (Threshold Event Counter Event) + * TS (Threshold Start Event) + * TE (Threshold End Event) + * + * Corresponding Raw Encoding bits: + * sample [EM,SM] + * thresh_sel (TECE) + * thresh start (TS) + * thresh end (TE) + */ + +EVENT(MEM_LOADS, 0x34340401e0); +EVENT(MEM_STORES, 0x343c0401e0); diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c new file mode 100644 index 000000000000..f7cff7f36a1c --- /dev/null +++ b/arch/powerpc/perf/power10-pmu.c @@ -0,0 +1,419 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Performance counter support for POWER10 processors. + * + * Copyright 2020 Madhavan Srinivasan, IBM Corporation. + * Copyright 2020 Athira Rajeev, IBM Corporation. + */ + +#define pr_fmt(fmt) "power10-pmu: " fmt + +#include "isa207-common.h" +#include "internal.h" + +/* + * Raw event encoding for Power10: + * + * 60 56 52 48 44 40 36 32 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | + * | | [ ] [ src_match ] [ src_mask ] | [ ] [ l2l3_sel ] [ thresh_ctl ] + * | | | | | | + * | | *- IFM (Linux) | | thresh start/stop -* + * | *- BHRB (Linux) | src_sel + * *- EBB (Linux) *invert_bit + * + * 28 24 20 16 12 8 4 0 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | + * [ ] [ sample ] [ ] [ ] [ pmc ] [unit ] [ ] m [ pmcxsel ] + * | | | | | | + * | | | | | *- mark + * | | | *- L1/L2/L3 cache_sel | + * | | sdar_mode | + * | *- sampling mode for marked events *- combine + * | + * *- thresh_sel + * + * Below uses IBM bit numbering. + * + * MMCR1[x:y] = unit (PMCxUNIT) + * MMCR1[24] = pmc1combine[0] + * MMCR1[25] = pmc1combine[1] + * MMCR1[26] = pmc2combine[0] + * MMCR1[27] = pmc2combine[1] + * MMCR1[28] = pmc3combine[0] + * MMCR1[29] = pmc3combine[1] + * MMCR1[30] = pmc4combine[0] + * MMCR1[31] = pmc4combine[1] + * + * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011 + * MMCR1[20:27] = thresh_ctl + * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001 + * MMCR1[20:27] = thresh_ctl + * else + * MMCRA[48:55] = thresh_ctl (THRESH START/END) + * + * if thresh_sel: + * MMCRA[45:47] = thresh_sel + * + * if l2l3_sel: + * MMCR2[56:60] = l2l3_sel[0:4] + * + * MMCR1[16] = cache_sel[0] + * MMCR1[17] = cache_sel[1] + * + * if mark: + * MMCRA[63] = 1 (SAMPLE_ENABLE) + * MMCRA[57:59] = sample[0:2] (RAND_SAMP_ELIG) + * MMCRA[61:62] = sample[3:4] (RAND_SAMP_MODE) + * + * if EBB and BHRB: + * MMCRA[32:33] = IFM + * + * MMCRA[SDAR_MODE] = sdar_mode[0:1] + */ + +/* + * Some power10 event codes. + */ +#define EVENT(_name, _code) enum{_name = _code} + +#include "power10-events-list.h" + +#undef EVENT + +/* MMCRA IFM bits - POWER10 */ +#define POWER10_MMCRA_IFM1 0x0000000040000000UL +#define POWER10_MMCRA_IFM2 0x0000000080000000UL +#define POWER10_MMCRA_IFM3 0x00000000C0000000UL +#define POWER10_MMCRA_BHRB_MASK 0x00000000C0000000UL + +/* Table of alternatives, sorted by column 0 */ +static const unsigned int power10_event_alternatives[][MAX_ALT] = { + { PM_RUN_CYC_ALT, PM_RUN_CYC }, + { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL }, +}; + +static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[]) +{ + int num_alt = 0; + + num_alt = isa207_get_alternatives(event, alt, + ARRAY_SIZE(power10_event_alternatives), flags, + power10_event_alternatives); + + return num_alt; +} + +GENERIC_EVENT_ATTR(cpu-cycles, PM_RUN_CYC); +GENERIC_EVENT_ATTR(instructions, PM_RUN_INST_CMPL); +GENERIC_EVENT_ATTR(branch-instructions, PM_BR_CMPL); +GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL); +GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1); +GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1); +GENERIC_EVENT_ATTR(mem-loads, MEM_LOADS); +GENERIC_EVENT_ATTR(mem-stores, MEM_STORES); + +CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1); +CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1); +CACHE_EVENT_ATTR(L1-dcache-prefetches, PM_LD_PREFETCH_CACHE_LINE_MISS); +CACHE_EVENT_ATTR(L1-dcache-store-misses, PM_ST_MISS_L1); +CACHE_EVENT_ATTR(L1-icache-load-misses, PM_L1_ICACHE_MISS); +CACHE_EVENT_ATTR(L1-icache-loads, PM_INST_FROM_L1); +CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_REQ); +CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS); +CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3); +CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL); +CACHE_EVENT_ATTR(branch-loads, PM_BR_CMPL); +CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS); +CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS); + +static struct attribute *power10_events_attr[] = { + GENERIC_EVENT_PTR(PM_RUN_CYC), + GENERIC_EVENT_PTR(PM_RUN_INST_CMPL), + GENERIC_EVENT_PTR(PM_BR_CMPL), + GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL), + GENERIC_EVENT_PTR(PM_LD_REF_L1), + GENERIC_EVENT_PTR(PM_LD_MISS_L1), + GENERIC_EVENT_PTR(MEM_LOADS), + GENERIC_EVENT_PTR(MEM_STORES), + CACHE_EVENT_PTR(PM_LD_MISS_L1), + CACHE_EVENT_PTR(PM_LD_REF_L1), + CACHE_EVENT_PTR(PM_LD_PREFETCH_CACHE_LINE_MISS), + CACHE_EVENT_PTR(PM_ST_MISS_L1), + CACHE_EVENT_PTR(PM_L1_ICACHE_MISS), + CACHE_EVENT_PTR(PM_INST_FROM_L1), + CACHE_EVENT_PTR(PM_IC_PREF_REQ), + CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS), + CACHE_EVENT_PTR(PM_DATA_FROM_L3), + CACHE_EVENT_PTR(PM_BR_MPRED_CMPL), + CACHE_EVENT_PTR(PM_BR_CMPL), + CACHE_EVENT_PTR(PM_DTLB_MISS), + CACHE_EVENT_PTR(PM_ITLB_MISS), + NULL +}; + +static struct attribute_group power10_pmu_events_group = { + .name = "events", + .attrs = power10_events_attr, +}; + +PMU_FORMAT_ATTR(event, "config:0-59"); +PMU_FORMAT_ATTR(pmcxsel, "config:0-7"); +PMU_FORMAT_ATTR(mark, "config:8"); +PMU_FORMAT_ATTR(combine, "config:10-11"); +PMU_FORMAT_ATTR(unit, "config:12-15"); +PMU_FORMAT_ATTR(pmc, "config:16-19"); +PMU_FORMAT_ATTR(cache_sel, "config:20-21"); +PMU_FORMAT_ATTR(sdar_mode, "config:22-23"); +PMU_FORMAT_ATTR(sample_mode, "config:24-28"); +PMU_FORMAT_ATTR(thresh_sel, "config:29-31"); +PMU_FORMAT_ATTR(thresh_stop, "config:32-35"); +PMU_FORMAT_ATTR(thresh_start, "config:36-39"); +PMU_FORMAT_ATTR(l2l3_sel, "config:40-44"); +PMU_FORMAT_ATTR(src_sel, "config:45-46"); +PMU_FORMAT_ATTR(invert_bit, "config:47"); +PMU_FORMAT_ATTR(src_mask, "config:48-53"); +PMU_FORMAT_ATTR(src_match, "config:54-59"); + +static struct attribute *power10_pmu_format_attr[] = { + &format_attr_event.attr, + &format_attr_pmcxsel.attr, + &format_attr_mark.attr, + &format_attr_combine.attr, + &format_attr_unit.attr, + &format_attr_pmc.attr, + &format_attr_cache_sel.attr, + &format_attr_sdar_mode.attr, + &format_attr_sample_mode.attr, + &format_attr_thresh_sel.attr, + &format_attr_thresh_stop.attr, + &format_attr_thresh_start.attr, + &format_attr_l2l3_sel.attr, + &format_attr_src_sel.attr, + &format_attr_invert_bit.attr, + &format_attr_src_mask.attr, + &format_attr_src_match.attr, + NULL, +}; + +static struct attribute_group power10_pmu_format_group = { + .name = "format", + .attrs = power10_pmu_format_attr, +}; + +static const struct attribute_group *power10_pmu_attr_groups[] = { + &power10_pmu_format_group, + &power10_pmu_events_group, + NULL, +}; + +static int power10_generic_events[] = { + [PERF_COUNT_HW_CPU_CYCLES] = PM_RUN_CYC, + [PERF_COUNT_HW_INSTRUCTIONS] = PM_RUN_INST_CMPL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL, + [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL, + [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1, + [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1, +}; + +static u64 power10_bhrb_filter_map(u64 branch_sample_type) +{ + u64 pmu_bhrb_filter = 0; + + /* BHRB and regular PMU events share the same privilege state + * filter configuration. BHRB is always recorded along with a + * regular PMU event. As the privilege state filter is handled + * in the basic PMC configuration of the accompanying regular + * PMU event, we ignore any separate BHRB specific request. + */ + + /* No branch filter requested */ + if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY) + return pmu_bhrb_filter; + + /* Invalid branch filter options - HW does not support */ + if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN) + return -1; + + if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL) { + pmu_bhrb_filter |= POWER10_MMCRA_IFM2; + return pmu_bhrb_filter; + } + + if (branch_sample_type & PERF_SAMPLE_BRANCH_COND) { + pmu_bhrb_filter |= POWER10_MMCRA_IFM3; + return pmu_bhrb_filter; + } + + if (branch_sample_type & PERF_SAMPLE_BRANCH_CALL) + return -1; + + if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) { + pmu_bhrb_filter |= POWER10_MMCRA_IFM1; + return pmu_bhrb_filter; + } + + /* Every thing else is unsupported */ + return -1; +} + +static void power10_config_bhrb(u64 pmu_bhrb_filter) +{ + pmu_bhrb_filter &= POWER10_MMCRA_BHRB_MASK; + + /* Enable BHRB filter in PMU */ + mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter)); +} + +#define C(x) PERF_COUNT_HW_CACHE_##x + +/* + * Table of generalized cache-related events. + * 0 means not supported, -1 means nonsensical, other values + * are event codes. + */ +static u64 power10_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_LD_REF_L1, + [C(RESULT_MISS)] = PM_LD_MISS_L1, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = PM_ST_MISS_L1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = PM_LD_PREFETCH_CACHE_LINE_MISS, + [C(RESULT_MISS)] = 0, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_INST_FROM_L1, + [C(RESULT_MISS)] = PM_L1_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = PM_INST_FROM_L1MISS, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = PM_IC_PREF_REQ, + [C(RESULT_MISS)] = 0, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_DATA_FROM_L3, + [C(RESULT_MISS)] = PM_DATA_FROM_L3MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = 0, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = PM_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = PM_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_BR_CMPL, + [C(RESULT_MISS)] = PM_BR_MPRED_CMPL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, +}; + +#undef C + +static struct power_pmu power10_pmu = { + .name = "POWER10", + .n_counter = MAX_PMU_COUNTERS, + .add_fields = ISA207_ADD_FIELDS, + .test_adder = ISA207_TEST_ADDER, + .group_constraint_mask = CNST_CACHE_PMC4_MASK, + .group_constraint_val = CNST_CACHE_PMC4_VAL, + .compute_mmcr = isa207_compute_mmcr, + .config_bhrb = power10_config_bhrb, + .bhrb_filter_map = power10_bhrb_filter_map, + .get_constraint = isa207_get_constraint, + .get_alternatives = power10_get_alternatives, + .get_mem_data_src = isa207_get_mem_data_src, + .get_mem_weight = isa207_get_mem_weight, + .disable_pmc = isa207_disable_pmc, + .flags = PPMU_HAS_SIER | PPMU_ARCH_207S | + PPMU_ARCH_31, + .n_generic = ARRAY_SIZE(power10_generic_events), + .generic_events = power10_generic_events, + .cache_events = &power10_cache_events, + .attr_groups = power10_pmu_attr_groups, + .bhrb_nr = 32, +}; + +int init_power10_pmu(void) +{ + int rc; + + /* Comes from cpu_specs[] */ + if (!cur_cpu_spec->oprofile_cpu_type || + strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power10")) + return -ENODEV; + + rc = register_power_pmu(&power10_pmu); + if (rc) + return rc; + + /* Tell userspace that EBB is supported */ + cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB; + + return 0; +} diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c index f8574547fc6b..a62b2cd7914f 100644 --- a/arch/powerpc/perf/power5+-pmu.c +++ b/arch/powerpc/perf/power5+-pmu.c @@ -448,7 +448,8 @@ static int power5p_marked_instr_event(u64 event) } static int power5p_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) + unsigned int hwc[], struct mmcr_regs *mmcr, + struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = 0; @@ -586,20 +587,20 @@ static int power5p_compute_mmcr(u64 event[], int n_ev, } /* Return MMCRx values */ - mmcr[0] = 0; + mmcr->mmcr0 = 0; if (pmc_inuse & 1) - mmcr[0] = MMCR0_PMC1CE; + mmcr->mmcr0 = MMCR0_PMC1CE; if (pmc_inuse & 0x3e) - mmcr[0] |= MMCR0_PMCjCE; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; + mmcr->mmcr0 |= MMCR0_PMCjCE; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; return 0; } -static void power5p_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void power5p_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { if (pmc <= 3) - mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); + mmcr->mmcr1 &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); } static int power5p_generic_events[] = { @@ -618,7 +619,7 @@ static int power5p_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0x1c10a8, 0x3c1088 }, [C(OP_WRITE)] = { 0x2c10a8, 0xc10c3 }, diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c index da52ecab7c9a..8732b587cf71 100644 --- a/arch/powerpc/perf/power5-pmu.c +++ b/arch/powerpc/perf/power5-pmu.c @@ -379,7 +379,8 @@ static int power5_marked_instr_event(u64 event) } static int power5_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) + unsigned int hwc[], struct mmcr_regs *mmcr, + struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; @@ -528,20 +529,20 @@ static int power5_compute_mmcr(u64 event[], int n_ev, } /* Return MMCRx values */ - mmcr[0] = 0; + mmcr->mmcr0 = 0; if (pmc_inuse & 1) - mmcr[0] = MMCR0_PMC1CE; + mmcr->mmcr0 = MMCR0_PMC1CE; if (pmc_inuse & 0x3e) - mmcr[0] |= MMCR0_PMCjCE; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; + mmcr->mmcr0 |= MMCR0_PMCjCE; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; return 0; } -static void power5_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void power5_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { if (pmc <= 3) - mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); + mmcr->mmcr1 &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); } static int power5_generic_events[] = { @@ -560,7 +561,7 @@ static int power5_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0x4c1090, 0x3c1088 }, [C(OP_WRITE)] = { 0x3c1090, 0xc10c3 }, diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c index 3929cacf72ed..0e318cf87129 100644 --- a/arch/powerpc/perf/power6-pmu.c +++ b/arch/powerpc/perf/power6-pmu.c @@ -171,7 +171,7 @@ static int power6_marked_instr_event(u64 event) * Assign PMC numbers and compute MMCR1 value for a set of events */ static int p6_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) + unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; @@ -243,13 +243,13 @@ static int p6_compute_mmcr(u64 event[], int n_ev, if (pmc < 4) mmcr1 |= (unsigned long)psel << MMCR1_PMCSEL_SH(pmc); } - mmcr[0] = 0; + mmcr->mmcr0 = 0; if (pmc_inuse & 1) - mmcr[0] = MMCR0_PMC1CE; + mmcr->mmcr0 = MMCR0_PMC1CE; if (pmc_inuse & 0xe) - mmcr[0] |= MMCR0_PMCjCE; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; + mmcr->mmcr0 |= MMCR0_PMCjCE; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; return 0; } @@ -457,11 +457,11 @@ static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[]) return nalt; } -static void p6_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void p6_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { /* Set PMCxSEL to 0 to disable PMCx */ if (pmc <= 3) - mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); + mmcr->mmcr1 &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); } static int power6_generic_events[] = { @@ -481,7 +481,7 @@ static int power6_generic_events[] = { * are event codes. * The "DTLB" and "ITLB" events relate to the DERAT and IERAT. */ -static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0x280030, 0x80080 }, [C(OP_WRITE)] = { 0x180032, 0x80088 }, diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index a137813a3076..5e0bf09cf077 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -242,7 +242,8 @@ static int power7_marked_instr_event(u64 event) } static int power7_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) + unsigned int hwc[], struct mmcr_regs *mmcr, + struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; @@ -298,20 +299,20 @@ static int power7_compute_mmcr(u64 event[], int n_ev, } /* Return MMCRx values */ - mmcr[0] = 0; + mmcr->mmcr0 = 0; if (pmc_inuse & 1) - mmcr[0] = MMCR0_PMC1CE; + mmcr->mmcr0 = MMCR0_PMC1CE; if (pmc_inuse & 0x3e) - mmcr[0] |= MMCR0_PMCjCE; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; + mmcr->mmcr0 |= MMCR0_PMCjCE; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; return 0; } -static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void power7_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { if (pmc <= 3) - mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); + mmcr->mmcr1 &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); } static int power7_generic_events[] = { @@ -332,7 +333,7 @@ static int power7_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0xc880, 0x400f0 }, [C(OP_WRITE)] = { 0, 0x300f0 }, diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 3a5fcc20ff31..5282e8415ddf 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -253,7 +253,7 @@ static void power8_config_bhrb(u64 pmu_bhrb_filter) * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(L1D) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = PM_LD_REF_L1, diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 08c3ef796198..05dae38b969a 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -310,7 +310,7 @@ static void power9_config_bhrb(u64 pmu_bhrb_filter) * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(L1D) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = PM_LD_REF_L1, diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c index 4035d93d87ab..d35223fb112c 100644 --- a/arch/powerpc/perf/ppc970-pmu.c +++ b/arch/powerpc/perf/ppc970-pmu.c @@ -253,7 +253,8 @@ static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[]) } static int p970_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) + unsigned int hwc[], struct mmcr_regs *mmcr, + struct perf_event *pevents[]) { unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0; unsigned int pmc, unit, byte, psel; @@ -393,27 +394,26 @@ static int p970_compute_mmcr(u64 event[], int n_ev, mmcra |= 0x2000; /* mark only one IOP per PPC instruction */ /* Return MMCRx values */ - mmcr[0] = mmcr0; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; + mmcr->mmcr0 = mmcr0; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; return 0; } -static void p970_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void p970_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { - int shift, i; + int shift; - if (pmc <= 1) { - shift = MMCR0_PMC1SEL_SH - 7 * pmc; - i = 0; - } else { - shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2); - i = 1; - } /* * Setting the PMCxSEL field to 0x08 disables PMC x. */ - mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift); + if (pmc <= 1) { + shift = MMCR0_PMC1SEL_SH - 7 * pmc; + mmcr->mmcr0 = (mmcr->mmcr0 & ~(0x1fUL << shift)) | (0x08UL << shift); + } else { + shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2); + mmcr->mmcr1 = (mmcr->mmcr1 & ~(0x1fUL << shift)) | (0x08UL << shift); + } } static int ppc970_generic_events[] = { @@ -432,7 +432,7 @@ static int ppc970_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0x8810, 0x3810 }, [C(OP_WRITE)] = { 0x7810, 0x813 }, diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S index 70083649c9ea..11475c58ea43 100644 --- a/arch/powerpc/platforms/52xx/lite5200_sleep.S +++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S @@ -56,7 +56,7 @@ lite5200_low_power: /* * save stuff BDI overwrites * 0xf0 (0xe0->0x100 gets overwritten when BDI connected; - * even when CONFIG_BDI* is disabled and MMU XLAT commented; heisenbug?)) + * even when CONFIG_BDI_SWITCH is disabled and MMU XLAT commented; heisenbug?)) * WARNING: self-refresh doesn't seem to work when BDI2000 is connected, * possibly because BDI sets SDRAM registers before wakeup code does */ diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index fa3d29dcb57e..b77cbb0a50e1 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -160,7 +160,7 @@ config XES_MPC85xx computers from Extreme Engineering Solutions (X-ES) based on Freescale MPC85xx processors. Manufacturer: Extreme Engineering Solutions, Inc. - URL: + URL: config STX_GP3 bool "Silicon Turnkey Express GP3" diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index 0f7c8241912b..f2ff359041ee 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig @@ -44,6 +44,7 @@ config SPU_FS tristate "SPU file system" default m depends on PPC_CELL + depends on COREDUMP select SPU_BASE help The SPU file system is used to access Synergistic Processing diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 3b75e8f60609..026c181a98c5 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -66,13 +66,20 @@ static int match_context(const void *v, struct file *file, unsigned fd) */ static struct spu_context *coredump_next_context(int *fd) { + struct spu_context *ctx; struct file *file; int n = iterate_fd(current->files, *fd, match_context, NULL); if (!n) return NULL; *fd = n - 1; + + rcu_read_lock(); file = fcheck(*fd); - return SPUFS_I(file_inode(file))->i_ctx; + ctx = SPUFS_I(file_inode(file))->i_ctx; + get_spu_context(ctx); + rcu_read_unlock(); + + return ctx; } int spufs_coredump_extra_notes_size(void) @@ -83,17 +90,23 @@ int spufs_coredump_extra_notes_size(void) fd = 0; while ((ctx = coredump_next_context(&fd)) != NULL) { rc = spu_acquire_saved(ctx); - if (rc) + if (rc) { + put_spu_context(ctx); break; + } + rc = spufs_ctx_note_size(ctx, fd); spu_release_saved(ctx); - if (rc < 0) + if (rc < 0) { + put_spu_context(ctx); break; + } size += rc; /* start searching the next fd next time */ fd++; + put_spu_context(ctx); } return size; @@ -105,7 +118,7 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i, size_t sz = spufs_coredump_read[i].size; char fullname[80]; struct elf_note en; - size_t ret; + int ret; sprintf(fullname, "SPU/%d/%s", dfd, spufs_coredump_read[i].name); en.n_namesz = strlen(fullname) + 1; diff --git a/arch/powerpc/platforms/pasemi/misc.c b/arch/powerpc/platforms/pasemi/misc.c index 1cd4ca14b08d..1bf65d02d3ba 100644 --- a/arch/powerpc/platforms/pasemi/misc.c +++ b/arch/powerpc/platforms/pasemi/misc.c @@ -56,8 +56,7 @@ static int __init pasemi_register_i2c_devices(void) if (!adap_node) continue; - node = NULL; - while ((node = of_get_next_child(adap_node, node))) { + for_each_child_of_node(adap_node, node) { struct i2c_board_info info = {}; const u32 *addr; int len; diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index 181caa3f6717..5c77b9a24c0e 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -1465,7 +1465,7 @@ static long g5_i2s_enable(struct device_node *node, long param, long value) case 2: if (macio->type == macio_shasta) break; - /* fall through */ + fallthrough; default: return -ENODEV; } diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index bf4be4b53b44..f77a59b5c2e1 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -629,8 +629,7 @@ static void __init kw_i2c_probe(void) for (i = 0; i < chans; i++) kw_i2c_add(host, np, np, i); } else { - for (child = NULL; - (child = of_get_next_child(np, child)) != NULL;) { + for_each_child_of_node(np, child) { const u32 *reg = of_get_property(child, "reg", NULL); if (reg == NULL) @@ -1193,8 +1192,7 @@ static void pmac_i2c_devscan(void (*callback)(struct device_node *dev, * platform function instance */ list_for_each_entry(bus, &pmac_i2c_busses, link) { - for (np = NULL; - (np = of_get_next_child(bus->busnode, np)) != NULL;) { + for_each_child_of_node(bus->busnode, np) { struct whitelist_ent *p; /* If multibus, check if device is on that bus */ if (bus->flags & pmac_i2c_multibus) diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c index 62311e84a423..f5422506d4b0 100644 --- a/arch/powerpc/platforms/powermac/pfunc_base.c +++ b/arch/powerpc/platforms/powermac/pfunc_base.c @@ -114,7 +114,7 @@ static void macio_gpio_init_one(struct macio_chip *macio) * Ok, got one, we dont need anything special to track them down, so * we just create them all */ - for (gp = NULL; (gp = of_get_next_child(gparent, gp)) != NULL;) { + for_each_child_of_node(gparent, gp) { const u32 *reg = of_get_property(gp, "reg", NULL); unsigned long offset; if (reg == NULL) @@ -133,7 +133,7 @@ static void macio_gpio_init_one(struct macio_chip *macio) macio->of_node); /* And now we run all the init ones */ - for (gp = NULL; (gp = of_get_next_child(gparent, gp)) != NULL;) + for_each_child_of_node(gparent, gp) pmf_do_functions(gp, NULL, 0, PMF_FLAGS_ON_INIT, NULL); /* Note: We do not at this point implement the "at sleep" or "at wake" diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c index 6b61a18e8db3..f286bdfe8346 100644 --- a/arch/powerpc/platforms/powermac/udbg_scc.c +++ b/arch/powerpc/platforms/powermac/udbg_scc.c @@ -80,7 +80,7 @@ void udbg_scc_init(int force_scc) path = of_get_property(of_chosen, "linux,stdout-path", NULL); if (path != NULL) stdout = of_find_node_by_path(path); - for (ch = NULL; (ch = of_get_next_child(escc, ch)) != NULL;) { + for_each_child_of_node(escc, ch) { if (ch == stdout) ch_def = of_node_get(ch); if (of_node_name_eq(ch, "ch-a")) diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index fe3f0fb5aeca..2eb6ae150d1f 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_FA_DUMP) += opal-fadump.o obj-$(CONFIG_PRESERVE_FA_DUMP) += opal-fadump.o obj-$(CONFIG_OPAL_CORE) += opal-core.o obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o +obj-$(CONFIG_PCI_IOV) += pci-sriov.o obj-$(CONFIG_CXL_BASE) += pci-cxl.o obj-$(CONFIG_EEH) += eeh-powernv.o obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 79409e005fcd..9af8c3b98853 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -338,6 +338,28 @@ static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap) return 0; } +static struct eeh_pe *pnv_eeh_get_upstream_pe(struct pci_dev *pdev) +{ + struct pci_controller *hose = pdev->bus->sysdata; + struct pnv_phb *phb = hose->private_data; + struct pci_dev *parent = pdev->bus->self; + +#ifdef CONFIG_PCI_IOV + /* for VFs we use the PF's PE as the upstream PE */ + if (pdev->is_virtfn) + parent = pdev->physfn; +#endif + + /* otherwise use the PE of our parent bridge */ + if (parent) { + struct pnv_ioda_pe *ioda_pe = pnv_ioda_get_pe(parent); + + return eeh_pe_get(phb->hose, ioda_pe->pe_number, 0); + } + + return NULL; +} + /** * pnv_eeh_probe - Do probe on PCI device * @pdev: pci_dev to probe @@ -350,6 +372,7 @@ static struct eeh_dev *pnv_eeh_probe(struct pci_dev *pdev) struct pci_controller *hose = pdn->phb; struct pnv_phb *phb = hose->private_data; struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + struct eeh_pe *upstream_pe; uint32_t pcie_flags; int ret; int config_addr = (pdn->busno << 8) | (pdn->devfn); @@ -372,19 +395,18 @@ static struct eeh_dev *pnv_eeh_probe(struct pci_dev *pdev) } /* Skip for PCI-ISA bridge */ - if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA) + if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) return NULL; eeh_edev_dbg(edev, "Probing device\n"); /* Initialize eeh device */ - edev->class_code = pdn->class_code; edev->mode &= 0xFFFFFF00; edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP); edev->af_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF); edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); - if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { + if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { edev->mode |= EEH_DEV_BRIDGE; if (edev->pcie_cap) { pnv_pci_cfg_read(pdn, edev->pcie_cap + PCI_EXP_FLAGS, @@ -399,8 +421,10 @@ static struct eeh_dev *pnv_eeh_probe(struct pci_dev *pdev) edev->pe_config_addr = phb->ioda.pe_rmap[config_addr]; + upstream_pe = pnv_eeh_get_upstream_pe(pdev); + /* Create PE */ - ret = eeh_add_to_parent_pe(edev); + ret = eeh_pe_tree_insert(edev, upstream_pe); if (ret) { eeh_edev_warn(edev, "Failed to add device to PE (code %d)\n", ret); return NULL; @@ -535,18 +559,6 @@ static int pnv_eeh_set_option(struct eeh_pe *pe, int option) return 0; } -/** - * pnv_eeh_get_pe_addr - Retrieve PE address - * @pe: EEH PE - * - * Retrieve the PE address according to the given tranditional - * PCI BDF (Bus/Device/Function) address. - */ -static int pnv_eeh_get_pe_addr(struct eeh_pe *pe) -{ - return pe->addr; -} - static void pnv_eeh_get_phb_diag(struct eeh_pe *pe) { struct pnv_phb *phb = pe->phb->private_data; @@ -850,32 +862,32 @@ static int __pnv_eeh_bridge_reset(struct pci_dev *dev, int option) case EEH_RESET_HOT: /* Don't report linkDown event */ if (aer) { - eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK, + eeh_ops->read_config(edev, aer + PCI_ERR_UNCOR_MASK, 4, &ctrl); ctrl |= PCI_ERR_UNC_SURPDN; - eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK, + eeh_ops->write_config(edev, aer + PCI_ERR_UNCOR_MASK, 4, ctrl); } - eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl); + eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &ctrl); ctrl |= PCI_BRIDGE_CTL_BUS_RESET; - eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl); + eeh_ops->write_config(edev, PCI_BRIDGE_CONTROL, 2, ctrl); msleep(EEH_PE_RST_HOLD_TIME); break; case EEH_RESET_DEACTIVATE: - eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl); + eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &ctrl); ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; - eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl); + eeh_ops->write_config(edev, PCI_BRIDGE_CONTROL, 2, ctrl); msleep(EEH_PE_RST_SETTLE_TIME); /* Continue reporting linkDown event */ if (aer) { - eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK, + eeh_ops->read_config(edev, aer + PCI_ERR_UNCOR_MASK, 4, &ctrl); ctrl &= ~PCI_ERR_UNC_SURPDN; - eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK, + eeh_ops->write_config(edev, aer + PCI_ERR_UNCOR_MASK, 4, ctrl); } @@ -944,11 +956,12 @@ void pnv_pci_reset_secondary_bus(struct pci_dev *dev) static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type, int pos, u16 mask) { + struct eeh_dev *edev = pdn->edev; int i, status = 0; /* Wait for Transaction Pending bit to be cleared */ for (i = 0; i < 4; i++) { - eeh_ops->read_config(pdn, pos, 2, &status); + eeh_ops->read_config(edev, pos, 2, &status); if (!(status & mask)) return; @@ -969,7 +982,7 @@ static int pnv_eeh_do_flr(struct pci_dn *pdn, int option) if (WARN_ON(!edev->pcie_cap)) return -ENOTTY; - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP, 4, ®); + eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCAP, 4, ®); if (!(reg & PCI_EXP_DEVCAP_FLR)) return -ENOTTY; @@ -979,18 +992,18 @@ static int pnv_eeh_do_flr(struct pci_dn *pdn, int option) pnv_eeh_wait_for_pending(pdn, "", edev->pcie_cap + PCI_EXP_DEVSTA, PCI_EXP_DEVSTA_TRPND); - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL, 4, ®); reg |= PCI_EXP_DEVCTL_BCR_FLR; - eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + eeh_ops->write_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL, 4, reg); msleep(EEH_PE_RST_HOLD_TIME); break; case EEH_RESET_DEACTIVATE: - eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL, 4, ®); reg &= ~PCI_EXP_DEVCTL_BCR_FLR; - eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + eeh_ops->write_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL, 4, reg); msleep(EEH_PE_RST_SETTLE_TIME); break; @@ -1007,7 +1020,7 @@ static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option) if (WARN_ON(!edev->af_cap)) return -ENOTTY; - eeh_ops->read_config(pdn, edev->af_cap + PCI_AF_CAP, 1, &cap); + eeh_ops->read_config(edev, edev->af_cap + PCI_AF_CAP, 1, &cap); if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR)) return -ENOTTY; @@ -1022,12 +1035,12 @@ static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option) pnv_eeh_wait_for_pending(pdn, "AF", edev->af_cap + PCI_AF_CTRL, PCI_AF_STATUS_TP << 8); - eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, + eeh_ops->write_config(edev, edev->af_cap + PCI_AF_CTRL, 1, PCI_AF_CTRL_FLR); msleep(EEH_PE_RST_HOLD_TIME); break; case EEH_RESET_DEACTIVATE: - eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, 1, 0); + eeh_ops->write_config(edev, edev->af_cap + PCI_AF_CTRL, 1, 0); msleep(EEH_PE_RST_SETTLE_TIME); break; } @@ -1261,9 +1274,11 @@ static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn) return false; } -static int pnv_eeh_read_config(struct pci_dn *pdn, +static int pnv_eeh_read_config(struct eeh_dev *edev, int where, int size, u32 *val) { + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + if (!pdn) return PCIBIOS_DEVICE_NOT_FOUND; @@ -1275,9 +1290,11 @@ static int pnv_eeh_read_config(struct pci_dn *pdn, return pnv_pci_cfg_read(pdn, where, size, val); } -static int pnv_eeh_write_config(struct pci_dn *pdn, +static int pnv_eeh_write_config(struct eeh_dev *edev, int where, int size, u32 val) { + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + if (!pdn) return PCIBIOS_DEVICE_NOT_FOUND; @@ -1631,34 +1648,24 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) return ret; } -static int pnv_eeh_restore_config(struct pci_dn *pdn) +static int pnv_eeh_restore_config(struct eeh_dev *edev) { - struct eeh_dev *edev = pdn_to_eeh_dev(pdn); struct pnv_phb *phb; s64 ret = 0; - int config_addr = (pdn->busno << 8) | (pdn->devfn); if (!edev) return -EEXIST; - /* - * We have to restore the PCI config space after reset since the - * firmware can't see SRIOV VFs. - * - * FIXME: The MPS, error routing rules, timeout setting are worthy - * to be exported by firmware in extendible way. - */ - if (edev->physfn) { - ret = eeh_restore_vf_config(pdn); - } else { - phb = pdn->phb->private_data; - ret = opal_pci_reinit(phb->opal_id, - OPAL_REINIT_PCI_DEV, config_addr); - } + if (edev->physfn) + return 0; + + phb = edev->controller->private_data; + ret = opal_pci_reinit(phb->opal_id, + OPAL_REINIT_PCI_DEV, edev->bdfn); if (ret) { pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n", - __func__, config_addr, ret); + __func__, edev->bdfn, ret); return -EIO; } @@ -1670,7 +1677,6 @@ static struct eeh_ops pnv_eeh_ops = { .init = pnv_eeh_init, .probe = pnv_eeh_probe, .set_option = pnv_eeh_set_option, - .get_pe_addr = pnv_eeh_get_pe_addr, .get_state = pnv_eeh_get_state, .reset = pnv_eeh_reset, .get_log = pnv_eeh_get_log, diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 2dd467383a88..77513a80cef9 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -48,7 +48,7 @@ static bool default_stop_found; * First stop state levels when SPR and TB loss can occur. */ static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1; -static u64 pnv_first_spr_loss_level = MAX_STOP_STATE + 1; +static u64 deep_spr_loss_state = MAX_STOP_STATE + 1; /* * psscr value and mask of the deepest stop idle state. @@ -73,9 +73,6 @@ static int pnv_save_sprs_for_deep_states(void) */ uint64_t lpcr_val = mfspr(SPRN_LPCR); uint64_t hid0_val = mfspr(SPRN_HID0); - uint64_t hid1_val = mfspr(SPRN_HID1); - uint64_t hid4_val = mfspr(SPRN_HID4); - uint64_t hid5_val = mfspr(SPRN_HID5); uint64_t hmeer_val = mfspr(SPRN_HMEER); uint64_t msr_val = MSR_IDLE; uint64_t psscr_val = pnv_deepest_stop_psscr_val; @@ -117,6 +114,9 @@ static int pnv_save_sprs_for_deep_states(void) /* Only p8 needs to set extra HID regiters */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + uint64_t hid1_val = mfspr(SPRN_HID1); + uint64_t hid4_val = mfspr(SPRN_HID4); + uint64_t hid5_val = mfspr(SPRN_HID5); rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); if (rc != 0) @@ -611,6 +611,7 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) unsigned long srr1; unsigned long pls; unsigned long mmcr0 = 0; + unsigned long mmcra = 0; struct p9_sprs sprs = {}; /* avoid false used-uninitialised */ bool sprs_saved = false; @@ -657,7 +658,22 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) */ mmcr0 = mfspr(SPRN_MMCR0); } - if ((psscr & PSSCR_RL_MASK) >= pnv_first_spr_loss_level) { + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + /* + * POWER10 uses MMCRA (BHRBRD) as BHRB disable bit. + * If the user hasn't asked for the BHRB to be + * written, the value of MMCRA[BHRBRD] is 1. + * On wakeup from stop, MMCRA[BHRBD] will be 0, + * since it is previleged resource and will be lost. + * Thus, if we do not save and restore the MMCRA[BHRBD], + * hardware will be needlessly writing to the BHRB + * in problem mode. + */ + mmcra = mfspr(SPRN_MMCRA); + } + + if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) { sprs.lpcr = mfspr(SPRN_LPCR); sprs.hfscr = mfspr(SPRN_HFSCR); sprs.fscr = mfspr(SPRN_FSCR); @@ -700,8 +716,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) { - unsigned long mmcra; - /* * We don't need an isync after the mtsprs here because the * upcoming mtmsrd is execution synchronizing. @@ -721,6 +735,10 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) mtspr(SPRN_MMCR0, mmcr0); } + /* Reload MMCRA to restore BHRB disable bit for POWER10 */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + mtspr(SPRN_MMCRA, mmcra); + /* * DD2.2 and earlier need to set then clear bit 60 in MMCRA * to ensure the PMU starts running. @@ -741,7 +759,7 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) * just always test PSSCR for SPR/TB state loss. */ pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT; - if (likely(pls < pnv_first_spr_loss_level)) { + if (likely(pls < deep_spr_loss_state)) { if (sprs_saved) atomic_stop_thread_idle(); goto out; @@ -1088,7 +1106,7 @@ static void __init pnv_power9_idle_init(void) * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state. */ pnv_first_tb_loss_level = MAX_STOP_STATE + 1; - pnv_first_spr_loss_level = MAX_STOP_STATE + 1; + deep_spr_loss_state = MAX_STOP_STATE + 1; for (i = 0; i < nr_pnv_idle_states; i++) { int err; struct pnv_idle_states_t *state = &pnv_idle_states[i]; @@ -1099,8 +1117,8 @@ static void __init pnv_power9_idle_init(void) pnv_first_tb_loss_level = psscr_rl; if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) && - (pnv_first_spr_loss_level > psscr_rl)) - pnv_first_spr_loss_level = psscr_rl; + (deep_spr_loss_state > psscr_rl)) + deep_spr_loss_state = psscr_rl; /* * The idle code does not deal with TB loss occurring @@ -1111,8 +1129,8 @@ static void __init pnv_power9_idle_init(void) * compatibility. */ if ((state->flags & OPAL_PM_TIMEBASE_STOP) && - (pnv_first_spr_loss_level > psscr_rl)) - pnv_first_spr_loss_level = psscr_rl; + (deep_spr_loss_state > psscr_rl)) + deep_spr_loss_state = psscr_rl; err = validate_psscr_val_mask(&state->psscr_val, &state->psscr_mask, @@ -1158,7 +1176,7 @@ static void __init pnv_power9_idle_init(void) } pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n", - pnv_first_spr_loss_level); + deep_spr_loss_state); pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n", pnv_first_tb_loss_level); @@ -1205,7 +1223,7 @@ static void __init pnv_probe_idle_states(void) return; } - if (cpu_has_feature(CPU_FTR_ARCH_300)) + if (pvr_version_is(PVR_POWER9)) pnv_power9_idle_init(); for (i = 0; i < nr_pnv_idle_states; i++) diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c index 1656e8965d6b..c094fdf5825c 100644 --- a/arch/powerpc/platforms/powernv/opal-async.c +++ b/arch/powerpc/platforms/powernv/opal-async.c @@ -104,7 +104,7 @@ static int __opal_async_release_token(int token) */ case ASYNC_TOKEN_DISPATCHED: opal_async_tokens[token].state = ASYNC_TOKEN_ABANDONED; - /* Fall through */ + fallthrough; default: rc = 1; } diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c index f923359d8afc..5218f5da2737 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c +++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c @@ -166,7 +166,7 @@ int pnv_tce_xchg(struct iommu_table *tbl, long index, if (!ptce) { ptce = pnv_tce(tbl, false, idx, alloc); if (!ptce) - return alloc ? H_HARDWARE : H_TOO_HARD; + return -ENOMEM; } if (newtce & TCE_PCI_WRITE) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 73a63efcf855..c9c25fb0783c 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -115,32 +115,13 @@ static int __init pci_reset_phbs_setup(char *str) early_param("ppc_pci_reset_phbs", pci_reset_phbs_setup); -static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r) -{ - /* - * WARNING: We cannot rely on the resource flags. The Linux PCI - * allocation code sometimes decides to put a 64-bit prefetchable - * BAR in the 32-bit window, so we have to compare the addresses. - * - * For simplicity we only test resource start. - */ - return (r->start >= phb->ioda.m64_base && - r->start < (phb->ioda.m64_base + phb->ioda.m64_size)); -} - -static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags) -{ - unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH); - - return (resource_flags & flags) == flags; -} - static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no) { s64 rc; phb->ioda.pe_array[pe_no].phb = phb; phb->ioda.pe_array[pe_no].pe_number = pe_no; + phb->ioda.pe_array[pe_no].dma_setup_done = false; /* * Clear the PE frozen state as it might be put into frozen state @@ -164,26 +145,48 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) return; } + mutex_lock(&phb->ioda.pe_alloc_mutex); if (test_and_set_bit(pe_no, phb->ioda.pe_alloc)) pr_debug("%s: PE %x was reserved on PHB#%x\n", __func__, pe_no, phb->hose->global_number); + mutex_unlock(&phb->ioda.pe_alloc_mutex); pnv_ioda_init_pe(phb, pe_no); } -static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb) +struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count) { - long pe; + struct pnv_ioda_pe *ret = NULL; + int run = 0, pe, i; + mutex_lock(&phb->ioda.pe_alloc_mutex); + + /* scan backwards for a run of @count cleared bits */ for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) { - if (!test_and_set_bit(pe, phb->ioda.pe_alloc)) - return pnv_ioda_init_pe(phb, pe); - } + if (test_bit(pe, phb->ioda.pe_alloc)) { + run = 0; + continue; + } - return NULL; + run++; + if (run == count) + break; + } + if (run != count) + goto out; + + for (i = pe; i < pe + count; i++) { + set_bit(i, phb->ioda.pe_alloc); + pnv_ioda_init_pe(phb, i); + } + ret = &phb->ioda.pe_array[pe]; + +out: + mutex_unlock(&phb->ioda.pe_alloc_mutex); + return ret; } -static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe) +void pnv_ioda_free_pe(struct pnv_ioda_pe *pe) { struct pnv_phb *phb = pe->phb; unsigned int pe_num = pe->pe_number; @@ -192,7 +195,10 @@ static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe) WARN_ON(pe->npucomp); /* NPUs for nvlink are not supposed to be freed */ kfree(pe->npucomp); memset(pe, 0, sizeof(struct pnv_ioda_pe)); + + mutex_lock(&phb->ioda.pe_alloc_mutex); clear_bit(pe_num, phb->ioda.pe_alloc); + mutex_unlock(&phb->ioda.pe_alloc_mutex); } /* The default M64 BAR is shared by all PEs */ @@ -252,8 +258,7 @@ static int pnv_ioda2_init_m64(struct pnv_phb *phb) static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev, unsigned long *pe_bitmap) { - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); struct resource *r; resource_size_t base, sgsz, start, end; int segno, i; @@ -311,6 +316,28 @@ static int pnv_ioda1_init_m64(struct pnv_phb *phb) } } + for (index = 0; index < phb->ioda.total_pe_num; index++) { + int64_t rc; + + /* + * P7IOC supports M64DT, which helps mapping M64 segment + * to one particular PE#. However, PHB3 has fixed mapping + * between M64 segment and PE#. In order to have same logic + * for P7IOC and PHB3, we enforce fixed mapping between M64 + * segment and PE# on P7IOC. + */ + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + index, OPAL_M64_WINDOW_TYPE, + index / PNV_IODA1_M64_SEGS, + index % PNV_IODA1_M64_SEGS); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Error %lld mapping M64 for PHB#%x-PE#%x\n", + __func__, rc, phb->hose->global_number, + index); + goto fail; + } + } + /* * Exclude the segments for reserved and root bus PE, which * are first or last two PEs. @@ -351,8 +378,7 @@ static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus, static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) { - struct pci_controller *hose = pci_bus_to_host(bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(bus); struct pnv_ioda_pe *master_pe, *pe; unsigned long size, *pe_alloc; int i; @@ -403,26 +429,6 @@ static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) pe->master = master_pe; list_add_tail(&pe->list, &master_pe->slaves); } - - /* - * P7IOC supports M64DT, which helps mapping M64 segment - * to one particular PE#. However, PHB3 has fixed mapping - * between M64 segment and PE#. In order to have same logic - * for P7IOC and PHB3, we enforce fixed mapping between M64 - * segment and PE# on P7IOC. - */ - if (phb->type == PNV_PHB_IODA1) { - int64_t rc; - - rc = opal_pci_map_pe_mmio_window(phb->opal_id, - pe->pe_number, OPAL_M64_WINDOW_TYPE, - pe->pe_number / PNV_IODA1_M64_SEGS, - pe->pe_number % PNV_IODA1_M64_SEGS); - if (rc != OPAL_SUCCESS) - pr_warn("%s: Error %lld mapping M64 for PHB#%x-PE#%x\n", - __func__, rc, phb->hose->global_number, - pe->pe_number); - } } kfree(pe_alloc); @@ -673,8 +679,7 @@ struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn) struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) { - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); struct pci_dn *pdn = pci_get_pdn(dev); if (!pdn) @@ -816,7 +821,7 @@ static void pnv_ioda_unset_peltv(struct pnv_phb *phb, pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc); } -static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) +int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { struct pci_dev *parent; uint8_t bcomp, dcomp, fcomp; @@ -887,7 +892,7 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) return 0; } -static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) +int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { struct pci_dev *parent; uint8_t bcomp, dcomp, fcomp; @@ -982,95 +987,9 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) return 0; } -#ifdef CONFIG_PCI_IOV -static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) -{ - struct pci_dn *pdn = pci_get_pdn(dev); - int i; - struct resource *res, res2; - resource_size_t size; - u16 num_vfs; - - if (!dev->is_physfn) - return -EINVAL; - - /* - * "offset" is in VFs. The M64 windows are sized so that when they - * are segmented, each segment is the same size as the IOV BAR. - * Each segment is in a separate PE, and the high order bits of the - * address are the PE number. Therefore, each VF's BAR is in a - * separate PE, and changing the IOV BAR start address changes the - * range of PEs the VFs are in. - */ - num_vfs = pdn->num_vfs; - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = &dev->resource[i + PCI_IOV_RESOURCES]; - if (!res->flags || !res->parent) - continue; - - /* - * The actual IOV BAR range is determined by the start address - * and the actual size for num_vfs VFs BAR. This check is to - * make sure that after shifting, the range will not overlap - * with another device. - */ - size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); - res2.flags = res->flags; - res2.start = res->start + (size * offset); - res2.end = res2.start + (size * num_vfs) - 1; - - if (res2.end > res->end) { - dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n", - i, &res2, res, num_vfs, offset); - return -EBUSY; - } - } - - /* - * Since M64 BAR shares segments among all possible 256 PEs, - * we have to shift the beginning of PF IOV BAR to make it start from - * the segment which belongs to the PE number assigned to the first VF. - * This creates a "hole" in the /proc/iomem which could be used for - * allocating other resources so we reserve this area below and - * release when IOV is released. - */ - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = &dev->resource[i + PCI_IOV_RESOURCES]; - if (!res->flags || !res->parent) - continue; - - size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); - res2 = *res; - res->start += size * offset; - - dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n", - i, &res2, res, (offset > 0) ? "En" : "Dis", - num_vfs, offset); - - if (offset < 0) { - devm_release_resource(&dev->dev, &pdn->holes[i]); - memset(&pdn->holes[i], 0, sizeof(pdn->holes[i])); - } - - pci_update_resource(dev, i + PCI_IOV_RESOURCES); - - if (offset > 0) { - pdn->holes[i].start = res2.start; - pdn->holes[i].end = res2.start + size * offset - 1; - pdn->holes[i].flags = IORESOURCE_BUS; - pdn->holes[i].name = "pnv_iov_reserved"; - devm_request_resource(&dev->dev, res->parent, - &pdn->holes[i]); - } - } - return 0; -} -#endif /* CONFIG_PCI_IOV */ - static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) { - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); struct pci_dn *pdn = pci_get_pdn(dev); struct pnv_ioda_pe *pe; @@ -1082,7 +1001,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) if (pdn->pe_number != IODA_INVALID_PE) return NULL; - pe = pnv_ioda_alloc_pe(phb); + pe = pnv_ioda_alloc_pe(phb, 1); if (!pe) { pr_warn("%s: Not enough PE# available, disabling device\n", pci_name(dev)); @@ -1129,8 +1048,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) */ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) { - struct pci_controller *hose = pci_bus_to_host(bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(bus); struct pnv_ioda_pe *pe = NULL; unsigned int pe_num; @@ -1154,7 +1072,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) /* The PE number isn't pinned by M64 */ if (!pe) - pe = pnv_ioda_alloc_pe(phb); + pe = pnv_ioda_alloc_pe(phb, 1); if (!pe) { pr_warn("%s: Not enough PE# available for PCI bus %04x:%02x\n", @@ -1196,8 +1114,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) struct pnv_ioda_pe *pe; struct pci_dev *gpu_pdev; struct pci_dn *npu_pdn; - struct pci_controller *hose = pci_bus_to_host(npu_pdev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(npu_pdev->bus); /* * Intentionally leak a reference on the npu device (for @@ -1297,446 +1214,12 @@ static void pnv_pci_ioda_setup_nvlink(void) #endif } -#ifdef CONFIG_PCI_IOV -static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs) -{ - struct pci_bus *bus; - struct pci_controller *hose; - struct pnv_phb *phb; - struct pci_dn *pdn; - int i, j; - int m64_bars; - - bus = pdev->bus; - hose = pci_bus_to_host(bus); - phb = hose->private_data; - pdn = pci_get_pdn(pdev); - - if (pdn->m64_single_mode) - m64_bars = num_vfs; - else - m64_bars = 1; - - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) - for (j = 0; j < m64_bars; j++) { - if (pdn->m64_map[j][i] == IODA_INVALID_M64) - continue; - opal_pci_phb_mmio_enable(phb->opal_id, - OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 0); - clear_bit(pdn->m64_map[j][i], &phb->ioda.m64_bar_alloc); - pdn->m64_map[j][i] = IODA_INVALID_M64; - } - - kfree(pdn->m64_map); - return 0; -} - -static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) -{ - struct pci_bus *bus; - struct pci_controller *hose; - struct pnv_phb *phb; - struct pci_dn *pdn; - unsigned int win; - struct resource *res; - int i, j; - int64_t rc; - int total_vfs; - resource_size_t size, start; - int pe_num; - int m64_bars; - - bus = pdev->bus; - hose = pci_bus_to_host(bus); - phb = hose->private_data; - pdn = pci_get_pdn(pdev); - total_vfs = pci_sriov_get_totalvfs(pdev); - - if (pdn->m64_single_mode) - m64_bars = num_vfs; - else - m64_bars = 1; - - pdn->m64_map = kmalloc_array(m64_bars, - sizeof(*pdn->m64_map), - GFP_KERNEL); - if (!pdn->m64_map) - return -ENOMEM; - /* Initialize the m64_map to IODA_INVALID_M64 */ - for (i = 0; i < m64_bars ; i++) - for (j = 0; j < PCI_SRIOV_NUM_BARS; j++) - pdn->m64_map[i][j] = IODA_INVALID_M64; - - - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = &pdev->resource[i + PCI_IOV_RESOURCES]; - if (!res->flags || !res->parent) - continue; - - for (j = 0; j < m64_bars; j++) { - do { - win = find_next_zero_bit(&phb->ioda.m64_bar_alloc, - phb->ioda.m64_bar_idx + 1, 0); - - if (win >= phb->ioda.m64_bar_idx + 1) - goto m64_failed; - } while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc)); - - pdn->m64_map[j][i] = win; - - if (pdn->m64_single_mode) { - size = pci_iov_resource_size(pdev, - PCI_IOV_RESOURCES + i); - start = res->start + size * j; - } else { - size = resource_size(res); - start = res->start; - } - - /* Map the M64 here */ - if (pdn->m64_single_mode) { - pe_num = pdn->pe_num_map[j]; - rc = opal_pci_map_pe_mmio_window(phb->opal_id, - pe_num, OPAL_M64_WINDOW_TYPE, - pdn->m64_map[j][i], 0); - } - - rc = opal_pci_set_phb_mem_window(phb->opal_id, - OPAL_M64_WINDOW_TYPE, - pdn->m64_map[j][i], - start, - 0, /* unused */ - size); - - - if (rc != OPAL_SUCCESS) { - dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n", - win, rc); - goto m64_failed; - } - - if (pdn->m64_single_mode) - rc = opal_pci_phb_mmio_enable(phb->opal_id, - OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 2); - else - rc = opal_pci_phb_mmio_enable(phb->opal_id, - OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 1); - - if (rc != OPAL_SUCCESS) { - dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n", - win, rc); - goto m64_failed; - } - } - } - return 0; - -m64_failed: - pnv_pci_vf_release_m64(pdev, num_vfs); - return -EBUSY; -} - -static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, - int num); - -static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe) -{ - struct iommu_table *tbl; - int64_t rc; - - tbl = pe->table_group.tables[0]; - rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0); - if (rc) - pe_warn(pe, "OPAL error %lld release DMA window\n", rc); - - pnv_pci_ioda2_set_bypass(pe, false); - if (pe->table_group.group) { - iommu_group_put(pe->table_group.group); - BUG_ON(pe->table_group.group); - } - iommu_tce_table_put(tbl); -} - -static void pnv_ioda_release_vf_PE(struct pci_dev *pdev) -{ - struct pci_bus *bus; - struct pci_controller *hose; - struct pnv_phb *phb; - struct pnv_ioda_pe *pe, *pe_n; - struct pci_dn *pdn; - - bus = pdev->bus; - hose = pci_bus_to_host(bus); - phb = hose->private_data; - pdn = pci_get_pdn(pdev); - - if (!pdev->is_physfn) - return; - - list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) { - if (pe->parent_dev != pdev) - continue; - - pnv_pci_ioda2_release_dma_pe(pdev, pe); - - /* Remove from list */ - mutex_lock(&phb->ioda.pe_list_mutex); - list_del(&pe->list); - mutex_unlock(&phb->ioda.pe_list_mutex); - - pnv_ioda_deconfigure_pe(phb, pe); - - pnv_ioda_free_pe(pe); - } -} - -void pnv_pci_sriov_disable(struct pci_dev *pdev) -{ - struct pci_bus *bus; - struct pci_controller *hose; - struct pnv_phb *phb; - struct pnv_ioda_pe *pe; - struct pci_dn *pdn; - u16 num_vfs, i; - - bus = pdev->bus; - hose = pci_bus_to_host(bus); - phb = hose->private_data; - pdn = pci_get_pdn(pdev); - num_vfs = pdn->num_vfs; - - /* Release VF PEs */ - pnv_ioda_release_vf_PE(pdev); - - if (phb->type == PNV_PHB_IODA2) { - if (!pdn->m64_single_mode) - pnv_pci_vf_resource_shift(pdev, -*pdn->pe_num_map); - - /* Release M64 windows */ - pnv_pci_vf_release_m64(pdev, num_vfs); - - /* Release PE numbers */ - if (pdn->m64_single_mode) { - for (i = 0; i < num_vfs; i++) { - if (pdn->pe_num_map[i] == IODA_INVALID_PE) - continue; - - pe = &phb->ioda.pe_array[pdn->pe_num_map[i]]; - pnv_ioda_free_pe(pe); - } - } else - bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); - /* Releasing pe_num_map */ - kfree(pdn->pe_num_map); - } -} - -static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, +static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe); -static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) -{ - struct pci_bus *bus; - struct pci_controller *hose; - struct pnv_phb *phb; - struct pnv_ioda_pe *pe; - int pe_num; - u16 vf_index; - struct pci_dn *pdn; - - bus = pdev->bus; - hose = pci_bus_to_host(bus); - phb = hose->private_data; - pdn = pci_get_pdn(pdev); - - if (!pdev->is_physfn) - return; - - /* Reserve PE for each VF */ - for (vf_index = 0; vf_index < num_vfs; vf_index++) { - int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index); - int vf_bus = pci_iov_virtfn_bus(pdev, vf_index); - struct pci_dn *vf_pdn; - - if (pdn->m64_single_mode) - pe_num = pdn->pe_num_map[vf_index]; - else - pe_num = *pdn->pe_num_map + vf_index; - - pe = &phb->ioda.pe_array[pe_num]; - pe->pe_number = pe_num; - pe->phb = phb; - pe->flags = PNV_IODA_PE_VF; - pe->pbus = NULL; - pe->parent_dev = pdev; - pe->mve_number = -1; - pe->rid = (vf_bus << 8) | vf_devfn; - - pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n", - hose->global_number, pdev->bus->number, - PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num); - - if (pnv_ioda_configure_pe(phb, pe)) { - /* XXX What do we do here ? */ - pnv_ioda_free_pe(pe); - pe->pdev = NULL; - continue; - } - - /* Put PE to the list */ - mutex_lock(&phb->ioda.pe_list_mutex); - list_add_tail(&pe->list, &phb->ioda.pe_list); - mutex_unlock(&phb->ioda.pe_list_mutex); - - /* associate this pe to it's pdn */ - list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) { - if (vf_pdn->busno == vf_bus && - vf_pdn->devfn == vf_devfn) { - vf_pdn->pe_number = pe_num; - break; - } - } - - pnv_pci_ioda2_setup_dma_pe(phb, pe); - } -} - -int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) -{ - struct pci_bus *bus; - struct pci_controller *hose; - struct pnv_phb *phb; - struct pnv_ioda_pe *pe; - struct pci_dn *pdn; - int ret; - u16 i; - - bus = pdev->bus; - hose = pci_bus_to_host(bus); - phb = hose->private_data; - pdn = pci_get_pdn(pdev); - - if (phb->type == PNV_PHB_IODA2) { - if (!pdn->vfs_expanded) { - dev_info(&pdev->dev, "don't support this SRIOV device" - " with non 64bit-prefetchable IOV BAR\n"); - return -ENOSPC; - } - - /* - * When M64 BARs functions in Single PE mode, the number of VFs - * could be enabled must be less than the number of M64 BARs. - */ - if (pdn->m64_single_mode && num_vfs > phb->ioda.m64_bar_idx) { - dev_info(&pdev->dev, "Not enough M64 BAR for VFs\n"); - return -EBUSY; - } - - /* Allocating pe_num_map */ - if (pdn->m64_single_mode) - pdn->pe_num_map = kmalloc_array(num_vfs, - sizeof(*pdn->pe_num_map), - GFP_KERNEL); - else - pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map), GFP_KERNEL); - - if (!pdn->pe_num_map) - return -ENOMEM; - - if (pdn->m64_single_mode) - for (i = 0; i < num_vfs; i++) - pdn->pe_num_map[i] = IODA_INVALID_PE; - - /* Calculate available PE for required VFs */ - if (pdn->m64_single_mode) { - for (i = 0; i < num_vfs; i++) { - pe = pnv_ioda_alloc_pe(phb); - if (!pe) { - ret = -EBUSY; - goto m64_failed; - } - - pdn->pe_num_map[i] = pe->pe_number; - } - } else { - mutex_lock(&phb->ioda.pe_alloc_mutex); - *pdn->pe_num_map = bitmap_find_next_zero_area( - phb->ioda.pe_alloc, phb->ioda.total_pe_num, - 0, num_vfs, 0); - if (*pdn->pe_num_map >= phb->ioda.total_pe_num) { - mutex_unlock(&phb->ioda.pe_alloc_mutex); - dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs); - kfree(pdn->pe_num_map); - return -EBUSY; - } - bitmap_set(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); - mutex_unlock(&phb->ioda.pe_alloc_mutex); - } - pdn->num_vfs = num_vfs; - - /* Assign M64 window accordingly */ - ret = pnv_pci_vf_assign_m64(pdev, num_vfs); - if (ret) { - dev_info(&pdev->dev, "Not enough M64 window resources\n"); - goto m64_failed; - } - - /* - * When using one M64 BAR to map one IOV BAR, we need to shift - * the IOV BAR according to the PE# allocated to the VFs. - * Otherwise, the PE# for the VF will conflict with others. - */ - if (!pdn->m64_single_mode) { - ret = pnv_pci_vf_resource_shift(pdev, *pdn->pe_num_map); - if (ret) - goto m64_failed; - } - } - - /* Setup VF PEs */ - pnv_ioda_setup_vf_PE(pdev, num_vfs); - - return 0; - -m64_failed: - if (pdn->m64_single_mode) { - for (i = 0; i < num_vfs; i++) { - if (pdn->pe_num_map[i] == IODA_INVALID_PE) - continue; - - pe = &phb->ioda.pe_array[pdn->pe_num_map[i]]; - pnv_ioda_free_pe(pe); - } - } else - bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); - - /* Releasing pe_num_map */ - kfree(pdn->pe_num_map); - - return ret; -} - -int pnv_pcibios_sriov_disable(struct pci_dev *pdev) -{ - pnv_pci_sriov_disable(pdev); - - /* Release PCI data */ - remove_sriov_vf_pdns(pdev); - return 0; -} - -int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) -{ - /* Allocate PCI data */ - add_sriov_vf_pdns(pdev); - - return pnv_pci_sriov_enable(pdev, num_vfs); -} -#endif /* CONFIG_PCI_IOV */ static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev) { - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); struct pci_dn *pdn = pci_get_pdn(pdev); struct pnv_ioda_pe *pe; @@ -1762,6 +1245,24 @@ static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev) pci_info(pdev, "Added to existing PE#%x\n", pe->pe_number); } + /* + * We assume that bridges *probably* don't need to do any DMA so we can + * skip allocating a TCE table, etc unless we get a non-bridge device. + */ + if (!pe->dma_setup_done && !pci_is_bridge(pdev)) { + switch (phb->type) { + case PNV_PHB_IODA1: + pnv_pci_ioda1_setup_dma_pe(phb, pe); + break; + case PNV_PHB_IODA2: + pnv_pci_ioda2_setup_dma_pe(phb, pe); + break; + default: + pr_warn("%s: No DMA for PHB#%x (type %d)\n", + __func__, phb->hose->global_number, phb->type); + } + } + if (pdn) pdn->pe_number = pe->pe_number; pe->device_count++; @@ -1847,8 +1348,7 @@ static int pnv_pci_ioda_dma_64bit_bypass(struct pnv_ioda_pe *pe) static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev, u64 dma_mask) { - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); struct pci_dn *pdn = pci_get_pdn(pdev); struct pnv_ioda_pe *pe; @@ -1885,19 +1385,6 @@ static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev, return false; } -static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) -{ - struct pci_dev *dev; - - list_for_each_entry(dev, &bus->devices, bus_list) { - set_iommu_table_base(&dev->dev, pe->table_group.tables[0]); - dev->dev.archdata.dma_offset = pe->tce_bypass_base; - - if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) - pnv_ioda_setup_bus_dma(pe, dev->subordinate); - } -} - static inline __be64 __iomem *pnv_ioda_get_inval_reg(struct pnv_phb *phb, bool real_mode) { @@ -2285,6 +1772,7 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift; iommu_init_table(tbl, phb->hose->node, 0, 0); + pe->dma_setup_done = true; return; fail: /* XXX Failure: Try to fallback to 64-bit only ? */ @@ -2474,7 +1962,6 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) return 0; } -#if defined(CONFIG_IOMMU_API) || defined(CONFIG_PCI_IOV) static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, int num) { @@ -2498,7 +1985,6 @@ static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, return ret; } -#endif #ifdef CONFIG_IOMMU_API unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, @@ -2547,6 +2033,19 @@ static long pnv_pci_ioda2_create_table_userspace( return ret; } +static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + set_iommu_table_base(&dev->dev, pe->table_group.tables[0]); + dev->dev.archdata.dma_offset = pe->tce_bypass_base; + + if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) + pnv_ioda_setup_bus_dma(pe, dev->subordinate); + } +} + static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) { struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, @@ -2583,14 +2082,11 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = { }; #endif -static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, - struct pnv_ioda_pe *pe) +void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, + struct pnv_ioda_pe *pe) { int64_t rc; - if (!pnv_pci_ioda_pe_dma_weight(pe)) - return; - /* TVE #1 is selected by PCI address bit 59 */ pe->tce_bypass_base = 1ull << 59; @@ -2615,6 +2111,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, iommu_register_group(&pe->table_group, phb->hose->global_number, pe->pe_number); #endif + pe->dma_setup_done = true; } int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq) @@ -2763,118 +2260,6 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) count, phb->msi_base); } -#ifdef CONFIG_PCI_IOV -static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) -{ - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; - const resource_size_t gate = phb->ioda.m64_segsize >> 2; - struct resource *res; - int i; - resource_size_t size, total_vf_bar_sz; - struct pci_dn *pdn; - int mul, total_vfs; - - pdn = pci_get_pdn(pdev); - pdn->vfs_expanded = 0; - pdn->m64_single_mode = false; - - total_vfs = pci_sriov_get_totalvfs(pdev); - mul = phb->ioda.total_pe_num; - total_vf_bar_sz = 0; - - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = &pdev->resource[i + PCI_IOV_RESOURCES]; - if (!res->flags || res->parent) - continue; - if (!pnv_pci_is_m64_flags(res->flags)) { - dev_warn(&pdev->dev, "Don't support SR-IOV with" - " non M64 VF BAR%d: %pR. \n", - i, res); - goto truncate_iov; - } - - total_vf_bar_sz += pci_iov_resource_size(pdev, - i + PCI_IOV_RESOURCES); - - /* - * If bigger than quarter of M64 segment size, just round up - * power of two. - * - * Generally, one M64 BAR maps one IOV BAR. To avoid conflict - * with other devices, IOV BAR size is expanded to be - * (total_pe * VF_BAR_size). When VF_BAR_size is half of M64 - * segment size , the expanded size would equal to half of the - * whole M64 space size, which will exhaust the M64 Space and - * limit the system flexibility. This is a design decision to - * set the boundary to quarter of the M64 segment size. - */ - if (total_vf_bar_sz > gate) { - mul = roundup_pow_of_two(total_vfs); - dev_info(&pdev->dev, - "VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n", - total_vf_bar_sz, gate, mul); - pdn->m64_single_mode = true; - break; - } - } - - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = &pdev->resource[i + PCI_IOV_RESOURCES]; - if (!res->flags || res->parent) - continue; - - size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES); - /* - * On PHB3, the minimum size alignment of M64 BAR in single - * mode is 32MB. - */ - if (pdn->m64_single_mode && (size < SZ_32M)) - goto truncate_iov; - dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res); - res->end = res->start + size * mul - 1; - dev_dbg(&pdev->dev, " %pR\n", res); - dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)", - i, res, mul); - } - pdn->vfs_expanded = mul; - - return; - -truncate_iov: - /* To save MMIO space, IOV BAR is truncated. */ - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - res = &pdev->resource[i + PCI_IOV_RESOURCES]; - res->flags = 0; - res->end = res->start - 1; - } -} - -static void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev) -{ - if (WARN_ON(pci_dev_is_added(pdev))) - return; - - if (pdev->is_virtfn) { - struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev); - - /* - * VF PEs are single-device PEs so their pdev pointer needs to - * be set. The pdev doesn't exist when the PE is allocated (in - * (pcibios_sriov_enable()) so we fix it up here. - */ - pe->pdev = pdev; - WARN_ON(!(pe->flags & PNV_IODA_PE_VF)); - } else if (pdev->is_physfn) { - /* - * For PFs adjust their allocated IOV resources to match what - * the PHB can support using it's M64 BAR table. - */ - pnv_pci_ioda_fixup_iov_resources(pdev); - } -} -#endif /* CONFIG_PCI_IOV */ - static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, struct resource *res) { @@ -3101,10 +2486,9 @@ static void pnv_pci_ioda_fixup(void) static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, unsigned long type) { - struct pci_dev *bridge; - struct pci_controller *hose = pci_bus_to_host(bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(bus); int num_pci_bridges = 0; + struct pci_dev *bridge; bridge = bus->self; while (bridge) { @@ -3190,8 +2574,6 @@ static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus, static void pnv_pci_configure_bus(struct pci_bus *bus) { - struct pci_controller *hose = pci_bus_to_host(bus); - struct pnv_phb *phb = hose->private_data; struct pci_dev *bridge = bus->self; struct pnv_ioda_pe *pe; bool all = (bridge && pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE); @@ -3215,17 +2597,6 @@ static void pnv_pci_configure_bus(struct pci_bus *bus) return; pnv_ioda_setup_pe_seg(pe); - switch (phb->type) { - case PNV_PHB_IODA1: - pnv_pci_ioda1_setup_dma_pe(phb, pe); - break; - case PNV_PHB_IODA2: - pnv_pci_ioda2_setup_dma_pe(phb, pe); - break; - default: - pr_warn("%s: No DMA for PHB#%x (type %d)\n", - __func__, phb->hose->global_number, phb->type); - } } static resource_size_t pnv_pci_default_alignment(void) @@ -3233,49 +2604,12 @@ static resource_size_t pnv_pci_default_alignment(void) return PAGE_SIZE; } -#ifdef CONFIG_PCI_IOV -static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, - int resno) -{ - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; - struct pci_dn *pdn = pci_get_pdn(pdev); - resource_size_t align; - - /* - * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the - * SR-IOV. While from hardware perspective, the range mapped by M64 - * BAR should be size aligned. - * - * When IOV BAR is mapped with M64 BAR in Single PE mode, the extra - * powernv-specific hardware restriction is gone. But if just use the - * VF BAR size as the alignment, PF BAR / VF BAR may be allocated with - * in one segment of M64 #15, which introduces the PE conflict between - * PF and VF. Based on this, the minimum alignment of an IOV BAR is - * m64_segsize. - * - * This function returns the total IOV BAR size if M64 BAR is in - * Shared PE mode or just VF BAR size if not. - * If the M64 BAR is in Single PE mode, return the VF BAR size or - * M64 segment size if IOV BAR size is less. - */ - align = pci_iov_resource_size(pdev, resno); - if (!pdn->vfs_expanded) - return align; - if (pdn->m64_single_mode) - return max(align, (resource_size_t)phb->ioda.m64_segsize); - - return pdn->vfs_expanded * align; -} -#endif /* CONFIG_PCI_IOV */ - /* Prevent enabling devices for which we couldn't properly * assign a PE */ static bool pnv_pci_enable_device_hook(struct pci_dev *dev) { - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); struct pci_dn *pdn; /* The function is probably called while the PEs have @@ -3346,11 +2680,10 @@ static long pnv_pci_ioda1_unset_window(struct iommu_table_group *table_group, static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe) { - unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe); struct iommu_table *tbl = pe->table_group.tables[0]; int64_t rc; - if (!weight) + if (!pe->dma_setup_done) return; rc = pnv_pci_ioda1_unset_window(&pe->table_group, 0); @@ -3367,22 +2700,17 @@ static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe) iommu_tce_table_put(tbl); } -static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) +void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) { struct iommu_table *tbl = pe->table_group.tables[0]; - unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe); -#ifdef CONFIG_IOMMU_API int64_t rc; -#endif - if (!weight) + if (pe->dma_setup_done) return; -#ifdef CONFIG_IOMMU_API rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0); if (rc) pe_warn(pe, "OPAL error %lld release DMA window\n", rc); -#endif pnv_pci_ioda2_set_bypass(pe, false); if (pe->table_group.group) { @@ -3405,14 +2733,8 @@ static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe, if (map[idx] != pe->pe_number) continue; - if (win == OPAL_M64_WINDOW_TYPE) - rc = opal_pci_map_pe_mmio_window(phb->opal_id, - phb->ioda.reserved_pe_idx, win, - idx / PNV_IODA1_M64_SEGS, - idx % PNV_IODA1_M64_SEGS); - else - rc = opal_pci_map_pe_mmio_window(phb->opal_id, - phb->ioda.reserved_pe_idx, win, 0, idx); + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + phb->ioda.reserved_pe_idx, win, 0, idx); if (rc != OPAL_SUCCESS) pe_warn(pe, "Error %lld unmapping (%d) segment#%d\n", @@ -3431,8 +2753,7 @@ static void pnv_ioda_release_pe_seg(struct pnv_ioda_pe *pe) phb->ioda.io_segmap); pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE, phb->ioda.m32_segmap); - pnv_ioda_free_pe_seg(pe, OPAL_M64_WINDOW_TYPE, - phb->ioda.m64_segmap); + /* M64 is pre-configured by pnv_ioda1_init_m64() */ } else if (phb->type == PNV_PHB_IODA2) { pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE, phb->ioda.m32_segmap); @@ -3488,17 +2809,27 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) static void pnv_pci_release_device(struct pci_dev *pdev) { - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); struct pci_dn *pdn = pci_get_pdn(pdev); struct pnv_ioda_pe *pe; + /* The VF PE state is torn down when sriov_disable() is called */ if (pdev->is_virtfn) return; if (!pdn || pdn->pe_number == IODA_INVALID_PE) return; +#ifdef CONFIG_PCI_IOV + /* + * FIXME: Try move this to sriov_disable(). It's here since we allocate + * the iov state at probe time since we need to fiddle with the IOV + * resources. + */ + if (pdev->is_physfn) + kfree(pdev->dev.archdata.iov_data); +#endif + /* * PCI hotplug can happen as part of EEH error recovery. The @pdn * isn't removed and added afterwards in this scenario. We should @@ -3534,8 +2865,7 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose) static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus) { - struct pci_controller *hose = bus->sysdata; - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(bus); struct pnv_ioda_pe *pe; list_for_each_entry(pe, &phb->ioda.pe_list, list) { @@ -3760,7 +3090,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx); } else { /* otherwise just allocate one */ - root_pe = pnv_ioda_alloc_pe(phb); + root_pe = pnv_ioda_alloc_pe(phb, 1); phb->ioda.root_pe_idx = root_pe->pe_number; } @@ -3873,8 +3203,7 @@ void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np) static void pnv_npu2_opencapi_cfg_size_fixup(struct pci_dev *dev) { - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); if (!machine_is(powernv)) return; diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c new file mode 100644 index 000000000000..c4434f20f42f --- /dev/null +++ b/arch/powerpc/platforms/powernv/pci-sriov.c @@ -0,0 +1,766 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include + +#include + +#include "pci.h" + +/* for pci_dev_is_added() */ +#include "../../../../drivers/pci/pci.h" + +/* + * The majority of the complexity in supporting SR-IOV on PowerNV comes from + * the need to put the MMIO space for each VF into a separate PE. Internally + * the PHB maps MMIO addresses to a specific PE using the "Memory BAR Table". + * The MBT historically only applied to the 64bit MMIO window of the PHB + * so it's common to see it referred to as the "M64BT". + * + * An MBT entry stores the mapped range as an , pair. This forces + * the address range that we want to map to be power-of-two sized and aligned. + * For conventional PCI devices this isn't really an issue since PCI device BARs + * have the same requirement. + * + * For a SR-IOV BAR things are a little more awkward since size and alignment + * are not coupled. The alignment is set based on the the per-VF BAR size, but + * the total BAR area is: number-of-vfs * per-vf-size. The number of VFs + * isn't necessarily a power of two, so neither is the total size. To fix that + * we need to finesse (read: hack) the Linux BAR allocator so that it will + * allocate the SR-IOV BARs in a way that lets us map them using the MBT. + * + * The changes to size and alignment that we need to do depend on the "mode" + * of MBT entry that we use. We only support SR-IOV on PHB3 (IODA2) and above, + * so as a baseline we can assume that we have the following BAR modes + * available: + * + * NB: $PE_COUNT is the number of PEs that the PHB supports. + * + * a) A segmented BAR that splits the mapped range into $PE_COUNT equally sized + * segments. The n'th segment is mapped to the n'th PE. + * b) An un-segmented BAR that maps the whole address range to a specific PE. + * + * + * We prefer to use mode a) since it only requires one MBT entry per SR-IOV BAR + * For comparison b) requires one entry per-VF per-BAR, or: + * (num-vfs * num-sriov-bars) in total. To use a) we need the size of each segment + * to equal the size of the per-VF BAR area. So: + * + * new_size = per-vf-size * number-of-PEs + * + * The alignment for the SR-IOV BAR also needs to be changed from per-vf-size + * to "new_size", calculated above. Implementing this is a convoluted process + * which requires several hooks in the PCI core: + * + * 1. In pcibios_add_device() we call pnv_pci_ioda_fixup_iov(). + * + * At this point the device has been probed and the device's BARs are sized, + * but no resource allocations have been done. The SR-IOV BARs are sized + * based on the maximum number of VFs supported by the device and we need + * to increase that to new_size. + * + * 2. Later, when Linux actually assigns resources it tries to make the resource + * allocations for each PCI bus as compact as possible. As a part of that it + * sorts the BARs on a bus by their required alignment, which is calculated + * using pci_resource_alignment(). + * + * For IOV resources this goes: + * pci_resource_alignment() + * pci_sriov_resource_alignment() + * pcibios_sriov_resource_alignment() + * pnv_pci_iov_resource_alignment() + * + * Our hook overrides the default alignment, equal to the per-vf-size, with + * new_size computed above. + * + * 3. When userspace enables VFs for a device: + * + * sriov_enable() + * pcibios_sriov_enable() + * pnv_pcibios_sriov_enable() + * + * This is where we actually allocate PE numbers for each VF and setup the + * MBT mapping for each SR-IOV BAR. In steps 1) and 2) we setup an "arena" + * where each MBT segment is equal in size to the VF BAR so we can shift + * around the actual SR-IOV BAR location within this arena. We need this + * ability because the PE space is shared by all devices on the same PHB. + * When using mode a) described above segment 0 in maps to PE#0 which might + * be already being used by another device on the PHB. + * + * As a result we need allocate a contigious range of PE numbers, then shift + * the address programmed into the SR-IOV BAR of the PF so that the address + * of VF0 matches up with the segment corresponding to the first allocated + * PE number. This is handled in pnv_pci_vf_resource_shift(). + * + * Once all that is done we return to the PCI core which then enables VFs, + * scans them and creates pci_devs for each. The init process for a VF is + * largely the same as a normal device, but the VF is inserted into the IODA + * PE that we allocated for it rather than the PE associated with the bus. + * + * 4. When userspace disables VFs we unwind the above in + * pnv_pcibios_sriov_disable(). Fortunately this is relatively simple since + * we don't need to validate anything, just tear down the mappings and + * move SR-IOV resource back to its "proper" location. + * + * That's how mode a) works. In theory mode b) (single PE mapping) is less work + * since we can map each individual VF with a separate BAR. However, there's a + * few limitations: + * + * 1) For IODA2 mode b) has a minimum alignment requirement of 32MB. This makes + * it only usable for devices with very large per-VF BARs. Such devices are + * similar to Big Foot. They definitely exist, but I've never seen one. + * + * 2) The number of MBT entries that we have is limited. PHB3 and PHB4 only + * 16 total and some are needed for. Most SR-IOV capable network cards can support + * more than 16 VFs on each port. + * + * We use b) when using a) would use more than 1/4 of the entire 64 bit MMIO + * window of the PHB. + * + * + * + * PHB4 (IODA3) added a few new features that would be useful for SR-IOV. It + * allowed the MBT to map 32bit MMIO space in addition to 64bit which allows + * us to support SR-IOV BARs in the 32bit MMIO window. This is useful since + * the Linux BAR allocation will place any BAR marked as non-prefetchable into + * the non-prefetchable bridge window, which is 32bit only. It also added two + * new modes: + * + * c) A segmented BAR similar to a), but each segment can be individually + * mapped to any PE. This is matches how the 32bit MMIO window worked on + * IODA1&2. + * + * d) A segmented BAR with 8, 64, or 128 segments. This works similarly to a), + * but with fewer segments and configurable base PE. + * + * i.e. The n'th segment maps to the (n + base)'th PE. + * + * The base PE is also required to be a multiple of the window size. + * + * Unfortunately, the OPAL API doesn't currently (as of skiboot v6.6) allow us + * to exploit any of the IODA3 features. + */ + +static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) +{ + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); + struct resource *res; + int i; + resource_size_t vf_bar_sz; + struct pnv_iov_data *iov; + int mul; + + iov = kzalloc(sizeof(*iov), GFP_KERNEL); + if (!iov) + goto disable_iov; + pdev->dev.archdata.iov_data = iov; + mul = phb->ioda.total_pe_num; + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || res->parent) + continue; + if (!pnv_pci_is_m64_flags(res->flags)) { + dev_warn(&pdev->dev, "Don't support SR-IOV with non M64 VF BAR%d: %pR. \n", + i, res); + goto disable_iov; + } + + vf_bar_sz = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES); + + /* + * Generally, one segmented M64 BAR maps one IOV BAR. However, + * if a VF BAR is too large we end up wasting a lot of space. + * If each VF needs more than 1/4 of the default m64 segment + * then each VF BAR should be mapped in single-PE mode to reduce + * the amount of space required. This does however limit the + * number of VFs we can support. + * + * The 1/4 limit is arbitrary and can be tweaked. + */ + if (vf_bar_sz > (phb->ioda.m64_segsize >> 2)) { + /* + * On PHB3, the minimum size alignment of M64 BAR in + * single mode is 32MB. If this VF BAR is smaller than + * 32MB, but still too large for a segmented window + * then we can't map it and need to disable SR-IOV for + * this device. + */ + if (vf_bar_sz < SZ_32M) { + pci_err(pdev, "VF BAR%d: %pR can't be mapped in single PE mode\n", + i, res); + goto disable_iov; + } + + iov->m64_single_mode[i] = true; + continue; + } + + /* + * This BAR can be mapped with one segmented window, so adjust + * te resource size to accommodate. + */ + pci_dbg(pdev, " Fixing VF BAR%d: %pR to\n", i, res); + res->end = res->start + vf_bar_sz * mul - 1; + pci_dbg(pdev, " %pR\n", res); + + pci_info(pdev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)", + i, res, mul); + + iov->need_shift = true; + } + + return; + +disable_iov: + /* Save ourselves some MMIO space by disabling the unusable BARs */ + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + res->flags = 0; + res->end = res->start - 1; + } + + pdev->dev.archdata.iov_data = NULL; + kfree(iov); +} + +void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev) +{ + if (WARN_ON(pci_dev_is_added(pdev))) + return; + + if (pdev->is_virtfn) { + struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev); + + /* + * VF PEs are single-device PEs so their pdev pointer needs to + * be set. The pdev doesn't exist when the PE is allocated (in + * (pcibios_sriov_enable()) so we fix it up here. + */ + pe->pdev = pdev; + WARN_ON(!(pe->flags & PNV_IODA_PE_VF)); + } else if (pdev->is_physfn) { + /* + * For PFs adjust their allocated IOV resources to match what + * the PHB can support using it's M64 BAR table. + */ + pnv_pci_ioda_fixup_iov_resources(pdev); + } +} + +resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, + int resno) +{ + resource_size_t align = pci_iov_resource_size(pdev, resno); + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); + struct pnv_iov_data *iov = pnv_iov_get(pdev); + + /* + * iov can be null if we have an SR-IOV device with IOV BAR that can't + * be placed in the m64 space (i.e. The BAR is 32bit or non-prefetch). + * In that case we don't allow VFs to be enabled since one of their + * BARs would not be placed in the correct PE. + */ + if (!iov) + return align; + + /* + * If we're using single mode then we can just use the native VF BAR + * alignment. We validated that it's possible to use a single PE + * window above when we did the fixup. + */ + if (iov->m64_single_mode[resno - PCI_IOV_RESOURCES]) + return align; + + /* + * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the + * SR-IOV. While from hardware perspective, the range mapped by M64 + * BAR should be size aligned. + * + * This function returns the total IOV BAR size if M64 BAR is in + * Shared PE mode or just VF BAR size if not. + * If the M64 BAR is in Single PE mode, return the VF BAR size or + * M64 segment size if IOV BAR size is less. + */ + return phb->ioda.total_pe_num * align; +} + +static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs) +{ + struct pnv_iov_data *iov; + struct pnv_phb *phb; + int window_id; + + phb = pci_bus_to_pnvhb(pdev->bus); + iov = pnv_iov_get(pdev); + + for_each_set_bit(window_id, iov->used_m64_bar_mask, MAX_M64_BARS) { + opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + window_id, + 0); + + clear_bit(window_id, &phb->ioda.m64_bar_alloc); + } + + return 0; +} + + +/* + * PHB3 and beyond support segmented windows. The window's address range + * is subdivided into phb->ioda.total_pe_num segments and there's a 1-1 + * mapping between PEs and segments. + */ +static int64_t pnv_ioda_map_m64_segmented(struct pnv_phb *phb, + int window_id, + resource_size_t start, + resource_size_t size) +{ + int64_t rc; + + rc = opal_pci_set_phb_mem_window(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + window_id, + start, + 0, /* unused */ + size); + if (rc) + goto out; + + rc = opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + window_id, + OPAL_ENABLE_M64_SPLIT); +out: + if (rc) + pr_err("Failed to map M64 window #%d: %lld\n", window_id, rc); + + return rc; +} + +static int64_t pnv_ioda_map_m64_single(struct pnv_phb *phb, + int pe_num, + int window_id, + resource_size_t start, + resource_size_t size) +{ + int64_t rc; + + /* + * The API for setting up m64 mmio windows seems to have been designed + * with P7-IOC in mind. For that chip each M64 BAR (window) had a fixed + * split of 8 equally sized segments each of which could individually + * assigned to a PE. + * + * The problem with this is that the API doesn't have any way to + * communicate the number of segments we want on a BAR. This wasn't + * a problem for p7-ioc since you didn't have a choice, but the + * single PE windows added in PHB3 don't map cleanly to this API. + * + * As a result we've got this slightly awkward process where we + * call opal_pci_map_pe_mmio_window() to put the single in single + * PE mode, and set the PE for the window before setting the address + * bounds. We need to do it this way because the single PE windows + * for PHB3 have different alignment requirements on PHB3. + */ + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + pe_num, + OPAL_M64_WINDOW_TYPE, + window_id, + 0); + if (rc) + goto out; + + /* + * NB: In single PE mode the window needs to be aligned to 32MB + */ + rc = opal_pci_set_phb_mem_window(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + window_id, + start, + 0, /* ignored by FW, m64 is 1-1 */ + size); + if (rc) + goto out; + + /* + * Now actually enable it. We specified the BAR should be in "non-split" + * mode so FW will validate that the BAR is in single PE mode. + */ + rc = opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + window_id, + OPAL_ENABLE_M64_NON_SPLIT); +out: + if (rc) + pr_err("Error mapping single PE BAR\n"); + + return rc; +} + +static int pnv_pci_alloc_m64_bar(struct pnv_phb *phb, struct pnv_iov_data *iov) +{ + int win; + + do { + win = find_next_zero_bit(&phb->ioda.m64_bar_alloc, + phb->ioda.m64_bar_idx + 1, 0); + + if (win >= phb->ioda.m64_bar_idx + 1) + return -1; + } while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc)); + + set_bit(win, iov->used_m64_bar_mask); + + return win; +} + +static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) +{ + struct pnv_iov_data *iov; + struct pnv_phb *phb; + unsigned int win; + struct resource *res; + int i, j; + int64_t rc; + resource_size_t size, start; + int base_pe_num; + + phb = pci_bus_to_pnvhb(pdev->bus); + iov = pnv_iov_get(pdev); + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || !res->parent) + continue; + + /* don't need single mode? map everything in one go! */ + if (!iov->m64_single_mode[i]) { + win = pnv_pci_alloc_m64_bar(phb, iov); + if (win < 0) + goto m64_failed; + + size = resource_size(res); + start = res->start; + + rc = pnv_ioda_map_m64_segmented(phb, win, start, size); + if (rc) + goto m64_failed; + + continue; + } + + /* otherwise map each VF with single PE BARs */ + size = pci_iov_resource_size(pdev, PCI_IOV_RESOURCES + i); + base_pe_num = iov->vf_pe_arr[0].pe_number; + + for (j = 0; j < num_vfs; j++) { + win = pnv_pci_alloc_m64_bar(phb, iov); + if (win < 0) + goto m64_failed; + + start = res->start + size * j; + rc = pnv_ioda_map_m64_single(phb, win, + base_pe_num + j, + start, + size); + if (rc) + goto m64_failed; + } + } + return 0; + +m64_failed: + pnv_pci_vf_release_m64(pdev, num_vfs); + return -EBUSY; +} + +static void pnv_ioda_release_vf_PE(struct pci_dev *pdev) +{ + struct pnv_phb *phb; + struct pnv_ioda_pe *pe, *pe_n; + + phb = pci_bus_to_pnvhb(pdev->bus); + + if (!pdev->is_physfn) + return; + + /* FIXME: Use pnv_ioda_release_pe()? */ + list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) { + if (pe->parent_dev != pdev) + continue; + + pnv_pci_ioda2_release_pe_dma(pe); + + /* Remove from list */ + mutex_lock(&phb->ioda.pe_list_mutex); + list_del(&pe->list); + mutex_unlock(&phb->ioda.pe_list_mutex); + + pnv_ioda_deconfigure_pe(phb, pe); + + pnv_ioda_free_pe(pe); + } +} + +static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) +{ + struct resource *res, res2; + struct pnv_iov_data *iov; + resource_size_t size; + u16 num_vfs; + int i; + + if (!dev->is_physfn) + return -EINVAL; + iov = pnv_iov_get(dev); + + /* + * "offset" is in VFs. The M64 windows are sized so that when they + * are segmented, each segment is the same size as the IOV BAR. + * Each segment is in a separate PE, and the high order bits of the + * address are the PE number. Therefore, each VF's BAR is in a + * separate PE, and changing the IOV BAR start address changes the + * range of PEs the VFs are in. + */ + num_vfs = iov->num_vfs; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &dev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || !res->parent) + continue; + if (iov->m64_single_mode[i]) + continue; + + /* + * The actual IOV BAR range is determined by the start address + * and the actual size for num_vfs VFs BAR. This check is to + * make sure that after shifting, the range will not overlap + * with another device. + */ + size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); + res2.flags = res->flags; + res2.start = res->start + (size * offset); + res2.end = res2.start + (size * num_vfs) - 1; + + if (res2.end > res->end) { + dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n", + i, &res2, res, num_vfs, offset); + return -EBUSY; + } + } + + /* + * Since M64 BAR shares segments among all possible 256 PEs, + * we have to shift the beginning of PF IOV BAR to make it start from + * the segment which belongs to the PE number assigned to the first VF. + * This creates a "hole" in the /proc/iomem which could be used for + * allocating other resources so we reserve this area below and + * release when IOV is released. + */ + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &dev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || !res->parent) + continue; + if (iov->m64_single_mode[i]) + continue; + + size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); + res2 = *res; + res->start += size * offset; + + dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n", + i, &res2, res, (offset > 0) ? "En" : "Dis", + num_vfs, offset); + + if (offset < 0) { + devm_release_resource(&dev->dev, &iov->holes[i]); + memset(&iov->holes[i], 0, sizeof(iov->holes[i])); + } + + pci_update_resource(dev, i + PCI_IOV_RESOURCES); + + if (offset > 0) { + iov->holes[i].start = res2.start; + iov->holes[i].end = res2.start + size * offset - 1; + iov->holes[i].flags = IORESOURCE_BUS; + iov->holes[i].name = "pnv_iov_reserved"; + devm_request_resource(&dev->dev, res->parent, + &iov->holes[i]); + } + } + return 0; +} + +static void pnv_pci_sriov_disable(struct pci_dev *pdev) +{ + u16 num_vfs, base_pe; + struct pnv_iov_data *iov; + + iov = pnv_iov_get(pdev); + num_vfs = iov->num_vfs; + base_pe = iov->vf_pe_arr[0].pe_number; + + if (WARN_ON(!iov)) + return; + + /* Release VF PEs */ + pnv_ioda_release_vf_PE(pdev); + + /* Un-shift the IOV BARs if we need to */ + if (iov->need_shift) + pnv_pci_vf_resource_shift(pdev, -base_pe); + + /* Release M64 windows */ + pnv_pci_vf_release_m64(pdev, num_vfs); +} + +static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) +{ + struct pnv_phb *phb; + struct pnv_ioda_pe *pe; + int pe_num; + u16 vf_index; + struct pnv_iov_data *iov; + struct pci_dn *pdn; + + if (!pdev->is_physfn) + return; + + phb = pci_bus_to_pnvhb(pdev->bus); + pdn = pci_get_pdn(pdev); + iov = pnv_iov_get(pdev); + + /* Reserve PE for each VF */ + for (vf_index = 0; vf_index < num_vfs; vf_index++) { + int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index); + int vf_bus = pci_iov_virtfn_bus(pdev, vf_index); + struct pci_dn *vf_pdn; + + pe = &iov->vf_pe_arr[vf_index]; + pe->phb = phb; + pe->flags = PNV_IODA_PE_VF; + pe->pbus = NULL; + pe->parent_dev = pdev; + pe->mve_number = -1; + pe->rid = (vf_bus << 8) | vf_devfn; + + pe_num = pe->pe_number; + pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n", + pci_domain_nr(pdev->bus), pdev->bus->number, + PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num); + + if (pnv_ioda_configure_pe(phb, pe)) { + /* XXX What do we do here ? */ + pnv_ioda_free_pe(pe); + pe->pdev = NULL; + continue; + } + + /* Put PE to the list */ + mutex_lock(&phb->ioda.pe_list_mutex); + list_add_tail(&pe->list, &phb->ioda.pe_list); + mutex_unlock(&phb->ioda.pe_list_mutex); + + /* associate this pe to it's pdn */ + list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) { + if (vf_pdn->busno == vf_bus && + vf_pdn->devfn == vf_devfn) { + vf_pdn->pe_number = pe_num; + break; + } + } + + pnv_pci_ioda2_setup_dma_pe(phb, pe); + } +} + +static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + struct pnv_ioda_pe *base_pe; + struct pnv_iov_data *iov; + struct pnv_phb *phb; + int ret; + u16 i; + + phb = pci_bus_to_pnvhb(pdev->bus); + iov = pnv_iov_get(pdev); + + /* + * There's a calls to IODA2 PE setup code littered throughout. We could + * probably fix that, but we'd still have problems due to the + * restriction inherent on IODA1 PHBs. + * + * NB: We class IODA3 as IODA2 since they're very similar. + */ + if (phb->type != PNV_PHB_IODA2) { + pci_err(pdev, "SR-IOV is not supported on this PHB\n"); + return -ENXIO; + } + + if (!iov) { + dev_info(&pdev->dev, "don't support this SRIOV device with non 64bit-prefetchable IOV BAR\n"); + return -ENOSPC; + } + + /* allocate a contigious block of PEs for our VFs */ + base_pe = pnv_ioda_alloc_pe(phb, num_vfs); + if (!base_pe) { + pci_err(pdev, "Unable to allocate PEs for %d VFs\n", num_vfs); + return -EBUSY; + } + + iov->vf_pe_arr = base_pe; + iov->num_vfs = num_vfs; + + /* Assign M64 window accordingly */ + ret = pnv_pci_vf_assign_m64(pdev, num_vfs); + if (ret) { + dev_info(&pdev->dev, "Not enough M64 window resources\n"); + goto m64_failed; + } + + /* + * When using one M64 BAR to map one IOV BAR, we need to shift + * the IOV BAR according to the PE# allocated to the VFs. + * Otherwise, the PE# for the VF will conflict with others. + */ + if (iov->need_shift) { + ret = pnv_pci_vf_resource_shift(pdev, base_pe->pe_number); + if (ret) + goto shift_failed; + } + + /* Setup VF PEs */ + pnv_ioda_setup_vf_PE(pdev, num_vfs); + + return 0; + +shift_failed: + pnv_pci_vf_release_m64(pdev, num_vfs); + +m64_failed: + for (i = 0; i < num_vfs; i++) + pnv_ioda_free_pe(&iov->vf_pe_arr[i]); + + return ret; +} + +int pnv_pcibios_sriov_disable(struct pci_dev *pdev) +{ + pnv_pci_sriov_disable(pdev); + + /* Release PCI data */ + remove_sriov_vf_pdns(pdev); + return 0; +} + +int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + /* Allocate PCI data */ + add_sriov_vf_pdns(pdev); + + return pnv_pci_sriov_enable(pdev, num_vfs); +} diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 091fe1cf386b..9b9bca169275 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -162,8 +162,7 @@ EXPORT_SYMBOL_GPL(pnv_pci_set_power_state); int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); struct msi_desc *entry; struct msi_msg msg; int hwirq; @@ -211,8 +210,7 @@ int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) void pnv_teardown_msi_irqs(struct pci_dev *pdev) { - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); struct msi_desc *entry; irq_hw_number_t hwirq; @@ -824,10 +822,9 @@ EXPORT_SYMBOL(pnv_pci_get_phb_node); int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable) { - __be64 val; - struct pci_controller *hose; - struct pnv_phb *phb; + struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); u64 tunnel_bar; + __be64 val; int rc; if (!opal_check_token(OPAL_PCI_GET_PBCQ_TUNNEL_BAR)) @@ -835,9 +832,6 @@ int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable) if (!opal_check_token(OPAL_PCI_SET_PBCQ_TUNNEL_BAR)) return -ENXIO; - hose = pci_bus_to_host(dev->bus); - phb = hose->private_data; - mutex_lock(&tunnel_mutex); rc = opal_pci_get_pbcq_tunnel_bar(phb->opal_id, &val); if (rc != OPAL_SUCCESS) { diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 51c254f2f3cb..739a0b3b72e1 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -87,7 +87,14 @@ struct pnv_ioda_pe { bool tce_bypass_enabled; uint64_t tce_bypass_base; - /* MSIs. MVE index is identical for for 32 and 64 bit MSI + /* + * Used to track whether we've done DMA setup for this PE or not. We + * want to defer allocating TCE tables, etc until we've added a + * non-bridge device to the PE. + */ + bool dma_setup_done; + + /* MSIs. MVE index is identical for 32 and 64 bit MSI * and -1 if not supported. (It's actually identical to the * PE number) */ @@ -147,6 +154,7 @@ struct pnv_phb { unsigned long m64_size; unsigned long m64_segsize; unsigned long m64_base; +#define MAX_M64_BARS 64 unsigned long m64_bar_alloc; /* IO ports */ @@ -187,6 +195,89 @@ struct pnv_phb { u8 *diag_data; }; + +/* IODA PE management */ + +static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r) +{ + /* + * WARNING: We cannot rely on the resource flags. The Linux PCI + * allocation code sometimes decides to put a 64-bit prefetchable + * BAR in the 32-bit window, so we have to compare the addresses. + * + * For simplicity we only test resource start. + */ + return (r->start >= phb->ioda.m64_base && + r->start < (phb->ioda.m64_base + phb->ioda.m64_size)); +} + +static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags) +{ + unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH); + + return (resource_flags & flags) == flags; +} + +int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe); +int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe); + +void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe); +void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe); + +struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count); +void pnv_ioda_free_pe(struct pnv_ioda_pe *pe); + +#ifdef CONFIG_PCI_IOV +/* + * For SR-IOV we want to put each VF's MMIO resource in to a separate PE. + * This requires a bit of acrobatics with the MMIO -> PE configuration + * and this structure is used to keep track of it all. + */ +struct pnv_iov_data { + /* number of VFs enabled */ + u16 num_vfs; + + /* pointer to the array of VF PEs. num_vfs long*/ + struct pnv_ioda_pe *vf_pe_arr; + + /* Did we map the VF BAR with single-PE IODA BARs? */ + bool m64_single_mode[PCI_SRIOV_NUM_BARS]; + + /* + * True if we're using any segmented windows. In that case we need + * shift the start of the IOV resource the segment corresponding to + * the allocated PE. + */ + bool need_shift; + + /* + * Bit mask used to track which m64 windows are used to map the + * SR-IOV BARs for this device. + */ + DECLARE_BITMAP(used_m64_bar_mask, MAX_M64_BARS); + + /* + * If we map the SR-IOV BARs with a segmented window then + * parts of that window will be "claimed" by other PEs. + * + * "holes" here is used to reserve the leading portion + * of the window that is used by other (non VF) PEs. + */ + struct resource holes[PCI_SRIOV_NUM_BARS]; +}; + +static inline struct pnv_iov_data *pnv_iov_get(struct pci_dev *pdev) +{ + return pdev->dev.archdata.iov_data; +} + +void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev); +resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, int resno); + +int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs); +int pnv_pcibios_sriov_disable(struct pci_dev *pdev); +#endif /* CONFIG_PCI_IOV */ + extern struct pci_ops pnv_pci_ops; void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, @@ -260,4 +351,14 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, extern unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb); +static inline struct pnv_phb *pci_bus_to_pnvhb(struct pci_bus *bus) +{ + struct pci_controller *hose = bus->sysdata; + + if (hose) + return hose->private_data; + + return NULL; +} + #endif /* __POWERNV_PCI_H */ diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 3bc188da82ba..7fcb88623081 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -399,7 +399,15 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE static unsigned long pnv_memory_block_size(void) { - return 256UL * 1024 * 1024; + /* + * We map the kernel linear region with 1GB large pages on radix. For + * memory hot unplug to work our memory block size must be at least + * this size. + */ + if (radix_enabled()) + return radix_mem_block_size; + else + return 256UL * 1024 * 1024; } #endif diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 24c18362e5ea..5e037df2a3a1 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -25,15 +25,22 @@ config PPC_PSERIES select SWIOTLB default y +config PARAVIRT_SPINLOCKS + bool + config PPC_SPLPAR - depends on PPC_PSERIES bool "Support for shared-processor logical partitions" + depends on PPC_PSERIES + select PARAVIRT_SPINLOCKS if PPC_QUEUED_SPINLOCKS + default y help Enabling this option will make the kernel run more efficiently on logically-partitioned pSeries systems which use shared processors, that is, which share physical processors between two or more partitions. + Say Y if you are unsure. + config DTL bool "Dispatch Trace Log" depends on PPC_SPLPAR && DEBUG_FS diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index ace117f99d94..cb2d9a970b7b 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -32,6 +33,8 @@ #include #include +static int pseries_eeh_get_pe_addr(struct pci_dn *pdn); + /* RTAS tokens */ static int ibm_set_eeh_option; static int ibm_set_slot_reset; @@ -52,8 +55,6 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev) dev_dbg(&pdev->dev, "EEH: Setting up device\n"); #ifdef CONFIG_PCI_IOV if (pdev->is_virtfn) { - struct pci_dn *physfn_pdn; - pdn->device_id = pdev->device; pdn->vendor_id = pdev->vendor; pdn->class_code = pdev->class; @@ -63,23 +64,172 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev) * completion from platform. */ pdn->last_allow_rc = 0; - physfn_pdn = pci_get_pdn(pdev->physfn); - pdn->pe_number = physfn_pdn->pe_num_map[pdn->vf_index]; } #endif pseries_eeh_init_edev(pdn); #ifdef CONFIG_PCI_IOV if (pdev->is_virtfn) { + /* + * FIXME: This really should be handled by choosing the right + * parent PE in in pseries_eeh_init_edev(). + */ + struct eeh_pe *physfn_pe = pci_dev_to_eeh_dev(pdev->physfn)->pe; struct eeh_dev *edev = pdn_to_eeh_dev(pdn); edev->pe_config_addr = (pdn->busno << 16) | (pdn->devfn << 8); - eeh_rmv_from_parent_pe(edev); /* Remove as it is adding to bus pe */ - eeh_add_to_parent_pe(edev); /* Add as VF PE type */ + eeh_pe_tree_remove(edev); /* Remove as it is adding to bus pe */ + eeh_pe_tree_insert(edev, physfn_pe); /* Add as VF PE type */ } #endif eeh_probe_device(pdev); } + +/** + * pseries_eeh_get_config_addr - Retrieve config address + * + * Retrieve the assocated config address. Actually, there're 2 RTAS + * function calls dedicated for the purpose. We need implement + * it through the new function and then the old one. Besides, + * you should make sure the config address is figured out from + * FDT node before calling the function. + * + * It's notable that zero'ed return value means invalid PE config + * address. + */ +static int pseries_eeh_get_config_addr(struct pci_controller *phb, int config_addr) +{ + int ret = 0; + int rets[3]; + + if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { + /* + * First of all, we need to make sure there has one PE + * associated with the device. Otherwise, PE address is + * meaningless. + */ + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid), 1); + if (ret || (rets[0] == 0)) + return 0; + + /* Retrieve the associated PE config address */ + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid), 0); + if (ret) { + pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", + __func__, phb->global_number, config_addr); + return 0; + } + + return rets[0]; + } + + if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { + ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid), 0); + if (ret) { + pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", + __func__, phb->global_number, config_addr); + return 0; + } + + return rets[0]; + } + + return ret; +} + +/** + * pseries_eeh_phb_reset - Reset the specified PHB + * @phb: PCI controller + * @config_adddr: the associated config address + * @option: reset option + * + * Reset the specified PHB/PE + */ +static int pseries_eeh_phb_reset(struct pci_controller *phb, int config_addr, int option) +{ + int ret; + + /* Reset PE through RTAS call */ + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid), option); + + /* If fundamental-reset not supported, try hot-reset */ + if (option == EEH_RESET_FUNDAMENTAL && + ret == -8) { + option = EEH_RESET_HOT; + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid), option); + } + + /* We need reset hold or settlement delay */ + if (option == EEH_RESET_FUNDAMENTAL || + option == EEH_RESET_HOT) + msleep(EEH_PE_RST_HOLD_TIME); + else + msleep(EEH_PE_RST_SETTLE_TIME); + + return ret; +} + +/** + * pseries_eeh_phb_configure_bridge - Configure PCI bridges in the indicated PE + * @phb: PCI controller + * @config_adddr: the associated config address + * + * The function will be called to reconfigure the bridges included + * in the specified PE so that the mulfunctional PE would be recovered + * again. + */ +static int pseries_eeh_phb_configure_bridge(struct pci_controller *phb, int config_addr) +{ + int ret; + /* Waiting 0.2s maximum before skipping configuration */ + int max_wait = 200; + + while (max_wait > 0) { + ret = rtas_call(ibm_configure_pe, 3, 1, NULL, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid)); + + if (!ret) + return ret; + if (ret < 0) + break; + + /* + * If RTAS returns a delay value that's above 100ms, cut it + * down to 100ms in case firmware made a mistake. For more + * on how these delay values work see rtas_busy_delay_time + */ + if (ret > RTAS_EXTENDED_DELAY_MIN+2 && + ret <= RTAS_EXTENDED_DELAY_MAX) + ret = RTAS_EXTENDED_DELAY_MIN+2; + + max_wait -= rtas_busy_delay_time(ret); + + if (max_wait < 0) + break; + + rtas_busy_delay(ret); + } + + pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n", + __func__, phb->global_number, config_addr, ret); + /* PAPR defines -3 as "Parameter Error" for this function: */ + if (ret == -3) + return -EINVAL; + else + return -EIO; +} + /* * Buffer for reporting slot-error-detail rtas calls. Its here * in BSS, and not dynamically alloced, so that it ends up in @@ -96,6 +246,10 @@ static int eeh_error_buf_size; */ static int pseries_eeh_init(void) { + struct pci_controller *phb; + struct pci_dn *pdn; + int addr, config_addr; + /* figure out EEH RTAS function call tokens */ ibm_set_eeh_option = rtas_token("ibm,set-eeh-option"); ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); @@ -148,6 +302,22 @@ static int pseries_eeh_init(void) /* Set EEH machine dependent code */ ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device; + if (is_kdump_kernel() || reset_devices) { + pr_info("Issue PHB reset ...\n"); + list_for_each_entry(phb, &hose_list, list_node) { + pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list); + addr = (pdn->busno << 16) | (pdn->devfn << 8); + config_addr = pseries_eeh_get_config_addr(phb, addr); + /* invalid PE config addr */ + if (config_addr == 0) + continue; + + pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL); + pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_DEACTIVATE); + pseries_eeh_phb_configure_bridge(phb, config_addr); + } + } + return 0; } @@ -220,6 +390,43 @@ static int pseries_eeh_find_ecap(struct pci_dn *pdn, int cap) return 0; } +/** + * pseries_eeh_pe_get_parent - Retrieve the parent PE + * @edev: EEH device + * + * The whole PEs existing in the system are organized as hierarchy + * tree. The function is used to retrieve the parent PE according + * to the parent EEH device. + */ +static struct eeh_pe *pseries_eeh_pe_get_parent(struct eeh_dev *edev) +{ + struct eeh_dev *parent; + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + + /* + * It might have the case for the indirect parent + * EEH device already having associated PE, but + * the direct parent EEH device doesn't have yet. + */ + if (edev->physfn) + pdn = pci_get_pdn(edev->physfn); + else + pdn = pdn ? pdn->parent : NULL; + while (pdn) { + /* We're poking out of PCI territory */ + parent = pdn_to_eeh_dev(pdn); + if (!parent) + return NULL; + + if (parent->pe) + return parent->pe; + + pdn = pdn->parent; + } + + return NULL; +} + /** * pseries_eeh_init_edev - initialise the eeh_dev and eeh_pe for a pci_dn * @@ -275,12 +482,11 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) * correctly reflects that current device is root port * or PCIe switch downstream port. */ - edev->class_code = pdn->class_code; edev->pcix_cap = pseries_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); edev->pcie_cap = pseries_eeh_find_cap(pdn, PCI_CAP_ID_EXP); edev->aer_cap = pseries_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); edev->mode &= 0xFFFFFF00; - if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { + if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { edev->mode |= EEH_DEV_BRIDGE; if (edev->pcie_cap) { rtas_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS, @@ -304,8 +510,10 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) if (ret) { eeh_edev_dbg(edev, "EEH failed to enable on device (code %d)\n", ret); } else { + struct eeh_pe *parent; + /* Retrieve PE address */ - edev->pe_config_addr = eeh_ops->get_pe_addr(&pe); + edev->pe_config_addr = pseries_eeh_get_pe_addr(pdn); pe.addr = edev->pe_config_addr; /* Some older systems (Power4) allow the ibm,set-eeh-option @@ -316,16 +524,19 @@ void pseries_eeh_init_edev(struct pci_dn *pdn) if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT) enable = 1; - if (enable) { + /* + * This device doesn't support EEH, but it may have an + * EEH parent. In this case any error on the device will + * freeze the PE of it's upstream bridge, so added it to + * the upstream PE. + */ + parent = pseries_eeh_pe_get_parent(edev); + if (parent && !enable) + edev->pe_config_addr = parent->addr; + + if (enable || parent) { eeh_add_flag(EEH_ENABLED); - eeh_add_to_parent_pe(edev); - } else if (pdn->parent && pdn_to_eeh_dev(pdn->parent) && - (pdn_to_eeh_dev(pdn->parent))->pe) { - /* This device doesn't support EEH, but it may have an - * EEH parent, in which case we mark it as supported. - */ - edev->pe_config_addr = pdn_to_eeh_dev(pdn->parent)->pe_config_addr; - eeh_add_to_parent_pe(edev); + eeh_pe_tree_insert(edev, parent); } eeh_edev_dbg(edev, "EEH is %s on device (code %d)\n", (enable ? "enabled" : "unsupported"), ret); @@ -435,8 +646,10 @@ static int pseries_eeh_set_option(struct eeh_pe *pe, int option) * It's notable that zero'ed return value means invalid PE config * address. */ -static int pseries_eeh_get_pe_addr(struct eeh_pe *pe) +static int pseries_eeh_get_pe_addr(struct pci_dn *pdn) { + int config_addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); + unsigned long buid = pdn->phb->buid; int ret = 0; int rets[3]; @@ -447,18 +660,16 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe) * meaningless. */ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, - pe->config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid), 1); + config_addr, BUID_HI(buid), BUID_LO(buid), 1); if (ret || (rets[0] == 0)) return 0; /* Retrieve the associated PE config address */ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, - pe->config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid), 0); + config_addr, BUID_HI(buid), BUID_LO(buid), 0); if (ret) { pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", - __func__, pe->phb->global_number, pe->config_addr); + __func__, pdn->phb->global_number, config_addr); return 0; } @@ -467,11 +678,10 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe) if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets, - pe->config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid), 0); + config_addr, BUID_HI(buid), BUID_LO(buid), 0); if (ret) { pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", - __func__, pe->phb->global_number, pe->config_addr); + __func__, pdn->phb->global_number, config_addr); return 0; } @@ -569,35 +779,13 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay) static int pseries_eeh_reset(struct eeh_pe *pe, int option) { int config_addr; - int ret; /* Figure out PE address */ config_addr = pe->config_addr; if (pe->addr) config_addr = pe->addr; - /* Reset PE through RTAS call */ - ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, - config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid), option); - - /* If fundamental-reset not supported, try hot-reset */ - if (option == EEH_RESET_FUNDAMENTAL && - ret == -8) { - option = EEH_RESET_HOT; - ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, - config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid), option); - } - - /* We need reset hold or settlement delay */ - if (option == EEH_RESET_FUNDAMENTAL || - option == EEH_RESET_HOT) - msleep(EEH_PE_RST_HOLD_TIME); - else - msleep(EEH_PE_RST_SETTLE_TIME); - - return ret; + return pseries_eeh_phb_reset(pe->phb, config_addr, option); } /** @@ -641,110 +829,51 @@ static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, u * pseries_eeh_configure_bridge - Configure PCI bridges in the indicated PE * @pe: EEH PE * - * The function will be called to reconfigure the bridges included - * in the specified PE so that the mulfunctional PE would be recovered - * again. */ static int pseries_eeh_configure_bridge(struct eeh_pe *pe) { int config_addr; - int ret; - /* Waiting 0.2s maximum before skipping configuration */ - int max_wait = 200; /* Figure out the PE address */ config_addr = pe->config_addr; if (pe->addr) config_addr = pe->addr; - while (max_wait > 0) { - ret = rtas_call(ibm_configure_pe, 3, 1, NULL, - config_addr, BUID_HI(pe->phb->buid), - BUID_LO(pe->phb->buid)); - - if (!ret) - return ret; - if (ret < 0) - break; - - /* - * If RTAS returns a delay value that's above 100ms, cut it - * down to 100ms in case firmware made a mistake. For more - * on how these delay values work see rtas_busy_delay_time - */ - if (ret > RTAS_EXTENDED_DELAY_MIN+2 && - ret <= RTAS_EXTENDED_DELAY_MAX) - ret = RTAS_EXTENDED_DELAY_MIN+2; - - max_wait -= rtas_busy_delay_time(ret); - - if (max_wait < 0) - break; - - rtas_busy_delay(ret); - } - - pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n", - __func__, pe->phb->global_number, pe->addr, ret); - /* PAPR defines -3 as "Parameter Error" for this function: */ - if (ret == -3) - return -EINVAL; - else - return -EIO; + return pseries_eeh_phb_configure_bridge(pe->phb, config_addr); } /** * pseries_eeh_read_config - Read PCI config space - * @pdn: PCI device node - * @where: PCI address + * @edev: EEH device handle + * @where: PCI config space offset * @size: size to read * @val: return value * * Read config space from the speicifed device */ -static int pseries_eeh_read_config(struct pci_dn *pdn, int where, int size, u32 *val) +static int pseries_eeh_read_config(struct eeh_dev *edev, int where, int size, u32 *val) { + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + return rtas_read_config(pdn, where, size, val); } /** * pseries_eeh_write_config - Write PCI config space - * @pdn: PCI device node - * @where: PCI address + * @edev: EEH device handle + * @where: PCI config space offset * @size: size to write * @val: value to be written * * Write config space to the specified device */ -static int pseries_eeh_write_config(struct pci_dn *pdn, int where, int size, u32 val) +static int pseries_eeh_write_config(struct eeh_dev *edev, int where, int size, u32 val) { + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + return rtas_write_config(pdn, where, size, val); } -static int pseries_eeh_restore_config(struct pci_dn *pdn) -{ - struct eeh_dev *edev = pdn_to_eeh_dev(pdn); - s64 ret = 0; - - if (!edev) - return -EEXIST; - - /* - * FIXME: The MPS, error routing rules, timeout setting are worthy - * to be exported by firmware in extendible way. - */ - if (edev->physfn) - ret = eeh_restore_vf_config(pdn); - - if (ret) { - pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n", - __func__, edev->pe_config_addr, ret); - return -EIO; - } - - return ret; -} - #ifdef CONFIG_PCI_IOV int pseries_send_allow_unfreeze(struct pci_dn *pdn, u16 *vf_pe_array, int cur_vfs) @@ -772,8 +901,8 @@ int pseries_send_allow_unfreeze(struct pci_dn *pdn, static int pseries_call_allow_unfreeze(struct eeh_dev *edev) { + int cur_vfs = 0, rc = 0, vf_index, bus, devfn, vf_pe_num; struct pci_dn *pdn, *tmp, *parent, *physfn_pdn; - int cur_vfs = 0, rc = 0, vf_index, bus, devfn; u16 *vf_pe_array; vf_pe_array = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); @@ -806,8 +935,10 @@ static int pseries_call_allow_unfreeze(struct eeh_dev *edev) } } else { pdn = pci_get_pdn(edev->pdev); - vf_pe_array[0] = cpu_to_be16(pdn->pe_number); physfn_pdn = pci_get_pdn(edev->physfn); + + vf_pe_num = physfn_pdn->pe_num_map[edev->vf_index]; + vf_pe_array[0] = cpu_to_be16(vf_pe_num); rc = pseries_send_allow_unfreeze(physfn_pdn, vf_pe_array, 1); pdn->last_allow_rc = rc; @@ -818,10 +949,8 @@ static int pseries_call_allow_unfreeze(struct eeh_dev *edev) return rc; } -static int pseries_notify_resume(struct pci_dn *pdn) +static int pseries_notify_resume(struct eeh_dev *edev) { - struct eeh_dev *edev = pdn_to_eeh_dev(pdn); - if (!edev) return -EEXIST; @@ -841,7 +970,6 @@ static struct eeh_ops pseries_eeh_ops = { .init = pseries_eeh_init, .probe = pseries_eeh_probe, .set_option = pseries_eeh_set_option, - .get_pe_addr = pseries_eeh_get_pe_addr, .get_state = pseries_eeh_get_state, .reset = pseries_eeh_reset, .get_log = pseries_eeh_get_log, @@ -850,7 +978,7 @@ static struct eeh_ops pseries_eeh_ops = { .read_config = pseries_eeh_read_config, .write_config = pseries_eeh_write_config, .next_error = NULL, - .restore_config = pseries_eeh_restore_config, + .restore_config = NULL, /* NB: configure_bridge() does this */ #ifdef CONFIG_PCI_IOV .notify_resume = pseries_notify_resume #endif diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index 3e49cc23a97a..4c7b7f5a2ebc 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -65,6 +65,7 @@ hypertas_fw_features_table[] = { {FW_FEATURE_HPT_RESIZE, "hcall-hpt-resize"}, {FW_FEATURE_BLOCK_REMOVE, "hcall-block-remove"}, {FW_FEATURE_PAPR_SCM, "hcall-scm"}, + {FW_FEATURE_RPT_INVALIDATE, "hcall-rpt-invalidate"}, }; /* Build up the firmware features bitmask using the contents of diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 3e8cbfe7a80f..c6e0d8abf75e 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -35,54 +35,10 @@ #include #include "pseries.h" -#include "offline_states.h" /* This version can't take the spinlock, because it never returns */ static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE; -static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) = - CPU_STATE_OFFLINE; -static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE; - -static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE; - -static bool cede_offline_enabled __read_mostly = true; - -/* - * Enable/disable cede_offline when available. - */ -static int __init setup_cede_offline(char *str) -{ - return (kstrtobool(str, &cede_offline_enabled) == 0); -} - -__setup("cede_offline=", setup_cede_offline); - -enum cpu_state_vals get_cpu_current_state(int cpu) -{ - return per_cpu(current_state, cpu); -} - -void set_cpu_current_state(int cpu, enum cpu_state_vals state) -{ - per_cpu(current_state, cpu) = state; -} - -enum cpu_state_vals get_preferred_offline_state(int cpu) -{ - return per_cpu(preferred_offline_state, cpu); -} - -void set_preferred_offline_state(int cpu, enum cpu_state_vals state) -{ - per_cpu(preferred_offline_state, cpu) = state; -} - -void set_default_offline_state(int cpu) -{ - per_cpu(preferred_offline_state, cpu) = default_offline_state; -} - static void rtas_stop_self(void) { static struct rtas_args args; @@ -101,9 +57,7 @@ static void rtas_stop_self(void) static void pseries_mach_cpu_die(void) { - unsigned int cpu = smp_processor_id(); unsigned int hwcpu = hard_smp_processor_id(); - u8 cede_latency_hint = 0; local_irq_disable(); idle_task_exit(); @@ -112,49 +66,6 @@ static void pseries_mach_cpu_die(void) else xics_teardown_cpu(); - if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { - set_cpu_current_state(cpu, CPU_STATE_INACTIVE); - if (ppc_md.suspend_disable_cpu) - ppc_md.suspend_disable_cpu(); - - cede_latency_hint = 2; - - get_lppaca()->idle = 1; - if (!lppaca_shared_proc(get_lppaca())) - get_lppaca()->donate_dedicated_cpu = 1; - - while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { - while (!prep_irq_for_idle()) { - local_irq_enable(); - local_irq_disable(); - } - - extended_cede_processor(cede_latency_hint); - } - - local_irq_disable(); - - if (!lppaca_shared_proc(get_lppaca())) - get_lppaca()->donate_dedicated_cpu = 0; - get_lppaca()->idle = 0; - - if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) { - unregister_slb_shadow(hwcpu); - - hard_irq_disable(); - /* - * Call to start_secondary_resume() will not return. - * Kernel stack will be reset and start_secondary() - * will be called to continue the online operation. - */ - start_secondary_resume(); - } - } - - /* Requested state is CPU_STATE_OFFLINE at this point */ - WARN_ON(get_preferred_offline_state(cpu) != CPU_STATE_OFFLINE); - - set_cpu_current_state(cpu, CPU_STATE_OFFLINE); unregister_slb_shadow(hwcpu); rtas_stop_self(); @@ -200,24 +111,13 @@ static void pseries_cpu_die(unsigned int cpu) int cpu_status = 1; unsigned int pcpu = get_hard_smp_processor_id(cpu); - if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { - cpu_status = 1; - for (tries = 0; tries < 5000; tries++) { - if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) { - cpu_status = 0; - break; - } - msleep(1); - } - } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) { + for (tries = 0; tries < 25; tries++) { + cpu_status = smp_query_cpu_stopped(pcpu); + if (cpu_status == QCSS_STOPPED || + cpu_status == QCSS_HARDWARE_ERROR) + break; + cpu_relax(); - for (tries = 0; tries < 25; tries++) { - cpu_status = smp_query_cpu_stopped(pcpu); - if (cpu_status == QCSS_STOPPED || - cpu_status == QCSS_HARDWARE_ERROR) - break; - cpu_relax(); - } } if (cpu_status != 0) { @@ -359,28 +259,14 @@ static int dlpar_offline_cpu(struct device_node *dn) if (get_hard_smp_processor_id(cpu) != thread) continue; - if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE) + if (!cpu_online(cpu)) break; - if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) { - set_preferred_offline_state(cpu, - CPU_STATE_OFFLINE); - cpu_maps_update_done(); - timed_topology_update(1); - rc = device_offline(get_cpu_device(cpu)); - if (rc) - goto out; - cpu_maps_update_begin(); - break; - } - - /* - * The cpu is in CPU_STATE_INACTIVE. - * Upgrade it's state to CPU_STATE_OFFLINE. - */ - set_preferred_offline_state(cpu, CPU_STATE_OFFLINE); - WARN_ON(plpar_hcall_norets(H_PROD, thread) != H_SUCCESS); - __cpu_die(cpu); + cpu_maps_update_done(); + rc = device_offline(get_cpu_device(cpu)); + if (rc) + goto out; + cpu_maps_update_begin(); break; } if (cpu == num_possible_cpus()) { @@ -414,10 +300,7 @@ static int dlpar_online_cpu(struct device_node *dn) for_each_present_cpu(cpu) { if (get_hard_smp_processor_id(cpu) != thread) continue; - BUG_ON(get_cpu_current_state(cpu) - != CPU_STATE_OFFLINE); cpu_maps_update_done(); - timed_topology_update(1); find_and_online_cpu_nid(cpu); rc = device_online(get_cpu_device(cpu)); if (rc) { @@ -854,7 +737,6 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add) parent = of_find_node_by_path("/cpus"); if (!parent) { pr_warn("Could not find CPU root node in device tree\n"); - kfree(cpu_drcs); return -1; } @@ -896,25 +778,6 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add) return rc; } -int dlpar_cpu_readd(int cpu) -{ - struct device_node *dn; - struct device *dev; - u32 drc_index; - int rc; - - dev = get_cpu_device(cpu); - dn = dev->of_node; - - rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index); - - rc = dlpar_cpu_remove_by_index(drc_index); - if (!rc) - rc = dlpar_cpu_add(drc_index); - - return rc; -} - int dlpar_cpu(struct pseries_hp_errorlog *hp_elog) { u32 count, drc_index; @@ -1013,27 +876,8 @@ static struct notifier_block pseries_smp_nb = { .notifier_call = pseries_smp_notifier, }; -#define MAX_CEDE_LATENCY_LEVELS 4 -#define CEDE_LATENCY_PARAM_LENGTH 10 -#define CEDE_LATENCY_PARAM_MAX_LENGTH \ - (MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char)) -#define CEDE_LATENCY_TOKEN 45 - -static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH]; - -static int parse_cede_parameters(void) -{ - memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH); - return rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, - NULL, - CEDE_LATENCY_TOKEN, - __pa(cede_parameters), - CEDE_LATENCY_PARAM_MAX_LENGTH); -} - static int __init pseries_cpu_hotplug_init(void) { - int cpu; int qcss_tok; #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE @@ -1056,16 +900,8 @@ static int __init pseries_cpu_hotplug_init(void) smp_ops->cpu_die = pseries_cpu_die; /* Processors can be added/removed only on LPAR */ - if (firmware_has_feature(FW_FEATURE_LPAR)) { + if (firmware_has_feature(FW_FEATURE_LPAR)) of_reconfig_notifier_register(&pseries_smp_nb); - cpu_maps_update_begin(); - if (cede_offline_enabled && parse_cede_parameters() == 0) { - default_offline_state = CPU_STATE_INACTIVE; - for_each_online_cpu(cpu) - set_default_offline_state(cpu); - } - cpu_maps_update_done(); - } return 0; } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 5ace2f9a277e..5d545b78111f 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -22,12 +22,10 @@ #include #include "pseries.h" -static bool rtas_hp_event; - unsigned long pseries_memory_block_size(void) { struct device_node *np; - unsigned int memblock_size = MIN_MEMORY_BLOCK_SIZE; + u64 memblock_size = MIN_MEMORY_BLOCK_SIZE; struct resource r; np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); @@ -487,40 +485,6 @@ static int dlpar_memory_remove_by_index(u32 drc_index) return rc; } -static int dlpar_memory_readd_by_index(u32 drc_index) -{ - struct drmem_lmb *lmb; - int lmb_found; - int rc; - - pr_info("Attempting to update LMB, drc index %x\n", drc_index); - - lmb_found = 0; - for_each_drmem_lmb(lmb) { - if (lmb->drc_index == drc_index) { - lmb_found = 1; - rc = dlpar_remove_lmb(lmb); - if (!rc) { - rc = dlpar_add_lmb(lmb); - if (rc) - dlpar_release_drc(lmb->drc_index); - } - break; - } - } - - if (!lmb_found) - rc = -EINVAL; - - if (rc) - pr_info("Failed to update memory at %llx\n", - lmb->base_addr); - else - pr_info("Memory at %llx was updated\n", lmb->base_addr); - - return rc; -} - static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) { struct drmem_lmb *lmb, *start_lmb, *end_lmb; @@ -617,10 +581,6 @@ static int dlpar_memory_remove_by_index(u32 drc_index) { return -EOPNOTSUPP; } -static int dlpar_memory_readd_by_index(u32 drc_index) -{ - return -EOPNOTSUPP; -} static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) { @@ -902,10 +862,6 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) break; } - break; - case PSERIES_HP_ELOG_ACTION_READD: - drc_index = hp_elog->_drc_u.drc_index; - rc = dlpar_memory_readd_by_index(drc_index); break; default: pr_err("Invalid action (%d) specified\n", hp_elog->action); @@ -913,11 +869,8 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) break; } - if (!rc) { - rtas_hp_event = true; + if (!rc) rc = drmem_update_dt(); - rtas_hp_event = false; - } unlock_device_hotplug(); return rc; @@ -953,60 +906,6 @@ static int pseries_add_mem_node(struct device_node *np) return (ret < 0) ? -EINVAL : 0; } -static int pseries_update_drconf_memory(struct of_reconfig_data *pr) -{ - struct of_drconf_cell_v1 *new_drmem, *old_drmem; - unsigned long memblock_size; - u32 entries; - __be32 *p; - int i, rc = -EINVAL; - - if (rtas_hp_event) - return 0; - - memblock_size = pseries_memory_block_size(); - if (!memblock_size) - return -EINVAL; - - if (!pr->old_prop) - return 0; - - p = (__be32 *) pr->old_prop->value; - if (!p) - return -EINVAL; - - /* The first int of the property is the number of lmb's described - * by the property. This is followed by an array of of_drconf_cell - * entries. Get the number of entries and skip to the array of - * of_drconf_cell's. - */ - entries = be32_to_cpu(*p++); - old_drmem = (struct of_drconf_cell_v1 *)p; - - p = (__be32 *)pr->prop->value; - p++; - new_drmem = (struct of_drconf_cell_v1 *)p; - - for (i = 0; i < entries; i++) { - if ((be32_to_cpu(old_drmem[i].flags) & DRCONF_MEM_ASSIGNED) && - (!(be32_to_cpu(new_drmem[i].flags) & DRCONF_MEM_ASSIGNED))) { - rc = pseries_remove_memblock( - be64_to_cpu(old_drmem[i].base_addr), - memblock_size); - break; - } else if ((!(be32_to_cpu(old_drmem[i].flags) & - DRCONF_MEM_ASSIGNED)) && - (be32_to_cpu(new_drmem[i].flags) & - DRCONF_MEM_ASSIGNED)) { - rc = memblock_add(be64_to_cpu(old_drmem[i].base_addr), - memblock_size); - rc = (rc < 0) ? -EINVAL : 0; - break; - } - } - return rc; -} - static int pseries_memory_notifier(struct notifier_block *nb, unsigned long action, void *data) { @@ -1020,10 +919,6 @@ static int pseries_memory_notifier(struct notifier_block *nb, case OF_RECONFIG_DETACH_NODE: err = pseries_remove_mem_node(rd->dn); break; - case OF_RECONFIG_UPDATE_PROPERTY: - if (!strcmp(rd->prop->name, "ibm,dynamic-memory")) - err = pseries_update_drconf_memory(rd); - break; } return notifier_from_errno(err); } diff --git a/arch/powerpc/platforms/pseries/hvcserver.c b/arch/powerpc/platforms/pseries/hvcserver.c index 267139b13530..96e18d3b2fcf 100644 --- a/arch/powerpc/platforms/pseries/hvcserver.c +++ b/arch/powerpc/platforms/pseries/hvcserver.c @@ -45,7 +45,7 @@ static int hvcs_convert(long to_convert) case H_LONG_BUSY_ORDER_10_SEC: case H_LONG_BUSY_ORDER_100_SEC: return -EBUSY; - case H_FUNCTION: /* fall through */ + case H_FUNCTION: default: return -EPERM; } diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index f71ff2c94efe..baf24eacd268 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1681,9 +1681,11 @@ static int pseries_lpar_register_process_table(unsigned long base, if (table_size) flags |= PROC_TABLE_NEW; - if (radix_enabled()) - flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE; - else + if (radix_enabled()) { + flags |= PROC_TABLE_RADIX; + if (mmu_has_feature(MMU_FTR_GTSE)) + flags |= PROC_TABLE_GTSE; + } else flags |= PROC_TABLE_HPT_SLB; for (;;) { rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base, diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 10d982997736..d6f4162478a5 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -6,6 +6,9 @@ * Copyright (C) 2010 IBM Corporation */ + +#define pr_fmt(fmt) "mobility: " fmt + #include #include #include @@ -64,6 +67,8 @@ static int delete_dt_node(__be32 phandle) if (!dn) return -ENOENT; + pr_debug("removing node %pOFfp\n", dn); + dlpar_detach_node(dn); of_node_put(dn); return 0; @@ -122,6 +127,7 @@ static int update_dt_property(struct device_node *dn, struct property **prop, } if (!more) { + pr_debug("updating node %pOF property %s\n", dn, name); of_update_property(dn, new_prop); *prop = NULL; } @@ -240,33 +246,12 @@ static int add_dt_node(__be32 parent_phandle, __be32 drc_index) if (rc) dlpar_free_cc_nodes(dn); + pr_debug("added node %pOFfp\n", dn); + of_node_put(parent_dn); return rc; } -static void prrn_update_node(__be32 phandle) -{ - struct pseries_hp_errorlog hp_elog; - struct device_node *dn; - - /* - * If a node is found from a the given phandle, the phandle does not - * represent the drc index of an LMB and we can ignore. - */ - dn = of_find_node_by_phandle(be32_to_cpu(phandle)); - if (dn) { - of_node_put(dn); - return; - } - - hp_elog.resource = PSERIES_HP_ELOG_RESOURCE_MEM; - hp_elog.action = PSERIES_HP_ELOG_ACTION_READD; - hp_elog.id_type = PSERIES_HP_ELOG_ID_DRC_INDEX; - hp_elog._drc_u.drc_index = phandle; - - handle_dlpar_errorlog(&hp_elog); -} - int pseries_devicetree_update(s32 scope) { char *rtas_buf; @@ -305,10 +290,6 @@ int pseries_devicetree_update(s32 scope) break; case UPDATE_DT_NODE: update_dt_node(phandle, scope); - - if (scope == PRRN_SCOPE) - prrn_update_node(phandle); - break; case ADD_DT_NODE: drc_index = *data++; @@ -388,8 +369,6 @@ static ssize_t migration_store(struct class *class, if (rc) return rc; - stop_topology_update(); - do { rc = rtas_ibm_suspend_me(streamid); if (rc == -EAGAIN) @@ -401,8 +380,6 @@ static ssize_t migration_store(struct class *class, post_mobility_fixup(); - start_topology_update(); - return count; } @@ -427,11 +404,11 @@ static int __init mobility_sysfs_init(void) rc = sysfs_create_file(mobility_kobj, &class_attr_migration.attr); if (rc) - pr_err("mobility: unable to create migration sysfs file (%d)\n", rc); + pr_err("unable to create migration sysfs file (%d)\n", rc); rc = sysfs_create_file(mobility_kobj, &class_attr_api_version.attr.attr); if (rc) - pr_err("mobility: unable to create api_version sysfs file (%d)\n", rc); + pr_err("unable to create api_version sysfs file (%d)\n", rc); return 0; } diff --git a/arch/powerpc/platforms/pseries/offline_states.h b/arch/powerpc/platforms/pseries/offline_states.h deleted file mode 100644 index 51414aee2862..000000000000 --- a/arch/powerpc/platforms/pseries/offline_states.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _OFFLINE_STATES_H_ -#define _OFFLINE_STATES_H_ - -/* Cpu offline states go here */ -enum cpu_state_vals { - CPU_STATE_OFFLINE, - CPU_STATE_INACTIVE, - CPU_STATE_ONLINE, - CPU_MAX_OFFLINE_STATES -}; - -#ifdef CONFIG_HOTPLUG_CPU -extern enum cpu_state_vals get_cpu_current_state(int cpu); -extern void set_cpu_current_state(int cpu, enum cpu_state_vals state); -extern void set_preferred_offline_state(int cpu, enum cpu_state_vals state); -extern void set_default_offline_state(int cpu); -#else -static inline enum cpu_state_vals get_cpu_current_state(int cpu) -{ - return CPU_STATE_ONLINE; -} - -static inline void set_cpu_current_state(int cpu, enum cpu_state_vals state) -{ -} - -static inline void set_preferred_offline_state(int cpu, enum cpu_state_vals state) -{ -} - -static inline void set_default_offline_state(int cpu) -{ -} -#endif - -extern enum cpu_state_vals get_preferred_offline_state(int cpu); -#endif diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 9c569078a09f..f439f0dfea7d 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -13,9 +13,11 @@ #include #include #include +#include #include #include +#include #define BIND_ANY_ADDR (~0ul) @@ -62,6 +64,26 @@ PAPR_PMEM_HEALTH_FATAL | \ PAPR_PMEM_HEALTH_UNHEALTHY) +#define PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS) +#define PAPR_SCM_PERF_STATS_VERSION 0x1 + +/* Struct holding a single performance metric */ +struct papr_scm_perf_stat { + u8 stat_id[8]; + __be64 stat_val; +} __packed; + +/* Struct exchanged between kernel and PHYP for fetching drc perf stats */ +struct papr_scm_perf_stats { + u8 eye_catcher[8]; + /* Should be PAPR_SCM_PERF_STATS_VERSION */ + __be32 stats_version; + /* Number of stats following */ + __be32 num_statistics; + /* zero or more performance matrics */ + struct papr_scm_perf_stat scm_statistic[]; +} __packed; + /* private struct associated with each region */ struct papr_scm_priv { struct platform_device *pdev; @@ -80,6 +102,7 @@ struct papr_scm_priv { struct resource res; struct nd_region *region; struct nd_interleave_set nd_set; + struct list_head region_list; /* Protect dimm health data from concurrent read/writes */ struct mutex health_mutex; @@ -89,8 +112,14 @@ struct papr_scm_priv { /* Health information for the dimm */ u64 health_bitmap; + + /* length of the stat buffer as expected by phyp */ + size_t stat_buffer_len; }; +static LIST_HEAD(papr_nd_regions); +static DEFINE_MUTEX(papr_ndr_lock); + static int drc_pmem_bind(struct papr_scm_priv *p) { unsigned long ret[PLPAR_HCALL_BUFSIZE]; @@ -194,6 +223,79 @@ static int drc_pmem_query_n_bind(struct papr_scm_priv *p) return drc_pmem_bind(p); } +/* + * Query the Dimm performance stats from PHYP and copy them (if returned) to + * provided struct papr_scm_perf_stats instance 'stats' that can hold atleast + * (num_stats + header) bytes. + * - If buff_stats == NULL the return value is the size in byes of the buffer + * needed to hold all supported performance-statistics. + * - If buff_stats != NULL and num_stats == 0 then we copy all known + * performance-statistics to 'buff_stat' and expect to be large enough to + * hold them. + * - if buff_stats != NULL and num_stats > 0 then copy the requested + * performance-statistics to buff_stats. + */ +static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, + struct papr_scm_perf_stats *buff_stats, + unsigned int num_stats) +{ + unsigned long ret[PLPAR_HCALL_BUFSIZE]; + size_t size; + s64 rc; + + /* Setup the out buffer */ + if (buff_stats) { + memcpy(buff_stats->eye_catcher, + PAPR_SCM_PERF_STATS_EYECATCHER, 8); + buff_stats->stats_version = + cpu_to_be32(PAPR_SCM_PERF_STATS_VERSION); + buff_stats->num_statistics = + cpu_to_be32(num_stats); + + /* + * Calculate the buffer size based on num-stats provided + * or use the prefetched max buffer length + */ + if (num_stats) + /* Calculate size from the num_stats */ + size = sizeof(struct papr_scm_perf_stats) + + num_stats * sizeof(struct papr_scm_perf_stat); + else + size = p->stat_buffer_len; + } else { + /* In case of no out buffer ignore the size */ + size = 0; + } + + /* Do the HCALL asking PHYP for info */ + rc = plpar_hcall(H_SCM_PERFORMANCE_STATS, ret, p->drc_index, + buff_stats ? virt_to_phys(buff_stats) : 0, + size); + + /* Check if the error was due to an unknown stat-id */ + if (rc == H_PARTIAL) { + dev_err(&p->pdev->dev, + "Unknown performance stats, Err:0x%016lX\n", ret[0]); + return -ENOENT; + } else if (rc != H_SUCCESS) { + dev_err(&p->pdev->dev, + "Failed to query performance stats, Err:%lld\n", rc); + return -EIO; + + } else if (!size) { + /* Handle case where stat buffer size was requested */ + dev_dbg(&p->pdev->dev, + "Performance stats size %ld\n", ret[0]); + return ret[0]; + } + + /* Successfully fetched the requested stats from phyp */ + dev_dbg(&p->pdev->dev, + "Performance stats returned %d stats\n", + be32_to_cpu(buff_stats->num_statistics)); + return 0; +} + /* * Issue hcall to retrieve dimm health info and populate papr_scm_priv with the * health information. @@ -416,6 +518,51 @@ static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf, return 0; } +static int papr_pdsm_fuel_gauge(struct papr_scm_priv *p, + union nd_pdsm_payload *payload) +{ + int rc, size; + u64 statval; + struct papr_scm_perf_stat *stat; + struct papr_scm_perf_stats *stats; + + /* Silently fail if fetching performance metrics isn't supported */ + if (!p->stat_buffer_len) + return 0; + + /* Allocate request buffer enough to hold single performance stat */ + size = sizeof(struct papr_scm_perf_stats) + + sizeof(struct papr_scm_perf_stat); + + stats = kzalloc(size, GFP_KERNEL); + if (!stats) + return -ENOMEM; + + stat = &stats->scm_statistic[0]; + memcpy(&stat->stat_id, "MemLife ", sizeof(stat->stat_id)); + stat->stat_val = 0; + + /* Fetch the fuel gauge and populate it in payload */ + rc = drc_pmem_query_stats(p, stats, 1); + if (rc < 0) { + dev_dbg(&p->pdev->dev, "Err(%d) fetching fuel gauge\n", rc); + goto free_stats; + } + + statval = be64_to_cpu(stat->stat_val); + dev_dbg(&p->pdev->dev, + "Fetched fuel-gauge %llu", statval); + payload->health.extension_flags |= + PDSM_DIMM_HEALTH_RUN_GAUGE_VALID; + payload->health.dimm_fuel_gauge = statval; + + rc = sizeof(struct nd_papr_pdsm_health); + +free_stats: + kfree(stats); + return rc; +} + /* Fetch the DIMM health info and populate it in provided package. */ static int papr_pdsm_health(struct papr_scm_priv *p, union nd_pdsm_payload *payload) @@ -456,6 +603,10 @@ static int papr_pdsm_health(struct papr_scm_priv *p, /* struct populated hence can release the mutex now */ mutex_unlock(&p->health_mutex); + + /* Populate the fuel gauge meter in the payload */ + papr_pdsm_fuel_gauge(p, payload); + rc = sizeof(struct nd_papr_pdsm_health); out: @@ -631,6 +782,48 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, return 0; } +static ssize_t perf_stats_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int index, rc; + struct seq_buf s; + struct papr_scm_perf_stat *stat; + struct papr_scm_perf_stats *stats; + struct nvdimm *dimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(dimm); + + if (!p->stat_buffer_len) + return -ENOENT; + + /* Allocate the buffer for phyp where stats are written */ + stats = kzalloc(p->stat_buffer_len, GFP_KERNEL); + if (!stats) + return -ENOMEM; + + /* Ask phyp to return all dimm perf stats */ + rc = drc_pmem_query_stats(p, stats, 0); + if (rc) + goto free_stats; + /* + * Go through the returned output buffer and print stats and + * values. Since stat_id is essentially a char string of + * 8 bytes, simply use the string format specifier to print it. + */ + seq_buf_init(&s, buf, PAGE_SIZE); + for (index = 0, stat = stats->scm_statistic; + index < be32_to_cpu(stats->num_statistics); + ++index, ++stat) { + seq_buf_printf(&s, "%.8s = 0x%016llX\n", + stat->stat_id, + be64_to_cpu(stat->stat_val)); + } + +free_stats: + kfree(stats); + return rc ? rc : seq_buf_used(&s); +} +DEVICE_ATTR_RO(perf_stats); + static ssize_t flags_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -676,6 +869,7 @@ DEVICE_ATTR_RO(flags); /* papr_scm specific dimm attributes */ static struct attribute *papr_nd_attributes[] = { &dev_attr_flags.attr, + &dev_attr_perf_stats.attr, NULL, }; @@ -696,6 +890,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) struct nd_region_desc ndr_desc; unsigned long dimm_flags; int target_nid, online_nid; + ssize_t stat_size; p->bus_desc.ndctl = papr_scm_ndctl; p->bus_desc.module = THIS_MODULE; @@ -759,6 +954,20 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) dev_info(dev, "Region registered with target node %d and online node %d", target_nid, online_nid); + mutex_lock(&papr_ndr_lock); + list_add_tail(&p->region_list, &papr_nd_regions); + mutex_unlock(&papr_ndr_lock); + + /* Try retriving the stat buffer and see if its supported */ + stat_size = drc_pmem_query_stats(p, NULL, 0); + if (stat_size > 0) { + p->stat_buffer_len = stat_size; + dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n", + p->stat_buffer_len); + } else { + dev_info(&p->pdev->dev, "Dimm performance stats unavailable\n"); + } + return 0; err: nvdimm_bus_unregister(p->bus); @@ -766,6 +975,68 @@ err: nvdimm_bus_unregister(p->bus); return -ENXIO; } +static void papr_scm_add_badblock(struct nd_region *region, + struct nvdimm_bus *bus, u64 phys_addr) +{ + u64 aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES); + + if (nvdimm_bus_add_badrange(bus, aligned_addr, L1_CACHE_BYTES)) { + pr_err("Bad block registration for 0x%llx failed\n", phys_addr); + return; + } + + pr_debug("Add memory range (0x%llx - 0x%llx) as bad range\n", + aligned_addr, aligned_addr + L1_CACHE_BYTES); + + nvdimm_region_notify(region, NVDIMM_REVALIDATE_POISON); +} + +static int handle_mce_ue(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct machine_check_event *evt = data; + struct papr_scm_priv *p; + u64 phys_addr; + bool found = false; + + if (evt->error_type != MCE_ERROR_TYPE_UE) + return NOTIFY_DONE; + + if (list_empty(&papr_nd_regions)) + return NOTIFY_DONE; + + /* + * The physical address obtained here is PAGE_SIZE aligned, so get the + * exact address from the effective address + */ + phys_addr = evt->u.ue_error.physical_address + + (evt->u.ue_error.effective_address & ~PAGE_MASK); + + if (!evt->u.ue_error.physical_address_provided || + !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT))) + return NOTIFY_DONE; + + /* mce notifier is called from a process context, so mutex is safe */ + mutex_lock(&papr_ndr_lock); + list_for_each_entry(p, &papr_nd_regions, region_list) { + if (phys_addr >= p->res.start && phys_addr <= p->res.end) { + found = true; + break; + } + } + + if (found) + papr_scm_add_badblock(p->region, p->bus, phys_addr); + + mutex_unlock(&papr_ndr_lock); + + return found ? NOTIFY_OK : NOTIFY_DONE; +} + +static struct notifier_block mce_ue_nb = { + .notifier_call = handle_mce_ue +}; + static int papr_scm_probe(struct platform_device *pdev) { struct device_node *dn = pdev->dev.of_node; @@ -866,6 +1137,10 @@ static int papr_scm_remove(struct platform_device *pdev) { struct papr_scm_priv *p = platform_get_drvdata(pdev); + mutex_lock(&papr_ndr_lock); + list_del(&p->region_list); + mutex_unlock(&papr_ndr_lock); + nvdimm_bus_unregister(p->bus); drc_pmem_unbind(p); kfree(p->bus_desc.provider_name); @@ -876,6 +1151,7 @@ static int papr_scm_remove(struct platform_device *pdev) static const struct of_device_id papr_scm_match[] = { { .compatible = "ibm,pmemory" }, + { .compatible = "ibm,pmemory-v2" }, { }, }; @@ -888,7 +1164,25 @@ static struct platform_driver papr_scm_driver = { }, }; -module_platform_driver(papr_scm_driver); +static int __init papr_scm_init(void) +{ + int ret; + + ret = platform_driver_register(&papr_scm_driver); + if (!ret) + mce_register_notifier(&mce_ue_nb); + + return ret; +} +module_init(papr_scm_init); + +static void __exit papr_scm_exit(void) +{ + mce_unregister_notifier(&mce_ue_nb); + platform_driver_unregister(&papr_scm_driver); +} +module_exit(papr_scm_exit); + MODULE_DEVICE_TABLE(of, papr_scm_match); MODULE_LICENSE("GPL"); MODULE_AUTHOR("IBM Corporation"); diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index b3a38f5a6b68..f9ae17e8a0f4 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -34,7 +34,7 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn) pci_devs_phb_init_dynamic(phb); /* Create EEH devices for the PHB */ - eeh_dev_phb_init_dynamic(phb); + eeh_phb_pe_create(phb); if (dn->child) pseries_eeh_init_edev_recursive(PCI_DN(dn)); diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c index f860a897a9e0..e1dc5d3254df 100644 --- a/arch/powerpc/platforms/pseries/pmem.c +++ b/arch/powerpc/platforms/pseries/pmem.c @@ -24,7 +24,6 @@ #include #include "pseries.h" -#include "offline_states.h" static struct device_node *pmem_node; @@ -147,6 +146,12 @@ const struct of_device_id drc_pmem_match[] = { static int pseries_pmem_init(void) { + /* + * Only supported on POWER8 and above. + */ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return 0; + pmem_node = of_find_node_by_type(NULL, "ibm,persistent-memory"); if (!pmem_node) return 0; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 27094c872fd6..2f4ee0a90284 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -359,7 +359,7 @@ static void pseries_lpar_idle(void) * to ever be a problem in practice we can move this into a kernel thread to * finish off the process later in boot. */ -void pseries_enable_reloc_on_exc(void) +bool pseries_enable_reloc_on_exc(void) { long rc; unsigned int delay, total_delay = 0; @@ -370,11 +370,13 @@ void pseries_enable_reloc_on_exc(void) if (rc == H_P2) { pr_info("Relocation on exceptions not" " supported\n"); + return false; } else if (rc != H_SUCCESS) { pr_warn("Unable to enable relocation" " on exceptions: %ld\n", rc); + return false; } - break; + return true; } delay = get_longbusy_msecs(rc); @@ -383,7 +385,7 @@ void pseries_enable_reloc_on_exc(void) pr_warn("Warning: Giving up waiting to enable " "relocation on exceptions (%u msec)!\n", total_delay); - return; + return false; } mdelay(delay); @@ -746,6 +748,11 @@ static void __init pSeries_setup_arch(void) smp_init_pseries(); + if (radix_enabled() && !mmu_has_feature(MMU_FTR_GTSE)) + if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) + panic("BUG: Radix support requires either GTSE or RPT_INVALIDATE\n"); + + /* openpic global configuration register (64-bit format). */ /* openpic Interrupt Source Unit pointer (64-bit format). */ /* python0 facility area (mmio) (64-bit format) REAL address. */ @@ -772,8 +779,10 @@ static void __init pSeries_setup_arch(void) if (firmware_has_feature(FW_FEATURE_LPAR)) { vpa_init(boot_cpuid); - if (lppaca_shared_proc(get_lppaca())) + if (lppaca_shared_proc(get_lppaca())) { static_branch_enable(&shared_processor); + pv_spinlocks_init(); + } ppc_md.power_save = pseries_lpar_idle; ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; @@ -832,12 +841,15 @@ static int pseries_set_xdabr(unsigned long dabr, unsigned long dabrx) return plpar_hcall_norets(H_SET_XDABR, dabr, dabrx); } -static int pseries_set_dawr(unsigned long dawr, unsigned long dawrx) +static int pseries_set_dawr(int nr, unsigned long dawr, unsigned long dawrx) { /* PAPR says we can't set HYP */ dawrx &= ~DAWRX_HYP; - return plpar_set_watchpoint0(dawr, dawrx); + if (nr == 0) + return plpar_set_watchpoint0(dawr, dawrx); + else + return plpar_set_watchpoint1(dawr, dawrx); } #define CMO_CHARACTERISTICS_TOKEN 44 diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 6891710833be..92922491a81c 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -44,8 +44,6 @@ #include #include "pseries.h" -#include "offline_states.h" - /* * The Primary thread of each non-boot processor was started from the OF client @@ -108,10 +106,7 @@ static inline int smp_startup_cpu(unsigned int lcpu) /* Fixup atomic count: it exited inside IRQ handler. */ task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count = 0; -#ifdef CONFIG_HOTPLUG_CPU - if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE) - goto out; -#endif + /* * If the RTAS start-cpu token does not exist then presume the * cpu is already spinning. @@ -126,9 +121,6 @@ static inline int smp_startup_cpu(unsigned int lcpu) return 0; } -#ifdef CONFIG_HOTPLUG_CPU -out: -#endif return 1; } @@ -143,10 +135,6 @@ static void smp_setup_cpu(int cpu) vpa_init(cpu); cpumask_clear_cpu(cpu, of_spin_mask); -#ifdef CONFIG_HOTPLUG_CPU - set_cpu_current_state(cpu, CPU_STATE_ONLINE); - set_default_offline_state(cpu); -#endif } static int smp_pSeries_kick_cpu(int nr) @@ -163,20 +151,6 @@ static int smp_pSeries_kick_cpu(int nr) * the processor will continue on to secondary_start */ paca_ptrs[nr]->cpu_start = 1; -#ifdef CONFIG_HOTPLUG_CPU - set_preferred_offline_state(nr, CPU_STATE_ONLINE); - - if (get_cpu_current_state(nr) == CPU_STATE_INACTIVE) { - long rc; - unsigned long hcpuid; - - hcpuid = get_hard_smp_processor_id(nr); - rc = plpar_hcall_norets(H_PROD, hcpuid); - if (rc != H_SUCCESS) - printk(KERN_ERR "Error: Prod to wake up processor %d " - "Ret= %ld\n", nr, rc); - } -#endif return 0; } @@ -188,13 +162,16 @@ static int pseries_smp_prepare_cpu(int cpu) return 0; } -static void smp_pseries_cause_ipi(int cpu) +/* Cause IPI as setup by the interrupt controller (xics or xive) */ +static void (*ic_cause_ipi)(int cpu) __ro_after_init; + +/* Use msgsndp doorbells target is a sibling, else use interrupt controller */ +static void dbell_or_ic_cause_ipi(int cpu) { - /* POWER9 should not use this handler */ if (doorbell_try_core_ipi(cpu)) return; - icp_ops->cause_ipi(cpu); + ic_cause_ipi(cpu); } static int pseries_cause_nmi_ipi(int cpu) @@ -218,26 +195,49 @@ static int pseries_cause_nmi_ipi(int cpu) return 0; } -static __init void pSeries_smp_probe_xics(void) -{ - xics_smp_probe(); - - if (cpu_has_feature(CPU_FTR_DBELL) && !is_secure_guest()) - smp_ops->cause_ipi = smp_pseries_cause_ipi; - else - smp_ops->cause_ipi = icp_ops->cause_ipi; -} - static __init void pSeries_smp_probe(void) { if (xive_enabled()) - /* - * Don't use P9 doorbells when XIVE is enabled. IPIs - * using MMIOs should be faster - */ xive_smp_probe(); else - pSeries_smp_probe_xics(); + xics_smp_probe(); + + /* No doorbell facility, must use the interrupt controller for IPIs */ + if (!cpu_has_feature(CPU_FTR_DBELL)) + return; + + /* Doorbells can only be used for IPIs between SMT siblings */ + if (!cpu_has_feature(CPU_FTR_SMT)) + return; + + if (is_kvm_guest()) { + /* + * KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp + * faults to the hypervisor which then reads the instruction + * from guest memory, which tends to be slower than using XIVE. + */ + if (xive_enabled()) + return; + + /* + * XICS hcalls aren't as fast, so we can use msgsndp (which + * also helps exercise KVM emulation), however KVM can't + * emulate secure guests because it can't read the instruction + * out of their memory. + */ + if (is_secure_guest()) + return; + } + + /* + * Under PowerVM, FSCR[MSGP] is enabled as guest vCPU siblings are + * gang scheduled on the same physical core, so doorbells are always + * faster than the interrupt controller, and they can be used by + * secure guests. + */ + + ic_cause_ipi = smp_ops->cause_ipi; + smp_ops->cause_ipi = dbell_or_ic_cause_ipi; } static struct smp_ops_t pseries_smp_ops = { diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 0a24a5a185f0..81e0ac58d620 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -132,15 +132,11 @@ static ssize_t store_hibernate(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - cpumask_var_t offline_mask; int rc; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!alloc_cpumask_var(&offline_mask, GFP_KERNEL)) - return -ENOMEM; - stream_id = simple_strtoul(buf, NULL, 16); do { @@ -149,33 +145,14 @@ static ssize_t store_hibernate(struct device *dev, ssleep(1); } while (rc == -EAGAIN); - if (!rc) { - /* All present CPUs must be online */ - cpumask_andnot(offline_mask, cpu_present_mask, - cpu_online_mask); - rc = rtas_online_cpus_mask(offline_mask); - if (rc) { - pr_err("%s: Could not bring present CPUs online.\n", - __func__); - goto out; - } - - stop_topology_update(); + if (!rc) rc = pm_suspend(PM_SUSPEND_MEM); - start_topology_update(); - - /* Take down CPUs not online prior to suspend */ - if (!rtas_offline_cpus_mask(offline_mask)) - pr_warn("%s: Could not restore CPUs to offline " - "state.\n", __func__); - } stream_id = 0; if (!rc) rc = count; -out: - free_cpumask_var(offline_mask); + return rc; } diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile index 7c6d8b14f440..348f59581052 100644 --- a/arch/powerpc/purgatory/Makefile +++ b/arch/powerpc/purgatory/Makefile @@ -2,11 +2,11 @@ KASAN_SANITIZE := n -targets += trampoline.o purgatory.ro kexec-purgatory.c +targets += trampoline_$(BITS).o purgatory.ro kexec-purgatory.c LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -$(obj)/purgatory.ro: $(obj)/trampoline.o FORCE +$(obj)/purgatory.ro: $(obj)/trampoline_$(BITS).o FORCE $(call if_changed,ld) quiet_cmd_bin2c = BIN2C $@ diff --git a/arch/powerpc/purgatory/trampoline.S b/arch/powerpc/purgatory/trampoline_64.S similarity index 70% rename from arch/powerpc/purgatory/trampoline.S rename to arch/powerpc/purgatory/trampoline_64.S index a5a83c3f53e6..d956b8a35fd1 100644 --- a/arch/powerpc/purgatory/trampoline.S +++ b/arch/powerpc/purgatory/trampoline_64.S @@ -10,6 +10,7 @@ */ #include +#include .machine ppc64 .balign 256 @@ -43,14 +44,39 @@ master: mr %r17,%r3 /* save cpu id to r17 */ mr %r15,%r4 /* save physical address in reg15 */ + /* Work out where we're running */ + bcl 20, 31, 0f +0: mflr %r18 + + /* + * Copy BACKUP_SRC_SIZE bytes from BACKUP_SRC_START to + * backup_start 8 bytes at a time. + * + * Use r3 = dest, r4 = src, r5 = size, r6 = count + */ + ld %r3, (backup_start - 0b)(%r18) + cmpdi %cr0, %r3, 0 + beq .Lskip_copy /* skip if there is no backup region */ + lis %r5, BACKUP_SRC_SIZE@h + ori %r5, %r5, BACKUP_SRC_SIZE@l + cmpdi %cr0, %r5, 0 + beq .Lskip_copy /* skip if copy size is zero */ + lis %r4, BACKUP_SRC_START@h + ori %r4, %r4, BACKUP_SRC_START@l + li %r6, 0 +.Lcopy_loop: + ldx %r0, %r6, %r4 + stdx %r0, %r6, %r3 + addi %r6, %r6, 8 + cmpld %cr0, %r6, %r5 + blt .Lcopy_loop + +.Lskip_copy: or %r3,%r3,%r3 /* ok now to high priority, lets boot */ lis %r6,0x1 mtctr %r6 /* delay a bit for slaves to catch up */ bdnz . /* before we overwrite 0-100 again */ - bl 0f /* Work out where we're running */ -0: mflr %r18 - /* load device-tree address */ ld %r3, (dt_offset - 0b)(%r18) mr %r16,%r3 /* save dt address in reg16 */ @@ -61,6 +87,10 @@ master: li %r4,28 STWX_BE %r17,%r3,%r4 /* Store my cpu as __be32 at byte 28 */ 1: + /* Load opal base and entry values in r8 & r9 respectively */ + ld %r8,(opal_base - 0b)(%r18) + ld %r9,(opal_entry - 0b)(%r18) + /* load the kernel address */ ld %r4,(kernel - 0b)(%r18) @@ -89,7 +119,6 @@ master: rfid /* update MSR and start kernel */ - .balign 8 .globl kernel kernel: @@ -102,6 +131,23 @@ dt_offset: .8byte 0x0 .size dt_offset, . - dt_offset + .balign 8 + .globl backup_start +backup_start: + .8byte 0x0 + .size backup_start, . - backup_start + + .balign 8 + .globl opal_base +opal_base: + .8byte 0x0 + .size opal_base, . - opal_base + + .balign 8 + .globl opal_entry +opal_entry: + .8byte 0x0 + .size opal_entry, . - opal_entry .data .balign 8 diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 71b881e554fc..cb58ec7ce77a 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -647,6 +648,7 @@ static bool xive_native_provision_pages(void) pr_err("Failed to allocate provisioning page\n"); return false; } + kmemleak_ignore(p); opal_xive_donate_page(chip, __pa(p)); } return true; diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index f0551a2be9df..1e3674d7ea7b 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -768,7 +768,7 @@ static const u8 *get_vec5_feature(unsigned int index) return vec5 + index; } -static bool xive_spapr_disabled(void) +static bool __init xive_spapr_disabled(void) { const u8 *vec5_xive; diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh index 77114755dc6f..6e6a30aea3ed 100755 --- a/arch/powerpc/tools/unrel_branch_check.sh +++ b/arch/powerpc/tools/unrel_branch_check.sh @@ -31,7 +31,10 @@ grep -e "^c[0-9a-f]*:[[:space:]]*\([0-9a-f][0-9a-f][[:space:]]\)\{4\}[[:space:]] grep -v '\<__start_initialization_multiplatform>' | grep -v -e 'b.\?.\?ctr' | grep -v -e 'b.\?.\?lr' | -sed 's/://' | +sed -e 's/\bbt.\?[[:space:]]*[[:digit:]][[:digit:]]*,/beq/' \ + -e 's/\bbf.\?[[:space:]]*[[:digit:]][[:digit:]]*,/bne/' \ + -e 's/[[:space:]]0x/ /' \ + -e 's/://' | awk '{ print $1 ":" $6 ":0x" $7 ":" $8 " "}' ) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 7efe4bc3ccf6..df7bca00f5ec 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -481,6 +481,13 @@ static inline int unrecoverable_excp(struct pt_regs *regs) #endif } +static void xmon_touch_watchdogs(void) +{ + touch_softlockup_watchdog_sync(); + rcu_cpu_stall_reset(); + touch_nmi_watchdog(); +} + static int xmon_core(struct pt_regs *regs, int fromipi) { int cmd = 0; @@ -718,7 +725,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) else insert_cpu_bpts(); - touch_nmi_watchdog(); + xmon_touch_watchdogs(); local_irq_restore(flags); return cmd != 'X' && cmd != EOF; @@ -1593,6 +1600,7 @@ const char *getvecname(unsigned long vec) case 0x1300: ret = "(Instruction Breakpoint)"; break; case 0x1500: ret = "(Denormalisation)"; break; case 0x1700: ret = "(Altivec Assist)"; break; + case 0x3000: ret = "(System Call Vectored)"; break; default: ret = ""; } return ret; @@ -1861,7 +1869,7 @@ static void cacheflush(void) catch_memory_errors = 1; sync(); - if (cmd != 'i') { + if (cmd != 'i' || IS_ENABLED(CONFIG_PPC_BOOK3S_64)) { for (; nflush > 0; --nflush, adrs += L1_CACHE_BYTES) cflush((void *) adrs); } else { @@ -2022,6 +2030,18 @@ static void dump_300_sprs(void) #endif } +static void dump_310_sprs(void) +{ +#ifdef CONFIG_PPC64 + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return; + + printf("mmcr3 = %.16lx, sier2 = %.16lx, sier3 = %.16lx\n", + mfspr(SPRN_MMCR3), mfspr(SPRN_SIER2), mfspr(SPRN_SIER3)); + +#endif +} + static void dump_one_spr(int spr, bool show_unimplemented) { unsigned long val; @@ -2076,6 +2096,7 @@ static void super_regs(void) dump_206_sprs(); dump_207_sprs(); dump_300_sprs(); + dump_310_sprs(); return; } @@ -2934,11 +2955,10 @@ generic_inst_dump(unsigned long adr, long count, int praddr, int nr, dotted; unsigned long first_adr; struct ppc_inst inst, last_inst = ppc_inst(0); - unsigned char val[4]; dotted = 0; - for (first_adr = adr; count > 0; --count, adr += 4) { - nr = mread(adr, val, 4); + for (first_adr = adr; count > 0; --count, adr += ppc_inst_len(inst)) { + nr = mread_instr(adr, &inst); if (nr == 0) { if (praddr) { const char *x = fault_chars[fault_type]; @@ -2946,7 +2966,6 @@ generic_inst_dump(unsigned long adr, long count, int praddr, } break; } - inst = ppc_inst(GETWORD(val)); if (adr > first_adr && ppc_inst_equal(inst, last_inst)) { if (!dotted) { printf(" ...\n"); @@ -2957,9 +2976,12 @@ generic_inst_dump(unsigned long adr, long count, int praddr, dotted = 0; last_inst = inst; if (praddr) - printf(REG" %.8x", adr, ppc_inst_val(inst)); + printf(REG" %s", adr, ppc_inst_as_str(inst)); printf("\t"); - dump_func(ppc_inst_val(inst), adr); + if (!ppc_inst_prefixed(inst)) + dump_func(ppc_inst_val(inst), adr); + else + dump_func(ppc_inst_as_u64(inst), adr); printf("\n"); } return adr - first_adr; @@ -4256,7 +4278,7 @@ static int do_spu_cmd(void) subcmd = inchar(); if (isxdigit(subcmd) || subcmd == '\n') termch = subcmd; - /* fall through */ + fallthrough; case 'f': scanhex(&num); if (num >= XMON_NUM_SPUS || !spu_info[num].spu) { diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 1b299e801f74..addaa6e6718b 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -244,20 +244,6 @@ static inline void add_powernv_state(int index, const char *name, stop_psscr_table[index].mask = psscr_mask; } -/* - * Returns 0 if prop1_len == prop2_len. Else returns -1 - */ -static inline int validate_dt_prop_sizes(const char *prop1, int prop1_len, - const char *prop2, int prop2_len) -{ - if (prop1_len == prop2_len) - return 0; - - pr_warn("cpuidle-powernv: array sizes don't match for %s and %s\n", - prop1, prop2); - return -1; -} - extern u32 pnv_get_supported_cpuidle_states(void); static int powernv_add_idle_states(void) { diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c index 6513ef2af66a..ff6d99e923a4 100644 --- a/drivers/cpuidle/cpuidle-pseries.c +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -21,8 +21,9 @@ #include #include #include +#include -struct cpuidle_driver pseries_idle_driver = { +static struct cpuidle_driver pseries_idle_driver = { .name = "pseries_idle", .owner = THIS_MODULE, }; @@ -86,19 +87,150 @@ static void check_and_cede_processor(void) } } +/* + * XCEDE: Extended CEDE states discovered through the + * "ibm,get-systems-parameter" RTAS call with the token + * CEDE_LATENCY_TOKEN + */ + +/* + * Section 7.3.16 System Parameters Option of PAPR version 2.8.1 has a + * table with all the parameters to ibm,get-system-parameters. + * CEDE_LATENCY_TOKEN corresponds to the token value for Cede Latency + * Settings Information. + */ +#define CEDE_LATENCY_TOKEN 45 + +/* + * If the platform supports the cede latency settings information system + * parameter it must provide the following information in the NULL terminated + * parameter string: + * + * a. The first byte is the length “N” of each cede latency setting record minus + * one (zero indicates a length of 1 byte). + * + * b. For each supported cede latency setting a cede latency setting record + * consisting of the first “N” bytes as per the following table. + * + * ----------------------------- + * | Field | Field | + * | Name | Length | + * ----------------------------- + * | Cede Latency | 1 Byte | + * | Specifier Value | | + * ----------------------------- + * | Maximum wakeup | | + * | latency in | 8 Bytes | + * | tb-ticks | | + * ----------------------------- + * | Responsive to | | + * | external | 1 Byte | + * | interrupts | | + * ----------------------------- + * + * This version has cede latency record size = 10. + * + * The structure xcede_latency_payload represents a) and b) with + * xcede_latency_record representing the table in b). + * + * xcede_latency_parameter is what gets returned by + * ibm,get-systems-parameter RTAS call when made with + * CEDE_LATENCY_TOKEN. + * + * These structures are only used to represent the data obtained by the RTAS + * call. The data is in big-endian. + */ +struct xcede_latency_record { + u8 hint; + __be64 latency_ticks; + u8 wake_on_irqs; +} __packed; + +// Make space for 16 records, which "should be enough". +struct xcede_latency_payload { + u8 record_size; + struct xcede_latency_record records[16]; +} __packed; + +struct xcede_latency_parameter { + __be16 payload_size; + struct xcede_latency_payload payload; + u8 null_char; +} __packed; + +static unsigned int nr_xcede_records; +static struct xcede_latency_parameter xcede_latency_parameter __initdata; + +static int __init parse_cede_parameters(void) +{ + struct xcede_latency_payload *payload; + u32 total_xcede_records_size; + u8 xcede_record_size; + u16 payload_size; + int ret, i; + + ret = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, + NULL, CEDE_LATENCY_TOKEN, __pa(&xcede_latency_parameter), + sizeof(xcede_latency_parameter)); + if (ret) { + pr_err("xcede: Error parsing CEDE_LATENCY_TOKEN\n"); + return ret; + } + + payload_size = be16_to_cpu(xcede_latency_parameter.payload_size); + payload = &xcede_latency_parameter.payload; + + xcede_record_size = payload->record_size + 1; + + if (xcede_record_size != sizeof(struct xcede_latency_record)) { + pr_err("xcede: Expected record-size %lu. Observed size %u.\n", + sizeof(struct xcede_latency_record), xcede_record_size); + return -EINVAL; + } + + pr_info("xcede: xcede_record_size = %d\n", xcede_record_size); + + /* + * Since the payload_size includes the last NULL byte and the + * xcede_record_size, the remaining bytes correspond to array of all + * cede_latency settings. + */ + total_xcede_records_size = payload_size - 2; + nr_xcede_records = total_xcede_records_size / xcede_record_size; + + for (i = 0; i < nr_xcede_records; i++) { + struct xcede_latency_record *record = &payload->records[i]; + u64 latency_ticks = be64_to_cpu(record->latency_ticks); + u8 wake_on_irqs = record->wake_on_irqs; + u8 hint = record->hint; + + pr_info("xcede: Record %d : hint = %u, latency = 0x%llx tb ticks, Wake-on-irq = %u\n", + i, hint, latency_ticks, wake_on_irqs); + } + + return 0; +} + +#define NR_DEDICATED_STATES 2 /* snooze, CEDE */ +static u8 cede_latency_hint[NR_DEDICATED_STATES]; + static int dedicated_cede_loop(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { + u8 old_latency_hint; pseries_idle_prolog(); get_lppaca()->donate_dedicated_cpu = 1; + old_latency_hint = get_lppaca()->cede_latency_hint; + get_lppaca()->cede_latency_hint = cede_latency_hint[index]; HMT_medium(); check_and_cede_processor(); local_irq_disable(); get_lppaca()->donate_dedicated_cpu = 0; + get_lppaca()->cede_latency_hint = old_latency_hint; pseries_idle_epilog(); @@ -130,7 +262,7 @@ static int shared_cede_loop(struct cpuidle_device *dev, /* * States for dedicated partition case. */ -static struct cpuidle_state dedicated_states[] = { +static struct cpuidle_state dedicated_states[NR_DEDICATED_STATES] = { { /* Snooze */ .name = "snooze", .desc = "snooze", @@ -211,6 +343,54 @@ static int pseries_cpuidle_driver_init(void) return 0; } +static void __init fixup_cede0_latency(void) +{ + struct xcede_latency_payload *payload; + u64 min_latency_us; + int i; + + min_latency_us = dedicated_states[1].exit_latency; // CEDE latency + + if (parse_cede_parameters()) + return; + + pr_info("cpuidle: Skipping the %d Extended CEDE idle states\n", + nr_xcede_records); + + payload = &xcede_latency_parameter.payload; + for (i = 0; i < nr_xcede_records; i++) { + struct xcede_latency_record *record = &payload->records[i]; + u64 latency_tb = be64_to_cpu(record->latency_ticks); + u64 latency_us = tb_to_ns(latency_tb) / NSEC_PER_USEC; + + if (latency_us < min_latency_us) + min_latency_us = latency_us; + } + + /* + * By default, we assume that CEDE(0) has exit latency 10us, + * since there is no way for us to query from the platform. + * + * However, if the wakeup latency of an Extended CEDE state is + * smaller than 10us, then we can be sure that CEDE(0) + * requires no more than that. + * + * Perform the fix-up. + */ + if (min_latency_us < dedicated_states[1].exit_latency) { + u64 cede0_latency = min_latency_us - 1; + + if (cede0_latency <= 0) + cede0_latency = min_latency_us; + + dedicated_states[1].exit_latency = cede0_latency; + dedicated_states[1].target_residency = 10 * (cede0_latency); + pr_info("cpuidle: Fixed up CEDE exit latency to %llu us\n", + cede0_latency); + } + +} + /* * pseries_idle_probe() * Choose state table for shared versus dedicated partition @@ -232,8 +412,9 @@ static int pseries_idle_probe(void) cpuidle_state_table = shared_states; max_idle_state = ARRAY_SIZE(shared_states); } else { + fixup_cede0_latency(); cpuidle_state_table = dedicated_states; - max_idle_state = ARRAY_SIZE(dedicated_states); + max_idle_state = NR_DEDICATED_STATES; } } else return -ENODEV; diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl index db874367b602..50a0a18f35da 100644 --- a/drivers/crypto/vmx/aesp8-ppc.pl +++ b/drivers/crypto/vmx/aesp8-ppc.pl @@ -50,7 +50,7 @@ # Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. +# details see https://www.openssl.org/~appro/cryptogams/. # ==================================================================== # # This module implements support for AES instructions as per PowerISA diff --git a/drivers/crypto/vmx/ghashp8-ppc.pl b/drivers/crypto/vmx/ghashp8-ppc.pl index 38b06503ede0..09bba1852eec 100644 --- a/drivers/crypto/vmx/ghashp8-ppc.pl +++ b/drivers/crypto/vmx/ghashp8-ppc.pl @@ -13,7 +13,7 @@ # Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. +# details see https://www.openssl.org/~appro/cryptogams/. # ==================================================================== # # GHASH for for PowerISA v2.07. diff --git a/drivers/macintosh/adb-iop.c b/drivers/macintosh/adb-iop.c index fca31640e3ef..f3d1a460fbce 100644 --- a/drivers/macintosh/adb-iop.c +++ b/drivers/macintosh/adb-iop.c @@ -7,10 +7,6 @@ * 1999-07-01 (jmt) - First implementation for new driver architecture. * * 1999-07-31 (jmt) - First working version. - * - * TODO: - * - * o Implement SRQ handling. */ #include @@ -18,24 +14,17 @@ #include #include #include -#include #include #include #include -#include #include #include -/*#define DEBUG_ADB_IOP*/ - static struct adb_request *current_req; static struct adb_request *last_req; -#if 0 -static unsigned char reply_buff[16]; -static unsigned char *reply_ptr; -#endif +static unsigned int autopoll_devs; static enum adb_iop_state { idle, @@ -62,13 +51,19 @@ struct adb_driver adb_iop_driver = { .reset_bus = adb_iop_reset_bus }; -static void adb_iop_end_req(struct adb_request *req, int state) +static void adb_iop_done(void) { + struct adb_request *req = current_req; + + adb_iop_state = idle; + req->complete = 1; current_req = req->next; if (req->done) (*req->done)(req); - adb_iop_state = state; + + if (adb_iop_state == idle) + adb_iop_start(); } /* @@ -79,15 +74,14 @@ static void adb_iop_end_req(struct adb_request *req, int state) static void adb_iop_complete(struct iop_msg *msg) { - struct adb_request *req; unsigned long flags; local_irq_save(flags); - req = current_req; - if ((adb_iop_state == sending) && req && req->reply_expected) { + if (current_req->reply_expected) adb_iop_state = awaiting_reply; - } + else + adb_iop_done(); local_irq_restore(flags); } @@ -102,52 +96,36 @@ static void adb_iop_complete(struct iop_msg *msg) static void adb_iop_listen(struct iop_msg *msg) { struct adb_iopmsg *amsg = (struct adb_iopmsg *)msg->message; - struct adb_request *req; unsigned long flags; -#ifdef DEBUG_ADB_IOP - int i; -#endif + bool req_done = false; local_irq_save(flags); - req = current_req; + /* Handle a timeout. Timeout packets seem to occur even after + * we've gotten a valid reply to a TALK, presumably because of + * autopolling. + */ -#ifdef DEBUG_ADB_IOP - printk("adb_iop_listen %p: rcvd packet, %d bytes: %02X %02X", req, - (uint)amsg->count + 2, (uint)amsg->flags, (uint)amsg->cmd); - for (i = 0; i < amsg->count; i++) - printk(" %02X", (uint)amsg->data[i]); - printk("\n"); -#endif + if (amsg->flags & ADB_IOP_EXPLICIT) { + if (adb_iop_state == awaiting_reply) { + struct adb_request *req = current_req; - /* Handle a timeout. Timeout packets seem to occur even after */ - /* we've gotten a valid reply to a TALK, so I'm assuming that */ - /* a "timeout" is actually more like an "end-of-data" signal. */ - /* We need to send back a timeout packet to the IOP to shut */ - /* it up, plus complete the current request, if any. */ - - if (amsg->flags & ADB_IOP_TIMEOUT) { - msg->reply[0] = ADB_IOP_TIMEOUT | ADB_IOP_AUTOPOLL; - msg->reply[1] = 0; - msg->reply[2] = 0; - if (req && (adb_iop_state != idle)) { - adb_iop_end_req(req, idle); - } - } else { - /* TODO: is it possible for more than one chunk of data */ - /* to arrive before the timeout? If so we need to */ - /* use reply_ptr here like the other drivers do. */ - if ((adb_iop_state == awaiting_reply) && - (amsg->flags & ADB_IOP_EXPLICIT)) { req->reply_len = amsg->count + 1; memcpy(req->reply, &amsg->cmd, req->reply_len); - } else { - adb_input(&amsg->cmd, amsg->count + 1, - amsg->flags & ADB_IOP_AUTOPOLL); + + req_done = true; } - memcpy(msg->reply, msg->message, IOP_MSG_LEN); + } else if (!(amsg->flags & ADB_IOP_TIMEOUT)) { + adb_input(&amsg->cmd, amsg->count + 1, + amsg->flags & ADB_IOP_AUTOPOLL); } + + msg->reply[0] = autopoll_devs ? ADB_IOP_AUTOPOLL : 0; iop_complete_message(msg); + + if (req_done) + adb_iop_done(); + local_irq_restore(flags); } @@ -160,63 +138,50 @@ static void adb_iop_listen(struct iop_msg *msg) static void adb_iop_start(void) { - unsigned long flags; struct adb_request *req; struct adb_iopmsg amsg; -#ifdef DEBUG_ADB_IOP - int i; -#endif /* get the packet to send */ req = current_req; if (!req) return; - local_irq_save(flags); - -#ifdef DEBUG_ADB_IOP - printk("adb_iop_start %p: sending packet, %d bytes:", req, req->nbytes); - for (i = 0; i < req->nbytes; i++) - printk(" %02X", (uint)req->data[i]); - printk("\n"); -#endif - - /* The IOP takes MacII-style packets, so */ - /* strip the initial ADB_PACKET byte. */ - + /* The IOP takes MacII-style packets, so strip the initial + * ADB_PACKET byte. + */ amsg.flags = ADB_IOP_EXPLICIT; amsg.count = req->nbytes - 2; - /* amsg.data immediately follows amsg.cmd, effectively making */ - /* amsg.cmd a pointer to the beginning of a full ADB packet. */ + /* amsg.data immediately follows amsg.cmd, effectively making + * &amsg.cmd a pointer to the beginning of a full ADB packet. + */ memcpy(&amsg.cmd, req->data + 1, req->nbytes - 1); req->sent = 1; adb_iop_state = sending; - local_irq_restore(flags); - - /* Now send it. The IOP manager will call adb_iop_complete */ - /* when the packet has been sent. */ + /* Now send it. The IOP manager will call adb_iop_complete + * when the message has been sent. + */ iop_send_message(ADB_IOP, ADB_CHAN, req, sizeof(amsg), (__u8 *)&amsg, adb_iop_complete); } -int adb_iop_probe(void) +static int adb_iop_probe(void) { if (!iop_ism_present) return -ENODEV; return 0; } -int adb_iop_init(void) +static int adb_iop_init(void) { pr_info("adb: IOP ISM driver v0.4 for Unified ADB\n"); iop_listen(ADB_IOP, ADB_CHAN, adb_iop_listen, "ADB"); return 0; } -int adb_iop_send_request(struct adb_request *req, int sync) +static int adb_iop_send_request(struct adb_request *req, int sync) { int err; @@ -240,14 +205,14 @@ static int adb_iop_write(struct adb_request *req) return -EINVAL; } - local_irq_save(flags); - req->next = NULL; req->sent = 0; req->complete = 0; req->reply_len = 0; - if (current_req != 0) { + local_irq_save(flags); + + if (current_req) { last_req->next = req; last_req = req; } else { @@ -255,39 +220,58 @@ static int adb_iop_write(struct adb_request *req) last_req = req; } + if (adb_iop_state == idle) + adb_iop_start(); + local_irq_restore(flags); - if (adb_iop_state == idle) - adb_iop_start(); return 0; } -int adb_iop_autopoll(int devs) +static void adb_iop_set_ap_complete(struct iop_msg *msg) { - /* TODO: how do we enable/disable autopoll? */ + struct adb_iopmsg *amsg = (struct adb_iopmsg *)msg->message; + + autopoll_devs = (amsg->data[1] << 8) | amsg->data[0]; +} + +static int adb_iop_autopoll(int devs) +{ + struct adb_iopmsg amsg; + unsigned long flags; + unsigned int mask = (unsigned int)devs & 0xFFFE; + + local_irq_save(flags); + + amsg.flags = ADB_IOP_SET_AUTOPOLL | (mask ? ADB_IOP_AUTOPOLL : 0); + amsg.count = 2; + amsg.cmd = 0; + amsg.data[0] = mask & 0xFF; + amsg.data[1] = (mask >> 8) & 0xFF; + + iop_send_message(ADB_IOP, ADB_CHAN, NULL, sizeof(amsg), (__u8 *)&amsg, + adb_iop_set_ap_complete); + + local_irq_restore(flags); + return 0; } -void adb_iop_poll(void) +static void adb_iop_poll(void) { - if (adb_iop_state == idle) - adb_iop_start(); iop_ism_irq_poll(ADB_IOP); } -int adb_iop_reset_bus(void) +static int adb_iop_reset_bus(void) { - struct adb_request req = { - .reply_expected = 0, - .nbytes = 2, - .data = { ADB_PACKET, 0 }, - }; + struct adb_request req; - adb_iop_write(&req); - while (!req.complete) { - adb_iop_poll(); - schedule(); - } + /* Command = 0, Address = ignored */ + adb_request(&req, NULL, ADBREQ_NOSEND, 1, ADB_BUSRESET); + adb_iop_send_request(&req, 1); + + /* Don't want any more requests during the Global Reset low time. */ + mdelay(3); return 0; } diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index e49d1f287a17..73b396189039 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c @@ -163,7 +163,7 @@ static int adb_scan_bus(void) * See if anybody actually moved. This is suggested * by HW TechNote 01: * - * http://developer.apple.com/technotes/hw/hw_01.html + * https://developer.apple.com/technotes/hw/hw_01.html */ adb_request(&req, NULL, ADBREQ_SYNC | ADBREQ_REPLY, 1, (highFree << 4) | 0xf); diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c index 8f7725dc2166..7e218437730c 100644 --- a/drivers/macintosh/therm_adt746x.c +++ b/drivers/macintosh/therm_adt746x.c @@ -5,8 +5,8 @@ * Copyright (C) 2003, 2004 Colin Leroy, Rasmus Rohde, Benjamin Herrenschmidt * * Documentation from 115254175ADT7467_pra.pdf and 3686221171167ADT7460_b.pdf - * http://www.onsemi.com/PowerSolutions/product.do?id=ADT7467 - * http://www.onsemi.com/PowerSolutions/product.do?id=ADT7460 + * https://www.onsemi.com/PowerSolutions/product.do?id=ADT7467 + * https://www.onsemi.com/PowerSolutions/product.do?id=ADT7460 * */ diff --git a/drivers/macintosh/via-macii.c b/drivers/macintosh/via-macii.c index ac824d7b2dcf..060e03f2264b 100644 --- a/drivers/macintosh/via-macii.c +++ b/drivers/macintosh/via-macii.c @@ -77,6 +77,12 @@ static volatile unsigned char *via; #define ST_ODD 0x20 /* ADB state: odd data byte */ #define ST_IDLE 0x30 /* ADB state: idle, nothing to send */ +/* ADB command byte structure */ +#define ADDR_MASK 0xF0 +#define CMD_MASK 0x0F +#define OP_MASK 0x0C +#define TALK 0x0C + static int macii_init_via(void); static void macii_start(void); static irqreturn_t macii_interrupt(int irq, void *arg); @@ -104,21 +110,22 @@ static enum macii_state { idle, sending, reading, - read_done, } macii_state; static struct adb_request *current_req; /* first request struct in the queue */ static struct adb_request *last_req; /* last request struct in the queue */ static unsigned char reply_buf[16]; /* storage for autopolled replies */ static unsigned char *reply_ptr; /* next byte in reply_buf or req->reply */ -static int reading_reply; /* store reply in reply_buf else req->reply */ +static bool reading_reply; /* store reply in reply_buf else req->reply */ static int data_index; /* index of the next byte to send from req->data */ static int reply_len; /* number of bytes received in reply_buf or req->reply */ static int status; /* VIA's ADB status bits captured upon interrupt */ -static int last_status; /* status bits as at previous interrupt */ -static int srq_asserted; /* have to poll for the device that asserted it */ -static int command_byte; /* the most recent command byte transmitted */ -static int autopoll_devs; /* bits set are device addresses to be polled */ +static bool bus_timeout; /* no data was sent by the device */ +static bool srq_asserted; /* have to poll for the device that asserted it */ +static u8 last_cmd; /* the most recent command byte transmitted */ +static u8 last_talk_cmd; /* the most recent Talk command byte transmitted */ +static u8 last_poll_cmd; /* the most recent Talk R0 command byte transmitted */ +static unsigned int autopoll_devs; /* bits set are device addresses to poll */ /* Check for MacII style ADB */ static int macii_probe(void) @@ -133,7 +140,7 @@ static int macii_probe(void) } /* Initialize the driver */ -int macii_init(void) +static int macii_init(void) { unsigned long flags; int err; @@ -165,7 +172,6 @@ static int macii_init_via(void) /* Set up state: idle */ via[B] |= ST_IDLE; - last_status = via[B] & (ST_MASK | CTLR_IRQ); /* Shift register on input */ via[ACR] = (via[ACR] & ~SR_CTRL) | SR_EXT; @@ -179,35 +185,49 @@ static int macii_init_via(void) /* Send an ADB poll (Talk Register 0 command prepended to the request queue) */ static void macii_queue_poll(void) { - /* No point polling the active device as it will never assert SRQ, so - * poll the next device in the autopoll list. This could leave us - * stuck in a polling loop if an unprobed device is asserting SRQ. - * In theory, that could only happen if a device was plugged in after - * probing started. Unplugging it again will break the cycle. - * (Simply polling the next higher device often ends up polling almost - * every device (after wrapping around), which takes too long.) - */ - int device_mask; - int next_device; static struct adb_request req; + unsigned char poll_command; + unsigned int poll_addr; + /* This only polls devices in the autopoll list, which assumes that + * unprobed devices never assert SRQ. That could happen if a device was + * plugged in after the adb bus scan. Unplugging it again will resolve + * the problem. This behaviour is similar to MacOS. + */ if (!autopoll_devs) return; - device_mask = (1 << (((command_byte & 0xF0) >> 4) + 1)) - 1; - if (autopoll_devs & ~device_mask) - next_device = ffs(autopoll_devs & ~device_mask) - 1; - else - next_device = ffs(autopoll_devs) - 1; + /* The device most recently polled may not be the best device to poll + * right now. Some other device(s) may have signalled SRQ (the active + * device won't do that). Or the autopoll list may have been changed. + * Try polling the next higher address. + */ + poll_addr = (last_poll_cmd & ADDR_MASK) >> 4; + if ((srq_asserted && last_cmd == last_poll_cmd) || + !(autopoll_devs & (1 << poll_addr))) { + unsigned int higher_devs; - adb_request(&req, NULL, ADBREQ_NOSEND, 1, ADB_READREG(next_device, 0)); + higher_devs = autopoll_devs & -(1 << (poll_addr + 1)); + poll_addr = ffs(higher_devs ? higher_devs : autopoll_devs) - 1; + } + + /* Send a Talk Register 0 command */ + poll_command = ADB_READREG(poll_addr, 0); + + /* No need to repeat this Talk command. The transceiver will do that + * as long as it is idle. + */ + if (poll_command == last_cmd) + return; + + adb_request(&req, NULL, ADBREQ_NOSEND, 1, poll_command); req.sent = 0; req.complete = 0; req.reply_len = 0; req.next = current_req; - if (current_req != NULL) { + if (WARN_ON(current_req)) { current_req = &req; } else { current_req = &req; @@ -266,40 +286,22 @@ static int macii_write(struct adb_request *req) /* Start auto-polling */ static int macii_autopoll(int devs) { - static struct adb_request req; unsigned long flags; - int err = 0; - - /* bit 1 == device 1, and so on. */ - autopoll_devs = devs & 0xFFFE; - - if (!autopoll_devs) - return 0; local_irq_save(flags); - if (current_req == NULL) { - /* Send a Talk Reg 0. The controller will repeatedly transmit - * this as long as it is idle. - */ - adb_request(&req, NULL, ADBREQ_NOSEND, 1, - ADB_READREG(ffs(autopoll_devs) - 1, 0)); - err = macii_write(&req); + /* bit 1 == device 1, and so on. */ + autopoll_devs = (unsigned int)devs & 0xFFFE; + + if (!current_req) { + macii_queue_poll(); + if (current_req && macii_state == idle) + macii_start(); } local_irq_restore(flags); - return err; -} -static inline int need_autopoll(void) -{ - /* Was the last command Talk Reg 0 - * and is the target on the autopoll list? - */ - if ((command_byte & 0x0F) == 0x0C && - ((1 << ((command_byte & 0xF0) >> 4)) & autopoll_devs)) - return 0; - return 1; + return 0; } /* Prod the chip without interrupts */ @@ -311,7 +313,7 @@ static void macii_poll(void) /* Reset the bus */ static int macii_reset_bus(void) { - static struct adb_request req; + struct adb_request req; /* Command = 0, Address = ignored */ adb_request(&req, NULL, ADBREQ_NOSEND, 1, ADB_BUSRESET); @@ -335,8 +337,6 @@ static void macii_start(void) * And req->nbytes is the number of bytes of real data plus one. */ - /* store command byte */ - command_byte = req->data[1]; /* Output mode */ via[ACR] |= SR_OUT; /* Load data */ @@ -346,6 +346,9 @@ static void macii_start(void) macii_state = sending; data_index = 2; + + bus_timeout = false; + srq_asserted = false; } /* @@ -354,15 +357,17 @@ static void macii_start(void) * generating shift register interrupts (SR_INT) for us. This means there has * to be activity on the ADB bus. The chip will poll to achieve this. * - * The basic ADB state machine was left unchanged from the original MacII code - * by Alan Cox, which was based on the CUDA driver for PowerMac. - * The syntax of the ADB status lines is totally different on MacII, - * though. MacII uses the states Command -> Even -> Odd -> Even ->...-> Idle - * for sending and Idle -> Even -> Odd -> Even ->...-> Idle for receiving. - * Start and end of a receive packet are signalled by asserting /IRQ on the - * interrupt line (/IRQ means the CTLR_IRQ bit in port B; not to be confused - * with the VIA shift register interrupt. /IRQ never actually interrupts the - * processor, it's just an ordinary input.) + * The VIA Port B output signalling works as follows. After the ADB transceiver + * sees a transition on the PB4 and PB5 lines it will crank over the VIA shift + * register which eventually raises the SR_INT interrupt. The PB4/PB5 outputs + * are toggled with each byte as the ADB transaction progresses. + * + * Request with no reply expected (and empty transceiver buffer): + * CMD -> IDLE + * Request with expected reply packet (or with buffered autopoll packet): + * CMD -> EVEN -> ODD -> EVEN -> ... -> IDLE + * Unsolicited packet: + * IDLE -> EVEN -> ODD -> EVEN -> ... -> IDLE */ static irqreturn_t macii_interrupt(int irq, void *arg) { @@ -382,31 +387,31 @@ static irqreturn_t macii_interrupt(int irq, void *arg) } } - last_status = status; status = via[B] & (ST_MASK | CTLR_IRQ); switch (macii_state) { case idle: - if (reading_reply) { - reply_ptr = current_req->reply; - } else { - WARN_ON(current_req); - reply_ptr = reply_buf; - } + WARN_ON((status & ST_MASK) != ST_IDLE); + + reply_ptr = reply_buf; + reading_reply = false; + + bus_timeout = false; + srq_asserted = false; x = via[SR]; - if ((status & CTLR_IRQ) && (x == 0xFF)) { - /* Bus timeout without SRQ sequence: - * data is "FF" while CTLR_IRQ is "H" + if (!(status & CTLR_IRQ)) { + /* /CTLR_IRQ asserted in idle state means we must + * read an autopoll reply from the transceiver buffer. */ - reply_len = 0; - srq_asserted = 0; - macii_state = read_done; - } else { macii_state = reading; *reply_ptr = x; reply_len = 1; + } else { + /* bus timeout */ + reply_len = 0; + break; } /* set ADB state = even for first data byte */ @@ -415,41 +420,83 @@ static irqreturn_t macii_interrupt(int irq, void *arg) case sending: req = current_req; - if (data_index >= req->nbytes) { + + if (status == (ST_CMD | CTLR_IRQ)) { + /* /CTLR_IRQ de-asserted after the command byte means + * the host can continue with the transaction. + */ + + /* Store command byte */ + last_cmd = req->data[1]; + if ((last_cmd & OP_MASK) == TALK) { + last_talk_cmd = last_cmd; + if ((last_cmd & CMD_MASK) == ADB_READREG(0, 0)) + last_poll_cmd = last_cmd; + } + } + + if (status == ST_CMD) { + /* /CTLR_IRQ asserted after the command byte means we + * must read an autopoll reply. The first byte was + * lost because the shift register was an output. + */ + macii_state = reading; + + reading_reply = false; + reply_ptr = reply_buf; + *reply_ptr = last_talk_cmd; + reply_len = 1; + + /* reset to shift in */ + via[ACR] &= ~SR_OUT; + x = via[SR]; + } else if (data_index >= req->nbytes) { req->sent = 1; - macii_state = idle; if (req->reply_expected) { - reading_reply = 1; - } else { + macii_state = reading; + + reading_reply = true; + reply_ptr = req->reply; + *reply_ptr = req->data[1]; + reply_len = 1; + + via[ACR] &= ~SR_OUT; + x = via[SR]; + } else if ((req->data[1] & OP_MASK) == TALK) { + macii_state = reading; + + reading_reply = false; + reply_ptr = reply_buf; + *reply_ptr = req->data[1]; + reply_len = 1; + + via[ACR] &= ~SR_OUT; + x = via[SR]; + req->complete = 1; current_req = req->next; if (req->done) (*req->done)(req); + } else { + macii_state = idle; - if (current_req) - macii_start(); - else if (need_autopoll()) - macii_autopoll(autopoll_devs); - } - - if (macii_state == idle) { - /* reset to shift in */ - via[ACR] &= ~SR_OUT; - x = via[SR]; - /* set ADB state idle - might get SRQ */ - via[B] = (via[B] & ~ST_MASK) | ST_IDLE; + req->complete = 1; + current_req = req->next; + if (req->done) + (*req->done)(req); + break; } } else { via[SR] = req->data[data_index++]; + } - if ((via[B] & ST_MASK) == ST_CMD) { - /* just sent the command byte, set to EVEN */ - via[B] = (via[B] & ~ST_MASK) | ST_EVEN; - } else { - /* invert state bits, toggle ODD/EVEN */ - via[B] ^= ST_MASK; - } + if ((via[B] & ST_MASK) == ST_CMD) { + /* just sent the command byte, set to EVEN */ + via[B] = (via[B] & ~ST_MASK) | ST_EVEN; + } else { + /* invert state bits, toggle ODD/EVEN */ + via[B] ^= ST_MASK; } break; @@ -458,33 +505,35 @@ static irqreturn_t macii_interrupt(int irq, void *arg) WARN_ON((status & ST_MASK) == ST_CMD || (status & ST_MASK) == ST_IDLE); - /* Bus timeout with SRQ sequence: - * data is "XX FF" while CTLR_IRQ is "L L" - * End of packet without SRQ sequence: - * data is "XX...YY 00" while CTLR_IRQ is "L...H L" - * End of packet SRQ sequence: - * data is "XX...YY 00" while CTLR_IRQ is "L...L L" - * (where XX is the first response byte and - * YY is the last byte of valid response data.) - */ - - srq_asserted = 0; if (!(status & CTLR_IRQ)) { - if (x == 0xFF) { - if (!(last_status & CTLR_IRQ)) { - macii_state = read_done; + if (status == ST_EVEN && reply_len == 1) { + bus_timeout = true; + } else if (status == ST_ODD && reply_len == 2) { + srq_asserted = true; + } else { + macii_state = idle; + + if (bus_timeout) reply_len = 0; - srq_asserted = 1; + + if (reading_reply) { + struct adb_request *req = current_req; + + req->reply_len = reply_len; + + req->complete = 1; + current_req = req->next; + if (req->done) + (*req->done)(req); + } else if (reply_len && autopoll_devs && + reply_buf[0] == last_poll_cmd) { + adb_input(reply_buf, reply_len, 1); } - } else if (x == 0x00) { - macii_state = read_done; - if (!(last_status & CTLR_IRQ)) - srq_asserted = 1; + break; } } - if (macii_state == reading && - reply_len < ARRAY_SIZE(reply_buf)) { + if (reply_len < ARRAY_SIZE(reply_buf)) { reply_ptr++; *reply_ptr = x; reply_len++; @@ -494,37 +543,22 @@ static irqreturn_t macii_interrupt(int irq, void *arg) via[B] ^= ST_MASK; break; - case read_done: - x = via[SR]; + default: + break; + } - if (reading_reply) { - reading_reply = 0; - req = current_req; - req->reply_len = reply_len; - req->complete = 1; - current_req = req->next; - if (req->done) - (*req->done)(req); - } else if (reply_len && autopoll_devs) - adb_input(reply_buf, reply_len, 0); - - macii_state = idle; - - /* SRQ seen before, initiate poll now */ - if (srq_asserted) + if (macii_state == idle) { + if (!current_req) macii_queue_poll(); if (current_req) macii_start(); - else if (need_autopoll()) - macii_autopoll(autopoll_devs); - if (macii_state == idle) + if (macii_state == idle) { + via[ACR] &= ~SR_OUT; + x = via[SR]; via[B] = (via[B] & ~ST_MASK) | ST_IDLE; - break; - - default: - break; + } } local_irq_restore(flags); diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index cfea054863a4..86dbe0c8b45c 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -538,7 +538,7 @@ static void ssd_commit_superblock(struct dm_writecache *wc) static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) { if (WC_MODE_PMEM(wc)) - wmb(); + pmem_wmb(); else ssd_commit_flushed(wc, wait_for_ios); } diff --git a/drivers/misc/ocxl/Kconfig b/drivers/misc/ocxl/Kconfig index 2d2266c1439e..6551007a066c 100644 --- a/drivers/misc/ocxl/Kconfig +++ b/drivers/misc/ocxl/Kconfig @@ -23,7 +23,7 @@ config OCXL The ocxl driver enables userspace programs to access these accelerators through devices in /dev/ocxl/. - For more information, see http://opencapi.org. + For more information, see https://opencapi.org. This is not to be confused with the support for IBM CAPI accelerators (CONFIG_CXL), which are PCI-based instead of a diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c index e3b99a39d207..4d490b92d951 100644 --- a/drivers/misc/ocxl/config.c +++ b/drivers/misc/ocxl/config.c @@ -71,6 +71,20 @@ static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx) return 0; } +/** + * get_function_0() - Find a related PCI device (function 0) + * @device: PCI device to match + * + * Returns a pointer to the related device, or null if not found + */ +static struct pci_dev *get_function_0(struct pci_dev *dev) +{ + unsigned int devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0); + + return pci_get_domain_bus_and_slot(pci_domain_nr(dev->bus), + dev->bus->number, devfn); +} + static void read_pasid(struct pci_dev *dev, struct ocxl_fn_config *fn) { u16 val; @@ -159,14 +173,15 @@ static int read_dvsec_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn) static int read_dvsec_vendor(struct pci_dev *dev) { int pos; - u32 cfg, tlx, dlx; + u32 cfg, tlx, dlx, reset_reload; /* - * vendor specific DVSEC is optional + * vendor specific DVSEC, for IBM images only. Some older + * images may not have it * - * It's currently only used on function 0 to specify the - * version of some logic blocks. Some older images may not - * even have it so we ignore any errors + * It's only used on function 0 to specify the version of some + * logic blocks and to give access to special registers to + * enable host-based flashing. */ if (PCI_FUNC(dev->devfn) != 0) return 0; @@ -178,11 +193,67 @@ static int read_dvsec_vendor(struct pci_dev *dev) pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_CFG_VERS, &cfg); pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_TLX_VERS, &tlx); pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_DLX_VERS, &dlx); + pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD, + &reset_reload); dev_dbg(&dev->dev, "Vendor specific DVSEC:\n"); dev_dbg(&dev->dev, " CFG version = 0x%x\n", cfg); dev_dbg(&dev->dev, " TLX version = 0x%x\n", tlx); dev_dbg(&dev->dev, " DLX version = 0x%x\n", dlx); + dev_dbg(&dev->dev, " ResetReload = 0x%x\n", reset_reload); + return 0; +} + +static int get_dvsec_vendor0(struct pci_dev *dev, struct pci_dev **dev0, + int *out_pos) +{ + int pos; + + if (PCI_FUNC(dev->devfn) != 0) { + dev = get_function_0(dev); + if (!dev) + return -1; + } + pos = find_dvsec(dev, OCXL_DVSEC_VENDOR_ID); + if (!pos) + return -1; + *dev0 = dev; + *out_pos = pos; + return 0; +} + +int ocxl_config_get_reset_reload(struct pci_dev *dev, int *val) +{ + struct pci_dev *dev0; + u32 reset_reload; + int pos; + + if (get_dvsec_vendor0(dev, &dev0, &pos)) + return -1; + + pci_read_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD, + &reset_reload); + *val = !!(reset_reload & BIT(0)); + return 0; +} + +int ocxl_config_set_reset_reload(struct pci_dev *dev, int val) +{ + struct pci_dev *dev0; + u32 reset_reload; + int pos; + + if (get_dvsec_vendor0(dev, &dev0, &pos)) + return -1; + + pci_read_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD, + &reset_reload); + if (val) + reset_reload |= BIT(0); + else + reset_reload &= ~BIT(0); + pci_write_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD, + reset_reload); return 0; } @@ -273,7 +344,7 @@ static int read_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn, } /** - * read_template_version - Read the template version from the AFU + * read_template_version() - Read the template version from the AFU * @dev: the device for the AFU * @fn: the AFU offsets * @len: outputs the template length @@ -282,7 +353,7 @@ static int read_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn, * Returns 0 on success, negative on failure */ static int read_template_version(struct pci_dev *dev, struct ocxl_fn_config *fn, - u16 *len, u16 *version) + u16 *len, u16 *version) { u32 val32; u8 major, minor; @@ -476,7 +547,7 @@ static int validate_afu(struct pci_dev *dev, struct ocxl_afu_config *afu) } /** - * read_afu_lpc_memory_info - Populate AFU metadata regarding LPC memory + * read_afu_lpc_memory_info() - Populate AFU metadata regarding LPC memory * @dev: the device for the AFU * @fn: the AFU offsets * @afu: the AFU struct to populate the LPC metadata into @@ -484,8 +555,8 @@ static int validate_afu(struct pci_dev *dev, struct ocxl_afu_config *afu) * Returns 0 on success, negative on failure */ static int read_afu_lpc_memory_info(struct pci_dev *dev, - struct ocxl_fn_config *fn, - struct ocxl_afu_config *afu) + struct ocxl_fn_config *fn, + struct ocxl_afu_config *afu) { int rc; u32 val32; diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h index 345bf843a38e..0bad0a123af6 100644 --- a/drivers/misc/ocxl/ocxl_internal.h +++ b/drivers/misc/ocxl/ocxl_internal.h @@ -112,6 +112,12 @@ void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size); */ int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count); +/* + * Control whether the FPGA is reloaded on a link reset + */ +int ocxl_config_get_reset_reload(struct pci_dev *dev, int *val); +int ocxl_config_set_reset_reload(struct pci_dev *dev, int val); + /* * Check if an AFU index is valid for the given function. * @@ -122,11 +128,12 @@ int ocxl_config_check_afu_index(struct pci_dev *dev, struct ocxl_fn_config *fn, int afu_idx); /** - * Update values within a Process Element + * ocxl_link_update_pe() - Update values within a Process Element + * @link_handle: the link handle associated with the process element + * @pasid: the PASID for the AFU context + * @tid: the new thread id for the process element * - * link_handle: the link handle associated with the process element - * pasid: the PASID for the AFU context - * tid: the new thread id for the process element + * Returns 0 on success */ int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid); diff --git a/drivers/misc/ocxl/sysfs.c b/drivers/misc/ocxl/sysfs.c index 58f1ba264206..25c78df8055d 100644 --- a/drivers/misc/ocxl/sysfs.c +++ b/drivers/misc/ocxl/sysfs.c @@ -51,11 +51,46 @@ static ssize_t contexts_show(struct device *device, afu->pasid_count, afu->pasid_max); } +static ssize_t reload_on_reset_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct ocxl_afu *afu = to_afu(device); + struct ocxl_fn *fn = afu->fn; + struct pci_dev *pci_dev = to_pci_dev(fn->dev.parent); + int val; + + if (ocxl_config_get_reset_reload(pci_dev, &val)) + return scnprintf(buf, PAGE_SIZE, "unavailable\n"); + + return scnprintf(buf, PAGE_SIZE, "%d\n", val); +} + +static ssize_t reload_on_reset_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ocxl_afu *afu = to_afu(device); + struct ocxl_fn *fn = afu->fn; + struct pci_dev *pci_dev = to_pci_dev(fn->dev.parent); + int rc, val; + + rc = kstrtoint(buf, 0, &val); + if (rc || (val != 0 && val != 1)) + return -EINVAL; + + if (ocxl_config_set_reset_reload(pci_dev, val)) + return -ENODEV; + + return count; +} + static struct device_attribute afu_attrs[] = { __ATTR_RO(global_mmio_size), __ATTR_RO(pp_mmio_size), __ATTR_RO(afu_version), __ATTR_RO(contexts), + __ATTR_RW(reload_on_reset), }; static ssize_t global_mmio_read(struct file *filp, struct kobject *kobj, diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c index 6826a274a1f1..10dbdcdfb9ce 100644 --- a/drivers/nvdimm/of_pmem.c +++ b/drivers/nvdimm/of_pmem.c @@ -90,6 +90,7 @@ static int of_pmem_region_remove(struct platform_device *pdev) static const struct of_device_id of_pmem_region_match[] = { { .compatible = "pmem-region" }, + { .compatible = "pmem-region-v2" }, { }, }; diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 4502f9c4708d..c3237c2b03a6 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -1206,13 +1206,13 @@ int generic_nvdimm_flush(struct nd_region *nd_region) idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8)); /* - * The first wmb() is needed to 'sfence' all previous writes - * such that they are architecturally visible for the platform - * buffer flush. Note that we've already arranged for pmem + * The pmem_wmb() is needed to 'sfence' all + * previous writes such that they are architecturally visible for + * the platform buffer flush. Note that we've already arranged for pmem * writes to avoid the cache via memcpy_flushcache(). The final * wmb() ensures ordering for the NVDIMM flush write. */ - wmb(); + pmem_wmb(); for (i = 0; i < nd_region->ndr_mappings; i++) if (ndrd_get_flush_wpq(ndrd, i, 0)) writeq(1, ndrd_get_flush_wpq(ndrd, i, idx)); diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index fec97dc34de7..798027bb89be 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -240,5 +240,15 @@ do { \ }) #endif +/* + * pmem_wmb() ensures that all stores for which the modification + * are written to persistent storage by preceding instructions have + * updated persistent storage before any data access or data transfer + * caused by subsequent instructions is initiated. + */ +#ifndef pmem_wmb +#define pmem_wmb() wmb() +#endif + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_GENERIC_BARRIER_H */ diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h index 2b26cd729b94..4fe7fd0fe834 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -13,6 +13,7 @@ #include #include +#ifndef queued_spin_is_locked /** * queued_spin_is_locked - is the spinlock locked? * @lock: Pointer to queued spinlock structure @@ -26,6 +27,7 @@ static __always_inline int queued_spin_is_locked(struct qspinlock *lock) */ return atomic_read(&lock->val); } +#endif /** * queued_spin_value_unlocked - is the spinlock structure unlocked? @@ -68,6 +70,7 @@ static __always_inline int queued_spin_trylock(struct qspinlock *lock) extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +#ifndef queued_spin_lock /** * queued_spin_lock - acquire a queued spinlock * @lock: Pointer to queued spinlock structure @@ -81,6 +84,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock) queued_spin_lock_slowpath(lock, val); } +#endif #ifndef queued_spin_unlock /** diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 191772d4a4d7..a2710e654b64 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -181,6 +181,7 @@ enum cpuhp_state { CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE, + CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE, CPUHP_AP_WATCHDOG_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RCUTREE_ONLINE, diff --git a/include/linux/kexec.h b/include/linux/kexec.h index ea67910ae6b7..9e93bef52968 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -183,17 +183,24 @@ int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name, bool get_value); void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name); -int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf, - unsigned long buf_len); -void * __weak arch_kexec_kernel_image_load(struct kimage *image); -int __weak arch_kexec_apply_relocations_add(struct purgatory_info *pi, - Elf_Shdr *section, - const Elf_Shdr *relsec, - const Elf_Shdr *symtab); -int __weak arch_kexec_apply_relocations(struct purgatory_info *pi, - Elf_Shdr *section, - const Elf_Shdr *relsec, - const Elf_Shdr *symtab); +/* Architectures may override the below functions */ +int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, + unsigned long buf_len); +void *arch_kexec_kernel_image_load(struct kimage *image); +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab); +int arch_kexec_apply_relocations(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab); +int arch_kimage_file_post_load_cleanup(struct kimage *image); +#ifdef CONFIG_KEXEC_SIG +int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, + unsigned long buf_len); +#endif +int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf); extern int kexec_add_buffer(struct kexec_buf *kbuf); int kexec_locate_mem_hole(struct kexec_buf *kbuf); diff --git a/include/linux/mm.h b/include/linux/mm.h index dc7b87310c10..6c8333d6c991 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -317,8 +317,6 @@ extern unsigned int kobjsize(const void *objp); #if defined(CONFIG_X86) # define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */ -#elif defined(CONFIG_PPC) -# define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ #elif defined(CONFIG_PARISC) # define VM_GROWSUP VM_ARCH_1 #elif defined(CONFIG_IA64) diff --git a/include/misc/ocxl-config.h b/include/misc/ocxl-config.h index 3526fa996a22..ccfd3b463517 100644 --- a/include/misc/ocxl-config.h +++ b/include/misc/ocxl-config.h @@ -41,5 +41,6 @@ #define OCXL_DVSEC_VENDOR_CFG_VERS 0x0C #define OCXL_DVSEC_VENDOR_TLX_VERS 0x10 #define OCXL_DVSEC_VENDOR_DLX_VERS 0x20 +#define OCXL_DVSEC_VENDOR_RESET_RELOAD 0x38 #endif /* _OCXL_CONFIG_H_ */ diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h index 06dd5839e438..357ef1aadbc0 100644 --- a/include/misc/ocxl.h +++ b/include/misc/ocxl.h @@ -62,8 +62,7 @@ struct ocxl_context; // Device detection & initialisation /** - * Open an OpenCAPI function on an OpenCAPI device - * + * ocxl_function_open() - Open an OpenCAPI function on an OpenCAPI device * @dev: The PCI device that contains the function * * Returns an opaque pointer to the function, or an error pointer (check with IS_ERR) @@ -71,8 +70,7 @@ struct ocxl_context; struct ocxl_fn *ocxl_function_open(struct pci_dev *dev); /** - * Get the list of AFUs associated with a PCI function device - * + * ocxl_function_afu_list() - Get the list of AFUs associated with a PCI function device * Returns a list of struct ocxl_afu * * * @fn: The OpenCAPI function containing the AFUs @@ -80,8 +78,7 @@ struct ocxl_fn *ocxl_function_open(struct pci_dev *dev); struct list_head *ocxl_function_afu_list(struct ocxl_fn *fn); /** - * Fetch an AFU instance from an OpenCAPI function - * + * ocxl_function_fetch_afu() - Fetch an AFU instance from an OpenCAPI function * @fn: The OpenCAPI function to get the AFU from * @afu_idx: The index of the AFU to get * @@ -92,23 +89,20 @@ struct list_head *ocxl_function_afu_list(struct ocxl_fn *fn); struct ocxl_afu *ocxl_function_fetch_afu(struct ocxl_fn *fn, u8 afu_idx); /** - * Take a reference to an AFU - * + * ocxl_afu_get() - Take a reference to an AFU * @afu: The AFU to increment the reference count on */ void ocxl_afu_get(struct ocxl_afu *afu); /** - * Release a reference to an AFU - * + * ocxl_afu_put() - Release a reference to an AFU * @afu: The AFU to decrement the reference count on */ void ocxl_afu_put(struct ocxl_afu *afu); /** - * Get the configuration information for an OpenCAPI function - * + * ocxl_function_config() - Get the configuration information for an OpenCAPI function * @fn: The OpenCAPI function to get the config for * * Returns the function config, or NULL on error @@ -116,8 +110,7 @@ void ocxl_afu_put(struct ocxl_afu *afu); const struct ocxl_fn_config *ocxl_function_config(struct ocxl_fn *fn); /** - * Close an OpenCAPI function - * + * ocxl_function_close() - Close an OpenCAPI function * This will free any AFUs previously retrieved from the function, and * detach and associated contexts. The contexts must by freed by the caller. * @@ -129,8 +122,7 @@ void ocxl_function_close(struct ocxl_fn *fn); // Context allocation /** - * Allocate an OpenCAPI context - * + * ocxl_context_alloc() - Allocate an OpenCAPI context * @context: The OpenCAPI context to allocate, must be freed with ocxl_context_free * @afu: The AFU the context belongs to * @mapping: The mapping to unmap when the context is closed (may be NULL) @@ -139,14 +131,13 @@ int ocxl_context_alloc(struct ocxl_context **context, struct ocxl_afu *afu, struct address_space *mapping); /** - * Free an OpenCAPI context - * + * ocxl_context_free() - Free an OpenCAPI context * @ctx: The OpenCAPI context to free */ void ocxl_context_free(struct ocxl_context *ctx); /** - * Grant access to an MM to an OpenCAPI context + * ocxl_context_attach() - Grant access to an MM to an OpenCAPI context * @ctx: The OpenCAPI context to attach * @amr: The value of the AMR register to restrict access * @mm: The mm to attach to the context @@ -157,7 +148,7 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, struct mm_struct *mm); /** - * Detach an MM from an OpenCAPI context + * ocxl_context_detach() - Detach an MM from an OpenCAPI context * @ctx: The OpenCAPI context to attach * * Returns 0 on success, negative on failure @@ -167,25 +158,25 @@ int ocxl_context_detach(struct ocxl_context *ctx); // AFU IRQs /** - * Allocate an IRQ associated with an AFU context + * ocxl_afu_irq_alloc() - Allocate an IRQ associated with an AFU context * @ctx: the AFU context * @irq_id: out, the IRQ ID * * Returns 0 on success, negative on failure */ -extern int ocxl_afu_irq_alloc(struct ocxl_context *ctx, int *irq_id); +int ocxl_afu_irq_alloc(struct ocxl_context *ctx, int *irq_id); /** - * Frees an IRQ associated with an AFU context + * ocxl_afu_irq_free() - Frees an IRQ associated with an AFU context * @ctx: the AFU context * @irq_id: the IRQ ID * * Returns 0 on success, negative on failure */ -extern int ocxl_afu_irq_free(struct ocxl_context *ctx, int irq_id); +int ocxl_afu_irq_free(struct ocxl_context *ctx, int irq_id); /** - * Gets the address of the trigger page for an IRQ + * ocxl_afu_irq_get_addr() - Gets the address of the trigger page for an IRQ * This can then be provided to an AFU which will write to that * page to trigger the IRQ. * @ctx: The AFU context that the IRQ is associated with @@ -193,10 +184,10 @@ extern int ocxl_afu_irq_free(struct ocxl_context *ctx, int irq_id); * * returns the trigger page address, or 0 if the IRQ is not valid */ -extern u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, int irq_id); +u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, int irq_id); /** - * Provide a callback to be called when an IRQ is triggered + * ocxl_irq_set_handler() - Provide a callback to be called when an IRQ is triggered * @ctx: The AFU context that the IRQ is associated with * @irq_id: The IRQ ID * @handler: the callback to be called when the IRQ is triggered @@ -213,8 +204,7 @@ int ocxl_irq_set_handler(struct ocxl_context *ctx, int irq_id, // AFU Metadata /** - * Get a pointer to the config for an AFU - * + * ocxl_afu_config() - Get a pointer to the config for an AFU * @afu: a pointer to the AFU to get the config for * * Returns a pointer to the AFU config @@ -222,27 +212,24 @@ int ocxl_irq_set_handler(struct ocxl_context *ctx, int irq_id, struct ocxl_afu_config *ocxl_afu_config(struct ocxl_afu *afu); /** - * Assign opaque hardware specific information to an OpenCAPI AFU. - * - * @dev: The PCI device associated with the OpenCAPI device + * ocxl_afu_set_private() - Assign opaque hardware specific information to an OpenCAPI AFU. + * @afu: The OpenCAPI AFU * @private: the opaque hardware specific information to assign to the driver */ void ocxl_afu_set_private(struct ocxl_afu *afu, void *private); /** - * Fetch the hardware specific information associated with an external OpenCAPI - * AFU. This may be consumed by an external OpenCAPI driver. - * - * @afu: The AFU + * ocxl_afu_get_private() - Fetch the hardware specific information associated with + * an external OpenCAPI AFU. This may be consumed by an external OpenCAPI driver. + * @afu: The OpenCAPI AFU * * Returns the opaque pointer associated with the device, or NULL if not set */ -void *ocxl_afu_get_private(struct ocxl_afu *dev); +void *ocxl_afu_get_private(struct ocxl_afu *afu); // Global MMIO /** - * Read a 32 bit value from global MMIO - * + * ocxl_global_mmio_read32() - Read a 32 bit value from global MMIO * @afu: The AFU * @offset: The Offset from the start of MMIO * @endian: the endianness that the MMIO data is in @@ -251,11 +238,10 @@ void *ocxl_afu_get_private(struct ocxl_afu *dev); * Returns 0 for success, negative on error */ int ocxl_global_mmio_read32(struct ocxl_afu *afu, size_t offset, - enum ocxl_endian endian, u32 *val); + enum ocxl_endian endian, u32 *val); /** - * Read a 64 bit value from global MMIO - * + * ocxl_global_mmio_read64() - Read a 64 bit value from global MMIO * @afu: The AFU * @offset: The Offset from the start of MMIO * @endian: the endianness that the MMIO data is in @@ -264,11 +250,10 @@ int ocxl_global_mmio_read32(struct ocxl_afu *afu, size_t offset, * Returns 0 for success, negative on error */ int ocxl_global_mmio_read64(struct ocxl_afu *afu, size_t offset, - enum ocxl_endian endian, u64 *val); + enum ocxl_endian endian, u64 *val); /** - * Write a 32 bit value to global MMIO - * + * ocxl_global_mmio_write32() - Write a 32 bit value to global MMIO * @afu: The AFU * @offset: The Offset from the start of MMIO * @endian: the endianness that the MMIO data is in @@ -277,11 +262,10 @@ int ocxl_global_mmio_read64(struct ocxl_afu *afu, size_t offset, * Returns 0 for success, negative on error */ int ocxl_global_mmio_write32(struct ocxl_afu *afu, size_t offset, - enum ocxl_endian endian, u32 val); + enum ocxl_endian endian, u32 val); /** - * Write a 64 bit value to global MMIO - * + * ocxl_global_mmio_write64() - Write a 64 bit value to global MMIO * @afu: The AFU * @offset: The Offset from the start of MMIO * @endian: the endianness that the MMIO data is in @@ -290,11 +274,10 @@ int ocxl_global_mmio_write32(struct ocxl_afu *afu, size_t offset, * Returns 0 for success, negative on error */ int ocxl_global_mmio_write64(struct ocxl_afu *afu, size_t offset, - enum ocxl_endian endian, u64 val); + enum ocxl_endian endian, u64 val); /** - * Set bits in a 32 bit global MMIO register - * + * ocxl_global_mmio_set32() - Set bits in a 32 bit global MMIO register * @afu: The AFU * @offset: The Offset from the start of MMIO * @endian: the endianness that the MMIO data is in @@ -303,11 +286,10 @@ int ocxl_global_mmio_write64(struct ocxl_afu *afu, size_t offset, * Returns 0 for success, negative on error */ int ocxl_global_mmio_set32(struct ocxl_afu *afu, size_t offset, - enum ocxl_endian endian, u32 mask); + enum ocxl_endian endian, u32 mask); /** - * Set bits in a 64 bit global MMIO register - * + * ocxl_global_mmio_set64() - Set bits in a 64 bit global MMIO register * @afu: The AFU * @offset: The Offset from the start of MMIO * @endian: the endianness that the MMIO data is in @@ -316,11 +298,10 @@ int ocxl_global_mmio_set32(struct ocxl_afu *afu, size_t offset, * Returns 0 for success, negative on error */ int ocxl_global_mmio_set64(struct ocxl_afu *afu, size_t offset, - enum ocxl_endian endian, u64 mask); + enum ocxl_endian endian, u64 mask); /** - * Set bits in a 32 bit global MMIO register - * + * ocxl_global_mmio_clear32() - Set bits in a 32 bit global MMIO register * @afu: The AFU * @offset: The Offset from the start of MMIO * @endian: the endianness that the MMIO data is in @@ -329,11 +310,10 @@ int ocxl_global_mmio_set64(struct ocxl_afu *afu, size_t offset, * Returns 0 for success, negative on error */ int ocxl_global_mmio_clear32(struct ocxl_afu *afu, size_t offset, - enum ocxl_endian endian, u32 mask); + enum ocxl_endian endian, u32 mask); /** - * Set bits in a 64 bit global MMIO register - * + * ocxl_global_mmio_clear64() - Set bits in a 64 bit global MMIO register * @afu: The AFU * @offset: The Offset from the start of MMIO * @endian: the endianness that the MMIO data is in @@ -342,7 +322,7 @@ int ocxl_global_mmio_clear32(struct ocxl_afu *afu, size_t offset, * Returns 0 for success, negative on error */ int ocxl_global_mmio_clear64(struct ocxl_afu *afu, size_t offset, - enum ocxl_endian endian, u64 mask); + enum ocxl_endian endian, u64 mask); // Functions left here are for compatibility with the cxlflash driver diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 5fb752034386..939092dbcb8b 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -114,8 +114,6 @@ IF_HAVE_PG_IDLE(PG_idle, "idle" ) #if defined(CONFIG_X86) #define __VM_ARCH_SPECIFIC_1 {VM_PAT, "pat" } -#elif defined(CONFIG_PPC) -#define __VM_ARCH_SPECIFIC_1 {VM_SAO, "sao" } #elif defined(CONFIG_PARISC) || defined(CONFIG_IA64) #define __VM_ARCH_SPECIFIC_1 {VM_GROWSUP, "growsup" } #elif !defined(CONFIG_MMU) diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 94661d2d13ad..78c0837bfd7b 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -635,6 +635,19 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf) return ret == 1 ? 0 : -EADDRNOTAVAIL; } +/** + * arch_kexec_locate_mem_hole - Find free memory to place the segments. + * @kbuf: Parameters for the memory search. + * + * On success, kbuf->mem will have the start address of the memory region found. + * + * Return: 0 on success, negative errno on error. + */ +int __weak arch_kexec_locate_mem_hole(struct kexec_buf *kbuf) +{ + return kexec_locate_mem_hole(kbuf); +} + /** * kexec_add_buffer - place a buffer in a kexec segment * @kbuf: Buffer contents and memory parameters. @@ -647,7 +660,6 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf) */ int kexec_add_buffer(struct kexec_buf *kbuf) { - struct kexec_segment *ksegment; int ret; @@ -675,7 +687,7 @@ int kexec_add_buffer(struct kexec_buf *kbuf) kbuf->buf_align = max(kbuf->buf_align, PAGE_SIZE); /* Walk the RAM ranges and allocate a suitable range for the buffer */ - ret = kexec_locate_mem_hole(kbuf); + ret = arch_kexec_locate_mem_hole(kbuf); if (ret) return ret; diff --git a/mm/ksm.c b/mm/ksm.c index 5fb176d497ea..217842a66912 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2452,10 +2452,6 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start, if (vma_is_dax(vma)) return 0; -#ifdef VM_SAO - if (*vm_flags & VM_SAO) - return 0; -#endif #ifdef VM_SPARC_ADI if (*vm_flags & VM_SPARC_ADI) return 0; diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 264e266a85bf..c3af3f324c5a 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -640,6 +640,11 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf) #define KVM_REG_PPC_PTCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0) +/* POWER10 registers */ +#define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1) +#define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2) +#define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) + /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs */ diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h b/tools/perf/arch/powerpc/util/book3s_hcalls.h index 54cfa0530e86..488f4339b83c 100644 --- a/tools/perf/arch/powerpc/util/book3s_hcalls.h +++ b/tools/perf/arch/powerpc/util/book3s_hcalls.h @@ -84,7 +84,7 @@ {0x1a4, "H_CREATE_RPT"}, \ {0x1a8, "H_REMOVE_RPT"}, \ {0x1ac, "H_REGISTER_RPAGES"}, \ - {0x1b0, "H_DISABLE_AND_GETC"}, \ + {0x1b0, "H_DISABLE_AND_GET"}, \ {0x1b4, "H_ERROR_DATA"}, \ {0x1b8, "H_GET_HCA_INFO"}, \ {0x1bc, "H_GET_PERF_COUNT"}, \ diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c index 0453c50c949c..55ef15184057 100644 --- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -9,7 +9,17 @@ * This selftest exercises the powerpc alignment fault handler. * * We create two sets of source and destination buffers, one in regular memory, - * the other cache-inhibited (we use /dev/fb0 for this). + * the other cache-inhibited (by default we use /dev/fb0 for this, but an + * alterative path for cache-inhibited memory may be provided). + * + * One way to get cache-inhibited memory is to use the "mem" kernel parameter + * to limit the kernel to less memory than actually exists. Addresses above + * the limit may still be accessed but will be treated as cache-inhibited. For + * example, if there is actually 4GB of memory and the parameter "mem=3GB" is + * used, memory from address 0xC0000000 onwards is treated as cache-inhibited. + * To access this region /dev/mem is used. The kernel should be configured + * without CONFIG_STRICT_DEVMEM. In this case use: + * ./alignment_handler /dev/mem 0xc0000000 * * We initialise the source buffers, then use whichever set of load/store * instructions is under test to copy bytes from the source buffers to the @@ -48,11 +58,14 @@ #include #include "utils.h" +#include "instructions.h" int bufsize; int debug; int testing; volatile int gotsig; +char *cipath = "/dev/fb0"; +long cioffset; void sighandler(int sig, siginfo_t *info, void *ctx) { @@ -84,6 +97,17 @@ void sighandler(int sig, siginfo_t *info, void *ctx) } \ rc |= do_test(#name, test_##name) +#define TESTP(name, ld_op, st_op, ld_reg, st_reg) \ + void test_##name(char *s, char *d) \ + { \ + asm volatile( \ + ld_op(ld_reg, %0, 0, 0) \ + st_op(st_reg, %1, 0, 0) \ + :: "r"(s), "r"(d), "r"(0) \ + : "memory", "vs0", "vs32", "r31"); \ + } \ + rc |= do_test(#name, test_##name) + #define LOAD_VSX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 32, 32) #define STORE_VSX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 32) #define LOAD_VSX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 32, 32) @@ -103,6 +127,17 @@ void sighandler(int sig, siginfo_t *info, void *ctx) #define LOAD_FLOAT_XFORM_TEST(op) TEST(op, op, stfdx, XFORM, 0, 0) #define STORE_FLOAT_XFORM_TEST(op) TEST(op, lfdx, op, XFORM, 0, 0) +#define LOAD_MLS_PREFIX_TEST(op) TESTP(op, op, PSTD, 31, 31) +#define STORE_MLS_PREFIX_TEST(op) TESTP(op, PLD, op, 31, 31) + +#define LOAD_8LS_PREFIX_TEST(op) TESTP(op, op, PSTD, 31, 31) +#define STORE_8LS_PREFIX_TEST(op) TESTP(op, PLD, op, 31, 31) + +#define LOAD_FLOAT_MLS_PREFIX_TEST(op) TESTP(op, op, PSTFD, 0, 0) +#define STORE_FLOAT_MLS_PREFIX_TEST(op) TESTP(op, PLFD, op, 0, 0) + +#define LOAD_VSX_8LS_PREFIX_TEST(op, tail) TESTP(op, op, PSTXV ## tail, 0, 32) +#define STORE_VSX_8LS_PREFIX_TEST(op, tail) TESTP(op, PLXV ## tail, op, 32, 0) /* FIXME: Unimplemented tests: */ // STORE_DFORM_TEST(stq) /* FIXME: need two registers for quad */ @@ -195,17 +230,18 @@ int do_test(char *test_name, void (*test_func)(char *, char *)) printf("\tDoing %s:\t", test_name); - fd = open("/dev/fb0", O_RDWR); + fd = open(cipath, O_RDWR); if (fd < 0) { printf("\n"); - perror("Can't open /dev/fb0 now?"); + perror("Can't open ci file now?"); return 1; } - ci0 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED, - fd, 0x0); - ci1 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED, - fd, bufsize); + ci0 = mmap(NULL, bufsize, PROT_WRITE | PROT_READ, MAP_SHARED, + fd, cioffset); + ci1 = mmap(NULL, bufsize, PROT_WRITE | PROT_READ, MAP_SHARED, + fd, cioffset + bufsize); + if ((ci0 == MAP_FAILED) || (ci1 == MAP_FAILED)) { printf("\n"); perror("mmap failed"); @@ -270,11 +306,11 @@ int do_test(char *test_name, void (*test_func)(char *, char *)) return rc; } -static bool can_open_fb0(void) +static bool can_open_cifile(void) { int fd; - fd = open("/dev/fb0", O_RDWR); + fd = open(cipath, O_RDWR); if (fd < 0) return false; @@ -286,7 +322,7 @@ int test_alignment_handler_vsx_206(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); printf("VSX: 2.06B\n"); @@ -304,7 +340,7 @@ int test_alignment_handler_vsx_207(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); printf("VSX: 2.07B\n"); @@ -320,7 +356,7 @@ int test_alignment_handler_vsx_300(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00)); printf("VSX: 3.00B\n"); @@ -348,11 +384,30 @@ int test_alignment_handler_vsx_300(void) return rc; } +int test_alignment_handler_vsx_prefix(void) +{ + int rc = 0; + + SKIP_IF(!can_open_cifile()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + printf("VSX: PREFIX\n"); + LOAD_VSX_8LS_PREFIX_TEST(PLXSD, 0); + LOAD_VSX_8LS_PREFIX_TEST(PLXSSP, 0); + LOAD_VSX_8LS_PREFIX_TEST(PLXV0, 0); + LOAD_VSX_8LS_PREFIX_TEST(PLXV1, 1); + STORE_VSX_8LS_PREFIX_TEST(PSTXSD, 0); + STORE_VSX_8LS_PREFIX_TEST(PSTXSSP, 0); + STORE_VSX_8LS_PREFIX_TEST(PSTXV0, 0); + STORE_VSX_8LS_PREFIX_TEST(PSTXV1, 1); + return rc; +} + int test_alignment_handler_integer(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); printf("Integer\n"); LOAD_DFORM_TEST(lbz); @@ -408,7 +463,7 @@ int test_alignment_handler_integer_206(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); printf("Integer: 2.06\n"); @@ -419,11 +474,32 @@ int test_alignment_handler_integer_206(void) return rc; } +int test_alignment_handler_integer_prefix(void) +{ + int rc = 0; + + SKIP_IF(!can_open_cifile()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + printf("Integer: PREFIX\n"); + LOAD_MLS_PREFIX_TEST(PLBZ); + LOAD_MLS_PREFIX_TEST(PLHZ); + LOAD_MLS_PREFIX_TEST(PLHA); + LOAD_MLS_PREFIX_TEST(PLWZ); + LOAD_8LS_PREFIX_TEST(PLWA); + LOAD_8LS_PREFIX_TEST(PLD); + STORE_MLS_PREFIX_TEST(PSTB); + STORE_MLS_PREFIX_TEST(PSTH); + STORE_MLS_PREFIX_TEST(PSTW); + STORE_8LS_PREFIX_TEST(PSTD); + return rc; +} + int test_alignment_handler_vmx(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_ALTIVEC)); printf("VMX\n"); @@ -451,7 +527,7 @@ int test_alignment_handler_fp(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); printf("Floating point\n"); LOAD_FLOAT_DFORM_TEST(lfd); @@ -479,7 +555,7 @@ int test_alignment_handler_fp_205(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_05)); printf("Floating point: 2.05\n"); @@ -497,7 +573,7 @@ int test_alignment_handler_fp_206(void) { int rc = 0; - SKIP_IF(!can_open_fb0()); + SKIP_IF(!can_open_cifile()); SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); printf("Floating point: 2.06\n"); @@ -507,13 +583,32 @@ int test_alignment_handler_fp_206(void) return rc; } + +int test_alignment_handler_fp_prefix(void) +{ + int rc = 0; + + SKIP_IF(!can_open_cifile()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + printf("Floating point: PREFIX\n"); + LOAD_FLOAT_DFORM_TEST(lfs); + LOAD_FLOAT_MLS_PREFIX_TEST(PLFS); + LOAD_FLOAT_MLS_PREFIX_TEST(PLFD); + STORE_FLOAT_MLS_PREFIX_TEST(PSTFS); + STORE_FLOAT_MLS_PREFIX_TEST(PSTFD); + return rc; +} + void usage(char *prog) { - printf("Usage: %s [options]\n", prog); + printf("Usage: %s [options] [path [offset]]\n", prog); printf(" -d Enable debug error output\n"); printf("\n"); - printf("This test requires a POWER8 or POWER9 CPU and a usable "); - printf("framebuffer at /dev/fb0.\n"); + printf("This test requires a POWER8, POWER9 or POWER10 CPU "); + printf("and either a usable framebuffer at /dev/fb0 or "); + printf("the path to usable cache inhibited memory and optional "); + printf("offset to be provided\n"); } int main(int argc, char *argv[]) @@ -533,6 +628,13 @@ int main(int argc, char *argv[]) exit(1); } } + argc -= optind; + argv += optind; + + if (argc > 0) + cipath = argv[0]; + if (argc > 1) + cioffset = strtol(argv[1], 0, 0x10); bufsize = getpagesize(); @@ -552,10 +654,14 @@ int main(int argc, char *argv[]) "test_alignment_handler_vsx_207"); rc |= test_harness(test_alignment_handler_vsx_300, "test_alignment_handler_vsx_300"); + rc |= test_harness(test_alignment_handler_vsx_prefix, + "test_alignment_handler_vsx_prefix"); rc |= test_harness(test_alignment_handler_integer, "test_alignment_handler_integer"); rc |= test_harness(test_alignment_handler_integer_206, "test_alignment_handler_integer_206"); + rc |= test_harness(test_alignment_handler_integer_prefix, + "test_alignment_handler_integer_prefix"); rc |= test_harness(test_alignment_handler_vmx, "test_alignment_handler_vmx"); rc |= test_harness(test_alignment_handler_fp, @@ -564,5 +670,7 @@ int main(int argc, char *argv[]) "test_alignment_handler_fp_205"); rc |= test_harness(test_alignment_handler_fp_206, "test_alignment_handler_fp_206"); + rc |= test_harness(test_alignment_handler_fp_prefix, + "test_alignment_handler_fp_prefix"); return rc; } diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c index a2e8c9da7fa5..d50cc05df495 100644 --- a/tools/testing/selftests/powerpc/benchmarks/context_switch.c +++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -104,8 +105,9 @@ static void start_thread_on(void *(*fn)(void *), void *arg, unsigned long cpu) static void start_process_on(void *(*fn)(void *), void *arg, unsigned long cpu) { - int pid; - cpu_set_t cpuset; + int pid, ncpus; + cpu_set_t *cpuset; + size_t size; pid = fork(); if (pid == -1) { @@ -116,14 +118,23 @@ static void start_process_on(void *(*fn)(void *), void *arg, unsigned long cpu) if (pid) return; - CPU_ZERO(&cpuset); - CPU_SET(cpu, &cpuset); + ncpus = get_nprocs(); + size = CPU_ALLOC_SIZE(ncpus); + cpuset = CPU_ALLOC(ncpus); + if (!cpuset) { + perror("malloc"); + exit(1); + } + CPU_ZERO_S(size, cpuset); + CPU_SET_S(cpu, size, cpuset); - if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) { + if (sched_setaffinity(0, size, cpuset)) { perror("sched_setaffinity"); + CPU_FREE(cpuset); exit(1); } + CPU_FREE(cpuset); fn(arg); exit(0); diff --git a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh index f52ed92b53e7..00dc32c0ed75 100755 --- a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh +++ b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh @@ -5,12 +5,17 @@ pe_ok() { local dev="$1" local path="/sys/bus/pci/devices/$dev/eeh_pe_state" - if ! [ -e "$path" ] ; then + # if a driver doesn't support the error handling callbacks then the + # device is recovered by removing and re-probing it. This causes the + # sysfs directory to disappear so read the PE state once and squash + # any potential error messages + local eeh_state="$(cat $path 2>/dev/null)" + if [ -z "$eeh_state" ]; then return 1; fi - local fw_state="$(cut -d' ' -f1 < $path)" - local sw_state="$(cut -d' ' -f2 < $path)" + local fw_state="$(echo $eeh_state | cut -d' ' -f1)" + local sw_state="$(echo $eeh_state | cut -d' ' -f2)" # If EEH_PE_ISOLATED or EEH_PE_RECOVERING are set then the PE is in an # error state or being recovered. Either way, not ok. diff --git a/tools/testing/selftests/powerpc/include/instructions.h b/tools/testing/selftests/powerpc/include/instructions.h index f36061eb6f0f..4efa6314bd96 100644 --- a/tools/testing/selftests/powerpc/include/instructions.h +++ b/tools/testing/selftests/powerpc/include/instructions.h @@ -66,4 +66,81 @@ static inline int paste_last(void *i) #define PPC_INST_PASTE __PASTE(0, 0, 0, 0) #define PPC_INST_PASTE_LAST __PASTE(0, 0, 1, 1) +/* This defines the prefixed load/store instructions */ +#ifdef __ASSEMBLY__ +# define stringify_in_c(...) __VA_ARGS__ +#else +# define __stringify_in_c(...) #__VA_ARGS__ +# define stringify_in_c(...) __stringify_in_c(__VA_ARGS__) " " +#endif + +#define __PPC_RA(a) (((a) & 0x1f) << 16) +#define __PPC_RS(s) (((s) & 0x1f) << 21) +#define __PPC_RT(t) __PPC_RS(t) +#define __PPC_PREFIX_R(r) (((r) & 0x1) << 20) + +#define PPC_PREFIX_MLS 0x06000000 +#define PPC_PREFIX_8LS 0x04000000 + +#define PPC_INST_LBZ 0x88000000 +#define PPC_INST_LHZ 0xa0000000 +#define PPC_INST_LHA 0xa8000000 +#define PPC_INST_LWZ 0x80000000 +#define PPC_INST_STB 0x98000000 +#define PPC_INST_STH 0xb0000000 +#define PPC_INST_STW 0x90000000 +#define PPC_INST_STD 0xf8000000 +#define PPC_INST_LFS 0xc0000000 +#define PPC_INST_LFD 0xc8000000 +#define PPC_INST_STFS 0xd0000000 +#define PPC_INST_STFD 0xd8000000 + +#define PREFIX_MLS(instr, t, a, r, d) stringify_in_c(.balign 64, , 4;) \ + stringify_in_c(.long PPC_PREFIX_MLS | \ + __PPC_PREFIX_R(r) | \ + (((d) >> 16) & 0x3ffff);) \ + stringify_in_c(.long (instr) | \ + __PPC_RT(t) | \ + __PPC_RA(a) | \ + ((d) & 0xffff);\n) + +#define PREFIX_8LS(instr, t, a, r, d) stringify_in_c(.balign 64, , 4;) \ + stringify_in_c(.long PPC_PREFIX_8LS | \ + __PPC_PREFIX_R(r) | \ + (((d) >> 16) & 0x3ffff);) \ + stringify_in_c(.long (instr) | \ + __PPC_RT(t) | \ + __PPC_RA(a) | \ + ((d) & 0xffff);\n) + +/* Prefixed Integer Load/Store instructions */ +#define PLBZ(t, a, r, d) PREFIX_MLS(PPC_INST_LBZ, t, a, r, d) +#define PLHZ(t, a, r, d) PREFIX_MLS(PPC_INST_LHZ, t, a, r, d) +#define PLHA(t, a, r, d) PREFIX_MLS(PPC_INST_LHA, t, a, r, d) +#define PLWZ(t, a, r, d) PREFIX_MLS(PPC_INST_LWZ, t, a, r, d) +#define PLWA(t, a, r, d) PREFIX_8LS(0xa4000000, t, a, r, d) +#define PLD(t, a, r, d) PREFIX_8LS(0xe4000000, t, a, r, d) +#define PLQ(t, a, r, d) PREFIX_8LS(0xe0000000, t, a, r, d) +#define PSTB(s, a, r, d) PREFIX_MLS(PPC_INST_STB, s, a, r, d) +#define PSTH(s, a, r, d) PREFIX_MLS(PPC_INST_STH, s, a, r, d) +#define PSTW(s, a, r, d) PREFIX_MLS(PPC_INST_STW, s, a, r, d) +#define PSTD(s, a, r, d) PREFIX_8LS(0xf4000000, s, a, r, d) +#define PSTQ(s, a, r, d) PREFIX_8LS(0xf0000000, s, a, r, d) + +/* Prefixed Floating-Point Load/Store Instructions */ +#define PLFS(frt, a, r, d) PREFIX_MLS(PPC_INST_LFS, frt, a, r, d) +#define PLFD(frt, a, r, d) PREFIX_MLS(PPC_INST_LFD, frt, a, r, d) +#define PSTFS(frs, a, r, d) PREFIX_MLS(PPC_INST_STFS, frs, a, r, d) +#define PSTFD(frs, a, r, d) PREFIX_MLS(PPC_INST_STFD, frs, a, r, d) + +/* Prefixed VSX Load/Store Instructions */ +#define PLXSD(vrt, a, r, d) PREFIX_8LS(0xa8000000, vrt, a, r, d) +#define PLXSSP(vrt, a, r, d) PREFIX_8LS(0xac000000, vrt, a, r, d) +#define PLXV0(s, a, r, d) PREFIX_8LS(0xc8000000, s, a, r, d) +#define PLXV1(s, a, r, d) PREFIX_8LS(0xcc000000, s, a, r, d) +#define PSTXSD(vrs, a, r, d) PREFIX_8LS(0xb8000000, vrs, a, r, d) +#define PSTXSSP(vrs, a, r, d) PREFIX_8LS(0xbc000000, vrs, a, r, d) +#define PSTXV0(s, a, r, d) PREFIX_8LS(0xd8000000, s, a, r, d) +#define PSTXV1(s, a, r, d) PREFIX_8LS(0xdc000000, s, a, r, d) + #endif /* _SELFTESTS_POWERPC_INSTRUCTIONS_H */ diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h new file mode 100644 index 000000000000..3312cb1b058d --- /dev/null +++ b/tools/testing/selftests/powerpc/include/pkeys.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2020, Sandipan Das, IBM Corp. + */ + +#ifndef _SELFTESTS_POWERPC_PKEYS_H +#define _SELFTESTS_POWERPC_PKEYS_H + +#include + +#include "reg.h" +#include "utils.h" + +/* + * Older versions of libc use the Intel-specific access rights. + * Hence, override the definitions as they might be incorrect. + */ +#undef PKEY_DISABLE_ACCESS +#define PKEY_DISABLE_ACCESS 0x3 + +#undef PKEY_DISABLE_WRITE +#define PKEY_DISABLE_WRITE 0x2 + +#undef PKEY_DISABLE_EXECUTE +#define PKEY_DISABLE_EXECUTE 0x4 + +/* Older versions of libc do not not define this */ +#ifndef SEGV_PKUERR +#define SEGV_PKUERR 4 +#endif + +#define SI_PKEY_OFFSET 0x20 + +#define __NR_pkey_mprotect 386 +#define __NR_pkey_alloc 384 +#define __NR_pkey_free 385 + +#define PKEY_BITS_PER_PKEY 2 +#define NR_PKEYS 32 +#define PKEY_BITS_MASK ((1UL << PKEY_BITS_PER_PKEY) - 1) + +inline unsigned long pkeyreg_get(void) +{ + return mfspr(SPRN_AMR); +} + +inline void pkeyreg_set(unsigned long amr) +{ + set_amr(amr); +} + +void pkey_set_rights(int pkey, unsigned long rights) +{ + unsigned long amr, shift; + + shift = (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY; + amr = pkeyreg_get(); + amr &= ~(PKEY_BITS_MASK << shift); + amr |= (rights & PKEY_BITS_MASK) << shift; + pkeyreg_set(amr); +} + +int sys_pkey_mprotect(void *addr, size_t len, int prot, int pkey) +{ + return syscall(__NR_pkey_mprotect, addr, len, prot, pkey); +} + +int sys_pkey_alloc(unsigned long flags, unsigned long rights) +{ + return syscall(__NR_pkey_alloc, flags, rights); +} + +int sys_pkey_free(int pkey) +{ + return syscall(__NR_pkey_free, pkey); +} + +int pkeys_unsupported(void) +{ + bool hash_mmu = false; + int pkey; + + /* Protection keys are currently supported on Hash MMU only */ + FAIL_IF(using_hash_mmu(&hash_mmu)); + SKIP_IF(!hash_mmu); + + /* Check if the system call is supported */ + pkey = sys_pkey_alloc(0, 0); + SKIP_IF(pkey < 0); + sys_pkey_free(pkey); + + return 0; +} + +int siginfo_pkey(siginfo_t *si) +{ + /* + * In older versions of libc, siginfo_t does not have si_pkey as + * a member. + */ +#ifdef si_pkey + return si->si_pkey; +#else + return *((int *)(((char *) si) + SI_PKEY_OFFSET)); +#endif +} + +#define pkey_rights(r) ({ \ + static char buf[4] = "rwx"; \ + unsigned int amr_bits; \ + if ((r) & PKEY_DISABLE_EXECUTE) \ + buf[2] = '-'; \ + amr_bits = (r) & PKEY_BITS_MASK; \ + if (amr_bits & PKEY_DISABLE_WRITE) \ + buf[1] = '-'; \ + if (amr_bits & PKEY_DISABLE_ACCESS & ~PKEY_DISABLE_WRITE) \ + buf[0] = '-'; \ + buf; \ +}) + +unsigned long next_pkey_rights(unsigned long rights) +{ + if (rights == PKEY_DISABLE_ACCESS) + return PKEY_DISABLE_EXECUTE; + else if (rights == (PKEY_DISABLE_ACCESS | PKEY_DISABLE_EXECUTE)) + return 0; + + if ((rights & PKEY_BITS_MASK) == 0) + rights |= PKEY_DISABLE_WRITE; + else if ((rights & PKEY_BITS_MASK) == PKEY_DISABLE_WRITE) + rights |= PKEY_DISABLE_ACCESS; + + return rights; +} + +#endif /* _SELFTESTS_POWERPC_PKEYS_H */ diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h index 022c5076b2c5..c0f2742a3a59 100644 --- a/tools/testing/selftests/powerpc/include/reg.h +++ b/tools/testing/selftests/powerpc/include/reg.h @@ -57,6 +57,12 @@ #define SPRN_PPR 896 /* Program Priority Register */ #define SPRN_AMR 13 /* Authority Mask Register - problem state */ +#define set_amr(v) asm volatile("isync;" \ + "mtspr " __stringify(SPRN_AMR) ",%0;" \ + "isync" : \ + : "r" ((unsigned long)(v)) \ + : "memory") + /* TEXASR register bits */ #define TEXASR_FC 0xFE00000000000000 #define TEXASR_FP 0x0100000000000000 diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index e089a0c30d9a..71d2924f5b8b 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -42,6 +42,16 @@ int perf_event_enable(int fd); int perf_event_disable(int fd); int perf_event_reset(int fd); +#if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 30) +#include +#include + +static inline pid_t gettid(void) +{ + return syscall(SYS_gettid); +} +#endif + static inline bool have_hwcap(unsigned long ftr) { return ((unsigned long)get_auxv_entry(AT_HWCAP) & ftr) == ftr; @@ -60,6 +70,7 @@ static inline bool have_hwcap2(unsigned long ftr2) #endif bool is_ppc64le(void); +int using_hash_mmu(bool *using_hash); /* Yes, this is evil */ #define FAIL_IF(x) \ @@ -71,6 +82,15 @@ do { \ } \ } while (0) +#define FAIL_IF_EXIT(x) \ +do { \ + if ((x)) { \ + fprintf(stderr, \ + "[FAIL] Test FAILED on line %d\n", __LINE__); \ + _exit(1); \ + } \ +} while (0) + /* The test harness uses this, yes it's gross */ #define MAGIC_SKIP_RETURN_VALUE 99 @@ -96,11 +116,20 @@ do { \ #define _str(s) #s #define str(s) _str(s) +#define sigsafe_err(msg) ({ \ + ssize_t nbytes __attribute__((unused)); \ + nbytes = write(STDERR_FILENO, msg, strlen(msg)); }) + /* POWER9 feature */ #ifndef PPC_FEATURE2_ARCH_3_00 #define PPC_FEATURE2_ARCH_3_00 0x00800000 #endif +/* POWER10 feature */ +#ifndef PPC_FEATURE2_ARCH_3_1 +#define PPC_FEATURE2_ARCH_3_1 0x00040000 +#endif + #if defined(__powerpc64__) #define UCONTEXT_NIA(UC) (UC)->uc_mcontext.gp_regs[PT_NIP] #define UCONTEXT_MSR(UC) (UC)->uc_mcontext.gp_regs[PT_MSR] diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore index e31ca6f453ed..d0c23b2e4b60 100644 --- a/tools/testing/selftests/powerpc/math/.gitignore +++ b/tools/testing/selftests/powerpc/math/.gitignore @@ -6,3 +6,4 @@ vmx_preempt fpu_signal vmx_signal vsx_preempt +fpu_denormal diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile index 11a10d7a2bbd..fcc91c205984 100644 --- a/tools/testing/selftests/powerpc/math/Makefile +++ b/tools/testing/selftests/powerpc/math/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal vsx_preempt +TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal fpu_denormal vmx_syscall vmx_preempt vmx_signal vsx_preempt top_srcdir = ../../../../.. include ../../lib.mk @@ -11,9 +11,9 @@ $(OUTPUT)/fpu_syscall: fpu_asm.S $(OUTPUT)/fpu_preempt: fpu_asm.S $(OUTPUT)/fpu_signal: fpu_asm.S -$(OUTPUT)/vmx_syscall: vmx_asm.S -$(OUTPUT)/vmx_preempt: vmx_asm.S -$(OUTPUT)/vmx_signal: vmx_asm.S +$(OUTPUT)/vmx_syscall: vmx_asm.S ../utils.c +$(OUTPUT)/vmx_preempt: vmx_asm.S ../utils.c +$(OUTPUT)/vmx_signal: vmx_asm.S ../utils.c $(OUTPUT)/vsx_preempt: CFLAGS += -mvsx -$(OUTPUT)/vsx_preempt: vsx_asm.S +$(OUTPUT)/vsx_preempt: vsx_asm.S ../utils.c diff --git a/tools/testing/selftests/powerpc/math/fpu_denormal.c b/tools/testing/selftests/powerpc/math/fpu_denormal.c new file mode 100644 index 000000000000..5f96682abaa8 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/fpu_denormal.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright IBM Corp. 2020 + * + * This test attempts to cause a FP denormal exception on POWER8 CPUs. Unfortunately + * if the denormal handler is not configured or working properly, this can cause a bad + * crash in kernel mode when the kernel tries to save FP registers when the process + * exits. + */ + +#include +#include + +#include "utils.h" + +static int test_denormal_fpu(void) +{ + unsigned int m32; + unsigned long m64; + volatile float f; + volatile double d; + + /* try to induce lfs ; stfd */ + + m32 = 0x00715fcf; /* random denormal */ + memcpy((float *)&f, &m32, sizeof(f)); + d = f; + memcpy(&m64, (double *)&d, sizeof(d)); + + FAIL_IF((long)(m64 != 0x380c57f3c0000000)); /* renormalised value */ + + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_denormal_fpu, "fpu_denormal"); +} diff --git a/tools/testing/selftests/powerpc/math/vmx_preempt.c b/tools/testing/selftests/powerpc/math/vmx_preempt.c index 2e059f154e77..6761d6ce30ec 100644 --- a/tools/testing/selftests/powerpc/math/vmx_preempt.c +++ b/tools/testing/selftests/powerpc/math/vmx_preempt.c @@ -57,6 +57,9 @@ int test_preempt_vmx(void) int i, rc, threads; pthread_t *tids; + // vcmpequd used in vmx_asm.S is v2.07 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR; tids = malloc(threads * sizeof(pthread_t)); FAIL_IF(!tids); diff --git a/tools/testing/selftests/powerpc/math/vmx_signal.c b/tools/testing/selftests/powerpc/math/vmx_signal.c index 785a48e0976f..b340a5c4e79d 100644 --- a/tools/testing/selftests/powerpc/math/vmx_signal.c +++ b/tools/testing/selftests/powerpc/math/vmx_signal.c @@ -96,6 +96,9 @@ int test_signal_vmx(void) void *rc_p; pthread_t *tids; + // vcmpequd used in vmx_asm.S is v2.07 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR; tids = malloc(threads * sizeof(pthread_t)); FAIL_IF(!tids); diff --git a/tools/testing/selftests/powerpc/math/vmx_syscall.c b/tools/testing/selftests/powerpc/math/vmx_syscall.c index 9ee293cc868e..03c78dfe3444 100644 --- a/tools/testing/selftests/powerpc/math/vmx_syscall.c +++ b/tools/testing/selftests/powerpc/math/vmx_syscall.c @@ -49,9 +49,14 @@ int test_vmx_syscall(void) * Setup an environment with much context switching */ pid_t pid2; - pid_t pid = fork(); + pid_t pid; int ret; int child_ret; + + // vcmpequd used in vmx_asm.S is v2.07 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + + pid = fork(); FAIL_IF(pid == -1); pid2 = fork(); diff --git a/tools/testing/selftests/powerpc/math/vsx_preempt.c b/tools/testing/selftests/powerpc/math/vsx_preempt.c index 63de9c6e2cd3..d1601bb889d4 100644 --- a/tools/testing/selftests/powerpc/math/vsx_preempt.c +++ b/tools/testing/selftests/powerpc/math/vsx_preempt.c @@ -92,6 +92,8 @@ int test_preempt_vsx(void) int i, rc, threads; pthread_t *tids; + SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX)); + threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR; tids = malloc(threads * sizeof(pthread_t)); FAIL_IF(!tids); diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index 2ca523255b1b..91c775c23c66 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -2,9 +2,12 @@ hugetlb_vs_thp_test subpage_prot tempfile -prot_sao segv_errors wild_bctr large_vm_fork_separation bad_accesses tlbie_test +pkey_exec_prot +pkey_siginfo +stack_expansion_ldst +stack_expansion_signal diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index b9103c4bb414..250ce172e0da 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -2,23 +2,31 @@ noarg: $(MAKE) -C ../ -TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \ - large_vm_fork_separation bad_accesses +TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot segv_errors wild_bctr \ + large_vm_fork_separation bad_accesses pkey_exec_prot \ + pkey_siginfo stack_expansion_signal stack_expansion_ldst + TEST_GEN_PROGS_EXTENDED := tlbie_test TEST_GEN_FILES := tempfile top_srcdir = ../../../../.. include ../../lib.mk -$(TEST_GEN_PROGS): ../harness.c - -$(OUTPUT)/prot_sao: ../utils.c +$(TEST_GEN_PROGS): ../harness.c ../utils.c $(OUTPUT)/wild_bctr: CFLAGS += -m64 $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64 $(OUTPUT)/bad_accesses: CFLAGS += -m64 +$(OUTPUT)/pkey_exec_prot: CFLAGS += -m64 +$(OUTPUT)/pkey_siginfo: CFLAGS += -m64 + +$(OUTPUT)/stack_expansion_signal: ../utils.c ../pmu/lib.c + +$(OUTPUT)/stack_expansion_ldst: CFLAGS += -fno-stack-protector +$(OUTPUT)/stack_expansion_ldst: ../utils.c $(OUTPUT)/tempfile: dd if=/dev/zero of=$@ bs=64k count=1 $(OUTPUT)/tlbie_test: LDLIBS += -lpthread +$(OUTPUT)/pkey_siginfo: LDLIBS += -lpthread diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c index adc465f499ef..a864ed7e2008 100644 --- a/tools/testing/selftests/powerpc/mm/bad_accesses.c +++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c @@ -64,34 +64,6 @@ int bad_access(char *p, bool write) return 0; } -static int using_hash_mmu(bool *using_hash) -{ - char line[128]; - FILE *f; - int rc; - - f = fopen("/proc/cpuinfo", "r"); - FAIL_IF(!f); - - rc = 0; - while (fgets(line, sizeof(line), f) != NULL) { - if (strcmp(line, "MMU : Hash\n") == 0) { - *using_hash = true; - goto out; - } - - if (strcmp(line, "MMU : Radix\n") == 0) { - *using_hash = false; - goto out; - } - } - - rc = -1; -out: - fclose(f); - return rc; -} - static int test(void) { unsigned long i, j, addr, region_shift, page_shift, page_size; diff --git a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c new file mode 100644 index 000000000000..9e5c7f3f498a --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright 2020, Sandipan Das, IBM Corp. + * + * Test if applying execute protection on pages using memory + * protection keys works as expected. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include + +#include + +#include "pkeys.h" + +#define PPC_INST_NOP 0x60000000 +#define PPC_INST_TRAP 0x7fe00008 +#define PPC_INST_BLR 0x4e800020 + +static volatile sig_atomic_t fault_pkey, fault_code, fault_type; +static volatile sig_atomic_t remaining_faults; +static volatile unsigned int *fault_addr; +static unsigned long pgsize, numinsns; +static unsigned int *insns; + +static void trap_handler(int signum, siginfo_t *sinfo, void *ctx) +{ + /* Check if this fault originated from the expected address */ + if (sinfo->si_addr != (void *) fault_addr) + sigsafe_err("got a fault for an unexpected address\n"); + + _exit(1); +} + +static void segv_handler(int signum, siginfo_t *sinfo, void *ctx) +{ + int signal_pkey; + + signal_pkey = siginfo_pkey(sinfo); + fault_code = sinfo->si_code; + + /* Check if this fault originated from the expected address */ + if (sinfo->si_addr != (void *) fault_addr) { + sigsafe_err("got a fault for an unexpected address\n"); + _exit(1); + } + + /* Check if too many faults have occurred for a single test case */ + if (!remaining_faults) { + sigsafe_err("got too many faults for the same address\n"); + _exit(1); + } + + + /* Restore permissions in order to continue */ + switch (fault_code) { + case SEGV_ACCERR: + if (mprotect(insns, pgsize, PROT_READ | PROT_WRITE)) { + sigsafe_err("failed to set access permissions\n"); + _exit(1); + } + break; + case SEGV_PKUERR: + if (signal_pkey != fault_pkey) { + sigsafe_err("got a fault for an unexpected pkey\n"); + _exit(1); + } + + switch (fault_type) { + case PKEY_DISABLE_ACCESS: + pkey_set_rights(fault_pkey, 0); + break; + case PKEY_DISABLE_EXECUTE: + /* + * Reassociate the exec-only pkey with the region + * to be able to continue. Unlike AMR, we cannot + * set IAMR directly from userspace to restore the + * permissions. + */ + if (mprotect(insns, pgsize, PROT_EXEC)) { + sigsafe_err("failed to set execute permissions\n"); + _exit(1); + } + break; + default: + sigsafe_err("got a fault with an unexpected type\n"); + _exit(1); + } + break; + default: + sigsafe_err("got a fault with an unexpected code\n"); + _exit(1); + } + + remaining_faults--; +} + +static int test(void) +{ + struct sigaction segv_act, trap_act; + unsigned long rights; + int pkey, ret, i; + + ret = pkeys_unsupported(); + if (ret) + return ret; + + /* Setup SIGSEGV handler */ + segv_act.sa_handler = 0; + segv_act.sa_sigaction = segv_handler; + FAIL_IF(sigprocmask(SIG_SETMASK, 0, &segv_act.sa_mask) != 0); + segv_act.sa_flags = SA_SIGINFO; + segv_act.sa_restorer = 0; + FAIL_IF(sigaction(SIGSEGV, &segv_act, NULL) != 0); + + /* Setup SIGTRAP handler */ + trap_act.sa_handler = 0; + trap_act.sa_sigaction = trap_handler; + FAIL_IF(sigprocmask(SIG_SETMASK, 0, &trap_act.sa_mask) != 0); + trap_act.sa_flags = SA_SIGINFO; + trap_act.sa_restorer = 0; + FAIL_IF(sigaction(SIGTRAP, &trap_act, NULL) != 0); + + /* Setup executable region */ + pgsize = getpagesize(); + numinsns = pgsize / sizeof(unsigned int); + insns = (unsigned int *) mmap(NULL, pgsize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + FAIL_IF(insns == MAP_FAILED); + + /* Write the instruction words */ + for (i = 1; i < numinsns - 1; i++) + insns[i] = PPC_INST_NOP; + + /* + * Set the first instruction as an unconditional trap. If + * the last write to this address succeeds, this should + * get overwritten by a no-op. + */ + insns[0] = PPC_INST_TRAP; + + /* + * Later, to jump to the executable region, we use a branch + * and link instruction (bctrl) which sets the return address + * automatically in LR. Use that to return back. + */ + insns[numinsns - 1] = PPC_INST_BLR; + + /* Allocate a pkey that restricts execution */ + rights = PKEY_DISABLE_EXECUTE; + pkey = sys_pkey_alloc(0, rights); + FAIL_IF(pkey < 0); + + /* + * Pick the first instruction's address from the executable + * region. + */ + fault_addr = insns; + + /* The following two cases will avoid SEGV_PKUERR */ + fault_type = -1; + fault_pkey = -1; + + /* + * Read an instruction word from the address when AMR bits + * are not set i.e. the pkey permits both read and write + * access. + * + * This should not generate a fault as having PROT_EXEC + * implies PROT_READ on GNU systems. The pkey currently + * restricts execution only based on the IAMR bits. The + * AMR bits are cleared. + */ + remaining_faults = 0; + FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0); + printf("read from %p, pkey permissions are %s\n", fault_addr, + pkey_rights(rights)); + i = *fault_addr; + FAIL_IF(remaining_faults != 0); + + /* + * Write an instruction word to the address when AMR bits + * are not set i.e. the pkey permits both read and write + * access. + * + * This should generate an access fault as having just + * PROT_EXEC also restricts writes. The pkey currently + * restricts execution only based on the IAMR bits. The + * AMR bits are cleared. + */ + remaining_faults = 1; + FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0); + printf("write to %p, pkey permissions are %s\n", fault_addr, + pkey_rights(rights)); + *fault_addr = PPC_INST_TRAP; + FAIL_IF(remaining_faults != 0 || fault_code != SEGV_ACCERR); + + /* The following three cases will generate SEGV_PKUERR */ + rights |= PKEY_DISABLE_ACCESS; + fault_type = PKEY_DISABLE_ACCESS; + fault_pkey = pkey; + + /* + * Read an instruction word from the address when AMR bits + * are set i.e. the pkey permits neither read nor write + * access. + * + * This should generate a pkey fault based on AMR bits only + * as having PROT_EXEC implicitly allows reads. + */ + remaining_faults = 1; + FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0); + pkey_set_rights(pkey, rights); + printf("read from %p, pkey permissions are %s\n", fault_addr, + pkey_rights(rights)); + i = *fault_addr; + FAIL_IF(remaining_faults != 0 || fault_code != SEGV_PKUERR); + + /* + * Write an instruction word to the address when AMR bits + * are set i.e. the pkey permits neither read nor write + * access. + * + * This should generate two faults. First, a pkey fault + * based on AMR bits and then an access fault since + * PROT_EXEC does not allow writes. + */ + remaining_faults = 2; + FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0); + pkey_set_rights(pkey, rights); + printf("write to %p, pkey permissions are %s\n", fault_addr, + pkey_rights(rights)); + *fault_addr = PPC_INST_NOP; + FAIL_IF(remaining_faults != 0 || fault_code != SEGV_ACCERR); + + /* Free the current pkey */ + sys_pkey_free(pkey); + + rights = 0; + do { + /* + * Allocate pkeys with all valid combinations of read, + * write and execute restrictions. + */ + pkey = sys_pkey_alloc(0, rights); + FAIL_IF(pkey < 0); + + /* + * Jump to the executable region. AMR bits may or may not + * be set but they should not affect execution. + * + * This should generate pkey faults based on IAMR bits which + * may be set to restrict execution. + * + * The first iteration also checks if the overwrite of the + * first instruction word from a trap to a no-op succeeded. + */ + fault_pkey = pkey; + fault_type = -1; + remaining_faults = 0; + if (rights & PKEY_DISABLE_EXECUTE) { + fault_type = PKEY_DISABLE_EXECUTE; + remaining_faults = 1; + } + + FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0); + printf("execute at %p, pkey permissions are %s\n", fault_addr, + pkey_rights(rights)); + asm volatile("mtctr %0; bctrl" : : "r"(insns)); + FAIL_IF(remaining_faults != 0); + if (rights & PKEY_DISABLE_EXECUTE) + FAIL_IF(fault_code != SEGV_PKUERR); + + /* Free the current pkey */ + sys_pkey_free(pkey); + + /* Find next valid combination of pkey rights */ + rights = next_pkey_rights(rights); + } while (rights); + + /* Cleanup */ + munmap((void *) insns, pgsize); + + return 0; +} + +int main(void) +{ + test_harness(test, "pkey_exec_prot"); +} diff --git a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c new file mode 100644 index 000000000000..4f815d7c1214 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c @@ -0,0 +1,333 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2020, Sandipan Das, IBM Corp. + * + * Test if the signal information reports the correct memory protection + * key upon getting a key access violation fault for a page that was + * attempted to be protected by two different keys from two competing + * threads at the same time. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include + +#include +#include +#include + +#include "pkeys.h" + +#define PPC_INST_NOP 0x60000000 +#define PPC_INST_BLR 0x4e800020 +#define PROT_RWX (PROT_READ | PROT_WRITE | PROT_EXEC) + +#define NUM_ITERATIONS 1000000 + +static volatile sig_atomic_t perm_pkey, rest_pkey; +static volatile sig_atomic_t rights, fault_count; +static volatile unsigned int *volatile fault_addr; +static pthread_barrier_t iteration_barrier; + +static void segv_handler(int signum, siginfo_t *sinfo, void *ctx) +{ + void *pgstart; + size_t pgsize; + int pkey; + + pkey = siginfo_pkey(sinfo); + + /* Check if this fault originated from a pkey access violation */ + if (sinfo->si_code != SEGV_PKUERR) { + sigsafe_err("got a fault for an unexpected reason\n"); + _exit(1); + } + + /* Check if this fault originated from the expected address */ + if (sinfo->si_addr != (void *) fault_addr) { + sigsafe_err("got a fault for an unexpected address\n"); + _exit(1); + } + + /* Check if this fault originated from the restrictive pkey */ + if (pkey != rest_pkey) { + sigsafe_err("got a fault for an unexpected pkey\n"); + _exit(1); + } + + /* Check if too many faults have occurred for the same iteration */ + if (fault_count > 0) { + sigsafe_err("got too many faults for the same address\n"); + _exit(1); + } + + pgsize = getpagesize(); + pgstart = (void *) ((unsigned long) fault_addr & ~(pgsize - 1)); + + /* + * If the current fault occurred due to lack of execute rights, + * reassociate the page with the exec-only pkey since execute + * rights cannot be changed directly for the faulting pkey as + * IAMR is inaccessible from userspace. + * + * Otherwise, if the current fault occurred due to lack of + * read-write rights, change the AMR permission bits for the + * pkey. + * + * This will let the test continue. + */ + if (rights == PKEY_DISABLE_EXECUTE && + mprotect(pgstart, pgsize, PROT_EXEC)) + _exit(1); + else + pkey_set_rights(pkey, 0); + + fault_count++; +} + +struct region { + unsigned long rights; + unsigned int *base; + size_t size; +}; + +static void *protect(void *p) +{ + unsigned long rights; + unsigned int *base; + size_t size; + int tid, i; + + tid = gettid(); + base = ((struct region *) p)->base; + size = ((struct region *) p)->size; + FAIL_IF_EXIT(!base); + + /* No read, write and execute restrictions */ + rights = 0; + + printf("tid %d, pkey permissions are %s\n", tid, pkey_rights(rights)); + + /* Allocate the permissive pkey */ + perm_pkey = sys_pkey_alloc(0, rights); + FAIL_IF_EXIT(perm_pkey < 0); + + /* + * Repeatedly try to protect the common region with a permissive + * pkey + */ + for (i = 0; i < NUM_ITERATIONS; i++) { + /* + * Wait until the other thread has finished allocating the + * restrictive pkey or until the next iteration has begun + */ + pthread_barrier_wait(&iteration_barrier); + + /* Try to associate the permissive pkey with the region */ + FAIL_IF_EXIT(sys_pkey_mprotect(base, size, PROT_RWX, + perm_pkey)); + } + + /* Free the permissive pkey */ + sys_pkey_free(perm_pkey); + + return NULL; +} + +static void *protect_access(void *p) +{ + size_t size, numinsns; + unsigned int *base; + int tid, i; + + tid = gettid(); + base = ((struct region *) p)->base; + size = ((struct region *) p)->size; + rights = ((struct region *) p)->rights; + numinsns = size / sizeof(base[0]); + FAIL_IF_EXIT(!base); + + /* Allocate the restrictive pkey */ + rest_pkey = sys_pkey_alloc(0, rights); + FAIL_IF_EXIT(rest_pkey < 0); + + printf("tid %d, pkey permissions are %s\n", tid, pkey_rights(rights)); + printf("tid %d, %s randomly in range [%p, %p]\n", tid, + (rights == PKEY_DISABLE_EXECUTE) ? "execute" : + (rights == PKEY_DISABLE_WRITE) ? "write" : "read", + base, base + numinsns); + + /* + * Repeatedly try to protect the common region with a restrictive + * pkey and read, write or execute from it + */ + for (i = 0; i < NUM_ITERATIONS; i++) { + /* + * Wait until the other thread has finished allocating the + * permissive pkey or until the next iteration has begun + */ + pthread_barrier_wait(&iteration_barrier); + + /* Try to associate the restrictive pkey with the region */ + FAIL_IF_EXIT(sys_pkey_mprotect(base, size, PROT_RWX, + rest_pkey)); + + /* Choose a random instruction word address from the region */ + fault_addr = base + (rand() % numinsns); + fault_count = 0; + + switch (rights) { + /* Read protection test */ + case PKEY_DISABLE_ACCESS: + /* + * Read an instruction word from the region and + * verify if it has not been overwritten to + * something unexpected + */ + FAIL_IF_EXIT(*fault_addr != PPC_INST_NOP && + *fault_addr != PPC_INST_BLR); + break; + + /* Write protection test */ + case PKEY_DISABLE_WRITE: + /* + * Write an instruction word to the region and + * verify if the overwrite has succeeded + */ + *fault_addr = PPC_INST_BLR; + FAIL_IF_EXIT(*fault_addr != PPC_INST_BLR); + break; + + /* Execute protection test */ + case PKEY_DISABLE_EXECUTE: + /* Jump to the region and execute instructions */ + asm volatile( + "mtctr %0; bctrl" + : : "r"(fault_addr) : "ctr", "lr"); + break; + } + + /* + * Restore the restrictions originally imposed by the + * restrictive pkey as the signal handler would have + * cleared out the corresponding AMR bits + */ + pkey_set_rights(rest_pkey, rights); + } + + /* Free restrictive pkey */ + sys_pkey_free(rest_pkey); + + return NULL; +} + +static void reset_pkeys(unsigned long rights) +{ + int pkeys[NR_PKEYS], i; + + /* Exhaustively allocate all available pkeys */ + for (i = 0; i < NR_PKEYS; i++) + pkeys[i] = sys_pkey_alloc(0, rights); + + /* Free all allocated pkeys */ + for (i = 0; i < NR_PKEYS; i++) + sys_pkey_free(pkeys[i]); +} + +static int test(void) +{ + pthread_t prot_thread, pacc_thread; + struct sigaction act; + pthread_attr_t attr; + size_t numinsns; + struct region r; + int ret, i; + + srand(time(NULL)); + ret = pkeys_unsupported(); + if (ret) + return ret; + + /* Allocate the region */ + r.size = getpagesize(); + r.base = mmap(NULL, r.size, PROT_RWX, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + FAIL_IF(r.base == MAP_FAILED); + + /* + * Fill the region with no-ops with a branch at the end + * for returning to the caller + */ + numinsns = r.size / sizeof(r.base[0]); + for (i = 0; i < numinsns - 1; i++) + r.base[i] = PPC_INST_NOP; + r.base[i] = PPC_INST_BLR; + + /* Setup SIGSEGV handler */ + act.sa_handler = 0; + act.sa_sigaction = segv_handler; + FAIL_IF(sigprocmask(SIG_SETMASK, 0, &act.sa_mask) != 0); + act.sa_flags = SA_SIGINFO; + act.sa_restorer = 0; + FAIL_IF(sigaction(SIGSEGV, &act, NULL) != 0); + + /* + * For these tests, the parent process should clear all bits of + * AMR and IAMR, i.e. impose no restrictions, for all available + * pkeys. This will be the base for the initial AMR and IAMR + * values for all the test thread pairs. + * + * If the AMR and IAMR bits of all available pkeys are cleared + * before running the tests and a fault is generated when + * attempting to read, write or execute instructions from a + * pkey protected region, the pkey responsible for this must be + * the one from the protect-and-access thread since the other + * one is fully permissive. Despite that, if the pkey reported + * by siginfo is not the restrictive pkey, then there must be a + * kernel bug. + */ + reset_pkeys(0); + + /* Setup barrier for protect and protect-and-access threads */ + FAIL_IF(pthread_attr_init(&attr) != 0); + FAIL_IF(pthread_barrier_init(&iteration_barrier, NULL, 2) != 0); + + /* Setup and start protect and protect-and-read threads */ + puts("starting thread pair (protect, protect-and-read)"); + r.rights = PKEY_DISABLE_ACCESS; + FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0); + FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0); + FAIL_IF(pthread_join(prot_thread, NULL) != 0); + FAIL_IF(pthread_join(pacc_thread, NULL) != 0); + + /* Setup and start protect and protect-and-write threads */ + puts("starting thread pair (protect, protect-and-write)"); + r.rights = PKEY_DISABLE_WRITE; + FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0); + FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0); + FAIL_IF(pthread_join(prot_thread, NULL) != 0); + FAIL_IF(pthread_join(pacc_thread, NULL) != 0); + + /* Setup and start protect and protect-and-execute threads */ + puts("starting thread pair (protect, protect-and-execute)"); + r.rights = PKEY_DISABLE_EXECUTE; + FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0); + FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0); + FAIL_IF(pthread_join(prot_thread, NULL) != 0); + FAIL_IF(pthread_join(pacc_thread, NULL) != 0); + + /* Cleanup */ + FAIL_IF(pthread_attr_destroy(&attr) != 0); + FAIL_IF(pthread_barrier_destroy(&iteration_barrier) != 0); + munmap(r.base, r.size); + + return 0; +} + +int main(void) +{ + test_harness(test, "pkey_siginfo"); +} diff --git a/tools/testing/selftests/powerpc/mm/prot_sao.c b/tools/testing/selftests/powerpc/mm/prot_sao.c deleted file mode 100644 index e2eed65b7735..000000000000 --- a/tools/testing/selftests/powerpc/mm/prot_sao.c +++ /dev/null @@ -1,42 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright 2016, Michael Ellerman, IBM Corp. - */ - -#include -#include -#include -#include - -#include - -#include "utils.h" - -#define SIZE (64 * 1024) - -int test_prot_sao(void) -{ - char *p; - - /* 2.06 or later should support SAO */ - SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); - - /* - * Ensure we can ask for PROT_SAO. - * We can't really verify that it does the right thing, but at least we - * confirm the kernel will accept it. - */ - p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE | PROT_SAO, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - FAIL_IF(p == MAP_FAILED); - - /* Write to the mapping, to at least cause a fault */ - memset(p, 0xaa, SIZE); - - return 0; -} - -int main(void) -{ - return test_harness(test_prot_sao, "prot-sao"); -} diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c new file mode 100644 index 000000000000..ed9143990888 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test that loads/stores expand the stack segment, or trigger a SEGV, in + * various conditions. + * + * Based on test code by Tom Lane. + */ + +#undef NDEBUG +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define _KB (1024) +#define _MB (1024 * 1024) + +volatile char *stack_top_ptr; +volatile unsigned long stack_top_sp; +volatile char c; + +enum access_type { + LOAD, + STORE, +}; + +/* + * Consume stack until the stack pointer is below @target_sp, then do an access + * (load or store) at offset @delta from either the base of the stack or the + * current stack pointer. + */ +__attribute__ ((noinline)) +int consume_stack(unsigned long target_sp, unsigned long stack_high, int delta, enum access_type type) +{ + unsigned long target; + char stack_cur; + + if ((unsigned long)&stack_cur > target_sp) + return consume_stack(target_sp, stack_high, delta, type); + else { + // We don't really need this, but without it GCC might not + // generate a recursive call above. + stack_top_ptr = &stack_cur; + +#ifdef __powerpc__ + asm volatile ("mr %[sp], %%r1" : [sp] "=r" (stack_top_sp)); +#else + asm volatile ("mov %%rsp, %[sp]" : [sp] "=r" (stack_top_sp)); +#endif + target = stack_high - delta + 1; + volatile char *p = (char *)target; + + if (type == STORE) + *p = c; + else + c = *p; + + // Do something to prevent the stack frame being popped prior to + // our access above. + getpid(); + } + + return 0; +} + +static int search_proc_maps(char *needle, unsigned long *low, unsigned long *high) +{ + unsigned long start, end; + static char buf[4096]; + char name[128]; + FILE *f; + int rc; + + f = fopen("/proc/self/maps", "r"); + if (!f) { + perror("fopen"); + return -1; + } + + while (fgets(buf, sizeof(buf), f)) { + rc = sscanf(buf, "%lx-%lx %*c%*c%*c%*c %*x %*d:%*d %*d %127s\n", + &start, &end, name); + if (rc == 2) + continue; + + if (rc != 3) { + printf("sscanf errored\n"); + rc = -1; + break; + } + + if (strstr(name, needle)) { + *low = start; + *high = end - 1; + rc = 0; + break; + } + } + + fclose(f); + + return rc; +} + +int child(unsigned int stack_used, int delta, enum access_type type) +{ + unsigned long low, stack_high; + + assert(search_proc_maps("[stack]", &low, &stack_high) == 0); + + assert(consume_stack(stack_high - stack_used, stack_high, delta, type) == 0); + + printf("Access OK: %s delta %-7d used size 0x%06x stack high 0x%lx top_ptr %p top sp 0x%lx actual used 0x%lx\n", + type == LOAD ? "load" : "store", delta, stack_used, stack_high, + stack_top_ptr, stack_top_sp, stack_high - stack_top_sp + 1); + + return 0; +} + +static int test_one(unsigned int stack_used, int delta, enum access_type type) +{ + pid_t pid; + int rc; + + pid = fork(); + if (pid == 0) + exit(child(stack_used, delta, type)); + + assert(waitpid(pid, &rc, 0) != -1); + + if (WIFEXITED(rc) && WEXITSTATUS(rc) == 0) + return 0; + + // We don't expect a non-zero exit that's not a signal + assert(!WIFEXITED(rc)); + + printf("Faulted: %s delta %-7d used size 0x%06x signal %d\n", + type == LOAD ? "load" : "store", delta, stack_used, + WTERMSIG(rc)); + + return 1; +} + +// This is fairly arbitrary but is well below any of the targets below, +// so that the delta between the stack pointer and the target is large. +#define DEFAULT_SIZE (32 * _KB) + +static void test_one_type(enum access_type type, unsigned long page_size, unsigned long rlim_cur) +{ + unsigned long delta; + + // We should be able to access anywhere within the rlimit + for (delta = page_size; delta <= rlim_cur; delta += page_size) + assert(test_one(DEFAULT_SIZE, delta, type) == 0); + + assert(test_one(DEFAULT_SIZE, rlim_cur, type) == 0); + + // But if we go past the rlimit it should fail + assert(test_one(DEFAULT_SIZE, rlim_cur + 1, type) != 0); +} + +static int test(void) +{ + unsigned long page_size; + struct rlimit rlimit; + + page_size = getpagesize(); + getrlimit(RLIMIT_STACK, &rlimit); + printf("Stack rlimit is 0x%lx\n", rlimit.rlim_cur); + + printf("Testing loads ...\n"); + test_one_type(LOAD, page_size, rlimit.rlim_cur); + printf("Testing stores ...\n"); + test_one_type(STORE, page_size, rlimit.rlim_cur); + + printf("All OK\n"); + + return 0; +} + +#ifdef __powerpc__ +#include "utils.h" + +int main(void) +{ + return test_harness(test, "stack_expansion_ldst"); +} +#else +int main(void) +{ + return test(); +} +#endif diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c b/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c new file mode 100644 index 000000000000..c8b32a29e274 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test that signal delivery is able to expand the stack segment without + * triggering a SEGV. + * + * Based on test code by Tom Lane. + */ + +#include +#include +#include +#include +#include +#include + +#include "../pmu/lib.h" +#include "utils.h" + +#define _KB (1024) +#define _MB (1024 * 1024) + +static char *stack_base_ptr; +static char *stack_top_ptr; + +static volatile sig_atomic_t sig_occurred = 0; + +static void sigusr1_handler(int signal_arg) +{ + sig_occurred = 1; +} + +static int consume_stack(unsigned int stack_size, union pipe write_pipe) +{ + char stack_cur; + + if ((stack_base_ptr - &stack_cur) < stack_size) + return consume_stack(stack_size, write_pipe); + else { + stack_top_ptr = &stack_cur; + + FAIL_IF(notify_parent(write_pipe)); + + while (!sig_occurred) + barrier(); + } + + return 0; +} + +static int child(unsigned int stack_size, union pipe write_pipe) +{ + struct sigaction act; + char stack_base; + + act.sa_handler = sigusr1_handler; + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + if (sigaction(SIGUSR1, &act, NULL) < 0) + err(1, "sigaction"); + + stack_base_ptr = (char *) (((size_t) &stack_base + 65535) & ~65535UL); + + FAIL_IF(consume_stack(stack_size, write_pipe)); + + printf("size 0x%06x: OK, stack base %p top %p (%zx used)\n", + stack_size, stack_base_ptr, stack_top_ptr, + stack_base_ptr - stack_top_ptr); + + return 0; +} + +static int test_one_size(unsigned int stack_size) +{ + union pipe read_pipe, write_pipe; + pid_t pid; + + FAIL_IF(pipe(read_pipe.fds) == -1); + FAIL_IF(pipe(write_pipe.fds) == -1); + + pid = fork(); + if (pid == 0) { + close(read_pipe.read_fd); + close(write_pipe.write_fd); + exit(child(stack_size, read_pipe)); + } + + close(read_pipe.write_fd); + close(write_pipe.read_fd); + FAIL_IF(sync_with_child(read_pipe, write_pipe)); + + kill(pid, SIGUSR1); + + FAIL_IF(wait_for_child(pid)); + + close(read_pipe.read_fd); + close(write_pipe.write_fd); + + return 0; +} + +int test(void) +{ + unsigned int i, size; + + // Test with used stack from 1MB - 64K to 1MB + 64K + // Increment by 64 to get more coverage of odd sizes + for (i = 0; i < (128 * _KB); i += 64) { + size = i + (1 * _MB) - (64 * _KB); + FAIL_IF(test_one_size(size)); + } + + return 0; +} + +int main(void) +{ + return test_harness(test, "stack_expansion_signal"); +} diff --git a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c index 7b4ac4537702..2980abca31e0 100644 --- a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c +++ b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "event.h" #include "utils.h" @@ -104,6 +105,9 @@ static int test_body(void) struct event events[3]; u64 overhead; + // The STCX_FAIL event we use works on Power8 or later + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + setup_event(&events[0], PERF_COUNT_HW_INSTRUCTIONS, PERF_TYPE_HARDWARE, "instructions"); setup_event(&events[1], PERF_COUNT_HW_CPU_CYCLES, PERF_TYPE_HARDWARE, "cycles"); setup_event(&events[2], PM_STCX_FAIL, PERF_TYPE_RAW, "stcx_fail"); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c index a2d7b0e3dca9..a26ac122c759 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c @@ -91,8 +91,6 @@ int back_to_back_ebbs(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_ebb_state(); event_close(&event); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c index bc893813483e..bb9f587fa76e 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c @@ -42,8 +42,6 @@ int cycles(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_ebb_state(); event_close(&event); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c index dcd351d20328..9ae795ce314e 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c @@ -99,8 +99,6 @@ int cycles_with_freeze(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_ebb_state(); printf("EBBs while frozen %d\n", ebbs_while_frozen); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c index 94c99c12c0f2..4b45a2e70f62 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c @@ -71,8 +71,6 @@ int cycles_with_mmcr2(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_ebb_state(); event_close(&event); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c index dfbc5c3ad52d..21537d6eb6b7 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c @@ -396,8 +396,6 @@ int ebb_child(union pipe read_pipe, union pipe write_pipe) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_ebb_state(); event_close(&event); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c index ca2f7d729155..b208bf6ad58d 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c @@ -38,8 +38,6 @@ static int victim_child(union pipe read_pipe, union pipe write_pipe) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_ebb_state(); FAIL_IF(ebb_state.stats.ebb_count == 0); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c index ac3e6e182614..ba2681a12cc7 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c @@ -75,7 +75,6 @@ static int test_body(void) ebb_freeze_pmcs(); ebb_global_disable(); - count_pmc(4, sample_period); mtspr(SPRN_PMC4, 0xdead); dump_summary_ebb_state(); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c index b8242e9d97d2..791d37ba327b 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c @@ -70,13 +70,6 @@ int multi_counter(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - count_pmc(2, sample_period); - count_pmc(3, sample_period); - count_pmc(4, sample_period); - count_pmc(5, sample_period); - count_pmc(6, sample_period); - dump_ebb_state(); for (i = 0; i < 6; i++) diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c index a05c0e18ded6..9b0f70d59702 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c @@ -61,8 +61,6 @@ static int cycles_child(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_summary_ebb_state(); event_close(&event); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c index 153ebc92234f..2904c741e04e 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c @@ -82,8 +82,6 @@ static int test_body(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(1, sample_period); - dump_ebb_state(); if (mmcr0_mismatch) diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c index eadad75ed7e6..b29f8ba22d1e 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c @@ -76,8 +76,6 @@ int pmc56_overflow(void) ebb_global_disable(); ebb_freeze_pmcs(); - count_pmc(2, sample_period); - dump_ebb_state(); printf("PMC5/6 overflow %d\n", pmc56_overflowed); diff --git a/tools/testing/selftests/powerpc/pmu/lib.h b/tools/testing/selftests/powerpc/pmu/lib.h index fa12e7d0b4d3..bf1bec013bbb 100644 --- a/tools/testing/selftests/powerpc/pmu/lib.h +++ b/tools/testing/selftests/powerpc/pmu/lib.h @@ -6,6 +6,7 @@ #ifndef __SELFTESTS_POWERPC_PMU_LIB_H #define __SELFTESTS_POWERPC_PMU_LIB_H +#include #include #include #include diff --git a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c index 2756fe2efdc5..2d37942bf72b 100644 --- a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c +++ b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c @@ -12,6 +12,8 @@ #include #include +#include + #include "event.h" #include "lib.h" #include "utils.h" @@ -23,12 +25,9 @@ static int per_event_excludes(void) { struct event *e, events[4]; - char *platform; int i; - platform = (char *)get_auxv_entry(AT_BASE_PLATFORM); - FAIL_IF(!platform); - SKIP_IF(strcmp(platform, "power8") != 0); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); /* * We need to create the events disabled, otherwise the running/enabled diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c index d5c64fee032d..bbc05ffc5860 100644 --- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ -150,7 +150,7 @@ static int child(struct shared_info *info) printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n", user_write, info->amr, pkey1, pkey2, pkey3); - mtspr(SPRN_AMR, info->amr); + set_amr(info->amr); /* * We won't use pkey3. This tests whether the kernel restores the UAMOR diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c index bdbbbe8431e0..bc454f899124 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c @@ -44,7 +44,7 @@ struct shared_info { unsigned long amr2; /* AMR value that ptrace should refuse to write to the child. */ - unsigned long amr3; + unsigned long invalid_amr; /* IAMR value the parent expects to read from the child. */ unsigned long expected_iamr; @@ -57,8 +57,8 @@ struct shared_info { * (even though they're valid ones) because userspace doesn't have * access to those registers. */ - unsigned long new_iamr; - unsigned long new_uamor; + unsigned long invalid_iamr; + unsigned long invalid_uamor; }; static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights) @@ -66,11 +66,6 @@ static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights) return syscall(__NR_pkey_alloc, flags, init_access_rights); } -static int sys_pkey_free(int pkey) -{ - return syscall(__NR_pkey_free, pkey); -} - static int child(struct shared_info *info) { unsigned long reg; @@ -100,33 +95,37 @@ static int child(struct shared_info *info) info->amr1 |= 3ul << pkeyshift(pkey1); info->amr2 |= 3ul << pkeyshift(pkey2); - info->amr3 |= info->amr2 | 3ul << pkeyshift(pkey3); + /* + * invalid amr value where we try to force write + * things which are deined by a uamor setting. + */ + info->invalid_amr = info->amr2 | (~0x0UL & ~info->expected_uamor); + /* + * if PKEY_DISABLE_EXECUTE succeeded we should update the expected_iamr + */ if (disable_execute) info->expected_iamr |= 1ul << pkeyshift(pkey1); else info->expected_iamr &= ~(1ul << pkeyshift(pkey1)); - info->expected_iamr &= ~(1ul << pkeyshift(pkey2) | 1ul << pkeyshift(pkey3)); - - info->expected_uamor |= 3ul << pkeyshift(pkey1) | - 3ul << pkeyshift(pkey2); - info->new_iamr |= 1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2); - info->new_uamor |= 3ul << pkeyshift(pkey1); + /* + * We allocated pkey2 and pkey 3 above. Clear the IAMR bits. + */ + info->expected_iamr &= ~(1ul << pkeyshift(pkey2)); + info->expected_iamr &= ~(1ul << pkeyshift(pkey3)); /* - * We won't use pkey3. We just want a plausible but invalid key to test - * whether ptrace will let us write to AMR bits we are not supposed to. - * - * This also tests whether the kernel restores the UAMOR permissions - * after a key is freed. + * Create an IAMR value different from expected value. + * Kernel will reject an IAMR and UAMOR change. */ - sys_pkey_free(pkey3); + info->invalid_iamr = info->expected_iamr | (1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2)); + info->invalid_uamor = info->expected_uamor & ~(0x3ul << pkeyshift(pkey1)); printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n", user_write, info->amr1, pkey1, pkey2, pkey3); - mtspr(SPRN_AMR, info->amr1); + set_amr(info->amr1); /* Wait for parent to read our AMR value and write a new one. */ ret = prod_parent(&info->child_sync); @@ -196,9 +195,9 @@ static int parent(struct shared_info *info, pid_t pid) PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync); PARENT_FAIL_IF(ret, &info->child_sync); - info->amr1 = info->amr2 = info->amr3 = regs[0]; - info->expected_iamr = info->new_iamr = regs[1]; - info->expected_uamor = info->new_uamor = regs[2]; + info->amr1 = info->amr2 = regs[0]; + info->expected_iamr = regs[1]; + info->expected_uamor = regs[2]; /* Wake up child so that it can set itself up. */ ret = prod_child(&info->child_sync); @@ -234,10 +233,10 @@ static int parent(struct shared_info *info, pid_t pid) return ret; /* Write invalid AMR value in child. */ - ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->amr3, 1); + ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->invalid_amr, 1); PARENT_FAIL_IF(ret, &info->child_sync); - printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr3); + printf("%-30s AMR: %016lx\n", ptrace_write_running, info->invalid_amr); /* Wake up child so that it can verify it didn't change. */ ret = prod_child(&info->child_sync); @@ -249,7 +248,7 @@ static int parent(struct shared_info *info, pid_t pid) /* Try to write to IAMR. */ regs[0] = info->amr1; - regs[1] = info->new_iamr; + regs[1] = info->invalid_iamr; ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 2); PARENT_FAIL_IF(!ret, &info->child_sync); @@ -257,7 +256,7 @@ static int parent(struct shared_info *info, pid_t pid) ptrace_write_running, regs[0], regs[1]); /* Try to write to IAMR and UAMOR. */ - regs[2] = info->new_uamor; + regs[2] = info->invalid_uamor; ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 3); PARENT_FAIL_IF(!ret, &info->child_sync); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c index 58cb1a860cc9..4436ca9d3caf 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c @@ -78,6 +78,9 @@ int ptrace_tar(void) pid_t pid; int ret, status; + // TAR was added in v2.07 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT); pid = fork(); if (pid < 0) { diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c index c4fe0e893306..cb9875f764ca 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c @@ -61,6 +61,8 @@ int ptrace_vsx(void) pid_t pid; int ret, status, i; + SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX)); + shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT); for (i = 0; i < VEC_MAX; i++) diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c index 8c6b982af2a8..c8d82b784102 100644 --- a/tools/testing/selftests/powerpc/security/spectre_v2.c +++ b/tools/testing/selftests/powerpc/security/spectre_v2.c @@ -183,6 +183,16 @@ int spectre_v2_test(void) if (miss_percent > 15) { printf("Branch misses > 15%% unexpected in this configuration!\n"); printf("Possible mis-match between reported & actual mitigation\n"); + /* + * Such a mismatch may be caused by a guest system + * reporting as vulnerable when the host is mitigated. + * Return skip code to avoid detecting this as an error. + * We are not vulnerable and reporting otherwise, so + * missing such a mismatch is safe. + */ + if (state == VULNERABLE) + return 4; + return 1; } break; diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile index 7fc0623d85c3..9c39f55a58ff 100644 --- a/tools/testing/selftests/powerpc/stringloops/Makefile +++ b/tools/testing/selftests/powerpc/stringloops/Makefile @@ -8,7 +8,7 @@ build_32bit = $(shell if ($(CC) $(CFLAGS) -m32 -o /dev/null memcmp.c >/dev/null TEST_GEN_PROGS := memcmp_64 strlen -$(OUTPUT)/memcmp_64: memcmp.c +$(OUTPUT)/memcmp_64: memcmp.c ../utils.c $(OUTPUT)/memcmp_64: CFLAGS += -m64 -maltivec ifeq ($(build_32bit),1) diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c index b1fa7546957f..979df3d98368 100644 --- a/tools/testing/selftests/powerpc/stringloops/memcmp.c +++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c @@ -2,7 +2,9 @@ #include #include #include +#include #include +#include #include "utils.h" #define SIZE 256 @@ -13,6 +15,9 @@ #define LARGE_MAX_OFFSET 32 #define LARGE_SIZE_START 4096 +/* This is big enough to fit LARGE_SIZE and works on 4K & 64K kernels */ +#define MAP_SIZE (64 * 1024) + #define MAX_OFFSET_DIFF_S1_S2 48 int vmx_count; @@ -68,25 +73,25 @@ static void test_one(char *s1, char *s2, unsigned long max_offset, static int testcase(bool islarge) { - char *s1; - char *s2; - unsigned long i; + unsigned long i, comp_size, alloc_size; + char *p, *s1, *s2; + int iterations; - unsigned long comp_size = (islarge ? LARGE_SIZE : SIZE); - unsigned long alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2; - int iterations = islarge ? LARGE_ITERATIONS : ITERATIONS; + comp_size = (islarge ? LARGE_SIZE : SIZE); + alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2; + iterations = islarge ? LARGE_ITERATIONS : ITERATIONS; - s1 = memalign(128, alloc_size); - if (!s1) { - perror("memalign"); - exit(1); - } + p = mmap(NULL, 4 * MAP_SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + FAIL_IF(p == MAP_FAILED); - s2 = memalign(128, alloc_size); - if (!s2) { - perror("memalign"); - exit(1); - } + /* Put s1/s2 at the end of a page */ + s1 = p + MAP_SIZE - alloc_size; + s2 = p + 3 * MAP_SIZE - alloc_size; + + /* And unmap the subsequent page to force a fault if we overread */ + munmap(p + MAP_SIZE, MAP_SIZE); + munmap(p + 3 * MAP_SIZE, MAP_SIZE); srandom(time(0)); @@ -147,6 +152,11 @@ static int testcase(bool islarge) static int testcases(void) { +#ifdef __powerpc64__ + // vcmpequd used in memcmp_64.S is v2.07 + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); +#endif + testcase(0); testcase(1); return 0; diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c index 5ee0e98c4896..18b6a773d5c7 100644 --- a/tools/testing/selftests/powerpc/utils.c +++ b/tools/testing/selftests/powerpc/utils.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -88,28 +89,40 @@ void *get_auxv_entry(int type) int pick_online_cpu(void) { - cpu_set_t mask; - int cpu; + int ncpus, cpu = -1; + cpu_set_t *mask; + size_t size; - CPU_ZERO(&mask); - - if (sched_getaffinity(0, sizeof(mask), &mask)) { - perror("sched_getaffinity"); + ncpus = get_nprocs_conf(); + size = CPU_ALLOC_SIZE(ncpus); + mask = CPU_ALLOC(ncpus); + if (!mask) { + perror("malloc"); return -1; } + CPU_ZERO_S(size, mask); + + if (sched_getaffinity(0, size, mask)) { + perror("sched_getaffinity"); + goto done; + } + /* We prefer a primary thread, but skip 0 */ - for (cpu = 8; cpu < CPU_SETSIZE; cpu += 8) - if (CPU_ISSET(cpu, &mask)) - return cpu; + for (cpu = 8; cpu < ncpus; cpu += 8) + if (CPU_ISSET_S(cpu, size, mask)) + goto done; /* Search for anything, but in reverse */ - for (cpu = CPU_SETSIZE - 1; cpu >= 0; cpu--) - if (CPU_ISSET(cpu, &mask)) - return cpu; + for (cpu = ncpus - 1; cpu >= 0; cpu--) + if (CPU_ISSET_S(cpu, size, mask)) + goto done; printf("No cpus in affinity mask?!\n"); - return -1; + +done: + CPU_FREE(mask); + return cpu; } bool is_ppc64le(void) @@ -293,3 +306,31 @@ void set_dscr(unsigned long val) asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); } + +int using_hash_mmu(bool *using_hash) +{ + char line[128]; + FILE *f; + int rc; + + f = fopen("/proc/cpuinfo", "r"); + FAIL_IF(!f); + + rc = 0; + while (fgets(line, sizeof(line), f) != NULL) { + if (strcmp(line, "MMU : Hash\n") == 0) { + *using_hash = true; + goto out; + } + + if (strcmp(line, "MMU : Radix\n") == 0) { + *using_hash = false; + goto out; + } + } + + rc = -1; +out: + fclose(f); + return rc; +}