From a318c2432218d3cd189ec8228b8a795666899c2a Mon Sep 17 00:00:00 2001
From: Philipp Zabel
Date: Fri, 31 Aug 2018 14:34:02 +0200
Subject: [PATCH 01/53] mfd: da9063: Fix DT probing with constraints
Commit 1c892e38ce59 ("regulator: da9063: Handle less LDOs on DA9063L")
reordered the da9063_regulator_info[] array, but not the DA9063_ID_*
regulator ids and not the da9063_matches[] array, because ids are used
as indices in the array initializer. This mismatch between regulator id
and da9063_regulator_info[] array index causes the driver probe to fail
because constraints from DT are not applied to the correct regulator:
da9063 0-0058: Device detected (chip-ID: 0x61, var-ID: 0x50)
DA9063_BMEM: Bringing 900000uV into 3300000-3300000uV
DA9063_LDO9: Bringing 3300000uV into 2500000-2500000uV
DA9063_LDO1: Bringing 900000uV into 3300000-3300000uV
DA9063_LDO1: failed to apply 3300000-3300000uV constraint(-22)
This patch reorders the DA9063_ID_* as apparently intended, and with
them the entries in the da90630_matches[] array.
Fixes: 1c892e38ce59 ("regulator: da9063: Handle less LDOs on DA9063L")
Signed-off-by: Philipp Zabel
Reviewed-by: Marek Vasut
Reviewed-by: Geert Uytterhoeven
Signed-off-by: Lee Jones
---
include/linux/mfd/da9063/pdata.h | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)
diff --git a/include/linux/mfd/da9063/pdata.h b/include/linux/mfd/da9063/pdata.h
index 8a125701ef7b..50bed4f89c1a 100644
--- a/include/linux/mfd/da9063/pdata.h
+++ b/include/linux/mfd/da9063/pdata.h
@@ -21,7 +21,7 @@
/*
* Regulator configuration
*/
-/* DA9063 regulator IDs */
+/* DA9063 and DA9063L regulator IDs */
enum {
/* BUCKs */
DA9063_ID_BCORE1,
@@ -37,18 +37,20 @@ enum {
DA9063_ID_BMEM_BIO_MERGED,
/* When two BUCKs are merged, they cannot be reused separately */
- /* LDOs */
- DA9063_ID_LDO1,
- DA9063_ID_LDO2,
+ /* LDOs on both DA9063 and DA9063L */
DA9063_ID_LDO3,
- DA9063_ID_LDO4,
- DA9063_ID_LDO5,
- DA9063_ID_LDO6,
DA9063_ID_LDO7,
DA9063_ID_LDO8,
DA9063_ID_LDO9,
- DA9063_ID_LDO10,
DA9063_ID_LDO11,
+
+ /* DA9063-only LDOs */
+ DA9063_ID_LDO1,
+ DA9063_ID_LDO2,
+ DA9063_ID_LDO4,
+ DA9063_ID_LDO5,
+ DA9063_ID_LDO6,
+ DA9063_ID_LDO10,
};
/* Regulators platform data */
From 10492ee8ed9188d6d420e1f79b2b9bdbc0624e65 Mon Sep 17 00:00:00 2001
From: Tony Lindgren
Date: Wed, 25 Apr 2018 07:29:22 -0700
Subject: [PATCH 02/53] mfd: omap-usb-host: Fix dts probe of children
It currently only works if the parent bus uses "simple-bus". We
currently try to probe children with non-existing compatible values.
And we're missing .probe.
I noticed this while testing devices configured to probe using ti-sysc
interconnect target module driver. For that we also may want to rebind
the driver, so let's remove __init and __exit.
Signed-off-by: Tony Lindgren
Acked-by: Roger Quadros
Signed-off-by: Lee Jones
---
drivers/mfd/omap-usb-host.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c
index e11ab12fbdf2..800986a79704 100644
--- a/drivers/mfd/omap-usb-host.c
+++ b/drivers/mfd/omap-usb-host.c
@@ -528,8 +528,8 @@ static int usbhs_omap_get_dt_pdata(struct device *dev,
}
static const struct of_device_id usbhs_child_match_table[] = {
- { .compatible = "ti,omap-ehci", },
- { .compatible = "ti,omap-ohci", },
+ { .compatible = "ti,ehci-omap", },
+ { .compatible = "ti,ohci-omap3", },
{ }
};
@@ -855,6 +855,7 @@ static struct platform_driver usbhs_omap_driver = {
.pm = &usbhsomap_dev_pm_ops,
.of_match_table = usbhs_omap_dt_ids,
},
+ .probe = usbhs_omap_probe,
.remove = usbhs_omap_remove,
};
@@ -864,9 +865,9 @@ MODULE_ALIAS("platform:" USBHS_DRIVER_NAME);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("usb host common core driver for omap EHCI and OHCI");
-static int __init omap_usbhs_drvinit(void)
+static int omap_usbhs_drvinit(void)
{
- return platform_driver_probe(&usbhs_omap_driver, usbhs_omap_probe);
+ return platform_driver_register(&usbhs_omap_driver);
}
/*
@@ -878,7 +879,7 @@ static int __init omap_usbhs_drvinit(void)
*/
fs_initcall_sync(omap_usbhs_drvinit);
-static void __exit omap_usbhs_drvexit(void)
+static void omap_usbhs_drvexit(void)
{
platform_driver_unregister(&usbhs_omap_driver);
}
From d310959365942b100f79b56bcce859968fe7ca9c Mon Sep 17 00:00:00 2001
From: Scott Branden
Date: Tue, 11 Sep 2018 13:26:38 -0700
Subject: [PATCH 03/53] efi/libstub/arm: default EFI_ARMSTUB_DTB_LOADER to y
Default EFI_ARMSTUB_DTB_LOADER to y to allow the dtb= command
line parameter to function with efi loader.
Required for development purposes and to boot on existing bootloaders
that do not support devicetree provided by the firmware or by the
bootloader.
Fixes: 3d7ee348aa41 ("efi/libstub/arm: Add opt-in Kconfig option ...")
Signed-off-by: Scott Branden
Signed-off-by: Ard Biesheuvel
---
drivers/firmware/efi/Kconfig | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index d8e159feb573..89110dfc7127 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -90,14 +90,17 @@ config EFI_ARMSTUB
config EFI_ARMSTUB_DTB_LOADER
bool "Enable the DTB loader"
depends on EFI_ARMSTUB
+ default y
help
Select this config option to add support for the dtb= command
line parameter, allowing a device tree blob to be loaded into
memory from the EFI System Partition by the stub.
- The device tree is typically provided by the platform or by
- the bootloader, so this option is mostly for development
- purposes only.
+ If the device tree is provided by the platform or by
+ the bootloader this option may not be needed.
+ But, for various development reasons and to maintain existing
+ functionality for bootloaders that do not have such support
+ this option is necessary.
config EFI_BOOTLOADER_CONTROL
tristate "EFI Bootloader Control"
From b3f0907c71e006e12fde74ea9a745b6096b6f90f Mon Sep 17 00:00:00 2001
From: Brijesh Singh
Date: Fri, 14 Sep 2018 08:45:58 -0500
Subject: [PATCH 04/53] x86/mm: Add .bss..decrypted section to hold shared
variables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
kvmclock defines few static variables which are shared with the
hypervisor during the kvmclock initialization.
When SEV is active, memory is encrypted with a guest-specific key, and
if the guest OS wants to share the memory region with the hypervisor
then it must clear the C-bit before sharing it.
Currently, we use kernel_physical_mapping_init() to split large pages
before clearing the C-bit on shared pages. But it fails when called from
the kvmclock initialization (mainly because the memblock allocator is
not ready that early during boot).
Add a __bss_decrypted section attribute which can be used when defining
such shared variable. The so-defined variables will be placed in the
.bss..decrypted section. This section will be mapped with C=0 early
during boot.
The .bss..decrypted section has a big chunk of memory that may be unused
when memory encryption is not active, free it when memory encryption is
not active.
Suggested-by: Thomas Gleixner
Signed-off-by: Brijesh Singh
Signed-off-by: Thomas Gleixner
Cc: Tom Lendacky
Cc: Borislav Petkov
Cc: "H. Peter Anvin"
Cc: Paolo Bonzini
Cc: Sean Christopherson
Cc: Radim Krčmář
Cc: kvm@vger.kernel.org
Link: https://lkml.kernel.org/r/1536932759-12905-2-git-send-email-brijesh.singh@amd.com
---
arch/x86/include/asm/mem_encrypt.h | 7 +++++++
arch/x86/kernel/head64.c | 16 ++++++++++++++++
arch/x86/kernel/vmlinux.lds.S | 19 +++++++++++++++++++
arch/x86/mm/init.c | 4 ++++
arch/x86/mm/mem_encrypt.c | 24 ++++++++++++++++++++++++
5 files changed, 70 insertions(+)
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index c0643831706e..616f8e637bc3 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -48,10 +48,13 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
/* Architecture __weak replacement functions */
void __init mem_encrypt_init(void);
+void __init mem_encrypt_free_decrypted_mem(void);
bool sme_active(void);
bool sev_active(void);
+#define __bss_decrypted __attribute__((__section__(".bss..decrypted")))
+
#else /* !CONFIG_AMD_MEM_ENCRYPT */
#define sme_me_mask 0ULL
@@ -77,6 +80,8 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0;
static inline int __init
early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
+#define __bss_decrypted
+
#endif /* CONFIG_AMD_MEM_ENCRYPT */
/*
@@ -88,6 +93,8 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0;
#define __sme_pa(x) (__pa(x) | sme_me_mask)
#define __sme_pa_nodebug(x) (__pa_nodebug(x) | sme_me_mask)
+extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[];
+
#endif /* __ASSEMBLY__ */
#endif /* __X86_MEM_ENCRYPT_H__ */
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 8047379e575a..c16af27eb23f 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -112,6 +112,7 @@ static bool __head check_la57_support(unsigned long physaddr)
unsigned long __head __startup_64(unsigned long physaddr,
struct boot_params *bp)
{
+ unsigned long vaddr, vaddr_end;
unsigned long load_delta, *p;
unsigned long pgtable_flags;
pgdval_t *pgd;
@@ -234,6 +235,21 @@ unsigned long __head __startup_64(unsigned long physaddr,
/* Encrypt the kernel and related (if SME is active) */
sme_encrypt_kernel(bp);
+ /*
+ * Clear the memory encryption mask from the .bss..decrypted section.
+ * The bss section will be memset to zero later in the initialization so
+ * there is no need to zero it after changing the memory encryption
+ * attribute.
+ */
+ if (mem_encrypt_active()) {
+ vaddr = (unsigned long)__start_bss_decrypted;
+ vaddr_end = (unsigned long)__end_bss_decrypted;
+ for (; vaddr < vaddr_end; vaddr += PMD_SIZE) {
+ i = pmd_index(vaddr);
+ pmd[i] -= sme_get_me_mask();
+ }
+ }
+
/*
* Return the SME encryption mask (if SME is active) to be used as a
* modifier for the initial pgdir entry programmed into CR3.
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 8bde0a419f86..5dd3317d761f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -65,6 +65,23 @@ jiffies_64 = jiffies;
#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE);
#define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE);
+/*
+ * This section contains data which will be mapped as decrypted. Memory
+ * encryption operates on a page basis. Make this section PMD-aligned
+ * to avoid splitting the pages while mapping the section early.
+ *
+ * Note: We use a separate section so that only this section gets
+ * decrypted to avoid exposing more than we wish.
+ */
+#define BSS_DECRYPTED \
+ . = ALIGN(PMD_SIZE); \
+ __start_bss_decrypted = .; \
+ *(.bss..decrypted); \
+ . = ALIGN(PAGE_SIZE); \
+ __start_bss_decrypted_unused = .; \
+ . = ALIGN(PMD_SIZE); \
+ __end_bss_decrypted = .; \
+
#else
#define X86_ALIGN_RODATA_BEGIN
@@ -74,6 +91,7 @@ jiffies_64 = jiffies;
#define ALIGN_ENTRY_TEXT_BEGIN
#define ALIGN_ENTRY_TEXT_END
+#define BSS_DECRYPTED
#endif
@@ -355,6 +373,7 @@ SECTIONS
__bss_start = .;
*(.bss..page_aligned)
*(.bss)
+ BSS_DECRYPTED
. = ALIGN(PAGE_SIZE);
__bss_stop = .;
}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 7a8fc26c1115..faca978ebf9d 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -815,10 +815,14 @@ void free_kernel_image_pages(void *begin, void *end)
set_memory_np_noalias(begin_ul, len_pages);
}
+void __weak mem_encrypt_free_decrypted_mem(void) { }
+
void __ref free_initmem(void)
{
e820__reallocate_tables();
+ mem_encrypt_free_decrypted_mem();
+
free_kernel_image_pages(&__init_begin, &__init_end);
}
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index b2de398d1fd3..006f373f54ab 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -348,6 +348,30 @@ bool sev_active(void)
EXPORT_SYMBOL(sev_active);
/* Architecture __weak replacement functions */
+void __init mem_encrypt_free_decrypted_mem(void)
+{
+ unsigned long vaddr, vaddr_end, npages;
+ int r;
+
+ vaddr = (unsigned long)__start_bss_decrypted_unused;
+ vaddr_end = (unsigned long)__end_bss_decrypted;
+ npages = (vaddr_end - vaddr) >> PAGE_SHIFT;
+
+ /*
+ * The unused memory range was mapped decrypted, change the encryption
+ * attribute from decrypted to encrypted before freeing it.
+ */
+ if (mem_encrypt_active()) {
+ r = set_memory_encrypted(vaddr, npages);
+ if (r) {
+ pr_warn("failed to free unused decrypted pages\n");
+ return;
+ }
+ }
+
+ free_init_pages("unused decrypted", vaddr, vaddr_end);
+}
+
void __init mem_encrypt_init(void)
{
if (!sme_me_mask)
From 6a1cac56f41f9ea94e440dfcc1cac44b41a1b194 Mon Sep 17 00:00:00 2001
From: Brijesh Singh
Date: Fri, 14 Sep 2018 08:45:59 -0500
Subject: [PATCH 05/53] x86/kvm: Use __bss_decrypted attribute in shared
variables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The recent removal of the memblock dependency from kvmclock caused a SEV
guest regression because the wall_clock and hv_clock_boot variables are
no longer mapped decrypted when SEV is active.
Use the __bss_decrypted attribute to put the static wall_clock and
hv_clock_boot in the .bss..decrypted section so that they are mapped
decrypted during boot.
In the preparatory stage of CPU hotplug, the per-cpu pvclock data pointer
assigns either an element of the static array or dynamically allocated
memory for the pvclock data pointer. The static array are now mapped
decrypted but the dynamically allocated memory is not mapped decrypted.
However, when SEV is active this memory range must be mapped decrypted.
Add a function which is called after the page allocator is up, and
allocate memory for the pvclock data pointers for the all possible cpus.
Map this memory range as decrypted when SEV is active.
Fixes: 368a540e0232 ("x86/kvmclock: Remove memblock dependency")
Suggested-by: Thomas Gleixner
Signed-off-by: Brijesh Singh
Signed-off-by: Thomas Gleixner
Cc: Tom Lendacky
Cc: Borislav Petkov
Cc: "H. Peter Anvin"
Cc: Paolo Bonzini
Cc: Sean Christopherson
Cc: "Radim Krčmář"
Cc: kvm@vger.kernel.org
Link: https://lkml.kernel.org/r/1536932759-12905-3-git-send-email-brijesh.singh@amd.com
---
arch/x86/kernel/kvmclock.c | 52 +++++++++++++++++++++++++++++++++++---
1 file changed, 49 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 1e6764648af3..013fe3d21dbb 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -28,6 +28,7 @@
#include
#include
#include
+#include
#include
#include
@@ -61,9 +62,10 @@ early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
(PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info))
static struct pvclock_vsyscall_time_info
- hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __aligned(PAGE_SIZE);
-static struct pvclock_wall_clock wall_clock;
+ hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted __aligned(PAGE_SIZE);
+static struct pvclock_wall_clock wall_clock __bss_decrypted;
static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
+static struct pvclock_vsyscall_time_info *hvclock_mem;
static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
{
@@ -236,6 +238,45 @@ static void kvm_shutdown(void)
native_machine_shutdown();
}
+static void __init kvmclock_init_mem(void)
+{
+ unsigned long ncpus;
+ unsigned int order;
+ struct page *p;
+ int r;
+
+ if (HVC_BOOT_ARRAY_SIZE >= num_possible_cpus())
+ return;
+
+ ncpus = num_possible_cpus() - HVC_BOOT_ARRAY_SIZE;
+ order = get_order(ncpus * sizeof(*hvclock_mem));
+
+ p = alloc_pages(GFP_KERNEL, order);
+ if (!p) {
+ pr_warn("%s: failed to alloc %d pages", __func__, (1U << order));
+ return;
+ }
+
+ hvclock_mem = page_address(p);
+
+ /*
+ * hvclock is shared between the guest and the hypervisor, must
+ * be mapped decrypted.
+ */
+ if (sev_active()) {
+ r = set_memory_decrypted((unsigned long) hvclock_mem,
+ 1UL << order);
+ if (r) {
+ __free_pages(p, order);
+ hvclock_mem = NULL;
+ pr_warn("kvmclock: set_memory_decrypted() failed. Disabling\n");
+ return;
+ }
+ }
+
+ memset(hvclock_mem, 0, PAGE_SIZE << order);
+}
+
static int __init kvm_setup_vsyscall_timeinfo(void)
{
#ifdef CONFIG_X86_64
@@ -250,6 +291,9 @@ static int __init kvm_setup_vsyscall_timeinfo(void)
kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
#endif
+
+ kvmclock_init_mem();
+
return 0;
}
early_initcall(kvm_setup_vsyscall_timeinfo);
@@ -269,8 +313,10 @@ static int kvmclock_setup_percpu(unsigned int cpu)
/* Use the static page for the first CPUs, allocate otherwise */
if (cpu < HVC_BOOT_ARRAY_SIZE)
p = &hv_clock_boot[cpu];
+ else if (hvclock_mem)
+ p = hvclock_mem + cpu - HVC_BOOT_ARRAY_SIZE;
else
- p = kzalloc(sizeof(*p), GFP_KERNEL);
+ return -ENOMEM;
per_cpu(hv_clock_per_cpu, cpu) = p;
return p ? 0 : -ENOMEM;
From 6d41907c630d3196be89c9ed5a7f8258486b3eaf Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo
Date: Fri, 14 Sep 2018 16:47:14 -0300
Subject: [PATCH 06/53] tools lib bpf: Provide wrapper for strerror_r to build
in !_GNU_SOURCE systems
Same problem that got fixed in a similar fashion in tools/perf/ in
c8b5f2c96d1b ("tools: Introduce str_error_r()"), fix it in the same
way, licensing needs to be sorted out to libbpf to use libapi, so,
for this simple case, just get the same wrapper in tools/lib/bpf.
This makes libbpf and its users (bpftool, selftests, perf) to build
again in Alpine Linux 3.[45678] and edge.
Acked-by: Alexei Starovoitov
Cc: Adrian Hunter
Cc: Daniel Borkmann
Cc: David Ahern
Cc: Hendrik Brueckner
Cc: Jakub Kicinski
Cc: Jiri Olsa
Cc: Martin KaFai Lau
Cc: Namhyung Kim
Cc: Quentin Monnet
Cc: Thomas Richter
Cc: Wang Nan
Cc: Yonghong Song
Fixes: 1ce6a9fc1549 ("bpf: fix build error in libbpf with EXTRA_CFLAGS="-Wp, -D_FORTIFY_SOURCE=2 -O2"")
Link: https://lkml.kernel.org/r/20180917151636.GA21790@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo
---
tools/lib/bpf/Build | 2 +-
tools/lib/bpf/libbpf.c | 20 ++++++++++----------
tools/lib/bpf/str_error.c | 18 ++++++++++++++++++
tools/lib/bpf/str_error.h | 6 ++++++
4 files changed, 35 insertions(+), 11 deletions(-)
create mode 100644 tools/lib/bpf/str_error.c
create mode 100644 tools/lib/bpf/str_error.h
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 13a861135127..6eb9bacd1948 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1 +1 @@
-libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o
+libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 2abd0f112627..bdb94939fd60 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -50,6 +50,7 @@
#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
+#include "str_error.h"
#ifndef EM_BPF
#define EM_BPF 247
@@ -469,7 +470,7 @@ static int bpf_object__elf_init(struct bpf_object *obj)
obj->efile.fd = open(obj->path, O_RDONLY);
if (obj->efile.fd < 0) {
char errmsg[STRERR_BUFSIZE];
- char *cp = strerror_r(errno, errmsg, sizeof(errmsg));
+ char *cp = str_error(errno, errmsg, sizeof(errmsg));
pr_warning("failed to open %s: %s\n", obj->path, cp);
return -errno;
@@ -810,8 +811,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
data->d_size, name, idx);
if (err) {
char errmsg[STRERR_BUFSIZE];
- char *cp = strerror_r(-err, errmsg,
- sizeof(errmsg));
+ char *cp = str_error(-err, errmsg, sizeof(errmsg));
pr_warning("failed to alloc program %s (%s): %s",
name, obj->path, cp);
@@ -1140,7 +1140,7 @@ bpf_object__create_maps(struct bpf_object *obj)
*pfd = bpf_create_map_xattr(&create_attr);
if (*pfd < 0 && create_attr.btf_key_type_id) {
- cp = strerror_r(errno, errmsg, sizeof(errmsg));
+ cp = str_error(errno, errmsg, sizeof(errmsg));
pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
map->name, cp, errno);
create_attr.btf_fd = 0;
@@ -1155,7 +1155,7 @@ bpf_object__create_maps(struct bpf_object *obj)
size_t j;
err = *pfd;
- cp = strerror_r(errno, errmsg, sizeof(errmsg));
+ cp = str_error(errno, errmsg, sizeof(errmsg));
pr_warning("failed to create map (name: '%s'): %s\n",
map->name, cp);
for (j = 0; j < i; j++)
@@ -1339,7 +1339,7 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
}
ret = -LIBBPF_ERRNO__LOAD;
- cp = strerror_r(errno, errmsg, sizeof(errmsg));
+ cp = str_error(errno, errmsg, sizeof(errmsg));
pr_warning("load bpf program failed: %s\n", cp);
if (log_buf && log_buf[0] != '\0') {
@@ -1654,7 +1654,7 @@ static int check_path(const char *path)
dir = dirname(dname);
if (statfs(dir, &st_fs)) {
- cp = strerror_r(errno, errmsg, sizeof(errmsg));
+ cp = str_error(errno, errmsg, sizeof(errmsg));
pr_warning("failed to statfs %s: %s\n", dir, cp);
err = -errno;
}
@@ -1690,7 +1690,7 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
}
if (bpf_obj_pin(prog->instances.fds[instance], path)) {
- cp = strerror_r(errno, errmsg, sizeof(errmsg));
+ cp = str_error(errno, errmsg, sizeof(errmsg));
pr_warning("failed to pin program: %s\n", cp);
return -errno;
}
@@ -1708,7 +1708,7 @@ static int make_dir(const char *path)
err = -errno;
if (err) {
- cp = strerror_r(-err, errmsg, sizeof(errmsg));
+ cp = str_error(-err, errmsg, sizeof(errmsg));
pr_warning("failed to mkdir %s: %s\n", path, cp);
}
return err;
@@ -1770,7 +1770,7 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
}
if (bpf_obj_pin(map->fd, path)) {
- cp = strerror_r(errno, errmsg, sizeof(errmsg));
+ cp = str_error(errno, errmsg, sizeof(errmsg));
pr_warning("failed to pin map: %s\n", cp);
return -errno;
}
diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c
new file mode 100644
index 000000000000..b8798114a357
--- /dev/null
+++ b/tools/lib/bpf/str_error.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: LGPL-2.1
+#undef _GNU_SOURCE
+#include
+#include
+#include "str_error.h"
+
+/*
+ * Wrapper to allow for building in non-GNU systems such as Alpine Linux's musl
+ * libc, while checking strerror_r() return to avoid having to check this in
+ * all places calling it.
+ */
+char *str_error(int err, char *dst, int len)
+{
+ int ret = strerror_r(err, dst, len);
+ if (ret)
+ snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret);
+ return dst;
+}
diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h
new file mode 100644
index 000000000000..355b1db571d1
--- /dev/null
+++ b/tools/lib/bpf/str_error.h
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
+#ifndef BPF_STR_ERROR
+#define BPF_STR_ERROR
+
+char *str_error(int err, char *dst, int len);
+#endif // BPF_STR_ERROR
From 169e366c08084aeb49a3793c892c9abfaa47eeda Mon Sep 17 00:00:00 2001
From: Ben Hutchings
Date: Sun, 16 Sep 2018 16:17:05 +0100
Subject: [PATCH 07/53] perf Documentation: Fix out-of-tree asciidoctor man
page generation
The dependency for the man page rule using asciidoctor incorrectly
specifies a source file in $(OUTPUT). When building out-of-tree, the
source file is not found, resulting in a fall-back to the following rule
which uses xmlto.
Signed-off-by: Ben Hutchings
Cc: Alexander Shishkin
Cc: Jiri Olsa
Cc: Namhyung Kim
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/20180916151704.GF4765@decadent.org.uk
Fixes: ffef80ecf89f ("perf Documentation: Support for asciidoctor")
Signed-off-by: Arnaldo Carvalho de Melo
---
tools/perf/Documentation/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index 42261a9b280e..ac841bc5c35b 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -280,7 +280,7 @@ $(MAN_HTML): $(OUTPUT)%.html : %.txt
mv $@+ $@
ifdef USE_ASCIIDOCTOR
-$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.txt
+$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : %.txt
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
$(ASCIIDOC) -b manpage -d manpage \
$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
From 753694a8df318f204a0ac1303de136def16f2e9c Mon Sep 17 00:00:00 2001
From: Xiaochen Shen
Date: Sat, 15 Sep 2018 14:58:19 -0700
Subject: [PATCH 08/53] x86/intel_rdt: Fix data type in parsing callbacks
Each resource is associated with a parsing callback to parse the data
provided from user space when writing schemata file.
The 'data' parameter in the callbacks is defined as a void pointer which
is error prone due to lack of type check.
parse_bw() processes the 'data' parameter as a string while its caller
actually passes the parameter as a pointer to struct rdt_cbm_parse_data.
Thus, parse_bw() takes wrong data and causes failure of parsing MBA
throttle value.
To fix the issue, the 'data' parameter in all parsing callbacks is defined
and handled as a pointer to struct rdt_parse_data (renamed from struct
rdt_cbm_parse_data).
Fixes: 7604df6e16ae ("x86/intel_rdt: Support flexible data to parsing callbacks")
Fixes: 9ab9aa15c309 ("x86/intel_rdt: Ensure requested schemata respects mode")
Signed-off-by: Xiaochen Shen
Signed-off-by: Reinette Chatre
Signed-off-by: Fenghua Yu
Signed-off-by: Thomas Gleixner
Cc: "H Peter Anvin"
Cc: "Tony Luck"
Cc: "Chen Yu"
Link: https://lkml.kernel.org/r/1537048707-76280-2-git-send-email-fenghua.yu@intel.com
---
arch/x86/kernel/cpu/intel_rdt.h | 16 ++++++++++++----
arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c | 21 ++++++++-------------
2 files changed, 20 insertions(+), 17 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 4e588f36228f..78266c798280 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -382,6 +382,11 @@ static inline bool is_mbm_event(int e)
e <= QOS_L3_MBM_LOCAL_EVENT_ID);
}
+struct rdt_parse_data {
+ struct rdtgroup *rdtgrp;
+ char *buf;
+};
+
/**
* struct rdt_resource - attributes of an RDT resource
* @rid: The index of the resource
@@ -423,16 +428,19 @@ struct rdt_resource {
struct rdt_cache cache;
struct rdt_membw membw;
const char *format_str;
- int (*parse_ctrlval) (void *data, struct rdt_resource *r,
- struct rdt_domain *d);
+ int (*parse_ctrlval)(struct rdt_parse_data *data,
+ struct rdt_resource *r,
+ struct rdt_domain *d);
struct list_head evt_list;
int num_rmid;
unsigned int mon_scale;
unsigned long fflags;
};
-int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d);
-int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d);
+int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
+ struct rdt_domain *d);
+int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
+ struct rdt_domain *d);
extern struct mutex rdtgroup_mutex;
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
index af358ca05160..edd5761f7336 100644
--- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -64,19 +64,19 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
return true;
}
-int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d)
+int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
+ struct rdt_domain *d)
{
- unsigned long data;
- char *buf = _buf;
+ unsigned long bw_val;
if (d->have_new_ctrl) {
rdt_last_cmd_printf("duplicate domain %d\n", d->id);
return -EINVAL;
}
- if (!bw_validate(buf, &data, r))
+ if (!bw_validate(data->buf, &bw_val, r))
return -EINVAL;
- d->new_ctrl = data;
+ d->new_ctrl = bw_val;
d->have_new_ctrl = true;
return 0;
@@ -123,18 +123,13 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
return true;
}
-struct rdt_cbm_parse_data {
- struct rdtgroup *rdtgrp;
- char *buf;
-};
-
/*
* Read one cache bit mask (hex). Check that it is valid for the current
* resource type.
*/
-int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d)
+int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
+ struct rdt_domain *d)
{
- struct rdt_cbm_parse_data *data = _data;
struct rdtgroup *rdtgrp = data->rdtgrp;
u32 cbm_val;
@@ -195,7 +190,7 @@ int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d)
static int parse_line(char *line, struct rdt_resource *r,
struct rdtgroup *rdtgrp)
{
- struct rdt_cbm_parse_data data;
+ struct rdt_parse_data data;
char *dom = NULL, *id;
struct rdt_domain *d;
unsigned long dom_id;
From f968dc119a159a95628a20de2a2dcc913d0a82d7 Mon Sep 17 00:00:00 2001
From: Reinette Chatre
Date: Sat, 15 Sep 2018 14:58:20 -0700
Subject: [PATCH 09/53] x86/intel_rdt: Fix size reporting of MBA resource
Chen Yu reported a divide-by-zero error when accessing the 'size'
resctrl file when a MBA resource is enabled.
divide error: 0000 [#1] SMP PTI
CPU: 93 PID: 1929 Comm: cat Not tainted 4.19.0-rc2-debug-rdt+ #25
RIP: 0010:rdtgroup_cbm_to_size+0x7e/0xa0
Call Trace:
rdtgroup_size_show+0x11a/0x1d0
seq_read+0xd8/0x3b0
Quoting Chen Yu's report: This is because for MB resource, the
r->cache.cbm_len is zero, thus calculating size in rdtgroup_cbm_to_size()
will trigger the exception.
Fix this issue in the 'size' file by getting correct memory bandwidth value
which is in MBps when MBA software controller is enabled or in percentage
when MBA software controller is disabled.
Fixes: d9b48c86eb38 ("x86/intel_rdt: Display resource groups' allocations in bytes")
Reported-by: Chen Yu
Signed-off-by: Reinette Chatre
Signed-off-by: Fenghua Yu
Signed-off-by: Thomas Gleixner
Tested-by: Chen Yu
Cc: "H Peter Anvin"
Cc: "Tony Luck"
Cc: "Xiaochen Shen"
Link: https://lkml.kernel.org/r/20180904174614.26682-1-yu.c.chen@intel.com
Link: https://lkml.kernel.org/r/1537048707-76280-3-git-send-email-fenghua.yu@intel.com
---
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index b799c00bef09..32e8bbdf2400 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -1155,8 +1155,8 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
struct rdt_resource *r;
struct rdt_domain *d;
unsigned int size;
- bool sep = false;
- u32 cbm;
+ bool sep;
+ u32 ctrl;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (!rdtgrp) {
@@ -1174,6 +1174,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
}
for_each_alloc_enabled_rdt_resource(r) {
+ sep = false;
seq_printf(s, "%*s:", max_name_width, r->name);
list_for_each_entry(d, &r->domains, list) {
if (sep)
@@ -1181,8 +1182,13 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
size = 0;
} else {
- cbm = d->ctrl_val[rdtgrp->closid];
- size = rdtgroup_cbm_to_size(r, d, cbm);
+ ctrl = (!is_mba_sc(r) ?
+ d->ctrl_val[rdtgrp->closid] :
+ d->mbps_val[rdtgrp->closid]);
+ if (r->rid == RDT_RESOURCE_MBA)
+ size = ctrl;
+ else
+ size = rdtgroup_cbm_to_size(r, d, ctrl);
}
seq_printf(s, "%d=%u", d->id, size);
sep = true;
From c793da8e4c62d2c002a79c47f44efead450cbcae Mon Sep 17 00:00:00 2001
From: Reinette Chatre
Date: Sat, 15 Sep 2018 14:58:21 -0700
Subject: [PATCH 10/53] x86/intel_rdt: Global closid helper to support future
fixes
The number of CLOSIDs supported by a system is the minimum number of
CLOSIDs supported by any of its resources. Care should be taken when
iterating over the CLOSIDs of a resource since it may be that the number
of CLOSIDs supported on the system is less than the number of CLOSIDs
supported by the resource.
Introduce a helper function that can be used to query the number of
CLOSIDs that is supported by all resources, irrespective of how many
CLOSIDs are supported by a particular resource.
Signed-off-by: Reinette Chatre
Signed-off-by: Fenghua Yu
Signed-off-by: Thomas Gleixner
Cc: "H Peter Anvin"
Cc: "Tony Luck"
Cc: "Xiaochen Shen"
Cc: "Chen Yu"
Link: https://lkml.kernel.org/r/1537048707-76280-4-git-send-email-fenghua.yu@intel.com
---
arch/x86/kernel/cpu/intel_rdt.h | 1 +
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 7 +++++++
2 files changed, 8 insertions(+)
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 78266c798280..285eb3ec4200 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -544,6 +544,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
int update_domains(struct rdt_resource *r, int closid);
+int closids_supported(void);
void closid_free(int closid);
int alloc_rmid(void);
void free_rmid(u32 rmid);
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 32e8bbdf2400..b372923eb209 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -97,6 +97,12 @@ void rdt_last_cmd_printf(const char *fmt, ...)
* limited as the number of resources grows.
*/
static int closid_free_map;
+static int closid_free_map_len;
+
+int closids_supported(void)
+{
+ return closid_free_map_len;
+}
static void closid_init(void)
{
@@ -111,6 +117,7 @@ static void closid_init(void)
/* CLOSID 0 is always reserved for the default group */
closid_free_map &= ~1;
+ closid_free_map_len = rdt_min_closid;
}
static int closid_alloc(void)
From 47d53b184aee983ab9492503da11b0a81b19145b Mon Sep 17 00:00:00 2001
From: Reinette Chatre
Date: Sat, 15 Sep 2018 14:58:22 -0700
Subject: [PATCH 11/53] x86/intel_rdt: Fix invalid mode warning when multiple
resources are managed
When multiple resources are managed by RDT, the number of CLOSIDs used
is the minimum of the CLOSIDs supported by each resource. In the function
rdt_bit_usage_show(), the annotated bitmask is created to depict how the
CAT supporting caches are being used. During this annotated bitmask
creation, each resource group is queried for its mode that is used as a
label in the annotated bitmask.
The maximum number of resource groups is currently assumed to be the
number of CLOSIDs supported by the resource for which the information is
being displayed. This is incorrect since the number of active CLOSIDs is
the minimum across all resources.
If information for a cache instance with more CLOSIDs than another is
being generated we thus encounter a warning like:
invalid mode for closid 8
WARNING: CPU: 88 PID: 1791 at [SNIP]/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
:827 rdt_bit_usage_show+0x221/0x2b0
Fix this by ensuring that only the number of supported CLOSIDs are
considered.
Fixes: e651901187ab8 ("x86/intel_rdt: Introduce "bit_usage" to display cache allocations details")
Signed-off-by: Reinette Chatre
Signed-off-by: Fenghua Yu
Signed-off-by: Thomas Gleixner
Cc: "H Peter Anvin"
Cc: "Tony Luck"
Cc: "Xiaochen Shen"
Cc: "Chen Yu"
Link: https://lkml.kernel.org/r/1537048707-76280-5-git-send-email-fenghua.yu@intel.com
---
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index b372923eb209..ea91750ba27f 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -809,7 +809,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
sw_shareable = 0;
exclusive = 0;
seq_printf(seq, "%d=", dom->id);
- for (i = 0; i < r->num_closid; i++, ctrl++) {
+ for (i = 0; i < closids_supported(); i++, ctrl++) {
if (!closid_allocated(i))
continue;
mode = rdtgroup_mode_by_closid(i);
From 70479c012b67b89e219c40eddc5dc338b7c447a3 Mon Sep 17 00:00:00 2001
From: Reinette Chatre
Date: Sat, 15 Sep 2018 14:58:23 -0700
Subject: [PATCH 12/53] x86/intel_rdt: Fix unchecked MSR access
When a new resource group is created, it is initialized with sane
defaults that currently assume the resource being initialized is a CAT
resource. This code path is also followed by a MBA resource that is not
allocated the same as a CAT resource and as a result we encounter the
following unchecked MSR access error:
unchecked MSR access error: WRMSR to 0xd51 (tried to write 0x0000
000000000064) at rIP: 0xffffffffae059994 (native_write_msr+0x4/0x20)
Call Trace:
mba_wrmsr+0x41/0x80
update_domains+0x125/0x130
rdtgroup_mkdir+0x270/0x500
Fix the above by ensuring the initial allocation is only attempted on a
CAT resource.
Fixes: 95f0b77ef ("x86/intel_rdt: Initialize new resource group with sane defaults")
Signed-off-by: Reinette Chatre
Signed-off-by: Fenghua Yu
Signed-off-by: Thomas Gleixner
Cc: "H Peter Anvin"
Cc: "Tony Luck"
Cc: "Xiaochen Shen"
Cc: "Chen Yu"
Link: https://lkml.kernel.org/r/1537048707-76280-6-git-send-email-fenghua.yu@intel.com
---
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index ea91750ba27f..74821bc457c0 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -2349,6 +2349,12 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
u32 *ctrl;
for_each_alloc_enabled_rdt_resource(r) {
+ /*
+ * Only initialize default allocations for CBM cache
+ * resources
+ */
+ if (r->rid == RDT_RESOURCE_MBA)
+ continue;
list_for_each_entry(d, &r->domains, list) {
d->have_new_ctrl = false;
d->new_ctrl = r->cache.shareable_bits;
@@ -2386,6 +2392,12 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
}
for_each_alloc_enabled_rdt_resource(r) {
+ /*
+ * Only initialize default allocations for CBM cache
+ * resources
+ */
+ if (r->rid == RDT_RESOURCE_MBA)
+ continue;
ret = update_domains(r, rdtgrp->closid);
if (ret < 0) {
rdt_last_cmd_puts("failed to initialize allocations\n");
From 32d736abed4febff4b6bf85d5d240ee24d254322 Mon Sep 17 00:00:00 2001
From: Reinette Chatre
Date: Sat, 15 Sep 2018 14:58:24 -0700
Subject: [PATCH 13/53] x86/intel_rdt: Do not allow pseudo-locking of MBA
resource
A system supporting pseudo-locking may have MBA as well as CAT
resources of which only the CAT resources could support cache
pseudo-locking. When the schemata to be pseudo-locked is provided it
should be checked that that schemata does not attempt to pseudo-lock a
MBA resource.
Fixes: e0bdfe8e3 ("x86/intel_rdt: Support creation/removal of pseudo-locked region")
Signed-off-by: Reinette Chatre
Signed-off-by: Fenghua Yu
Signed-off-by: Thomas Gleixner
Cc: "H Peter Anvin"
Cc: "Tony Luck"
Cc: "Xiaochen Shen"
Cc: "Chen Yu"
Link: https://lkml.kernel.org/r/1537048707-76280-7-git-send-email-fenghua.yu@intel.com
---
arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
index edd5761f7336..0f53049719cd 100644
--- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -195,6 +195,12 @@ static int parse_line(char *line, struct rdt_resource *r,
struct rdt_domain *d;
unsigned long dom_id;
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
+ r->rid == RDT_RESOURCE_MBA) {
+ rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n");
+ return -EINVAL;
+ }
+
next:
if (!line || line[0] == '\0')
return 0;
From f0df4e1acf3d721958dcafb2c9c0bdf25189068d Mon Sep 17 00:00:00 2001
From: Reinette Chatre
Date: Sat, 15 Sep 2018 14:58:25 -0700
Subject: [PATCH 14/53] x86/intel_rdt: Fix incorrect loop end condition
A loop is used to check if a CAT resource's CBM of one CLOSID
overlaps with the CBM of another CLOSID of the same resource. The loop
is run over all CLOSIDs supported by the resource.
The problem with running the loop over all CLOSIDs supported by the
resource is that its number of supported CLOSIDs may be more than the
number of supported CLOSIDs on the system, which is the minimum number of
CLOSIDs supported across all resources.
Fix the loop to only consider the number of system supported CLOSIDs,
not all that are supported by the resource.
Fixes: 49f7b4efa ("x86/intel_rdt: Enable setting of exclusive mode")
Signed-off-by: Reinette Chatre
Signed-off-by: Fenghua Yu
Signed-off-by: Thomas Gleixner
Cc: "H Peter Anvin"
Cc: "Tony Luck"
Cc: "Xiaochen Shen"
Cc: "Chen Yu"
Link: https://lkml.kernel.org/r/1537048707-76280-8-git-send-email-fenghua.yu@intel.com
---
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 74821bc457c0..afd93d45e21b 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -996,7 +996,7 @@ bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
/* Check for overlap with other resource groups */
ctrl = d->ctrl_val;
- for (i = 0; i < r->num_closid; i++, ctrl++) {
+ for (i = 0; i < closids_supported(); i++, ctrl++) {
ctrl_b = (unsigned long *)ctrl;
mode = rdtgroup_mode_by_closid(i);
if (closid_allocated(i) && i != closid &&
From 939b90b20bc87e199b6b53942764b987289b87ce Mon Sep 17 00:00:00 2001
From: Reinette Chatre
Date: Sat, 15 Sep 2018 14:58:26 -0700
Subject: [PATCH 15/53] x86/intel_rdt: Fix exclusive mode handling of MBA
resource
It is possible for a resource group to consist out of MBA as well as
CAT/CDP resources. The "exclusive" resource mode only applies to the
CAT/CDP resources since MBA allocations cannot be specified to overlap
or not. When a user requests a resource group to become "exclusive" then it
can only be successful if there are CAT/CDP resources in the group
and none of their CBMs associated with the group's CLOSID overlaps with
any other resource group.
Fix the "exclusive" mode setting by failing if there isn't any CAT/CDP
resource in the group and ensuring that the CBM checking is only done on
CAT/CDP resources.
Fixes: 49f7b4efa ("x86/intel_rdt: Enable setting of exclusive mode")
Signed-off-by: Reinette Chatre
Signed-off-by: Fenghua Yu
Signed-off-by: Thomas Gleixner
Cc: "H Peter Anvin"
Cc: "Tony Luck"
Cc: "Xiaochen Shen"
Cc: "Chen Yu"
Link: https://lkml.kernel.org/r/1537048707-76280-9-git-send-email-fenghua.yu@intel.com
---
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index afd93d45e21b..f3231f78d69b 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -1031,16 +1031,27 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
{
int closid = rdtgrp->closid;
struct rdt_resource *r;
+ bool has_cache = false;
struct rdt_domain *d;
for_each_alloc_enabled_rdt_resource(r) {
+ if (r->rid == RDT_RESOURCE_MBA)
+ continue;
+ has_cache = true;
list_for_each_entry(d, &r->domains, list) {
if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid],
- rdtgrp->closid, false))
+ rdtgrp->closid, false)) {
+ rdt_last_cmd_puts("schemata overlaps\n");
return false;
+ }
}
}
+ if (!has_cache) {
+ rdt_last_cmd_puts("cannot be exclusive without CAT/CDP\n");
+ return false;
+ }
+
return true;
}
@@ -1092,7 +1103,6 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
rdtgrp->mode = RDT_MODE_SHAREABLE;
} else if (!strcmp(buf, "exclusive")) {
if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
- rdt_last_cmd_printf("schemata overlaps\n");
ret = -EINVAL;
goto out;
}
From ffb2315fd22c2568747402eecdc581a245a2f5ba Mon Sep 17 00:00:00 2001
From: Reinette Chatre
Date: Sat, 15 Sep 2018 14:58:27 -0700
Subject: [PATCH 16/53] x86/intel_rdt: Fix incorrect loop end condition
In order to determine a sane default cache allocation for a new CAT/CDP
resource group, all resource groups are checked to determine which cache
portions are available to share. At this time all possible CLOSIDs
that can be supported by the resource is checked. This is problematic
if the resource supports more CLOSIDs than another CAT/CDP resource. In
this case, the number of CLOSIDs that could be allocated are fewer than
the number of CLOSIDs that can be supported by the resource.
Limit the check of closids to that what is supported by the system based
on the minimum across all resources.
Fixes: 95f0b77ef ("x86/intel_rdt: Initialize new resource group with sane defaults")
Signed-off-by: Reinette Chatre
Signed-off-by: Fenghua Yu
Signed-off-by: Thomas Gleixner
Cc: "H Peter Anvin"
Cc: "Tony Luck"
Cc: "Xiaochen Shen"
Cc: "Chen Yu"
Link: https://lkml.kernel.org/r/1537048707-76280-10-git-send-email-fenghua.yu@intel.com
---
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index f3231f78d69b..1b8e86a5d5e1 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -2370,7 +2370,7 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
d->new_ctrl = r->cache.shareable_bits;
used_b = r->cache.shareable_bits;
ctrl = d->ctrl_val;
- for (i = 0; i < r->num_closid; i++, ctrl++) {
+ for (i = 0; i < closids_supported(); i++, ctrl++) {
if (closid_allocated(i) && i != closid) {
mode = rdtgroup_mode_by_closid(i);
if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
From 8e2aac333785f91ff74e219a1e78e6bdc1ef2c41 Mon Sep 17 00:00:00 2001
From: Simon Detheridge
Date: Sat, 15 Sep 2018 22:15:18 +0100
Subject: [PATCH 17/53] pinctrl: cannonlake: Fix gpio base for GPP-E
The gpio base for GPP-E was set incorrectly to 258 instead of 256,
preventing the touchpad working on my Tong Fang GK5CN5Z laptop.
Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=200787
Signed-off-by: Simon Detheridge
Acked-by: Mika Westerberg
Signed-off-by: Linus Walleij
---
drivers/pinctrl/intel/pinctrl-cannonlake.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/pinctrl/intel/pinctrl-cannonlake.c b/drivers/pinctrl/intel/pinctrl-cannonlake.c
index fb1afe55bf53..8d48371caaa2 100644
--- a/drivers/pinctrl/intel/pinctrl-cannonlake.c
+++ b/drivers/pinctrl/intel/pinctrl-cannonlake.c
@@ -379,7 +379,7 @@ static const struct intel_padgroup cnlh_community1_gpps[] = {
static const struct intel_padgroup cnlh_community3_gpps[] = {
CNL_GPP(0, 155, 178, 192), /* GPP_K */
CNL_GPP(1, 179, 202, 224), /* GPP_H */
- CNL_GPP(2, 203, 215, 258), /* GPP_E */
+ CNL_GPP(2, 203, 215, 256), /* GPP_E */
CNL_GPP(3, 216, 239, 288), /* GPP_F */
CNL_GPP(4, 240, 248, CNL_NO_GPIO), /* SPI */
};
From 571d0563c8881595f4ab027aef9ed1c55e3e7b7c Mon Sep 17 00:00:00 2001
From: Dan Carpenter
Date: Wed, 19 Sep 2018 13:35:53 +0300
Subject: [PATCH 18/53] x86/paravirt: Fix some warning messages
The first argument to WARN_ONCE() is a condition.
Fixes: 5800dc5c19f3 ("x86/paravirt: Fix spectre-v2 mitigations for paravirt guests")
Signed-off-by: Dan Carpenter
Signed-off-by: Thomas Gleixner
Reviewed-by: Juergen Gross
Cc: Peter Zijlstra
Cc: Alok Kataria
Cc: "H. Peter Anvin"
Cc: virtualization@lists.linux-foundation.org
Cc: kernel-janitors@vger.kernel.org
Link: https://lkml.kernel.org/r/20180919103553.GD9238@mwanda
---
arch/x86/kernel/paravirt.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index afdb303285f8..8dc69d82567e 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -91,7 +91,7 @@ unsigned paravirt_patch_call(void *insnbuf,
if (len < 5) {
#ifdef CONFIG_RETPOLINE
- WARN_ONCE("Failing to patch indirect CALL in %ps\n", (void *)addr);
+ WARN_ONCE(1, "Failing to patch indirect CALL in %ps\n", (void *)addr);
#endif
return len; /* call too long for patch site */
}
@@ -111,7 +111,7 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
if (len < 5) {
#ifdef CONFIG_RETPOLINE
- WARN_ONCE("Failing to patch indirect JMP in %ps\n", (void *)addr);
+ WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void *)addr);
#endif
return len; /* call too long for patch site */
}
From 336b08088d4da009108d4418e241ca95c9152237 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 19 Sep 2018 13:48:41 +0200
Subject: [PATCH 19/53] MAINTAINERS: Add Borislav to the x86 maintainers
Borislav is effectivly maintaining parts of X86 already, make it official.
Signed-off-by: Thomas Gleixner
Acked-by: Ingo Molnar
Acked-by: Borislav Petkov
---
MAINTAINERS | 1 +
1 file changed, 1 insertion(+)
diff --git a/MAINTAINERS b/MAINTAINERS
index 4ece30f15777..091e66b60cd2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15913,6 +15913,7 @@ F: net/x25/
X86 ARCHITECTURE (32-BIT AND 64-BIT)
M: Thomas Gleixner
M: Ingo Molnar
+M: Borislav Petkov
R: "H. Peter Anvin"
M: x86@kernel.org
L: linux-kernel@vger.kernel.org
From 70513d58751d7c6c1a0133557b13089b9f2e3e66 Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky
Date: Thu, 12 Jul 2018 13:27:00 -0400
Subject: [PATCH 20/53] xen/x86/vpmu: Zero struct pt_regs before calling into
sample handling code
Otherwise we may leak kernel stack for events that sample user
registers.
Reported-by: Mark Rutland
Reviewed-by: Juergen Gross
Signed-off-by: Boris Ostrovsky
Cc: stable@vger.kernel.org
---
arch/x86/xen/pmu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index 7d00d4ad44d4..95997e6c0696 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c
@@ -478,7 +478,7 @@ static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
{
int err, ret = IRQ_NONE;
- struct pt_regs regs;
+ struct pt_regs regs = {0};
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
uint8_t xenpmu_flags = get_xenpmu_flags();
From d59f532480f5231bf62615a9287e05b78225fb05 Mon Sep 17 00:00:00 2001
From: Juergen Gross
Date: Wed, 19 Sep 2018 15:42:33 +0200
Subject: [PATCH 21/53] xen: issue warning message when out of grant maptrack
entries
When a driver domain (e.g. dom0) is running out of maptrack entries it
can't map any more foreign domain pages. Instead of silently stalling
the affected domUs issue a rate limited warning in this case in order
to make it easier to detect that situation.
Signed-off-by: Juergen Gross
Reviewed-by: Boris Ostrovsky
Signed-off-by: Boris Ostrovsky
---
drivers/xen/grant-table.c | 27 +++++++++++++++++++++------
1 file changed, 21 insertions(+), 6 deletions(-)
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 7bafa703a992..84575baceebc 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -1040,18 +1040,33 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
return ret;
for (i = 0; i < count; i++) {
- /* Retry eagain maps */
- if (map_ops[i].status == GNTST_eagain)
- gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i,
- &map_ops[i].status, __func__);
-
- if (map_ops[i].status == GNTST_okay) {
+ switch (map_ops[i].status) {
+ case GNTST_okay:
+ {
struct xen_page_foreign *foreign;
SetPageForeign(pages[i]);
foreign = xen_page_foreign(pages[i]);
foreign->domid = map_ops[i].dom;
foreign->gref = map_ops[i].ref;
+ break;
+ }
+
+ case GNTST_no_device_space:
+ pr_warn_ratelimited("maptrack limit reached, can't map all guest pages\n");
+ break;
+
+ case GNTST_eagain:
+ /* Retry eagain maps */
+ gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref,
+ map_ops + i,
+ &map_ops[i].status, __func__);
+ /* Test status in next loop iteration. */
+ i--;
+ break;
+
+ default:
+ break;
}
}
From 96147db1e1dff83679e71ac92193cbcab761a14c Mon Sep 17 00:00:00 2001
From: Mika Westerberg
Date: Tue, 18 Sep 2018 18:36:21 +0300
Subject: [PATCH 22/53] pinctrl: intel: Do pin translation in other GPIO
operations as well
For some reason I thought GPIOLIB handles translation from GPIO ranges
to pinctrl pins but it turns out not to be the case. This means that
when GPIOs operations are performed for a pin controller having a custom
GPIO base such as Cannon Lake and Ice Lake incorrect pin number gets
used internally.
Fix this in the same way we did for lock/unlock IRQ operations and
translate the GPIO number to pin before using it.
Fixes: a60eac3239f0 ("pinctrl: intel: Allow custom GPIO base for pad groups")
Reported-by: Rajat Jain
Signed-off-by: Mika Westerberg
Tested-by: Rajat Jain
Signed-off-by: Linus Walleij
---
drivers/pinctrl/intel/pinctrl-intel.c | 175 ++++++++++++++------------
1 file changed, 95 insertions(+), 80 deletions(-)
diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c
index 62b009b27eda..ec8dafc94694 100644
--- a/drivers/pinctrl/intel/pinctrl-intel.c
+++ b/drivers/pinctrl/intel/pinctrl-intel.c
@@ -747,86 +747,6 @@ static const struct pinctrl_desc intel_pinctrl_desc = {
.owner = THIS_MODULE,
};
-static int intel_gpio_get(struct gpio_chip *chip, unsigned offset)
-{
- struct intel_pinctrl *pctrl = gpiochip_get_data(chip);
- void __iomem *reg;
- u32 padcfg0;
-
- reg = intel_get_padcfg(pctrl, offset, PADCFG0);
- if (!reg)
- return -EINVAL;
-
- padcfg0 = readl(reg);
- if (!(padcfg0 & PADCFG0_GPIOTXDIS))
- return !!(padcfg0 & PADCFG0_GPIOTXSTATE);
-
- return !!(padcfg0 & PADCFG0_GPIORXSTATE);
-}
-
-static void intel_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
-{
- struct intel_pinctrl *pctrl = gpiochip_get_data(chip);
- unsigned long flags;
- void __iomem *reg;
- u32 padcfg0;
-
- reg = intel_get_padcfg(pctrl, offset, PADCFG0);
- if (!reg)
- return;
-
- raw_spin_lock_irqsave(&pctrl->lock, flags);
- padcfg0 = readl(reg);
- if (value)
- padcfg0 |= PADCFG0_GPIOTXSTATE;
- else
- padcfg0 &= ~PADCFG0_GPIOTXSTATE;
- writel(padcfg0, reg);
- raw_spin_unlock_irqrestore(&pctrl->lock, flags);
-}
-
-static int intel_gpio_get_direction(struct gpio_chip *chip, unsigned int offset)
-{
- struct intel_pinctrl *pctrl = gpiochip_get_data(chip);
- void __iomem *reg;
- u32 padcfg0;
-
- reg = intel_get_padcfg(pctrl, offset, PADCFG0);
- if (!reg)
- return -EINVAL;
-
- padcfg0 = readl(reg);
-
- if (padcfg0 & PADCFG0_PMODE_MASK)
- return -EINVAL;
-
- return !!(padcfg0 & PADCFG0_GPIOTXDIS);
-}
-
-static int intel_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
-{
- return pinctrl_gpio_direction_input(chip->base + offset);
-}
-
-static int intel_gpio_direction_output(struct gpio_chip *chip, unsigned offset,
- int value)
-{
- intel_gpio_set(chip, offset, value);
- return pinctrl_gpio_direction_output(chip->base + offset);
-}
-
-static const struct gpio_chip intel_gpio_chip = {
- .owner = THIS_MODULE,
- .request = gpiochip_generic_request,
- .free = gpiochip_generic_free,
- .get_direction = intel_gpio_get_direction,
- .direction_input = intel_gpio_direction_input,
- .direction_output = intel_gpio_direction_output,
- .get = intel_gpio_get,
- .set = intel_gpio_set,
- .set_config = gpiochip_generic_config,
-};
-
/**
* intel_gpio_to_pin() - Translate from GPIO offset to pin number
* @pctrl: Pinctrl structure
@@ -872,6 +792,101 @@ static int intel_gpio_to_pin(struct intel_pinctrl *pctrl, unsigned offset,
return -EINVAL;
}
+static int intel_gpio_get(struct gpio_chip *chip, unsigned offset)
+{
+ struct intel_pinctrl *pctrl = gpiochip_get_data(chip);
+ void __iomem *reg;
+ u32 padcfg0;
+ int pin;
+
+ pin = intel_gpio_to_pin(pctrl, offset, NULL, NULL);
+ if (pin < 0)
+ return -EINVAL;
+
+ reg = intel_get_padcfg(pctrl, pin, PADCFG0);
+ if (!reg)
+ return -EINVAL;
+
+ padcfg0 = readl(reg);
+ if (!(padcfg0 & PADCFG0_GPIOTXDIS))
+ return !!(padcfg0 & PADCFG0_GPIOTXSTATE);
+
+ return !!(padcfg0 & PADCFG0_GPIORXSTATE);
+}
+
+static void intel_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+ struct intel_pinctrl *pctrl = gpiochip_get_data(chip);
+ unsigned long flags;
+ void __iomem *reg;
+ u32 padcfg0;
+ int pin;
+
+ pin = intel_gpio_to_pin(pctrl, offset, NULL, NULL);
+ if (pin < 0)
+ return;
+
+ reg = intel_get_padcfg(pctrl, pin, PADCFG0);
+ if (!reg)
+ return;
+
+ raw_spin_lock_irqsave(&pctrl->lock, flags);
+ padcfg0 = readl(reg);
+ if (value)
+ padcfg0 |= PADCFG0_GPIOTXSTATE;
+ else
+ padcfg0 &= ~PADCFG0_GPIOTXSTATE;
+ writel(padcfg0, reg);
+ raw_spin_unlock_irqrestore(&pctrl->lock, flags);
+}
+
+static int intel_gpio_get_direction(struct gpio_chip *chip, unsigned int offset)
+{
+ struct intel_pinctrl *pctrl = gpiochip_get_data(chip);
+ void __iomem *reg;
+ u32 padcfg0;
+ int pin;
+
+ pin = intel_gpio_to_pin(pctrl, offset, NULL, NULL);
+ if (pin < 0)
+ return -EINVAL;
+
+ reg = intel_get_padcfg(pctrl, pin, PADCFG0);
+ if (!reg)
+ return -EINVAL;
+
+ padcfg0 = readl(reg);
+
+ if (padcfg0 & PADCFG0_PMODE_MASK)
+ return -EINVAL;
+
+ return !!(padcfg0 & PADCFG0_GPIOTXDIS);
+}
+
+static int intel_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
+{
+ return pinctrl_gpio_direction_input(chip->base + offset);
+}
+
+static int intel_gpio_direction_output(struct gpio_chip *chip, unsigned offset,
+ int value)
+{
+ intel_gpio_set(chip, offset, value);
+ return pinctrl_gpio_direction_output(chip->base + offset);
+}
+
+static const struct gpio_chip intel_gpio_chip = {
+ .owner = THIS_MODULE,
+ .request = gpiochip_generic_request,
+ .free = gpiochip_generic_free,
+ .get_direction = intel_gpio_get_direction,
+ .direction_input = intel_gpio_direction_input,
+ .direction_output = intel_gpio_direction_output,
+ .get = intel_gpio_get,
+ .set = intel_gpio_set,
+ .set_config = gpiochip_generic_config,
+};
+
static int intel_gpio_irq_reqres(struct irq_data *d)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
From a8b3bb338e4ee4cc84a2b9a6fdf27049b84baa59 Mon Sep 17 00:00:00 2001
From: Fenghua Yu
Date: Thu, 20 Sep 2018 12:37:08 -0700
Subject: [PATCH 23/53] x86/intel_rdt: Add Reinette as co-maintainer for RDT
Reinette Chatre is doing great job on enabling pseudo-locking and other
features in RDT. Add her as co-maintainer for RDT.
Suggested-by: Thomas Gleixner
Signed-off-by: Fenghua Yu
Signed-off-by: Thomas Gleixner
Acked-by: Ingo Molnar
Acked-by: Reinette Chatre
Cc: "H Peter Anvin"
Cc: "Tony Luck"
Link: https://lkml.kernel.org/r/1537472228-221799-1-git-send-email-fenghua.yu@intel.com
---
MAINTAINERS | 1 +
1 file changed, 1 insertion(+)
diff --git a/MAINTAINERS b/MAINTAINERS
index 091e66b60cd2..140ea6ee3ac8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12260,6 +12260,7 @@ F: Documentation/networking/rds.txt
RDT - RESOURCE ALLOCATION
M: Fenghua Yu
+M: Reinette Chatre
L: linux-kernel@vger.kernel.org
S: Supported
F: arch/x86/kernel/cpu/intel_rdt*
From 9068a427ee0beb1365a0925e8c33f338f09f5e97 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 19 Sep 2018 14:33:14 +0200
Subject: [PATCH 24/53] MAINTAINERS: Add X86 MM entry
Dave, Andy and Peter are de facto overseing the mm parts of X86. Add an
explicit maintainers entry.
Signed-off-by: Thomas Gleixner
Acked-by: Dave Hansen
Acked-by: Andy Lutomirski
Acked-by: Peter Zijlstra
Acked-by: Ingo Molnar
---
MAINTAINERS | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/MAINTAINERS b/MAINTAINERS
index 140ea6ee3ac8..80a311252b04 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15943,6 +15943,15 @@ M: Borislav Petkov
S: Maintained
F: arch/x86/kernel/cpu/microcode/*
+X86 MM
+M: Dave Hansen
+M: Andy Lutomirski
+M: Peter Zijlstra
+L: linux-kernel@vger.kernel.org
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/mm
+S: Maintained
+F: arch/x86/mm/
+
X86 PLATFORM DRIVERS
M: Darren Hart
M: Andy Shevchenko
From 05ab1d8a4b36ee912b7087c6da127439ed0a903e Mon Sep 17 00:00:00 2001
From: Feng Tang
Date: Thu, 20 Sep 2018 10:58:28 +0800
Subject: [PATCH 25/53] x86/mm: Expand static page table for fixmap space
We met a kernel panic when enabling earlycon, which is due to the fixmap
address of earlycon is not statically setup.
Currently the static fixmap setup in head_64.S only covers 2M virtual
address space, while it actually could be in 4M space with different
kernel configurations, e.g. when VSYSCALL emulation is disabled.
So increase the static space to 4M for now by defining FIXMAP_PMD_NUM to 2,
and add a build time check to ensure that the fixmap is covered by the
initial static page tables.
Fixes: 1ad83c858c7d ("x86_64,vsyscall: Make vsyscall emulation configurable")
Suggested-by: Thomas Gleixner
Signed-off-by: Feng Tang
Signed-off-by: Thomas Gleixner
Tested-by: kernel test robot
Reviewed-by: Juergen Gross (Xen parts)
Cc: H Peter Anvin
Cc: Peter Zijlstra
Cc: Michal Hocko
Cc: Yinghai Lu
Cc: Dave Hansen
Cc: Andi Kleen
Cc: Andy Lutomirsky
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20180920025828.23699-1-feng.tang@intel.com
---
arch/x86/include/asm/fixmap.h | 10 ++++++++++
arch/x86/include/asm/pgtable_64.h | 3 ++-
arch/x86/kernel/head64.c | 4 +++-
arch/x86/kernel/head_64.S | 16 ++++++++++++----
arch/x86/mm/pgtable.c | 9 +++++++++
arch/x86/xen/mmu_pv.c | 8 ++++++--
6 files changed, 42 insertions(+), 8 deletions(-)
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index e203169931c7..6390bd8c141b 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -14,6 +14,16 @@
#ifndef _ASM_X86_FIXMAP_H
#define _ASM_X86_FIXMAP_H
+/*
+ * Exposed to assembly code for setting up initial page tables. Cannot be
+ * calculated in assembly code (fixmap entries are an enum), but is sanity
+ * checked in the actual fixmap C code to make sure that the fixmap is
+ * covered fully.
+ */
+#define FIXMAP_PMD_NUM 2
+/* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */
+#define FIXMAP_PMD_TOP 507
+
#ifndef __ASSEMBLY__
#include
#include
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index ce2b59047cb8..9c85b54bf03c 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -14,6 +14,7 @@
#include
#include
#include
+#include
extern p4d_t level4_kernel_pgt[512];
extern p4d_t level4_ident_pgt[512];
@@ -22,7 +23,7 @@ extern pud_t level3_ident_pgt[512];
extern pmd_t level2_kernel_pgt[512];
extern pmd_t level2_fixmap_pgt[512];
extern pmd_t level2_ident_pgt[512];
-extern pte_t level1_fixmap_pgt[512];
+extern pte_t level1_fixmap_pgt[512 * FIXMAP_PMD_NUM];
extern pgd_t init_top_pgt[];
#define swapper_pg_dir init_top_pgt
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index c16af27eb23f..ddee1f0870c4 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -35,6 +35,7 @@
#include
#include
#include
+#include
/*
* Manage page tables very early on.
@@ -166,7 +167,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
pud[511] += load_delta;
pmd = fixup_pointer(level2_fixmap_pgt, physaddr);
- pmd[506] += load_delta;
+ for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
+ pmd[i] += load_delta;
/*
* Set up the identity mapping for the switchover. These
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 15ebc2fc166e..a3618cf04cf6 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -24,6 +24,7 @@
#include "../entry/calling.h"
#include
#include
+#include
#ifdef CONFIG_PARAVIRT
#include
@@ -445,13 +446,20 @@ NEXT_PAGE(level2_kernel_pgt)
KERNEL_IMAGE_SIZE/PMD_SIZE)
NEXT_PAGE(level2_fixmap_pgt)
- .fill 506,8,0
- .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
- /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
- .fill 5,8,0
+ .fill (512 - 4 - FIXMAP_PMD_NUM),8,0
+ pgtno = 0
+ .rept (FIXMAP_PMD_NUM)
+ .quad level1_fixmap_pgt + (pgtno << PAGE_SHIFT) - __START_KERNEL_map \
+ + _PAGE_TABLE_NOENC;
+ pgtno = pgtno + 1
+ .endr
+ /* 6 MB reserved space + a 2MB hole */
+ .fill 4,8,0
NEXT_PAGE(level1_fixmap_pgt)
+ .rept (FIXMAP_PMD_NUM)
.fill 512,8,0
+ .endr
#undef PMDS
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index ae394552fb94..089e78c4effd 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -637,6 +637,15 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
{
unsigned long address = __fix_to_virt(idx);
+#ifdef CONFIG_X86_64
+ /*
+ * Ensure that the static initial page tables are covering the
+ * fixmap completely.
+ */
+ BUILD_BUG_ON(__end_of_permanent_fixed_addresses >
+ (FIXMAP_PMD_NUM * PTRS_PER_PTE));
+#endif
+
if (idx >= __end_of_fixed_addresses) {
BUG();
return;
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 2fe5c9b1816b..dd461c0167ef 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1907,7 +1907,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
/* L3_k[511] -> level2_fixmap_pgt */
convert_pfn_mfn(level3_kernel_pgt);
- /* L3_k[511][506] -> level1_fixmap_pgt */
+ /* L3_k[511][508-FIXMAP_PMD_NUM ... 507] -> level1_fixmap_pgt */
convert_pfn_mfn(level2_fixmap_pgt);
/* We get [511][511] and have Xen's version of level2_kernel_pgt */
@@ -1952,7 +1952,11 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
- set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO);
+
+ for (i = 0; i < FIXMAP_PMD_NUM; i++) {
+ set_page_prot(level1_fixmap_pgt + i * PTRS_PER_PTE,
+ PAGE_KERNEL_RO);
+ }
/* Pin down new L4 */
pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
From b57e99b4b8b0ebdf9707424e7ddc0c392bdc5fe6 Mon Sep 17 00:00:00 2001
From: Omar Sandoval
Date: Fri, 21 Sep 2018 16:44:34 -0700
Subject: [PATCH 26/53] block: use nanosecond resolution for iostat
Klaus Kusche reported that the I/O busy time in /proc/diskstats was not
updating properly on 4.18. This is because we started using ktime to
track elapsed time, and we convert nanoseconds to jiffies when we update
the partition counter. However, this gets rounded down, so any I/Os that
take less than a jiffy are not accounted for. Previously in this case,
the value of jiffies would sometimes increment while we were doing I/O,
so at least some I/Os were accounted for.
Let's convert the stats to use nanoseconds internally. We still report
milliseconds as before, now more accurately than ever. The value is
still truncated to 32 bits for backwards compatibility.
Fixes: 522a777566f5 ("block: consolidate struct request timestamp fields")
Cc: stable@vger.kernel.org
Reported-by: Klaus Kusche
Signed-off-by: Omar Sandoval
Signed-off-by: Jens Axboe
---
block/bio.c | 2 +-
block/blk-core.c | 4 +---
block/genhd.c | 6 +++---
block/partition-generic.c | 6 +++---
include/linux/genhd.h | 5 ++++-
5 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index 8c680a776171..0093bed81c0e 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1684,7 +1684,7 @@ void generic_end_io_acct(struct request_queue *q, int req_op,
const int sgrp = op_stat_group(req_op);
int cpu = part_stat_lock();
- part_stat_add(cpu, part, ticks[sgrp], duration);
+ part_stat_add(cpu, part, nsecs[sgrp], jiffies_to_nsecs(duration));
part_round_stats(q, cpu, part);
part_dec_in_flight(q, part, op_is_write(req_op));
diff --git a/block/blk-core.c b/block/blk-core.c
index 4dbc93f43b38..cff0a60ee200 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2733,17 +2733,15 @@ void blk_account_io_done(struct request *req, u64 now)
* containing request is enough.
*/
if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
- unsigned long duration;
const int sgrp = op_stat_group(req_op(req));
struct hd_struct *part;
int cpu;
- duration = nsecs_to_jiffies(now - req->start_time_ns);
cpu = part_stat_lock();
part = req->part;
part_stat_inc(cpu, part, ios[sgrp]);
- part_stat_add(cpu, part, ticks[sgrp], duration);
+ part_stat_add(cpu, part, nsecs[sgrp], now - req->start_time_ns);
part_round_stats(req->q, cpu, part);
part_dec_in_flight(req->q, part, rq_data_dir(req));
diff --git a/block/genhd.c b/block/genhd.c
index 8cc719a37b32..be5bab20b2ab 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1343,18 +1343,18 @@ static int diskstats_show(struct seq_file *seqf, void *v)
part_stat_read(hd, ios[STAT_READ]),
part_stat_read(hd, merges[STAT_READ]),
part_stat_read(hd, sectors[STAT_READ]),
- jiffies_to_msecs(part_stat_read(hd, ticks[STAT_READ])),
+ (unsigned int)part_stat_read_msecs(hd, STAT_READ),
part_stat_read(hd, ios[STAT_WRITE]),
part_stat_read(hd, merges[STAT_WRITE]),
part_stat_read(hd, sectors[STAT_WRITE]),
- jiffies_to_msecs(part_stat_read(hd, ticks[STAT_WRITE])),
+ (unsigned int)part_stat_read_msecs(hd, STAT_WRITE),
inflight[0],
jiffies_to_msecs(part_stat_read(hd, io_ticks)),
jiffies_to_msecs(part_stat_read(hd, time_in_queue)),
part_stat_read(hd, ios[STAT_DISCARD]),
part_stat_read(hd, merges[STAT_DISCARD]),
part_stat_read(hd, sectors[STAT_DISCARD]),
- jiffies_to_msecs(part_stat_read(hd, ticks[STAT_DISCARD]))
+ (unsigned int)part_stat_read_msecs(hd, STAT_DISCARD)
);
}
disk_part_iter_exit(&piter);
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 5a8975a1201c..d3d14e81fb12 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -136,18 +136,18 @@ ssize_t part_stat_show(struct device *dev,
part_stat_read(p, ios[STAT_READ]),
part_stat_read(p, merges[STAT_READ]),
(unsigned long long)part_stat_read(p, sectors[STAT_READ]),
- jiffies_to_msecs(part_stat_read(p, ticks[STAT_READ])),
+ (unsigned int)part_stat_read_msecs(p, STAT_READ),
part_stat_read(p, ios[STAT_WRITE]),
part_stat_read(p, merges[STAT_WRITE]),
(unsigned long long)part_stat_read(p, sectors[STAT_WRITE]),
- jiffies_to_msecs(part_stat_read(p, ticks[STAT_WRITE])),
+ (unsigned int)part_stat_read_msecs(p, STAT_WRITE),
inflight[0],
jiffies_to_msecs(part_stat_read(p, io_ticks)),
jiffies_to_msecs(part_stat_read(p, time_in_queue)),
part_stat_read(p, ios[STAT_DISCARD]),
part_stat_read(p, merges[STAT_DISCARD]),
(unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]),
- jiffies_to_msecs(part_stat_read(p, ticks[STAT_DISCARD])));
+ (unsigned int)part_stat_read_msecs(p, STAT_DISCARD));
}
ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 57864422a2c8..25c08c6c7f99 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -83,10 +83,10 @@ struct partition {
} __attribute__((packed));
struct disk_stats {
+ u64 nsecs[NR_STAT_GROUPS];
unsigned long sectors[NR_STAT_GROUPS];
unsigned long ios[NR_STAT_GROUPS];
unsigned long merges[NR_STAT_GROUPS];
- unsigned long ticks[NR_STAT_GROUPS];
unsigned long io_ticks;
unsigned long time_in_queue;
};
@@ -354,6 +354,9 @@ static inline void free_part_stats(struct hd_struct *part)
#endif /* CONFIG_SMP */
+#define part_stat_read_msecs(part, which) \
+ div_u64(part_stat_read(part, nsecs[which]), NSEC_PER_MSEC)
+
#define part_stat_read_accum(part, field) \
(part_stat_read(part, field[STAT_READ]) + \
part_stat_read(part, field[STAT_WRITE]) + \
From 6bf4ca7fbc85d80446ac01c0d1d77db4d91a6d84 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Sun, 23 Sep 2018 19:15:18 +0200
Subject: [PATCH 27/53] Linux 4.19-rc5
---
Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index f03a1e062503..0c90c4354979 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 4
PATCHLEVEL = 19
SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc5
NAME = Merciless Moray
# *DOCUMENTATION*
From d2db7773ba864df6b4e19643dfc54838550d8049 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose
Date: Wed, 26 Sep 2018 17:32:37 +0100
Subject: [PATCH 28/53] kvm: arm/arm64: Fix stage2_flush_memslot for 4 level
page table
So far we have only supported 3 level page table with fixed IPA of
40bits, where PUD is folded. With 4 level page tables, we need
to check if the PUD entry is valid or not. Fix stage2_flush_memslot()
to do this check, before walking down the table.
Acked-by: Christoffer Dall
Acked-by: Marc Zyngier
Reviewed-by: Eric Auger
Signed-off-by: Suzuki K Poulose
Signed-off-by: Marc Zyngier
---
virt/kvm/arm/mmu.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index ed162a6c57c5..ee7ce8fa4a12 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -412,7 +412,8 @@ static void stage2_flush_memslot(struct kvm *kvm,
pgd = kvm->arch.pgd + stage2_pgd_index(addr);
do {
next = stage2_pgd_addr_end(addr, end);
- stage2_flush_puds(kvm, pgd, addr, next);
+ if (!stage2_pgd_none(*pgd))
+ stage2_flush_puds(kvm, pgd, addr, next);
} while (pgd++, addr = next, addr != end);
}
From 7788a28062aca211979ecd2c334e06e2ef5281cc Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose
Date: Wed, 26 Sep 2018 17:32:38 +0100
Subject: [PATCH 29/53] kvm: arm/arm64: Remove spurious WARN_ON
On a 4-level page table pgd entry can be empty, unlike a 3-level
page table. Remove the spurious WARN_ON() in stage_get_pud().
Acked-by: Christoffer Dall
Acked-by: Marc Zyngier
Reviewed-by: Eric Auger
Signed-off-by: Suzuki K Poulose
Signed-off-by: Marc Zyngier
---
virt/kvm/arm/mmu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index ee7ce8fa4a12..4a285d760ce0 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1005,7 +1005,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
pud_t *pud;
pgd = kvm->arch.pgd + stage2_pgd_index(addr);
- if (WARN_ON(stage2_pgd_none(*pgd))) {
+ if (stage2_pgd_none(*pgd)) {
if (!cache)
return NULL;
pud = mmu_memory_cache_alloc(cache);
From 9f98ddd6686cc9469fb73b11ddd403271d65cbdf Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose
Date: Wed, 26 Sep 2018 17:32:39 +0100
Subject: [PATCH 30/53] kvm: arm64: Add helper for loading the stage2 setting
for a VM
We load the stage2 context of a guest for different operations,
including running the guest and tlb maintenance on behalf of the
guest. As of now only the vttbr is private to the guest, but this
is about to change with IPA per VM. Add a helper to load the stage2
configuration for a VM, which could do the right thing with the
future changes.
Cc: Christoffer Dall
Cc: Marc Zyngier
Reviewed-by: Eric Auger
Signed-off-by: Suzuki K Poulose
Signed-off-by: Marc Zyngier
---
arch/arm64/include/asm/kvm_hyp.h | 9 +++++++++
arch/arm64/kvm/hyp/switch.c | 2 +-
arch/arm64/kvm/hyp/tlb.c | 4 ++--
3 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 384c34397619..d1bd1e0f14d7 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -155,5 +155,14 @@ void deactivate_traps_vhe_put(void);
u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
void __noreturn __hyp_do_panic(unsigned long, ...);
+/*
+ * Must be called from hyp code running at EL2 with an updated VTTBR
+ * and interrupts disabled.
+ */
+static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm)
+{
+ write_sysreg(kvm->arch.vttbr, vttbr_el2);
+}
+
#endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index ca46153d7915..9d5ce1a3039a 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -198,7 +198,7 @@ void deactivate_traps_vhe_put(void)
static void __hyp_text __activate_vm(struct kvm *kvm)
{
- write_sysreg(kvm->arch.vttbr, vttbr_el2);
+ __load_guest_stage2(kvm);
}
static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index 131c7772703c..4dbd9c69a96d 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -30,7 +30,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
* bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
* let's flip TGE before executing the TLB operation.
*/
- write_sysreg(kvm->arch.vttbr, vttbr_el2);
+ __load_guest_stage2(kvm);
val = read_sysreg(hcr_el2);
val &= ~HCR_TGE;
write_sysreg(val, hcr_el2);
@@ -39,7 +39,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm)
{
- write_sysreg(kvm->arch.vttbr, vttbr_el2);
+ __load_guest_stage2(kvm);
isb();
}
From ce00e3cb4fb496683708db6bfce470e5c7710ddc Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose
Date: Wed, 26 Sep 2018 17:32:40 +0100
Subject: [PATCH 31/53] arm64: Add a helper for PARange to physical shift
conversion
On arm64, ID_AA64MMFR0_EL1.PARange encodes the maximum Physical
Address range supported by the CPU. Add a helper to decode this
to actual physical shift. If we hit an unallocated value, return
the maximum range supported by the kernel.
This will be used by KVM to set the VTCR_EL2.T0SZ, as it
is about to move its place. Having this helper keeps the code
movement cleaner.
Cc: Marc Zyngier
Cc: James Morse
Cc: Christoffer Dall
Acked-by: Catalin Marinas
Reviewed-by: Eric Auger
Signed-off-by: Suzuki K Poulose
Signed-off-by: Marc Zyngier
---
arch/arm64/include/asm/cpufeature.h | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 1717ba1db35d..072cc1c970c2 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -530,6 +530,26 @@ void arm64_set_ssbd_mitigation(bool state);
static inline void arm64_set_ssbd_mitigation(bool state) {}
#endif
+static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
+{
+ switch (parange) {
+ case 0: return 32;
+ case 1: return 36;
+ case 2: return 40;
+ case 3: return 42;
+ case 4: return 44;
+ case 5: return 48;
+ case 6: return 52;
+ /*
+ * A future PE could use a value unknown to the kernel.
+ * However, by the "D10.1.4 Principles of the ID scheme
+ * for fields in ID registers", ARM DDI 0487C.a, any new
+ * value is guaranteed to be higher than what we know already.
+ * As a safe limit, we return the limit supported by the kernel.
+ */
+ default: return CONFIG_ARM64_PA_BITS;
+ }
+}
#endif /* __ASSEMBLY__ */
#endif
From b2df44ffba363bd90274340e0adfd8137f5cd878 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose
Date: Wed, 26 Sep 2018 17:32:41 +0100
Subject: [PATCH 32/53] kvm: arm64: Clean up VTCR_EL2 initialisation
Use the new helper for converting the parange to the physical shift.
Also, add the missing definitions for the VTCR_EL2 register fields
and use them instead of hard coding numbers.
Cc: Marc Zyngier
Cc: Christoffer Dall
Reviewed-by: Eric Auger
Signed-off-by: Suzuki K Poulose
Signed-off-by: Marc Zyngier
---
arch/arm64/include/asm/kvm_arm.h | 3 +++
arch/arm64/kvm/hyp/s2-setup.c | 34 ++++++++------------------------
2 files changed, 11 insertions(+), 26 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index aa45df752a16..5f807b680a5f 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -107,6 +107,7 @@
#define VTCR_EL2_RES1 (1 << 31)
#define VTCR_EL2_HD (1 << 22)
#define VTCR_EL2_HA (1 << 21)
+#define VTCR_EL2_PS_SHIFT TCR_EL2_PS_SHIFT
#define VTCR_EL2_PS_MASK TCR_EL2_PS_MASK
#define VTCR_EL2_TG0_MASK TCR_TG0_MASK
#define VTCR_EL2_TG0_4K TCR_TG0_4K
@@ -127,6 +128,8 @@
#define VTCR_EL2_VS_8BIT (0 << VTCR_EL2_VS_SHIFT)
#define VTCR_EL2_VS_16BIT (1 << VTCR_EL2_VS_SHIFT)
+#define VTCR_EL2_T0SZ(x) TCR_T0SZ(x)
+
/*
* We configure the Stage-2 page tables to always restrict the IPA space to be
* 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
index 603e1ee83e89..e1ca672e937a 100644
--- a/arch/arm64/kvm/hyp/s2-setup.c
+++ b/arch/arm64/kvm/hyp/s2-setup.c
@@ -19,45 +19,27 @@
#include