From 046f773be106ec8eb92b13414c90f8e279deffe0 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Mon, 7 Mar 2022 15:33:11 -0600 Subject: [PATCH 01/60] KVM: SVM: Define sev_features and VMPL field in the VMSA The hypervisor uses the sev_features field (offset 3B0h) in the Save State Area to control the SEV-SNP guest features such as SNPActive, vTOM, ReflectVC etc. An SEV-SNP guest can read the sev_features field through the SEV_STATUS MSR. While at it, update dump_vmcb() to log the VMPL level. See APM2 Table 15-34 and B-4 for more details. Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Reviewed-by: Venu Busireddy Link: https://lore.kernel.org/r/20220307213356.2797205-2-brijesh.singh@amd.com --- arch/x86/include/asm/svm.h | 6 ++++-- arch/x86/kvm/svm/svm.c | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index f70a5108d464..f2d01f351e95 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -282,7 +282,8 @@ struct vmcb_save_area { struct vmcb_seg ldtr; struct vmcb_seg idtr; struct vmcb_seg tr; - u8 reserved_1[43]; + u8 reserved_1[42]; + u8 vmpl; u8 cpl; u8 reserved_2[4]; u64 efer; @@ -347,7 +348,8 @@ struct vmcb_save_area { u64 sw_exit_info_1; u64 sw_exit_info_2; u64 sw_scratch; - u8 reserved_11[56]; + u64 sev_features; + u8 reserved_11[48]; u64 xcr0; u8 valid_bitmap[16]; u64 x87_state_gpa; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index bd4c64b362d2..81cb518170a8 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3117,8 +3117,8 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) "tr:", save01->tr.selector, save01->tr.attrib, save01->tr.limit, save01->tr.base); - pr_err("cpl: %d efer: %016llx\n", - save->cpl, save->efer); + pr_err("vmpl: %d cpl: %d efer: %016llx\n", + save->vmpl, save->cpl, save->efer); pr_err("%-15s %016llx %-13s %016llx\n", "cr0:", save->cr0, "cr2:", save->cr2); pr_err("%-15s %016llx %-13s %016llx\n", From 3dd2775b74c9b1b01d19805877ab45bc47c4a5a5 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 5 Apr 2022 13:27:43 -0500 Subject: [PATCH 02/60] KVM: SVM: Create a separate mapping for the SEV-ES save area The save area for SEV-ES/SEV-SNP guests, as used by the hardware, is different from the save area of a non SEV-ES/SEV-SNP guest. This is the first step in defining the multiple save areas to keep them separate and ensuring proper operation amongst the different types of guests. Create an SEV-ES/SEV-SNP save area and adjust usage to the new save area definition where needed. Signed-off-by: Tom Lendacky Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Reviewed-by: Venu Busireddy Link: https://lore.kernel.org/r/20220405182743.308853-1-brijesh.singh@amd.com --- arch/x86/include/asm/svm.h | 89 +++++++++++++++++++++++++++++--------- arch/x86/kvm/svm/sev.c | 22 +++++----- arch/x86/kvm/svm/svm.c | 4 +- arch/x86/kvm/svm/svm.h | 4 +- 4 files changed, 83 insertions(+), 36 deletions(-) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index f2d01f351e95..788a43f99080 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -271,6 +271,7 @@ struct vmcb_seg { u64 base; } __packed; +/* Save area definition for legacy and SEV-MEM guests */ struct vmcb_save_area { struct vmcb_seg es; struct vmcb_seg cs; @@ -287,8 +288,58 @@ struct vmcb_save_area { u8 cpl; u8 reserved_2[4]; u64 efer; + u8 reserved_3[112]; + u64 cr4; + u64 cr3; + u64 cr0; + u64 dr7; + u64 dr6; + u64 rflags; + u64 rip; + u8 reserved_4[88]; + u64 rsp; + u64 s_cet; + u64 ssp; + u64 isst_addr; + u64 rax; + u64 star; + u64 lstar; + u64 cstar; + u64 sfmask; + u64 kernel_gs_base; + u64 sysenter_cs; + u64 sysenter_esp; + u64 sysenter_eip; + u64 cr2; + u8 reserved_5[32]; + u64 g_pat; + u64 dbgctl; + u64 br_from; + u64 br_to; + u64 last_excp_from; + u64 last_excp_to; + u8 reserved_6[72]; + u32 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */ +} __packed; + +/* Save area definition for SEV-ES and SEV-SNP guests */ +struct sev_es_save_area { + struct vmcb_seg es; + struct vmcb_seg cs; + struct vmcb_seg ss; + struct vmcb_seg ds; + struct vmcb_seg fs; + struct vmcb_seg gs; + struct vmcb_seg gdtr; + struct vmcb_seg ldtr; + struct vmcb_seg idtr; + struct vmcb_seg tr; + u8 reserved_1[43]; + u8 cpl; + u8 reserved_2[4]; + u64 efer; u8 reserved_3[104]; - u64 xss; /* Valid for SEV-ES only */ + u64 xss; u64 cr4; u64 cr3; u64 cr0; @@ -316,22 +367,14 @@ struct vmcb_save_area { u64 br_to; u64 last_excp_from; u64 last_excp_to; - - /* - * The following part of the save area is valid only for - * SEV-ES guests when referenced through the GHCB or for - * saving to the host save area. - */ - u8 reserved_7[72]; - u32 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */ - u8 reserved_7b[4]; + u8 reserved_7[80]; u32 pkru; - u8 reserved_7a[20]; - u64 reserved_8; /* rax already available at 0x01f8 */ + u8 reserved_9[20]; + u64 reserved_10; /* rax already available at 0x01f8 */ u64 rcx; u64 rdx; u64 rbx; - u64 reserved_9; /* rsp already available at 0x01d8 */ + u64 reserved_11; /* rsp already available at 0x01d8 */ u64 rbp; u64 rsi; u64 rdi; @@ -343,23 +386,25 @@ struct vmcb_save_area { u64 r13; u64 r14; u64 r15; - u8 reserved_10[16]; + u8 reserved_12[16]; u64 sw_exit_code; u64 sw_exit_info_1; u64 sw_exit_info_2; u64 sw_scratch; u64 sev_features; - u8 reserved_11[48]; + u8 reserved_13[48]; u64 xcr0; u8 valid_bitmap[16]; u64 x87_state_gpa; } __packed; -struct ghcb { - struct vmcb_save_area save; - u8 reserved_save[2048 - sizeof(struct vmcb_save_area)]; +#define GHCB_SHARED_BUF_SIZE 2032 - u8 shared_buffer[2032]; +struct ghcb { + struct sev_es_save_area save; + u8 reserved_save[2048 - sizeof(struct sev_es_save_area)]; + + u8 shared_buffer[GHCB_SHARED_BUF_SIZE]; u8 reserved_1[10]; u16 protocol_version; /* negotiated SEV-ES/GHCB protocol version */ @@ -367,13 +412,15 @@ struct ghcb { } __packed; -#define EXPECTED_VMCB_SAVE_AREA_SIZE 1032 +#define EXPECTED_VMCB_SAVE_AREA_SIZE 740 +#define EXPECTED_SEV_ES_SAVE_AREA_SIZE 1032 #define EXPECTED_VMCB_CONTROL_AREA_SIZE 1024 #define EXPECTED_GHCB_SIZE PAGE_SIZE static inline void __unused_size_checks(void) { BUILD_BUG_ON(sizeof(struct vmcb_save_area) != EXPECTED_VMCB_SAVE_AREA_SIZE); + BUILD_BUG_ON(sizeof(struct sev_es_save_area) != EXPECTED_SEV_ES_SAVE_AREA_SIZE); BUILD_BUG_ON(sizeof(struct vmcb_control_area) != EXPECTED_VMCB_CONTROL_AREA_SIZE); BUILD_BUG_ON(sizeof(struct ghcb) != EXPECTED_GHCB_SIZE); } @@ -443,7 +490,7 @@ struct vmcb { /* GHCB Accessor functions */ #define GHCB_BITMAP_IDX(field) \ - (offsetof(struct vmcb_save_area, field) / sizeof(u64)) + (offsetof(struct sev_es_save_area, field) / sizeof(u64)) #define DEFINE_GHCB_ACCESSORS(field) \ static inline bool ghcb_##field##_is_valid(const struct ghcb *ghcb) \ diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 75fa6dd268f0..6e18ec1839f0 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -559,12 +559,20 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) static int sev_es_sync_vmsa(struct vcpu_svm *svm) { - struct vmcb_save_area *save = &svm->vmcb->save; + struct sev_es_save_area *save = svm->sev_es.vmsa; /* Check some debug related fields before encrypting the VMSA */ - if (svm->vcpu.guest_debug || (save->dr7 & ~DR7_FIXED_1)) + if (svm->vcpu.guest_debug || (svm->vmcb->save.dr7 & ~DR7_FIXED_1)) return -EINVAL; + /* + * SEV-ES will use a VMSA that is pointed to by the VMCB, not + * the traditional VMSA that is part of the VMCB. Copy the + * traditional VMSA as it has been built so far (in prep + * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state. + */ + memcpy(save, &svm->vmcb->save, sizeof(svm->vmcb->save)); + /* Sync registgers */ save->rax = svm->vcpu.arch.regs[VCPU_REGS_RAX]; save->rbx = svm->vcpu.arch.regs[VCPU_REGS_RBX]; @@ -592,14 +600,6 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) save->xss = svm->vcpu.arch.ia32_xss; save->dr6 = svm->vcpu.arch.dr6; - /* - * SEV-ES will use a VMSA that is pointed to by the VMCB, not - * the traditional VMSA that is part of the VMCB. Copy the - * traditional VMSA as it has been built so far (in prep - * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state. - */ - memcpy(svm->sev_es.vmsa, save, sizeof(*save)); - return 0; } @@ -2932,7 +2932,7 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm) sev_enc_bit)); } -void sev_es_prepare_switch_to_guest(struct vmcb_save_area *hostsa) +void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa) { /* * As an SEV-ES guest, hardware will restore the host state on VMEXIT, diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 81cb518170a8..6ff595f74e3a 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1270,8 +1270,8 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) */ vmsave(__sme_page_pa(sd->save_area)); if (sev_es_guest(vcpu->kvm)) { - struct vmcb_save_area *hostsa; - hostsa = (struct vmcb_save_area *)(page_address(sd->save_area) + 0x400); + struct sev_es_save_area *hostsa; + hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400); sev_es_prepare_switch_to_guest(hostsa); } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index f77a7d2d39dd..cc857deaee5e 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -181,7 +181,7 @@ struct svm_nested_state { struct vcpu_sev_es_state { /* SEV-ES support */ - struct vmcb_save_area *vmsa; + struct sev_es_save_area *vmsa; struct ghcb *ghcb; struct kvm_host_map ghcb_map; bool received_first_sipi; @@ -620,7 +620,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in); void sev_es_init_vmcb(struct vcpu_svm *svm); void sev_es_vcpu_reset(struct vcpu_svm *svm); void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); -void sev_es_prepare_switch_to_guest(struct vmcb_save_area *hostsa); +void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa); void sev_es_unmap_ghcb(struct vcpu_svm *svm); /* vmenter.S */ From a4690359eaec985a1351786da887df1ba92440a0 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 7 Mar 2022 15:33:13 -0600 Subject: [PATCH 03/60] KVM: SVM: Create a separate mapping for the GHCB save area The initial implementation of the GHCB spec was based on trying to keep the register state offsets the same relative to the VM save area. However, the save area for SEV-ES has changed within the hardware causing the relation between the SEV-ES save area to change relative to the GHCB save area. This is the second step in defining the multiple save areas to keep them separate and ensuring proper operation amongst the different types of guests. Create a GHCB save area that matches the GHCB specification. Signed-off-by: Tom Lendacky Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Reviewed-by: Venu Busireddy Link: https://lore.kernel.org/r/20220307213356.2797205-4-brijesh.singh@amd.com --- arch/x86/include/asm/svm.h | 48 +++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 788a43f99080..0789ad83b2bd 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -398,11 +398,51 @@ struct sev_es_save_area { u64 x87_state_gpa; } __packed; +struct ghcb_save_area { + u8 reserved_1[203]; + u8 cpl; + u8 reserved_2[116]; + u64 xss; + u8 reserved_3[24]; + u64 dr7; + u8 reserved_4[16]; + u64 rip; + u8 reserved_5[88]; + u64 rsp; + u8 reserved_6[24]; + u64 rax; + u8 reserved_7[264]; + u64 rcx; + u64 rdx; + u64 rbx; + u8 reserved_8[8]; + u64 rbp; + u64 rsi; + u64 rdi; + u64 r8; + u64 r9; + u64 r10; + u64 r11; + u64 r12; + u64 r13; + u64 r14; + u64 r15; + u8 reserved_9[16]; + u64 sw_exit_code; + u64 sw_exit_info_1; + u64 sw_exit_info_2; + u64 sw_scratch; + u8 reserved_10[56]; + u64 xcr0; + u8 valid_bitmap[16]; + u64 x87_state_gpa; +} __packed; + #define GHCB_SHARED_BUF_SIZE 2032 struct ghcb { - struct sev_es_save_area save; - u8 reserved_save[2048 - sizeof(struct sev_es_save_area)]; + struct ghcb_save_area save; + u8 reserved_save[2048 - sizeof(struct ghcb_save_area)]; u8 shared_buffer[GHCB_SHARED_BUF_SIZE]; @@ -413,6 +453,7 @@ struct ghcb { #define EXPECTED_VMCB_SAVE_AREA_SIZE 740 +#define EXPECTED_GHCB_SAVE_AREA_SIZE 1032 #define EXPECTED_SEV_ES_SAVE_AREA_SIZE 1032 #define EXPECTED_VMCB_CONTROL_AREA_SIZE 1024 #define EXPECTED_GHCB_SIZE PAGE_SIZE @@ -420,6 +461,7 @@ struct ghcb { static inline void __unused_size_checks(void) { BUILD_BUG_ON(sizeof(struct vmcb_save_area) != EXPECTED_VMCB_SAVE_AREA_SIZE); + BUILD_BUG_ON(sizeof(struct ghcb_save_area) != EXPECTED_GHCB_SAVE_AREA_SIZE); BUILD_BUG_ON(sizeof(struct sev_es_save_area) != EXPECTED_SEV_ES_SAVE_AREA_SIZE); BUILD_BUG_ON(sizeof(struct vmcb_control_area) != EXPECTED_VMCB_CONTROL_AREA_SIZE); BUILD_BUG_ON(sizeof(struct ghcb) != EXPECTED_GHCB_SIZE); @@ -490,7 +532,7 @@ struct vmcb { /* GHCB Accessor functions */ #define GHCB_BITMAP_IDX(field) \ - (offsetof(struct sev_es_save_area, field) / sizeof(u64)) + (offsetof(struct ghcb_save_area, field) / sizeof(u64)) #define DEFINE_GHCB_ACCESSORS(field) \ static inline bool ghcb_##field##_is_valid(const struct ghcb *ghcb) \ From 6d3b3d34e39eb4ee9a7dbe7e28e2588e160f9c0f Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 7 Mar 2022 15:33:14 -0600 Subject: [PATCH 04/60] KVM: SVM: Update the SEV-ES save area mapping This is the final step in defining the multiple save areas to keep them separate and ensuring proper operation amongst the different types of guests. Update the SEV-ES/SEV-SNP save area to match the APM. This save area will be used for the upcoming SEV-SNP AP Creation NAE event support. Signed-off-by: Tom Lendacky Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Reviewed-by: Venu Busireddy Link: https://lore.kernel.org/r/20220307213356.2797205-5-brijesh.singh@amd.com --- arch/x86/include/asm/svm.h | 66 +++++++++++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 0789ad83b2bd..ec623e7da33d 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -334,7 +334,13 @@ struct sev_es_save_area { struct vmcb_seg ldtr; struct vmcb_seg idtr; struct vmcb_seg tr; - u8 reserved_1[43]; + u64 vmpl0_ssp; + u64 vmpl1_ssp; + u64 vmpl2_ssp; + u64 vmpl3_ssp; + u64 u_cet; + u8 reserved_1[2]; + u8 vmpl; u8 cpl; u8 reserved_2[4]; u64 efer; @@ -347,9 +353,19 @@ struct sev_es_save_area { u64 dr6; u64 rflags; u64 rip; - u8 reserved_4[88]; + u64 dr0; + u64 dr1; + u64 dr2; + u64 dr3; + u64 dr0_addr_mask; + u64 dr1_addr_mask; + u64 dr2_addr_mask; + u64 dr3_addr_mask; + u8 reserved_4[24]; u64 rsp; - u8 reserved_5[24]; + u64 s_cet; + u64 ssp; + u64 isst_addr; u64 rax; u64 star; u64 lstar; @@ -360,7 +376,7 @@ struct sev_es_save_area { u64 sysenter_esp; u64 sysenter_eip; u64 cr2; - u8 reserved_6[32]; + u8 reserved_5[32]; u64 g_pat; u64 dbgctl; u64 br_from; @@ -369,12 +385,12 @@ struct sev_es_save_area { u64 last_excp_to; u8 reserved_7[80]; u32 pkru; - u8 reserved_9[20]; - u64 reserved_10; /* rax already available at 0x01f8 */ + u8 reserved_8[20]; + u64 reserved_9; /* rax already available at 0x01f8 */ u64 rcx; u64 rdx; u64 rbx; - u64 reserved_11; /* rsp already available at 0x01d8 */ + u64 reserved_10; /* rsp already available at 0x01d8 */ u64 rbp; u64 rsi; u64 rdi; @@ -386,16 +402,34 @@ struct sev_es_save_area { u64 r13; u64 r14; u64 r15; - u8 reserved_12[16]; - u64 sw_exit_code; - u64 sw_exit_info_1; - u64 sw_exit_info_2; - u64 sw_scratch; + u8 reserved_11[16]; + u64 guest_exit_info_1; + u64 guest_exit_info_2; + u64 guest_exit_int_info; + u64 guest_nrip; u64 sev_features; - u8 reserved_13[48]; + u64 vintr_ctrl; + u64 guest_exit_code; + u64 virtual_tom; + u64 tlb_id; + u64 pcpu_id; + u64 event_inj; u64 xcr0; - u8 valid_bitmap[16]; - u64 x87_state_gpa; + u8 reserved_12[16]; + + /* Floating point area */ + u64 x87_dp; + u32 mxcsr; + u16 x87_ftw; + u16 x87_fsw; + u16 x87_fcw; + u16 x87_fop; + u16 x87_ds; + u16 x87_cs; + u64 x87_rip; + u8 fpreg_x87[80]; + u8 fpreg_xmm[256]; + u8 fpreg_ymm[256]; } __packed; struct ghcb_save_area { @@ -454,7 +488,7 @@ struct ghcb { #define EXPECTED_VMCB_SAVE_AREA_SIZE 740 #define EXPECTED_GHCB_SAVE_AREA_SIZE 1032 -#define EXPECTED_SEV_ES_SAVE_AREA_SIZE 1032 +#define EXPECTED_SEV_ES_SAVE_AREA_SIZE 1648 #define EXPECTED_VMCB_CONTROL_AREA_SIZE 1024 #define EXPECTED_GHCB_SIZE PAGE_SIZE From 176db622573f028f85221873ea4577e096785315 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Wed, 9 Feb 2022 12:09:59 -0600 Subject: [PATCH 05/60] x86/boot: Introduce helpers for MSR reads/writes The current set of helpers used throughout the run-time kernel have dependencies on code/facilities outside of the boot kernel, so there are a number of call-sites throughout the boot kernel where inline assembly is used instead. More will be added with subsequent patches that add support for SEV-SNP, so take the opportunity to provide a basic set of helpers that can be used by the boot kernel to reduce reliance on inline assembly. Use boot_* prefix so that it's clear these are helpers specific to the boot kernel to avoid any confusion with the various other MSR read/write helpers. [ bp: Disambiguate parameter names and trim comment. ] Suggested-by: Borislav Petkov Signed-off-by: Michael Roth Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-6-brijesh.singh@amd.com --- arch/x86/boot/msr.h | 26 ++++++++++++++++++++++++++ arch/x86/include/asm/msr.h | 11 +---------- arch/x86/include/asm/shared/msr.h | 15 +++++++++++++++ 3 files changed, 42 insertions(+), 10 deletions(-) create mode 100644 arch/x86/boot/msr.h create mode 100644 arch/x86/include/asm/shared/msr.h diff --git a/arch/x86/boot/msr.h b/arch/x86/boot/msr.h new file mode 100644 index 000000000000..aed66f7ae199 --- /dev/null +++ b/arch/x86/boot/msr.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Helpers/definitions related to MSR access. + */ + +#ifndef BOOT_MSR_H +#define BOOT_MSR_H + +#include + +/* + * The kernel proper already defines rdmsr()/wrmsr(), but they are not for the + * boot kernel since they rely on tracepoint/exception handling infrastructure + * that's not available here. + */ +static inline void boot_rdmsr(unsigned int reg, struct msr *m) +{ + asm volatile("rdmsr" : "=a" (m->l), "=d" (m->h) : "c" (reg)); +} + +static inline void boot_wrmsr(unsigned int reg, const struct msr *m) +{ + asm volatile("wrmsr" : : "c" (reg), "a"(m->l), "d" (m->h) : "memory"); +} + +#endif /* BOOT_MSR_H */ diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index d42e6c6b47b1..65ec1965cd28 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -10,16 +10,7 @@ #include #include #include - -struct msr { - union { - struct { - u32 l; - u32 h; - }; - u64 q; - }; -}; +#include struct msr_info { u32 msr_no; diff --git a/arch/x86/include/asm/shared/msr.h b/arch/x86/include/asm/shared/msr.h new file mode 100644 index 000000000000..1e6ec10b3a15 --- /dev/null +++ b/arch/x86/include/asm/shared/msr.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SHARED_MSR_H +#define _ASM_X86_SHARED_MSR_H + +struct msr { + union { + struct { + u32 l; + u32 h; + }; + u64 q; + }; +}; + +#endif /* _ASM_X86_SHARED_MSR_H */ From 950d00558a920227b5703d1fcc4751cfe03853cd Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Wed, 9 Feb 2022 12:10:00 -0600 Subject: [PATCH 06/60] x86/boot: Use MSR read/write helpers instead of inline assembly Update all C code to use the new boot_rdmsr()/boot_wrmsr() helpers instead of relying on inline assembly. Suggested-by: Borislav Petkov Signed-off-by: Michael Roth Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-7-brijesh.singh@amd.com --- arch/x86/boot/compressed/sev.c | 17 +++++++---------- arch/x86/boot/cpucheck.c | 30 +++++++++++++++--------------- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 28bcf04c022e..4e82101b7d13 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -22,6 +22,7 @@ #include #include "error.h" +#include "../msr.h" struct ghcb boot_ghcb_page __aligned(PAGE_SIZE); struct ghcb *boot_ghcb; @@ -56,23 +57,19 @@ static unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx) static inline u64 sev_es_rd_ghcb_msr(void) { - unsigned long low, high; + struct msr m; - asm volatile("rdmsr" : "=a" (low), "=d" (high) : - "c" (MSR_AMD64_SEV_ES_GHCB)); + boot_rdmsr(MSR_AMD64_SEV_ES_GHCB, &m); - return ((high << 32) | low); + return m.q; } static inline void sev_es_wr_ghcb_msr(u64 val) { - u32 low, high; + struct msr m; - low = val & 0xffffffffUL; - high = val >> 32; - - asm volatile("wrmsr" : : "c" (MSR_AMD64_SEV_ES_GHCB), - "a"(low), "d" (high) : "memory"); + m.q = val; + boot_wrmsr(MSR_AMD64_SEV_ES_GHCB, &m); } static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt) diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index e1478d32de1a..fed8d13ce252 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -27,6 +27,7 @@ #include #include #include "string.h" +#include "msr.h" static u32 err_flags[NCAPINTS]; @@ -130,12 +131,11 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) /* If this is an AMD and we're only missing SSE+SSE2, try to turn them on */ - u32 ecx = MSR_K7_HWCR; - u32 eax, edx; + struct msr m; - asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); - eax &= ~(1 << 15); - asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); + boot_rdmsr(MSR_K7_HWCR, &m); + m.l &= ~(1 << 15); + boot_wrmsr(MSR_K7_HWCR, &m); get_cpuflags(); /* Make sure it really did something */ err = check_cpuflags(); @@ -145,28 +145,28 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) /* If this is a VIA C3, we might have to enable CX8 explicitly */ - u32 ecx = MSR_VIA_FCR; - u32 eax, edx; + struct msr m; - asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); - eax |= (1<<1)|(1<<7); - asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); + boot_rdmsr(MSR_VIA_FCR, &m); + m.l |= (1 << 1) | (1 << 7); + boot_wrmsr(MSR_VIA_FCR, &m); set_bit(X86_FEATURE_CX8, cpu.flags); err = check_cpuflags(); } else if (err == 0x01 && is_transmeta()) { /* Transmeta might have masked feature bits in word 0 */ - u32 ecx = 0x80860004; - u32 eax, edx; + struct msr m, m_tmp; u32 level = 1; - asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); - asm("wrmsr" : : "a" (~0), "d" (edx), "c" (ecx)); + boot_rdmsr(0x80860004, &m); + m_tmp = m; + m_tmp.l = ~0; + boot_wrmsr(0x80860004, &m_tmp); asm("cpuid" : "+a" (level), "=d" (cpu.flags[0]) : : "ecx", "ebx"); - asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); + boot_wrmsr(0x80860004, &m); err = check_cpuflags(); } else if (err == 0x01 && From ec1c66af3a30d45c2420da0974c01d3515dba26e Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Wed, 9 Feb 2022 12:10:01 -0600 Subject: [PATCH 07/60] x86/compressed/64: Detect/setup SEV/SME features earlier during boot With upcoming SEV-SNP support, SEV-related features need to be initialized earlier during boot, at the same point the initial #VC handler is set up, so that the SEV-SNP CPUID table can be utilized during the initial feature checks. Also, SEV-SNP feature detection will rely on EFI helper functions to scan the EFI config table for the Confidential Computing blob, and so would need to be implemented at least partially in C. Currently set_sev_encryption_mask() is used to initialize the sev_status and sme_me_mask globals that advertise what SEV/SME features are available in a guest. Rename it to sev_enable() to better reflect that (SME is only enabled in the case of SEV guests in the boot/compressed kernel), and move it to just after the stage1 #VC handler is set up so that it can be used to initialize SEV-SNP as well in future patches. While at it, re-implement it as C code so that all SEV feature detection can be better consolidated with upcoming SEV-SNP feature detection, which will also be in C. The 32-bit entry path remains unchanged, as it never relied on the set_sev_encryption_mask() initialization to begin with. [ bp: Massage commit message. ] Signed-off-by: Michael Roth Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-8-brijesh.singh@amd.com --- arch/x86/boot/compressed/head_64.S | 37 +++++++++++++++----------- arch/x86/boot/compressed/mem_encrypt.S | 36 ------------------------- arch/x86/boot/compressed/misc.h | 4 +-- arch/x86/boot/compressed/sev.c | 36 +++++++++++++++++++++++++ 4 files changed, 60 insertions(+), 53 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index dea95301196b..4cd661165d4a 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -189,11 +189,11 @@ SYM_FUNC_START(startup_32) subl $32, %eax /* Encryption bit is always above bit 31 */ bts %eax, %edx /* Set encryption mask for page tables */ /* - * Mark SEV as active in sev_status so that startup32_check_sev_cbit() - * will do a check. The sev_status memory will be fully initialized - * with the contents of MSR_AMD_SEV_STATUS later in - * set_sev_encryption_mask(). For now it is sufficient to know that SEV - * is active. + * Set MSR_AMD64_SEV_ENABLED_BIT in sev_status so that + * startup32_check_sev_cbit() will do a check. sev_enable() will + * initialize sev_status with all the bits reported by + * MSR_AMD_SEV_STATUS later, but only MSR_AMD64_SEV_ENABLED_BIT + * needs to be set for now. */ movl $1, rva(sev_status)(%ebp) 1: @@ -447,6 +447,23 @@ SYM_CODE_START(startup_64) call load_stage1_idt popq %rsi +#ifdef CONFIG_AMD_MEM_ENCRYPT + /* + * Now that the stage1 interrupt handlers are set up, #VC exceptions from + * CPUID instructions can be properly handled for SEV-ES guests. + * + * For SEV-SNP, the CPUID table also needs to be set up in advance of any + * CPUID instructions being issued, so go ahead and do that now via + * sev_enable(), which will also handle the rest of the SEV-related + * detection/setup to ensure that has been done in advance of any dependent + * code. + */ + pushq %rsi + movq %rsi, %rdi /* real mode address */ + call sev_enable + popq %rsi +#endif + /* * paging_prepare() sets up the trampoline and checks if we need to * enable 5-level paging. @@ -558,17 +575,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) shrq $3, %rcx rep stosq -/* - * If running as an SEV guest, the encryption mask is required in the - * page-table setup code below. When the guest also has SEV-ES enabled - * set_sev_encryption_mask() will cause #VC exceptions, but the stage2 - * handler can't map its GHCB because the page-table is not set up yet. - * So set up the encryption mask here while still on the stage1 #VC - * handler. Then load stage2 IDT and switch to the kernel's own - * page-table. - */ pushq %rsi - call set_sev_encryption_mask call load_stage2_idt /* Pass boot_params to initialize_identity_maps() */ diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index a63424d13627..a73e4d783cae 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -187,42 +187,6 @@ SYM_CODE_END(startup32_vc_handler) .code64 #include "../../kernel/sev_verify_cbit.S" -SYM_FUNC_START(set_sev_encryption_mask) -#ifdef CONFIG_AMD_MEM_ENCRYPT - push %rbp - push %rdx - - movq %rsp, %rbp /* Save current stack pointer */ - - call get_sev_encryption_bit /* Get the encryption bit position */ - testl %eax, %eax - jz .Lno_sev_mask - - bts %rax, sme_me_mask(%rip) /* Create the encryption mask */ - - /* - * Read MSR_AMD64_SEV again and store it to sev_status. Can't do this in - * get_sev_encryption_bit() because this function is 32-bit code and - * shared between 64-bit and 32-bit boot path. - */ - movl $MSR_AMD64_SEV, %ecx /* Read the SEV MSR */ - rdmsr - - /* Store MSR value in sev_status */ - shlq $32, %rdx - orq %rdx, %rax - movq %rax, sev_status(%rip) - -.Lno_sev_mask: - movq %rbp, %rsp /* Restore original stack pointer */ - - pop %rdx - pop %rbp -#endif - - xor %rax, %rax - RET -SYM_FUNC_END(set_sev_encryption_mask) .data diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 16ed360b6692..23e0e395084a 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -120,12 +120,12 @@ static inline void console_init(void) { } #endif -void set_sev_encryption_mask(void); - #ifdef CONFIG_AMD_MEM_ENCRYPT +void sev_enable(struct boot_params *bp); void sev_es_shutdown_ghcb(void); extern bool sev_es_check_ghcb_fault(unsigned long address); #else +static inline void sev_enable(struct boot_params *bp) { } static inline void sev_es_shutdown_ghcb(void) { } static inline bool sev_es_check_ghcb_fault(unsigned long address) { diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 4e82101b7d13..27ccd5a5ff60 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -201,3 +201,39 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code) else if (result != ES_RETRY) sev_es_terminate(GHCB_SEV_ES_GEN_REQ); } + +void sev_enable(struct boot_params *bp) +{ + unsigned int eax, ebx, ecx, edx; + struct msr m; + + /* Check for the SME/SEV support leaf */ + eax = 0x80000000; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + if (eax < 0x8000001f) + return; + + /* + * Check for the SME/SEV feature: + * CPUID Fn8000_001F[EAX] + * - Bit 0 - Secure Memory Encryption support + * - Bit 1 - Secure Encrypted Virtualization support + * CPUID Fn8000_001F[EBX] + * - Bits 5:0 - Pagetable bit position used to indicate encryption + */ + eax = 0x8000001f; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + /* Check whether SEV is supported */ + if (!(eax & BIT(1))) + return; + + /* Set the SME mask if this is an SEV guest. */ + boot_rdmsr(MSR_AMD64_SEV, &m); + sev_status = m.q; + if (!(sev_status & MSR_AMD64_SEV_ENABLED)) + return; + + sme_me_mask = BIT_ULL(ebx & 0x3f); +} From bcce829083339bf862d66df602cbb111943da8fb Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Wed, 9 Feb 2022 12:10:02 -0600 Subject: [PATCH 08/60] x86/sev: Detect/setup SEV/SME features earlier in boot sme_enable() handles feature detection for both SEV and SME. Future patches will also use it for SEV-SNP feature detection/setup, which will need to be done immediately after the first #VC handler is set up. Move it now in preparation. Signed-off-by: Michael Roth Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Reviewed-by: Venu Busireddy Link: https://lore.kernel.org/r/20220307213356.2797205-9-brijesh.singh@amd.com --- arch/x86/kernel/head64.c | 3 --- arch/x86/kernel/head_64.S | 13 +++++++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 4f5ecbbaae77..cbc285ddc4ac 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -192,9 +192,6 @@ unsigned long __head __startup_64(unsigned long physaddr, if (load_delta & ~PMD_PAGE_MASK) for (;;); - /* Activate Secure Memory Encryption (SME) if supported and enabled */ - sme_enable(bp); - /* Include the SME encryption mask in the fixup value */ load_delta += sme_get_me_mask(); diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index b8e3019547a5..6bf340c91165 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -69,6 +69,19 @@ SYM_CODE_START_NOALIGN(startup_64) call startup_64_setup_env popq %rsi +#ifdef CONFIG_AMD_MEM_ENCRYPT + /* + * Activate SEV/SME memory encryption if supported/enabled. This needs to + * be done now, since this also includes setup of the SEV-SNP CPUID table, + * which needs to be done before any CPUID instructions are executed in + * subsequent code. + */ + movq %rsi, %rdi + pushq %rsi + call sme_enable + popq %rsi +#endif + /* Now switch to __KERNEL_CS so IRET works reliably */ pushq $__KERNEL_CS leaq .Lon_kernel_cs(%rip), %rax From f742b90e61bb53b27771f64bdae05db03a6ab1f2 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Thu, 24 Feb 2022 10:55:49 -0600 Subject: [PATCH 09/60] x86/mm: Extend cc_attr to include AMD SEV-SNP The CC_ATTR_GUEST_SEV_SNP can be used by the guest to query whether the SNP (Secure Nested Paging) feature is active. Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-10-brijesh.singh@amd.com --- arch/x86/coco/core.c | 3 +++ arch/x86/include/asm/msr-index.h | 2 ++ arch/x86/mm/mem_encrypt.c | 4 ++++ include/linux/cc_platform.h | 8 ++++++++ 4 files changed, 17 insertions(+) diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index fc1365dd927e..dafd4881ce29 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -57,6 +57,9 @@ static bool amd_cc_platform_has(enum cc_attr attr) return (sev_status & MSR_AMD64_SEV_ENABLED) && !(sev_status & MSR_AMD64_SEV_ES_ENABLED); + case CC_ATTR_GUEST_SEV_SNP: + return sev_status & MSR_AMD64_SEV_SNP_ENABLED; + default: return false; } diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 0eb90d21049e..ef96f166b1b6 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -502,8 +502,10 @@ #define MSR_AMD64_SEV 0xc0010131 #define MSR_AMD64_SEV_ENABLED_BIT 0 #define MSR_AMD64_SEV_ES_ENABLED_BIT 1 +#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2 #define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) #define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT) +#define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT) #define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 50d209939c66..f85868c031c6 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -62,6 +62,10 @@ static void print_mem_encrypt_feature_info(void) if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) pr_cont(" SEV-ES"); + /* Secure Nested Paging */ + if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + pr_cont(" SEV-SNP"); + pr_cont("\n"); } diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h index efd8205282da..d08dd65b5c43 100644 --- a/include/linux/cc_platform.h +++ b/include/linux/cc_platform.h @@ -72,6 +72,14 @@ enum cc_attr { * Examples include TDX guest & SEV. */ CC_ATTR_GUEST_UNROLL_STRING_IO, + + /** + * @CC_ATTR_SEV_SNP: Guest SNP is active. + * + * The platform/OS is running as a guest/virtual machine and actively + * using AMD SEV-SNP features. + */ + CC_ATTR_GUEST_SEV_SNP, }; #ifdef CONFIG_ARCH_HAS_CC_PLATFORM From 6c0f74d678c94060932683738b3e227995b363d3 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Wed, 9 Feb 2022 12:10:04 -0600 Subject: [PATCH 10/60] x86/sev: Define the Linux-specific guest termination reasons The GHCB specification defines the reason code for reason set 0. The reason codes defined in the set 0 do not cover all possible causes for a guest to request termination. The reason sets 1 to 255 are reserved for the vendor-specific codes. Reserve the reason set 1 for the Linux guest. Define the error codes for reason set 1 so that one can have meaningful termination reasons and thus better guest failure diagnosis. While at it, change sev_es_terminate() to accept a reason set parameter. [ bp: Massage commit message. ] Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Reviewed-by: Venu Busireddy Link: https://lore.kernel.org/r/20220307213356.2797205-11-brijesh.singh@amd.com --- arch/x86/boot/compressed/sev.c | 6 +++--- arch/x86/include/asm/sev-common.h | 8 ++++++++ arch/x86/kernel/sev-shared.c | 11 ++++------- arch/x86/kernel/sev.c | 4 ++-- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 27ccd5a5ff60..56e941d5e092 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -119,7 +119,7 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, static bool early_setup_sev_es(void) { if (!sev_es_negotiate_protocol()) - sev_es_terminate(GHCB_SEV_ES_PROT_UNSUPPORTED); + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_PROT_UNSUPPORTED); if (set_page_decrypted((unsigned long)&boot_ghcb_page)) return false; @@ -172,7 +172,7 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code) enum es_result result; if (!boot_ghcb && !early_setup_sev_es()) - sev_es_terminate(GHCB_SEV_ES_GEN_REQ); + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); vc_ghcb_invalidate(boot_ghcb); result = vc_init_em_ctxt(&ctxt, regs, exit_code); @@ -199,7 +199,7 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code) if (result == ES_OK) vc_finish_insn(&ctxt); else if (result != ES_RETRY) - sev_es_terminate(GHCB_SEV_ES_GEN_REQ); + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); } void sev_enable(struct boot_params *bp) diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index 1b2fd32b42fe..94f0ea574049 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -73,9 +73,17 @@ /* GHCBData[23:16] */ \ ((((u64)reason_val) & 0xff) << 16)) +/* Error codes from reason set 0 */ +#define SEV_TERM_SET_GEN 0 #define GHCB_SEV_ES_GEN_REQ 0 #define GHCB_SEV_ES_PROT_UNSUPPORTED 1 +/* Linux-specific reason codes (used with reason set 1) */ +#define SEV_TERM_SET_LINUX 1 +#define GHCB_TERM_REGISTER 0 /* GHCB GPA registration failure */ +#define GHCB_TERM_PSC 1 /* Page State Change failure */ +#define GHCB_TERM_PVALIDATE 2 /* Pvalidate failure */ + #define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) /* diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index ce987688bbc0..2abf8a7d75e5 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -24,15 +24,12 @@ static bool __init sev_es_check_cpu_features(void) return true; } -static void __noreturn sev_es_terminate(unsigned int reason) +static void __noreturn sev_es_terminate(unsigned int set, unsigned int reason) { u64 val = GHCB_MSR_TERM_REQ; - /* - * Tell the hypervisor what went wrong - only reason-set 0 is - * currently supported. - */ - val |= GHCB_SEV_TERM_REASON(0, reason); + /* Tell the hypervisor what went wrong. */ + val |= GHCB_SEV_TERM_REASON(set, reason); /* Request Guest Termination from Hypvervisor */ sev_es_wr_ghcb_msr(val); @@ -221,7 +218,7 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) fail: /* Terminate the guest */ - sev_es_terminate(GHCB_SEV_ES_GEN_REQ); + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); } static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt, diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index e6d316a01fdd..19ad09712902 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -1337,7 +1337,7 @@ DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication) show_regs(regs); /* Ask hypervisor to sev_es_terminate */ - sev_es_terminate(GHCB_SEV_ES_GEN_REQ); + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); /* If that fails and we get here - just panic */ panic("Returned from Terminate-Request to Hypervisor\n"); @@ -1385,7 +1385,7 @@ bool __init handle_vc_boot_ghcb(struct pt_regs *regs) /* Do initial setup or terminate the guest */ if (unlikely(boot_ghcb == NULL && !sev_es_setup_ghcb())) - sev_es_terminate(GHCB_SEV_ES_GEN_REQ); + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); vc_ghcb_invalidate(boot_ghcb); From 2ea29c5abbc27147c2d9e2ab5e05436aca706b65 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Wed, 9 Feb 2022 12:10:05 -0600 Subject: [PATCH 11/60] x86/sev: Save the negotiated GHCB version The SEV-ES guest calls sev_es_negotiate_protocol() to negotiate the GHCB protocol version before establishing the GHCB. Cache the negotiated GHCB version so that it can be used later. Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Reviewed-by: Venu Busireddy Link: https://lore.kernel.org/r/20220307213356.2797205-12-brijesh.singh@amd.com --- arch/x86/include/asm/sev.h | 2 +- arch/x86/kernel/sev-shared.c | 17 ++++++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index ec060c433589..9b9c190e8c3b 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -12,7 +12,7 @@ #include #include -#define GHCB_PROTO_OUR 0x0001UL +#define GHCB_PROTOCOL_MIN 1ULL #define GHCB_PROTOCOL_MAX 1ULL #define GHCB_DEFAULT_USAGE 0ULL diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 2abf8a7d75e5..91105f5a02a8 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -14,6 +14,15 @@ #define has_cpuflag(f) boot_cpu_has(f) #endif +/* + * Since feature negotiation related variables are set early in the boot + * process they must reside in the .data section so as not to be zeroed + * out when the .bss section is later cleared. + * + * GHCB protocol version negotiated with the hypervisor. + */ +static u16 ghcb_version __ro_after_init; + static bool __init sev_es_check_cpu_features(void) { if (!has_cpuflag(X86_FEATURE_RDRAND)) { @@ -51,10 +60,12 @@ static bool sev_es_negotiate_protocol(void) if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP) return false; - if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTO_OUR || - GHCB_MSR_PROTO_MIN(val) > GHCB_PROTO_OUR) + if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN || + GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX) return false; + ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), GHCB_PROTOCOL_MAX); + return true; } @@ -127,7 +138,7 @@ enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr, u64 exit_info_1, u64 exit_info_2) { /* Fill in protocol and format specifiers */ - ghcb->protocol_version = GHCB_PROTOCOL_MAX; + ghcb->protocol_version = ghcb_version; ghcb->ghcb_usage = GHCB_DEFAULT_USAGE; ghcb_set_sw_exit_code(ghcb, exit_code); From cbd3d4f7c4e5a93edae68e5142a269368fde77d6 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Wed, 9 Feb 2022 12:10:06 -0600 Subject: [PATCH 12/60] x86/sev: Check SEV-SNP features support Version 2 of the GHCB specification added the advertisement of features that are supported by the hypervisor. If the hypervisor supports SEV-SNP then it must set the SEV-SNP features bit to indicate that the base functionality is supported. Check that feature bit while establishing the GHCB; if failed, terminate the guest. Version 2 of the GHCB specification adds several new Non-Automatic Exits (NAEs), most of them are optional except the hypervisor feature. Now that the hypervisor feature NAE is implemented, bump the GHCB maximum supported protocol version. While at it, move the GHCB protocol negotiation check from the #VC exception handler to sev_enable() so that all feature detection happens before the first #VC exception. While at it, document why the GHCB page cannot be setup from load_stage2_idt(). [ bp: Massage commit message. ] Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-13-brijesh.singh@amd.com --- arch/x86/boot/compressed/idt_64.c | 18 +++++++++++++++++- arch/x86/boot/compressed/sev.c | 20 +++++++++++++++----- arch/x86/include/asm/sev-common.h | 6 ++++++ arch/x86/include/asm/sev.h | 2 +- arch/x86/include/uapi/asm/svm.h | 2 ++ arch/x86/kernel/sev-shared.c | 20 ++++++++++++++++++++ arch/x86/kernel/sev.c | 14 ++++++++++++++ 7 files changed, 75 insertions(+), 7 deletions(-) diff --git a/arch/x86/boot/compressed/idt_64.c b/arch/x86/boot/compressed/idt_64.c index 9b93567d663a..6debb816e83d 100644 --- a/arch/x86/boot/compressed/idt_64.c +++ b/arch/x86/boot/compressed/idt_64.c @@ -39,7 +39,23 @@ void load_stage1_idt(void) load_boot_idt(&boot_idt_desc); } -/* Setup IDT after kernel jumping to .Lrelocated */ +/* + * Setup IDT after kernel jumping to .Lrelocated. + * + * initialize_identity_maps() needs a #PF handler to be setup + * in order to be able to fault-in identity mapping ranges; see + * do_boot_page_fault(). + * + * This #PF handler setup needs to happen in load_stage2_idt() where the + * IDT is loaded and there the #VC IDT entry gets setup too. + * + * In order to be able to handle #VCs, one needs a GHCB which + * gets setup with an already set up pagetable, which is done in + * initialize_identity_maps(). And there's the catch 22: the boot #VC + * handler do_boot_stage2_vc() needs to call early_setup_ghcb() itself + * (and, especially set_page_decrypted()) because the SEV-ES setup code + * cannot initialize a GHCB as there's no #PF handler yet... + */ void load_stage2_idt(void) { boot_idt_desc.address = (unsigned long)boot_idt; diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 56e941d5e092..5b389310be87 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -116,11 +116,8 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, /* Include code for early handlers */ #include "../../kernel/sev-shared.c" -static bool early_setup_sev_es(void) +static bool early_setup_ghcb(void) { - if (!sev_es_negotiate_protocol()) - sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_PROT_UNSUPPORTED); - if (set_page_decrypted((unsigned long)&boot_ghcb_page)) return false; @@ -171,7 +168,7 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code) struct es_em_ctxt ctxt; enum es_result result; - if (!boot_ghcb && !early_setup_sev_es()) + if (!boot_ghcb && !early_setup_ghcb()) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); vc_ghcb_invalidate(boot_ghcb); @@ -235,5 +232,18 @@ void sev_enable(struct boot_params *bp) if (!(sev_status & MSR_AMD64_SEV_ENABLED)) return; + /* Negotiate the GHCB protocol version. */ + if (sev_status & MSR_AMD64_SEV_ES_ENABLED) { + if (!sev_es_negotiate_protocol()) + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_PROT_UNSUPPORTED); + } + + /* + * SNP is supported in v2 of the GHCB spec which mandates support for HV + * features. + */ + if (sev_status & MSR_AMD64_SEV_SNP_ENABLED && !(get_hv_features() & GHCB_HV_FT_SNP)) + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); + sme_me_mask = BIT_ULL(ebx & 0x3f); } diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index 94f0ea574049..6f037c29a46e 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -60,6 +60,11 @@ /* GHCB Hypervisor Feature Request/Response */ #define GHCB_MSR_HV_FT_REQ 0x080 #define GHCB_MSR_HV_FT_RESP 0x081 +#define GHCB_MSR_HV_FT_RESP_VAL(v) \ + /* GHCBData[63:12] */ \ + (((u64)(v) & GENMASK_ULL(63, 12)) >> 12) + +#define GHCB_HV_FT_SNP BIT_ULL(0) #define GHCB_MSR_TERM_REQ 0x100 #define GHCB_MSR_TERM_REASON_SET_POS 12 @@ -77,6 +82,7 @@ #define SEV_TERM_SET_GEN 0 #define GHCB_SEV_ES_GEN_REQ 0 #define GHCB_SEV_ES_PROT_UNSUPPORTED 1 +#define GHCB_SNP_UNSUPPORTED 2 /* Linux-specific reason codes (used with reason set 1) */ #define SEV_TERM_SET_LINUX 1 diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 9b9c190e8c3b..17b75f6ee11a 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -13,7 +13,7 @@ #include #define GHCB_PROTOCOL_MIN 1ULL -#define GHCB_PROTOCOL_MAX 1ULL +#define GHCB_PROTOCOL_MAX 2ULL #define GHCB_DEFAULT_USAGE 0ULL #define VMGEXIT() { asm volatile("rep; vmmcall\n\r"); } diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index efa969325ede..b0ad00f4c1e1 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -108,6 +108,7 @@ #define SVM_VMGEXIT_AP_JUMP_TABLE 0x80000005 #define SVM_VMGEXIT_SET_AP_JUMP_TABLE 0 #define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1 +#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd #define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff /* Exit code reserved for hypervisor/software use */ @@ -218,6 +219,7 @@ { SVM_VMGEXIT_NMI_COMPLETE, "vmgexit_nmi_complete" }, \ { SVM_VMGEXIT_AP_HLT_LOOP, "vmgexit_ap_hlt_loop" }, \ { SVM_VMGEXIT_AP_JUMP_TABLE, "vmgexit_ap_jump_table" }, \ + { SVM_VMGEXIT_HV_FEATURES, "vmgexit_hypervisor_feature" }, \ { SVM_EXIT_ERR, "invalid_guest_state" } diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 91105f5a02a8..4a876e684f67 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -48,6 +48,26 @@ static void __noreturn sev_es_terminate(unsigned int set, unsigned int reason) asm volatile("hlt\n" : : : "memory"); } +/* + * The hypervisor features are available from GHCB version 2 onward. + */ +static u64 get_hv_features(void) +{ + u64 val; + + if (ghcb_version < 2) + return 0; + + sev_es_wr_ghcb_msr(GHCB_MSR_HV_FT_REQ); + VMGEXIT(); + + val = sev_es_rd_ghcb_msr(); + if (GHCB_RESP_CODE(val) != GHCB_MSR_HV_FT_RESP) + return 0; + + return GHCB_MSR_HV_FT_RESP_VAL(val); +} + static bool sev_es_negotiate_protocol(void) { u64 val; diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 19ad09712902..cb20fb0c608e 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -43,6 +43,9 @@ static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE); */ static struct ghcb __initdata *boot_ghcb; +/* Bitmap of SEV features supported by the hypervisor */ +static u64 sev_hv_features __ro_after_init; + /* #VC handler runtime per-CPU data */ struct sev_es_runtime_data { struct ghcb ghcb_page; @@ -766,6 +769,17 @@ void __init sev_es_init_vc_handling(void) if (!sev_es_check_cpu_features()) panic("SEV-ES CPU Features missing"); + /* + * SNP is supported in v2 of the GHCB spec which mandates support for HV + * features. + */ + if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) { + sev_hv_features = get_hv_features(); + + if (!(sev_hv_features & GHCB_HV_FT_SNP)) + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); + } + /* Enable SEV-ES special handling */ static_branch_enable(&sev_es_enable_key); From 0bd6f1e526070271dbe0f626b123b4f6b01dc79c Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Wed, 9 Feb 2022 12:10:07 -0600 Subject: [PATCH 13/60] x86/sev: Add a helper for the PVALIDATE instruction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An SNP-active guest uses the PVALIDATE instruction to validate or rescind the validation of a guest page’s RMP entry. Upon completion, a return code is stored in EAX and rFLAGS bits are set based on the return code. If the instruction completed successfully, the carry flag (CF) indicates if the content of the RMP were changed or not. See AMD APM Volume 3 for additional details. Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Reviewed-by: Venu Busireddy Link: https://lore.kernel.org/r/20220307213356.2797205-14-brijesh.singh@amd.com --- arch/x86/include/asm/sev.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 17b75f6ee11a..4ee98976aed8 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -60,6 +60,9 @@ extern void vc_no_ghcb(void); extern void vc_boot_ghcb(void); extern bool handle_vc_boot_ghcb(struct pt_regs *regs); +/* Software defined (when rFlags.CF = 1) */ +#define PVALIDATE_FAIL_NOUPDATE 255 + #ifdef CONFIG_AMD_MEM_ENCRYPT extern struct static_key_false sev_es_enable_key; extern void __sev_es_ist_enter(struct pt_regs *regs); @@ -87,12 +90,30 @@ extern enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, struct es_em_ctxt *ctxt, u64 exit_code, u64 exit_info_1, u64 exit_info_2); +static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) +{ + bool no_rmpupdate; + int rc; + + /* "pvalidate" mnemonic support in binutils 2.36 and newer */ + asm volatile(".byte 0xF2, 0x0F, 0x01, 0xFF\n\t" + CC_SET(c) + : CC_OUT(c) (no_rmpupdate), "=a"(rc) + : "a"(vaddr), "c"(rmp_psize), "d"(validate) + : "memory", "cc"); + + if (no_rmpupdate) + return PVALIDATE_FAIL_NOUPDATE; + + return rc; +} #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; } static inline void sev_es_nmi_complete(void) { } static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; } +static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; } #endif #endif From 81cc3df9a90e7817494421ecc48ede6bd5e8132b Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Wed, 9 Feb 2022 12:10:08 -0600 Subject: [PATCH 14/60] x86/sev: Check the VMPL level The Virtual Machine Privilege Level (VMPL) feature in the SEV-SNP architecture allows a guest VM to divide its address space into four levels. The level can be used to provide hardware isolated abstraction layers within a VM. VMPL0 is the highest privilege level, and VMPL3 is the least privilege level. Certain operations must be done by the VMPL0 software, such as: * Validate or invalidate memory range (PVALIDATE instruction) * Allocate VMSA page (RMPADJUST instruction when VMSA=1) The initial SNP support requires that the guest kernel is running at VMPL0. Add such a check to verify the guest is running at level 0 before continuing the boot. There is no easy method to query the current VMPL level, so use the RMPADJUST instruction to determine whether the guest is running at the VMPL0. [ bp: Massage commit message. ] Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-15-brijesh.singh@amd.com --- arch/x86/boot/compressed/sev.c | 28 ++++++++++++++++++++++++++-- arch/x86/include/asm/sev-common.h | 1 + arch/x86/include/asm/sev.h | 16 ++++++++++++++++ 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 5b389310be87..eb421787d50a 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -199,6 +199,26 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); } +static void enforce_vmpl0(void) +{ + u64 attrs; + int err; + + /* + * RMPADJUST modifies RMP permissions of a lesser-privileged (numerically + * higher) privilege level. Here, clear the VMPL1 permission mask of the + * GHCB page. If the guest is not running at VMPL0, this will fail. + * + * If the guest is running at VMPL0, it will succeed. Even if that operation + * modifies permission bits, it is still ok to do so currently because Linux + * SNP guests are supported only on VMPL0 so VMPL1 or higher permission masks + * changing is a don't-care. + */ + attrs = 1; + if (rmpadjust((unsigned long)&boot_ghcb_page, RMP_PG_SIZE_4K, attrs)) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NOT_VMPL0); +} + void sev_enable(struct boot_params *bp) { unsigned int eax, ebx, ecx, edx; @@ -242,8 +262,12 @@ void sev_enable(struct boot_params *bp) * SNP is supported in v2 of the GHCB spec which mandates support for HV * features. */ - if (sev_status & MSR_AMD64_SEV_SNP_ENABLED && !(get_hv_features() & GHCB_HV_FT_SNP)) - sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); + if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) { + if (!(get_hv_features() & GHCB_HV_FT_SNP)) + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); + + enforce_vmpl0(); + } sme_me_mask = BIT_ULL(ebx & 0x3f); } diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index 6f037c29a46e..7ac5842e32b6 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -89,6 +89,7 @@ #define GHCB_TERM_REGISTER 0 /* GHCB GPA registration failure */ #define GHCB_TERM_PSC 1 /* Page State Change failure */ #define GHCB_TERM_PVALIDATE 2 /* Pvalidate failure */ +#define GHCB_TERM_NOT_VMPL0 3 /* SNP guest is not running at VMPL-0 */ #define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 4ee98976aed8..e37451849165 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -63,6 +63,9 @@ extern bool handle_vc_boot_ghcb(struct pt_regs *regs); /* Software defined (when rFlags.CF = 1) */ #define PVALIDATE_FAIL_NOUPDATE 255 +/* RMP page size */ +#define RMP_PG_SIZE_4K 0 + #ifdef CONFIG_AMD_MEM_ENCRYPT extern struct static_key_false sev_es_enable_key; extern void __sev_es_ist_enter(struct pt_regs *regs); @@ -90,6 +93,18 @@ extern enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, struct es_em_ctxt *ctxt, u64 exit_code, u64 exit_info_1, u64 exit_info_2); +static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) +{ + int rc; + + /* "rmpadjust" mnemonic support in binutils 2.36 and newer */ + asm volatile(".byte 0xF3,0x0F,0x01,0xFE\n\t" + : "=a"(rc) + : "a"(vaddr), "c"(rmp_psize), "d"(attrs) + : "memory", "cc"); + + return rc; +} static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { bool no_rmpupdate; @@ -114,6 +129,7 @@ static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { ret static inline void sev_es_nmi_complete(void) { } static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; } static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; } +static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; } #endif #endif From 4f9c403e44e5e88feb27d5e617d1adc9cc7ef684 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Wed, 9 Feb 2022 12:10:09 -0600 Subject: [PATCH 15/60] x86/compressed: Add helper for validating pages in the decompression stage Many of the integrity guarantees of SEV-SNP are enforced through the Reverse Map Table (RMP). Each RMP entry contains the GPA at which a particular page of DRAM should be mapped. The VMs can request the hypervisor to add pages in the RMP table via the Page State Change VMGEXIT defined in the GHCB specification. Inside each RMP entry is a Validated flag; this flag is automatically cleared to 0 by the CPU hardware when a new RMP entry is created for a guest. Each VM page can be either validated or invalidated, as indicated by the Validated flag in the RMP entry. Memory access to a private page that is not validated generates a #VC. A VM must use the PVALIDATE instruction to validate a private page before using it. To maintain the security guarantee of SEV-SNP guests, when transitioning pages from private to shared, the guest must invalidate the pages before asking the hypervisor to change the page state to shared in the RMP table. After the pages are mapped private in the page table, the guest must issue a page state change VMGEXIT to mark the pages private in the RMP table and validate them. Upon boot, BIOS should have validated the entire system memory. During the kernel decompression stage, early_setup_ghcb() uses set_page_decrypted() to make the GHCB page shared (i.e. clear encryption attribute). And while exiting from the decompression, it calls set_page_encrypted() to make the page private. Add snp_set_page_{private,shared}() helpers that are used by set_page_{decrypted,encrypted}() to change the page state in the RMP table. [ bp: Massage commit message and comments. ] Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-16-brijesh.singh@amd.com --- arch/x86/boot/compressed/ident_map_64.c | 18 +++++++++- arch/x86/boot/compressed/misc.h | 4 +++ arch/x86/boot/compressed/sev.c | 46 +++++++++++++++++++++++++ arch/x86/include/asm/sev-common.h | 26 ++++++++++++++ 4 files changed, 93 insertions(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index f7213d0943b8..613367e7e09e 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -275,15 +275,31 @@ static int set_clr_page_flags(struct x86_mapping_info *info, * Changing encryption attributes of a page requires to flush it from * the caches. */ - if ((set | clr) & _PAGE_ENC) + if ((set | clr) & _PAGE_ENC) { clflush_page(address); + /* + * If the encryption attribute is being cleared, change the page state + * to shared in the RMP table. + */ + if (clr) + snp_set_page_shared(__pa(address & PAGE_MASK)); + } + /* Update PTE */ pte = *ptep; pte = pte_set_flags(pte, set); pte = pte_clear_flags(pte, clr); set_pte(ptep, pte); + /* + * If the encryption attribute is being set, then change the page state to + * private in the RMP entry. The page state change must be done after the PTE + * is updated. + */ + if (set & _PAGE_ENC) + snp_set_page_private(__pa(address & PAGE_MASK)); + /* Flush TLB after changing encryption attribute */ write_cr3(top_level_pgt); diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 23e0e395084a..01cc13c12059 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -124,6 +124,8 @@ static inline void console_init(void) void sev_enable(struct boot_params *bp); void sev_es_shutdown_ghcb(void); extern bool sev_es_check_ghcb_fault(unsigned long address); +void snp_set_page_private(unsigned long paddr); +void snp_set_page_shared(unsigned long paddr); #else static inline void sev_enable(struct boot_params *bp) { } static inline void sev_es_shutdown_ghcb(void) { } @@ -131,6 +133,8 @@ static inline bool sev_es_check_ghcb_fault(unsigned long address) { return false; } +static inline void snp_set_page_private(unsigned long paddr) { } +static inline void snp_set_page_shared(unsigned long paddr) { } #endif /* acpi.c */ diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index eb421787d50a..5f2c26860df7 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -116,6 +116,52 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, /* Include code for early handlers */ #include "../../kernel/sev-shared.c" +static inline bool sev_snp_enabled(void) +{ + return sev_status & MSR_AMD64_SEV_SNP_ENABLED; +} + +static void __page_state_change(unsigned long paddr, enum psc_op op) +{ + u64 val; + + if (!sev_snp_enabled()) + return; + + /* + * If private -> shared then invalidate the page before requesting the + * state change in the RMP table. + */ + if (op == SNP_PAGE_STATE_SHARED && pvalidate(paddr, RMP_PG_SIZE_4K, 0)) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); + + /* Issue VMGEXIT to change the page state in RMP table. */ + sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op)); + VMGEXIT(); + + /* Read the response of the VMGEXIT. */ + val = sev_es_rd_ghcb_msr(); + if ((GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP) || GHCB_MSR_PSC_RESP_VAL(val)) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); + + /* + * Now that page state is changed in the RMP table, validate it so that it is + * consistent with the RMP entry. + */ + if (op == SNP_PAGE_STATE_PRIVATE && pvalidate(paddr, RMP_PG_SIZE_4K, 1)) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); +} + +void snp_set_page_private(unsigned long paddr) +{ + __page_state_change(paddr, SNP_PAGE_STATE_PRIVATE); +} + +void snp_set_page_shared(unsigned long paddr) +{ + __page_state_change(paddr, SNP_PAGE_STATE_SHARED); +} + static bool early_setup_ghcb(void) { if (set_page_decrypted((unsigned long)&boot_ghcb_page)) diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index 7ac5842e32b6..fe7fe16e5fd5 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -57,6 +57,32 @@ #define GHCB_MSR_AP_RESET_HOLD_REQ 0x006 #define GHCB_MSR_AP_RESET_HOLD_RESP 0x007 +/* + * SNP Page State Change Operation + * + * GHCBData[55:52] - Page operation: + * 0x0001 Page assignment, Private + * 0x0002 Page assignment, Shared + */ +enum psc_op { + SNP_PAGE_STATE_PRIVATE = 1, + SNP_PAGE_STATE_SHARED, +}; + +#define GHCB_MSR_PSC_REQ 0x014 +#define GHCB_MSR_PSC_REQ_GFN(gfn, op) \ + /* GHCBData[55:52] */ \ + (((u64)((op) & 0xf) << 52) | \ + /* GHCBData[51:12] */ \ + ((u64)((gfn) & GENMASK_ULL(39, 0)) << 12) | \ + /* GHCBData[11:0] */ \ + GHCB_MSR_PSC_REQ) + +#define GHCB_MSR_PSC_RESP 0x015 +#define GHCB_MSR_PSC_RESP_VAL(val) \ + /* GHCBData[63:32] */ \ + (((u64)(val) & GENMASK_ULL(63, 32)) >> 32) + /* GHCB Hypervisor Feature Request/Response */ #define GHCB_MSR_HV_FT_REQ 0x080 #define GHCB_MSR_HV_FT_RESP 0x081 From 87294bdb7b4b73555b0fba45da1cdecdc6a0d5a8 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Wed, 9 Feb 2022 12:10:10 -0600 Subject: [PATCH 16/60] x86/compressed: Register GHCB memory when SEV-SNP is active The SEV-SNP guest is required by the GHCB spec to register the GHCB's Guest Physical Address (GPA). This is because the hypervisor may prefer that a guest use a consistent and/or specific GPA for the GHCB associated with a vCPU. For more information, see the GHCB specification section "GHCB GPA Registration". If hypervisor can not work with the guest provided GPA then terminate the guest boot. Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Reviewed-by: Venu Busireddy Link: https://lore.kernel.org/r/20220307213356.2797205-17-brijesh.singh@amd.com --- arch/x86/boot/compressed/sev.c | 4 ++++ arch/x86/include/asm/sev-common.h | 13 +++++++++++++ arch/x86/kernel/sev-shared.c | 16 ++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 5f2c26860df7..f31b434e2ce4 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -175,6 +175,10 @@ static bool early_setup_ghcb(void) /* Initialize lookup tables for the instruction decoder */ inat_init_tables(); + /* SNP guest requires the GHCB GPA must be registered */ + if (sev_snp_enabled()) + snp_register_ghcb_early(__pa(&boot_ghcb_page)); + return true; } diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index fe7fe16e5fd5..f077a6c95e67 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -57,6 +57,19 @@ #define GHCB_MSR_AP_RESET_HOLD_REQ 0x006 #define GHCB_MSR_AP_RESET_HOLD_RESP 0x007 +/* GHCB GPA Register */ +#define GHCB_MSR_REG_GPA_REQ 0x012 +#define GHCB_MSR_REG_GPA_REQ_VAL(v) \ + /* GHCBData[63:12] */ \ + (((u64)((v) & GENMASK_ULL(51, 0)) << 12) | \ + /* GHCBData[11:0] */ \ + GHCB_MSR_REG_GPA_REQ) + +#define GHCB_MSR_REG_GPA_RESP 0x013 +#define GHCB_MSR_REG_GPA_RESP_VAL(v) \ + /* GHCBData[63:12] */ \ + (((u64)(v) & GENMASK_ULL(63, 12)) >> 12) + /* * SNP Page State Change Operation * diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 4a876e684f67..e9ff13cd90b0 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -68,6 +68,22 @@ static u64 get_hv_features(void) return GHCB_MSR_HV_FT_RESP_VAL(val); } +static void __maybe_unused snp_register_ghcb_early(unsigned long paddr) +{ + unsigned long pfn = paddr >> PAGE_SHIFT; + u64 val; + + sev_es_wr_ghcb_msr(GHCB_MSR_REG_GPA_REQ_VAL(pfn)); + VMGEXIT(); + + val = sev_es_rd_ghcb_msr(); + + /* If the response GPA is not ours then abort the guest */ + if ((GHCB_RESP_CODE(val) != GHCB_MSR_REG_GPA_RESP) || + (GHCB_MSR_REG_GPA_RESP_VAL(val) != pfn)) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER); +} + static bool sev_es_negotiate_protocol(void) { u64 val; From 95d33bfaa3e169cfec1926e0d0f0c6b0ea75d763 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Wed, 9 Feb 2022 12:10:11 -0600 Subject: [PATCH 17/60] x86/sev: Register GHCB memory when SEV-SNP is active The SEV-SNP guest is required by the GHCB spec to register the GHCB's Guest Physical Address (GPA). This is because the hypervisor may prefer that a guest uses a consistent and/or specific GPA for the GHCB associated with a vCPU. For more information, see the GHCB specification section "GHCB GPA Registration". [ bp: Cleanup comments. ] Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-18-brijesh.singh@amd.com --- arch/x86/include/asm/sev.h | 2 ++ arch/x86/kernel/cpu/common.c | 4 ++++ arch/x86/kernel/head64.c | 4 +++- arch/x86/kernel/sev-shared.c | 2 +- arch/x86/kernel/sev.c | 46 ++++++++++++++++++++++++++---------- 5 files changed, 44 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index e37451849165..48df02713ee0 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -122,6 +122,7 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) return rc; } +void setup_ghcb(void); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } @@ -130,6 +131,7 @@ static inline void sev_es_nmi_complete(void) { } static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; } static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; } static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; } +static inline void setup_ghcb(void) { } #endif #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ed4417500700..9e4552133872 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -60,6 +60,7 @@ #include #include #include +#include #include "cpu.h" @@ -2124,6 +2125,9 @@ void cpu_init_exception_handling(void) load_TR_desc(); + /* GHCB needs to be setup to handle #VC. */ + setup_ghcb(); + /* Finally load the IDT */ load_current_idt(); } diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index cbc285ddc4ac..83514b9827e6 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -597,8 +597,10 @@ static void startup_64_load_idt(unsigned long physbase) void early_setup_idt(void) { /* VMM Communication Exception */ - if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) + if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) { + setup_ghcb(); set_bringup_idt_handler(bringup_idt_table, X86_TRAP_VC, vc_boot_ghcb); + } bringup_idt_descr.address = (unsigned long)bringup_idt_table; native_load_idt(&bringup_idt_descr); diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index e9ff13cd90b0..3aaef1a18ffe 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -68,7 +68,7 @@ static u64 get_hv_features(void) return GHCB_MSR_HV_FT_RESP_VAL(val); } -static void __maybe_unused snp_register_ghcb_early(unsigned long paddr) +static void snp_register_ghcb_early(unsigned long paddr) { unsigned long pfn = paddr >> PAGE_SHIFT; u64 val; diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index cb20fb0c608e..39c87469ee0b 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -41,7 +41,7 @@ static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE); * Needs to be in the .data section because we need it NULL before bss is * cleared */ -static struct ghcb __initdata *boot_ghcb; +static struct ghcb *boot_ghcb __section(".data"); /* Bitmap of SEV features supported by the hypervisor */ static u64 sev_hv_features __ro_after_init; @@ -647,15 +647,39 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) return ret; } -/* - * This function runs on the first #VC exception after the kernel - * switched to virtual addresses. - */ -static bool __init sev_es_setup_ghcb(void) +static void snp_register_per_cpu_ghcb(void) { + struct sev_es_runtime_data *data; + struct ghcb *ghcb; + + data = this_cpu_read(runtime_data); + ghcb = &data->ghcb_page; + + snp_register_ghcb_early(__pa(ghcb)); +} + +void setup_ghcb(void) +{ + if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) + return; + /* First make sure the hypervisor talks a supported protocol. */ if (!sev_es_negotiate_protocol()) - return false; + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); + + /* + * Check whether the runtime #VC exception handler is active. It uses + * the per-CPU GHCB page which is set up by sev_es_init_vc_handling(). + * + * If SNP is active, register the per-CPU GHCB page so that the runtime + * exception handler can use it. + */ + if (initial_vc_handler == (unsigned long)kernel_exc_vmm_communication) { + if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + snp_register_per_cpu_ghcb(); + + return; + } /* * Clear the boot_ghcb. The first exception comes in before the bss @@ -666,7 +690,9 @@ static bool __init sev_es_setup_ghcb(void) /* Alright - Make the boot-ghcb public */ boot_ghcb = &boot_ghcb_page; - return true; + /* SNP guest requires that GHCB GPA must be registered. */ + if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + snp_register_ghcb_early(__pa(&boot_ghcb_page)); } #ifdef CONFIG_HOTPLUG_CPU @@ -1397,10 +1423,6 @@ bool __init handle_vc_boot_ghcb(struct pt_regs *regs) struct es_em_ctxt ctxt; enum es_result result; - /* Do initial setup or terminate the guest */ - if (unlikely(boot_ghcb == NULL && !sev_es_setup_ghcb())) - sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); - vc_ghcb_invalidate(boot_ghcb); result = vc_init_em_ctxt(&ctxt, regs, exit_code); From 5e5ccff60a2977142d39b987a8b90e422d9fc634 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Wed, 9 Feb 2022 12:10:12 -0600 Subject: [PATCH 18/60] x86/sev: Add helper for validating pages in early enc attribute changes early_set_memory_{encrypted,decrypted}() are used for changing the page state from decrypted (shared) to encrypted (private) and vice versa. When SEV-SNP is active, the page state transition needs to go through additional steps. If the page is transitioned from shared to private, then perform the following after the encryption attribute is set in the page table: 1. Issue the page state change VMGEXIT to add the page as a private in the RMP table. 2. Validate the page after its successfully added in the RMP table. To maintain the security guarantees, if the page is transitioned from private to shared, then perform the following before clearing the encryption attribute from the page table. 1. Invalidate the page. 2. Issue the page state change VMGEXIT to make the page shared in the RMP table. early_set_memory_{encrypted,decrypted}() can be called before the GHCB is setup so use the SNP page state MSR protocol VMGEXIT defined in the GHCB specification to request the page state change in the RMP table. While at it, add a helper snp_prep_memory() which will be used in probe_roms(), in a later patch. [ bp: Massage commit message. ] Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Reviewed-by: Venu Busireddy Link: https://lore.kernel.org/r/20220307213356.2797205-19-brijesh.singh@amd.com --- arch/x86/include/asm/sev.h | 10 ++++ arch/x86/kernel/sev.c | 99 +++++++++++++++++++++++++++++++++++ arch/x86/mm/mem_encrypt_amd.c | 58 ++++++++++++++++++-- 3 files changed, 163 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 48df02713ee0..f65d257e3d4a 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -123,6 +123,11 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) return rc; } void setup_ghcb(void); +void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, + unsigned int npages); +void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, + unsigned int npages); +void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } @@ -132,6 +137,11 @@ static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; } static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; } static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; } static inline void setup_ghcb(void) { } +static inline void __init +early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned int npages) { } +static inline void __init +early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned int npages) { } +static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { } #endif #endif diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 39c87469ee0b..e3fca8615fe8 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -556,6 +556,105 @@ static u64 get_jump_table_addr(void) return ret; } +static void pvalidate_pages(unsigned long vaddr, unsigned int npages, bool validate) +{ + unsigned long vaddr_end; + int rc; + + vaddr = vaddr & PAGE_MASK; + vaddr_end = vaddr + (npages << PAGE_SHIFT); + + while (vaddr < vaddr_end) { + rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate); + if (WARN(rc, "Failed to validate address 0x%lx ret %d", vaddr, rc)) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); + + vaddr = vaddr + PAGE_SIZE; + } +} + +static void __init early_set_pages_state(unsigned long paddr, unsigned int npages, enum psc_op op) +{ + unsigned long paddr_end; + u64 val; + + paddr = paddr & PAGE_MASK; + paddr_end = paddr + (npages << PAGE_SHIFT); + + while (paddr < paddr_end) { + /* + * Use the MSR protocol because this function can be called before + * the GHCB is established. + */ + sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op)); + VMGEXIT(); + + val = sev_es_rd_ghcb_msr(); + + if (WARN(GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP, + "Wrong PSC response code: 0x%x\n", + (unsigned int)GHCB_RESP_CODE(val))) + goto e_term; + + if (WARN(GHCB_MSR_PSC_RESP_VAL(val), + "Failed to change page state to '%s' paddr 0x%lx error 0x%llx\n", + op == SNP_PAGE_STATE_PRIVATE ? "private" : "shared", + paddr, GHCB_MSR_PSC_RESP_VAL(val))) + goto e_term; + + paddr = paddr + PAGE_SIZE; + } + + return; + +e_term: + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); +} + +void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, + unsigned int npages) +{ + if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + return; + + /* + * Ask the hypervisor to mark the memory pages as private in the RMP + * table. + */ + early_set_pages_state(paddr, npages, SNP_PAGE_STATE_PRIVATE); + + /* Validate the memory pages after they've been added in the RMP table. */ + pvalidate_pages(vaddr, npages, true); +} + +void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, + unsigned int npages) +{ + if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + return; + + /* Invalidate the memory pages before they are marked shared in the RMP table. */ + pvalidate_pages(vaddr, npages, false); + + /* Ask hypervisor to mark the memory pages shared in the RMP table. */ + early_set_pages_state(paddr, npages, SNP_PAGE_STATE_SHARED); +} + +void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) +{ + unsigned long vaddr, npages; + + vaddr = (unsigned long)__va(paddr); + npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; + + if (op == SNP_PAGE_STATE_PRIVATE) + early_snp_set_memory_private(vaddr, paddr, npages); + else if (op == SNP_PAGE_STATE_SHARED) + early_snp_set_memory_shared(vaddr, paddr, npages); + else + WARN(1, "invalid memory op %d\n", op); +} + int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { u16 startup_cs, startup_ip; diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 6169053c2854..8539dd6f24ff 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "mm_internal.h" @@ -47,6 +48,36 @@ EXPORT_SYMBOL(sme_me_mask); /* Buffer used for early in-place encryption by BSP, no locking needed */ static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE); +/* + * SNP-specific routine which needs to additionally change the page state from + * private to shared before copying the data from the source to destination and + * restore after the copy. + */ +static inline void __init snp_memcpy(void *dst, void *src, size_t sz, + unsigned long paddr, bool decrypt) +{ + unsigned long npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; + + if (decrypt) { + /* + * @paddr needs to be accessed decrypted, mark the page shared in + * the RMP table before copying it. + */ + early_snp_set_memory_shared((unsigned long)__va(paddr), paddr, npages); + + memcpy(dst, src, sz); + + /* Restore the page state after the memcpy. */ + early_snp_set_memory_private((unsigned long)__va(paddr), paddr, npages); + } else { + /* + * @paddr need to be accessed encrypted, no need for the page state + * change. + */ + memcpy(dst, src, sz); + } +} + /* * This routine does not change the underlying encryption setting of the * page(s) that map this memory. It assumes that eventually the memory is @@ -95,8 +126,13 @@ static void __init __sme_early_enc_dec(resource_size_t paddr, * Use a temporary buffer, of cache-line multiple size, to * avoid data corruption as documented in the APM. */ - memcpy(sme_early_buffer, src, len); - memcpy(dst, sme_early_buffer, len); + if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) { + snp_memcpy(sme_early_buffer, src, len, paddr, enc); + snp_memcpy(dst, sme_early_buffer, len, paddr, !enc); + } else { + memcpy(sme_early_buffer, src, len); + memcpy(dst, sme_early_buffer, len); + } early_memunmap(dst, len); early_memunmap(src, len); @@ -322,14 +358,28 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) clflush_cache_range(__va(pa), size); /* Encrypt/decrypt the contents in-place */ - if (enc) + if (enc) { sme_early_encrypt(pa, size); - else + } else { sme_early_decrypt(pa, size); + /* + * ON SNP, the page state in the RMP table must happen + * before the page table updates. + */ + early_snp_set_memory_shared((unsigned long)__va(pa), pa, 1); + } + /* Change the page encryption mask. */ new_pte = pfn_pte(pfn, new_prot); set_pte_atomic(kpte, new_pte); + + /* + * If page is set encrypted in the page table, then update the RMP table to + * add this page as private. + */ + if (enc) + early_snp_set_memory_private((unsigned long)__va(pa), pa, 1); } static int __init early_set_memory_enc_dec(unsigned long vaddr, From efac0eedfab515e523cde5cb7a62289eb2ee58f8 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Wed, 9 Feb 2022 12:10:13 -0600 Subject: [PATCH 19/60] x86/kernel: Mark the .bss..decrypted section as shared in the RMP table The encryption attribute for the .bss..decrypted section is cleared in the initial page table build. This is because the section contains the data that need to be shared between the guest and the hypervisor. When SEV-SNP is active, just clearing the encryption attribute in the page table is not enough. The page state needs to be updated in the RMP table. Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-20-brijesh.singh@amd.com --- arch/x86/kernel/head64.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 83514b9827e6..656d2f3e2cf0 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -143,7 +143,20 @@ static unsigned long __head sme_postprocess_startup(struct boot_params *bp, pmdv if (sme_get_me_mask()) { vaddr = (unsigned long)__start_bss_decrypted; vaddr_end = (unsigned long)__end_bss_decrypted; + for (; vaddr < vaddr_end; vaddr += PMD_SIZE) { + /* + * On SNP, transition the page to shared in the RMP table so that + * it is consistent with the page table attribute change. + * + * __start_bss_decrypted has a virtual address in the high range + * mapping (kernel .text). PVALIDATE, by way of + * early_snp_set_memory_shared(), requires a valid virtual + * address but the kernel is currently running off of the identity + * mapping so use __pa() to get a *currently* valid virtual address. + */ + early_snp_set_memory_shared(__pa(vaddr), __pa(vaddr), PTRS_PER_PMD); + i = pmd_index(vaddr); pmd[i] -= sme_get_me_mask(); } From 9704c07bf9f7682a83aec4e66f2d9154dbd8577f Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Wed, 9 Feb 2022 12:10:14 -0600 Subject: [PATCH 20/60] x86/kernel: Validate ROM memory before accessing when SEV-SNP is active probe_roms() accesses the memory range (0xc0000 - 0x10000) to probe various ROMs. The memory range is not part of the E820 system RAM range. The memory range is mapped as private (i.e encrypted) in the page table. When SEV-SNP is active, all the private memory must be validated before accessing. The ROM range was not part of E820 map, so the guest BIOS did not validate it. An access to invalidated memory will cause a exception yet, so validate the ROM memory regions before it is accessed. [ bp: Massage commit message. ] Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-21-brijesh.singh@amd.com --- arch/x86/kernel/probe_roms.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 36e84d904260..319fef37d9dc 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -21,6 +21,7 @@ #include #include #include +#include static struct resource system_rom_resource = { .name = "System ROM", @@ -197,11 +198,21 @@ static int __init romchecksum(const unsigned char *rom, unsigned long length) void __init probe_roms(void) { - const unsigned char *rom; unsigned long start, length, upper; + const unsigned char *rom; unsigned char c; int i; + /* + * The ROM memory range is not part of the e820 table and is therefore not + * pre-validated by BIOS. The kernel page table maps the ROM region as encrypted + * memory, and SNP requires encrypted memory to be validated before access. + * Do that here. + */ + snp_prep_memory(video_rom_resource.start, + ((system_rom_resource.end + 1) - video_rom_resource.start), + SNP_PAGE_STATE_PRIVATE); + /* video rom */ upper = adapter_rom_resources[0].start; for (start = video_rom_resource.start; start < upper; start += 2048) { From dc3f3d2474b80eaee8be89f4c5eb344f10648f42 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Thu, 24 Feb 2022 10:56:01 -0600 Subject: [PATCH 21/60] x86/mm: Validate memory when changing the C-bit Add the needed functionality to change pages state from shared to private and vice-versa using the Page State Change VMGEXIT as documented in the GHCB spec. Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-22-brijesh.singh@amd.com --- arch/x86/include/asm/sev-common.h | 22 ++++ arch/x86/include/asm/sev.h | 4 + arch/x86/include/uapi/asm/svm.h | 2 + arch/x86/kernel/sev.c | 168 ++++++++++++++++++++++++++++++ arch/x86/mm/mem_encrypt_amd.c | 13 +++ 5 files changed, 209 insertions(+) diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index f077a6c95e67..1aa72b5c2490 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -105,6 +105,28 @@ enum psc_op { #define GHCB_HV_FT_SNP BIT_ULL(0) +/* SNP Page State Change NAE event */ +#define VMGEXIT_PSC_MAX_ENTRY 253 + +struct psc_hdr { + u16 cur_entry; + u16 end_entry; + u32 reserved; +} __packed; + +struct psc_entry { + u64 cur_page : 12, + gfn : 40, + operation : 4, + pagesize : 1, + reserved : 7; +} __packed; + +struct snp_psc_desc { + struct psc_hdr hdr; + struct psc_entry entries[VMGEXIT_PSC_MAX_ENTRY]; +} __packed; + #define GHCB_MSR_TERM_REQ 0x100 #define GHCB_MSR_TERM_REASON_SET_POS 12 #define GHCB_MSR_TERM_REASON_SET_MASK 0xf diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index f65d257e3d4a..feeb93e6ec97 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -128,6 +128,8 @@ void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long padd void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned int npages); void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op); +void snp_set_memory_shared(unsigned long vaddr, unsigned int npages); +void snp_set_memory_private(unsigned long vaddr, unsigned int npages); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } @@ -142,6 +144,8 @@ early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned static inline void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned int npages) { } static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { } +static inline void snp_set_memory_shared(unsigned long vaddr, unsigned int npages) { } +static inline void snp_set_memory_private(unsigned long vaddr, unsigned int npages) { } #endif #endif diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index b0ad00f4c1e1..64404b47b773 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -108,6 +108,7 @@ #define SVM_VMGEXIT_AP_JUMP_TABLE 0x80000005 #define SVM_VMGEXIT_SET_AP_JUMP_TABLE 0 #define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1 +#define SVM_VMGEXIT_PSC 0x80000010 #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd #define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff @@ -219,6 +220,7 @@ { SVM_VMGEXIT_NMI_COMPLETE, "vmgexit_nmi_complete" }, \ { SVM_VMGEXIT_AP_HLT_LOOP, "vmgexit_ap_hlt_loop" }, \ { SVM_VMGEXIT_AP_JUMP_TABLE, "vmgexit_ap_jump_table" }, \ + { SVM_VMGEXIT_PSC, "vmgexit_page_state_change" }, \ { SVM_VMGEXIT_HV_FEATURES, "vmgexit_hypervisor_feature" }, \ { SVM_EXIT_ERR, "invalid_guest_state" } diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index e3fca8615fe8..bf4b57835694 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -655,6 +655,174 @@ void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op WARN(1, "invalid memory op %d\n", op); } +static int vmgexit_psc(struct snp_psc_desc *desc) +{ + int cur_entry, end_entry, ret = 0; + struct snp_psc_desc *data; + struct ghcb_state state; + struct es_em_ctxt ctxt; + unsigned long flags; + struct ghcb *ghcb; + + /* + * __sev_get_ghcb() needs to run with IRQs disabled because it is using + * a per-CPU GHCB. + */ + local_irq_save(flags); + + ghcb = __sev_get_ghcb(&state); + if (!ghcb) { + ret = 1; + goto out_unlock; + } + + /* Copy the input desc into GHCB shared buffer */ + data = (struct snp_psc_desc *)ghcb->shared_buffer; + memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc))); + + /* + * As per the GHCB specification, the hypervisor can resume the guest + * before processing all the entries. Check whether all the entries + * are processed. If not, then keep retrying. Note, the hypervisor + * will update the data memory directly to indicate the status, so + * reference the data->hdr everywhere. + * + * The strategy here is to wait for the hypervisor to change the page + * state in the RMP table before guest accesses the memory pages. If the + * page state change was not successful, then later memory access will + * result in a crash. + */ + cur_entry = data->hdr.cur_entry; + end_entry = data->hdr.end_entry; + + while (data->hdr.cur_entry <= data->hdr.end_entry) { + ghcb_set_sw_scratch(ghcb, (u64)__pa(data)); + + /* This will advance the shared buffer data points to. */ + ret = sev_es_ghcb_hv_call(ghcb, true, &ctxt, SVM_VMGEXIT_PSC, 0, 0); + + /* + * Page State Change VMGEXIT can pass error code through + * exit_info_2. + */ + if (WARN(ret || ghcb->save.sw_exit_info_2, + "SNP: PSC failed ret=%d exit_info_2=%llx\n", + ret, ghcb->save.sw_exit_info_2)) { + ret = 1; + goto out; + } + + /* Verify that reserved bit is not set */ + if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) { + ret = 1; + goto out; + } + + /* + * Sanity check that entry processing is not going backwards. + * This will happen only if hypervisor is tricking us. + */ + if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry, +"SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n", + end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) { + ret = 1; + goto out; + } + } + +out: + __sev_put_ghcb(&state); + +out_unlock: + local_irq_restore(flags); + + return ret; +} + +static void __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr, + unsigned long vaddr_end, int op) +{ + struct psc_hdr *hdr; + struct psc_entry *e; + unsigned long pfn; + int i; + + hdr = &data->hdr; + e = data->entries; + + memset(data, 0, sizeof(*data)); + i = 0; + + while (vaddr < vaddr_end) { + if (is_vmalloc_addr((void *)vaddr)) + pfn = vmalloc_to_pfn((void *)vaddr); + else + pfn = __pa(vaddr) >> PAGE_SHIFT; + + e->gfn = pfn; + e->operation = op; + hdr->end_entry = i; + + /* + * Current SNP implementation doesn't keep track of the RMP page + * size so use 4K for simplicity. + */ + e->pagesize = RMP_PG_SIZE_4K; + + vaddr = vaddr + PAGE_SIZE; + e++; + i++; + } + + if (vmgexit_psc(data)) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); +} + +static void set_pages_state(unsigned long vaddr, unsigned int npages, int op) +{ + unsigned long vaddr_end, next_vaddr; + struct snp_psc_desc *desc; + + desc = kmalloc(sizeof(*desc), GFP_KERNEL_ACCOUNT); + if (!desc) + panic("SNP: failed to allocate memory for PSC descriptor\n"); + + vaddr = vaddr & PAGE_MASK; + vaddr_end = vaddr + (npages << PAGE_SHIFT); + + while (vaddr < vaddr_end) { + /* Calculate the last vaddr that fits in one struct snp_psc_desc. */ + next_vaddr = min_t(unsigned long, vaddr_end, + (VMGEXIT_PSC_MAX_ENTRY * PAGE_SIZE) + vaddr); + + __set_pages_state(desc, vaddr, next_vaddr, op); + + vaddr = next_vaddr; + } + + kfree(desc); +} + +void snp_set_memory_shared(unsigned long vaddr, unsigned int npages) +{ + if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + return; + + pvalidate_pages(vaddr, npages, false); + + set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED); +} + +void snp_set_memory_private(unsigned long vaddr, unsigned int npages) +{ + if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + return; + + set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE); + + pvalidate_pages(vaddr, npages, true); +} + int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { u16 startup_cs, startup_ip; diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 8539dd6f24ff..d3c88d9ef8d6 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -316,11 +316,24 @@ static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc) { + /* + * To maintain the security guarantees of SEV-SNP guests, make sure + * to invalidate the memory before encryption attribute is cleared. + */ + if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc) + snp_set_memory_shared(vaddr, npages); } /* Return true unconditionally: return value doesn't matter for the SEV side */ static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc) { + /* + * After memory is mapped encrypted in the page table, validate it + * so that it is consistent with the page table updates. + */ + if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && enc) + snp_set_memory_private(vaddr, npages); + if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) enc_dec_hypercall(vaddr, npages, enc); From 0afb6b660a6b58cb336d1175ed687bf9525849a4 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 7 Mar 2022 15:33:32 -0600 Subject: [PATCH 22/60] x86/sev: Use SEV-SNP AP creation to start secondary CPUs To provide a more secure way to start APs under SEV-SNP, use the SEV-SNP AP Creation NAE event. This allows for guest control over the AP register state rather than trusting the hypervisor with the SEV-ES Jump Table address. During native_smp_prepare_cpus(), invoke an SEV-SNP function that, if SEV-SNP is active, will set/override apic->wakeup_secondary_cpu. This will allow the SEV-SNP AP Creation NAE event method to be used to boot the APs. As a result of installing the override when SEV-SNP is active, this method of starting the APs becomes the required method. The override function will fail to start the AP if the hypervisor does not have support for AP creation. [ bp: Work in forgotten review comments. ] Signed-off-by: Tom Lendacky Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-23-brijesh.singh@amd.com --- arch/x86/include/asm/sev-common.h | 1 + arch/x86/include/asm/sev.h | 4 + arch/x86/include/uapi/asm/svm.h | 5 + arch/x86/kernel/sev.c | 242 ++++++++++++++++++++++++++++++ arch/x86/kernel/smpboot.c | 3 + 5 files changed, 255 insertions(+) diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index 1aa72b5c2490..e9b6815b3b3d 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -104,6 +104,7 @@ enum psc_op { (((u64)(v) & GENMASK_ULL(63, 12)) >> 12) #define GHCB_HV_FT_SNP BIT_ULL(0) +#define GHCB_HV_FT_SNP_AP_CREATION BIT_ULL(1) /* SNP Page State Change NAE event */ #define VMGEXIT_PSC_MAX_ENTRY 253 diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index feeb93e6ec97..a3203b2caaca 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -66,6 +66,8 @@ extern bool handle_vc_boot_ghcb(struct pt_regs *regs); /* RMP page size */ #define RMP_PG_SIZE_4K 0 +#define RMPADJUST_VMSA_PAGE_BIT BIT(16) + #ifdef CONFIG_AMD_MEM_ENCRYPT extern struct static_key_false sev_es_enable_key; extern void __sev_es_ist_enter(struct pt_regs *regs); @@ -130,6 +132,7 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op); void snp_set_memory_shared(unsigned long vaddr, unsigned int npages); void snp_set_memory_private(unsigned long vaddr, unsigned int npages); +void snp_set_wakeup_secondary_cpu(void); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } @@ -146,6 +149,7 @@ early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned i static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { } static inline void snp_set_memory_shared(unsigned long vaddr, unsigned int npages) { } static inline void snp_set_memory_private(unsigned long vaddr, unsigned int npages) { } +static inline void snp_set_wakeup_secondary_cpu(void) { } #endif #endif diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 64404b47b773..cfea5e875088 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -109,6 +109,10 @@ #define SVM_VMGEXIT_SET_AP_JUMP_TABLE 0 #define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1 #define SVM_VMGEXIT_PSC 0x80000010 +#define SVM_VMGEXIT_AP_CREATION 0x80000013 +#define SVM_VMGEXIT_AP_CREATE_ON_INIT 0 +#define SVM_VMGEXIT_AP_CREATE 1 +#define SVM_VMGEXIT_AP_DESTROY 2 #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd #define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff @@ -221,6 +225,7 @@ { SVM_VMGEXIT_AP_HLT_LOOP, "vmgexit_ap_hlt_loop" }, \ { SVM_VMGEXIT_AP_JUMP_TABLE, "vmgexit_ap_jump_table" }, \ { SVM_VMGEXIT_PSC, "vmgexit_page_state_change" }, \ + { SVM_VMGEXIT_AP_CREATION, "vmgexit_ap_creation" }, \ { SVM_VMGEXIT_HV_FEATURES, "vmgexit_hypervisor_feature" }, \ { SVM_EXIT_ERR, "invalid_guest_state" } diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index bf4b57835694..d7915ae7729d 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -31,9 +32,26 @@ #include #include #include +#include #define DR7_RESET_VALUE 0x400 +/* AP INIT values as documented in the APM2 section "Processor Initialization State" */ +#define AP_INIT_CS_LIMIT 0xffff +#define AP_INIT_DS_LIMIT 0xffff +#define AP_INIT_LDTR_LIMIT 0xffff +#define AP_INIT_GDTR_LIMIT 0xffff +#define AP_INIT_IDTR_LIMIT 0xffff +#define AP_INIT_TR_LIMIT 0xffff +#define AP_INIT_RFLAGS_DEFAULT 0x2 +#define AP_INIT_DR6_DEFAULT 0xffff0ff0 +#define AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL +#define AP_INIT_XCR0_DEFAULT 0x1 +#define AP_INIT_X87_FTW_DEFAULT 0x5555 +#define AP_INIT_X87_FCW_DEFAULT 0x0040 +#define AP_INIT_CR0_DEFAULT 0x60000010 +#define AP_INIT_MXCSR_DEFAULT 0x1f80 + /* For early boot hypervisor communication in SEV-ES enabled guests */ static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE); @@ -90,6 +108,8 @@ struct ghcb_state { static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); DEFINE_STATIC_KEY_FALSE(sev_es_enable_key); +static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa); + static __always_inline bool on_vc_stack(struct pt_regs *regs) { unsigned long sp = regs->sp; @@ -823,6 +843,228 @@ void snp_set_memory_private(unsigned long vaddr, unsigned int npages) pvalidate_pages(vaddr, npages, true); } +static int snp_set_vmsa(void *va, bool vmsa) +{ + u64 attrs; + + /* + * Running at VMPL0 allows the kernel to change the VMSA bit for a page + * using the RMPADJUST instruction. However, for the instruction to + * succeed it must target the permissions of a lesser privileged + * (higher numbered) VMPL level, so use VMPL1 (refer to the RMPADJUST + * instruction in the AMD64 APM Volume 3). + */ + attrs = 1; + if (vmsa) + attrs |= RMPADJUST_VMSA_PAGE_BIT; + + return rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs); +} + +#define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK) +#define INIT_CS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK) +#define INIT_DS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_WRITE_MASK) + +#define INIT_LDTR_ATTRIBS (SVM_SELECTOR_P_MASK | 2) +#define INIT_TR_ATTRIBS (SVM_SELECTOR_P_MASK | 3) + +static void *snp_alloc_vmsa_page(void) +{ + struct page *p; + + /* + * Allocate VMSA page to work around the SNP erratum where the CPU will + * incorrectly signal an RMP violation #PF if a large page (2MB or 1GB) + * collides with the RMP entry of VMSA page. The recommended workaround + * is to not use a large page. + * + * Allocate an 8k page which is also 8k-aligned. + */ + p = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1); + if (!p) + return NULL; + + split_page(p, 1); + + /* Free the first 4k. This page may be 2M/1G aligned and cannot be used. */ + __free_page(p); + + return page_address(p + 1); +} + +static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa) +{ + int err; + + err = snp_set_vmsa(vmsa, false); + if (err) + pr_err("clear VMSA page failed (%u), leaking page\n", err); + else + free_page((unsigned long)vmsa); +} + +static int wakeup_cpu_via_vmgexit(int apic_id, unsigned long start_ip) +{ + struct sev_es_save_area *cur_vmsa, *vmsa; + struct ghcb_state state; + unsigned long flags; + struct ghcb *ghcb; + u8 sipi_vector; + int cpu, ret; + u64 cr4; + + /* + * The hypervisor SNP feature support check has happened earlier, just check + * the AP_CREATION one here. + */ + if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION)) + return -EOPNOTSUPP; + + /* + * Verify the desired start IP against the known trampoline start IP + * to catch any future new trampolines that may be introduced that + * would require a new protected guest entry point. + */ + if (WARN_ONCE(start_ip != real_mode_header->trampoline_start, + "Unsupported SNP start_ip: %lx\n", start_ip)) + return -EINVAL; + + /* Override start_ip with known protected guest start IP */ + start_ip = real_mode_header->sev_es_trampoline_start; + + /* Find the logical CPU for the APIC ID */ + for_each_present_cpu(cpu) { + if (arch_match_cpu_phys_id(cpu, apic_id)) + break; + } + if (cpu >= nr_cpu_ids) + return -EINVAL; + + cur_vmsa = per_cpu(sev_vmsa, cpu); + + /* + * A new VMSA is created each time because there is no guarantee that + * the current VMSA is the kernels or that the vCPU is not running. If + * an attempt was done to use the current VMSA with a running vCPU, a + * #VMEXIT of that vCPU would wipe out all of the settings being done + * here. + */ + vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page(); + if (!vmsa) + return -ENOMEM; + + /* CR4 should maintain the MCE value */ + cr4 = native_read_cr4() & X86_CR4_MCE; + + /* Set the CS value based on the start_ip converted to a SIPI vector */ + sipi_vector = (start_ip >> 12); + vmsa->cs.base = sipi_vector << 12; + vmsa->cs.limit = AP_INIT_CS_LIMIT; + vmsa->cs.attrib = INIT_CS_ATTRIBS; + vmsa->cs.selector = sipi_vector << 8; + + /* Set the RIP value based on start_ip */ + vmsa->rip = start_ip & 0xfff; + + /* Set AP INIT defaults as documented in the APM */ + vmsa->ds.limit = AP_INIT_DS_LIMIT; + vmsa->ds.attrib = INIT_DS_ATTRIBS; + vmsa->es = vmsa->ds; + vmsa->fs = vmsa->ds; + vmsa->gs = vmsa->ds; + vmsa->ss = vmsa->ds; + + vmsa->gdtr.limit = AP_INIT_GDTR_LIMIT; + vmsa->ldtr.limit = AP_INIT_LDTR_LIMIT; + vmsa->ldtr.attrib = INIT_LDTR_ATTRIBS; + vmsa->idtr.limit = AP_INIT_IDTR_LIMIT; + vmsa->tr.limit = AP_INIT_TR_LIMIT; + vmsa->tr.attrib = INIT_TR_ATTRIBS; + + vmsa->cr4 = cr4; + vmsa->cr0 = AP_INIT_CR0_DEFAULT; + vmsa->dr7 = DR7_RESET_VALUE; + vmsa->dr6 = AP_INIT_DR6_DEFAULT; + vmsa->rflags = AP_INIT_RFLAGS_DEFAULT; + vmsa->g_pat = AP_INIT_GPAT_DEFAULT; + vmsa->xcr0 = AP_INIT_XCR0_DEFAULT; + vmsa->mxcsr = AP_INIT_MXCSR_DEFAULT; + vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT; + vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT; + + /* SVME must be set. */ + vmsa->efer = EFER_SVME; + + /* + * Set the SNP-specific fields for this VMSA: + * VMPL level + * SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits) + */ + vmsa->vmpl = 0; + vmsa->sev_features = sev_status >> 2; + + /* Switch the page over to a VMSA page now that it is initialized */ + ret = snp_set_vmsa(vmsa, true); + if (ret) { + pr_err("set VMSA page failed (%u)\n", ret); + free_page((unsigned long)vmsa); + + return -EINVAL; + } + + /* Issue VMGEXIT AP Creation NAE event */ + local_irq_save(flags); + + ghcb = __sev_get_ghcb(&state); + + vc_ghcb_invalidate(ghcb); + ghcb_set_rax(ghcb, vmsa->sev_features); + ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION); + ghcb_set_sw_exit_info_1(ghcb, ((u64)apic_id << 32) | SVM_VMGEXIT_AP_CREATE); + ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa)); + + sev_es_wr_ghcb_msr(__pa(ghcb)); + VMGEXIT(); + + if (!ghcb_sw_exit_info_1_is_valid(ghcb) || + lower_32_bits(ghcb->save.sw_exit_info_1)) { + pr_err("SNP AP Creation error\n"); + ret = -EINVAL; + } + + __sev_put_ghcb(&state); + + local_irq_restore(flags); + + /* Perform cleanup if there was an error */ + if (ret) { + snp_cleanup_vmsa(vmsa); + vmsa = NULL; + } + + /* Free up any previous VMSA page */ + if (cur_vmsa) + snp_cleanup_vmsa(cur_vmsa); + + /* Record the current VMSA page */ + per_cpu(sev_vmsa, cpu) = vmsa; + + return ret; +} + +void snp_set_wakeup_secondary_cpu(void) +{ + if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + return; + + /* + * Always set this override if SNP is enabled. This makes it the + * required method to start APs under SNP. If the hypervisor does + * not support AP creation, then no APs will be started. + */ + apic->wakeup_secondary_cpu = wakeup_cpu_via_vmgexit; +} + int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { u16 startup_cs, startup_ip; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2ef14772dc04..85d7b1c5eb70 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -82,6 +82,7 @@ #include #include #include +#include /* representing HT siblings of each logical CPU */ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); @@ -1430,6 +1431,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) smp_quirk_init_udelay(); speculative_store_bypass_ht_init(); + + snp_set_wakeup_secondary_cpu(); } void arch_thaw_secondary_cpus_begin(void) From 469693d8f62299709e8ba56d8fb3da9ea990213c Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Wed, 9 Feb 2022 12:10:17 -0600 Subject: [PATCH 23/60] x86/head/64: Re-enable stack protection Due to 103a4908ad4d ("x86/head/64: Disable stack protection for head$(BITS).o") kernel/head{32,64}.c are compiled with -fno-stack-protector to allow a call to set_bringup_idt_handler(), which would otherwise have stack protection enabled with CONFIG_STACKPROTECTOR_STRONG. While sufficient for that case, there may still be issues with calls to any external functions that were compiled with stack protection enabled that in-turn make stack-protected calls, or if the exception handlers set up by set_bringup_idt_handler() make calls to stack-protected functions. Subsequent patches for SEV-SNP CPUID validation support will introduce both such cases. Attempting to disable stack protection for everything in scope to address that is prohibitive since much of the code, like the SEV-ES #VC handler, is shared code that remains in use after boot and could benefit from having stack protection enabled. Attempting to inline calls is brittle and can quickly balloon out to library/helper code where that's not really an option. Instead, re-enable stack protection for head32.c/head64.c, and make the appropriate changes to ensure the segment used for the stack canary is initialized in advance of any stack-protected C calls. For head64.c: - The BSP will enter from startup_64() and call into C code (startup_64_setup_env()) shortly after setting up the stack, which may result in calls to stack-protected code. Set up %gs early to allow for this safely. - APs will enter from secondary_startup_64*(), and %gs will be set up soon after. There is one call to C code prior to %gs being setup (__startup_secondary_64()), but it is only to fetch 'sme_me_mask' global, so just load 'sme_me_mask' directly instead, and remove the now-unused __startup_secondary_64() function. For head32.c: - BSPs/APs will set %fs to __BOOT_DS prior to any C calls. In recent kernels, the compiler is configured to access the stack canary at %fs:__stack_chk_guard [1], which overlaps with the initial per-cpu '__stack_chk_guard' variable in the initial/"master" .data..percpu area. This is sufficient to allow access to the canary for use during initial startup, so no changes are needed there. [1] 3fb0fdb3bbe7 ("x86/stackprotector/32: Make the canary into a regular percpu variable") [ bp: Massage commit message. ] Suggested-by: Joerg Roedel #for 64-bit %gs set up Signed-off-by: Michael Roth Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-24-brijesh.singh@amd.com --- arch/x86/include/asm/setup.h | 1 - arch/x86/kernel/Makefile | 2 -- arch/x86/kernel/head64.c | 9 --------- arch/x86/kernel/head_64.S | 24 +++++++++++++++++++++--- 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 896e48d45828..a1b107f2a12a 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -50,7 +50,6 @@ extern unsigned long saved_video_mode; extern void reserve_standard_io_resources(void); extern void i386_reserve_resources(void); extern unsigned long __startup_64(unsigned long physaddr, struct boot_params *bp); -extern unsigned long __startup_secondary_64(void); extern void startup_64_setup_env(unsigned long physbase); extern void early_setup_idt(void); extern void __init do_early_exception(struct pt_regs *regs, int trapnr); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index c41ef42adbe8..1a2dc328cb5e 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -46,8 +46,6 @@ endif # non-deterministic coverage. KCOV_INSTRUMENT := n -CFLAGS_head$(BITS).o += -fno-stack-protector - CFLAGS_irq.o := -I $(srctree)/$(src)/../include/asm/trace obj-y := process_$(BITS).o signal.o diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 656d2f3e2cf0..c185f4831498 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -318,15 +318,6 @@ unsigned long __head __startup_64(unsigned long physaddr, return sme_postprocess_startup(bp, pmd); } -unsigned long __startup_secondary_64(void) -{ - /* - * Return the SME encryption mask (if SME is active) to be used as a - * modifier for the initial pgdir entry programmed into CR3. - */ - return sme_get_me_mask(); -} - /* Wipe all early page tables except for the kernel symbol map */ static void __init reset_early_page_tables(void) { diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 6bf340c91165..7bac9a4bdf7c 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -65,6 +65,22 @@ SYM_CODE_START_NOALIGN(startup_64) leaq (__end_init_task - FRAME_SIZE)(%rip), %rsp leaq _text(%rip), %rdi + + /* + * initial_gs points to initial fixed_percpu_data struct with storage for + * the stack protector canary. Global pointer fixups are needed at this + * stage, so apply them as is done in fixup_pointer(), and initialize %gs + * such that the canary can be accessed at %gs:40 for subsequent C calls. + */ + movl $MSR_GS_BASE, %ecx + movq initial_gs(%rip), %rax + movq $_text, %rdx + subq %rdx, %rax + addq %rdi, %rax + movq %rax, %rdx + shrq $32, %rdx + wrmsr + pushq %rsi call startup_64_setup_env popq %rsi @@ -147,9 +163,11 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) * Retrieve the modifier (SME encryption mask if SME is active) to be * added to the initial pgdir entry that will be programmed into CR3. */ - pushq %rsi - call __startup_secondary_64 - popq %rsi +#ifdef CONFIG_AMD_MEM_ENCRYPT + movq sme_me_mask, %rax +#else + xorq %rax, %rax +#endif /* Form the CR3 value being sure to include the CR3 modifier */ addq $(init_top_pgt - __START_KERNEL_map), %rax From 7c4146e8885512719a50b641e9277a1712e052ff Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Wed, 9 Feb 2022 12:10:18 -0600 Subject: [PATCH 24/60] x86/compressed/acpi: Move EFI detection to helper Future patches for SEV-SNP-validated CPUID will also require early parsing of the EFI configuration. Incrementally move the related code into a set of helpers that can be re-used for that purpose. First, carve out the functionality which determines the EFI environment type the machine is booting on. [ bp: Massage commit message. ] Signed-off-by: Michael Roth Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-25-brijesh.singh@amd.com --- arch/x86/boot/compressed/Makefile | 1 + arch/x86/boot/compressed/acpi.c | 28 +++++++---------- arch/x86/boot/compressed/efi.c | 50 +++++++++++++++++++++++++++++++ arch/x86/boot/compressed/misc.h | 16 ++++++++++ 4 files changed, 77 insertions(+), 18 deletions(-) create mode 100644 arch/x86/boot/compressed/efi.c diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 6115274fe10f..e69c3d2e0628 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -103,6 +103,7 @@ endif vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o +vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o efi-obj-$(CONFIG_EFI_STUB) = $(objtree)/drivers/firmware/efi/libstub/lib.a $(obj)/vmlinux: $(vmlinux-objs-y) $(efi-obj-y) FORCE diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index 8bcbcee54aa1..db6c561920f0 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -87,7 +87,7 @@ static acpi_physical_address kexec_get_rsdp_addr(void) efi_system_table_64_t *systab; struct efi_setup_data *esd; struct efi_info *ei; - char *sig; + enum efi_type et; esd = (struct efi_setup_data *)get_kexec_setup_data_addr(); if (!esd) @@ -98,10 +98,9 @@ static acpi_physical_address kexec_get_rsdp_addr(void) return 0; } - ei = &boot_params->efi_info; - sig = (char *)&ei->efi_loader_signature; - if (strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) { - debug_putstr("Wrong kexec EFI loader signature.\n"); + et = efi_get_type(boot_params); + if (et != EFI_TYPE_64) { + debug_putstr("Unexpected kexec EFI environment (expected 64-bit EFI).\n"); return 0; } @@ -122,29 +121,22 @@ static acpi_physical_address efi_get_rsdp_addr(void) unsigned long systab, config_tables; unsigned int nr_tables; struct efi_info *ei; + enum efi_type et; bool efi_64; - char *sig; - ei = &boot_params->efi_info; - sig = (char *)&ei->efi_loader_signature; - - if (!strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) { + et = efi_get_type(boot_params); + if (et == EFI_TYPE_64) efi_64 = true; - } else if (!strncmp(sig, EFI32_LOADER_SIGNATURE, 4)) { + else if (et == EFI_TYPE_32) efi_64 = false; - } else { - debug_putstr("Wrong EFI loader signature.\n"); + else return 0; - } /* Get systab from boot params. */ + ei = &boot_params->efi_info; #ifdef CONFIG_X86_64 systab = ei->efi_systab | ((__u64)ei->efi_systab_hi << 32); #else - if (ei->efi_systab_hi || ei->efi_memmap_hi) { - debug_putstr("Error getting RSDP address: EFI system table located above 4GB.\n"); - return 0; - } systab = ei->efi_systab; #endif if (!systab) diff --git a/arch/x86/boot/compressed/efi.c b/arch/x86/boot/compressed/efi.c new file mode 100644 index 000000000000..bad0ce3e2ef6 --- /dev/null +++ b/arch/x86/boot/compressed/efi.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Helpers for early access to EFI configuration table. + * + * Originally derived from arch/x86/boot/compressed/acpi.c + */ + +#include "misc.h" +#include +#include + +/** + * efi_get_type - Given a pointer to boot_params, determine the type of EFI environment. + * + * @bp: pointer to boot_params + * + * Return: EFI_TYPE_{32,64} for valid EFI environments, EFI_TYPE_NONE otherwise. + */ +enum efi_type efi_get_type(struct boot_params *bp) +{ + struct efi_info *ei; + enum efi_type et; + const char *sig; + + ei = &bp->efi_info; + sig = (char *)&ei->efi_loader_signature; + + if (!strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) { + et = EFI_TYPE_64; + } else if (!strncmp(sig, EFI32_LOADER_SIGNATURE, 4)) { + et = EFI_TYPE_32; + } else { + debug_putstr("No EFI environment detected.\n"); + et = EFI_TYPE_NONE; + } + +#ifndef CONFIG_X86_64 + /* + * Existing callers like acpi.c treat this case as an indicator to + * fall-through to non-EFI, rather than an error, so maintain that + * functionality here as well. + */ + if (ei->efi_systab_hi || ei->efi_memmap_hi) { + debug_putstr("EFI system table is located above 4GB and cannot be accessed.\n"); + et = EFI_TYPE_NONE; + } +#endif + + return et; +} diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 01cc13c12059..fede1afa39e9 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -176,4 +176,20 @@ void boot_stage2_vc(void); unsigned long sev_verify_cbit(unsigned long cr3); +enum efi_type { + EFI_TYPE_64, + EFI_TYPE_32, + EFI_TYPE_NONE, +}; + +#ifdef CONFIG_EFI +/* helpers for early EFI config table access */ +enum efi_type efi_get_type(struct boot_params *bp); +#else +static inline enum efi_type efi_get_type(struct boot_params *bp) +{ + return EFI_TYPE_NONE; +} +#endif /* CONFIG_EFI */ + #endif /* BOOT_COMPRESSED_MISC_H */ From 58f3e6b71f42f99ab5d0ab26ddf6e7ee5631f5db Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 24 Feb 2022 10:56:05 -0600 Subject: [PATCH 25/60] x86/compressed/acpi: Move EFI system table lookup to helper Future patches for SEV-SNP-validated CPUID will also require early parsing of the EFI configuration. Incrementally move the related code into a set of helpers that can be re-used for that purpose. Signed-off-by: Michael Roth Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-26-brijesh.singh@amd.com --- arch/x86/boot/compressed/acpi.c | 21 +++++++-------------- arch/x86/boot/compressed/efi.c | 29 +++++++++++++++++++++++++++++ arch/x86/boot/compressed/misc.h | 6 ++++++ 3 files changed, 42 insertions(+), 14 deletions(-) diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index db6c561920f0..58a3d3f3e305 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -105,7 +105,7 @@ static acpi_physical_address kexec_get_rsdp_addr(void) } /* Get systab from boot params. */ - systab = (efi_system_table_64_t *) (ei->efi_systab | ((__u64)ei->efi_systab_hi << 32)); + systab = (efi_system_table_64_t *)efi_get_system_table(boot_params); if (!systab) error("EFI system table not found in kexec boot_params."); @@ -118,9 +118,8 @@ static acpi_physical_address kexec_get_rsdp_addr(void) { return 0; } static acpi_physical_address efi_get_rsdp_addr(void) { #ifdef CONFIG_EFI - unsigned long systab, config_tables; + unsigned long systab_pa, config_tables; unsigned int nr_tables; - struct efi_info *ei; enum efi_type et; bool efi_64; @@ -132,24 +131,18 @@ static acpi_physical_address efi_get_rsdp_addr(void) else return 0; - /* Get systab from boot params. */ - ei = &boot_params->efi_info; -#ifdef CONFIG_X86_64 - systab = ei->efi_systab | ((__u64)ei->efi_systab_hi << 32); -#else - systab = ei->efi_systab; -#endif - if (!systab) - error("EFI system table not found."); + systab_pa = efi_get_system_table(boot_params); + if (!systab_pa) + error("EFI support advertised, but unable to locate system table."); /* Handle EFI bitness properly */ if (efi_64) { - efi_system_table_64_t *stbl = (efi_system_table_64_t *)systab; + efi_system_table_64_t *stbl = (efi_system_table_64_t *)systab_pa; config_tables = stbl->tables; nr_tables = stbl->nr_tables; } else { - efi_system_table_32_t *stbl = (efi_system_table_32_t *)systab; + efi_system_table_32_t *stbl = (efi_system_table_32_t *)systab_pa; config_tables = stbl->tables; nr_tables = stbl->nr_tables; diff --git a/arch/x86/boot/compressed/efi.c b/arch/x86/boot/compressed/efi.c index bad0ce3e2ef6..72a81ba1f87b 100644 --- a/arch/x86/boot/compressed/efi.c +++ b/arch/x86/boot/compressed/efi.c @@ -48,3 +48,32 @@ enum efi_type efi_get_type(struct boot_params *bp) return et; } + +/** + * efi_get_system_table - Given a pointer to boot_params, retrieve the physical address + * of the EFI system table. + * + * @bp: pointer to boot_params + * + * Return: EFI system table address on success. On error, return 0. + */ +unsigned long efi_get_system_table(struct boot_params *bp) +{ + unsigned long sys_tbl_pa; + struct efi_info *ei; + enum efi_type et; + + /* Get systab from boot params. */ + ei = &bp->efi_info; +#ifdef CONFIG_X86_64 + sys_tbl_pa = ei->efi_systab | ((__u64)ei->efi_systab_hi << 32); +#else + sys_tbl_pa = ei->efi_systab; +#endif + if (!sys_tbl_pa) { + debug_putstr("EFI system table not found."); + return 0; + } + + return sys_tbl_pa; +} diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index fede1afa39e9..b2acd3ac6525 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -185,11 +185,17 @@ enum efi_type { #ifdef CONFIG_EFI /* helpers for early EFI config table access */ enum efi_type efi_get_type(struct boot_params *bp); +unsigned long efi_get_system_table(struct boot_params *bp); #else static inline enum efi_type efi_get_type(struct boot_params *bp) { return EFI_TYPE_NONE; } + +static inline unsigned long efi_get_system_table(struct boot_params *bp) +{ + return 0; +} #endif /* CONFIG_EFI */ #endif /* BOOT_COMPRESSED_MISC_H */ From 61c14ceda8405f37545a0983d6b9bc45c6236793 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 24 Feb 2022 10:56:06 -0600 Subject: [PATCH 26/60] x86/compressed/acpi: Move EFI config table lookup to helper Future patches for SEV-SNP-validated CPUID will also require early parsing of the EFI configuration. Incrementally move the related code into a set of helpers that can be re-used for that purpose. [ bp: Remove superfluous zeroing of a stack variable. ] Signed-off-by: Michael Roth Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-27-brijesh.singh@amd.com --- arch/x86/boot/compressed/acpi.c | 25 ++++++------------- arch/x86/boot/compressed/efi.c | 43 +++++++++++++++++++++++++++++++++ arch/x86/boot/compressed/misc.h | 9 +++++++ 3 files changed, 60 insertions(+), 17 deletions(-) diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index 58a3d3f3e305..9a824af69961 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -118,10 +118,13 @@ static acpi_physical_address kexec_get_rsdp_addr(void) { return 0; } static acpi_physical_address efi_get_rsdp_addr(void) { #ifdef CONFIG_EFI - unsigned long systab_pa, config_tables; + unsigned long cfg_tbl_pa = 0; + unsigned int cfg_tbl_len; + unsigned long systab_pa; unsigned int nr_tables; enum efi_type et; bool efi_64; + int ret; et = efi_get_type(boot_params); if (et == EFI_TYPE_64) @@ -135,23 +138,11 @@ static acpi_physical_address efi_get_rsdp_addr(void) if (!systab_pa) error("EFI support advertised, but unable to locate system table."); - /* Handle EFI bitness properly */ - if (efi_64) { - efi_system_table_64_t *stbl = (efi_system_table_64_t *)systab_pa; + ret = efi_get_conf_table(boot_params, &cfg_tbl_pa, &cfg_tbl_len); + if (ret || !cfg_tbl_pa) + error("EFI config table not found."); - config_tables = stbl->tables; - nr_tables = stbl->nr_tables; - } else { - efi_system_table_32_t *stbl = (efi_system_table_32_t *)systab_pa; - - config_tables = stbl->tables; - nr_tables = stbl->nr_tables; - } - - if (!config_tables) - error("EFI config tables not found."); - - return __efi_get_rsdp_addr(config_tables, nr_tables, efi_64); + return __efi_get_rsdp_addr(cfg_tbl_pa, cfg_tbl_len, efi_64); #else return 0; #endif diff --git a/arch/x86/boot/compressed/efi.c b/arch/x86/boot/compressed/efi.c index 72a81ba1f87b..350838ed80db 100644 --- a/arch/x86/boot/compressed/efi.c +++ b/arch/x86/boot/compressed/efi.c @@ -77,3 +77,46 @@ unsigned long efi_get_system_table(struct boot_params *bp) return sys_tbl_pa; } + +/** + * efi_get_conf_table - Given a pointer to boot_params, locate and return the physical + * address of EFI configuration table. + * + * @bp: pointer to boot_params + * @cfg_tbl_pa: location to store physical address of config table + * @cfg_tbl_len: location to store number of config table entries + * + * Return: 0 on success. On error, return params are left unchanged. + */ +int efi_get_conf_table(struct boot_params *bp, unsigned long *cfg_tbl_pa, + unsigned int *cfg_tbl_len) +{ + unsigned long sys_tbl_pa; + enum efi_type et; + int ret; + + if (!cfg_tbl_pa || !cfg_tbl_len) + return -EINVAL; + + sys_tbl_pa = efi_get_system_table(bp); + if (!sys_tbl_pa) + return -EINVAL; + + /* Handle EFI bitness properly */ + et = efi_get_type(bp); + if (et == EFI_TYPE_64) { + efi_system_table_64_t *stbl = (efi_system_table_64_t *)sys_tbl_pa; + + *cfg_tbl_pa = stbl->tables; + *cfg_tbl_len = stbl->nr_tables; + } else if (et == EFI_TYPE_32) { + efi_system_table_32_t *stbl = (efi_system_table_32_t *)sys_tbl_pa; + + *cfg_tbl_pa = stbl->tables; + *cfg_tbl_len = stbl->nr_tables; + } else { + return -EINVAL; + } + + return 0; +} diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index b2acd3ac6525..8815af092a10 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -186,6 +186,8 @@ enum efi_type { /* helpers for early EFI config table access */ enum efi_type efi_get_type(struct boot_params *bp); unsigned long efi_get_system_table(struct boot_params *bp); +int efi_get_conf_table(struct boot_params *bp, unsigned long *cfg_tbl_pa, + unsigned int *cfg_tbl_len); #else static inline enum efi_type efi_get_type(struct boot_params *bp) { @@ -196,6 +198,13 @@ static inline unsigned long efi_get_system_table(struct boot_params *bp) { return 0; } + +static inline int efi_get_conf_table(struct boot_params *bp, + unsigned long *cfg_tbl_pa, + unsigned int *cfg_tbl_len) +{ + return -ENOENT; +} #endif /* CONFIG_EFI */ #endif /* BOOT_COMPRESSED_MISC_H */ From dee602dd5d1489b5aa4651c561dfbe90eaee1589 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 24 Feb 2022 10:56:07 -0600 Subject: [PATCH 27/60] x86/compressed/acpi: Move EFI vendor table lookup to helper Future patches for SEV-SNP-validated CPUID will also require early parsing of the EFI configuration. Incrementally move the related code into a set of helpers that can be re-used for that purpose. [ bp: Unbreak unnecessarily broken lines. ] Signed-off-by: Michael Roth Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-28-brijesh.singh@amd.com --- arch/x86/boot/compressed/acpi.c | 64 ++++++++++-------------------- arch/x86/boot/compressed/efi.c | 70 +++++++++++++++++++++++++++++++++ arch/x86/boot/compressed/misc.h | 13 ++++++ 3 files changed, 104 insertions(+), 43 deletions(-) diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index 9a824af69961..b0c1dffc5510 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -20,48 +20,31 @@ */ struct mem_vector immovable_mem[MAX_NUMNODES*2]; -/* - * Search EFI system tables for RSDP. If both ACPI_20_TABLE_GUID and - * ACPI_TABLE_GUID are found, take the former, which has more features. - */ static acpi_physical_address -__efi_get_rsdp_addr(unsigned long config_tables, unsigned int nr_tables, - bool efi_64) +__efi_get_rsdp_addr(unsigned long cfg_tbl_pa, unsigned int cfg_tbl_len) { - acpi_physical_address rsdp_addr = 0; - #ifdef CONFIG_EFI - int i; + unsigned long rsdp_addr; + int ret; - /* Get EFI tables from systab. */ - for (i = 0; i < nr_tables; i++) { - acpi_physical_address table; - efi_guid_t guid; + /* + * Search EFI system tables for RSDP. Preferred is ACPI_20_TABLE_GUID to + * ACPI_TABLE_GUID because it has more features. + */ + rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len, + ACPI_20_TABLE_GUID); + if (rsdp_addr) + return (acpi_physical_address)rsdp_addr; - if (efi_64) { - efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables + i; + /* No ACPI_20_TABLE_GUID found, fallback to ACPI_TABLE_GUID. */ + rsdp_addr = efi_find_vendor_table(boot_params, cfg_tbl_pa, cfg_tbl_len, + ACPI_TABLE_GUID); + if (rsdp_addr) + return (acpi_physical_address)rsdp_addr; - guid = tbl->guid; - table = tbl->table; - - if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) { - debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n"); - return 0; - } - } else { - efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables + i; - - guid = tbl->guid; - table = tbl->table; - } - - if (!(efi_guidcmp(guid, ACPI_TABLE_GUID))) - rsdp_addr = table; - else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID))) - return table; - } + debug_putstr("Error getting RSDP address.\n"); #endif - return rsdp_addr; + return 0; } /* EFI/kexec support is 64-bit only. */ @@ -109,7 +92,7 @@ static acpi_physical_address kexec_get_rsdp_addr(void) if (!systab) error("EFI system table not found in kexec boot_params."); - return __efi_get_rsdp_addr((unsigned long)esd->tables, systab->nr_tables, true); + return __efi_get_rsdp_addr((unsigned long)esd->tables, systab->nr_tables); } #else static acpi_physical_address kexec_get_rsdp_addr(void) { return 0; } @@ -123,15 +106,10 @@ static acpi_physical_address efi_get_rsdp_addr(void) unsigned long systab_pa; unsigned int nr_tables; enum efi_type et; - bool efi_64; int ret; et = efi_get_type(boot_params); - if (et == EFI_TYPE_64) - efi_64 = true; - else if (et == EFI_TYPE_32) - efi_64 = false; - else + if (et == EFI_TYPE_NONE) return 0; systab_pa = efi_get_system_table(boot_params); @@ -142,7 +120,7 @@ static acpi_physical_address efi_get_rsdp_addr(void) if (ret || !cfg_tbl_pa) error("EFI config table not found."); - return __efi_get_rsdp_addr(cfg_tbl_pa, cfg_tbl_len, efi_64); + return __efi_get_rsdp_addr(cfg_tbl_pa, cfg_tbl_len); #else return 0; #endif diff --git a/arch/x86/boot/compressed/efi.c b/arch/x86/boot/compressed/efi.c index 350838ed80db..4d363dfc2b33 100644 --- a/arch/x86/boot/compressed/efi.c +++ b/arch/x86/boot/compressed/efi.c @@ -120,3 +120,73 @@ int efi_get_conf_table(struct boot_params *bp, unsigned long *cfg_tbl_pa, return 0; } + +/* Get vendor table address/guid from EFI config table at the given index */ +static int get_vendor_table(void *cfg_tbl, unsigned int idx, + unsigned long *vendor_tbl_pa, + efi_guid_t *vendor_tbl_guid, + enum efi_type et) +{ + if (et == EFI_TYPE_64) { + efi_config_table_64_t *tbl_entry = (efi_config_table_64_t *)cfg_tbl + idx; + + if (!IS_ENABLED(CONFIG_X86_64) && tbl_entry->table >> 32) { + debug_putstr("Error: EFI config table entry located above 4GB.\n"); + return -EINVAL; + } + + *vendor_tbl_pa = tbl_entry->table; + *vendor_tbl_guid = tbl_entry->guid; + + } else if (et == EFI_TYPE_32) { + efi_config_table_32_t *tbl_entry = (efi_config_table_32_t *)cfg_tbl + idx; + + *vendor_tbl_pa = tbl_entry->table; + *vendor_tbl_guid = tbl_entry->guid; + } else { + return -EINVAL; + } + + return 0; +} + +/** + * efi_find_vendor_table - Given EFI config table, search it for the physical + * address of the vendor table associated with GUID. + * + * @bp: pointer to boot_params + * @cfg_tbl_pa: pointer to EFI configuration table + * @cfg_tbl_len: number of entries in EFI configuration table + * @guid: GUID of vendor table + * + * Return: vendor table address on success. On error, return 0. + */ +unsigned long efi_find_vendor_table(struct boot_params *bp, + unsigned long cfg_tbl_pa, + unsigned int cfg_tbl_len, + efi_guid_t guid) +{ + enum efi_type et; + unsigned int i; + + et = efi_get_type(bp); + if (et == EFI_TYPE_NONE) + return 0; + + for (i = 0; i < cfg_tbl_len; i++) { + unsigned long vendor_tbl_pa; + efi_guid_t vendor_tbl_guid; + int ret; + + ret = get_vendor_table((void *)cfg_tbl_pa, i, + &vendor_tbl_pa, + &vendor_tbl_guid, et); + if (ret) + return 0; + + if (!efi_guidcmp(guid, vendor_tbl_guid)) + return vendor_tbl_pa; + } + + return 0; +} diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 8815af092a10..ba538af37e90 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -188,6 +189,10 @@ enum efi_type efi_get_type(struct boot_params *bp); unsigned long efi_get_system_table(struct boot_params *bp); int efi_get_conf_table(struct boot_params *bp, unsigned long *cfg_tbl_pa, unsigned int *cfg_tbl_len); +unsigned long efi_find_vendor_table(struct boot_params *bp, + unsigned long cfg_tbl_pa, + unsigned int cfg_tbl_len, + efi_guid_t guid); #else static inline enum efi_type efi_get_type(struct boot_params *bp) { @@ -205,6 +210,14 @@ static inline int efi_get_conf_table(struct boot_params *bp, { return -ENOENT; } + +static inline unsigned long efi_find_vendor_table(struct boot_params *bp, + unsigned long cfg_tbl_pa, + unsigned int cfg_tbl_len, + efi_guid_t guid) +{ + return 0; +} #endif /* CONFIG_EFI */ #endif /* BOOT_COMPRESSED_MISC_H */ From 824f37783189a48db914488fb41eba36ec57ebb7 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 24 Feb 2022 10:56:08 -0600 Subject: [PATCH 28/60] x86/compressed/acpi: Move EFI kexec handling into common code Future patches for SEV-SNP-validated CPUID will also require early parsing of the EFI configuration. Incrementally move the related code into a set of helpers that can be re-used for that purpose. In this instance, the current acpi.c kexec handling is mainly used to get the alternative EFI config table address provided by kexec via a setup_data entry of type SETUP_EFI. If not present, the code then falls back to normal EFI config table address provided by EFI system table. This would need to be done by all call-sites attempting to access the EFI config table, so just have efi_get_conf_table() handle that automatically. Signed-off-by: Michael Roth Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-29-brijesh.singh@amd.com --- arch/x86/boot/compressed/acpi.c | 59 --------------------------------- arch/x86/boot/compressed/efi.c | 46 ++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 60 deletions(-) diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index b0c1dffc5510..64b172dabd5c 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -47,57 +47,6 @@ __efi_get_rsdp_addr(unsigned long cfg_tbl_pa, unsigned int cfg_tbl_len) return 0; } -/* EFI/kexec support is 64-bit only. */ -#ifdef CONFIG_X86_64 -static struct efi_setup_data *get_kexec_setup_data_addr(void) -{ - struct setup_data *data; - u64 pa_data; - - pa_data = boot_params->hdr.setup_data; - while (pa_data) { - data = (struct setup_data *)pa_data; - if (data->type == SETUP_EFI) - return (struct efi_setup_data *)(pa_data + sizeof(struct setup_data)); - - pa_data = data->next; - } - return NULL; -} - -static acpi_physical_address kexec_get_rsdp_addr(void) -{ - efi_system_table_64_t *systab; - struct efi_setup_data *esd; - struct efi_info *ei; - enum efi_type et; - - esd = (struct efi_setup_data *)get_kexec_setup_data_addr(); - if (!esd) - return 0; - - if (!esd->tables) { - debug_putstr("Wrong kexec SETUP_EFI data.\n"); - return 0; - } - - et = efi_get_type(boot_params); - if (et != EFI_TYPE_64) { - debug_putstr("Unexpected kexec EFI environment (expected 64-bit EFI).\n"); - return 0; - } - - /* Get systab from boot params. */ - systab = (efi_system_table_64_t *)efi_get_system_table(boot_params); - if (!systab) - error("EFI system table not found in kexec boot_params."); - - return __efi_get_rsdp_addr((unsigned long)esd->tables, systab->nr_tables); -} -#else -static acpi_physical_address kexec_get_rsdp_addr(void) { return 0; } -#endif /* CONFIG_X86_64 */ - static acpi_physical_address efi_get_rsdp_addr(void) { #ifdef CONFIG_EFI @@ -210,14 +159,6 @@ acpi_physical_address get_rsdp_addr(void) pa = boot_params->acpi_rsdp_addr; - /* - * Try to get EFI data from setup_data. This can happen when we're a - * kexec'ed kernel and kexec(1) has passed all the required EFI info to - * us. - */ - if (!pa) - pa = kexec_get_rsdp_addr(); - if (!pa) pa = efi_get_rsdp_addr(); diff --git a/arch/x86/boot/compressed/efi.c b/arch/x86/boot/compressed/efi.c index 4d363dfc2b33..09fa3b5d70b8 100644 --- a/arch/x86/boot/compressed/efi.c +++ b/arch/x86/boot/compressed/efi.c @@ -78,6 +78,46 @@ unsigned long efi_get_system_table(struct boot_params *bp) return sys_tbl_pa; } +/* + * EFI config table address changes to virtual address after boot, which may + * not be accessible for the kexec'd kernel. To address this, kexec provides + * the initial physical address via a struct setup_data entry, which is + * checked for here, along with some sanity checks. + */ +static struct efi_setup_data *get_kexec_setup_data(struct boot_params *bp, + enum efi_type et) +{ +#ifdef CONFIG_X86_64 + struct efi_setup_data *esd = NULL; + struct setup_data *data; + u64 pa_data; + + pa_data = bp->hdr.setup_data; + while (pa_data) { + data = (struct setup_data *)pa_data; + if (data->type == SETUP_EFI) { + esd = (struct efi_setup_data *)(pa_data + sizeof(struct setup_data)); + break; + } + + pa_data = data->next; + } + + /* + * Original ACPI code falls back to attempting normal EFI boot in these + * cases, so maintain existing behavior by indicating non-kexec + * environment to the caller, but print them for debugging. + */ + if (esd && !esd->tables) { + debug_putstr("kexec EFI environment missing valid configuration table.\n"); + return NULL; + } + + return esd; +#endif + return NULL; +} + /** * efi_get_conf_table - Given a pointer to boot_params, locate and return the physical * address of EFI configuration table. @@ -106,8 +146,12 @@ int efi_get_conf_table(struct boot_params *bp, unsigned long *cfg_tbl_pa, et = efi_get_type(bp); if (et == EFI_TYPE_64) { efi_system_table_64_t *stbl = (efi_system_table_64_t *)sys_tbl_pa; + struct efi_setup_data *esd; - *cfg_tbl_pa = stbl->tables; + /* kexec provides an alternative EFI conf table, check for it. */ + esd = get_kexec_setup_data(bp, et); + + *cfg_tbl_pa = esd ? esd->tables : stbl->tables; *cfg_tbl_len = stbl->nr_tables; } else if (et == EFI_TYPE_32) { efi_system_table_32_t *stbl = (efi_system_table_32_t *)sys_tbl_pa; From 5ea98e01ab524cbc53dad8aebd27b434ebe5d074 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Mon, 7 Mar 2022 15:33:39 -0600 Subject: [PATCH 29/60] x86/boot: Add Confidential Computing type to setup_data While launching encrypted guests, the hypervisor may need to provide some additional information during the guest boot. When booting under an EFI-based BIOS, the EFI configuration table contains an entry for the confidential computing blob that contains the required information. To support booting encrypted guests on non-EFI VMs, the hypervisor needs to pass this additional information to the guest kernel using a different method. For this purpose, introduce SETUP_CC_BLOB type in setup_data to hold the physical address of the confidential computing blob location. The boot loader or hypervisor may choose to use this method instead of an EFI configuration table. The CC blob location scanning should give preference to a setup_data blob over an EFI configuration table. In AMD SEV-SNP, the CC blob contains the address of the secrets and CPUID pages. The secrets page includes information such as a VM to PSP communication key and the CPUID page contains PSP-filtered CPUID values. Define the AMD SEV confidential computing blob structure. While at it, define the EFI GUID for the confidential computing blob. [ bp: Massage commit message, mark struct __packed. ] Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20220307213356.2797205-30-brijesh.singh@amd.com --- arch/x86/include/asm/sev.h | 18 ++++++++++++++++++ arch/x86/include/uapi/asm/bootparam.h | 1 + include/linux/efi.h | 1 + 3 files changed, 20 insertions(+) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index a3203b2caaca..8c934bda2a90 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -42,6 +42,24 @@ struct es_em_ctxt { struct es_fault_info fi; }; +/* + * AMD SEV Confidential computing blob structure. The structure is + * defined in OVMF UEFI firmware header: + * https://github.com/tianocore/edk2/blob/master/OvmfPkg/Include/Guid/ConfidentialComputingSevSnpBlob.h + */ +#define CC_BLOB_SEV_HDR_MAGIC 0x45444d41 +struct cc_blob_sev_info { + u32 magic; + u16 version; + u16 reserved; + u64 secrets_phys; + u32 secrets_len; + u32 rsvd1; + u64 cpuid_phys; + u32 cpuid_len; + u32 rsvd2; +} __packed; + void do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code); static inline u64 lower_bits(u64 val, unsigned int bits) diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index b25d3f82c2f3..1ac5acca72ce 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -10,6 +10,7 @@ #define SETUP_EFI 4 #define SETUP_APPLE_PROPERTIES 5 #define SETUP_JAILHOUSE 6 +#define SETUP_CC_BLOB 7 #define SETUP_INDIRECT (1<<31) diff --git a/include/linux/efi.h b/include/linux/efi.h index ccd4d3f91c98..984aa688997a 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -390,6 +390,7 @@ void efi_native_runtime_setup(void); #define EFI_CERT_SHA256_GUID EFI_GUID(0xc1c41626, 0x504c, 0x4092, 0xac, 0xa9, 0x41, 0xf9, 0x36, 0x93, 0x43, 0x28) #define EFI_CERT_X509_GUID EFI_GUID(0xa5c059a1, 0x94e4, 0x4aa7, 0x87, 0xb5, 0xab, 0x15, 0x5c, 0x2b, 0xf0, 0x72) #define EFI_CERT_X509_SHA256_GUID EFI_GUID(0x3bd2a492, 0x96c0, 0x4079, 0xb4, 0x20, 0xfc, 0xf9, 0x8e, 0xf1, 0x03, 0xed) +#define EFI_CC_BLOB_GUID EFI_GUID(0x067b1f5f, 0xcf26, 0x44c5, 0x85, 0x54, 0x93, 0xd7, 0x77, 0x91, 0x2d, 0x42) /* * This GUID is used to pass to the kernel proper the struct screen_info From b66370db9a90b3fa4c4a1a732af3e7e38d6d4c7c Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 24 Feb 2022 10:56:10 -0600 Subject: [PATCH 30/60] KVM: x86: Move lookup of indexed CPUID leafs to helper Determining which CPUID leafs have significant ECX/index values is also needed by guest kernel code when doing SEV-SNP-validated CPUID lookups. Move this to common code to keep future updates in sync. Signed-off-by: Michael Roth Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Reviewed-by: Venu Busireddy Link: https://lore.kernel.org/r/20220307213356.2797205-31-brijesh.singh@amd.com --- arch/x86/include/asm/cpuid.h | 34 ++++++++++++++++++++++++++++++++++ arch/x86/kvm/cpuid.c | 19 ++----------------- 2 files changed, 36 insertions(+), 17 deletions(-) create mode 100644 arch/x86/include/asm/cpuid.h diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h new file mode 100644 index 000000000000..70b2db18165e --- /dev/null +++ b/arch/x86/include/asm/cpuid.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * CPUID-related helpers/definitions + * + * Derived from arch/x86/kvm/cpuid.c + */ + +#ifndef _ASM_X86_CPUID_H +#define _ASM_X86_CPUID_H + +static __always_inline bool cpuid_function_is_indexed(u32 function) +{ + switch (function) { + case 4: + case 7: + case 0xb: + case 0xd: + case 0xf: + case 0x10: + case 0x12: + case 0x14: + case 0x17: + case 0x18: + case 0x1d: + case 0x1e: + case 0x1f: + case 0x8000001d: + return true; + } + + return false; +} + +#endif /* _ASM_X86_CPUID_H */ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b24ca7f4ed7c..4b62d80bb22f 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "cpuid.h" #include "lapic.h" #include "mmu.h" @@ -744,24 +745,8 @@ static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array, cpuid_count(entry->function, entry->index, &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); - switch (function) { - case 4: - case 7: - case 0xb: - case 0xd: - case 0xf: - case 0x10: - case 0x12: - case 0x14: - case 0x17: - case 0x18: - case 0x1d: - case 0x1e: - case 0x1f: - case 0x8000001d: + if (cpuid_function_is_indexed(function)) entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; - break; - } return entry; } From 801baa693c1f6d7327475c39100c456db340cd3e Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 24 Feb 2022 10:56:11 -0600 Subject: [PATCH 31/60] x86/sev: Move MSR-based VMGEXITs for CPUID to helper This code will also be used later for SEV-SNP-validated CPUID code in some cases, so move it to a common helper. While here, also add a check to terminate in cases where the CPUID function/subfunction is indexed and the subfunction is non-zero, since the GHCB MSR protocol does not support non-zero subfunctions. Suggested-by: Sean Christopherson Signed-off-by: Michael Roth Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-32-brijesh.singh@amd.com --- arch/x86/boot/compressed/sev.c | 1 + arch/x86/kernel/sev-shared.c | 83 +++++++++++++++++++++++----------- arch/x86/kernel/sev.c | 1 + 3 files changed, 59 insertions(+), 26 deletions(-) diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index f31b434e2ce4..7a9cfbc43f4b 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "error.h" #include "../msr.h" diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 3aaef1a18ffe..b4d5558c9d0a 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -14,6 +14,16 @@ #define has_cpuflag(f) boot_cpu_has(f) #endif +/* I/O parameters for CPUID-related helpers */ +struct cpuid_leaf { + u32 fn; + u32 subfn; + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; +}; + /* * Since feature negotiation related variables are set early in the boot * process they must reside in the .data section so as not to be zeroed @@ -194,6 +204,44 @@ enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr, return verify_exception_info(ghcb, ctxt); } +static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg) +{ + u64 val; + + sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, reg_idx)); + VMGEXIT(); + val = sev_es_rd_ghcb_msr(); + if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) + return -EIO; + + *reg = (val >> 32); + + return 0; +} + +static int sev_cpuid_hv(struct cpuid_leaf *leaf) +{ + int ret; + + /* + * MSR protocol does not support fetching non-zero subfunctions, but is + * sufficient to handle current early-boot cases. Should that change, + * make sure to report an error rather than ignoring the index and + * grabbing random values. If this issue arises in the future, handling + * can be added here to use GHCB-page protocol for cases that occur late + * enough in boot that GHCB page is available. + */ + if (cpuid_function_is_indexed(leaf->fn) && leaf->subfn) + return -EINVAL; + + ret = __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EAX, &leaf->eax); + ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EBX, &leaf->ebx); + ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_ECX, &leaf->ecx); + ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EDX, &leaf->edx); + + return ret; +} + /* * Boot VC Handler - This is the first VC handler during boot, there is no GHCB * page yet, so it only supports the MSR based communication with the @@ -201,40 +249,23 @@ enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr, */ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) { + unsigned int subfn = lower_bits(regs->cx, 32); unsigned int fn = lower_bits(regs->ax, 32); - unsigned long val; + struct cpuid_leaf leaf; /* Only CPUID is supported via MSR protocol */ if (exit_code != SVM_EXIT_CPUID) goto fail; - sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX)); - VMGEXIT(); - val = sev_es_rd_ghcb_msr(); - if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) + leaf.fn = fn; + leaf.subfn = subfn; + if (sev_cpuid_hv(&leaf)) goto fail; - regs->ax = val >> 32; - sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EBX)); - VMGEXIT(); - val = sev_es_rd_ghcb_msr(); - if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) - goto fail; - regs->bx = val >> 32; - - sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_ECX)); - VMGEXIT(); - val = sev_es_rd_ghcb_msr(); - if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) - goto fail; - regs->cx = val >> 32; - - sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EDX)); - VMGEXIT(); - val = sev_es_rd_ghcb_msr(); - if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) - goto fail; - regs->dx = val >> 32; + regs->ax = leaf.eax; + regs->bx = leaf.ebx; + regs->cx = leaf.ecx; + regs->dx = leaf.edx; /* * This is a VC handler and the #VC is only raised when SEV-ES is diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index d7915ae7729d..0c2bf39c4a8f 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -33,6 +33,7 @@ #include #include #include +#include #define DR7_RESET_VALUE 0x400 From ee0bfa08a345370df28c07288e886abcbaac481f Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 24 Feb 2022 10:56:12 -0600 Subject: [PATCH 32/60] x86/compressed/64: Add support for SEV-SNP CPUID table in #VC handlers CPUID instructions generate a #VC exception for SEV-ES/SEV-SNP guests, for which early handlers are currently set up to handle. In the case of SEV-SNP, guests can use a configurable location in guest memory that has been pre-populated with a firmware-validated CPUID table to look up the relevant CPUID values rather than requesting them from hypervisor via a VMGEXIT. Add the various hooks in the #VC handlers to allow CPUID instructions to be handled via the table. The code to actually configure/enable the table will be added in a subsequent commit. Signed-off-by: Michael Roth Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-33-brijesh.singh@amd.com --- arch/x86/include/asm/sev-common.h | 2 + arch/x86/kernel/sev-shared.c | 324 ++++++++++++++++++++++++++++++ 2 files changed, 326 insertions(+) diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index e9b6815b3b3d..0759af9b1acf 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -152,6 +152,8 @@ struct snp_psc_desc { #define GHCB_TERM_PSC 1 /* Page State Change failure */ #define GHCB_TERM_PVALIDATE 2 /* Pvalidate failure */ #define GHCB_TERM_NOT_VMPL0 3 /* SNP guest is not running at VMPL-0 */ +#define GHCB_TERM_CPUID 4 /* CPUID-validation failure */ +#define GHCB_TERM_CPUID_HV 5 /* CPUID failure during hypervisor fallback */ #define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index b4d5558c9d0a..0f1375164ff0 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -24,6 +24,36 @@ struct cpuid_leaf { u32 edx; }; +/* + * Individual entries of the SNP CPUID table, as defined by the SNP + * Firmware ABI, Revision 0.9, Section 7.1, Table 14. + */ +struct snp_cpuid_fn { + u32 eax_in; + u32 ecx_in; + u64 xcr0_in; + u64 xss_in; + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; + u64 __reserved; +} __packed; + +/* + * SNP CPUID table, as defined by the SNP Firmware ABI, Revision 0.9, + * Section 8.14.2.6. Also noted there is the SNP firmware-enforced limit + * of 64 entries per CPUID table. + */ +#define SNP_CPUID_COUNT_MAX 64 + +struct snp_cpuid_table { + u32 count; + u32 __reserved1; + u64 __reserved2; + struct snp_cpuid_fn fn[SNP_CPUID_COUNT_MAX]; +} __packed; + /* * Since feature negotiation related variables are set early in the boot * process they must reside in the .data section so as not to be zeroed @@ -33,6 +63,19 @@ struct cpuid_leaf { */ static u16 ghcb_version __ro_after_init; +/* Copy of the SNP firmware's CPUID page. */ +static struct snp_cpuid_table cpuid_table_copy __ro_after_init; + +/* + * These will be initialized based on CPUID table so that non-present + * all-zero leaves (for sparse tables) can be differentiated from + * invalid/out-of-range leaves. This is needed since all-zero leaves + * still need to be post-processed. + */ +static u32 cpuid_std_range_max __ro_after_init; +static u32 cpuid_hyp_range_max __ro_after_init; +static u32 cpuid_ext_range_max __ro_after_init; + static bool __init sev_es_check_cpu_features(void) { if (!has_cpuflag(X86_FEATURE_RDRAND)) { @@ -242,6 +285,252 @@ static int sev_cpuid_hv(struct cpuid_leaf *leaf) return ret; } +/* + * This may be called early while still running on the initial identity + * mapping. Use RIP-relative addressing to obtain the correct address + * while running with the initial identity mapping as well as the + * switch-over to kernel virtual addresses later. + */ +static const struct snp_cpuid_table *snp_cpuid_get_table(void) +{ + void *ptr; + + asm ("lea cpuid_table_copy(%%rip), %0" + : "=r" (ptr) + : "p" (&cpuid_table_copy)); + + return ptr; +} + +/* + * The SNP Firmware ABI, Revision 0.9, Section 7.1, details the use of + * XCR0_IN and XSS_IN to encode multiple versions of 0xD subfunctions 0 + * and 1 based on the corresponding features enabled by a particular + * combination of XCR0 and XSS registers so that a guest can look up the + * version corresponding to the features currently enabled in its XCR0/XSS + * registers. The only values that differ between these versions/table + * entries is the enabled XSAVE area size advertised via EBX. + * + * While hypervisors may choose to make use of this support, it is more + * robust/secure for a guest to simply find the entry corresponding to the + * base/legacy XSAVE area size (XCR0=1 or XCR0=3), and then calculate the + * XSAVE area size using subfunctions 2 through 64, as documented in APM + * Volume 3, Rev 3.31, Appendix E.3.8, which is what is done here. + * + * Since base/legacy XSAVE area size is documented as 0x240, use that value + * directly rather than relying on the base size in the CPUID table. + * + * Return: XSAVE area size on success, 0 otherwise. + */ +static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted) +{ + const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); + u64 xfeatures_found = 0; + u32 xsave_size = 0x240; + int i; + + for (i = 0; i < cpuid_table->count; i++) { + const struct snp_cpuid_fn *e = &cpuid_table->fn[i]; + + if (!(e->eax_in == 0xD && e->ecx_in > 1 && e->ecx_in < 64)) + continue; + if (!(xfeatures_en & (BIT_ULL(e->ecx_in)))) + continue; + if (xfeatures_found & (BIT_ULL(e->ecx_in))) + continue; + + xfeatures_found |= (BIT_ULL(e->ecx_in)); + + if (compacted) + xsave_size += e->eax; + else + xsave_size = max(xsave_size, e->eax + e->ebx); + } + + /* + * Either the guest set unsupported XCR0/XSS bits, or the corresponding + * entries in the CPUID table were not present. This is not a valid + * state to be in. + */ + if (xfeatures_found != (xfeatures_en & GENMASK_ULL(63, 2))) + return 0; + + return xsave_size; +} + +static bool +snp_cpuid_get_validated_func(struct cpuid_leaf *leaf) +{ + const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); + int i; + + for (i = 0; i < cpuid_table->count; i++) { + const struct snp_cpuid_fn *e = &cpuid_table->fn[i]; + + if (e->eax_in != leaf->fn) + continue; + + if (cpuid_function_is_indexed(leaf->fn) && e->ecx_in != leaf->subfn) + continue; + + /* + * For 0xD subfunctions 0 and 1, only use the entry corresponding + * to the base/legacy XSAVE area size (XCR0=1 or XCR0=3, XSS=0). + * See the comments above snp_cpuid_calc_xsave_size() for more + * details. + */ + if (e->eax_in == 0xD && (e->ecx_in == 0 || e->ecx_in == 1)) + if (!(e->xcr0_in == 1 || e->xcr0_in == 3) || e->xss_in) + continue; + + leaf->eax = e->eax; + leaf->ebx = e->ebx; + leaf->ecx = e->ecx; + leaf->edx = e->edx; + + return true; + } + + return false; +} + +static void snp_cpuid_hv(struct cpuid_leaf *leaf) +{ + if (sev_cpuid_hv(leaf)) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV); +} + +static int snp_cpuid_postprocess(struct cpuid_leaf *leaf) +{ + struct cpuid_leaf leaf_hv = *leaf; + + switch (leaf->fn) { + case 0x1: + snp_cpuid_hv(&leaf_hv); + + /* initial APIC ID */ + leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0)); + /* APIC enabled bit */ + leaf->edx = (leaf_hv.edx & BIT(9)) | (leaf->edx & ~BIT(9)); + + /* OSXSAVE enabled bit */ + if (native_read_cr4() & X86_CR4_OSXSAVE) + leaf->ecx |= BIT(27); + break; + case 0x7: + /* OSPKE enabled bit */ + leaf->ecx &= ~BIT(4); + if (native_read_cr4() & X86_CR4_PKE) + leaf->ecx |= BIT(4); + break; + case 0xB: + leaf_hv.subfn = 0; + snp_cpuid_hv(&leaf_hv); + + /* extended APIC ID */ + leaf->edx = leaf_hv.edx; + break; + case 0xD: { + bool compacted = false; + u64 xcr0 = 1, xss = 0; + u32 xsave_size; + + if (leaf->subfn != 0 && leaf->subfn != 1) + return 0; + + if (native_read_cr4() & X86_CR4_OSXSAVE) + xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); + if (leaf->subfn == 1) { + /* Get XSS value if XSAVES is enabled. */ + if (leaf->eax & BIT(3)) { + unsigned long lo, hi; + + asm volatile("rdmsr" : "=a" (lo), "=d" (hi) + : "c" (MSR_IA32_XSS)); + xss = (hi << 32) | lo; + } + + /* + * The PPR and APM aren't clear on what size should be + * encoded in 0xD:0x1:EBX when compaction is not enabled + * by either XSAVEC (feature bit 1) or XSAVES (feature + * bit 3) since SNP-capable hardware has these feature + * bits fixed as 1. KVM sets it to 0 in this case, but + * to avoid this becoming an issue it's safer to simply + * treat this as unsupported for SNP guests. + */ + if (!(leaf->eax & (BIT(1) | BIT(3)))) + return -EINVAL; + + compacted = true; + } + + xsave_size = snp_cpuid_calc_xsave_size(xcr0 | xss, compacted); + if (!xsave_size) + return -EINVAL; + + leaf->ebx = xsave_size; + } + break; + case 0x8000001E: + snp_cpuid_hv(&leaf_hv); + + /* extended APIC ID */ + leaf->eax = leaf_hv.eax; + /* compute ID */ + leaf->ebx = (leaf->ebx & GENMASK(31, 8)) | (leaf_hv.ebx & GENMASK(7, 0)); + /* node ID */ + leaf->ecx = (leaf->ecx & GENMASK(31, 8)) | (leaf_hv.ecx & GENMASK(7, 0)); + break; + default: + /* No fix-ups needed, use values as-is. */ + break; + } + + return 0; +} + +/* + * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value + * should be treated as fatal by caller. + */ +static int snp_cpuid(struct cpuid_leaf *leaf) +{ + const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); + + if (!cpuid_table->count) + return -EOPNOTSUPP; + + if (!snp_cpuid_get_validated_func(leaf)) { + /* + * Some hypervisors will avoid keeping track of CPUID entries + * where all values are zero, since they can be handled the + * same as out-of-range values (all-zero). This is useful here + * as well as it allows virtually all guest configurations to + * work using a single SNP CPUID table. + * + * To allow for this, there is a need to distinguish between + * out-of-range entries and in-range zero entries, since the + * CPUID table entries are only a template that may need to be + * augmented with additional values for things like + * CPU-specific information during post-processing. So if it's + * not in the table, set the values to zero. Then, if they are + * within a valid CPUID range, proceed with post-processing + * using zeros as the initial values. Otherwise, skip + * post-processing and just return zeros immediately. + */ + leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0; + + /* Skip post-processing for out-of-range zero leafs. */ + if (!(leaf->fn <= cpuid_std_range_max || + (leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) || + (leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max))) + return 0; + } + + return snp_cpuid_postprocess(leaf); +} + /* * Boot VC Handler - This is the first VC handler during boot, there is no GHCB * page yet, so it only supports the MSR based communication with the @@ -252,6 +541,7 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) unsigned int subfn = lower_bits(regs->cx, 32); unsigned int fn = lower_bits(regs->ax, 32); struct cpuid_leaf leaf; + int ret; /* Only CPUID is supported via MSR protocol */ if (exit_code != SVM_EXIT_CPUID) @@ -259,9 +549,18 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) leaf.fn = fn; leaf.subfn = subfn; + + ret = snp_cpuid(&leaf); + if (!ret) + goto cpuid_done; + + if (ret != -EOPNOTSUPP) + goto fail; + if (sev_cpuid_hv(&leaf)) goto fail; +cpuid_done: regs->ax = leaf.eax; regs->bx = leaf.ebx; regs->cx = leaf.ecx; @@ -556,12 +855,37 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) return ret; } +static int vc_handle_cpuid_snp(struct pt_regs *regs) +{ + struct cpuid_leaf leaf; + int ret; + + leaf.fn = regs->ax; + leaf.subfn = regs->cx; + ret = snp_cpuid(&leaf); + if (!ret) { + regs->ax = leaf.eax; + regs->bx = leaf.ebx; + regs->cx = leaf.ecx; + regs->dx = leaf.edx; + } + + return ret; +} + static enum es_result vc_handle_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt) { struct pt_regs *regs = ctxt->regs; u32 cr4 = native_read_cr4(); enum es_result ret; + int snp_cpuid_ret; + + snp_cpuid_ret = vc_handle_cpuid_snp(regs); + if (!snp_cpuid_ret) + return ES_OK; + if (snp_cpuid_ret != -EOPNOTSUPP) + return ES_VMM_ERROR; ghcb_set_rax(ghcb, regs->ax); ghcb_set_rcx(ghcb, regs->cx); From 8c9c509baf660f1062bc758c26008b7f9bbc39f3 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 24 Feb 2022 10:56:13 -0600 Subject: [PATCH 33/60] x86/boot: Add a pointer to Confidential Computing blob in bootparams The previously defined Confidential Computing blob is provided to the kernel via a setup_data structure or EFI config table entry. Currently, these are both checked for by boot/compressed kernel to access the CPUID table address within it for use with SEV-SNP CPUID enforcement. To also enable that enforcement for the run-time kernel, similar access to the CPUID table is needed early on while it's still using the identity-mapped page table set up by boot/compressed, where global pointers need to be accessed via fixup_pointer(). This isn't much of an issue for accessing setup_data, and the EFI config table helper code currently used in boot/compressed *could* be used in this case as well since they both rely on identity-mapping. However, it has some reliance on EFI helpers/string constants that would need to be accessed via fixup_pointer(), and fixing it up while making it shareable between boot/compressed and run-time kernel is fragile and introduces a good bit of ugliness. Instead, add a boot_params->cc_blob_address pointer that the boot/compressed kernel can initialize so that the run-time kernel can access the CC blob from there instead of re-scanning the EFI config table. Also document these in Documentation/x86/zero-page.rst. While there, add missing documentation for the acpi_rsdp_addr field, which serves a similar purpose in providing the run-time kernel a pointer to the ACPI RSDP table so that it does not need to [re-]scan the EFI configuration table. [ bp: Fix typos, massage commit message. ] Signed-off-by: Michael Roth Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-34-brijesh.singh@amd.com --- Documentation/x86/zero-page.rst | 2 ++ arch/x86/include/asm/bootparam_utils.h | 1 + arch/x86/include/uapi/asm/bootparam.h | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/x86/zero-page.rst b/Documentation/x86/zero-page.rst index f088f5881666..45aa9cceb4f1 100644 --- a/Documentation/x86/zero-page.rst +++ b/Documentation/x86/zero-page.rst @@ -19,6 +19,7 @@ Offset/Size Proto Name Meaning 058/008 ALL tboot_addr Physical address of tboot shared page 060/010 ALL ist_info Intel SpeedStep (IST) BIOS support information (struct ist_info) +070/008 ALL acpi_rsdp_addr Physical address of ACPI RSDP table 080/010 ALL hd0_info hd0 disk parameter, OBSOLETE!! 090/010 ALL hd1_info hd1 disk parameter, OBSOLETE!! 0A0/010 ALL sys_desc_table System description table (struct sys_desc_table), @@ -27,6 +28,7 @@ Offset/Size Proto Name Meaning 0C0/004 ALL ext_ramdisk_image ramdisk_image high 32bits 0C4/004 ALL ext_ramdisk_size ramdisk_size high 32bits 0C8/004 ALL ext_cmd_line_ptr cmd_line_ptr high 32bits +13C/004 ALL cc_blob_address Physical address of Confidential Computing blob 140/080 ALL edid_info Video mode setup (struct edid_info) 1C0/020 ALL efi_info EFI 32 information (struct efi_info) 1E0/004 ALL alt_mem_k Alternative mem check, in KB diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h index 981fe923a59f..53e9b0620d96 100644 --- a/arch/x86/include/asm/bootparam_utils.h +++ b/arch/x86/include/asm/bootparam_utils.h @@ -74,6 +74,7 @@ static void sanitize_boot_params(struct boot_params *boot_params) BOOT_PARAM_PRESERVE(hdr), BOOT_PARAM_PRESERVE(e820_table), BOOT_PARAM_PRESERVE(eddbuf), + BOOT_PARAM_PRESERVE(cc_blob_address), }; memset(&scratch, 0, sizeof(scratch)); diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 1ac5acca72ce..bea5cdcdf532 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -188,7 +188,8 @@ struct boot_params { __u32 ext_ramdisk_image; /* 0x0c0 */ __u32 ext_ramdisk_size; /* 0x0c4 */ __u32 ext_cmd_line_ptr; /* 0x0c8 */ - __u8 _pad4[116]; /* 0x0cc */ + __u8 _pad4[112]; /* 0x0cc */ + __u32 cc_blob_address; /* 0x13c */ struct edid_info edid_info; /* 0x140 */ struct efi_info efi_info; /* 0x1c0 */ __u32 alt_mem_k; /* 0x1e0 */ From c01fce9cef8491974f7f007f90281f1608400768 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 24 Feb 2022 10:56:14 -0600 Subject: [PATCH 34/60] x86/compressed: Add SEV-SNP feature detection/setup Initial/preliminary detection of SEV-SNP is done via the Confidential Computing blob. Check for it prior to the normal SEV/SME feature initialization, and add some sanity checks to confirm it agrees with SEV-SNP CPUID/MSR bits. Signed-off-by: Michael Roth Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-35-brijesh.singh@amd.com --- arch/x86/boot/compressed/sev.c | 112 ++++++++++++++++++++++++++++++++- arch/x86/include/asm/sev.h | 3 + 2 files changed, 114 insertions(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 7a9cfbc43f4b..2a94bb7879ae 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -274,6 +274,13 @@ void sev_enable(struct boot_params *bp) { unsigned int eax, ebx, ecx, edx; struct msr m; + bool snp; + + /* + * Setup/preliminary detection of SNP. This will be sanity-checked + * against CPUID/MSR values later. + */ + snp = snp_init(bp); /* Check for the SME/SEV support leaf */ eax = 0x80000000; @@ -294,8 +301,11 @@ void sev_enable(struct boot_params *bp) ecx = 0; native_cpuid(&eax, &ebx, &ecx, &edx); /* Check whether SEV is supported */ - if (!(eax & BIT(1))) + if (!(eax & BIT(1))) { + if (snp) + error("SEV-SNP support indicated by CC blob, but not CPUID."); return; + } /* Set the SME mask if this is an SEV guest. */ boot_rdmsr(MSR_AMD64_SEV, &m); @@ -320,5 +330,105 @@ void sev_enable(struct boot_params *bp) enforce_vmpl0(); } + if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) + error("SEV-SNP supported indicated by CC blob, but not SEV status MSR."); + sme_me_mask = BIT_ULL(ebx & 0x3f); } + +/* Search for Confidential Computing blob in the EFI config table. */ +static struct cc_blob_sev_info *find_cc_blob_efi(struct boot_params *bp) +{ + unsigned long cfg_table_pa; + unsigned int cfg_table_len; + int ret; + + ret = efi_get_conf_table(bp, &cfg_table_pa, &cfg_table_len); + if (ret) + return NULL; + + return (struct cc_blob_sev_info *)efi_find_vendor_table(bp, cfg_table_pa, + cfg_table_len, + EFI_CC_BLOB_GUID); +} + +struct cc_setup_data { + struct setup_data header; + u32 cc_blob_address; +}; + +/* + * Search for a Confidential Computing blob passed in as a setup_data entry + * via the Linux Boot Protocol. + */ +static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) +{ + struct cc_setup_data *sd = NULL; + struct setup_data *hdr; + + hdr = (struct setup_data *)bp->hdr.setup_data; + + while (hdr) { + if (hdr->type == SETUP_CC_BLOB) { + sd = (struct cc_setup_data *)hdr; + return (struct cc_blob_sev_info *)(unsigned long)sd->cc_blob_address; + } + hdr = (struct setup_data *)hdr->next; + } + + return NULL; +} + +/* + * Initial set up of SNP relies on information provided by the + * Confidential Computing blob, which can be passed to the boot kernel + * by firmware/bootloader in the following ways: + * + * - via an entry in the EFI config table + * - via a setup_data structure, as defined by the Linux Boot Protocol + * + * Scan for the blob in that order. + */ +static struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) +{ + struct cc_blob_sev_info *cc_info; + + cc_info = find_cc_blob_efi(bp); + if (cc_info) + goto found_cc_info; + + cc_info = find_cc_blob_setup_data(bp); + if (!cc_info) + return NULL; + +found_cc_info: + if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC) + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); + + return cc_info; +} + +/* + * Indicate SNP based on presence of SNP-specific CC blob. Subsequent checks + * will verify the SNP CPUID/MSR bits. + */ +bool snp_init(struct boot_params *bp) +{ + struct cc_blob_sev_info *cc_info; + + if (!bp) + return false; + + cc_info = find_cc_blob(bp); + if (!cc_info) + return false; + + /* + * Pass run-time kernel a pointer to CC info via boot_params so EFI + * config table doesn't need to be searched again during early startup + * phase. + */ + bp->cc_blob_address = (u32)(unsigned long)cc_info; + + return true; +} diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 8c934bda2a90..31b3b10323b6 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -11,6 +11,7 @@ #include #include #include +#include #define GHCB_PROTOCOL_MIN 1ULL #define GHCB_PROTOCOL_MAX 2ULL @@ -151,6 +152,7 @@ void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op void snp_set_memory_shared(unsigned long vaddr, unsigned int npages); void snp_set_memory_private(unsigned long vaddr, unsigned int npages); void snp_set_wakeup_secondary_cpu(void); +bool snp_init(struct boot_params *bp); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } @@ -168,6 +170,7 @@ static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, static inline void snp_set_memory_shared(unsigned long vaddr, unsigned int npages) { } static inline void snp_set_memory_private(unsigned long vaddr, unsigned int npages) { } static inline void snp_set_wakeup_secondary_cpu(void) { } +static inline bool snp_init(struct boot_params *bp) { return false; } #endif #endif From 5f211f4fc49622473667e6983bb57beab755f6f6 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 24 Feb 2022 10:56:15 -0600 Subject: [PATCH 35/60] x86/compressed: Use firmware-validated CPUID leaves for SEV-SNP guests SEV-SNP guests will be provided the location of special 'secrets' 'CPUID' pages via the Confidential Computing blob. This blob is provided to the boot kernel either through an EFI config table entry, or via a setup_data structure as defined by the Linux Boot Protocol. Locate the Confidential Computing from these sources and, if found, use the provided CPUID page/table address to create a copy that the boot kernel will use when servicing CPUID instructions via a #VC CPUID handler. [ bp: s/cpuid/CPUID/ ] Signed-off-by: Michael Roth Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-36-brijesh.singh@amd.com --- arch/x86/boot/compressed/sev.c | 46 ++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 2a94bb7879ae..dd3cf55696d4 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -408,6 +408,43 @@ static struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) return cc_info; } +/* + * Initialize the kernel's copy of the SNP CPUID table, and set up the + * pointer that will be used to access it. + * + * Maintaining a direct mapping of the SNP CPUID table used by firmware would + * be possible as an alternative, but the approach is brittle since the + * mapping needs to be updated in sync with all the changes to virtual memory + * layout and related mapping facilities throughout the boot process. + */ +static void setup_cpuid_table(const struct cc_blob_sev_info *cc_info) +{ + const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table; + int i; + + if (!cc_info || !cc_info->cpuid_phys || cc_info->cpuid_len < PAGE_SIZE) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID); + + cpuid_table_fw = (const struct snp_cpuid_table *)cc_info->cpuid_phys; + if (!cpuid_table_fw->count || cpuid_table_fw->count > SNP_CPUID_COUNT_MAX) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID); + + cpuid_table = snp_cpuid_get_table(); + memcpy((void *)cpuid_table, cpuid_table_fw, sizeof(*cpuid_table)); + + /* Initialize CPUID ranges for range-checking. */ + for (i = 0; i < cpuid_table->count; i++) { + const struct snp_cpuid_fn *fn = &cpuid_table->fn[i]; + + if (fn->eax_in == 0x0) + cpuid_std_range_max = fn->eax; + else if (fn->eax_in == 0x40000000) + cpuid_hyp_range_max = fn->eax; + else if (fn->eax_in == 0x80000000) + cpuid_ext_range_max = fn->eax; + } +} + /* * Indicate SNP based on presence of SNP-specific CC blob. Subsequent checks * will verify the SNP CPUID/MSR bits. @@ -423,6 +460,15 @@ bool snp_init(struct boot_params *bp) if (!cc_info) return false; + /* + * If a SNP-specific Confidential Computing blob is present, then + * firmware/bootloader have indicated SNP support. Verifying this + * involves CPUID checks which will be more reliable if the SNP + * CPUID table is used. See comments over snp_setup_cpuid_table() for + * more details. + */ + setup_cpuid_table(cc_info); + /* * Pass run-time kernel a pointer to CC info via boot_params so EFI * config table doesn't need to be searched again during early startup From a9ee679b1f8c3803490ed2eeffb688aaee56583f Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 24 Feb 2022 10:56:16 -0600 Subject: [PATCH 36/60] x86/compressed: Export and rename add_identity_map() SEV-specific code will need to add some additional mappings, but doing this within ident_map_64.c requires some SEV-specific helpers to be exported and some SEV-specific struct definitions to be pulled into ident_map_64.c. Instead, export add_identity_map() so SEV-specific (and other subsystem-specific) code can be better contained outside of ident_map_64.c. While at it, rename the function to kernel_add_identity_map(), similar to the kernel_ident_mapping_init() function it relies upon. No functional changes. Suggested-by: Borislav Petkov Signed-off-by: Michael Roth Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-37-brijesh.singh@amd.com --- arch/x86/boot/compressed/ident_map_64.c | 18 +++++++++--------- arch/x86/boot/compressed/misc.h | 1 + 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index 613367e7e09e..99348ee7999b 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -90,7 +90,7 @@ static struct x86_mapping_info mapping_info; /* * Adds the specified range to the identity mappings. */ -static void add_identity_map(unsigned long start, unsigned long end) +void kernel_add_identity_map(unsigned long start, unsigned long end) { int ret; @@ -157,11 +157,11 @@ void initialize_identity_maps(void *rmode) * explicitly here in case the compressed kernel does not touch them, * or does not touch all the pages covering them. */ - add_identity_map((unsigned long)_head, (unsigned long)_end); + kernel_add_identity_map((unsigned long)_head, (unsigned long)_end); boot_params = rmode; - add_identity_map((unsigned long)boot_params, (unsigned long)(boot_params + 1)); + kernel_add_identity_map((unsigned long)boot_params, (unsigned long)(boot_params + 1)); cmdline = get_cmd_line_ptr(); - add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE); + kernel_add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE); /* Load the new page-table. */ sev_verify_cbit(top_level_pgt); @@ -246,10 +246,10 @@ static int set_clr_page_flags(struct x86_mapping_info *info, * It should already exist, but keep things generic. * * To map the page just read from it and fault it in if there is no - * mapping yet. add_identity_map() can't be called here because that - * would unconditionally map the address on PMD level, destroying any - * PTE-level mappings that might already exist. Use assembly here so - * the access won't be optimized away. + * mapping yet. kernel_add_identity_map() can't be called here because + * that would unconditionally map the address on PMD level, destroying + * any PTE-level mappings that might already exist. Use assembly here + * so the access won't be optimized away. */ asm volatile("mov %[address], %%r9" :: [address] "g" (*(unsigned long *)address) @@ -363,5 +363,5 @@ void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code) * Error code is sane - now identity map the 2M region around * the faulting address. */ - add_identity_map(address, end); + kernel_add_identity_map(address, end); } diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index ba538af37e90..aae2722c6e9a 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -156,6 +156,7 @@ static inline int count_immovable_mem_regions(void) { return 0; } #ifdef CONFIG_X86_5LEVEL extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d; #endif +extern void kernel_add_identity_map(unsigned long start, unsigned long end); /* Used by PAGE_KERN* macros: */ extern pteval_t __default_kernel_pte_mask; From 76f61e1e89b32f3e5d639f1b57413a919066da06 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 24 Feb 2022 10:56:17 -0600 Subject: [PATCH 37/60] x86/compressed/64: Add identity mapping for Confidential Computing blob The run-time kernel will need to access the Confidential Computing blob very early during boot to access the CPUID table it points to. At that stage, it will be relying on the identity-mapped page table set up by the boot/compressed kernel, so make sure the blob and the CPUID table it points to are mapped in advance. [ bp: Massage. ] Signed-off-by: Michael Roth Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-38-brijesh.singh@amd.com --- arch/x86/boot/compressed/ident_map_64.c | 3 ++- arch/x86/boot/compressed/misc.h | 2 ++ arch/x86/boot/compressed/sev.c | 21 +++++++++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index 99348ee7999b..44c350d627c7 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -163,8 +163,9 @@ void initialize_identity_maps(void *rmode) cmdline = get_cmd_line_ptr(); kernel_add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE); + sev_prep_identity_maps(top_level_pgt); + /* Load the new page-table. */ - sev_verify_cbit(top_level_pgt); write_cr3(top_level_pgt); } diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index aae2722c6e9a..75d284ec763f 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -127,6 +127,7 @@ void sev_es_shutdown_ghcb(void); extern bool sev_es_check_ghcb_fault(unsigned long address); void snp_set_page_private(unsigned long paddr); void snp_set_page_shared(unsigned long paddr); +void sev_prep_identity_maps(unsigned long top_level_pgt); #else static inline void sev_enable(struct boot_params *bp) { } static inline void sev_es_shutdown_ghcb(void) { } @@ -136,6 +137,7 @@ static inline bool sev_es_check_ghcb_fault(unsigned long address) } static inline void snp_set_page_private(unsigned long paddr) { } static inline void snp_set_page_shared(unsigned long paddr) { } +static inline void sev_prep_identity_maps(unsigned long top_level_pgt) { } #endif /* acpi.c */ diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index dd3cf55696d4..1e4930ce2c95 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -478,3 +478,24 @@ bool snp_init(struct boot_params *bp) return true; } + +void sev_prep_identity_maps(unsigned long top_level_pgt) +{ + /* + * The Confidential Computing blob is used very early in uncompressed + * kernel to find the in-memory CPUID table to handle CPUID + * instructions. Make sure an identity-mapping exists so it can be + * accessed after switchover. + */ + if (sev_snp_enabled()) { + unsigned long cc_info_pa = boot_params->cc_blob_address; + struct cc_blob_sev_info *cc_info; + + kernel_add_identity_map(cc_info_pa, cc_info_pa + sizeof(*cc_info)); + + cc_info = (struct cc_blob_sev_info *)cc_info_pa; + kernel_add_identity_map(cc_info->cpuid_phys, cc_info->cpuid_phys + cc_info->cpuid_len); + } + + sev_verify_cbit(top_level_pgt); +} From b190a043c49af4587f5e157053f909192820522a Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 24 Feb 2022 10:56:18 -0600 Subject: [PATCH 38/60] x86/sev: Add SEV-SNP feature detection/setup Initial/preliminary detection of SEV-SNP is done via the Confidential Computing blob. Check for it prior to the normal SEV/SME feature initialization, and add some sanity checks to confirm it agrees with SEV-SNP CPUID/MSR bits. Signed-off-by: Michael Roth Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-39-brijesh.singh@amd.com --- arch/x86/boot/compressed/sev.c | 27 ------------- arch/x86/include/asm/sev.h | 2 + arch/x86/kernel/sev-shared.c | 27 +++++++++++++ arch/x86/kernel/sev.c | 64 ++++++++++++++++++++++++++++++ arch/x86/mm/mem_encrypt_identity.c | 8 ++++ 5 files changed, 101 insertions(+), 27 deletions(-) diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 1e4930ce2c95..82079ce7be06 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -352,33 +352,6 @@ static struct cc_blob_sev_info *find_cc_blob_efi(struct boot_params *bp) EFI_CC_BLOB_GUID); } -struct cc_setup_data { - struct setup_data header; - u32 cc_blob_address; -}; - -/* - * Search for a Confidential Computing blob passed in as a setup_data entry - * via the Linux Boot Protocol. - */ -static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) -{ - struct cc_setup_data *sd = NULL; - struct setup_data *hdr; - - hdr = (struct setup_data *)bp->hdr.setup_data; - - while (hdr) { - if (hdr->type == SETUP_CC_BLOB) { - sd = (struct cc_setup_data *)hdr; - return (struct cc_blob_sev_info *)(unsigned long)sd->cc_blob_address; - } - hdr = (struct setup_data *)hdr->next; - } - - return NULL; -} - /* * Initial set up of SNP relies on information provided by the * Confidential Computing blob, which can be passed to the boot kernel diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 31b3b10323b6..84d3d5121e9c 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -153,6 +153,7 @@ void snp_set_memory_shared(unsigned long vaddr, unsigned int npages); void snp_set_memory_private(unsigned long vaddr, unsigned int npages); void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); +void snp_abort(void); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } @@ -171,6 +172,7 @@ static inline void snp_set_memory_shared(unsigned long vaddr, unsigned int npage static inline void snp_set_memory_private(unsigned long vaddr, unsigned int npages) { } static inline void snp_set_wakeup_secondary_cpu(void) { } static inline bool snp_init(struct boot_params *bp) { return false; } +static inline void snp_abort(void) { } #endif #endif diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 0f1375164ff0..a7a1c0fb298e 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -937,3 +937,30 @@ static enum es_result vc_handle_rdtsc(struct ghcb *ghcb, return ES_OK; } + +struct cc_setup_data { + struct setup_data header; + u32 cc_blob_address; +}; + +/* + * Search for a Confidential Computing blob passed in as a setup_data entry + * via the Linux Boot Protocol. + */ +static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) +{ + struct cc_setup_data *sd = NULL; + struct setup_data *hdr; + + hdr = (struct setup_data *)bp->hdr.setup_data; + + while (hdr) { + if (hdr->type == SETUP_CC_BLOB) { + sd = (struct cc_setup_data *)hdr; + return (struct cc_blob_sev_info *)(unsigned long)sd->cc_blob_address; + } + hdr = (struct setup_data *)hdr->next; + } + + return NULL; +} diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 0c2bf39c4a8f..692da7b29127 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -1974,3 +1974,67 @@ bool __init handle_vc_boot_ghcb(struct pt_regs *regs) while (true) halt(); } + +/* + * Initial set up of SNP relies on information provided by the + * Confidential Computing blob, which can be passed to the kernel + * in the following ways, depending on how it is booted: + * + * - when booted via the boot/decompress kernel: + * - via boot_params + * + * - when booted directly by firmware/bootloader (e.g. CONFIG_PVH): + * - via a setup_data entry, as defined by the Linux Boot Protocol + * + * Scan for the blob in that order. + */ +static __init struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) +{ + struct cc_blob_sev_info *cc_info; + + /* Boot kernel would have passed the CC blob via boot_params. */ + if (bp->cc_blob_address) { + cc_info = (struct cc_blob_sev_info *)(unsigned long)bp->cc_blob_address; + goto found_cc_info; + } + + /* + * If kernel was booted directly, without the use of the + * boot/decompression kernel, the CC blob may have been passed via + * setup_data instead. + */ + cc_info = find_cc_blob_setup_data(bp); + if (!cc_info) + return NULL; + +found_cc_info: + if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC) + snp_abort(); + + return cc_info; +} + +bool __init snp_init(struct boot_params *bp) +{ + struct cc_blob_sev_info *cc_info; + + if (!bp) + return false; + + cc_info = find_cc_blob(bp); + if (!cc_info) + return false; + + /* + * The CC blob will be used later to access the secrets page. Cache + * it here like the boot kernel does. + */ + bp->cc_blob_address = (u32)(unsigned long)cc_info; + + return true; +} + +void __init snp_abort(void) +{ + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); +} diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index b43bc24d2bb6..f415498d3175 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "mm_internal.h" @@ -509,8 +510,11 @@ void __init sme_enable(struct boot_params *bp) bool active_by_default; unsigned long me_mask; char buffer[16]; + bool snp; u64 msr; + snp = snp_init(bp); + /* Check for the SME/SEV support leaf */ eax = 0x80000000; ecx = 0; @@ -542,6 +546,10 @@ void __init sme_enable(struct boot_params *bp) sev_status = __rdmsr(MSR_AMD64_SEV); feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT; + /* The SEV-SNP CC blob should never be present unless SEV-SNP is enabled. */ + if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) + snp_abort(); + /* Check if memory encryption is enabled */ if (feature_mask == AMD_SME_BIT) { /* From 30612045e69d088f1effd748048ebb0e282984ec Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Mon, 7 Mar 2022 15:33:49 -0600 Subject: [PATCH 39/60] x86/sev: Use firmware-validated CPUID for SEV-SNP guests SEV-SNP guests will be provided the location of special 'secrets' and 'CPUID' pages via the Confidential Computing blob. This blob is provided to the run-time kernel either through a boot_params field that was initialized by the boot/compressed kernel, or via a setup_data structure as defined by the Linux Boot Protocol. Locate the Confidential Computing blob from these sources and, if found, use the provided CPUID page/table address to create a copy that the run-time kernel will use when servicing CPUID instructions via a #VC handler. Signed-off-by: Michael Roth Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-40-brijesh.singh@amd.com --- arch/x86/boot/compressed/sev.c | 37 ---------------------------------- arch/x86/kernel/sev-shared.c | 37 ++++++++++++++++++++++++++++++++++ arch/x86/kernel/sev.c | 24 ++++++++++++++++++++++ 3 files changed, 61 insertions(+), 37 deletions(-) diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 82079ce7be06..52f989f6acc2 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -381,43 +381,6 @@ static struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) return cc_info; } -/* - * Initialize the kernel's copy of the SNP CPUID table, and set up the - * pointer that will be used to access it. - * - * Maintaining a direct mapping of the SNP CPUID table used by firmware would - * be possible as an alternative, but the approach is brittle since the - * mapping needs to be updated in sync with all the changes to virtual memory - * layout and related mapping facilities throughout the boot process. - */ -static void setup_cpuid_table(const struct cc_blob_sev_info *cc_info) -{ - const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table; - int i; - - if (!cc_info || !cc_info->cpuid_phys || cc_info->cpuid_len < PAGE_SIZE) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID); - - cpuid_table_fw = (const struct snp_cpuid_table *)cc_info->cpuid_phys; - if (!cpuid_table_fw->count || cpuid_table_fw->count > SNP_CPUID_COUNT_MAX) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID); - - cpuid_table = snp_cpuid_get_table(); - memcpy((void *)cpuid_table, cpuid_table_fw, sizeof(*cpuid_table)); - - /* Initialize CPUID ranges for range-checking. */ - for (i = 0; i < cpuid_table->count; i++) { - const struct snp_cpuid_fn *fn = &cpuid_table->fn[i]; - - if (fn->eax_in == 0x0) - cpuid_std_range_max = fn->eax; - else if (fn->eax_in == 0x40000000) - cpuid_hyp_range_max = fn->eax; - else if (fn->eax_in == 0x80000000) - cpuid_ext_range_max = fn->eax; - } -} - /* * Indicate SNP based on presence of SNP-specific CC blob. Subsequent checks * will verify the SNP CPUID/MSR bits. diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index a7a1c0fb298e..2b4270d5559e 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -964,3 +964,40 @@ static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp) return NULL; } + +/* + * Initialize the kernel's copy of the SNP CPUID table, and set up the + * pointer that will be used to access it. + * + * Maintaining a direct mapping of the SNP CPUID table used by firmware would + * be possible as an alternative, but the approach is brittle since the + * mapping needs to be updated in sync with all the changes to virtual memory + * layout and related mapping facilities throughout the boot process. + */ +static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info) +{ + const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table; + int i; + + if (!cc_info || !cc_info->cpuid_phys || cc_info->cpuid_len < PAGE_SIZE) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID); + + cpuid_table_fw = (const struct snp_cpuid_table *)cc_info->cpuid_phys; + if (!cpuid_table_fw->count || cpuid_table_fw->count > SNP_CPUID_COUNT_MAX) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID); + + cpuid_table = snp_cpuid_get_table(); + memcpy((void *)cpuid_table, cpuid_table_fw, sizeof(*cpuid_table)); + + /* Initialize CPUID ranges for range-checking. */ + for (i = 0; i < cpuid_table->count; i++) { + const struct snp_cpuid_fn *fn = &cpuid_table->fn[i]; + + if (fn->eax_in == 0x0) + cpuid_std_range_max = fn->eax; + else if (fn->eax_in == 0x40000000) + cpuid_hyp_range_max = fn->eax; + else if (fn->eax_in == 0x80000000) + cpuid_ext_range_max = fn->eax; + } +} diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 692da7b29127..c8733725d8bf 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -34,6 +34,7 @@ #include #include #include +#include #define DR7_RESET_VALUE 0x400 @@ -2025,6 +2026,8 @@ bool __init snp_init(struct boot_params *bp) if (!cc_info) return false; + setup_cpuid_table(cc_info); + /* * The CC blob will be used later to access the secrets page. Cache * it here like the boot kernel does. @@ -2038,3 +2041,24 @@ void __init snp_abort(void) { sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); } + +/* + * It is useful from an auditing/testing perspective to provide an easy way + * for the guest owner to know that the CPUID table has been initialized as + * expected, but that initialization happens too early in boot to print any + * sort of indicator, and there's not really any other good place to do it, + * so do it here. + */ +static int __init report_cpuid_table(void) +{ + const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); + + if (!cpuid_table->count) + return 0; + + pr_info("Using SNP CPUID table, %d entries present.\n", + cpuid_table->count); + + return 0; +} +arch_initcall(report_cpuid_table); From ba37a1438aeb540cc48722d629f4b2e7e4398466 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Mon, 7 Mar 2022 15:33:50 -0600 Subject: [PATCH 40/60] x86/sev: Add a sev= cmdline option For debugging purposes it is very useful to have a way to see the full contents of the SNP CPUID table provided to a guest. Add an sev=debug kernel command-line option to do so. Also introduce some infrastructure so that additional options can be specified via sev=option1[,option2] over time in a consistent manner. [ bp: Massage, simplify string parsing. ] Suggested-by: Borislav Petkov Signed-off-by: Michael Roth Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-41-brijesh.singh@amd.com --- .../admin-guide/kernel-parameters.txt | 2 + Documentation/x86/x86_64/boot-options.rst | 14 ++++++ arch/x86/kernel/sev.c | 44 +++++++++++++++++++ 3 files changed, 60 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 3f1cc5e317ed..48ad2ec0770b 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5308,6 +5308,8 @@ serialnumber [BUGS=X86-32] + sev=option[,option...] [X86-64] See Documentation/x86/x86_64/boot-options.rst + shapers= [NET] Maximal number of shapers. diff --git a/Documentation/x86/x86_64/boot-options.rst b/Documentation/x86/x86_64/boot-options.rst index 07aa0007f346..4efb1fa039ef 100644 --- a/Documentation/x86/x86_64/boot-options.rst +++ b/Documentation/x86/x86_64/boot-options.rst @@ -310,3 +310,17 @@ Miscellaneous Do not use GB pages for kernel direct mappings. gbpages Use GB pages for kernel direct mappings. + + +AMD SEV (Secure Encrypted Virtualization) +========================================= +Options relating to AMD SEV, specified via the following format: + +:: + + sev=option1[,option2] + +The available options are: + + debug + Enable debug messages. diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index c8733725d8bf..70ecc6e2f251 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -112,6 +112,13 @@ DEFINE_STATIC_KEY_FALSE(sev_es_enable_key); static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa); +struct sev_config { + __u64 debug : 1, + __reserved : 63; +}; + +static struct sev_config sev_cfg __read_mostly; + static __always_inline bool on_vc_stack(struct pt_regs *regs) { unsigned long sp = regs->sp; @@ -2042,6 +2049,23 @@ void __init snp_abort(void) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); } +static void dump_cpuid_table(void) +{ + const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); + int i = 0; + + pr_info("count=%d reserved=0x%x reserved2=0x%llx\n", + cpuid_table->count, cpuid_table->__reserved1, cpuid_table->__reserved2); + + for (i = 0; i < SNP_CPUID_COUNT_MAX; i++) { + const struct snp_cpuid_fn *fn = &cpuid_table->fn[i]; + + pr_info("index=%3d fn=0x%08x subfn=0x%08x: eax=0x%08x ebx=0x%08x ecx=0x%08x edx=0x%08x xcr0_in=0x%016llx xss_in=0x%016llx reserved=0x%016llx\n", + i, fn->eax_in, fn->ecx_in, fn->eax, fn->ebx, fn->ecx, + fn->edx, fn->xcr0_in, fn->xss_in, fn->__reserved); + } +} + /* * It is useful from an auditing/testing perspective to provide an easy way * for the guest owner to know that the CPUID table has been initialized as @@ -2059,6 +2083,26 @@ static int __init report_cpuid_table(void) pr_info("Using SNP CPUID table, %d entries present.\n", cpuid_table->count); + if (sev_cfg.debug) + dump_cpuid_table(); + return 0; } arch_initcall(report_cpuid_table); + +static int __init init_sev_config(char *str) +{ + char *s; + + while ((s = strsep(&str, ","))) { + if (!strcmp(s, "debug")) { + sev_cfg.debug = true; + continue; + } + + pr_info("SEV command-line option '%s' was not recognized\n", s); + } + + return 1; +} +__setup("sev=", init_sev_config); From d5af44dde5461d125d1602ac913ab5c6bdf09b8b Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Mon, 7 Mar 2022 15:33:51 -0600 Subject: [PATCH 41/60] x86/sev: Provide support for SNP guest request NAEs Version 2 of GHCB specification provides SNP_GUEST_REQUEST and SNP_EXT_GUEST_REQUEST NAE that can be used by the SNP guest to communicate with the PSP. While at it, add a snp_issue_guest_request() helper that will be used by driver or other subsystem to issue the request to PSP. See SEV-SNP firmware and GHCB spec for more details. Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-42-brijesh.singh@amd.com --- arch/x86/include/asm/sev-common.h | 3 ++ arch/x86/include/asm/sev.h | 14 ++++++++ arch/x86/include/uapi/asm/svm.h | 4 +++ arch/x86/kernel/sev.c | 57 +++++++++++++++++++++++++++++++ 4 files changed, 78 insertions(+) diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index 0759af9b1acf..b8357d6ecd47 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -128,6 +128,9 @@ struct snp_psc_desc { struct psc_entry entries[VMGEXIT_PSC_MAX_ENTRY]; } __packed; +/* Guest message request error code */ +#define SNP_GUEST_REQ_INVALID_LEN BIT_ULL(32) + #define GHCB_MSR_TERM_REQ 0x100 #define GHCB_MSR_TERM_REASON_SET_POS 12 #define GHCB_MSR_TERM_REASON_SET_MASK 0xf diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 84d3d5121e9c..c63a1d485c20 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -87,6 +87,14 @@ extern bool handle_vc_boot_ghcb(struct pt_regs *regs); #define RMPADJUST_VMSA_PAGE_BIT BIT(16) +/* SNP Guest message request */ +struct snp_req_data { + unsigned long req_gpa; + unsigned long resp_gpa; + unsigned long data_gpa; + unsigned int data_npages; +}; + #ifdef CONFIG_AMD_MEM_ENCRYPT extern struct static_key_false sev_es_enable_key; extern void __sev_es_ist_enter(struct pt_regs *regs); @@ -154,6 +162,7 @@ void snp_set_memory_private(unsigned long vaddr, unsigned int npages); void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); void snp_abort(void); +int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } @@ -173,6 +182,11 @@ static inline void snp_set_memory_private(unsigned long vaddr, unsigned int npag static inline void snp_set_wakeup_secondary_cpu(void) { } static inline bool snp_init(struct boot_params *bp) { return false; } static inline void snp_abort(void) { } +static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, + unsigned long *fw_err) +{ + return -ENOTTY; +} #endif #endif diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index cfea5e875088..f69c168391aa 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -109,6 +109,8 @@ #define SVM_VMGEXIT_SET_AP_JUMP_TABLE 0 #define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1 #define SVM_VMGEXIT_PSC 0x80000010 +#define SVM_VMGEXIT_GUEST_REQUEST 0x80000011 +#define SVM_VMGEXIT_EXT_GUEST_REQUEST 0x80000012 #define SVM_VMGEXIT_AP_CREATION 0x80000013 #define SVM_VMGEXIT_AP_CREATE_ON_INIT 0 #define SVM_VMGEXIT_AP_CREATE 1 @@ -225,6 +227,8 @@ { SVM_VMGEXIT_AP_HLT_LOOP, "vmgexit_ap_hlt_loop" }, \ { SVM_VMGEXIT_AP_JUMP_TABLE, "vmgexit_ap_jump_table" }, \ { SVM_VMGEXIT_PSC, "vmgexit_page_state_change" }, \ + { SVM_VMGEXIT_GUEST_REQUEST, "vmgexit_guest_request" }, \ + { SVM_VMGEXIT_EXT_GUEST_REQUEST, "vmgexit_ext_guest_request" }, \ { SVM_VMGEXIT_AP_CREATION, "vmgexit_ap_creation" }, \ { SVM_VMGEXIT_HV_FEATURES, "vmgexit_hypervisor_feature" }, \ { SVM_EXIT_ERR, "invalid_guest_state" } diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 70ecc6e2f251..7237b4178935 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -2106,3 +2106,60 @@ static int __init init_sev_config(char *str) return 1; } __setup("sev=", init_sev_config); + +int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err) +{ + struct ghcb_state state; + struct es_em_ctxt ctxt; + unsigned long flags; + struct ghcb *ghcb; + int ret; + + if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + return -ENODEV; + + if (!fw_err) + return -EINVAL; + + /* + * __sev_get_ghcb() needs to run with IRQs disabled because it is using + * a per-CPU GHCB. + */ + local_irq_save(flags); + + ghcb = __sev_get_ghcb(&state); + if (!ghcb) { + ret = -EIO; + goto e_restore_irq; + } + + vc_ghcb_invalidate(ghcb); + + if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) { + ghcb_set_rax(ghcb, input->data_gpa); + ghcb_set_rbx(ghcb, input->data_npages); + } + + ret = sev_es_ghcb_hv_call(ghcb, true, &ctxt, exit_code, input->req_gpa, input->resp_gpa); + if (ret) + goto e_put; + + if (ghcb->save.sw_exit_info_2) { + /* Number of expected pages are returned in RBX */ + if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST && + ghcb->save.sw_exit_info_2 == SNP_GUEST_REQ_INVALID_LEN) + input->data_npages = ghcb_get_rbx(ghcb); + + *fw_err = ghcb->save.sw_exit_info_2; + + ret = -EIO; + } + +e_put: + __sev_put_ghcb(&state); +e_restore_irq: + local_irq_restore(flags); + + return ret; +} +EXPORT_SYMBOL_GPL(snp_issue_guest_request); From 3a45b3753849c4a12cca2dd176c0192cd2a63e62 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Thu, 24 Feb 2022 10:56:21 -0600 Subject: [PATCH 42/60] x86/sev: Register SEV-SNP guest request platform device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Version 2 of the GHCB specification provides a Non Automatic Exit (NAE) event type that can be used by the SEV-SNP guest to communicate with the PSP without risk from a malicious hypervisor who wishes to read, alter, drop or replay the messages sent. SNP_LAUNCH_UPDATE can insert two special pages into the guest’s memory: the secrets page and the CPUID page. The PSP firmware populates the contents of the secrets page. The secrets page contains encryption keys used by the guest to interact with the firmware. Because the secrets page is encrypted with the guest’s memory encryption key, the hypervisor cannot read the keys. See SEV-SNP firmware spec for further details on the secrets page format. Create a platform device that the SEV-SNP guest driver can bind to get the platform resources such as encryption key and message id to use to communicate with the PSP. The SEV-SNP guest driver provides a userspace interface to get the attestation report, key derivation, extended attestation report etc. Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-43-brijesh.singh@amd.com --- arch/x86/include/asm/sev.h | 4 +++ arch/x86/kernel/sev.c | 56 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index c63a1d485c20..9c2d33f1cfee 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -95,6 +95,10 @@ struct snp_req_data { unsigned int data_npages; }; +struct snp_guest_platform_data { + u64 secrets_gpa; +}; + #ifdef CONFIG_AMD_MEM_ENCRYPT extern struct static_key_false sev_es_enable_key; extern void __sev_es_ist_enter(struct pt_regs *regs); diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 7237b4178935..ace43e116b40 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -19,6 +19,9 @@ #include #include #include +#include +#include +#include #include #include @@ -2163,3 +2166,56 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned return ret; } EXPORT_SYMBOL_GPL(snp_issue_guest_request); + +static struct platform_device guest_req_device = { + .name = "snp-guest", + .id = -1, +}; + +static u64 get_secrets_page(void) +{ + u64 pa_data = boot_params.cc_blob_address; + struct cc_blob_sev_info info; + void *map; + + /* + * The CC blob contains the address of the secrets page, check if the + * blob is present. + */ + if (!pa_data) + return 0; + + map = early_memremap(pa_data, sizeof(info)); + memcpy(&info, map, sizeof(info)); + early_memunmap(map, sizeof(info)); + + /* smoke-test the secrets page passed */ + if (!info.secrets_phys || info.secrets_len != PAGE_SIZE) + return 0; + + return info.secrets_phys; +} + +static int __init snp_init_platform_device(void) +{ + struct snp_guest_platform_data data; + u64 gpa; + + if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + return -ENODEV; + + gpa = get_secrets_page(); + if (!gpa) + return -ENODEV; + + data.secrets_gpa = gpa; + if (platform_device_add_data(&guest_req_device, &data, sizeof(data))) + return -ENODEV; + + if (platform_device_register(&guest_req_device)) + return -ENODEV; + + pr_info("SNP guest platform device initialized.\n"); + return 0; +} +device_initcall(snp_init_platform_device); From fce96cf0443083e37455eff8f78fd240c621dae3 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Mon, 7 Mar 2022 15:33:53 -0600 Subject: [PATCH 43/60] virt: Add SEV-SNP guest driver The SEV-SNP specification provides the guest a mechanism to communicate with the PSP without risk from a malicious hypervisor who wishes to read, alter, drop or replay the messages sent. The driver uses snp_issue_guest_request() to issue GHCB SNP_GUEST_REQUEST or SNP_EXT_GUEST_REQUEST NAE events to submit the request to PSP. The PSP requires that all communication should be encrypted using key specified through a struct snp_guest_platform_data descriptor. Userspace can use SNP_GET_REPORT ioctl() to query the guest attestation report. See SEV-SNP spec section Guest Messages for more details. [ bp: Remove the "what" from the commit message, massage. ] Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-44-brijesh.singh@amd.com --- Documentation/virt/coco/sevguest.rst | 86 ++++ Documentation/virt/index.rst | 1 + drivers/virt/Kconfig | 3 + drivers/virt/Makefile | 1 + drivers/virt/coco/sevguest/Kconfig | 14 + drivers/virt/coco/sevguest/Makefile | 2 + drivers/virt/coco/sevguest/sevguest.c | 607 ++++++++++++++++++++++++++ drivers/virt/coco/sevguest/sevguest.h | 98 +++++ include/uapi/linux/sev-guest.h | 50 +++ 9 files changed, 862 insertions(+) create mode 100644 Documentation/virt/coco/sevguest.rst create mode 100644 drivers/virt/coco/sevguest/Kconfig create mode 100644 drivers/virt/coco/sevguest/Makefile create mode 100644 drivers/virt/coco/sevguest/sevguest.c create mode 100644 drivers/virt/coco/sevguest/sevguest.h create mode 100644 include/uapi/linux/sev-guest.h diff --git a/Documentation/virt/coco/sevguest.rst b/Documentation/virt/coco/sevguest.rst new file mode 100644 index 000000000000..3da782e867a3 --- /dev/null +++ b/Documentation/virt/coco/sevguest.rst @@ -0,0 +1,86 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=================================================================== +The Definitive SEV Guest API Documentation +=================================================================== + +1. General description +====================== + +The SEV API is a set of ioctls that are used by the guest or hypervisor +to get or set a certain aspect of the SEV virtual machine. The ioctls belong +to the following classes: + + - Hypervisor ioctls: These query and set global attributes which affect the + whole SEV firmware. These ioctl are used by platform provisioning tools. + + - Guest ioctls: These query and set attributes of the SEV virtual machine. + +2. API description +================== + +This section describes ioctls that is used for querying the SEV guest report +from the SEV firmware. For each ioctl, the following information is provided +along with a description: + + Technology: + which SEV technology provides this ioctl. SEV, SEV-ES, SEV-SNP or all. + + Type: + hypervisor or guest. The ioctl can be used inside the guest or the + hypervisor. + + Parameters: + what parameters are accepted by the ioctl. + + Returns: + the return value. General error numbers (-ENOMEM, -EINVAL) + are not detailed, but errors with specific meanings are. + +The guest ioctl should be issued on a file descriptor of the /dev/sev-guest device. +The ioctl accepts struct snp_user_guest_request. The input and output structure is +specified through the req_data and resp_data field respectively. If the ioctl fails +to execute due to a firmware error, then fw_err code will be set otherwise the +fw_err will be set to 0x00000000000000ff. + +The firmware checks that the message sequence counter is one greater than +the guests message sequence counter. If guest driver fails to increment message +counter (e.g. counter overflow), then -EIO will be returned. + +:: + + struct snp_guest_request_ioctl { + /* Message version number */ + __u32 msg_version; + + /* Request and response structure address */ + __u64 req_data; + __u64 resp_data; + + /* firmware error code on failure (see psp-sev.h) */ + __u64 fw_err; + }; + +2.1 SNP_GET_REPORT +------------------ + +:Technology: sev-snp +:Type: guest ioctl +:Parameters (in): struct snp_report_req +:Returns (out): struct snp_report_resp on success, -negative on error + +The SNP_GET_REPORT ioctl can be used to query the attestation report from the +SEV-SNP firmware. The ioctl uses the SNP_GUEST_REQUEST (MSG_REPORT_REQ) command +provided by the SEV-SNP firmware to query the attestation report. + +On success, the snp_report_resp.data will contains the report. The report +contain the format described in the SEV-SNP specification. See the SEV-SNP +specification for further details. + + +Reference +--------- + +SEV-SNP and GHCB specification: developer.amd.com/sev + +The driver is based on SEV-SNP firmware spec 0.9 and GHCB spec version 2.0. diff --git a/Documentation/virt/index.rst b/Documentation/virt/index.rst index edea7fea95a8..40ad0d20032e 100644 --- a/Documentation/virt/index.rst +++ b/Documentation/virt/index.rst @@ -13,6 +13,7 @@ Linux Virtualization Support guest-halt-polling ne_overview acrn/index + coco/sevguest .. only:: html and subproject diff --git a/drivers/virt/Kconfig b/drivers/virt/Kconfig index 121b9293c737..7d3273cfab27 100644 --- a/drivers/virt/Kconfig +++ b/drivers/virt/Kconfig @@ -47,4 +47,7 @@ source "drivers/virt/vboxguest/Kconfig" source "drivers/virt/nitro_enclaves/Kconfig" source "drivers/virt/acrn/Kconfig" + +source "drivers/virt/coco/sevguest/Kconfig" + endif diff --git a/drivers/virt/Makefile b/drivers/virt/Makefile index 108d0ffcc9aa..7b87a7ba1972 100644 --- a/drivers/virt/Makefile +++ b/drivers/virt/Makefile @@ -9,3 +9,4 @@ obj-y += vboxguest/ obj-$(CONFIG_NITRO_ENCLAVES) += nitro_enclaves/ obj-$(CONFIG_ACRN_HSM) += acrn/ +obj-$(CONFIG_SEV_GUEST) += coco/sevguest/ diff --git a/drivers/virt/coco/sevguest/Kconfig b/drivers/virt/coco/sevguest/Kconfig new file mode 100644 index 000000000000..74ca1fe09437 --- /dev/null +++ b/drivers/virt/coco/sevguest/Kconfig @@ -0,0 +1,14 @@ +config SEV_GUEST + tristate "AMD SEV Guest driver" + default m + depends on AMD_MEM_ENCRYPT + select CRYPTO_AEAD2 + select CRYPTO_GCM + help + SEV-SNP firmware provides the guest a mechanism to communicate with + the PSP without risk from a malicious hypervisor who wishes to read, + alter, drop or replay the messages sent. The driver provides + userspace interface to communicate with the PSP to request the + attestation report and more. + + If you choose 'M' here, this module will be called sevguest. diff --git a/drivers/virt/coco/sevguest/Makefile b/drivers/virt/coco/sevguest/Makefile new file mode 100644 index 000000000000..b1ffb2b4177b --- /dev/null +++ b/drivers/virt/coco/sevguest/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_SEV_GUEST) += sevguest.o diff --git a/drivers/virt/coco/sevguest/sevguest.c b/drivers/virt/coco/sevguest/sevguest.c new file mode 100644 index 000000000000..beda93cdeb4f --- /dev/null +++ b/drivers/virt/coco/sevguest/sevguest.c @@ -0,0 +1,607 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Secure Encrypted Virtualization Nested Paging (SEV-SNP) guest request interface + * + * Copyright (C) 2021 Advanced Micro Devices, Inc. + * + * Author: Brijesh Singh + */ + +#define pr_fmt(fmt) "SNP: GUEST: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "sevguest.h" + +#define DEVICE_NAME "sev-guest" +#define AAD_LEN 48 +#define MSG_HDR_VER 1 + +struct snp_guest_crypto { + struct crypto_aead *tfm; + u8 *iv, *authtag; + int iv_len, a_len; +}; + +struct snp_guest_dev { + struct device *dev; + struct miscdevice misc; + + struct snp_guest_crypto *crypto; + struct snp_guest_msg *request, *response; + struct snp_secrets_page_layout *layout; + struct snp_req_data input; + u32 *os_area_msg_seqno; + u8 *vmpck; +}; + +static u32 vmpck_id; +module_param(vmpck_id, uint, 0444); +MODULE_PARM_DESC(vmpck_id, "The VMPCK ID to use when communicating with the PSP."); + +/* Mutex to serialize the shared buffer access and command handling. */ +static DEFINE_MUTEX(snp_cmd_mutex); + +static bool is_vmpck_empty(struct snp_guest_dev *snp_dev) +{ + char zero_key[VMPCK_KEY_LEN] = {0}; + + if (snp_dev->vmpck) + return !memcmp(snp_dev->vmpck, zero_key, VMPCK_KEY_LEN); + + return true; +} + +static void snp_disable_vmpck(struct snp_guest_dev *snp_dev) +{ + memzero_explicit(snp_dev->vmpck, VMPCK_KEY_LEN); + snp_dev->vmpck = NULL; +} + +static inline u64 __snp_get_msg_seqno(struct snp_guest_dev *snp_dev) +{ + u64 count; + + lockdep_assert_held(&snp_cmd_mutex); + + /* Read the current message sequence counter from secrets pages */ + count = *snp_dev->os_area_msg_seqno; + + return count + 1; +} + +/* Return a non-zero on success */ +static u64 snp_get_msg_seqno(struct snp_guest_dev *snp_dev) +{ + u64 count = __snp_get_msg_seqno(snp_dev); + + /* + * The message sequence counter for the SNP guest request is a 64-bit + * value but the version 2 of GHCB specification defines a 32-bit storage + * for it. If the counter exceeds the 32-bit value then return zero. + * The caller should check the return value, but if the caller happens to + * not check the value and use it, then the firmware treats zero as an + * invalid number and will fail the message request. + */ + if (count >= UINT_MAX) { + dev_err(snp_dev->dev, "request message sequence counter overflow\n"); + return 0; + } + + return count; +} + +static void snp_inc_msg_seqno(struct snp_guest_dev *snp_dev) +{ + /* + * The counter is also incremented by the PSP, so increment it by 2 + * and save in secrets page. + */ + *snp_dev->os_area_msg_seqno += 2; +} + +static inline struct snp_guest_dev *to_snp_dev(struct file *file) +{ + struct miscdevice *dev = file->private_data; + + return container_of(dev, struct snp_guest_dev, misc); +} + +static struct snp_guest_crypto *init_crypto(struct snp_guest_dev *snp_dev, u8 *key, size_t keylen) +{ + struct snp_guest_crypto *crypto; + + crypto = kzalloc(sizeof(*crypto), GFP_KERNEL_ACCOUNT); + if (!crypto) + return NULL; + + crypto->tfm = crypto_alloc_aead("gcm(aes)", 0, 0); + if (IS_ERR(crypto->tfm)) + goto e_free; + + if (crypto_aead_setkey(crypto->tfm, key, keylen)) + goto e_free_crypto; + + crypto->iv_len = crypto_aead_ivsize(crypto->tfm); + crypto->iv = kmalloc(crypto->iv_len, GFP_KERNEL_ACCOUNT); + if (!crypto->iv) + goto e_free_crypto; + + if (crypto_aead_authsize(crypto->tfm) > MAX_AUTHTAG_LEN) { + if (crypto_aead_setauthsize(crypto->tfm, MAX_AUTHTAG_LEN)) { + dev_err(snp_dev->dev, "failed to set authsize to %d\n", MAX_AUTHTAG_LEN); + goto e_free_iv; + } + } + + crypto->a_len = crypto_aead_authsize(crypto->tfm); + crypto->authtag = kmalloc(crypto->a_len, GFP_KERNEL_ACCOUNT); + if (!crypto->authtag) + goto e_free_auth; + + return crypto; + +e_free_auth: + kfree(crypto->authtag); +e_free_iv: + kfree(crypto->iv); +e_free_crypto: + crypto_free_aead(crypto->tfm); +e_free: + kfree(crypto); + + return NULL; +} + +static void deinit_crypto(struct snp_guest_crypto *crypto) +{ + crypto_free_aead(crypto->tfm); + kfree(crypto->iv); + kfree(crypto->authtag); + kfree(crypto); +} + +static int enc_dec_message(struct snp_guest_crypto *crypto, struct snp_guest_msg *msg, + u8 *src_buf, u8 *dst_buf, size_t len, bool enc) +{ + struct snp_guest_msg_hdr *hdr = &msg->hdr; + struct scatterlist src[3], dst[3]; + DECLARE_CRYPTO_WAIT(wait); + struct aead_request *req; + int ret; + + req = aead_request_alloc(crypto->tfm, GFP_KERNEL); + if (!req) + return -ENOMEM; + + /* + * AEAD memory operations: + * +------ AAD -------+------- DATA -----+---- AUTHTAG----+ + * | msg header | plaintext | hdr->authtag | + * | bytes 30h - 5Fh | or | | + * | | cipher | | + * +------------------+------------------+----------------+ + */ + sg_init_table(src, 3); + sg_set_buf(&src[0], &hdr->algo, AAD_LEN); + sg_set_buf(&src[1], src_buf, hdr->msg_sz); + sg_set_buf(&src[2], hdr->authtag, crypto->a_len); + + sg_init_table(dst, 3); + sg_set_buf(&dst[0], &hdr->algo, AAD_LEN); + sg_set_buf(&dst[1], dst_buf, hdr->msg_sz); + sg_set_buf(&dst[2], hdr->authtag, crypto->a_len); + + aead_request_set_ad(req, AAD_LEN); + aead_request_set_tfm(req, crypto->tfm); + aead_request_set_callback(req, 0, crypto_req_done, &wait); + + aead_request_set_crypt(req, src, dst, len, crypto->iv); + ret = crypto_wait_req(enc ? crypto_aead_encrypt(req) : crypto_aead_decrypt(req), &wait); + + aead_request_free(req); + return ret; +} + +static int __enc_payload(struct snp_guest_dev *snp_dev, struct snp_guest_msg *msg, + void *plaintext, size_t len) +{ + struct snp_guest_crypto *crypto = snp_dev->crypto; + struct snp_guest_msg_hdr *hdr = &msg->hdr; + + memset(crypto->iv, 0, crypto->iv_len); + memcpy(crypto->iv, &hdr->msg_seqno, sizeof(hdr->msg_seqno)); + + return enc_dec_message(crypto, msg, plaintext, msg->payload, len, true); +} + +static int dec_payload(struct snp_guest_dev *snp_dev, struct snp_guest_msg *msg, + void *plaintext, size_t len) +{ + struct snp_guest_crypto *crypto = snp_dev->crypto; + struct snp_guest_msg_hdr *hdr = &msg->hdr; + + /* Build IV with response buffer sequence number */ + memset(crypto->iv, 0, crypto->iv_len); + memcpy(crypto->iv, &hdr->msg_seqno, sizeof(hdr->msg_seqno)); + + return enc_dec_message(crypto, msg, msg->payload, plaintext, len, false); +} + +static int verify_and_dec_payload(struct snp_guest_dev *snp_dev, void *payload, u32 sz) +{ + struct snp_guest_crypto *crypto = snp_dev->crypto; + struct snp_guest_msg *resp = snp_dev->response; + struct snp_guest_msg *req = snp_dev->request; + struct snp_guest_msg_hdr *req_hdr = &req->hdr; + struct snp_guest_msg_hdr *resp_hdr = &resp->hdr; + + dev_dbg(snp_dev->dev, "response [seqno %lld type %d version %d sz %d]\n", + resp_hdr->msg_seqno, resp_hdr->msg_type, resp_hdr->msg_version, resp_hdr->msg_sz); + + /* Verify that the sequence counter is incremented by 1 */ + if (unlikely(resp_hdr->msg_seqno != (req_hdr->msg_seqno + 1))) + return -EBADMSG; + + /* Verify response message type and version number. */ + if (resp_hdr->msg_type != (req_hdr->msg_type + 1) || + resp_hdr->msg_version != req_hdr->msg_version) + return -EBADMSG; + + /* + * If the message size is greater than our buffer length then return + * an error. + */ + if (unlikely((resp_hdr->msg_sz + crypto->a_len) > sz)) + return -EBADMSG; + + /* Decrypt the payload */ + return dec_payload(snp_dev, resp, payload, resp_hdr->msg_sz + crypto->a_len); +} + +static bool enc_payload(struct snp_guest_dev *snp_dev, u64 seqno, int version, u8 type, + void *payload, size_t sz) +{ + struct snp_guest_msg *req = snp_dev->request; + struct snp_guest_msg_hdr *hdr = &req->hdr; + + memset(req, 0, sizeof(*req)); + + hdr->algo = SNP_AEAD_AES_256_GCM; + hdr->hdr_version = MSG_HDR_VER; + hdr->hdr_sz = sizeof(*hdr); + hdr->msg_type = type; + hdr->msg_version = version; + hdr->msg_seqno = seqno; + hdr->msg_vmpck = vmpck_id; + hdr->msg_sz = sz; + + /* Verify the sequence number is non-zero */ + if (!hdr->msg_seqno) + return -ENOSR; + + dev_dbg(snp_dev->dev, "request [seqno %lld type %d version %d sz %d]\n", + hdr->msg_seqno, hdr->msg_type, hdr->msg_version, hdr->msg_sz); + + return __enc_payload(snp_dev, req, payload, sz); +} + +static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, int msg_ver, + u8 type, void *req_buf, size_t req_sz, void *resp_buf, + u32 resp_sz, __u64 *fw_err) +{ + unsigned long err; + u64 seqno; + int rc; + + /* Get message sequence and verify that its a non-zero */ + seqno = snp_get_msg_seqno(snp_dev); + if (!seqno) + return -EIO; + + memset(snp_dev->response, 0, sizeof(struct snp_guest_msg)); + + /* Encrypt the userspace provided payload */ + rc = enc_payload(snp_dev, seqno, msg_ver, type, req_buf, req_sz); + if (rc) + return rc; + + /* Call firmware to process the request */ + rc = snp_issue_guest_request(exit_code, &snp_dev->input, &err); + if (fw_err) + *fw_err = err; + + if (rc) + return rc; + + /* + * The verify_and_dec_payload() will fail only if the hypervisor is + * actively modifying the message header or corrupting the encrypted payload. + * This hints that hypervisor is acting in a bad faith. Disable the VMPCK so that + * the key cannot be used for any communication. The key is disabled to ensure + * that AES-GCM does not use the same IV while encrypting the request payload. + */ + rc = verify_and_dec_payload(snp_dev, resp_buf, resp_sz); + if (rc) { + dev_alert(snp_dev->dev, + "Detected unexpected decode failure, disabling the vmpck_id %d\n", + vmpck_id); + snp_disable_vmpck(snp_dev); + return rc; + } + + /* Increment to new message sequence after payload decryption was successful. */ + snp_inc_msg_seqno(snp_dev); + + return 0; +} + +static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg) +{ + struct snp_guest_crypto *crypto = snp_dev->crypto; + struct snp_report_resp *resp; + struct snp_report_req req; + int rc, resp_len; + + lockdep_assert_held(&snp_cmd_mutex); + + if (!arg->req_data || !arg->resp_data) + return -EINVAL; + + if (copy_from_user(&req, (void __user *)arg->req_data, sizeof(req))) + return -EFAULT; + + /* + * The intermediate response buffer is used while decrypting the + * response payload. Make sure that it has enough space to cover the + * authtag. + */ + resp_len = sizeof(resp->data) + crypto->a_len; + resp = kzalloc(resp_len, GFP_KERNEL_ACCOUNT); + if (!resp) + return -ENOMEM; + + rc = handle_guest_request(snp_dev, SVM_VMGEXIT_GUEST_REQUEST, arg->msg_version, + SNP_MSG_REPORT_REQ, &req, sizeof(req), resp->data, + resp_len, &arg->fw_err); + if (rc) + goto e_free; + + if (copy_to_user((void __user *)arg->resp_data, resp, sizeof(*resp))) + rc = -EFAULT; + +e_free: + kfree(resp); + return rc; +} + +static long snp_guest_ioctl(struct file *file, unsigned int ioctl, unsigned long arg) +{ + struct snp_guest_dev *snp_dev = to_snp_dev(file); + void __user *argp = (void __user *)arg; + struct snp_guest_request_ioctl input; + int ret = -ENOTTY; + + if (copy_from_user(&input, argp, sizeof(input))) + return -EFAULT; + + input.fw_err = 0xff; + + /* Message version must be non-zero */ + if (!input.msg_version) + return -EINVAL; + + mutex_lock(&snp_cmd_mutex); + + /* Check if the VMPCK is not empty */ + if (is_vmpck_empty(snp_dev)) { + dev_err_ratelimited(snp_dev->dev, "VMPCK is disabled\n"); + mutex_unlock(&snp_cmd_mutex); + return -ENOTTY; + } + + switch (ioctl) { + case SNP_GET_REPORT: + ret = get_report(snp_dev, &input); + break; + default: + break; + } + + mutex_unlock(&snp_cmd_mutex); + + if (input.fw_err && copy_to_user(argp, &input, sizeof(input))) + return -EFAULT; + + return ret; +} + +static void free_shared_pages(void *buf, size_t sz) +{ + unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; + int ret; + + if (!buf) + return; + + ret = set_memory_encrypted((unsigned long)buf, npages); + if (ret) { + WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n"); + return; + } + + __free_pages(virt_to_page(buf), get_order(sz)); +} + +static void *alloc_shared_pages(size_t sz) +{ + unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; + struct page *page; + int ret; + + page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(sz)); + if (IS_ERR(page)) + return NULL; + + ret = set_memory_decrypted((unsigned long)page_address(page), npages); + if (ret) { + pr_err("failed to mark page shared, ret=%d\n", ret); + __free_pages(page, get_order(sz)); + return NULL; + } + + return page_address(page); +} + +static const struct file_operations snp_guest_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = snp_guest_ioctl, +}; + +static u8 *get_vmpck(int id, struct snp_secrets_page_layout *layout, u32 **seqno) +{ + u8 *key = NULL; + + switch (id) { + case 0: + *seqno = &layout->os_area.msg_seqno_0; + key = layout->vmpck0; + break; + case 1: + *seqno = &layout->os_area.msg_seqno_1; + key = layout->vmpck1; + break; + case 2: + *seqno = &layout->os_area.msg_seqno_2; + key = layout->vmpck2; + break; + case 3: + *seqno = &layout->os_area.msg_seqno_3; + key = layout->vmpck3; + break; + default: + break; + } + + return key; +} + +static int __init snp_guest_probe(struct platform_device *pdev) +{ + struct snp_secrets_page_layout *layout; + struct snp_guest_platform_data *data; + struct device *dev = &pdev->dev; + struct snp_guest_dev *snp_dev; + struct miscdevice *misc; + int ret; + + if (!dev->platform_data) + return -ENODEV; + + data = (struct snp_guest_platform_data *)dev->platform_data; + layout = (__force void *)ioremap_encrypted(data->secrets_gpa, PAGE_SIZE); + if (!layout) + return -ENODEV; + + ret = -ENOMEM; + snp_dev = devm_kzalloc(&pdev->dev, sizeof(struct snp_guest_dev), GFP_KERNEL); + if (!snp_dev) + goto e_unmap; + + ret = -EINVAL; + snp_dev->vmpck = get_vmpck(vmpck_id, layout, &snp_dev->os_area_msg_seqno); + if (!snp_dev->vmpck) { + dev_err(dev, "invalid vmpck id %d\n", vmpck_id); + goto e_unmap; + } + + /* Verify that VMPCK is not zero. */ + if (is_vmpck_empty(snp_dev)) { + dev_err(dev, "vmpck id %d is null\n", vmpck_id); + goto e_unmap; + } + + platform_set_drvdata(pdev, snp_dev); + snp_dev->dev = dev; + snp_dev->layout = layout; + + /* Allocate the shared page used for the request and response message. */ + snp_dev->request = alloc_shared_pages(sizeof(struct snp_guest_msg)); + if (!snp_dev->request) + goto e_unmap; + + snp_dev->response = alloc_shared_pages(sizeof(struct snp_guest_msg)); + if (!snp_dev->response) + goto e_free_request; + + ret = -EIO; + snp_dev->crypto = init_crypto(snp_dev, snp_dev->vmpck, VMPCK_KEY_LEN); + if (!snp_dev->crypto) + goto e_free_response; + + misc = &snp_dev->misc; + misc->minor = MISC_DYNAMIC_MINOR; + misc->name = DEVICE_NAME; + misc->fops = &snp_guest_fops; + + /* initial the input address for guest request */ + snp_dev->input.req_gpa = __pa(snp_dev->request); + snp_dev->input.resp_gpa = __pa(snp_dev->response); + + ret = misc_register(misc); + if (ret) + goto e_free_response; + + dev_info(dev, "Initialized SNP guest driver (using vmpck_id %d)\n", vmpck_id); + return 0; + +e_free_response: + free_shared_pages(snp_dev->response, sizeof(struct snp_guest_msg)); +e_free_request: + free_shared_pages(snp_dev->request, sizeof(struct snp_guest_msg)); +e_unmap: + iounmap(layout); + return ret; +} + +static int __exit snp_guest_remove(struct platform_device *pdev) +{ + struct snp_guest_dev *snp_dev = platform_get_drvdata(pdev); + + free_shared_pages(snp_dev->response, sizeof(struct snp_guest_msg)); + free_shared_pages(snp_dev->request, sizeof(struct snp_guest_msg)); + deinit_crypto(snp_dev->crypto); + misc_deregister(&snp_dev->misc); + + return 0; +} + +static struct platform_driver snp_guest_driver = { + .remove = __exit_p(snp_guest_remove), + .driver = { + .name = "snp-guest", + }, +}; + +module_platform_driver_probe(snp_guest_driver, snp_guest_probe); + +MODULE_AUTHOR("Brijesh Singh "); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1.0.0"); +MODULE_DESCRIPTION("AMD SNP Guest Driver"); diff --git a/drivers/virt/coco/sevguest/sevguest.h b/drivers/virt/coco/sevguest/sevguest.h new file mode 100644 index 000000000000..d39bdd013765 --- /dev/null +++ b/drivers/virt/coco/sevguest/sevguest.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 Advanced Micro Devices, Inc. + * + * Author: Brijesh Singh + * + * SEV-SNP API spec is available at https://developer.amd.com/sev + */ + +#ifndef __VIRT_SEVGUEST_H__ +#define __VIRT_SEVGUEST_H__ + +#include + +#define MAX_AUTHTAG_LEN 32 + +/* See SNP spec SNP_GUEST_REQUEST section for the structure */ +enum msg_type { + SNP_MSG_TYPE_INVALID = 0, + SNP_MSG_CPUID_REQ, + SNP_MSG_CPUID_RSP, + SNP_MSG_KEY_REQ, + SNP_MSG_KEY_RSP, + SNP_MSG_REPORT_REQ, + SNP_MSG_REPORT_RSP, + SNP_MSG_EXPORT_REQ, + SNP_MSG_EXPORT_RSP, + SNP_MSG_IMPORT_REQ, + SNP_MSG_IMPORT_RSP, + SNP_MSG_ABSORB_REQ, + SNP_MSG_ABSORB_RSP, + SNP_MSG_VMRK_REQ, + SNP_MSG_VMRK_RSP, + + SNP_MSG_TYPE_MAX +}; + +enum aead_algo { + SNP_AEAD_INVALID, + SNP_AEAD_AES_256_GCM, +}; + +struct snp_guest_msg_hdr { + u8 authtag[MAX_AUTHTAG_LEN]; + u64 msg_seqno; + u8 rsvd1[8]; + u8 algo; + u8 hdr_version; + u16 hdr_sz; + u8 msg_type; + u8 msg_version; + u16 msg_sz; + u32 rsvd2; + u8 msg_vmpck; + u8 rsvd3[35]; +} __packed; + +struct snp_guest_msg { + struct snp_guest_msg_hdr hdr; + u8 payload[4000]; +} __packed; + +/* + * The secrets page contains 96-bytes of reserved field that can be used by + * the guest OS. The guest OS uses the area to save the message sequence + * number for each VMPCK. + * + * See the GHCB spec section Secret page layout for the format for this area. + */ +struct secrets_os_area { + u32 msg_seqno_0; + u32 msg_seqno_1; + u32 msg_seqno_2; + u32 msg_seqno_3; + u64 ap_jump_table_pa; + u8 rsvd[40]; + u8 guest_usage[32]; +} __packed; + +#define VMPCK_KEY_LEN 32 + +/* See the SNP spec version 0.9 for secrets page format */ +struct snp_secrets_page_layout { + u32 version; + u32 imien : 1, + rsvd1 : 31; + u32 fms; + u32 rsvd2; + u8 gosvw[16]; + u8 vmpck0[VMPCK_KEY_LEN]; + u8 vmpck1[VMPCK_KEY_LEN]; + u8 vmpck2[VMPCK_KEY_LEN]; + u8 vmpck3[VMPCK_KEY_LEN]; + struct secrets_os_area os_area; + u8 rsvd3[3840]; +} __packed; + +#endif /* __VIRT_SEVGUEST_H__ */ diff --git a/include/uapi/linux/sev-guest.h b/include/uapi/linux/sev-guest.h new file mode 100644 index 000000000000..38f11d723c68 --- /dev/null +++ b/include/uapi/linux/sev-guest.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * Userspace interface for AMD SEV and SNP guest driver. + * + * Copyright (C) 2021 Advanced Micro Devices, Inc. + * + * Author: Brijesh Singh + * + * SEV API specification is available at: https://developer.amd.com/sev/ + */ + +#ifndef __UAPI_LINUX_SEV_GUEST_H_ +#define __UAPI_LINUX_SEV_GUEST_H_ + +#include + +struct snp_report_req { + /* user data that should be included in the report */ + __u8 user_data[64]; + + /* The vmpl level to be included in the report */ + __u32 vmpl; + + /* Must be zero filled */ + __u8 rsvd[28]; +}; + +struct snp_report_resp { + /* response data, see SEV-SNP spec for the format */ + __u8 data[4000]; +}; + +struct snp_guest_request_ioctl { + /* message version number (must be non-zero) */ + __u8 msg_version; + + /* Request and response structure address */ + __u64 req_data; + __u64 resp_data; + + /* firmware error code on failure (see psp-sev.h) */ + __u64 fw_err; +}; + +#define SNP_GUEST_REQ_IOC_TYPE 'S' + +/* Get SNP attestation report */ +#define SNP_GET_REPORT _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x0, struct snp_guest_request_ioctl) + +#endif /* __UAPI_LINUX_SEV_GUEST_H_ */ From 68de0b2f938642079c0c853b219bdb88c4dc4d13 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Thu, 24 Feb 2022 10:56:23 -0600 Subject: [PATCH 44/60] virt: sevguest: Add support to derive key The SNP_GET_DERIVED_KEY ioctl interface can be used by the SNP guest to ask the firmware to provide a key derived from a root key. The derived key may be used by the guest for any purposes it chooses, such as a sealing key or communicating with the external entities. See SEV-SNP firmware spec for more information. [ bp: No need to memset "req" - it will get overwritten. ] Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Reviewed-by: Liam Merwick Link: https://lore.kernel.org/r/20220307213356.2797205-45-brijesh.singh@amd.com --- Documentation/virt/coco/sevguest.rst | 17 ++++++++++ drivers/virt/coco/sevguest/sevguest.c | 45 +++++++++++++++++++++++++++ include/uapi/linux/sev-guest.h | 17 ++++++++++ 3 files changed, 79 insertions(+) diff --git a/Documentation/virt/coco/sevguest.rst b/Documentation/virt/coco/sevguest.rst index 3da782e867a3..4135c1431241 100644 --- a/Documentation/virt/coco/sevguest.rst +++ b/Documentation/virt/coco/sevguest.rst @@ -77,6 +77,23 @@ On success, the snp_report_resp.data will contains the report. The report contain the format described in the SEV-SNP specification. See the SEV-SNP specification for further details. +2.2 SNP_GET_DERIVED_KEY +----------------------- +:Technology: sev-snp +:Type: guest ioctl +:Parameters (in): struct snp_derived_key_req +:Returns (out): struct snp_derived_key_resp on success, -negative on error + +The SNP_GET_DERIVED_KEY ioctl can be used to get a key derive from a root key. +The derived key can be used by the guest for any purpose, such as sealing keys +or communicating with external entities. + +The ioctl uses the SNP_GUEST_REQUEST (MSG_KEY_REQ) command provided by the +SEV-SNP firmware to derive the key. See SEV-SNP specification for further details +on the various fields passed in the key derivation request. + +On success, the snp_derived_key_resp.data contains the derived key value. See +the SEV-SNP specification for further details. Reference --------- diff --git a/drivers/virt/coco/sevguest/sevguest.c b/drivers/virt/coco/sevguest/sevguest.c index beda93cdeb4f..393777b72e5e 100644 --- a/drivers/virt/coco/sevguest/sevguest.c +++ b/drivers/virt/coco/sevguest/sevguest.c @@ -391,6 +391,48 @@ static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_io return rc; } +static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg) +{ + struct snp_guest_crypto *crypto = snp_dev->crypto; + struct snp_derived_key_resp resp = {0}; + struct snp_derived_key_req req; + int rc, resp_len; + /* Response data is 64 bytes and max authsize for GCM is 16 bytes. */ + u8 buf[64 + 16]; + + lockdep_assert_held(&snp_cmd_mutex); + + if (!arg->req_data || !arg->resp_data) + return -EINVAL; + + /* + * The intermediate response buffer is used while decrypting the + * response payload. Make sure that it has enough space to cover the + * authtag. + */ + resp_len = sizeof(resp.data) + crypto->a_len; + if (sizeof(buf) < resp_len) + return -ENOMEM; + + if (copy_from_user(&req, (void __user *)arg->req_data, sizeof(req))) + return -EFAULT; + + rc = handle_guest_request(snp_dev, SVM_VMGEXIT_GUEST_REQUEST, arg->msg_version, + SNP_MSG_KEY_REQ, &req, sizeof(req), buf, resp_len, + &arg->fw_err); + if (rc) + return rc; + + memcpy(resp.data, buf, sizeof(resp.data)); + if (copy_to_user((void __user *)arg->resp_data, &resp, sizeof(resp))) + rc = -EFAULT; + + /* The response buffer contains the sensitive data, explicitly clear it. */ + memzero_explicit(buf, sizeof(buf)); + memzero_explicit(&resp, sizeof(resp)); + return rc; +} + static long snp_guest_ioctl(struct file *file, unsigned int ioctl, unsigned long arg) { struct snp_guest_dev *snp_dev = to_snp_dev(file); @@ -420,6 +462,9 @@ static long snp_guest_ioctl(struct file *file, unsigned int ioctl, unsigned long case SNP_GET_REPORT: ret = get_report(snp_dev, &input); break; + case SNP_GET_DERIVED_KEY: + ret = get_derived_key(snp_dev, &input); + break; default: break; } diff --git a/include/uapi/linux/sev-guest.h b/include/uapi/linux/sev-guest.h index 38f11d723c68..598367f12064 100644 --- a/include/uapi/linux/sev-guest.h +++ b/include/uapi/linux/sev-guest.h @@ -30,6 +30,20 @@ struct snp_report_resp { __u8 data[4000]; }; +struct snp_derived_key_req { + __u32 root_key_select; + __u32 rsvd; + __u64 guest_field_select; + __u32 vmpl; + __u32 guest_svn; + __u64 tcb_version; +}; + +struct snp_derived_key_resp { + /* response data, see SEV-SNP spec for the format */ + __u8 data[64]; +}; + struct snp_guest_request_ioctl { /* message version number (must be non-zero) */ __u8 msg_version; @@ -47,4 +61,7 @@ struct snp_guest_request_ioctl { /* Get SNP attestation report */ #define SNP_GET_REPORT _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x0, struct snp_guest_request_ioctl) +/* Get a derived key from the root */ +#define SNP_GET_DERIVED_KEY _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x1, struct snp_guest_request_ioctl) + #endif /* __UAPI_LINUX_SEV_GUEST_H_ */ From d80b494f712317493d464a55652698c4d1b7bb0f Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Mon, 7 Mar 2022 15:33:55 -0600 Subject: [PATCH 45/60] virt: sevguest: Add support to get extended report Version 2 of GHCB specification defines Non-Automatic-Exit (NAE) to get extended guest report which is similar to the SNP_GET_REPORT ioctl. The main difference is related to the additional data that will be returned. That additional data returned is a certificate blob that can be used by the SNP guest user. The certificate blob layout is defined in the GHCB specification. The driver simply treats the blob as a opaque data and copies it to userspace. [ bp: Massage commit message, cast 1st arg of access_ok() ] Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-46-brijesh.singh@amd.com --- Documentation/virt/coco/sevguest.rst | 23 +++++++ drivers/virt/coco/sevguest/sevguest.c | 92 ++++++++++++++++++++++++++- include/uapi/linux/sev-guest.h | 13 ++++ 3 files changed, 126 insertions(+), 2 deletions(-) diff --git a/Documentation/virt/coco/sevguest.rst b/Documentation/virt/coco/sevguest.rst index 4135c1431241..625de22658ec 100644 --- a/Documentation/virt/coco/sevguest.rst +++ b/Documentation/virt/coco/sevguest.rst @@ -95,6 +95,29 @@ on the various fields passed in the key derivation request. On success, the snp_derived_key_resp.data contains the derived key value. See the SEV-SNP specification for further details. + +2.3 SNP_GET_EXT_REPORT +---------------------- +:Technology: sev-snp +:Type: guest ioctl +:Parameters (in/out): struct snp_ext_report_req +:Returns (out): struct snp_report_resp on success, -negative on error + +The SNP_GET_EXT_REPORT ioctl is similar to the SNP_GET_REPORT. The difference is +related to the additional certificate data that is returned with the report. +The certificate data returned is being provided by the hypervisor through the +SNP_SET_EXT_CONFIG. + +The ioctl uses the SNP_GUEST_REQUEST (MSG_REPORT_REQ) command provided by the SEV-SNP +firmware to get the attestation report. + +On success, the snp_ext_report_resp.data will contain the attestation report +and snp_ext_report_req.certs_address will contain the certificate blob. If the +length of the blob is smaller than expected then snp_ext_report_req.certs_len will +be updated with the expected value. + +See GHCB specification for further detail on how to parse the certificate blob. + Reference --------- diff --git a/drivers/virt/coco/sevguest/sevguest.c b/drivers/virt/coco/sevguest/sevguest.c index 393777b72e5e..15afb6ce8d19 100644 --- a/drivers/virt/coco/sevguest/sevguest.c +++ b/drivers/virt/coco/sevguest/sevguest.c @@ -43,6 +43,7 @@ struct snp_guest_dev { struct device *dev; struct miscdevice misc; + void *certs_data; struct snp_guest_crypto *crypto; struct snp_guest_msg *request, *response; struct snp_secrets_page_layout *layout; @@ -433,6 +434,82 @@ static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_reque return rc; } +static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg) +{ + struct snp_guest_crypto *crypto = snp_dev->crypto; + struct snp_ext_report_req req; + struct snp_report_resp *resp; + int ret, npages = 0, resp_len; + + lockdep_assert_held(&snp_cmd_mutex); + + if (!arg->req_data || !arg->resp_data) + return -EINVAL; + + if (copy_from_user(&req, (void __user *)arg->req_data, sizeof(req))) + return -EFAULT; + + /* userspace does not want certificate data */ + if (!req.certs_len || !req.certs_address) + goto cmd; + + if (req.certs_len > SEV_FW_BLOB_MAX_SIZE || + !IS_ALIGNED(req.certs_len, PAGE_SIZE)) + return -EINVAL; + + if (!access_ok((const void __user *)req.certs_address, req.certs_len)) + return -EFAULT; + + /* + * Initialize the intermediate buffer with all zeros. This buffer + * is used in the guest request message to get the certs blob from + * the host. If host does not supply any certs in it, then copy + * zeros to indicate that certificate data was not provided. + */ + memset(snp_dev->certs_data, 0, req.certs_len); + npages = req.certs_len >> PAGE_SHIFT; +cmd: + /* + * The intermediate response buffer is used while decrypting the + * response payload. Make sure that it has enough space to cover the + * authtag. + */ + resp_len = sizeof(resp->data) + crypto->a_len; + resp = kzalloc(resp_len, GFP_KERNEL_ACCOUNT); + if (!resp) + return -ENOMEM; + + snp_dev->input.data_npages = npages; + ret = handle_guest_request(snp_dev, SVM_VMGEXIT_EXT_GUEST_REQUEST, arg->msg_version, + SNP_MSG_REPORT_REQ, &req.data, + sizeof(req.data), resp->data, resp_len, &arg->fw_err); + + /* If certs length is invalid then copy the returned length */ + if (arg->fw_err == SNP_GUEST_REQ_INVALID_LEN) { + req.certs_len = snp_dev->input.data_npages << PAGE_SHIFT; + + if (copy_to_user((void __user *)arg->req_data, &req, sizeof(req))) + ret = -EFAULT; + } + + if (ret) + goto e_free; + + if (npages && + copy_to_user((void __user *)req.certs_address, snp_dev->certs_data, + req.certs_len)) { + ret = -EFAULT; + goto e_free; + } + + if (copy_to_user((void __user *)arg->resp_data, resp, sizeof(*resp))) + ret = -EFAULT; + +e_free: + kfree(resp); + return ret; +} + static long snp_guest_ioctl(struct file *file, unsigned int ioctl, unsigned long arg) { struct snp_guest_dev *snp_dev = to_snp_dev(file); @@ -465,6 +542,9 @@ static long snp_guest_ioctl(struct file *file, unsigned int ioctl, unsigned long case SNP_GET_DERIVED_KEY: ret = get_derived_key(snp_dev, &input); break; + case SNP_GET_EXT_REPORT: + ret = get_ext_report(snp_dev, &input); + break; default: break; } @@ -595,10 +675,14 @@ static int __init snp_guest_probe(struct platform_device *pdev) if (!snp_dev->response) goto e_free_request; + snp_dev->certs_data = alloc_shared_pages(SEV_FW_BLOB_MAX_SIZE); + if (!snp_dev->certs_data) + goto e_free_response; + ret = -EIO; snp_dev->crypto = init_crypto(snp_dev, snp_dev->vmpck, VMPCK_KEY_LEN); if (!snp_dev->crypto) - goto e_free_response; + goto e_free_cert_data; misc = &snp_dev->misc; misc->minor = MISC_DYNAMIC_MINOR; @@ -608,14 +692,17 @@ static int __init snp_guest_probe(struct platform_device *pdev) /* initial the input address for guest request */ snp_dev->input.req_gpa = __pa(snp_dev->request); snp_dev->input.resp_gpa = __pa(snp_dev->response); + snp_dev->input.data_gpa = __pa(snp_dev->certs_data); ret = misc_register(misc); if (ret) - goto e_free_response; + goto e_free_cert_data; dev_info(dev, "Initialized SNP guest driver (using vmpck_id %d)\n", vmpck_id); return 0; +e_free_cert_data: + free_shared_pages(snp_dev->certs_data, SEV_FW_BLOB_MAX_SIZE); e_free_response: free_shared_pages(snp_dev->response, sizeof(struct snp_guest_msg)); e_free_request: @@ -629,6 +716,7 @@ static int __exit snp_guest_remove(struct platform_device *pdev) { struct snp_guest_dev *snp_dev = platform_get_drvdata(pdev); + free_shared_pages(snp_dev->certs_data, SEV_FW_BLOB_MAX_SIZE); free_shared_pages(snp_dev->response, sizeof(struct snp_guest_msg)); free_shared_pages(snp_dev->request, sizeof(struct snp_guest_msg)); deinit_crypto(snp_dev->crypto); diff --git a/include/uapi/linux/sev-guest.h b/include/uapi/linux/sev-guest.h index 598367f12064..256aaeff7e65 100644 --- a/include/uapi/linux/sev-guest.h +++ b/include/uapi/linux/sev-guest.h @@ -56,6 +56,16 @@ struct snp_guest_request_ioctl { __u64 fw_err; }; +struct snp_ext_report_req { + struct snp_report_req data; + + /* where to copy the certificate blob */ + __u64 certs_address; + + /* length of the certificate blob */ + __u32 certs_len; +}; + #define SNP_GUEST_REQ_IOC_TYPE 'S' /* Get SNP attestation report */ @@ -64,4 +74,7 @@ struct snp_guest_request_ioctl { /* Get a derived key from the root */ #define SNP_GET_DERIVED_KEY _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x1, struct snp_guest_request_ioctl) +/* Get SNP extended report as defined in the GHCB specification version 2. */ +#define SNP_GET_EXT_REPORT _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x2, struct snp_guest_request_ioctl) + #endif /* __UAPI_LINUX_SEV_GUEST_H_ */ From 92a99584d965b930988b28f36d925bd9675828b3 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 24 Feb 2022 10:56:25 -0600 Subject: [PATCH 46/60] virt: sevguest: Add documentation for SEV-SNP CPUID Enforcement Update the documentation with information regarding SEV-SNP CPUID Enforcement details and what sort of assurances it provides to guests. Signed-off-by: Michael Roth Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220307213356.2797205-47-brijesh.singh@amd.com --- Documentation/virt/coco/sevguest.rst | 29 ++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/Documentation/virt/coco/sevguest.rst b/Documentation/virt/coco/sevguest.rst index 625de22658ec..bf593e88cfd9 100644 --- a/Documentation/virt/coco/sevguest.rst +++ b/Documentation/virt/coco/sevguest.rst @@ -118,6 +118,35 @@ be updated with the expected value. See GHCB specification for further detail on how to parse the certificate blob. +3. SEV-SNP CPUID Enforcement +============================ + +SEV-SNP guests can access a special page that contains a table of CPUID values +that have been validated by the PSP as part of the SNP_LAUNCH_UPDATE firmware +command. It provides the following assurances regarding the validity of CPUID +values: + + - Its address is obtained via bootloader/firmware (via CC blob), and those + binaries will be measured as part of the SEV-SNP attestation report. + - Its initial state will be encrypted/pvalidated, so attempts to modify + it during run-time will result in garbage being written, or #VC exceptions + being generated due to changes in validation state if the hypervisor tries + to swap the backing page. + - Attempts to bypass PSP checks by the hypervisor by using a normal page, or + a non-CPUID encrypted page will change the measurement provided by the + SEV-SNP attestation report. + - The CPUID page contents are *not* measured, but attempts to modify the + expected contents of a CPUID page as part of guest initialization will be + gated by the PSP CPUID enforcement policy checks performed on the page + during SNP_LAUNCH_UPDATE, and noticeable later if the guest owner + implements their own checks of the CPUID values. + +It is important to note that this last assurance is only useful if the kernel +has taken care to make use of the SEV-SNP CPUID throughout all stages of boot. +Otherwise, guest owner attestation provides no assurance that the kernel wasn't +fed incorrect values at some point during boot. + + Reference --------- From e720ea52e85c9d00cf8c5769795d0a3e585524f6 Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Thu, 17 Mar 2022 14:19:13 -0700 Subject: [PATCH 47/60] x86/sev-es: Replace open-coded hlt-loop with sev_es_terminate() Replace the halt loop in handle_vc_boot_ghcb() with an sev_es_terminate(). The HLT gives the system no indication the guest is unhappy. The termination request will signal there was an error during VC handling during boot. [ bp: Update it to pass the reason set too. ] Signed-off-by: Peter Gonda Signed-off-by: Borislav Petkov Reviewed-by: Joerg Roedel Link: https://lore.kernel.org/r/20220317211913.1397427-1-pgonda@google.com --- arch/x86/kernel/sev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index ace43e116b40..f01f4550e2c6 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -1982,8 +1982,7 @@ bool __init handle_vc_boot_ghcb(struct pt_regs *regs) fail: show_regs(regs); - while (true) - halt(); + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); } /* From e50abbf788c239d529f9ab81e325f8e8f8432c9d Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 11 Apr 2022 19:12:13 +0800 Subject: [PATCH 48/60] virt: sevguest: Fix return value check in alloc_shared_pages() If alloc_pages() fails, it returns a NULL pointer. Replace the wrong IS_ERR() check with the proper NULL pointer check. Fixes: fce96cf04430 ("virt: Add SEV-SNP guest driver") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Signed-off-by: Borislav Petkov Reviewed-by: Brijesh Singh Link: https://lore.kernel.org/r/20220411111213.1477853-1-yangyingliang@huawei.com --- drivers/virt/coco/sevguest/sevguest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virt/coco/sevguest/sevguest.c b/drivers/virt/coco/sevguest/sevguest.c index 15afb6ce8d19..aaa6134d1d40 100644 --- a/drivers/virt/coco/sevguest/sevguest.c +++ b/drivers/virt/coco/sevguest/sevguest.c @@ -581,7 +581,7 @@ static void *alloc_shared_pages(size_t sz) int ret; page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(sz)); - if (IS_ERR(page)) + if (!page) return NULL; ret = set_memory_decrypted((unsigned long)page_address(page), npages); From 101826e02ac6c829bf4e768295e79ae9c37b4b2a Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Thu, 14 Apr 2022 18:04:17 +0800 Subject: [PATCH 49/60] virt: sevguest: Fix bool function returning negative value The function enc_payload() is wrongly declared bool but returns an integer value. Correct it. [ bp: Massage commit message. ] Fixes: fce96cf04430 ("virt: Add SEV-SNP guest driver") Signed-off-by: Haowen Bai Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/1649930657-10837-1-git-send-email-baihaowen@meizu.com --- drivers/virt/coco/sevguest/sevguest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virt/coco/sevguest/sevguest.c b/drivers/virt/coco/sevguest/sevguest.c index aaa6134d1d40..15f069ec8f0b 100644 --- a/drivers/virt/coco/sevguest/sevguest.c +++ b/drivers/virt/coco/sevguest/sevguest.c @@ -276,7 +276,7 @@ static int verify_and_dec_payload(struct snp_guest_dev *snp_dev, void *payload, return dec_payload(snp_dev, resp, payload, resp_hdr->msg_sz + crypto->a_len); } -static bool enc_payload(struct snp_guest_dev *snp_dev, u64 seqno, int version, u8 type, +static int enc_payload(struct snp_guest_dev *snp_dev, u64 seqno, int version, u8 type, void *payload, size_t sz) { struct snp_guest_msg *req = snp_dev->request; From 5dc91f2d4f3c160199fea9421d6b08f67a906947 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 2 Feb 2022 20:24:30 +0100 Subject: [PATCH 50/60] x86/boot: Add an efi.h header for the decompressor Copy the needed symbols only and remove the kernel proper includes. No functional changes. Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/YlCKWhMJEMUgJmjF@zn.tnic --- arch/x86/boot/compressed/acpi.c | 3 +- arch/x86/boot/compressed/efi.c | 2 - arch/x86/boot/compressed/efi.h | 126 ++++++++++++++++++++++++++ arch/x86/boot/compressed/kaslr.c | 3 +- arch/x86/boot/compressed/misc.h | 3 +- arch/x86/boot/compressed/pgtable_64.c | 3 +- 6 files changed, 131 insertions(+), 9 deletions(-) create mode 100644 arch/x86/boot/compressed/efi.h diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index 64b172dabd5c..9caf89063e77 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -3,10 +3,9 @@ #include "misc.h" #include "error.h" #include "../string.h" +#include "efi.h" #include -#include -#include /* * Longest parameter of 'acpi=' is 'copy_dsdt', plus an extra '\0' diff --git a/arch/x86/boot/compressed/efi.c b/arch/x86/boot/compressed/efi.c index 09fa3b5d70b8..6edd034b0b30 100644 --- a/arch/x86/boot/compressed/efi.c +++ b/arch/x86/boot/compressed/efi.c @@ -6,8 +6,6 @@ */ #include "misc.h" -#include -#include /** * efi_get_type - Given a pointer to boot_params, determine the type of EFI environment. diff --git a/arch/x86/boot/compressed/efi.h b/arch/x86/boot/compressed/efi.h new file mode 100644 index 000000000000..7db2f41b54cd --- /dev/null +++ b/arch/x86/boot/compressed/efi.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef BOOT_COMPRESSED_EFI_H +#define BOOT_COMPRESSED_EFI_H + +#if defined(_LINUX_EFI_H) || defined(_ASM_X86_EFI_H) +#error Please do not include kernel proper namespace headers +#endif + +typedef guid_t efi_guid_t __aligned(__alignof__(u32)); + +#define EFI_GUID(a, b, c, d...) (efi_guid_t){ { \ + (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ + (b) & 0xff, ((b) >> 8) & 0xff, \ + (c) & 0xff, ((c) >> 8) & 0xff, d } } + +#define ACPI_TABLE_GUID EFI_GUID(0xeb9d2d30, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) +#define ACPI_20_TABLE_GUID EFI_GUID(0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81) +#define EFI_CC_BLOB_GUID EFI_GUID(0x067b1f5f, 0xcf26, 0x44c5, 0x85, 0x54, 0x93, 0xd7, 0x77, 0x91, 0x2d, 0x42) + +#define EFI32_LOADER_SIGNATURE "EL32" +#define EFI64_LOADER_SIGNATURE "EL64" + +/* + * Generic EFI table header + */ +typedef struct { + u64 signature; + u32 revision; + u32 headersize; + u32 crc32; + u32 reserved; +} efi_table_hdr_t; + +#define EFI_CONVENTIONAL_MEMORY 7 + +#define EFI_MEMORY_MORE_RELIABLE \ + ((u64)0x0000000000010000ULL) /* higher reliability */ +#define EFI_MEMORY_SP ((u64)0x0000000000040000ULL) /* soft reserved */ + +#define EFI_PAGE_SHIFT 12 + +typedef struct { + u32 type; + u32 pad; + u64 phys_addr; + u64 virt_addr; + u64 num_pages; + u64 attribute; +} efi_memory_desc_t; + +#define efi_early_memdesc_ptr(map, desc_size, n) \ + (efi_memory_desc_t *)((void *)(map) + ((n) * (desc_size))) + +typedef struct { + efi_guid_t guid; + u64 table; +} efi_config_table_64_t; + +typedef struct { + efi_guid_t guid; + u32 table; +} efi_config_table_32_t; + +typedef struct { + efi_table_hdr_t hdr; + u64 fw_vendor; /* physical addr of CHAR16 vendor string */ + u32 fw_revision; + u32 __pad1; + u64 con_in_handle; + u64 con_in; + u64 con_out_handle; + u64 con_out; + u64 stderr_handle; + u64 stderr; + u64 runtime; + u64 boottime; + u32 nr_tables; + u32 __pad2; + u64 tables; +} efi_system_table_64_t; + +typedef struct { + efi_table_hdr_t hdr; + u32 fw_vendor; /* physical addr of CHAR16 vendor string */ + u32 fw_revision; + u32 con_in_handle; + u32 con_in; + u32 con_out_handle; + u32 con_out; + u32 stderr_handle; + u32 stderr; + u32 runtime; + u32 boottime; + u32 nr_tables; + u32 tables; +} efi_system_table_32_t; + +/* kexec external ABI */ +struct efi_setup_data { + u64 fw_vendor; + u64 __unused; + u64 tables; + u64 smbios; + u64 reserved[8]; +}; + +static inline int efi_guidcmp (efi_guid_t left, efi_guid_t right) +{ + return memcmp(&left, &right, sizeof (efi_guid_t)); +} + +#ifdef CONFIG_EFI +bool __pure __efi_soft_reserve_enabled(void); + +static inline bool __pure efi_soft_reserve_enabled(void) +{ + return IS_ENABLED(CONFIG_EFI_SOFT_RESERVE) + && __efi_soft_reserve_enabled(); +} +#else +static inline bool efi_soft_reserve_enabled(void) +{ + return false; +} +#endif /* CONFIG_EFI */ +#endif /* BOOT_COMPRESSED_EFI_H */ diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 411b268bc0a2..4a3f223973f4 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -22,15 +22,14 @@ #include "misc.h" #include "error.h" #include "../string.h" +#include "efi.h" #include #include #include #include #include -#include #include -#include #define _SETUP #include /* For COMMAND_LINE_SIZE */ diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 75d284ec763f..4ca2857ea041 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -35,6 +34,8 @@ #define BOOT_BOOT_H #include "../ctype.h" +#include "efi.h" + #ifdef CONFIG_X86_64 #define memptr long #else diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index a1733319a22a..2ac12ff4111b 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -1,11 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 #include "misc.h" -#include #include #include -#include #include "pgtable.h" #include "../string.h" +#include "efi.h" #define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */ #define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */ From 6044d159b5d826259a7397d42fa3ad0bfc4dbd13 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Wed, 20 Apr 2022 10:26:13 -0500 Subject: [PATCH 51/60] x86/boot: Put globals that are accessed early into the .data section The helpers in arch/x86/boot/compressed/efi.c might be used during early boot to access the EFI system/config tables, and in some cases these EFI helpers might attempt to print debug/error messages, before console_init() has been called. __putstr() checks some variables to avoid printing anything before the console has been initialized, but this isn't enough since those variables live in .bss, which may not have been cleared yet. This can lead to a triple-fault occurring, primarily when booting in legacy/CSM mode (where EFI helpers will attempt to print some debug messages). Fix this by declaring these globals in .data section instead so there is no dependency on .bss being cleared before accessing them. Fixes: c01fce9cef849 ("x86/compressed: Add SEV-SNP feature detection/setup") Reported-by: Borislav Petkov Suggested-by: Thomas Lendacky Signed-off-by: Michael Roth Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220420152613.145077-1-michael.roth@amd.com --- arch/x86/boot/compressed/early_serial_console.c | 3 ++- arch/x86/boot/compressed/misc.c | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/boot/compressed/early_serial_console.c b/arch/x86/boot/compressed/early_serial_console.c index 261e81fb9582..70a8d1706d0f 100644 --- a/arch/x86/boot/compressed/early_serial_console.c +++ b/arch/x86/boot/compressed/early_serial_console.c @@ -1,5 +1,6 @@ #include "misc.h" -int early_serial_base; +/* This might be accessed before .bss is cleared, so use .data instead. */ +int early_serial_base __section(".data"); #include "../early_serial_console.c" diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 1cdcaf34ee36..ca6820f99b40 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -53,7 +53,10 @@ memptr free_mem_end_ptr; static char *vidmem; static int vidport; -static int lines, cols; + +/* These might be accessed before .bss is cleared, so use .data instead. */ +static int lines __section(".data"); +static int cols __section(".data"); #ifdef CONFIG_KERNEL_GZIP #include "../../../../lib/decompress_inflate.c" From 2bf93ffbb97e0614cfc431d2ea33b7eae7481eb2 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 20 Apr 2022 09:14:13 -0500 Subject: [PATCH 52/60] virt: sevguest: Change driver name to reflect generic SEV support During patch review, it was decided the SNP guest driver name should not be SEV-SNP specific, but should be generic for use with anything SEV. However, this feedback was missed and the driver name, and many of the driver functions and structures, are SEV-SNP name specific. Rename the driver to "sev-guest" (to match the misc device that is created) and update some of the function and structure names, too. While in the file, adjust the one pr_err() message to be a dev_err() message so that the message, if issued, uses the driver name. Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/307710bb5515c9088a19fd0b930268c7300479b2.1650464054.git.thomas.lendacky@amd.com --- arch/x86/include/asm/sev.h | 2 +- arch/x86/kernel/sev.c | 10 +++---- drivers/virt/coco/sevguest/sevguest.c | 39 ++++++++++++++------------- 3 files changed, 27 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 9c2d33f1cfee..6e3dda4f82b5 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -95,7 +95,7 @@ struct snp_req_data { unsigned int data_npages; }; -struct snp_guest_platform_data { +struct sev_guest_platform_data { u64 secrets_gpa; }; diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index f01f4550e2c6..2fa87a07ab30 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -2166,8 +2166,8 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned } EXPORT_SYMBOL_GPL(snp_issue_guest_request); -static struct platform_device guest_req_device = { - .name = "snp-guest", +static struct platform_device sev_guest_device = { + .name = "sev-guest", .id = -1, }; @@ -2197,7 +2197,7 @@ static u64 get_secrets_page(void) static int __init snp_init_platform_device(void) { - struct snp_guest_platform_data data; + struct sev_guest_platform_data data; u64 gpa; if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) @@ -2208,10 +2208,10 @@ static int __init snp_init_platform_device(void) return -ENODEV; data.secrets_gpa = gpa; - if (platform_device_add_data(&guest_req_device, &data, sizeof(data))) + if (platform_device_add_data(&sev_guest_device, &data, sizeof(data))) return -ENODEV; - if (platform_device_register(&guest_req_device)) + if (platform_device_register(&sev_guest_device)) return -ENODEV; pr_info("SNP guest platform device initialized.\n"); diff --git a/drivers/virt/coco/sevguest/sevguest.c b/drivers/virt/coco/sevguest/sevguest.c index 15f069ec8f0b..18c3231a816d 100644 --- a/drivers/virt/coco/sevguest/sevguest.c +++ b/drivers/virt/coco/sevguest/sevguest.c @@ -1,14 +1,12 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * AMD Secure Encrypted Virtualization Nested Paging (SEV-SNP) guest request interface + * AMD Secure Encrypted Virtualization (SEV) guest driver interface * * Copyright (C) 2021 Advanced Micro Devices, Inc. * * Author: Brijesh Singh */ -#define pr_fmt(fmt) "SNP: GUEST: " fmt - #include #include #include @@ -574,7 +572,7 @@ static void free_shared_pages(void *buf, size_t sz) __free_pages(virt_to_page(buf), get_order(sz)); } -static void *alloc_shared_pages(size_t sz) +static void *alloc_shared_pages(struct device *dev, size_t sz) { unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; struct page *page; @@ -586,7 +584,7 @@ static void *alloc_shared_pages(size_t sz) ret = set_memory_decrypted((unsigned long)page_address(page), npages); if (ret) { - pr_err("failed to mark page shared, ret=%d\n", ret); + dev_err(dev, "failed to mark page shared, ret=%d\n", ret); __free_pages(page, get_order(sz)); return NULL; } @@ -627,10 +625,10 @@ static u8 *get_vmpck(int id, struct snp_secrets_page_layout *layout, u32 **seqno return key; } -static int __init snp_guest_probe(struct platform_device *pdev) +static int __init sev_guest_probe(struct platform_device *pdev) { struct snp_secrets_page_layout *layout; - struct snp_guest_platform_data *data; + struct sev_guest_platform_data *data; struct device *dev = &pdev->dev; struct snp_guest_dev *snp_dev; struct miscdevice *misc; @@ -639,7 +637,7 @@ static int __init snp_guest_probe(struct platform_device *pdev) if (!dev->platform_data) return -ENODEV; - data = (struct snp_guest_platform_data *)dev->platform_data; + data = (struct sev_guest_platform_data *)dev->platform_data; layout = (__force void *)ioremap_encrypted(data->secrets_gpa, PAGE_SIZE); if (!layout) return -ENODEV; @@ -667,15 +665,15 @@ static int __init snp_guest_probe(struct platform_device *pdev) snp_dev->layout = layout; /* Allocate the shared page used for the request and response message. */ - snp_dev->request = alloc_shared_pages(sizeof(struct snp_guest_msg)); + snp_dev->request = alloc_shared_pages(dev, sizeof(struct snp_guest_msg)); if (!snp_dev->request) goto e_unmap; - snp_dev->response = alloc_shared_pages(sizeof(struct snp_guest_msg)); + snp_dev->response = alloc_shared_pages(dev, sizeof(struct snp_guest_msg)); if (!snp_dev->response) goto e_free_request; - snp_dev->certs_data = alloc_shared_pages(SEV_FW_BLOB_MAX_SIZE); + snp_dev->certs_data = alloc_shared_pages(dev, SEV_FW_BLOB_MAX_SIZE); if (!snp_dev->certs_data) goto e_free_response; @@ -698,7 +696,7 @@ static int __init snp_guest_probe(struct platform_device *pdev) if (ret) goto e_free_cert_data; - dev_info(dev, "Initialized SNP guest driver (using vmpck_id %d)\n", vmpck_id); + dev_info(dev, "Initialized SEV guest driver (using vmpck_id %d)\n", vmpck_id); return 0; e_free_cert_data: @@ -712,7 +710,7 @@ static int __init snp_guest_probe(struct platform_device *pdev) return ret; } -static int __exit snp_guest_remove(struct platform_device *pdev) +static int __exit sev_guest_remove(struct platform_device *pdev) { struct snp_guest_dev *snp_dev = platform_get_drvdata(pdev); @@ -725,16 +723,21 @@ static int __exit snp_guest_remove(struct platform_device *pdev) return 0; } -static struct platform_driver snp_guest_driver = { - .remove = __exit_p(snp_guest_remove), +/* + * This driver is a common SEV guest interface driver and meant to support + * any SEV guest API. As such, even though it has been introduced along with + * the SEV-SNP support, it is named "sev-guest". + */ +static struct platform_driver sev_guest_driver = { + .remove = __exit_p(sev_guest_remove), .driver = { - .name = "snp-guest", + .name = "sev-guest", }, }; -module_platform_driver_probe(snp_guest_driver, snp_guest_probe); +module_platform_driver_probe(sev_guest_driver, sev_guest_probe); MODULE_AUTHOR("Brijesh Singh "); MODULE_LICENSE("GPL"); MODULE_VERSION("1.0.0"); -MODULE_DESCRIPTION("AMD SNP Guest Driver"); +MODULE_DESCRIPTION("AMD SEV Guest Driver"); From d63670d23e60f00210635ca7c62bce27bec55f1b Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 20 Apr 2022 09:14:14 -0500 Subject: [PATCH 53/60] virt: sevguest: Rename the sevguest dir and files to sev-guest Rename the drivers/virt/coco/sevguest directory and files to sev-guest so as to match the driver name. [ bp: Rename Documentation/virt/coco/sevguest.rst too, as reported by sfr: https://lore.kernel.org/r/20220427101059.3bf55262@canb.auug.org.au ] Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/2f5c9cb16e3a67599c8e3170f6c72c8712c47d53.1650464054.git.thomas.lendacky@amd.com --- Documentation/virt/coco/{sevguest.rst => sev-guest.rst} | 0 Documentation/virt/index.rst | 2 +- drivers/virt/Kconfig | 2 +- drivers/virt/Makefile | 2 +- drivers/virt/coco/{sevguest => sev-guest}/Kconfig | 2 +- drivers/virt/coco/{sevguest => sev-guest}/Makefile | 2 +- .../coco/{sevguest/sevguest.c => sev-guest/sev-guest.c} | 8 ++++---- .../coco/{sevguest/sevguest.h => sev-guest/sev-guest.h} | 0 8 files changed, 9 insertions(+), 9 deletions(-) rename Documentation/virt/coco/{sevguest.rst => sev-guest.rst} (100%) rename drivers/virt/coco/{sevguest => sev-guest}/Kconfig (87%) rename drivers/virt/coco/{sevguest => sev-guest}/Makefile (50%) rename drivers/virt/coco/{sevguest/sevguest.c => sev-guest/sev-guest.c} (98%) rename drivers/virt/coco/{sevguest/sevguest.h => sev-guest/sev-guest.h} (100%) diff --git a/Documentation/virt/coco/sevguest.rst b/Documentation/virt/coco/sev-guest.rst similarity index 100% rename from Documentation/virt/coco/sevguest.rst rename to Documentation/virt/coco/sev-guest.rst diff --git a/Documentation/virt/index.rst b/Documentation/virt/index.rst index 40ad0d20032e..492f0920b988 100644 --- a/Documentation/virt/index.rst +++ b/Documentation/virt/index.rst @@ -13,7 +13,7 @@ Linux Virtualization Support guest-halt-polling ne_overview acrn/index - coco/sevguest + coco/sev-guest .. only:: html and subproject diff --git a/drivers/virt/Kconfig b/drivers/virt/Kconfig index 7d3273cfab27..0c1bba7c5c66 100644 --- a/drivers/virt/Kconfig +++ b/drivers/virt/Kconfig @@ -48,6 +48,6 @@ source "drivers/virt/nitro_enclaves/Kconfig" source "drivers/virt/acrn/Kconfig" -source "drivers/virt/coco/sevguest/Kconfig" +source "drivers/virt/coco/sev-guest/Kconfig" endif diff --git a/drivers/virt/Makefile b/drivers/virt/Makefile index 7b87a7ba1972..b2e6e864ebbe 100644 --- a/drivers/virt/Makefile +++ b/drivers/virt/Makefile @@ -9,4 +9,4 @@ obj-y += vboxguest/ obj-$(CONFIG_NITRO_ENCLAVES) += nitro_enclaves/ obj-$(CONFIG_ACRN_HSM) += acrn/ -obj-$(CONFIG_SEV_GUEST) += coco/sevguest/ +obj-$(CONFIG_SEV_GUEST) += coco/sev-guest/ diff --git a/drivers/virt/coco/sevguest/Kconfig b/drivers/virt/coco/sev-guest/Kconfig similarity index 87% rename from drivers/virt/coco/sevguest/Kconfig rename to drivers/virt/coco/sev-guest/Kconfig index 74ca1fe09437..f9db0799ae67 100644 --- a/drivers/virt/coco/sevguest/Kconfig +++ b/drivers/virt/coco/sev-guest/Kconfig @@ -11,4 +11,4 @@ config SEV_GUEST userspace interface to communicate with the PSP to request the attestation report and more. - If you choose 'M' here, this module will be called sevguest. + If you choose 'M' here, this module will be called sev-guest. diff --git a/drivers/virt/coco/sevguest/Makefile b/drivers/virt/coco/sev-guest/Makefile similarity index 50% rename from drivers/virt/coco/sevguest/Makefile rename to drivers/virt/coco/sev-guest/Makefile index b1ffb2b4177b..63d67c27723a 100644 --- a/drivers/virt/coco/sevguest/Makefile +++ b/drivers/virt/coco/sev-guest/Makefile @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_SEV_GUEST) += sevguest.o +obj-$(CONFIG_SEV_GUEST) += sev-guest.o diff --git a/drivers/virt/coco/sevguest/sevguest.c b/drivers/virt/coco/sev-guest/sev-guest.c similarity index 98% rename from drivers/virt/coco/sevguest/sevguest.c rename to drivers/virt/coco/sev-guest/sev-guest.c index 18c3231a816d..90ce16b6e05f 100644 --- a/drivers/virt/coco/sevguest/sevguest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -25,7 +25,7 @@ #include #include -#include "sevguest.h" +#include "sev-guest.h" #define DEVICE_NAME "sev-guest" #define AAD_LEN 48 @@ -724,9 +724,9 @@ static int __exit sev_guest_remove(struct platform_device *pdev) } /* - * This driver is a common SEV guest interface driver and meant to support - * any SEV guest API. As such, even though it has been introduced along with - * the SEV-SNP support, it is named "sev-guest". + * This driver is meant to be a common SEV guest interface driver and to + * support any SEV guest API. As such, even though it has been introduced + * with the SEV-SNP support, it is named "sev-guest". */ static struct platform_driver sev_guest_driver = { .remove = __exit_p(sev_guest_remove), diff --git a/drivers/virt/coco/sevguest/sevguest.h b/drivers/virt/coco/sev-guest/sev-guest.h similarity index 100% rename from drivers/virt/coco/sevguest/sevguest.h rename to drivers/virt/coco/sev-guest/sev-guest.h From 75d359ec4141b013727022a663762931f69e6510 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Fri, 22 Apr 2022 08:56:23 -0500 Subject: [PATCH 54/60] x86/sev: Add missing __init annotations to SEV init routines Currently, get_secrets_page() is only reachable from the following call chain: __init snp_init_platform_device(): get_secrets_page() so mark it as __init as well. This is also needed since it calls early_memremap(), which is also an __init routine. Similarly, get_jump_table_addr() is only reachable from the following call chain: __init setup_real_mode(): sme_sev_setup_real_mode(): sev_es_setup_ap_jump_table(): get_jump_table_addr() so mark get_jump_table_addr() and everything up that call chain as __init as well. This is also needed since future patches will add a call to get_secrets_page(), which needs to be __init due to the reasons stated above. Suggested-by: Borislav Petkov Signed-off-by: Michael Roth Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220422135624.114172-2-michael.roth@amd.com --- arch/x86/kernel/sev.c | 6 +++--- arch/x86/realmode/init.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 2fa87a07ab30..b7fd1915560d 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -558,7 +558,7 @@ void noinstr __sev_es_nmi_complete(void) __sev_put_ghcb(&state); } -static u64 get_jump_table_addr(void) +static u64 __init get_jump_table_addr(void) { struct ghcb_state state; unsigned long flags; @@ -1077,7 +1077,7 @@ void snp_set_wakeup_secondary_cpu(void) apic->wakeup_secondary_cpu = wakeup_cpu_via_vmgexit; } -int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) +int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { u16 startup_cs, startup_ip; phys_addr_t jump_table_pa; @@ -2171,7 +2171,7 @@ static struct platform_device sev_guest_device = { .id = -1, }; -static u64 get_secrets_page(void) +static u64 __init get_secrets_page(void) { u64 pa_data = boot_params.cc_blob_address; struct cc_blob_sev_info info; diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index c5e29db02a46..41d7669a97ad 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -67,7 +67,7 @@ void __init reserve_real_mode(void) memblock_reserve(0, SZ_1M); } -static void sme_sev_setup_real_mode(struct trampoline_header *th) +static void __init sme_sev_setup_real_mode(struct trampoline_header *th) { #ifdef CONFIG_AMD_MEM_ENCRYPT if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) From c2106a231c2ba36ff9af50cdf2867b9a5f8150a6 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Fri, 22 Apr 2022 08:56:24 -0500 Subject: [PATCH 55/60] x86/sev: Get the AP jump table address from secrets page The GHCB specification section 2.7 states that when SEV-SNP is enabled, a guest should not rely on the hypervisor to provide the address of the AP jump table. Instead, if a guest BIOS wants to provide an AP jump table, it should record the address in the SNP secrets page so the guest operating system can obtain it directly from there. Fix this on the guest kernel side by having SNP guests use the AP jump table address published in the secrets page rather than issuing a GHCB request to get it. [ mroth: - Improve error handling when ioremap()/memremap() return NULL - Don't mix function calls with declarations - Add missing __init - Tweak commit message ] Fixes: 0afb6b660a6b ("x86/sev: Use SEV-SNP AP creation to start secondary CPUs") Signed-off-by: Brijesh Singh Signed-off-by: Michael Roth Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220422135624.114172-3-michael.roth@amd.com --- arch/x86/include/asm/sev.h | 35 ++++++++++++ arch/x86/kernel/sev.c | 76 +++++++++++++++++-------- drivers/virt/coco/sev-guest/sev-guest.h | 35 ------------ 3 files changed, 87 insertions(+), 59 deletions(-) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 6e3dda4f82b5..19514524f0f8 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -99,6 +99,41 @@ struct sev_guest_platform_data { u64 secrets_gpa; }; +/* + * The secrets page contains 96-bytes of reserved field that can be used by + * the guest OS. The guest OS uses the area to save the message sequence + * number for each VMPCK. + * + * See the GHCB spec section Secret page layout for the format for this area. + */ +struct secrets_os_area { + u32 msg_seqno_0; + u32 msg_seqno_1; + u32 msg_seqno_2; + u32 msg_seqno_3; + u64 ap_jump_table_pa; + u8 rsvd[40]; + u8 guest_usage[32]; +} __packed; + +#define VMPCK_KEY_LEN 32 + +/* See the SNP spec version 0.9 for secrets page format */ +struct snp_secrets_page_layout { + u32 version; + u32 imien : 1, + rsvd1 : 31; + u32 fms; + u32 rsvd2; + u8 gosvw[16]; + u8 vmpck0[VMPCK_KEY_LEN]; + u8 vmpck1[VMPCK_KEY_LEN]; + u8 vmpck2[VMPCK_KEY_LEN]; + u8 vmpck3[VMPCK_KEY_LEN]; + struct secrets_os_area os_area; + u8 rsvd3[3840]; +} __packed; + #ifdef CONFIG_AMD_MEM_ENCRYPT extern struct static_key_false sev_es_enable_key; extern void __sev_es_ist_enter(struct pt_regs *regs); diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index b7fd1915560d..166375084b1f 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -558,6 +558,55 @@ void noinstr __sev_es_nmi_complete(void) __sev_put_ghcb(&state); } +static u64 __init get_secrets_page(void) +{ + u64 pa_data = boot_params.cc_blob_address; + struct cc_blob_sev_info info; + void *map; + + /* + * The CC blob contains the address of the secrets page, check if the + * blob is present. + */ + if (!pa_data) + return 0; + + map = early_memremap(pa_data, sizeof(info)); + if (!map) { + pr_err("Unable to locate SNP secrets page: failed to map the Confidential Computing blob.\n"); + return 0; + } + memcpy(&info, map, sizeof(info)); + early_memunmap(map, sizeof(info)); + + /* smoke-test the secrets page passed */ + if (!info.secrets_phys || info.secrets_len != PAGE_SIZE) + return 0; + + return info.secrets_phys; +} + +static u64 __init get_snp_jump_table_addr(void) +{ + struct snp_secrets_page_layout *layout; + u64 pa, addr; + + pa = get_secrets_page(); + if (!pa) + return 0; + + layout = (__force void *)ioremap_encrypted(pa, PAGE_SIZE); + if (!layout) { + pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n"); + return 0; + } + + addr = layout->os_area.ap_jump_table_pa; + iounmap(layout); + + return addr; +} + static u64 __init get_jump_table_addr(void) { struct ghcb_state state; @@ -565,6 +614,9 @@ static u64 __init get_jump_table_addr(void) struct ghcb *ghcb; u64 ret = 0; + if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + return get_snp_jump_table_addr(); + local_irq_save(flags); ghcb = __sev_get_ghcb(&state); @@ -2171,30 +2223,6 @@ static struct platform_device sev_guest_device = { .id = -1, }; -static u64 __init get_secrets_page(void) -{ - u64 pa_data = boot_params.cc_blob_address; - struct cc_blob_sev_info info; - void *map; - - /* - * The CC blob contains the address of the secrets page, check if the - * blob is present. - */ - if (!pa_data) - return 0; - - map = early_memremap(pa_data, sizeof(info)); - memcpy(&info, map, sizeof(info)); - early_memunmap(map, sizeof(info)); - - /* smoke-test the secrets page passed */ - if (!info.secrets_phys || info.secrets_len != PAGE_SIZE) - return 0; - - return info.secrets_phys; -} - static int __init snp_init_platform_device(void) { struct sev_guest_platform_data data; diff --git a/drivers/virt/coco/sev-guest/sev-guest.h b/drivers/virt/coco/sev-guest/sev-guest.h index d39bdd013765..21bda26fdb95 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.h +++ b/drivers/virt/coco/sev-guest/sev-guest.h @@ -60,39 +60,4 @@ struct snp_guest_msg { u8 payload[4000]; } __packed; -/* - * The secrets page contains 96-bytes of reserved field that can be used by - * the guest OS. The guest OS uses the area to save the message sequence - * number for each VMPCK. - * - * See the GHCB spec section Secret page layout for the format for this area. - */ -struct secrets_os_area { - u32 msg_seqno_0; - u32 msg_seqno_1; - u32 msg_seqno_2; - u32 msg_seqno_3; - u64 ap_jump_table_pa; - u8 rsvd[40]; - u8 guest_usage[32]; -} __packed; - -#define VMPCK_KEY_LEN 32 - -/* See the SNP spec version 0.9 for secrets page format */ -struct snp_secrets_page_layout { - u32 version; - u32 imien : 1, - rsvd1 : 31; - u32 fms; - u32 rsvd2; - u8 gosvw[16]; - u8 vmpck0[VMPCK_KEY_LEN]; - u8 vmpck1[VMPCK_KEY_LEN]; - u8 vmpck2[VMPCK_KEY_LEN]; - u8 vmpck3[VMPCK_KEY_LEN]; - struct secrets_os_area os_area; - u8 rsvd3[3840]; -} __packed; - #endif /* __VIRT_SEVGUEST_H__ */ From ab65f49253ff706723ecbf87af74e9383b5e4582 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 2 May 2022 17:33:40 +0200 Subject: [PATCH 56/60] x86/sev: Fix address space sparse warning Fix: arch/x86/kernel/sev.c:605:16: warning: incorrect type in assignment (different address spaces) arch/x86/kernel/sev.c:605:16: expected struct snp_secrets_page_layout *layout arch/x86/kernel/sev.c:605:16: got void [noderef] __iomem *[assigned] mem Reported-by: kernel test robot Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/202205022233.XgNDR7WR-lkp@intel.com --- arch/x86/kernel/sev.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 166375084b1f..c05f0124c410 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -589,20 +589,23 @@ static u64 __init get_secrets_page(void) static u64 __init get_snp_jump_table_addr(void) { struct snp_secrets_page_layout *layout; + void __iomem *mem; u64 pa, addr; pa = get_secrets_page(); if (!pa) return 0; - layout = (__force void *)ioremap_encrypted(pa, PAGE_SIZE); - if (!layout) { + mem = ioremap_encrypted(pa, PAGE_SIZE); + if (!mem) { pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n"); return 0; } + layout = (__force struct snp_secrets_page_layout *)mem; + addr = layout->os_area.ap_jump_table_pa; - iounmap(layout); + iounmap(mem); return addr; } From 0621210ab7693e6d50585ca689d95d57df617455 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 16 May 2022 19:42:15 +0100 Subject: [PATCH 57/60] x86/sev: Remove duplicated assignment to variable info Variable info is being assigned the same value twice, remove the redundant assignment. Also assign variable v in the declaration. Cleans up clang scan warning: warning: Value stored to 'info' during its initialization is never read [deadcode.DeadStores] No code changed: # arch/x86/kernel/sev.o: text data bss dec hex filename 19878 4487 4112 28477 6f3d sev.o.before 19878 4487 4112 28477 6f3d sev.o.after md5: bfbaa515af818615fd01fea91e7eba1b sev.o.before.asm bfbaa515af818615fd01fea91e7eba1b sev.o.after.asm [ bp: Running the before/after check on sev.c because sev-shared.c gets included into it. ] Fixes: 597cfe48212a ("x86/boot/compressed/64: Setup a GHCB-based VC Exception handler") Signed-off-by: Colin Ian King Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220516184215.51841-1-colin.i.king@gmail.com --- arch/x86/kernel/sev-shared.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 2b4270d5559e..b478edf43bec 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -201,10 +201,7 @@ static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt if (ret == 1) { u64 info = ghcb->save.sw_exit_info_2; - unsigned long v; - - info = ghcb->save.sw_exit_info_2; - v = info & SVM_EVTINJ_VEC_MASK; + unsigned long v = info & SVM_EVTINJ_VEC_MASK; /* Check if exception information from hypervisor is sane. */ if ((info & SVM_EVTINJ_VALID) && From c42b145181aafd59ed31ccd879493389e3ea5a08 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 16 Mar 2022 12:16:12 +0800 Subject: [PATCH 58/60] x86/sev: Annotate stack change in the #VC handler In idtentry_vc(), vc_switch_off_ist() determines a safe stack to switch to, off of the IST stack. Annotate the new stack switch with ENCODE_FRAME_POINTER in case UNWINDER_FRAME_POINTER is used. A stack walk before looks like this: CPU: 0 PID: 0 Comm: swapper Not tainted 5.18.0-rc7+ #2 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 Call Trace: dump_stack_lvl dump_stack kernel_exc_vmm_communication asm_exc_vmm_communication ? native_read_msr ? __x2apic_disable.part.0 ? x2apic_setup ? cpu_init ? trap_init ? start_kernel ? x86_64_start_reservations ? x86_64_start_kernel ? secondary_startup_64_no_verify and with the fix, the stack dump is exact: CPU: 0 PID: 0 Comm: swapper Not tainted 5.18.0-rc7+ #3 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 Call Trace: dump_stack_lvl dump_stack kernel_exc_vmm_communication asm_exc_vmm_communication RIP: 0010:native_read_msr Code: ... < snipped regs > ? __x2apic_disable.part.0 x2apic_setup cpu_init trap_init start_kernel x86_64_start_reservations x86_64_start_kernel secondary_startup_64_no_verify [ bp: Test in a SEV-ES guest and rewrite the commit message to explain what exactly this does. ] Fixes: a13644f3a53d ("x86/entry/64: Add entry code for #VC handler") Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220316041612.71357-1-jiangshanlai@gmail.com --- arch/x86/entry/entry_64.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 4faac48ebec5..f7bd8001bf07 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -505,6 +505,7 @@ SYM_CODE_START(\asmsym) call vc_switch_off_ist movq %rax, %rsp /* Switch to new stack */ + ENCODE_FRAME_POINTER UNWIND_HINT_REGS /* Update pt_regs */ From 47f33de4aafb2f5e43d480d590a939d0f1d566a9 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 12 Apr 2022 20:49:08 +0800 Subject: [PATCH 59/60] x86/sev: Mark the code returning to user space as syscall gap When returning to user space, %rsp is user-controlled value. If it is a SNP-guest and the hypervisor decides to mess with the code-page for this path while a CPU is executing it, a potential #VC could hit in the syscall return path and mislead the #VC handler. So make ip_within_syscall_gap() return true in this case. Signed-off-by: Lai Jiangshan Signed-off-by: Borislav Petkov Acked-by: Joerg Roedel Link: https://lore.kernel.org/r/20220412124909.10467-1-jiangshanlai@gmail.com --- arch/x86/entry/entry_64.S | 2 ++ arch/x86/entry/entry_64_compat.S | 2 ++ arch/x86/include/asm/proto.h | 4 ++++ arch/x86/include/asm/ptrace.h | 4 ++++ 4 files changed, 12 insertions(+) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f7bd8001bf07..2fd8a5cad853 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -215,8 +215,10 @@ syscall_return_via_sysret: popq %rdi popq %rsp +SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL) swapgs sysretq +SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL) SYM_CODE_END(entry_SYSCALL_64) /* diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 4fdb007cddbd..3c0e14960e2b 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -297,6 +297,7 @@ sysret32_from_system_call: * code. We zero R8-R10 to avoid info leaks. */ movq RSP-ORIG_RAX(%rsp), %rsp +SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL) /* * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored @@ -314,6 +315,7 @@ sysret32_from_system_call: xorl %r10d, %r10d swapgs sysretl +SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) SYM_CODE_END(entry_SYSCALL_compat) /* diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index feed36d44d04..f042cfc9938f 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -13,6 +13,8 @@ void syscall_init(void); #ifdef CONFIG_X86_64 void entry_SYSCALL_64(void); void entry_SYSCALL_64_safe_stack(void); +void entry_SYSRETQ_unsafe_stack(void); +void entry_SYSRETQ_end(void); long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2); #endif @@ -28,6 +30,8 @@ void entry_SYSENTER_compat(void); void __end_entry_SYSENTER_compat(void); void entry_SYSCALL_compat(void); void entry_SYSCALL_compat_safe_stack(void); +void entry_SYSRETL_compat_unsafe_stack(void); +void entry_SYSRETL_compat_end(void); void entry_INT80_compat(void); #ifdef CONFIG_XEN_PV void xen_entry_INT80_compat(void); diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 4357e0f2cd5f..f4db78b09c8f 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -186,9 +186,13 @@ static __always_inline bool ip_within_syscall_gap(struct pt_regs *regs) bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 && regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack); + ret = ret || (regs->ip >= (unsigned long)entry_SYSRETQ_unsafe_stack && + regs->ip < (unsigned long)entry_SYSRETQ_end); #ifdef CONFIG_IA32_EMULATION ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat && regs->ip < (unsigned long)entry_SYSCALL_compat_safe_stack); + ret = ret || (regs->ip >= (unsigned long)entry_SYSRETL_compat_unsafe_stack && + regs->ip < (unsigned long)entry_SYSRETL_compat_end); #endif return ret; From ce6565282b3b16fd850c6a676f78c6bc76d0c235 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 May 2022 10:26:04 +0200 Subject: [PATCH 60/60] x86/entry: Fixup objtool/ibt validation Commit 47f33de4aafb ("x86/sev: Mark the code returning to user space as syscall gap") added a bunch of text references without annotating them, resulting in a spree of objtool complaints: vmlinux.o: warning: objtool: vc_switch_off_ist+0x77: relocation to !ENDBR: entry_SYSCALL_64+0x15c vmlinux.o: warning: objtool: vc_switch_off_ist+0x8f: relocation to !ENDBR: entry_SYSCALL_compat+0xa5 vmlinux.o: warning: objtool: vc_switch_off_ist+0x97: relocation to !ENDBR: .entry.text+0x21ea vmlinux.o: warning: objtool: vc_switch_off_ist+0xef: relocation to !ENDBR: .entry.text+0x162 vmlinux.o: warning: objtool: __sev_es_ist_enter+0x60: relocation to !ENDBR: entry_SYSCALL_64+0x15c vmlinux.o: warning: objtool: __sev_es_ist_enter+0x6c: relocation to !ENDBR: .entry.text+0x162 vmlinux.o: warning: objtool: __sev_es_ist_enter+0x8a: relocation to !ENDBR: entry_SYSCALL_compat+0xa5 vmlinux.o: warning: objtool: __sev_es_ist_enter+0xc1: relocation to !ENDBR: .entry.text+0x21ea Since these text references are used to compare against IP, and are not an indirect call target, they don't need ENDBR so annotate them away. Fixes: 47f33de4aafb ("x86/sev: Mark the code returning to user space as syscall gap") Reported-by: Stephen Rothwell Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20220520082604.GQ2578@worktop.programming.kicks-ass.net --- arch/x86/entry/entry_64.S | 3 +++ arch/x86/entry/entry_64_compat.S | 3 +++ 2 files changed, 6 insertions(+) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 2fd8a5cad853..58a2d764fa39 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -216,9 +216,12 @@ syscall_return_via_sysret: popq %rdi popq %rsp SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR swapgs sysretq SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL) + ANNOTATE_NOENDBR + int3 SYM_CODE_END(entry_SYSCALL_64) /* diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 3c0e14960e2b..8011021d7ce8 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -298,6 +298,7 @@ sysret32_from_system_call: */ movq RSP-ORIG_RAX(%rsp), %rsp SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR /* * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored @@ -316,6 +317,8 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL) swapgs sysretl SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) + ANNOTATE_NOENDBR + int3 SYM_CODE_END(entry_SYSCALL_compat) /*