mirror of https://gitee.com/openkylin/linux.git
xen: features and fixes for 3.18-rc0
- Add pvscsi frontend and backend drivers. - Remove _PAGE_IOMAP PTE flag, freeing it for alternate uses. - Try and keep memory contiguous during PV memory setup (reduces SWIOTLB usage). - Allow front/back drivers to use threaded irqs. - Support large initrds in PV guests. - Fix PVH guests in preparation for Xen 4.5 -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.12 (GNU/Linux) iQEcBAABAgAGBQJUNonmAAoJEFxbo/MsZsTRHAQH/inCjpCT+pkvTB0YAVfVvgMI gUogT8G+iB2MuCNpMffGIt8TAVXwcVtnOLH9ABH3IBVehzgipIbIiVEM9YhjrYvU 1rgIKBpmZqSpjDHoIHpdHeCH67cVnRzA/PyoxZWLxPNmQ0t6bNf9yeAcCXK9PfUc 7EAblUDmPGSx9x/EUnOKNNaZSEiUJZHDBXbMBLllk1+5H1vfKnpFCRGMG0IrfI44 KVP2NX9Gfa05edMZYtH887FYyjFe2KNV6LJvE7+w7h2Dy0yIzf7y86t0l4n8gETb plvEUJ/lu9RYzTiZY/RxgBFYVTV59EqT45brSUtoe2Jcp8GSwiHslTHdfyFBwSo= =gw4d -----END PGP SIGNATURE----- Merge tag 'stable/for-linus-3.18-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip Pull Xen updates from David Vrabel: "Features and fixes: - Add pvscsi frontend and backend drivers. - Remove _PAGE_IOMAP PTE flag, freeing it for alternate uses. - Try and keep memory contiguous during PV memory setup (reduces SWIOTLB usage). - Allow front/back drivers to use threaded irqs. - Support large initrds in PV guests. 
- Fix PVH guests in preparation for Xen 4.5" * tag 'stable/for-linus-3.18-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (22 commits) xen: remove DEFINE_XENBUS_DRIVER() macro xen/xenbus: Remove BUG_ON() when error string trucated xen/xenbus: Correct the comments for xenbus_grant_ring() x86/xen: Set EFER.NX and EFER.SCE in PVH guests xen: eliminate scalability issues from initrd handling xen: sync some headers with xen tree xen: make pvscsi frontend dependant on xenbus frontend arm{,64}/xen: Remove "EXPERIMENTAL" in the description of the Xen options xen-scsifront: don't deadlock if the ring becomes full x86: remove the Xen-specific _PAGE_IOMAP PTE flag x86/xen: do not use _PAGE_IOMAP PTE flag for I/O mappings x86: skip check for spurious faults for non-present faults xen/efi: Directly include needed headers xen-scsiback: clean up a type issue in scsiback_make_tpg() xen-scsifront: use GFP_ATOMIC under spin_lock MAINTAINERS: Add xen pvscsi maintainer xen-scsiback: Add Xen PV SCSI backend driver xen-scsifront: Add Xen PV SCSI frontend driver xen: Add Xen pvSCSI protocol description xen/events: support threaded irqs for interdomain event channels ...
This commit is contained in:
commit
81ae31d782
|
@ -10268,6 +10268,15 @@ S: Supported
|
|||
F: drivers/block/xen-blkback/*
|
||||
F: drivers/block/xen*
|
||||
|
||||
XEN PVSCSI DRIVERS
|
||||
M: Juergen Gross <jgross@suse.com>
|
||||
L: xen-devel@lists.xenproject.org (moderated for non-subscribers)
|
||||
L: linux-scsi@vger.kernel.org
|
||||
S: Supported
|
||||
F: drivers/scsi/xen-scsifront.c
|
||||
F: drivers/xen/xen-scsiback.c
|
||||
F: include/xen/interface/io/vscsiif.h
|
||||
|
||||
XEN SWIOTLB SUBSYSTEM
|
||||
M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
||||
L: xen-devel@lists.xenproject.org (moderated for non-subscribers)
|
||||
|
|
|
@ -1779,7 +1779,7 @@ config XEN_DOM0
|
|||
depends on XEN
|
||||
|
||||
config XEN
|
||||
bool "Xen guest support on ARM (EXPERIMENTAL)"
|
||||
bool "Xen guest support on ARM"
|
||||
depends on ARM && AEABI && OF
|
||||
depends on CPU_V7 && !CPU_V6
|
||||
depends on !GENERIC_ATOMIC64
|
||||
|
|
|
@ -349,7 +349,7 @@ config XEN_DOM0
|
|||
depends on XEN
|
||||
|
||||
config XEN
|
||||
bool "Xen guest support on ARM64 (EXPERIMENTAL)"
|
||||
bool "Xen guest support on ARM64"
|
||||
depends on ARM64 && OF
|
||||
select SWIOTLB_XEN
|
||||
help
|
||||
|
|
|
@ -23,7 +23,6 @@
|
|||
#define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1
|
||||
#define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1
|
||||
#define _PAGE_BIT_SPLITTING _PAGE_BIT_SOFTW2 /* only valid on a PSE pmd */
|
||||
#define _PAGE_BIT_IOMAP _PAGE_BIT_SOFTW2 /* flag used to indicate IO mapping */
|
||||
#define _PAGE_BIT_HIDDEN _PAGE_BIT_SOFTW3 /* hidden by kmemcheck */
|
||||
#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */
|
||||
#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
|
||||
|
@ -52,7 +51,7 @@
|
|||
#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
|
||||
#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
|
||||
#define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
|
||||
#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
|
||||
#define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2)
|
||||
#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
|
||||
#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
|
||||
#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
|
||||
|
@ -168,10 +167,10 @@
|
|||
#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE)
|
||||
#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
|
||||
|
||||
#define __PAGE_KERNEL_IO (__PAGE_KERNEL | _PAGE_IOMAP)
|
||||
#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP)
|
||||
#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP)
|
||||
#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC | _PAGE_IOMAP)
|
||||
#define __PAGE_KERNEL_IO (__PAGE_KERNEL)
|
||||
#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE)
|
||||
#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS)
|
||||
#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC)
|
||||
|
||||
#define PAGE_KERNEL __pgprot(__PAGE_KERNEL)
|
||||
#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO)
|
||||
|
|
|
@ -933,8 +933,17 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
|
|||
* cross-processor TLB flush, even if no stale TLB entries exist
|
||||
* on other processors.
|
||||
*
|
||||
* Spurious faults may only occur if the TLB contains an entry with
|
||||
* fewer permission than the page table entry. Non-present (P = 0)
|
||||
* and reserved bit (R = 1) faults are never spurious.
|
||||
*
|
||||
* There are no security implications to leaving a stale TLB when
|
||||
* increasing the permissions on a page.
|
||||
*
|
||||
* Returns non-zero if a spurious fault was handled, zero otherwise.
|
||||
*
|
||||
* See Intel Developer's Manual Vol 3 Section 4.10.4.3, bullet 3
|
||||
* (Optional Invalidation).
|
||||
*/
|
||||
static noinline int
|
||||
spurious_fault(unsigned long error_code, unsigned long address)
|
||||
|
@ -945,8 +954,17 @@ spurious_fault(unsigned long error_code, unsigned long address)
|
|||
pte_t *pte;
|
||||
int ret;
|
||||
|
||||
/* Reserved-bit violation or user access to kernel space? */
|
||||
if (error_code & (PF_USER | PF_RSVD))
|
||||
/*
|
||||
* Only writes to RO or instruction fetches from NX may cause
|
||||
* spurious faults.
|
||||
*
|
||||
* These could be from user or supervisor accesses but the TLB
|
||||
* is only lazily flushed after a kernel mapping protection
|
||||
* change, so user accesses are not expected to cause spurious
|
||||
* faults.
|
||||
*/
|
||||
if (error_code != (PF_WRITE | PF_PROT)
|
||||
&& error_code != (PF_INSTR | PF_PROT))
|
||||
return 0;
|
||||
|
||||
pgd = init_mm.pgd + pgd_index(address);
|
||||
|
|
|
@ -537,7 +537,7 @@ static void __init pagetable_init(void)
|
|||
permanent_kmaps_init(pgd_base);
|
||||
}
|
||||
|
||||
pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP);
|
||||
pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL);
|
||||
EXPORT_SYMBOL_GPL(__supported_pte_mask);
|
||||
|
||||
/* user-defined highmem size */
|
||||
|
|
|
@ -151,7 +151,7 @@ early_param("gbpages", parse_direct_gbpages_on);
|
|||
* around without checking the pgd every time.
|
||||
*/
|
||||
|
||||
pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP;
|
||||
pteval_t __supported_pte_mask __read_mostly = ~0;
|
||||
EXPORT_SYMBOL_GPL(__supported_pte_mask);
|
||||
|
||||
int force_personality32;
|
||||
|
|
|
@ -442,8 +442,6 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
|
|||
*/
|
||||
prot |= _PAGE_CACHE_UC_MINUS;
|
||||
|
||||
prot |= _PAGE_IOMAP; /* creating a mapping for IO */
|
||||
|
||||
vma->vm_page_prot = __pgprot(prot);
|
||||
|
||||
if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
|
||||
|
|
|
@ -15,12 +15,14 @@
|
|||
* with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/efi.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/string.h>
|
||||
|
||||
#include <xen/xen-ops.h>
|
||||
|
||||
#include <asm/page.h>
|
||||
#include <asm/setup.h>
|
||||
|
||||
void __init xen_efi_init(void)
|
||||
|
|
|
@ -1463,6 +1463,7 @@ static void __ref xen_setup_gdt(int cpu)
|
|||
pv_cpu_ops.load_gdt = xen_load_gdt;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_XEN_PVH
|
||||
/*
|
||||
* A PV guest starts with default flags that are not set for PVH, set them
|
||||
* here asap.
|
||||
|
@ -1508,17 +1509,21 @@ static void __init xen_pvh_early_guest_init(void)
|
|||
return;
|
||||
|
||||
xen_have_vector_callback = 1;
|
||||
|
||||
xen_pvh_early_cpu_init(0, false);
|
||||
xen_pvh_set_cr_flags(0);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
BUG(); /* PVH: Implement proper support. */
|
||||
#endif
|
||||
}
|
||||
#endif /* CONFIG_XEN_PVH */
|
||||
|
||||
/* First C function to be called on Xen boot */
|
||||
asmlinkage __visible void __init xen_start_kernel(void)
|
||||
{
|
||||
struct physdev_set_iopl set_iopl;
|
||||
unsigned long initrd_start = 0;
|
||||
int rc;
|
||||
|
||||
if (!xen_start_info)
|
||||
|
@ -1527,7 +1532,9 @@ asmlinkage __visible void __init xen_start_kernel(void)
|
|||
xen_domain_type = XEN_PV_DOMAIN;
|
||||
|
||||
xen_setup_features();
|
||||
#ifdef CONFIG_XEN_PVH
|
||||
xen_pvh_early_guest_init();
|
||||
#endif
|
||||
xen_setup_machphys_mapping();
|
||||
|
||||
/* Install Xen paravirt ops */
|
||||
|
@ -1559,8 +1566,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
|
|||
#endif
|
||||
__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
|
||||
|
||||
__supported_pte_mask |= _PAGE_IOMAP;
|
||||
|
||||
/*
|
||||
* Prevent page tables from being allocated in highmem, even
|
||||
* if CONFIG_HIGHPTE is enabled.
|
||||
|
@ -1667,10 +1672,16 @@ asmlinkage __visible void __init xen_start_kernel(void)
|
|||
new_cpu_data.x86_capability[0] = cpuid_edx(1);
|
||||
#endif
|
||||
|
||||
if (xen_start_info->mod_start) {
|
||||
if (xen_start_info->flags & SIF_MOD_START_PFN)
|
||||
initrd_start = PFN_PHYS(xen_start_info->mod_start);
|
||||
else
|
||||
initrd_start = __pa(xen_start_info->mod_start);
|
||||
}
|
||||
|
||||
/* Poke various useful things into boot_params */
|
||||
boot_params.hdr.type_of_loader = (9 << 4) | 0;
|
||||
boot_params.hdr.ramdisk_image = xen_start_info->mod_start
|
||||
? __pa(xen_start_info->mod_start) : 0;
|
||||
boot_params.hdr.ramdisk_image = initrd_start;
|
||||
boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
|
||||
boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
|
||||
|
||||
|
|
|
@ -399,38 +399,14 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
|
|||
if (unlikely(mfn == INVALID_P2M_ENTRY)) {
|
||||
mfn = 0;
|
||||
flags = 0;
|
||||
} else {
|
||||
/*
|
||||
* Paramount to do this test _after_ the
|
||||
* INVALID_P2M_ENTRY as INVALID_P2M_ENTRY &
|
||||
* IDENTITY_FRAME_BIT resolves to true.
|
||||
*/
|
||||
mfn &= ~FOREIGN_FRAME_BIT;
|
||||
if (mfn & IDENTITY_FRAME_BIT) {
|
||||
mfn &= ~IDENTITY_FRAME_BIT;
|
||||
flags |= _PAGE_IOMAP;
|
||||
}
|
||||
}
|
||||
} else
|
||||
mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
|
||||
val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
|
||||
}
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
static pteval_t iomap_pte(pteval_t val)
|
||||
{
|
||||
if (val & _PAGE_PRESENT) {
|
||||
unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
|
||||
pteval_t flags = val & PTE_FLAGS_MASK;
|
||||
|
||||
/* We assume the pte frame number is a MFN, so
|
||||
just use it as-is. */
|
||||
val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
|
||||
}
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
__visible pteval_t xen_pte_val(pte_t pte)
|
||||
{
|
||||
pteval_t pteval = pte.pte;
|
||||
|
@ -441,9 +417,6 @@ __visible pteval_t xen_pte_val(pte_t pte)
|
|||
pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT;
|
||||
}
|
||||
#endif
|
||||
if (xen_initial_domain() && (pteval & _PAGE_IOMAP))
|
||||
return pteval;
|
||||
|
||||
return pte_mfn_to_pfn(pteval);
|
||||
}
|
||||
PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
|
||||
|
@ -481,7 +454,6 @@ void xen_set_pat(u64 pat)
|
|||
|
||||
__visible pte_t xen_make_pte(pteval_t pte)
|
||||
{
|
||||
phys_addr_t addr = (pte & PTE_PFN_MASK);
|
||||
#if 0
|
||||
/* If Linux is trying to set a WC pte, then map to the Xen WC.
|
||||
* If _PAGE_PAT is set, then it probably means it is really
|
||||
|
@ -496,19 +468,7 @@ __visible pte_t xen_make_pte(pteval_t pte)
|
|||
pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT;
|
||||
}
|
||||
#endif
|
||||
/*
|
||||
* Unprivileged domains are allowed to do IOMAPpings for
|
||||
* PCI passthrough, but not map ISA space. The ISA
|
||||
* mappings are just dummy local mappings to keep other
|
||||
* parts of the kernel happy.
|
||||
*/
|
||||
if (unlikely(pte & _PAGE_IOMAP) &&
|
||||
(xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
|
||||
pte = iomap_pte(pte);
|
||||
} else {
|
||||
pte &= ~_PAGE_IOMAP;
|
||||
pte = pte_pfn_to_mfn(pte);
|
||||
}
|
||||
pte = pte_pfn_to_mfn(pte);
|
||||
|
||||
return native_make_pte(pte);
|
||||
}
|
||||
|
@ -2091,7 +2051,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
|
|||
|
||||
default:
|
||||
/* By default, set_fixmap is used for hardware mappings */
|
||||
pte = mfn_pte(phys, __pgprot(pgprot_val(prot) | _PAGE_IOMAP));
|
||||
pte = mfn_pte(phys, prot);
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -173,6 +173,7 @@
|
|||
#include <xen/balloon.h>
|
||||
#include <xen/grant_table.h>
|
||||
|
||||
#include "p2m.h"
|
||||
#include "multicalls.h"
|
||||
#include "xen-ops.h"
|
||||
|
||||
|
@ -180,12 +181,6 @@ static void __init m2p_override_init(void);
|
|||
|
||||
unsigned long xen_max_p2m_pfn __read_mostly;
|
||||
|
||||
#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
|
||||
#define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *))
|
||||
#define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **))
|
||||
|
||||
#define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
|
||||
|
||||
/* Placeholders for holes in the address space */
|
||||
static RESERVE_BRK_ARRAY(unsigned long, p2m_missing, P2M_PER_PAGE);
|
||||
static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_missing, P2M_MID_PER_PAGE);
|
||||
|
@ -202,16 +197,12 @@ static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_identity_mfn, P2M_MID_PER_PAGE);
|
|||
RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
|
||||
RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
|
||||
|
||||
/* We might hit two boundary violations at the start and end, at max each
|
||||
* boundary violation will require three middle nodes. */
|
||||
RESERVE_BRK(p2m_mid_extra, PAGE_SIZE * 2 * 3);
|
||||
|
||||
/* When we populate back during bootup, the amount of pages can vary. The
|
||||
* max we have is seen is 395979, but that does not mean it can't be more.
|
||||
* Some machines can have 3GB I/O holes even. With early_can_reuse_p2m_middle
|
||||
* it can re-use Xen provided mfn_list array, so we only need to allocate at
|
||||
* most three P2M top nodes. */
|
||||
RESERVE_BRK(p2m_populated, PAGE_SIZE * 3);
|
||||
/* For each I/O range remapped we may lose up to two leaf pages for the boundary
|
||||
* violations and three mid pages to cover up to 3GB. With
|
||||
* early_can_reuse_p2m_middle() most of the leaf pages will be reused by the
|
||||
* remapped region.
|
||||
*/
|
||||
RESERVE_BRK(p2m_identity_remap, PAGE_SIZE * 2 * 3 * MAX_REMAP_RANGES);
|
||||
|
||||
static inline unsigned p2m_top_index(unsigned long pfn)
|
||||
{
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
/*
 * Shared p2m definitions: table geometry constants and the identity-range
 * helper prototype.
 */
#ifndef _XEN_P2M_H
|
||||
#define _XEN_P2M_H
|
||||
|
||||
/* Number of unsigned long entries that fit in one page. */
#define P2M_PER_PAGE (PAGE_SIZE / sizeof(unsigned long))
|
||||
/* Number of unsigned long * entries that fit in one page. */
#define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *))
|
||||
/* Number of unsigned long ** entries that fit in one page. */
#define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **))
|
||||
|
||||
/* Product of the three per-page entry counts above. */
#define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)
|
||||
|
||||
/* Multiplier used when sizing the p2m_identity_remap brk reservation. */
#define MAX_REMAP_RANGES 10
|
||||
|
||||
/*
 * Defined elsewhere. Callers add the return value to their count of
 * identity-mapped pages; presumably it marks [pfn_s, pfn_e) as identity
 * mapped in the p2m and returns the number of pfns set -- confirm against
 * the definition.
 */
extern unsigned long __init set_phys_range_identity(unsigned long pfn_s,
|
||||
unsigned long pfn_e);
|
||||
|
||||
#endif /* _XEN_P2M_H */
|
|
@ -29,6 +29,7 @@
|
|||
#include <xen/features.h>
|
||||
#include "xen-ops.h"
|
||||
#include "vdso.h"
|
||||
#include "p2m.h"
|
||||
|
||||
/* These are code, but not functions. Defined in entry.S */
|
||||
extern const char xen_hypervisor_callback[];
|
||||
|
@ -46,6 +47,9 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
|
|||
/* Number of pages released from the initial allocation. */
|
||||
unsigned long xen_released_pages;
|
||||
|
||||
/* Buffer used to remap identity mapped pages */
|
||||
unsigned long xen_remap_buf[P2M_PER_PAGE] __initdata;
|
||||
|
||||
/*
|
||||
* The maximum amount of extra memory compared to the base size. The
|
||||
* main scaling factor is the size of struct page. At extreme ratios
|
||||
|
@ -151,107 +155,325 @@ static unsigned long __init xen_do_chunk(unsigned long start,
|
|||
return len;
|
||||
}
|
||||
|
||||
static unsigned long __init xen_release_chunk(unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
return xen_do_chunk(start, end, true);
|
||||
}
|
||||
|
||||
static unsigned long __init xen_populate_chunk(
|
||||
/*
|
||||
* Finds the next RAM pfn available in the E820 map after min_pfn.
|
||||
* This function updates min_pfn with the pfn found and returns
|
||||
* the size of that range or zero if not found.
|
||||
*/
|
||||
static unsigned long __init xen_find_pfn_range(
|
||||
const struct e820entry *list, size_t map_size,
|
||||
unsigned long max_pfn, unsigned long *last_pfn,
|
||||
unsigned long credits_left)
|
||||
unsigned long *min_pfn)
|
||||
{
|
||||
const struct e820entry *entry;
|
||||
unsigned int i;
|
||||
unsigned long done = 0;
|
||||
unsigned long dest_pfn;
|
||||
|
||||
for (i = 0, entry = list; i < map_size; i++, entry++) {
|
||||
unsigned long s_pfn;
|
||||
unsigned long e_pfn;
|
||||
unsigned long pfns;
|
||||
long capacity;
|
||||
|
||||
if (credits_left <= 0)
|
||||
break;
|
||||
|
||||
if (entry->type != E820_RAM)
|
||||
continue;
|
||||
|
||||
e_pfn = PFN_DOWN(entry->addr + entry->size);
|
||||
|
||||
/* We only care about E820 after the xen_start_info->nr_pages */
|
||||
if (e_pfn <= max_pfn)
|
||||
/* We only care about E820 after this */
|
||||
if (e_pfn < *min_pfn)
|
||||
continue;
|
||||
|
||||
s_pfn = PFN_UP(entry->addr);
|
||||
/* If the E820 falls within the nr_pages, we want to start
|
||||
* at the nr_pages PFN.
|
||||
* If that would mean going past the E820 entry, skip it
|
||||
|
||||
/* If min_pfn falls within the E820 entry, we want to start
|
||||
* at the min_pfn PFN.
|
||||
*/
|
||||
if (s_pfn <= max_pfn) {
|
||||
capacity = e_pfn - max_pfn;
|
||||
dest_pfn = max_pfn;
|
||||
if (s_pfn <= *min_pfn) {
|
||||
done = e_pfn - *min_pfn;
|
||||
} else {
|
||||
capacity = e_pfn - s_pfn;
|
||||
dest_pfn = s_pfn;
|
||||
done = e_pfn - s_pfn;
|
||||
*min_pfn = s_pfn;
|
||||
}
|
||||
|
||||
if (credits_left < capacity)
|
||||
capacity = credits_left;
|
||||
|
||||
pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false);
|
||||
done += pfns;
|
||||
*last_pfn = (dest_pfn + pfns);
|
||||
if (pfns < capacity)
|
||||
break;
|
||||
credits_left -= pfns;
|
||||
break;
|
||||
}
|
||||
|
||||
return done;
|
||||
}
|
||||
|
||||
static void __init xen_set_identity_and_release_chunk(
|
||||
unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
|
||||
unsigned long *released, unsigned long *identity)
|
||||
/*
|
||||
* This releases a chunk of memory and then does the identity map. It's used as
|
||||
* a fallback if the remapping fails.
|
||||
*/
|
||||
static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
|
||||
unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity,
|
||||
unsigned long *released)
|
||||
{
|
||||
unsigned long pfn;
|
||||
|
||||
/*
|
||||
* If the PFNs are currently mapped, clear the mappings
|
||||
* (except for the ISA region which must be 1:1 mapped) to
|
||||
* release the refcounts (in Xen) on the original frames.
|
||||
*/
|
||||
for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
|
||||
pte_t pte = __pte_ma(0);
|
||||
|
||||
if (pfn < PFN_UP(ISA_END_ADDRESS))
|
||||
pte = mfn_pte(pfn, PAGE_KERNEL_IO);
|
||||
|
||||
(void)HYPERVISOR_update_va_mapping(
|
||||
(unsigned long)__va(pfn << PAGE_SHIFT), pte, 0);
|
||||
}
|
||||
|
||||
if (start_pfn < nr_pages)
|
||||
*released += xen_release_chunk(
|
||||
start_pfn, min(end_pfn, nr_pages));
|
||||
WARN_ON(start_pfn > end_pfn);
|
||||
|
||||
/* Need to release pages first */
|
||||
*released += xen_do_chunk(start_pfn, min(end_pfn, nr_pages), true);
|
||||
*identity += set_phys_range_identity(start_pfn, end_pfn);
|
||||
}
|
||||
|
||||
static unsigned long __init xen_set_identity_and_release(
|
||||
const struct e820entry *list, size_t map_size, unsigned long nr_pages)
|
||||
/*
|
||||
* Helper function to update both the p2m and m2p tables.
|
||||
*/
|
||||
static unsigned long __init xen_update_mem_tables(unsigned long pfn,
|
||||
unsigned long mfn)
|
||||
{
|
||||
struct mmu_update update = {
|
||||
.ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
|
||||
.val = pfn
|
||||
};
|
||||
|
||||
/* Update p2m */
|
||||
if (!early_set_phys_to_machine(pfn, mfn)) {
|
||||
WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
|
||||
pfn, mfn);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Update m2p */
|
||||
if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
|
||||
WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
|
||||
mfn, pfn);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function updates the p2m and m2p tables with an identity map from
|
||||
* start_pfn to start_pfn+size and remaps the underlying RAM of the original
|
||||
* allocation at remap_pfn. It must do so carefully in P2M_PER_PAGE sized blocks
|
||||
* to not exhaust the reserved brk space. Doing it in properly aligned blocks
|
||||
* ensures we only allocate the minimum required leaf pages in the p2m table. It
|
||||
* copies the existing mfns from the p2m table under the 1:1 map, overwrites
|
||||
* them with the identity map and then updates the p2m and m2p tables with the
|
||||
* remapped memory.
|
||||
*/
|
||||
static unsigned long __init xen_do_set_identity_and_remap_chunk(
|
||||
unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
|
||||
{
|
||||
unsigned long ident_pfn_iter, remap_pfn_iter;
|
||||
unsigned long ident_start_pfn_align, remap_start_pfn_align;
|
||||
unsigned long ident_end_pfn_align, remap_end_pfn_align;
|
||||
unsigned long ident_boundary_pfn, remap_boundary_pfn;
|
||||
unsigned long ident_cnt = 0;
|
||||
unsigned long remap_cnt = 0;
|
||||
unsigned long left = size;
|
||||
unsigned long mod;
|
||||
int i;
|
||||
|
||||
WARN_ON(size == 0);
|
||||
|
||||
BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
|
||||
|
||||
/*
|
||||
* Determine the proper alignment to remap memory in P2M_PER_PAGE sized
|
||||
* blocks. We need to keep track of both the existing pfn mapping and
|
||||
* the new pfn remapping.
|
||||
*/
|
||||
mod = start_pfn % P2M_PER_PAGE;
|
||||
ident_start_pfn_align =
|
||||
mod ? (start_pfn - mod + P2M_PER_PAGE) : start_pfn;
|
||||
mod = remap_pfn % P2M_PER_PAGE;
|
||||
remap_start_pfn_align =
|
||||
mod ? (remap_pfn - mod + P2M_PER_PAGE) : remap_pfn;
|
||||
mod = (start_pfn + size) % P2M_PER_PAGE;
|
||||
ident_end_pfn_align = start_pfn + size - mod;
|
||||
mod = (remap_pfn + size) % P2M_PER_PAGE;
|
||||
remap_end_pfn_align = remap_pfn + size - mod;
|
||||
|
||||
/* Iterate over each p2m leaf node in each range */
|
||||
for (ident_pfn_iter = ident_start_pfn_align, remap_pfn_iter = remap_start_pfn_align;
|
||||
ident_pfn_iter < ident_end_pfn_align && remap_pfn_iter < remap_end_pfn_align;
|
||||
ident_pfn_iter += P2M_PER_PAGE, remap_pfn_iter += P2M_PER_PAGE) {
|
||||
/* Check we aren't past the end */
|
||||
BUG_ON(ident_pfn_iter + P2M_PER_PAGE > start_pfn + size);
|
||||
BUG_ON(remap_pfn_iter + P2M_PER_PAGE > remap_pfn + size);
|
||||
|
||||
/* Save p2m mappings */
|
||||
for (i = 0; i < P2M_PER_PAGE; i++)
|
||||
xen_remap_buf[i] = pfn_to_mfn(ident_pfn_iter + i);
|
||||
|
||||
/* Set identity map which will free a p2m leaf */
|
||||
ident_cnt += set_phys_range_identity(ident_pfn_iter,
|
||||
ident_pfn_iter + P2M_PER_PAGE);
|
||||
|
||||
#ifdef DEBUG
|
||||
/* Helps verify a p2m leaf has been freed */
|
||||
for (i = 0; i < P2M_PER_PAGE; i++) {
|
||||
unsigned int pfn = ident_pfn_iter + i;
|
||||
BUG_ON(pfn_to_mfn(pfn) != pfn);
|
||||
}
|
||||
#endif
|
||||
/* Now remap memory */
|
||||
for (i = 0; i < P2M_PER_PAGE; i++) {
|
||||
unsigned long mfn = xen_remap_buf[i];
|
||||
|
||||
/* This will use the p2m leaf freed above */
|
||||
if (!xen_update_mem_tables(remap_pfn_iter + i, mfn)) {
|
||||
WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
|
||||
remap_pfn_iter + i, mfn);
|
||||
return 0;
|
||||
}
|
||||
|
||||
remap_cnt++;
|
||||
}
|
||||
|
||||
left -= P2M_PER_PAGE;
|
||||
}
|
||||
|
||||
/* Max boundary space possible */
|
||||
BUG_ON(left > (P2M_PER_PAGE - 1) * 2);
|
||||
|
||||
/* Now handle the boundary conditions */
|
||||
ident_boundary_pfn = start_pfn;
|
||||
remap_boundary_pfn = remap_pfn;
|
||||
for (i = 0; i < left; i++) {
|
||||
unsigned long mfn;
|
||||
|
||||
/* These two checks move from the start to end boundaries */
|
||||
if (ident_boundary_pfn == ident_start_pfn_align)
|
||||
ident_boundary_pfn = ident_pfn_iter;
|
||||
if (remap_boundary_pfn == remap_start_pfn_align)
|
||||
remap_boundary_pfn = remap_pfn_iter;
|
||||
|
||||
/* Check we aren't past the end */
|
||||
BUG_ON(ident_boundary_pfn >= start_pfn + size);
|
||||
BUG_ON(remap_boundary_pfn >= remap_pfn + size);
|
||||
|
||||
mfn = pfn_to_mfn(ident_boundary_pfn);
|
||||
|
||||
if (!xen_update_mem_tables(remap_boundary_pfn, mfn)) {
|
||||
WARN(1, "Failed to update mem mapping for pfn=%ld mfn=%ld\n",
|
||||
remap_pfn_iter + i, mfn);
|
||||
return 0;
|
||||
}
|
||||
remap_cnt++;
|
||||
|
||||
ident_boundary_pfn++;
|
||||
remap_boundary_pfn++;
|
||||
}
|
||||
|
||||
/* Finish up the identity map */
|
||||
if (ident_start_pfn_align >= ident_end_pfn_align) {
|
||||
/*
|
||||
* In this case we have an identity range which does not span an
|
||||
* aligned block so everything needs to be identity mapped here.
|
||||
* If we didn't check this we might remap too many pages since
|
||||
* the align boundaries are not meaningful in this case.
|
||||
*/
|
||||
ident_cnt += set_phys_range_identity(start_pfn,
|
||||
start_pfn + size);
|
||||
} else {
|
||||
/* Remapped above so check each end of the chunk */
|
||||
if (start_pfn < ident_start_pfn_align)
|
||||
ident_cnt += set_phys_range_identity(start_pfn,
|
||||
ident_start_pfn_align);
|
||||
if (start_pfn + size > ident_pfn_iter)
|
||||
ident_cnt += set_phys_range_identity(ident_pfn_iter,
|
||||
start_pfn + size);
|
||||
}
|
||||
|
||||
BUG_ON(ident_cnt != size);
|
||||
BUG_ON(remap_cnt != size);
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function takes a contiguous pfn range that needs to be identity mapped
|
||||
* and:
|
||||
*
|
||||
* 1) Finds a new range of pfns to use to remap based on E820 and remap_pfn.
|
||||
* 2) Calls the do_ function to actually do the mapping/remapping work.
|
||||
*
|
||||
* The goal is to not allocate additional memory but to remap the existing
|
||||
* pages. In the case of an error the underlying memory is simply released back
|
||||
* to Xen and not remapped.
|
||||
*/
|
||||
static unsigned long __init xen_set_identity_and_remap_chunk(
|
||||
const struct e820entry *list, size_t map_size, unsigned long start_pfn,
|
||||
unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn,
|
||||
unsigned long *identity, unsigned long *remapped,
|
||||
unsigned long *released)
|
||||
{
|
||||
unsigned long pfn;
|
||||
unsigned long i = 0;
|
||||
unsigned long n = end_pfn - start_pfn;
|
||||
|
||||
while (i < n) {
|
||||
unsigned long cur_pfn = start_pfn + i;
|
||||
unsigned long left = n - i;
|
||||
unsigned long size = left;
|
||||
unsigned long remap_range_size;
|
||||
|
||||
/* Do not remap pages beyond the current allocation */
|
||||
if (cur_pfn >= nr_pages) {
|
||||
/* Identity map remaining pages */
|
||||
*identity += set_phys_range_identity(cur_pfn,
|
||||
cur_pfn + size);
|
||||
break;
|
||||
}
|
||||
if (cur_pfn + size > nr_pages)
|
||||
size = nr_pages - cur_pfn;
|
||||
|
||||
remap_range_size = xen_find_pfn_range(list, map_size,
|
||||
&remap_pfn);
|
||||
if (!remap_range_size) {
|
||||
pr_warning("Unable to find available pfn range, not remapping identity pages\n");
|
||||
xen_set_identity_and_release_chunk(cur_pfn,
|
||||
cur_pfn + left, nr_pages, identity, released);
|
||||
break;
|
||||
}
|
||||
/* Adjust size to fit in current e820 RAM region */
|
||||
if (size > remap_range_size)
|
||||
size = remap_range_size;
|
||||
|
||||
if (!xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn)) {
|
||||
WARN(1, "Failed to remap 1:1 memory cur_pfn=%ld size=%ld remap_pfn=%ld\n",
|
||||
cur_pfn, size, remap_pfn);
|
||||
xen_set_identity_and_release_chunk(cur_pfn,
|
||||
cur_pfn + left, nr_pages, identity, released);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Update variables to reflect new mappings. */
|
||||
i += size;
|
||||
remap_pfn += size;
|
||||
*identity += size;
|
||||
*remapped += size;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the PFNs are currently mapped, the VA mapping also needs
|
||||
* to be updated to be 1:1.
|
||||
*/
|
||||
for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
|
||||
(void)HYPERVISOR_update_va_mapping(
|
||||
(unsigned long)__va(pfn << PAGE_SHIFT),
|
||||
mfn_pte(pfn, PAGE_KERNEL_IO), 0);
|
||||
|
||||
return remap_pfn;
|
||||
}
|
||||
|
||||
static unsigned long __init xen_set_identity_and_remap(
|
||||
const struct e820entry *list, size_t map_size, unsigned long nr_pages,
|
||||
unsigned long *released)
|
||||
{
|
||||
phys_addr_t start = 0;
|
||||
unsigned long released = 0;
|
||||
unsigned long identity = 0;
|
||||
unsigned long remapped = 0;
|
||||
unsigned long last_pfn = nr_pages;
|
||||
const struct e820entry *entry;
|
||||
unsigned long num_released = 0;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Combine non-RAM regions and gaps until a RAM region (or the
|
||||
* end of the map) is reached, then set the 1:1 map and
|
||||
* release the pages (if available) in those non-RAM regions.
|
||||
* remap the memory in those non-RAM regions.
|
||||
*
|
||||
* The combined non-RAM regions are rounded to a whole number
|
||||
* of pages so any partial pages are accessible via the 1:1
|
||||
|
@ -269,22 +491,24 @@ static unsigned long __init xen_set_identity_and_release(
|
|||
end_pfn = PFN_UP(entry->addr);
|
||||
|
||||
if (start_pfn < end_pfn)
|
||||
xen_set_identity_and_release_chunk(
|
||||
start_pfn, end_pfn, nr_pages,
|
||||
&released, &identity);
|
||||
|
||||
last_pfn = xen_set_identity_and_remap_chunk(
|
||||
list, map_size, start_pfn,
|
||||
end_pfn, nr_pages, last_pfn,
|
||||
&identity, &remapped,
|
||||
&num_released);
|
||||
start = end;
|
||||
}
|
||||
}
|
||||
|
||||
if (released)
|
||||
printk(KERN_INFO "Released %lu pages of unused memory\n", released);
|
||||
if (identity)
|
||||
printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity);
|
||||
*released = num_released;
|
||||
|
||||
return released;
|
||||
pr_info("Set %ld page(s) to 1-1 mapping\n", identity);
|
||||
pr_info("Remapped %ld page(s), last_pfn=%ld\n", remapped,
|
||||
last_pfn);
|
||||
pr_info("Released %ld page(s)\n", num_released);
|
||||
|
||||
return last_pfn;
|
||||
}
|
||||
|
||||
static unsigned long __init xen_get_max_pages(void)
|
||||
{
|
||||
unsigned long max_pages = MAX_DOMAIN_PAGES;
|
||||
|
@ -347,7 +571,6 @@ char * __init xen_memory_setup(void)
|
|||
unsigned long max_pages;
|
||||
unsigned long last_pfn = 0;
|
||||
unsigned long extra_pages = 0;
|
||||
unsigned long populated;
|
||||
int i;
|
||||
int op;
|
||||
|
||||
|
@ -392,20 +615,11 @@ char * __init xen_memory_setup(void)
|
|||
extra_pages += max_pages - max_pfn;
|
||||
|
||||
/*
|
||||
* Set P2M for all non-RAM pages and E820 gaps to be identity
|
||||
* type PFNs. Any RAM pages that would be made inaccessible by
|
||||
* this are first released.
|
||||
* Set identity map on non-RAM pages and remap the underlying RAM.
|
||||
*/
|
||||
xen_released_pages = xen_set_identity_and_release(
|
||||
map, memmap.nr_entries, max_pfn);
|
||||
last_pfn = xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn,
|
||||
&xen_released_pages);
|
||||
|
||||
/*
|
||||
* Populate back the non-RAM pages and E820 gaps that had been
|
||||
* released. */
|
||||
populated = xen_populate_chunk(map, memmap.nr_entries,
|
||||
max_pfn, &last_pfn, xen_released_pages);
|
||||
|
||||
xen_released_pages -= populated;
|
||||
extra_pages += xen_released_pages;
|
||||
|
||||
if (last_pfn > max_pfn) {
|
||||
|
|
|
@ -37,6 +37,7 @@
|
|||
#include <xen/hvc-console.h>
|
||||
#include "xen-ops.h"
|
||||
#include "mmu.h"
|
||||
#include "smp.h"
|
||||
|
||||
cpumask_var_t xen_cpu_initialized_map;
|
||||
|
||||
|
@ -99,10 +100,14 @@ static void cpu_bringup(void)
|
|||
wmb(); /* make sure everything is out */
|
||||
}
|
||||
|
||||
/* Note: cpu parameter is only relevant for PVH */
|
||||
static void cpu_bringup_and_idle(int cpu)
|
||||
/*
|
||||
* Note: cpu parameter is only relevant for PVH. The reason for passing it
|
||||
* is we can't do smp_processor_id until the percpu segments are loaded, for
|
||||
* which we need the cpu number! So we pass it in rdi as first parameter.
|
||||
*/
|
||||
asmlinkage __visible void cpu_bringup_and_idle(int cpu)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
#ifdef CONFIG_XEN_PVH
|
||||
if (xen_feature(XENFEAT_auto_translated_physmap) &&
|
||||
xen_feature(XENFEAT_supervisor_mode_kernel))
|
||||
xen_pvh_secondary_vcpu_init(cpu);
|
||||
|
@ -374,11 +379,10 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
|
|||
ctxt->user_regs.fs = __KERNEL_PERCPU;
|
||||
ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
|
||||
#endif
|
||||
ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
|
||||
|
||||
memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
|
||||
|
||||
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
|
||||
ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
|
||||
ctxt->flags = VGCF_IN_KERNEL;
|
||||
ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
|
||||
ctxt->user_regs.ds = __USER_DS;
|
||||
|
@ -413,15 +417,18 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
|
|||
(unsigned long)xen_failsafe_callback;
|
||||
ctxt->user_regs.cs = __KERNEL_CS;
|
||||
per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
|
||||
#ifdef CONFIG_X86_32
|
||||
}
|
||||
#else
|
||||
} else
|
||||
/* N.B. The user_regs.eip (cpu_bringup_and_idle) is called with
|
||||
* %rdi having the cpu number - which means we are passing in
|
||||
* as the first parameter the cpu. Subtle!
|
||||
#ifdef CONFIG_XEN_PVH
|
||||
else {
|
||||
/*
|
||||
* The vcpu comes on kernel page tables which have the NX pte
|
||||
* bit set. This means before DS/SS is touched, NX in
|
||||
* EFER must be set. Hence the following assembly glue code.
|
||||
*/
|
||||
ctxt->user_regs.eip = (unsigned long)xen_pvh_early_cpu_init;
|
||||
ctxt->user_regs.rdi = cpu;
|
||||
ctxt->user_regs.rsi = true; /* entry == true */
|
||||
}
|
||||
#endif
|
||||
ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
|
||||
ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
|
||||
|
|
|
@ -8,4 +8,12 @@ extern void xen_send_IPI_allbutself(int vector);
|
|||
extern void xen_send_IPI_all(int vector);
|
||||
extern void xen_send_IPI_self(int vector);
|
||||
|
||||
#ifdef CONFIG_XEN_PVH
|
||||
extern void xen_pvh_early_cpu_init(int cpu, bool entry);
|
||||
#else
|
||||
static inline void xen_pvh_early_cpu_init(int cpu, bool entry)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -47,6 +47,41 @@ ENTRY(startup_xen)
|
|||
|
||||
__FINIT
|
||||
|
||||
#ifdef CONFIG_XEN_PVH
|
||||
/*
|
||||
* xen_pvh_early_cpu_init() - early PVH VCPU initialization
|
||||
* @cpu: this cpu number (%rdi)
|
||||
* @entry: true if this is a secondary vcpu coming up on this entry
|
||||
* point, false if this is the boot CPU being initialized for
|
||||
* the first time (%rsi)
|
||||
*
|
||||
* Note: This is called as a function on the boot CPU, and is the entry point
|
||||
* on the secondary CPU.
|
||||
*/
|
||||
ENTRY(xen_pvh_early_cpu_init)
|
||||
mov %rsi, %r11
|
||||
|
||||
/* Gather features to see if NX implemented. */
|
||||
mov $0x80000001, %eax
|
||||
cpuid
|
||||
mov %edx, %esi
|
||||
|
||||
mov $MSR_EFER, %ecx
|
||||
rdmsr
|
||||
bts $_EFER_SCE, %eax
|
||||
|
||||
bt $20, %esi
|
||||
jnc 1f /* No NX, skip setting it */
|
||||
bts $_EFER_NX, %eax
|
||||
1: wrmsr
|
||||
#ifdef CONFIG_SMP
|
||||
cmp $0, %r11b
|
||||
jne cpu_bringup_and_idle
|
||||
#endif
|
||||
ret
|
||||
|
||||
#endif /* CONFIG_XEN_PVH */
|
||||
|
||||
.pushsection .text
|
||||
.balign PAGE_SIZE
|
||||
ENTRY(hypercall_page)
|
||||
|
@ -124,6 +159,7 @@ NEXT_HYPERCALL(arch_6)
|
|||
ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
|
||||
.quad _PAGE_PRESENT; .quad _PAGE_PRESENT)
|
||||
ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
|
||||
ELFNOTE(Xen, XEN_ELFNOTE_MOD_START_PFN, .long 1)
|
||||
ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, _ASM_PTR __HYPERVISOR_VIRT_START)
|
||||
ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, _ASM_PTR 0)
|
||||
|
||||
|
|
|
@ -907,22 +907,17 @@ static int connect_ring(struct backend_info *be)
|
|||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* ** Driver Registration ** */
|
||||
|
||||
|
||||
static const struct xenbus_device_id xen_blkbk_ids[] = {
|
||||
{ "vbd" },
|
||||
{ "" }
|
||||
};
|
||||
|
||||
|
||||
static DEFINE_XENBUS_DRIVER(xen_blkbk, ,
|
||||
static struct xenbus_driver xen_blkbk_driver = {
|
||||
.ids = xen_blkbk_ids,
|
||||
.probe = xen_blkbk_probe,
|
||||
.remove = xen_blkbk_remove,
|
||||
.otherend_changed = frontend_changed
|
||||
);
|
||||
|
||||
};
|
||||
|
||||
int xen_blkif_xenbus_init(void)
|
||||
{
|
||||
|
|
|
@ -2055,13 +2055,14 @@ static const struct xenbus_device_id blkfront_ids[] = {
|
|||
{ "" }
|
||||
};
|
||||
|
||||
static DEFINE_XENBUS_DRIVER(blkfront, ,
|
||||
static struct xenbus_driver blkfront_driver = {
|
||||
.ids = blkfront_ids,
|
||||
.probe = blkfront_probe,
|
||||
.remove = blkfront_remove,
|
||||
.resume = blkfront_resume,
|
||||
.otherend_changed = blkback_changed,
|
||||
.is_ready = blkfront_is_ready,
|
||||
);
|
||||
};
|
||||
|
||||
static int __init xlblk_init(void)
|
||||
{
|
||||
|
|
|
@ -367,12 +367,13 @@ static const struct xenbus_device_id tpmfront_ids[] = {
|
|||
};
|
||||
MODULE_ALIAS("xen:vtpm");
|
||||
|
||||
static DEFINE_XENBUS_DRIVER(tpmfront, ,
|
||||
.probe = tpmfront_probe,
|
||||
.remove = tpmfront_remove,
|
||||
.resume = tpmfront_resume,
|
||||
.otherend_changed = backend_changed,
|
||||
);
|
||||
static struct xenbus_driver tpmfront_driver = {
|
||||
.ids = tpmfront_ids,
|
||||
.probe = tpmfront_probe,
|
||||
.remove = tpmfront_remove,
|
||||
.resume = tpmfront_resume,
|
||||
.otherend_changed = backend_changed,
|
||||
};
|
||||
|
||||
static int __init xen_tpmfront_init(void)
|
||||
{
|
||||
|
|
|
@ -365,12 +365,13 @@ static const struct xenbus_device_id xenkbd_ids[] = {
|
|||
{ "" }
|
||||
};
|
||||
|
||||
static DEFINE_XENBUS_DRIVER(xenkbd, ,
|
||||
static struct xenbus_driver xenkbd_driver = {
|
||||
.ids = xenkbd_ids,
|
||||
.probe = xenkbd_probe,
|
||||
.remove = xenkbd_remove,
|
||||
.resume = xenkbd_resume,
|
||||
.otherend_changed = xenkbd_backend_changed,
|
||||
);
|
||||
};
|
||||
|
||||
static int __init xenkbd_init(void)
|
||||
{
|
||||
|
|
|
@ -937,22 +937,18 @@ static int read_xenbus_vif_flags(struct backend_info *be)
|
|||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* ** Driver Registration ** */
|
||||
|
||||
|
||||
static const struct xenbus_device_id netback_ids[] = {
|
||||
{ "vif" },
|
||||
{ "" }
|
||||
};
|
||||
|
||||
|
||||
static DEFINE_XENBUS_DRIVER(netback, ,
|
||||
static struct xenbus_driver netback_driver = {
|
||||
.ids = netback_ids,
|
||||
.probe = netback_probe,
|
||||
.remove = netback_remove,
|
||||
.uevent = netback_uevent,
|
||||
.otherend_changed = frontend_changed,
|
||||
);
|
||||
};
|
||||
|
||||
int xenvif_xenbus_init(void)
|
||||
{
|
||||
|
|
|
@ -2300,12 +2300,6 @@ static void xennet_sysfs_delif(struct net_device *netdev)
|
|||
|
||||
#endif /* CONFIG_SYSFS */
|
||||
|
||||
static const struct xenbus_device_id netfront_ids[] = {
|
||||
{ "vif" },
|
||||
{ "" }
|
||||
};
|
||||
|
||||
|
||||
static int xennet_remove(struct xenbus_device *dev)
|
||||
{
|
||||
struct netfront_info *info = dev_get_drvdata(&dev->dev);
|
||||
|
@ -2338,12 +2332,18 @@ static int xennet_remove(struct xenbus_device *dev)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static DEFINE_XENBUS_DRIVER(netfront, ,
|
||||
static const struct xenbus_device_id netfront_ids[] = {
|
||||
{ "vif" },
|
||||
{ "" }
|
||||
};
|
||||
|
||||
static struct xenbus_driver netfront_driver = {
|
||||
.ids = netfront_ids,
|
||||
.probe = netfront_probe,
|
||||
.remove = xennet_remove,
|
||||
.resume = netfront_resume,
|
||||
.otherend_changed = netback_changed,
|
||||
);
|
||||
};
|
||||
|
||||
static int __init netif_init(void)
|
||||
{
|
||||
|
|
|
@ -1136,11 +1136,13 @@ static const struct xenbus_device_id xenpci_ids[] = {
|
|||
{""},
|
||||
};
|
||||
|
||||
static DEFINE_XENBUS_DRIVER(xenpci, "pcifront",
|
||||
static struct xenbus_driver xenpci_driver = {
|
||||
.name = "pcifront",
|
||||
.ids = xenpci_ids,
|
||||
.probe = pcifront_xenbus_probe,
|
||||
.remove = pcifront_xenbus_remove,
|
||||
.otherend_changed = pcifront_backend_changed,
|
||||
);
|
||||
};
|
||||
|
||||
static int __init pcifront_init(void)
|
||||
{
|
||||
|
|
|
@ -587,6 +587,16 @@ config VMWARE_PVSCSI
|
|||
To compile this driver as a module, choose M here: the
|
||||
module will be called vmw_pvscsi.
|
||||
|
||||
config XEN_SCSI_FRONTEND
|
||||
tristate "XEN SCSI frontend driver"
|
||||
depends on SCSI && XEN
|
||||
select XEN_XENBUS_FRONTEND
|
||||
help
|
||||
The XEN SCSI frontend driver allows the kernel to access SCSI Devices
|
||||
within another guest OS (usually Dom0).
|
||||
Only needed if the kernel is running in a XEN guest and generic
|
||||
SCSI access to a device is needed.
|
||||
|
||||
config HYPERV_STORAGE
|
||||
tristate "Microsoft Hyper-V virtual storage driver"
|
||||
depends on SCSI && HYPERV
|
||||
|
|
|
@ -141,6 +141,7 @@ obj-$(CONFIG_SCSI_ESAS2R) += esas2r/
|
|||
obj-$(CONFIG_SCSI_PMCRAID) += pmcraid.o
|
||||
obj-$(CONFIG_SCSI_VIRTIO) += virtio_scsi.o
|
||||
obj-$(CONFIG_VMWARE_PVSCSI) += vmw_pvscsi.o
|
||||
obj-$(CONFIG_XEN_SCSI_FRONTEND) += xen-scsifront.o
|
||||
obj-$(CONFIG_HYPERV_STORAGE) += hv_storvsc.o
|
||||
|
||||
obj-$(CONFIG_ARM) += arm/
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -347,8 +347,6 @@ static int xen_console_remove(struct xencons_info *info)
|
|||
}
|
||||
|
||||
#ifdef CONFIG_HVC_XEN_FRONTEND
|
||||
static struct xenbus_driver xencons_driver;
|
||||
|
||||
static int xencons_remove(struct xenbus_device *dev)
|
||||
{
|
||||
return xen_console_remove(dev_get_drvdata(&dev->dev));
|
||||
|
@ -499,13 +497,14 @@ static const struct xenbus_device_id xencons_ids[] = {
|
|||
{ "" }
|
||||
};
|
||||
|
||||
|
||||
static DEFINE_XENBUS_DRIVER(xencons, "xenconsole",
|
||||
static struct xenbus_driver xencons_driver = {
|
||||
.name = "xenconsole",
|
||||
.ids = xencons_ids,
|
||||
.probe = xencons_probe,
|
||||
.remove = xencons_remove,
|
||||
.resume = xencons_resume,
|
||||
.otherend_changed = xencons_backend_changed,
|
||||
);
|
||||
};
|
||||
#endif /* CONFIG_HVC_XEN_FRONTEND */
|
||||
|
||||
static int __init xen_hvc_init(void)
|
||||
|
|
|
@ -684,12 +684,13 @@ static const struct xenbus_device_id xenfb_ids[] = {
|
|||
{ "" }
|
||||
};
|
||||
|
||||
static DEFINE_XENBUS_DRIVER(xenfb, ,
|
||||
static struct xenbus_driver xenfb_driver = {
|
||||
.ids = xenfb_ids,
|
||||
.probe = xenfb_probe,
|
||||
.remove = xenfb_remove,
|
||||
.resume = xenfb_resume,
|
||||
.otherend_changed = xenfb_backend_changed,
|
||||
);
|
||||
};
|
||||
|
||||
static int __init xenfb_init(void)
|
||||
{
|
||||
|
|
|
@ -172,6 +172,15 @@ config XEN_PCIDEV_BACKEND
|
|||
|
||||
If in doubt, say m.
|
||||
|
||||
config XEN_SCSI_BACKEND
|
||||
tristate "XEN SCSI backend driver"
|
||||
depends on XEN && XEN_BACKEND && TARGET_CORE
|
||||
help
|
||||
The SCSI backend driver allows the kernel to export its SCSI Devices
|
||||
to other guests via a high-performance shared-memory interface.
|
||||
Only needed for systems running as XEN driver domains (e.g. Dom0) and
|
||||
if guests need generic access to SCSI devices.
|
||||
|
||||
config XEN_PRIVCMD
|
||||
tristate
|
||||
depends on XEN
|
||||
|
|
|
@ -36,6 +36,7 @@ obj-$(CONFIG_XEN_ACPI_HOTPLUG_MEMORY) += xen-acpi-memhotplug.o
|
|||
obj-$(CONFIG_XEN_ACPI_HOTPLUG_CPU) += xen-acpi-cpuhotplug.o
|
||||
obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o
|
||||
obj-$(CONFIG_XEN_EFI) += efi.o
|
||||
obj-$(CONFIG_XEN_SCSI_BACKEND) += xen-scsiback.o
|
||||
xen-evtchn-y := evtchn.o
|
||||
xen-gntdev-y := gntdev.o
|
||||
xen-gntalloc-y := gntalloc.o
|
||||
|
|
|
@ -27,6 +27,8 @@
|
|||
#include <xen/interface/platform.h>
|
||||
#include <xen/xen.h>
|
||||
|
||||
#include <asm/page.h>
|
||||
|
||||
#include <asm/xen/hypercall.h>
|
||||
|
||||
#define INIT_EFI_OP(name) \
|
||||
|
|
|
@ -900,8 +900,8 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
|
|||
return irq;
|
||||
}
|
||||
|
||||
static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
|
||||
unsigned int remote_port)
|
||||
int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
|
||||
unsigned int remote_port)
|
||||
{
|
||||
struct evtchn_bind_interdomain bind_interdomain;
|
||||
int err;
|
||||
|
@ -914,6 +914,7 @@ static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
|
|||
|
||||
return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq);
|
||||
|
||||
static int find_virq(unsigned int virq, unsigned int cpu)
|
||||
{
|
||||
|
|
|
@ -592,7 +592,7 @@ static int grow_gnttab_list(unsigned int more_frames)
|
|||
return 0;
|
||||
|
||||
grow_nomem:
|
||||
for ( ; i >= nr_glist_frames; i--)
|
||||
while (i-- > nr_glist_frames)
|
||||
free_page((unsigned long) gnttab_list[i]);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
|
|
@ -719,11 +719,13 @@ static const struct xenbus_device_id xen_pcibk_ids[] = {
|
|||
{""},
|
||||
};
|
||||
|
||||
static DEFINE_XENBUS_DRIVER(xen_pcibk, DRV_NAME,
|
||||
static struct xenbus_driver xen_pcibk_driver = {
|
||||
.name = DRV_NAME,
|
||||
.ids = xen_pcibk_ids,
|
||||
.probe = xen_pcibk_xenbus_probe,
|
||||
.remove = xen_pcibk_xenbus_remove,
|
||||
.otherend_changed = xen_pcibk_frontend_changed,
|
||||
);
|
||||
};
|
||||
|
||||
const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend;
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -259,7 +259,6 @@ static char *error_path(struct xenbus_device *dev)
|
|||
static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
|
||||
const char *fmt, va_list ap)
|
||||
{
|
||||
int ret;
|
||||
unsigned int len;
|
||||
char *printf_buffer = NULL;
|
||||
char *path_buffer = NULL;
|
||||
|
@ -270,9 +269,7 @@ static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
|
|||
goto fail;
|
||||
|
||||
len = sprintf(printf_buffer, "%i ", -err);
|
||||
ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap);
|
||||
|
||||
BUG_ON(len + ret > PRINTF_BUFFER_SIZE-1);
|
||||
vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap);
|
||||
|
||||
dev_err(&dev->dev, "%s\n", printf_buffer);
|
||||
|
||||
|
@ -361,8 +358,8 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
|
|||
* @ring_mfn: mfn of ring to grant
|
||||
|
||||
* Grant access to the given @ring_mfn to the peer of the given device. Return
|
||||
* 0 on success, or -errno on error. On error, the device will switch to
|
||||
* XenbusStateClosing, and the error will be saved in the store.
|
||||
* a grant reference on success, or -errno on error. On error, the device will
|
||||
* switch to XenbusStateClosing, and the error will be saved in the store.
|
||||
*/
|
||||
int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
|
||||
{
|
||||
|
|
|
@ -297,9 +297,13 @@ void xenbus_dev_shutdown(struct device *_dev)
|
|||
EXPORT_SYMBOL_GPL(xenbus_dev_shutdown);
|
||||
|
||||
int xenbus_register_driver_common(struct xenbus_driver *drv,
|
||||
struct xen_bus_type *bus)
|
||||
struct xen_bus_type *bus,
|
||||
struct module *owner, const char *mod_name)
|
||||
{
|
||||
drv->driver.name = drv->name ? drv->name : drv->ids[0].devicetype;
|
||||
drv->driver.bus = &bus->bus;
|
||||
drv->driver.owner = owner;
|
||||
drv->driver.mod_name = mod_name;
|
||||
|
||||
return driver_register(&drv->driver);
|
||||
}
|
||||
|
|
|
@ -60,7 +60,9 @@ extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
|
|||
extern int xenbus_dev_probe(struct device *_dev);
|
||||
extern int xenbus_dev_remove(struct device *_dev);
|
||||
extern int xenbus_register_driver_common(struct xenbus_driver *drv,
|
||||
struct xen_bus_type *bus);
|
||||
struct xen_bus_type *bus,
|
||||
struct module *owner,
|
||||
const char *mod_name);
|
||||
extern int xenbus_probe_node(struct xen_bus_type *bus,
|
||||
const char *type,
|
||||
const char *nodename);
|
||||
|
|
|
@ -234,13 +234,15 @@ int xenbus_dev_is_online(struct xenbus_device *dev)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_dev_is_online);
|
||||
|
||||
int xenbus_register_backend(struct xenbus_driver *drv)
|
||||
int __xenbus_register_backend(struct xenbus_driver *drv, struct module *owner,
|
||||
const char *mod_name)
|
||||
{
|
||||
drv->read_otherend_details = read_frontend_details;
|
||||
|
||||
return xenbus_register_driver_common(drv, &xenbus_backend);
|
||||
return xenbus_register_driver_common(drv, &xenbus_backend,
|
||||
owner, mod_name);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_register_backend);
|
||||
EXPORT_SYMBOL_GPL(__xenbus_register_backend);
|
||||
|
||||
static int backend_probe_and_watch(struct notifier_block *notifier,
|
||||
unsigned long event,
|
||||
|
|
|
@ -317,13 +317,15 @@ static void wait_for_devices(struct xenbus_driver *xendrv)
|
|||
print_device_status);
|
||||
}
|
||||
|
||||
int xenbus_register_frontend(struct xenbus_driver *drv)
|
||||
int __xenbus_register_frontend(struct xenbus_driver *drv, struct module *owner,
|
||||
const char *mod_name)
|
||||
{
|
||||
int ret;
|
||||
|
||||
drv->read_otherend_details = read_backend_details;
|
||||
|
||||
ret = xenbus_register_driver_common(drv, &xenbus_frontend);
|
||||
ret = xenbus_register_driver_common(drv, &xenbus_frontend,
|
||||
owner, mod_name);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
@ -332,7 +334,7 @@ int xenbus_register_frontend(struct xenbus_driver *drv)
|
|||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xenbus_register_frontend);
|
||||
EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
|
||||
|
||||
static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq);
|
||||
static int backend_state;
|
||||
|
|
|
@ -28,6 +28,8 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
|
|||
unsigned long irqflags,
|
||||
const char *devname,
|
||||
void *dev_id);
|
||||
int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
|
||||
unsigned int remote_port);
|
||||
int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
|
||||
unsigned int remote_port,
|
||||
irq_handler_t handler,
|
||||
|
|
|
@ -3,6 +3,24 @@
|
|||
*
|
||||
* Definitions used for the Xen ELF notes.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Copyright (c) 2006, Ian Campbell, XenSource Ltd.
|
||||
*/
|
||||
|
||||
|
@ -18,12 +36,13 @@
|
|||
*
|
||||
* LEGACY indicated the fields in the legacy __xen_guest string which
|
||||
* this note type replaces.
|
||||
*
|
||||
* String values (for non-legacy) are NUL-terminated ASCII, also known
|
||||
* as ASCIZ type.
|
||||
*/
|
||||
|
||||
/*
|
||||
* NAME=VALUE pair (string).
|
||||
*
|
||||
* LEGACY: FEATURES and PAE
|
||||
*/
|
||||
#define XEN_ELFNOTE_INFO 0
|
||||
|
||||
|
@ -137,9 +156,29 @@
|
|||
|
||||
/*
|
||||
* Whether or not the guest supports cooperative suspend cancellation.
|
||||
* This is a numeric value.
|
||||
*
|
||||
* Default is 0
|
||||
*/
|
||||
#define XEN_ELFNOTE_SUSPEND_CANCEL 14
|
||||
|
||||
/*
|
||||
* The (non-default) location the initial phys-to-machine map should be
|
||||
* placed at by the hypervisor (Dom0) or the tools (DomU).
|
||||
* The kernel must be prepared for this mapping to be established using
|
||||
* large pages, despite such otherwise not being available to guests.
|
||||
* The kernel must also be able to handle the page table pages used for
|
||||
* this mapping not being accessible through the initial mapping.
|
||||
* (Only x86-64 supports this at present.)
|
||||
*/
|
||||
#define XEN_ELFNOTE_INIT_P2M 15
|
||||
|
||||
/*
|
||||
* Whether or not the guest can deal with being passed an initrd not
|
||||
* mapped through its initial page tables.
|
||||
*/
|
||||
#define XEN_ELFNOTE_MOD_START_PFN 16
|
||||
|
||||
/*
|
||||
* The features supported by this kernel (numeric).
|
||||
*
|
||||
|
@ -153,6 +192,11 @@
|
|||
*/
|
||||
#define XEN_ELFNOTE_SUPPORTED_FEATURES 17
|
||||
|
||||
/*
|
||||
* The number of the highest elfnote defined.
|
||||
*/
|
||||
#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUPPORTED_FEATURES
|
||||
|
||||
#endif /* __XEN_PUBLIC_ELFNOTE_H__ */
|
||||
|
||||
/*
|
||||
|
|
|
@ -0,0 +1,229 @@
|
|||
/******************************************************************************
|
||||
* vscsiif.h
|
||||
*
|
||||
* Based on the blkif.h code.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Copyright(c) FUJITSU Limited 2008.
|
||||
*/
|
||||
|
||||
#ifndef __XEN__PUBLIC_IO_SCSI_H__
|
||||
#define __XEN__PUBLIC_IO_SCSI_H__
|
||||
|
||||
#include "ring.h"
|
||||
#include "../grant_table.h"
|
||||
|
||||
/*
|
||||
* Feature and Parameter Negotiation
|
||||
* =================================
|
||||
* The two halves of a Xen pvSCSI driver utilize nodes within the XenStore to
|
||||
* communicate capabilities and to negotiate operating parameters. This
|
||||
* section enumerates these nodes which reside in the respective front and
|
||||
* backend portions of the XenStore, following the XenBus convention.
|
||||
*
|
||||
* Any specified default value is in effect if the corresponding XenBus node
|
||||
* is not present in the XenStore.
|
||||
*
|
||||
* XenStore nodes in sections marked "PRIVATE" are solely for use by the
|
||||
* driver side whose XenBus tree contains them.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Backend XenBus Nodes
|
||||
*****************************************************************************
|
||||
*
|
||||
*------------------ Backend Device Identification (PRIVATE) ------------------
|
||||
*
|
||||
* p-devname
|
||||
* Values: string
|
||||
*
|
||||
* A free string used to identify the physical device (e.g. a disk name).
|
||||
*
|
||||
* p-dev
|
||||
* Values: string
|
||||
*
|
||||
* A string specifying the backend device: either a 4-tuple "h:c:t:l"
|
||||
* (host, controller, target, lun, all integers), or a WWN (e.g.
|
||||
* "naa.60014054ac780582").
|
||||
*
|
||||
* v-dev
|
||||
* Values: string
|
||||
*
|
||||
* A string specifying the frontend device in form of a 4-tuple "h:c:t:l"
|
||||
* (host, controller, target, lun, all integers).
|
||||
*
|
||||
*--------------------------------- Features ---------------------------------
|
||||
*
|
||||
* feature-sg-grant
|
||||
* Values: unsigned [VSCSIIF_SG_TABLESIZE...65535]
|
||||
* Default Value: 0
|
||||
*
|
||||
* Specifies the maximum number of scatter/gather elements in grant pages
|
||||
* supported. If not set, the backend supports up to VSCSIIF_SG_TABLESIZE
|
||||
* SG elements specified directly in the request.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Frontend XenBus Nodes
|
||||
*****************************************************************************
|
||||
*
|
||||
*----------------------- Request Transport Parameters -----------------------
|
||||
*
|
||||
* event-channel
|
||||
* Values: unsigned
|
||||
*
|
||||
* The identifier of the Xen event channel used to signal activity
|
||||
* in the ring buffer.
|
||||
*
|
||||
* ring-ref
|
||||
* Values: unsigned
|
||||
*
|
||||
* The Xen grant reference granting permission for the backend to map
|
||||
* the sole page in a single page sized ring buffer.
|
||||
*
|
||||
* protocol
|
||||
* Values: string (XEN_IO_PROTO_ABI_*)
|
||||
* Default Value: XEN_IO_PROTO_ABI_NATIVE
|
||||
*
|
||||
* The machine ABI rules governing the format of all ring request and
|
||||
* response structures.
|
||||
*/
|
||||
|
||||
/* Requests from the frontend to the backend */
|
||||
|
||||
/*
|
||||
* Request a SCSI operation specified via a CDB in vscsiif_request.cmnd.
|
||||
* The target is specified via channel, id and lun.
|
||||
*
|
||||
* The operation to be performed is specified via a CDB in cmnd[], the length
|
||||
* of the CDB is in cmd_len. sc_data_direction specifies the direction of data
|
||||
* (to the device, from the device, or none at all).
|
||||
*
|
||||
* If data is to be transferred to or from the device the buffer(s) in the
|
||||
* guest memory is/are specified via one or multiple scsiif_request_segment
|
||||
* descriptors each specifying a memory page via a grant_ref_t, an offset into
|
||||
* the page and the length of the area in that page. All scsiif_request_segment
|
||||
* areas concatenated form the resulting data buffer used by the operation.
|
||||
* If the number of scsiif_request_segment areas is not too large (less than
|
||||
* or equal to VSCSIIF_SG_TABLESIZE) the areas can be specified directly in the
|
||||
* seg[] array and the number of valid scsiif_request_segment elements is to be
|
||||
* set in nr_segments.
|
||||
*
|
||||
* If "feature-sg-grant" in the Xenstore is set it is possible to specify more
|
||||
* than VSCSIIF_SG_TABLESIZE scsiif_request_segment elements via indirection.
|
||||
* The maximum number of allowed scsiif_request_segment elements is the value
|
||||
* of the "feature-sg-grant" entry from Xenstore. When using indirection the
|
||||
* seg[] array doesn't contain specifications of the data buffers, but
|
||||
* references to scsiif_request_segment arrays, which in turn reference the
|
||||
* data buffers. While nr_segments holds the number of populated seg[] entries
|
||||
* (plus the set VSCSIIF_SG_GRANT bit), the number of scsiif_request_segment
|
||||
* elements referencing the target data buffers is calculated from the lengths
|
||||
* of the seg[] elements (the sum of all valid seg[].length divided by the
|
||||
* size of one scsiif_request_segment structure).
|
||||
*/
|
||||
#define VSCSIIF_ACT_SCSI_CDB 1
|
||||
|
||||
/*
|
||||
* Request abort of a running operation for the specified target given by
|
||||
* channel, id, lun and the operation's rqid in ref_rqid.
|
||||
*/
|
||||
#define VSCSIIF_ACT_SCSI_ABORT 2
|
||||
|
||||
/*
|
||||
* Request a device reset of the specified target (channel and id).
|
||||
*/
|
||||
#define VSCSIIF_ACT_SCSI_RESET 3
|
||||
|
||||
/*
|
||||
* Preset scatter/gather elements for a following request. Deprecated.
|
||||
* Keeping the define only to avoid usage of the value "4" for other actions.
|
||||
*/
|
||||
#define VSCSIIF_ACT_SCSI_SG_PRESET 4
|
||||
|
||||
/*
|
||||
* Maximum scatter/gather segments per request.
|
||||
*
|
||||
* Considering balance between allocating at least 16 "vscsiif_request"
|
||||
* structures on one page (4096 bytes) and the number of scatter/gather
|
||||
* elements needed, we decided to use 26 as a magic number.
|
||||
*
|
||||
* If "feature-sg-grant" is set, more scatter/gather elements can be specified
|
||||
* by placing them in one or more (up to VSCSIIF_SG_TABLESIZE) granted pages.
|
||||
* In this case the vscsiif_request seg elements don't contain references to
|
||||
* the user data, but to the SG elements referencing the user data.
|
||||
*/
|
||||
#define VSCSIIF_SG_TABLESIZE 26
|
||||
|
||||
/*
|
||||
* based on Linux kernel 2.6.18, still valid
|
||||
* Changing these values requires support of multiple protocols via the rings
|
||||
* as "old clients" will blindly use these values and the resulting structure
|
||||
* sizes.
|
||||
*/
|
||||
#define VSCSIIF_MAX_COMMAND_SIZE 16
|
||||
#define VSCSIIF_SENSE_BUFFERSIZE 96
|
||||
|
||||
/* One data-buffer descriptor: an area inside a single granted guest page. */
struct scsiif_request_segment {
|
||||
grant_ref_t gref; /* grant reference specifying the memory page */
|
||||
uint16_t offset; /* offset of the area into that page */
|
||||
uint16_t length; /* length of the area in that page */
|
||||
};
|
||||
|
||||
#define VSCSIIF_SG_PER_PAGE (PAGE_SIZE / sizeof(struct scsiif_request_segment))
|
||||
|
||||
/* Size of one request is 252 bytes */
|
||||
struct vscsiif_request {
|
||||
uint16_t rqid; /* private guest value, echoed in resp */
|
||||
uint8_t act; /* command between backend and frontend */
|
||||
uint8_t cmd_len; /* valid CDB bytes */
|
||||
|
||||
uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE]; /* the CDB */
|
||||
uint16_t timeout_per_command; /* deprecated */
|
||||
uint16_t channel, id, lun; /* (virtual) device specification */
|
||||
uint16_t ref_rqid; /* command abort reference */
|
||||
uint8_t sc_data_direction; /* for DMA_TO_DEVICE(1)
|
||||
DMA_FROM_DEVICE(2)
|
||||
DMA_NONE(3) requests */
|
||||
uint8_t nr_segments; /* Number of pieces of scatter-gather */
|
||||
/*
|
||||
* flag in nr_segments: SG elements via grant page
|
||||
*
|
||||
* If VSCSIIF_SG_GRANT is set, the low 7 bits of nr_segments specify the number
|
||||
* of grant pages containing SG elements. Usable if "feature-sg-grant" set.
|
||||
*/
|
||||
#define VSCSIIF_SG_GRANT 0x80
|
||||
|
||||
struct scsiif_request_segment seg[VSCSIIF_SG_TABLESIZE];
|
||||
uint32_t reserved[3];
|
||||
};
|
||||
|
||||
/* Size of one response is 252 bytes */
|
||||
struct vscsiif_response {
|
||||
uint16_t rqid; /* identifies request */
|
||||
uint8_t padding;
|
||||
uint8_t sense_len;
|
||||
uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE];
|
||||
int32_t rslt;
|
||||
uint32_t residual_len; /* request bufflen -
|
||||
return the value from physical device */
|
||||
uint32_t reserved[36];
|
||||
};
|
||||
|
||||
DEFINE_RING_TYPES(vscsiif, struct vscsiif_request, struct vscsiif_response);
|
||||
|
||||
#endif /*__XEN__PUBLIC_IO_SCSI_H__*/
|
|
@ -3,6 +3,24 @@
|
|||
*
|
||||
* Guest OS interface to Xen.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Copyright (c) 2004, K A Fraser
|
||||
*/
|
||||
|
||||
|
@ -73,13 +91,23 @@
|
|||
* VIRTUAL INTERRUPTS
|
||||
*
|
||||
* Virtual interrupts that a guest OS may receive from Xen.
|
||||
* In the side comments, 'V.' denotes a per-VCPU VIRQ while 'G.' denotes a
|
||||
* global VIRQ. The former can be bound once per VCPU and cannot be re-bound.
|
||||
* The latter can be allocated only once per guest: they must initially be
|
||||
* allocated to VCPU0 but can subsequently be re-bound.
|
||||
*/
|
||||
#define VIRQ_TIMER 0 /* Timebase update, and/or requested timeout. */
|
||||
#define VIRQ_DEBUG 1 /* Request guest to dump debug info. */
|
||||
#define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */
|
||||
#define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */
|
||||
#define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */
|
||||
#define VIRQ_PCPU_STATE 9 /* (DOM0) PCPU state changed */
|
||||
#define VIRQ_TIMER 0 /* V. Timebase update, and/or requested timeout. */
|
||||
#define VIRQ_DEBUG 1 /* V. Request guest to dump debug info. */
|
||||
#define VIRQ_CONSOLE 2 /* G. (DOM0) Bytes received on emergency console. */
|
||||
#define VIRQ_DOM_EXC 3 /* G. (DOM0) Exceptional event for some domain. */
|
||||
#define VIRQ_TBUF 4 /* G. (DOM0) Trace buffer has records available. */
|
||||
#define VIRQ_DEBUGGER 6 /* G. (DOM0) A domain has paused for debugging. */
|
||||
#define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */
|
||||
#define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */
|
||||
#define VIRQ_PCPU_STATE 9 /* G. (DOM0) PCPU state changed */
|
||||
#define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occurred */
|
||||
#define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */
|
||||
#define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */
|
||||
|
||||
/* Architecture-specific VIRQ definitions. */
|
||||
#define VIRQ_ARCH_0 16
|
||||
|
@ -92,24 +120,68 @@
|
|||
#define VIRQ_ARCH_7 23
|
||||
|
||||
#define NR_VIRQS 24
|
||||
|
||||
/*
|
||||
* MMU-UPDATE REQUESTS
|
||||
*
|
||||
* HYPERVISOR_mmu_update() accepts a list of (ptr, val) pairs.
|
||||
* A foreigndom (FD) can be specified (or DOMID_SELF for none).
|
||||
* Where the FD has some effect, it is described below.
|
||||
* ptr[1:0] specifies the appropriate MMU_* command.
|
||||
* enum neg_errnoval HYPERVISOR_mmu_update(const struct mmu_update reqs[],
|
||||
* unsigned count, unsigned *done_out,
|
||||
* unsigned foreigndom)
|
||||
* @reqs is an array of mmu_update_t structures ((ptr, val) pairs).
|
||||
* @count is the length of the above array.
|
||||
* @done_out is an output parameter indicating number of completed operations
|
||||
* @foreigndom[15:0]: FD, the expected owner of data pages referenced in this
|
||||
* hypercall invocation. Can be DOMID_SELF.
|
||||
* @foreigndom[31:16]: PFD, the expected owner of pagetable pages referenced
|
||||
* in this hypercall invocation. The value of this field
|
||||
* (x) encodes the PFD as follows:
|
||||
* x == 0 => PFD == DOMID_SELF
|
||||
* x != 0 => PFD == x - 1
|
||||
*
|
||||
* Sub-commands: ptr[1:0] specifies the appropriate MMU_* command.
|
||||
* -------------
|
||||
* ptr[1:0] == MMU_NORMAL_PT_UPDATE:
|
||||
* Updates an entry in a page table. If updating an L1 table, and the new
|
||||
* table entry is valid/present, the mapped frame must belong to the FD, if
|
||||
* an FD has been specified. If attempting to map an I/O page then the
|
||||
* caller assumes the privilege of the FD.
|
||||
* Updates an entry in a page table belonging to PFD. If updating an L1 table,
|
||||
* and the new table entry is valid/present, the mapped frame must belong to
|
||||
* FD. If attempting to map an I/O page then the caller assumes the privilege
|
||||
* of the FD.
|
||||
* FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller.
|
||||
* FD == DOMID_XEN: Map restricted areas of Xen's heap space.
|
||||
* ptr[:2] -- Machine address of the page-table entry to modify.
|
||||
* val -- Value to write.
|
||||
*
|
||||
* There are also certain implicit requirements when using this hypercall. The
|
||||
* pages that make up a pagetable must be mapped read-only in the guest.
|
||||
* This prevents uncontrolled guest updates to the pagetable. Xen strictly
|
||||
* enforces this, and will disallow any pagetable update which will end up
|
||||
* mapping pagetable page RW, and will disallow using any writable page as a
|
||||
* pagetable. In practice it means that when constructing a page table for a
|
||||
* process, thread, etc, we MUST be very diligent in following these rules:
|
||||
* 1). Start with top-level page (PGD or in Xen language: L4). Fill out
|
||||
* the entries.
|
||||
* 2). Keep on going, filling out the upper (PUD or L3), and middle (PMD
|
||||
* or L2).
|
||||
* 3). Start filling out the PTE table (L1) with the PTE entries. Once
|
||||
* done, make sure to set each of those entries to RO (so writeable bit
|
||||
* is unset). Once that has been completed, set the PMD (L2) for this
|
||||
* PTE table as RO.
|
||||
* 4). When completed with all of the PMD (L2) entries, and all of them have
|
||||
* been set to RO, make sure to set RO the PUD (L3). Do the same
|
||||
* operation on PGD (L4) pagetable entries that have a PUD (L3) entry.
|
||||
* 5). Now before you can use those pages (so setting the cr3), you MUST also
|
||||
* pin them so that the hypervisor can verify the entries. This is done
|
||||
* via the HYPERVISOR_mmuext_op(MMUEXT_PIN_L4_TABLE, guest physical frame
|
||||
* number of the PGD (L4)). At this point the HYPERVISOR_mmuext_op(
|
||||
* MMUEXT_NEW_BASEPTR, guest physical frame number of the PGD (L4)) can be
|
||||
* issued.
|
||||
* For 32-bit guests, the L4 is not used (as there are fewer pagetables), so
|
||||
* instead use L3.
|
||||
* At this point the pagetables can be modified using the MMU_NORMAL_PT_UPDATE
|
||||
* hypercall. Also if so desired the OS can also try to write to the PTE
|
||||
* and be trapped by the hypervisor (as the PTE entry is RO).
|
||||
*
|
||||
* To deallocate the pages, the operations are the reverse of the steps
|
||||
* mentioned above. The argument is MMUEXT_UNPIN_TABLE for all levels and the
|
||||
* pagetable MUST not be in use (meaning that the cr3 is not set to it).
|
||||
*
|
||||
* ptr[1:0] == MMU_MACHPHYS_UPDATE:
|
||||
* Updates an entry in the machine->pseudo-physical mapping table.
|
||||
* ptr[:2] -- Machine address within the frame whose mapping to modify.
|
||||
|
@ -119,6 +191,72 @@
|
|||
* ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD:
|
||||
* As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed
|
||||
* with those in @val.
|
||||
*
|
||||
* @val is usually the machine frame number along with some attributes.
|
||||
* The attributes by default follow the architecture defined bits. Meaning that
|
||||
* if this is a X86_64 machine and four page table layout is used, the layout
|
||||
* of val is:
|
||||
* - 63 if set means No execute (NX)
|
||||
* - 46-13 the machine frame number
|
||||
* - 12 available for guest
|
||||
* - 11 available for guest
|
||||
* - 10 available for guest
|
||||
* - 9 available for guest
|
||||
* - 8 global
|
||||
* - 7 PAT (PSE is disabled, must use hypercall to make 4MB or 2MB pages)
|
||||
* - 6 dirty
|
||||
* - 5 accessed
|
||||
* - 4 page cache disabled
|
||||
* - 3 page write through
|
||||
* - 2 userspace accessible
|
||||
* - 1 writeable
|
||||
* - 0 present
|
||||
*
|
||||
* The one bit that does not fit with the default layout is the PAGE_PSE
|
||||
* (also called PAGE_PAT). The MMUEXT_[UN]MARK_SUPER arguments to the
|
||||
* HYPERVISOR_mmuext_op serve as mechanism to set a pagetable to be 4MB
|
||||
* (or 2MB) instead of using the PAGE_PSE bit.
|
||||
*
|
||||
* The reason that the PAGE_PSE (bit 7) is not being utilized is due to Xen
|
||||
* using it as the Page Attribute Table (PAT) bit - for details on it please
|
||||
* refer to Intel SDM 10.12. The PAT allows setting the caching attributes of
|
||||
* pages instead of using MTRRs.
|
||||
*
|
||||
* The PAT MSR is as follows (it is a 64-bit value, each entry is 8 bits):
|
||||
* PAT4 PAT0
|
||||
* +-----+-----+----+----+----+-----+----+----+
|
||||
* | UC | UC- | WC | WB | UC | UC- | WC | WB | <= Linux
|
||||
* +-----+-----+----+----+----+-----+----+----+
|
||||
* | UC | UC- | WT | WB | UC | UC- | WT | WB | <= BIOS (default when machine boots)
|
||||
* +-----+-----+----+----+----+-----+----+----+
|
||||
* | rsv | rsv | WP | WC | UC | UC- | WT | WB | <= Xen
|
||||
* +-----+-----+----+----+----+-----+----+----+
|
||||
*
|
||||
* The lookup of this index table translates to looking up
|
||||
* Bit 7, Bit 4, and Bit 3 of val entry:
|
||||
*
|
||||
* PAT/PSE (bit 7) ... PCD (bit 4) .. PWT (bit 3).
|
||||
*
|
||||
* If all bits are off, then we are using PAT0. If bit 3 turned on,
|
||||
* then we are using PAT1, if bit 3 and bit 4, then PAT3..
|
||||
*
|
||||
* As you can see, the Linux PAT1 translates to PAT4 under Xen. Which means
|
||||
* that if a guest that follows Linux's PAT setup and would like to set Write
|
||||
* Combined on pages it MUST use PAT4 entry. Meaning that Bit 7 (PAGE_PAT) is
|
||||
* set. For example, under Linux it only uses PAT0, PAT1, and PAT2 for the
|
||||
* caching as:
|
||||
*
|
||||
* WB = none (so PAT0)
|
||||
* WC = PWT (bit 3 on)
|
||||
* UC = PWT | PCD (bit 3 and 4 are on).
|
||||
*
|
||||
* To make it work with Xen, it needs to translate the WC bit as so:
|
||||
*
|
||||
* PWT (so bit 3 on) --> PAT (so bit 7 is on) and clear bit 3
|
||||
*
|
||||
* And to translate back it would be:
|
||||
*
|
||||
* PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7.
|
||||
*/
|
||||
#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */
|
||||
#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */
|
||||
|
@ -127,7 +265,12 @@
|
|||
/*
|
||||
* MMU EXTENDED OPERATIONS
|
||||
*
|
||||
* HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures.
|
||||
* enum neg_errnoval HYPERVISOR_mmuext_op(mmuext_op_t uops[],
|
||||
* unsigned int count,
|
||||
* unsigned int *pdone,
|
||||
* unsigned int foreigndom)
|
||||
*/
|
||||
/* HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures.
|
||||
* A foreigndom (FD) can be specified (or DOMID_SELF for none).
|
||||
* Where the FD has some effect, it is described below.
|
||||
*
|
||||
|
@ -164,9 +307,23 @@
|
|||
* cmd: MMUEXT_FLUSH_CACHE
|
||||
* No additional arguments. Writes back and flushes cache contents.
|
||||
*
|
||||
* cmd: MMUEXT_FLUSH_CACHE_GLOBAL
|
||||
* No additional arguments. Writes back and flushes cache contents
|
||||
* on all CPUs in the system.
|
||||
*
|
||||
* cmd: MMUEXT_SET_LDT
|
||||
* linear_addr: Linear address of LDT base (NB. must be page-aligned).
|
||||
* nr_ents: Number of entries in LDT.
|
||||
*
|
||||
* cmd: MMUEXT_CLEAR_PAGE
|
||||
* mfn: Machine frame number to be cleared.
|
||||
*
|
||||
* cmd: MMUEXT_COPY_PAGE
|
||||
* mfn: Machine frame number of the destination page.
|
||||
* src_mfn: Machine frame number of the source page.
|
||||
*
|
||||
* cmd: MMUEXT_[UN]MARK_SUPER
|
||||
* mfn: Machine frame number of head of superpage to be [un]marked.
|
||||
*/
|
||||
#define MMUEXT_PIN_L1_TABLE 0
|
||||
#define MMUEXT_PIN_L2_TABLE 1
|
||||
|
@ -183,12 +340,18 @@
|
|||
#define MMUEXT_FLUSH_CACHE 12
|
||||
#define MMUEXT_SET_LDT 13
|
||||
#define MMUEXT_NEW_USER_BASEPTR 15
|
||||
#define MMUEXT_CLEAR_PAGE 16
|
||||
#define MMUEXT_COPY_PAGE 17
|
||||
#define MMUEXT_FLUSH_CACHE_GLOBAL 18
|
||||
#define MMUEXT_MARK_SUPER 19
|
||||
#define MMUEXT_UNMARK_SUPER 20
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
struct mmuext_op {
|
||||
unsigned int cmd;
|
||||
union {
|
||||
/* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */
|
||||
/* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR
|
||||
* CLEAR_PAGE, COPY_PAGE, [UN]MARK_SUPER */
|
||||
xen_pfn_t mfn;
|
||||
/* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
|
||||
unsigned long linear_addr;
|
||||
|
@ -198,6 +361,8 @@ struct mmuext_op {
|
|||
unsigned int nr_ents;
|
||||
/* TLB_FLUSH_MULTI, INVLPG_MULTI */
|
||||
void *vcpumask;
|
||||
/* COPY_PAGE */
|
||||
xen_pfn_t src_mfn;
|
||||
} arg2;
|
||||
};
|
||||
DEFINE_GUEST_HANDLE_STRUCT(mmuext_op);
|
||||
|
@ -225,10 +390,23 @@ DEFINE_GUEST_HANDLE_STRUCT(mmuext_op);
|
|||
*/
|
||||
#define VMASST_CMD_enable 0
|
||||
#define VMASST_CMD_disable 1
|
||||
|
||||
/* x86/32 guests: simulate full 4GB segment limits. */
|
||||
#define VMASST_TYPE_4gb_segments 0
|
||||
|
||||
/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */
|
||||
#define VMASST_TYPE_4gb_segments_notify 1
|
||||
|
||||
/*
|
||||
* x86 guests: support writes to bottom-level PTEs.
|
||||
* NB1. Page-directory entries cannot be written.
|
||||
* NB2. Guest must continue to remove all writable mappings of PTEs.
|
||||
*/
|
||||
#define VMASST_TYPE_writable_pagetables 2
|
||||
|
||||
/* x86/PAE guests: support PDPTs above 4GB. */
|
||||
#define VMASST_TYPE_pae_extended_cr3 3
|
||||
|
||||
#define MAX_VMASST_TYPE 3
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
@ -260,6 +438,15 @@ typedef uint16_t domid_t;
|
|||
*/
|
||||
#define DOMID_XEN (0x7FF2U)
|
||||
|
||||
/* DOMID_COW is used as the owner of sharable pages */
|
||||
#define DOMID_COW (0x7FF3U)
|
||||
|
||||
/* DOMID_INVALID is used to identify pages with unknown owner. */
|
||||
#define DOMID_INVALID (0x7FF4U)
|
||||
|
||||
/* Idle domain. */
|
||||
#define DOMID_IDLE (0x7FFFU)
|
||||
|
||||
/*
|
||||
* Send an array of these to HYPERVISOR_mmu_update().
|
||||
* NB. The fields are natural pointer/address size for this architecture.
|
||||
|
@ -272,7 +459,9 @@ DEFINE_GUEST_HANDLE_STRUCT(mmu_update);
|
|||
|
||||
/*
|
||||
* Send an array of these to HYPERVISOR_multicall().
|
||||
* NB. The fields are natural register size for this architecture.
|
||||
* NB. The fields are logically the natural register size for this
|
||||
* architecture. In cases where xen_ulong_t is larger than this then
|
||||
* any unused bits in the upper portion must be zero.
|
||||
*/
|
||||
struct multicall_entry {
|
||||
xen_ulong_t op;
|
||||
|
@ -442,8 +631,48 @@ struct start_info {
|
|||
unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */
|
||||
unsigned long mod_len; /* Size (bytes) of pre-loaded module. */
|
||||
int8_t cmd_line[MAX_GUEST_CMDLINE];
|
||||
/* The pfn range here covers both page table and p->m table frames. */
|
||||
unsigned long first_p2m_pfn;/* 1st pfn forming initial P->M table. */
|
||||
unsigned long nr_p2m_frames;/* # of pfns forming initial P->M table. */
|
||||
};
|
||||
|
||||
/* These flags are passed in the 'flags' field of start_info_t. */
|
||||
#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */
|
||||
#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */
|
||||
#define SIF_MULTIBOOT_MOD (1<<2) /* Is mod_start a multiboot module? */
|
||||
#define SIF_MOD_START_PFN (1<<3) /* Is mod_start a PFN? */
|
||||
#define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */
|
||||
|
||||
/*
|
||||
* A multiboot module is a package containing modules very similar to a
|
||||
* multiboot module array. The only differences are:
|
||||
* - the array of module descriptors is by convention simply at the beginning
|
||||
* of the multiboot module,
|
||||
* - addresses in the module descriptors are based on the beginning of the
|
||||
* multiboot module,
|
||||
* - the number of modules is determined by a termination descriptor that has
|
||||
* mod_start == 0.
|
||||
*
|
||||
* This permits to both build it statically and reference it in a configuration
|
||||
* file, and let the PV guest easily rebase the addresses to virtual addresses
|
||||
* and at the same time count the number of modules.
|
||||
*/
|
||||
struct xen_multiboot_mod_list {
|
||||
/* Address of first byte of the module */
|
||||
uint32_t mod_start;
|
||||
/* Address of last byte of the module (inclusive) */
|
||||
uint32_t mod_end;
|
||||
/* Address of zero-terminated command line */
|
||||
uint32_t cmdline;
|
||||
/* Unused, must be zero */
|
||||
uint32_t pad;
|
||||
};
|
||||
/*
|
||||
* The console structure in start_info.console.dom0
|
||||
*
|
||||
* This structure includes a variety of information required to
|
||||
* have a working VGA/VESA console.
|
||||
*/
|
||||
struct dom0_vga_console_info {
|
||||
uint8_t video_type;
|
||||
#define XEN_VGATYPE_TEXT_MODE_3 0x03
|
||||
|
@ -484,11 +713,6 @@ struct dom0_vga_console_info {
|
|||
} u;
|
||||
};
|
||||
|
||||
/* These flags are passed in the 'flags' field of start_info_t. */
|
||||
#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */
|
||||
#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */
|
||||
#define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */
|
||||
|
||||
typedef uint64_t cpumap_t;
|
||||
|
||||
typedef uint8_t xen_domain_handle_t[16];
|
||||
|
|
|
@ -86,6 +86,7 @@ struct xenbus_device_id
|
|||
|
||||
/* A xenbus driver. */
|
||||
struct xenbus_driver {
|
||||
const char *name; /* defaults to ids[0].devicetype */
|
||||
const struct xenbus_device_id *ids;
|
||||
int (*probe)(struct xenbus_device *dev,
|
||||
const struct xenbus_device_id *id);
|
||||
|
@ -100,20 +101,22 @@ struct xenbus_driver {
|
|||
int (*is_ready)(struct xenbus_device *dev);
|
||||
};
|
||||
|
||||
#define DEFINE_XENBUS_DRIVER(var, drvname, methods...) \
|
||||
struct xenbus_driver var ## _driver = { \
|
||||
.driver.name = drvname + 0 ?: var ## _ids->devicetype, \
|
||||
.driver.owner = THIS_MODULE, \
|
||||
.ids = var ## _ids, ## methods \
|
||||
}
|
||||
|
||||
static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv)
|
||||
{
|
||||
return container_of(drv, struct xenbus_driver, driver);
|
||||
}
|
||||
|
||||
int __must_check xenbus_register_frontend(struct xenbus_driver *);
|
||||
int __must_check xenbus_register_backend(struct xenbus_driver *);
|
||||
int __must_check __xenbus_register_frontend(struct xenbus_driver *drv,
|
||||
struct module *owner,
|
||||
const char *mod_name);
|
||||
int __must_check __xenbus_register_backend(struct xenbus_driver *drv,
|
||||
struct module *owner,
|
||||
const char *mod_name);
|
||||
|
||||
#define xenbus_register_frontend(drv) \
|
||||
__xenbus_register_frontend(drv, THIS_MODULE, KBUILD_MODNAME);
|
||||
#define xenbus_register_backend(drv) \
|
||||
__xenbus_register_backend(drv, THIS_MODULE, KBUILD_MODNAME);
|
||||
|
||||
void xenbus_unregister_driver(struct xenbus_driver *drv);
|
||||
|
||||
|
|
Loading…
Reference in New Issue