mirror of https://gitee.com/openkylin/linux.git
x86, cpa: make the kernel physical mapping initialization a two pass sequence, fix
Jeremy Fitzhardinge wrote:

> I'd noticed that current tip/master hasn't been booting under Xen, and I
> just got around to bisecting it down to this change.
>
> commit 065ae73c5462d42e9761afb76f2b52965ff45bd6
> Author: Suresh Siddha <suresh.b.siddha@intel.com>
>
>     x86, cpa: make the kernel physical mapping initialization a two pass sequence
>
> This patch is causing Xen to fail various pagetable updates because it
> ends up remapping pagetables to RW, which Xen explicitly prohibits (as
> that would allow guests to make arbitrary changes to pagetables, rather
> than have them mediated by the hypervisor).

Instead of making init a two pass sequence, to satisfy the Intel's TLB
Application note (developer.intel.com/design/processor/applnots/317080.pdf
Section 6 page 26), we preserve the original page permissions when
fragmenting the large mappings and don't touch the existing memory mapping
(which satisfies Xen's requirements).

Only open issue is: on a native linux kernel, we will go back to mapping
the first 0-1GB kernel identity mapping as executable (because of the
static mapping setup in head_64.S). We can fix this in a different patch
if needed.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
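The crux of the fix, as the message above describes, is that when an existing large (2MB/1GB) mapping has to be fragmented into smaller pages, the new entries keep the protection bits already present in the large entry rather than being rewritten with fresh PAGE_KERNEL defaults, so neither the page frame nor the attributes change for any address. Below is a minimal, self-contained C sketch of that idea; the entry_t type, PROT_* flags and split_large_entry() helper are made up for illustration and are not the kernel's real pte_t/pgprot_t APIs.

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative model of a page-table entry: a page frame number plus
 * attribute bits.  Hypothetical types/flags, not the kernel's own.
 */
typedef struct {
        uint64_t pfn;    /* page frame number */
        uint64_t prot;   /* protection/attribute bits */
        int      large;  /* 1 = 2MB mapping, 0 = 4KB mapping */
} entry_t;

#define PROT_RW 0x2ull
#define PROT_NX 0x8000000000000000ull

/*
 * Split one 2MB entry into 512 4KB entries.  Per the Intel TLB application
 * note (317080), the new translations must not change the page frame or
 * the attributes for any address, so every small entry inherits the old
 * entry's protection bits instead of being reset to a default.
 */
static void split_large_entry(const entry_t *big, entry_t small[512])
{
        for (int i = 0; i < 512; i++) {
                small[i].pfn   = big->pfn + i; /* same frames, finer granularity */
                small[i].prot  = big->prot;    /* preserve existing attributes */
                small[i].large = 0;
        }
}

int main(void)
{
        entry_t big = { .pfn = 0x1000, .prot = PROT_RW | PROT_NX, .large = 1 };
        entry_t small[512];

        split_large_entry(&big, small);
        printf("first 4K entry: pfn=%#llx prot=%#llx\n",
               (unsigned long long)small[0].pfn,
               (unsigned long long)small[0].prot);
        return 0;
}

In the actual patch the same effect comes from new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)) (and its pud counterpart), which is passed down through phys_pmd_init()/phys_pte_init() instead of the hard-coded PAGE_KERNEL / PAGE_KERNEL_LARGE used before.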
This commit is contained in:
parent ad2cde16a2
commit b27a43c1e9
@@ -270,10 +270,9 @@ static __ref void unmap_low_page(void *adr)
 	early_iounmap(adr, PAGE_SIZE);
 }
 
-static int physical_mapping_iter;
-
 static unsigned long __meminit
-phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
+phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
+	      pgprot_t prot)
 {
 	unsigned pages = 0;
 	unsigned long last_map_addr = end;
@@ -291,35 +290,40 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
 			break;
 		}
 
+		/*
+		 * We will re-use the existing mapping.
+		 * Xen for example has some special requirements, like mapping
+		 * pagetable pages as RO. So assume someone who pre-setup
+		 * these mappings are more intelligent.
+		 */
 		if (pte_val(*pte))
-			goto repeat_set_pte;
+			continue;
 
 		if (0)
 			printk(" pte=%p addr=%lx pte=%016lx\n",
 			       pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte);
 		pages++;
-repeat_set_pte:
-		set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL));
+		set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, prot));
 		last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
 	}
 
-	if (physical_mapping_iter == 1)
-		update_page_count(PG_LEVEL_4K, pages);
+	update_page_count(PG_LEVEL_4K, pages);
 
 	return last_map_addr;
 }
 
 static unsigned long __meminit
-phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end)
+phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end,
+	       pgprot_t prot)
 {
 	pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
 
-	return phys_pte_init(pte, address, end);
+	return phys_pte_init(pte, address, end, prot);
 }
 
 static unsigned long __meminit
 phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
-	      unsigned long page_size_mask)
+	      unsigned long page_size_mask, pgprot_t prot)
 {
 	unsigned long pages = 0;
 	unsigned long last_map_addr = end;
@@ -330,6 +334,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
 		unsigned long pte_phys;
 		pmd_t *pmd = pmd_page + pmd_index(address);
 		pte_t *pte;
+		pgprot_t new_prot = prot;
 
 		if (address >= end) {
 			if (!after_bootmem) {
@@ -343,45 +348,58 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
 			if (!pmd_large(*pmd)) {
 				spin_lock(&init_mm.page_table_lock);
 				last_map_addr = phys_pte_update(pmd, address,
-								end);
+								end, prot);
 				spin_unlock(&init_mm.page_table_lock);
 				continue;
 			}
-			goto repeat_set_pte;
+			/*
+			 * If we are ok with PG_LEVEL_2M mapping, then we will
+			 * use the existing mapping,
+			 *
+			 * Otherwise, we will split the large page mapping but
+			 * use the same existing protection bits except for
+			 * large page, so that we don't violate Intel's TLB
+			 * Application note (317080) which says, while changing
+			 * the page sizes, new and old translations should
+			 * not differ with respect to page frame and
+			 * attributes.
+			 */
+			if (page_size_mask & (1 << PG_LEVEL_2M))
+				continue;
+			new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
 		}
 
 		if (page_size_mask & (1<<PG_LEVEL_2M)) {
 			pages++;
-repeat_set_pte:
 			spin_lock(&init_mm.page_table_lock);
 			set_pte((pte_t *)pmd,
-				pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
+				pfn_pte(address >> PAGE_SHIFT,
+					__pgprot(pgprot_val(prot) | _PAGE_PSE)));
 			spin_unlock(&init_mm.page_table_lock);
 			last_map_addr = (address & PMD_MASK) + PMD_SIZE;
 			continue;
 		}
 
 		pte = alloc_low_page(&pte_phys);
-		last_map_addr = phys_pte_init(pte, address, end);
+		last_map_addr = phys_pte_init(pte, address, end, new_prot);
 		unmap_low_page(pte);
 
 		spin_lock(&init_mm.page_table_lock);
 		pmd_populate_kernel(&init_mm, pmd, __va(pte_phys));
 		spin_unlock(&init_mm.page_table_lock);
 	}
-	if (physical_mapping_iter == 1)
-		update_page_count(PG_LEVEL_2M, pages);
+	update_page_count(PG_LEVEL_2M, pages);
 	return last_map_addr;
 }
 
 static unsigned long __meminit
 phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
-		unsigned long page_size_mask)
+		unsigned long page_size_mask, pgprot_t prot)
 {
 	pmd_t *pmd = pmd_offset(pud, 0);
 	unsigned long last_map_addr;
 
-	last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask);
+	last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask, prot);
 	__flush_tlb_all();
 	return last_map_addr;
 }
@@ -398,6 +416,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 		unsigned long pmd_phys;
 		pud_t *pud = pud_page + pud_index(addr);
 		pmd_t *pmd;
+		pgprot_t prot = PAGE_KERNEL;
 
 		if (addr >= end)
 			break;
@@ -411,16 +430,28 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 		if (pud_val(*pud)) {
 			if (!pud_large(*pud)) {
 				last_map_addr = phys_pmd_update(pud, addr, end,
-							page_size_mask);
+							page_size_mask, prot);
 				continue;
 			}
-			goto repeat_set_pte;
+			/*
+			 * If we are ok with PG_LEVEL_1G mapping, then we will
+			 * use the existing mapping.
+			 *
+			 * Otherwise, we will split the gbpage mapping but use
+			 * the same existing protection bits except for large
+			 * page, so that we don't violate Intel's TLB
+			 * Application note (317080) which says, while changing
+			 * the page sizes, new and old translations should
+			 * not differ with respect to page frame and
+			 * attributes.
+			 */
+			if (page_size_mask & (1 << PG_LEVEL_1G))
+				continue;
+			prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
 		}
 
 		if (page_size_mask & (1<<PG_LEVEL_1G)) {
 			pages++;
-repeat_set_pte:
 			spin_lock(&init_mm.page_table_lock);
 			set_pte((pte_t *)pud,
 				pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
@@ -430,7 +461,8 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 		}
 
 		pmd = alloc_low_page(&pmd_phys);
-		last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask);
+		last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask,
+					      prot);
 		unmap_low_page(pmd);
 
 		spin_lock(&init_mm.page_table_lock);
@@ -439,8 +471,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
 	}
 	__flush_tlb_all();
 
-	if (physical_mapping_iter == 1)
-		update_page_count(PG_LEVEL_1G, pages);
+	update_page_count(PG_LEVEL_1G, pages);
 
 	return last_map_addr;
 }
@@ -505,54 +536,15 @@ static void __init init_gbpages(void)
 		direct_gbpages = 0;
 }
 
-static int is_kernel(unsigned long pfn)
-{
-	unsigned long pg_addresss = pfn << PAGE_SHIFT;
-
-	if (pg_addresss >= (unsigned long) __pa(_text) &&
-	    pg_addresss < (unsigned long) __pa(_end))
-		return 1;
-
-	return 0;
-}
-
 static unsigned long __init kernel_physical_mapping_init(unsigned long start,
 						unsigned long end,
 						unsigned long page_size_mask)
 {
 
-	unsigned long next, last_map_addr;
-	u64 cached_supported_pte_mask = __supported_pte_mask;
-	unsigned long cache_start = start;
-	unsigned long cache_end = end;
-
-	/*
-	 * First iteration will setup identity mapping using large/small pages
-	 * based on page_size_mask, with other attributes same as set by
-	 * the early code in head_64.S
-	 *
-	 * Second iteration will setup the appropriate attributes
-	 * as desired for the kernel identity mapping.
-	 *
-	 * This two pass mechanism conforms to the TLB app note which says:
-	 *
-	 *     "Software should not write to a paging-structure entry in a way
-	 *      that would change, for any linear address, both the page size
-	 *      and either the page frame or attributes."
-	 *
-	 * For now, only difference between very early PTE attributes used in
-	 * head_64.S and here is _PAGE_NX.
-	 */
-	BUILD_BUG_ON((__PAGE_KERNEL_LARGE & ~__PAGE_KERNEL_IDENT_LARGE_EXEC)
-		     != _PAGE_NX);
-	__supported_pte_mask &= ~(_PAGE_NX);
-	physical_mapping_iter = 1;
-
-repeat:
-	last_map_addr = cache_end;
-
-	start = (unsigned long)__va(cache_start);
-	end = (unsigned long)__va(cache_end);
+	unsigned long next, last_map_addr = end;
+
+	start = (unsigned long)__va(start);
+	end = (unsigned long)__va(end);
 
 	for (; start < end; start = next) {
 		pgd_t *pgd = pgd_offset_k(start);
@@ -564,21 +556,11 @@ static unsigned long __init kernel_physical_mapping_init(unsigned long start,
 			next = end;
 
 		if (pgd_val(*pgd)) {
-			/*
-			 * Static identity mappings will be overwritten
-			 * with run-time mappings. For example, this allows
-			 * the static 0-1GB identity mapping to be mapped
-			 * non-executable with this.
-			 */
-			if (is_kernel(pte_pfn(*((pte_t *) pgd))))
-				goto realloc;
-
 			last_map_addr = phys_pud_update(pgd, __pa(start),
 						 __pa(end), page_size_mask);
 			continue;
 		}
 
-realloc:
 		pud = alloc_low_page(&pud_phys);
 		last_map_addr = phys_pud_init(pud, __pa(start), __pa(next),
 						 page_size_mask);
@@ -590,15 +572,6 @@ static unsigned long __init kernel_physical_mapping_init(unsigned long start,
 	}
 	__flush_tlb_all();
 
-	if (physical_mapping_iter == 1) {
-		physical_mapping_iter = 2;
-		/*
-		 * Second iteration will set the actual desired PTE attributes.
-		 */
-		__supported_pte_mask = cached_supported_pte_mask;
-		goto repeat;
-	}
-
 	return last_map_addr;
 }