2008-10-23 15:01:39 +08:00
|
|
|
#ifndef _ASM_X86_XEN_PAGE_H
|
|
|
|
#define _ASM_X86_XEN_PAGE_H
|
2008-04-03 01:53:58 +08:00
|
|
|
|
2008-12-17 04:37:07 +08:00
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/spinlock.h>
|
2008-04-03 01:53:58 +08:00
|
|
|
#include <linux/pfn.h>
|
2010-09-30 19:37:26 +08:00
|
|
|
#include <linux/mm.h>
|
2008-04-03 01:53:58 +08:00
|
|
|
|
|
|
|
#include <asm/uaccess.h>
|
2008-12-17 04:37:07 +08:00
|
|
|
#include <asm/page.h>
|
2008-04-03 01:53:58 +08:00
|
|
|
#include <asm/pgtable.h>
|
|
|
|
|
2008-12-17 04:37:07 +08:00
|
|
|
#include <xen/interface/xen.h>
|
2008-04-03 01:53:58 +08:00
|
|
|
#include <xen/features.h>
|
|
|
|
|
|
|
|
/* Xen machine address */
|
|
|
|
typedef struct xmaddr {
|
|
|
|
phys_addr_t maddr;
|
|
|
|
} xmaddr_t;
|
|
|
|
|
|
|
|
/* Xen pseudo-physical address */
|
|
|
|
typedef struct xpaddr {
|
|
|
|
phys_addr_t paddr;
|
|
|
|
} xpaddr_t;
|
|
|
|
|
|
|
|
/* Build an xmaddr_t compound literal whose .maddr member is x. */
#define XMADDR(x) ((xmaddr_t) { .maddr = (x) })
|
|
|
|
/* Build an xpaddr_t compound literal whose .paddr member is x. */
#define XPADDR(x) ((xpaddr_t) { .paddr = (x) })
|
|
|
|
|
|
|
|
/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
|
|
|
|
/*
 * All-ones sentinel. Results of get_phys_to_machine() are compared against
 * this value to detect a pfn that has no valid machine frame.
 */
#define INVALID_P2M_ENTRY (~0UL)
|
xen/mmu: Add the notion of identity (1-1) mapping.
Our P2M tree structure is a three-level. On the leaf nodes
we set the Machine Frame Number (MFN) of the PFN. What this means
is that when one does: pfn_to_mfn(pfn), which is used when creating
PTE entries, you get the real MFN of the hardware. When Xen sets
up a guest it initially populates an array which has descending
(or ascending) MFN values, as so:
idx: 0, 1, 2
[0x290F, 0x290E, 0x290D, ..]
so pfn_to_mfn(2)==0x290D. If you start, restart many guests that list
starts looking quite random.
We graft this structure on our P2M tree structure and stick in
those MFN in the leafs. But for all other leaf entries, or for the top
root, or middle one, for which there is a void entry, we assume it is
"missing". So
pfn_to_mfn(0xc0000)=INVALID_P2M_ENTRY.
We add the possibility of setting 1-1 mappings on certain regions, so
that:
pfn_to_mfn(0xc0000)=0xc0000
The benefit of this is, that we can assume for non-RAM regions (think
PCI BARs, or ACPI spaces), we can create mappings easily b/c we
get the PFN value to match the MFN.
For this to work efficiently we introduce one new page p2m_identity and
allocate (via reserved_brk) any other pages we need to cover the sides
(1GB or 4MB boundary violations). All entries in p2m_identity are set to
INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs,
no other fancy value).
On lookup we spot that the entry points to p2m_identity and return the identity
value instead of dereferencing and returning INVALID_P2M_ENTRY. If the entry
points to an allocated page, we just proceed as before and return the PFN.
If the PFN has IDENTITY_FRAME_BIT set we unmask that in appropriate functions
(pfn_to_mfn).
The reason for having the IDENTITY_FRAME_BIT instead of just returning the
PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
non-identity pfn. To protect ourselves against we elect to set (and get) the
IDENTITY_FRAME_BIT on all identity mapped PFNs.
This simplistic diagram is used to explain the more subtle piece of code.
There is also a diagram of the P2M at the end that can help.
Imagine your E820 looking as so:
1GB 2GB
/-------------------+---------\/----\ /----------\ /---+-----\
| System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM |
\-------------------+---------/\----/ \----------/ \---+-----/
^- 1029MB ^- 2001MB
[1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100), 2048MB = 524288 (0x80000)]
And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB
is actually not present (would have to kick the balloon driver to put it in).
When we are told to set the PFNs for identity mapping (see patch: "xen/setup:
Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start
of the PFN and the end PFN (263424 and 512256 respectively). The first step is
to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page
covers 512^2 of page estate (1GB) and in case the start or end PFN is not
aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn to
end pfn. We reserve_brk top leaf pages if they are missing (means they point
to p2m_mid_missing).
With the E820 example above, 263424 is not 1GB aligned so we allocate a
reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000.
Each entry in the allocate page is "missing" (points to p2m_missing).
Next stage is to determine if we need to do a more granular boundary check
on the 4MB (or 2MB depending on architecture) off the start and end pfn's.
We check if the start pfn and end pfn violate that boundary check, and if
so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer
granularity of setting which PFNs are missing and which ones are identity.
In our example 263424 and 512256 both fail the check so we reserve_brk two
pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing" values)
and assign them to p2m[1][2] and p2m[1][488] respectively.
At this point we would at minimum reserve_brk one page, but could be up to
three. Each call to set_phys_range_identity has at maximum a three page
cost. If we were to query the P2M at this stage, all those entries from
start PFN through end PFN (so 1029MB -> 2001MB) would return INVALID_P2M_ENTRY
("missing").
The next step is to walk from the start pfn to the end pfn setting
the IDENTITY_FRAME_BIT on each PFN. This is done in 'set_phys_range_identity'.
If we find that the middle leaf is pointing to p2m_missing we can swap it over
to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this point we
do not need to worry about boundary alignment (so no need to reserve_brk a middle
page, figure out which PFNs are "missing" and which ones are identity), as that
has been done earlier. If we find that the middle leaf is not occupied by
p2m_identity or p2m_missing, we dereference that page (which covers
512 PFNs) and set the appropriate PFN with IDENTITY_FRAME_BIT. In our example
263424 and 512256 end up there, and we set from p2m[1][2][256->511] and
p2m[1][488][0->256] with IDENTITY_FRAME_BIT set.
All other regions that are void (or not filled) either point to p2m_missing
(considered missing) or have the default value of INVALID_P2M_ENTRY (also
considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511]
contain the INVALID_P2M_ENTRY value and are considered "missing."
This is what the p2m ends up looking (for the E820 above) with this
fabulous drawing:
p2m /--------------\
/-----\ | &mfn_list[0],| /-----------------\
| 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. |
|-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] |
| 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] |
|-----| \ | [p2m_identity]+\\ | .... |
| 2 |--\ \-------------------->| ... | \\ \----------------/
|-----| \ \---------------/ \\
| 3 |\ \ \\ p2m_identity
|-----| \ \-------------------->/---------------\ /-----------------\
| .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... |
\-----/ / | [p2m_identity]+-->| ..., ~0 |
/ /---------------\ | .... | \-----------------/
/ | IDENTITY[@0] | /-+-[x], ~0, ~0.. |
/ | IDENTITY[@256]|<----/ \---------------/
/ | ~0, ~0, .... |
| \---------------/
|
p2m_missing p2m_missing
/------------------\ /------------\
| [p2m_mid_missing]+---->| ~0, ~0, ~0 |
| [p2m_mid_missing]+---->| ..., ~0 |
\------------------/ \------------/
where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
Reviewed-by: Ian Campbell <ian.campbell@citrix.com>
[v5: Changed code to use ranges, added ASCII art]
[v6: Rebased on top of xen->p2m code split]
[v4: Squished patches in just this one]
[v7: Added RESERVE_BRK for potentially allocated pages]
[v8: Fixed alignment problem]
[v9: Changed 1<<3X to 1<<BITS_PER_LONG-X]
[v10: Copied git commit description in the p2m code + Add Review tag]
[v11: Title had '2-1' - should be '1-1' mapping]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
2011-01-19 09:15:21 +08:00
|
|
|
/*
 * Highest bit of an unsigned long. FOREIGN_FRAME() ORs it into a frame
 * number and pfn_to_mfn() masks it off again.
 */
#define FOREIGN_FRAME_BIT (1UL<<(BITS_PER_LONG-1))
|
|
|
|
/*
 * Second-highest bit of an unsigned long. IDENTITY_FRAME() ORs it into a
 * frame number and pfn_to_mfn() masks it off again.
 */
#define IDENTITY_FRAME_BIT (1UL<<(BITS_PER_LONG-2))
|
2008-04-03 01:53:58 +08:00
|
|
|
/* Frame number m with FOREIGN_FRAME_BIT set. */
#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT)
|
xen/mmu: Add the notion of identity (1-1) mapping.
Our P2M tree structure is a three-level. On the leaf nodes
we set the Machine Frame Number (MFN) of the PFN. What this means
is that when one does: pfn_to_mfn(pfn), which is used when creating
PTE entries, you get the real MFN of the hardware. When Xen sets
up a guest it initially populates an array which has descending
(or ascending) MFN values, as so:
idx: 0, 1, 2
[0x290F, 0x290E, 0x290D, ..]
so pfn_to_mfn(2)==0x290D. If you start, restart many guests that list
starts looking quite random.
We graft this structure on our P2M tree structure and stick in
those MFN in the leafs. But for all other leaf entries, or for the top
root, or middle one, for which there is a void entry, we assume it is
"missing". So
pfn_to_mfn(0xc0000)=INVALID_P2M_ENTRY.
We add the possibility of setting 1-1 mappings on certain regions, so
that:
pfn_to_mfn(0xc0000)=0xc0000
The benefit of this is, that we can assume for non-RAM regions (think
PCI BARs, or ACPI spaces), we can create mappings easily b/c we
get the PFN value to match the MFN.
For this to work efficiently we introduce one new page p2m_identity and
allocate (via reserved_brk) any other pages we need to cover the sides
(1GB or 4MB boundary violations). All entries in p2m_identity are set to
INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs,
no other fancy value).
On lookup we spot that the entry points to p2m_identity and return the identity
value instead of dereferencing and returning INVALID_P2M_ENTRY. If the entry
points to an allocated page, we just proceed as before and return the PFN.
If the PFN has IDENTITY_FRAME_BIT set we unmask that in appropriate functions
(pfn_to_mfn).
The reason for having the IDENTITY_FRAME_BIT instead of just returning the
PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
non-identity pfn. To protect ourselves against we elect to set (and get) the
IDENTITY_FRAME_BIT on all identity mapped PFNs.
This simplistic diagram is used to explain the more subtle piece of code.
There is also a diagram of the P2M at the end that can help.
Imagine your E820 looking as so:
1GB 2GB
/-------------------+---------\/----\ /----------\ /---+-----\
| System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM |
\-------------------+---------/\----/ \----------/ \---+-----/
^- 1029MB ^- 2001MB
[1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100), 2048MB = 524288 (0x80000)]
And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB
is actually not present (would have to kick the balloon driver to put it in).
When we are told to set the PFNs for identity mapping (see patch: "xen/setup:
Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start
of the PFN and the end PFN (263424 and 512256 respectively). The first step is
to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page
covers 512^2 of page estate (1GB) and in case the start or end PFN is not
aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn to
end pfn. We reserve_brk top leaf pages if they are missing (means they point
to p2m_mid_missing).
With the E820 example above, 263424 is not 1GB aligned so we allocate a
reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000.
Each entry in the allocate page is "missing" (points to p2m_missing).
Next stage is to determine if we need to do a more granular boundary check
on the 4MB (or 2MB depending on architecture) off the start and end pfn's.
We check if the start pfn and end pfn violate that boundary check, and if
so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer
granularity of setting which PFNs are missing and which ones are identity.
In our example 263424 and 512256 both fail the check so we reserve_brk two
pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing" values)
and assign them to p2m[1][2] and p2m[1][488] respectively.
At this point we would at minimum reserve_brk one page, but could be up to
three. Each call to set_phys_range_identity has at maximum a three page
cost. If we were to query the P2M at this stage, all those entries from
start PFN through end PFN (so 1029MB -> 2001MB) would return INVALID_P2M_ENTRY
("missing").
The next step is to walk from the start pfn to the end pfn setting
the IDENTITY_FRAME_BIT on each PFN. This is done in 'set_phys_range_identity'.
If we find that the middle leaf is pointing to p2m_missing we can swap it over
to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this point we
do not need to worry about boundary alignment (so no need to reserve_brk a middle
page, figure out which PFNs are "missing" and which ones are identity), as that
has been done earlier. If we find that the middle leaf is not occupied by
p2m_identity or p2m_missing, we dereference that page (which covers
512 PFNs) and set the appropriate PFN with IDENTITY_FRAME_BIT. In our example
263424 and 512256 end up there, and we set from p2m[1][2][256->511] and
p2m[1][488][0->256] with IDENTITY_FRAME_BIT set.
All other regions that are void (or not filled) either point to p2m_missing
(considered missing) or have the default value of INVALID_P2M_ENTRY (also
considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511]
contain the INVALID_P2M_ENTRY value and are considered "missing."
This is what the p2m ends up looking (for the E820 above) with this
fabulous drawing:
p2m /--------------\
/-----\ | &mfn_list[0],| /-----------------\
| 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. |
|-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] |
| 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] |
|-----| \ | [p2m_identity]+\\ | .... |
| 2 |--\ \-------------------->| ... | \\ \----------------/
|-----| \ \---------------/ \\
| 3 |\ \ \\ p2m_identity
|-----| \ \-------------------->/---------------\ /-----------------\
| .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... |
\-----/ / | [p2m_identity]+-->| ..., ~0 |
/ /---------------\ | .... | \-----------------/
/ | IDENTITY[@0] | /-+-[x], ~0, ~0.. |
/ | IDENTITY[@256]|<----/ \---------------/
/ | ~0, ~0, .... |
| \---------------/
|
p2m_missing p2m_missing
/------------------\ /------------\
| [p2m_mid_missing]+---->| ~0, ~0, ~0 |
| [p2m_mid_missing]+---->| ..., ~0 |
\------------------/ \------------/
where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
Reviewed-by: Ian Campbell <ian.campbell@citrix.com>
[v5: Changed code to use ranges, added ASCII art]
[v6: Rebased on top of xen->p2m code split]
[v4: Squished patches in just this one]
[v7: Added RESERVE_BRK for potentially allocated pages]
[v8: Fixed alignment problem]
[v9: Changed 1<<3X to 1<<BITS_PER_LONG-X]
[v10: Copied git commit description in the p2m code + Add Review tag]
[v11: Title had '2-1' - should be '1-1' mapping]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
2011-01-19 09:15:21 +08:00
|
|
|
/*
 * Frame number m with IDENTITY_FRAME_BIT set. mfn_to_pfn() compares the P2M
 * entry for a frame against this form to recognise an identity mapping.
 */
#define IDENTITY_FRAME(m) ((m) | IDENTITY_FRAME_BIT)
|
2008-04-03 01:53:58 +08:00
|
|
|
|
2008-05-27 06:31:19 +08:00
|
|
|
/*
 * Maximum amount of memory we can handle in a domain, in pages.
 *
 * CONFIG_XEN_MAX_DOMAIN_MEMORY is scaled by 1024^3 in 64-bit arithmetic
 * before being divided by PAGE_SIZE.
 */
#define MAX_DOMAIN_PAGES \
	((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
|
|
|
|
|
2010-09-30 19:37:26 +08:00
|
|
|
extern unsigned long *machine_to_phys_mapping;
|
|
|
|
extern unsigned int machine_to_phys_order;
|
2008-05-27 06:31:19 +08:00
|
|
|
|
2008-05-27 06:31:18 +08:00
|
|
|
extern unsigned long get_phys_to_machine(unsigned long pfn);
|
2010-08-28 04:42:04 +08:00
|
|
|
extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
|
2011-01-19 09:09:41 +08:00
|
|
|
extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
|
xen/mmu: Add the notion of identity (1-1) mapping.
Our P2M tree structure is a three-level. On the leaf nodes
we set the Machine Frame Number (MFN) of the PFN. What this means
is that when one does: pfn_to_mfn(pfn), which is used when creating
PTE entries, you get the real MFN of the hardware. When Xen sets
up a guest it initially populates an array which has descending
(or ascending) MFN values, as so:
idx: 0, 1, 2
[0x290F, 0x290E, 0x290D, ..]
so pfn_to_mfn(2)==0x290D. If you start, restart many guests that list
starts looking quite random.
We graft this structure on our P2M tree structure and stick in
those MFN in the leafs. But for all other leaf entries, or for the top
root, or middle one, for which there is a void entry, we assume it is
"missing". So
pfn_to_mfn(0xc0000)=INVALID_P2M_ENTRY.
We add the possibility of setting 1-1 mappings on certain regions, so
that:
pfn_to_mfn(0xc0000)=0xc0000
The benefit of this is, that we can assume for non-RAM regions (think
PCI BARs, or ACPI spaces), we can create mappings easily b/c we
get the PFN value to match the MFN.
For this to work efficiently we introduce one new page p2m_identity and
allocate (via reserved_brk) any other pages we need to cover the sides
(1GB or 4MB boundary violations). All entries in p2m_identity are set to
INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs,
no other fancy value).
On lookup we spot that the entry points to p2m_identity and return the identity
value instead of dereferencing and returning INVALID_P2M_ENTRY. If the entry
points to an allocated page, we just proceed as before and return the PFN.
If the PFN has IDENTITY_FRAME_BIT set we unmask that in appropriate functions
(pfn_to_mfn).
The reason for having the IDENTITY_FRAME_BIT instead of just returning the
PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
non-identity pfn. To protect ourselves against we elect to set (and get) the
IDENTITY_FRAME_BIT on all identity mapped PFNs.
This simplistic diagram is used to explain the more subtle piece of code.
There is also a diagram of the P2M at the end that can help.
Imagine your E820 looking as so:
1GB 2GB
/-------------------+---------\/----\ /----------\ /---+-----\
| System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM |
\-------------------+---------/\----/ \----------/ \---+-----/
^- 1029MB ^- 2001MB
[1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100), 2048MB = 524288 (0x80000)]
And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB
is actually not present (would have to kick the balloon driver to put it in).
When we are told to set the PFNs for identity mapping (see patch: "xen/setup:
Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start
of the PFN and the end PFN (263424 and 512256 respectively). The first step is
to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page
covers 512^2 of page estate (1GB) and in case the start or end PFN is not
aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn to
end pfn. We reserve_brk top leaf pages if they are missing (means they point
to p2m_mid_missing).
With the E820 example above, 263424 is not 1GB aligned so we allocate a
reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000.
Each entry in the allocate page is "missing" (points to p2m_missing).
Next stage is to determine if we need to do a more granular boundary check
on the 4MB (or 2MB depending on architecture) off the start and end pfn's.
We check if the start pfn and end pfn violate that boundary check, and if
so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer
granularity of setting which PFNs are missing and which ones are identity.
In our example 263424 and 512256 both fail the check so we reserve_brk two
pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing" values)
and assign them to p2m[1][2] and p2m[1][488] respectively.
At this point we would at minimum reserve_brk one page, but could be up to
three. Each call to set_phys_range_identity has at maximum a three page
cost. If we were to query the P2M at this stage, all those entries from
start PFN through end PFN (so 1029MB -> 2001MB) would return INVALID_P2M_ENTRY
("missing").
The next step is to walk from the start pfn to the end pfn setting
the IDENTITY_FRAME_BIT on each PFN. This is done in 'set_phys_range_identity'.
If we find that the middle leaf is pointing to p2m_missing we can swap it over
to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this point we
do not need to worry about boundary alignment (so no need to reserve_brk a middle
page, figure out which PFNs are "missing" and which ones are identity), as that
has been done earlier. If we find that the middle leaf is not occupied by
p2m_identity or p2m_missing, we dereference that page (which covers
512 PFNs) and set the appropriate PFN with IDENTITY_FRAME_BIT. In our example
263424 and 512256 end up there, and we set from p2m[1][2][256->511] and
p2m[1][488][0->256] with IDENTITY_FRAME_BIT set.
All other regions that are void (or not filled) either point to p2m_missing
(considered missing) or have the default value of INVALID_P2M_ENTRY (also
considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511]
contain the INVALID_P2M_ENTRY value and are considered "missing."
This is what the p2m ends up looking (for the E820 above) with this
fabulous drawing:
p2m /--------------\
/-----\ | &mfn_list[0],| /-----------------\
| 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. |
|-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] |
| 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] |
|-----| \ | [p2m_identity]+\\ | .... |
| 2 |--\ \-------------------->| ... | \\ \----------------/
|-----| \ \---------------/ \\
| 3 |\ \ \\ p2m_identity
|-----| \ \-------------------->/---------------\ /-----------------\
| .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... |
\-----/ / | [p2m_identity]+-->| ..., ~0 |
/ /---------------\ | .... | \-----------------/
/ | IDENTITY[@0] | /-+-[x], ~0, ~0.. |
/ | IDENTITY[@256]|<----/ \---------------/
/ | ~0, ~0, .... |
| \---------------/
|
p2m_missing p2m_missing
/------------------\ /------------\
| [p2m_mid_missing]+---->| ~0, ~0, ~0 |
| [p2m_mid_missing]+---->| ..., ~0 |
\------------------/ \------------/
where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
Reviewed-by: Ian Campbell <ian.campbell@citrix.com>
[v5: Changed code to use ranges, added ASCII art]
[v6: Rebased on top of xen->p2m code split]
[v4: Squished patches in just this one]
[v7: Added RESERVE_BRK for potentially allocated pages]
[v8: Fixed alignment problem]
[v9: Changed 1<<3X to 1<<BITS_PER_LONG-X]
[v10: Copied git commit description in the p2m code + Add Review tag]
[v11: Title had '2-1' - should be '1-1' mapping]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
2011-01-19 09:15:21 +08:00
|
|
|
extern unsigned long set_phys_range_identity(unsigned long pfn_s,
|
|
|
|
unsigned long pfn_e);
|
2008-04-03 01:53:58 +08:00
|
|
|
|
2010-12-13 22:42:30 +08:00
|
|
|
extern int m2p_add_override(unsigned long mfn, struct page *page);
|
|
|
|
extern int m2p_remove_override(struct page *page);
|
2010-12-15 21:19:33 +08:00
|
|
|
extern struct page *m2p_find_override(unsigned long mfn);
|
|
|
|
extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
|
|
|
|
|
2010-12-22 21:57:30 +08:00
|
|
|
#ifdef CONFIG_XEN_DEBUG_FS
|
|
|
|
extern int p2m_dump_show(struct seq_file *m, void *v);
|
|
|
|
#endif
|
2008-04-03 01:53:58 +08:00
|
|
|
static inline unsigned long pfn_to_mfn(unsigned long pfn)
|
|
|
|
{
|
2010-09-01 05:06:22 +08:00
|
|
|
unsigned long mfn;
|
|
|
|
|
2008-04-03 01:53:58 +08:00
|
|
|
if (xen_feature(XENFEAT_auto_translated_physmap))
|
|
|
|
return pfn;
|
|
|
|
|
2010-09-01 05:06:22 +08:00
|
|
|
mfn = get_phys_to_machine(pfn);
|
|
|
|
|
|
|
|
if (mfn != INVALID_P2M_ENTRY)
|
xen/mmu: Add the notion of identity (1-1) mapping.
Our P2M tree structure is a three-level. On the leaf nodes
we set the Machine Frame Number (MFN) of the PFN. What this means
is that when one does: pfn_to_mfn(pfn), which is used when creating
PTE entries, you get the real MFN of the hardware. When Xen sets
up a guest it initially populates a array which has descending
(or ascending) MFN values, as so:
idx: 0, 1, 2
[0x290F, 0x290E, 0x290D, ..]
so pfn_to_mfn(2)==0x290D. If you start, restart many guests that list
starts looking quite random.
We graft this structure on our P2M tree structure and stick in
those MFN in the leafs. But for all other leaf entries, or for the top
root, or middle one, for which there is a void entry, we assume it is
"missing". So
pfn_to_mfn(0xc0000)=INVALID_P2M_ENTRY.
We add the possibility of setting 1-1 mappings on certain regions, so
that:
pfn_to_mfn(0xc0000)=0xc0000
The benefit of this is, that we can assume for non-RAM regions (think
PCI BARs, or ACPI spaces), we can create mappings easily b/c we
get the PFN value to match the MFN.
For this to work efficiently we introduce one new page p2m_identity and
allocate (via reserved_brk) any other pages we need to cover the sides
(1GB or 4MB boundary violations). All entries in p2m_identity are set to
INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs,
no other fancy value).
On lookup we spot that the entry points to p2m_identity and return the identity
value instead of dereferencing and returning INVALID_P2M_ENTRY. If the entry
points to an allocated page, we just proceed as before and return the PFN.
If the PFN has IDENTITY_FRAME_BIT set we unmask that in appropriate functions
(pfn_to_mfn).
The reason for having the IDENTITY_FRAME_BIT instead of just returning the
PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
non-identity pfn. To protect ourselves against we elect to set (and get) the
IDENTITY_FRAME_BIT on all identity mapped PFNs.
This simplistic diagram is used to explain the more subtle piece of code.
There is also a digram of the P2M at the end that can help.
Imagine your E820 looking as so:
1GB 2GB
/-------------------+---------\/----\ /----------\ /---+-----\
| System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM |
\-------------------+---------/\----/ \----------/ \---+-----/
^- 1029MB ^- 2001MB
[1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100), 2048MB = 524288 (0x80000)]
And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB
is actually not present (would have to kick the balloon driver to put it in).
When we are told to set the PFNs for identity mapping (see patch: "xen/setup:
Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start
of the PFN and the end PFN (263424 and 512256 respectively). The first step is
to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page
covers 512^2 of page estate (1GB) and in case the start or end PFN is not
aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn to
end pfn. We reserve_brk top leaf pages if they are missing (means they point
to p2m_mid_missing).
With the E820 example above, 263424 is not 1GB aligned so we allocate a
reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000.
Each entry in the allocate page is "missing" (points to p2m_missing).
Next stage is to determine if we need to do a more granular boundary check
on the 4MB (or 2MB depending on architecture) off the start and end pfn's.
We check if the start pfn and end pfn violate that boundary check, and if
so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer
granularity of setting which PFNs are missing and which ones are identity.
In our example 263424 and 512256 both fail the check so we reserve_brk two
pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing" values)
and assign them to p2m[1][2] and p2m[1][488] respectively.
At this point we would at minimum reserve_brk one page, but could be up to
three. Each call to set_phys_range_identity has at maximum a three page
cost. If we were to query the P2M at this stage, all those entries from
start PFN through end PFN (so 1029MB -> 2001MB) would return INVALID_P2M_ENTRY
("missing").
The next step is to walk from the start pfn to the end pfn setting
the IDENTITY_FRAME_BIT on each PFN. This is done in 'set_phys_range_identity'.
If we find that the middle leaf is pointing to p2m_missing we can swap it over
to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this point we
do not need to worry about boundary aligment (so no need to reserve_brk a middle
page, figure out which PFNs are "missing" and which ones are identity), as that
has been done earlier. If we find that the middle leaf is not occupied by
p2m_identity or p2m_missing, we dereference that page (which covers
512 PFNs) and set the appropriate PFN with IDENTITY_FRAME_BIT. In our example
263424 and 512256 end up there, and we set from p2m[1][2][256->511] and
p2m[1][488][0->256] with IDENTITY_FRAME_BIT set.
All other regions that are void (or not filled) either point to p2m_missing
(considered missing) or have the default value of INVALID_P2M_ENTRY (also
considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511]
contain the INVALID_P2M_ENTRY value and are considered "missing."
This is what the p2m ends up looking (for the E820 above) with this
fabulous drawing:
p2m /--------------\
/-----\ | &mfn_list[0],| /-----------------\
| 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. |
|-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] |
| 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] |
|-----| \ | [p2m_identity]+\\ | .... |
| 2 |--\ \-------------------->| ... | \\ \----------------/
|-----| \ \---------------/ \\
| 3 |\ \ \\ p2m_identity
|-----| \ \-------------------->/---------------\ /-----------------\
| .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... |
\-----/ / | [p2m_identity]+-->| ..., ~0 |
/ /---------------\ | .... | \-----------------/
/ | IDENTITY[@0] | /-+-[x], ~0, ~0.. |
/ | IDENTITY[@256]|<----/ \---------------/
/ | ~0, ~0, .... |
| \---------------/
|
p2m_missing p2m_missing
/------------------\ /------------\
| [p2m_mid_missing]+---->| ~0, ~0, ~0 |
| [p2m_mid_missing]+---->| ..., ~0 |
\------------------/ \------------/
where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
Reviewed-by: Ian Campbell <ian.campbell@citrix.com>
[v5: Changed code to use ranges, added ASCII art]
[v6: Rebased on top of xen->p2m code split]
[v4: Squished patches in just this one]
[v7: Added RESERVE_BRK for potentially allocated pages]
[v8: Fixed alignment problem]
[v9: Changed 1<<3X to 1<<BITS_PER_LONG-X]
[v10: Copied git commit description in the p2m code + Add Review tag]
[v11: Title had '2-1' - should be '1-1' mapping]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
2011-01-19 09:15:21 +08:00
|
|
|
mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
|
2010-09-01 05:06:22 +08:00
|
|
|
|
|
|
|
return mfn;
|
2008-04-03 01:53:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline int phys_to_machine_mapping_valid(unsigned long pfn)
|
|
|
|
{
|
|
|
|
if (xen_feature(XENFEAT_auto_translated_physmap))
|
|
|
|
return 1;
|
|
|
|
|
2008-05-27 06:31:18 +08:00
|
|
|
return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY;
|
2008-04-03 01:53:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline unsigned long mfn_to_pfn(unsigned long mfn)
|
|
|
|
{
|
|
|
|
unsigned long pfn;
|
2011-02-03 02:32:59 +08:00
|
|
|
int ret = 0;
|
2008-04-03 01:53:58 +08:00
|
|
|
|
|
|
|
if (xen_feature(XENFEAT_auto_translated_physmap))
|
|
|
|
return mfn;
|
|
|
|
|
2011-01-15 06:55:44 +08:00
|
|
|
if (unlikely((mfn >> machine_to_phys_order) != 0)) {
|
|
|
|
pfn = ~0;
|
|
|
|
goto try_override;
|
|
|
|
}
|
2008-04-03 01:53:58 +08:00
|
|
|
pfn = 0;
|
|
|
|
/*
|
|
|
|
* The array access can fail (e.g., device space beyond end of RAM).
|
|
|
|
* In such cases it doesn't matter what we return (we return garbage),
|
|
|
|
* but we must handle the fault without crashing!
|
|
|
|
*/
|
2011-02-03 02:32:59 +08:00
|
|
|
ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
|
2011-01-15 06:55:44 +08:00
|
|
|
try_override:
|
2011-02-03 02:32:59 +08:00
|
|
|
/* ret might be < 0 if there are no entries in the m2p for mfn */
|
|
|
|
if (ret < 0)
|
|
|
|
pfn = ~0;
|
|
|
|
else if (get_phys_to_machine(pfn) != mfn)
|
|
|
|
/*
|
|
|
|
* If this appears to be a foreign mfn (because the pfn
|
|
|
|
* doesn't map back to the mfn), then check the local override
|
|
|
|
* table to see if there's a better pfn to use.
|
|
|
|
*
|
|
|
|
* m2p_find_override_pfn returns ~0 if it doesn't find anything.
|
|
|
|
*/
|
|
|
|
pfn = m2p_find_override_pfn(mfn, ~0);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* pfn is ~0 if there are no entries in the m2p for mfn or if the
|
|
|
|
* entry doesn't map back to the mfn and m2p_override doesn't have a
|
|
|
|
* valid entry for it.
|
2010-12-15 21:19:33 +08:00
|
|
|
*/
|
2011-02-03 02:32:59 +08:00
|
|
|
if (pfn == ~0 &&
|
|
|
|
get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn))
|
|
|
|
pfn = mfn;
|
2010-12-15 21:19:33 +08:00
|
|
|
|
2008-04-03 01:53:58 +08:00
|
|
|
return pfn;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Convert a pseudo-physical address into a machine address: translate
 * the page frame through pfn_to_mfn() and carry the offset within the
 * page across unchanged.
 */
static inline xmaddr_t phys_to_machine(xpaddr_t phys)
{
	unsigned page_off = phys.paddr & ~PAGE_MASK;
	phys_addr_t frame_base = PFN_PHYS(pfn_to_mfn(PFN_DOWN(phys.paddr)));

	return XMADDR(frame_base | page_off);
}
|
|
|
|
|
|
|
|
/*
 * Convert a machine address into a pseudo-physical address: translate
 * the page frame through mfn_to_pfn() and carry the offset within the
 * page across unchanged.
 */
static inline xpaddr_t machine_to_phys(xmaddr_t machine)
{
	unsigned page_off = machine.maddr & ~PAGE_MASK;
	unsigned long pfn = mfn_to_pfn(PFN_DOWN(machine.maddr));

	return XPADDR(PFN_PHYS(pfn) | page_off);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We detect special mappings in one of two ways:
|
|
|
|
* 1. If the MFN is an I/O page then Xen will set the m2p entry
|
|
|
|
* to be outside our maximum possible pseudophys range.
|
|
|
|
* 2. If the MFN belongs to a different domain then we will certainly
|
|
|
|
* not have MFN in our p2m table. Conversely, if the page is ours,
|
|
|
|
* then we'll have p2m(m2p(MFN))==MFN.
|
|
|
|
* If we detect a special mapping then it doesn't have a 'struct page'.
|
|
|
|
* We force !pfn_valid() by returning an out-of-range pointer.
|
|
|
|
*
|
|
|
|
* NB. These checks require that, for any MFN that is not in our reservation,
|
|
|
|
* there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if
|
|
|
|
* we are foreign-mapping the MFN, and the other domain as m2p(MFN) == PFN.
|
|
|
|
* Yikes! Various places must poke in INVALID_P2M_ENTRY for safety.
|
|
|
|
*
|
|
|
|
* NB2. When deliberately mapping foreign pages into the p2m table, you *must*
|
|
|
|
* use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
|
|
|
|
* require. In all the cases we care about, the FOREIGN_FRAME bit is
|
|
|
|
* masked (e.g., pfn_to_mfn()) so behaviour there is correct.
|
|
|
|
*/
|
|
|
|
static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
|
|
|
|
{
|
|
|
|
unsigned long pfn = mfn_to_pfn(mfn);
|
2010-02-05 06:46:34 +08:00
|
|
|
if (get_phys_to_machine(pfn) != mfn)
|
|
|
|
return -1; /* force !pfn_valid() */
|
2008-04-03 01:53:58 +08:00
|
|
|
return pfn;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * VIRT <-> MACHINE conversion
 *
 * All of these take/return kernel virtual addresses that __pa()/__va()
 * can translate; the frame-number forms go through the pfn<->mfn helpers.
 */
#define virt_to_pfn(v)		(PFN_DOWN(__pa(v)))
#define virt_to_mfn(v)		(pfn_to_mfn(virt_to_pfn(v)))
#define virt_to_machine(v)	(phys_to_machine(XPADDR(__pa(v))))
#define mfn_to_virt(m)		(__va(mfn_to_pfn(m) << PAGE_SHIFT))
|
|
|
|
|
|
|
|
/*
 * Extract the frame number stored in @pte: keep only the bits selected
 * by PTE_PFN_MASK and shift them down by PAGE_SHIFT.  The value is read
 * straight from the pte, with no translation applied.
 */
static inline unsigned long pte_mfn(pte_t pte)
{
	unsigned long mfn = (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT;

	return mfn;
}
|
|
|
|
|
|
|
|
/*
 * Build a pte from frame number @page_nr and protection bits @pgprot.
 * The frame number is widened to phys_addr_t before shifting so that
 * high frame bits are not lost, and the protection bits are filtered
 * through massage_pgprot().
 */
static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot)
{
	return (pte_t) {
		.pte = ((phys_addr_t)page_nr << PAGE_SHIFT) |
			massage_pgprot(pgprot)
	};
}
|
|
|
|
|
|
|
|
/* Return the raw value held in @pte, without any frame translation. */
static inline pteval_t pte_val_ma(pte_t pte)
{
	pteval_t raw = pte.pte;

	return raw;
}
|
|
|
|
|
|
|
|
/* Wrap the raw value @x in a pte_t, without any frame translation. */
static inline pte_t __pte_ma(pteval_t x)
{
	pte_t pte;

	pte.pte = x;
	return pte;
}
|
|
|
|
|
|
|
|
/*
 * Raw accessors/constructors for the upper page-table levels; like the
 * pte "_ma" helpers they touch the stored value directly.
 */
#define pgd_val_ma(x)	((x).pgd)
#define pmd_val_ma(v)	((v).pmd)
#define __pmd_ma(x)	((pmd_t) { (x) } )

/* When the pud level is folded, a pud_t just wraps a pgd_t. */
#ifndef __PAGETABLE_PUD_FOLDED
#define pud_val_ma(v)	((v).pud)
#else
#define pud_val_ma(v)	((v).pgd.pgd)
#endif
|
|
|
|
|
2009-02-10 04:05:49 +08:00
|
|
|
void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid);
|
2008-04-03 01:53:58 +08:00
|
|
|
|
2008-07-09 06:06:55 +08:00
|
|
|
xmaddr_t arbitrary_virt_to_machine(void *address);
|
2009-02-28 01:19:26 +08:00
|
|
|
unsigned long arbitrary_virt_to_mfn(void *vaddr);
|
2008-04-03 01:53:58 +08:00
|
|
|
void make_lowmem_page_readonly(void *vaddr);
|
|
|
|
void make_lowmem_page_readwrite(void *vaddr);
|
|
|
|
|
2008-10-23 15:01:39 +08:00
|
|
|
#endif /* _ASM_X86_XEN_PAGE_H */
|