Track mmu_idx for which the TLB is clean and need not be flushed again.

-----BEGIN PGP SIGNATURE-----
 
 iQEcBAABAgAGBQJb2Z3TAAoJEGTfOOivfiFfuokIAJXxIOVp91MBextWNriZTcSH
 K8+pyFpWgFpe6Xq5AeTc/J33EJxYileOoBeJNAEYjiE9qm81EYmlBwWX8CAI5MUu
 W2e4gne1pOUnYlsmQgcZGmJLV3hC/KQEsPhSrLuTpFDs8bgJ5tQo0gTaDVCIaDZH
 Rvrb+xVcfNdda/ebkMbG8hLTmkjqM229Dvyr04GBN3y6ine+x3P/LyOFlWF/7rhC
 7iKvEzfExTVc0WS3n3+p++jsoB9J3OZQbNGuSdqOW8TqCtP32kzlIqK728bV9hF0
 /XEizlVObvL1IP7J/1GIgVvyxapMLdoP4ixU1ZF/mKSZV4wc2ISdX9WfKcy6NPQ=
 =zT2y
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20181031' into staging

Track mmu_idx for which the TLB is clean and need not be flushed again.

# gpg: Signature made Wed 31 Oct 2018 12:19:31 GMT
# gpg:                using RSA key 64DF38E8AF7E215F
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>"
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* remotes/rth/tags/pull-tcg-20181031:
  cputlb: Remove tlb_c.pending_flushes
  cputlb: Filter flushes on already clean tlbs
  cputlb: Count "partial" and "elided" tlb flushes
  cputlb: Merge tlb_flush_page into tlb_flush_page_by_mmuidx
  cputlb: Merge tlb_flush_nocheck into tlb_flush_by_mmuidx_async_work
  cputlb: Move env->vtlb_index to env->tlb_d.vindex
  cputlb: Split large page tracking per mmu_idx
  cputlb: Move cpu->pending_tlb_flush to env->tlb_c.pending_flush
  cputlb: Remove tcg_enabled hack from tlb_flush_nocheck
  cputlb: Move tlb_lock to CPUTLBCommon

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2018-11-01 15:04:36 +00:00
commit 60126df95f
5 changed files with 183 additions and 237 deletions

View File

@ -78,7 +78,10 @@ void tlb_init(CPUState *cpu)
{
CPUArchState *env = cpu->env_ptr;
qemu_spin_init(&env->tlb_lock);
qemu_spin_init(&env->tlb_c.lock);
/* Ensure that cpu_reset performs a full flush. */
env->tlb_c.dirty = ALL_MMUIDX_BITS;
}
/* flush_all_helper: run fn across all cpus
@ -100,139 +103,89 @@ static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
}
}
size_t tlb_flush_count(void)
void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
{
CPUState *cpu;
size_t count = 0;
size_t full = 0, part = 0, elide = 0;
CPU_FOREACH(cpu) {
CPUArchState *env = cpu->env_ptr;
count += atomic_read(&env->tlb_flush_count);
full += atomic_read(&env->tlb_c.full_flush_count);
part += atomic_read(&env->tlb_c.part_flush_count);
elide += atomic_read(&env->tlb_c.elide_flush_count);
}
return count;
*pfull = full;
*ppart = part;
*pelide = elide;
}
/* This is OK because CPU architectures generally permit an
* implementation to drop entries from the TLB at any time, so
* flushing more entries than required is only an efficiency issue,
* not a correctness issue.
*/
static void tlb_flush_nocheck(CPUState *cpu)
static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
{
CPUArchState *env = cpu->env_ptr;
/* The QOM tests will trigger tlb_flushes without setting up TCG
* so we bug out here in that case.
*/
if (!tcg_enabled()) {
return;
}
assert_cpu_is_self(cpu);
atomic_set(&env->tlb_flush_count, env->tlb_flush_count + 1);
tlb_debug("(count: %zu)\n", tlb_flush_count());
/*
* tlb_table/tlb_v_table updates from any thread must hold tlb_lock.
* However, updates from the owner thread (as is the case here; see the
* above assert_cpu_is_self) do not need atomic_set because all reads
* that do not hold the lock are performed by the same owner thread.
*/
qemu_spin_lock(&env->tlb_lock);
memset(env->tlb_table, -1, sizeof(env->tlb_table));
memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
qemu_spin_unlock(&env->tlb_lock);
cpu_tb_jmp_cache_clear(cpu);
env->vtlb_index = 0;
env->tlb_flush_addr = -1;
env->tlb_flush_mask = 0;
atomic_mb_set(&cpu->pending_tlb_flush, 0);
}
static void tlb_flush_global_async_work(CPUState *cpu, run_on_cpu_data data)
{
tlb_flush_nocheck(cpu);
}
void tlb_flush(CPUState *cpu)
{
if (cpu->created && !qemu_cpu_is_self(cpu)) {
if (atomic_mb_read(&cpu->pending_tlb_flush) != ALL_MMUIDX_BITS) {
atomic_mb_set(&cpu->pending_tlb_flush, ALL_MMUIDX_BITS);
async_run_on_cpu(cpu, tlb_flush_global_async_work,
RUN_ON_CPU_NULL);
}
} else {
tlb_flush_nocheck(cpu);
}
}
void tlb_flush_all_cpus(CPUState *src_cpu)
{
const run_on_cpu_func fn = tlb_flush_global_async_work;
flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL);
fn(src_cpu, RUN_ON_CPU_NULL);
}
void tlb_flush_all_cpus_synced(CPUState *src_cpu)
{
const run_on_cpu_func fn = tlb_flush_global_async_work;
flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL);
async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_NULL);
memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
env->tlb_d[mmu_idx].large_page_addr = -1;
env->tlb_d[mmu_idx].large_page_mask = -1;
env->tlb_d[mmu_idx].vindex = 0;
}
static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
{
CPUArchState *env = cpu->env_ptr;
unsigned long mmu_idx_bitmask = data.host_int;
int mmu_idx;
uint16_t asked = data.host_int;
uint16_t all_dirty, work, to_clean;
assert_cpu_is_self(cpu);
tlb_debug("start: mmu_idx:0x%04lx\n", mmu_idx_bitmask);
tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);
qemu_spin_lock(&env->tlb_lock);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
qemu_spin_lock(&env->tlb_c.lock);
if (test_bit(mmu_idx, &mmu_idx_bitmask)) {
tlb_debug("%d\n", mmu_idx);
all_dirty = env->tlb_c.dirty;
to_clean = asked & all_dirty;
all_dirty &= ~to_clean;
env->tlb_c.dirty = all_dirty;
memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
}
for (work = to_clean; work != 0; work &= work - 1) {
int mmu_idx = ctz32(work);
tlb_flush_one_mmuidx_locked(env, mmu_idx);
}
qemu_spin_unlock(&env->tlb_lock);
qemu_spin_unlock(&env->tlb_c.lock);
cpu_tb_jmp_cache_clear(cpu);
tlb_debug("done\n");
if (to_clean == ALL_MMUIDX_BITS) {
atomic_set(&env->tlb_c.full_flush_count,
env->tlb_c.full_flush_count + 1);
} else {
atomic_set(&env->tlb_c.part_flush_count,
env->tlb_c.part_flush_count + ctpop16(to_clean));
if (to_clean != asked) {
atomic_set(&env->tlb_c.elide_flush_count,
env->tlb_c.elide_flush_count +
ctpop16(asked & ~to_clean));
}
}
}
void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
{
tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
if (!qemu_cpu_is_self(cpu)) {
uint16_t pending_flushes = idxmap;
pending_flushes &= ~atomic_mb_read(&cpu->pending_tlb_flush);
if (pending_flushes) {
tlb_debug("reduced mmu_idx: 0x%" PRIx16 "\n", pending_flushes);
atomic_or(&cpu->pending_tlb_flush, pending_flushes);
async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
RUN_ON_CPU_HOST_INT(pending_flushes));
}
if (cpu->created && !qemu_cpu_is_self(cpu)) {
async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
RUN_ON_CPU_HOST_INT(idxmap));
} else {
tlb_flush_by_mmuidx_async_work(cpu,
RUN_ON_CPU_HOST_INT(idxmap));
tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
}
}
void tlb_flush(CPUState *cpu)
{
tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
}
void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
{
const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
@ -243,8 +196,12 @@ void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
}
void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
uint16_t idxmap)
void tlb_flush_all_cpus(CPUState *src_cpu)
{
tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
}
void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
{
const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
@ -254,6 +211,11 @@ void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
}
void tlb_flush_all_cpus_synced(CPUState *src_cpu)
{
tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
}
static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
target_ulong page)
{
@ -262,7 +224,7 @@ static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
tlb_hit_page(tlb_entry->addr_code, page);
}
/* Called with tlb_lock held */
/* Called with tlb_c.lock held */
static inline void tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
target_ulong page)
{
@ -271,7 +233,7 @@ static inline void tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
}
}
/* Called with tlb_lock held */
/* Called with tlb_c.lock held */
static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
target_ulong page)
{
@ -283,46 +245,21 @@ static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
}
}
static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data)
static void tlb_flush_page_locked(CPUArchState *env, int midx,
target_ulong page)
{
CPUArchState *env = cpu->env_ptr;
target_ulong addr = (target_ulong) data.target_ptr;
int mmu_idx;
assert_cpu_is_self(cpu);
tlb_debug("page :" TARGET_FMT_lx "\n", addr);
target_ulong lp_addr = env->tlb_d[midx].large_page_addr;
target_ulong lp_mask = env->tlb_d[midx].large_page_mask;
/* Check if we need to flush due to large pages. */
if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
tlb_debug("forcing full flush ("
if ((page & lp_mask) == lp_addr) {
tlb_debug("forcing full flush midx %d ("
TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
env->tlb_flush_addr, env->tlb_flush_mask);
tlb_flush(cpu);
return;
}
addr &= TARGET_PAGE_MASK;
qemu_spin_lock(&env->tlb_lock);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
tlb_flush_entry_locked(tlb_entry(env, mmu_idx, addr), addr);
tlb_flush_vtlb_page_locked(env, mmu_idx, addr);
}
qemu_spin_unlock(&env->tlb_lock);
tb_flush_jmp_cache(cpu, addr);
}
void tlb_flush_page(CPUState *cpu, target_ulong addr)
{
tlb_debug("page :" TARGET_FMT_lx "\n", addr);
if (!qemu_cpu_is_self(cpu)) {
async_run_on_cpu(cpu, tlb_flush_page_async_work,
RUN_ON_CPU_TARGET_PTR(addr));
midx, lp_addr, lp_mask);
tlb_flush_one_mmuidx_locked(env, midx);
} else {
tlb_flush_page_async_work(cpu, RUN_ON_CPU_TARGET_PTR(addr));
tlb_flush_entry_locked(tlb_entry(env, midx, page), page);
tlb_flush_vtlb_page_locked(env, midx, page);
}
}
@ -342,44 +279,20 @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
assert_cpu_is_self(cpu);
tlb_debug("flush page addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n",
tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n",
addr, mmu_idx_bitmap);
qemu_spin_lock(&env->tlb_lock);
qemu_spin_lock(&env->tlb_c.lock);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
tlb_flush_entry_locked(tlb_entry(env, mmu_idx, addr), addr);
tlb_flush_vtlb_page_locked(env, mmu_idx, addr);
tlb_flush_page_locked(env, mmu_idx, addr);
}
}
qemu_spin_unlock(&env->tlb_lock);
qemu_spin_unlock(&env->tlb_c.lock);
tb_flush_jmp_cache(cpu, addr);
}
static void tlb_check_page_and_flush_by_mmuidx_async_work(CPUState *cpu,
run_on_cpu_data data)
{
CPUArchState *env = cpu->env_ptr;
target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
tlb_debug("addr:"TARGET_FMT_lx" mmu_idx: %04lx\n", addr, mmu_idx_bitmap);
/* Check if we need to flush due to large pages. */
if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
tlb_debug("forced full flush ("
TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
env->tlb_flush_addr, env->tlb_flush_mask);
tlb_flush_by_mmuidx_async_work(cpu,
RUN_ON_CPU_HOST_INT(mmu_idx_bitmap));
} else {
tlb_flush_page_by_mmuidx_async_work(cpu, data);
}
}
void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
{
target_ulong addr_and_mmu_idx;
@ -391,18 +304,23 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
addr_and_mmu_idx |= idxmap;
if (!qemu_cpu_is_self(cpu)) {
async_run_on_cpu(cpu, tlb_check_page_and_flush_by_mmuidx_async_work,
async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work,
RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
} else {
tlb_check_page_and_flush_by_mmuidx_async_work(
tlb_flush_page_by_mmuidx_async_work(
cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
}
}
void tlb_flush_page(CPUState *cpu, target_ulong addr)
{
tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
}
void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
uint16_t idxmap)
{
const run_on_cpu_func fn = tlb_check_page_and_flush_by_mmuidx_async_work;
const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
target_ulong addr_and_mmu_idx;
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
@ -415,11 +333,16 @@ void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
}
void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
target_ulong addr,
uint16_t idxmap)
void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
{
const run_on_cpu_func fn = tlb_check_page_and_flush_by_mmuidx_async_work;
tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
}
void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
target_ulong addr,
uint16_t idxmap)
{
const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
target_ulong addr_and_mmu_idx;
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
@ -432,21 +355,9 @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
}
void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
{
const run_on_cpu_func fn = tlb_flush_page_async_work;
flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
fn(src, RUN_ON_CPU_TARGET_PTR(addr));
}
void tlb_flush_page_all_cpus_synced(CPUState *src,
target_ulong addr)
{
const run_on_cpu_func fn = tlb_flush_page_async_work;
flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
async_safe_run_on_cpu(src, fn, RUN_ON_CPU_TARGET_PTR(addr));
tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
}
/* update the TLBs so that writes to code in the virtual page 'addr'
@ -479,7 +390,7 @@ void tlb_unprotect_code(ram_addr_t ram_addr)
* te->addr_write with atomic_set. We don't need to worry about this for
* oversized guests as MTTCG is disabled for them.
*
* Called with tlb_lock held.
* Called with tlb_c.lock held.
*/
static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
uintptr_t start, uintptr_t length)
@ -501,7 +412,7 @@ static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
}
/*
* Called with tlb_lock held.
* Called with tlb_c.lock held.
* Called only from the vCPU context, i.e. the TLB's owner thread.
*/
static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
@ -511,7 +422,7 @@ static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
/* This is a cross vCPU call (i.e. another vCPU resetting the flags of
* the target vCPU).
* We must take tlb_lock to avoid racing with another vCPU update. The only
* We must take tlb_c.lock to avoid racing with another vCPU update. The only
* thing actually updated is the target TLB entry ->addr_write flags.
*/
void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
@ -521,7 +432,7 @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
int mmu_idx;
env = cpu->env_ptr;
qemu_spin_lock(&env->tlb_lock);
qemu_spin_lock(&env->tlb_c.lock);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
unsigned int i;
@ -535,10 +446,10 @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
length);
}
}
qemu_spin_unlock(&env->tlb_lock);
qemu_spin_unlock(&env->tlb_c.lock);
}
/* Called with tlb_lock held */
/* Called with tlb_c.lock held */
static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
target_ulong vaddr)
{
@ -557,7 +468,7 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
assert_cpu_is_self(cpu);
vaddr &= TARGET_PAGE_MASK;
qemu_spin_lock(&env->tlb_lock);
qemu_spin_lock(&env->tlb_c.lock);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
}
@ -568,30 +479,31 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
tlb_set_dirty1_locked(&env->tlb_v_table[mmu_idx][k], vaddr);
}
}
qemu_spin_unlock(&env->tlb_lock);
qemu_spin_unlock(&env->tlb_c.lock);
}
/* Our TLB does not support large pages, so remember the area covered by
large pages and trigger a full TLB flush if these are invalidated. */
static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr,
target_ulong size)
static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
target_ulong vaddr, target_ulong size)
{
target_ulong mask = ~(size - 1);
target_ulong lp_addr = env->tlb_d[mmu_idx].large_page_addr;
target_ulong lp_mask = ~(size - 1);
if (env->tlb_flush_addr == (target_ulong)-1) {
env->tlb_flush_addr = vaddr & mask;
env->tlb_flush_mask = mask;
return;
if (lp_addr == (target_ulong)-1) {
/* No previous large page. */
lp_addr = vaddr;
} else {
/* Extend the existing region to include the new page.
This is a compromise between unnecessary flushes and
the cost of maintaining a full variable size TLB. */
lp_mask &= env->tlb_d[mmu_idx].large_page_mask;
while (((lp_addr ^ vaddr) & lp_mask) != 0) {
lp_mask <<= 1;
}
}
/* Extend the existing region to include the new page.
This is a compromise between unnecessary flushes and the cost
of maintaining a full variable size TLB. */
mask &= env->tlb_flush_mask;
while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
mask <<= 1;
}
env->tlb_flush_addr &= mask;
env->tlb_flush_mask = mask;
env->tlb_d[mmu_idx].large_page_addr = lp_addr & lp_mask;
env->tlb_d[mmu_idx].large_page_mask = lp_mask;
}
/* Add a new TLB entry. At most one entry for a given virtual address
@ -618,12 +530,10 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
assert_cpu_is_self(cpu);
if (size < TARGET_PAGE_SIZE) {
if (size <= TARGET_PAGE_SIZE) {
sz = TARGET_PAGE_SIZE;
} else {
if (size > TARGET_PAGE_SIZE) {
tlb_add_large_page(env, vaddr, size);
}
tlb_add_large_page(env, mmu_idx, vaddr, size);
sz = size;
}
vaddr_page = vaddr & TARGET_PAGE_MASK;
@ -669,7 +579,10 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
* a longer critical section, but this is not a concern since the TLB lock
* is unlikely to be contended.
*/
qemu_spin_lock(&env->tlb_lock);
qemu_spin_lock(&env->tlb_c.lock);
/* Note that the tlb is no longer clean. */
env->tlb_c.dirty |= 1 << mmu_idx;
/* Make sure there's no cached translation for the new page. */
tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
@ -679,7 +592,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
* different page; otherwise just overwrite the stale data.
*/
if (!tlb_hit_page_anyprot(te, vaddr_page)) {
unsigned vidx = env->vtlb_index++ % CPU_VTLB_SIZE;
unsigned vidx = env->tlb_d[mmu_idx].vindex++ % CPU_VTLB_SIZE;
CPUTLBEntry *tv = &env->tlb_v_table[mmu_idx][vidx];
/* Evict the old entry into the victim tlb. */
@ -736,7 +649,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
}
copy_tlb_helper_locked(te, &tn);
qemu_spin_unlock(&env->tlb_lock);
qemu_spin_unlock(&env->tlb_c.lock);
}
/* Add a new TLB entry, but without specifying the memory
@ -917,11 +830,11 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
/* Found entry in victim tlb, swap tlb and iotlb. */
CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index];
qemu_spin_lock(&env->tlb_lock);
qemu_spin_lock(&env->tlb_c.lock);
copy_tlb_helper_locked(&tmptlb, tlb);
copy_tlb_helper_locked(tlb, vtlb);
copy_tlb_helper_locked(vtlb, &tmptlb);
qemu_spin_unlock(&env->tlb_lock);
qemu_spin_unlock(&env->tlb_c.lock);
CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];

View File

@ -2290,7 +2290,7 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
{
struct tb_tree_stats tst = {};
struct qht_stats hst;
size_t nb_tbs;
size_t nb_tbs, flush_full, flush_part, flush_elide;
tcg_tb_foreach(tb_tree_stats_iter, &tst);
nb_tbs = tst.nb_tbs;
@ -2326,7 +2326,11 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
cpu_fprintf(f, "TB flush count %u\n",
atomic_read(&tb_ctx.tb_flush_count));
cpu_fprintf(f, "TB invalidate count %zu\n", tcg_tb_phys_invalidate_count());
cpu_fprintf(f, "TLB flush count %zu\n", tlb_flush_count());
tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
cpu_fprintf(f, "TLB full flushes %zu\n", flush_full);
cpu_fprintf(f, "TLB partial flushes %zu\n", flush_part);
cpu_fprintf(f, "TLB elided flushes %zu\n", flush_elide);
tcg_dump_info(f, cpu_fprintf);
}

View File

@ -141,18 +141,53 @@ typedef struct CPUIOTLBEntry {
MemTxAttrs attrs;
} CPUIOTLBEntry;
typedef struct CPUTLBDesc {
/*
* Describe a region covering all of the large pages allocated
* into the tlb. When any page within this region is flushed,
* we must flush the entire tlb. The region is matched if
* (addr & large_page_mask) == large_page_addr.
*/
target_ulong large_page_addr;
target_ulong large_page_mask;
/* The next index to use in the tlb victim table. */
size_t vindex;
} CPUTLBDesc;
/*
* Data elements that are shared between all MMU modes.
*/
typedef struct CPUTLBCommon {
/* Serialize updates to tlb_table and tlb_v_table, and others as noted. */
QemuSpin lock;
/*
* Within dirty, for each bit N, modifications have been made to
* mmu_idx N since the last time that mmu_idx was flushed.
* Protected by tlb_c.lock.
*/
uint16_t dirty;
/*
* Statistics. These are not lock protected, but are read and
* written atomically. This allows the monitor to print a snapshot
* of the stats without interfering with the cpu.
*/
size_t full_flush_count;
size_t part_flush_count;
size_t elide_flush_count;
} CPUTLBCommon;
/*
* The meaning of each of the MMU modes is defined in the target code.
* Note that NB_MMU_MODES is not yet defined; we can only reference it
* within preprocessor defines that will be expanded later.
*/
#define CPU_COMMON_TLB \
/* The meaning of the MMU modes is defined in the target code. */ \
/* tlb_lock serializes updates to tlb_table and tlb_v_table */ \
QemuSpin tlb_lock; \
CPUTLBCommon tlb_c; \
CPUTLBDesc tlb_d[NB_MMU_MODES]; \
CPUTLBEntry tlb_table[NB_MMU_MODES][CPU_TLB_SIZE]; \
CPUTLBEntry tlb_v_table[NB_MMU_MODES][CPU_VTLB_SIZE]; \
CPUIOTLBEntry iotlb[NB_MMU_MODES][CPU_TLB_SIZE]; \
CPUIOTLBEntry iotlb_v[NB_MMU_MODES][CPU_VTLB_SIZE]; \
size_t tlb_flush_count; \
target_ulong tlb_flush_addr; \
target_ulong tlb_flush_mask; \
target_ulong vtlb_index; \
CPUIOTLBEntry iotlb_v[NB_MMU_MODES][CPU_VTLB_SIZE];
#else

View File

@ -23,6 +23,6 @@
/* cputlb.c */
void tlb_protect_code(ram_addr_t ram_addr);
void tlb_unprotect_code(ram_addr_t ram_addr);
size_t tlb_flush_count(void);
void tlb_flush_counts(size_t *full, size_t *part, size_t *elide);
#endif
#endif

View File

@ -429,12 +429,6 @@ struct CPUState {
struct hax_vcpu_state *hax_vcpu;
/* The pending_tlb_flush flag is set and cleared atomically to
* avoid potential races. The aim of the flag is to avoid
* unnecessary flushes.
*/
uint16_t pending_tlb_flush;
int hvf_fd;
/* track IOMMUs whose translations we've cached in the TCG TLB */