mirror of https://gitee.com/openkylin/qemu.git
TCG: Move translation block variables to new context inside tcg_ctx: tb_ctx
It's worth cleaning up the translation block variables and moving them into one context, as was suggested by Swirl. Also, if we use this context directly inside tcg_ctx, it speeds up code generation a bit. Signed-off-by: Evgeny Voevodin <evgenyvoevodin@gmail.com> Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
This commit is contained in:
parent
0b0d3320db
commit
5e5f07e08f
18
cpu-exec.c
18
cpu-exec.c
|
@ -23,8 +23,6 @@
|
|||
#include "qemu/atomic.h"
|
||||
#include "sysemu/qtest.h"
|
||||
|
||||
int tb_invalidated_flag;
|
||||
|
||||
//#define CONFIG_DEBUG_EXEC
|
||||
|
||||
bool qemu_cpu_has_work(CPUState *cpu)
|
||||
|
@ -90,13 +88,13 @@ static TranslationBlock *tb_find_slow(CPUArchState *env,
|
|||
tb_page_addr_t phys_pc, phys_page1;
|
||||
target_ulong virt_page2;
|
||||
|
||||
tb_invalidated_flag = 0;
|
||||
tcg_ctx.tb_ctx.tb_invalidated_flag = 0;
|
||||
|
||||
/* find translated block using physical mappings */
|
||||
phys_pc = get_page_addr_code(env, pc);
|
||||
phys_page1 = phys_pc & TARGET_PAGE_MASK;
|
||||
h = tb_phys_hash_func(phys_pc);
|
||||
ptb1 = &tb_phys_hash[h];
|
||||
ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h];
|
||||
for(;;) {
|
||||
tb = *ptb1;
|
||||
if (!tb)
|
||||
|
@ -128,8 +126,8 @@ static TranslationBlock *tb_find_slow(CPUArchState *env,
|
|||
/* Move the last found TB to the head of the list */
|
||||
if (likely(*ptb1)) {
|
||||
*ptb1 = tb->phys_hash_next;
|
||||
tb->phys_hash_next = tb_phys_hash[h];
|
||||
tb_phys_hash[h] = tb;
|
||||
tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h];
|
||||
tcg_ctx.tb_ctx.tb_phys_hash[h] = tb;
|
||||
}
|
||||
/* we add the TB in the virtual pc hash table */
|
||||
env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
|
||||
|
@ -563,16 +561,16 @@ int cpu_exec(CPUArchState *env)
|
|||
#endif
|
||||
}
|
||||
#endif /* DEBUG_DISAS || CONFIG_DEBUG_EXEC */
|
||||
spin_lock(&tb_lock);
|
||||
spin_lock(&tcg_ctx.tb_ctx.tb_lock);
|
||||
tb = tb_find_fast(env);
|
||||
/* Note: we do it here to avoid a gcc bug on Mac OS X when
|
||||
doing it in tb_find_slow */
|
||||
if (tb_invalidated_flag) {
|
||||
if (tcg_ctx.tb_ctx.tb_invalidated_flag) {
|
||||
/* as some TB could have been invalidated because
|
||||
of memory exceptions while generating the code, we
|
||||
must recompute the hash index here */
|
||||
next_tb = 0;
|
||||
tb_invalidated_flag = 0;
|
||||
tcg_ctx.tb_ctx.tb_invalidated_flag = 0;
|
||||
}
|
||||
#ifdef CONFIG_DEBUG_EXEC
|
||||
qemu_log_mask(CPU_LOG_EXEC, "Trace %p [" TARGET_FMT_lx "] %s\n",
|
||||
|
@ -585,7 +583,7 @@ int cpu_exec(CPUArchState *env)
|
|||
if (next_tb != 0 && tb->page_addr[1] == -1) {
|
||||
tb_add_jump((TranslationBlock *)(next_tb & ~3), next_tb & 3, tb);
|
||||
}
|
||||
spin_unlock(&tb_lock);
|
||||
spin_unlock(&tcg_ctx.tb_ctx.tb_lock);
|
||||
|
||||
/* cpu_interrupt might be called while translating the
|
||||
TB, but before it is linked into a potentially
|
||||
|
|
|
@ -168,6 +168,25 @@ struct TranslationBlock {
|
|||
uint32_t icount;
|
||||
};
|
||||
|
||||
#include "exec/spinlock.h"
|
||||
|
||||
typedef struct TBContext TBContext;
|
||||
|
||||
struct TBContext {
|
||||
|
||||
TranslationBlock *tbs;
|
||||
TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
|
||||
int nb_tbs;
|
||||
/* any access to the tbs or the page table must use this lock */
|
||||
spinlock_t tb_lock;
|
||||
|
||||
/* statistics */
|
||||
int tb_flush_count;
|
||||
int tb_phys_invalidate_count;
|
||||
|
||||
int tb_invalidated_flag;
|
||||
};
|
||||
|
||||
static inline unsigned int tb_jmp_cache_hash_page(target_ulong pc)
|
||||
{
|
||||
target_ulong tmp;
|
||||
|
@ -192,8 +211,6 @@ void tb_free(TranslationBlock *tb);
|
|||
void tb_flush(CPUArchState *env);
|
||||
void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
|
||||
|
||||
extern TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
|
||||
|
||||
#if defined(USE_DIRECT_JUMP)
|
||||
|
||||
#if defined(CONFIG_TCG_INTERPRETER)
|
||||
|
@ -275,12 +292,6 @@ static inline void tb_add_jump(TranslationBlock *tb, int n,
|
|||
}
|
||||
}
|
||||
|
||||
#include "exec/spinlock.h"
|
||||
|
||||
extern spinlock_t tb_lock;
|
||||
|
||||
extern int tb_invalidated_flag;
|
||||
|
||||
/* The return address may point to the start of the next instruction.
|
||||
Subtracting one gets us the call instruction itself. */
|
||||
#if defined(CONFIG_TCG_INTERPRETER)
|
||||
|
|
|
@ -111,7 +111,7 @@ static int pending_cpus;
|
|||
/* Make sure everything is in a consistent state for calling fork(). */
|
||||
void fork_start(void)
|
||||
{
|
||||
pthread_mutex_lock(&tb_lock);
|
||||
pthread_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
|
||||
pthread_mutex_lock(&exclusive_lock);
|
||||
mmap_fork_start();
|
||||
}
|
||||
|
@ -129,11 +129,11 @@ void fork_end(int child)
|
|||
pthread_mutex_init(&cpu_list_mutex, NULL);
|
||||
pthread_cond_init(&exclusive_cond, NULL);
|
||||
pthread_cond_init(&exclusive_resume, NULL);
|
||||
pthread_mutex_init(&tb_lock, NULL);
|
||||
pthread_mutex_init(&tcg_ctx.tb_ctx.tb_lock, NULL);
|
||||
gdbserver_fork(thread_env);
|
||||
} else {
|
||||
pthread_mutex_unlock(&exclusive_lock);
|
||||
pthread_mutex_unlock(&tb_lock);
|
||||
pthread_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -471,6 +471,8 @@ struct TCGContext {
|
|||
size_t code_gen_buffer_max_size;
|
||||
uint8_t *code_gen_ptr;
|
||||
|
||||
TBContext tb_ctx;
|
||||
|
||||
#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
|
||||
/* labels info for qemu_ld/st IRs
|
||||
The labels help to generate TLB miss case codes at the end of TB */
|
||||
|
|
|
@ -72,13 +72,6 @@
|
|||
|
||||
#define SMC_BITMAP_USE_THRESHOLD 10
|
||||
|
||||
/* Translation blocks */
|
||||
static TranslationBlock *tbs;
|
||||
TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
|
||||
static int nb_tbs;
|
||||
/* any access to the tbs or the page table must use this lock */
|
||||
spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
|
||||
|
||||
typedef struct PageDesc {
|
||||
/* list of TBs intersecting this ram page */
|
||||
TranslationBlock *first_tb;
|
||||
|
@ -125,10 +118,6 @@ uintptr_t qemu_host_page_mask;
|
|||
The bottom level has pointers to PageDesc. */
|
||||
static void *l1_map[V_L1_SIZE];
|
||||
|
||||
/* statistics */
|
||||
static int tb_flush_count;
|
||||
static int tb_phys_invalidate_count;
|
||||
|
||||
/* code generation context */
|
||||
TCGContext tcg_ctx;
|
||||
|
||||
|
@ -589,7 +578,8 @@ static inline void code_gen_alloc(size_t tb_size)
|
|||
(TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
|
||||
tcg_ctx.code_gen_max_blocks = tcg_ctx.code_gen_buffer_size /
|
||||
CODE_GEN_AVG_BLOCK_SIZE;
|
||||
tbs = g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock));
|
||||
tcg_ctx.tb_ctx.tbs =
|
||||
g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock));
|
||||
}
|
||||
|
||||
/* Must be called before using the QEMU cpus. 'tb_size' is the size
|
||||
|
@ -620,12 +610,12 @@ static TranslationBlock *tb_alloc(target_ulong pc)
|
|||
{
|
||||
TranslationBlock *tb;
|
||||
|
||||
if (nb_tbs >= tcg_ctx.code_gen_max_blocks ||
|
||||
if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks ||
|
||||
(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) >=
|
||||
tcg_ctx.code_gen_buffer_max_size) {
|
||||
return NULL;
|
||||
}
|
||||
tb = &tbs[nb_tbs++];
|
||||
tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++];
|
||||
tb->pc = pc;
|
||||
tb->cflags = 0;
|
||||
return tb;
|
||||
|
@ -636,9 +626,10 @@ void tb_free(TranslationBlock *tb)
|
|||
/* In practice this is mostly used for single use temporary TB
|
||||
Ignore the hard cases and just back up if this TB happens to
|
||||
be the last one generated. */
|
||||
if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
|
||||
if (tcg_ctx.tb_ctx.nb_tbs > 0 &&
|
||||
tb == &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) {
|
||||
tcg_ctx.code_gen_ptr = tb->tc_ptr;
|
||||
nb_tbs--;
|
||||
tcg_ctx.tb_ctx.nb_tbs--;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -693,27 +684,28 @@ void tb_flush(CPUArchState *env1)
|
|||
#if defined(DEBUG_FLUSH)
|
||||
printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
|
||||
(unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer),
|
||||
nb_tbs, nb_tbs > 0 ?
|
||||
tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.tb_ctx.nb_tbs > 0 ?
|
||||
((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)) /
|
||||
nb_tbs : 0);
|
||||
tcg_ctx.tb_ctx.nb_tbs : 0);
|
||||
#endif
|
||||
if ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)
|
||||
> tcg_ctx.code_gen_buffer_size) {
|
||||
cpu_abort(env1, "Internal error: code buffer overflow\n");
|
||||
}
|
||||
nb_tbs = 0;
|
||||
tcg_ctx.tb_ctx.nb_tbs = 0;
|
||||
|
||||
for (env = first_cpu; env != NULL; env = env->next_cpu) {
|
||||
memset(env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *));
|
||||
}
|
||||
|
||||
memset(tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof(void *));
|
||||
memset(tcg_ctx.tb_ctx.tb_phys_hash, 0,
|
||||
CODE_GEN_PHYS_HASH_SIZE * sizeof(void *));
|
||||
page_flush_tb();
|
||||
|
||||
tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
|
||||
/* XXX: flush processor icache at this point if cache flush is
|
||||
expensive */
|
||||
tb_flush_count++;
|
||||
tcg_ctx.tb_ctx.tb_flush_count++;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_TB_CHECK
|
||||
|
@ -725,7 +717,7 @@ static void tb_invalidate_check(target_ulong address)
|
|||
|
||||
address &= TARGET_PAGE_MASK;
|
||||
for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
|
||||
for (tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
|
||||
for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
|
||||
if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
|
||||
address >= tb->pc + tb->size)) {
|
||||
printf("ERROR invalidate: address=" TARGET_FMT_lx
|
||||
|
@ -743,7 +735,8 @@ static void tb_page_check(void)
|
|||
int i, flags1, flags2;
|
||||
|
||||
for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
|
||||
for (tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
|
||||
for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL;
|
||||
tb = tb->phys_hash_next) {
|
||||
flags1 = page_get_flags(tb->pc);
|
||||
flags2 = page_get_flags(tb->pc + tb->size - 1);
|
||||
if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
|
||||
|
@ -835,7 +828,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
|
|||
/* remove the TB from the hash list */
|
||||
phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
|
||||
h = tb_phys_hash_func(phys_pc);
|
||||
tb_hash_remove(&tb_phys_hash[h], tb);
|
||||
tb_hash_remove(&tcg_ctx.tb_ctx.tb_phys_hash[h], tb);
|
||||
|
||||
/* remove the TB from the page list */
|
||||
if (tb->page_addr[0] != page_addr) {
|
||||
|
@ -849,7 +842,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
|
|||
invalidate_page_bitmap(p);
|
||||
}
|
||||
|
||||
tb_invalidated_flag = 1;
|
||||
tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
|
||||
|
||||
/* remove the TB from the hash list */
|
||||
h = tb_jmp_cache_hash_func(tb->pc);
|
||||
|
@ -878,7 +871,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
|
|||
}
|
||||
tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
|
||||
|
||||
tb_phys_invalidate_count++;
|
||||
tcg_ctx.tb_ctx.tb_phys_invalidate_count++;
|
||||
}
|
||||
|
||||
static inline void set_bits(uint8_t *tab, int start, int len)
|
||||
|
@ -955,7 +948,7 @@ TranslationBlock *tb_gen_code(CPUArchState *env,
|
|||
/* cannot fail at this point */
|
||||
tb = tb_alloc(pc);
|
||||
/* Don't forget to invalidate previous TB info. */
|
||||
tb_invalidated_flag = 1;
|
||||
tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
|
||||
}
|
||||
tc_ptr = tcg_ctx.code_gen_ptr;
|
||||
tb->tc_ptr = tc_ptr;
|
||||
|
@ -1273,7 +1266,7 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
|
|||
mmap_lock();
|
||||
/* add in the physical hash table */
|
||||
h = tb_phys_hash_func(phys_pc);
|
||||
ptb = &tb_phys_hash[h];
|
||||
ptb = &tcg_ctx.tb_ctx.tb_phys_hash[h];
|
||||
tb->phys_hash_next = *ptb;
|
||||
*ptb = tb;
|
||||
|
||||
|
@ -1323,7 +1316,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
|
|||
uintptr_t v;
|
||||
TranslationBlock *tb;
|
||||
|
||||
if (nb_tbs <= 0) {
|
||||
if (tcg_ctx.tb_ctx.nb_tbs <= 0) {
|
||||
return NULL;
|
||||
}
|
||||
if (tc_ptr < (uintptr_t)tcg_ctx.code_gen_buffer ||
|
||||
|
@ -1332,10 +1325,10 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
|
|||
}
|
||||
/* binary search (cf Knuth) */
|
||||
m_min = 0;
|
||||
m_max = nb_tbs - 1;
|
||||
m_max = tcg_ctx.tb_ctx.nb_tbs - 1;
|
||||
while (m_min <= m_max) {
|
||||
m = (m_min + m_max) >> 1;
|
||||
tb = &tbs[m];
|
||||
tb = &tcg_ctx.tb_ctx.tbs[m];
|
||||
v = (uintptr_t)tb->tc_ptr;
|
||||
if (v == tc_ptr) {
|
||||
return tb;
|
||||
|
@ -1345,7 +1338,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
|
|||
m_min = m + 1;
|
||||
}
|
||||
}
|
||||
return &tbs[m_max];
|
||||
return &tcg_ctx.tb_ctx.tbs[m_max];
|
||||
}
|
||||
|
||||
static void tb_reset_jump_recursive(TranslationBlock *tb);
|
||||
|
@ -1566,8 +1559,8 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
|
|||
cross_page = 0;
|
||||
direct_jmp_count = 0;
|
||||
direct_jmp2_count = 0;
|
||||
for (i = 0; i < nb_tbs; i++) {
|
||||
tb = &tbs[i];
|
||||
for (i = 0; i < tcg_ctx.tb_ctx.nb_tbs; i++) {
|
||||
tb = &tcg_ctx.tb_ctx.tbs[i];
|
||||
target_code_size += tb->size;
|
||||
if (tb->size > max_target_code_size) {
|
||||
max_target_code_size = tb->size;
|
||||
|
@ -1588,27 +1581,32 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
|
|||
tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer,
|
||||
tcg_ctx.code_gen_buffer_max_size);
|
||||
cpu_fprintf(f, "TB count %d/%d\n",
|
||||
nb_tbs, tcg_ctx.code_gen_max_blocks);
|
||||
tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.code_gen_max_blocks);
|
||||
cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
|
||||
nb_tbs ? target_code_size / nb_tbs : 0,
|
||||
max_target_code_size);
|
||||
tcg_ctx.tb_ctx.nb_tbs ? target_code_size /
|
||||
tcg_ctx.tb_ctx.nb_tbs : 0,
|
||||
max_target_code_size);
|
||||
cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
|
||||
nb_tbs ? (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) /
|
||||
nb_tbs : 0,
|
||||
target_code_size ?
|
||||
(double) (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) /
|
||||
target_code_size : 0);
|
||||
cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
|
||||
cross_page,
|
||||
nb_tbs ? (cross_page * 100) / nb_tbs : 0);
|
||||
tcg_ctx.tb_ctx.nb_tbs ? (tcg_ctx.code_gen_ptr -
|
||||
tcg_ctx.code_gen_buffer) /
|
||||
tcg_ctx.tb_ctx.nb_tbs : 0,
|
||||
target_code_size ? (double) (tcg_ctx.code_gen_ptr -
|
||||
tcg_ctx.code_gen_buffer) /
|
||||
target_code_size : 0);
|
||||
cpu_fprintf(f, "cross page TB count %d (%d%%)\n", cross_page,
|
||||
tcg_ctx.tb_ctx.nb_tbs ? (cross_page * 100) /
|
||||
tcg_ctx.tb_ctx.nb_tbs : 0);
|
||||
cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
|
||||
direct_jmp_count,
|
||||
nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
|
||||
tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp_count * 100) /
|
||||
tcg_ctx.tb_ctx.nb_tbs : 0,
|
||||
direct_jmp2_count,
|
||||
nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
|
||||
tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp2_count * 100) /
|
||||
tcg_ctx.tb_ctx.nb_tbs : 0);
|
||||
cpu_fprintf(f, "\nStatistics:\n");
|
||||
cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
|
||||
cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
|
||||
cpu_fprintf(f, "TB flush count %d\n", tcg_ctx.tb_ctx.tb_flush_count);
|
||||
cpu_fprintf(f, "TB invalidate count %d\n",
|
||||
tcg_ctx.tb_ctx.tb_phys_invalidate_count);
|
||||
cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
|
||||
tcg_dump_info(f, cpu_fprintf);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue