mirror of https://gitee.com/openkylin/qemu.git
tcg/ppc: Change TCG_REG_RA to TCG_REG_TB
At this point the conversion is a wash. Loading of TB+ofs is smaller, but the actual return address from exit_tb is larger. There are a few more insns required to transition between TBs. But the expectation is that accesses to the constant pool will on the whole be smaller.

Signed-off-by: Richard Henderson <rth@twiddle.net>
This commit is contained in:
parent
afe74dbd6a
commit
5964fca8a1
|
@ -39,29 +39,8 @@
|
|||
# define TCG_REG_TMP1 TCG_REG_R12
|
||||
#endif
|
||||
|
||||
/* For the 64-bit target, we don't like the 5 insn sequence needed to build
|
||||
full 64-bit addresses. Better to have a base register to which we can
|
||||
apply a 32-bit displacement.
|
||||
|
||||
There are generally three items of interest:
|
||||
(1) helper functions in the main executable,
|
||||
(2) TranslationBlock data structures,
|
||||
(3) the return address in the epilogue.
|
||||
|
||||
For user-only, we USE_STATIC_CODE_GEN_BUFFER, so the code_gen_buffer
|
||||
will be inside the main executable, and thus near enough to make a
|
||||
pointer to the epilogue be within 2GB of all helper functions.
|
||||
|
||||
For softmmu, we'll let the kernel choose the address of code_gen_buffer,
|
||||
and odds are it'll be somewhere close to the main malloc arena, and so
|
||||
a pointer to the epilogue will be within 2GB of the TranslationBlocks.
|
||||
|
||||
For --enable-pie, everything will be kinda near everything else,
|
||||
somewhere in high memory.
|
||||
|
||||
Thus we choose to keep the return address in a call-saved register. */
|
||||
#define TCG_REG_RA TCG_REG_R31
|
||||
#define USE_REG_RA (TCG_TARGET_REG_BITS == 64)
|
||||
#define TCG_REG_TB TCG_REG_R31
|
||||
#define USE_REG_TB (TCG_TARGET_REG_BITS == 64)
|
||||
|
||||
/* Shorthand for size of a pointer. Avoid promotion to unsigned. */
|
||||
#define SZP ((int)sizeof(void *))
|
||||
|
@ -614,50 +593,68 @@ static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
|
|||
tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
|
||||
}
|
||||
|
||||
static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg)
|
||||
static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
|
||||
tcg_target_long arg, bool in_prologue)
|
||||
{
|
||||
if (arg == (int16_t) arg) {
|
||||
intptr_t tb_diff;
|
||||
int32_t high;
|
||||
|
||||
tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
|
||||
|
||||
if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
|
||||
arg = (int32_t)arg;
|
||||
}
|
||||
|
||||
/* Load 16-bit immediates with one insn. */
|
||||
if (arg == (int16_t)arg) {
|
||||
tcg_out32(s, ADDI | TAI(ret, 0, arg));
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Load addresses within the TB with one insn. */
|
||||
tb_diff = arg - (intptr_t)s->code_gen_ptr;
|
||||
if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
|
||||
tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
|
||||
return;
|
||||
}
|
||||
|
||||
/* Load 32-bit immediates with two insns. */
|
||||
if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
|
||||
tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
|
||||
if (arg & 0xffff) {
|
||||
tcg_out32(s, ORI | SAI(ret, ret, arg));
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (arg == (uint32_t)arg && !(arg & 0x8000)) {
|
||||
tcg_out32(s, ADDI | TAI(ret, 0, arg));
|
||||
tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
|
||||
return;
|
||||
}
|
||||
|
||||
/* Load addresses within 2GB of TB with 2 (or rarely 3) insns. */
|
||||
if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
|
||||
tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
|
||||
return;
|
||||
}
|
||||
|
||||
high = arg >> 31 >> 1;
|
||||
tcg_out_movi(s, TCG_TYPE_I32, ret, high);
|
||||
if (high) {
|
||||
tcg_out_shli64(s, ret, ret, 32);
|
||||
}
|
||||
if (arg & 0xffff0000) {
|
||||
tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
|
||||
}
|
||||
if (arg & 0xffff) {
|
||||
tcg_out32(s, ORI | SAI(ret, ret, arg));
|
||||
}
|
||||
}
|
||||
|
||||
static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
|
||||
tcg_target_long arg)
|
||||
static inline void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
|
||||
tcg_target_long arg)
|
||||
{
|
||||
tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
|
||||
if (type == TCG_TYPE_I32 || arg == (int32_t)arg) {
|
||||
tcg_out_movi32(s, ret, arg);
|
||||
} else if (arg == (uint32_t)arg && !(arg & 0x8000)) {
|
||||
tcg_out32(s, ADDI | TAI(ret, 0, arg));
|
||||
tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
|
||||
} else {
|
||||
int32_t high;
|
||||
|
||||
if (USE_REG_RA) {
|
||||
intptr_t diff = arg - (intptr_t)tb_ret_addr;
|
||||
if (diff == (int32_t)diff) {
|
||||
tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_RA, diff);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
high = arg >> 31 >> 1;
|
||||
tcg_out_movi32(s, ret, high);
|
||||
if (high) {
|
||||
tcg_out_shli64(s, ret, ret, 32);
|
||||
}
|
||||
if (arg & 0xffff0000) {
|
||||
tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
|
||||
}
|
||||
if (arg & 0xffff) {
|
||||
tcg_out32(s, ORI | SAI(ret, ret, arg));
|
||||
}
|
||||
}
|
||||
tcg_out_movi_int(s, type, ret, arg, false);
|
||||
}
|
||||
|
||||
static bool mask_operand(uint32_t c, int *mb, int *me)
|
||||
|
@ -1293,49 +1290,43 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
|
|||
tcg_out32(s, insn);
|
||||
}
|
||||
|
||||
#ifdef __powerpc64__
|
||||
void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
|
||||
uintptr_t addr)
|
||||
{
|
||||
tcg_insn_unit i1, i2;
|
||||
uint64_t pair;
|
||||
intptr_t diff = addr - jmp_addr;
|
||||
if (TCG_TARGET_REG_BITS == 64) {
|
||||
tcg_insn_unit i1, i2;
|
||||
intptr_t tb_diff = addr - tc_ptr;
|
||||
intptr_t br_diff = addr - (jmp_addr + 4);
|
||||
uint64_t pair;
|
||||
|
||||
if (in_range_b(diff)) {
|
||||
i1 = B | (diff & 0x3fffffc);
|
||||
i2 = NOP;
|
||||
} else if (USE_REG_RA) {
|
||||
intptr_t lo, hi;
|
||||
diff = addr - (uintptr_t)tb_ret_addr;
|
||||
lo = (int16_t)diff;
|
||||
hi = (int32_t)(diff - lo);
|
||||
tcg_debug_assert(diff == hi + lo);
|
||||
i1 = ADDIS | TAI(TCG_REG_TMP1, TCG_REG_RA, hi >> 16);
|
||||
i2 = ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, lo);
|
||||
} else {
|
||||
tcg_debug_assert(TCG_TARGET_REG_BITS == 32 || addr == (int32_t)addr);
|
||||
i1 = ADDIS | TAI(TCG_REG_TMP1, 0, addr >> 16);
|
||||
i2 = ORI | SAI(TCG_REG_TMP1, TCG_REG_TMP1, addr);
|
||||
}
|
||||
/* This does not exercise the range of the branch, but we do
|
||||
still need to be able to load the new value of TCG_REG_TB.
|
||||
But this does still happen quite often. */
|
||||
if (tb_diff == (int16_t)tb_diff) {
|
||||
i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
|
||||
i2 = B | (br_diff & 0x3fffffc);
|
||||
} else {
|
||||
intptr_t lo = (int16_t)tb_diff;
|
||||
intptr_t hi = (int32_t)(tb_diff - lo);
|
||||
assert(tb_diff == hi + lo);
|
||||
i1 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
|
||||
i2 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);
|
||||
}
|
||||
#ifdef HOST_WORDS_BIGENDIAN
|
||||
pair = (uint64_t)i1 << 32 | i2;
|
||||
pair = (uint64_t)i1 << 32 | i2;
|
||||
#else
|
||||
pair = (uint64_t)i2 << 32 | i1;
|
||||
pair = (uint64_t)i2 << 32 | i1;
|
||||
#endif
|
||||
|
||||
atomic_set((uint64_t *)jmp_addr, pair);
|
||||
flush_icache_range(jmp_addr, jmp_addr + 8);
|
||||
atomic_set((uint64_t *)jmp_addr, pair);
|
||||
flush_icache_range(jmp_addr, jmp_addr + 8);
|
||||
} else {
|
||||
intptr_t diff = addr - jmp_addr;
|
||||
tcg_debug_assert(in_range_b(diff));
|
||||
atomic_set((uint32_t *)jmp_addr, B | (diff & 0x3fffffc));
|
||||
flush_icache_range(jmp_addr, jmp_addr + 4);
|
||||
}
|
||||
}
|
||||
#else
|
||||
void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
|
||||
uintptr_t addr)
|
||||
{
|
||||
intptr_t diff = addr - jmp_addr;
|
||||
tcg_debug_assert(in_range_b(diff));
|
||||
atomic_set((uint32_t *)jmp_addr, B | (diff & 0x3fffffc));
|
||||
flush_icache_range(jmp_addr, jmp_addr + 4);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
|
||||
{
|
||||
|
@ -1897,44 +1888,20 @@ static void tcg_target_qemu_prologue(TCGContext *s)
|
|||
|
||||
#ifndef CONFIG_SOFTMMU
|
||||
if (guest_base) {
|
||||
tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
|
||||
tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
|
||||
tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
|
||||
}
|
||||
#endif
|
||||
|
||||
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
|
||||
tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
|
||||
|
||||
if (USE_REG_RA) {
|
||||
#ifdef _CALL_AIX
|
||||
/* Make the caller load the value as the TOC into R2. */
|
||||
tb_ret_addr = s->code_ptr + 2;
|
||||
desc[1] = tb_ret_addr;
|
||||
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_RA, TCG_REG_R2);
|
||||
tcg_out32(s, BCCTR | BO_ALWAYS);
|
||||
#elif defined(_CALL_ELF) && _CALL_ELF == 2
|
||||
/* Compute from the incoming R12 value. */
|
||||
tb_ret_addr = s->code_ptr + 2;
|
||||
tcg_out32(s, ADDI | TAI(TCG_REG_RA, TCG_REG_R12,
|
||||
tcg_ptr_byte_diff(tb_ret_addr, s->code_buf)));
|
||||
tcg_out32(s, BCCTR | BO_ALWAYS);
|
||||
#else
|
||||
/* Reserve max 5 insns for the constant load. */
|
||||
tb_ret_addr = s->code_ptr + 6;
|
||||
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)tb_ret_addr);
|
||||
tcg_out32(s, BCCTR | BO_ALWAYS);
|
||||
while (s->code_ptr < tb_ret_addr) {
|
||||
tcg_out32(s, NOP);
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
tcg_out32(s, BCCTR | BO_ALWAYS);
|
||||
tb_ret_addr = s->code_ptr;
|
||||
if (USE_REG_TB) {
|
||||
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
|
||||
}
|
||||
tcg_out32(s, BCCTR | BO_ALWAYS);
|
||||
|
||||
/* Epilogue */
|
||||
tcg_debug_assert(tb_ret_addr == s->code_ptr);
|
||||
s->code_gen_epilogue = tb_ret_addr;
|
||||
s->code_gen_epilogue = tb_ret_addr = s->code_ptr;
|
||||
|
||||
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
|
||||
for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
|
||||
|
@ -1954,44 +1921,48 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
|
|||
|
||||
switch (opc) {
|
||||
case INDEX_op_exit_tb:
|
||||
if (USE_REG_RA) {
|
||||
ptrdiff_t disp = tcg_pcrel_diff(s, tb_ret_addr);
|
||||
|
||||
/* Use a direct branch if we can, otherwise use the value in RA.
|
||||
Note that the direct branch is always backward, thus we need
|
||||
to account for the possibility of 5 insns from the movi. */
|
||||
if (!in_range_b(disp - 20)) {
|
||||
tcg_out32(s, MTSPR | RS(TCG_REG_RA) | CTR);
|
||||
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
|
||||
tcg_out32(s, BCCTR | BO_ALWAYS);
|
||||
break;
|
||||
}
|
||||
}
|
||||
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
|
||||
tcg_out_b(s, 0, tb_ret_addr);
|
||||
break;
|
||||
case INDEX_op_goto_tb:
|
||||
tcg_debug_assert(s->tb_jmp_insn_offset);
|
||||
/* Direct jump. */
|
||||
#ifdef __powerpc64__
|
||||
/* Ensure the next insns are 8-byte aligned. */
|
||||
if ((uintptr_t)s->code_ptr & 7) {
|
||||
tcg_out32(s, NOP);
|
||||
if (s->tb_jmp_insn_offset) {
|
||||
/* Direct jump. */
|
||||
if (TCG_TARGET_REG_BITS == 64) {
|
||||
/* Ensure the next insns are 8-byte aligned. */
|
||||
if ((uintptr_t)s->code_ptr & 7) {
|
||||
tcg_out32(s, NOP);
|
||||
}
|
||||
s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
|
||||
tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0));
|
||||
tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0));
|
||||
} else {
|
||||
s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
|
||||
tcg_out32(s, B);
|
||||
s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* Indirect jump. */
|
||||
tcg_debug_assert(s->tb_jmp_insn_offset == NULL);
|
||||
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TB, 0,
|
||||
(intptr_t)(s->tb_jmp_insn_offset + args[0]));
|
||||
}
|
||||
s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
|
||||
/* To be replaced by either a branch+nop or a load into TMP1. */
|
||||
s->code_ptr += 2;
|
||||
tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
|
||||
tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
|
||||
tcg_out32(s, BCCTR | BO_ALWAYS);
|
||||
#else
|
||||
/* To be replaced by a branch. */
|
||||
s->code_ptr++;
|
||||
#endif
|
||||
s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
|
||||
s->tb_jmp_reset_offset[args[0]] = c = tcg_current_code_size(s);
|
||||
if (USE_REG_TB) {
|
||||
/* For the unlinked case, need to reset TCG_REG_TB. */
|
||||
c = -c;
|
||||
assert(c == (int16_t)c);
|
||||
tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, c));
|
||||
}
|
||||
break;
|
||||
case INDEX_op_goto_ptr:
|
||||
tcg_out32(s, MTSPR | RS(args[0]) | CTR);
|
||||
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, 0);
|
||||
if (USE_REG_TB) {
|
||||
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]);
|
||||
}
|
||||
tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
|
||||
tcg_out32(s, BCCTR | BO_ALWAYS);
|
||||
break;
|
||||
case INDEX_op_br:
|
||||
|
@ -2761,8 +2732,8 @@ static void tcg_target_init(TCGContext *s)
|
|||
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
|
||||
#endif
|
||||
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
|
||||
if (USE_REG_RA) {
|
||||
tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return addr */
|
||||
if (USE_REG_TB) {
|
||||
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); /* tb->tc_ptr */
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue