tcg/ppc: ld/st optimization

Signed-off-by: malc <av1474@comtv.ru>
This commit is contained in:
malc 2012-11-03 19:38:32 +04:00
parent b51d7b2e10
commit ed224a56b3
3 changed files with 268 additions and 202 deletions

2
configure vendored
View File

@ -3882,7 +3882,7 @@ upper() {
} }
case "$cpu" in case "$cpu" in
i386|x86_64) i386|x86_64|ppc)
echo "CONFIG_QEMU_LDST_OPTIMIZATION=y" >> $config_target_mak echo "CONFIG_QEMU_LDST_OPTIMIZATION=y" >> $config_target_mak
;; ;;
esac esac

View File

@ -335,6 +335,9 @@ extern uintptr_t tci_tb_ptr;
# define GETRA() ((uintptr_t)__builtin_return_address(0)) # define GETRA() ((uintptr_t)__builtin_return_address(0))
# define GETPC_LDST() ((uintptr_t)(GETRA() + 7 + \ # define GETPC_LDST() ((uintptr_t)(GETRA() + 7 + \
*(int32_t *)((void *)GETRA() + 3) - 1)) *(int32_t *)((void *)GETRA() + 3) - 1))
# elif defined (_ARCH_PPC) && !defined (_ARCH_PPC64)
# define GETRA() ((uintptr_t)__builtin_return_address(0))
# define GETPC_LDST() ((uintptr_t) ((*(int32_t *)(GETRA() + 4)) - 1))
# else # else
# error "CONFIG_QEMU_LDST_OPTIMIZATION needs GETPC_LDST() implementation!" # error "CONFIG_QEMU_LDST_OPTIMIZATION needs GETPC_LDST() implementation!"
# endif # endif

View File

@ -39,8 +39,6 @@ static uint8_t *tb_ret_addr;
#define LR_OFFSET 4 #define LR_OFFSET 4
#endif #endif
#define FAST_PATH
#ifndef GUEST_BASE #ifndef GUEST_BASE
#define GUEST_BASE 0 #define GUEST_BASE 0
#endif #endif
@ -520,6 +518,37 @@ static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg)
#if defined(CONFIG_SOFTMMU) #if defined(CONFIG_SOFTMMU)
/*
 * Append one entry to s->qemu_ldst_labels describing a qemu_ld/qemu_st
 * whose slow path still has to be emitted.
 *
 * is_ld                    non-zero for a load, zero for a store
 * opc                      operation code of the access
 * data_reg, data_reg2      stored as datalo_reg / datahi_reg
 * addrlo_reg, addrhi_reg   registers holding the guest address
 * mem_index                MMU index handed to the helper
 * raddr                    address the slow path returns to
 * label_ptr                location of the branch to be patched later
 *
 * Aborts through tcg_abort() once TCG_MAX_QEMU_LDST entries are in use.
 */
static void add_qemu_ldst_label (TCGContext *s,
                                 int is_ld,
                                 int opc,
                                 int data_reg,
                                 int data_reg2,
                                 int addrlo_reg,
                                 int addrhi_reg,
                                 int mem_index,
                                 uint8_t *raddr,
                                 uint8_t *label_ptr)
{
    TCGLabelQemuLdst *entry;
    int slot;

    if (s->nb_qemu_ldst_labels >= TCG_MAX_QEMU_LDST) {
        tcg_abort();
    }

    /* Claim the next free slot. */
    slot = s->nb_qemu_ldst_labels;
    s->nb_qemu_ldst_labels = slot + 1;
    entry = (TCGLabelQemuLdst *)&s->qemu_ldst_labels[slot];

    /* Where to patch and where to come back to. */
    entry->label_ptr[0] = label_ptr;
    entry->raddr = raddr;

    /* What kind of access this is. */
    entry->is_ld = is_ld;
    entry->opc = opc;
    entry->mem_index = mem_index;

    /* Registers involved in the access. */
    entry->addrlo_reg = addrlo_reg;
    entry->addrhi_reg = addrhi_reg;
    entry->datalo_reg = data_reg;
    entry->datahi_reg = data_reg2;
}
#include "../../softmmu_defs.h" #include "../../softmmu_defs.h"
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
@ -539,36 +568,12 @@ static const void * const qemu_st_helpers[4] = {
helper_stl_mmu, helper_stl_mmu,
helper_stq_mmu, helper_stq_mmu,
}; };
#endif
static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) static void tcg_out_tlb_check (TCGContext *s, int r0, int r1, int r2,
int addr_reg, int addr_reg2, int s_bits,
int offset1, int offset2, uint8_t **label_ptr)
{ {
int addr_reg, data_reg, data_reg2, r0, r1, rbase, bswap; uint16_t retranst;
#ifdef CONFIG_SOFTMMU
int mem_index, s_bits, r2, ir;
void *label1_ptr, *label2_ptr;
#if TARGET_LONG_BITS == 64
int addr_reg2;
#endif
#endif
data_reg = *args++;
if (opc == 3)
data_reg2 = *args++;
else
data_reg2 = 0;
addr_reg = *args++;
#ifdef CONFIG_SOFTMMU
#if TARGET_LONG_BITS == 64
addr_reg2 = *args++;
#endif
mem_index = *args;
s_bits = opc & 3;
r0 = 3;
r1 = 4;
r2 = 0;
rbase = 0;
tcg_out32 (s, (RLWINM tcg_out32 (s, (RLWINM
| RA (r0) | RA (r0)
@ -582,7 +587,7 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
tcg_out32 (s, (LWZU tcg_out32 (s, (LWZU
| RT (r1) | RT (r1)
| RA (r0) | RA (r0)
| offsetof (CPUArchState, tlb_table[mem_index][0].addr_read) | offset1
) )
); );
tcg_out32 (s, (RLWINM tcg_out32 (s, (RLWINM
@ -600,77 +605,58 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
tcg_out32 (s, CMP | BF (6) | RA (addr_reg2) | RB (r1)); tcg_out32 (s, CMP | BF (6) | RA (addr_reg2) | RB (r1));
tcg_out32 (s, CRAND | BT (7, CR_EQ) | BA (6, CR_EQ) | BB (7, CR_EQ)); tcg_out32 (s, CRAND | BT (7, CR_EQ) | BA (6, CR_EQ) | BB (7, CR_EQ));
#endif #endif
*label_ptr = s->code_ptr;
retranst = ((uint16_t *) s->code_ptr)[1] & ~3;
tcg_out32 (s, BC | BI (7, CR_EQ) | retranst | BO_COND_FALSE);
label1_ptr = s->code_ptr; /* r0 now contains &env->tlb_table[mem_index][index].addr_x */
#ifdef FAST_PATH
tcg_out32 (s, BC | BI (7, CR_EQ) | BO_COND_TRUE);
#endif
/* slow path */
ir = 3;
tcg_out_mov (s, TCG_TYPE_I32, ir++, TCG_AREG0);
#if TARGET_LONG_BITS == 32
tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
#else
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
ir |= 1;
#endif
tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg2);
tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
#endif
tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1);
switch (opc) {
case 0|4:
tcg_out32 (s, EXTSB | RA (data_reg) | RS (3));
break;
case 1|4:
tcg_out32 (s, EXTSH | RA (data_reg) | RS (3));
break;
case 0:
case 1:
case 2:
if (data_reg != 3)
tcg_out_mov (s, TCG_TYPE_I32, data_reg, 3);
break;
case 3:
if (data_reg == 3) {
if (data_reg2 == 4) {
tcg_out_mov (s, TCG_TYPE_I32, 0, 4);
tcg_out_mov (s, TCG_TYPE_I32, 4, 3);
tcg_out_mov (s, TCG_TYPE_I32, 3, 0);
}
else {
tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3);
tcg_out_mov (s, TCG_TYPE_I32, 3, 4);
}
}
else {
if (data_reg != 4) tcg_out_mov (s, TCG_TYPE_I32, data_reg, 4);
if (data_reg2 != 3) tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3);
}
break;
}
label2_ptr = s->code_ptr;
tcg_out32 (s, B);
/* label1: fast path */
#ifdef FAST_PATH
reloc_pc14 (label1_ptr, (tcg_target_long) s->code_ptr);
#endif
/* r0 now contains &env->tlb_table[mem_index][index].addr_read */
tcg_out32 (s, (LWZ tcg_out32 (s, (LWZ
| RT (r0) | RT (r0)
| RA (r0) | RA (r0)
| (offsetof (CPUTLBEntry, addend) | offset2
- offsetof (CPUTLBEntry, addr_read)) )
)); );
/* r0 = env->tlb_table[mem_index][index].addend */ /* r0 = env->tlb_table[mem_index][index].addend */
tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (addr_reg)); tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (addr_reg));
/* r0 = env->tlb_table[mem_index][index].addend + addr */ /* r0 = env->tlb_table[mem_index][index].addend + addr */
}
#endif
static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
{
int addr_reg, addr_reg2, data_reg, data_reg2, r0, r1, rbase, bswap;
#ifdef CONFIG_SOFTMMU
int mem_index, s_bits, r2;
uint8_t *label_ptr;
#endif
data_reg = *args++;
if (opc == 3)
data_reg2 = *args++;
else
data_reg2 = 0;
addr_reg = *args++;
#ifdef CONFIG_SOFTMMU
#if TARGET_LONG_BITS == 64
addr_reg2 = *args++;
#else
addr_reg2 = 0;
#endif
mem_index = *args;
s_bits = opc & 3;
r0 = 3;
r1 = 4;
r2 = 0;
rbase = 0;
tcg_out_tlb_check (
s, r0, r1, r2, addr_reg, addr_reg2, s_bits,
offsetof (CPUArchState, tlb_table[mem_index][0].addr_read),
offsetof (CPUTLBEntry, addend) - offsetof (CPUTLBEntry, addr_read),
&label_ptr
);
#else /* !CONFIG_SOFTMMU */ #else /* !CONFIG_SOFTMMU */
r0 = addr_reg; r0 = addr_reg;
r1 = 3; r1 = 3;
@ -736,21 +722,26 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
} }
break; break;
} }
#ifdef CONFIG_SOFTMMU #ifdef CONFIG_SOFTMMU
reloc_pc24 (label2_ptr, (tcg_target_long) s->code_ptr); add_qemu_ldst_label (s,
1,
opc,
data_reg,
data_reg2,
addr_reg,
addr_reg2,
mem_index,
s->code_ptr,
label_ptr);
#endif #endif
} }
static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
{ {
int addr_reg, r0, r1, data_reg, data_reg2, bswap, rbase; int addr_reg, addr_reg2, r0, r1, data_reg, data_reg2, bswap, rbase;
#ifdef CONFIG_SOFTMMU #ifdef CONFIG_SOFTMMU
int mem_index, r2, ir; int mem_index, r2;
void *label1_ptr, *label2_ptr; uint8_t *label_ptr;
#if TARGET_LONG_BITS == 64
int addr_reg2;
#endif
#endif #endif
data_reg = *args++; data_reg = *args++;
@ -763,6 +754,8 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
#ifdef CONFIG_SOFTMMU #ifdef CONFIG_SOFTMMU
#if TARGET_LONG_BITS == 64 #if TARGET_LONG_BITS == 64
addr_reg2 = *args++; addr_reg2 = *args++;
#else
addr_reg2 = 0;
#endif #endif
mem_index = *args; mem_index = *args;
r0 = 3; r0 = 3;
@ -770,105 +763,12 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
r2 = 0; r2 = 0;
rbase = 0; rbase = 0;
tcg_out32 (s, (RLWINM tcg_out_tlb_check (
| RA (r0) s, r0, r1, r2, addr_reg, addr_reg2, opc & 3,
| RS (addr_reg) offsetof (CPUArchState, tlb_table[mem_index][0].addr_write),
| SH (32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)) offsetof (CPUTLBEntry, addend) - offsetof (CPUTLBEntry, addr_write),
| MB (32 - (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)) &label_ptr
| ME (31 - CPU_TLB_ENTRY_BITS)
)
); );
tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (TCG_AREG0));
tcg_out32 (s, (LWZU
| RT (r1)
| RA (r0)
| offsetof (CPUArchState, tlb_table[mem_index][0].addr_write)
)
);
tcg_out32 (s, (RLWINM
| RA (r2)
| RS (addr_reg)
| SH (0)
| MB ((32 - opc) & 31)
| ME (31 - TARGET_PAGE_BITS)
)
);
tcg_out32 (s, CMP | (7 << 23) | RA (r2) | RB (r1));
#if TARGET_LONG_BITS == 64
tcg_out32 (s, LWZ | RT (r1) | RA (r0) | 4);
tcg_out32 (s, CMP | BF (6) | RA (addr_reg2) | RB (r1));
tcg_out32 (s, CRAND | BT (7, CR_EQ) | BA (6, CR_EQ) | BB (7, CR_EQ));
#endif
label1_ptr = s->code_ptr;
#ifdef FAST_PATH
tcg_out32 (s, BC | BI (7, CR_EQ) | BO_COND_TRUE);
#endif
/* slow path */
ir = 3;
tcg_out_mov (s, TCG_TYPE_I32, ir++, TCG_AREG0);
#if TARGET_LONG_BITS == 32
tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
#else
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
ir |= 1;
#endif
tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg2);
tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
#endif
switch (opc) {
case 0:
tcg_out32 (s, (RLWINM
| RA (ir)
| RS (data_reg)
| SH (0)
| MB (24)
| ME (31)));
break;
case 1:
tcg_out32 (s, (RLWINM
| RA (ir)
| RS (data_reg)
| SH (0)
| MB (16)
| ME (31)));
break;
case 2:
tcg_out_mov (s, TCG_TYPE_I32, ir, data_reg);
break;
case 3:
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
ir |= 1;
#endif
tcg_out_mov (s, TCG_TYPE_I32, ir++, data_reg2);
tcg_out_mov (s, TCG_TYPE_I32, ir, data_reg);
break;
}
ir++;
tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
tcg_out_call (s, (tcg_target_long) qemu_st_helpers[opc], 1);
label2_ptr = s->code_ptr;
tcg_out32 (s, B);
/* label1: fast path */
#ifdef FAST_PATH
reloc_pc14 (label1_ptr, (tcg_target_long) s->code_ptr);
#endif
tcg_out32 (s, (LWZ
| RT (r0)
| RA (r0)
| (offsetof (CPUTLBEntry, addend)
- offsetof (CPUTLBEntry, addr_write))
));
/* r0 = env->tlb_table[mem_index][index].addend */
tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (addr_reg));
/* r0 = env->tlb_table[mem_index][index].addend + addr */
#else /* !CONFIG_SOFTMMU */ #else /* !CONFIG_SOFTMMU */
r0 = addr_reg; r0 = addr_reg;
r1 = 3; r1 = 3;
@ -916,10 +816,173 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
} }
#ifdef CONFIG_SOFTMMU #ifdef CONFIG_SOFTMMU
reloc_pc24 (label2_ptr, (tcg_target_long) s->code_ptr); add_qemu_ldst_label (s,
0,
opc,
data_reg,
data_reg2,
addr_reg,
addr_reg2,
mem_index,
s->code_ptr,
label_ptr);
#endif #endif
} }
#if defined(CONFIG_SOFTMMU)
/*
 * Emit the out-of-line slow path for one guest load described by 'label'.
 *
 * The emitted sequence is, in order:
 *   1. argument setup starting at register 3: TCG_AREG0, the guest address
 *      (addrhi_reg then addrlo_reg when TARGET_LONG_BITS is not 32) and
 *      mem_index;
 *   2. a call to qemu_ld_helpers[opc & 3];
 *   3. the word 'B | 8' followed by a raw word holding raddr;
 *   4. moves / sign extensions of the helper result from register 3
 *      (registers 3 and 4 for opc == 3) into data_reg / data_reg2;
 *   5. a branch back to raddr.
 *
 * Before emitting anything, the branch recorded in label->label_ptr[0] is
 * relocated to the current code pointer.
 */
static void tcg_out_qemu_ld_slow_path (TCGContext *s, TCGLabelQemuLdst *label)
{
int s_bits;
int ir;
int opc = label->opc;
int mem_index = label->mem_index;
int data_reg = label->datalo_reg;
int data_reg2 = label->datahi_reg;
int addr_reg = label->addrlo_reg;
uint8_t *raddr = label->raddr;
uint8_t **label_ptr = &label->label_ptr[0];
/* low two bits of opc select the helper in qemu_ld_helpers[] */
s_bits = opc & 3;
/* resolve label address: the recorded branch now targets the code below */
reloc_pc14 (label_ptr[0], (tcg_target_long) s->code_ptr);
/* slow path: helper arguments are placed in registers 3, 4, ... */
ir = 3;
tcg_out_mov (s, TCG_TYPE_I32, ir++, TCG_AREG0);
#if TARGET_LONG_BITS == 32
tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
#else
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
/* round ir up to an odd register number before the two-register address */
ir |= 1;
#endif
tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg);
tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
#endif
tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1);
/* Branch over the next word, which is data (raddr) rather than an
   instruction.  NOTE(review): presumably this is the word GETPC_LDST()
   loads from "return address + 4" - confirm against its definition. */
tcg_out32 (s, B | 8);
tcg_out32 (s, (tcg_target_long) raddr);
/* move the helper result out of register 3 (and 4) into its destination */
switch (opc) {
case 0|4:
/* signed byte: sign-extend register 3 into data_reg */
tcg_out32 (s, EXTSB | RA (data_reg) | RS (3));
break;
case 1|4:
/* signed halfword: sign-extend register 3 into data_reg */
tcg_out32 (s, EXTSH | RA (data_reg) | RS (3));
break;
case 0:
case 1:
case 2:
/* unsigned 8/16/32-bit: plain copy, skipped if already in place */
if (data_reg != 3)
tcg_out_mov (s, TCG_TYPE_I32, data_reg, 3);
break;
case 3:
/* two-register result: data_reg receives register 4 and data_reg2
   receives register 3; ordered so neither source is clobbered first */
if (data_reg == 3) {
if (data_reg2 == 4) {
/* destinations are exactly swapped: exchange 3 and 4 via register 0 */
tcg_out_mov (s, TCG_TYPE_I32, 0, 4);
tcg_out_mov (s, TCG_TYPE_I32, 4, 3);
tcg_out_mov (s, TCG_TYPE_I32, 3, 0);
}
else {
/* save register 3 into data_reg2 before overwriting it */
tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3);
tcg_out_mov (s, TCG_TYPE_I32, 3, 4);
}
}
else {
if (data_reg != 4) tcg_out_mov (s, TCG_TYPE_I32, data_reg, 4);
if (data_reg2 != 3) tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3);
}
break;
}
/* Jump to the code corresponding to next IR of qemu_ld */
tcg_out_b (s, 0, (tcg_target_long) raddr);
}
/*
 * Emit the out-of-line slow path for one guest store described by 'label'.
 *
 * The emitted sequence is, in order:
 *   1. argument setup starting at register 3: TCG_AREG0, the guest address
 *      (addrhi_reg then addrlo_reg when TARGET_LONG_BITS is not 32), the
 *      value to store and mem_index;
 *   2. a call to qemu_st_helpers[opc];
 *   3. the word 'B | 8' followed by a raw word holding raddr;
 *   4. a branch back to raddr.
 *
 * Before emitting anything, the branch recorded in label->label_ptr[0] is
 * relocated to the current code pointer.
 */
static void tcg_out_qemu_st_slow_path (TCGContext *s, TCGLabelQemuLdst *label)
{
    int ir;
    int opc = label->opc;
    int mem_index = label->mem_index;
    int data_reg = label->datalo_reg;
    int data_reg2 = label->datahi_reg;
    int addr_reg = label->addrlo_reg;
    uint8_t *raddr = label->raddr;
    uint8_t **label_ptr = &label->label_ptr[0];

    /* resolve label address: the recorded branch now targets the code below */
    reloc_pc14 (label_ptr[0], (tcg_target_long) s->code_ptr);

    /* slow path: helper arguments are placed in registers 3, 4, ... */
    ir = 3;
    tcg_out_mov (s, TCG_TYPE_I32, ir++, TCG_AREG0);
#if TARGET_LONG_BITS == 32
    tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
#else
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
    /* round ir up to an odd register number before the two-register address */
    ir |= 1;
#endif
    tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg);
    tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
#endif

    /* value argument, narrowed to the access size */
    switch (opc) {
    case 0:
        /* byte store: keep only bits 24..31 (the low 8 bits) of data_reg */
        tcg_out32 (s, (RLWINM
                       | RA (ir)
                       | RS (data_reg)
                       | SH (0)
                       | MB (24)
                       | ME (31)));
        break;
    case 1:
        /* halfword store: keep only bits 16..31 (the low 16 bits) */
        tcg_out32 (s, (RLWINM
                       | RA (ir)
                       | RS (data_reg)
                       | SH (0)
                       | MB (16)
                       | ME (31)));
        break;
    case 2:
        tcg_out_mov (s, TCG_TYPE_I32, ir, data_reg);
        break;
    case 3:
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
        /* round ir up to an odd register number before the register pair */
        ir |= 1;
#endif
        /* two-register value: data_reg2 first, then data_reg */
        tcg_out_mov (s, TCG_TYPE_I32, ir++, data_reg2);
        tcg_out_mov (s, TCG_TYPE_I32, ir, data_reg);
        break;
    }
    ir++;

    tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
    tcg_out_call (s, (tcg_target_long) qemu_st_helpers[opc], 1);

    /* Branch over the next word, which is data (raddr) rather than an
       instruction.  NOTE(review): presumably this is the word GETPC_LDST()
       loads from "return address + 4" - confirm against its definition. */
    tcg_out32 (s, B | 8);
    tcg_out32 (s, (tcg_target_long) raddr);

    /* Jump to the code corresponding to next IR of qemu_st */
    tcg_out_b (s, 0, (tcg_target_long) raddr);
}
/*
 * Emit the slow path for every qemu_ld/qemu_st recorded in
 * s->qemu_ldst_labels, in the order the entries were added.
 */
void tcg_out_tb_finalize(TCGContext *s)
{
    int n;

    for (n = 0; n < s->nb_qemu_ldst_labels; n++) {
        TCGLabelQemuLdst *pending = (TCGLabelQemuLdst *) &s->qemu_ldst_labels[n];

        /* Stores first; whatever is not a store is a load. */
        if (!pending->is_ld) {
            tcg_out_qemu_st_slow_path (s, pending);
            continue;
        }
        tcg_out_qemu_ld_slow_path (s, pending);
    }
}
#endif
static void tcg_target_qemu_prologue (TCGContext *s) static void tcg_target_qemu_prologue (TCGContext *s)
{ {
int i, frame_size; int i, frame_size;