From dcf91778caa36338a252703f6d40f06cc14acfce Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 31 Oct 2013 15:19:23 -0400 Subject: [PATCH 1/8] tcg-ia64: Optimize small arguments to exit_tb Saves one bundle for the common case of exit_tb 0. Signed-off-by: Richard Henderson --- tcg/ia64/tcg-target.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c index 2d8e00cd94..74070111f0 100644 --- a/tcg/ia64/tcg-target.c +++ b/tcg/ia64/tcg-target.c @@ -950,15 +950,21 @@ static inline void tcg_out_callr(TCGContext *s, TCGReg addr) static void tcg_out_exit_tb(TCGContext *s, tcg_target_long arg) { int64_t disp; - uint64_t imm; + uint64_t imm, opc1; - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R8, arg); + /* At least arg == 0 is a common operation. */ + if (arg == sextract64(arg, 0, 22)) { + opc1 = tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R8, arg); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R8, arg); + opc1 = INSN_NOP_M; + } disp = tb_ret_addr - s->code_ptr; imm = (uint64_t)disp >> 4; tcg_out_bundle(s, mLX, - INSN_NOP_M, + opc1, tcg_opc_l3 (imm), tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, imm)); } From 4c186ee2cf938d338a4fc4e53789a59d580b7625 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 5 Sep 2013 19:46:56 -0400 Subject: [PATCH 2/8] tcg-ia64: Re-bundle the tlb load This sequencing requires 5 stop bits instead of 6, and has room left over to pre-load the tlb addend, and bswap data prior to being stored. 
Signed-off-by: Richard Henderson --- tcg/ia64/tcg-target.c | 75 ++++++++++++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 22 deletions(-) diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c index 74070111f0..7bb3440be9 100644 --- a/tcg/ia64/tcg-target.c +++ b/tcg/ia64/tcg-target.c @@ -1564,38 +1564,69 @@ static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret, } #if defined(CONFIG_SOFTMMU) +/* We're expecting to use an signed 22-bit immediate add. */ +QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) + > 0x1fffff) + /* Load and compare a TLB entry, and return the result in (p6, p7). R2 is loaded with the address of the addend TLB entry. - R57 is loaded with the address, zero extented on 32-bit targets. */ -static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg, - TCGMemOp s_bits, uint64_t offset_rw, - uint64_t offset_addend) + R57 is loaded with the address, zero extented on 32-bit targets. + R1, R3 are clobbered. */ +static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg, + TCGMemOp s_bits, int off_rw, int off_add) { - tcg_out_bundle(s, mII, - INSN_NOP_M, - tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R2, + /* + .mii + mov r2 = off_rw + extr.u r3 = addr_reg, ... # extract tlb page + zxt4 r57 = addr_reg # or mov for 64-bit guest + ;; + .mii + addl r2 = r2, areg0 + shl r3 = r3, cteb # via dep.z + dep r1 = 0, r57, ... 
# zero page ofs, keep align + ;; + .mmi + add r2 = r2, r3 + ;; + ld4 r3 = [r2], off_add-off_rw # or ld8 for 64-bit guest + nop + ;; + .mmi + nop + cmp.eq p6, p7 = r3, r58 + nop + ;; + */ + tcg_out_bundle(s, miI, + tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, off_rw), + tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R3, addr_reg, TARGET_PAGE_BITS, CPU_TLB_BITS - 1), - tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R2, - TCG_REG_R2, 63 - CPU_TLB_ENTRY_BITS, - 63 - CPU_TLB_ENTRY_BITS)); - tcg_out_bundle(s, mII, - tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R2, - offset_rw, TCG_REG_R2), tcg_opc_ext_i(TCG_REG_P0, TARGET_LONG_BITS == 32 ? MO_UL : MO_Q, - TCG_REG_R57, addr_reg), + TCG_REG_R57, addr_reg)); + tcg_out_bundle(s, miI, tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, - TCG_REG_R2, TCG_AREG0)); - tcg_out_bundle(s, mII, + TCG_REG_R2, TCG_AREG0), + tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R3, + TCG_REG_R3, 63 - CPU_TLB_ENTRY_BITS, + 63 - CPU_TLB_ENTRY_BITS), + tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R1, 0, + TCG_REG_R57, 63 - s_bits, + TARGET_PAGE_BITS - s_bits - 1)); + tcg_out_bundle(s, MmI, + tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, + TCG_REG_R2, TCG_REG_R2, TCG_REG_R3), tcg_opc_m3 (TCG_REG_P0, (TARGET_LONG_BITS == 32 - ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R56, - TCG_REG_R2, offset_addend - offset_rw), - tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R3, 0, - TCG_REG_R57, 63 - s_bits, - TARGET_PAGE_BITS - s_bits - 1), + ? 
OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R3, + TCG_REG_R2, off_add - off_rw), + INSN_NOP_I); + tcg_out_bundle(s, mmI, + INSN_NOP_M, tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6, - TCG_REG_P7, TCG_REG_R3, TCG_REG_R56)); + TCG_REG_P7, TCG_REG_R1, TCG_REG_R3), + INSN_NOP_I); } /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, From b672cf66c37a5fc9fc143160d2395901030c9d3a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 5 Sep 2013 20:02:51 -0400 Subject: [PATCH 3/8] tcg-ia64: Move bswap for store into tlb load Saving at least two cycles per store, and cleaning up the code. Signed-off-by: Richard Henderson --- tcg/ia64/tcg-target.c | 94 ++++++++++++++----------------------------- 1 file changed, 31 insertions(+), 63 deletions(-) diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c index 7bb3440be9..cdc7487c44 100644 --- a/tcg/ia64/tcg-target.c +++ b/tcg/ia64/tcg-target.c @@ -1571,9 +1571,11 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) /* Load and compare a TLB entry, and return the result in (p6, p7). R2 is loaded with the address of the addend TLB entry. R57 is loaded with the address, zero extented on 32-bit targets. - R1, R3 are clobbered. */ + R1, R3 are clobbered, leaving R56 free for... + BSWAP_1, BSWAP_2 and I-slot insns for swapping data for store. */ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg, - TCGMemOp s_bits, int off_rw, int off_add) + TCGMemOp s_bits, int off_rw, int off_add, + uint64_t bswap1, uint64_t bswap2) { /* .mii @@ -1621,12 +1623,12 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg, (TARGET_LONG_BITS == 32 ? 
OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R3, TCG_REG_R2, off_add - off_rw), - INSN_NOP_I); + bswap1); tcg_out_bundle(s, mmI, INSN_NOP_M, tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6, TCG_REG_P7, TCG_REG_R1, TCG_REG_R3), - INSN_NOP_I); + bswap2); } /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, @@ -1656,7 +1658,8 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, /* Read the TLB entry */ tcg_out_qemu_tlb(s, addr_reg, s_bits, offsetof(CPUArchState, tlb_table[mem_index][0].addr_read), - offsetof(CPUArchState, tlb_table[mem_index][0].addend)); + offsetof(CPUArchState, tlb_table[mem_index][0].addend), + INSN_NOP_I, INSN_NOP_I); /* P6 is the fast path, and P7 the slow path */ tcg_out_bundle(s, mLX, @@ -1727,17 +1730,31 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, static const uint64_t opc_st_m4[4] = { OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 }; - int addr_reg, data_reg, mem_index; + TCGReg addr_reg, data_reg, store_reg; + int mem_index; + uint64_t bswap1, bswap2; TCGMemOp s_bits; - data_reg = *args++; + store_reg = data_reg = *args++; addr_reg = *args++; mem_index = *args; s_bits = opc & MO_SIZE; + bswap1 = bswap2 = INSN_NOP_I; + if (opc & MO_BSWAP) { + store_reg = TCG_REG_R56; + bswap1 = tcg_opc_bswap64_i(TCG_REG_P0, store_reg, data_reg); + if (s_bits < MO_64) { + int shift = 64 - (8 << s_bits); + bswap2 = tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, + store_reg, store_reg, shift, 63 - shift); + } + } + tcg_out_qemu_tlb(s, addr_reg, s_bits, offsetof(CPUArchState, tlb_table[mem_index][0].addr_write), - offsetof(CPUArchState, tlb_table[mem_index][0].addend)); + offsetof(CPUArchState, tlb_table[mem_index][0].addend), + bswap1, bswap2); /* P6 is the fast path, and P7 the slow path */ tcg_out_bundle(s, mLX, @@ -1752,63 +1769,14 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, TCG_REG_R3, TCG_REG_R57), tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6, TCG_REG_R3, 0)); - - switch 
(opc) { - case MO_8: - case MO_16: - case MO_32: - case MO_64: - tcg_out_bundle(s, mii, - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, - TCG_REG_R1, TCG_REG_R2), - tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg), - INSN_NOP_I); - break; - - case MO_16 | MO_BSWAP: - tcg_out_bundle(s, miI, - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, - TCG_REG_R1, TCG_REG_R2), - INSN_NOP_I, - tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12, - TCG_REG_R2, data_reg, 15, 15)); - tcg_out_bundle(s, miI, - tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg), - INSN_NOP_I, - tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, TCG_REG_R2)); - data_reg = TCG_REG_R2; - break; - - case MO_32 | MO_BSWAP: - tcg_out_bundle(s, miI, - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, - TCG_REG_R1, TCG_REG_R2), - INSN_NOP_I, - tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12, - TCG_REG_R2, data_reg, 31, 31)); - tcg_out_bundle(s, miI, - tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg), - INSN_NOP_I, - tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, TCG_REG_R2)); - data_reg = TCG_REG_R2; - break; - - case MO_64 | MO_BSWAP: - tcg_out_bundle(s, miI, - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, - TCG_REG_R1, TCG_REG_R2), - tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg), - tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, data_reg)); - data_reg = TCG_REG_R2; - break; - - default: - tcg_abort(); - } - + tcg_out_bundle(s, mii, + tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, + TCG_REG_R1, TCG_REG_R2), + tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg), + INSN_NOP_I); tcg_out_bundle(s, miB, tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits], - data_reg, TCG_REG_R3), + store_reg, TCG_REG_R3), tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index), tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5, TCG_REG_B0, TCG_REG_B6)); From 1f91f3921921870e76a8ee543acc0935b5230821 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 5 Sep 2013 20:32:49 -0400 Subject: [PATCH 4/8] tcg-ia64: Move tlb addend load into tlb read Signed-off-by: Richard Henderson --- tcg/ia64/tcg-target.c | 24 ++++++++++++------------ 1 file 
changed, 12 insertions(+), 12 deletions(-) diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c index cdc7487c44..802ec33c07 100644 --- a/tcg/ia64/tcg-target.c +++ b/tcg/ia64/tcg-target.c @@ -1569,7 +1569,7 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) > 0x1fffff) /* Load and compare a TLB entry, and return the result in (p6, p7). - R2 is loaded with the address of the addend TLB entry. + R2 is loaded with the addend TLB entry. R57 is loaded with the address, zero extented on 32-bit targets. R1, R3 are clobbered, leaving R56 free for... BSWAP_1, BSWAP_2 and I-slot insns for swapping data for store. */ @@ -1625,7 +1625,7 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg, TCG_REG_R2, off_add - off_rw), bswap1); tcg_out_bundle(s, mmI, - INSN_NOP_M, + tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R2, TCG_REG_R2), tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6, TCG_REG_P7, TCG_REG_R1, TCG_REG_R3), bswap2); @@ -1668,30 +1668,30 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2, (tcg_target_long) qemu_ld_helpers[s_bits])); tcg_out_bundle(s, MmI, - tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3, + tcg_opc_m3 (TCG_REG_P7, OPC_LD8_M3, TCG_REG_R3, TCG_REG_R2, 8), - tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3, - TCG_REG_R3, TCG_REG_R57), + tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2, + TCG_REG_R2, TCG_REG_R57), tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6, TCG_REG_R3, 0)); if (bswap && s_bits == MO_16) { tcg_out_bundle(s, MmI, tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], - TCG_REG_R8, TCG_REG_R3), + TCG_REG_R8, TCG_REG_R2), tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12, TCG_REG_R8, TCG_REG_R8, 15, 15)); } else if (bswap && s_bits == MO_32) { tcg_out_bundle(s, MmI, tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], - TCG_REG_R8, TCG_REG_R3), + TCG_REG_R8, TCG_REG_R2), tcg_opc_m1 (TCG_REG_P7, 
OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12, TCG_REG_R8, TCG_REG_R8, 31, 31)); } else { tcg_out_bundle(s, mmI, tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], - TCG_REG_R8, TCG_REG_R3), + TCG_REG_R8, TCG_REG_R2), tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), INSN_NOP_I); } @@ -1763,10 +1763,10 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2, (tcg_target_long) qemu_st_helpers[s_bits])); tcg_out_bundle(s, MmI, - tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3, + tcg_opc_m3 (TCG_REG_P7, OPC_LD8_M3, TCG_REG_R3, TCG_REG_R2, 8), - tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3, - TCG_REG_R3, TCG_REG_R57), + tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2, + TCG_REG_R2, TCG_REG_R57), tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6, TCG_REG_R3, 0)); tcg_out_bundle(s, mii, @@ -1776,7 +1776,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, INSN_NOP_I); tcg_out_bundle(s, miB, tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits], - store_reg, TCG_REG_R3), + store_reg, TCG_REG_R2), tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index), tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5, TCG_REG_B0, TCG_REG_B6)); From af9fe310702396333f983f17de68db8511de7b19 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 5 Sep 2013 20:50:54 -0400 Subject: [PATCH 5/8] tcg-ia64: Reduce code duplication in tcg_out_qemu_ld The only differences were in the bswap insns emitted. 
Signed-off-by: Richard Henderson --- tcg/ia64/tcg-target.c | 61 +++++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 37 deletions(-) diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c index 802ec33c07..8464ad6a59 100644 --- a/tcg/ia64/tcg-target.c +++ b/tcg/ia64/tcg-target.c @@ -1647,13 +1647,13 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 }; int addr_reg, data_reg, mem_index; - TCGMemOp s_bits, bswap; + TCGMemOp s_bits; + uint64_t bswap1, bswap2; data_reg = *args++; addr_reg = *args++; mem_index = *args; s_bits = opc & MO_SIZE; - bswap = opc & MO_BSWAP; /* Read the TLB entry */ tcg_out_qemu_tlb(s, addr_reg, s_bits, @@ -1662,6 +1662,18 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, INSN_NOP_I, INSN_NOP_I); /* P6 is the fast path, and P7 the slow path */ + + bswap1 = bswap2 = INSN_NOP_I; + if (opc & MO_BSWAP) { + bswap1 = tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R8, TCG_REG_R8); + if (s_bits < MO_64) { + int shift = 64 - (8 << s_bits); + bswap2 = (opc & MO_SIGN ? 
OPC_EXTR_I11 : OPC_EXTR_U_I11); + bswap2 = tcg_opc_i11(TCG_REG_P6, bswap2, + TCG_REG_R8, TCG_REG_R8, shift, 63 - shift); + } + } + tcg_out_bundle(s, mLX, tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0), tcg_opc_l2 ((tcg_target_long) qemu_ld_helpers[s_bits]), @@ -1674,41 +1686,16 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCG_REG_R2, TCG_REG_R57), tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6, TCG_REG_R3, 0)); - if (bswap && s_bits == MO_16) { - tcg_out_bundle(s, MmI, - tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], - TCG_REG_R8, TCG_REG_R2), - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), - tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12, - TCG_REG_R8, TCG_REG_R8, 15, 15)); - } else if (bswap && s_bits == MO_32) { - tcg_out_bundle(s, MmI, - tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], - TCG_REG_R8, TCG_REG_R2), - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), - tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12, - TCG_REG_R8, TCG_REG_R8, 31, 31)); - } else { - tcg_out_bundle(s, mmI, - tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], - TCG_REG_R8, TCG_REG_R2), - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), - INSN_NOP_I); - } - if (!bswap) { - tcg_out_bundle(s, miB, - tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index), - INSN_NOP_I, - tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5, - TCG_REG_B0, TCG_REG_B6)); - } else { - tcg_out_bundle(s, miB, - tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index), - tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R8, TCG_REG_R8), - tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5, - TCG_REG_B0, TCG_REG_B6)); - } - + tcg_out_bundle(s, MmI, + tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], + TCG_REG_R8, TCG_REG_R2), + tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), + bswap1); + tcg_out_bundle(s, miB, + tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index), + bswap2, + tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5, + TCG_REG_B0, TCG_REG_B6)); tcg_out_bundle(s, miI, INSN_NOP_M, INSN_NOP_I, From 
4bdd547aaacd10b7e8f9bf6efe5531ae2ac8ea52 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 6 Sep 2013 00:38:52 -0400 Subject: [PATCH 6/8] tcg-ia64: Convert to new ldst helpers Still inline, but updated to the new routines. Always use the LE helpers, reusing the bswap between the fast and slow paths. Signed-off-by: Richard Henderson --- tcg/ia64/tcg-target.c | 142 ++++++++++++++++++++++++------------------ 1 file changed, 80 insertions(+), 62 deletions(-) diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c index 8464ad6a59..3000a6bfa7 100644 --- a/tcg/ia64/tcg-target.c +++ b/tcg/ia64/tcg-target.c @@ -225,6 +225,7 @@ enum { OPC_BR_CALL_SPTK_MANY_B5 = 0x02100001000ull, OPC_BR_RET_SPTK_MANY_B4 = 0x00108001100ull, OPC_BRL_SPTK_MANY_X3 = 0x18000001000ull, + OPC_BRL_CALL_SPNT_MANY_X4 = 0x1a200001000ull, OPC_BRL_CALL_SPTK_MANY_X4 = 0x1a000001000ull, OPC_CMP_LT_A6 = 0x18000000000ull, OPC_CMP_LTU_A6 = 0x1a000000000ull, @@ -815,6 +816,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) #if defined(CONFIG_SOFTMMU) tcg_regset_reset_reg(ct->u.regs, TCG_REG_R56); tcg_regset_reset_reg(ct->u.regs, TCG_REG_R57); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R58); #endif break; case 'Z': @@ -1632,12 +1634,12 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg, } /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, - int mmu_idx) */ + int mmu_idx, uintptr_t retaddr) */ static const void * const qemu_ld_helpers[4] = { - helper_ldb_mmu, - helper_ldw_mmu, - helper_ldl_mmu, - helper_ldq_mmu, + helper_ret_ldub_mmu, + helper_le_lduw_mmu, + helper_le_ldul_mmu, + helper_le_ldq_mmu, }; static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, @@ -1648,7 +1650,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, }; int addr_reg, data_reg, mem_index; TCGMemOp s_bits; - uint64_t bswap1, bswap2; + uint64_t fin1, fin2, *desc, func, gp, here; data_reg = *args++; addr_reg = *args++; @@ 
-1663,52 +1665,60 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, /* P6 is the fast path, and P7 the slow path */ - bswap1 = bswap2 = INSN_NOP_I; + fin2 = 0; if (opc & MO_BSWAP) { - bswap1 = tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R8, TCG_REG_R8); + fin1 = tcg_opc_bswap64_i(TCG_REG_P0, data_reg, TCG_REG_R8); if (s_bits < MO_64) { int shift = 64 - (8 << s_bits); - bswap2 = (opc & MO_SIGN ? OPC_EXTR_I11 : OPC_EXTR_U_I11); - bswap2 = tcg_opc_i11(TCG_REG_P6, bswap2, - TCG_REG_R8, TCG_REG_R8, shift, 63 - shift); + fin2 = (opc & MO_SIGN ? OPC_EXTR_I11 : OPC_EXTR_U_I11); + fin2 = tcg_opc_i11(TCG_REG_P0, fin2, + data_reg, data_reg, shift, 63 - shift); } + } else { + fin1 = tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8); } - tcg_out_bundle(s, mLX, + desc = (uintptr_t *)qemu_ld_helpers[s_bits]; + func = desc[0]; + gp = desc[1]; + here = (uintptr_t)s->code_ptr; + + tcg_out_bundle(s, mlx, tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0), - tcg_opc_l2 ((tcg_target_long) qemu_ld_helpers[s_bits]), - tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2, - (tcg_target_long) qemu_ld_helpers[s_bits])); - tcg_out_bundle(s, MmI, - tcg_opc_m3 (TCG_REG_P7, OPC_LD8_M3, TCG_REG_R3, - TCG_REG_R2, 8), + tcg_opc_l2 (here), + tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R59, here)); + tcg_out_bundle(s, mLX, tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2, TCG_REG_R2, TCG_REG_R57), - tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6, - TCG_REG_R3, 0)); - tcg_out_bundle(s, MmI, + tcg_opc_l2 (gp), + tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R1, gp)); + tcg_out_bundle(s, mmi, tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], TCG_REG_R8, TCG_REG_R2), - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2), - bswap1); - tcg_out_bundle(s, miB, tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index), - bswap2, - tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5, - TCG_REG_B0, TCG_REG_B6)); - tcg_out_bundle(s, miI, + INSN_NOP_I); + func -= (uintptr_t)s->code_ptr; + tcg_out_bundle(s, 
mLX, INSN_NOP_M, - INSN_NOP_I, - tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8)); + tcg_opc_l4 (func >> 4), + tcg_opc_x4 (TCG_REG_P7, OPC_BRL_CALL_SPNT_MANY_X4, + TCG_REG_B0, func >> 4)); + + /* Note that we always use LE helper functions, so the bswap insns + here for the fast path also apply to the slow path. */ + tcg_out_bundle(s, (fin2 ? mII : miI), + INSN_NOP_M, + fin1, + fin2 ? fin2 : INSN_NOP_I); } /* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, - uintxx_t val, int mmu_idx) */ + uintxx_t val, int mmu_idx, uintptr_t retaddr) */ static const void * const qemu_st_helpers[4] = { - helper_stb_mmu, - helper_stw_mmu, - helper_stl_mmu, - helper_stq_mmu, + helper_ret_stb_mmu, + helper_le_stw_mmu, + helper_le_stl_mmu, + helper_le_stq_mmu, }; static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, @@ -1717,56 +1727,64 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, static const uint64_t opc_st_m4[4] = { OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 }; - TCGReg addr_reg, data_reg, store_reg; + TCGReg addr_reg, data_reg; int mem_index; - uint64_t bswap1, bswap2; + uint64_t pre1, pre2, *desc, func, gp, here; TCGMemOp s_bits; - store_reg = data_reg = *args++; + data_reg = *args++; addr_reg = *args++; mem_index = *args; s_bits = opc & MO_SIZE; - bswap1 = bswap2 = INSN_NOP_I; + /* Note that we always use LE helper functions, so the bswap insns + that are here for the fast path also apply to the slow path, + and move the data into the argument register. 
*/ + pre2 = INSN_NOP_I; if (opc & MO_BSWAP) { - store_reg = TCG_REG_R56; - bswap1 = tcg_opc_bswap64_i(TCG_REG_P0, store_reg, data_reg); + pre1 = tcg_opc_bswap64_i(TCG_REG_P0, TCG_REG_R58, data_reg); if (s_bits < MO_64) { int shift = 64 - (8 << s_bits); - bswap2 = tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, - store_reg, store_reg, shift, 63 - shift); + pre2 = tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, + TCG_REG_R58, TCG_REG_R58, shift, 63 - shift); } + } else { + /* Just move the data into place for the slow path. */ + pre1 = tcg_opc_ext_i(TCG_REG_P0, opc, TCG_REG_R58, data_reg); } tcg_out_qemu_tlb(s, addr_reg, s_bits, offsetof(CPUArchState, tlb_table[mem_index][0].addr_write), offsetof(CPUArchState, tlb_table[mem_index][0].addend), - bswap1, bswap2); + pre1, pre2); /* P6 is the fast path, and P7 the slow path */ - tcg_out_bundle(s, mLX, + + desc = (uintptr_t *)qemu_st_helpers[s_bits]; + func = desc[0]; + gp = desc[1]; + here = (uintptr_t)s->code_ptr; + + tcg_out_bundle(s, mlx, tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0), - tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[s_bits]), - tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2, - (tcg_target_long) qemu_st_helpers[s_bits])); - tcg_out_bundle(s, MmI, - tcg_opc_m3 (TCG_REG_P7, OPC_LD8_M3, TCG_REG_R3, - TCG_REG_R2, 8), + tcg_opc_l2 (here), + tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R60, here)); + tcg_out_bundle(s, mLX, tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2, TCG_REG_R2, TCG_REG_R57), - tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6, - TCG_REG_R3, 0)); - tcg_out_bundle(s, mii, - tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, - TCG_REG_R1, TCG_REG_R2), - tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg), - INSN_NOP_I); - tcg_out_bundle(s, miB, + tcg_opc_l2 (gp), + tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R1, gp)); + tcg_out_bundle(s, mmi, tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits], - store_reg, TCG_REG_R2), + TCG_REG_R58, TCG_REG_R2), tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index), - tcg_opc_b5 (TCG_REG_P7, 
OPC_BR_CALL_SPTK_MANY_B5, - TCG_REG_B0, TCG_REG_B6)); + INSN_NOP_I); + func -= (uintptr_t)s->code_ptr; + tcg_out_bundle(s, mLX, + INSN_NOP_M, + tcg_opc_l4 (func >> 4), + tcg_opc_x4 (TCG_REG_P7, OPC_BRL_CALL_SPNT_MANY_X4, + TCG_REG_B0, func >> 4)); } #else /* !CONFIG_SOFTMMU */ From 3bf16cb31a32bb4fdd505851f5a71500812ac3de Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 6 Sep 2013 02:06:59 -0400 Subject: [PATCH 7/8] tcg-ia64: Move part of softmmu slow path out of line Signed-off-by: Richard Henderson --- tcg/ia64/tcg-target.c | 176 +++++++++++++++++++++++++++--------------- 1 file changed, 114 insertions(+), 62 deletions(-) diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c index 3000a6bfa7..76abb468cf 100644 --- a/tcg/ia64/tcg-target.c +++ b/tcg/ia64/tcg-target.c @@ -23,8 +23,6 @@ * THE SOFTWARE. */ -#include "tcg-be-null.h" - /* * Register definitions */ @@ -221,6 +219,7 @@ enum { OPC_ALLOC_M34 = 0x02c00000000ull, OPC_BR_DPTK_FEW_B1 = 0x08400000000ull, OPC_BR_SPTK_MANY_B1 = 0x08000001000ull, + OPC_BR_CALL_SPNT_FEW_B3 = 0x0a200000000ull, OPC_BR_SPTK_MANY_B4 = 0x00100001000ull, OPC_BR_CALL_SPTK_MANY_B5 = 0x02100001000ull, OPC_BR_RET_SPTK_MANY_B4 = 0x00108001100ull, @@ -357,6 +356,15 @@ static inline uint64_t tcg_opc_b1(int qp, uint64_t opc, uint64_t imm) | (qp & 0x3f); } +static inline uint64_t tcg_opc_b3(int qp, uint64_t opc, int b1, uint64_t imm) +{ + return opc + | ((imm & 0x100000) << 16) /* s */ + | ((imm & 0x0fffff) << 13) /* imm20b */ + | ((b1 & 0x7) << 6) + | (qp & 0x3f); +} + static inline uint64_t tcg_opc_b4(int qp, uint64_t opc, int b2) { return opc @@ -1633,14 +1641,87 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg, bswap2); } -/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, - int mmu_idx, uintptr_t retaddr) */ -static const void * const qemu_ld_helpers[4] = { - helper_ret_ldub_mmu, - helper_le_lduw_mmu, - helper_le_ldul_mmu, - helper_le_ldq_mmu, -}; +#define TCG_MAX_QEMU_LDST 640 + +typedef 
struct TCGLabelQemuLdst { + bool is_ld; + TCGMemOp size; + uint8_t *label_ptr; /* label pointers to be updated */ +} TCGLabelQemuLdst; + +typedef struct TCGBackendData { + int nb_ldst_labels; + TCGLabelQemuLdst ldst_labels[TCG_MAX_QEMU_LDST]; +} TCGBackendData; + +static inline void tcg_out_tb_init(TCGContext *s) +{ + s->be->nb_ldst_labels = 0; +} + +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc, + uint8_t *label_ptr) +{ + TCGBackendData *be = s->be; + TCGLabelQemuLdst *l = &be->ldst_labels[be->nb_ldst_labels++]; + + assert(be->nb_ldst_labels <= TCG_MAX_QEMU_LDST); + l->is_ld = is_ld; + l->size = opc & MO_SIZE; + l->label_ptr = label_ptr; +} + +static void tcg_out_tb_finalize(TCGContext *s) +{ + static const void * const helpers[8] = { + helper_ret_stb_mmu, + helper_le_stw_mmu, + helper_le_stl_mmu, + helper_le_stq_mmu, + helper_ret_ldub_mmu, + helper_le_lduw_mmu, + helper_le_ldul_mmu, + helper_le_ldq_mmu, + }; + uintptr_t thunks[8] = { }; + TCGBackendData *be = s->be; + size_t i, n = be->nb_ldst_labels; + + for (i = 0; i < n; i++) { + TCGLabelQemuLdst *l = &be->ldst_labels[i]; + long x = l->is_ld * 4 + l->size; + uintptr_t dest = thunks[x]; + + /* The out-of-line thunks are all the same; load the return address + from B0, load the GP, and branch to the code. Note that we are + always post-call, so the register window has rolled, so we're + using incomming parameter register numbers, not outgoing. */ + if (dest == 0) { + uintptr_t disp, *desc = (uintptr_t *)helpers[x]; + + thunks[x] = dest = (uintptr_t)s->code_ptr; + + tcg_out_bundle(s, mlx, + INSN_NOP_M, + tcg_opc_l2 (desc[1]), + tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, + TCG_REG_R1, desc[1])); + tcg_out_bundle(s, mii, + INSN_NOP_M, + INSN_NOP_I, + tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22, + l->is_ld ? 
TCG_REG_R35 : TCG_REG_R36, + TCG_REG_B0)); + disp = (desc[0] - (uintptr_t)s->code_ptr) >> 4; + tcg_out_bundle(s, mLX, + INSN_NOP_M, + tcg_opc_l3 (disp), + tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, disp)); + } + + reloc_pcrel21b(l->label_ptr, dest); + } +} static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGMemOp opc) @@ -1650,7 +1731,8 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, }; int addr_reg, data_reg, mem_index; TCGMemOp s_bits; - uint64_t fin1, fin2, *desc, func, gp, here; + uint64_t fin1, fin2; + uint8_t *label_ptr; data_reg = *args++; addr_reg = *args++; @@ -1678,31 +1760,20 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, fin1 = tcg_opc_ext_i(TCG_REG_P0, opc, data_reg, TCG_REG_R8); } - desc = (uintptr_t *)qemu_ld_helpers[s_bits]; - func = desc[0]; - gp = desc[1]; - here = (uintptr_t)s->code_ptr; - - tcg_out_bundle(s, mlx, + tcg_out_bundle(s, mmI, tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0), - tcg_opc_l2 (here), - tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R59, here)); - tcg_out_bundle(s, mLX, tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2, TCG_REG_R2, TCG_REG_R57), - tcg_opc_l2 (gp), - tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R1, gp)); - tcg_out_bundle(s, mmi, + tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index)); + label_ptr = s->code_ptr + 2; + tcg_out_bundle(s, miB, tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits], TCG_REG_R8, TCG_REG_R2), - tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index), - INSN_NOP_I); - func -= (uintptr_t)s->code_ptr; - tcg_out_bundle(s, mLX, - INSN_NOP_M, - tcg_opc_l4 (func >> 4), - tcg_opc_x4 (TCG_REG_P7, OPC_BRL_CALL_SPNT_MANY_X4, - TCG_REG_B0, func >> 4)); + INSN_NOP_I, + tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0, + get_reloc_pcrel21b(label_ptr))); + + add_qemu_ldst_label(s, 1, opc, label_ptr); /* Note that we always use LE helper functions, so the bswap insns here for the fast path also apply to the slow path. 
*/ @@ -1712,15 +1783,6 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, fin2 ? fin2 : INSN_NOP_I); } -/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, - uintxx_t val, int mmu_idx, uintptr_t retaddr) */ -static const void * const qemu_st_helpers[4] = { - helper_ret_stb_mmu, - helper_le_stw_mmu, - helper_le_stl_mmu, - helper_le_stq_mmu, -}; - static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, TCGMemOp opc) { @@ -1729,8 +1791,9 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, }; TCGReg addr_reg, data_reg; int mem_index; - uint64_t pre1, pre2, *desc, func, gp, here; + uint64_t pre1, pre2; TCGMemOp s_bits; + uint8_t *label_ptr; data_reg = *args++; addr_reg = *args++; @@ -1759,35 +1822,24 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, pre1, pre2); /* P6 is the fast path, and P7 the slow path */ - - desc = (uintptr_t *)qemu_st_helpers[s_bits]; - func = desc[0]; - gp = desc[1]; - here = (uintptr_t)s->code_ptr; - - tcg_out_bundle(s, mlx, + tcg_out_bundle(s, mmI, tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R56, TCG_AREG0), - tcg_opc_l2 (here), - tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R60, here)); - tcg_out_bundle(s, mLX, tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2, TCG_REG_R2, TCG_REG_R57), - tcg_opc_l2 (gp), - tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R1, gp)); - tcg_out_bundle(s, mmi, + tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index)); + label_ptr = s->code_ptr + 2; + tcg_out_bundle(s, miB, tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits], TCG_REG_R58, TCG_REG_R2), - tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index), - INSN_NOP_I); - func -= (uintptr_t)s->code_ptr; - tcg_out_bundle(s, mLX, - INSN_NOP_M, - tcg_opc_l4 (func >> 4), - tcg_opc_x4 (TCG_REG_P7, OPC_BRL_CALL_SPNT_MANY_X4, - TCG_REG_B0, func >> 4)); + INSN_NOP_I, + tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0, + get_reloc_pcrel21b(label_ptr))); + + add_qemu_ldst_label(s, 0, opc, label_ptr); 
} #else /* !CONFIG_SOFTMMU */ +# include "tcg-be-null.h" static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGMemOp opc) From 0374f5089ac94d7c58f3f55201b70e8578b35c64 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 4 Mar 2014 09:35:30 -0800 Subject: [PATCH 8/8] tcg-ia64: Convert to new ldst opcodes Signed-off-by: Richard Henderson --- tcg/ia64/tcg-target.c | 100 ++++++++++++++---------------------------- tcg/ia64/tcg-target.h | 2 +- 2 files changed, 35 insertions(+), 67 deletions(-) diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c index 76abb468cf..afcfd36f74 100644 --- a/tcg/ia64/tcg-target.c +++ b/tcg/ia64/tcg-target.c @@ -1723,20 +1723,20 @@ static void tcg_out_tb_finalize(TCGContext *s) } } -static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, - TCGMemOp opc) +static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) { static const uint64_t opc_ld_m1[4] = { OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 }; int addr_reg, data_reg, mem_index; - TCGMemOp s_bits; + TCGMemOp opc, s_bits; uint64_t fin1, fin2; uint8_t *label_ptr; - data_reg = *args++; - addr_reg = *args++; - mem_index = *args; + data_reg = args[0]; + addr_reg = args[1]; + opc = args[2]; + mem_index = args[3]; s_bits = opc & MO_SIZE; /* Read the TLB entry */ @@ -1783,8 +1783,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, fin2 ? 
fin2 : INSN_NOP_I); } -static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, - TCGMemOp opc) +static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) { static const uint64_t opc_st_m4[4] = { OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 @@ -1792,12 +1791,13 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, TCGReg addr_reg, data_reg; int mem_index; uint64_t pre1, pre2; - TCGMemOp s_bits; + TCGMemOp opc, s_bits; uint8_t *label_ptr; - data_reg = *args++; - addr_reg = *args++; - mem_index = *args; + data_reg = args[0]; + addr_reg = args[1]; + opc = args[2]; + mem_index = args[3]; s_bits = opc & MO_SIZE; /* Note that we always use LE helper functions, so the bswap insns @@ -1841,17 +1841,17 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, #else /* !CONFIG_SOFTMMU */ # include "tcg-be-null.h" -static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, - TCGMemOp opc) +static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) { static uint64_t const opc_ld_m1[4] = { OPC_LD1_M1, OPC_LD2_M1, OPC_LD4_M1, OPC_LD8_M1 }; int addr_reg, data_reg; - TCGMemOp s_bits, bswap; + TCGMemOp opc, s_bits, bswap; - data_reg = *args++; - addr_reg = *args++; + data_reg = args[0]; + addr_reg = args[1]; + opc = args[2]; s_bits = opc & MO_SIZE; bswap = opc & MO_BSWAP; @@ -1962,8 +1962,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, #endif } -static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, - TCGMemOp opc) +static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) { static uint64_t const opc_st_m4[4] = { OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4 @@ -1972,10 +1971,11 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, #if TARGET_LONG_BITS == 64 uint64_t add_guest_base; #endif - TCGMemOp s_bits, bswap; + TCGMemOp opc, s_bits, bswap; - data_reg = *args++; - addr_reg = *args++; + data_reg = args[0]; + addr_reg = 
args[1]; + opc = args[2]; s_bits = opc & MO_SIZE; bswap = opc & MO_BSWAP; @@ -2299,40 +2299,17 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, args[3], const_args[3], args[4], const_args[4], 0); break; - case INDEX_op_qemu_ld8u: - tcg_out_qemu_ld(s, args, MO_UB); + case INDEX_op_qemu_ld_i32: + tcg_out_qemu_ld(s, args); break; - case INDEX_op_qemu_ld8s: - tcg_out_qemu_ld(s, args, MO_SB); + case INDEX_op_qemu_ld_i64: + tcg_out_qemu_ld(s, args); break; - case INDEX_op_qemu_ld16u: - tcg_out_qemu_ld(s, args, MO_TEUW); + case INDEX_op_qemu_st_i32: + tcg_out_qemu_st(s, args); break; - case INDEX_op_qemu_ld16s: - tcg_out_qemu_ld(s, args, MO_TESW); - break; - case INDEX_op_qemu_ld32: - case INDEX_op_qemu_ld32u: - tcg_out_qemu_ld(s, args, MO_TEUL); - break; - case INDEX_op_qemu_ld32s: - tcg_out_qemu_ld(s, args, MO_TESL); - break; - case INDEX_op_qemu_ld64: - tcg_out_qemu_ld(s, args, MO_TEQ); - break; - - case INDEX_op_qemu_st8: - tcg_out_qemu_st(s, args, MO_UB); - break; - case INDEX_op_qemu_st16: - tcg_out_qemu_st(s, args, MO_TEUW); - break; - case INDEX_op_qemu_st32: - tcg_out_qemu_st(s, args, MO_TEUL); - break; - case INDEX_op_qemu_st64: - tcg_out_qemu_st(s, args, MO_TEQ); + case INDEX_op_qemu_st_i64: + tcg_out_qemu_st(s, args); break; default: @@ -2443,19 +2420,10 @@ static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_deposit_i32, { "r", "rZ", "ri" } }, { INDEX_op_deposit_i64, { "r", "rZ", "ri" } }, - { INDEX_op_qemu_ld8u, { "r", "r" } }, - { INDEX_op_qemu_ld8s, { "r", "r" } }, - { INDEX_op_qemu_ld16u, { "r", "r" } }, - { INDEX_op_qemu_ld16s, { "r", "r" } }, - { INDEX_op_qemu_ld32, { "r", "r" } }, - { INDEX_op_qemu_ld32u, { "r", "r" } }, - { INDEX_op_qemu_ld32s, { "r", "r" } }, - { INDEX_op_qemu_ld64, { "r", "r" } }, - - { INDEX_op_qemu_st8, { "SZ", "r" } }, - { INDEX_op_qemu_st16, { "SZ", "r" } }, - { INDEX_op_qemu_st32, { "SZ", "r" } }, - { INDEX_op_qemu_st64, { "SZ", "r" } }, + { INDEX_op_qemu_ld_i32, { "r", "r" } }, + { INDEX_op_qemu_ld_i64, { 
"r", "r" } }, + { INDEX_op_qemu_st_i32, { "SZ", "r" } }, + { INDEX_op_qemu_st_i64, { "SZ", "r" } }, { -1 }, }; diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index 52a939c946..09c3ba8fe3 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -153,7 +153,7 @@ typedef enum { #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_mulsh_i64 0 -#define TCG_TARGET_HAS_new_ldst 0 +#define TCG_TARGET_HAS_new_ldst 1 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16) #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16)