From 57f5c1b093e1c3ec185770d2a180259205f980be Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Sun, 2 Jul 2017 18:18:14 +0200 Subject: [PATCH 01/31] target/sh4: do not check for PR bit for fabs instruction The SH4 manual is not fully clear about that, but real hardware does not check the PR bit, which selects between single and double precision, for the fabs instruction. This is probably what is meant by "Same operation is performed regardless of precision." Remove the check, and at the same time use a TCG instruction instead of a helper to clear one bit. LP: https://bugs.launchpad.net/qemu/+bug/1701821 Reported-by: Bruno Haible Message-Id: <20170702202814.27793-2-aurelien@aurel32.net> Reviewed-by: Richard Henderson Signed-off-by: Aurelien Jarno --- target/sh4/helper.h | 2 -- target/sh4/op_helper.c | 10 ---------- target/sh4/translate.c | 15 +++------------ 3 files changed, 3 insertions(+), 24 deletions(-) diff --git a/target/sh4/helper.h b/target/sh4/helper.h index dce859caea..f715224822 100644 --- a/target/sh4/helper.h +++ b/target/sh4/helper.h @@ -16,8 +16,6 @@ DEF_HELPER_3(macw, void, env, i32, i32) DEF_HELPER_2(ld_fpscr, void, env, i32) -DEF_HELPER_FLAGS_1(fabs_FT, TCG_CALL_NO_RWG_SE, f32, f32) -DEF_HELPER_FLAGS_1(fabs_DT, TCG_CALL_NO_RWG_SE, f64, f64) DEF_HELPER_FLAGS_3(fadd_FT, TCG_CALL_NO_WG, f32, env, f32, f32) DEF_HELPER_FLAGS_3(fadd_DT, TCG_CALL_NO_WG, f64, env, f64, f64) DEF_HELPER_FLAGS_2(fcnvsd_FT_DT, TCG_CALL_NO_WG, f64, env, f32) diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c index 528a40ac1d..5e3a3ba68c 100644 --- a/target/sh4/op_helper.c +++ b/target/sh4/op_helper.c @@ -252,16 +252,6 @@ static void update_fpscr(CPUSH4State *env, uintptr_t retaddr) } } -float32 helper_fabs_FT(float32 t0) -{ - return float32_abs(t0); -} - -float64 helper_fabs_DT(float64 t0) -{ - return float64_abs(t0); -} - float32 helper_fadd_FT(CPUSH4State *env, float32 t0, float32 t1) { set_float_exception_flags(0, &env->fp_status); diff --git 
a/target/sh4/translate.c b/target/sh4/translate.c index 8bc132b27b..bff212a78e 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -1695,19 +1695,10 @@ static void _decode_opc(DisasContext * ctx) gen_helper_fneg_T(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)]); } return; - case 0xf05d: /* fabs FRn/DRn */ + case 0xf05d: /* fabs FRn/DRn - FPCSR: Nothing */ CHECK_FPU_ENABLED - if (ctx->tbflags & FPSCR_PR) { - if (ctx->opcode & 0x0100) - break; /* illegal instruction */ - TCGv_i64 fp = tcg_temp_new_i64(); - gen_load_fpr64(fp, DREG(B11_8)); - gen_helper_fabs_DT(fp, fp); - gen_store_fpr64(fp, DREG(B11_8)); - tcg_temp_free_i64(fp); - } else { - gen_helper_fabs_FT(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)]); - } + tcg_gen_andi_i32(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)], + 0x7fffffff); return; case 0xf06d: /* fsqrt FRn */ CHECK_FPU_ENABLED From fea7d77d3ea287d3b1878648f3049fc6bb4fd57b Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Sun, 2 Jul 2017 21:23:56 +0200 Subject: [PATCH 02/31] target/sh4: fix FPU unordered compare In case of an unordered compare, the fcmp instructions should either trigger an invalid exception (if enabled) or set T=0. The existing code left T unchanged. 
LP: https://bugs.launchpad.net/qemu/+bug/1701821 Reported-by: Bruno Haible Message-Id: <20170702202814.27793-3-aurelien@aurel32.net> Reviewed-by: Richard Henderson Signed-off-by: Aurelien Jarno --- target/sh4/op_helper.c | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c index 5e3a3ba68c..f228daf125 100644 --- a/target/sh4/op_helper.c +++ b/target/sh4/op_helper.c @@ -274,11 +274,8 @@ void helper_fcmp_eq_FT(CPUSH4State *env, float32 t0, float32 t1) set_float_exception_flags(0, &env->fp_status); relation = float32_compare(t0, t1, &env->fp_status); - if (unlikely(relation == float_relation_unordered)) { - update_fpscr(env, GETPC()); - } else { - env->sr_t = (relation == float_relation_equal); - } + update_fpscr(env, GETPC()); + env->sr_t = (relation == float_relation_equal); } void helper_fcmp_eq_DT(CPUSH4State *env, float64 t0, float64 t1) @@ -287,11 +284,8 @@ void helper_fcmp_eq_DT(CPUSH4State *env, float64 t0, float64 t1) set_float_exception_flags(0, &env->fp_status); relation = float64_compare(t0, t1, &env->fp_status); - if (unlikely(relation == float_relation_unordered)) { - update_fpscr(env, GETPC()); - } else { - env->sr_t = (relation == float_relation_equal); - } + update_fpscr(env, GETPC()); + env->sr_t = (relation == float_relation_equal); } void helper_fcmp_gt_FT(CPUSH4State *env, float32 t0, float32 t1) @@ -300,11 +294,8 @@ void helper_fcmp_gt_FT(CPUSH4State *env, float32 t0, float32 t1) set_float_exception_flags(0, &env->fp_status); relation = float32_compare(t0, t1, &env->fp_status); - if (unlikely(relation == float_relation_unordered)) { - update_fpscr(env, GETPC()); - } else { - env->sr_t = (relation == float_relation_greater); - } + update_fpscr(env, GETPC()); + env->sr_t = (relation == float_relation_greater); } void helper_fcmp_gt_DT(CPUSH4State *env, float64 t0, float64 t1) @@ -313,11 +304,8 @@ void helper_fcmp_gt_DT(CPUSH4State *env, float64 t0, float64 
t1) set_float_exception_flags(0, &env->fp_status); relation = float64_compare(t0, t1, &env->fp_status); - if (unlikely(relation == float_relation_unordered)) { - update_fpscr(env, GETPC()); - } else { - env->sr_t = (relation == float_relation_greater); - } + update_fpscr(env, GETPC()); + env->sr_t = (relation == float_relation_greater); } float64 helper_fcnvsd_FT_DT(CPUSH4State *env, float32 t0) From 801f4dac57dad6b340ff3f60c5d9b045a2c68a0e Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Sun, 2 Jul 2017 20:34:55 +0200 Subject: [PATCH 03/31] target/sh4: fix FPSCR cause vs flag inversion The floating-point status/control register contains cause and flag bits. The cause bits are set to 0 before executing the instruction, while the flag bits hold the status of the exception generated after the field was last cleared. Message-Id: <20170702202814.27793-4-aurelien@aurel32.net> Reviewed-by: Richard Henderson Signed-off-by: Aurelien Jarno --- target/sh4/op_helper.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c index f228daf125..f2e39c5ca6 100644 --- a/target/sh4/op_helper.c +++ b/target/sh4/op_helper.c @@ -219,29 +219,29 @@ static void update_fpscr(CPUSH4State *env, uintptr_t retaddr) xcpt = get_float_exception_flags(&env->fp_status); - /* Clear the flag entries */ - env->fpscr &= ~FPSCR_FLAG_MASK; + /* Clear the cause entries */ + env->fpscr &= ~FPSCR_CAUSE_MASK; if (unlikely(xcpt)) { if (xcpt & float_flag_invalid) { - env->fpscr |= FPSCR_FLAG_V; + env->fpscr |= FPSCR_CAUSE_V; } if (xcpt & float_flag_divbyzero) { - env->fpscr |= FPSCR_FLAG_Z; + env->fpscr |= FPSCR_CAUSE_Z; } if (xcpt & float_flag_overflow) { - env->fpscr |= FPSCR_FLAG_O; + env->fpscr |= FPSCR_CAUSE_O; } if (xcpt & float_flag_underflow) { - env->fpscr |= FPSCR_FLAG_U; + env->fpscr |= FPSCR_CAUSE_U; } if (xcpt & float_flag_inexact) { - env->fpscr |= FPSCR_FLAG_I; + env->fpscr |= FPSCR_CAUSE_I; } - /* Accumulate in 
cause entries */ - env->fpscr |= (env->fpscr & FPSCR_FLAG_MASK) - << (FPSCR_CAUSE_SHIFT - FPSCR_FLAG_SHIFT); + /* Accumulate in flag entries */ + env->fpscr |= (env->fpscr & FPSCR_CAUSE_MASK) + >> (FPSCR_CAUSE_SHIFT - FPSCR_FLAG_SHIFT); /* Generate an exception if enabled */ cause = (env->fpscr & FPSCR_CAUSE_MASK) >> FPSCR_CAUSE_SHIFT; From 82e8251374568ba63343b695925c883a7da3db6f Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Sun, 2 Jul 2017 18:26:43 +0200 Subject: [PATCH 04/31] target/sh4: do not use a helper to implement fneg There is no need to use a helper to flip one bit, just use a TCG xor instruction instead. Message-Id: <20170702202814.27793-5-aurelien@aurel32.net> Reviewed-by: Richard Henderson Signed-off-by: Aurelien Jarno --- target/sh4/helper.h | 1 - target/sh4/op_helper.c | 5 ----- target/sh4/translate.c | 5 ++--- 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/target/sh4/helper.h b/target/sh4/helper.h index f715224822..d2398922dd 100644 --- a/target/sh4/helper.h +++ b/target/sh4/helper.h @@ -32,7 +32,6 @@ DEF_HELPER_FLAGS_2(float_DT, TCG_CALL_NO_WG, f64, env, i32) DEF_HELPER_FLAGS_4(fmac_FT, TCG_CALL_NO_WG, f32, env, f32, f32, f32) DEF_HELPER_FLAGS_3(fmul_FT, TCG_CALL_NO_WG, f32, env, f32, f32) DEF_HELPER_FLAGS_3(fmul_DT, TCG_CALL_NO_WG, f64, env, f64, f64) -DEF_HELPER_FLAGS_1(fneg_T, TCG_CALL_NO_RWG_SE, f32, f32) DEF_HELPER_FLAGS_3(fsub_FT, TCG_CALL_NO_WG, f32, env, f32, f32) DEF_HELPER_FLAGS_3(fsub_DT, TCG_CALL_NO_WG, f64, env, f64, f64) DEF_HELPER_FLAGS_2(fsqrt_FT, TCG_CALL_NO_WG, f32, env, f32) diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c index f2e39c5ca6..64206cf803 100644 --- a/target/sh4/op_helper.c +++ b/target/sh4/op_helper.c @@ -384,11 +384,6 @@ float64 helper_fmul_DT(CPUSH4State *env, float64 t0, float64 t1) return t0; } -float32 helper_fneg_T(float32 t0) -{ - return float32_chs(t0); -} - float32 helper_fsqrt_FT(CPUSH4State *env, float32 t0) { set_float_exception_flags(0, &env->fp_status); diff --git 
a/target/sh4/translate.c b/target/sh4/translate.c index bff212a78e..9360522a98 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -1691,9 +1691,8 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf04d: /* fneg FRn/DRn - FPSCR: Nothing */ CHECK_FPU_ENABLED - { - gen_helper_fneg_T(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)]); - } + tcg_gen_xori_i32(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)], + 0x80000000); return; case 0xf05d: /* fabs FRn/DRn - FPCSR: Nothing */ CHECK_FPU_ENABLED From 92f1f83e34f0454b98f3a7fc082636c38cafa115 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Sun, 2 Jul 2017 21:31:25 +0200 Subject: [PATCH 05/31] target/sh4: return result of fcmp using TCG Since the T bit of the SR register is mapped using a TCG global, it's better to return the value through TCG than to write it directly. This allows the helpers to be declared with the flag TCG_CALL_NO_WG. Reviewed-by: Richard Henderson Message-Id: <20170702202814.27793-5-aurelien@aurel32.net> Signed-off-by: Aurelien Jarno --- target/sh4/helper.h | 8 ++++---- target/sh4/op_helper.c | 16 ++++++++-------- target/sh4/translate.c | 10 ++++++---- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/target/sh4/helper.h b/target/sh4/helper.h index d2398922dd..767a6d5209 100644 --- a/target/sh4/helper.h +++ b/target/sh4/helper.h @@ -21,10 +21,10 @@ DEF_HELPER_FLAGS_3(fadd_DT, TCG_CALL_NO_WG, f64, env, f64, f64) DEF_HELPER_FLAGS_2(fcnvsd_FT_DT, TCG_CALL_NO_WG, f64, env, f32) DEF_HELPER_FLAGS_2(fcnvds_DT_FT, TCG_CALL_NO_WG, f32, env, f64) -DEF_HELPER_3(fcmp_eq_FT, void, env, f32, f32) -DEF_HELPER_3(fcmp_eq_DT, void, env, f64, f64) -DEF_HELPER_3(fcmp_gt_FT, void, env, f32, f32) -DEF_HELPER_3(fcmp_gt_DT, void, env, f64, f64) +DEF_HELPER_FLAGS_3(fcmp_eq_FT, TCG_CALL_NO_WG, i32, env, f32, f32) +DEF_HELPER_FLAGS_3(fcmp_eq_DT, TCG_CALL_NO_WG, i32, env, f64, f64) +DEF_HELPER_FLAGS_3(fcmp_gt_FT, TCG_CALL_NO_WG, i32, env, f32, f32) +DEF_HELPER_FLAGS_3(fcmp_gt_DT, 
TCG_CALL_NO_WG, i32, env, f64, f64) DEF_HELPER_FLAGS_3(fdiv_FT, TCG_CALL_NO_WG, f32, env, f32, f32) DEF_HELPER_FLAGS_3(fdiv_DT, TCG_CALL_NO_WG, f64, env, f64, f64) DEF_HELPER_FLAGS_2(float_FT, TCG_CALL_NO_WG, f32, env, i32) diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c index 64206cf803..c3d19b1f61 100644 --- a/target/sh4/op_helper.c +++ b/target/sh4/op_helper.c @@ -268,44 +268,44 @@ float64 helper_fadd_DT(CPUSH4State *env, float64 t0, float64 t1) return t0; } -void helper_fcmp_eq_FT(CPUSH4State *env, float32 t0, float32 t1) +uint32_t helper_fcmp_eq_FT(CPUSH4State *env, float32 t0, float32 t1) { int relation; set_float_exception_flags(0, &env->fp_status); relation = float32_compare(t0, t1, &env->fp_status); update_fpscr(env, GETPC()); - env->sr_t = (relation == float_relation_equal); + return relation == float_relation_equal; } -void helper_fcmp_eq_DT(CPUSH4State *env, float64 t0, float64 t1) +uint32_t helper_fcmp_eq_DT(CPUSH4State *env, float64 t0, float64 t1) { int relation; set_float_exception_flags(0, &env->fp_status); relation = float64_compare(t0, t1, &env->fp_status); update_fpscr(env, GETPC()); - env->sr_t = (relation == float_relation_equal); + return relation == float_relation_equal; } -void helper_fcmp_gt_FT(CPUSH4State *env, float32 t0, float32 t1) +uint32_t helper_fcmp_gt_FT(CPUSH4State *env, float32 t0, float32 t1) { int relation; set_float_exception_flags(0, &env->fp_status); relation = float32_compare(t0, t1, &env->fp_status); update_fpscr(env, GETPC()); - env->sr_t = (relation == float_relation_greater); + return relation == float_relation_greater; } -void helper_fcmp_gt_DT(CPUSH4State *env, float64 t0, float64 t1) +uint32_t helper_fcmp_gt_DT(CPUSH4State *env, float64 t0, float64 t1) { int relation; set_float_exception_flags(0, &env->fp_status); relation = float64_compare(t0, t1, &env->fp_status); update_fpscr(env, GETPC()); - env->sr_t = (relation == float_relation_greater); + return relation == float_relation_greater; } float64 
helper_fcnvsd_FT_DT(CPUSH4State *env, float32 t0) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index 9360522a98..4c3512f62f 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -1077,10 +1077,10 @@ static void _decode_opc(DisasContext * ctx) gen_helper_fdiv_DT(fp0, cpu_env, fp0, fp1); break; case 0xf004: /* fcmp/eq Rm,Rn */ - gen_helper_fcmp_eq_DT(cpu_env, fp0, fp1); + gen_helper_fcmp_eq_DT(cpu_sr_t, cpu_env, fp0, fp1); return; case 0xf005: /* fcmp/gt Rm,Rn */ - gen_helper_fcmp_gt_DT(cpu_env, fp0, fp1); + gen_helper_fcmp_gt_DT(cpu_sr_t, cpu_env, fp0, fp1); return; } gen_store_fpr64(fp0, DREG(B11_8)); @@ -1109,11 +1109,13 @@ static void _decode_opc(DisasContext * ctx) cpu_fregs[FREG(B7_4)]); break; case 0xf004: /* fcmp/eq Rm,Rn */ - gen_helper_fcmp_eq_FT(cpu_env, cpu_fregs[FREG(B11_8)], + gen_helper_fcmp_eq_FT(cpu_sr_t, cpu_env, + cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B7_4)]); return; case 0xf005: /* fcmp/gt Rm,Rn */ - gen_helper_fcmp_gt_FT(cpu_env, cpu_fregs[FREG(B11_8)], + gen_helper_fcmp_gt_FT(cpu_sr_t, cpu_env, + cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B7_4)]); return; } From 4448a83606b5861cfa11528c0395868fc2b0e99e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:27 -1000 Subject: [PATCH 06/31] target/sh4: Consolidate end-of-TB tests We can fold 3 different tests within the decode loop into a more accurate computation of max_insns to start. 
Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-3-rth@twiddle.net> Signed-off-by: Aurelien Jarno --- target/sh4/translate.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index 4c3512f62f..310c52ad2a 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -1830,17 +1830,28 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb) ctx.features = env->features; ctx.has_movcal = (ctx.tbflags & TB_FLAG_PENDING_MOVCA); - num_insns = 0; max_insns = tb->cflags & CF_COUNT_MASK; if (max_insns == 0) { max_insns = CF_COUNT_MASK; } - if (max_insns > TCG_MAX_INSNS) { - max_insns = TCG_MAX_INSNS; + max_insns = MIN(max_insns, TCG_MAX_INSNS); + + /* Since the ISA is fixed-width, we can bound by the number + of instructions remaining on the page. */ + num_insns = -(ctx.pc | TARGET_PAGE_MASK) / 2; + max_insns = MIN(max_insns, num_insns); + + /* Single stepping means just that. 
*/ + if (ctx.singlestep_enabled || singlestep) { + max_insns = 1; } gen_tb_start(tb); - while (ctx.bstate == BS_NONE && !tcg_op_buf_full()) { + num_insns = 0; + + while (ctx.bstate == BS_NONE + && num_insns < max_insns + && !tcg_op_buf_full()) { tcg_gen_insn_start(ctx.pc, ctx.envflags); num_insns++; @@ -1864,18 +1875,10 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb) ctx.opcode = cpu_lduw_code(env, ctx.pc); decode_opc(&ctx); ctx.pc += 2; - if ((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0) - break; - if (cs->singlestep_enabled) { - break; - } - if (num_insns >= max_insns) - break; - if (singlestep) - break; } - if (tb->cflags & CF_LAST_IO) + if (tb->cflags & CF_LAST_IO) { gen_io_end(); + } if (cs->singlestep_enabled) { gen_save_cpu_state(&ctx, true); gen_helper_debug(cpu_env); From e1933d1435d1d0ace7c93bdc429f7e4f0c499e92 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:28 -1000 Subject: [PATCH 07/31] target/sh4: Introduce TB_FLAG_ENVFLAGS_MASK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We'll be putting more things into this bitmask soon. Let's have a name that covers all possible uses. 
Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-4-rth@twiddle.net> Signed-off-by: Aurelien Jarno --- target/sh4/cpu.h | 4 +++- target/sh4/translate.c | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h index ffb91687b8..4aa92d5f30 100644 --- a/target/sh4/cpu.h +++ b/target/sh4/cpu.h @@ -96,6 +96,8 @@ #define DELAY_SLOT_CONDITIONAL (1 << 1) #define DELAY_SLOT_RTE (1 << 2) +#define TB_FLAG_ENVFLAGS_MASK DELAY_SLOT_MASK + typedef struct tlb_t { uint32_t vpn; /* virtual page number */ uint32_t ppn; /* physical page number */ @@ -388,7 +390,7 @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc, { *pc = env->pc; *cs_base = 0; - *flags = (env->flags & DELAY_SLOT_MASK) /* Bits 0- 2 */ + *flags = (env->flags & TB_FLAG_ENVFLAGS_MASK) /* Bits 0-2 */ | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */ | (env->sr & ((1u << SR_MD) | (1u << SR_RB))) /* Bits 29-30 */ | (env->sr & (1u << SR_FD)) /* Bit 15 */ diff --git a/target/sh4/translate.c b/target/sh4/translate.c index 310c52ad2a..d6aa053715 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -220,7 +220,7 @@ static inline void gen_save_cpu_state(DisasContext *ctx, bool save_pc) if (ctx->delayed_pc != (uint32_t) -1) { tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc); } - if ((ctx->tbflags & DELAY_SLOT_MASK) != ctx->envflags) { + if ((ctx->tbflags & TB_FLAG_ENVFLAGS_MASK) != ctx->envflags) { tcg_gen_movi_i32(cpu_flags, ctx->envflags); } } @@ -1819,7 +1819,7 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb) pc_start = tb->pc; ctx.pc = pc_start; ctx.tbflags = (uint32_t)tb->flags; - ctx.envflags = tb->flags & DELAY_SLOT_MASK; + ctx.envflags = tb->flags & TB_FLAG_ENVFLAGS_MASK; ctx.bstate = BS_NONE; ctx.memidx = (ctx.tbflags & (1u << SR_MD)) == 0 ? 
1 : 0; /* We don't know if the delayed pc came from a dynamic or static branch, From ca69176d52ca1b9c9c7a4229ca46cf858167c5e8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:29 -1000 Subject: [PATCH 08/31] target/sh4: Keep env->flags clean MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we mask off any out-of-band bits before we assign to the variable, then we don't need to clean it up when reading. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-5-rth@twiddle.net> Signed-off-by: Aurelien Jarno --- target/sh4/cpu.c | 2 +- target/sh4/cpu.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c index 9da7e1ed38..8536f6d002 100644 --- a/target/sh4/cpu.c +++ b/target/sh4/cpu.c @@ -39,7 +39,7 @@ static void superh_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb) SuperHCPU *cpu = SUPERH_CPU(cs); cpu->env.pc = tb->pc; - cpu->env.flags = tb->flags; + cpu->env.flags = tb->flags & TB_FLAG_ENVFLAGS_MASK; } static bool superh_cpu_has_work(CPUState *cs) diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h index 4aa92d5f30..a7a6811a8c 100644 --- a/target/sh4/cpu.h +++ b/target/sh4/cpu.h @@ -390,7 +390,7 @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc, { *pc = env->pc; *cs_base = 0; - *flags = (env->flags & TB_FLAG_ENVFLAGS_MASK) /* Bits 0-2 */ + *flags = env->flags /* Bits 0-2 */ | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */ | (env->sr & ((1u << SR_MD) | (1u << SR_RB))) /* Bits 29-30 */ | (env->sr & (1u << SR_FD)) /* Bit 15 */ From 1516184d8ea04f9ebd5d5c2009a2b795fc33b82a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:30 -1000 Subject: [PATCH 09/31] target/sh4: Adjust TB_FLAG_PENDING_MOVCA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't 
leave an unused bit after DELAY_SLOT_MASK. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-6-rth@twiddle.net> Signed-off-by: Aurelien Jarno --- target/sh4/cpu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h index a7a6811a8c..319a7555e1 100644 --- a/target/sh4/cpu.h +++ b/target/sh4/cpu.h @@ -96,6 +96,8 @@ #define DELAY_SLOT_CONDITIONAL (1 << 1) #define DELAY_SLOT_RTE (1 << 2) +#define TB_FLAG_PENDING_MOVCA (1 << 3) + #define TB_FLAG_ENVFLAGS_MASK DELAY_SLOT_MASK typedef struct tlb_t { @@ -368,8 +370,6 @@ static inline int cpu_ptel_pr (uint32_t ptel) #define PTEA_TC (1 << 3) #define cpu_ptea_tc(ptea) (((ptea) & PTEA_TC) >> 3) -#define TB_FLAG_PENDING_MOVCA (1 << 4) - static inline target_ulong cpu_read_sr(CPUSH4State *env) { return env->sr | (env->sr_m << SR_M) | @@ -394,7 +394,7 @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc, | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */ | (env->sr & ((1u << SR_MD) | (1u << SR_RB))) /* Bits 29-30 */ | (env->sr & (1u << SR_FD)) /* Bit 15 */ - | (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 4 */ + | (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 3 */ } #endif /* SH4_CPU_H */ From 4bfa602bc2227f5b5a506a4c0c20657d68eaefd1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:31 -1000 Subject: [PATCH 10/31] target/sh4: Handle user-space atomics For uniprocessors, SH4 uses optimistic restartable atomic sequences. Upon an interrupt, a real kernel would simply notice magic values in the registers and reset the PC to the start of the sequence. For QEMU, we cannot do this in quite the same way. Instead, we notice the normal start of such a sequence (mov #-x,r15), and start a new TB that can be executed under cpu_exec_step_atomic. 
Reported-by: Bruno Haible LP: https://bugs.launchpad.net/bugs/1701971 Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-7-rth@twiddle.net> Signed-off-by: Aurelien Jarno --- target/sh4/cpu.h | 18 +++++- target/sh4/helper.h | 1 + target/sh4/op_helper.c | 6 ++ target/sh4/translate.c | 140 +++++++++++++++++++++++++++++++++++++---- 4 files changed, 149 insertions(+), 16 deletions(-) diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h index 319a7555e1..3c47f0de89 100644 --- a/target/sh4/cpu.h +++ b/target/sh4/cpu.h @@ -98,7 +98,18 @@ #define TB_FLAG_PENDING_MOVCA (1 << 3) -#define TB_FLAG_ENVFLAGS_MASK DELAY_SLOT_MASK +#define GUSA_SHIFT 4 +#ifdef CONFIG_USER_ONLY +#define GUSA_EXCLUSIVE (1 << 12) +#define GUSA_MASK ((0xff << GUSA_SHIFT) | GUSA_EXCLUSIVE) +#else +/* Provide dummy versions of the above to allow tests against tbflags + to be elided while avoiding ifdefs. */ +#define GUSA_EXCLUSIVE 0 +#define GUSA_MASK 0 +#endif + +#define TB_FLAG_ENVFLAGS_MASK (DELAY_SLOT_MASK | GUSA_MASK) typedef struct tlb_t { uint32_t vpn; /* virtual page number */ @@ -389,8 +400,9 @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc, target_ulong *cs_base, uint32_t *flags) { *pc = env->pc; - *cs_base = 0; - *flags = env->flags /* Bits 0-2 */ + /* For a gUSA region, notice the end of the region. */ + *cs_base = env->flags & GUSA_MASK ? 
env->gregs[0] : 0; + *flags = env->flags /* TB_FLAG_ENVFLAGS_MASK: bits 0-2, 4-12 */ | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */ | (env->sr & ((1u << SR_MD) | (1u << SR_RB))) /* Bits 29-30 */ | (env->sr & (1u << SR_FD)) /* Bit 15 */ diff --git a/target/sh4/helper.h b/target/sh4/helper.h index 767a6d5209..6c6fa04732 100644 --- a/target/sh4/helper.h +++ b/target/sh4/helper.h @@ -6,6 +6,7 @@ DEF_HELPER_1(raise_slot_fpu_disable, noreturn, env) DEF_HELPER_1(debug, noreturn, env) DEF_HELPER_1(sleep, noreturn, env) DEF_HELPER_2(trapa, noreturn, env, i32) +DEF_HELPER_1(exclusive, noreturn, env) DEF_HELPER_3(movcal, void, env, i32, i32) DEF_HELPER_1(discard_movcal_backup, void, env) diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c index c3d19b1f61..8513f38849 100644 --- a/target/sh4/op_helper.c +++ b/target/sh4/op_helper.c @@ -115,6 +115,12 @@ void helper_trapa(CPUSH4State *env, uint32_t tra) raise_exception(env, 0x160, 0); } +void helper_exclusive(CPUSH4State *env) +{ + /* We do not want cpu_restore_state to run. 
*/ + cpu_loop_exit_atomic(ENV_GET_CPU(env), 0); +} + void helper_movcal(CPUSH4State *env, uint32_t address, uint32_t value) { if (cpu_sh4_is_cached (env, address)) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index d6aa053715..a4e614d0f7 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -230,7 +230,9 @@ static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest) if (unlikely(ctx->singlestep_enabled)) { return false; } - + if (ctx->tbflags & GUSA_EXCLUSIVE) { + return false; + } #ifndef CONFIG_USER_ONLY return (ctx->tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK); #else @@ -269,28 +271,56 @@ static void gen_jump(DisasContext * ctx) } /* Immediate conditional jump (bt or bf) */ -static void gen_conditional_jump(DisasContext * ctx, - target_ulong ift, target_ulong ifnott) +static void gen_conditional_jump(DisasContext *ctx, target_ulong dest, + bool jump_if_true) { TCGLabel *l1 = gen_new_label(); + TCGCond cond_not_taken = jump_if_true ? TCG_COND_EQ : TCG_COND_NE; + + if (ctx->tbflags & GUSA_EXCLUSIVE) { + /* When in an exclusive region, we must continue to the end. + Therefore, exit the region on a taken branch, but otherwise + fall through to the next instruction. */ + tcg_gen_brcondi_i32(cond_not_taken, cpu_sr_t, 0, l1); + tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK); + /* Note that this won't actually use a goto_tb opcode because we + disallow it in use_goto_tb, but it handles exit + singlestep. 
*/ + gen_goto_tb(ctx, 0, dest); + gen_set_label(l1); + return; + } + gen_save_cpu_state(ctx, false); - tcg_gen_brcondi_i32(TCG_COND_NE, cpu_sr_t, 0, l1); - gen_goto_tb(ctx, 0, ifnott); + tcg_gen_brcondi_i32(cond_not_taken, cpu_sr_t, 0, l1); + gen_goto_tb(ctx, 0, dest); gen_set_label(l1); - gen_goto_tb(ctx, 1, ift); + gen_goto_tb(ctx, 1, ctx->pc + 2); ctx->bstate = BS_BRANCH; } /* Delayed conditional jump (bt or bf) */ static void gen_delayed_conditional_jump(DisasContext * ctx) { - TCGLabel *l1; - TCGv ds; + TCGLabel *l1 = gen_new_label(); + TCGv ds = tcg_temp_new(); - l1 = gen_new_label(); - ds = tcg_temp_new(); tcg_gen_mov_i32(ds, cpu_delayed_cond); tcg_gen_discard_i32(cpu_delayed_cond); + + if (ctx->tbflags & GUSA_EXCLUSIVE) { + /* When in an exclusive region, we must continue to the end. + Therefore, exit the region on a taken branch, but otherwise + fall through to the next instruction. */ + tcg_gen_brcondi_i32(TCG_COND_EQ, ds, 0, l1); + + /* Leave the gUSA region. */ + tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK); + gen_jump(ctx); + + gen_set_label(l1); + return; + } + tcg_gen_brcondi_i32(TCG_COND_NE, ds, 0, l1); gen_goto_tb(ctx, 1, ctx->pc + 2); gen_set_label(l1); @@ -475,6 +505,15 @@ static void _decode_opc(DisasContext * ctx) } return; case 0xe000: /* mov #imm,Rn */ +#ifdef CONFIG_USER_ONLY + /* Detect the start of a gUSA region. If so, update envflags + and end the TB. This will allow us to see the end of the + region (stored in R0) in the next TB. 
*/ + if (B11_8 == 15 && B7_0s < 0 && parallel_cpus) { + ctx->envflags = deposit32(ctx->envflags, GUSA_SHIFT, 8, B7_0s); + ctx->bstate = BS_STOP; + } +#endif tcg_gen_movi_i32(REG(B11_8), B7_0s); return; case 0x9000: /* mov.w @(disp,PC),Rn */ @@ -1155,7 +1194,7 @@ static void _decode_opc(DisasContext * ctx) return; case 0x8b00: /* bf label */ CHECK_NOT_DELAY_SLOT - gen_conditional_jump(ctx, ctx->pc + 2, ctx->pc + 4 + B7_0s * 2); + gen_conditional_jump(ctx, ctx->pc + 4 + B7_0s * 2, false); return; case 0x8f00: /* bf/s label */ CHECK_NOT_DELAY_SLOT @@ -1165,7 +1204,7 @@ static void _decode_opc(DisasContext * ctx) return; case 0x8900: /* bt label */ CHECK_NOT_DELAY_SLOT - gen_conditional_jump(ctx, ctx->pc + 4 + B7_0s * 2, ctx->pc + 2); + gen_conditional_jump(ctx, ctx->pc + 4 + B7_0s * 2, true); return; case 0x8d00: /* bt/s label */ CHECK_NOT_DELAY_SLOT @@ -1796,6 +1835,18 @@ static void decode_opc(DisasContext * ctx) if (old_flags & DELAY_SLOT_MASK) { /* go out of the delay slot */ ctx->envflags &= ~DELAY_SLOT_MASK; + + /* When in an exclusive region, we must continue to the end + for conditional branches. */ + if (ctx->tbflags & GUSA_EXCLUSIVE + && old_flags & DELAY_SLOT_CONDITIONAL) { + gen_delayed_conditional_jump(ctx); + return; + } + /* Otherwise this is probably an invalid gUSA region. + Drop the GUSA bits so the next TB doesn't see them. */ + ctx->envflags &= ~GUSA_MASK; + tcg_gen_movi_i32(cpu_flags, ctx->envflags); ctx->bstate = BS_BRANCH; if (old_flags & DELAY_SLOT_CONDITIONAL) { @@ -1803,10 +1854,61 @@ static void decode_opc(DisasContext * ctx) } else { gen_jump(ctx); } - } } +#ifdef CONFIG_USER_ONLY +/* For uniprocessors, SH4 uses optimistic restartable atomic sequences. + Upon an interrupt, a real kernel would simply notice magic values in + the registers and reset the PC to the start of the sequence. + + For QEMU, we cannot do this in quite the same way. Instead, we notice + the normal start of such a sequence (mov #-x,r15). 
While we can handle + any sequence via cpu_exec_step_atomic, we can recognize the "normal" + sequences and transform them into atomic operations as seen by the host. +*/ +static int decode_gusa(DisasContext *ctx, CPUSH4State *env, int *pmax_insns) +{ + uint32_t pc = ctx->pc; + uint32_t pc_end = ctx->tb->cs_base; + int backup = sextract32(ctx->tbflags, GUSA_SHIFT, 8); + int max_insns = (pc_end - pc) / 2; + + if (pc != pc_end + backup || max_insns < 2) { + /* This is a malformed gUSA region. Don't do anything special, + since the interpreter is likely to get confused. */ + ctx->envflags &= ~GUSA_MASK; + return 0; + } + + if (ctx->tbflags & GUSA_EXCLUSIVE) { + /* Regardless of single-stepping or the end of the page, + we must complete execution of the gUSA region while + holding the exclusive lock. */ + *pmax_insns = max_insns; + return 0; + } + + qemu_log_mask(LOG_UNIMP, "Unrecognized gUSA sequence %08x-%08x\n", + pc, pc_end); + + /* Restart with the EXCLUSIVE bit set, within a TB run via + cpu_exec_step_atomic holding the exclusive lock. */ + tcg_gen_insn_start(pc, ctx->envflags); + ctx->envflags |= GUSA_EXCLUSIVE; + gen_save_cpu_state(ctx, false); + gen_helper_exclusive(cpu_env); + ctx->bstate = BS_EXCP; + + /* We're not executing an instruction, but we must report one for the + purposes of accounting within the TB. We might as well report the + entire region consumed via ctx->pc so that it's immediately available + in the disassembly dump. 
*/ + ctx->pc = pc_end; + return 1; +} +#endif + void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb) { SuperHCPU *cpu = sh_env_get_cpu(env); @@ -1849,6 +1951,12 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb) gen_tb_start(tb); num_insns = 0; +#ifdef CONFIG_USER_ONLY + if (ctx.tbflags & GUSA_MASK) { + num_insns = decode_gusa(&ctx, env, &max_insns); + } +#endif + while (ctx.bstate == BS_NONE && num_insns < max_insns && !tcg_op_buf_full()) { @@ -1879,6 +1987,12 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb) if (tb->cflags & CF_LAST_IO) { gen_io_end(); } + + if (ctx.tbflags & GUSA_EXCLUSIVE) { + /* Ending the region of exclusivity. Clear the bits. */ + ctx.envflags &= ~GUSA_MASK; + } + if (cs->singlestep_enabled) { gen_save_cpu_state(&ctx, true); gen_helper_debug(cpu_env); From d6a6cffdd3d861c2cdd09253369bba50f9e3d891 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:32 -1000 Subject: [PATCH 11/31] target/sh4: Recognize common gUSA sequences For many of the sequences produced by gcc or glibc, we can translate these as host atomic operations. Which saves the need to acquire the exclusive lock. 
Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-8-rth@twiddle.net> Signed-off-by: Aurelien Jarno --- target/sh4/translate.c | 321 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 321 insertions(+) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index a4e614d0f7..385b69ef14 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -1869,10 +1869,17 @@ static void decode_opc(DisasContext * ctx) */ static int decode_gusa(DisasContext *ctx, CPUSH4State *env, int *pmax_insns) { + uint16_t insns[5]; + int ld_adr, ld_dst, ld_mop; + int op_dst, op_src, op_opc; + int mv_src, mt_dst, st_src, st_mop; + TCGv op_arg; + uint32_t pc = ctx->pc; uint32_t pc_end = ctx->tb->cs_base; int backup = sextract32(ctx->tbflags, GUSA_SHIFT, 8); int max_insns = (pc_end - pc) / 2; + int i; if (pc != pc_end + backup || max_insns < 2) { /* This is a malformed gUSA region. Don't do anything special, @@ -1889,6 +1896,320 @@ static int decode_gusa(DisasContext *ctx, CPUSH4State *env, int *pmax_insns) return 0; } + /* The state machine below will consume only a few insns. + If there are more than that in a region, fail now. */ + if (max_insns > ARRAY_SIZE(insns)) { + goto fail; + } + + /* Read all of the insns for the region. */ + for (i = 0; i < max_insns; ++i) { + insns[i] = cpu_lduw_code(env, pc + i * 2); + } + + ld_adr = ld_dst = ld_mop = -1; + mv_src = -1; + op_dst = op_src = op_opc = -1; + mt_dst = -1; + st_src = st_mop = -1; + TCGV_UNUSED(op_arg); + i = 0; + +#define NEXT_INSN \ + do { if (i >= max_insns) goto fail; ctx->opcode = insns[i++]; } while (0) + + /* + * Expect a load to begin the region. 
+ */ + NEXT_INSN; + switch (ctx->opcode & 0xf00f) { + case 0x6000: /* mov.b @Rm,Rn */ + ld_mop = MO_SB; + break; + case 0x6001: /* mov.w @Rm,Rn */ + ld_mop = MO_TESW; + break; + case 0x6002: /* mov.l @Rm,Rn */ + ld_mop = MO_TESL; + break; + default: + goto fail; + } + ld_adr = B7_4; + ld_dst = B11_8; + if (ld_adr == ld_dst) { + goto fail; + } + /* Unless we see a mov, any two-operand operation must use ld_dst. */ + op_dst = ld_dst; + + /* + * Expect an optional register move. + */ + NEXT_INSN; + switch (ctx->opcode & 0xf00f) { + case 0x6003: /* mov Rm,Rn */ + /* Here we want to recognize ld_dst being saved for later consumption, + or for another input register being copied so that ld_dst need not + be clobbered during the operation. */ + op_dst = B11_8; + mv_src = B7_4; + if (op_dst == ld_dst) { + /* Overwriting the load output. */ + goto fail; + } + if (mv_src != ld_dst) { + /* Copying a new input; constrain op_src to match the load. */ + op_src = ld_dst; + } + break; + + default: + /* Put back and re-examine as operation. */ + --i; + } + + /* + * Expect the operation. + */ + NEXT_INSN; + switch (ctx->opcode & 0xf00f) { + case 0x300c: /* add Rm,Rn */ + op_opc = INDEX_op_add_i32; + goto do_reg_op; + case 0x2009: /* and Rm,Rn */ + op_opc = INDEX_op_and_i32; + goto do_reg_op; + case 0x200a: /* xor Rm,Rn */ + op_opc = INDEX_op_xor_i32; + goto do_reg_op; + case 0x200b: /* or Rm,Rn */ + op_opc = INDEX_op_or_i32; + do_reg_op: + /* The operation register should be as expected, and the + other input cannot depend on the load. */ + if (op_dst != B11_8) { + goto fail; + } + if (op_src < 0) { + /* Unconstrained input. */ + op_src = B7_4; + } else if (op_src == B7_4) { + /* Constrained input matched load. All operations are + commutative; "swap" them by "moving" the load output + to the (implicit) first argument and the move source + to the (explicit) second argument.
*/ + op_src = mv_src; + } else { + goto fail; + } + op_arg = REG(op_src); + break; + + case 0x6007: /* not Rm,Rn */ + if (ld_dst != B7_4 || mv_src >= 0) { + goto fail; + } + op_dst = B11_8; + op_opc = INDEX_op_xor_i32; + op_arg = tcg_const_i32(-1); + break; + + case 0x7000 ... 0x700f: /* add #imm,Rn */ + if (op_dst != B11_8 || mv_src >= 0) { + goto fail; + } + op_opc = INDEX_op_add_i32; + op_arg = tcg_const_i32(B7_0s); + break; + + case 0x3000: /* cmp/eq Rm,Rn */ + /* Looking for the middle of a compare-and-swap sequence, + beginning with the compare. Operands can be either order, + but with only one overlapping the load. */ + if ((ld_dst == B11_8) + (ld_dst == B7_4) != 1 || mv_src >= 0) { + goto fail; + } + op_opc = INDEX_op_setcond_i32; /* placeholder */ + op_src = (ld_dst == B11_8 ? B7_4 : B11_8); + op_arg = REG(op_src); + + NEXT_INSN; + switch (ctx->opcode & 0xff00) { + case 0x8b00: /* bf label */ + case 0x8f00: /* bf/s label */ + if (pc + (i + 1 + B7_0s) * 2 != pc_end) { + goto fail; + } + if ((ctx->opcode & 0xff00) == 0x8b00) { /* bf label */ + break; + } + /* We're looking to unconditionally modify Rn with the + result of the comparison, within the delay slot of + the branch. This is used by older gcc. */ + NEXT_INSN; + if ((ctx->opcode & 0xf0ff) == 0x0029) { /* movt Rn */ + mt_dst = B11_8; + } else { + goto fail; + } + break; + + default: + goto fail; + } + break; + + case 0x2008: /* tst Rm,Rn */ + /* Looking for a compare-and-swap against zero. */ + if (ld_dst != B11_8 || ld_dst != B7_4 || mv_src >= 0) { + goto fail; + } + op_opc = INDEX_op_setcond_i32; + op_arg = tcg_const_i32(0); + + NEXT_INSN; + if ((ctx->opcode & 0xff00) != 0x8900 /* bt label */ + || pc + (i + 1 + B7_0s) * 2 != pc_end) { + goto fail; + } + break; + + default: + /* Put back and re-examine as store. */ + --i; + } + + /* + * Expect the store. + */ + /* The store must be the last insn. 
*/ + if (i != max_insns - 1) { + goto fail; + } + NEXT_INSN; + switch (ctx->opcode & 0xf00f) { + case 0x2000: /* mov.b Rm,@Rn */ + st_mop = MO_UB; + break; + case 0x2001: /* mov.w Rm,@Rn */ + st_mop = MO_UW; + break; + case 0x2002: /* mov.l Rm,@Rn */ + st_mop = MO_UL; + break; + default: + goto fail; + } + /* The store must match the load. */ + if (ld_adr != B11_8 || st_mop != (ld_mop & MO_SIZE)) { + goto fail; + } + st_src = B7_4; + +#undef NEXT_INSN + + /* + * Emit the operation. + */ + tcg_gen_insn_start(pc, ctx->envflags); + switch (op_opc) { + case -1: + /* No operation found. Look for exchange pattern. */ + if (st_src == ld_dst || mv_src >= 0) { + goto fail; + } + tcg_gen_atomic_xchg_i32(REG(ld_dst), REG(ld_adr), REG(st_src), + ctx->memidx, ld_mop); + break; + + case INDEX_op_add_i32: + if (op_dst != st_src) { + goto fail; + } + if (op_dst == ld_dst && st_mop == MO_UL) { + tcg_gen_atomic_add_fetch_i32(REG(ld_dst), REG(ld_adr), + op_arg, ctx->memidx, ld_mop); + } else { + tcg_gen_atomic_fetch_add_i32(REG(ld_dst), REG(ld_adr), + op_arg, ctx->memidx, ld_mop); + if (op_dst != ld_dst) { + /* Note that mop sizes < 4 cannot use add_fetch + because it won't carry into the higher bits. 
*/ + tcg_gen_add_i32(REG(op_dst), REG(ld_dst), op_arg); + } + } + break; + + case INDEX_op_and_i32: + if (op_dst != st_src) { + goto fail; + } + if (op_dst == ld_dst) { + tcg_gen_atomic_and_fetch_i32(REG(ld_dst), REG(ld_adr), + op_arg, ctx->memidx, ld_mop); + } else { + tcg_gen_atomic_fetch_and_i32(REG(ld_dst), REG(ld_adr), + op_arg, ctx->memidx, ld_mop); + tcg_gen_and_i32(REG(op_dst), REG(ld_dst), op_arg); + } + break; + + case INDEX_op_or_i32: + if (op_dst != st_src) { + goto fail; + } + if (op_dst == ld_dst) { + tcg_gen_atomic_or_fetch_i32(REG(ld_dst), REG(ld_adr), + op_arg, ctx->memidx, ld_mop); + } else { + tcg_gen_atomic_fetch_or_i32(REG(ld_dst), REG(ld_adr), + op_arg, ctx->memidx, ld_mop); + tcg_gen_or_i32(REG(op_dst), REG(ld_dst), op_arg); + } + break; + + case INDEX_op_xor_i32: + if (op_dst != st_src) { + goto fail; + } + if (op_dst == ld_dst) { + tcg_gen_atomic_xor_fetch_i32(REG(ld_dst), REG(ld_adr), + op_arg, ctx->memidx, ld_mop); + } else { + tcg_gen_atomic_fetch_xor_i32(REG(ld_dst), REG(ld_adr), + op_arg, ctx->memidx, ld_mop); + tcg_gen_xor_i32(REG(op_dst), REG(ld_dst), op_arg); + } + break; + + case INDEX_op_setcond_i32: + if (st_src == ld_dst) { + goto fail; + } + tcg_gen_atomic_cmpxchg_i32(REG(ld_dst), REG(ld_adr), op_arg, + REG(st_src), ctx->memidx, ld_mop); + tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, REG(ld_dst), op_arg); + if (mt_dst >= 0) { + tcg_gen_mov_i32(REG(mt_dst), cpu_sr_t); + } + break; + + default: + g_assert_not_reached(); + } + + /* If op_src is not a valid register, then op_arg was a constant. */ + if (op_src < 0) { + tcg_temp_free_i32(op_arg); + } + + /* The entire region has been translated. 
*/ + ctx->envflags &= ~GUSA_MASK; + ctx->pc = pc_end; + return max_insns; + + fail: qemu_log_mask(LOG_UNIMP, "Unrecognized gUSA sequence %08x-%08x\n", pc, pc_end); From b0e4f0edf575f72d8d76717ecbefa9c748ab467b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:33 -1000 Subject: [PATCH 12/31] linux-user/sh4: Notice gUSA regions during signal delivery We translate gUSA regions atomically in a parallel context. But in a serial context a gUSA region may be interrupted. In that case, restart the region as the kernel would. Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-9-rth@twiddle.net> Signed-off-by: Aurelien Jarno --- linux-user/signal.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/linux-user/signal.c b/linux-user/signal.c index 3d18d1b3ee..c8b0733a7c 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -3471,6 +3471,30 @@ static abi_ulong get_sigframe(struct target_sigaction *ka, return (sp - frame_size) & -8ul; } +/* Notice when we're in the middle of a gUSA region and reset. + Note that this will only occur for !parallel_cpus, as we will + translate such sequences differently in a parallel context. */ +static void unwind_gusa(CPUSH4State *regs) +{ + /* If the stack pointer is sufficiently negative, and we haven't + completed the sequence, then reset to the entry to the region. */ + /* ??? The SH4 kernel checks for an address above 0xC0000000. + However, the page mappings in qemu linux-user aren't as restricted + and we wind up with the normal stack mapped above 0xF0000000. + That said, there is no reason why the kernel should be allowing + a gUSA region that spans 1GB. Use a tighter check here, for what + can actually be enabled by the immediate move.
*/ + if (regs->gregs[15] >= -128u && regs->pc < regs->gregs[0]) { + /* Reset the PC to before the gUSA region, as computed from + R0 = region end, SP = -(region size), plus one more for the + insn that actually initializes SP to the region size. */ + regs->pc = regs->gregs[0] + regs->gregs[15] - 2; + + /* Reset the SP to the saved version in R1. */ + regs->gregs[15] = regs->gregs[1]; + } +} + static void setup_sigcontext(struct target_sigcontext *sc, CPUSH4State *regs, unsigned long mask) { @@ -3534,6 +3558,8 @@ static void setup_frame(int sig, struct target_sigaction *ka, abi_ulong frame_addr; int i; + unwind_gusa(regs); + frame_addr = get_sigframe(ka, regs->gregs[15], sizeof(*frame)); trace_user_setup_frame(regs, frame_addr); if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) { @@ -3583,6 +3609,8 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, abi_ulong frame_addr; int i; + unwind_gusa(regs); + frame_addr = get_sigframe(ka, regs->gregs[15], sizeof(*frame)); trace_user_setup_rt_frame(regs, frame_addr); if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) { From b0e9c51a00e0a71b2ab666140ee0ea31b2ed43d2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:34 -1000 Subject: [PATCH 13/31] linux-user/sh4: Clean env->flags on signal boundaries If a signal is delivered during the execution of a delay slot, or a gUSA region, clear those bits from the environment so that the signal handler does not start in that same state. Cleaning the bits on signal return is paranoid good sense. 
Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-10-rth@twiddle.net> Signed-off-by: Aurelien Jarno --- linux-user/signal.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/linux-user/signal.c b/linux-user/signal.c index c8b0733a7c..d68bd26013 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -3549,6 +3549,7 @@ static void restore_sigcontext(CPUSH4State *regs, struct target_sigcontext *sc) __get_user(regs->fpul, &sc->sc_fpul); regs->tra = -1; /* disable syscall checks */ + regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK); } static void setup_frame(int sig, struct target_sigaction *ka, @@ -3592,6 +3593,7 @@ static void setup_frame(int sig, struct target_sigaction *ka, regs->gregs[5] = 0; regs->gregs[6] = frame_addr += offsetof(typeof(*frame), sc); regs->pc = (unsigned long) ka->_sa_handler; + regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK); unlock_user_struct(frame, frame_addr, 1); return; @@ -3654,6 +3656,7 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, regs->gregs[5] = frame_addr + offsetof(typeof(*frame), info); regs->gregs[6] = frame_addr + offsetof(typeof(*frame), uc); regs->pc = (unsigned long) ka->_sa_handler; + regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK); unlock_user_struct(frame, frame_addr, 1); return; From 3a3bb8d2b5db42fa250ee06e1bdfaac7c46d17c7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:35 -1000 Subject: [PATCH 14/31] target/sh4: Hoist register bank selection Compute which register bank to use once at the start of translation. 
Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-11-rth@twiddle.net> Signed-off-by: Aurelien Jarno --- target/sh4/translate.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index 385b69ef14..d6e05f77fe 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -41,6 +41,7 @@ typedef struct DisasContext { uint32_t envflags; /* should stay in sync with env->flags using TCG ops */ int bstate; int memidx; + int gbank; uint32_t delayed_pc; int singlestep_enabled; uint32_t features; @@ -64,7 +65,7 @@ enum { /* global register indexes */ static TCGv_env cpu_env; -static TCGv cpu_gregs[24]; +static TCGv cpu_gregs[32]; static TCGv cpu_sr, cpu_sr_m, cpu_sr_q, cpu_sr_t; static TCGv cpu_pc, cpu_ssr, cpu_spc, cpu_gbr; static TCGv cpu_vbr, cpu_sgr, cpu_dbr, cpu_mach, cpu_macl; @@ -98,16 +99,19 @@ void sh4_translate_init(void) "FPR12_BANK1", "FPR13_BANK1", "FPR14_BANK1", "FPR15_BANK1", }; - if (done_init) + if (done_init) { return; + } cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env"); tcg_ctx.tcg_env = cpu_env; - for (i = 0; i < 24; i++) + for (i = 0; i < 24; i++) { cpu_gregs[i] = tcg_global_mem_new_i32(cpu_env, offsetof(CPUSH4State, gregs[i]), gregnames[i]); + } + memcpy(cpu_gregs + 24, cpu_gregs + 8, 8 * sizeof(TCGv)); cpu_pc = tcg_global_mem_new_i32(cpu_env, offsetof(CPUSH4State, pc), "PC"); @@ -347,13 +351,8 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg) #define B11_8 ((ctx->opcode >> 8) & 0xf) #define B15_12 ((ctx->opcode >> 12) & 0xf) -#define REG(x) ((x) < 8 && (ctx->tbflags & (1u << SR_MD))\ - && (ctx->tbflags & (1u << SR_RB))\ - ? (cpu_gregs[x + 16]) : (cpu_gregs[x])) - -#define ALTREG(x) ((x) < 8 && (!(ctx->tbflags & (1u << SR_MD))\ - || !(ctx->tbflags & (1u << SR_RB)))\ - ? 
(cpu_gregs[x + 16]) : (cpu_gregs[x])) +#define REG(x) cpu_gregs[(x) ^ ctx->gbank] +#define ALTREG(x) cpu_gregs[(x) ^ ctx->gbank ^ 0x10] #define FREG(x) (ctx->tbflags & FPSCR_FR ? (x) ^ 0x10 : (x)) #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe)) @@ -2252,6 +2251,8 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb) ctx.singlestep_enabled = cs->singlestep_enabled; ctx.features = env->features; ctx.has_movcal = (ctx.tbflags & TB_FLAG_PENDING_MOVCA); + ctx.gbank = ((ctx.tbflags & (1 << SR_MD)) && + (ctx.tbflags & (1 << SR_RB))) * 0x10; max_insns = tb->cflags & CF_COUNT_MASK; if (max_insns == 0) { From 7c9f70386d1aae67055a9a278880cde6c278217c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:36 -1000 Subject: [PATCH 15/31] target/sh4: Unify cpu_fregs into FREG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were treating FREG as an index and REG as a TCGv. Making FREG return a TCGv is both less confusing and a step toward cleaner banking of cpu_fregs. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-12-rth@twiddle.net> [aurel32: fix whitespace issues] Signed-off-by: Aurelien Jarno --- target/sh4/translate.c | 125 +++++++++++++++++------------------------ 1 file changed, 52 insertions(+), 73 deletions(-) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index d6e05f77fe..bed52c9075 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -354,10 +354,11 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg) #define REG(x) cpu_gregs[(x) ^ ctx->gbank] #define ALTREG(x) cpu_gregs[(x) ^ ctx->gbank ^ 0x10] -#define FREG(x) (ctx->tbflags & FPSCR_FR ? (x) ^ 0x10 : (x)) +#define FREG(x) cpu_fregs[ctx->tbflags & FPSCR_FR ? (x) ^ 0x10 : (x)] #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe)) -#define XREG(x) (ctx->tbflags & FPSCR_FR ? 
XHACK(x) ^ 0x10 : XHACK(x)) -#define DREG(x) FREG(x) /* Assumes lsb of (x) is always 0 */ +#define XREG(x) FREG(XHACK(x)) +/* Assumes lsb of (x) is always 0 */ +#define DREG(x) (ctx->tbflags & FPSCR_FR ? (x) ^ 0x10 : (x)) #define CHECK_NOT_DELAY_SLOT \ if (ctx->envflags & DELAY_SLOT_MASK) { \ @@ -977,56 +978,51 @@ static void _decode_opc(DisasContext * ctx) CHECK_FPU_ENABLED if (ctx->tbflags & FPSCR_SZ) { TCGv_i64 fp = tcg_temp_new_i64(); - gen_load_fpr64(fp, XREG(B7_4)); - gen_store_fpr64(fp, XREG(B11_8)); + gen_load_fpr64(fp, XHACK(B7_4)); + gen_store_fpr64(fp, XHACK(B11_8)); tcg_temp_free_i64(fp); } else { - tcg_gen_mov_i32(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B7_4)]); + tcg_gen_mov_i32(FREG(B11_8), FREG(B7_4)); } return; case 0xf00a: /* fmov {F,D,X}Rm,@Rn - FPSCR: Nothing */ CHECK_FPU_ENABLED if (ctx->tbflags & FPSCR_SZ) { TCGv addr_hi = tcg_temp_new(); - int fr = XREG(B7_4); + int fr = XHACK(B7_4); tcg_gen_addi_i32(addr_hi, REG(B11_8), 4); - tcg_gen_qemu_st_i32(cpu_fregs[fr], REG(B11_8), - ctx->memidx, MO_TEUL); - tcg_gen_qemu_st_i32(cpu_fregs[fr+1], addr_hi, - ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(FREG(fr), REG(B11_8), ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(FREG(fr + 1), addr_hi, ctx->memidx, MO_TEUL); tcg_temp_free(addr_hi); } else { - tcg_gen_qemu_st_i32(cpu_fregs[FREG(B7_4)], REG(B11_8), - ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(FREG(B7_4), REG(B11_8), ctx->memidx, MO_TEUL); } return; case 0xf008: /* fmov @Rm,{F,D,X}Rn - FPSCR: Nothing */ CHECK_FPU_ENABLED if (ctx->tbflags & FPSCR_SZ) { TCGv addr_hi = tcg_temp_new(); - int fr = XREG(B11_8); + int fr = XHACK(B11_8); tcg_gen_addi_i32(addr_hi, REG(B7_4), 4); - tcg_gen_qemu_ld_i32(cpu_fregs[fr], REG(B7_4), ctx->memidx, MO_TEUL); - tcg_gen_qemu_ld_i32(cpu_fregs[fr+1], addr_hi, ctx->memidx, MO_TEUL); + tcg_gen_qemu_ld_i32(FREG(fr), REG(B7_4), ctx->memidx, MO_TEUL); + tcg_gen_qemu_ld_i32(FREG(fr + 1), addr_hi, ctx->memidx, MO_TEUL); tcg_temp_free(addr_hi); } else { - 
tcg_gen_qemu_ld_i32(cpu_fregs[FREG(B11_8)], REG(B7_4), - ctx->memidx, MO_TEUL); + tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL); } return; case 0xf009: /* fmov @Rm+,{F,D,X}Rn - FPSCR: Nothing */ CHECK_FPU_ENABLED if (ctx->tbflags & FPSCR_SZ) { TCGv addr_hi = tcg_temp_new(); - int fr = XREG(B11_8); + int fr = XHACK(B11_8); tcg_gen_addi_i32(addr_hi, REG(B7_4), 4); - tcg_gen_qemu_ld_i32(cpu_fregs[fr], REG(B7_4), ctx->memidx, MO_TEUL); - tcg_gen_qemu_ld_i32(cpu_fregs[fr+1], addr_hi, ctx->memidx, MO_TEUL); + tcg_gen_qemu_ld_i32(FREG(fr), REG(B7_4), ctx->memidx, MO_TEUL); + tcg_gen_qemu_ld_i32(FREG(fr + 1), addr_hi, ctx->memidx, MO_TEUL); tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 8); tcg_temp_free(addr_hi); } else { - tcg_gen_qemu_ld_i32(cpu_fregs[FREG(B11_8)], REG(B7_4), - ctx->memidx, MO_TEUL); + tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL); tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4); } return; @@ -1035,13 +1031,12 @@ static void _decode_opc(DisasContext * ctx) TCGv addr = tcg_temp_new_i32(); tcg_gen_subi_i32(addr, REG(B11_8), 4); if (ctx->tbflags & FPSCR_SZ) { - int fr = XREG(B7_4); - tcg_gen_qemu_st_i32(cpu_fregs[fr+1], addr, ctx->memidx, MO_TEUL); + int fr = XHACK(B7_4); + tcg_gen_qemu_st_i32(FREG(fr + 1), addr, ctx->memidx, MO_TEUL); tcg_gen_subi_i32(addr, addr, 4); - tcg_gen_qemu_st_i32(cpu_fregs[fr], addr, ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(FREG(fr), addr, ctx->memidx, MO_TEUL); } else { - tcg_gen_qemu_st_i32(cpu_fregs[FREG(B7_4)], addr, - ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL); } tcg_gen_mov_i32(REG(B11_8), addr); tcg_temp_free(addr); @@ -1052,15 +1047,12 @@ static void _decode_opc(DisasContext * ctx) TCGv addr = tcg_temp_new_i32(); tcg_gen_add_i32(addr, REG(B7_4), REG(0)); if (ctx->tbflags & FPSCR_SZ) { - int fr = XREG(B11_8); - tcg_gen_qemu_ld_i32(cpu_fregs[fr], addr, - ctx->memidx, MO_TEUL); + int fr = XHACK(B11_8); + tcg_gen_qemu_ld_i32(FREG(fr), addr, ctx->memidx, 
MO_TEUL); tcg_gen_addi_i32(addr, addr, 4); - tcg_gen_qemu_ld_i32(cpu_fregs[fr+1], addr, - ctx->memidx, MO_TEUL); + tcg_gen_qemu_ld_i32(FREG(fr + 1), addr, ctx->memidx, MO_TEUL); } else { - tcg_gen_qemu_ld_i32(cpu_fregs[FREG(B11_8)], addr, - ctx->memidx, MO_TEUL); + tcg_gen_qemu_ld_i32(FREG(B11_8), addr, ctx->memidx, MO_TEUL); } tcg_temp_free(addr); } @@ -1071,15 +1063,12 @@ static void _decode_opc(DisasContext * ctx) TCGv addr = tcg_temp_new(); tcg_gen_add_i32(addr, REG(B11_8), REG(0)); if (ctx->tbflags & FPSCR_SZ) { - int fr = XREG(B7_4); - tcg_gen_qemu_ld_i32(cpu_fregs[fr], addr, - ctx->memidx, MO_TEUL); + int fr = XHACK(B7_4); + tcg_gen_qemu_ld_i32(FREG(fr), addr, ctx->memidx, MO_TEUL); tcg_gen_addi_i32(addr, addr, 4); - tcg_gen_qemu_ld_i32(cpu_fregs[fr+1], addr, - ctx->memidx, MO_TEUL); + tcg_gen_qemu_ld_i32(FREG(fr + 1), addr, ctx->memidx, MO_TEUL); } else { - tcg_gen_qemu_st_i32(cpu_fregs[FREG(B7_4)], addr, - ctx->memidx, MO_TEUL); + tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL); } tcg_temp_free(addr); } @@ -1127,34 +1116,28 @@ static void _decode_opc(DisasContext * ctx) } else { switch (ctx->opcode & 0xf00f) { case 0xf000: /* fadd Rm,Rn */ - gen_helper_fadd_FT(cpu_fregs[FREG(B11_8)], cpu_env, - cpu_fregs[FREG(B11_8)], - cpu_fregs[FREG(B7_4)]); + gen_helper_fadd_FT(FREG(B11_8), cpu_env, + FREG(B11_8), FREG(B7_4)); break; case 0xf001: /* fsub Rm,Rn */ - gen_helper_fsub_FT(cpu_fregs[FREG(B11_8)], cpu_env, - cpu_fregs[FREG(B11_8)], - cpu_fregs[FREG(B7_4)]); + gen_helper_fsub_FT(FREG(B11_8), cpu_env, + FREG(B11_8), FREG(B7_4)); break; case 0xf002: /* fmul Rm,Rn */ - gen_helper_fmul_FT(cpu_fregs[FREG(B11_8)], cpu_env, - cpu_fregs[FREG(B11_8)], - cpu_fregs[FREG(B7_4)]); + gen_helper_fmul_FT(FREG(B11_8), cpu_env, + FREG(B11_8), FREG(B7_4)); break; case 0xf003: /* fdiv Rm,Rn */ - gen_helper_fdiv_FT(cpu_fregs[FREG(B11_8)], cpu_env, - cpu_fregs[FREG(B11_8)], - cpu_fregs[FREG(B7_4)]); + gen_helper_fdiv_FT(FREG(B11_8), cpu_env, + FREG(B11_8), FREG(B7_4)); 
break; case 0xf004: /* fcmp/eq Rm,Rn */ gen_helper_fcmp_eq_FT(cpu_sr_t, cpu_env, - cpu_fregs[FREG(B11_8)], - cpu_fregs[FREG(B7_4)]); + FREG(B11_8), FREG(B7_4)); return; case 0xf005: /* fcmp/gt Rm,Rn */ gen_helper_fcmp_gt_FT(cpu_sr_t, cpu_env, - cpu_fregs[FREG(B11_8)], - cpu_fregs[FREG(B7_4)]); + FREG(B11_8), FREG(B7_4)); return; } } @@ -1166,9 +1149,8 @@ static void _decode_opc(DisasContext * ctx) if (ctx->tbflags & FPSCR_PR) { break; /* illegal instruction */ } else { - gen_helper_fmac_FT(cpu_fregs[FREG(B11_8)], cpu_env, - cpu_fregs[FREG(0)], cpu_fregs[FREG(B7_4)], - cpu_fregs[FREG(B11_8)]); + gen_helper_fmac_FT(FREG(B11_8), cpu_env, + FREG(0), FREG(B7_4), FREG(B11_8)); return; } } @@ -1693,11 +1675,11 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf00d: /* fsts FPUL,FRn - FPSCR: Nothing */ CHECK_FPU_ENABLED - tcg_gen_mov_i32(cpu_fregs[FREG(B11_8)], cpu_fpul); + tcg_gen_mov_i32(FREG(B11_8), cpu_fpul); return; case 0xf01d: /* flds FRm,FPUL - FPSCR: Nothing */ CHECK_FPU_ENABLED - tcg_gen_mov_i32(cpu_fpul, cpu_fregs[FREG(B11_8)]); + tcg_gen_mov_i32(cpu_fpul, FREG(B11_8)); return; case 0xf02d: /* float FPUL,FRn/DRn - FPSCR: R[PR,Enable.I]/W[Cause,Flag] */ CHECK_FPU_ENABLED @@ -1711,7 +1693,7 @@ static void _decode_opc(DisasContext * ctx) tcg_temp_free_i64(fp); } else { - gen_helper_float_FT(cpu_fregs[FREG(B11_8)], cpu_env, cpu_fpul); + gen_helper_float_FT(FREG(B11_8), cpu_env, cpu_fpul); } return; case 0xf03d: /* ftrc FRm/DRm,FPUL - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */ @@ -1726,18 +1708,16 @@ static void _decode_opc(DisasContext * ctx) tcg_temp_free_i64(fp); } else { - gen_helper_ftrc_FT(cpu_fpul, cpu_env, cpu_fregs[FREG(B11_8)]); + gen_helper_ftrc_FT(cpu_fpul, cpu_env, FREG(B11_8)); } return; case 0xf04d: /* fneg FRn/DRn - FPSCR: Nothing */ CHECK_FPU_ENABLED - tcg_gen_xori_i32(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)], - 0x80000000); + tcg_gen_xori_i32(FREG(B11_8), FREG(B11_8), 0x80000000); return; case 0xf05d: /* fabs FRn/DRn - FPCSR: Nothing 
*/ CHECK_FPU_ENABLED - tcg_gen_andi_i32(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)], - 0x7fffffff); + tcg_gen_andi_i32(FREG(B11_8), FREG(B11_8), 0x7fffffff); return; case 0xf06d: /* fsqrt FRn */ CHECK_FPU_ENABLED @@ -1750,8 +1730,7 @@ static void _decode_opc(DisasContext * ctx) gen_store_fpr64(fp, DREG(B11_8)); tcg_temp_free_i64(fp); } else { - gen_helper_fsqrt_FT(cpu_fregs[FREG(B11_8)], cpu_env, - cpu_fregs[FREG(B11_8)]); + gen_helper_fsqrt_FT(FREG(B11_8), cpu_env, FREG(B11_8)); } return; case 0xf07d: /* fsrra FRn */ @@ -1760,13 +1739,13 @@ static void _decode_opc(DisasContext * ctx) case 0xf08d: /* fldi0 FRn - FPSCR: R[PR] */ CHECK_FPU_ENABLED if (!(ctx->tbflags & FPSCR_PR)) { - tcg_gen_movi_i32(cpu_fregs[FREG(B11_8)], 0); + tcg_gen_movi_i32(FREG(B11_8), 0); } return; case 0xf09d: /* fldi1 FRn - FPSCR: R[PR] */ CHECK_FPU_ENABLED if (!(ctx->tbflags & FPSCR_PR)) { - tcg_gen_movi_i32(cpu_fregs[FREG(B11_8)], 0x3f800000); + tcg_gen_movi_i32(FREG(B11_8), 0x3f800000); } return; case 0xf0ad: /* fcnvsd FPUL,DRn */ From e5d8053e76bda79744710e5b59e70f9fcbce7df7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:37 -1000 Subject: [PATCH 16/31] target/sh4: Pass DisasContext to fpr64 routines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-13-rth@twiddle.net> [aurel32: fix whitespace issues] Signed-off-by: Aurelien Jarno --- target/sh4/translate.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index bed52c9075..b706a6a153 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -331,12 +331,12 @@ static void gen_delayed_conditional_jump(DisasContext * ctx) gen_jump(ctx); } -static inline void gen_load_fpr64(TCGv_i64 t, int reg) +static inline void 
gen_load_fpr64(DisasContext *ctx, TCGv_i64 t, int reg) { tcg_gen_concat_i32_i64(t, cpu_fregs[reg + 1], cpu_fregs[reg]); } -static inline void gen_store_fpr64 (TCGv_i64 t, int reg) +static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg) { tcg_gen_extr_i64_i32(cpu_fregs[reg + 1], cpu_fregs[reg], t); } @@ -978,8 +978,8 @@ static void _decode_opc(DisasContext * ctx) CHECK_FPU_ENABLED if (ctx->tbflags & FPSCR_SZ) { TCGv_i64 fp = tcg_temp_new_i64(); - gen_load_fpr64(fp, XHACK(B7_4)); - gen_store_fpr64(fp, XHACK(B11_8)); + gen_load_fpr64(ctx, fp, XHACK(B7_4)); + gen_store_fpr64(ctx, fp, XHACK(B11_8)); tcg_temp_free_i64(fp); } else { tcg_gen_mov_i32(FREG(B11_8), FREG(B7_4)); @@ -1088,8 +1088,8 @@ static void _decode_opc(DisasContext * ctx) break; /* illegal instruction */ fp0 = tcg_temp_new_i64(); fp1 = tcg_temp_new_i64(); - gen_load_fpr64(fp0, DREG(B11_8)); - gen_load_fpr64(fp1, DREG(B7_4)); + gen_load_fpr64(ctx, fp0, DREG(B11_8)); + gen_load_fpr64(ctx, fp1, DREG(B7_4)); switch (ctx->opcode & 0xf00f) { case 0xf000: /* fadd Rm,Rn */ gen_helper_fadd_DT(fp0, cpu_env, fp0, fp1); @@ -1110,7 +1110,7 @@ static void _decode_opc(DisasContext * ctx) gen_helper_fcmp_gt_DT(cpu_sr_t, cpu_env, fp0, fp1); return; } - gen_store_fpr64(fp0, DREG(B11_8)); + gen_store_fpr64(ctx, fp0, DREG(B11_8)); tcg_temp_free_i64(fp0); tcg_temp_free_i64(fp1); } else { @@ -1689,7 +1689,7 @@ static void _decode_opc(DisasContext * ctx) break; /* illegal instruction */ fp = tcg_temp_new_i64(); gen_helper_float_DT(fp, cpu_env, cpu_fpul); - gen_store_fpr64(fp, DREG(B11_8)); + gen_store_fpr64(ctx, fp, DREG(B11_8)); tcg_temp_free_i64(fp); } else { @@ -1703,7 +1703,7 @@ static void _decode_opc(DisasContext * ctx) if (ctx->opcode & 0x0100) break; /* illegal instruction */ fp = tcg_temp_new_i64(); - gen_load_fpr64(fp, DREG(B11_8)); + gen_load_fpr64(ctx, fp, DREG(B11_8)); gen_helper_ftrc_DT(cpu_fpul, cpu_env, fp); tcg_temp_free_i64(fp); } @@ -1725,9 +1725,9 @@ static void _decode_opc(DisasContext * 
ctx) if (ctx->opcode & 0x0100) break; /* illegal instruction */ TCGv_i64 fp = tcg_temp_new_i64(); - gen_load_fpr64(fp, DREG(B11_8)); + gen_load_fpr64(ctx, fp, DREG(B11_8)); gen_helper_fsqrt_DT(fp, cpu_env, fp); - gen_store_fpr64(fp, DREG(B11_8)); + gen_store_fpr64(ctx, fp, DREG(B11_8)); tcg_temp_free_i64(fp); } else { gen_helper_fsqrt_FT(FREG(B11_8), cpu_env, FREG(B11_8)); @@ -1753,7 +1753,7 @@ static void _decode_opc(DisasContext * ctx) { TCGv_i64 fp = tcg_temp_new_i64(); gen_helper_fcnvsd_FT_DT(fp, cpu_env, cpu_fpul); - gen_store_fpr64(fp, DREG(B11_8)); + gen_store_fpr64(ctx, fp, DREG(B11_8)); tcg_temp_free_i64(fp); } return; @@ -1761,7 +1761,7 @@ static void _decode_opc(DisasContext * ctx) CHECK_FPU_ENABLED { TCGv_i64 fp = tcg_temp_new_i64(); - gen_load_fpr64(fp, DREG(B11_8)); + gen_load_fpr64(ctx, fp, DREG(B11_8)); gen_helper_fcnvds_DT_FT(cpu_fpul, cpu_env, fp); tcg_temp_free_i64(fp); } From 5c13bad9ecf758946877d041bb3b9fd012f4503a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:38 -1000 Subject: [PATCH 17/31] target/sh4: Hoist fp register bank selection Compute which register bank to use once at the start of translation. 
Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-14-rth@twiddle.net> Signed-off-by: Aurelien Jarno --- target/sh4/translate.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index b706a6a153..bc6f33970b 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -42,6 +42,7 @@ typedef struct DisasContext { int bstate; int memidx; int gbank; + int fbank; uint32_t delayed_pc; int singlestep_enabled; uint32_t features; @@ -353,12 +354,12 @@ static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg) #define REG(x) cpu_gregs[(x) ^ ctx->gbank] #define ALTREG(x) cpu_gregs[(x) ^ ctx->gbank ^ 0x10] +#define FREG(x) cpu_fregs[(x) ^ ctx->fbank] -#define FREG(x) cpu_fregs[ctx->tbflags & FPSCR_FR ? (x) ^ 0x10 : (x)] #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe)) -#define XREG(x) FREG(XHACK(x)) +#define XREG(x) FREG(XHACK(x)) /* Assumes lsb of (x) is always 0 */ -#define DREG(x) (ctx->tbflags & FPSCR_FR ? (x) ^ 0x10 : (x)) +#define DREG(x) ((x) ^ ctx->fbank) #define CHECK_NOT_DELAY_SLOT \ if (ctx->envflags & DELAY_SLOT_MASK) { \ @@ -2232,6 +2233,7 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb) ctx.has_movcal = (ctx.tbflags & TB_FLAG_PENDING_MOVCA); ctx.gbank = ((ctx.tbflags & (1 << SR_MD)) && (ctx.tbflags & (1 << SR_RB))) * 0x10; + ctx.fbank = ctx.tbflags & FPSCR_FR ? 
0x10 : 0; max_insns = tb->cflags & CF_COUNT_MASK; if (max_insns == 0) { From 0f73753d621b2dddc87bc3d8889cab8636d41d15 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:39 -1000 Subject: [PATCH 18/31] target/sh4: Eliminate unused XREG macro Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-15-rth@twiddle.net> Signed-off-by: Aurelien Jarno --- target/sh4/translate.c | 1 - 1 file changed, 1 deletion(-) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index bc6f33970b..e5b88ac568 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -357,7 +357,6 @@ static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg) #define FREG(x) cpu_fregs[(x) ^ ctx->fbank] #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe)) -#define XREG(x) FREG(XHACK(x)) /* Assumes lsb of (x) is always 0 */ #define DREG(x) ((x) ^ ctx->fbank) From 1e0b21d856d7654ea683e743e964c3b292122081 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:40 -1000 Subject: [PATCH 19/31] target/sh4: Merge DREG into fpr64 routines Also add a debugging assert that we did signal illegal opc for odd double-precision registers. Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-16-rth@twiddle.net> [aurel32: fix whitespace issues] Signed-off-by: Aurelien Jarno --- target/sh4/translate.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index e5b88ac568..40724819e5 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -334,11 +334,17 @@ static void gen_delayed_conditional_jump(DisasContext * ctx) static inline void gen_load_fpr64(DisasContext *ctx, TCGv_i64 t, int reg) { + /* We have already signaled illegal instruction for odd Dr. 
*/ + tcg_debug_assert((reg & 1) == 0); + reg ^= ctx->fbank; tcg_gen_concat_i32_i64(t, cpu_fregs[reg + 1], cpu_fregs[reg]); } static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg) { + /* We have already signaled illegal instruction for odd Dr. */ + tcg_debug_assert((reg & 1) == 0); + reg ^= ctx->fbank; tcg_gen_extr_i64_i32(cpu_fregs[reg + 1], cpu_fregs[reg], t); } @@ -357,8 +363,6 @@ static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg) #define FREG(x) cpu_fregs[(x) ^ ctx->fbank] #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe)) -/* Assumes lsb of (x) is always 0 */ -#define DREG(x) ((x) ^ ctx->fbank) #define CHECK_NOT_DELAY_SLOT \ if (ctx->envflags & DELAY_SLOT_MASK) { \ @@ -1088,8 +1092,8 @@ static void _decode_opc(DisasContext * ctx) break; /* illegal instruction */ fp0 = tcg_temp_new_i64(); fp1 = tcg_temp_new_i64(); - gen_load_fpr64(ctx, fp0, DREG(B11_8)); - gen_load_fpr64(ctx, fp1, DREG(B7_4)); + gen_load_fpr64(ctx, fp0, B11_8); + gen_load_fpr64(ctx, fp1, B7_4); switch (ctx->opcode & 0xf00f) { case 0xf000: /* fadd Rm,Rn */ gen_helper_fadd_DT(fp0, cpu_env, fp0, fp1); @@ -1110,7 +1114,7 @@ static void _decode_opc(DisasContext * ctx) gen_helper_fcmp_gt_DT(cpu_sr_t, cpu_env, fp0, fp1); return; } - gen_store_fpr64(ctx, fp0, DREG(B11_8)); + gen_store_fpr64(ctx, fp0, B11_8); tcg_temp_free_i64(fp0); tcg_temp_free_i64(fp1); } else { @@ -1689,7 +1693,7 @@ static void _decode_opc(DisasContext * ctx) break; /* illegal instruction */ fp = tcg_temp_new_i64(); gen_helper_float_DT(fp, cpu_env, cpu_fpul); - gen_store_fpr64(ctx, fp, DREG(B11_8)); + gen_store_fpr64(ctx, fp, B11_8); tcg_temp_free_i64(fp); } else { @@ -1703,7 +1707,7 @@ static void _decode_opc(DisasContext * ctx) if (ctx->opcode & 0x0100) break; /* illegal instruction */ fp = tcg_temp_new_i64(); - gen_load_fpr64(ctx, fp, DREG(B11_8)); + gen_load_fpr64(ctx, fp, B11_8); gen_helper_ftrc_DT(cpu_fpul, cpu_env, fp); tcg_temp_free_i64(fp); } @@ -1725,9 +1729,9 @@ static void 
_decode_opc(DisasContext * ctx) if (ctx->opcode & 0x0100) break; /* illegal instruction */ TCGv_i64 fp = tcg_temp_new_i64(); - gen_load_fpr64(ctx, fp, DREG(B11_8)); + gen_load_fpr64(ctx, fp, B11_8); gen_helper_fsqrt_DT(fp, cpu_env, fp); - gen_store_fpr64(ctx, fp, DREG(B11_8)); + gen_store_fpr64(ctx, fp, B11_8); tcg_temp_free_i64(fp); } else { gen_helper_fsqrt_FT(FREG(B11_8), cpu_env, FREG(B11_8)); @@ -1753,7 +1757,7 @@ static void _decode_opc(DisasContext * ctx) { TCGv_i64 fp = tcg_temp_new_i64(); gen_helper_fcnvsd_FT_DT(fp, cpu_env, cpu_fpul); - gen_store_fpr64(ctx, fp, DREG(B11_8)); + gen_store_fpr64(ctx, fp, B11_8); tcg_temp_free_i64(fp); } return; @@ -1761,7 +1765,7 @@ static void _decode_opc(DisasContext * ctx) CHECK_FPU_ENABLED { TCGv_i64 fp = tcg_temp_new_i64(); - gen_load_fpr64(ctx, fp, DREG(B11_8)); + gen_load_fpr64(ctx, fp, B11_8); gen_helper_fcnvds_DT_FT(cpu_fpul, cpu_env, fp); tcg_temp_free_i64(fp); } From 4d57fa50d5208b92a06b0e08c32cc0bb7ab75aaf Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:41 -1000 Subject: [PATCH 20/31] target/sh4: Load/store Dr as 64-bit quantities This enforces proper alignment and makes the register update more natural. Note that there is a more serious bug fix for fmov {DX}Rn,@(R0,Rn) to use a store instead of a load. 
Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-17-rth@twiddle.net> Signed-off-by: Aurelien Jarno --- target/sh4/translate.c | 75 ++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 39 deletions(-) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index 40724819e5..7dfe23d1f4 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -992,12 +992,10 @@ static void _decode_opc(DisasContext * ctx) case 0xf00a: /* fmov {F,D,X}Rm,@Rn - FPSCR: Nothing */ CHECK_FPU_ENABLED if (ctx->tbflags & FPSCR_SZ) { - TCGv addr_hi = tcg_temp_new(); - int fr = XHACK(B7_4); - tcg_gen_addi_i32(addr_hi, REG(B11_8), 4); - tcg_gen_qemu_st_i32(FREG(fr), REG(B11_8), ctx->memidx, MO_TEUL); - tcg_gen_qemu_st_i32(FREG(fr + 1), addr_hi, ctx->memidx, MO_TEUL); - tcg_temp_free(addr_hi); + TCGv_i64 fp = tcg_temp_new_i64(); + gen_load_fpr64(ctx, fp, XHACK(B7_4)); + tcg_gen_qemu_st_i64(fp, REG(B11_8), ctx->memidx, MO_TEQ); + tcg_temp_free_i64(fp); } else { tcg_gen_qemu_st_i32(FREG(B7_4), REG(B11_8), ctx->memidx, MO_TEUL); } @@ -1005,12 +1003,10 @@ static void _decode_opc(DisasContext * ctx) case 0xf008: /* fmov @Rm,{F,D,X}Rn - FPSCR: Nothing */ CHECK_FPU_ENABLED if (ctx->tbflags & FPSCR_SZ) { - TCGv addr_hi = tcg_temp_new(); - int fr = XHACK(B11_8); - tcg_gen_addi_i32(addr_hi, REG(B7_4), 4); - tcg_gen_qemu_ld_i32(FREG(fr), REG(B7_4), ctx->memidx, MO_TEUL); - tcg_gen_qemu_ld_i32(FREG(fr + 1), addr_hi, ctx->memidx, MO_TEUL); - tcg_temp_free(addr_hi); + TCGv_i64 fp = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, MO_TEQ); + gen_store_fpr64(ctx, fp, XHACK(B11_8)); + tcg_temp_free_i64(fp); } else { tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL); } @@ -1018,13 +1014,11 @@ static void _decode_opc(DisasContext * ctx) case 0xf009: /* fmov @Rm+,{F,D,X}Rn - FPSCR: Nothing */ CHECK_FPU_ENABLED if (ctx->tbflags & FPSCR_SZ) { - TCGv addr_hi = tcg_temp_new(); - int fr = XHACK(B11_8); - tcg_gen_addi_i32(addr_hi, 
REG(B7_4), 4); - tcg_gen_qemu_ld_i32(FREG(fr), REG(B7_4), ctx->memidx, MO_TEUL); - tcg_gen_qemu_ld_i32(FREG(fr + 1), addr_hi, ctx->memidx, MO_TEUL); - tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 8); - tcg_temp_free(addr_hi); + TCGv_i64 fp = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, MO_TEQ); + gen_store_fpr64(ctx, fp, XHACK(B11_8)); + tcg_temp_free_i64(fp); + tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 8); } else { tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL); tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4); @@ -1032,18 +1026,21 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf00b: /* fmov {F,D,X}Rm,@-Rn - FPSCR: Nothing */ CHECK_FPU_ENABLED - TCGv addr = tcg_temp_new_i32(); - tcg_gen_subi_i32(addr, REG(B11_8), 4); - if (ctx->tbflags & FPSCR_SZ) { - int fr = XHACK(B7_4); - tcg_gen_qemu_st_i32(FREG(fr + 1), addr, ctx->memidx, MO_TEUL); - tcg_gen_subi_i32(addr, addr, 4); - tcg_gen_qemu_st_i32(FREG(fr), addr, ctx->memidx, MO_TEUL); - } else { - tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL); - } - tcg_gen_mov_i32(REG(B11_8), addr); - tcg_temp_free(addr); + { + TCGv addr = tcg_temp_new_i32(); + if (ctx->tbflags & FPSCR_SZ) { + TCGv_i64 fp = tcg_temp_new_i64(); + gen_load_fpr64(ctx, fp, XHACK(B7_4)); + tcg_gen_subi_i32(addr, REG(B11_8), 8); + tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, MO_TEQ); + tcg_temp_free_i64(fp); + } else { + tcg_gen_subi_i32(addr, REG(B11_8), 4); + tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL); + } + tcg_gen_mov_i32(REG(B11_8), addr); + tcg_temp_free(addr); + } return; case 0xf006: /* fmov @(R0,Rm),{F,D,X}Rm - FPSCR: Nothing */ CHECK_FPU_ENABLED @@ -1051,10 +1048,10 @@ static void _decode_opc(DisasContext * ctx) TCGv addr = tcg_temp_new_i32(); tcg_gen_add_i32(addr, REG(B7_4), REG(0)); if (ctx->tbflags & FPSCR_SZ) { - int fr = XHACK(B11_8); - tcg_gen_qemu_ld_i32(FREG(fr), addr, ctx->memidx, MO_TEUL); - tcg_gen_addi_i32(addr, addr, 4); - tcg_gen_qemu_ld_i32(FREG(fr + 1), addr, 
ctx->memidx, MO_TEUL); + TCGv_i64 fp = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(fp, addr, ctx->memidx, MO_TEQ); + gen_store_fpr64(ctx, fp, XHACK(B11_8)); + tcg_temp_free_i64(fp); } else { tcg_gen_qemu_ld_i32(FREG(B11_8), addr, ctx->memidx, MO_TEUL); } @@ -1067,10 +1064,10 @@ static void _decode_opc(DisasContext * ctx) TCGv addr = tcg_temp_new(); tcg_gen_add_i32(addr, REG(B11_8), REG(0)); if (ctx->tbflags & FPSCR_SZ) { - int fr = XHACK(B7_4); - tcg_gen_qemu_ld_i32(FREG(fr), addr, ctx->memidx, MO_TEUL); - tcg_gen_addi_i32(addr, addr, 4); - tcg_gen_qemu_ld_i32(FREG(fr + 1), addr, ctx->memidx, MO_TEUL); + TCGv_i64 fp = tcg_temp_new_i64(); + gen_load_fpr64(ctx, fp, XHACK(B7_4)); + tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, MO_TEQ); + tcg_temp_free_i64(fp); } else { tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL); } From bdcb3739024f3b6d53bd6dc34eaeafb3f2b996d9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:42 -1000 Subject: [PATCH 21/31] target/sh4: Simplify 64-bit fp reg-reg move We do not need to form full 64-bit quantities in order to perform the move. This reduces code expansion on 64-bit hosts. 
Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-18-rth@twiddle.net> Signed-off-by: Aurelien Jarno --- target/sh4/translate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index 7dfe23d1f4..792a46804b 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -981,10 +981,10 @@ static void _decode_opc(DisasContext * ctx) case 0xf00c: /* fmov {F,D,X}Rm,{F,D,X}Rn - FPSCR: Nothing */ CHECK_FPU_ENABLED if (ctx->tbflags & FPSCR_SZ) { - TCGv_i64 fp = tcg_temp_new_i64(); - gen_load_fpr64(ctx, fp, XHACK(B7_4)); - gen_store_fpr64(ctx, fp, XHACK(B11_8)); - tcg_temp_free_i64(fp); + int xsrc = XHACK(B7_4); + int xdst = XHACK(B11_8); + tcg_gen_mov_i32(FREG(xdst), FREG(xsrc)); + tcg_gen_mov_i32(FREG(xdst + 1), FREG(xsrc + 1)); } else { tcg_gen_mov_i32(FREG(B11_8), FREG(B7_4)); } From dec16c6ee8e665ec558f7564e68c09e01facf903 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:43 -1000 Subject: [PATCH 22/31] target/sh4: Unify code for CHECK_NOT_DELAY_SLOT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do not need to emit N copies of raising an exception. 
Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-19-rth@twiddle.net> Signed-off-by: Aurelien Jarno --- target/sh4/translate.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index 792a46804b..acd756e18a 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -365,11 +365,8 @@ static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg) #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe)) #define CHECK_NOT_DELAY_SLOT \ - if (ctx->envflags & DELAY_SLOT_MASK) { \ - gen_save_cpu_state(ctx, true); \ - gen_helper_raise_slot_illegal_instruction(cpu_env); \ - ctx->bstate = BS_EXCP; \ - return; \ + if (ctx->envflags & DELAY_SLOT_MASK) { \ + goto do_illegal_slot; \ } #define CHECK_PRIVILEGED \ @@ -1796,10 +1793,12 @@ static void _decode_opc(DisasContext * ctx) ctx->opcode, ctx->pc); fflush(stderr); #endif - gen_save_cpu_state(ctx, true); if (ctx->envflags & DELAY_SLOT_MASK) { + do_illegal_slot: + gen_save_cpu_state(ctx, true); gen_helper_raise_slot_illegal_instruction(cpu_env); } else { + gen_save_cpu_state(ctx, true); gen_helper_raise_illegal_instruction(cpu_env); } ctx->bstate = BS_EXCP; From 6b98213da9b025dad0f1bd307928a4f30e96a50d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:44 -1000 Subject: [PATCH 23/31] target/sh4: Unify code for CHECK_PRIVILEGED MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do not need to emit N copies of raising an exception. 
Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-20-rth@twiddle.net> Signed-off-by: Aurelien Jarno --- target/sh4/translate.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index acd756e18a..2476b948b9 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -369,16 +369,9 @@ static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg) goto do_illegal_slot; \ } -#define CHECK_PRIVILEGED \ - if (IS_USER(ctx)) { \ - gen_save_cpu_state(ctx, true); \ - if (ctx->envflags & DELAY_SLOT_MASK) { \ - gen_helper_raise_slot_illegal_instruction(cpu_env); \ - } else { \ - gen_helper_raise_illegal_instruction(cpu_env); \ - } \ - ctx->bstate = BS_EXCP; \ - return; \ +#define CHECK_PRIVILEGED \ + if (IS_USER(ctx)) { \ + goto do_illegal; \ } #define CHECK_FPU_ENABLED \ @@ -1793,6 +1786,7 @@ static void _decode_opc(DisasContext * ctx) ctx->opcode, ctx->pc); fflush(stderr); #endif + do_illegal: if (ctx->envflags & DELAY_SLOT_MASK) { do_illegal_slot: gen_save_cpu_state(ctx, true); From dec4f042a16b830c4d4613561f5f29e6a3263c1f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:45 -1000 Subject: [PATCH 24/31] target/sh4: Unify code for CHECK_FPU_ENABLED MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do not need to emit N copies of raising an exception. 
Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-21-rth@twiddle.net> Signed-off-by: Aurelien Jarno --- target/sh4/translate.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index 2476b948b9..b90719717e 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -374,16 +374,9 @@ static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg) goto do_illegal; \ } -#define CHECK_FPU_ENABLED \ - if (ctx->tbflags & (1u << SR_FD)) { \ - gen_save_cpu_state(ctx, true); \ - if (ctx->envflags & DELAY_SLOT_MASK) { \ - gen_helper_raise_slot_fpu_disable(cpu_env); \ - } else { \ - gen_helper_raise_fpu_disable(cpu_env); \ - } \ - ctx->bstate = BS_EXCP; \ - return; \ +#define CHECK_FPU_ENABLED \ + if (ctx->tbflags & (1u << SR_FD)) { \ + goto do_fpu_disabled; \ } static void _decode_opc(DisasContext * ctx) @@ -1796,6 +1789,17 @@ static void _decode_opc(DisasContext * ctx) gen_helper_raise_illegal_instruction(cpu_env); } ctx->bstate = BS_EXCP; + return; + + do_fpu_disabled: + gen_save_cpu_state(ctx, true); + if (ctx->envflags & DELAY_SLOT_MASK) { + gen_helper_raise_slot_fpu_disable(cpu_env); + } else { + gen_helper_raise_fpu_disable(cpu_env); + } + ctx->bstate = BS_EXCP; + return; } static void decode_opc(DisasContext * ctx) From 93dc9c89640bd4aa5e49d672209b5509e3afa7e8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:46 -1000 Subject: [PATCH 25/31] target/sh4: Tidy misc illegal insn checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have a do_illegal label, use goto in order to self-document the forcing of the exception. 
Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-22-rth@twiddle.net> [aurel32: fix whitespace issues] Signed-off-by: Aurelien Jarno --- target/sh4/translate.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index b90719717e..b16df1e70a 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -1068,8 +1068,9 @@ static void _decode_opc(DisasContext * ctx) if (ctx->tbflags & FPSCR_PR) { TCGv_i64 fp0, fp1; - if (ctx->opcode & 0x0110) - break; /* illegal instruction */ + if (ctx->opcode & 0x0110) { + goto do_illegal; + } fp0 = tcg_temp_new_i64(); fp1 = tcg_temp_new_i64(); gen_load_fpr64(ctx, fp0, B11_8); @@ -1131,7 +1132,7 @@ static void _decode_opc(DisasContext * ctx) { CHECK_FPU_ENABLED if (ctx->tbflags & FPSCR_PR) { - break; /* illegal instruction */ + goto do_illegal; } else { gen_helper_fmac_FT(FREG(B11_8), cpu_env, FREG(0), FREG(B7_4), FREG(B11_8)); @@ -1669,8 +1670,9 @@ static void _decode_opc(DisasContext * ctx) CHECK_FPU_ENABLED if (ctx->tbflags & FPSCR_PR) { TCGv_i64 fp; - if (ctx->opcode & 0x0100) - break; /* illegal instruction */ + if (ctx->opcode & 0x0100) { + goto do_illegal; + } fp = tcg_temp_new_i64(); gen_helper_float_DT(fp, cpu_env, cpu_fpul); gen_store_fpr64(ctx, fp, B11_8); @@ -1684,8 +1686,9 @@ static void _decode_opc(DisasContext * ctx) CHECK_FPU_ENABLED if (ctx->tbflags & FPSCR_PR) { TCGv_i64 fp; - if (ctx->opcode & 0x0100) - break; /* illegal instruction */ + if (ctx->opcode & 0x0100) { + goto do_illegal; + } fp = tcg_temp_new_i64(); gen_load_fpr64(ctx, fp, B11_8); gen_helper_ftrc_DT(cpu_fpul, cpu_env, fp); @@ -1706,8 +1709,9 @@ static void _decode_opc(DisasContext * ctx) case 0xf06d: /* fsqrt FRn */ CHECK_FPU_ENABLED if (ctx->tbflags & FPSCR_PR) { - if (ctx->opcode & 0x0100) - break; /* illegal instruction */ + if (ctx->opcode & 0x0100) { + goto do_illegal; + } 
TCGv_i64 fp = tcg_temp_new_i64(); gen_load_fpr64(ctx, fp, B11_8); gen_helper_fsqrt_DT(fp, cpu_env, fp); From 7e9f7ca86f83886ab2fa9ac7573635d1d6d97cf0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:47 -1000 Subject: [PATCH 26/31] target/sh4: Introduce CHECK_FPSCR_PR_* Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-23-rth@twiddle.net> Signed-off-by: Aurelien Jarno --- target/sh4/translate.c | 57 +++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index b16df1e70a..37e9ae32a3 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -379,6 +379,16 @@ static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg) goto do_fpu_disabled; \ } +#define CHECK_FPSCR_PR_0 \ + if (ctx->tbflags & FPSCR_PR) { \ + goto do_illegal; \ + } + +#define CHECK_FPSCR_PR_1 \ + if (!(ctx->tbflags & FPSCR_PR)) { \ + goto do_illegal; \ + } + static void _decode_opc(DisasContext * ctx) { /* This code tries to make movcal emulation sufficiently @@ -1129,16 +1139,11 @@ static void _decode_opc(DisasContext * ctx) } return; case 0xf00e: /* fmac FR0,RM,Rn */ - { - CHECK_FPU_ENABLED - if (ctx->tbflags & FPSCR_PR) { - goto do_illegal; - } else { - gen_helper_fmac_FT(FREG(B11_8), cpu_env, - FREG(0), FREG(B7_4), FREG(B11_8)); - return; - } - } + CHECK_FPU_ENABLED + CHECK_FPSCR_PR_0 + gen_helper_fmac_FT(FREG(B11_8), cpu_env, + FREG(0), FREG(B7_4), FREG(B11_8)); + return; } switch (ctx->opcode & 0xff00) { @@ -1726,16 +1731,14 @@ static void _decode_opc(DisasContext * ctx) break; case 0xf08d: /* fldi0 FRn - FPSCR: R[PR] */ CHECK_FPU_ENABLED - if (!(ctx->tbflags & FPSCR_PR)) { - tcg_gen_movi_i32(FREG(B11_8), 0); - } - return; + CHECK_FPSCR_PR_0 + tcg_gen_movi_i32(FREG(B11_8), 0); + return; case 0xf09d: /* fldi1 FRn - FPSCR: R[PR] */ CHECK_FPU_ENABLED - if (!(ctx->tbflags & FPSCR_PR)) { - 
tcg_gen_movi_i32(FREG(B11_8), 0x3f800000); - } - return; + CHECK_FPSCR_PR_0 + tcg_gen_movi_i32(FREG(B11_8), 0x3f800000); + return; case 0xf0ad: /* fcnvsd FPUL,DRn */ CHECK_FPU_ENABLED { @@ -1756,10 +1759,10 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf0ed: /* fipr FVm,FVn */ CHECK_FPU_ENABLED - if ((ctx->tbflags & FPSCR_PR) == 0) { - TCGv m, n; - m = tcg_const_i32((ctx->opcode >> 8) & 3); - n = tcg_const_i32((ctx->opcode >> 10) & 3); + CHECK_FPSCR_PR_1 + { + TCGv m = tcg_const_i32((ctx->opcode >> 8) & 3); + TCGv n = tcg_const_i32((ctx->opcode >> 10) & 3); gen_helper_fipr(cpu_env, m, n); tcg_temp_free(m); tcg_temp_free(n); @@ -1768,10 +1771,12 @@ static void _decode_opc(DisasContext * ctx) break; case 0xf0fd: /* ftrv XMTRX,FVn */ CHECK_FPU_ENABLED - if ((ctx->opcode & 0x0300) == 0x0100 && - (ctx->tbflags & FPSCR_PR) == 0) { - TCGv n; - n = tcg_const_i32((ctx->opcode >> 10) & 3); + CHECK_FPSCR_PR_1 + { + if ((ctx->opcode & 0x0300) != 0x0100) { + goto do_illegal; + } + TCGv n = tcg_const_i32((ctx->opcode >> 10) & 3); gen_helper_ftrv(cpu_env, n); tcg_temp_free(n); return; From ccae24d4985507d6a0cfc1c2f18f4dc60dedda89 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:48 -1000 Subject: [PATCH 27/31] target/sh4: Introduce CHECK_SH4A Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-24-rth@twiddle.net> [aurel32: fix conflict] Signed-off-by: Aurelien Jarno --- target/sh4/translate.c | 64 ++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 34 deletions(-) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index 37e9ae32a3..6362a4781e 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -389,6 +389,11 @@ static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg) goto do_illegal; \ } +#define CHECK_SH4A \ + if (!(ctx->features & SH_FEATURE_SH4A)) { \ + goto do_illegal; \ + } + static void _decode_opc(DisasContext * ctx) { 
/* This code tries to make movcal emulation sufficiently @@ -1467,7 +1472,7 @@ static void _decode_opc(DisasContext * ctx) LDST(ssr, 0x403e, 0x4037, 0x0032, 0x4033, CHECK_PRIVILEGED) LDST(spc, 0x404e, 0x4047, 0x0042, 0x4043, CHECK_PRIVILEGED) ST(sgr, 0x003a, 0x4032, CHECK_PRIVILEGED) - LD(sgr, 0x403a, 0x4036, CHECK_PRIVILEGED if (!(ctx->features & SH_FEATURE_SH4A)) break;) + LD(sgr, 0x403a, 0x4036, CHECK_PRIVILEGED CHECK_SH4A) LDST(dbr, 0x40fa, 0x40f6, 0x00fa, 0x40f2, CHECK_PRIVILEGED) LDST(mach, 0x400a, 0x4006, 0x000a, 0x4002, {}) LDST(macl, 0x401a, 0x4016, 0x001a, 0x4012, {}) @@ -1517,21 +1522,19 @@ static void _decode_opc(DisasContext * ctx) ctx->has_movcal = 1; return; case 0x40a9: /* movua.l @Rm,R0 */ + CHECK_SH4A /* Load non-boundary-aligned data */ - if (ctx->features & SH_FEATURE_SH4A) { - tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, - MO_TEUL | MO_UNALN); - return; - } + tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, + MO_TEUL | MO_UNALN); + return; break; case 0x40e9: /* movua.l @Rm+,R0 */ + CHECK_SH4A /* Load non-boundary-aligned data */ - if (ctx->features & SH_FEATURE_SH4A) { - tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, - MO_TEUL | MO_UNALN); - tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4); - return; - } + tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, + MO_TEUL | MO_UNALN); + tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4); + return; break; case 0x0029: /* movt Rn */ tcg_gen_mov_i32(REG(B11_8), cpu_sr_t); @@ -1542,7 +1545,8 @@ static void _decode_opc(DisasContext * ctx) If (T == 1) R0 -> (Rn) 0 -> LDST */ - if (ctx->features & SH_FEATURE_SH4A) { + CHECK_SH4A + { TCGLabel *label = gen_new_label(); tcg_gen_mov_i32(cpu_sr_t, cpu_ldst); tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ldst, 0, label); @@ -1550,8 +1554,7 @@ static void _decode_opc(DisasContext * ctx) gen_set_label(label); tcg_gen_movi_i32(cpu_ldst, 0); return; - } else - break; + } case 0x0063: /* MOVLI.L @Rm,R0 1 -> LDST @@ -1559,13 +1562,11 @@ static void 
_decode_opc(DisasContext * ctx) When interrupt/exception occurred 0 -> LDST */ - if (ctx->features & SH_FEATURE_SH4A) { - tcg_gen_movi_i32(cpu_ldst, 0); - tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL); - tcg_gen_movi_i32(cpu_ldst, 1); - return; - } else - break; + CHECK_SH4A + tcg_gen_movi_i32(cpu_ldst, 0); + tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL); + tcg_gen_movi_i32(cpu_ldst, 1); + return; case 0x0093: /* ocbi @Rn */ { gen_helper_ocbi(cpu_env, REG(B11_8)); @@ -1580,20 +1581,15 @@ static void _decode_opc(DisasContext * ctx) case 0x0083: /* pref @Rn */ return; case 0x00d3: /* prefi @Rn */ - if (ctx->features & SH_FEATURE_SH4A) - return; - else - break; + CHECK_SH4A + return; case 0x00e3: /* icbi @Rn */ - if (ctx->features & SH_FEATURE_SH4A) - return; - else - break; + CHECK_SH4A + return; case 0x00ab: /* synco */ - if (ctx->features & SH_FEATURE_SH4A) { - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); - return; - } + CHECK_SH4A + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); + return; break; case 0x4024: /* rotcl Rn */ { From 907759f9979d512eb072b8c31c921a78e44b8aa9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:49 -1000 Subject: [PATCH 28/31] target/sh4: Implement fpchg Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-25-rth@twiddle.net> Signed-off-by: Aurelien Jarno --- target/sh4/translate.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index 6362a4781e..1ed0349374 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -480,6 +480,11 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_SZ); ctx->bstate = BS_STOP; return; + case 0xf7fd: /* fpchg */ + CHECK_SH4A + tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_PR); + ctx->bstate = BS_STOP; + return; case 0x0009: /* nop */ return; case 0x001b: /* sleep */ From 61dedf2af79fb5866dc7a0f972093682f2185e17 Mon Sep 17 
00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:50 -1000 Subject: [PATCH 29/31] target/sh4: Add missing FPSCR.PR == 0 checks Both frchg and fschg require PR == 0, otherwise undefined_operation. Reviewed-by: Aurelien Jarno Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-26-rth@twiddle.net> Signed-off-by: Aurelien Jarno --- target/sh4/translate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index 1ed0349374..92a2c002fc 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -473,10 +473,12 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_movi_i32(cpu_sr_t, 1); return; case 0xfbfd: /* frchg */ + CHECK_FPSCR_PR_0 tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_FR); ctx->bstate = BS_STOP; return; case 0xf3fd: /* fschg */ + CHECK_FPSCR_PR_0 tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_SZ); ctx->bstate = BS_STOP; return; From 11b7aa234bc0cee6a2c6654993b2e083862f5216 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:51 -1000 Subject: [PATCH 30/31] target/sh4: Implement fsrra Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-27-rth@twiddle.net> Signed-off-by: Aurelien Jarno --- target/sh4/helper.h | 1 + target/sh4/op_helper.c | 16 ++++++++++++++++ target/sh4/translate.c | 2 ++ 3 files changed, 19 insertions(+) diff --git a/target/sh4/helper.h b/target/sh4/helper.h index 6c6fa04732..1e768fcbc7 100644 --- a/target/sh4/helper.h +++ b/target/sh4/helper.h @@ -37,6 +37,7 @@ DEF_HELPER_FLAGS_3(fsub_FT, TCG_CALL_NO_WG, f32, env, f32, f32) DEF_HELPER_FLAGS_3(fsub_DT, TCG_CALL_NO_WG, f64, env, f64, f64) DEF_HELPER_FLAGS_2(fsqrt_FT, TCG_CALL_NO_WG, f32, env, f32) DEF_HELPER_FLAGS_2(fsqrt_DT, TCG_CALL_NO_WG, f64, env, f64) +DEF_HELPER_FLAGS_2(fsrra_FT, TCG_CALL_NO_WG, f32, env, f32) DEF_HELPER_FLAGS_2(ftrc_FT, TCG_CALL_NO_WG, i32, env, f32) DEF_HELPER_FLAGS_2(ftrc_DT, TCG_CALL_NO_WG, i32, env, f64) DEF_HELPER_3(fipr, void, env, i32, 
i32) diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c index 8513f38849..d798f239cf 100644 --- a/target/sh4/op_helper.c +++ b/target/sh4/op_helper.c @@ -406,6 +406,22 @@ float64 helper_fsqrt_DT(CPUSH4State *env, float64 t0) return t0; } +float32 helper_fsrra_FT(CPUSH4State *env, float32 t0) +{ + set_float_exception_flags(0, &env->fp_status); + /* "Approximate" 1/sqrt(x) via actual computation. */ + t0 = float32_sqrt(t0, &env->fp_status); + t0 = float32_div(float32_one, t0, &env->fp_status); + /* Since this is supposed to be an approximation, an imprecision + exception is required. One supposes this also follows the usual + IEEE rule that other exceptions take precedence. */ + if (get_float_exception_flags(&env->fp_status) == 0) { + set_float_exception_flags(float_flag_inexact, &env->fp_status); + } + update_fpscr(env, GETPC()); + return t0; +} + float32 helper_fsub_FT(CPUSH4State *env, float32 t0, float32 t1) { set_float_exception_flags(0, &env->fp_status); diff --git a/target/sh4/translate.c b/target/sh4/translate.c index 92a2c002fc..ce84fbb966 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -1731,6 +1731,8 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf07d: /* fsrra FRn */ CHECK_FPU_ENABLED + CHECK_FPSCR_PR_0 + gen_helper_fsrra_FT(FREG(B11_8), cpu_env, FREG(B11_8)); break; case 0xf08d: /* fldi0 FRn - FPSCR: R[PR] */ CHECK_FPU_ENABLED From ec2eb22ebb3b36f39755414dcbe4f99c2c0562c9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 18 Jul 2017 10:02:52 -1000 Subject: [PATCH 31/31] target/sh4: Use tcg_gen_lookup_and_goto_ptr Signed-off-by: Richard Henderson Message-Id: <20170718200255.31647-28-rth@twiddle.net> [aurel32: fix whitespace] Signed-off-by: Aurelien Jarno --- target/sh4/translate.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index ce84fbb966..498bb99dc1 100644 --- a/target/sh4/translate.c +++
b/target/sh4/translate.c @@ -230,12 +230,15 @@ static inline void gen_save_cpu_state(DisasContext *ctx, bool save_pc) } } +static inline bool use_exit_tb(DisasContext *ctx) +{ + return (ctx->tbflags & GUSA_EXCLUSIVE) != 0; +} + static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest) { - if (unlikely(ctx->singlestep_enabled)) { - return false; - } - if (ctx->tbflags & GUSA_EXCLUSIVE) { + /* Use a direct jump if in same page and singlestep not enabled */ + if (unlikely(ctx->singlestep_enabled || use_exit_tb(ctx))) { return false; } #ifndef CONFIG_USER_ONLY @@ -248,28 +251,35 @@ static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest) static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) { if (use_goto_tb(ctx, dest)) { - /* Use a direct jump if in same page and singlestep not enabled */ tcg_gen_goto_tb(n); tcg_gen_movi_i32(cpu_pc, dest); tcg_gen_exit_tb((uintptr_t)ctx->tb + n); } else { tcg_gen_movi_i32(cpu_pc, dest); - if (ctx->singlestep_enabled) + if (ctx->singlestep_enabled) { gen_helper_debug(cpu_env); - tcg_gen_exit_tb(0); + } else if (use_exit_tb(ctx)) { + tcg_gen_exit_tb(0); + } else { + tcg_gen_lookup_and_goto_ptr(cpu_pc); + } } } static void gen_jump(DisasContext * ctx) { - if (ctx->delayed_pc == (uint32_t) - 1) { + if (ctx->delayed_pc == -1) { /* Target is not statically known, it comes necessarily from a delayed jump as immediate jump are conditinal jumps */ tcg_gen_mov_i32(cpu_pc, cpu_delayed_pc); tcg_gen_discard_i32(cpu_delayed_pc); - if (ctx->singlestep_enabled) + if (ctx->singlestep_enabled) { gen_helper_debug(cpu_env); - tcg_gen_exit_tb(0); + } else if (use_exit_tb(ctx)) { + tcg_gen_exit_tb(0); + } else { + tcg_gen_lookup_and_goto_ptr(cpu_pc); + } } else { gen_goto_tb(ctx, 0, ctx->delayed_pc); }