From be53081a619443dc4512039d89345475ef7d9a46 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Wed, 17 May 2017 00:48:18 +0200 Subject: [PATCH] target/sh4: fix RTE instruction delay slot The ReTurn from Exception (RTE) instruction loads the system register (SR) with the saved system register (SSR). It has a delay slot, and behaves specially according to the SH4 manual: The SR value accessed by the instruction in the RTE delay slot is the value restored from SSR by the RTE instruction. The SR and MD values defined prior to RTE execution are used to fetch the instruction in the RTE delay slot. The instruction in the delay slot being often a NOP, it doesn't cause any issue most of the time except in some rare cases where the NOP is being splitted in a different TB (for example when the TCG op buffer is full). In that case the NOP is fetched with the user permissions and causes an instruction TLB protection violation exception. This patches fixes that by introducing a new delay slot flag for the RTE instruction. Given it's a privileged instruction, the RTE delay slot instruction is always fetched in privileged mode. It is therefore enough to to check for this flag in cpu_mmu_index. Reviewed-by: Richard Henderson Signed-off-by: Aurelien Jarno --- target/sh4/cpu.h | 13 ++++++++++--- target/sh4/translate.c | 8 ++++++-- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h index 7969c9af98..ffb91687b8 100644 --- a/target/sh4/cpu.h +++ b/target/sh4/cpu.h @@ -91,9 +91,10 @@ #define FPSCR_RM_NEAREST (0 << 0) #define FPSCR_RM_ZERO (1 << 0) -#define DELAY_SLOT_MASK 0x3 +#define DELAY_SLOT_MASK 0x7 #define DELAY_SLOT (1 << 0) #define DELAY_SLOT_CONDITIONAL (1 << 1) +#define DELAY_SLOT_RTE (1 << 2) typedef struct tlb_t { uint32_t vpn; /* virtual page number */ @@ -264,7 +265,13 @@ void cpu_load_tlb(CPUSH4State * env); #define MMU_USER_IDX 1 static inline int cpu_mmu_index (CPUSH4State *env, bool ifetch) { - return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0; + /* The instruction in a RTE delay slot is fetched in privileged + mode, but executed in user mode. */ + if (ifetch && (env->flags & DELAY_SLOT_RTE)) { + return 0; + } else { + return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0; + } } #include "exec/cpu-all.h" @@ -381,7 +388,7 @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc, { *pc = env->pc; *cs_base = 0; - *flags = (env->flags & DELAY_SLOT_MASK) /* Bits 0- 1 */ + *flags = (env->flags & DELAY_SLOT_MASK) /* Bits 0- 2 */ | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */ | (env->sr & ((1u << SR_MD) | (1u << SR_RB))) /* Bits 29-30 */ | (env->sr & (1u << SR_FD)) /* Bit 15 */ diff --git a/target/sh4/translate.c b/target/sh4/translate.c index aba316f593..8bc132b27b 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -185,6 +185,9 @@ void superh_cpu_dump_state(CPUState *cs, FILE *f, } else if (env->flags & DELAY_SLOT_CONDITIONAL) { cpu_fprintf(f, "in conditional delay slot (delayed_pc=0x%08x)\n", env->delayed_pc); + } else if (env->flags & DELAY_SLOT_RTE) { + cpu_fprintf(f, "in rte delay slot (delayed_pc=0x%08x)\n", + env->delayed_pc); } } @@ -427,8 +430,9 @@ static void _decode_opc(DisasContext * ctx) CHECK_NOT_DELAY_SLOT gen_write_sr(cpu_ssr); tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc); - ctx->envflags |= DELAY_SLOT; + ctx->envflags |= DELAY_SLOT_RTE; ctx->delayed_pc = (uint32_t) - 1; + ctx->bstate = BS_STOP; return; case 0x0058: /* sets */ tcg_gen_ori_i32(cpu_sr, cpu_sr, (1u << SR_S)); @@ -1804,7 +1808,7 @@ static void decode_opc(DisasContext * ctx) ctx->bstate = BS_BRANCH; if (old_flags & DELAY_SLOT_CONDITIONAL) { gen_delayed_conditional_jump(ctx); - } else if (old_flags & DELAY_SLOT) { + } else { gen_jump(ctx); }