diff --git a/target-arm/helper.c b/target-arm/helper.c index 2c35ea4baf..c61c610919 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -347,6 +347,38 @@ uint32_t HELPER(clz)(uint32_t x) return count; } +int32_t HELPER(sdiv)(int32_t num, int32_t den) +{ + if (den == 0) + return 0; + /* INT32_MIN / -1 overflows (undefined in C, traps on x86); ARM SDIV yields 0x80000000. */ + if (num == INT32_MIN && den == -1) + return INT32_MIN; + return num / den; +} + +uint32_t HELPER(udiv)(uint32_t num, uint32_t den) +{ + if (den == 0) + return 0; + return num / den; +} + +uint32_t HELPER(rbit)(uint32_t x) +{ + x = ((x & 0xff000000) >> 24) + | ((x & 0x00ff0000) >> 8) + | ((x & 0x0000ff00) << 8) + | ((x & 0x000000ff) << 24); + x = ((x & 0xf0f0f0f0) >> 4) + | ((x & 0x0f0f0f0f) << 4); + x = ((x & 0x88888888) >> 3) + | ((x & 0x44444444) >> 1) + | ((x & 0x22222222) << 1) + | ((x & 0x11111111) << 3); + return x; +} + #if defined(CONFIG_USER_ONLY) void do_interrupt (CPUState *env) diff --git a/target-arm/helpers.h b/target-arm/helpers.h index 3b9eca07e3..9f608147a6 100644 --- a/target-arm/helpers.h +++ b/target-arm/helpers.h @@ -29,6 +29,9 @@ DEF_HELPER_1_2(sub_saturate, uint32_t, (uint32_t, uint32_t)) DEF_HELPER_1_2(add_usaturate, uint32_t, (uint32_t, uint32_t)) DEF_HELPER_1_2(sub_usaturate, uint32_t, (uint32_t, uint32_t)) DEF_HELPER_1_1(double_saturate, uint32_t, (int32_t)) +DEF_HELPER_1_2(sdiv, int32_t, (int32_t, int32_t)) +DEF_HELPER_1_2(udiv, uint32_t, (uint32_t, uint32_t)) +DEF_HELPER_1_1(rbit, uint32_t, (uint32_t)) #undef DEF_HELPER #undef DEF_HELPER_1_1 diff --git a/target-arm/op.c b/target-arm/op.c index 2ab99a2744..e714d41deb 100644 --- a/target-arm/op.c +++ b/target-arm/op.c @@ -59,11 +59,6 @@ void OPPROTO op_ ## sub ## l_T0_T1_cc(void) \ res = T0; \ } \ \ -void OPPROTO op_ ## sbc ## l_T0_T1(void) \ -{ \ - res = T0 - T1 + env->CF - 1; \ -} \ - \ void OPPROTO op_ ## sbc ## l_T0_T1_cc(void) \ { \ unsigned int src1; \ @@ -754,12 +749,6 @@ void OPPROTO op_vfp_fconsts(void) FT0s = vfp_itos(PARAM1); } -/* Copy the most significant bit of T0 to all bits of T1. 
*/ -void OPPROTO op_signbit_T1_T0(void) -{ - T1 = (int32_t)T0 >> 31; -} - void OPPROTO op_movl_cp_T0(void) { helper_set_cp(env, PARAM1, T0); @@ -1026,55 +1015,6 @@ static inline uint8_t sub8_usat(uint8_t a, uint8_t b) #include "op_addsub.h" -void OPPROTO op_pkhtb_T0_T1(void) -{ - T0 = (T0 & 0xffff0000) | (T1 & 0xffff); -} - -void OPPROTO op_pkhbt_T0_T1(void) -{ - T0 = (T0 & 0xffff) | (T1 & 0xffff0000); -} - -void OPPROTO op_rev16_T0(void) -{ - T0 = ((T0 & 0xff000000) >> 8) - | ((T0 & 0x00ff0000) << 8) - | ((T0 & 0x0000ff00) >> 8) - | ((T0 & 0x000000ff) << 8); -} - -void OPPROTO op_revsh_T0(void) -{ - T0 = (int16_t)( ((T0 & 0x0000ff00) >> 8) - | ((T0 & 0x000000ff) << 8)); -} - -void OPPROTO op_rbit_T0(void) -{ - T0 = ((T0 & 0xff000000) >> 24) - | ((T0 & 0x00ff0000) >> 8) - | ((T0 & 0x0000ff00) << 8) - | ((T0 & 0x000000ff) << 24); - T0 = ((T0 & 0xf0f0f0f0) >> 4) - | ((T0 & 0x0f0f0f0f) << 4); - T0 = ((T0 & 0x88888888) >> 3) - | ((T0 & 0x44444444) >> 1) - | ((T0 & 0x22222222) << 1) - | ((T0 & 0x11111111) << 3); -} - -/* Dual 16-bit signed multiply. */ -void OPPROTO op_mul_dual_T0_T1(void) -{ - int32_t low; - int32_t high; - low = (int32_t)(int16_t)T0 * (int32_t)(int16_t)T1; - high = (((int32_t)T0) >> 16) * (((int32_t)T1) >> 16); - T0 = low; - T1 = high; -} - void OPPROTO op_sel_T0_T1(void) { uint32_t mask; @@ -1094,11 +1034,6 @@ void OPPROTO op_sel_T0_T1(void) FORCE_RET(); } -void OPPROTO op_roundqd_T0_T1(void) -{ - T0 = T1 + ((uint32_t)T0 >> 31); -} - /* Signed saturation. */ static inline uint32_t do_ssat(int32_t val, int shift) { @@ -1191,66 +1126,6 @@ void OPPROTO op_usad8_T0_T1(void) T0 = sum; } -/* Thumb-2 instructions. */ - -/* Insert T1 into T0. Result goes in T1. */ -void OPPROTO op_bfi_T1_T0(void) -{ - int shift = PARAM1; - uint32_t mask = PARAM2; - uint32_t bits; - - bits = (T1 << shift) & mask; - T1 = (T0 & ~mask) | bits; -} - -/* Unsigned bitfield extract. 
*/ -void OPPROTO op_ubfx_T1(void) -{ - uint32_t shift = PARAM1; - uint32_t mask = PARAM2; - - T1 >>= shift; - T1 &= mask; -} - -/* Signed bitfield extract. */ -void OPPROTO op_sbfx_T1(void) -{ - uint32_t shift = PARAM1; - uint32_t width = PARAM2; - int32_t val; - - val = T1 << (32 - (shift + width)); - T1 = val >> (32 - width); -} - -void OPPROTO op_sdivl_T0_T1(void) -{ - int32_t num; - int32_t den; - num = T0; - den = T1; - if (den == 0) - T0 = 0; - else - T0 = num / den; - FORCE_RET(); -} - -void OPPROTO op_udivl_T0_T1(void) -{ - uint32_t num; - uint32_t den; - num = T0; - den = T1; - if (den == 0) - T0 = 0; - else - T0 = num / den; - FORCE_RET(); -} - void OPPROTO op_movl_T1_r13_banked(void) { T1 = helper_get_r13_banked(env, PARAM1); diff --git a/target-arm/translate.c b/target-arm/translate.c index 189be17445..e46cfb9ea1 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -219,6 +219,87 @@ static void store_reg(DisasContext *s, int reg, TCGv var) #define gen_op_subl_T0_T1_usaturate() \ gen_helper_sub_usaturate(cpu_T[0], cpu_T[0], cpu_T[1]) +/* Copy the most significant bit of T0 to all bits of T1. */ +#define gen_op_signbit_T1_T0() tcg_gen_sari_i32(cpu_T[1], cpu_T[0], 31) + +static void gen_smul_dual(TCGv a, TCGv b) +{ + TCGv tmp1 = new_tmp(); + TCGv tmp2 = new_tmp(); + TCGv res; + tcg_gen_ext16s_i32(tmp1, a); + tcg_gen_ext16s_i32(tmp2, b); + tcg_gen_mul_i32(tmp1, tmp1, tmp2); + dead_tmp(tmp2); + tcg_gen_sari_i32(a, a, 16); + tcg_gen_sari_i32(b, b, 16); + tcg_gen_mul_i32(b, b, a); + tcg_gen_mov_i32(a, tmp1); + dead_tmp(tmp1); +} + +/* Byteswap each halfword. */ +static void gen_rev16(TCGv var) +{ + TCGv tmp = new_tmp(); + tcg_gen_shri_i32(tmp, var, 8); + tcg_gen_andi_i32(tmp, tmp, 0x00ff00ff); + tcg_gen_shli_i32(var, var, 8); + tcg_gen_andi_i32(var, var, 0xff00ff00); + tcg_gen_or_i32(var, var, tmp); + dead_tmp(tmp); +} + +/* Byteswap low halfword and sign extend. 
*/ +static void gen_revsh(TCGv var) +{ + TCGv tmp = new_tmp(); + tcg_gen_shri_i32(tmp, var, 8); + tcg_gen_andi_i32(tmp, tmp, 0x00ff); + tcg_gen_shli_i32(var, var, 8); + tcg_gen_ext16s_i32(var, var); + tcg_gen_or_i32(var, var, tmp); + dead_tmp(tmp); +} + +/* Unsigned bitfield extract. */ +static void gen_ubfx(TCGv var, int shift, uint32_t mask) +{ + if (shift) + tcg_gen_shri_i32(var, var, shift); + tcg_gen_andi_i32(var, var, mask); +} + +/* Signed bitfield extract. */ +static void gen_sbfx(TCGv var, int shift, int width) +{ + uint32_t signbit; + + if (shift) + tcg_gen_sari_i32(var, var, shift); + if (shift + width < 32) { + signbit = 1u << (width - 1); + tcg_gen_andi_i32(var, var, (1u << width) - 1); + tcg_gen_xori_i32(var, var, signbit); + tcg_gen_subi_i32(var, var, signbit); + } +} + +/* Bitfield insertion. Insert val into base. Clobbers base and val. */ +static void gen_bfi(TCGv dest, TCGv base, TCGv val, int shift, uint32_t mask) +{ + tcg_gen_shli_i32(val, val, shift); + tcg_gen_andi_i32(val, val, mask); + tcg_gen_andi_i32(base, base, ~mask); + tcg_gen_or_i32(dest, base, val); +} + +static void gen_op_roundqd_T0_T1(void) +{ + tcg_gen_shri_i32(cpu_T[0], cpu_T[0], 31); + tcg_gen_add_i32(cpu_T[0], cpu_T[0], cpu_T[1]); +} + /* FIXME: Most targets have native widening multiplication. It would be good to use that instead of a full wide multiply. */ /* Unsigned 32x32->64 multiply. */ @@ -256,6 +337,7 @@ static void gen_swap_half(TCGv var) tcg_gen_shri_i32(tmp, var, 16); tcg_gen_shli_i32(var, var, 16); tcg_gen_or_i32(var, var, tmp); + dead_tmp(tmp); } /* Dual 16-bit add. Result placed in t0 and t1 is marked as dead. @@ -305,6 +387,20 @@ static void gen_adc_T0_T1(void) dead_tmp(tmp); } +/* dest = T0 - T1 + CF - 1. 
*/ +static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1) +{ + TCGv tmp = new_tmp(); + tcg_gen_sub_i32(dest, t0, t1); + tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUState, CF)); + tcg_gen_add_i32(dest, dest, tmp); + tcg_gen_subi_i32(dest, dest, 1); + dead_tmp(tmp); +} + +#define gen_sbc_T0_T1() gen_sub_carry(cpu_T[0], cpu_T[0], cpu_T[1]) +#define gen_rsc_T0_T1() gen_sub_carry(cpu_T[0], cpu_T[1], cpu_T[0]) + /* FIXME: Implement this natively. */ static inline void tcg_gen_not_i32(TCGv t0, TCGv t1) { @@ -4547,7 +4643,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) case 2: /* VREV16 */ if (size != 0) return 1; - gen_op_rev16_T0(); + gen_rev16(cpu_T[0]); break; case 4: case 5: /* VPADDL */ case 12: case 13: /* VPADAL */ @@ -4809,6 +4905,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) { unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh; TCGv tmp; + TCGv tmp2; insn = ldl_code(s->pc); s->pc += 4; @@ -5261,14 +5358,14 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) if (set_cc) gen_op_sbcl_T0_T1_cc(); else - gen_op_sbcl_T0_T1(); + gen_sbc_T0_T1(); gen_movl_reg_T0(s, rd); break; case 0x07: if (set_cc) gen_op_rscl_T0_T1_cc(); else - gen_op_rscl_T0_T1(); + gen_rsc_T0_T1(); gen_movl_reg_T0(s, rd); break; case 0x08: @@ -5505,16 +5602,23 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) case 1: if ((insn & 0x00700020) == 0) { /* Hafword pack. 
*/ - gen_movl_T0_reg(s, rn); - gen_movl_T1_reg(s, rm); + tmp = load_reg(s, rn); + tmp2 = load_reg(s, rm); shift = (insn >> 7) & 0x1f; if (shift) - gen_op_shll_T1_im(shift); - if (insn & (1 << 6)) - gen_op_pkhtb_T0_T1(); - else - gen_op_pkhbt_T0_T1(); - gen_movl_reg_T0(s, rd); + tcg_gen_shli_i32(tmp2, tmp2, shift); + if (insn & (1 << 6)) { + /* pkhtb */ + tcg_gen_andi_i32(tmp, tmp, 0xffff0000); + tcg_gen_andi_i32(tmp2, tmp2, 0xffff); + } else { + /* pkhbt */ + tcg_gen_andi_i32(tmp, tmp, 0xffff); + tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000); + } + tcg_gen_or_i32(tmp, tmp, tmp2); + dead_tmp(tmp2); + store_reg(s, rd, tmp); } else if ((insn & 0x00200020) == 0x00200000) { /* [us]sat */ gen_movl_T1_reg(s, rm); @@ -5583,14 +5686,14 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) gen_movl_T0_reg(s, rm); if (insn & (1 << 22)) { if (insn & (1 << 7)) { - gen_op_revsh_T0(); + gen_revsh(cpu_T[0]); } else { ARCH(6T2); - gen_op_rbit_T0(); + gen_helper_rbit(cpu_T[0], cpu_T[0]); } } else { if (insn & (1 << 7)) - gen_op_rev16_T0(); + gen_rev16(cpu_T[0]); else gen_op_rev_T0(); } @@ -5621,7 +5724,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) } else { if (insn & (1 << 5)) gen_swap_half(cpu_T[1]); - gen_op_mul_dual_T0_T1(); + gen_smul_dual(cpu_T[0], cpu_T[1]); if (insn & (1 << 22)) { if (insn & (1 << 6)) { /* smlald */ @@ -5675,7 +5778,8 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) } if (i != 32) { gen_movl_T0_reg(s, rd); - gen_op_bfi_T1_T0(shift, ((1u << i) - 1) << shift); + gen_bfi(cpu_T[1], cpu_T[0], cpu_T[1], + shift, ((1u << i) - 1) << shift); } gen_movl_reg_T1(s, rd); break; @@ -5688,9 +5792,9 @@ static void disas_arm_insn(CPUState * env, DisasContext *s) goto illegal_op; if (i < 32) { if (op1 & 0x20) { - gen_op_ubfx_T1(shift, (1u << i) - 1); + gen_ubfx(cpu_T[1], shift, (1u << i) - 1); } else { - gen_op_sbfx_T1(shift, i); + gen_sbfx(cpu_T[1], shift, i); } } gen_movl_reg_T1(s, rd); @@ -5984,7 +6088,7 @@ gen_thumb2_data_op(DisasContext *s, int op, int 
conds, uint32_t shifter_out) if (conds) gen_op_sbcl_T0_T1_cc(); else - gen_op_sbcl_T0_T1(); + gen_sbc_T0_T1(); break; case 13: /* sub */ if (conds) @@ -6381,16 +6485,16 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) gen_movl_T0_reg(s, rn); switch (op) { case 0x0a: /* rbit */ - gen_op_rbit_T0(); + gen_helper_rbit(cpu_T[0], cpu_T[0]); break; case 0x08: /* rev */ gen_op_rev_T0(); break; case 0x09: /* rev16 */ - gen_op_rev16_T0(); + gen_rev16(cpu_T[0]); break; case 0x0b: /* revsh */ - gen_op_revsh_T0(); + gen_revsh(cpu_T[0]); break; case 0x10: /* sel */ gen_movl_T1_reg(s, rm); @@ -6433,7 +6537,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) case 4: /* Dual multiply subtract. */ if (op) gen_swap_half(cpu_T[1]); - gen_op_mul_dual_T0_T1(); + gen_smul_dual(cpu_T[0], cpu_T[1]); /* This addition cannot overflow. */ if (insn & (1 << 22)) { gen_op_subl_T0_T1(); @@ -6495,15 +6599,15 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) if (!arm_feature(env, ARM_FEATURE_DIV)) goto illegal_op; if (op & 0x20) - gen_op_udivl_T0_T1(); + gen_helper_udiv(cpu_T[0], cpu_T[0], cpu_T[1]); else - gen_op_sdivl_T0_T1(); + gen_helper_sdiv(cpu_T[0], cpu_T[0], cpu_T[1]); gen_movl_reg_T0(s, rd); } else if ((op & 0xe) == 0xc) { /* Dual multiply accumulate long. */ if (op & 1) gen_swap_half(cpu_T[1]); - gen_op_mul_dual_T0_T1(); + gen_smul_dual(cpu_T[0], cpu_T[1]); if (op & 0x10) { gen_op_subl_T0_T1(); } else { @@ -6727,14 +6831,14 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) if (shift + imm > 32) goto illegal_op; if (imm < 32) - gen_op_sbfx_T1(shift, imm); + gen_sbfx(cpu_T[1], shift, imm); break; case 6: /* Unsigned bitfield extract. */ imm++; if (shift + imm > 32) goto illegal_op; if (imm < 32) - gen_op_ubfx_T1(shift, (1u << imm) - 1); + gen_ubfx(cpu_T[1], shift, (1u << imm) - 1); break; case 3: /* Bitfield insert/clear. 
*/ if (imm < shift) @@ -6742,7 +6846,8 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1) imm = imm + 1 - shift; if (imm != 32) { gen_movl_T0_reg(s, rd); - gen_op_bfi_T1_T0(shift, ((1u << imm) - 1) << shift); + gen_bfi(cpu_T[1], cpu_T[0], cpu_T[1], + shift, ((1u << imm) - 1) << shift); } break; case 7: @@ -7161,7 +7266,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) break; case 0x6: /* sbc */ if (s->condexec_mask) - gen_op_sbcl_T0_T1(); + gen_sbc_T0_T1(); else gen_op_sbcl_T0_T1_cc(); break; @@ -7479,8 +7584,8 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s) gen_movl_T0_reg(s, rn); switch ((insn >> 6) & 3) { case 0: gen_op_rev_T0(); break; - case 1: gen_op_rev16_T0(); break; - case 3: gen_op_revsh_T0(); break; + case 1: gen_rev16(cpu_T[0]); break; + case 3: gen_revsh(cpu_T[0]); break; default: goto illegal_op; } gen_movl_reg_T0(s, rd);