mirror of https://gitee.com/openkylin/qemu.git
target/arm: Implement MVE VCTP
Implement the MVE VCTP insn, which sets the VPR.P0 predicate bits so that any element at index Rn or greater is predicated. As with VPNOT, this insn itself is predicable and subject to beatwise execution. The calculation of the mask is the same as is used to determine ltpmask in mve_element_mask(), but we precalculate masklen in generated code to avoid having to have 4 helpers specialized by size. We put the decode line in with the low-overhead-loop insns in t32.decode because it's logically part of that collection of insn patterns, even though it is an MVE-only insn. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
parent
fea3958fa1
commit
0f31e37c7f
|
@ -121,6 +121,8 @@ DEF_HELPER_FLAGS_4(mve_veor, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
|
|||
DEF_HELPER_FLAGS_4(mve_vpsel, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
|
||||
DEF_HELPER_FLAGS_1(mve_vpnot, TCG_CALL_NO_WG, void, env)
|
||||
|
||||
DEF_HELPER_FLAGS_2(mve_vctp, TCG_CALL_NO_WG, void, env, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(mve_vaddb, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
|
||||
DEF_HELPER_FLAGS_4(mve_vaddh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
|
||||
DEF_HELPER_FLAGS_4(mve_vaddw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
|
||||
|
|
|
@ -2218,6 +2218,26 @@ void HELPER(mve_vpnot)(CPUARMState *env)
|
|||
mve_advance_vpt(env);
|
||||
}
|
||||
|
||||
/*
|
||||
* VCTP: P0 unexecuted bits unchanged, predicated bits zeroed,
|
||||
* otherwise set according to value of Rn. The calculation of
|
||||
* newmask here works in the same way as the calculation of the
|
||||
* ltpmask in mve_element_mask(), but we have pre-calculated
|
||||
* the masklen in the generated code.
|
||||
*/
|
||||
void HELPER(mve_vctp)(CPUARMState *env, uint32_t masklen)
|
||||
{
|
||||
uint16_t mask = mve_element_mask(env);
|
||||
uint16_t eci_mask = mve_eci_mask(env);
|
||||
uint16_t newmask;
|
||||
|
||||
assert(masklen <= 16);
|
||||
newmask = masklen ? MAKE_64BIT_MASK(0, masklen) : 0;
|
||||
newmask &= mask;
|
||||
env->v7m.vpr = (env->v7m.vpr & ~(uint32_t)eci_mask) | (newmask & eci_mask);
|
||||
mve_advance_vpt(env);
|
||||
}
|
||||
|
||||
#define DO_1OP_SAT(OP, ESIZE, TYPE, FN) \
|
||||
void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm) \
|
||||
{ \
|
||||
|
|
|
@ -748,5 +748,6 @@ BL 1111 0. .......... 11.1 ............ @branch24
|
|||
# This is DLSTP
|
||||
DLS 1111 0 0000 0 size:2 rn:4 1110 0000 0000 0001
|
||||
}
|
||||
VCTP 1111 0 0000 0 size:2 rn:4 1110 1000 0000 0001
|
||||
]
|
||||
}
|
||||
|
|
|
@ -48,6 +48,7 @@ long neon_element_offset(int reg, int element, MemOp memop);
|
|||
void gen_rev16(TCGv_i32 dest, TCGv_i32 var);
|
||||
void clear_eci_state(DisasContext *s);
|
||||
bool mve_eci_check(DisasContext *s);
|
||||
void mve_update_eci(DisasContext *s);
|
||||
void mve_update_and_store_eci(DisasContext *s);
|
||||
bool mve_skip_vmov(DisasContext *s, int vn, int index, int size);
|
||||
|
||||
|
|
|
@ -93,7 +93,7 @@ bool mve_eci_check(DisasContext *s)
|
|||
}
|
||||
}
|
||||
|
||||
static void mve_update_eci(DisasContext *s)
|
||||
void mve_update_eci(DisasContext *s)
|
||||
{
|
||||
/*
|
||||
* The helper function will always update the CPUState field,
|
||||
|
|
|
@ -8669,6 +8669,39 @@ static bool trans_LCTP(DisasContext *s, arg_LCTP *a)
|
|||
return true;
|
||||
}
|
||||
|
||||
/*
 * Translate VCTP (M-profile Create Vector Tail Predicate).
 *
 * The insn is itself predicated and is subject to beatwise execution.
 *
 * Returns false when the MVE feature check fails or Rn is 13 or 15.
 * Returns true otherwise, including when mve_eci_check() or
 * vfp_access_check() fails and no helper call is emitted.
 */
static bool trans_VCTP(DisasContext *s, arg_VCTP *a)
{
    TCGv_i32 scaled, len;
    int limit;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    if (a->rn == 13 || a->rn == 15) {
        return false;
    }

    if (!mve_eci_check(s)) {
        return true;
    }
    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * Work out the mask length at translate time so that a single
     * helper serves every element size.  The value handed to the
     * helper is "rn <= (1 << (4 - size)) ? (rn << size) : 16".
     */
    limit = 1 << (4 - a->size);
    scaled = tcg_temp_new_i32();
    len = load_reg(s, a->rn);
    tcg_gen_shli_i32(scaled, len, a->size);
    /* len still holds the raw Rn value when it is compared here. */
    tcg_gen_movcond_i32(TCG_COND_LEU, len,
                        len, tcg_constant_i32(limit),
                        scaled, tcg_constant_i32(16));
    gen_helper_mve_vctp(cpu_env, len);
    tcg_temp_free_i32(len);
    tcg_temp_free_i32(scaled);
    mve_update_eci(s);
    return true;
}
|
||||
|
||||
static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue