mirror of https://gitee.com/openkylin/qemu.git
target/arm: Implement MVE VCLZ
Implement the MVE VCLZ insn (and the necessary machinery for MVE 1-input vector ops). Note that for non-load instructions predication is always performed at a byte level granularity regardless of element size (R_ZLSJ), and so the masking logic here differs from that used in the VLDR and VSTR helpers. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20210617121628.20116-4-peter.maydell@linaro.org
This commit is contained in:
parent
2fc6b7510c
commit
0f0f2bd548
|
@ -32,3 +32,7 @@ DEF_HELPER_FLAGS_3(mve_vldrh_uw, TCG_CALL_NO_WG, void, env, ptr, i32)
|
|||
DEF_HELPER_FLAGS_3(mve_vstrb_h, TCG_CALL_NO_WG, void, env, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(mve_vstrb_w, TCG_CALL_NO_WG, void, env, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(mve_vstrh_w, TCG_CALL_NO_WG, void, env, ptr, i32)
|
||||
|
||||
/*
 * MVE VCLZ helpers, one per element size (b, h, w).
 * Each is declared as returning void and taking (env, ptr, ptr).
 */
DEF_HELPER_FLAGS_3(mve_vclzb, TCG_CALL_NO_WG, void, env, ptr, ptr)
|
||||
DEF_HELPER_FLAGS_3(mve_vclzh, TCG_CALL_NO_WG, void, env, ptr, ptr)
|
||||
DEF_HELPER_FLAGS_3(mve_vclzw, TCG_CALL_NO_WG, void, env, ptr, ptr)
|
||||
|
|
|
@ -20,13 +20,17 @@
|
|||
#
|
||||
|
||||
# Q register number fields, each built from two pos:len pieces:
# a 1-bit piece (bit 22 for qd, bit 5 for qm) followed by a 3-bit piece
# (starting at bit 13 for qd, bit 1 for qm).
%qd 22:1 13:3
|
||||
%qm 5:1 1:3
|
||||
|
||||
&vldr_vstr rn qd imm p a w size l u
|
||||
# Argument set for 1-input vector ops: the two Q register numbers
# (qd, qm) and the element size selector.
&1op qd qm size
|
||||
|
||||
@vldr_vstr ....... . . . . l:1 rn:4 ... ...... imm:7 &vldr_vstr qd=%qd u=0
|
||||
# Note that both Rn and Qd are 3 bits only (no D bit)
|
||||
@vldst_wn ... u:1 ... . . . . l:1 . rn:3 qd:3 . ... .. imm:7 &vldr_vstr
|
||||
|
||||
# Format for 1-input vector ops: a 2-bit size field taken directly from
# the encoding, with qd and qm filled in from the %qd and %qm fields.
@1op .... .... .... size:2 .. .... .... .... .... &1op qd=%qd qm=%qm
|
||||
|
||||
# Vector loads and stores
|
||||
|
||||
# Widening loads and narrowing stores:
|
||||
|
@ -61,3 +65,7 @@ VLDR_VSTR 1110110 1 a:1 . w:1 . .... ... 111101 ....... @vldr_vstr \
|
|||
size=1 p=1
|
||||
VLDR_VSTR 1110110 1 a:1 . w:1 . .... ... 111110 ....... @vldr_vstr \
|
||||
size=2 p=1
|
||||
|
||||
# Vector miscellaneous
|
||||
|
||||
# VCLZ: fixed opcode bits below; qd, qm and size come from the @1op format.
VCLZ 1111 1111 1 . 11 .. 00 ... 0 0100 11 . 0 ... 0 @1op
|
||||
|
|
|
@ -181,3 +181,85 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
|
|||
|
||||
#undef DO_VLDR
|
||||
#undef DO_VSTR
|
||||
|
||||
/*
|
||||
* The mergemask(D, R, M) macro performs the operation "*D = R" but
|
||||
* storing only the bytes which correspond to 1 bits in M,
|
||||
* leaving other bytes in *D unchanged. We use _Generic
|
||||
* to select the correct implementation based on the type of D.
|
||||
*/
|
||||
|
||||
/*
 * Byte-sized merge: write r to *d only when bit 0 of mask is set;
 * otherwise *d is left untouched. Higher bits of mask are ignored.
 */
static void mergemask_ub(uint8_t *d, uint8_t r, uint16_t mask)
{
    if (!(mask & 1)) {
        return;
    }
    *d = r;
}
|
||||
|
||||
/*
 * Signed-byte variant: reinterprets the destination as uint8_t and
 * forwards to mergemask_ub(), so the merge rule is the same.
 */
static void mergemask_sb(int8_t *d, int8_t r, uint16_t mask)
{
    uint8_t *dest = (uint8_t *)d;

    mergemask_ub(dest, r, mask);
}
|
||||
|
||||
static void mergemask_uh(uint16_t *d, uint16_t r, uint16_t mask)
|
||||
{
|
||||
uint16_t bmask = expand_pred_b_data[mask & 3];
|
||||
*d = (*d & ~bmask) | (r & bmask);
|
||||
}
|
||||
|
||||
/*
 * Signed-halfword variant: reinterprets the destination as uint16_t
 * and forwards to mergemask_uh().
 */
static void mergemask_sh(int16_t *d, int16_t r, uint16_t mask)
{
    uint16_t *dest = (uint16_t *)d;

    mergemask_uh(dest, r, mask);
}
|
||||
|
||||
static void mergemask_uw(uint32_t *d, uint32_t r, uint16_t mask)
|
||||
{
|
||||
uint32_t bmask = expand_pred_b_data[mask & 0xf];
|
||||
*d = (*d & ~bmask) | (r & bmask);
|
||||
}
|
||||
|
||||
/*
 * Signed-word variant: reinterprets the destination as uint32_t and
 * forwards to mergemask_uw().
 */
static void mergemask_sw(int32_t *d, int32_t r, uint16_t mask)
{
    uint32_t *dest = (uint32_t *)d;

    mergemask_uw(dest, r, mask);
}
|
||||
|
||||
static void mergemask_uq(uint64_t *d, uint64_t r, uint16_t mask)
|
||||
{
|
||||
uint64_t bmask = expand_pred_b_data[mask & 0xff];
|
||||
*d = (*d & ~bmask) | (r & bmask);
|
||||
}
|
||||
|
||||
/*
 * Signed-doubleword variant: reinterprets the destination as uint64_t
 * and forwards to mergemask_uq().
 */
static void mergemask_sq(int64_t *d, int64_t r, uint16_t mask)
{
    uint64_t *dest = (uint64_t *)d;

    mergemask_uq(dest, r, mask);
}
|
||||
|
||||
/*
 * Type-generic front end: picks the mergemask_* function matching the
 * pointer type of D and calls it with (D, R, M). Unsigned destination
 * types are listed first, then their signed counterparts.
 */
#define mergemask(D, R, M)                      \
    _Generic(D,                                 \
             uint8_t *: mergemask_ub,           \
             uint16_t *: mergemask_uh,          \
             uint32_t *: mergemask_uw,          \
             uint64_t *: mergemask_uq,          \
             int8_t *: mergemask_sb,            \
             int16_t *: mergemask_sh,           \
             int32_t *: mergemask_sw,           \
             int64_t *: mergemask_sq)(D, R, M)
|
||||
|
||||
/*
 * Define HELPER(mve_<OP>): a 1-input vector operation.
 *
 *   OP    - helper name suffix
 *   ESIZE - element size in bytes; 16 / ESIZE elements are processed
 *   TYPE  - C type of one element
 *   FN    - expression applied to each source element
 *
 * The predicate mask comes from mve_element_mask(env) and is shifted
 * right by ESIZE after each element, so each mergemask() call sees the
 * bits for the current element in the low positions. Elements are
 * addressed through the H<ESIZE>() index macro (defined elsewhere).
 * mve_advance_vpt(env) is called once all elements have been handled.
 */
#define DO_1OP(OP, ESIZE, TYPE, FN)                                     \
    void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm)         \
    {                                                                   \
        TYPE *dst = vd;                                                 \
        TYPE *src = vm;                                                 \
        uint16_t pred = mve_element_mask(env);                          \
        unsigned idx = 0;                                               \
        while (idx < 16 / ESIZE) {                                      \
            mergemask(&dst[H##ESIZE(idx)],                              \
                      FN(src[H##ESIZE(idx)]), pred);                    \
            pred >>= ESIZE;                                             \
            idx++;                                                      \
        }                                                               \
        mve_advance_vpt(env);                                           \
    }
|
||||
|
||||
#define DO_CLZ_B(N) (clz32(N) - 24)
|
||||
#define DO_CLZ_H(N) (clz32(N) - 16)
|
||||
|
||||
DO_1OP(vclzb, 1, uint8_t, DO_CLZ_B)
|
||||
DO_1OP(vclzh, 2, uint16_t, DO_CLZ_H)
|
||||
DO_1OP(vclzw, 4, uint32_t, clz32)
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include "decode-mve.c.inc"
|
||||
|
||||
/* Generator function type taking two pointer temps and an i32 temp. */
typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
|
||||
/*
 * Generator function type for 1-input vector ops; do_1op() calls it as
 * fn(cpu_env, qd, qm).
 */
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
|
||||
|
||||
/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
|
||||
static inline long mve_qreg_offset(unsigned reg)
|
||||
|
@ -160,3 +161,40 @@ static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
|
|||
DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h)
|
||||
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w)
|
||||
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w)
|
||||
|
||||
/*
 * Common translation path for MVE 1-input vector operations.
 *
 * s:  disassembly context
 * a:  decoded arguments (qd, qm; size has already been used by the
 *     caller to select fn)
 * fn: generator for the helper call, or NULL if there is none
 *
 * Returns false when the aa32_mve feature is absent, when
 * mve_check_qreg_bank() rejects the registers, or when fn is NULL.
 * Otherwise returns true; the helper call is only emitted if both
 * mve_eci_check() and vfp_access_check() succeed.
 */
static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    TCGv_ptr dst_ptr, src_ptr;

    /* Checks are made in this order; the first failure returns false. */
    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    if (!mve_check_qreg_bank(s, a->qd | a->qm)) {
        return false;
    }
    if (!fn) {
        return false;
    }

    /*
     * A failed ECI check skips the VFP access check entirely; either
     * failure returns true without emitting the operation.
     */
    if (!mve_eci_check(s)) {
        return true;
    }
    if (!vfp_access_check(s)) {
        return true;
    }

    dst_ptr = mve_qreg_ptr(a->qd);
    src_ptr = mve_qreg_ptr(a->qm);
    fn(cpu_env, dst_ptr, src_ptr);
    tcg_temp_free_ptr(dst_ptr);
    tcg_temp_free_ptr(src_ptr);
    mve_update_eci(s);
    return true;
}
|
||||
|
||||
#define DO_1OP(INSN, FN) \
|
||||
static bool trans_##INSN(DisasContext *s, arg_1op *a) \
|
||||
{ \
|
||||
static MVEGenOneOpFn * const fns[] = { \
|
||||
gen_helper_mve_##FN##b, \
|
||||
gen_helper_mve_##FN##h, \
|
||||
gen_helper_mve_##FN##w, \
|
||||
NULL, \
|
||||
}; \
|
||||
return do_1op(s, a, fns[a->size]); \
|
||||
}
|
||||
|
||||
DO_1OP(VCLZ, vclz)
|
||||
|
|
Loading…
Reference in New Issue