mirror of https://gitee.com/openkylin/qemu.git
target/arm: Pass pointer to qc to qrdmla/qrdmls
Pass a pointer directly to env->vfp.qc[0], rather than env. This will allow SVE2, which does not modify QC, to pass a pointer to dummy storage.

Change the return type of inl_qrdmlah_s16 and inl_qrdmlsh_s16 from uint16_t to int16_t to match the sense of the operation: signed.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200513163245.17915-14-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent
146aa66ce5
commit
e286bf4a72
|
@ -3629,6 +3629,18 @@ static const uint8_t neon_2rm_sizes[] = {
|
|||
[NEON_2RM_VCVT_UF] = 0x4,
|
||||
};
|
||||
|
||||
static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
|
||||
uint32_t opr_sz, uint32_t max_sz,
|
||||
gen_helper_gvec_3_ptr *fn)
|
||||
{
|
||||
TCGv_ptr qc_ptr = tcg_temp_new_ptr();
|
||||
|
||||
tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
|
||||
tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
|
||||
opr_sz, max_sz, 0, fn);
|
||||
tcg_temp_free_ptr(qc_ptr);
|
||||
}
|
||||
|
||||
void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
|
||||
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
|
||||
{
|
||||
|
@ -3636,8 +3648,7 @@ void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
|
|||
gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
|
||||
};
|
||||
tcg_debug_assert(vece >= 1 && vece <= 2);
|
||||
tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, cpu_env,
|
||||
opr_sz, max_sz, 0, fns[vece - 1]);
|
||||
gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
|
||||
}
|
||||
|
||||
void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
|
||||
|
@ -3647,8 +3658,7 @@ void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
|
|||
gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
|
||||
};
|
||||
tcg_debug_assert(vece >= 1 && vece <= 2);
|
||||
tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, cpu_env,
|
||||
opr_sz, max_sz, 0, fns[vece - 1]);
|
||||
gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
|
||||
}
|
||||
|
||||
#define GEN_CMP0(NAME, COND) \
|
||||
|
|
|
@ -36,8 +36,6 @@
|
|||
#define H4(x) (x)
|
||||
#endif
|
||||
|
||||
#define SET_QC() env->vfp.qc[0] = 1
|
||||
|
||||
static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
|
||||
{
|
||||
uint64_t *d = vd + opr_sz;
|
||||
|
@ -49,8 +47,8 @@ static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
|
|||
}
|
||||
|
||||
/* Signed saturating rounding doubling multiply-accumulate high half, 16-bit */
|
||||
static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1,
|
||||
int16_t src2, int16_t src3)
|
||||
static int16_t inl_qrdmlah_s16(int16_t src1, int16_t src2,
|
||||
int16_t src3, uint32_t *sat)
|
||||
{
|
||||
/* Simplify:
|
||||
* = ((a3 << 16) + ((e1 * e2) << 1) + (1 << 15)) >> 16
|
||||
|
@ -60,7 +58,7 @@ static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1,
|
|||
ret = ((int32_t)src3 << 15) + ret + (1 << 14);
|
||||
ret >>= 15;
|
||||
if (ret != (int16_t)ret) {
|
||||
SET_QC();
|
||||
*sat = 1;
|
||||
ret = (ret < 0 ? -0x8000 : 0x7fff);
|
||||
}
|
||||
return ret;
|
||||
|
@ -69,30 +67,30 @@ static uint16_t inl_qrdmlah_s16(CPUARMState *env, int16_t src1,
|
|||
uint32_t HELPER(neon_qrdmlah_s16)(CPUARMState *env, uint32_t src1,
|
||||
uint32_t src2, uint32_t src3)
|
||||
{
|
||||
uint16_t e1 = inl_qrdmlah_s16(env, src1, src2, src3);
|
||||
uint16_t e2 = inl_qrdmlah_s16(env, src1 >> 16, src2 >> 16, src3 >> 16);
|
||||
uint32_t *sat = &env->vfp.qc[0];
|
||||
uint16_t e1 = inl_qrdmlah_s16(src1, src2, src3, sat);
|
||||
uint16_t e2 = inl_qrdmlah_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat);
|
||||
return deposit32(e1, 16, 16, e2);
|
||||
}
|
||||
|
||||
void HELPER(gvec_qrdmlah_s16)(void *vd, void *vn, void *vm,
|
||||
void *ve, uint32_t desc)
|
||||
void *vq, uint32_t desc)
|
||||
{
|
||||
uintptr_t opr_sz = simd_oprsz(desc);
|
||||
int16_t *d = vd;
|
||||
int16_t *n = vn;
|
||||
int16_t *m = vm;
|
||||
CPUARMState *env = ve;
|
||||
uintptr_t i;
|
||||
|
||||
for (i = 0; i < opr_sz / 2; ++i) {
|
||||
d[i] = inl_qrdmlah_s16(env, n[i], m[i], d[i]);
|
||||
d[i] = inl_qrdmlah_s16(n[i], m[i], d[i], vq);
|
||||
}
|
||||
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||
}
|
||||
|
||||
/* Signed saturating rounding doubling multiply-subtract high half, 16-bit */
|
||||
static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1,
|
||||
int16_t src2, int16_t src3)
|
||||
static int16_t inl_qrdmlsh_s16(int16_t src1, int16_t src2,
|
||||
int16_t src3, uint32_t *sat)
|
||||
{
|
||||
/* Similarly, using subtraction:
|
||||
* = ((a3 << 16) - ((e1 * e2) << 1) + (1 << 15)) >> 16
|
||||
|
@ -102,7 +100,7 @@ static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1,
|
|||
ret = ((int32_t)src3 << 15) - ret + (1 << 14);
|
||||
ret >>= 15;
|
||||
if (ret != (int16_t)ret) {
|
||||
SET_QC();
|
||||
*sat = 1;
|
||||
ret = (ret < 0 ? -0x8000 : 0x7fff);
|
||||
}
|
||||
return ret;
|
||||
|
@ -111,85 +109,97 @@ static uint16_t inl_qrdmlsh_s16(CPUARMState *env, int16_t src1,
|
|||
uint32_t HELPER(neon_qrdmlsh_s16)(CPUARMState *env, uint32_t src1,
|
||||
uint32_t src2, uint32_t src3)
|
||||
{
|
||||
uint16_t e1 = inl_qrdmlsh_s16(env, src1, src2, src3);
|
||||
uint16_t e2 = inl_qrdmlsh_s16(env, src1 >> 16, src2 >> 16, src3 >> 16);
|
||||
uint32_t *sat = &env->vfp.qc[0];
|
||||
uint16_t e1 = inl_qrdmlsh_s16(src1, src2, src3, sat);
|
||||
uint16_t e2 = inl_qrdmlsh_s16(src1 >> 16, src2 >> 16, src3 >> 16, sat);
|
||||
return deposit32(e1, 16, 16, e2);
|
||||
}
|
||||
|
||||
void HELPER(gvec_qrdmlsh_s16)(void *vd, void *vn, void *vm,
|
||||
void *ve, uint32_t desc)
|
||||
void *vq, uint32_t desc)
|
||||
{
|
||||
uintptr_t opr_sz = simd_oprsz(desc);
|
||||
int16_t *d = vd;
|
||||
int16_t *n = vn;
|
||||
int16_t *m = vm;
|
||||
CPUARMState *env = ve;
|
||||
uintptr_t i;
|
||||
|
||||
for (i = 0; i < opr_sz / 2; ++i) {
|
||||
d[i] = inl_qrdmlsh_s16(env, n[i], m[i], d[i]);
|
||||
d[i] = inl_qrdmlsh_s16(n[i], m[i], d[i], vq);
|
||||
}
|
||||
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||
}
|
||||
|
||||
/* Signed saturating rounding doubling multiply-accumulate high half, 32-bit */
|
||||
uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1,
|
||||
int32_t src2, int32_t src3)
|
||||
static int32_t inl_qrdmlah_s32(int32_t src1, int32_t src2,
|
||||
int32_t src3, uint32_t *sat)
|
||||
{
|
||||
/* Simplify similarly to inl_qrdmlah_s16 above. */
|
||||
int64_t ret = (int64_t)src1 * src2;
|
||||
ret = ((int64_t)src3 << 31) + ret + (1 << 30);
|
||||
ret >>= 31;
|
||||
if (ret != (int32_t)ret) {
|
||||
SET_QC();
|
||||
*sat = 1;
|
||||
ret = (ret < 0 ? INT32_MIN : INT32_MAX);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
uint32_t HELPER(neon_qrdmlah_s32)(CPUARMState *env, int32_t src1,
|
||||
int32_t src2, int32_t src3)
|
||||
{
|
||||
uint32_t *sat = &env->vfp.qc[0];
|
||||
return inl_qrdmlah_s32(src1, src2, src3, sat);
|
||||
}
|
||||
|
||||
void HELPER(gvec_qrdmlah_s32)(void *vd, void *vn, void *vm,
|
||||
void *ve, uint32_t desc)
|
||||
void *vq, uint32_t desc)
|
||||
{
|
||||
uintptr_t opr_sz = simd_oprsz(desc);
|
||||
int32_t *d = vd;
|
||||
int32_t *n = vn;
|
||||
int32_t *m = vm;
|
||||
CPUARMState *env = ve;
|
||||
uintptr_t i;
|
||||
|
||||
for (i = 0; i < opr_sz / 4; ++i) {
|
||||
d[i] = helper_neon_qrdmlah_s32(env, n[i], m[i], d[i]);
|
||||
d[i] = inl_qrdmlah_s32(n[i], m[i], d[i], vq);
|
||||
}
|
||||
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||
}
|
||||
|
||||
/* Signed saturating rounding doubling multiply-subtract high half, 32-bit */
|
||||
uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1,
|
||||
int32_t src2, int32_t src3)
|
||||
static int32_t inl_qrdmlsh_s32(int32_t src1, int32_t src2,
|
||||
int32_t src3, uint32_t *sat)
|
||||
{
|
||||
/* Simplify similarly to inl_qrdmlsh_s16 above. */
|
||||
int64_t ret = (int64_t)src1 * src2;
|
||||
ret = ((int64_t)src3 << 31) - ret + (1 << 30);
|
||||
ret >>= 31;
|
||||
if (ret != (int32_t)ret) {
|
||||
SET_QC();
|
||||
*sat = 1;
|
||||
ret = (ret < 0 ? INT32_MIN : INT32_MAX);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
uint32_t HELPER(neon_qrdmlsh_s32)(CPUARMState *env, int32_t src1,
|
||||
int32_t src2, int32_t src3)
|
||||
{
|
||||
uint32_t *sat = &env->vfp.qc[0];
|
||||
return inl_qrdmlsh_s32(src1, src2, src3, sat);
|
||||
}
|
||||
|
||||
void HELPER(gvec_qrdmlsh_s32)(void *vd, void *vn, void *vm,
|
||||
void *ve, uint32_t desc)
|
||||
void *vq, uint32_t desc)
|
||||
{
|
||||
uintptr_t opr_sz = simd_oprsz(desc);
|
||||
int32_t *d = vd;
|
||||
int32_t *n = vn;
|
||||
int32_t *m = vm;
|
||||
CPUARMState *env = ve;
|
||||
uintptr_t i;
|
||||
|
||||
for (i = 0; i < opr_sz / 4; ++i) {
|
||||
d[i] = helper_neon_qrdmlsh_s32(env, n[i], m[i], d[i]);
|
||||
d[i] = inl_qrdmlsh_s32(n[i], m[i], d[i], vq);
|
||||
}
|
||||
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue