mirror of https://gitee.com/openkylin/qemu.git
target-arm: Move Neon VZIP to helper functions
Move the implementation of the Neon VZIP zip instruction from inline code to helper functions. (At 50+ TCG ops it was well over the recommended limit for coding inline.) The helper implementations also give the correct answers where the inline implementation did not. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
This commit is contained in:
parent
02acedf93d
commit
d68a6f3a6d
|
@ -466,5 +466,10 @@ DEF_HELPER_3(neon_unzip16, void, env, i32, i32)
|
|||
DEF_HELPER_3(neon_qunzip8, void, env, i32, i32)
|
||||
DEF_HELPER_3(neon_qunzip16, void, env, i32, i32)
|
||||
DEF_HELPER_3(neon_qunzip32, void, env, i32, i32)
|
||||
DEF_HELPER_3(neon_zip8, void, env, i32, i32)
|
||||
DEF_HELPER_3(neon_zip16, void, env, i32, i32)
|
||||
DEF_HELPER_3(neon_qzip8, void, env, i32, i32)
|
||||
DEF_HELPER_3(neon_qzip16, void, env, i32, i32)
|
||||
DEF_HELPER_3(neon_qzip32, void, env, i32, i32)
|
||||
|
||||
#include "def-helper.h"
|
||||
|
|
|
@ -1787,3 +1787,95 @@ void HELPER(neon_unzip16)(CPUState *env, uint32_t rd, uint32_t rm)
|
|||
env->vfp.regs[rm] = make_float64(m0);
|
||||
env->vfp.regs[rd] = make_float64(d0);
|
||||
}
|
||||
|
||||
/*
 * Zip (interleave) the 8-bit elements of two register pairs.
 *
 * Reads regs[rd], regs[rd + 1], regs[rm] and regs[rm + 1], then writes
 * all four back.  Element i of the rd pair is followed by element i of
 * the rm pair; the first half of the interleaved sequence ends up in
 * the rd pair and the second half in the rm pair.
 *
 * All inputs are read before any register is written, and the rm pair
 * is stored before the rd pair.
 */
void HELPER(neon_qzip8)(CPUState *env, uint32_t rd, uint32_t rm)
{
    const uint64_t m_lo = float64_val(env->vfp.regs[rm]);
    const uint64_t m_hi = float64_val(env->vfp.regs[rm + 1]);
    const uint64_t d_lo = float64_val(env->vfp.regs[rd]);
    const uint64_t d_hi = float64_val(env->vfp.regs[rd + 1]);
    uint64_t out_d0 = 0;
    uint64_t out_d1 = 0;
    uint64_t out_m0 = 0;
    uint64_t out_m1 = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        /* Each output word takes four element pairs: lanes 0-3 or 4-7. */
        const int upper = lane + 4;
        /* Bit position of the rd-sourced byte; the rm byte follows it. */
        const int even = lane * 16;
        const int odd = even + 8;

        out_d0 |= (ELEM(d_lo, lane, 8) << even) | (ELEM(m_lo, lane, 8) << odd);
        out_d1 |= (ELEM(d_lo, upper, 8) << even)
            | (ELEM(m_lo, upper, 8) << odd);
        out_m0 |= (ELEM(d_hi, lane, 8) << even) | (ELEM(m_hi, lane, 8) << odd);
        out_m1 |= (ELEM(d_hi, upper, 8) << even)
            | (ELEM(m_hi, upper, 8) << odd);
    }

    env->vfp.regs[rm] = make_float64(out_m0);
    env->vfp.regs[rm + 1] = make_float64(out_m1);
    env->vfp.regs[rd] = make_float64(out_d0);
    env->vfp.regs[rd + 1] = make_float64(out_d1);
}
|
||||
|
||||
/*
 * Zip (interleave) the 16-bit elements of two register pairs.
 *
 * Reads regs[rd], regs[rd + 1], regs[rm] and regs[rm + 1], then writes
 * all four back.  Element i of the rd pair is followed by element i of
 * the rm pair; the first half of the interleaved sequence ends up in
 * the rd pair and the second half in the rm pair.
 *
 * All inputs are read before any register is written, and the rm pair
 * is stored before the rd pair.
 */
void HELPER(neon_qzip16)(CPUState *env, uint32_t rd, uint32_t rm)
{
    const uint64_t m_lo = float64_val(env->vfp.regs[rm]);
    const uint64_t m_hi = float64_val(env->vfp.regs[rm + 1]);
    const uint64_t d_lo = float64_val(env->vfp.regs[rd]);
    const uint64_t d_hi = float64_val(env->vfp.regs[rd + 1]);
    uint64_t out_d0 = 0;
    uint64_t out_d1 = 0;
    uint64_t out_m0 = 0;
    uint64_t out_m1 = 0;
    int lane;

    for (lane = 0; lane < 2; lane++) {
        /* Each output word takes two element pairs: lanes 0-1 or 2-3. */
        const int upper = lane + 2;
        /* Bit position of the rd-sourced halfword; the rm one follows. */
        const int even = lane * 32;
        const int odd = even + 16;

        out_d0 |= (ELEM(d_lo, lane, 16) << even)
            | (ELEM(m_lo, lane, 16) << odd);
        out_d1 |= (ELEM(d_lo, upper, 16) << even)
            | (ELEM(m_lo, upper, 16) << odd);
        out_m0 |= (ELEM(d_hi, lane, 16) << even)
            | (ELEM(m_hi, lane, 16) << odd);
        out_m1 |= (ELEM(d_hi, upper, 16) << even)
            | (ELEM(m_hi, upper, 16) << odd);
    }

    env->vfp.regs[rm] = make_float64(out_m0);
    env->vfp.regs[rm + 1] = make_float64(out_m1);
    env->vfp.regs[rd] = make_float64(out_d0);
    env->vfp.regs[rd + 1] = make_float64(out_d1);
}
|
||||
|
||||
/*
 * Zip (interleave) the 32-bit elements of two register pairs.
 *
 * Reads regs[rd], regs[rd + 1], regs[rm] and regs[rm + 1], then writes
 * all four back.  Each output word holds one rd-sourced element in its
 * low half and the matching rm-sourced element in its high half.
 *
 * All inputs are read before any register is written, and the rm pair
 * is stored before the rd pair.
 */
void HELPER(neon_qzip32)(CPUState *env, uint32_t rd, uint32_t rm)
{
    const uint64_t m_lo = float64_val(env->vfp.regs[rm]);
    const uint64_t m_hi = float64_val(env->vfp.regs[rm + 1]);
    const uint64_t d_lo = float64_val(env->vfp.regs[rd]);
    const uint64_t d_hi = float64_val(env->vfp.regs[rd + 1]);

    /* Pull the eight 32-bit source elements apart first. */
    const uint64_t d_e0 = ELEM(d_lo, 0, 32);
    const uint64_t d_e1 = ELEM(d_lo, 1, 32);
    const uint64_t d_e2 = ELEM(d_hi, 0, 32);
    const uint64_t d_e3 = ELEM(d_hi, 1, 32);
    const uint64_t m_e0 = ELEM(m_lo, 0, 32);
    const uint64_t m_e1 = ELEM(m_lo, 1, 32);
    const uint64_t m_e2 = ELEM(m_hi, 0, 32);
    const uint64_t m_e3 = ELEM(m_hi, 1, 32);

    /* Pair them up: rd element in the low half, rm element in the high. */
    const uint64_t out_d0 = (m_e0 << 32) | d_e0;
    const uint64_t out_d1 = (m_e1 << 32) | d_e1;
    const uint64_t out_m0 = (m_e2 << 32) | d_e2;
    const uint64_t out_m1 = (m_e3 << 32) | d_e3;

    env->vfp.regs[rm] = make_float64(out_m0);
    env->vfp.regs[rm + 1] = make_float64(out_m1);
    env->vfp.regs[rd] = make_float64(out_d0);
    env->vfp.regs[rd + 1] = make_float64(out_d1);
}
|
||||
|
||||
/*
 * Zip (interleave) the 8-bit elements of two 64-bit registers.
 *
 * Reads regs[rd] and regs[rm], then writes both back.  Element i of rd
 * is followed by element i of rm; the pairs built from elements 0-3 go
 * to regs[rd] and the pairs built from elements 4-7 go to regs[rm].
 *
 * Both inputs are read before either register is written, and rm is
 * stored before rd.
 */
void HELPER(neon_zip8)(CPUState *env, uint32_t rd, uint32_t rm)
{
    const uint64_t src_m = float64_val(env->vfp.regs[rm]);
    const uint64_t src_d = float64_val(env->vfp.regs[rd]);
    uint64_t out_d = 0;
    uint64_t out_m = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        /* Source lanes 4-7 feed the word written back to rm. */
        const int upper = lane + 4;
        /* Bit position of the rd-sourced byte; the rm byte follows it. */
        const int even = lane * 16;
        const int odd = even + 8;

        out_d |= (ELEM(src_d, lane, 8) << even)
            | (ELEM(src_m, lane, 8) << odd);
        out_m |= (ELEM(src_d, upper, 8) << even)
            | (ELEM(src_m, upper, 8) << odd);
    }

    env->vfp.regs[rm] = make_float64(out_m);
    env->vfp.regs[rd] = make_float64(out_d);
}
|
||||
|
||||
/*
 * Zip (interleave) the 16-bit elements of two 64-bit registers.
 *
 * Reads regs[rd] and regs[rm], then writes both back.  Element i of rd
 * is followed by element i of rm; the pairs built from elements 0-1 go
 * to regs[rd] and the pairs built from elements 2-3 go to regs[rm].
 *
 * Both inputs are read before either register is written, and rm is
 * stored before rd.
 */
void HELPER(neon_zip16)(CPUState *env, uint32_t rd, uint32_t rm)
{
    const uint64_t src_m = float64_val(env->vfp.regs[rm]);
    const uint64_t src_d = float64_val(env->vfp.regs[rd]);
    uint64_t out_d = 0;
    uint64_t out_m = 0;
    int lane;

    for (lane = 0; lane < 2; lane++) {
        /* Source lanes 2-3 feed the word written back to rm. */
        const int upper = lane + 2;
        /* Bit position of the rd-sourced halfword; the rm one follows. */
        const int even = lane * 32;
        const int odd = even + 16;

        out_d |= (ELEM(src_d, lane, 16) << even)
            | (ELEM(src_m, lane, 16) << odd);
        out_m |= (ELEM(src_d, upper, 16) << even)
            | (ELEM(src_m, upper, 16) << odd);
    }

    env->vfp.regs[rm] = make_float64(out_m);
    env->vfp.regs[rd] = make_float64(out_d);
}
|
||||
|
|
|
@ -3653,59 +3653,43 @@ static int gen_neon_unzip(int rd, int rm, int size, int q)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Emit TCG ops that zip the bytes of two 32-bit values in place.
 *
 *   Value   Before         After
 *   t0      A3 A2 A1 A0    B1 A1 B0 A0
 *   t1      B3 B2 B1 B0    B3 A3 B2 A2
 *
 * Both results are built in temporaries so that t0 and t1 stay intact
 * until every source byte has been read.
 */
static void gen_neon_zip_u8(TCGv t0, TCGv t1)
{
    TCGv rd, rm, tmp;

    rd = new_tmp();
    rm = new_tmp();
    tmp = new_tmp();

    /* rd = B1 A1 B0 A0 */
    tcg_gen_andi_i32(rd, t0, 0xff);             /* A0 stays in byte 0 */
    tcg_gen_shli_i32(tmp, t1, 8);               /* B0 -> byte 1 */
    tcg_gen_andi_i32(tmp, tmp, 0xff00);
    tcg_gen_or_i32(rd, rd, tmp);
    /*
     * A1 sits in byte 1 and must move up one byte to byte 2.  Shifting
     * by 16 here would place A0 in byte 2 instead.
     */
    tcg_gen_shli_i32(tmp, t0, 8);
    tcg_gen_andi_i32(tmp, tmp, 0xff0000);
    tcg_gen_or_i32(rd, rd, tmp);
    /*
     * B1 sits in byte 1 and must move up two bytes to byte 3.  Shifting
     * by 24 here would place B0 in byte 3 instead.
     */
    tcg_gen_shli_i32(tmp, t1, 16);
    tcg_gen_andi_i32(tmp, tmp, 0xff000000);
    tcg_gen_or_i32(rd, rd, tmp);

    /* rm = B3 A3 B2 A2 */
    tcg_gen_andi_i32(rm, t1, 0xff000000);       /* B3 stays in byte 3 */
    tcg_gen_shri_i32(tmp, t0, 8);               /* A3 -> byte 2 */
    tcg_gen_andi_i32(tmp, tmp, 0xff0000);
    tcg_gen_or_i32(rm, rm, tmp);
    tcg_gen_shri_i32(tmp, t1, 8);               /* B2 -> byte 1 */
    tcg_gen_andi_i32(tmp, tmp, 0xff00);
    tcg_gen_or_i32(rm, rm, tmp);
    tcg_gen_shri_i32(tmp, t0, 16);              /* A2 -> byte 0 */
    tcg_gen_andi_i32(tmp, tmp, 0xff);
    tcg_gen_or_i32(t1, rm, tmp);
    tcg_gen_mov_i32(t0, rd);

    dead_tmp(tmp);
    dead_tmp(rm);
    dead_tmp(rd);
}
|
||||
|
||||
static void gen_neon_zip_u16(TCGv t0, TCGv t1)
|
||||
static int gen_neon_zip(int rd, int rm, int size, int q)
|
||||
{
|
||||
TCGv tmp, tmp2;
|
||||
|
||||
tmp = new_tmp();
|
||||
tmp2 = new_tmp();
|
||||
|
||||
tcg_gen_andi_i32(tmp, t0, 0xffff);
|
||||
tcg_gen_shli_i32(tmp2, t1, 16);
|
||||
tcg_gen_or_i32(tmp, tmp, tmp2);
|
||||
tcg_gen_andi_i32(t1, t1, 0xffff0000);
|
||||
tcg_gen_shri_i32(tmp2, t0, 16);
|
||||
tcg_gen_or_i32(t1, t1, tmp2);
|
||||
tcg_gen_mov_i32(t0, tmp);
|
||||
|
||||
dead_tmp(tmp2);
|
||||
dead_tmp(tmp);
|
||||
if (size == 3 || (!q && size == 2)) {
|
||||
return 1;
|
||||
}
|
||||
tmp = tcg_const_i32(rd);
|
||||
tmp2 = tcg_const_i32(rm);
|
||||
if (q) {
|
||||
switch (size) {
|
||||
case 0:
|
||||
gen_helper_neon_qzip8(cpu_env, tmp, tmp2);
|
||||
break;
|
||||
case 1:
|
||||
gen_helper_neon_qzip16(cpu_env, tmp, tmp2);
|
||||
break;
|
||||
case 2:
|
||||
gen_helper_neon_qzip32(cpu_env, tmp, tmp2);
|
||||
break;
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
} else {
|
||||
switch (size) {
|
||||
case 0:
|
||||
gen_helper_neon_zip8(cpu_env, tmp, tmp2);
|
||||
break;
|
||||
case 1:
|
||||
gen_helper_neon_zip16(cpu_env, tmp, tmp2);
|
||||
break;
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
}
|
||||
tcg_temp_free_i32(tmp);
|
||||
tcg_temp_free_i32(tmp2);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void gen_neon_trn_u8(TCGv t0, TCGv t1)
|
||||
|
@ -5429,29 +5413,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
|
|||
}
|
||||
break;
|
||||
case 35: /* VZIP */
|
||||
/* Reg Before After
|
||||
Rd A3 A2 A1 A0 B1 A1 B0 A0
|
||||
Rm B3 B2 B1 B0 B3 A3 B2 A2
|
||||
*/
|
||||
if (size == 3)
|
||||
if (gen_neon_zip(rd, rm, size, q)) {
|
||||
return 1;
|
||||
count = (q ? 4 : 2);
|
||||
for (n = 0; n < count; n++) {
|
||||
tmp = neon_load_reg(rd, n);
|
||||
tmp2 = neon_load_reg(rd, n);
|
||||
switch (size) {
|
||||
case 0: gen_neon_zip_u8(tmp, tmp2); break;
|
||||
case 1: gen_neon_zip_u16(tmp, tmp2); break;
|
||||
case 2: /* no-op */; break;
|
||||
default: abort();
|
||||
}
|
||||
neon_store_scratch(n * 2, tmp);
|
||||
neon_store_scratch(n * 2 + 1, tmp2);
|
||||
}
|
||||
for (n = 0; n < count * 2; n++) {
|
||||
int reg = (n < count) ? rd : rm;
|
||||
tmp = neon_load_scratch(n);
|
||||
neon_store_reg(reg, n % count, tmp);
|
||||
}
|
||||
break;
|
||||
case 36: case 37: /* VMOVN, VQMOVUN, VQMOVN */
|
||||
|
|
Loading…
Reference in New Issue