mirror of https://gitee.com/openkylin/qemu.git
target-arm queue:
* target/arm: Fix Neon emulation bugs on big-endian hosts * target/arm: fix handling of HCR.FB * target/arm: fix LORID_EL1 access check * disas/capstone: Fix monitor disassembly of >32 bytes * hw/arm/smmuv3: Fix potential integer overflow (CID 1432363) * hw/arm/boot: fix SVE for EL3 direct kernel boot * hw/display/omap_lcdc: Fix potential NULL pointer dereference * hw/display/exynos4210_fimd: Fix potential NULL pointer dereference * target/arm: Get correct MMU index for other-security-state * configure: Test that gio libs from pkg-config work * hw/intc/arm_gicv3_cpuif: Make GIC maintenance interrupts work * docs: Fix building with Sphinx 3 * tests/qtest/npcm7xx_rng-test: Disable randomness tests -----BEGIN PGP SIGNATURE----- iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAl+gPSwZHHBldGVyLm1h eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3rBwD/9kNodk0LilJEbE/UVL5niv EnLo0xo+qFx8jPR19VVG6Cp3mBwwImV7MVebAuuh6cgzdyofKwpd03h/XMwIOY0T gHlfk/npJnob/7bambBU5UTAZnOHj8EnuCwTKq3AuRROdi35p4OqDZTxAYNNJNQa 1dRRTEODxuPRi/bmwuYLp1esrjXlJa5KSlv+3gjunVG+uEJ6ygHJOZlgJ22704D/ 2IB3rrtwx/oYBsaQCd9TQ/uIVgkvfRo1feQp5/ukeb4nYDNWtjkk5usPYcGh7h5P dCgneinXvyTqZXgk9FpT25rVrp01IBZXNkGjEy/HMmpib6ABsKGywBQfif4ZQXc7 KlO+A8yCvAvRuJcjsVMV71z9j0MIu5eU9aOW7Oqu/ORMnRSlEionCypPaO3J/kF2 e6XoGQZJaziIo5hg8hxyALcKKtpwgd2ckAdNxQhw3vsNA7uDe1acs6BzJbiT1J1o 05zZs6Xy4OheHkFGOKoZyVAmSCsfwqgaHspl62owCRrNcT8URLzGpjEal+l4+FHN 8kMs012aiOSkDAWldPH2hjt0sYV/F4bVDID4PZj5Cwrz9lXQyq8e8Lw6WUvqXEzf Kgl/XVu9mZx4wMWLNg78cneXbM+RQNmJsWMMA/qZn5Lh2p/73a1jA9vjRBl406Tt RW00VEbywkOYtj1EZKdgqw== =PdV9 -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20201102' into staging target-arm queue: * target/arm: Fix Neon emulation bugs on big-endian hosts * target/arm: fix handling of HCR.FB * target/arm: fix LORID_EL1 access check * disas/capstone: Fix monitor disassembly of >32 bytes * hw/arm/smmuv3: Fix potential integer overflow (CID 1432363) * hw/arm/boot: fix SVE for EL3 direct kernel boot * 
hw/display/omap_lcdc: Fix potential NULL pointer dereference * hw/display/exynos4210_fimd: Fix potential NULL pointer dereference * target/arm: Get correct MMU index for other-security-state * configure: Test that gio libs from pkg-config work * hw/intc/arm_gicv3_cpuif: Make GIC maintenance interrupts work * docs: Fix building with Sphinx 3 * tests/qtest/npcm7xx_rng-test: Disable randomness tests # gpg: Signature made Mon 02 Nov 2020 17:09:00 GMT # gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE # gpg: issuer "peter.maydell@linaro.org" # gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate] # gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate] # gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate] # Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE * remotes/pmaydell/tags/pull-target-arm-20201102: (26 commits) tests/qtest/npcm7xx_rng-test: Disable randomness tests qemu-option-trace.rst.inc: Don't use option:: markup scripts/kerneldoc: For Sphinx 3 use c:macro for macros with arguments hw/intc/arm_gicv3_cpuif: Make GIC maintenance interrupts work configure: Test that gio libs from pkg-config work target/arm: Get correct MMU index for other-security-state hw/display/exynos4210_fimd: Fix potential NULL pointer dereference hw/display/omap_lcdc: Fix potential NULL pointer dereference hw/arm/boot: fix SVE for EL3 direct kernel boot hw/arm/smmuv3: Fix potential integer overflow (CID 1432363) disas/capstone: Fix monitor disassembly of >32 bytes target/arm: fix LORID_EL1 access check target/arm: fix handling of HCR.FB target/arm: Fix VUDOT/VSDOT (scalar) on big-endian hosts target/arm: Fix float16 pairwise Neon ops on big-endian hosts target/arm: Improve do_prewiden_3d target/arm: Simplify do_long_3d and do_2scalar_long target/arm: Rename neon_load_reg64 to vfp_load_reg64 target/arm: Add read/write_neon_element64 target/arm: Rename neon_load_reg32 to vfp_load_reg32 ... 
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
c7a7a877b7
|
@ -3489,13 +3489,21 @@ if test "$static" = yes && test "$mingw32" = yes; then
|
|||
fi
|
||||
|
||||
if $pkg_config --atleast-version=$glib_req_ver gio-2.0; then
|
||||
gio=yes
|
||||
gio_cflags=$($pkg_config --cflags gio-2.0)
|
||||
gio_libs=$($pkg_config --libs gio-2.0)
|
||||
gdbus_codegen=$($pkg_config --variable=gdbus_codegen gio-2.0)
|
||||
if [ ! -x "$gdbus_codegen" ]; then
|
||||
gdbus_codegen=
|
||||
fi
|
||||
# Check that the libraries actually work -- Ubuntu 18.04 ships
|
||||
# with pkg-config --static --libs data for gio-2.0 that is missing
|
||||
# -lblkid and will give a link error.
|
||||
write_c_skeleton
|
||||
if compile_prog "" "$gio_libs" ; then
|
||||
gio=yes
|
||||
else
|
||||
gio=no
|
||||
fi
|
||||
else
|
||||
gio=no
|
||||
fi
|
||||
|
|
|
@ -286,7 +286,7 @@ bool cap_disas_monitor(disassemble_info *info, uint64_t pc, int count)
|
|||
|
||||
/* Make certain that we can make progress. */
|
||||
assert(tsize != 0);
|
||||
info->read_memory_func(pc, cap_buf + csize, tsize, info);
|
||||
info->read_memory_func(pc + csize, cap_buf + csize, tsize, info);
|
||||
csize += tsize;
|
||||
|
||||
if (cs_disasm_iter(handle, &cbuf, &csize, &pc, insn)) {
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
Specify tracing options.
|
||||
|
||||
.. option:: [enable=]PATTERN
|
||||
``[enable=]PATTERN``
|
||||
|
||||
Immediately enable events matching *PATTERN*
|
||||
(either event name or a globbing pattern). This option is only
|
||||
|
@ -11,7 +11,7 @@ Specify tracing options.
|
|||
|
||||
Use :option:`-trace help` to print a list of names of trace points.
|
||||
|
||||
.. option:: events=FILE
|
||||
``events=FILE``
|
||||
|
||||
Immediately enable events listed in *FILE*.
|
||||
The file must contain one event name (as listed in the ``trace-events-all``
|
||||
|
@ -19,7 +19,7 @@ Specify tracing options.
|
|||
available if QEMU has been compiled with the ``simple``, ``log`` or
|
||||
``ftrace`` tracing backend.
|
||||
|
||||
.. option:: file=FILE
|
||||
``file=FILE``
|
||||
|
||||
Log output traces to *FILE*.
|
||||
This option is only available if QEMU has been compiled with
|
||||
|
|
|
@ -742,6 +742,9 @@ static void do_cpu_reset(void *opaque)
|
|||
if (cpu_isar_feature(aa64_mte, cpu)) {
|
||||
env->cp15.scr_el3 |= SCR_ATA;
|
||||
}
|
||||
if (cpu_isar_feature(aa64_sve, cpu)) {
|
||||
env->cp15.cptr_el[3] |= CPTR_EZ;
|
||||
}
|
||||
/* AArch64 kernels never boot in secure mode */
|
||||
assert(!info->secure_boot);
|
||||
/* This hook is only supported for AArch32 currently:
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/bitops.h"
|
||||
#include "hw/irq.h"
|
||||
#include "hw/sysbus.h"
|
||||
#include "migration/vmstate.h"
|
||||
|
@ -864,7 +865,7 @@ static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd)
|
|||
scale = CMD_SCALE(cmd);
|
||||
num = CMD_NUM(cmd);
|
||||
ttl = CMD_TTL(cmd);
|
||||
num_pages = (num + 1) * (1 << (scale));
|
||||
num_pages = (num + 1) * BIT_ULL(scale);
|
||||
}
|
||||
|
||||
if (type == SMMU_CMD_TLBI_NH_VA) {
|
||||
|
|
|
@ -1275,12 +1275,14 @@ static void exynos4210_fimd_update(void *opaque)
|
|||
bool blend = false;
|
||||
uint8_t *host_fb_addr;
|
||||
bool is_dirty = false;
|
||||
const int global_width = (s->vidtcon[2] & FIMD_VIDTCON2_SIZE_MASK) + 1;
|
||||
int global_width;
|
||||
|
||||
if (!s || !s->console || !s->enabled ||
|
||||
surface_bits_per_pixel(qemu_console_surface(s->console)) == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
global_width = (s->vidtcon[2] & FIMD_VIDTCON2_SIZE_MASK) + 1;
|
||||
exynos4210_update_resolution(s);
|
||||
surface = qemu_console_surface(s->console);
|
||||
|
||||
|
|
|
@ -78,14 +78,18 @@ static void omap_lcd_interrupts(struct omap_lcd_panel_s *s)
|
|||
static void omap_update_display(void *opaque)
|
||||
{
|
||||
struct omap_lcd_panel_s *omap_lcd = (struct omap_lcd_panel_s *) opaque;
|
||||
DisplaySurface *surface = qemu_console_surface(omap_lcd->con);
|
||||
DisplaySurface *surface;
|
||||
draw_line_func draw_line;
|
||||
int size, height, first, last;
|
||||
int width, linesize, step, bpp, frame_offset;
|
||||
hwaddr frame_base;
|
||||
|
||||
if (!omap_lcd || omap_lcd->plm == 1 || !omap_lcd->enable ||
|
||||
!surface_bits_per_pixel(surface)) {
|
||||
if (!omap_lcd || omap_lcd->plm == 1 || !omap_lcd->enable) {
|
||||
return;
|
||||
}
|
||||
|
||||
surface = qemu_console_surface(omap_lcd->con);
|
||||
if (!surface_bits_per_pixel(surface)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -399,6 +399,7 @@ static void gicv3_cpuif_virt_update(GICv3CPUState *cs)
|
|||
int irqlevel = 0;
|
||||
int fiqlevel = 0;
|
||||
int maintlevel = 0;
|
||||
ARMCPU *cpu = ARM_CPU(cs->cpu);
|
||||
|
||||
idx = hppvi_index(cs);
|
||||
trace_gicv3_cpuif_virt_update(gicv3_redist_affid(cs), idx);
|
||||
|
@ -424,7 +425,7 @@ static void gicv3_cpuif_virt_update(GICv3CPUState *cs)
|
|||
|
||||
qemu_set_irq(cs->parent_vfiq, fiqlevel);
|
||||
qemu_set_irq(cs->parent_virq, irqlevel);
|
||||
qemu_set_irq(cs->maintenance_irq, maintlevel);
|
||||
qemu_set_irq(cpu->gicv3_maintenance_interrupt, maintlevel);
|
||||
}
|
||||
|
||||
static uint64_t icv_ap_read(CPUARMState *env, const ARMCPRegInfo *ri)
|
||||
|
@ -2624,8 +2625,6 @@ void gicv3_init_cpuif(GICv3State *s)
|
|||
&& cpu->gic_num_lrs) {
|
||||
int j;
|
||||
|
||||
cs->maintenance_irq = cpu->gicv3_maintenance_interrupt;
|
||||
|
||||
cs->num_list_regs = cpu->gic_num_lrs;
|
||||
cs->vpribits = cpu->gic_vpribits;
|
||||
cs->vprebits = cpu->gic_vprebits;
|
||||
|
|
|
@ -153,7 +153,6 @@ struct GICv3CPUState {
|
|||
qemu_irq parent_fiq;
|
||||
qemu_irq parent_virq;
|
||||
qemu_irq parent_vfiq;
|
||||
qemu_irq maintenance_irq;
|
||||
|
||||
/* Redistributor */
|
||||
uint32_t level; /* Current IRQ level */
|
||||
|
|
|
@ -839,7 +839,23 @@ sub output_function_rst(%) {
|
|||
output_highlight_rst($args{'purpose'});
|
||||
$start = "\n\n**Syntax**\n\n ``";
|
||||
} else {
|
||||
print ".. c:function:: ";
|
||||
if ((split(/\./, $sphinx_version))[0] >= 3) {
|
||||
# Sphinx 3 and later distinguish macros and functions and
|
||||
# complain if you use c:function with something that's not
|
||||
# syntactically valid as a function declaration.
|
||||
# We assume that anything with a return type is a function
|
||||
# and anything without is a macro.
|
||||
if ($args{'functiontype'} ne "") {
|
||||
print ".. c:function:: ";
|
||||
} else {
|
||||
print ".. c:macro:: ";
|
||||
}
|
||||
} else {
|
||||
# Older Sphinx versions don't support documenting macros that take
|
||||
# arguments with c:macro, and don't complain about the use
|
||||
# of c:function for this.
|
||||
print ".. c:function:: ";
|
||||
}
|
||||
}
|
||||
if ($args{'functiontype'} ne "") {
|
||||
$start .= $args{'functiontype'} . " " . $args{'function'} . " (";
|
||||
|
|
|
@ -731,13 +731,12 @@ static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
|
|||
|
||||
/*
|
||||
* Non-IS variants of TLB operations are upgraded to
|
||||
* IS versions if we are at NS EL1 and HCR_EL2.FB is set to
|
||||
* IS versions if we are at EL1 and HCR_EL2.FB is effectively set to
|
||||
* force broadcast of these operations.
|
||||
*/
|
||||
static bool tlb_force_broadcast(CPUARMState *env)
|
||||
{
|
||||
return (env->cp15.hcr_el2 & HCR_FB) &&
|
||||
arm_current_el(env) == 1 && arm_is_secure_below_el3(env);
|
||||
return arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_FB);
|
||||
}
|
||||
|
||||
static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri,
|
||||
|
@ -6680,9 +6679,10 @@ static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri)
|
|||
#endif
|
||||
|
||||
/* Shared logic between LORID and the rest of the LOR* registers.
|
||||
* Secure state has already been delt with.
|
||||
* Secure state exclusion has already been dealt with.
|
||||
*/
|
||||
static CPAccessResult access_lor_ns(CPUARMState *env)
|
||||
static CPAccessResult access_lor_ns(CPUARMState *env,
|
||||
const ARMCPRegInfo *ri, bool isread)
|
||||
{
|
||||
int el = arm_current_el(env);
|
||||
|
||||
|
@ -6695,16 +6695,6 @@ static CPAccessResult access_lor_ns(CPUARMState *env)
|
|||
return CP_ACCESS_OK;
|
||||
}
|
||||
|
||||
static CPAccessResult access_lorid(CPUARMState *env, const ARMCPRegInfo *ri,
|
||||
bool isread)
|
||||
{
|
||||
if (arm_is_secure_below_el3(env)) {
|
||||
/* Access ok in secure mode. */
|
||||
return CP_ACCESS_OK;
|
||||
}
|
||||
return access_lor_ns(env);
|
||||
}
|
||||
|
||||
static CPAccessResult access_lor_other(CPUARMState *env,
|
||||
const ARMCPRegInfo *ri, bool isread)
|
||||
{
|
||||
|
@ -6712,7 +6702,7 @@ static CPAccessResult access_lor_other(CPUARMState *env,
|
|||
/* Access denied in secure mode. */
|
||||
return CP_ACCESS_TRAP;
|
||||
}
|
||||
return access_lor_ns(env);
|
||||
return access_lor_ns(env, ri, isread);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -6739,7 +6729,7 @@ static const ARMCPRegInfo lor_reginfo[] = {
|
|||
.type = ARM_CP_CONST, .resetvalue = 0 },
|
||||
{ .name = "LORID_EL1", .state = ARM_CP_STATE_AA64,
|
||||
.opc0 = 3, .opc1 = 0, .crn = 10, .crm = 4, .opc2 = 7,
|
||||
.access = PL1_R, .accessfn = access_lorid,
|
||||
.access = PL1_R, .accessfn = access_lor_ns,
|
||||
.type = ARM_CP_CONST, .resetvalue = 0 },
|
||||
REGINFO_SENTINEL
|
||||
};
|
||||
|
|
|
@ -2719,7 +2719,8 @@ ARMMMUIdx arm_v7m_mmu_idx_for_secstate_and_priv(CPUARMState *env,
|
|||
/* Return the MMU index for a v7M CPU in the specified security state */
|
||||
ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate)
|
||||
{
|
||||
bool priv = arm_current_el(env) != 0;
|
||||
bool priv = arm_v7m_is_handler_mode(env) ||
|
||||
!(env->v7m.control[secstate] & 1);
|
||||
|
||||
return arm_v7m_mmu_idx_for_secstate_and_priv(env, secstate, priv);
|
||||
}
|
||||
|
|
|
@ -60,25 +60,6 @@ static inline int neon_3same_fp_size(DisasContext *s, int x)
|
|||
#include "decode-neon-ls.c.inc"
|
||||
#include "decode-neon-shared.c.inc"
|
||||
|
||||
/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
|
||||
* where 0 is the least significant end of the register.
|
||||
*/
|
||||
static inline long
|
||||
neon_element_offset(int reg, int element, MemOp size)
|
||||
{
|
||||
int element_size = 1 << size;
|
||||
int ofs = element * element_size;
|
||||
#ifdef HOST_WORDS_BIGENDIAN
|
||||
/* Calculate the offset assuming fully little-endian,
|
||||
* then XOR to account for the order of the 8-byte units.
|
||||
*/
|
||||
if (element_size < 8) {
|
||||
ofs ^= 8 - element_size;
|
||||
}
|
||||
#endif
|
||||
return neon_reg_offset(reg, 0) + ofs;
|
||||
}
|
||||
|
||||
static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
|
||||
{
|
||||
long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
|
||||
|
@ -585,12 +566,12 @@ static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
|
|||
* We cannot write 16 bytes at once because the
|
||||
* destination is unaligned.
|
||||
*/
|
||||
tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
|
||||
tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
|
||||
8, 8, tmp);
|
||||
tcg_gen_gvec_mov(0, neon_reg_offset(vd + 1, 0),
|
||||
neon_reg_offset(vd, 0), 8, 8);
|
||||
tcg_gen_gvec_mov(0, neon_full_reg_offset(vd + 1),
|
||||
neon_full_reg_offset(vd), 8, 8);
|
||||
} else {
|
||||
tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
|
||||
tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(vd),
|
||||
vec_size, vec_size, tmp);
|
||||
}
|
||||
tcg_gen_addi_i32(addr, addr, 1 << size);
|
||||
|
@ -691,9 +672,9 @@ static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
|
|||
static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn)
|
||||
{
|
||||
int vec_size = a->q ? 16 : 8;
|
||||
int rd_ofs = neon_reg_offset(a->vd, 0);
|
||||
int rn_ofs = neon_reg_offset(a->vn, 0);
|
||||
int rm_ofs = neon_reg_offset(a->vm, 0);
|
||||
int rd_ofs = neon_full_reg_offset(a->vd);
|
||||
int rn_ofs = neon_full_reg_offset(a->vn);
|
||||
int rm_ofs = neon_full_reg_offset(a->vm);
|
||||
|
||||
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
|
||||
return false;
|
||||
|
@ -975,18 +956,24 @@ static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
|
|||
* early. Since Q is 0 there are always just two passes, so instead
|
||||
* of a complicated loop over each pass we just unroll.
|
||||
*/
|
||||
tmp = neon_load_reg(a->vn, 0);
|
||||
tmp2 = neon_load_reg(a->vn, 1);
|
||||
tmp = tcg_temp_new_i32();
|
||||
tmp2 = tcg_temp_new_i32();
|
||||
tmp3 = tcg_temp_new_i32();
|
||||
|
||||
read_neon_element32(tmp, a->vn, 0, MO_32);
|
||||
read_neon_element32(tmp2, a->vn, 1, MO_32);
|
||||
fn(tmp, tmp, tmp2);
|
||||
tcg_temp_free_i32(tmp2);
|
||||
|
||||
tmp3 = neon_load_reg(a->vm, 0);
|
||||
tmp2 = neon_load_reg(a->vm, 1);
|
||||
read_neon_element32(tmp3, a->vm, 0, MO_32);
|
||||
read_neon_element32(tmp2, a->vm, 1, MO_32);
|
||||
fn(tmp3, tmp3, tmp2);
|
||||
tcg_temp_free_i32(tmp2);
|
||||
|
||||
neon_store_reg(a->vd, 0, tmp);
|
||||
neon_store_reg(a->vd, 1, tmp3);
|
||||
write_neon_element32(tmp, a->vd, 0, MO_32);
|
||||
write_neon_element32(tmp3, a->vd, 1, MO_32);
|
||||
|
||||
tcg_temp_free_i32(tmp);
|
||||
tcg_temp_free_i32(tmp2);
|
||||
tcg_temp_free_i32(tmp3);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1177,8 +1164,8 @@ static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
|
|||
{
|
||||
/* Handle a 2-reg-shift insn which can be vectorized. */
|
||||
int vec_size = a->q ? 16 : 8;
|
||||
int rd_ofs = neon_reg_offset(a->vd, 0);
|
||||
int rm_ofs = neon_reg_offset(a->vm, 0);
|
||||
int rd_ofs = neon_full_reg_offset(a->vd);
|
||||
int rm_ofs = neon_full_reg_offset(a->vm);
|
||||
|
||||
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
|
||||
return false;
|
||||
|
@ -1278,9 +1265,9 @@ static bool do_2shift_env_64(DisasContext *s, arg_2reg_shift *a,
|
|||
for (pass = 0; pass < a->q + 1; pass++) {
|
||||
TCGv_i64 tmp = tcg_temp_new_i64();
|
||||
|
||||
neon_load_reg64(tmp, a->vm + pass);
|
||||
read_neon_element64(tmp, a->vm, pass, MO_64);
|
||||
fn(tmp, cpu_env, tmp, constimm);
|
||||
neon_store_reg64(tmp, a->vd + pass);
|
||||
write_neon_element64(tmp, a->vd, pass, MO_64);
|
||||
tcg_temp_free_i64(tmp);
|
||||
}
|
||||
tcg_temp_free_i64(constimm);
|
||||
|
@ -1294,7 +1281,7 @@ static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
|
|||
* 2-reg-and-shift operations, size < 3 case, where the
|
||||
* helper needs to be passed cpu_env.
|
||||
*/
|
||||
TCGv_i32 constimm;
|
||||
TCGv_i32 constimm, tmp;
|
||||
int pass;
|
||||
|
||||
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
|
||||
|
@ -1320,12 +1307,14 @@ static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
|
|||
* by immediate using the variable shift operations.
|
||||
*/
|
||||
constimm = tcg_const_i32(dup_const(a->size, a->shift));
|
||||
tmp = tcg_temp_new_i32();
|
||||
|
||||
for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
|
||||
TCGv_i32 tmp = neon_load_reg(a->vm, pass);
|
||||
read_neon_element32(tmp, a->vm, pass, MO_32);
|
||||
fn(tmp, cpu_env, tmp, constimm);
|
||||
neon_store_reg(a->vd, pass, tmp);
|
||||
write_neon_element32(tmp, a->vd, pass, MO_32);
|
||||
}
|
||||
tcg_temp_free_i32(tmp);
|
||||
tcg_temp_free_i32(constimm);
|
||||
return true;
|
||||
}
|
||||
|
@ -1383,21 +1372,21 @@ static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
|
|||
constimm = tcg_const_i64(-a->shift);
|
||||
rm1 = tcg_temp_new_i64();
|
||||
rm2 = tcg_temp_new_i64();
|
||||
rd = tcg_temp_new_i32();
|
||||
|
||||
/* Load both inputs first to avoid potential overwrite if rm == rd */
|
||||
neon_load_reg64(rm1, a->vm);
|
||||
neon_load_reg64(rm2, a->vm + 1);
|
||||
read_neon_element64(rm1, a->vm, 0, MO_64);
|
||||
read_neon_element64(rm2, a->vm, 1, MO_64);
|
||||
|
||||
shiftfn(rm1, rm1, constimm);
|
||||
rd = tcg_temp_new_i32();
|
||||
narrowfn(rd, cpu_env, rm1);
|
||||
neon_store_reg(a->vd, 0, rd);
|
||||
write_neon_element32(rd, a->vd, 0, MO_32);
|
||||
|
||||
shiftfn(rm2, rm2, constimm);
|
||||
rd = tcg_temp_new_i32();
|
||||
narrowfn(rd, cpu_env, rm2);
|
||||
neon_store_reg(a->vd, 1, rd);
|
||||
write_neon_element32(rd, a->vd, 1, MO_32);
|
||||
|
||||
tcg_temp_free_i32(rd);
|
||||
tcg_temp_free_i64(rm1);
|
||||
tcg_temp_free_i64(rm2);
|
||||
tcg_temp_free_i64(constimm);
|
||||
|
@ -1447,10 +1436,14 @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
|
|||
constimm = tcg_const_i32(imm);
|
||||
|
||||
/* Load all inputs first to avoid potential overwrite */
|
||||
rm1 = neon_load_reg(a->vm, 0);
|
||||
rm2 = neon_load_reg(a->vm, 1);
|
||||
rm3 = neon_load_reg(a->vm + 1, 0);
|
||||
rm4 = neon_load_reg(a->vm + 1, 1);
|
||||
rm1 = tcg_temp_new_i32();
|
||||
rm2 = tcg_temp_new_i32();
|
||||
rm3 = tcg_temp_new_i32();
|
||||
rm4 = tcg_temp_new_i32();
|
||||
read_neon_element32(rm1, a->vm, 0, MO_32);
|
||||
read_neon_element32(rm2, a->vm, 1, MO_32);
|
||||
read_neon_element32(rm3, a->vm, 2, MO_32);
|
||||
read_neon_element32(rm4, a->vm, 3, MO_32);
|
||||
rtmp = tcg_temp_new_i64();
|
||||
|
||||
shiftfn(rm1, rm1, constimm);
|
||||
|
@ -1460,7 +1453,8 @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
|
|||
tcg_temp_free_i32(rm2);
|
||||
|
||||
narrowfn(rm1, cpu_env, rtmp);
|
||||
neon_store_reg(a->vd, 0, rm1);
|
||||
write_neon_element32(rm1, a->vd, 0, MO_32);
|
||||
tcg_temp_free_i32(rm1);
|
||||
|
||||
shiftfn(rm3, rm3, constimm);
|
||||
shiftfn(rm4, rm4, constimm);
|
||||
|
@ -1471,7 +1465,8 @@ static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
|
|||
|
||||
narrowfn(rm3, cpu_env, rtmp);
|
||||
tcg_temp_free_i64(rtmp);
|
||||
neon_store_reg(a->vd, 1, rm3);
|
||||
write_neon_element32(rm3, a->vd, 1, MO_32);
|
||||
tcg_temp_free_i32(rm3);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1572,8 +1567,10 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
|
|||
widen_mask = dup_const(a->size + 1, widen_mask);
|
||||
}
|
||||
|
||||
rm0 = neon_load_reg(a->vm, 0);
|
||||
rm1 = neon_load_reg(a->vm, 1);
|
||||
rm0 = tcg_temp_new_i32();
|
||||
rm1 = tcg_temp_new_i32();
|
||||
read_neon_element32(rm0, a->vm, 0, MO_32);
|
||||
read_neon_element32(rm1, a->vm, 1, MO_32);
|
||||
tmp = tcg_temp_new_i64();
|
||||
|
||||
widenfn(tmp, rm0);
|
||||
|
@ -1582,7 +1579,7 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
|
|||
tcg_gen_shli_i64(tmp, tmp, a->shift);
|
||||
tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
|
||||
}
|
||||
neon_store_reg64(tmp, a->vd);
|
||||
write_neon_element64(tmp, a->vd, 0, MO_64);
|
||||
|
||||
widenfn(tmp, rm1);
|
||||
tcg_temp_free_i32(rm1);
|
||||
|
@ -1590,7 +1587,7 @@ static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
|
|||
tcg_gen_shli_i64(tmp, tmp, a->shift);
|
||||
tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
|
||||
}
|
||||
neon_store_reg64(tmp, a->vd + 1);
|
||||
write_neon_element64(tmp, a->vd, 1, MO_64);
|
||||
tcg_temp_free_i64(tmp);
|
||||
return true;
|
||||
}
|
||||
|
@ -1620,8 +1617,8 @@ static bool do_fp_2sh(DisasContext *s, arg_2reg_shift *a,
|
|||
{
|
||||
/* FP operations in 2-reg-and-shift group */
|
||||
int vec_size = a->q ? 16 : 8;
|
||||
int rd_ofs = neon_reg_offset(a->vd, 0);
|
||||
int rm_ofs = neon_reg_offset(a->vm, 0);
|
||||
int rd_ofs = neon_full_reg_offset(a->vd);
|
||||
int rm_ofs = neon_full_reg_offset(a->vm);
|
||||
TCGv_ptr fpst;
|
||||
|
||||
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
|
||||
|
@ -1756,7 +1753,7 @@ static bool do_1reg_imm(DisasContext *s, arg_1reg_imm *a,
|
|||
return true;
|
||||
}
|
||||
|
||||
reg_ofs = neon_reg_offset(a->vd, 0);
|
||||
reg_ofs = neon_full_reg_offset(a->vd);
|
||||
vec_size = a->q ? 16 : 8;
|
||||
imm = asimd_imm_const(a->imm, a->cmode, a->op);
|
||||
|
||||
|
@ -1791,11 +1788,10 @@ static bool trans_Vimm_1r(DisasContext *s, arg_1reg_imm *a)
|
|||
static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
|
||||
NeonGenWidenFn *widenfn,
|
||||
NeonGenTwo64OpFn *opfn,
|
||||
bool src1_wide)
|
||||
int src1_mop, int src2_mop)
|
||||
{
|
||||
/* 3-regs different lengths, prewidening case (VADDL/VSUBL/VADDW/VSUBW) */
|
||||
TCGv_i64 rn0_64, rn1_64, rm_64;
|
||||
TCGv_i32 rm;
|
||||
|
||||
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
|
||||
return false;
|
||||
|
@ -1807,12 +1803,12 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
|
|||
return false;
|
||||
}
|
||||
|
||||
if (!widenfn || !opfn) {
|
||||
if (!opfn) {
|
||||
/* size == 3 case, which is an entirely different insn group */
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((a->vd & 1) || (src1_wide && (a->vn & 1))) {
|
||||
if ((a->vd & 1) || (src1_mop == MO_Q && (a->vn & 1))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1824,38 +1820,50 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
|
|||
rn1_64 = tcg_temp_new_i64();
|
||||
rm_64 = tcg_temp_new_i64();
|
||||
|
||||
if (src1_wide) {
|
||||
neon_load_reg64(rn0_64, a->vn);
|
||||
if (src1_mop >= 0) {
|
||||
read_neon_element64(rn0_64, a->vn, 0, src1_mop);
|
||||
} else {
|
||||
TCGv_i32 tmp = neon_load_reg(a->vn, 0);
|
||||
TCGv_i32 tmp = tcg_temp_new_i32();
|
||||
read_neon_element32(tmp, a->vn, 0, MO_32);
|
||||
widenfn(rn0_64, tmp);
|
||||
tcg_temp_free_i32(tmp);
|
||||
}
|
||||
rm = neon_load_reg(a->vm, 0);
|
||||
if (src2_mop >= 0) {
|
||||
read_neon_element64(rm_64, a->vm, 0, src2_mop);
|
||||
} else {
|
||||
TCGv_i32 tmp = tcg_temp_new_i32();
|
||||
read_neon_element32(tmp, a->vm, 0, MO_32);
|
||||
widenfn(rm_64, tmp);
|
||||
tcg_temp_free_i32(tmp);
|
||||
}
|
||||
|
||||
widenfn(rm_64, rm);
|
||||
tcg_temp_free_i32(rm);
|
||||
opfn(rn0_64, rn0_64, rm_64);
|
||||
|
||||
/*
|
||||
* Load second pass inputs before storing the first pass result, to
|
||||
* avoid incorrect results if a narrow input overlaps with the result.
|
||||
*/
|
||||
if (src1_wide) {
|
||||
neon_load_reg64(rn1_64, a->vn + 1);
|
||||
if (src1_mop >= 0) {
|
||||
read_neon_element64(rn1_64, a->vn, 1, src1_mop);
|
||||
} else {
|
||||
TCGv_i32 tmp = neon_load_reg(a->vn, 1);
|
||||
TCGv_i32 tmp = tcg_temp_new_i32();
|
||||
read_neon_element32(tmp, a->vn, 1, MO_32);
|
||||
widenfn(rn1_64, tmp);
|
||||
tcg_temp_free_i32(tmp);
|
||||
}
|
||||
rm = neon_load_reg(a->vm, 1);
|
||||
if (src2_mop >= 0) {
|
||||
read_neon_element64(rm_64, a->vm, 1, src2_mop);
|
||||
} else {
|
||||
TCGv_i32 tmp = tcg_temp_new_i32();
|
||||
read_neon_element32(tmp, a->vm, 1, MO_32);
|
||||
widenfn(rm_64, tmp);
|
||||
tcg_temp_free_i32(tmp);
|
||||
}
|
||||
|
||||
neon_store_reg64(rn0_64, a->vd);
|
||||
write_neon_element64(rn0_64, a->vd, 0, MO_64);
|
||||
|
||||
widenfn(rm_64, rm);
|
||||
tcg_temp_free_i32(rm);
|
||||
opfn(rn1_64, rn1_64, rm_64);
|
||||
neon_store_reg64(rn1_64, a->vd + 1);
|
||||
write_neon_element64(rn1_64, a->vd, 1, MO_64);
|
||||
|
||||
tcg_temp_free_i64(rn0_64);
|
||||
tcg_temp_free_i64(rn1_64);
|
||||
|
@ -1864,14 +1872,13 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
|
|||
return true;
|
||||
}
|
||||
|
||||
#define DO_PREWIDEN(INSN, S, EXT, OP, SRC1WIDE) \
|
||||
#define DO_PREWIDEN(INSN, S, OP, SRC1WIDE, SIGN) \
|
||||
static bool trans_##INSN##_3d(DisasContext *s, arg_3diff *a) \
|
||||
{ \
|
||||
static NeonGenWidenFn * const widenfn[] = { \
|
||||
gen_helper_neon_widen_##S##8, \
|
||||
gen_helper_neon_widen_##S##16, \
|
||||
tcg_gen_##EXT##_i32_i64, \
|
||||
NULL, \
|
||||
NULL, NULL, \
|
||||
}; \
|
||||
static NeonGenTwo64OpFn * const addfn[] = { \
|
||||
gen_helper_neon_##OP##l_u16, \
|
||||
|
@ -1879,18 +1886,20 @@ static bool do_prewiden_3d(DisasContext *s, arg_3diff *a,
|
|||
tcg_gen_##OP##_i64, \
|
||||
NULL, \
|
||||
}; \
|
||||
return do_prewiden_3d(s, a, widenfn[a->size], \
|
||||
addfn[a->size], SRC1WIDE); \
|
||||
int narrow_mop = a->size == MO_32 ? MO_32 | SIGN : -1; \
|
||||
return do_prewiden_3d(s, a, widenfn[a->size], addfn[a->size], \
|
||||
SRC1WIDE ? MO_Q : narrow_mop, \
|
||||
narrow_mop); \
|
||||
}
|
||||
|
||||
DO_PREWIDEN(VADDL_S, s, ext, add, false)
|
||||
DO_PREWIDEN(VADDL_U, u, extu, add, false)
|
||||
DO_PREWIDEN(VSUBL_S, s, ext, sub, false)
|
||||
DO_PREWIDEN(VSUBL_U, u, extu, sub, false)
|
||||
DO_PREWIDEN(VADDW_S, s, ext, add, true)
|
||||
DO_PREWIDEN(VADDW_U, u, extu, add, true)
|
||||
DO_PREWIDEN(VSUBW_S, s, ext, sub, true)
|
||||
DO_PREWIDEN(VSUBW_U, u, extu, sub, true)
|
||||
DO_PREWIDEN(VADDL_S, s, add, false, MO_SIGN)
|
||||
DO_PREWIDEN(VADDL_U, u, add, false, 0)
|
||||
DO_PREWIDEN(VSUBL_S, s, sub, false, MO_SIGN)
|
||||
DO_PREWIDEN(VSUBL_U, u, sub, false, 0)
|
||||
DO_PREWIDEN(VADDW_S, s, add, true, MO_SIGN)
|
||||
DO_PREWIDEN(VADDW_U, u, add, true, 0)
|
||||
DO_PREWIDEN(VSUBW_S, s, sub, true, MO_SIGN)
|
||||
DO_PREWIDEN(VSUBW_U, u, sub, true, 0)
|
||||
|
||||
static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
|
||||
NeonGenTwo64OpFn *opfn, NeonGenNarrowFn *narrowfn)
|
||||
|
@ -1927,23 +1936,25 @@ static bool do_narrow_3d(DisasContext *s, arg_3diff *a,
|
|||
rd0 = tcg_temp_new_i32();
|
||||
rd1 = tcg_temp_new_i32();
|
||||
|
||||
neon_load_reg64(rn_64, a->vn);
|
||||
neon_load_reg64(rm_64, a->vm);
|
||||
read_neon_element64(rn_64, a->vn, 0, MO_64);
|
||||
read_neon_element64(rm_64, a->vm, 0, MO_64);
|
||||
|
||||
opfn(rn_64, rn_64, rm_64);
|
||||
|
||||
narrowfn(rd0, rn_64);
|
||||
|
||||
neon_load_reg64(rn_64, a->vn + 1);
|
||||
neon_load_reg64(rm_64, a->vm + 1);
|
||||
read_neon_element64(rn_64, a->vn, 1, MO_64);
|
||||
read_neon_element64(rm_64, a->vm, 1, MO_64);
|
||||
|
||||
opfn(rn_64, rn_64, rm_64);
|
||||
|
||||
narrowfn(rd1, rn_64);
|
||||
|
||||
neon_store_reg(a->vd, 0, rd0);
|
||||
neon_store_reg(a->vd, 1, rd1);
|
||||
write_neon_element32(rd0, a->vd, 0, MO_32);
|
||||
write_neon_element32(rd1, a->vd, 1, MO_32);
|
||||
|
||||
tcg_temp_free_i32(rd0);
|
||||
tcg_temp_free_i32(rd1);
|
||||
tcg_temp_free_i64(rn_64);
|
||||
tcg_temp_free_i64(rm_64);
|
||||
|
||||
|
@ -2018,14 +2029,14 @@ static bool do_long_3d(DisasContext *s, arg_3diff *a,
|
|||
rd0 = tcg_temp_new_i64();
|
||||
rd1 = tcg_temp_new_i64();
|
||||
|
||||
rn = neon_load_reg(a->vn, 0);
|
||||
rm = neon_load_reg(a->vm, 0);
|
||||
rn = tcg_temp_new_i32();
|
||||
rm = tcg_temp_new_i32();
|
||||
read_neon_element32(rn, a->vn, 0, MO_32);
|
||||
read_neon_element32(rm, a->vm, 0, MO_32);
|
||||
opfn(rd0, rn, rm);
|
||||
tcg_temp_free_i32(rn);
|
||||
tcg_temp_free_i32(rm);
|
||||
|
||||
rn = neon_load_reg(a->vn, 1);
|
||||
rm = neon_load_reg(a->vm, 1);
|
||||
read_neon_element32(rn, a->vn, 1, MO_32);
|
||||
read_neon_element32(rm, a->vm, 1, MO_32);
|
||||
opfn(rd1, rn, rm);
|
||||
tcg_temp_free_i32(rn);
|
||||
tcg_temp_free_i32(rm);
|
||||
|
@ -2033,18 +2044,15 @@ static bool do_long_3d(DisasContext *s, arg_3diff *a,
|
|||
/* Don't store results until after all loads: they might overlap */
|
||||
if (accfn) {
|
||||
tmp = tcg_temp_new_i64();
|
||||
neon_load_reg64(tmp, a->vd);
|
||||
accfn(tmp, tmp, rd0);
|
||||
neon_store_reg64(tmp, a->vd);
|
||||
neon_load_reg64(tmp, a->vd + 1);
|
||||
accfn(tmp, tmp, rd1);
|
||||
neon_store_reg64(tmp, a->vd + 1);
|
||||
read_neon_element64(tmp, a->vd, 0, MO_64);
|
||||
accfn(rd0, tmp, rd0);
|
||||
read_neon_element64(tmp, a->vd, 1, MO_64);
|
||||
accfn(rd1, tmp, rd1);
|
||||
tcg_temp_free_i64(tmp);
|
||||
} else {
|
||||
neon_store_reg64(rd0, a->vd);
|
||||
neon_store_reg64(rd1, a->vd + 1);
|
||||
}
|
||||
|
||||
write_neon_element64(rd0, a->vd, 0, MO_64);
|
||||
write_neon_element64(rd1, a->vd, 1, MO_64);
|
||||
tcg_temp_free_i64(rd0);
|
||||
tcg_temp_free_i64(rd1);
|
||||
|
||||
|
@ -2300,9 +2308,9 @@ static bool trans_VMULL_P_3d(DisasContext *s, arg_3diff *a)
|
|||
return true;
|
||||
}
|
||||
|
||||
tcg_gen_gvec_3_ool(neon_reg_offset(a->vd, 0),
|
||||
neon_reg_offset(a->vn, 0),
|
||||
neon_reg_offset(a->vm, 0),
|
||||
tcg_gen_gvec_3_ool(neon_full_reg_offset(a->vd),
|
||||
neon_full_reg_offset(a->vn),
|
||||
neon_full_reg_offset(a->vm),
|
||||
16, 16, 0, fn_gvec);
|
||||
return true;
|
||||
}
|
||||
|
@ -2327,16 +2335,16 @@ static void gen_neon_dup_high16(TCGv_i32 var)
|
|||
|
||||
static inline TCGv_i32 neon_get_scalar(int size, int reg)
|
||||
{
|
||||
TCGv_i32 tmp;
|
||||
if (size == 1) {
|
||||
tmp = neon_load_reg(reg & 7, reg >> 4);
|
||||
TCGv_i32 tmp = tcg_temp_new_i32();
|
||||
if (size == MO_16) {
|
||||
read_neon_element32(tmp, reg & 7, reg >> 4, MO_32);
|
||||
if (reg & 8) {
|
||||
gen_neon_dup_high16(tmp);
|
||||
} else {
|
||||
gen_neon_dup_low16(tmp);
|
||||
}
|
||||
} else {
|
||||
tmp = neon_load_reg(reg & 15, reg >> 4);
|
||||
read_neon_element32(tmp, reg & 15, reg >> 4, MO_32);
|
||||
}
|
||||
return tmp;
|
||||
}
|
||||
|
@ -2350,7 +2358,7 @@ static bool do_2scalar(DisasContext *s, arg_2scalar *a,
|
|||
* perform an accumulation operation of that result into the
|
||||
* destination.
|
||||
*/
|
||||
TCGv_i32 scalar;
|
||||
TCGv_i32 scalar, tmp;
|
||||
int pass;
|
||||
|
||||
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
|
||||
|
@ -2377,17 +2385,20 @@ static bool do_2scalar(DisasContext *s, arg_2scalar *a,
|
|||
}
|
||||
|
||||
scalar = neon_get_scalar(a->size, a->vm);
|
||||
tmp = tcg_temp_new_i32();
|
||||
|
||||
for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
|
||||
TCGv_i32 tmp = neon_load_reg(a->vn, pass);
|
||||
read_neon_element32(tmp, a->vn, pass, MO_32);
|
||||
opfn(tmp, tmp, scalar);
|
||||
if (accfn) {
|
||||
TCGv_i32 rd = neon_load_reg(a->vd, pass);
|
||||
TCGv_i32 rd = tcg_temp_new_i32();
|
||||
read_neon_element32(rd, a->vd, pass, MO_32);
|
||||
accfn(tmp, rd, tmp);
|
||||
tcg_temp_free_i32(rd);
|
||||
}
|
||||
neon_store_reg(a->vd, pass, tmp);
|
||||
write_neon_element32(tmp, a->vd, pass, MO_32);
|
||||
}
|
||||
tcg_temp_free_i32(tmp);
|
||||
tcg_temp_free_i32(scalar);
|
||||
return true;
|
||||
}
|
||||
|
@ -2445,8 +2456,8 @@ static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
|
|||
{
|
||||
/* Two registers and a scalar, using gvec */
|
||||
int vec_size = a->q ? 16 : 8;
|
||||
int rd_ofs = neon_reg_offset(a->vd, 0);
|
||||
int rn_ofs = neon_reg_offset(a->vn, 0);
|
||||
int rd_ofs = neon_full_reg_offset(a->vd);
|
||||
int rn_ofs = neon_full_reg_offset(a->vn);
|
||||
int rm_ofs;
|
||||
int idx;
|
||||
TCGv_ptr fpstatus;
|
||||
|
@ -2477,7 +2488,7 @@ static bool do_2scalar_fp_vec(DisasContext *s, arg_2scalar *a,
|
|||
/* a->vm is M:Vm, which encodes both register and index */
|
||||
idx = extract32(a->vm, a->size + 2, 2);
|
||||
a->vm = extract32(a->vm, 0, a->size + 2);
|
||||
rm_ofs = neon_reg_offset(a->vm, 0);
|
||||
rm_ofs = neon_full_reg_offset(a->vm);
|
||||
|
||||
fpstatus = fpstatus_ptr(a->size == 1 ? FPST_STD_F16 : FPST_STD);
|
||||
tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, fpstatus,
|
||||
|
@ -2542,7 +2553,7 @@ static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
|
|||
* performs a kind of fused op-then-accumulate using a helper
|
||||
* function that takes all of rd, rn and the scalar at once.
|
||||
*/
|
||||
TCGv_i32 scalar;
|
||||
TCGv_i32 scalar, rn, rd;
|
||||
int pass;
|
||||
|
||||
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
|
||||
|
@ -2573,14 +2584,17 @@ static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
|
|||
}
|
||||
|
||||
scalar = neon_get_scalar(a->size, a->vm);
|
||||
rn = tcg_temp_new_i32();
|
||||
rd = tcg_temp_new_i32();
|
||||
|
||||
for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
|
||||
TCGv_i32 rn = neon_load_reg(a->vn, pass);
|
||||
TCGv_i32 rd = neon_load_reg(a->vd, pass);
|
||||
read_neon_element32(rn, a->vn, pass, MO_32);
|
||||
read_neon_element32(rd, a->vd, pass, MO_32);
|
||||
opfn(rd, cpu_env, rn, scalar, rd);
|
||||
tcg_temp_free_i32(rn);
|
||||
neon_store_reg(a->vd, pass, rd);
|
||||
write_neon_element32(rd, a->vd, pass, MO_32);
|
||||
}
|
||||
tcg_temp_free_i32(rn);
|
||||
tcg_temp_free_i32(rd);
|
||||
tcg_temp_free_i32(scalar);
|
||||
|
||||
return true;
|
||||
|
@ -2647,12 +2661,12 @@ static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
|
|||
scalar = neon_get_scalar(a->size, a->vm);
|
||||
|
||||
/* Load all inputs before writing any outputs, in case of overlap */
|
||||
rn = neon_load_reg(a->vn, 0);
|
||||
rn = tcg_temp_new_i32();
|
||||
read_neon_element32(rn, a->vn, 0, MO_32);
|
||||
rn0_64 = tcg_temp_new_i64();
|
||||
opfn(rn0_64, rn, scalar);
|
||||
tcg_temp_free_i32(rn);
|
||||
|
||||
rn = neon_load_reg(a->vn, 1);
|
||||
read_neon_element32(rn, a->vn, 1, MO_32);
|
||||
rn1_64 = tcg_temp_new_i64();
|
||||
opfn(rn1_64, rn, scalar);
|
||||
tcg_temp_free_i32(rn);
|
||||
|
@ -2660,17 +2674,15 @@ static bool do_2scalar_long(DisasContext *s, arg_2scalar *a,
|
|||
|
||||
if (accfn) {
|
||||
TCGv_i64 t64 = tcg_temp_new_i64();
|
||||
neon_load_reg64(t64, a->vd);
|
||||
accfn(t64, t64, rn0_64);
|
||||
neon_store_reg64(t64, a->vd);
|
||||
neon_load_reg64(t64, a->vd + 1);
|
||||
accfn(t64, t64, rn1_64);
|
||||
neon_store_reg64(t64, a->vd + 1);
|
||||
read_neon_element64(t64, a->vd, 0, MO_64);
|
||||
accfn(rn0_64, t64, rn0_64);
|
||||
read_neon_element64(t64, a->vd, 1, MO_64);
|
||||
accfn(rn1_64, t64, rn1_64);
|
||||
tcg_temp_free_i64(t64);
|
||||
} else {
|
||||
neon_store_reg64(rn0_64, a->vd);
|
||||
neon_store_reg64(rn1_64, a->vd + 1);
|
||||
}
|
||||
|
||||
write_neon_element64(rn0_64, a->vd, 0, MO_64);
|
||||
write_neon_element64(rn1_64, a->vd, 1, MO_64);
|
||||
tcg_temp_free_i64(rn0_64);
|
||||
tcg_temp_free_i64(rn1_64);
|
||||
return true;
|
||||
|
@ -2803,10 +2815,10 @@ static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
|
|||
right = tcg_temp_new_i64();
|
||||
dest = tcg_temp_new_i64();
|
||||
|
||||
neon_load_reg64(right, a->vn);
|
||||
neon_load_reg64(left, a->vm);
|
||||
read_neon_element64(right, a->vn, 0, MO_64);
|
||||
read_neon_element64(left, a->vm, 0, MO_64);
|
||||
tcg_gen_extract2_i64(dest, right, left, a->imm * 8);
|
||||
neon_store_reg64(dest, a->vd);
|
||||
write_neon_element64(dest, a->vd, 0, MO_64);
|
||||
|
||||
tcg_temp_free_i64(left);
|
||||
tcg_temp_free_i64(right);
|
||||
|
@ -2822,21 +2834,21 @@ static bool trans_VEXT(DisasContext *s, arg_VEXT *a)
|
|||
destright = tcg_temp_new_i64();
|
||||
|
||||
if (a->imm < 8) {
|
||||
neon_load_reg64(right, a->vn);
|
||||
neon_load_reg64(middle, a->vn + 1);
|
||||
read_neon_element64(right, a->vn, 0, MO_64);
|
||||
read_neon_element64(middle, a->vn, 1, MO_64);
|
||||
tcg_gen_extract2_i64(destright, right, middle, a->imm * 8);
|
||||
neon_load_reg64(left, a->vm);
|
||||
read_neon_element64(left, a->vm, 0, MO_64);
|
||||
tcg_gen_extract2_i64(destleft, middle, left, a->imm * 8);
|
||||
} else {
|
||||
neon_load_reg64(right, a->vn + 1);
|
||||
neon_load_reg64(middle, a->vm);
|
||||
read_neon_element64(right, a->vn, 1, MO_64);
|
||||
read_neon_element64(middle, a->vm, 0, MO_64);
|
||||
tcg_gen_extract2_i64(destright, right, middle, (a->imm - 8) * 8);
|
||||
neon_load_reg64(left, a->vm + 1);
|
||||
read_neon_element64(left, a->vm, 1, MO_64);
|
||||
tcg_gen_extract2_i64(destleft, middle, left, (a->imm - 8) * 8);
|
||||
}
|
||||
|
||||
neon_store_reg64(destright, a->vd);
|
||||
neon_store_reg64(destleft, a->vd + 1);
|
||||
write_neon_element64(destright, a->vd, 0, MO_64);
|
||||
write_neon_element64(destleft, a->vd, 1, MO_64);
|
||||
|
||||
tcg_temp_free_i64(destright);
|
||||
tcg_temp_free_i64(destleft);
|
||||
|
@ -2876,30 +2888,34 @@ static bool trans_VTBL(DisasContext *s, arg_VTBL *a)
|
|||
return false;
|
||||
}
|
||||
n <<= 3;
|
||||
tmp = tcg_temp_new_i32();
|
||||
if (a->op) {
|
||||
tmp = neon_load_reg(a->vd, 0);
|
||||
read_neon_element32(tmp, a->vd, 0, MO_32);
|
||||
} else {
|
||||
tmp = tcg_temp_new_i32();
|
||||
tcg_gen_movi_i32(tmp, 0);
|
||||
}
|
||||
tmp2 = neon_load_reg(a->vm, 0);
|
||||
tmp2 = tcg_temp_new_i32();
|
||||
read_neon_element32(tmp2, a->vm, 0, MO_32);
|
||||
ptr1 = vfp_reg_ptr(true, a->vn);
|
||||
tmp4 = tcg_const_i32(n);
|
||||
gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp4);
|
||||
tcg_temp_free_i32(tmp);
|
||||
|
||||
if (a->op) {
|
||||
tmp = neon_load_reg(a->vd, 1);
|
||||
read_neon_element32(tmp, a->vd, 1, MO_32);
|
||||
} else {
|
||||
tmp = tcg_temp_new_i32();
|
||||
tcg_gen_movi_i32(tmp, 0);
|
||||
}
|
||||
tmp3 = neon_load_reg(a->vm, 1);
|
||||
tmp3 = tcg_temp_new_i32();
|
||||
read_neon_element32(tmp3, a->vm, 1, MO_32);
|
||||
gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp4);
|
||||
tcg_temp_free_i32(tmp);
|
||||
tcg_temp_free_i32(tmp4);
|
||||
tcg_temp_free_ptr(ptr1);
|
||||
neon_store_reg(a->vd, 0, tmp2);
|
||||
neon_store_reg(a->vd, 1, tmp3);
|
||||
tcg_temp_free_i32(tmp);
|
||||
|
||||
write_neon_element32(tmp2, a->vd, 0, MO_32);
|
||||
write_neon_element32(tmp3, a->vd, 1, MO_32);
|
||||
tcg_temp_free_i32(tmp2);
|
||||
tcg_temp_free_i32(tmp3);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -2923,7 +2939,7 @@ static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
|
|||
return true;
|
||||
}
|
||||
|
||||
tcg_gen_gvec_dup_mem(a->size, neon_reg_offset(a->vd, 0),
|
||||
tcg_gen_gvec_dup_mem(a->size, neon_full_reg_offset(a->vd),
|
||||
neon_element_offset(a->vm, a->index, a->size),
|
||||
a->q ? 16 : 8, a->q ? 16 : 8);
|
||||
return true;
|
||||
|
@ -2932,6 +2948,7 @@ static bool trans_VDUP_scalar(DisasContext *s, arg_VDUP_scalar *a)
|
|||
static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
|
||||
{
|
||||
int pass, half;
|
||||
TCGv_i32 tmp[2];
|
||||
|
||||
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
|
||||
return false;
|
||||
|
@ -2955,11 +2972,12 @@ static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
|
|||
return true;
|
||||
}
|
||||
|
||||
for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
|
||||
TCGv_i32 tmp[2];
|
||||
tmp[0] = tcg_temp_new_i32();
|
||||
tmp[1] = tcg_temp_new_i32();
|
||||
|
||||
for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
|
||||
for (half = 0; half < 2; half++) {
|
||||
tmp[half] = neon_load_reg(a->vm, pass * 2 + half);
|
||||
read_neon_element32(tmp[half], a->vm, pass * 2 + half, MO_32);
|
||||
switch (a->size) {
|
||||
case 0:
|
||||
tcg_gen_bswap32_i32(tmp[half], tmp[half]);
|
||||
|
@ -2973,9 +2991,12 @@ static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
|
|||
g_assert_not_reached();
|
||||
}
|
||||
}
|
||||
neon_store_reg(a->vd, pass * 2, tmp[1]);
|
||||
neon_store_reg(a->vd, pass * 2 + 1, tmp[0]);
|
||||
write_neon_element32(tmp[1], a->vd, pass * 2, MO_32);
|
||||
write_neon_element32(tmp[0], a->vd, pass * 2 + 1, MO_32);
|
||||
}
|
||||
|
||||
tcg_temp_free_i32(tmp[0]);
|
||||
tcg_temp_free_i32(tmp[1]);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -3020,23 +3041,25 @@ static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
|
|||
rm0_64 = tcg_temp_new_i64();
|
||||
rm1_64 = tcg_temp_new_i64();
|
||||
rd_64 = tcg_temp_new_i64();
|
||||
tmp = neon_load_reg(a->vm, pass * 2);
|
||||
|
||||
tmp = tcg_temp_new_i32();
|
||||
read_neon_element32(tmp, a->vm, pass * 2, MO_32);
|
||||
widenfn(rm0_64, tmp);
|
||||
tcg_temp_free_i32(tmp);
|
||||
tmp = neon_load_reg(a->vm, pass * 2 + 1);
|
||||
read_neon_element32(tmp, a->vm, pass * 2 + 1, MO_32);
|
||||
widenfn(rm1_64, tmp);
|
||||
tcg_temp_free_i32(tmp);
|
||||
|
||||
opfn(rd_64, rm0_64, rm1_64);
|
||||
tcg_temp_free_i64(rm0_64);
|
||||
tcg_temp_free_i64(rm1_64);
|
||||
|
||||
if (accfn) {
|
||||
TCGv_i64 tmp64 = tcg_temp_new_i64();
|
||||
neon_load_reg64(tmp64, a->vd + pass);
|
||||
read_neon_element64(tmp64, a->vd, pass, MO_64);
|
||||
accfn(rd_64, tmp64, rd_64);
|
||||
tcg_temp_free_i64(tmp64);
|
||||
}
|
||||
neon_store_reg64(rd_64, a->vd + pass);
|
||||
write_neon_element64(rd_64, a->vd, pass, MO_64);
|
||||
tcg_temp_free_i64(rd_64);
|
||||
}
|
||||
return true;
|
||||
|
@ -3234,12 +3257,14 @@ static bool do_vmovn(DisasContext *s, arg_2misc *a,
|
|||
rd0 = tcg_temp_new_i32();
|
||||
rd1 = tcg_temp_new_i32();
|
||||
|
||||
neon_load_reg64(rm, a->vm);
|
||||
read_neon_element64(rm, a->vm, 0, MO_64);
|
||||
narrowfn(rd0, cpu_env, rm);
|
||||
neon_load_reg64(rm, a->vm + 1);
|
||||
read_neon_element64(rm, a->vm, 1, MO_64);
|
||||
narrowfn(rd1, cpu_env, rm);
|
||||
neon_store_reg(a->vd, 0, rd0);
|
||||
neon_store_reg(a->vd, 1, rd1);
|
||||
write_neon_element32(rd0, a->vd, 0, MO_32);
|
||||
write_neon_element32(rd1, a->vd, 1, MO_32);
|
||||
tcg_temp_free_i32(rd0);
|
||||
tcg_temp_free_i32(rd1);
|
||||
tcg_temp_free_i64(rm);
|
||||
return true;
|
||||
}
|
||||
|
@ -3296,16 +3321,18 @@ static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
|
|||
}
|
||||
|
||||
rd = tcg_temp_new_i64();
|
||||
rm0 = tcg_temp_new_i32();
|
||||
rm1 = tcg_temp_new_i32();
|
||||
|
||||
rm0 = neon_load_reg(a->vm, 0);
|
||||
rm1 = neon_load_reg(a->vm, 1);
|
||||
read_neon_element32(rm0, a->vm, 0, MO_32);
|
||||
read_neon_element32(rm1, a->vm, 1, MO_32);
|
||||
|
||||
widenfn(rd, rm0);
|
||||
tcg_gen_shli_i64(rd, rd, 8 << a->size);
|
||||
neon_store_reg64(rd, a->vd);
|
||||
write_neon_element64(rd, a->vd, 0, MO_64);
|
||||
widenfn(rd, rm1);
|
||||
tcg_gen_shli_i64(rd, rd, 8 << a->size);
|
||||
neon_store_reg64(rd, a->vd + 1);
|
||||
write_neon_element64(rd, a->vd, 1, MO_64);
|
||||
|
||||
tcg_temp_free_i64(rd);
|
||||
tcg_temp_free_i32(rm0);
|
||||
|
@ -3339,21 +3366,25 @@ static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
|
|||
|
||||
fpst = fpstatus_ptr(FPST_STD);
|
||||
ahp = get_ahp_flag();
|
||||
tmp = neon_load_reg(a->vm, 0);
|
||||
tmp = tcg_temp_new_i32();
|
||||
read_neon_element32(tmp, a->vm, 0, MO_32);
|
||||
gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
|
||||
tmp2 = neon_load_reg(a->vm, 1);
|
||||
tmp2 = tcg_temp_new_i32();
|
||||
read_neon_element32(tmp2, a->vm, 1, MO_32);
|
||||
gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
|
||||
tcg_gen_shli_i32(tmp2, tmp2, 16);
|
||||
tcg_gen_or_i32(tmp2, tmp2, tmp);
|
||||
tcg_temp_free_i32(tmp);
|
||||
tmp = neon_load_reg(a->vm, 2);
|
||||
read_neon_element32(tmp, a->vm, 2, MO_32);
|
||||
gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
|
||||
tmp3 = neon_load_reg(a->vm, 3);
|
||||
neon_store_reg(a->vd, 0, tmp2);
|
||||
tmp3 = tcg_temp_new_i32();
|
||||
read_neon_element32(tmp3, a->vm, 3, MO_32);
|
||||
write_neon_element32(tmp2, a->vd, 0, MO_32);
|
||||
tcg_temp_free_i32(tmp2);
|
||||
gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
|
||||
tcg_gen_shli_i32(tmp3, tmp3, 16);
|
||||
tcg_gen_or_i32(tmp3, tmp3, tmp);
|
||||
neon_store_reg(a->vd, 1, tmp3);
|
||||
write_neon_element32(tmp3, a->vd, 1, MO_32);
|
||||
tcg_temp_free_i32(tmp3);
|
||||
tcg_temp_free_i32(tmp);
|
||||
tcg_temp_free_i32(ahp);
|
||||
tcg_temp_free_ptr(fpst);
|
||||
|
@ -3388,21 +3419,25 @@ static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
|
|||
fpst = fpstatus_ptr(FPST_STD);
|
||||
ahp = get_ahp_flag();
|
||||
tmp3 = tcg_temp_new_i32();
|
||||
tmp = neon_load_reg(a->vm, 0);
|
||||
tmp2 = neon_load_reg(a->vm, 1);
|
||||
tmp2 = tcg_temp_new_i32();
|
||||
tmp = tcg_temp_new_i32();
|
||||
read_neon_element32(tmp, a->vm, 0, MO_32);
|
||||
read_neon_element32(tmp2, a->vm, 1, MO_32);
|
||||
tcg_gen_ext16u_i32(tmp3, tmp);
|
||||
gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
|
||||
neon_store_reg(a->vd, 0, tmp3);
|
||||
write_neon_element32(tmp3, a->vd, 0, MO_32);
|
||||
tcg_gen_shri_i32(tmp, tmp, 16);
|
||||
gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
|
||||
neon_store_reg(a->vd, 1, tmp);
|
||||
tmp3 = tcg_temp_new_i32();
|
||||
write_neon_element32(tmp, a->vd, 1, MO_32);
|
||||
tcg_temp_free_i32(tmp);
|
||||
tcg_gen_ext16u_i32(tmp3, tmp2);
|
||||
gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
|
||||
neon_store_reg(a->vd, 2, tmp3);
|
||||
write_neon_element32(tmp3, a->vd, 2, MO_32);
|
||||
tcg_temp_free_i32(tmp3);
|
||||
tcg_gen_shri_i32(tmp2, tmp2, 16);
|
||||
gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
|
||||
neon_store_reg(a->vd, 3, tmp2);
|
||||
write_neon_element32(tmp2, a->vd, 3, MO_32);
|
||||
tcg_temp_free_i32(tmp2);
|
||||
tcg_temp_free_i32(ahp);
|
||||
tcg_temp_free_ptr(fpst);
|
||||
|
||||
|
@ -3412,8 +3447,8 @@ static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
|
|||
static bool do_2misc_vec(DisasContext *s, arg_2misc *a, GVecGen2Fn *fn)
|
||||
{
|
||||
int vec_size = a->q ? 16 : 8;
|
||||
int rd_ofs = neon_reg_offset(a->vd, 0);
|
||||
int rm_ofs = neon_reg_offset(a->vm, 0);
|
||||
int rd_ofs = neon_full_reg_offset(a->vd);
|
||||
int rm_ofs = neon_full_reg_offset(a->vm);
|
||||
|
||||
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
|
||||
return false;
|
||||
|
@ -3508,6 +3543,7 @@ DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2)
|
|||
|
||||
static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
|
||||
{
|
||||
TCGv_i32 tmp;
|
||||
int pass;
|
||||
|
||||
/* Handle a 2-reg-misc operation by iterating 32 bits at a time */
|
||||
|
@ -3533,11 +3569,13 @@ static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn)
|
|||
return true;
|
||||
}
|
||||
|
||||
tmp = tcg_temp_new_i32();
|
||||
for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
|
||||
TCGv_i32 tmp = neon_load_reg(a->vm, pass);
|
||||
read_neon_element32(tmp, a->vm, pass, MO_32);
|
||||
fn(tmp, tmp);
|
||||
neon_store_reg(a->vd, pass, tmp);
|
||||
write_neon_element32(tmp, a->vd, pass, MO_32);
|
||||
}
|
||||
tcg_temp_free_i32(tmp);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -3812,10 +3850,10 @@ static bool trans_VSWP(DisasContext *s, arg_2misc *a)
|
|||
rm = tcg_temp_new_i64();
|
||||
rd = tcg_temp_new_i64();
|
||||
for (pass = 0; pass < (a->q ? 2 : 1); pass++) {
|
||||
neon_load_reg64(rm, a->vm + pass);
|
||||
neon_load_reg64(rd, a->vd + pass);
|
||||
neon_store_reg64(rm, a->vd + pass);
|
||||
neon_store_reg64(rd, a->vm + pass);
|
||||
read_neon_element64(rm, a->vm, pass, MO_64);
|
||||
read_neon_element64(rd, a->vd, pass, MO_64);
|
||||
write_neon_element64(rm, a->vd, pass, MO_64);
|
||||
write_neon_element64(rd, a->vm, pass, MO_64);
|
||||
}
|
||||
tcg_temp_free_i64(rm);
|
||||
tcg_temp_free_i64(rd);
|
||||
|
@ -3890,25 +3928,29 @@ static bool trans_VTRN(DisasContext *s, arg_2misc *a)
|
|||
return true;
|
||||
}
|
||||
|
||||
if (a->size == 2) {
|
||||
tmp = tcg_temp_new_i32();
|
||||
tmp2 = tcg_temp_new_i32();
|
||||
if (a->size == MO_32) {
|
||||
for (pass = 0; pass < (a->q ? 4 : 2); pass += 2) {
|
||||
tmp = neon_load_reg(a->vm, pass);
|
||||
tmp2 = neon_load_reg(a->vd, pass + 1);
|
||||
neon_store_reg(a->vm, pass, tmp2);
|
||||
neon_store_reg(a->vd, pass + 1, tmp);
|
||||
read_neon_element32(tmp, a->vm, pass, MO_32);
|
||||
read_neon_element32(tmp2, a->vd, pass + 1, MO_32);
|
||||
write_neon_element32(tmp2, a->vm, pass, MO_32);
|
||||
write_neon_element32(tmp, a->vd, pass + 1, MO_32);
|
||||
}
|
||||
} else {
|
||||
for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
|
||||
tmp = neon_load_reg(a->vm, pass);
|
||||
tmp2 = neon_load_reg(a->vd, pass);
|
||||
if (a->size == 0) {
|
||||
read_neon_element32(tmp, a->vm, pass, MO_32);
|
||||
read_neon_element32(tmp2, a->vd, pass, MO_32);
|
||||
if (a->size == MO_8) {
|
||||
gen_neon_trn_u8(tmp, tmp2);
|
||||
} else {
|
||||
gen_neon_trn_u16(tmp, tmp2);
|
||||
}
|
||||
neon_store_reg(a->vm, pass, tmp2);
|
||||
neon_store_reg(a->vd, pass, tmp);
|
||||
write_neon_element32(tmp2, a->vm, pass, MO_32);
|
||||
write_neon_element32(tmp, a->vd, pass, MO_32);
|
||||
}
|
||||
}
|
||||
tcg_temp_free_i32(tmp);
|
||||
tcg_temp_free_i32(tmp2);
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -236,8 +236,8 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
|
|||
tcg_gen_ext_i32_i64(nf, cpu_NF);
|
||||
tcg_gen_ext_i32_i64(vf, cpu_VF);
|
||||
|
||||
neon_load_reg64(frn, rn);
|
||||
neon_load_reg64(frm, rm);
|
||||
vfp_load_reg64(frn, rn);
|
||||
vfp_load_reg64(frm, rm);
|
||||
switch (a->cc) {
|
||||
case 0: /* eq: Z */
|
||||
tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
|
||||
|
@ -264,7 +264,7 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
|
|||
tcg_temp_free_i64(tmp);
|
||||
break;
|
||||
}
|
||||
neon_store_reg64(dest, rd);
|
||||
vfp_store_reg64(dest, rd);
|
||||
tcg_temp_free_i64(frn);
|
||||
tcg_temp_free_i64(frm);
|
||||
tcg_temp_free_i64(dest);
|
||||
|
@ -283,8 +283,8 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
|
|||
frn = tcg_temp_new_i32();
|
||||
frm = tcg_temp_new_i32();
|
||||
dest = tcg_temp_new_i32();
|
||||
neon_load_reg32(frn, rn);
|
||||
neon_load_reg32(frm, rm);
|
||||
vfp_load_reg32(frn, rn);
|
||||
vfp_load_reg32(frm, rm);
|
||||
switch (a->cc) {
|
||||
case 0: /* eq: Z */
|
||||
tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
|
||||
|
@ -315,7 +315,7 @@ static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
|
|||
if (sz == 1) {
|
||||
tcg_gen_andi_i32(dest, dest, 0xffff);
|
||||
}
|
||||
neon_store_reg32(dest, rd);
|
||||
vfp_store_reg32(dest, rd);
|
||||
tcg_temp_free_i32(frn);
|
||||
tcg_temp_free_i32(frm);
|
||||
tcg_temp_free_i32(dest);
|
||||
|
@ -385,9 +385,9 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
|
|||
TCGv_i64 tcg_res;
|
||||
tcg_op = tcg_temp_new_i64();
|
||||
tcg_res = tcg_temp_new_i64();
|
||||
neon_load_reg64(tcg_op, rm);
|
||||
vfp_load_reg64(tcg_op, rm);
|
||||
gen_helper_rintd(tcg_res, tcg_op, fpst);
|
||||
neon_store_reg64(tcg_res, rd);
|
||||
vfp_store_reg64(tcg_res, rd);
|
||||
tcg_temp_free_i64(tcg_op);
|
||||
tcg_temp_free_i64(tcg_res);
|
||||
} else {
|
||||
|
@ -395,13 +395,13 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
|
|||
TCGv_i32 tcg_res;
|
||||
tcg_op = tcg_temp_new_i32();
|
||||
tcg_res = tcg_temp_new_i32();
|
||||
neon_load_reg32(tcg_op, rm);
|
||||
vfp_load_reg32(tcg_op, rm);
|
||||
if (sz == 1) {
|
||||
gen_helper_rinth(tcg_res, tcg_op, fpst);
|
||||
} else {
|
||||
gen_helper_rints(tcg_res, tcg_op, fpst);
|
||||
}
|
||||
neon_store_reg32(tcg_res, rd);
|
||||
vfp_store_reg32(tcg_res, rd);
|
||||
tcg_temp_free_i32(tcg_op);
|
||||
tcg_temp_free_i32(tcg_res);
|
||||
}
|
||||
|
@ -463,14 +463,14 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
|
|||
tcg_double = tcg_temp_new_i64();
|
||||
tcg_res = tcg_temp_new_i64();
|
||||
tcg_tmp = tcg_temp_new_i32();
|
||||
neon_load_reg64(tcg_double, rm);
|
||||
vfp_load_reg64(tcg_double, rm);
|
||||
if (is_signed) {
|
||||
gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
|
||||
} else {
|
||||
gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
|
||||
}
|
||||
tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
|
||||
neon_store_reg32(tcg_tmp, rd);
|
||||
vfp_store_reg32(tcg_tmp, rd);
|
||||
tcg_temp_free_i32(tcg_tmp);
|
||||
tcg_temp_free_i64(tcg_res);
|
||||
tcg_temp_free_i64(tcg_double);
|
||||
|
@ -478,7 +478,7 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
|
|||
TCGv_i32 tcg_single, tcg_res;
|
||||
tcg_single = tcg_temp_new_i32();
|
||||
tcg_res = tcg_temp_new_i32();
|
||||
neon_load_reg32(tcg_single, rm);
|
||||
vfp_load_reg32(tcg_single, rm);
|
||||
if (sz == 1) {
|
||||
if (is_signed) {
|
||||
gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst);
|
||||
|
@ -492,7 +492,7 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
|
|||
gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
|
||||
}
|
||||
}
|
||||
neon_store_reg32(tcg_res, rd);
|
||||
vfp_store_reg32(tcg_res, rd);
|
||||
tcg_temp_free_i32(tcg_res);
|
||||
tcg_temp_free_i32(tcg_single);
|
||||
}
|
||||
|
@ -511,11 +511,9 @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
|
|||
{
|
||||
/* VMOV scalar to general purpose register */
|
||||
TCGv_i32 tmp;
|
||||
int pass;
|
||||
uint32_t offset;
|
||||
|
||||
/* SIZE == 2 is a VFP instruction; otherwise NEON. */
|
||||
if (a->size == 2
|
||||
/* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
|
||||
if (a->size == MO_32
|
||||
? !dc_isar_feature(aa32_fpsp_v2, s)
|
||||
: !arm_dc_feature(s, ARM_FEATURE_NEON)) {
|
||||
return false;
|
||||
|
@ -526,44 +524,12 @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
|
|||
return false;
|
||||
}
|
||||
|
||||
offset = a->index << a->size;
|
||||
pass = extract32(offset, 2, 1);
|
||||
offset = extract32(offset, 0, 2) * 8;
|
||||
|
||||
if (!vfp_access_check(s)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
tmp = neon_load_reg(a->vn, pass);
|
||||
switch (a->size) {
|
||||
case 0:
|
||||
if (offset) {
|
||||
tcg_gen_shri_i32(tmp, tmp, offset);
|
||||
}
|
||||
if (a->u) {
|
||||
gen_uxtb(tmp);
|
||||
} else {
|
||||
gen_sxtb(tmp);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
if (a->u) {
|
||||
if (offset) {
|
||||
tcg_gen_shri_i32(tmp, tmp, 16);
|
||||
} else {
|
||||
gen_uxth(tmp);
|
||||
}
|
||||
} else {
|
||||
if (offset) {
|
||||
tcg_gen_sari_i32(tmp, tmp, 16);
|
||||
} else {
|
||||
gen_sxth(tmp);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
break;
|
||||
}
|
||||
tmp = tcg_temp_new_i32();
|
||||
read_neon_element32(tmp, a->vn, a->index, a->size | (a->u ? 0 : MO_SIGN));
|
||||
store_reg(s, a->rt, tmp);
|
||||
|
||||
return true;
|
||||
|
@ -572,12 +538,10 @@ static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
|
|||
static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
|
||||
{
|
||||
/* VMOV general purpose register to scalar */
|
||||
TCGv_i32 tmp, tmp2;
|
||||
int pass;
|
||||
uint32_t offset;
|
||||
TCGv_i32 tmp;
|
||||
|
||||
/* SIZE == 2 is a VFP instruction; otherwise NEON. */
|
||||
if (a->size == 2
|
||||
/* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
|
||||
if (a->size == MO_32
|
||||
? !dc_isar_feature(aa32_fpsp_v2, s)
|
||||
: !arm_dc_feature(s, ARM_FEATURE_NEON)) {
|
||||
return false;
|
||||
|
@ -588,30 +552,13 @@ static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
|
|||
return false;
|
||||
}
|
||||
|
||||
offset = a->index << a->size;
|
||||
pass = extract32(offset, 2, 1);
|
||||
offset = extract32(offset, 0, 2) * 8;
|
||||
|
||||
if (!vfp_access_check(s)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
tmp = load_reg(s, a->rt);
|
||||
switch (a->size) {
|
||||
case 0:
|
||||
tmp2 = neon_load_reg(a->vn, pass);
|
||||
tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
|
||||
tcg_temp_free_i32(tmp2);
|
||||
break;
|
||||
case 1:
|
||||
tmp2 = neon_load_reg(a->vn, pass);
|
||||
tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
|
||||
tcg_temp_free_i32(tmp2);
|
||||
break;
|
||||
case 2:
|
||||
break;
|
||||
}
|
||||
neon_store_reg(a->vn, pass, tmp);
|
||||
write_neon_element32(tmp, a->vn, a->index, a->size);
|
||||
tcg_temp_free_i32(tmp);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -653,7 +600,7 @@ static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
|
|||
}
|
||||
|
||||
tmp = load_reg(s, a->rt);
|
||||
tcg_gen_gvec_dup_i32(size, neon_reg_offset(a->vn, 0),
|
||||
tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn),
|
||||
vec_size, vec_size, tmp);
|
||||
tcg_temp_free_i32(tmp);
|
||||
|
||||
|
@ -829,14 +776,14 @@ static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
|
|||
if (a->l) {
|
||||
/* VFP to general purpose register */
|
||||
tmp = tcg_temp_new_i32();
|
||||
neon_load_reg32(tmp, a->vn);
|
||||
vfp_load_reg32(tmp, a->vn);
|
||||
tcg_gen_andi_i32(tmp, tmp, 0xffff);
|
||||
store_reg(s, a->rt, tmp);
|
||||
} else {
|
||||
/* general purpose register to VFP */
|
||||
tmp = load_reg(s, a->rt);
|
||||
tcg_gen_andi_i32(tmp, tmp, 0xffff);
|
||||
neon_store_reg32(tmp, a->vn);
|
||||
vfp_store_reg32(tmp, a->vn);
|
||||
tcg_temp_free_i32(tmp);
|
||||
}
|
||||
|
||||
|
@ -858,7 +805,7 @@ static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
|
|||
if (a->l) {
|
||||
/* VFP to general purpose register */
|
||||
tmp = tcg_temp_new_i32();
|
||||
neon_load_reg32(tmp, a->vn);
|
||||
vfp_load_reg32(tmp, a->vn);
|
||||
if (a->rt == 15) {
|
||||
/* Set the 4 flag bits in the CPSR. */
|
||||
gen_set_nzcv(tmp);
|
||||
|
@ -869,7 +816,7 @@ static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
|
|||
} else {
|
||||
/* general purpose register to VFP */
|
||||
tmp = load_reg(s, a->rt);
|
||||
neon_store_reg32(tmp, a->vn);
|
||||
vfp_store_reg32(tmp, a->vn);
|
||||
tcg_temp_free_i32(tmp);
|
||||
}
|
||||
|
||||
|
@ -895,18 +842,18 @@ static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
|
|||
if (a->op) {
|
||||
/* fpreg to gpreg */
|
||||
tmp = tcg_temp_new_i32();
|
||||
neon_load_reg32(tmp, a->vm);
|
||||
vfp_load_reg32(tmp, a->vm);
|
||||
store_reg(s, a->rt, tmp);
|
||||
tmp = tcg_temp_new_i32();
|
||||
neon_load_reg32(tmp, a->vm + 1);
|
||||
vfp_load_reg32(tmp, a->vm + 1);
|
||||
store_reg(s, a->rt2, tmp);
|
||||
} else {
|
||||
/* gpreg to fpreg */
|
||||
tmp = load_reg(s, a->rt);
|
||||
neon_store_reg32(tmp, a->vm);
|
||||
vfp_store_reg32(tmp, a->vm);
|
||||
tcg_temp_free_i32(tmp);
|
||||
tmp = load_reg(s, a->rt2);
|
||||
neon_store_reg32(tmp, a->vm + 1);
|
||||
vfp_store_reg32(tmp, a->vm + 1);
|
||||
tcg_temp_free_i32(tmp);
|
||||
}
|
||||
|
||||
|
@ -938,18 +885,18 @@ static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
|
|||
if (a->op) {
|
||||
/* fpreg to gpreg */
|
||||
tmp = tcg_temp_new_i32();
|
||||
neon_load_reg32(tmp, a->vm * 2);
|
||||
vfp_load_reg32(tmp, a->vm * 2);
|
||||
store_reg(s, a->rt, tmp);
|
||||
tmp = tcg_temp_new_i32();
|
||||
neon_load_reg32(tmp, a->vm * 2 + 1);
|
||||
vfp_load_reg32(tmp, a->vm * 2 + 1);
|
||||
store_reg(s, a->rt2, tmp);
|
||||
} else {
|
||||
/* gpreg to fpreg */
|
||||
tmp = load_reg(s, a->rt);
|
||||
neon_store_reg32(tmp, a->vm * 2);
|
||||
vfp_store_reg32(tmp, a->vm * 2);
|
||||
tcg_temp_free_i32(tmp);
|
||||
tmp = load_reg(s, a->rt2);
|
||||
neon_store_reg32(tmp, a->vm * 2 + 1);
|
||||
vfp_store_reg32(tmp, a->vm * 2 + 1);
|
||||
tcg_temp_free_i32(tmp);
|
||||
}
|
||||
|
||||
|
@ -980,9 +927,9 @@ static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
|
|||
tmp = tcg_temp_new_i32();
|
||||
if (a->l) {
|
||||
gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
|
||||
neon_store_reg32(tmp, a->vd);
|
||||
vfp_store_reg32(tmp, a->vd);
|
||||
} else {
|
||||
neon_load_reg32(tmp, a->vd);
|
||||
vfp_load_reg32(tmp, a->vd);
|
||||
gen_aa32_st16(s, tmp, addr, get_mem_index(s));
|
||||
}
|
||||
tcg_temp_free_i32(tmp);
|
||||
|
@ -1014,9 +961,9 @@ static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
|
|||
tmp = tcg_temp_new_i32();
|
||||
if (a->l) {
|
||||
gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
|
||||
neon_store_reg32(tmp, a->vd);
|
||||
vfp_store_reg32(tmp, a->vd);
|
||||
} else {
|
||||
neon_load_reg32(tmp, a->vd);
|
||||
vfp_load_reg32(tmp, a->vd);
|
||||
gen_aa32_st32(s, tmp, addr, get_mem_index(s));
|
||||
}
|
||||
tcg_temp_free_i32(tmp);
|
||||
|
@ -1055,9 +1002,9 @@ static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
|
|||
tmp = tcg_temp_new_i64();
|
||||
if (a->l) {
|
||||
gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
|
||||
neon_store_reg64(tmp, a->vd);
|
||||
vfp_store_reg64(tmp, a->vd);
|
||||
} else {
|
||||
neon_load_reg64(tmp, a->vd);
|
||||
vfp_load_reg64(tmp, a->vd);
|
||||
gen_aa32_st64(s, tmp, addr, get_mem_index(s));
|
||||
}
|
||||
tcg_temp_free_i64(tmp);
|
||||
|
@ -1119,10 +1066,10 @@ static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
|
|||
if (a->l) {
|
||||
/* load */
|
||||
gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
|
||||
neon_store_reg32(tmp, a->vd + i);
|
||||
vfp_store_reg32(tmp, a->vd + i);
|
||||
} else {
|
||||
/* store */
|
||||
neon_load_reg32(tmp, a->vd + i);
|
||||
vfp_load_reg32(tmp, a->vd + i);
|
||||
gen_aa32_st32(s, tmp, addr, get_mem_index(s));
|
||||
}
|
||||
tcg_gen_addi_i32(addr, addr, offset);
|
||||
|
@ -1202,10 +1149,10 @@ static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
|
|||
if (a->l) {
|
||||
/* load */
|
||||
gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
|
||||
neon_store_reg64(tmp, a->vd + i);
|
||||
vfp_store_reg64(tmp, a->vd + i);
|
||||
} else {
|
||||
/* store */
|
||||
neon_load_reg64(tmp, a->vd + i);
|
||||
vfp_load_reg64(tmp, a->vd + i);
|
||||
gen_aa32_st64(s, tmp, addr, get_mem_index(s));
|
||||
}
|
||||
tcg_gen_addi_i32(addr, addr, offset);
|
||||
|
@ -1338,15 +1285,15 @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
|
|||
fd = tcg_temp_new_i32();
|
||||
fpst = fpstatus_ptr(FPST_FPCR);
|
||||
|
||||
neon_load_reg32(f0, vn);
|
||||
neon_load_reg32(f1, vm);
|
||||
vfp_load_reg32(f0, vn);
|
||||
vfp_load_reg32(f1, vm);
|
||||
|
||||
for (;;) {
|
||||
if (reads_vd) {
|
||||
neon_load_reg32(fd, vd);
|
||||
vfp_load_reg32(fd, vd);
|
||||
}
|
||||
fn(fd, f0, f1, fpst);
|
||||
neon_store_reg32(fd, vd);
|
||||
vfp_store_reg32(fd, vd);
|
||||
|
||||
if (veclen == 0) {
|
||||
break;
|
||||
|
@ -1356,10 +1303,10 @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
|
|||
veclen--;
|
||||
vd = vfp_advance_sreg(vd, delta_d);
|
||||
vn = vfp_advance_sreg(vn, delta_d);
|
||||
neon_load_reg32(f0, vn);
|
||||
vfp_load_reg32(f0, vn);
|
||||
if (delta_m) {
|
||||
vm = vfp_advance_sreg(vm, delta_m);
|
||||
neon_load_reg32(f1, vm);
|
||||
vfp_load_reg32(f1, vm);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1402,14 +1349,14 @@ static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
|
|||
fd = tcg_temp_new_i32();
|
||||
fpst = fpstatus_ptr(FPST_FPCR_F16);
|
||||
|
||||
neon_load_reg32(f0, vn);
|
||||
neon_load_reg32(f1, vm);
|
||||
vfp_load_reg32(f0, vn);
|
||||
vfp_load_reg32(f1, vm);
|
||||
|
||||
if (reads_vd) {
|
||||
neon_load_reg32(fd, vd);
|
||||
vfp_load_reg32(fd, vd);
|
||||
}
|
||||
fn(fd, f0, f1, fpst);
|
||||
neon_store_reg32(fd, vd);
|
||||
vfp_store_reg32(fd, vd);
|
||||
|
||||
tcg_temp_free_i32(f0);
|
||||
tcg_temp_free_i32(f1);
|
||||
|
@ -1469,15 +1416,15 @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
|
|||
fd = tcg_temp_new_i64();
|
||||
fpst = fpstatus_ptr(FPST_FPCR);
|
||||
|
||||
neon_load_reg64(f0, vn);
|
||||
neon_load_reg64(f1, vm);
|
||||
vfp_load_reg64(f0, vn);
|
||||
vfp_load_reg64(f1, vm);
|
||||
|
||||
for (;;) {
|
||||
if (reads_vd) {
|
||||
neon_load_reg64(fd, vd);
|
||||
vfp_load_reg64(fd, vd);
|
||||
}
|
||||
fn(fd, f0, f1, fpst);
|
||||
neon_store_reg64(fd, vd);
|
||||
vfp_store_reg64(fd, vd);
|
||||
|
||||
if (veclen == 0) {
|
||||
break;
|
||||
|
@ -1486,10 +1433,10 @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
|
|||
veclen--;
|
||||
vd = vfp_advance_dreg(vd, delta_d);
|
||||
vn = vfp_advance_dreg(vn, delta_d);
|
||||
neon_load_reg64(f0, vn);
|
||||
vfp_load_reg64(f0, vn);
|
||||
if (delta_m) {
|
||||
vm = vfp_advance_dreg(vm, delta_m);
|
||||
neon_load_reg64(f1, vm);
|
||||
vfp_load_reg64(f1, vm);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1542,11 +1489,11 @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
|
|||
f0 = tcg_temp_new_i32();
|
||||
fd = tcg_temp_new_i32();
|
||||
|
||||
neon_load_reg32(f0, vm);
|
||||
vfp_load_reg32(f0, vm);
|
||||
|
||||
for (;;) {
|
||||
fn(fd, f0);
|
||||
neon_store_reg32(fd, vd);
|
||||
vfp_store_reg32(fd, vd);
|
||||
|
||||
if (veclen == 0) {
|
||||
break;
|
||||
|
@ -1556,7 +1503,7 @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
|
|||
/* single source one-many */
|
||||
while (veclen--) {
|
||||
vd = vfp_advance_sreg(vd, delta_d);
|
||||
neon_store_reg32(fd, vd);
|
||||
vfp_store_reg32(fd, vd);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -1565,7 +1512,7 @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
|
|||
veclen--;
|
||||
vd = vfp_advance_sreg(vd, delta_d);
|
||||
vm = vfp_advance_sreg(vm, delta_m);
|
||||
neon_load_reg32(f0, vm);
|
||||
vfp_load_reg32(f0, vm);
|
||||
}
|
||||
|
||||
tcg_temp_free_i32(f0);
|
||||
|
@ -1598,9 +1545,9 @@ static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
|
|||
}
|
||||
|
||||
f0 = tcg_temp_new_i32();
|
||||
neon_load_reg32(f0, vm);
|
||||
vfp_load_reg32(f0, vm);
|
||||
fn(f0, f0);
|
||||
neon_store_reg32(f0, vd);
|
||||
vfp_store_reg32(f0, vd);
|
||||
tcg_temp_free_i32(f0);
|
||||
|
||||
return true;
|
||||
|
@ -1652,11 +1599,11 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
|
|||
f0 = tcg_temp_new_i64();
|
||||
fd = tcg_temp_new_i64();
|
||||
|
||||
neon_load_reg64(f0, vm);
|
||||
vfp_load_reg64(f0, vm);
|
||||
|
||||
for (;;) {
|
||||
fn(fd, f0);
|
||||
neon_store_reg64(fd, vd);
|
||||
vfp_store_reg64(fd, vd);
|
||||
|
||||
if (veclen == 0) {
|
||||
break;
|
||||
|
@ -1666,7 +1613,7 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
|
|||
/* single source one-many */
|
||||
while (veclen--) {
|
||||
vd = vfp_advance_dreg(vd, delta_d);
|
||||
neon_store_reg64(fd, vd);
|
||||
vfp_store_reg64(fd, vd);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -1675,7 +1622,7 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
|
|||
veclen--;
|
||||
vd = vfp_advance_dreg(vd, delta_d);
|
||||
/* Step the source register; f0 is reloaded from vm for the next pass. */
vm = vfp_advance_dreg(vm, delta_m);
|
||||
neon_load_reg64(f0, vm);
|
||||
vfp_load_reg64(f0, vm);
|
||||
}
|
||||
|
||||
tcg_temp_free_i64(f0);
|
||||
|
@ -2090,20 +2037,20 @@ static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
|
|||
vm = tcg_temp_new_i32();
|
||||
vd = tcg_temp_new_i32();
|
||||
|
||||
neon_load_reg32(vn, a->vn);
|
||||
neon_load_reg32(vm, a->vm);
|
||||
vfp_load_reg32(vn, a->vn);
|
||||
vfp_load_reg32(vm, a->vm);
|
||||
if (neg_n) {
|
||||
/* VFNMS, VFMS */
|
||||
gen_helper_vfp_negh(vn, vn);
|
||||
}
|
||||
neon_load_reg32(vd, a->vd);
|
||||
vfp_load_reg32(vd, a->vd);
|
||||
if (neg_d) {
|
||||
/* VFNMA, VFNMS */
|
||||
gen_helper_vfp_negh(vd, vd);
|
||||
}
|
||||
fpst = fpstatus_ptr(FPST_FPCR_F16);
|
||||
gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
|
||||
neon_store_reg32(vd, a->vd);
|
||||
vfp_store_reg32(vd, a->vd);
|
||||
|
||||
tcg_temp_free_ptr(fpst);
|
||||
tcg_temp_free_i32(vn);
|
||||
|
@ -2155,20 +2102,20 @@ static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
|
|||
vm = tcg_temp_new_i32();
|
||||
vd = tcg_temp_new_i32();
|
||||
|
||||
neon_load_reg32(vn, a->vn);
|
||||
neon_load_reg32(vm, a->vm);
|
||||
vfp_load_reg32(vn, a->vn);
|
||||
vfp_load_reg32(vm, a->vm);
|
||||
if (neg_n) {
|
||||
/* VFNMS, VFMS */
|
||||
gen_helper_vfp_negs(vn, vn);
|
||||
}
|
||||
neon_load_reg32(vd, a->vd);
|
||||
vfp_load_reg32(vd, a->vd);
|
||||
if (neg_d) {
|
||||
/* VFNMA, VFNMS */
|
||||
gen_helper_vfp_negs(vd, vd);
|
||||
}
|
||||
fpst = fpstatus_ptr(FPST_FPCR);
|
||||
gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
|
||||
neon_store_reg32(vd, a->vd);
|
||||
vfp_store_reg32(vd, a->vd);
|
||||
|
||||
tcg_temp_free_ptr(fpst);
|
||||
tcg_temp_free_i32(vn);
|
||||
|
@ -2226,20 +2173,20 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
|
|||
vm = tcg_temp_new_i64();
|
||||
vd = tcg_temp_new_i64();
|
||||
|
||||
neon_load_reg64(vn, a->vn);
|
||||
neon_load_reg64(vm, a->vm);
|
||||
vfp_load_reg64(vn, a->vn);
|
||||
vfp_load_reg64(vm, a->vm);
|
||||
if (neg_n) {
|
||||
/* VFNMS, VFMS */
|
||||
gen_helper_vfp_negd(vn, vn);
|
||||
}
|
||||
neon_load_reg64(vd, a->vd);
|
||||
vfp_load_reg64(vd, a->vd);
|
||||
if (neg_d) {
|
||||
/* VFNMA, VFNMS */
|
||||
gen_helper_vfp_negd(vd, vd);
|
||||
}
|
||||
fpst = fpstatus_ptr(FPST_FPCR);
|
||||
gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
|
||||
neon_store_reg64(vd, a->vd);
|
||||
vfp_store_reg64(vd, a->vd);
|
||||
|
||||
tcg_temp_free_ptr(fpst);
|
||||
tcg_temp_free_i64(vn);
|
||||
|
@ -2283,7 +2230,7 @@ static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a)
|
|||
}
|
||||
|
||||
fd = tcg_const_i32(vfp_expand_imm(MO_16, a->imm));
|
||||
neon_store_reg32(fd, a->vd);
|
||||
vfp_store_reg32(fd, a->vd);
|
||||
tcg_temp_free_i32(fd);
|
||||
return true;
|
||||
}
|
||||
|
@ -2323,7 +2270,7 @@ static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
|
|||
fd = tcg_const_i32(vfp_expand_imm(MO_32, a->imm));
|
||||
|
||||
for (;;) {
|
||||
neon_store_reg32(fd, vd);
|
||||
vfp_store_reg32(fd, vd);
|
||||
|
||||
if (veclen == 0) {
|
||||
break;
|
||||
|
@ -2378,7 +2325,7 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
|
|||
fd = tcg_const_i64(vfp_expand_imm(MO_64, a->imm));
|
||||
|
||||
for (;;) {
|
||||
neon_store_reg64(fd, vd);
|
||||
vfp_store_reg64(fd, vd);
|
||||
|
||||
if (veclen == 0) {
|
||||
break;
|
||||
|
@ -2450,11 +2397,11 @@ static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
|
|||
vd = tcg_temp_new_i32();
|
||||
vm = tcg_temp_new_i32();
|
||||
|
||||
neon_load_reg32(vd, a->vd);
|
||||
vfp_load_reg32(vd, a->vd);
|
||||
if (a->z) {
|
||||
tcg_gen_movi_i32(vm, 0);
|
||||
} else {
|
||||
neon_load_reg32(vm, a->vm);
|
||||
vfp_load_reg32(vm, a->vm);
|
||||
}
|
||||
|
||||
if (a->e) {
|
||||
|
@ -2489,11 +2436,11 @@ static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
|
|||
vd = tcg_temp_new_i32();
|
||||
vm = tcg_temp_new_i32();
|
||||
|
||||
neon_load_reg32(vd, a->vd);
|
||||
vfp_load_reg32(vd, a->vd);
|
||||
if (a->z) {
|
||||
tcg_gen_movi_i32(vm, 0);
|
||||
} else {
|
||||
neon_load_reg32(vm, a->vm);
|
||||
vfp_load_reg32(vm, a->vm);
|
||||
}
|
||||
|
||||
if (a->e) {
|
||||
|
@ -2533,11 +2480,11 @@ static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
|
|||
vd = tcg_temp_new_i64();
|
||||
vm = tcg_temp_new_i64();
|
||||
|
||||
neon_load_reg64(vd, a->vd);
|
||||
vfp_load_reg64(vd, a->vd);
|
||||
if (a->z) {
|
||||
tcg_gen_movi_i64(vm, 0);
|
||||
} else {
|
||||
neon_load_reg64(vm, a->vm);
|
||||
vfp_load_reg64(vm, a->vm);
|
||||
}
|
||||
|
||||
if (a->e) {
|
||||
|
@ -2572,7 +2519,7 @@ static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
|
|||
/* The T bit tells us if we want the low or high 16 bits of Vm */
|
||||
tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
|
||||
gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
|
||||
neon_store_reg32(tmp, a->vd);
|
||||
vfp_store_reg32(tmp, a->vd);
|
||||
tcg_temp_free_i32(ahp_mode);
|
||||
tcg_temp_free_ptr(fpst);
|
||||
tcg_temp_free_i32(tmp);
|
||||
|
@ -2610,7 +2557,7 @@ static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
|
|||
tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
|
||||
vd = tcg_temp_new_i64();
|
||||
gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
|
||||
neon_store_reg64(vd, a->vd);
|
||||
vfp_store_reg64(vd, a->vd);
|
||||
tcg_temp_free_i32(ahp_mode);
|
||||
tcg_temp_free_ptr(fpst);
|
||||
tcg_temp_free_i32(tmp);
|
||||
|
@ -2636,7 +2583,7 @@ static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
|
|||
ahp_mode = get_ahp_flag();
|
||||
tmp = tcg_temp_new_i32();
|
||||
|
||||
neon_load_reg32(tmp, a->vm);
|
||||
vfp_load_reg32(tmp, a->vm);
|
||||
gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
|
||||
tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
|
||||
tcg_temp_free_i32(ahp_mode);
|
||||
|
@ -2674,7 +2621,7 @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
|
|||
tmp = tcg_temp_new_i32();
|
||||
vm = tcg_temp_new_i64();
|
||||
|
||||
neon_load_reg64(vm, a->vm);
|
||||
vfp_load_reg64(vm, a->vm);
|
||||
gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
|
||||
tcg_temp_free_i64(vm);
|
||||
tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
|
||||
|
@ -2698,10 +2645,10 @@ static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
|
|||
}
|
||||
|
||||
tmp = tcg_temp_new_i32();
|
||||
neon_load_reg32(tmp, a->vm);
|
||||
vfp_load_reg32(tmp, a->vm);
|
||||
fpst = fpstatus_ptr(FPST_FPCR_F16);
|
||||
gen_helper_rinth(tmp, tmp, fpst);
|
||||
neon_store_reg32(tmp, a->vd);
|
||||
vfp_store_reg32(tmp, a->vd);
|
||||
tcg_temp_free_ptr(fpst);
|
||||
tcg_temp_free_i32(tmp);
|
||||
return true;
|
||||
|
@ -2721,10 +2668,10 @@ static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
|
|||
}
|
||||
|
||||
tmp = tcg_temp_new_i32();
|
||||
neon_load_reg32(tmp, a->vm);
|
||||
vfp_load_reg32(tmp, a->vm);
|
||||
fpst = fpstatus_ptr(FPST_FPCR);
|
||||
gen_helper_rints(tmp, tmp, fpst);
|
||||
neon_store_reg32(tmp, a->vd);
|
||||
vfp_store_reg32(tmp, a->vd);
|
||||
tcg_temp_free_ptr(fpst);
|
||||
tcg_temp_free_i32(tmp);
|
||||
return true;
|
||||
|
@ -2753,10 +2700,10 @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
|
|||
}
|
||||
|
||||
tmp = tcg_temp_new_i64();
|
||||
neon_load_reg64(tmp, a->vm);
|
||||
vfp_load_reg64(tmp, a->vm);
|
||||
fpst = fpstatus_ptr(FPST_FPCR);
|
||||
gen_helper_rintd(tmp, tmp, fpst);
|
||||
neon_store_reg64(tmp, a->vd);
|
||||
vfp_store_reg64(tmp, a->vd);
|
||||
tcg_temp_free_ptr(fpst);
|
||||
tcg_temp_free_i64(tmp);
|
||||
return true;
|
||||
|
@ -2777,13 +2724,13 @@ static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
|
|||
}
|
||||
|
||||
tmp = tcg_temp_new_i32();
|
||||
neon_load_reg32(tmp, a->vm);
|
||||
vfp_load_reg32(tmp, a->vm);
|
||||
fpst = fpstatus_ptr(FPST_FPCR_F16);
|
||||
tcg_rmode = tcg_const_i32(float_round_to_zero);
|
||||
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
|
||||
gen_helper_rinth(tmp, tmp, fpst);
|
||||
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
|
||||
neon_store_reg32(tmp, a->vd);
|
||||
vfp_store_reg32(tmp, a->vd);
|
||||
tcg_temp_free_ptr(fpst);
|
||||
tcg_temp_free_i32(tcg_rmode);
|
||||
tcg_temp_free_i32(tmp);
|
||||
|
@ -2805,13 +2752,13 @@ static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
|
|||
}
|
||||
|
||||
tmp = tcg_temp_new_i32();
|
||||
neon_load_reg32(tmp, a->vm);
|
||||
vfp_load_reg32(tmp, a->vm);
|
||||
fpst = fpstatus_ptr(FPST_FPCR);
|
||||
tcg_rmode = tcg_const_i32(float_round_to_zero);
|
||||
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
|
||||
gen_helper_rints(tmp, tmp, fpst);
|
||||
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
|
||||
neon_store_reg32(tmp, a->vd);
|
||||
vfp_store_reg32(tmp, a->vd);
|
||||
tcg_temp_free_ptr(fpst);
|
||||
tcg_temp_free_i32(tcg_rmode);
|
||||
tcg_temp_free_i32(tmp);
|
||||
|
@ -2842,13 +2789,13 @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
|
|||
}
|
||||
|
||||
tmp = tcg_temp_new_i64();
|
||||
neon_load_reg64(tmp, a->vm);
|
||||
vfp_load_reg64(tmp, a->vm);
|
||||
fpst = fpstatus_ptr(FPST_FPCR);
|
||||
tcg_rmode = tcg_const_i32(float_round_to_zero);
|
||||
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
|
||||
gen_helper_rintd(tmp, tmp, fpst);
|
||||
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
|
||||
neon_store_reg64(tmp, a->vd);
|
||||
vfp_store_reg64(tmp, a->vd);
|
||||
tcg_temp_free_ptr(fpst);
|
||||
tcg_temp_free_i64(tmp);
|
||||
tcg_temp_free_i32(tcg_rmode);
|
||||
|
@ -2869,10 +2816,10 @@ static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
|
|||
}
|
||||
|
||||
tmp = tcg_temp_new_i32();
|
||||
neon_load_reg32(tmp, a->vm);
|
||||
vfp_load_reg32(tmp, a->vm);
|
||||
fpst = fpstatus_ptr(FPST_FPCR_F16);
|
||||
gen_helper_rinth_exact(tmp, tmp, fpst);
|
||||
neon_store_reg32(tmp, a->vd);
|
||||
vfp_store_reg32(tmp, a->vd);
|
||||
tcg_temp_free_ptr(fpst);
|
||||
tcg_temp_free_i32(tmp);
|
||||
return true;
|
||||
|
@ -2892,10 +2839,10 @@ static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
|
|||
}
|
||||
|
||||
tmp = tcg_temp_new_i32();
|
||||
neon_load_reg32(tmp, a->vm);
|
||||
vfp_load_reg32(tmp, a->vm);
|
||||
fpst = fpstatus_ptr(FPST_FPCR);
|
||||
gen_helper_rints_exact(tmp, tmp, fpst);
|
||||
neon_store_reg32(tmp, a->vd);
|
||||
vfp_store_reg32(tmp, a->vd);
|
||||
tcg_temp_free_ptr(fpst);
|
||||
tcg_temp_free_i32(tmp);
|
||||
return true;
|
||||
|
@ -2924,10 +2871,10 @@ static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
|
|||
}
|
||||
|
||||
tmp = tcg_temp_new_i64();
|
||||
neon_load_reg64(tmp, a->vm);
|
||||
vfp_load_reg64(tmp, a->vm);
|
||||
fpst = fpstatus_ptr(FPST_FPCR);
|
||||
gen_helper_rintd_exact(tmp, tmp, fpst);
|
||||
neon_store_reg64(tmp, a->vd);
|
||||
vfp_store_reg64(tmp, a->vd);
|
||||
tcg_temp_free_ptr(fpst);
|
||||
tcg_temp_free_i64(tmp);
|
||||
return true;
|
||||
|
@ -2953,9 +2900,9 @@ static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
|
|||
|
||||
vm = tcg_temp_new_i32();
|
||||
vd = tcg_temp_new_i64();
|
||||
neon_load_reg32(vm, a->vm);
|
||||
vfp_load_reg32(vm, a->vm);
|
||||
gen_helper_vfp_fcvtds(vd, vm, cpu_env);
|
||||
neon_store_reg64(vd, a->vd);
|
||||
vfp_store_reg64(vd, a->vd);
|
||||
tcg_temp_free_i32(vm);
|
||||
tcg_temp_free_i64(vd);
|
||||
return true;
|
||||
|
@ -2981,9 +2928,9 @@ static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
|
|||
|
||||
vd = tcg_temp_new_i32();
|
||||
vm = tcg_temp_new_i64();
|
||||
neon_load_reg64(vm, a->vm);
|
||||
vfp_load_reg64(vm, a->vm);
|
||||
gen_helper_vfp_fcvtsd(vd, vm, cpu_env);
|
||||
neon_store_reg32(vd, a->vd);
|
||||
vfp_store_reg32(vd, a->vd);
|
||||
tcg_temp_free_i32(vd);
|
||||
tcg_temp_free_i64(vm);
|
||||
return true;
|
||||
|
@ -3003,7 +2950,7 @@ static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
|
|||
}
|
||||
|
||||
vm = tcg_temp_new_i32();
|
||||
neon_load_reg32(vm, a->vm);
|
||||
vfp_load_reg32(vm, a->vm);
|
||||
fpst = fpstatus_ptr(FPST_FPCR_F16);
|
||||
if (a->s) {
|
||||
/* i32 -> f16 */
|
||||
|
@ -3012,7 +2959,7 @@ static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
|
|||
/* u32 -> f16 */
|
||||
gen_helper_vfp_uitoh(vm, vm, fpst);
|
||||
}
|
||||
neon_store_reg32(vm, a->vd);
|
||||
vfp_store_reg32(vm, a->vd);
|
||||
tcg_temp_free_i32(vm);
|
||||
tcg_temp_free_ptr(fpst);
|
||||
return true;
|
||||
|
@ -3032,7 +2979,7 @@ static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
|
|||
}
|
||||
|
||||
vm = tcg_temp_new_i32();
|
||||
neon_load_reg32(vm, a->vm);
|
||||
vfp_load_reg32(vm, a->vm);
|
||||
fpst = fpstatus_ptr(FPST_FPCR);
|
||||
if (a->s) {
|
||||
/* i32 -> f32 */
|
||||
|
@ -3041,7 +2988,7 @@ static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
|
|||
/* u32 -> f32 */
|
||||
gen_helper_vfp_uitos(vm, vm, fpst);
|
||||
}
|
||||
neon_store_reg32(vm, a->vd);
|
||||
vfp_store_reg32(vm, a->vd);
|
||||
tcg_temp_free_i32(vm);
|
||||
tcg_temp_free_ptr(fpst);
|
||||
return true;
|
||||
|
@ -3068,7 +3015,7 @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
|
|||
|
||||
vm = tcg_temp_new_i32();
|
||||
vd = tcg_temp_new_i64();
|
||||
neon_load_reg32(vm, a->vm);
|
||||
vfp_load_reg32(vm, a->vm);
|
||||
fpst = fpstatus_ptr(FPST_FPCR);
|
||||
if (a->s) {
|
||||
/* i32 -> f64 */
|
||||
|
@ -3077,7 +3024,7 @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
|
|||
/* u32 -> f64 */
|
||||
gen_helper_vfp_uitod(vd, vm, fpst);
|
||||
}
|
||||
neon_store_reg64(vd, a->vd);
|
||||
vfp_store_reg64(vd, a->vd);
|
||||
tcg_temp_free_i32(vm);
|
||||
tcg_temp_free_i64(vd);
|
||||
tcg_temp_free_ptr(fpst);
|
||||
|
@ -3108,9 +3055,9 @@ static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
|
|||
|
||||
vm = tcg_temp_new_i64();
|
||||
vd = tcg_temp_new_i32();
|
||||
neon_load_reg64(vm, a->vm);
|
||||
vfp_load_reg64(vm, a->vm);
|
||||
gen_helper_vjcvt(vd, vm, cpu_env);
|
||||
neon_store_reg32(vd, a->vd);
|
||||
vfp_store_reg32(vd, a->vd);
|
||||
tcg_temp_free_i64(vm);
|
||||
tcg_temp_free_i32(vd);
|
||||
return true;
|
||||
|
@ -3133,7 +3080,7 @@ static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
|
|||
frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
|
||||
|
||||
vd = tcg_temp_new_i32();
|
||||
neon_load_reg32(vd, a->vd);
|
||||
vfp_load_reg32(vd, a->vd);
|
||||
|
||||
fpst = fpstatus_ptr(FPST_FPCR_F16);
|
||||
shift = tcg_const_i32(frac_bits);
|
||||
|
@ -3168,7 +3115,7 @@ static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
|
|||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
neon_store_reg32(vd, a->vd);
|
||||
vfp_store_reg32(vd, a->vd);
|
||||
tcg_temp_free_i32(vd);
|
||||
tcg_temp_free_i32(shift);
|
||||
tcg_temp_free_ptr(fpst);
|
||||
|
@ -3192,7 +3139,7 @@ static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
|
|||
frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
|
||||
|
||||
vd = tcg_temp_new_i32();
|
||||
neon_load_reg32(vd, a->vd);
|
||||
vfp_load_reg32(vd, a->vd);
|
||||
|
||||
fpst = fpstatus_ptr(FPST_FPCR);
|
||||
shift = tcg_const_i32(frac_bits);
|
||||
|
@ -3227,7 +3174,7 @@ static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
|
|||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
neon_store_reg32(vd, a->vd);
|
||||
vfp_store_reg32(vd, a->vd);
|
||||
tcg_temp_free_i32(vd);
|
||||
tcg_temp_free_i32(shift);
|
||||
tcg_temp_free_ptr(fpst);
|
||||
|
@ -3257,7 +3204,7 @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
|
|||
frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
|
||||
|
||||
vd = tcg_temp_new_i64();
|
||||
neon_load_reg64(vd, a->vd);
|
||||
vfp_load_reg64(vd, a->vd);
|
||||
|
||||
fpst = fpstatus_ptr(FPST_FPCR);
|
||||
shift = tcg_const_i32(frac_bits);
|
||||
|
@ -3292,7 +3239,7 @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
|
|||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
neon_store_reg64(vd, a->vd);
|
||||
vfp_store_reg64(vd, a->vd);
|
||||
tcg_temp_free_i64(vd);
|
||||
tcg_temp_free_i32(shift);
|
||||
tcg_temp_free_ptr(fpst);
|
||||
|
@ -3314,7 +3261,7 @@ static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
|
|||
|
||||
fpst = fpstatus_ptr(FPST_FPCR_F16);
|
||||
vm = tcg_temp_new_i32();
|
||||
neon_load_reg32(vm, a->vm);
|
||||
vfp_load_reg32(vm, a->vm);
|
||||
|
||||
if (a->s) {
|
||||
if (a->rz) {
|
||||
|
@ -3329,7 +3276,7 @@ static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
|
|||
gen_helper_vfp_touih(vm, vm, fpst);
|
||||
}
|
||||
}
|
||||
neon_store_reg32(vm, a->vd);
|
||||
vfp_store_reg32(vm, a->vd);
|
||||
tcg_temp_free_i32(vm);
|
||||
tcg_temp_free_ptr(fpst);
|
||||
return true;
|
||||
|
@ -3350,7 +3297,7 @@ static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
|
|||
|
||||
fpst = fpstatus_ptr(FPST_FPCR);
|
||||
vm = tcg_temp_new_i32();
|
||||
neon_load_reg32(vm, a->vm);
|
||||
vfp_load_reg32(vm, a->vm);
|
||||
|
||||
if (a->s) {
|
||||
if (a->rz) {
|
||||
|
@ -3365,7 +3312,7 @@ static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
|
|||
gen_helper_vfp_touis(vm, vm, fpst);
|
||||
}
|
||||
}
|
||||
neon_store_reg32(vm, a->vd);
|
||||
vfp_store_reg32(vm, a->vd);
|
||||
tcg_temp_free_i32(vm);
|
||||
tcg_temp_free_ptr(fpst);
|
||||
return true;
|
||||
|
@ -3393,7 +3340,7 @@ static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
|
|||
fpst = fpstatus_ptr(FPST_FPCR);
|
||||
vm = tcg_temp_new_i64();
|
||||
vd = tcg_temp_new_i32();
|
||||
neon_load_reg64(vm, a->vm);
|
||||
vfp_load_reg64(vm, a->vm);
|
||||
|
||||
if (a->s) {
|
||||
if (a->rz) {
|
||||
|
@ -3408,7 +3355,7 @@ static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
|
|||
gen_helper_vfp_touid(vd, vm, fpst);
|
||||
}
|
||||
}
|
||||
neon_store_reg32(vd, a->vd);
|
||||
vfp_store_reg32(vd, a->vd);
|
||||
tcg_temp_free_i32(vd);
|
||||
tcg_temp_free_i64(vm);
|
||||
tcg_temp_free_ptr(fpst);
|
||||
|
@ -3521,10 +3468,10 @@ static bool trans_VINS(DisasContext *s, arg_VINS *a)
|
|||
/* Insert low half of Vm into high half of Vd */
|
||||
rm = tcg_temp_new_i32();
|
||||
rd = tcg_temp_new_i32();
|
||||
neon_load_reg32(rm, a->vm);
|
||||
neon_load_reg32(rd, a->vd);
|
||||
vfp_load_reg32(rm, a->vm);
|
||||
vfp_load_reg32(rd, a->vd);
|
||||
tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
|
||||
neon_store_reg32(rd, a->vd);
|
||||
vfp_store_reg32(rd, a->vd);
|
||||
tcg_temp_free_i32(rm);
|
||||
tcg_temp_free_i32(rd);
|
||||
return true;
|
||||
|
@ -3548,9 +3495,9 @@ static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
|
|||
|
||||
/* Set Vd to high half of Vm */
|
||||
rm = tcg_temp_new_i32();
|
||||
neon_load_reg32(rm, a->vm);
|
||||
vfp_load_reg32(rm, a->vm);
|
||||
tcg_gen_shri_i32(rm, rm, 16);
|
||||
neon_store_reg32(rm, a->vd);
|
||||
vfp_store_reg32(rm, a->vd);
|
||||
tcg_temp_free_i32(rm);
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -1094,64 +1094,141 @@ static inline void gen_hlt(DisasContext *s, int imm)
|
|||
unallocated_encoding(s);
|
||||
}
|
||||
|
||||
static inline long vfp_reg_offset(bool dp, unsigned reg)
|
||||
/*
|
||||
* Return the offset of a "full" NEON Dreg.
|
||||
*/
|
||||
static long neon_full_reg_offset(unsigned reg)
|
||||
{
|
||||
return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
|
||||
* where 0 is the least significant end of the register.
|
||||
*/
|
||||
static long neon_element_offset(int reg, int element, MemOp memop)
|
||||
{
|
||||
int element_size = 1 << (memop & MO_SIZE);
|
||||
int ofs = element * element_size;
|
||||
#ifdef HOST_WORDS_BIGENDIAN
|
||||
/*
|
||||
* Calculate the offset assuming fully little-endian,
|
||||
* then XOR to account for the order of the 8-byte units.
|
||||
*/
|
||||
if (element_size < 8) {
|
||||
ofs ^= 8 - element_size;
|
||||
}
|
||||
#endif
|
||||
return neon_full_reg_offset(reg) + ofs;
|
||||
}
|
||||
|
||||
/* Return the offset of a VFP Dreg (dp = true) or VFP Sreg (dp = false). */
|
||||
static long vfp_reg_offset(bool dp, unsigned reg)
|
||||
{
|
||||
if (dp) {
|
||||
return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
|
||||
return neon_element_offset(reg, 0, MO_64);
|
||||
} else {
|
||||
long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
|
||||
if (reg & 1) {
|
||||
ofs += offsetof(CPU_DoubleU, l.upper);
|
||||
} else {
|
||||
ofs += offsetof(CPU_DoubleU, l.lower);
|
||||
}
|
||||
return ofs;
|
||||
return neon_element_offset(reg >> 1, reg & 1, MO_32);
|
||||
}
|
||||
}
|
||||
|
||||
/* Return the offset of a 32-bit piece of a NEON register.
|
||||
zero is the least significant end of the register. */
|
||||
static inline long
|
||||
neon_reg_offset (int reg, int n)
|
||||
static inline void vfp_load_reg64(TCGv_i64 var, int reg)
|
||||
{
|
||||
int sreg;
|
||||
sreg = reg * 2 + n;
|
||||
return vfp_reg_offset(0, sreg);
|
||||
tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(true, reg));
|
||||
}
|
||||
|
||||
static TCGv_i32 neon_load_reg(int reg, int pass)
|
||||
static inline void vfp_store_reg64(TCGv_i64 var, int reg)
|
||||
{
|
||||
TCGv_i32 tmp = tcg_temp_new_i32();
|
||||
tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
|
||||
return tmp;
|
||||
tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(true, reg));
|
||||
}
|
||||
|
||||
static void neon_store_reg(int reg, int pass, TCGv_i32 var)
|
||||
{
|
||||
tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
|
||||
tcg_temp_free_i32(var);
|
||||
}
|
||||
|
||||
static inline void neon_load_reg64(TCGv_i64 var, int reg)
|
||||
{
|
||||
tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
|
||||
}
|
||||
|
||||
static inline void neon_store_reg64(TCGv_i64 var, int reg)
|
||||
{
|
||||
tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
|
||||
}
|
||||
|
||||
static inline void neon_load_reg32(TCGv_i32 var, int reg)
|
||||
/* Emit a 32-bit load of VFP S register REG (from cpu_env) into VAR. */
static inline void vfp_load_reg32(TCGv_i32 var, int reg)
{
    const long sreg_ofs = vfp_reg_offset(false, reg);

    tcg_gen_ld_i32(var, cpu_env, sreg_ofs);
}
|
||||
|
||||
static inline void neon_store_reg32(TCGv_i32 var, int reg)
|
||||
/* Emit a 32-bit store of VAR into VFP S register REG (in cpu_env). */
static inline void vfp_store_reg32(TCGv_i32 var, int reg)
{
    const long sreg_ofs = vfp_reg_offset(false, reg);

    tcg_gen_st_i32(var, cpu_env, sreg_ofs);
}
|
||||
|
||||
/*
 * Load element ELE of NEON register REG into the 32-bit value DEST.
 * MEMOP selects the element size and which load op is emitted;
 * any MemOp not listed below is a caller bug and asserts.
 */
static void read_neon_element32(TCGv_i32 dest, int reg, int ele, MemOp memop)
{
    const long env_ofs = neon_element_offset(reg, ele, memop);

    switch (memop) {
    case MO_UB:
        tcg_gen_ld8u_i32(dest, cpu_env, env_ofs);
        return;
    case MO_SB:
        tcg_gen_ld8s_i32(dest, cpu_env, env_ofs);
        return;
    case MO_UW:
        tcg_gen_ld16u_i32(dest, cpu_env, env_ofs);
        return;
    case MO_SW:
        tcg_gen_ld16s_i32(dest, cpu_env, env_ofs);
        return;
    case MO_SL:
    case MO_UL:
        /* Both 32-bit variants use the same plain 32-bit load. */
        tcg_gen_ld_i32(dest, cpu_env, env_ofs);
        return;
    default:
        g_assert_not_reached();
    }
}
|
||||
|
||||
/*
 * Load element ELE of NEON register REG into the 64-bit value DEST.
 * Only MO_UL, MO_SL and MO_Q are accepted; anything else asserts.
 */
static void read_neon_element64(TCGv_i64 dest, int reg, int ele, MemOp memop)
{
    const long env_ofs = neon_element_offset(reg, ele, memop);

    switch (memop) {
    case MO_Q:
        tcg_gen_ld_i64(dest, cpu_env, env_ofs);
        return;
    case MO_UL:
        tcg_gen_ld32u_i64(dest, cpu_env, env_ofs);
        return;
    case MO_SL:
        tcg_gen_ld32s_i64(dest, cpu_env, env_ofs);
        return;
    default:
        g_assert_not_reached();
    }
}
|
||||
|
||||
/*
 * Store the 32-bit value SRC into element ELE of NEON register REG.
 * MEMOP must be exactly MO_8, MO_16 or MO_32; anything else asserts.
 */
static void write_neon_element32(TCGv_i32 src, int reg, int ele, MemOp memop)
{
    const long env_ofs = neon_element_offset(reg, ele, memop);

    switch (memop) {
    case MO_32:
        tcg_gen_st_i32(src, cpu_env, env_ofs);
        return;
    case MO_16:
        tcg_gen_st16_i32(src, cpu_env, env_ofs);
        return;
    case MO_8:
        tcg_gen_st8_i32(src, cpu_env, env_ofs);
        return;
    default:
        g_assert_not_reached();
    }
}
|
||||
|
||||
/*
 * Store the 64-bit value SRC into element ELE of NEON register REG.
 * MEMOP must be MO_64; any other value asserts.
 */
static void write_neon_element64(TCGv_i64 src, int reg, int ele, MemOp memop)
{
    const long env_ofs = neon_element_offset(reg, ele, memop);

    if (memop != MO_64) {
        g_assert_not_reached();
    }
    tcg_gen_st_i64(src, cpu_env, env_ofs);
}
|
||||
|
||||
static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
|
||||
{
|
||||
TCGv_ptr ret = tcg_temp_new_ptr();
|
||||
|
|
|
@ -293,7 +293,7 @@ void HELPER(gvec_sdot_idx_b)(void *vd, void *vn, void *vm, uint32_t desc)
|
|||
intptr_t index = simd_data(desc);
|
||||
uint32_t *d = vd;
|
||||
int8_t *n = vn;
|
||||
int8_t *m_indexed = (int8_t *)vm + index * 4;
|
||||
int8_t *m_indexed = (int8_t *)vm + H4(index) * 4;
|
||||
|
||||
/* Notice the special case of opr_sz == 8, from aa64/aa32 advsimd.
|
||||
* Otherwise opr_sz is a multiple of 16.
|
||||
|
@ -324,7 +324,7 @@ void HELPER(gvec_udot_idx_b)(void *vd, void *vn, void *vm, uint32_t desc)
|
|||
intptr_t index = simd_data(desc);
|
||||
uint32_t *d = vd;
|
||||
uint8_t *n = vn;
|
||||
uint8_t *m_indexed = (uint8_t *)vm + index * 4;
|
||||
uint8_t *m_indexed = (uint8_t *)vm + H4(index) * 4;
|
||||
|
||||
/* Notice the special case of opr_sz == 8, from aa64/aa32 advsimd.
|
||||
* Otherwise opr_sz is a multiple of 16.
|
||||
|
@ -1858,10 +1858,10 @@ DO_ABA(gvec_uaba_d, uint64_t)
|
|||
r2 = float16_##OP(m[H2(0)], m[H2(1)], fpst); \
|
||||
r3 = float16_##OP(m[H2(2)], m[H2(3)], fpst); \
|
||||
\
|
||||
d[H4(0)] = r0; \
|
||||
d[H4(1)] = r1; \
|
||||
d[H4(2)] = r2; \
|
||||
d[H4(3)] = r3; \
|
||||
d[H2(0)] = r0; \
|
||||
d[H2(1)] = r1; \
|
||||
d[H2(2)] = r2; \
|
||||
d[H2(3)] = r3; \
|
||||
}
|
||||
|
||||
DO_NEON_PAIRWISE(neon_padd, add)
|
||||
|
|
|
@ -265,10 +265,16 @@ int main(int argc, char **argv)
|
|||
|
||||
qtest_add_func("npcm7xx_rng/enable_disable", test_enable_disable);
|
||||
qtest_add_func("npcm7xx_rng/rosel", test_rosel);
|
||||
qtest_add_func("npcm7xx_rng/continuous/monobit", test_continuous_monobit);
|
||||
qtest_add_func("npcm7xx_rng/continuous/runs", test_continuous_runs);
|
||||
qtest_add_func("npcm7xx_rng/first_byte/monobit", test_first_byte_monobit);
|
||||
qtest_add_func("npcm7xx_rng/first_byte/runs", test_first_byte_runs);
|
||||
/*
|
||||
* These tests fail intermittently; only run them on explicit
|
||||
* request until we figure out why.
|
||||
*/
|
||||
if (getenv("QEMU_TEST_FLAKY_RNG_TESTS")) {
|
||||
qtest_add_func("npcm7xx_rng/continuous/monobit", test_continuous_monobit);
|
||||
qtest_add_func("npcm7xx_rng/continuous/runs", test_continuous_runs);
|
||||
qtest_add_func("npcm7xx_rng/first_byte/monobit", test_first_byte_monobit);
|
||||
qtest_add_func("npcm7xx_rng/first_byte/runs", test_first_byte_runs);
|
||||
}
|
||||
|
||||
qtest_start("-machine npcm750-evb");
|
||||
ret = g_test_run();
|
||||
|
|
Loading…
Reference in New Issue