mirror of https://gitee.com/openkylin/qemu.git
tcg generic vectors
-----BEGIN PGP SIGNATURE-----
iQEcBAABAgAGBQJafH8UAAoJEGTfOOivfiFfMi0H/iAL5GiFaLXgWSDXXrpodzqk
J8Ritb4IQ/7FZymy41Qk6xkElvHxmL3fwBW7+u2UwGt246c9/fHDpAicCmt6MLSs
45W5Z4+jOnCj3fs6LZufdybVVcpqLUElL50S8/RfHTt80zwYrCFE30ipPt38sSUx
2FiD+XHLqWXMCPEEz+krbdZxHqzjw6JTJrYh5/Gey1T0+V0SQIkFHPL3qpPhCT95
LZi/LSUCEg4k87WTpGx5TgtE1RichCab+0zT4Lcuwl6r5WMUiLgQ/m9gIxe01BIi
3Jt4THPUe8H9Lert2sU9KJdQL6fDI5ardajZfEhu15bDQ7UcUkteTiF433Tf69U=
=iy5H
-----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20180208' into staging

tcg generic vectors

# gpg: Signature made Thu 08 Feb 2018 16:47:16 GMT
# gpg: using RSA key 64DF38E8AF7E215F
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>"
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F

* remotes/rth/tags/pull-tcg-20180208:
  tcg/aarch64: Add vector operations
  tcg/i386: Add vector operations
  target/arm: Use vector infrastructure for aa64 orr/bic immediate
  target/arm: Use vector infrastructure for aa64 multiplies
  target/arm: Use vector infrastructure for aa64 compares
  target/arm: Use vector infrastructure for aa64 constant shifts
  target/arm: Use vector infrastructure for aa64 dup/movi
  target/arm: Use vector infrastructure for aa64 mov/not/neg
  target/arm: Use vector infrastructure for aa64 add/sub/logic
  target/arm: Align vector registers
  tcg/optimize: Handle vector opcodes during optimize
  tcg: Add generic vector helpers with a scalar operand
  tcg: Add generic helpers for saturating arithmetic
  tcg: Add generic vector ops for multiplication
  tcg: Add generic vector ops for comparisons
  tcg: Add generic vector ops for constant shifts
  tcg: Add generic vector expanders
  tcg: Standardize integral arguments to expanders
  tcg: Add types and basic operations for host vectors
  tcg: Allow multiple word entries into the constant pool

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
commit 04bb7fe2bf
@@ -93,8 +93,8 @@ all: $(PROGS) stap
# cpu emulator library
obj-y += exec.o
obj-y += accel/
-obj-$(CONFIG_TCG) += tcg/tcg.o tcg/tcg-op.o tcg/optimize.o
-obj-$(CONFIG_TCG) += tcg/tcg-common.o
+obj-$(CONFIG_TCG) += tcg/tcg.o tcg/tcg-op.o tcg/tcg-op-vec.o tcg/tcg-op-gvec.o
+obj-$(CONFIG_TCG) += tcg/tcg-common.o tcg/optimize.o
obj-$(CONFIG_TCG_INTERPRETER) += tcg/tci.o
obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
obj-y += fpu/softfloat.o
@@ -1,6 +1,6 @@
obj-$(CONFIG_SOFTMMU) += tcg-all.o
obj-$(CONFIG_SOFTMMU) += cputlb.o
-obj-y += tcg-runtime.o
+obj-y += tcg-runtime.o tcg-runtime-gvec.o
obj-y += cpu-exec.o cpu-exec-common.o translate-all.o
obj-y += translator.o
@@ -0,0 +1,997 @@
/*
 * Generic vectorized operation runtime
 *
 * Copyright (c) 2018 Linaro
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "cpu.h"
#include "exec/helper-proto.h"
#include "tcg-gvec-desc.h"


/* Virtually all hosts support 16-byte vectors.  Those that don't can emulate
 * them via GCC's generic vector extension.  This turns out to be simpler and
 * more reliable than getting the compiler to autovectorize.
 *
 * In tcg-op-gvec.c, we asserted that both the size and alignment of the data
 * are multiples of 16.
 *
 * When the compiler does not support all of the operations we require, the
 * loops are written so that we can always fall back on the base types.
 */
#ifdef CONFIG_VECTOR16
typedef uint8_t vec8 __attribute__((vector_size(16)));
typedef uint16_t vec16 __attribute__((vector_size(16)));
typedef uint32_t vec32 __attribute__((vector_size(16)));
typedef uint64_t vec64 __attribute__((vector_size(16)));

typedef int8_t svec8 __attribute__((vector_size(16)));
typedef int16_t svec16 __attribute__((vector_size(16)));
typedef int32_t svec32 __attribute__((vector_size(16)));
typedef int64_t svec64 __attribute__((vector_size(16)));

#define DUP16(X)  { X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X }
#define DUP8(X)   { X, X, X, X, X, X, X, X }
#define DUP4(X)   { X, X, X, X }
#define DUP2(X)   { X, X }
#else
typedef uint8_t vec8;
typedef uint16_t vec16;
typedef uint32_t vec32;
typedef uint64_t vec64;

typedef int8_t svec8;
typedef int16_t svec16;
typedef int32_t svec32;
typedef int64_t svec64;

#define DUP16(X)  X
#define DUP8(X)   X
#define DUP4(X)   X
#define DUP2(X)   X
#endif /* CONFIG_VECTOR16 */

static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
{
    intptr_t maxsz = simd_maxsz(desc);
    intptr_t i;

    if (unlikely(maxsz > oprsz)) {
        for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) {
            *(uint64_t *)(d + i) = 0;
        }
    }
}
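
/*
 * An illustrative sketch (not part of the patch itself): every helper in
 * this file receives a 32-bit descriptor built by the expanders, from which
 * simd_oprsz(), simd_maxsz() and simd_data() recover the operation size, the
 * maximum (register) size, and an opaque data field.  The packing below only
 * assumes 5-bit size fields counted in units of 8 bytes; the authoritative
 * encoding is the one defined in tcg-gvec-desc.h.
 */
#if 0   /* illustration only, not compiled */
static uint32_t example_simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data)
{
    uint32_t desc = 0;

    desc |= (oprsz / 8 - 1) & 0x1f;          /* bits [4:0]:   oprsz / 8 - 1 */
    desc |= ((maxsz / 8 - 1) & 0x1f) << 5;   /* bits [9:5]:   maxsz / 8 - 1 */
    desc |= (uint32_t)data << 10;            /* bits [31:10]: opaque data   */
    return desc;
}

static uint32_t example_simd_oprsz(uint32_t desc)
{
    return ((desc & 0x1f) + 1) * 8;          /* inverse of the packing above */
}
#endif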
|
||||
|
||||
void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec8)) {
|
||||
*(vec8 *)(d + i) = *(vec8 *)(a + i) + *(vec8 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec16)) {
|
||||
*(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec32)) {
|
||||
*(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
vec8 vecb = (vec8)DUP16(b);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec8)) {
|
||||
*(vec8 *)(d + i) = *(vec8 *)(a + i) + vecb;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
vec16 vecb = (vec16)DUP8(b);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec16)) {
|
||||
*(vec16 *)(d + i) = *(vec16 *)(a + i) + vecb;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
vec32 vecb = (vec32)DUP4(b);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec32)) {
|
||||
*(vec32 *)(d + i) = *(vec32 *)(a + i) + vecb;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
vec64 vecb = (vec64)DUP2(b);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = *(vec64 *)(a + i) + vecb;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec8)) {
|
||||
*(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec16)) {
|
||||
*(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec32)) {
|
||||
*(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
vec8 vecb = (vec8)DUP16(b);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec8)) {
|
||||
*(vec8 *)(d + i) = *(vec8 *)(a + i) - vecb;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
vec16 vecb = (vec16)DUP8(b);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec16)) {
|
||||
*(vec16 *)(d + i) = *(vec16 *)(a + i) - vecb;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
vec32 vecb = (vec32)DUP4(b);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec32)) {
|
||||
*(vec32 *)(d + i) = *(vec32 *)(a + i) - vecb;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
vec64 vecb = (vec64)DUP2(b);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = *(vec64 *)(a + i) - vecb;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec8)) {
|
||||
*(vec8 *)(d + i) = *(vec8 *)(a + i) * *(vec8 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec16)) {
|
||||
*(vec16 *)(d + i) = *(vec16 *)(a + i) * *(vec16 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec32)) {
|
||||
*(vec32 *)(d + i) = *(vec32 *)(a + i) * *(vec32 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = *(vec64 *)(a + i) * *(vec64 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
vec8 vecb = (vec8)DUP16(b);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec8)) {
|
||||
*(vec8 *)(d + i) = *(vec8 *)(a + i) * vecb;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
vec16 vecb = (vec16)DUP8(b);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec16)) {
|
||||
*(vec16 *)(d + i) = *(vec16 *)(a + i) * vecb;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
vec32 vecb = (vec32)DUP4(b);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec32)) {
|
||||
*(vec32 *)(d + i) = *(vec32 *)(a + i) * vecb;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
vec64 vecb = (vec64)DUP2(b);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = *(vec64 *)(a + i) * vecb;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec8)) {
|
||||
*(vec8 *)(d + i) = -*(vec8 *)(a + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec16)) {
|
||||
*(vec16 *)(d + i) = -*(vec16 *)(a + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec32)) {
|
||||
*(vec32 *)(d + i) = -*(vec32 *)(a + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = -*(vec64 *)(a + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
|
||||
memcpy(d, a, oprsz);
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
if (c == 0) {
|
||||
oprsz = 0;
|
||||
} else {
|
||||
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
|
||||
*(uint64_t *)(d + i) = c;
|
||||
}
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
if (c == 0) {
|
||||
oprsz = 0;
|
||||
} else {
|
||||
for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
|
||||
*(uint32_t *)(d + i) = c;
|
||||
}
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
|
||||
{
|
||||
HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
|
||||
}
|
||||
|
||||
void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
|
||||
{
|
||||
HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
|
||||
}
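
/*
 * An illustrative note (not part of the patch itself): the multiplications
 * in gvec_dup16 and gvec_dup8 above splat the constant into every lane of a
 * 32-bit value, which lets both helpers reuse gvec_dup32.
 */
#if 0   /* illustration only, not compiled */
static uint32_t example_replicate_byte(uint8_t c)
{
    return 0x01010101u * c;     /* e.g. 0xab -> 0xabababab */
}

static uint32_t example_replicate_half(uint16_t c)
{
    return 0x00010001u * c;     /* e.g. 0x1234 -> 0x12341234 */
}
#endif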
|
||||
|
||||
void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = ~*(vec64 *)(a + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
vec64 vecb = (vec64)DUP2(b);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = *(vec64 *)(a + i) & vecb;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
vec64 vecb = (vec64)DUP2(b);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = *(vec64 *)(a + i) ^ vecb;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
vec64 vecb = (vec64)DUP2(b);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = *(vec64 *)(a + i) | vecb;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
int shift = simd_data(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec8)) {
|
||||
*(vec8 *)(d + i) = *(vec8 *)(a + i) << shift;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
int shift = simd_data(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec16)) {
|
||||
*(vec16 *)(d + i) = *(vec16 *)(a + i) << shift;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
int shift = simd_data(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec32)) {
|
||||
*(vec32 *)(d + i) = *(vec32 *)(a + i) << shift;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
int shift = simd_data(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = *(vec64 *)(a + i) << shift;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
int shift = simd_data(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec8)) {
|
||||
*(vec8 *)(d + i) = *(vec8 *)(a + i) >> shift;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
int shift = simd_data(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec16)) {
|
||||
*(vec16 *)(d + i) = *(vec16 *)(a + i) >> shift;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
int shift = simd_data(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec32)) {
|
||||
*(vec32 *)(d + i) = *(vec32 *)(a + i) >> shift;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
int shift = simd_data(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = *(vec64 *)(a + i) >> shift;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
int shift = simd_data(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec8)) {
|
||||
*(svec8 *)(d + i) = *(svec8 *)(a + i) >> shift;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
int shift = simd_data(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec16)) {
|
||||
*(svec16 *)(d + i) = *(svec16 *)(a + i) >> shift;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
int shift = simd_data(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec32)) {
|
||||
*(svec32 *)(d + i) = *(svec32 *)(a + i) >> shift;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
int shift = simd_data(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(svec64 *)(d + i) = *(svec64 *)(a + i) >> shift;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
/* If vectors are enabled, the compiler fills in -1 for true.
   Otherwise, we must take care of this by hand.  */
#ifdef CONFIG_VECTOR16
# define DO_CMP0(X)  X
#else
# define DO_CMP0(X)  -(X)
#endif

#define DO_CMP1(NAME, TYPE, OP)                                            \
void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc)                \
{                                                                          \
    intptr_t oprsz = simd_oprsz(desc);                                     \
    intptr_t i;                                                            \
    for (i = 0; i < oprsz; i += sizeof(vec64)) {                           \
        *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i));  \
    }                                                                      \
    clear_high(d, oprsz, desc);                                            \
}

#define DO_CMP2(SZ) \
    DO_CMP1(gvec_eq##SZ, vec##SZ, ==)    \
    DO_CMP1(gvec_ne##SZ, vec##SZ, !=)    \
    DO_CMP1(gvec_lt##SZ, svec##SZ, <)    \
    DO_CMP1(gvec_le##SZ, svec##SZ, <=)   \
    DO_CMP1(gvec_ltu##SZ, vec##SZ, <)    \
    DO_CMP1(gvec_leu##SZ, vec##SZ, <=)

DO_CMP2(8)
DO_CMP2(16)
DO_CMP2(32)
DO_CMP2(64)

#undef DO_CMP0
#undef DO_CMP1
#undef DO_CMP2
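
/*
 * An illustrative note (not part of the patch itself): with the GCC/Clang
 * generic vector extension, an element-wise comparison already produces
 * all-ones (-1) for true and 0 for false, so DO_CMP0 is a no-op under
 * CONFIG_VECTOR16; the scalar fallback instead negates the 0/1 result of
 * the C comparison by hand.
 */
#if 0   /* illustration only, not compiled */
typedef int8_t example_v16qi __attribute__((vector_size(16)));

static example_v16qi example_cmp_eq(example_v16qi a, example_v16qi b)
{
    return a == b;   /* each byte is -1 where equal, 0 where not */
}
#endif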
|
||||
|
||||
void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(int8_t)) {
|
||||
int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
|
||||
if (r > INT8_MAX) {
|
||||
r = INT8_MAX;
|
||||
} else if (r < INT8_MIN) {
|
||||
r = INT8_MIN;
|
||||
}
|
||||
*(int8_t *)(d + i) = r;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(int16_t)) {
|
||||
int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
|
||||
if (r > INT16_MAX) {
|
||||
r = INT16_MAX;
|
||||
} else if (r < INT16_MIN) {
|
||||
r = INT16_MIN;
|
||||
}
|
||||
*(int16_t *)(d + i) = r;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(int32_t)) {
|
||||
int32_t ai = *(int32_t *)(a + i);
|
||||
int32_t bi = *(int32_t *)(b + i);
|
||||
int32_t di = ai + bi;
|
||||
if (((di ^ ai) &~ (ai ^ bi)) < 0) {
|
||||
/* Signed overflow. */
|
||||
di = (di < 0 ? INT32_MAX : INT32_MIN);
|
||||
}
|
||||
*(int32_t *)(d + i) = di;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(int64_t)) {
|
||||
int64_t ai = *(int64_t *)(a + i);
|
||||
int64_t bi = *(int64_t *)(b + i);
|
||||
int64_t di = ai + bi;
|
||||
if (((di ^ ai) &~ (ai ^ bi)) < 0) {
|
||||
/* Signed overflow. */
|
||||
di = (di < 0 ? INT64_MAX : INT64_MIN);
|
||||
}
|
||||
*(int64_t *)(d + i) = di;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
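
/*
 * An illustrative note (not part of the patch itself): the 32-bit and 64-bit
 * saturating helpers detect signed overflow with a bit trick rather than a
 * widening add.  For addition, (di ^ ai) & ~(ai ^ bi) has its sign bit set
 * exactly when the operands share a sign but the result's sign differs; for
 * subtraction (below) the mask is (ai ^ bi) instead, since overflow can only
 * occur when the operands' signs differ.
 */
#if 0   /* illustration only, not compiled */
static int32_t example_saturating_add32(int32_t ai, int32_t bi)
{
    /* Wrap-around addition done in unsigned arithmetic to avoid UB. */
    int32_t di = (int32_t)((uint32_t)ai + (uint32_t)bi);

    if (((di ^ ai) & ~(ai ^ bi)) < 0) {
        /* Same-sign operands, different-sign result: saturate. */
        di = di < 0 ? INT32_MAX : INT32_MIN;
    }
    return di;
}
#endif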
|
||||
|
||||
void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
|
||||
int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
|
||||
if (r > INT8_MAX) {
|
||||
r = INT8_MAX;
|
||||
} else if (r < INT8_MIN) {
|
||||
r = INT8_MIN;
|
||||
}
|
||||
*(uint8_t *)(d + i) = r;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(int16_t)) {
|
||||
int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
|
||||
if (r > INT16_MAX) {
|
||||
r = INT16_MAX;
|
||||
} else if (r < INT16_MIN) {
|
||||
r = INT16_MIN;
|
||||
}
|
||||
*(int16_t *)(d + i) = r;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(int32_t)) {
|
||||
int32_t ai = *(int32_t *)(a + i);
|
||||
int32_t bi = *(int32_t *)(b + i);
|
||||
int32_t di = ai - bi;
|
||||
if (((di ^ ai) & (ai ^ bi)) < 0) {
|
||||
/* Signed overflow. */
|
||||
di = (di < 0 ? INT32_MAX : INT32_MIN);
|
||||
}
|
||||
*(int32_t *)(d + i) = di;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(int64_t)) {
|
||||
int64_t ai = *(int64_t *)(a + i);
|
||||
int64_t bi = *(int64_t *)(b + i);
|
||||
int64_t di = ai - bi;
|
||||
if (((di ^ ai) & (ai ^ bi)) < 0) {
|
||||
/* Signed overflow. */
|
||||
di = (di < 0 ? INT64_MAX : INT64_MIN);
|
||||
}
|
||||
*(int64_t *)(d + i) = di;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
|
||||
unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
|
||||
if (r > UINT8_MAX) {
|
||||
r = UINT8_MAX;
|
||||
}
|
||||
*(uint8_t *)(d + i) = r;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
|
||||
unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
|
||||
if (r > UINT16_MAX) {
|
||||
r = UINT16_MAX;
|
||||
}
|
||||
*(uint16_t *)(d + i) = r;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
|
||||
uint32_t ai = *(uint32_t *)(a + i);
|
||||
uint32_t bi = *(uint32_t *)(b + i);
|
||||
uint32_t di = ai + bi;
|
||||
if (di < ai) {
|
||||
di = UINT32_MAX;
|
||||
}
|
||||
*(uint32_t *)(d + i) = di;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
|
||||
uint64_t ai = *(uint64_t *)(a + i);
|
||||
uint64_t bi = *(uint64_t *)(b + i);
|
||||
uint64_t di = ai + bi;
|
||||
if (di < ai) {
|
||||
di = UINT64_MAX;
|
||||
}
|
||||
*(uint64_t *)(d + i) = di;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
|
||||
int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
|
||||
if (r < 0) {
|
||||
r = 0;
|
||||
}
|
||||
*(uint8_t *)(d + i) = r;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
|
||||
int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
|
||||
if (r < 0) {
|
||||
r = 0;
|
||||
}
|
||||
*(uint16_t *)(d + i) = r;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
|
||||
uint32_t ai = *(uint32_t *)(a + i);
|
||||
uint32_t bi = *(uint32_t *)(b + i);
|
||||
uint32_t di = ai - bi;
|
||||
if (ai < bi) {
|
||||
di = 0;
|
||||
}
|
||||
*(uint32_t *)(d + i) = di;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
|
||||
uint64_t ai = *(uint64_t *)(a + i);
|
||||
uint64_t bi = *(uint64_t *)(b + i);
|
||||
uint64_t di = ai - bi;
|
||||
if (ai < bi) {
|
||||
di = 0;
|
||||
}
|
||||
*(uint64_t *)(d + i) = di;
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
|
@ -134,3 +134,121 @@ GEN_ATOMIC_HELPERS(xor_fetch)
|
|||
GEN_ATOMIC_HELPERS(xchg)
|
||||
|
||||
#undef GEN_ATOMIC_HELPERS
|
||||
|
||||
DEF_HELPER_FLAGS_3(gvec_mov, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(gvec_dup8, TCG_CALL_NO_RWG, void, ptr, i32, i32)
|
||||
DEF_HELPER_FLAGS_3(gvec_dup16, TCG_CALL_NO_RWG, void, ptr, i32, i32)
|
||||
DEF_HELPER_FLAGS_3(gvec_dup32, TCG_CALL_NO_RWG, void, ptr, i32, i32)
|
||||
DEF_HELPER_FLAGS_3(gvec_dup64, TCG_CALL_NO_RWG, void, ptr, i32, i64)
|
||||
|
||||
DEF_HELPER_FLAGS_4(gvec_add8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_add16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_add32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_add64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(gvec_adds8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_adds16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_adds32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_adds64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(gvec_sub8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_sub16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_sub32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_sub64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(gvec_subs8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_subs16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_subs32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_subs64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(gvec_mul8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_mul16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_mul32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_mul64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(gvec_muls8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_muls16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_muls32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_muls64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(gvec_ssadd8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_ssadd16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_ssadd32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_ssadd64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(gvec_sssub8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_sssub16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_sssub32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_sssub64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(gvec_usadd8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_usadd16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_usadd32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_usadd64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(gvec_ussub8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_ussub16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_ussub32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_ussub64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(gvec_neg8, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(gvec_neg16, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(gvec_neg32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(gvec_neg64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(gvec_not, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_and, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_or, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_xor, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_andc, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_orc, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(gvec_ands, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_xors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_ors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(gvec_shl8i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(gvec_shl16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(gvec_shl32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(gvec_shl64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(gvec_shr8i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(gvec_shr16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(gvec_shr32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(gvec_shr64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(gvec_sar8i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(gvec_sar16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(gvec_sar32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(gvec_sar64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(gvec_eq8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_eq16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_eq32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_eq64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(gvec_ne8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_ne16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_ne32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_ne64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(gvec_lt8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_lt16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_lt32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_lt64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(gvec_le8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_le16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_le32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_le64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(gvec_ltu8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_ltu16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_ltu32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_ltu64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(gvec_leu8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_leu16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_leu32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_leu64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
|
|
@@ -5000,6 +5000,50 @@ if compile_prog "" "" ; then
  atomic64=yes
fi

########################################
# See if 16-byte vector operations are supported.
# Even without a vector unit the compiler may expand these.
# There is a bug in old GCC for PPC that crashes here.
# Unfortunately it's the system compiler for Centos 7.

cat > $TMPC << EOF
typedef unsigned char U1 __attribute__((vector_size(16)));
typedef unsigned short U2 __attribute__((vector_size(16)));
typedef unsigned int U4 __attribute__((vector_size(16)));
typedef unsigned long long U8 __attribute__((vector_size(16)));
typedef signed char S1 __attribute__((vector_size(16)));
typedef signed short S2 __attribute__((vector_size(16)));
typedef signed int S4 __attribute__((vector_size(16)));
typedef signed long long S8 __attribute__((vector_size(16)));
static U1 a1, b1;
static U2 a2, b2;
static U4 a4, b4;
static U8 a8, b8;
static S1 c1;
static S2 c2;
static S4 c4;
static S8 c8;
static int i;
int main(void)
{
  a1 += b1; a2 += b2; a4 += b4; a8 += b8;
  a1 -= b1; a2 -= b2; a4 -= b4; a8 -= b8;
  a1 *= b1; a2 *= b2; a4 *= b4; a8 *= b8;
  a1 &= b1; a2 &= b2; a4 &= b4; a8 &= b8;
  a1 |= b1; a2 |= b2; a4 |= b4; a8 |= b8;
  a1 ^= b1; a2 ^= b2; a4 ^= b4; a8 ^= b8;
  a1 <<= i; a2 <<= i; a4 <<= i; a8 <<= i;
  a1 >>= i; a2 >>= i; a4 >>= i; a8 >>= i;
  c1 >>= i; c2 >>= i; c4 >>= i; c8 >>= i;
  return 0;
}
EOF

vector16=no
if compile_prog "" "" ; then
  vector16=yes
fi

########################################
# check if getauxval is available.

@@ -6329,6 +6373,10 @@ if test "$atomic64" = "yes" ; then
  echo "CONFIG_ATOMIC64=y" >> $config_host_mak
fi

if test "$vector16" = "yes" ; then
  echo "CONFIG_VECTOR16=y" >> $config_host_mak
fi

if test "$getauxval" = "yes" ; then
  echo "CONFIG_GETAUXVAL=y" >> $config_host_mak
fi
@@ -492,7 +492,7 @@ typedef struct CPUARMState {
     * the two execution states, and means we do not need to explicitly
     * map these registers when changing states.
     */
-    uint64_t regs[64];
+    uint64_t regs[64] QEMU_ALIGNED(16);

    uint32_t xregs[16];
    /* We store these fpcsr fields separately for convenience. */
[diff of one large file suppressed by the viewer]

tcg/README
@@ -503,6 +503,92 @@ of the memory access.
For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a
64-bit memory access specified in flags.

********* Host vector operations

All of the vector ops have two parameters, TCGOP_VECL & TCGOP_VECE.
The former specifies the length of the vector in log2 64-bit units; the
latter specifies the length of the element (if applicable) in log2 8-bit units.
E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32.
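
For instance, a backend can recover the operation geometry from the two
fields like so (an illustrative sketch; the variable names are local to
this example):

    oprsz = 8 << VECL;      /* bytes per vector, e.g. VECL=1 -> 16 */
    esize = 1 << VECE;      /* bytes per element, e.g. VECE=2 -> 4 */
    nelem = oprsz / esize;  /* elements processed per operation    */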

* mov_vec   v0, v1
* ld_vec    v0, t1
* st_vec    v0, t1

  Move, load and store.

* dup_vec  v0, r1

  Duplicate the low N bits of R1 into VECL/VECE copies across V0.
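
  I.e., following the loop notation used for the shift ops below
  (illustrative only):

    for (i = 0; i < VECL/VECE; ++i) {
        v0[i] = r1;    /* truncated to the element width, 8 << VECE bits */
    }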

* dupi_vec v0, c

  Similarly, for a constant.
  Smaller values will be replicated to host register size by the expanders.

* dup2_vec v0, r1, r2

  Duplicate r2:r1 into VECL/64 copies across V0.  This opcode is
  only present for 32-bit hosts.

* add_vec   v0, v1, v2

  v0 = v1 + v2, in elements across the vector.

* sub_vec   v0, v1, v2

  Similarly, v0 = v1 - v2.

* mul_vec   v0, v1, v2

  Similarly, v0 = v1 * v2.

* neg_vec   v0, v1

  Similarly, v0 = -v1.

* and_vec   v0, v1, v2
* or_vec    v0, v1, v2
* xor_vec   v0, v1, v2
* andc_vec  v0, v1, v2
* orc_vec   v0, v1, v2
* not_vec   v0, v1

  Similarly, logical operations with and without complement.
  Note that VECE is unused.

* shli_vec   v0, v1, i2
* shls_vec   v0, v1, s2

  Shift all elements from v1 by a scalar i2/s2.  I.e.

    for (i = 0; i < VECL/VECE; ++i) {
        v0[i] = v1[i] << s2;
    }

* shri_vec   v0, v1, i2
* sari_vec   v0, v1, i2
* shrs_vec   v0, v1, s2
* sars_vec   v0, v1, s2

  Similarly for logical and arithmetic right shift.

* shlv_vec   v0, v1, v2

  Shift elements from v1 by elements from v2.  I.e.

    for (i = 0; i < VECL/VECE; ++i) {
        v0[i] = v1[i] << v2[i];
    }

* shrv_vec   v0, v1, v2
* sarv_vec   v0, v1, v2

  Similarly for logical and arithmetic right shift.

* cmp_vec  v0, v1, v2, cond

  Compare vectors by element, storing -1 for true and 0 for false.
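
  I.e., in the same loop notation (illustrative only):

    for (i = 0; i < VECL/VECE; ++i) {
        v0[i] = -(v1[i] cond v2[i]);
    }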

*********

Note 1: Some shortcuts are defined when the last operand is known to be
@ -31,13 +31,22 @@ typedef enum {
|
|||
TCG_REG_SP = 31,
|
||||
TCG_REG_XZR = 31,
|
||||
|
||||
TCG_REG_V0 = 32, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
|
||||
TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
|
||||
TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
|
||||
TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
|
||||
TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
|
||||
TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
|
||||
TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
|
||||
TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
|
||||
|
||||
/* Aliases. */
|
||||
TCG_REG_FP = TCG_REG_X29,
|
||||
TCG_REG_LR = TCG_REG_X30,
|
||||
TCG_AREG0 = TCG_REG_X19,
|
||||
} TCGReg;
|
||||
|
||||
#define TCG_TARGET_NB_REGS 32
|
||||
#define TCG_TARGET_NB_REGS 64
|
||||
|
||||
/* used for function call generation */
|
||||
#define TCG_REG_CALL_STACK TCG_REG_SP
|
||||
|
@ -113,6 +122,20 @@ typedef enum {
|
|||
#define TCG_TARGET_HAS_mulsh_i64 1
|
||||
#define TCG_TARGET_HAS_direct_jump 1
|
||||
|
||||
#define TCG_TARGET_HAS_v64 1
|
||||
#define TCG_TARGET_HAS_v128 1
|
||||
#define TCG_TARGET_HAS_v256 0
|
||||
|
||||
#define TCG_TARGET_HAS_andc_vec 1
|
||||
#define TCG_TARGET_HAS_orc_vec 1
|
||||
#define TCG_TARGET_HAS_not_vec 1
|
||||
#define TCG_TARGET_HAS_neg_vec 1
|
||||
#define TCG_TARGET_HAS_shi_vec 1
|
||||
#define TCG_TARGET_HAS_shs_vec 0
|
||||
#define TCG_TARGET_HAS_shv_vec 0
|
||||
#define TCG_TARGET_HAS_cmp_vec 1
|
||||
#define TCG_TARGET_HAS_mul_vec 1
|
||||
|
||||
#define TCG_TARGET_DEFAULT_MO (0)
|
||||
|
||||
static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
|
||||
|
|
|
@ -20,10 +20,15 @@ QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
|
|||
|
||||
#ifdef CONFIG_DEBUG_TCG
|
||||
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
|
||||
"%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
|
||||
"%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
|
||||
"%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
|
||||
"%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp",
|
||||
"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
|
||||
"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
|
||||
"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
|
||||
"x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
|
||||
|
||||
"v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
|
||||
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
|
||||
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
|
||||
"v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
|
||||
};
|
||||
#endif /* CONFIG_DEBUG_TCG */
|
||||
|
||||
|
@ -43,6 +48,14 @@ static const int tcg_target_reg_alloc_order[] = {
|
|||
/* X19 reserved for AREG0 */
|
||||
/* X29 reserved as fp */
|
||||
/* X30 reserved as temporary */
|
||||
|
||||
TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
|
||||
TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
|
||||
/* V8 - V15 are call-saved, and skipped. */
|
||||
TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
|
||||
TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
|
||||
TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
|
||||
TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
|
||||
};
|
||||
|
||||
static const int tcg_target_call_iarg_regs[8] = {
|
||||
|
@ -54,6 +67,7 @@ static const int tcg_target_call_oarg_regs[1] = {
|
|||
};
|
||||
|
||||
#define TCG_REG_TMP TCG_REG_X30
|
||||
#define TCG_VEC_TMP TCG_REG_V31
|
||||
|
||||
#ifndef CONFIG_SOFTMMU
|
||||
/* Note that XZR cannot be encoded in the address base register slot,
|
||||
|
@ -119,9 +133,13 @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
|
|||
const char *ct_str, TCGType type)
|
||||
{
|
||||
switch (*ct_str++) {
|
||||
case 'r':
|
||||
case 'r': /* general registers */
|
||||
ct->ct |= TCG_CT_REG;
|
||||
ct->u.regs = 0xffffffffu;
|
||||
ct->u.regs |= 0xffffffffu;
|
||||
break;
|
||||
case 'w': /* advsimd registers */
|
||||
ct->ct |= TCG_CT_REG;
|
||||
ct->u.regs |= 0xffffffff00000000ull;
|
||||
break;
|
||||
case 'l': /* qemu_ld / qemu_st address, data_reg */
|
||||
ct->ct |= TCG_CT_REG;
|
||||
|
@ -153,11 +171,13 @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
|
|||
return ct_str;
|
||||
}
|
||||
|
||||
/* Match a constant valid for addition (12-bit, optionally shifted). */
|
||||
static inline bool is_aimm(uint64_t val)
|
||||
{
|
||||
return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
|
||||
}
|
||||
|
||||
/* Match a constant valid for logical operations. */
|
||||
static inline bool is_limm(uint64_t val)
|
||||
{
|
||||
/* Taking a simplified view of the logical immediates for now, ignoring
|
||||
|
@ -178,6 +198,106 @@ static inline bool is_limm(uint64_t val)
|
|||
return (val & (val - 1)) == 0;
|
||||
}
|
||||
|
||||
/* Match a constant that is valid for vectors. */
|
||||
static bool is_fimm(uint64_t v64, int *op, int *cmode, int *imm8)
|
||||
{
|
||||
int i;
|
||||
|
||||
*op = 0;
|
||||
/* Match replication across 8 bits. */
|
||||
if (v64 == dup_const(MO_8, v64)) {
|
||||
*cmode = 0xe;
|
||||
*imm8 = v64 & 0xff;
|
||||
return true;
|
||||
}
|
||||
/* Match replication across 16 bits. */
|
||||
if (v64 == dup_const(MO_16, v64)) {
|
||||
uint16_t v16 = v64;
|
||||
|
||||
if (v16 == (v16 & 0xff)) {
|
||||
*cmode = 0x8;
|
||||
*imm8 = v16 & 0xff;
|
||||
return true;
|
||||
} else if (v16 == (v16 & 0xff00)) {
|
||||
*cmode = 0xa;
|
||||
*imm8 = v16 >> 8;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
/* Match replication across 32 bits. */
|
||||
if (v64 == dup_const(MO_32, v64)) {
|
||||
uint32_t v32 = v64;
|
||||
|
||||
if (v32 == (v32 & 0xff)) {
|
||||
*cmode = 0x0;
|
||||
*imm8 = v32 & 0xff;
|
||||
return true;
|
||||
} else if (v32 == (v32 & 0xff00)) {
|
||||
*cmode = 0x2;
|
||||
*imm8 = (v32 >> 8) & 0xff;
|
||||
return true;
|
||||
} else if (v32 == (v32 & 0xff0000)) {
|
||||
*cmode = 0x4;
|
||||
*imm8 = (v32 >> 16) & 0xff;
|
||||
return true;
|
||||
} else if (v32 == (v32 & 0xff000000)) {
|
||||
*cmode = 0x6;
|
||||
*imm8 = v32 >> 24;
|
||||
return true;
|
||||
} else if ((v32 & 0xffff00ff) == 0xff) {
|
||||
*cmode = 0xc;
|
||||
*imm8 = (v32 >> 8) & 0xff;
|
||||
return true;
|
||||
} else if ((v32 & 0xff00ffff) == 0xffff) {
|
||||
*cmode = 0xd;
|
||||
*imm8 = (v32 >> 16) & 0xff;
|
||||
return true;
|
||||
}
|
||||
/* Match forms of a float32. */
|
||||
if (extract32(v32, 0, 19) == 0
|
||||
&& (extract32(v32, 25, 6) == 0x20
|
||||
|| extract32(v32, 25, 6) == 0x1f)) {
|
||||
*cmode = 0xf;
|
||||
*imm8 = (extract32(v32, 31, 1) << 7)
|
||||
| (extract32(v32, 25, 1) << 6)
|
||||
| extract32(v32, 19, 6);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
/* Match forms of a float64. */
|
||||
if (extract64(v64, 0, 48) == 0
|
||||
&& (extract64(v64, 54, 9) == 0x100
|
||||
|| extract64(v64, 54, 9) == 0x0ff)) {
|
||||
*cmode = 0xf;
|
||||
*op = 1;
|
||||
*imm8 = (extract64(v64, 63, 1) << 7)
|
||||
| (extract64(v64, 54, 1) << 6)
|
||||
| extract64(v64, 48, 6);
|
||||
return true;
|
||||
}
|
||||
/* Match bytes of 0x00 and 0xff. */
|
||||
for (i = 0; i < 64; i += 8) {
|
||||
uint64_t byte = extract64(v64, i, 8);
|
||||
if (byte != 0 && byte != 0xff) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i == 64) {
|
||||
*cmode = 0xe;
|
||||
*op = 1;
|
||||
*imm8 = (extract64(v64, 0, 1) << 0)
|
||||
| (extract64(v64, 8, 1) << 1)
|
||||
| (extract64(v64, 16, 1) << 2)
|
||||
| (extract64(v64, 24, 1) << 3)
|
||||
| (extract64(v64, 32, 1) << 4)
|
||||
| (extract64(v64, 40, 1) << 5)
|
||||
| (extract64(v64, 48, 1) << 6)
|
||||
| (extract64(v64, 56, 1) << 7);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static int tcg_target_const_match(tcg_target_long val, TCGType type,
|
||||
const TCGArgConstraint *arg_ct)
|
||||
{
|
||||
|
@ -271,6 +391,9 @@ typedef enum {
|
|||
|
||||
/* Load literal for loading the address at pc-relative offset */
|
||||
I3305_LDR = 0x58000000,
|
||||
I3305_LDR_v64 = 0x5c000000,
|
||||
I3305_LDR_v128 = 0x9c000000,
|
||||
|
||||
/* Load/store register. Described here as 3.3.12, but the helper
|
||||
that emits them can transform to 3.3.10 or 3.3.13. */
|
||||
I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
|
||||
|
@ -290,6 +413,15 @@ typedef enum {
|
|||
I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
|
||||
I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
|
||||
|
||||
I3312_LDRVS = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
|
||||
I3312_STRVS = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
|
||||
|
||||
I3312_LDRVD = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
|
||||
I3312_STRVD = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
|
||||
|
||||
I3312_LDRVQ = 0x3c000000 | 3 << 22 | 0 << 30,
|
||||
I3312_STRVQ = 0x3c000000 | 2 << 22 | 0 << 30,
|
||||
|
||||
I3312_TO_I3310 = 0x00200800,
|
||||
I3312_TO_I3313 = 0x01000000,
|
||||
|
||||
|
@ -374,8 +506,48 @@ typedef enum {
|
|||
I3510_EON = 0x4a200000,
|
||||
I3510_ANDS = 0x6a000000,
|
||||
|
||||
NOP = 0xd503201f,
|
||||
/* AdvSIMD copy */
|
||||
I3605_DUP = 0x0e000400,
|
||||
I3605_INS = 0x4e001c00,
|
||||
I3605_UMOV = 0x0e003c00,
|
||||
|
||||
/* AdvSIMD modified immediate */
|
||||
I3606_MOVI = 0x0f000400,
|
||||
|
||||
/* AdvSIMD shift by immediate */
|
||||
I3614_SSHR = 0x0f000400,
|
||||
I3614_SSRA = 0x0f001400,
|
||||
I3614_SHL = 0x0f005400,
|
||||
I3614_USHR = 0x2f000400,
|
||||
I3614_USRA = 0x2f001400,
|
||||
|
||||
/* AdvSIMD three same. */
|
||||
I3616_ADD = 0x0e208400,
|
||||
I3616_AND = 0x0e201c00,
|
||||
I3616_BIC = 0x0e601c00,
|
||||
I3616_EOR = 0x2e201c00,
|
||||
I3616_MUL = 0x0e209c00,
|
||||
I3616_ORR = 0x0ea01c00,
|
||||
I3616_ORN = 0x0ee01c00,
|
||||
I3616_SUB = 0x2e208400,
|
||||
I3616_CMGT = 0x0e203400,
|
||||
I3616_CMGE = 0x0e203c00,
|
||||
I3616_CMTST = 0x0e208c00,
|
||||
I3616_CMHI = 0x2e203400,
|
||||
I3616_CMHS = 0x2e203c00,
|
||||
I3616_CMEQ = 0x2e208c00,
|
||||
|
||||
/* AdvSIMD two-reg misc. */
|
||||
I3617_CMGT0 = 0x0e208800,
|
||||
I3617_CMEQ0 = 0x0e209800,
|
||||
I3617_CMLT0 = 0x0e20a800,
|
||||
I3617_CMGE0 = 0x2e208800,
|
||||
I3617_CMLE0 = 0x2e20a800,
|
||||
I3617_NOT = 0x2e205800,
|
||||
I3617_NEG = 0x2e20b800,
|
||||
|
||||
/* System instructions. */
|
||||
NOP = 0xd503201f,
|
||||
DMB_ISH = 0xd50338bf,
|
||||
DMB_LD = 0x00000100,
|
||||
DMB_ST = 0x00000200,
|
||||
|
@ -520,26 +692,64 @@ static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
|
|||
tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
|
||||
}
|
||||
|
||||
static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
|
||||
TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
|
||||
{
|
||||
/* Note that bit 11 set means general register input. Therefore
|
||||
we can handle both register sets with one function. */
|
||||
tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
|
||||
| (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
|
||||
}
|
||||
|
||||
static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
|
||||
TCGReg rd, bool op, int cmode, uint8_t imm8)
|
||||
{
|
||||
tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
|
||||
| (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
|
||||
}
|
||||
|
||||
static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
|
||||
TCGReg rd, TCGReg rn, unsigned immhb)
|
||||
{
|
||||
tcg_out32(s, insn | q << 30 | immhb << 16
|
||||
| (rn & 0x1f) << 5 | (rd & 0x1f));
|
||||
}
|
||||
|
||||
static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
|
||||
unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
|
||||
{
|
||||
tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
|
||||
| (rn & 0x1f) << 5 | (rd & 0x1f));
|
||||
}
|
||||
|
||||
static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
|
||||
unsigned size, TCGReg rd, TCGReg rn)
|
||||
{
|
||||
tcg_out32(s, insn | q << 30 | (size << 22)
|
||||
| (rn & 0x1f) << 5 | (rd & 0x1f));
|
||||
}
|
||||
|
||||
static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
|
||||
TCGReg rd, TCGReg base, TCGType ext,
|
||||
TCGReg regoff)
|
||||
{
|
||||
/* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
|
||||
tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
|
||||
0x4000 | ext << 13 | base << 5 | rd);
|
||||
0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
|
||||
}
|
||||
|
||||
static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
|
||||
TCGReg rd, TCGReg rn, intptr_t offset)
|
||||
{
|
||||
tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd);
|
||||
tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
|
||||
}
|
||||
|
||||
static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
|
||||
TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
|
||||
{
|
||||
/* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
|
||||
tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd);
|
||||
tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
|
||||
| rn << 5 | (rd & 0x1f));
|
||||
}
|
||||
|
||||
/* Register to register move using ORR (shifted register with no shift). */
|
||||
|
@ -585,6 +795,22 @@ static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
|
|||
tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
|
||||
}
|
||||
|
||||
static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
|
||||
TCGReg rd, uint64_t v64)
|
||||
{
|
||||
int op, cmode, imm8;
|
||||
|
||||
if (is_fimm(v64, &op, &cmode, &imm8)) {
|
||||
tcg_out_insn(s, 3606, MOVI, type == TCG_TYPE_V128, rd, op, cmode, imm8);
|
||||
} else if (type == TCG_TYPE_V128) {
|
||||
new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
|
||||
tcg_out_insn(s, 3305, LDR_v128, 0, rd);
|
||||
} else {
|
||||
new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
|
||||
tcg_out_insn(s, 3305, LDR_v64, 0, rd);
|
||||
}
|
||||
}
|
||||
|
||||
static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
|
||||
tcg_target_long value)
|
||||
{
|
||||
|
@ -594,6 +820,22 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
|
|||
int s0, s1;
|
||||
AArch64Insn opc;
|
||||
|
||||
switch (type) {
|
||||
case TCG_TYPE_I32:
|
||||
case TCG_TYPE_I64:
|
||||
tcg_debug_assert(rd < 32);
|
||||
break;
|
||||
|
||||
case TCG_TYPE_V64:
|
||||
case TCG_TYPE_V128:
|
||||
tcg_debug_assert(rd >= 32);
|
||||
tcg_out_dupi_vec(s, type, rd, value);
|
||||
return;
|
||||
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
/* For 32-bit values, discard potential garbage in value. For 64-bit
|
||||
values within [2**31, 2**32-1], we can create smaller sequences by
|
||||
interpreting this as a negative 32-bit number, while ensuring that
|
||||
|
@ -669,15 +911,13 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
|
|||
/* Define something more legible for general use. */
|
||||
#define tcg_out_ldst_r tcg_out_insn_3310
|
||||
|
||||
static void tcg_out_ldst(TCGContext *s, AArch64Insn insn,
|
||||
TCGReg rd, TCGReg rn, intptr_t offset)
|
||||
static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
|
||||
TCGReg rn, intptr_t offset, int lgsize)
|
||||
{
|
||||
TCGMemOp size = (uint32_t)insn >> 30;
|
||||
|
||||
/* If the offset is naturally aligned and in range, then we can
|
||||
use the scaled uimm12 encoding */
|
||||
if (offset >= 0 && !(offset & ((1 << size) - 1))) {
|
||||
uintptr_t scaled_uimm = offset >> size;
|
||||
if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
|
||||
uintptr_t scaled_uimm = offset >> lgsize;
|
||||
if (scaled_uimm <= 0xfff) {
|
||||
tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
|
||||
return;
|
||||
|
@ -695,32 +935,102 @@ static void tcg_out_ldst(TCGContext *s, AArch64Insn insn,
|
|||
tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
|
||||
}
|
||||
|
||||
static inline void tcg_out_mov(TCGContext *s,
|
||||
TCGType type, TCGReg ret, TCGReg arg)
|
||||
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
|
||||
{
|
||||
if (ret != arg) {
|
||||
tcg_out_movr(s, type, ret, arg);
|
||||
if (ret == arg) {
|
||||
return;
|
||||
}
|
||||
switch (type) {
|
||||
case TCG_TYPE_I32:
|
||||
case TCG_TYPE_I64:
|
||||
if (ret < 32 && arg < 32) {
|
||||
tcg_out_movr(s, type, ret, arg);
|
||||
break;
|
||||
} else if (ret < 32) {
|
||||
tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
|
||||
break;
|
||||
} else if (arg < 32) {
|
||||
tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
|
||||
break;
|
||||
}
|
||||
/* FALLTHRU */
|
||||
|
||||
case TCG_TYPE_V64:
|
||||
tcg_debug_assert(ret >= 32 && arg >= 32);
|
||||
tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
|
||||
break;
|
||||
case TCG_TYPE_V128:
|
||||
tcg_debug_assert(ret >= 32 && arg >= 32);
|
||||
tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
|
||||
break;
|
||||
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
}
|
||||
|
||||
static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
|
||||
TCGReg arg1, intptr_t arg2)
|
||||
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
|
||||
TCGReg base, intptr_t ofs)
|
||||
{
|
||||
tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX,
|
||||
arg, arg1, arg2);
|
||||
AArch64Insn insn;
|
||||
int lgsz;
|
||||
|
||||
switch (type) {
|
||||
case TCG_TYPE_I32:
|
||||
insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
|
||||
lgsz = 2;
|
||||
break;
|
||||
case TCG_TYPE_I64:
|
||||
insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
|
||||
lgsz = 3;
|
||||
break;
|
||||
case TCG_TYPE_V64:
|
||||
insn = I3312_LDRVD;
|
||||
lgsz = 3;
|
||||
break;
|
||||
case TCG_TYPE_V128:
|
||||
insn = I3312_LDRVQ;
|
||||
lgsz = 4;
|
||||
break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
|
||||
}
|
||||
|
||||
static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
|
||||
TCGReg arg1, intptr_t arg2)
|
||||
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
|
||||
TCGReg base, intptr_t ofs)
|
||||
{
|
||||
tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX,
|
||||
arg, arg1, arg2);
|
||||
AArch64Insn insn;
|
||||
int lgsz;
|
||||
|
||||
switch (type) {
|
||||
case TCG_TYPE_I32:
|
||||
insn = (src < 32 ? I3312_STRW : I3312_STRVS);
|
||||
lgsz = 2;
|
||||
break;
|
||||
case TCG_TYPE_I64:
|
||||
insn = (src < 32 ? I3312_STRX : I3312_STRVD);
|
||||
lgsz = 3;
|
||||
break;
|
||||
case TCG_TYPE_V64:
|
||||
insn = I3312_STRVD;
|
||||
lgsz = 3;
|
||||
break;
|
||||
case TCG_TYPE_V128:
|
||||
insn = I3312_STRVQ;
|
||||
lgsz = 4;
|
||||
break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
tcg_out_ldst(s, insn, src, base, ofs, lgsz);
|
||||
}
|
||||
|
||||
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
|
||||
TCGReg base, intptr_t ofs)
|
||||
{
|
||||
if (val == 0) {
|
||||
if (type <= TCG_TYPE_I64 && val == 0) {
|
||||
tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
|
||||
return true;
|
||||
}
|
||||
|
@ -1210,14 +1520,15 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
|
|||
/* Merge "low bits" from tlb offset, load the tlb comparator into X0.
|
||||
X0 = load [X2 + (tlb_offset & 0x000fff)] */
|
||||
tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
|
||||
TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff);
|
||||
TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff,
|
||||
TARGET_LONG_BITS == 32 ? 2 : 3);
|
||||
|
||||
/* Load the tlb addend. Do that early to avoid stalling.
|
||||
X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
|
||||
tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
|
||||
(tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
|
||||
(is_read ? offsetof(CPUTLBEntry, addr_read)
|
||||
: offsetof(CPUTLBEntry, addr_write)));
|
||||
: offsetof(CPUTLBEntry, addr_write)), 3);
|
||||
|
||||
/* Perform the address comparison. */
|
||||
tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
|
||||
|
@ -1435,49 +1746,49 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
|||
|
||||
case INDEX_op_ld8u_i32:
|
||||
case INDEX_op_ld8u_i64:
|
||||
tcg_out_ldst(s, I3312_LDRB, a0, a1, a2);
|
||||
tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
|
||||
break;
|
||||
case INDEX_op_ld8s_i32:
|
||||
tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2);
|
||||
tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
|
||||
break;
|
||||
case INDEX_op_ld8s_i64:
|
||||
tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2);
|
||||
tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
|
||||
break;
|
||||
case INDEX_op_ld16u_i32:
|
||||
case INDEX_op_ld16u_i64:
|
||||
tcg_out_ldst(s, I3312_LDRH, a0, a1, a2);
|
||||
tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
|
||||
break;
|
||||
case INDEX_op_ld16s_i32:
|
||||
tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2);
|
||||
tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
|
||||
break;
|
||||
case INDEX_op_ld16s_i64:
|
||||
tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2);
|
||||
tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
|
||||
break;
|
||||
case INDEX_op_ld_i32:
|
||||
case INDEX_op_ld32u_i64:
|
||||
tcg_out_ldst(s, I3312_LDRW, a0, a1, a2);
|
||||
tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
|
||||
break;
|
||||
case INDEX_op_ld32s_i64:
|
||||
tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2);
|
||||
tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
|
||||
break;
|
||||
case INDEX_op_ld_i64:
|
||||
tcg_out_ldst(s, I3312_LDRX, a0, a1, a2);
|
||||
tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
|
||||
break;
|
||||
|
||||
case INDEX_op_st8_i32:
|
||||
case INDEX_op_st8_i64:
|
||||
tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2);
|
||||
tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
|
||||
break;
|
||||
case INDEX_op_st16_i32:
|
||||
case INDEX_op_st16_i64:
|
||||
tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2);
|
||||
tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
|
||||
break;
|
||||
case INDEX_op_st_i32:
|
||||
case INDEX_op_st32_i64:
|
||||
tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2);
|
||||
tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
|
||||
break;
|
||||
case INDEX_op_st_i64:
|
||||
tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2);
|
||||
tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
|
||||
break;
|
||||
|
||||
case INDEX_op_add_i32:
|
||||
|
@ -1776,25 +2087,176 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
|||
|
||||
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
|
||||
case INDEX_op_mov_i64:
|
||||
case INDEX_op_mov_vec:
|
||||
case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
|
||||
case INDEX_op_movi_i64:
|
||||
case INDEX_op_dupi_vec:
|
||||
case INDEX_op_call: /* Always emitted via tcg_out_call. */
|
||||
default:
|
||||
tcg_abort();
|
||||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
#undef REG0
|
||||
}
|
||||
|
||||
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
|
||||
unsigned vecl, unsigned vece,
|
||||
const TCGArg *args, const int *const_args)
|
||||
{
|
||||
static const AArch64Insn cmp_insn[16] = {
|
||||
[TCG_COND_EQ] = I3616_CMEQ,
|
||||
[TCG_COND_GT] = I3616_CMGT,
|
||||
[TCG_COND_GE] = I3616_CMGE,
|
||||
[TCG_COND_GTU] = I3616_CMHI,
|
||||
[TCG_COND_GEU] = I3616_CMHS,
|
||||
};
|
||||
static const AArch64Insn cmp0_insn[16] = {
|
||||
[TCG_COND_EQ] = I3617_CMEQ0,
|
||||
[TCG_COND_GT] = I3617_CMGT0,
|
||||
[TCG_COND_GE] = I3617_CMGE0,
|
||||
[TCG_COND_LT] = I3617_CMLT0,
|
||||
[TCG_COND_LE] = I3617_CMLE0,
|
||||
};
|
||||
|
||||
TCGType type = vecl + TCG_TYPE_V64;
|
||||
unsigned is_q = vecl;
|
||||
TCGArg a0, a1, a2;
|
||||
|
||||
a0 = args[0];
|
||||
a1 = args[1];
|
||||
a2 = args[2];
|
||||
|
||||
switch (opc) {
|
||||
case INDEX_op_ld_vec:
|
||||
tcg_out_ld(s, type, a0, a1, a2);
|
||||
break;
|
||||
case INDEX_op_st_vec:
|
||||
tcg_out_st(s, type, a0, a1, a2);
|
||||
break;
|
||||
case INDEX_op_add_vec:
|
||||
tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
|
||||
break;
|
||||
case INDEX_op_sub_vec:
|
||||
tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
|
||||
break;
|
||||
case INDEX_op_mul_vec:
|
||||
tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
|
||||
break;
|
||||
case INDEX_op_neg_vec:
|
||||
tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
|
||||
break;
|
||||
case INDEX_op_and_vec:
|
||||
tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
|
||||
break;
|
||||
case INDEX_op_or_vec:
|
||||
tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
|
||||
break;
|
||||
case INDEX_op_xor_vec:
|
||||
tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
|
||||
break;
|
||||
case INDEX_op_andc_vec:
|
||||
tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
|
||||
break;
|
||||
case INDEX_op_orc_vec:
|
||||
tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
|
||||
break;
|
||||
case INDEX_op_not_vec:
|
||||
tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
|
||||
break;
|
||||
case INDEX_op_dup_vec:
|
||||
tcg_out_insn(s, 3605, DUP, is_q, a0, a1, 1 << vece, 0);
|
||||
break;
|
||||
case INDEX_op_shli_vec:
|
||||
tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
|
||||
break;
|
||||
case INDEX_op_shri_vec:
|
||||
tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
|
||||
break;
|
||||
case INDEX_op_sari_vec:
|
||||
tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
|
||||
break;
|
||||
case INDEX_op_cmp_vec:
|
||||
{
|
||||
TCGCond cond = args[3];
|
||||
AArch64Insn insn;
|
||||
|
||||
if (cond == TCG_COND_NE) {
|
||||
if (const_args[2]) {
|
||||
tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
|
||||
} else {
|
||||
tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
|
||||
tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
|
||||
}
|
||||
} else {
|
||||
if (const_args[2]) {
|
||||
insn = cmp0_insn[cond];
|
||||
if (insn) {
|
||||
tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
|
||||
break;
|
||||
}
|
||||
tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
|
||||
a2 = TCG_VEC_TMP;
|
||||
}
|
||||
insn = cmp_insn[cond];
|
||||
if (insn == 0) {
|
||||
TCGArg t;
|
||||
t = a1, a1 = a2, a2 = t;
|
||||
cond = tcg_swap_cond(cond);
|
||||
insn = cmp_insn[cond];
|
||||
tcg_debug_assert(insn != 0);
|
||||
}
|
||||
tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
}
|
||||
|
||||
int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
|
||||
{
|
||||
switch (opc) {
|
||||
case INDEX_op_add_vec:
|
||||
case INDEX_op_sub_vec:
|
||||
case INDEX_op_mul_vec:
|
||||
case INDEX_op_and_vec:
|
||||
case INDEX_op_or_vec:
|
||||
case INDEX_op_xor_vec:
|
||||
case INDEX_op_andc_vec:
|
||||
case INDEX_op_orc_vec:
|
||||
case INDEX_op_neg_vec:
|
||||
case INDEX_op_not_vec:
|
||||
case INDEX_op_cmp_vec:
|
||||
case INDEX_op_shli_vec:
|
||||
case INDEX_op_shri_vec:
|
||||
case INDEX_op_sari_vec:
|
||||
return 1;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
|
||||
TCGArg a0, ...)
|
||||
{
|
||||
}
|
||||
|
||||
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
|
||||
{
|
||||
static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
|
||||
static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
|
||||
static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
|
||||
static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
|
||||
static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
|
||||
static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
|
||||
static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
|
||||
static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
|
||||
static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
|
||||
static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
|
||||
static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
|
||||
static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
|
||||
static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
|
||||
static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
|
||||
static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
|
||||
|
@ -1938,6 +2400,29 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
|
|||
case INDEX_op_sub2_i64:
|
||||
return &add2;
|
||||
|
||||
case INDEX_op_add_vec:
|
||||
case INDEX_op_sub_vec:
|
||||
case INDEX_op_mul_vec:
|
||||
case INDEX_op_and_vec:
|
||||
case INDEX_op_or_vec:
|
||||
case INDEX_op_xor_vec:
|
||||
case INDEX_op_andc_vec:
|
||||
case INDEX_op_orc_vec:
|
||||
return &w_w_w;
|
||||
case INDEX_op_not_vec:
|
||||
case INDEX_op_neg_vec:
|
||||
case INDEX_op_shli_vec:
|
||||
case INDEX_op_shri_vec:
|
||||
case INDEX_op_sari_vec:
|
||||
return &w_w;
|
||||
case INDEX_op_ld_vec:
|
||||
case INDEX_op_st_vec:
|
||||
return &w_r;
|
||||
case INDEX_op_dup_vec:
|
||||
return &w_wr;
|
||||
case INDEX_op_cmp_vec:
|
||||
return &w_w_wZ;
|
||||
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
|
@ -1947,8 +2432,10 @@ static void tcg_target_init(TCGContext *s)
|
|||
{
|
||||
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
|
||||
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
|
||||
tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
|
||||
tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
|
||||
|
||||
tcg_target_call_clobber_regs = 0xfffffffu;
|
||||
tcg_target_call_clobber_regs = -1ull;
|
||||
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
|
||||
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
|
||||
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
|
||||
|
@ -1960,12 +2447,21 @@ static void tcg_target_init(TCGContext *s)
|
|||
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
|
||||
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
|
||||
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
|
||||
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
|
||||
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
|
||||
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
|
||||
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
|
||||
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
|
||||
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
|
||||
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
|
||||
tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
|
||||
|
||||
s->reserved_regs = 0;
|
||||
tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
|
||||
tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
|
||||
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
|
||||
tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
|
||||
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
|
||||
}
|
||||
|
||||
/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
|
||||
|
|
|
@@ -0,0 +1,3 @@
/* Target-specific opcodes for host vector expansion.  These will be
   emitted by tcg_expand_vec_op.  For those familiar with GCC internals,
   consider these to be UNSPEC with names.  */
@ -30,10 +30,10 @@
|
|||
|
||||
#ifdef __x86_64__
|
||||
# define TCG_TARGET_REG_BITS 64
|
||||
# define TCG_TARGET_NB_REGS 16
|
||||
# define TCG_TARGET_NB_REGS 32
|
||||
#else
|
||||
# define TCG_TARGET_REG_BITS 32
|
||||
# define TCG_TARGET_NB_REGS 8
|
||||
# define TCG_TARGET_NB_REGS 24
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
|
@ -56,6 +56,26 @@ typedef enum {
|
|||
TCG_REG_R13,
|
||||
TCG_REG_R14,
|
||||
TCG_REG_R15,
|
||||
|
||||
TCG_REG_XMM0,
|
||||
TCG_REG_XMM1,
|
||||
TCG_REG_XMM2,
|
||||
TCG_REG_XMM3,
|
||||
TCG_REG_XMM4,
|
||||
TCG_REG_XMM5,
|
||||
TCG_REG_XMM6,
|
||||
TCG_REG_XMM7,
|
||||
|
||||
/* 64-bit registers; likewise always define. */
|
||||
TCG_REG_XMM8,
|
||||
TCG_REG_XMM9,
|
||||
TCG_REG_XMM10,
|
||||
TCG_REG_XMM11,
|
||||
TCG_REG_XMM12,
|
||||
TCG_REG_XMM13,
|
||||
TCG_REG_XMM14,
|
||||
TCG_REG_XMM15,
|
||||
|
||||
TCG_REG_RAX = TCG_REG_EAX,
|
||||
TCG_REG_RCX = TCG_REG_ECX,
|
||||
TCG_REG_RDX = TCG_REG_EDX,
|
||||
|
@ -77,6 +97,8 @@ typedef enum {
|
|||
|
||||
extern bool have_bmi1;
|
||||
extern bool have_popcnt;
|
||||
extern bool have_avx1;
|
||||
extern bool have_avx2;
|
||||
|
||||
/* optional instructions */
|
||||
#define TCG_TARGET_HAS_div2_i32 1
|
||||
|
@@ -146,6 +168,21 @@ extern bool have_popcnt;
#define TCG_TARGET_HAS_mulsh_i64        0
#endif

/* We do not support older SSE systems, only beginning with AVX1.  */
#define TCG_TARGET_HAS_v64              have_avx1
#define TCG_TARGET_HAS_v128             have_avx1
#define TCG_TARGET_HAS_v256             have_avx2

#define TCG_TARGET_HAS_andc_vec         1
#define TCG_TARGET_HAS_orc_vec          0
#define TCG_TARGET_HAS_not_vec          0
#define TCG_TARGET_HAS_neg_vec          0
#define TCG_TARGET_HAS_shi_vec          1
#define TCG_TARGET_HAS_shs_vec          0
#define TCG_TARGET_HAS_shv_vec          0
#define TCG_TARGET_HAS_cmp_vec          1
#define TCG_TARGET_HAS_mul_vec          1

#define TCG_TARGET_deposit_i32_valid(ofs, len) \
    (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \
     ((ofs) == 0 && (len) == 16))
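Not part of the patch, but useful context grounded in the tcg-op-vec.c hunk further down: a 0 in one of these flags does not make the operation unavailable, it only means the middle end must expand it from other vector primitives, for example:

    /* Sketch of the generic fallbacks (see tcg_gen_not_vec / tcg_gen_neg_vec
       below): with TCG_TARGET_HAS_not_vec == 0, "not r, a" becomes
       "xor r, a, all_ones"; with TCG_TARGET_HAS_neg_vec == 0, "neg r, a"
       becomes "sub r, zeros, a". */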
File diff suppressed because it is too large
@@ -0,0 +1,13 @@
/* Target-specific opcodes for host vector expansion.  These will be
   emitted by tcg_expand_vec_op.  For those familiar with GCC internals,
   consider these to be UNSPEC with names.  */

DEF(x86_shufps_vec, 1, 2, 1, IMPLVEC)
DEF(x86_vpblendvb_vec, 1, 3, 0, IMPLVEC)
DEF(x86_blend_vec, 1, 2, 1, IMPLVEC)
DEF(x86_packss_vec, 1, 2, 0, IMPLVEC)
DEF(x86_packus_vec, 1, 2, 0, IMPLVEC)
DEF(x86_psrldq_vec, 1, 1, 1, IMPLVEC)
DEF(x86_vperm2i128_vec, 1, 2, 1, IMPLVEC)
DEF(x86_punpckl_vec, 1, 2, 0, IMPLVEC)
DEF(x86_punpckh_vec, 1, 2, 0, IMPLVEC)
150  tcg/optimize.c
|
@@ -32,6 +32,11 @@
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)

#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)
|
||||
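A reading aid, not part of the patch: the new macro simply extends the existing case list with the vector opcode, e.g.:

    /* CASE_OP_32_64_VEC(add) expands to:
           case INDEX_op_add_i32:
           case INDEX_op_add_i64:
           case INDEX_op_add_vec:
       so the constant-folding and simplification rules below now also
       match the vector form of the operation. */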
struct tcg_temp_info {
|
||||
bool is_const;
|
||||
TCGTemp *prev_copy;
|
||||
|
@ -108,40 +113,6 @@ static void init_arg_info(struct tcg_temp_info *infos,
|
|||
init_ts_info(infos, temps_used, arg_temp(arg));
|
||||
}
|
||||
|
||||
static int op_bits(TCGOpcode op)
|
||||
{
|
||||
const TCGOpDef *def = &tcg_op_defs[op];
|
||||
return def->flags & TCG_OPF_64BIT ? 64 : 32;
|
||||
}
|
||||
|
||||
static TCGOpcode op_to_mov(TCGOpcode op)
|
||||
{
|
||||
switch (op_bits(op)) {
|
||||
case 32:
|
||||
return INDEX_op_mov_i32;
|
||||
case 64:
|
||||
return INDEX_op_mov_i64;
|
||||
default:
|
||||
fprintf(stderr, "op_to_mov: unexpected return value of "
|
||||
"function op_bits.\n");
|
||||
tcg_abort();
|
||||
}
|
||||
}
|
||||
|
||||
static TCGOpcode op_to_movi(TCGOpcode op)
|
||||
{
|
||||
switch (op_bits(op)) {
|
||||
case 32:
|
||||
return INDEX_op_movi_i32;
|
||||
case 64:
|
||||
return INDEX_op_movi_i64;
|
||||
default:
|
||||
fprintf(stderr, "op_to_movi: unexpected return value of "
|
||||
"function op_bits.\n");
|
||||
tcg_abort();
|
||||
}
|
||||
}
|
||||
|
||||
static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
|
||||
{
|
||||
TCGTemp *i;
|
||||
|
@ -199,11 +170,23 @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)
|
|||
|
||||
static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
|
||||
{
|
||||
TCGOpcode new_op = op_to_movi(op->opc);
|
||||
const TCGOpDef *def;
|
||||
TCGOpcode new_op;
|
||||
tcg_target_ulong mask;
|
||||
struct tcg_temp_info *di = arg_info(dst);
|
||||
|
||||
def = &tcg_op_defs[op->opc];
|
||||
if (def->flags & TCG_OPF_VECTOR) {
|
||||
new_op = INDEX_op_dupi_vec;
|
||||
} else if (def->flags & TCG_OPF_64BIT) {
|
||||
new_op = INDEX_op_movi_i64;
|
||||
} else {
|
||||
new_op = INDEX_op_movi_i32;
|
||||
}
|
||||
op->opc = new_op;
|
||||
/* TCGOP_VECL and TCGOP_VECE remain unchanged. */
|
||||
op->args[0] = dst;
|
||||
op->args[1] = val;
|
||||
|
||||
reset_temp(dst);
|
||||
di->is_const = true;
|
||||
|
@ -214,15 +197,13 @@ static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
|
|||
mask |= ~0xffffffffull;
|
||||
}
|
||||
di->mask = mask;
|
||||
|
||||
op->args[0] = dst;
|
||||
op->args[1] = val;
|
||||
}
|
||||
|
||||
static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
|
||||
{
|
||||
TCGTemp *dst_ts = arg_temp(dst);
|
||||
TCGTemp *src_ts = arg_temp(src);
|
||||
const TCGOpDef *def;
|
||||
struct tcg_temp_info *di;
|
||||
struct tcg_temp_info *si;
|
||||
tcg_target_ulong mask;
|
||||
|
@ -236,9 +217,16 @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
|
|||
reset_ts(dst_ts);
|
||||
di = ts_info(dst_ts);
|
||||
si = ts_info(src_ts);
|
||||
new_op = op_to_mov(op->opc);
|
||||
|
||||
def = &tcg_op_defs[op->opc];
|
||||
if (def->flags & TCG_OPF_VECTOR) {
|
||||
new_op = INDEX_op_mov_vec;
|
||||
} else if (def->flags & TCG_OPF_64BIT) {
|
||||
new_op = INDEX_op_mov_i64;
|
||||
} else {
|
||||
new_op = INDEX_op_mov_i32;
|
||||
}
|
||||
op->opc = new_op;
|
||||
/* TCGOP_VECL and TCGOP_VECE remain unchanged. */
|
||||
op->args[0] = dst;
|
||||
op->args[1] = src;
|
||||
|
||||
|
@ -417,8 +405,9 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
|
|||
|
||||
static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
|
||||
{
|
||||
const TCGOpDef *def = &tcg_op_defs[op];
|
||||
TCGArg res = do_constant_folding_2(op, x, y);
|
||||
if (op_bits(op) == 32) {
|
||||
if (!(def->flags & TCG_OPF_64BIT)) {
|
||||
res = (int32_t)res;
|
||||
}
|
||||
return res;
|
||||
|
@ -508,13 +497,12 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
|
|||
tcg_target_ulong xv = arg_info(x)->val;
|
||||
tcg_target_ulong yv = arg_info(y)->val;
|
||||
if (arg_is_const(x) && arg_is_const(y)) {
|
||||
switch (op_bits(op)) {
|
||||
case 32:
|
||||
return do_constant_folding_cond_32(xv, yv, c);
|
||||
case 64:
|
||||
const TCGOpDef *def = &tcg_op_defs[op];
|
||||
tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
|
||||
if (def->flags & TCG_OPF_64BIT) {
|
||||
return do_constant_folding_cond_64(xv, yv, c);
|
||||
default:
|
||||
tcg_abort();
|
||||
} else {
|
||||
return do_constant_folding_cond_32(xv, yv, c);
|
||||
}
|
||||
} else if (args_are_copies(x, y)) {
|
||||
return do_constant_folding_cond_eq(c);
|
||||
|
@ -653,11 +641,11 @@ void tcg_optimize(TCGContext *s)
|
|||
|
||||
/* For commutative operations make constant second argument */
|
||||
switch (opc) {
|
||||
CASE_OP_32_64(add):
|
||||
CASE_OP_32_64(mul):
|
||||
CASE_OP_32_64(and):
|
||||
CASE_OP_32_64(or):
|
||||
CASE_OP_32_64(xor):
|
||||
CASE_OP_32_64_VEC(add):
|
||||
CASE_OP_32_64_VEC(mul):
|
||||
CASE_OP_32_64_VEC(and):
|
||||
CASE_OP_32_64_VEC(or):
|
||||
CASE_OP_32_64_VEC(xor):
|
||||
CASE_OP_32_64(eqv):
|
||||
CASE_OP_32_64(nand):
|
||||
CASE_OP_32_64(nor):
|
||||
|
@ -722,7 +710,7 @@ void tcg_optimize(TCGContext *s)
|
|||
continue;
|
||||
}
|
||||
break;
|
||||
CASE_OP_32_64(sub):
|
||||
CASE_OP_32_64_VEC(sub):
|
||||
{
|
||||
TCGOpcode neg_op;
|
||||
bool have_neg;
|
||||
|
@ -734,9 +722,12 @@ void tcg_optimize(TCGContext *s)
|
|||
if (opc == INDEX_op_sub_i32) {
|
||||
neg_op = INDEX_op_neg_i32;
|
||||
have_neg = TCG_TARGET_HAS_neg_i32;
|
||||
} else {
|
||||
} else if (opc == INDEX_op_sub_i64) {
|
||||
neg_op = INDEX_op_neg_i64;
|
||||
have_neg = TCG_TARGET_HAS_neg_i64;
|
||||
} else {
|
||||
neg_op = INDEX_op_neg_vec;
|
||||
have_neg = TCG_TARGET_HAS_neg_vec;
|
||||
}
|
||||
if (!have_neg) {
|
||||
break;
|
||||
|
@ -750,7 +741,7 @@ void tcg_optimize(TCGContext *s)
|
|||
}
|
||||
}
|
||||
break;
|
||||
CASE_OP_32_64(xor):
|
||||
CASE_OP_32_64_VEC(xor):
|
||||
CASE_OP_32_64(nand):
|
||||
if (!arg_is_const(op->args[1])
|
||||
&& arg_is_const(op->args[2])
|
||||
|
@ -767,7 +758,7 @@ void tcg_optimize(TCGContext *s)
|
|||
goto try_not;
|
||||
}
|
||||
break;
|
||||
CASE_OP_32_64(andc):
|
||||
CASE_OP_32_64_VEC(andc):
|
||||
if (!arg_is_const(op->args[2])
|
||||
&& arg_is_const(op->args[1])
|
||||
&& arg_info(op->args[1])->val == -1) {
|
||||
|
@ -775,7 +766,7 @@ void tcg_optimize(TCGContext *s)
|
|||
goto try_not;
|
||||
}
|
||||
break;
|
||||
CASE_OP_32_64(orc):
|
||||
CASE_OP_32_64_VEC(orc):
|
||||
CASE_OP_32_64(eqv):
|
||||
if (!arg_is_const(op->args[2])
|
||||
&& arg_is_const(op->args[1])
|
||||
|
@ -789,7 +780,10 @@ void tcg_optimize(TCGContext *s)
|
|||
TCGOpcode not_op;
|
||||
bool have_not;
|
||||
|
||||
if (def->flags & TCG_OPF_64BIT) {
|
||||
if (def->flags & TCG_OPF_VECTOR) {
|
||||
not_op = INDEX_op_not_vec;
|
||||
have_not = TCG_TARGET_HAS_not_vec;
|
||||
} else if (def->flags & TCG_OPF_64BIT) {
|
||||
not_op = INDEX_op_not_i64;
|
||||
have_not = TCG_TARGET_HAS_not_i64;
|
||||
} else {
|
||||
|
@ -810,16 +804,16 @@ void tcg_optimize(TCGContext *s)
|
|||
|
||||
/* Simplify expression for "op r, a, const => mov r, a" cases */
|
||||
switch (opc) {
|
||||
CASE_OP_32_64(add):
|
||||
CASE_OP_32_64(sub):
|
||||
CASE_OP_32_64_VEC(add):
|
||||
CASE_OP_32_64_VEC(sub):
|
||||
CASE_OP_32_64_VEC(or):
|
||||
CASE_OP_32_64_VEC(xor):
|
||||
CASE_OP_32_64_VEC(andc):
|
||||
CASE_OP_32_64(shl):
|
||||
CASE_OP_32_64(shr):
|
||||
CASE_OP_32_64(sar):
|
||||
CASE_OP_32_64(rotl):
|
||||
CASE_OP_32_64(rotr):
|
||||
CASE_OP_32_64(or):
|
||||
CASE_OP_32_64(xor):
|
||||
CASE_OP_32_64(andc):
|
||||
if (!arg_is_const(op->args[1])
|
||||
&& arg_is_const(op->args[2])
|
||||
&& arg_info(op->args[2])->val == 0) {
|
||||
|
@ -827,8 +821,8 @@ void tcg_optimize(TCGContext *s)
|
|||
continue;
|
||||
}
|
||||
break;
|
||||
CASE_OP_32_64(and):
|
||||
CASE_OP_32_64(orc):
|
||||
CASE_OP_32_64_VEC(and):
|
||||
CASE_OP_32_64_VEC(orc):
|
||||
CASE_OP_32_64(eqv):
|
||||
if (!arg_is_const(op->args[1])
|
||||
&& arg_is_const(op->args[2])
|
||||
|
@ -1042,8 +1036,8 @@ void tcg_optimize(TCGContext *s)
|
|||
|
||||
/* Simplify expression for "op r, a, 0 => movi r, 0" cases */
|
||||
switch (opc) {
|
||||
CASE_OP_32_64(and):
|
||||
CASE_OP_32_64(mul):
|
||||
CASE_OP_32_64_VEC(and):
|
||||
CASE_OP_32_64_VEC(mul):
|
||||
CASE_OP_32_64(muluh):
|
||||
CASE_OP_32_64(mulsh):
|
||||
if (arg_is_const(op->args[2])
|
||||
|
@ -1058,8 +1052,8 @@ void tcg_optimize(TCGContext *s)
|
|||
|
||||
/* Simplify expression for "op r, a, a => mov r, a" cases */
|
||||
switch (opc) {
|
||||
CASE_OP_32_64(or):
|
||||
CASE_OP_32_64(and):
|
||||
CASE_OP_32_64_VEC(or):
|
||||
CASE_OP_32_64_VEC(and):
|
||||
if (args_are_copies(op->args[1], op->args[2])) {
|
||||
tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
|
||||
continue;
|
||||
|
@ -1071,9 +1065,9 @@ void tcg_optimize(TCGContext *s)
|
|||
|
||||
/* Simplify expression for "op r, a, a => movi r, 0" cases */
|
||||
switch (opc) {
|
||||
CASE_OP_32_64(andc):
|
||||
CASE_OP_32_64(sub):
|
||||
CASE_OP_32_64(xor):
|
||||
CASE_OP_32_64_VEC(andc):
|
||||
CASE_OP_32_64_VEC(sub):
|
||||
CASE_OP_32_64_VEC(xor):
|
||||
if (args_are_copies(op->args[1], op->args[2])) {
|
||||
tcg_opt_gen_movi(s, op, op->args[0], 0);
|
||||
continue;
|
||||
|
@ -1087,13 +1081,23 @@ void tcg_optimize(TCGContext *s)
|
|||
folding. Constants will be substituted to arguments by register
|
||||
allocator where needed and possible. Also detect copies. */
|
||||
switch (opc) {
|
||||
CASE_OP_32_64(mov):
|
||||
CASE_OP_32_64_VEC(mov):
|
||||
tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
|
||||
break;
|
||||
CASE_OP_32_64(movi):
|
||||
case INDEX_op_dupi_vec:
|
||||
tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
|
||||
break;
|
||||
|
||||
case INDEX_op_dup_vec:
|
||||
if (arg_is_const(op->args[1])) {
|
||||
tmp = arg_info(op->args[1])->val;
|
||||
tmp = dup_const(TCGOP_VECE(op), tmp);
|
||||
tcg_opt_gen_movi(s, op, op->args[0], tmp);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
|
||||
CASE_OP_32_64(not):
|
||||
CASE_OP_32_64(neg):
|
||||
CASE_OP_32_64(ext8s):
|
||||
|
|
|
@@ -0,0 +1,49 @@
/*
 * Generic vector operation descriptor
 *
 * Copyright (c) 2018 Linaro
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

/* ??? These bit widths are set for ARM SVE, maxing out at 256 byte vectors. */
#define SIMD_OPRSZ_SHIFT   0
#define SIMD_OPRSZ_BITS    5

#define SIMD_MAXSZ_SHIFT   (SIMD_OPRSZ_SHIFT + SIMD_OPRSZ_BITS)
#define SIMD_MAXSZ_BITS    5

#define SIMD_DATA_SHIFT    (SIMD_MAXSZ_SHIFT + SIMD_MAXSZ_BITS)
#define SIMD_DATA_BITS     (32 - SIMD_DATA_SHIFT)

/* Create a descriptor from components.  */
uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data);

/* Extract the operation size from a descriptor.  */
static inline intptr_t simd_oprsz(uint32_t desc)
{
    return (extract32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS) + 1) * 8;
}

/* Extract the max vector size from a descriptor.  */
static inline intptr_t simd_maxsz(uint32_t desc)
{
    return (extract32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS) + 1) * 8;
}

/* Extract the operation-specific data from a descriptor.  */
static inline int32_t simd_data(uint32_t desc)
{
    return sextract32(desc, SIMD_DATA_SHIFT, SIMD_DATA_BITS);
}
File diff suppressed because it is too large
|
@ -0,0 +1,306 @@
|
|||
/*
|
||||
* Generic vector operation expansion
|
||||
*
|
||||
* Copyright (c) 2018 Linaro
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/*
|
||||
* "Generic" vectors. All operands are given as offsets from ENV,
|
||||
* and therefore cannot also be allocated via tcg_global_mem_new_*.
|
||||
* OPRSZ is the byte size of the vector upon which the operation is performed.
|
||||
* MAXSZ is the byte size of the full vector; bytes beyond OPRSZ are cleared.
|
||||
*
|
||||
* All sizes must be 8 or any multiple of 16.
|
||||
* When OPRSZ is 8, the alignment may be 8, otherwise must be 16.
|
||||
* Operands may completely, but not partially, overlap.
|
||||
*/
|
||||
|
||||
/* Expand a call to a gvec-style helper, with pointers to two vector
|
||||
operands, and a descriptor (see tcg-gvec-desc.h). */
|
||||
typedef void gen_helper_gvec_2(TCGv_ptr, TCGv_ptr, TCGv_i32);
|
||||
void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs,
|
||||
uint32_t oprsz, uint32_t maxsz, int32_t data,
|
||||
gen_helper_gvec_2 *fn);
|
||||
|
||||
/* Similarly, passing an extra data value. */
|
||||
typedef void gen_helper_gvec_2i(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
|
||||
void tcg_gen_gvec_2i_ool(uint32_t dofs, uint32_t aofs, TCGv_i64 c,
|
||||
uint32_t oprsz, uint32_t maxsz, int32_t data,
|
||||
gen_helper_gvec_2i *fn);
|
||||
|
||||
/* Similarly, passing an extra pointer (e.g. env or float_status). */
|
||||
typedef void gen_helper_gvec_2_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
|
||||
void tcg_gen_gvec_2_ptr(uint32_t dofs, uint32_t aofs,
|
||||
TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz,
|
||||
int32_t data, gen_helper_gvec_2_ptr *fn);
|
||||
|
||||
/* Similarly, with three vector operands. */
|
||||
typedef void gen_helper_gvec_3(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
|
||||
void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
|
||||
uint32_t oprsz, uint32_t maxsz, int32_t data,
|
||||
gen_helper_gvec_3 *fn);
|
||||
|
||||
/* Similarly, with four vector operands. */
|
||||
typedef void gen_helper_gvec_4(TCGv_ptr, TCGv_ptr, TCGv_ptr,
|
||||
TCGv_ptr, TCGv_i32);
|
||||
void tcg_gen_gvec_4_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
|
||||
uint32_t cofs, uint32_t oprsz, uint32_t maxsz,
|
||||
int32_t data, gen_helper_gvec_4 *fn);
|
||||
|
||||
/* Similarly, with five vector operands. */
|
||||
typedef void gen_helper_gvec_5(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr,
|
||||
TCGv_ptr, TCGv_i32);
|
||||
void tcg_gen_gvec_5_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
|
||||
uint32_t cofs, uint32_t xofs, uint32_t oprsz,
|
||||
uint32_t maxsz, int32_t data, gen_helper_gvec_5 *fn);
|
||||
|
||||
typedef void gen_helper_gvec_3_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr,
|
||||
TCGv_ptr, TCGv_i32);
|
||||
void tcg_gen_gvec_3_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
|
||||
TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz,
|
||||
int32_t data, gen_helper_gvec_3_ptr *fn);
|
||||
|
||||
typedef void gen_helper_gvec_4_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr,
|
||||
TCGv_ptr, TCGv_ptr, TCGv_i32);
|
||||
void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
|
||||
uint32_t cofs, TCGv_ptr ptr, uint32_t oprsz,
|
||||
uint32_t maxsz, int32_t data,
|
||||
gen_helper_gvec_4_ptr *fn);
|
||||
|
||||
/* Expand a gvec operation. Either inline or out-of-line depending on
|
||||
the actual vector size and the operations supported by the host. */
|
||||
typedef struct {
|
||||
/* Expand inline as a 64-bit or 32-bit integer.
|
||||
Only one of these will be non-NULL. */
|
||||
void (*fni8)(TCGv_i64, TCGv_i64);
|
||||
void (*fni4)(TCGv_i32, TCGv_i32);
|
||||
/* Expand inline with a host vector type. */
|
||||
void (*fniv)(unsigned, TCGv_vec, TCGv_vec);
|
||||
/* Expand out-of-line helper w/descriptor. */
|
||||
gen_helper_gvec_2 *fno;
|
||||
/* The opcode, if any, to which this corresponds. */
|
||||
TCGOpcode opc;
|
||||
/* The data argument to the out-of-line helper. */
|
||||
int32_t data;
|
||||
/* The vector element size, if applicable. */
|
||||
uint8_t vece;
|
||||
/* Prefer i64 to v64. */
|
||||
bool prefer_i64;
|
||||
} GVecGen2;
|
||||
|
||||
typedef struct {
|
||||
/* Expand inline as a 64-bit or 32-bit integer.
|
||||
Only one of these will be non-NULL. */
|
||||
void (*fni8)(TCGv_i64, TCGv_i64, int64_t);
|
||||
void (*fni4)(TCGv_i32, TCGv_i32, int32_t);
|
||||
/* Expand inline with a host vector type. */
|
||||
void (*fniv)(unsigned, TCGv_vec, TCGv_vec, int64_t);
|
||||
/* Expand out-of-line helper w/descriptor, data in descriptor. */
|
||||
gen_helper_gvec_2 *fno;
|
||||
/* Expand out-of-line helper w/descriptor, data as argument. */
|
||||
gen_helper_gvec_2i *fnoi;
|
||||
/* The opcode, if any, to which this corresponds. */
|
||||
TCGOpcode opc;
|
||||
/* The vector element size, if applicable. */
|
||||
uint8_t vece;
|
||||
/* Prefer i64 to v64. */
|
||||
bool prefer_i64;
|
||||
/* Load dest as a 3rd source operand. */
|
||||
bool load_dest;
|
||||
} GVecGen2i;
|
||||
|
||||
typedef struct {
|
||||
/* Expand inline as a 64-bit or 32-bit integer.
|
||||
Only one of these will be non-NULL. */
|
||||
void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64);
|
||||
void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32);
|
||||
/* Expand inline with a host vector type. */
|
||||
void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec);
|
||||
/* Expand out-of-line helper w/descriptor. */
|
||||
gen_helper_gvec_2i *fno;
|
||||
/* The opcode, if any, to which this corresponds. */
|
||||
TCGOpcode opc;
|
||||
/* The data argument to the out-of-line helper. */
|
||||
uint32_t data;
|
||||
/* The vector element size, if applicable. */
|
||||
uint8_t vece;
|
||||
/* Prefer i64 to v64. */
|
||||
bool prefer_i64;
|
||||
/* Load scalar as 1st source operand. */
|
||||
bool scalar_first;
|
||||
} GVecGen2s;
|
||||
|
||||
typedef struct {
|
||||
/* Expand inline as a 64-bit or 32-bit integer.
|
||||
Only one of these will be non-NULL. */
|
||||
void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64);
|
||||
void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32);
|
||||
/* Expand inline with a host vector type. */
|
||||
void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec);
|
||||
/* Expand out-of-line helper w/descriptor. */
|
||||
gen_helper_gvec_3 *fno;
|
||||
/* The opcode, if any, to which this corresponds. */
|
||||
TCGOpcode opc;
|
||||
/* The data argument to the out-of-line helper. */
|
||||
int32_t data;
|
||||
/* The vector element size, if applicable. */
|
||||
uint8_t vece;
|
||||
/* Prefer i64 to v64. */
|
||||
bool prefer_i64;
|
||||
/* Load dest as a 3rd source operand. */
|
||||
bool load_dest;
|
||||
} GVecGen3;
|
||||
|
||||
typedef struct {
|
||||
/* Expand inline as a 64-bit or 32-bit integer.
|
||||
Only one of these will be non-NULL. */
|
||||
void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64);
|
||||
void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
|
||||
/* Expand inline with a host vector type. */
|
||||
void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, TCGv_vec);
|
||||
/* Expand out-of-line helper w/descriptor. */
|
||||
gen_helper_gvec_4 *fno;
|
||||
/* The opcode, if any, to which this corresponds. */
|
||||
TCGOpcode opc;
|
||||
/* The data argument to the out-of-line helper. */
|
||||
int32_t data;
|
||||
/* The vector element size, if applicable. */
|
||||
uint8_t vece;
|
||||
/* Prefer i64 to v64. */
|
||||
bool prefer_i64;
|
||||
} GVecGen4;
|
||||
|
||||
void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
|
||||
uint32_t oprsz, uint32_t maxsz, const GVecGen2 *);
|
||||
void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
|
||||
uint32_t maxsz, int64_t c, const GVecGen2i *);
|
||||
void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
|
||||
uint32_t maxsz, TCGv_i64 c, const GVecGen2s *);
|
||||
void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
|
||||
uint32_t oprsz, uint32_t maxsz, const GVecGen3 *);
|
||||
void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
|
||||
uint32_t oprsz, uint32_t maxsz, const GVecGen4 *);
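As an illustration only (not from this patch), here is how a GVecGen3 descriptor ties the expansion strategies together; the out-of-line helper name is an assumption, everything else uses the interfaces declared in this header.

    /* Hypothetical byte-wise add via the GVecGen3 machinery: the expander
       chooses fniv, fni8 or fno depending on what the host backend offers
       for the requested vector size. */
    static void gen_gvec_add8_example(uint32_t dofs, uint32_t aofs,
                                      uint32_t bofs, uint32_t oprsz,
                                      uint32_t maxsz)
    {
        static const GVecGen3 g = {
            .fni8 = tcg_gen_vec_add8_i64,   /* 64-bit integer fallback */
            .fniv = tcg_gen_add_vec,        /* host vector expansion */
            .fno = gen_helper_gvec_add8,    /* assumed helper name */
            .opc = INDEX_op_add_vec,
            .vece = MO_8,
        };
        tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g);
    }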
|
||||
|
||||
/* Expand a specific vector operation. */
|
||||
|
||||
void tcg_gen_gvec_mov(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_not(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_neg(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t oprsz, uint32_t maxsz);
|
||||
|
||||
void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_sub(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_mul(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
|
||||
|
||||
void tcg_gen_gvec_addi(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
int64_t c, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_muli(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
int64_t c, uint32_t oprsz, uint32_t maxsz);
|
||||
|
||||
void tcg_gen_gvec_adds(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_subs(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_muls(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
|
||||
|
||||
/* Saturated arithmetic. */
|
||||
void tcg_gen_gvec_ssadd(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_sssub(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_usadd(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_ussub(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
|
||||
|
||||
void tcg_gen_gvec_and(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_or(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_xor(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_andc(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_orc(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
|
||||
|
||||
void tcg_gen_gvec_andi(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
int64_t c, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_xori(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
int64_t c, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_ori(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
int64_t c, uint32_t oprsz, uint32_t maxsz);
|
||||
|
||||
void tcg_gen_gvec_ands(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_xors(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_ors(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
|
||||
|
||||
void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t s, uint32_t m);
|
||||
void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t s,
|
||||
uint32_t m, TCGv_i32);
|
||||
void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t s,
|
||||
uint32_t m, TCGv_i64);
|
||||
|
||||
void tcg_gen_gvec_dup8i(uint32_t dofs, uint32_t s, uint32_t m, uint8_t x);
|
||||
void tcg_gen_gvec_dup16i(uint32_t dofs, uint32_t s, uint32_t m, uint16_t x);
|
||||
void tcg_gen_gvec_dup32i(uint32_t dofs, uint32_t s, uint32_t m, uint32_t x);
|
||||
void tcg_gen_gvec_dup64i(uint32_t dofs, uint32_t s, uint32_t m, uint64_t x);
|
||||
|
||||
void tcg_gen_gvec_shli(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
int64_t shift, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_shri(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
int64_t shift, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_sari(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
int64_t shift, uint32_t oprsz, uint32_t maxsz);
|
||||
|
||||
void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
|
||||
uint32_t aofs, uint32_t bofs,
|
||||
uint32_t oprsz, uint32_t maxsz);
|
||||
|
||||
/*
|
||||
* 64-bit vector operations. Use these when the register has been allocated
|
||||
* with tcg_global_mem_new_i64, and so we cannot also address it via pointer.
|
||||
* OPRSZ = MAXSZ = 8.
|
||||
*/
|
||||
|
||||
void tcg_gen_vec_neg8_i64(TCGv_i64 d, TCGv_i64 a);
|
||||
void tcg_gen_vec_neg16_i64(TCGv_i64 d, TCGv_i64 a);
|
||||
void tcg_gen_vec_neg32_i64(TCGv_i64 d, TCGv_i64 a);
|
||||
|
||||
void tcg_gen_vec_add8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
|
||||
void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
|
||||
void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
|
||||
|
||||
void tcg_gen_vec_sub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
|
||||
void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
|
||||
void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
|
||||
|
||||
void tcg_gen_vec_shl8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
|
||||
void tcg_gen_vec_shl16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
|
||||
void tcg_gen_vec_shr8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
|
||||
void tcg_gen_vec_shr16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
|
||||
void tcg_gen_vec_sar8i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
|
||||
void tcg_gen_vec_sar16i_i64(TCGv_i64 d, TCGv_i64 a, int64_t);
|
|
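For orientation only: a hedged sketch of how a target front end would call the "specific vector operation" entry points above. The CPU state layout (CPUFooState, vreg) is hypothetical; the point is that operands are byte offsets from env rather than TCG temporaries.

    /* Hypothetical front end: d = a + b on 16-byte vector registers that
       live inside the CPU state, addressed as offsets from env. */
    static void gen_vadd32_example(int rd, int rn, int rm)
    {
        uint32_t dofs = offsetof(CPUFooState, vreg[rd]);  /* hypothetical field */
        uint32_t aofs = offsetof(CPUFooState, vreg[rn]);
        uint32_t bofs = offsetof(CPUFooState, vreg[rm]);

        /* 32-bit lanes, operate on 16 bytes, full register is also 16 bytes. */
        tcg_gen_gvec_add(MO_32, dofs, aofs, bofs, 16, 16);
    }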
@ -0,0 +1,389 @@
|
|||
/*
|
||||
* Tiny Code Generator for QEMU
|
||||
*
|
||||
* Copyright (c) 2018 Linaro, Inc.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu-common.h"
|
||||
#include "cpu.h"
|
||||
#include "exec/exec-all.h"
|
||||
#include "tcg.h"
|
||||
#include "tcg-op.h"
|
||||
#include "tcg-mo.h"
|
||||
|
||||
/* Reduce the number of ifdefs below. This assumes that all uses of
|
||||
TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
|
||||
the compiler can eliminate. */
|
||||
#if TCG_TARGET_REG_BITS == 64
|
||||
extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
|
||||
extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
|
||||
#define TCGV_LOW TCGV_LOW_link_error
|
||||
#define TCGV_HIGH TCGV_HIGH_link_error
|
||||
#endif
|
||||
|
||||
void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
|
||||
{
|
||||
TCGOp *op = tcg_emit_op(opc);
|
||||
TCGOP_VECL(op) = type - TCG_TYPE_V64;
|
||||
TCGOP_VECE(op) = vece;
|
||||
op->args[0] = r;
|
||||
op->args[1] = a;
|
||||
}
|
||||
|
||||
void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
|
||||
TCGArg r, TCGArg a, TCGArg b)
|
||||
{
|
||||
TCGOp *op = tcg_emit_op(opc);
|
||||
TCGOP_VECL(op) = type - TCG_TYPE_V64;
|
||||
TCGOP_VECE(op) = vece;
|
||||
op->args[0] = r;
|
||||
op->args[1] = a;
|
||||
op->args[2] = b;
|
||||
}
|
||||
|
||||
void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
|
||||
TCGArg r, TCGArg a, TCGArg b, TCGArg c)
|
||||
{
|
||||
TCGOp *op = tcg_emit_op(opc);
|
||||
TCGOP_VECL(op) = type - TCG_TYPE_V64;
|
||||
TCGOP_VECE(op) = vece;
|
||||
op->args[0] = r;
|
||||
op->args[1] = a;
|
||||
op->args[2] = b;
|
||||
op->args[3] = c;
|
||||
}
|
||||
|
||||
static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a)
|
||||
{
|
||||
TCGTemp *rt = tcgv_vec_temp(r);
|
||||
TCGTemp *at = tcgv_vec_temp(a);
|
||||
TCGType type = rt->base_type;
|
||||
|
||||
/* Must have enough inputs for the output.  */
|
||||
tcg_debug_assert(at->base_type >= type);
|
||||
vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at));
|
||||
}
|
||||
|
||||
static void vec_gen_op3(TCGOpcode opc, unsigned vece,
|
||||
TCGv_vec r, TCGv_vec a, TCGv_vec b)
|
||||
{
|
||||
TCGTemp *rt = tcgv_vec_temp(r);
|
||||
TCGTemp *at = tcgv_vec_temp(a);
|
||||
TCGTemp *bt = tcgv_vec_temp(b);
|
||||
TCGType type = rt->base_type;
|
||||
|
||||
/* Must have enough inputs for the output.  */
|
||||
tcg_debug_assert(at->base_type >= type);
|
||||
tcg_debug_assert(bt->base_type >= type);
|
||||
vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt));
|
||||
}
|
||||
|
||||
void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
|
||||
{
|
||||
if (r != a) {
|
||||
vec_gen_op2(INDEX_op_mov_vec, 0, r, a);
|
||||
}
|
||||
}
|
||||
|
||||
#define MO_REG (TCG_TARGET_REG_BITS == 64 ? MO_64 : MO_32)
|
||||
|
||||
static void do_dupi_vec(TCGv_vec r, unsigned vece, TCGArg a)
|
||||
{
|
||||
TCGTemp *rt = tcgv_vec_temp(r);
|
||||
vec_gen_2(INDEX_op_dupi_vec, rt->base_type, vece, temp_arg(rt), a);
|
||||
}
|
||||
|
||||
TCGv_vec tcg_const_zeros_vec(TCGType type)
|
||||
{
|
||||
TCGv_vec ret = tcg_temp_new_vec(type);
|
||||
do_dupi_vec(ret, MO_REG, 0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
TCGv_vec tcg_const_ones_vec(TCGType type)
|
||||
{
|
||||
    TCGv_vec ret = tcg_temp_new_vec(type);
    do_dupi_vec(ret, MO_REG, -1);
    return ret;
}

TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec m)
{
    TCGTemp *t = tcgv_vec_temp(m);
    return tcg_const_zeros_vec(t->base_type);
}

TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
{
    TCGTemp *t = tcgv_vec_temp(m);
    return tcg_const_ones_vec(t->base_type);
}

void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a)
{
    if (TCG_TARGET_REG_BITS == 32 && a == deposit64(a, 32, 32, a)) {
        do_dupi_vec(r, MO_32, a);
    } else if (TCG_TARGET_REG_BITS == 64 || a == (uint64_t)(int32_t)a) {
        do_dupi_vec(r, MO_64, a);
    } else {
        TCGv_i64 c = tcg_const_i64(a);
        tcg_gen_dup_i64_vec(MO_64, r, c);
        tcg_temp_free_i64(c);
    }
}

void tcg_gen_dup32i_vec(TCGv_vec r, uint32_t a)
{
    do_dupi_vec(r, MO_REG, dup_const(MO_32, a));
}

void tcg_gen_dup16i_vec(TCGv_vec r, uint32_t a)
{
    do_dupi_vec(r, MO_REG, dup_const(MO_16, a));
}

void tcg_gen_dup8i_vec(TCGv_vec r, uint32_t a)
{
    do_dupi_vec(r, MO_REG, dup_const(MO_8, a));
}

void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
{
    do_dupi_vec(r, MO_REG, dup_const(vece, a));
}

void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    if (TCG_TARGET_REG_BITS == 64) {
        TCGArg ai = tcgv_i64_arg(a);
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    } else if (vece == MO_64) {
        TCGArg al = tcgv_i32_arg(TCGV_LOW(a));
        TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a));
        vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah);
    } else {
        TCGArg ai = tcgv_i32_arg(TCGV_LOW(a));
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    }
}

void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg ai = tcgv_i32_arg(a);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
}

static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_3(opc, type, 0, ri, bi, o);
}

void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_ld_vec, r, b, o);
}

void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_st_vec, r, b, o);
}

void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    tcg_debug_assert(low_type >= TCG_TYPE_V64);
    tcg_debug_assert(low_type <= type);
    vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o);
}

void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_add_vec, vece, r, a, b);
}

void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_sub_vec, vece, r, a, b);
}

void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_and_vec, 0, r, a, b);
}

void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_or_vec, 0, r, a, b);
}

void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b);
}

void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_andc_vec) {
        vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b);
    } else {
        TCGv_vec t = tcg_temp_new_vec_matching(r);
        tcg_gen_not_vec(0, t, b);
        tcg_gen_and_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_orc_vec) {
        vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b);
    } else {
        TCGv_vec t = tcg_temp_new_vec_matching(r);
        tcg_gen_not_vec(0, t, b);
        tcg_gen_or_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    if (TCG_TARGET_HAS_not_vec) {
        vec_gen_op2(INDEX_op_not_vec, 0, r, a);
    } else {
        TCGv_vec t = tcg_const_ones_vec_matching(r);
        tcg_gen_xor_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    if (TCG_TARGET_HAS_neg_vec) {
        vec_gen_op2(INDEX_op_neg_vec, vece, r, a);
    } else {
        TCGv_vec t = tcg_const_zeros_vec_matching(r);
        tcg_gen_sub_vec(vece, r, t, a);
        tcg_temp_free_vec(t);
    }
}

static void do_shifti(TCGOpcode opc, unsigned vece,
                      TCGv_vec r, TCGv_vec a, int64_t i)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type == type);
    tcg_debug_assert(i >= 0 && i < (8 << vece));

    if (i == 0) {
        tcg_gen_mov_vec(r, a);
        return;
    }

    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, i);
    } else {
        /* We leave the choice of expansion via scalar or vector shift
           to the target.  Often, but not always, dupi can feed a vector
           shift easier than a scalar.  */
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(opc, type, vece, ri, ai, i);
    }
}

void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shli_vec, vece, r, a, i);
}

void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shri_vec, vece, r, a, i);
}

void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_sari_vec, vece, r, a, i);
}

void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
                     TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type == type);
    tcg_debug_assert(bt->base_type == type);
    can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece);
    if (can > 0) {
        vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
    } else {
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
    }
}

void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type == type);
    tcg_debug_assert(bt->base_type == type);
    can = tcg_can_emit_vec_op(INDEX_op_mul_vec, type, vece);
    if (can > 0) {
        vec_gen_3(INDEX_op_mul_vec, type, vece, ri, ai, bi);
    } else {
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(INDEX_op_mul_vec, type, vece, ri, ai, bi);
    }
}
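The expanders above are the building blocks a target front end calls when translating guest vector instructions. As a rough illustration only, not part of this commit, a front end whose vector registers live inside CPUArchState could wire them together as below; cpu_env, the offsets and the choice of TCG_TYPE_V128 are assumptions supplied by the target:

/* Minimal sketch: load two 128-bit operands from env, add them
 * elementwise at element size "vece", and store the result back.  */
static void gen_vec_add_example(unsigned vece, int dofs, int aofs, int bofs)
{
    TCGv_vec a = tcg_temp_new_vec(TCG_TYPE_V128);
    TCGv_vec b = tcg_temp_new_vec_matching(a);

    tcg_gen_ld_vec(a, cpu_env, aofs);
    tcg_gen_ld_vec(b, cpu_env, bofs);
    tcg_gen_add_vec(vece, a, a, b);
    tcg_gen_st_vec(a, cpu_env, dofs);

    tcg_temp_free_vec(a);
    tcg_temp_free_vec(b);
}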
tcg/tcg-op.c | 42

@@ -140,7 +140,7 @@ void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
     }
 }
 
-void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
+void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
 {
     TCGv_i32 t0;
     /* Some cases can be optimized here.  */
@@ -148,17 +148,17 @@ void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
     case 0:
         tcg_gen_movi_i32(ret, 0);
         return;
-    case 0xffffffffu:
+    case -1:
         tcg_gen_mov_i32(ret, arg1);
         return;
-    case 0xffu:
+    case 0xff:
         /* Don't recurse with tcg_gen_ext8u_i32.  */
         if (TCG_TARGET_HAS_ext8u_i32) {
             tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg1);
             return;
         }
         break;
-    case 0xffffu:
+    case 0xffff:
         if (TCG_TARGET_HAS_ext16u_i32) {
             tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg1);
             return;
@@ -199,9 +199,9 @@ void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
     }
 }
 
-void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
+void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
 {
-    tcg_debug_assert(arg2 < 32);
+    tcg_debug_assert(arg2 >= 0 && arg2 < 32);
     if (arg2 == 0) {
         tcg_gen_mov_i32(ret, arg1);
     } else {
@@ -211,9 +211,9 @@ void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
     }
 }
 
-void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
+void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
 {
-    tcg_debug_assert(arg2 < 32);
+    tcg_debug_assert(arg2 >= 0 && arg2 < 32);
     if (arg2 == 0) {
         tcg_gen_mov_i32(ret, arg1);
     } else {
@@ -223,9 +223,9 @@ void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
     }
 }
 
-void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2)
+void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
 {
-    tcg_debug_assert(arg2 < 32);
+    tcg_debug_assert(arg2 >= 0 && arg2 < 32);
     if (arg2 == 0) {
         tcg_gen_mov_i32(ret, arg1);
     } else {
@@ -1201,7 +1201,7 @@ void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
     }
 }
 
-void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
+void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
 {
     TCGv_i64 t0;
 
@@ -1216,23 +1216,23 @@ void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
     case 0:
         tcg_gen_movi_i64(ret, 0);
         return;
-    case 0xffffffffffffffffull:
+    case -1:
         tcg_gen_mov_i64(ret, arg1);
         return;
-    case 0xffull:
+    case 0xff:
         /* Don't recurse with tcg_gen_ext8u_i64.  */
         if (TCG_TARGET_HAS_ext8u_i64) {
             tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg1);
             return;
         }
         break;
-    case 0xffffu:
+    case 0xffff:
         if (TCG_TARGET_HAS_ext16u_i64) {
             tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg1);
             return;
         }
         break;
-    case 0xffffffffull:
+    case 0xffffffffu:
         if (TCG_TARGET_HAS_ext32u_i64) {
             tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg1);
             return;
@@ -1332,9 +1332,9 @@ static inline void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
     }
 }
 
-void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
+void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
 {
-    tcg_debug_assert(arg2 < 64);
+    tcg_debug_assert(arg2 >= 0 && arg2 < 64);
     if (TCG_TARGET_REG_BITS == 32) {
         tcg_gen_shifti_i64(ret, arg1, arg2, 0, 0);
     } else if (arg2 == 0) {
@@ -1346,9 +1346,9 @@ void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
     }
 }
 
-void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
+void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
 {
-    tcg_debug_assert(arg2 < 64);
+    tcg_debug_assert(arg2 >= 0 && arg2 < 64);
     if (TCG_TARGET_REG_BITS == 32) {
         tcg_gen_shifti_i64(ret, arg1, arg2, 1, 0);
     } else if (arg2 == 0) {
@@ -1360,9 +1360,9 @@ void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
     }
 }
 
-void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2)
+void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
 {
-    tcg_debug_assert(arg2 < 64);
+    tcg_debug_assert(arg2 >= 0 && arg2 < 64);
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_shifti_i64(ret, arg1, arg2, 1, 1);
    } else if (arg2 == 0) {
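The hunks above standardize the constant arguments of the integer expanders on signed types and tighten the range assertions. A practical effect, sketched here with hypothetical dest/src temporaries rather than code from the patch: a negative shift count is now caught by the assertion instead of being silently converted through an unsigned parameter.

/* dest and src are assumed TCGv_i32 temporaries in a front end. */
tcg_gen_shli_i32(dest, src, 3);    /* ok: 0 <= 3 < 32 */
tcg_gen_shli_i32(dest, src, -1);   /* now fires tcg_debug_assert() */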
|
52
tcg/tcg-op.h
52
tcg/tcg-op.h
|
@ -35,6 +35,10 @@ void tcg_gen_op4(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg);
|
|||
void tcg_gen_op5(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
|
||||
void tcg_gen_op6(TCGOpcode, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg, TCGArg);
|
||||
|
||||
void vec_gen_2(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg);
|
||||
void vec_gen_3(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg);
|
||||
void vec_gen_4(TCGOpcode, TCGType, unsigned, TCGArg, TCGArg, TCGArg, TCGArg);
|
||||
|
||||
static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 a1)
|
||||
{
|
||||
tcg_gen_op1(opc, tcgv_i32_arg(a1));
|
||||
|
@ -265,12 +269,12 @@ void tcg_gen_mb(TCGBar);
|
|||
void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
|
||||
void tcg_gen_subfi_i32(TCGv_i32 ret, int32_t arg1, TCGv_i32 arg2);
|
||||
void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
|
||||
void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2);
|
||||
void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
|
||||
void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
|
||||
void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
|
||||
void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
|
||||
void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
|
||||
void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
|
||||
void tcg_gen_shli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
|
||||
void tcg_gen_shri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
|
||||
void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
|
||||
void tcg_gen_muli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2);
|
||||
void tcg_gen_div_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
|
||||
void tcg_gen_rem_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
|
||||
|
@ -454,12 +458,12 @@ static inline void tcg_gen_not_i32(TCGv_i32 ret, TCGv_i32 arg)
|
|||
void tcg_gen_addi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
|
||||
void tcg_gen_subfi_i64(TCGv_i64 ret, int64_t arg1, TCGv_i64 arg2);
|
||||
void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
|
||||
void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2);
|
||||
void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
|
||||
void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
|
||||
void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
|
||||
void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
|
||||
void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
|
||||
void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
|
||||
void tcg_gen_shli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
|
||||
void tcg_gen_shri_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
|
||||
void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
|
||||
void tcg_gen_muli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2);
|
||||
void tcg_gen_div_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
|
||||
void tcg_gen_rem_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
|
||||
|
@ -903,6 +907,36 @@ void tcg_gen_atomic_or_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
|
|||
void tcg_gen_atomic_xor_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp);
|
||||
void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
|
||||
|
||||
void tcg_gen_mov_vec(TCGv_vec, TCGv_vec);
|
||||
void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec, TCGv_i32);
|
||||
void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec, TCGv_i64);
|
||||
void tcg_gen_dup8i_vec(TCGv_vec, uint32_t);
|
||||
void tcg_gen_dup16i_vec(TCGv_vec, uint32_t);
|
||||
void tcg_gen_dup32i_vec(TCGv_vec, uint32_t);
|
||||
void tcg_gen_dup64i_vec(TCGv_vec, uint64_t);
|
||||
void tcg_gen_dupi_vec(unsigned vece, TCGv_vec, uint64_t);
|
||||
void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
|
||||
void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
|
||||
void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
|
||||
void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
|
||||
void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
|
||||
void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
|
||||
void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
|
||||
void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
|
||||
void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a);
|
||||
void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a);
|
||||
|
||||
void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
|
||||
void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
|
||||
void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i);
|
||||
|
||||
void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, TCGv_vec r,
|
||||
TCGv_vec a, TCGv_vec b);
|
||||
|
||||
void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset);
|
||||
void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset);
|
||||
void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t);
|
||||
|
||||
#if TARGET_LONG_BITS == 64
|
||||
#define tcg_gen_movi_tl tcg_gen_movi_i64
|
||||
#define tcg_gen_mov_tl tcg_gen_mov_i64
|
||||
|
@ -1001,6 +1035,7 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
|
|||
#define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i64
|
||||
#define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i64
|
||||
#define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i64
|
||||
#define tcg_gen_dup_tl_vec tcg_gen_dup_i64_vec
|
||||
#else
|
||||
#define tcg_gen_movi_tl tcg_gen_movi_i32
|
||||
#define tcg_gen_mov_tl tcg_gen_mov_i32
|
||||
|
@ -1098,6 +1133,7 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
|
|||
#define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i32
|
||||
#define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i32
|
||||
#define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i32
|
||||
#define tcg_gen_dup_tl_vec tcg_gen_dup_i32_vec
|
||||
#endif
|
||||
|
||||
#if UINTPTR_MAX == UINT32_MAX
|
||||
|
|
|
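As an illustrative aside, not taken from the commit: the tcg_gen_dup_tl_vec alias declared above resolves per TARGET_LONG_BITS, and tcg_gen_stl_vec stores only the low part of a wider temporary. The snippet assumes a 64-bit target, a host with 256-bit vector support, and front-end names (cpu_env, val, offset) that this commit does not define.

TCGv_vec t = tcg_temp_new_vec(TCG_TYPE_V256);
tcg_gen_dup_tl_vec(MO_64, t, val);                   /* alias of tcg_gen_dup_i64_vec here */
tcg_gen_stl_vec(t, cpu_env, offset, TCG_TYPE_V128);  /* store only the low 128 bits */
tcg_temp_free_vec(t);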
tcg/tcg-opc.h

@@ -204,8 +204,54 @@ DEF(qemu_ld_i64, DATA64_ARGS, TLADDR_ARGS, 1,
 DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 1,
     TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
 
+/* Host vector support.  */
+
+#define IMPLVEC  TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec)
+
+DEF(mov_vec, 1, 1, 0, TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT)
+DEF(dupi_vec, 1, 0, 1, TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT)
+
+DEF(dup_vec, 1, 1, 0, IMPLVEC)
+DEF(dup2_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_REG_BITS == 32))
+
+DEF(ld_vec, 1, 1, 1, IMPLVEC)
+DEF(st_vec, 0, 2, 1, IMPLVEC)
+
+DEF(add_vec, 1, 2, 0, IMPLVEC)
+DEF(sub_vec, 1, 2, 0, IMPLVEC)
+DEF(mul_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_mul_vec))
+DEF(neg_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_neg_vec))
+
+DEF(and_vec, 1, 2, 0, IMPLVEC)
+DEF(or_vec, 1, 2, 0, IMPLVEC)
+DEF(xor_vec, 1, 2, 0, IMPLVEC)
+DEF(andc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_andc_vec))
+DEF(orc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_orc_vec))
+DEF(not_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec))
+
+DEF(shli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
+DEF(shri_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
+DEF(sari_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
+
+DEF(shls_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
+DEF(shrs_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
+DEF(sars_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shs_vec))
+
+DEF(shlv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
+DEF(shrv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
+DEF(sarv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_shv_vec))
+
+DEF(cmp_vec, 1, 2, 1, IMPLVEC)
+
+DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT)
+
+#if TCG_TARGET_MAYBE_vec
+#include "tcg-target.opc.h"
+#endif
+
 #undef TLADDR_ARGS
 #undef DATA64_ARGS
 #undef IMPL
 #undef IMPL64
+#undef IMPLVEC
 #undef DEF
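Each vector opcode above is gated on a TCG_TARGET_HAS_* macro, and tcg-target.opc.h lets a backend append its own opcodes past last_generic. Purely as an example, with values invented for illustration rather than taken from any backend in this commit, a host that offers 128-bit vectors with immediate shifts but no vector multiply could advertise itself in its tcg-target.h like this:

/* Hypothetical backend capabilities (example values only). */
#define TCG_TARGET_HAS_v64       0
#define TCG_TARGET_HAS_v128      1
#define TCG_TARGET_HAS_v256      0
#define TCG_TARGET_HAS_andc_vec  1
#define TCG_TARGET_HAS_orc_vec   0
#define TCG_TARGET_HAS_not_vec   1
#define TCG_TARGET_HAS_neg_vec   0
#define TCG_TARGET_HAS_shi_vec   1
#define TCG_TARGET_HAS_shs_vec   0
#define TCG_TARGET_HAS_shv_vec   0
#define TCG_TARGET_HAS_mul_vec   0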
tcg/tcg-pool.inc.c

@@ -22,39 +22,110 @@
 typedef struct TCGLabelPoolData {
     struct TCGLabelPoolData *next;
-    tcg_target_ulong data;
     tcg_insn_unit *label;
     intptr_t addend;
-    int type;
+    int rtype;
+    unsigned nlong;
+    tcg_target_ulong data[];
 } TCGLabelPoolData;
 
 
-static void new_pool_label(TCGContext *s, tcg_target_ulong data, int type,
-                           tcg_insn_unit *label, intptr_t addend)
+static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
+                                        tcg_insn_unit *label, intptr_t addend)
 {
-    TCGLabelPoolData *n = tcg_malloc(sizeof(*n));
-    TCGLabelPoolData *i, **pp;
+    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
+                                     + sizeof(tcg_target_ulong) * nlong);
 
-    n->data = data;
     n->label = label;
-    n->type = type;
     n->addend = addend;
+    n->rtype = rtype;
+    n->nlong = nlong;
+    return n;
+}
+
+static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
+{
+    TCGLabelPoolData *i, **pp;
+    int nlong = n->nlong;
 
     /* Insertion sort on the pool.  */
-    for (pp = &s->pool_labels; (i = *pp) && i->data < data; pp = &i->next) {
-        continue;
+    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
+        if (nlong > i->nlong) {
+            break;
+        }
+        if (nlong < i->nlong) {
+            continue;
+        }
+        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
+            break;
+        }
     }
     n->next = *pp;
     *pp = n;
 }
 
+/* The "usual" for generic integer code.  */
+static inline void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
+                                  tcg_insn_unit *label, intptr_t addend)
+{
+    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
+    n->data[0] = d;
+    new_pool_insert(s, n);
+}
+
+/* For v64 or v128, depending on the host.  */
+static inline void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
+                               intptr_t addend, tcg_target_ulong d0,
+                               tcg_target_ulong d1)
+{
+    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
+    n->data[0] = d0;
+    n->data[1] = d1;
+    new_pool_insert(s, n);
+}
+
+/* For v128 or v256, depending on the host.  */
+static inline void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
+                               intptr_t addend, tcg_target_ulong d0,
+                               tcg_target_ulong d1, tcg_target_ulong d2,
+                               tcg_target_ulong d3)
+{
+    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
+    n->data[0] = d0;
+    n->data[1] = d1;
+    n->data[2] = d2;
+    n->data[3] = d3;
+    new_pool_insert(s, n);
+}
+
+/* For v256, for 32-bit host.  */
+static inline void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
+                               intptr_t addend, tcg_target_ulong d0,
+                               tcg_target_ulong d1, tcg_target_ulong d2,
+                               tcg_target_ulong d3, tcg_target_ulong d4,
+                               tcg_target_ulong d5, tcg_target_ulong d6,
+                               tcg_target_ulong d7)
+{
+    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
+    n->data[0] = d0;
+    n->data[1] = d1;
+    n->data[2] = d2;
+    n->data[3] = d3;
+    n->data[4] = d4;
+    n->data[5] = d5;
+    n->data[6] = d6;
+    n->data[7] = d7;
+    new_pool_insert(s, n);
+}
+
 /* To be provided by cpu/tcg-target.inc.c.  */
 static void tcg_out_nop_fill(tcg_insn_unit *p, int count);
 
 static bool tcg_out_pool_finalize(TCGContext *s)
 {
     TCGLabelPoolData *p = s->pool_labels;
-    tcg_target_ulong d, *a;
+    TCGLabelPoolData *l = NULL;
+    void *a;
 
     if (p == NULL) {
         return true;
@@ -62,24 +133,24 @@ static bool tcg_out_pool_finalize(TCGContext *s)
 
     /* ??? Round up to qemu_icache_linesize, but then do not round
        again when allocating the next TranslationBlock structure.  */
-    a = (void *)ROUND_UP((uintptr_t)s->code_ptr, sizeof(tcg_target_ulong));
+    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
+                         sizeof(tcg_target_ulong) * p->nlong);
     tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
     s->data_gen_ptr = a;
 
-    /* Ensure the first comparison fails.  */
-    d = p->data + 1;
-
     for (; p != NULL; p = p->next) {
-        if (p->data != d) {
-            d = p->data;
-            if (unlikely((void *)a > s->code_gen_highwater)) {
+        size_t size = sizeof(tcg_target_ulong) * p->nlong;
+        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
+            if (unlikely(a > s->code_gen_highwater)) {
                 return false;
             }
-            *a++ = d;
+            memcpy(a, p->data, size);
+            a += size;
+            l = p;
        }
-        patch_reloc(p->label, p->type, (intptr_t)(a - 1), p->addend);
+        patch_reloc(p->label, p->rtype, (intptr_t)a - size, p->addend);
    }
 
-    s->code_ptr = (void *)a;
+    s->code_ptr = a;
    return true;
 }
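With multi-word pool entries, a backend can park wide constants next to the generated code and patch a pc-relative load to reach them. A rough sketch of the intended use on a 64-bit host follows; the relocation type R_HOST_PCREL and the actual load emission are placeholders, not names defined by this commit.

/* Hypothetical backend helper: queue a 128-bit constant (lo, hi) and
 * record a relocation against the instruction about to be emitted. */
static void tcg_out_dupi_v128_example(TCGContext *s, TCGReg r,
                                      uint64_t lo, uint64_t hi)
{
    new_pool_l2(s, R_HOST_PCREL, s->code_ptr, 0, lo, hi);
    /* ...emit a 16-byte pc-relative vector load into r here;
       tcg_out_pool_finalize() later writes the data and patches it. */
}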
tcg/tcg.c | 125

@@ -106,6 +106,18 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
                          TCGReg ret, tcg_target_long arg);
 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                        const int *const_args);
+#if TCG_TARGET_MAYBE_vec
+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
+                           unsigned vece, const TCGArg *args,
+                           const int *const_args);
+#else
+static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
+                                  unsigned vece, const TCGArg *args,
+                                  const int *const_args)
+{
+    g_assert_not_reached();
+}
+#endif
 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                        intptr_t arg2);
 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
@@ -146,8 +158,7 @@ struct tcg_region_state {
 };
 
 static struct tcg_region_state region;
-
-static TCGRegSet tcg_target_available_regs[2];
+static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
 static TCGRegSet tcg_target_call_clobber_regs;
 
 #if TCG_TARGET_INSN_UNIT_SIZE == 1
@@ -1026,6 +1037,41 @@ TCGv_i64 tcg_temp_new_internal_i64(int temp_local)
     return temp_tcgv_i64(t);
 }
 
+TCGv_vec tcg_temp_new_vec(TCGType type)
+{
+    TCGTemp *t;
+
+#ifdef CONFIG_DEBUG_TCG
+    switch (type) {
+    case TCG_TYPE_V64:
+        assert(TCG_TARGET_HAS_v64);
+        break;
+    case TCG_TYPE_V128:
+        assert(TCG_TARGET_HAS_v128);
+        break;
+    case TCG_TYPE_V256:
+        assert(TCG_TARGET_HAS_v256);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+#endif
+
+    t = tcg_temp_new_internal(type, 0);
+    return temp_tcgv_vec(t);
+}
+
+/* Create a new temp of the same type as an existing temp.  */
+TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
+{
+    TCGTemp *t = tcgv_vec_temp(match);
+
+    tcg_debug_assert(t->temp_allocated != 0);
+
+    t = tcg_temp_new_internal(t->base_type, 0);
+    return temp_tcgv_vec(t);
+}
+
 static void tcg_temp_free_internal(TCGTemp *ts)
 {
     TCGContext *s = tcg_ctx;
@@ -1057,6 +1103,11 @@ void tcg_temp_free_i64(TCGv_i64 arg)
     tcg_temp_free_internal(tcgv_i64_temp(arg));
 }
 
+void tcg_temp_free_vec(TCGv_vec arg)
+{
+    tcg_temp_free_internal(tcgv_vec_temp(arg));
+}
+
 TCGv_i32 tcg_const_i32(int32_t val)
 {
     TCGv_i32 t0;
@@ -1114,6 +1165,9 @@ int tcg_check_temp_count(void)
    Test the runtime variable that controls each opcode.  */
 bool tcg_op_supported(TCGOpcode op)
 {
+    const bool have_vec
+        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
+
     switch (op) {
     case INDEX_op_discard:
     case INDEX_op_set_label:
@@ -1327,10 +1381,47 @@ bool tcg_op_supported(TCGOpcode op)
     case INDEX_op_mulsh_i64:
         return TCG_TARGET_HAS_mulsh_i64;
 
-    case NB_OPS:
-        break;
+    case INDEX_op_mov_vec:
+    case INDEX_op_dup_vec:
+    case INDEX_op_dupi_vec:
+    case INDEX_op_ld_vec:
+    case INDEX_op_st_vec:
+    case INDEX_op_add_vec:
+    case INDEX_op_sub_vec:
+    case INDEX_op_and_vec:
+    case INDEX_op_or_vec:
+    case INDEX_op_xor_vec:
+    case INDEX_op_cmp_vec:
+        return have_vec;
+    case INDEX_op_dup2_vec:
+        return have_vec && TCG_TARGET_REG_BITS == 32;
+    case INDEX_op_not_vec:
+        return have_vec && TCG_TARGET_HAS_not_vec;
+    case INDEX_op_neg_vec:
+        return have_vec && TCG_TARGET_HAS_neg_vec;
+    case INDEX_op_andc_vec:
+        return have_vec && TCG_TARGET_HAS_andc_vec;
+    case INDEX_op_orc_vec:
+        return have_vec && TCG_TARGET_HAS_orc_vec;
+    case INDEX_op_mul_vec:
+        return have_vec && TCG_TARGET_HAS_mul_vec;
+    case INDEX_op_shli_vec:
+    case INDEX_op_shri_vec:
+    case INDEX_op_sari_vec:
+        return have_vec && TCG_TARGET_HAS_shi_vec;
+    case INDEX_op_shls_vec:
+    case INDEX_op_shrs_vec:
+    case INDEX_op_sars_vec:
+        return have_vec && TCG_TARGET_HAS_shs_vec;
+    case INDEX_op_shlv_vec:
+    case INDEX_op_shrv_vec:
+    case INDEX_op_sarv_vec:
+        return have_vec && TCG_TARGET_HAS_shv_vec;
+
+    default:
+        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
+        return true;
     }
-    g_assert_not_reached();
 }
 
 /* Note: we convert the 64 bit args to 32 bit and do some alignment
@@ -1661,6 +1752,11 @@ void tcg_dump_ops(TCGContext *s)
             nb_iargs = def->nb_iargs;
             nb_cargs = def->nb_cargs;
 
+            if (def->flags & TCG_OPF_VECTOR) {
+                col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
+                                8 << TCGOP_VECE(op));
+            }
+
             k = 0;
             for (i = 0; i < nb_oargs; i++) {
                 if (k != 0) {
@@ -1685,6 +1781,7 @@ void tcg_dump_ops(TCGContext *s)
                 case INDEX_op_brcond_i64:
                 case INDEX_op_setcond_i64:
                 case INDEX_op_movcond_i64:
+                case INDEX_op_cmp_vec:
                     if (op->args[k] < ARRAY_SIZE(cond_name)
                         && cond_name[op->args[k]]) {
                         col += qemu_log(",%s", cond_name[op->args[k++]]);
@@ -2890,8 +2987,13 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
     }
 
     /* emit instruction */
-    tcg_out_op(s, op->opc, new_args, const_args);
+    if (def->flags & TCG_OPF_VECTOR) {
+        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
+                       new_args, const_args);
+    } else {
+        tcg_out_op(s, op->opc, new_args, const_args);
+    }
 
     /* move the outputs in the correct register if needed */
     for(i = 0; i < nb_oargs; i++) {
         ts = arg_temp(op->args[i]);
@@ -3239,10 +3341,12 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
         switch (opc) {
         case INDEX_op_mov_i32:
         case INDEX_op_mov_i64:
+        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_movi_i32:
        case INDEX_op_movi_i64:
+        case INDEX_op_dupi_vec:
            tcg_reg_alloc_movi(s, op);
            break;
        case INDEX_op_insn_start:
@@ -3645,3 +3749,10 @@ void tcg_register_jit(void *buf, size_t buf_size)
 {
 }
 #endif /* ELF_HOST_MACHINE */
+
+#if !TCG_TARGET_MAYBE_vec
+void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
+{
+    g_assert_not_reached();
+}
+#endif
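A worked example of the op-dump format added above: the two vector parameters are logged as v%d,e%d, so a 128-bit operation on 32-bit lanes, which has TCGOP_VECL(op) == 1 and TCGOP_VECE(op) == MO_32 == 2, prints as "v128,e32," because 64 << 1 == 128 and 8 << 2 == 32.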
tcg/tcg.h | 87

@@ -170,6 +170,31 @@ typedef uint64_t TCGRegSet;
 # error "Missing unsigned widening multiply"
 #endif
 
+#if !defined(TCG_TARGET_HAS_v64) \
+    && !defined(TCG_TARGET_HAS_v128) \
+    && !defined(TCG_TARGET_HAS_v256)
+#define TCG_TARGET_MAYBE_vec    0
+#define TCG_TARGET_HAS_neg_vec  0
+#define TCG_TARGET_HAS_not_vec  0
+#define TCG_TARGET_HAS_andc_vec 0
+#define TCG_TARGET_HAS_orc_vec  0
+#define TCG_TARGET_HAS_shi_vec  0
+#define TCG_TARGET_HAS_shs_vec  0
+#define TCG_TARGET_HAS_shv_vec  0
+#define TCG_TARGET_HAS_mul_vec  0
+#else
+#define TCG_TARGET_MAYBE_vec    1
+#endif
+#ifndef TCG_TARGET_HAS_v64
+#define TCG_TARGET_HAS_v64      0
+#endif
+#ifndef TCG_TARGET_HAS_v128
+#define TCG_TARGET_HAS_v128     0
+#endif
+#ifndef TCG_TARGET_HAS_v256
+#define TCG_TARGET_HAS_v256     0
+#endif
+
 #ifndef TARGET_INSN_START_EXTRA_WORDS
 # define TARGET_INSN_START_WORDS 1
 #else
@@ -246,6 +271,11 @@ typedef struct TCGPool {
 typedef enum TCGType {
     TCG_TYPE_I32,
     TCG_TYPE_I64,
+
+    TCG_TYPE_V64,
+    TCG_TYPE_V128,
+    TCG_TYPE_V256,
+
     TCG_TYPE_COUNT, /* number of different types */
 
     /* An alias for the size of the host register.  */
@@ -396,6 +426,8 @@ typedef tcg_target_ulong TCGArg;
    * TCGv_i32 : 32 bit integer type
    * TCGv_i64 : 64 bit integer type
    * TCGv_ptr : a host pointer type
+   * TCGv_vec : a host vector type; the exact size is not exposed
+                to the CPU front-end code.
    * TCGv : an integer type the same size as target_ulong
            (an alias for either TCGv_i32 or TCGv_i64)
    The compiler's type checking will complain if you mix them
@@ -418,6 +450,7 @@ typedef tcg_target_ulong TCGArg;
 typedef struct TCGv_i32_d *TCGv_i32;
 typedef struct TCGv_i64_d *TCGv_i64;
 typedef struct TCGv_ptr_d *TCGv_ptr;
+typedef struct TCGv_vec_d *TCGv_vec;
 typedef TCGv_ptr TCGv_env;
 #if TARGET_LONG_BITS == 32
 #define TCGv TCGv_i32
@@ -589,6 +622,9 @@ typedef struct TCGOp {
 #define TCGOP_CALLI(X)    (X)->param1
 #define TCGOP_CALLO(X)    (X)->param2
 
+#define TCGOP_VECL(X)     (X)->param1
+#define TCGOP_VECE(X)     (X)->param2
+
 /* Make sure operands fit in the bitfields above.  */
 QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8));
 
@@ -726,6 +762,11 @@ static inline TCGTemp *tcgv_ptr_temp(TCGv_ptr v)
     return tcgv_i32_temp((TCGv_i32)v);
 }
 
+static inline TCGTemp *tcgv_vec_temp(TCGv_vec v)
+{
+    return tcgv_i32_temp((TCGv_i32)v);
+}
+
 static inline TCGArg tcgv_i32_arg(TCGv_i32 v)
 {
     return temp_arg(tcgv_i32_temp(v));
@@ -741,6 +782,11 @@ static inline TCGArg tcgv_ptr_arg(TCGv_ptr v)
     return temp_arg(tcgv_ptr_temp(v));
 }
 
+static inline TCGArg tcgv_vec_arg(TCGv_vec v)
+{
+    return temp_arg(tcgv_vec_temp(v));
+}
+
 static inline TCGv_i32 temp_tcgv_i32(TCGTemp *t)
 {
     (void)temp_idx(t); /* trigger embedded assert */
@@ -757,6 +803,11 @@ static inline TCGv_ptr temp_tcgv_ptr(TCGTemp *t)
     return (TCGv_ptr)temp_tcgv_i32(t);
 }
 
+static inline TCGv_vec temp_tcgv_vec(TCGTemp *t)
+{
+    return (TCGv_vec)temp_tcgv_i32(t);
+}
+
 #if TCG_TARGET_REG_BITS == 32
 static inline TCGv_i32 TCGV_LOW(TCGv_i64 t)
 {
@@ -832,9 +883,12 @@ TCGTemp *tcg_global_mem_new_internal(TCGType, TCGv_ptr,
 
 TCGv_i32 tcg_temp_new_internal_i32(int temp_local);
 TCGv_i64 tcg_temp_new_internal_i64(int temp_local);
+TCGv_vec tcg_temp_new_vec(TCGType type);
+TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match);
 
 void tcg_temp_free_i32(TCGv_i32 arg);
 void tcg_temp_free_i64(TCGv_i64 arg);
+void tcg_temp_free_vec(TCGv_vec arg);
 
 static inline TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t offset,
                                               const char *name)
@@ -916,6 +970,8 @@ enum {
     /* Instruction is optional and not implemented by the host, or insn
        is generic and should not be implemened by the host.  */
     TCG_OPF_NOT_PRESENT  = 0x10,
+    /* Instruction operands are vectors.  */
+    TCG_OPF_VECTOR       = 0x20,
 };
 
 typedef struct TCGOpDef {
@@ -981,6 +1037,10 @@ TCGv_i32 tcg_const_i32(int32_t val);
 TCGv_i64 tcg_const_i64(int64_t val);
 TCGv_i32 tcg_const_local_i32(int32_t val);
 TCGv_i64 tcg_const_local_i64(int64_t val);
+TCGv_vec tcg_const_zeros_vec(TCGType);
+TCGv_vec tcg_const_ones_vec(TCGType);
+TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec);
+TCGv_vec tcg_const_ones_vec_matching(TCGv_vec);
 
 TCGLabel *gen_new_label(void);
 
@@ -1151,6 +1211,33 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr);
 
 void tcg_register_jit(void *buf, size_t buf_size);
 
+#if TCG_TARGET_MAYBE_vec
+/* Return zero if the tuple (opc, type, vece) is unsupportable;
+   return > 0 if it is directly supportable;
+   return < 0 if we must call tcg_expand_vec_op.  */
+int tcg_can_emit_vec_op(TCGOpcode, TCGType, unsigned);
+#else
+static inline int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
+{
+    return 0;
+}
+#endif
+
+/* Expand the tuple (opc, type, vece) on the given arguments.  */
+void tcg_expand_vec_op(TCGOpcode, TCGType, unsigned, TCGArg, ...);
+
+/* Replicate a constant C accoring to the log2 of the element size.  */
+uint64_t dup_const(unsigned vece, uint64_t c);
+
+#define dup_const(VECE, C)                                         \
+    (__builtin_constant_p(VECE)                                    \
+     ? (  (VECE) == MO_8  ? 0x0101010101010101ull * (uint8_t)(C)   \
+        : (VECE) == MO_16 ? 0x0001000100010001ull * (uint16_t)(C)  \
+        : (VECE) == MO_32 ? 0x0000000100000001ull * (uint32_t)(C)  \
+        : dup_const(VECE, C))                                      \
+     : dup_const(VECE, C))
+
+
 /*
  * Memory helpers that will be used by TCG generated code.
  */
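Two declarations above deserve a worked example. tcg_can_emit_vec_op gives a three-way answer (greater than zero: emit the op directly; less than zero: the generic code must call tcg_expand_vec_op; zero: unsupported), and dup_const replicates a constant across a 64-bit word according to the element size. The values below follow directly from the macro definition:

/* Worked examples derived from the dup_const() macro above. */
uint64_t x8  = dup_const(MO_8,  0xab);        /* 0xababababababababull */
uint64_t x16 = dup_const(MO_16, 0x1234);      /* 0x1234123412341234ull */
uint64_t x32 = dup_const(MO_32, 0xdeadbeef);  /* 0xdeadbeefdeadbeefull */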