mirror of https://gitee.com/openkylin/qemu.git
tcg: Add generic vector expanders
Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
parent
474b2e8f0f
commit
db432672dc
|
@ -93,7 +93,7 @@ all: $(PROGS) stap
|
|||
# cpu emulator library
|
||||
obj-y += exec.o
|
||||
obj-y += accel/
|
||||
obj-$(CONFIG_TCG) += tcg/tcg.o tcg/tcg-op.o tcg/tcg-op-vec.o
|
||||
obj-$(CONFIG_TCG) += tcg/tcg.o tcg/tcg-op.o tcg/tcg-op-vec.o tcg/tcg-op-gvec.o
|
||||
obj-$(CONFIG_TCG) += tcg/tcg-common.o tcg/optimize.o
|
||||
obj-$(CONFIG_TCG_INTERPRETER) += tcg/tci.o
|
||||
obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
obj-$(CONFIG_SOFTMMU) += tcg-all.o
|
||||
obj-$(CONFIG_SOFTMMU) += cputlb.o
|
||||
obj-y += tcg-runtime.o
|
||||
obj-y += tcg-runtime.o tcg-runtime-gvec.o
|
||||
obj-y += cpu-exec.o cpu-exec-common.o translate-all.o
|
||||
obj-y += translator.o
|
||||
|
||||
|
|
|
@ -0,0 +1,325 @@
|
|||
/*
|
||||
* Generic vectorized operation runtime
|
||||
*
|
||||
* Copyright (c) 2018 Linaro
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/host-utils.h"
|
||||
#include "cpu.h"
|
||||
#include "exec/helper-proto.h"
|
||||
#include "tcg-gvec-desc.h"
|
||||
|
||||
|
||||
/* Virtually all hosts support 16-byte vectors. Those that don't can emulate
|
||||
* them via GCC's generic vector extension. This turns out to be simpler and
|
||||
* more reliable than getting the compiler to autovectorize.
|
||||
*
|
||||
* In tcg-op-gvec.c, we asserted that both the size and alignment of the data
|
||||
* are multiples of 16.
|
||||
*
|
||||
* When the compiler does not support all of the operations we require, the
|
||||
* loops are written so that we can always fall back on the base types.
|
||||
*/
|
||||
#ifdef CONFIG_VECTOR16
|
||||
typedef uint8_t vec8 __attribute__((vector_size(16)));
|
||||
typedef uint16_t vec16 __attribute__((vector_size(16)));
|
||||
typedef uint32_t vec32 __attribute__((vector_size(16)));
|
||||
typedef uint64_t vec64 __attribute__((vector_size(16)));
|
||||
|
||||
typedef int8_t svec8 __attribute__((vector_size(16)));
|
||||
typedef int16_t svec16 __attribute__((vector_size(16)));
|
||||
typedef int32_t svec32 __attribute__((vector_size(16)));
|
||||
typedef int64_t svec64 __attribute__((vector_size(16)));
|
||||
|
||||
#define DUP16(X) { X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X }
|
||||
#define DUP8(X) { X, X, X, X, X, X, X, X }
|
||||
#define DUP4(X) { X, X, X, X }
|
||||
#define DUP2(X) { X, X }
|
||||
#else
|
||||
typedef uint8_t vec8;
|
||||
typedef uint16_t vec16;
|
||||
typedef uint32_t vec32;
|
||||
typedef uint64_t vec64;
|
||||
|
||||
typedef int8_t svec8;
|
||||
typedef int16_t svec16;
|
||||
typedef int32_t svec32;
|
||||
typedef int64_t svec64;
|
||||
|
||||
#define DUP16(X) X
|
||||
#define DUP8(X) X
|
||||
#define DUP4(X) X
|
||||
#define DUP2(X) X
|
||||
#endif /* CONFIG_VECTOR16 */
|
||||
|
||||
/* Zero the part of destination D that lies beyond the operated-on bytes.
 *
 * d:     destination vector storage.
 * oprsz: number of leading bytes the caller has already produced.
 * desc:  descriptor; simd_maxsz(desc) yields the full vector size in bytes.
 *
 * Bytes [oprsz, maxsz) are cleared with 8-byte stores.  Nothing is written
 * when maxsz <= oprsz.
 */
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
|
||||
{
|
||||
intptr_t maxsz = simd_maxsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
|
||||
if (unlikely(maxsz > oprsz)) {
|
||||
for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) {
|
||||
/* d is void *, so d + i is byte arithmetic (GNU C extension). */
*(uint64_t *)(d + i) = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* gvec helper: element-wise 8-bit add, d = a + b.
 * The first simd_oprsz(desc) bytes are processed one vec8 at a time (a
 * 16-byte vector when CONFIG_VECTOR16 is defined, a single uint8_t
 * otherwise); clear_high() then zeroes any bytes of d from oprsz up to
 * simd_maxsz(desc).
 */
void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec8)) {
|
||||
*(vec8 *)(d + i) = *(vec8 *)(a + i) + *(vec8 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
/* gvec helper: element-wise 16-bit add, d = a + b.
 * The first simd_oprsz(desc) bytes are processed one vec16 at a time (a
 * 16-byte vector when CONFIG_VECTOR16 is defined, a single uint16_t
 * otherwise); clear_high() then zeroes any bytes of d from oprsz up to
 * simd_maxsz(desc).
 */
void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec16)) {
|
||||
*(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
/* gvec helper: element-wise 32-bit add, d = a + b.
 * The first simd_oprsz(desc) bytes are processed one vec32 at a time (a
 * 16-byte vector when CONFIG_VECTOR16 is defined, a single uint32_t
 * otherwise); clear_high() then zeroes any bytes of d from oprsz up to
 * simd_maxsz(desc).
 */
void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec32)) {
|
||||
*(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
/* gvec helper: element-wise 64-bit add, d = a + b.
 * The first simd_oprsz(desc) bytes are processed one vec64 at a time (a
 * 16-byte vector when CONFIG_VECTOR16 is defined, a single uint64_t
 * otherwise); clear_high() then zeroes any bytes of d from oprsz up to
 * simd_maxsz(desc).
 */
void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
/* gvec helper: element-wise 8-bit subtract, d = a - b.
 * The first simd_oprsz(desc) bytes are processed one vec8 at a time (a
 * 16-byte vector when CONFIG_VECTOR16 is defined, a single uint8_t
 * otherwise); clear_high() then zeroes any bytes of d from oprsz up to
 * simd_maxsz(desc).
 */
void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec8)) {
|
||||
*(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
/* gvec helper: element-wise 16-bit subtract, d = a - b.
 * The first simd_oprsz(desc) bytes are processed one vec16 at a time (a
 * 16-byte vector when CONFIG_VECTOR16 is defined, a single uint16_t
 * otherwise); clear_high() then zeroes any bytes of d from oprsz up to
 * simd_maxsz(desc).
 */
void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec16)) {
|
||||
*(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
/* gvec helper: element-wise 32-bit subtract, d = a - b.
 * The first simd_oprsz(desc) bytes are processed one vec32 at a time (a
 * 16-byte vector when CONFIG_VECTOR16 is defined, a single uint32_t
 * otherwise); clear_high() then zeroes any bytes of d from oprsz up to
 * simd_maxsz(desc).
 */
void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec32)) {
|
||||
*(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
/* gvec helper: element-wise 64-bit subtract, d = a - b.
 * The first simd_oprsz(desc) bytes are processed one vec64 at a time (a
 * 16-byte vector when CONFIG_VECTOR16 is defined, a single uint64_t
 * otherwise); clear_high() then zeroes any bytes of d from oprsz up to
 * simd_maxsz(desc).
 */
void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
/* gvec helper: element-wise 8-bit negate, d = -a.
 * The first simd_oprsz(desc) bytes are processed one vec8 at a time (a
 * 16-byte vector when CONFIG_VECTOR16 is defined, a single uint8_t
 * otherwise); clear_high() then zeroes any bytes of d from oprsz up to
 * simd_maxsz(desc).
 */
void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec8)) {
|
||||
*(vec8 *)(d + i) = -*(vec8 *)(a + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
/* gvec helper: element-wise 16-bit negate, d = -a.
 * The first simd_oprsz(desc) bytes are processed one vec16 at a time (a
 * 16-byte vector when CONFIG_VECTOR16 is defined, a single uint16_t
 * otherwise); clear_high() then zeroes any bytes of d from oprsz up to
 * simd_maxsz(desc).
 */
void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec16)) {
|
||||
*(vec16 *)(d + i) = -*(vec16 *)(a + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
/* gvec helper: element-wise 32-bit negate, d = -a.
 * The first simd_oprsz(desc) bytes are processed one vec32 at a time (a
 * 16-byte vector when CONFIG_VECTOR16 is defined, a single uint32_t
 * otherwise); clear_high() then zeroes any bytes of d from oprsz up to
 * simd_maxsz(desc).
 */
void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec32)) {
|
||||
*(vec32 *)(d + i) = -*(vec32 *)(a + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
/* gvec helper: element-wise 64-bit negate, d = -a.
 * The first simd_oprsz(desc) bytes are processed one vec64 at a time (a
 * 16-byte vector when CONFIG_VECTOR16 is defined, a single uint64_t
 * otherwise); clear_high() then zeroes any bytes of d from oprsz up to
 * simd_maxsz(desc).
 */
void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = -*(vec64 *)(a + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
|
||||
memcpy(d, a, oprsz);
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
/* gvec helper: fill d with the 64-bit value c.
 * The first simd_oprsz(desc) bytes receive c in 8-byte stores and
 * clear_high() zeroes the remainder up to simd_maxsz(desc).
 */
void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
|
||||
if (c == 0) {
|
||||
/* Filling with zero is the same as clearing: skip the fill loop and
 * let clear_high() zero everything from offset 0 up to maxsz.
 */
oprsz = 0;
|
||||
} else {
|
||||
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
|
||||
*(uint64_t *)(d + i) = c;
|
||||
}
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
/* gvec helper: fill d with the 32-bit value c.
 * The first simd_oprsz(desc) bytes receive c in 4-byte stores and
 * clear_high() zeroes the remainder up to simd_maxsz(desc).
 * gvec_dup16 and gvec_dup8 also funnel into this helper.
 */
void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
|
||||
if (c == 0) {
|
||||
/* Filling with zero is the same as clearing: skip the fill loop and
 * let clear_high() zero everything from offset 0 up to maxsz.
 */
oprsz = 0;
|
||||
} else {
|
||||
for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
|
||||
*(uint32_t *)(d + i) = c;
|
||||
}
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
/* gvec helper: fill d with the 16-bit value in the low half of c.
 * Only the low 16 bits of c are used; multiplying by 0x00010001 copies
 * them into both halves of a 32-bit word, which gvec_dup32 then stores.
 */
void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
|
||||
{
|
||||
HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
|
||||
}
|
||||
|
||||
/* gvec helper: fill d with the 8-bit value in the low byte of c.
 * Only the low 8 bits of c are used; multiplying by 0x01010101 copies
 * them into all four bytes of a 32-bit word, which gvec_dup32 then stores.
 */
void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
|
||||
{
|
||||
HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
|
||||
}
|
||||
|
||||
/* gvec helper: bitwise NOT, d = ~a.
 * A bitwise operation gives the same result for any element size, so the
 * first simd_oprsz(desc) bytes are simply walked in vec64 units;
 * clear_high() then zeroes any bytes of d from oprsz up to
 * simd_maxsz(desc).
 */
void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = ~*(vec64 *)(a + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
/* gvec helper: bitwise AND, d = a & b.
 * A bitwise operation gives the same result for any element size, so the
 * first simd_oprsz(desc) bytes are simply walked in vec64 units;
 * clear_high() then zeroes any bytes of d from oprsz up to
 * simd_maxsz(desc).
 */
void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
/* gvec helper: bitwise OR, d = a | b.
 * A bitwise operation gives the same result for any element size, so the
 * first simd_oprsz(desc) bytes are simply walked in vec64 units;
 * clear_high() then zeroes any bytes of d from oprsz up to
 * simd_maxsz(desc).
 */
void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
/* gvec helper: bitwise XOR, d = a ^ b.
 * A bitwise operation gives the same result for any element size, so the
 * first simd_oprsz(desc) bytes are simply walked in vec64 units;
 * clear_high() then zeroes any bytes of d from oprsz up to
 * simd_maxsz(desc).
 */
void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
/* gvec helper: bitwise AND with complement, d = a & ~b.
 * A bitwise operation gives the same result for any element size, so the
 * first simd_oprsz(desc) bytes are simply walked in vec64 units;
 * clear_high() then zeroes any bytes of d from oprsz up to
 * simd_maxsz(desc).
 */
void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
||||
|
||||
/* gvec helper: bitwise OR with complement, d = a | ~b.
 * A bitwise operation gives the same result for any element size, so the
 * first simd_oprsz(desc) bytes are simply walked in vec64 units;
 * clear_high() then zeroes any bytes of d from oprsz up to
 * simd_maxsz(desc).
 */
void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
|
||||
{
|
||||
intptr_t oprsz = simd_oprsz(desc);
|
||||
intptr_t i;
|
||||
|
||||
|
||||
for (i = 0; i < oprsz; i += sizeof(vec64)) {
|
||||
*(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i);
|
||||
}
|
||||
clear_high(d, oprsz, desc);
|
||||
}
|
|
@ -134,3 +134,32 @@ GEN_ATOMIC_HELPERS(xor_fetch)
|
|||
GEN_ATOMIC_HELPERS(xchg)
|
||||
|
||||
#undef GEN_ATOMIC_HELPERS
|
||||
|
||||
DEF_HELPER_FLAGS_3(gvec_mov, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(gvec_dup8, TCG_CALL_NO_RWG, void, ptr, i32, i32)
|
||||
DEF_HELPER_FLAGS_3(gvec_dup16, TCG_CALL_NO_RWG, void, ptr, i32, i32)
|
||||
DEF_HELPER_FLAGS_3(gvec_dup32, TCG_CALL_NO_RWG, void, ptr, i32, i32)
|
||||
DEF_HELPER_FLAGS_3(gvec_dup64, TCG_CALL_NO_RWG, void, ptr, i32, i64)
|
||||
|
||||
DEF_HELPER_FLAGS_4(gvec_add8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_add16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_add32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_add64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_4(gvec_sub8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_sub16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_sub32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_sub64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(gvec_neg8, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(gvec_neg16, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(gvec_neg32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_3(gvec_neg64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(gvec_not, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_and, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_or, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_xor, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_andc, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
DEF_HELPER_FLAGS_4(gvec_orc, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||
|
|
|
@ -5000,6 +5000,50 @@ if compile_prog "" "" ; then
|
|||
atomic64=yes
|
||||
fi
|
||||
|
||||
########################################
|
||||
# See if 16-byte vector operations are supported.
|
||||
# Even without a vector unit the compiler may expand these.
|
||||
# There is a bug in old GCC for PPC that crashes here.
|
||||
# Unfortunately it's the system compiler for CentOS 7.
|
||||
|
||||
cat > $TMPC << EOF
|
||||
typedef unsigned char U1 __attribute__((vector_size(16)));
|
||||
typedef unsigned short U2 __attribute__((vector_size(16)));
|
||||
typedef unsigned int U4 __attribute__((vector_size(16)));
|
||||
typedef unsigned long long U8 __attribute__((vector_size(16)));
|
||||
typedef signed char S1 __attribute__((vector_size(16)));
|
||||
typedef signed short S2 __attribute__((vector_size(16)));
|
||||
typedef signed int S4 __attribute__((vector_size(16)));
|
||||
typedef signed long long S8 __attribute__((vector_size(16)));
|
||||
static U1 a1, b1;
|
||||
static U2 a2, b2;
|
||||
static U4 a4, b4;
|
||||
static U8 a8, b8;
|
||||
static S1 c1;
|
||||
static S2 c2;
|
||||
static S4 c4;
|
||||
static S8 c8;
|
||||
static int i;
|
||||
int main(void)
|
||||
{
|
||||
a1 += b1; a2 += b2; a4 += b4; a8 += b8;
|
||||
a1 -= b1; a2 -= b2; a4 -= b4; a8 -= b8;
|
||||
a1 *= b1; a2 *= b2; a4 *= b4; a8 *= b8;
|
||||
a1 &= b1; a2 &= b2; a4 &= b4; a8 &= b8;
|
||||
a1 |= b1; a2 |= b2; a4 |= b4; a8 |= b8;
|
||||
a1 ^= b1; a2 ^= b2; a4 ^= b4; a8 ^= b8;
|
||||
a1 <<= i; a2 <<= i; a4 <<= i; a8 <<= i;
|
||||
a1 >>= i; a2 >>= i; a4 >>= i; a8 >>= i;
|
||||
c1 >>= i; c2 >>= i; c4 >>= i; c8 >>= i;
|
||||
return 0;
|
||||
}
|
||||
EOF
|
||||
|
||||
vector16=no
|
||||
if compile_prog "" "" ; then
|
||||
vector16=yes
|
||||
fi
|
||||
|
||||
########################################
|
||||
# check if getauxval is available.
|
||||
|
||||
|
@ -6329,6 +6373,10 @@ if test "$atomic64" = "yes" ; then
|
|||
echo "CONFIG_ATOMIC64=y" >> $config_host_mak
|
||||
fi
|
||||
|
||||
if test "$vector16" = "yes" ; then
|
||||
echo "CONFIG_VECTOR16=y" >> $config_host_mak
|
||||
fi
|
||||
|
||||
if test "$getauxval" = "yes" ; then
|
||||
echo "CONFIG_GETAUXVAL=y" >> $config_host_mak
|
||||
fi
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Generic vector operation descriptor
|
||||
*
|
||||
* Copyright (c) 2018 Linaro
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/* ??? These bit widths are set for ARM SVE, maxing out at 256 byte vectors. */
|
||||
#define SIMD_OPRSZ_SHIFT 0
|
||||
#define SIMD_OPRSZ_BITS 5
|
||||
|
||||
#define SIMD_MAXSZ_SHIFT (SIMD_OPRSZ_SHIFT + SIMD_OPRSZ_BITS)
|
||||
#define SIMD_MAXSZ_BITS 5
|
||||
|
||||
#define SIMD_DATA_SHIFT (SIMD_MAXSZ_SHIFT + SIMD_MAXSZ_BITS)
|
||||
#define SIMD_DATA_BITS (32 - SIMD_DATA_SHIFT)
|
||||
|
||||
/* Create a descriptor from components. */
|
||||
uint32_t simd_desc(uint32_t oprsz, uint32_t maxsz, int32_t data);
|
||||
|
||||
/* Extract the operation size from a descriptor. */
|
||||
/* The size is kept in the SIMD_OPRSZ_BITS-wide field at SIMD_OPRSZ_SHIFT,
 * encoded as (bytes / 8) - 1.  With the 5-bit field the decoded result is
 * a multiple of 8 between 8 and 256.
 * NOTE(review): assumes extract32(value, start, length) returns the
 * unsigned bit-field; it is defined outside this header.
 */
static inline intptr_t simd_oprsz(uint32_t desc)
|
||||
{
|
||||
return (extract32(desc, SIMD_OPRSZ_SHIFT, SIMD_OPRSZ_BITS) + 1) * 8;
|
||||
}
|
||||
|
||||
/* Extract the max vector size from a descriptor. */
|
||||
/* The size is kept in the SIMD_MAXSZ_BITS-wide field at SIMD_MAXSZ_SHIFT
 * (directly above the OPRSZ field), encoded as (bytes / 8) - 1.  With the
 * 5-bit field the decoded result is a multiple of 8 between 8 and 256.
 * NOTE(review): assumes extract32(value, start, length) returns the
 * unsigned bit-field; it is defined outside this header.
 */
static inline intptr_t simd_maxsz(uint32_t desc)
|
||||
{
|
||||
return (extract32(desc, SIMD_MAXSZ_SHIFT, SIMD_MAXSZ_BITS) + 1) * 8;
|
||||
}
|
||||
|
||||
/* Extract the operation-specific data from a descriptor. */
|
||||
/* The data occupies every descriptor bit above the two size fields:
 * SIMD_DATA_BITS (32 - SIMD_DATA_SHIFT) bits starting at SIMD_DATA_SHIFT.
 * NOTE(review): presumably sextract32 sign-extends the field, so negative
 * data values round-trip; confirm against its definition.
 */
static inline int32_t simd_data(uint32_t desc)
|
||||
{
|
||||
return sextract32(desc, SIMD_DATA_SHIFT, SIMD_DATA_BITS);
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,198 @@
|
|||
/*
|
||||
* Generic vector operation expansion
|
||||
*
|
||||
* Copyright (c) 2018 Linaro
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/*
|
||||
* "Generic" vectors. All operands are given as offsets from ENV,
|
||||
* and therefore cannot also be allocated via tcg_global_mem_new_*.
|
||||
* OPRSZ is the byte size of the vector upon which the operation is performed.
|
||||
* MAXSZ is the byte size of the full vector; bytes beyond OPRSZ are cleared.
|
||||
*
|
||||
* All sizes must be 8 or any multiple of 16.
|
||||
* When OPRSZ is 8, the alignment may be 8, otherwise must be 16.
|
||||
* Operands may completely, but not partially, overlap.
|
||||
*/
|
||||
|
||||
/* Expand a call to a gvec-style helper, with pointers to two vector
|
||||
operands, and a descriptor (see tcg-gvec-desc.h). */
|
||||
typedef void gen_helper_gvec_2(TCGv_ptr, TCGv_ptr, TCGv_i32);
|
||||
void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs,
|
||||
uint32_t oprsz, uint32_t maxsz, int32_t data,
|
||||
gen_helper_gvec_2 *fn);
|
||||
|
||||
/* Similarly, passing an extra pointer (e.g. env or float_status). */
|
||||
typedef void gen_helper_gvec_2_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
|
||||
void tcg_gen_gvec_2_ptr(uint32_t dofs, uint32_t aofs,
|
||||
TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz,
|
||||
int32_t data, gen_helper_gvec_2_ptr *fn);
|
||||
|
||||
/* Similarly, with three vector operands. */
|
||||
typedef void gen_helper_gvec_3(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
|
||||
void tcg_gen_gvec_3_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
|
||||
uint32_t oprsz, uint32_t maxsz, int32_t data,
|
||||
gen_helper_gvec_3 *fn);
|
||||
|
||||
/* Similarly, with four vector operands. */
|
||||
typedef void gen_helper_gvec_4(TCGv_ptr, TCGv_ptr, TCGv_ptr,
|
||||
TCGv_ptr, TCGv_i32);
|
||||
void tcg_gen_gvec_4_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
|
||||
uint32_t cofs, uint32_t oprsz, uint32_t maxsz,
|
||||
int32_t data, gen_helper_gvec_4 *fn);
|
||||
|
||||
/* Similarly, with five vector operands. */
|
||||
typedef void gen_helper_gvec_5(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr,
|
||||
TCGv_ptr, TCGv_i32);
|
||||
void tcg_gen_gvec_5_ool(uint32_t dofs, uint32_t aofs, uint32_t bofs,
|
||||
uint32_t cofs, uint32_t xofs, uint32_t oprsz,
|
||||
uint32_t maxsz, int32_t data, gen_helper_gvec_5 *fn);
|
||||
|
||||
typedef void gen_helper_gvec_3_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr,
|
||||
TCGv_ptr, TCGv_i32);
|
||||
void tcg_gen_gvec_3_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
|
||||
TCGv_ptr ptr, uint32_t oprsz, uint32_t maxsz,
|
||||
int32_t data, gen_helper_gvec_3_ptr *fn);
|
||||
|
||||
typedef void gen_helper_gvec_4_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr,
|
||||
TCGv_ptr, TCGv_ptr, TCGv_i32);
|
||||
void tcg_gen_gvec_4_ptr(uint32_t dofs, uint32_t aofs, uint32_t bofs,
|
||||
uint32_t cofs, TCGv_ptr ptr, uint32_t oprsz,
|
||||
uint32_t maxsz, int32_t data,
|
||||
gen_helper_gvec_4_ptr *fn);
|
||||
|
||||
/* Expand a gvec operation. Either inline or out-of-line depending on
|
||||
the actual vector size and the operations supported by the host. */
|
||||
typedef struct {
|
||||
/* Expand inline as a 64-bit or 32-bit integer.
|
||||
Only one of these will be non-NULL. */
|
||||
void (*fni8)(TCGv_i64, TCGv_i64);
|
||||
void (*fni4)(TCGv_i32, TCGv_i32);
|
||||
/* Expand inline with a host vector type. */
|
||||
void (*fniv)(unsigned, TCGv_vec, TCGv_vec);
|
||||
/* Expand out-of-line helper w/descriptor. */
|
||||
gen_helper_gvec_2 *fno;
|
||||
/* The opcode, if any, to which this corresponds. */
|
||||
TCGOpcode opc;
|
||||
/* The data argument to the out-of-line helper. */
|
||||
int32_t data;
|
||||
/* The vector element size, if applicable. */
|
||||
uint8_t vece;
|
||||
/* Prefer i64 to v64. */
|
||||
bool prefer_i64;
|
||||
} GVecGen2;
|
||||
|
||||
typedef struct {
|
||||
/* Expand inline as a 64-bit or 32-bit integer.
|
||||
Only one of these will be non-NULL. */
|
||||
void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64);
|
||||
void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32);
|
||||
/* Expand inline with a host vector type. */
|
||||
void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec);
|
||||
/* Expand out-of-line helper w/descriptor. */
|
||||
gen_helper_gvec_3 *fno;
|
||||
/* The opcode, if any, to which this corresponds. */
|
||||
TCGOpcode opc;
|
||||
/* The data argument to the out-of-line helper. */
|
||||
int32_t data;
|
||||
/* The vector element size, if applicable. */
|
||||
uint8_t vece;
|
||||
/* Prefer i64 to v64. */
|
||||
bool prefer_i64;
|
||||
/* Load dest as a 3rd source operand. */
|
||||
bool load_dest;
|
||||
} GVecGen3;
|
||||
|
||||
typedef struct {
|
||||
/* Expand inline as a 64-bit or 32-bit integer.
|
||||
Only one of these will be non-NULL. */
|
||||
void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64);
|
||||
void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32);
|
||||
/* Expand inline with a host vector type. */
|
||||
void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec, TCGv_vec);
|
||||
/* Expand out-of-line helper w/descriptor. */
|
||||
gen_helper_gvec_4 *fno;
|
||||
/* The opcode, if any, to which this corresponds. */
|
||||
TCGOpcode opc;
|
||||
/* The data argument to the out-of-line helper. */
|
||||
int32_t data;
|
||||
/* The vector element size, if applicable. */
|
||||
uint8_t vece;
|
||||
/* Prefer i64 to v64. */
|
||||
bool prefer_i64;
|
||||
} GVecGen4;
|
||||
|
||||
void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
|
||||
uint32_t oprsz, uint32_t maxsz, const GVecGen2 *);
|
||||
void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
|
||||
uint32_t oprsz, uint32_t maxsz, const GVecGen3 *);
|
||||
void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
|
||||
uint32_t oprsz, uint32_t maxsz, const GVecGen4 *);
|
||||
|
||||
/* Expand a specific vector operation. */
|
||||
|
||||
void tcg_gen_gvec_mov(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_not(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_neg(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t oprsz, uint32_t maxsz);
|
||||
|
||||
void tcg_gen_gvec_add(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_sub(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
|
||||
|
||||
void tcg_gen_gvec_and(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_or(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_xor(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_andc(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
|
||||
void tcg_gen_gvec_orc(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
|
||||
|
||||
void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
|
||||
uint32_t s, uint32_t m);
|
||||
void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t s,
|
||||
uint32_t m, TCGv_i32);
|
||||
void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t s,
|
||||
uint32_t m, TCGv_i64);
|
||||
|
||||
void tcg_gen_gvec_dup8i(uint32_t dofs, uint32_t s, uint32_t m, uint8_t x);
|
||||
void tcg_gen_gvec_dup16i(uint32_t dofs, uint32_t s, uint32_t m, uint16_t x);
|
||||
void tcg_gen_gvec_dup32i(uint32_t dofs, uint32_t s, uint32_t m, uint32_t x);
|
||||
void tcg_gen_gvec_dup64i(uint32_t dofs, uint32_t s, uint32_t m, uint64_t x);
|
||||
|
||||
/*
|
||||
* 64-bit vector operations. Use these when the register has been allocated
|
||||
* with tcg_global_mem_new_i64, and so we cannot also address it via pointer.
|
||||
* OPRSZ = MAXSZ = 8.
|
||||
*/
|
||||
|
||||
void tcg_gen_vec_neg8_i64(TCGv_i64 d, TCGv_i64 a);
|
||||
void tcg_gen_vec_neg16_i64(TCGv_i64 d, TCGv_i64 a);
|
||||
void tcg_gen_vec_neg32_i64(TCGv_i64 d, TCGv_i64 a);
|
||||
|
||||
void tcg_gen_vec_add8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
|
||||
void tcg_gen_vec_add16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
|
||||
void tcg_gen_vec_add32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
|
||||
|
||||
void tcg_gen_vec_sub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
|
||||
void tcg_gen_vec_sub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
|
||||
void tcg_gen_vec_sub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
|
|
@ -73,7 +73,8 @@ static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a)
|
|||
TCGTemp *at = tcgv_vec_temp(a);
|
||||
TCGType type = rt->base_type;
|
||||
|
||||
tcg_debug_assert(at->base_type == type);
|
||||
/* Must have enough inputs for the output. */
|
||||
tcg_debug_assert(at->base_type >= type);
|
||||
vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at));
|
||||
}
|
||||
|
||||
|
@ -85,8 +86,9 @@ static void vec_gen_op3(TCGOpcode opc, unsigned vece,
|
|||
TCGTemp *bt = tcgv_vec_temp(b);
|
||||
TCGType type = rt->base_type;
|
||||
|
||||
tcg_debug_assert(at->base_type == type);
|
||||
tcg_debug_assert(bt->base_type == type);
|
||||
/* Must have enough inputs for the output. */
|
||||
tcg_debug_assert(at->base_type >= type);
|
||||
tcg_debug_assert(bt->base_type >= type);
|
||||
vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt));
|
||||
}
|
||||
|
||||
|
@ -99,7 +101,7 @@ void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
|
|||
|
||||
#define MO_REG (TCG_TARGET_REG_BITS == 64 ? MO_64 : MO_32)
|
||||
|
||||
static void tcg_gen_dupi_vec(TCGv_vec r, unsigned vece, TCGArg a)
|
||||
static void do_dupi_vec(TCGv_vec r, unsigned vece, TCGArg a)
|
||||
{
|
||||
TCGTemp *rt = tcgv_vec_temp(r);
|
||||
vec_gen_2(INDEX_op_dupi_vec, rt->base_type, vece, temp_arg(rt), a);
|
||||
|
@ -108,14 +110,14 @@ static void tcg_gen_dupi_vec(TCGv_vec r, unsigned vece, TCGArg a)
|
|||
TCGv_vec tcg_const_zeros_vec(TCGType type)
|
||||
{
|
||||
TCGv_vec ret = tcg_temp_new_vec(type);
|
||||
tcg_gen_dupi_vec(ret, MO_REG, 0);
|
||||
do_dupi_vec(ret, MO_REG, 0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
TCGv_vec tcg_const_ones_vec(TCGType type)
|
||||
{
|
||||
TCGv_vec ret = tcg_temp_new_vec(type);
|
||||
tcg_gen_dupi_vec(ret, MO_REG, -1);
|
||||
do_dupi_vec(ret, MO_REG, -1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -134,9 +136,9 @@ TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
|
|||
void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a)
|
||||
{
|
||||
if (TCG_TARGET_REG_BITS == 32 && a == deposit64(a, 32, 32, a)) {
|
||||
tcg_gen_dupi_vec(r, MO_32, a);
|
||||
do_dupi_vec(r, MO_32, a);
|
||||
} else if (TCG_TARGET_REG_BITS == 64 || a == (uint64_t)(int32_t)a) {
|
||||
tcg_gen_dupi_vec(r, MO_64, a);
|
||||
do_dupi_vec(r, MO_64, a);
|
||||
} else {
|
||||
TCGv_i64 c = tcg_const_i64(a);
|
||||
tcg_gen_dup_i64_vec(MO_64, r, c);
|
||||
|
@ -146,17 +148,22 @@ void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a)
|
|||
|
||||
void tcg_gen_dup32i_vec(TCGv_vec r, uint32_t a)
|
||||
{
|
||||
tcg_gen_dupi_vec(r, MO_REG, ((TCGArg)-1 / 0xffffffffu) * a);
|
||||
do_dupi_vec(r, MO_REG, dup_const(MO_32, a));
|
||||
}
|
||||
|
||||
void tcg_gen_dup16i_vec(TCGv_vec r, uint32_t a)
|
||||
{
|
||||
tcg_gen_dupi_vec(r, MO_REG, ((TCGArg)-1 / 0xffff) * (a & 0xffff));
|
||||
do_dupi_vec(r, MO_REG, dup_const(MO_16, a));
|
||||
}
|
||||
|
||||
void tcg_gen_dup8i_vec(TCGv_vec r, uint32_t a)
|
||||
{
|
||||
tcg_gen_dupi_vec(r, MO_REG, ((TCGArg)-1 / 0xff) * (a & 0xff));
|
||||
do_dupi_vec(r, MO_REG, dup_const(MO_8, a));
|
||||
}
|
||||
|
||||
/* Fill every element of vector r with the constant a.
 *
 * vece: log2 of the element size in bytes (MO_8 .. MO_64).
 * r:    destination vector temporary.
 * a:    constant; only the low element-sized bits are significant.
 *
 * 64-bit elements go through tcg_gen_dup64i_vec(), which handles constants
 * that do not fit in one host register on 32-bit hosts.  Handing
 * dup_const(MO_64, a) straight to do_dupi_vec() would first narrow it to
 * TCGArg and then replicate it at MO_REG width.
 * NOTE(review): assumes TCGArg is host-register sized, as MO_REG suggests.
 * On 64-bit hosts both paths emit the same dupi_vec op.
 *
 * Narrower elements are replicated into a register-wide pattern by
 * dup_const() and emitted at MO_REG width.
 */
void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
{
    if (vece == MO_64) {
        tcg_gen_dup64i_vec(r, a);
    } else {
        do_dupi_vec(r, MO_REG, dup_const(vece, a));
    }
}
|
||||
|
||||
void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
|
||||
|
@ -167,14 +174,14 @@ void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
|
|||
|
||||
if (TCG_TARGET_REG_BITS == 64) {
|
||||
TCGArg ai = tcgv_i64_arg(a);
|
||||
vec_gen_2(INDEX_op_dup_vec, type, MO_64, ri, ai);
|
||||
vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
|
||||
} else if (vece == MO_64) {
|
||||
TCGArg al = tcgv_i32_arg(TCGV_LOW(a));
|
||||
TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a));
|
||||
vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah);
|
||||
} else {
|
||||
TCGArg ai = tcgv_i32_arg(TCGV_LOW(a));
|
||||
vec_gen_2(INDEX_op_dup_vec, type, MO_64, ri, ai);
|
||||
vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -914,6 +914,7 @@ void tcg_gen_dup8i_vec(TCGv_vec, uint32_t);
|
|||
void tcg_gen_dup16i_vec(TCGv_vec, uint32_t);
|
||||
void tcg_gen_dup32i_vec(TCGv_vec, uint32_t);
|
||||
void tcg_gen_dup64i_vec(TCGv_vec, uint64_t);
|
||||
void tcg_gen_dupi_vec(unsigned vece, TCGv_vec, uint64_t);
|
||||
void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
|
||||
void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
|
||||
void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
|
||||
|
|
|
@ -228,6 +228,12 @@ DEF(andc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_andc_vec))
|
|||
DEF(orc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_orc_vec))
|
||||
DEF(not_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec))
|
||||
|
||||
DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT)
|
||||
|
||||
#if TCG_TARGET_MAYBE_vec
|
||||
#include "tcg-target.opc.h"
|
||||
#endif
|
||||
|
||||
#undef TLADDR_ARGS
|
||||
#undef DATA64_ARGS
|
||||
#undef IMPL
|
||||
|
|
13
tcg/tcg.c
13
tcg/tcg.c
|
@ -1403,10 +1403,10 @@ bool tcg_op_supported(TCGOpcode op)
|
|||
case INDEX_op_orc_vec:
|
||||
return have_vec && TCG_TARGET_HAS_orc_vec;
|
||||
|
||||
case NB_OPS:
|
||||
break;
|
||||
default:
|
||||
tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
|
||||
return true;
|
||||
}
|
||||
g_assert_not_reached();
|
||||
}
|
||||
|
||||
/* Note: we convert the 64 bit args to 32 bit and do some alignment
|
||||
|
@ -3733,3 +3733,10 @@ void tcg_register_jit(void *buf, size_t buf_size)
|
|||
{
|
||||
}
|
||||
#endif /* ELF_HOST_MACHINE */
|
||||
|
||||
#if !TCG_TARGET_MAYBE_vec
|
||||
/* Stub for builds where TCG_TARGET_MAYBE_vec is 0 (see the enclosing #if).
 * It exists so that references to tcg_expand_vec_op still link.  It must
 * never actually be called, and aborts via g_assert_not_reached() if it is.
 */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
|
||||
{
|
||||
g_assert_not_reached();
|
||||
}
|
||||
#endif
|
||||
|
|
27
tcg/tcg.h
27
tcg/tcg.h
|
@ -1207,6 +1207,33 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr);
|
|||
|
||||
void tcg_register_jit(void *buf, size_t buf_size);
|
||||
|
||||
#if TCG_TARGET_MAYBE_vec
|
||||
/* Return zero if the tuple (opc, type, vece) is unsupportable;
|
||||
return > 0 if it is directly supportable;
|
||||
return < 0 if we must call tcg_expand_vec_op. */
|
||||
int tcg_can_emit_vec_op(TCGOpcode, TCGType, unsigned);
|
||||
#else
|
||||
/* Fallback for builds where TCG_TARGET_MAYBE_vec is 0: every
 * (opc, type, vece) tuple is reported as unsupportable by returning 0.
 */
static inline int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Expand the tuple (opc, type, vece) on the given arguments. */
|
||||
void tcg_expand_vec_op(TCGOpcode, TCGType, unsigned, TCGArg, ...);
|
||||
|
||||
/* Replicate a constant C according to the log2 of the element size. */
|
||||
uint64_t dup_const(unsigned vece, uint64_t c);
|
||||
|
||||
#define dup_const(VECE, C) \
|
||||
(__builtin_constant_p(VECE) \
|
||||
? ( (VECE) == MO_8 ? 0x0101010101010101ull * (uint8_t)(C) \
|
||||
: (VECE) == MO_16 ? 0x0001000100010001ull * (uint16_t)(C) \
|
||||
: (VECE) == MO_32 ? 0x0000000100000001ull * (uint32_t)(C) \
|
||||
: dup_const(VECE, C)) \
|
||||
: dup_const(VECE, C))
|
||||
|
||||
|
||||
/*
|
||||
* Memory helpers that will be used by TCG generated code.
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue