tcg: Increase tcg_out_dupi_vec immediate to int64_t

While we don't store more than tcg_target_long in TCGTemp,
we shouldn't be limited to that for code generation.  We will
be able to use this for INDEX_op_dup2_vec with 2 constants.

Also pass along the minimal vece that may be said to apply
to the constant.  This allows some simplification in the
various backends.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2020-03-31 01:02:08 -07:00
parent 0a6a8bc8eb
commit 4e18617555
4 changed files with 69 additions and 33 deletions

View File

@ -857,14 +857,14 @@ static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
TCGReg rd, tcg_target_long v64)
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg rd, int64_t v64)
{
bool q = type == TCG_TYPE_V128;
int cmode, imm8, i;
/* Test all bytes equal first. */
if (v64 == dup_const(MO_8, v64)) {
if (vece == MO_8) {
imm8 = (uint8_t)v64;
tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
return;
@ -891,7 +891,7 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
* cannot find an expansion there's no point checking a larger
* width because we already know by replication it cannot match.
*/
if (v64 == dup_const(MO_16, v64)) {
if (vece == MO_16) {
uint16_t v16 = v64;
if (is_shimm16(v16, &cmode, &imm8)) {
@ -910,7 +910,7 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
return;
} else if (v64 == dup_const(MO_32, v64)) {
} else if (vece == MO_32) {
uint32_t v32 = v64;
uint32_t n32 = ~v32;
@ -2435,7 +2435,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
break;
}
tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
a2 = TCG_VEC_TMP;
}
insn = cmp_insn[cond];

View File

@ -942,8 +942,8 @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
return true;
}
static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
TCGReg ret, tcg_target_long arg)
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg ret, int64_t arg)
{
int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
@ -956,7 +956,14 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
return;
}
if (TCG_TARGET_REG_BITS == 64) {
if (TCG_TARGET_REG_BITS == 32 && vece < MO_64) {
if (have_avx2) {
tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret);
} else {
tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret);
}
new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
} else {
if (type == TCG_TYPE_V64) {
tcg_out_vex_modrm_pool(s, OPC_MOVQ_VqWq, ret);
} else if (have_avx2) {
@ -964,14 +971,11 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
} else {
tcg_out_vex_modrm_pool(s, OPC_MOVDDUP, ret);
}
new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
} else {
if (have_avx2) {
tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret);
if (TCG_TARGET_REG_BITS == 64) {
new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
} else {
tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret);
new_pool_l2(s, R_386_32, s->code_ptr - 4, 0, arg, arg >> 32);
}
new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
}
}

View File

@ -912,31 +912,41 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
}
}
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
tcg_target_long val)
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg ret, int64_t val)
{
uint32_t load_insn;
int rel, low;
intptr_t add;
low = (int8_t)val;
if (low >= -16 && low < 16) {
if (val == (tcg_target_long)dup_const(MO_8, low)) {
switch (vece) {
case MO_8:
low = (int8_t)val;
if (low >= -16 && low < 16) {
tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
return;
}
if (val == (tcg_target_long)dup_const(MO_16, low)) {
if (have_isa_3_00) {
tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
return;
}
break;
case MO_16:
low = (int16_t)val;
if (low >= -16 && low < 16) {
tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
return;
}
if (val == (tcg_target_long)dup_const(MO_32, low)) {
break;
case MO_32:
low = (int32_t)val;
if (low >= -16 && low < 16) {
tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
return;
}
}
if (have_isa_3_00 && val == (tcg_target_long)dup_const(MO_8, val)) {
tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
return;
break;
}
/*
@ -956,14 +966,15 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
if (TCG_TARGET_REG_BITS == 64) {
new_pool_label(s, val, rel, s->code_ptr, add);
} else {
new_pool_l2(s, rel, s->code_ptr, add, val, val);
new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
}
} else {
load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
if (TCG_TARGET_REG_BITS == 64) {
new_pool_l2(s, rel, s->code_ptr, add, val, val);
} else {
new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
new_pool_l4(s, rel, s->code_ptr, add,
val >> 32, val, val >> 32, val);
}
}

View File

@ -117,8 +117,8 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
TCGReg dst, tcg_target_long arg);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
unsigned vece, const TCGArg *args,
const int *const_args);
@ -133,8 +133,8 @@ static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
{
g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
TCGReg dst, tcg_target_long arg)
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg dst, int64_t arg)
{
g_assert_not_reached();
}
@ -3390,7 +3390,28 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
if (ts->type <= TCG_TYPE_I64) {
tcg_out_movi(s, ts->type, reg, ts->val);
} else {
tcg_out_dupi_vec(s, ts->type, reg, ts->val);
uint64_t val = ts->val;
MemOp vece = MO_64;
/*
* Find the minimal vector element that matches the constant.
* The targets will, in general, have to do this search anyway,
* do this generically.
*/
if (TCG_TARGET_REG_BITS == 32) {
val = dup_const(MO_32, val);
vece = MO_32;
}
if (val == dup_const(MO_8, val)) {
vece = MO_8;
} else if (val == dup_const(MO_16, val)) {
vece = MO_16;
} else if (TCG_TARGET_REG_BITS == 64 &&
val == dup_const(MO_32, val)) {
vece = MO_32;
}
tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
}
ts->mem_coherent = 0;
break;