tcg/s390: Use constant pool for ori

Signed-off-by: Richard Henderson <rth@twiddle.net>
This commit is contained in:
Richard Henderson 2017-07-25 19:21:36 -04:00
parent bdcd5d1926
commit 4046d9ca04
1 changed files with 34 additions and 40 deletions

View File

@ -225,6 +225,7 @@ typedef enum S390Opcode {
RXY_LRVH = 0xe31f,
RXY_LY = 0xe358,
RXY_NG = 0xe380,
RXY_OG = 0xe381,
RXY_STCY = 0xe372,
RXY_STG = 0xe324,
RXY_STHY = 0xe370,
@ -1004,55 +1005,60 @@ static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
}
}
static void tgen64_ori(TCGContext *s, TCGReg dest, tcg_target_ulong val)
static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
{
static const S390Opcode oi_insns[4] = {
RI_OILL, RI_OILH, RI_OIHL, RI_OIHH
};
static const S390Opcode nif_insns[2] = {
static const S390Opcode oif_insns[2] = {
RIL_OILF, RIL_OIHF
};
int i;
/* Look for no-op. */
if (val == 0) {
if (unlikely(val == 0)) {
return;
}
if (s390_facilities & FACILITY_EXT_IMM) {
/* Try all 32-bit insns that can perform it in one go. */
for (i = 0; i < 4; i++) {
tcg_target_ulong mask = (0xffffull << i*16);
if ((val & mask) != 0 && (val & ~mask) == 0) {
tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16);
return;
}
/* Try all 32-bit insns that can perform it in one go. */
for (i = 0; i < 4; i++) {
tcg_target_ulong mask = (0xffffull << i*16);
if ((val & mask) != 0 && (val & ~mask) == 0) {
tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16);
return;
}
}
/* Try all 48-bit insns that can perform it in one go. */
/* Try all 48-bit insns that can perform it in one go. */
if (s390_facilities & FACILITY_EXT_IMM) {
for (i = 0; i < 2; i++) {
tcg_target_ulong mask = (0xffffffffull << i*32);
if ((val & mask) != 0 && (val & ~mask) == 0) {
tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
tcg_out_insn_RIL(s, oif_insns[i], dest, val >> i*32);
return;
}
}
}
/* Use the constant pool if USE_REG_TB, but not for small constants. */
if (maybe_out_small_movi(s, type, TCG_TMP0, val)) {
if (type == TCG_TYPE_I32) {
tcg_out_insn(s, RR, OR, dest, TCG_TMP0);
} else {
tcg_out_insn(s, RRE, OGR, dest, TCG_TMP0);
}
} else if (USE_REG_TB) {
tcg_out_insn(s, RXY, OG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
new_pool_label(s, val, R_390_20, s->code_ptr - 2,
-(intptr_t)s->code_gen_ptr);
} else {
/* Perform the OR via sequential modifications to the high and
low parts. Do this via recursion to handle 16-bit vs 32-bit
masks in each half. */
tgen64_ori(s, dest, val & 0x00000000ffffffffull);
tgen64_ori(s, dest, val & 0xffffffff00000000ull);
} else {
/* With no extended-immediate facility, we don't need to be so
clever. Just iterate over the insns and mask in the constant. */
for (i = 0; i < 4; i++) {
tcg_target_ulong mask = (0xffffull << i*16);
if ((val & mask) != 0) {
tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16);
}
}
tcg_debug_assert(s390_facilities & FACILITY_EXT_IMM);
tgen_ori(s, type, dest, val & 0x00000000ffffffffull);
tgen_ori(s, type, dest, val & 0xffffffff00000000ull);
}
}
@ -1872,7 +1878,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
if (const_args[2]) {
tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
tgen64_ori(s, a0, a2);
tgen_ori(s, TCG_TYPE_I32, a0, a2);
} else if (a0 == a1) {
tcg_out_insn(s, RR, OR, a0, a2);
} else {
@ -2104,7 +2110,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
tgen64_ori(s, a0, a2);
tgen_ori(s, TCG_TYPE_I64, a0, a2);
} else if (a0 == a1) {
tcg_out_insn(s, RRE, OGR, a0, a2);
} else {
@ -2312,7 +2318,6 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
static const TCGTargetOpDef r_0_ri = { .args_ct_str = { "r", "0", "ri" } };
static const TCGTargetOpDef r_0_rI = { .args_ct_str = { "r", "0", "rI" } };
static const TCGTargetOpDef r_0_rJ = { .args_ct_str = { "r", "0", "rJ" } };
static const TCGTargetOpDef r_0_rN = { .args_ct_str = { "r", "0", "rN" } };
static const TCGTargetOpDef r_0_rM = { .args_ct_str = { "r", "0", "rM" } };
static const TCGTargetOpDef a2_r
= { .args_ct_str = { "r", "r", "0", "1", "r", "r" } };
@ -2353,6 +2358,8 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
case INDEX_op_sub_i64:
case INDEX_op_and_i32:
case INDEX_op_and_i64:
case INDEX_op_or_i32:
case INDEX_op_or_i64:
return (s390_facilities & FACILITY_DISTINCT_OPS ? &r_r_ri : &r_0_ri);
case INDEX_op_mul_i32:
@ -2363,19 +2370,6 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
case INDEX_op_mul_i64:
return (s390_facilities & FACILITY_GEN_INST_EXT ? &r_0_rJ : &r_0_rI);
case INDEX_op_or_i32:
/* The use of [iNM] constraints are optimization only, since a full
64-bit immediate OR can always be performed with 4 sequential
OI[LH][LH] instructions. By rejecting certain negative ranges,
the immediate load plus the reg-reg OR is smaller. */
return (s390_facilities & FACILITY_EXT_IMM
? (s390_facilities & FACILITY_DISTINCT_OPS ? &r_r_ri : &r_0_ri)
: &r_0_rN);
case INDEX_op_or_i64:
return (s390_facilities & FACILITY_EXT_IMM
? (s390_facilities & FACILITY_DISTINCT_OPS ? &r_r_rM : &r_0_rM)
: &r_0_rN);
case INDEX_op_xor_i32:
/* Without EXT_IMM, no immediates are supported. Otherwise,
rejecting certain negative ranges leads to smaller code. */