mirror of https://gitee.com/openkylin/qemu.git
Merge remote-tracking branch 'remotes/rth/tcg-next' into staging
* remotes/rth/tcg-next: tcg/i386: Use SHLX/SHRX/SARX instructions tcg/i386: Use ANDN instruction tcg/i386: Add tcg_out_vex_modrm tcg/i386: Move TCG_CT_CONST_* to tcg-target.c disas/i386: Disassemble ANDN/SHLX/SHRX/SARX tcg/optimize: Add more identity simplifications tcg/optimize: Optimize ANDC X,Y,Y to MOV X,0 tcg/optimize: Simplify some logical ops to NOT tcg/optimize: Handle known-zeros masks for ANDC tcg/optimize: add known-zero bits compute for load ops tcg/optimize: improve known-zero bits for 32-bit ops tcg/optimize: fix known-zero bits optimization tcg/optimize: fix known-zero bits for right shift ops tcg-arm: The shift count of op_rotl_i32 is in args[2] not args[1]. TCG: Fix 32-bit host allocation typo Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
3d2bb5cc81
146
disas/i386.c
146
disas/i386.c
|
@ -171,6 +171,7 @@ static void print_operand_value (char *buf, size_t bufsize, int hex, bfd_vma dis
|
|||
static void print_displacement (char *, bfd_vma);
|
||||
static void OP_E (int, int);
|
||||
static void OP_G (int, int);
|
||||
static void OP_vvvv (int, int);
|
||||
static bfd_vma get64 (void);
|
||||
static bfd_signed_vma get32 (void);
|
||||
static bfd_signed_vma get32s (void);
|
||||
|
@ -264,6 +265,9 @@ static int rex_used;
|
|||
current instruction. */
|
||||
static int used_prefixes;
|
||||
|
||||
/* The VEX.vvvv register, unencoded. */
|
||||
static int vex_reg;
|
||||
|
||||
/* Flags stored in PREFIXES. */
|
||||
#define PREFIX_REPZ 1
|
||||
#define PREFIX_REPNZ 2
|
||||
|
@ -278,6 +282,10 @@ static int used_prefixes;
|
|||
#define PREFIX_ADDR 0x400
|
||||
#define PREFIX_FWAIT 0x800
|
||||
|
||||
#define PREFIX_VEX_0F 0x1000
|
||||
#define PREFIX_VEX_0F38 0x2000
|
||||
#define PREFIX_VEX_0F3A 0x4000
|
||||
|
||||
/* Make sure that bytes from INFO->PRIVATE_DATA->BUFFER (inclusive)
|
||||
to ADDR (exclusive) are valid. Returns 1 for success, longjmps
|
||||
on error. */
|
||||
|
@ -323,6 +331,7 @@ fetch_data(struct disassemble_info *info, bfd_byte *addr)
|
|||
|
||||
#define XX { NULL, 0 }
|
||||
|
||||
#define Bv { OP_vvvv, v_mode }
|
||||
#define Eb { OP_E, b_mode }
|
||||
#define Ev { OP_E, v_mode }
|
||||
#define Ed { OP_E, d_mode }
|
||||
|
@ -671,7 +680,8 @@ fetch_data(struct disassemble_info *info, bfd_byte *addr)
|
|||
#define PREGRP102 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 102 } }
|
||||
#define PREGRP103 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 103 } }
|
||||
#define PREGRP104 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 104 } }
|
||||
|
||||
#define PREGRP105 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 105 } }
|
||||
#define PREGRP106 NULL, { { NULL, USE_PREFIX_USER_TABLE }, { NULL, 106 } }
|
||||
|
||||
#define X86_64_0 NULL, { { NULL, X86_64_SPECIAL }, { NULL, 0 } }
|
||||
#define X86_64_1 NULL, { { NULL, X86_64_SPECIAL }, { NULL, 1 } }
|
||||
|
@ -1449,7 +1459,7 @@ static const unsigned char threebyte_0x38_uses_DATA_prefix[256] = {
|
|||
/* c0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */
|
||||
/* d0 */ 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1, /* df */
|
||||
/* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ef */
|
||||
/* f0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ff */
|
||||
/* f0 */ 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, /* ff */
|
||||
/* ------------------------------- */
|
||||
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
|
||||
};
|
||||
|
@ -1473,7 +1483,7 @@ static const unsigned char threebyte_0x38_uses_REPNZ_prefix[256] = {
|
|||
/* c0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */
|
||||
/* d0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* df */
|
||||
/* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ef */
|
||||
/* f0 */ 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ff */
|
||||
/* f0 */ 1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0, /* ff */
|
||||
/* ------------------------------- */
|
||||
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
|
||||
};
|
||||
|
@ -1497,7 +1507,7 @@ static const unsigned char threebyte_0x38_uses_REPZ_prefix[256] = {
|
|||
/* c0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* cf */
|
||||
/* d0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* df */
|
||||
/* e0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ef */
|
||||
/* f0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* ff */
|
||||
/* f0 */ 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, /* ff */
|
||||
/* ------------------------------- */
|
||||
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
|
||||
};
|
||||
|
@ -2774,6 +2784,22 @@ static const struct dis386 prefix_user_table[][4] = {
|
|||
{ "(bad)", { XX } },
|
||||
},
|
||||
|
||||
/* PREGRP105 */
|
||||
{
|
||||
{ "andnS", { Gv, Bv, Ev } },
|
||||
{ "(bad)", { XX } },
|
||||
{ "(bad)", { XX } },
|
||||
{ "(bad)", { XX } },
|
||||
},
|
||||
|
||||
/* PREGRP106 */
|
||||
{
|
||||
{ "bextrS", { Gv, Ev, Bv } },
|
||||
{ "sarxS", { Gv, Ev, Bv } },
|
||||
{ "shlxS", { Gv, Ev, Bv } },
|
||||
{ "shrxS", { Gv, Ev, Bv } },
|
||||
},
|
||||
|
||||
};
|
||||
|
||||
static const struct dis386 x86_64_table[][2] = {
|
||||
|
@ -3071,12 +3097,12 @@ static const struct dis386 three_byte_table[][256] = {
|
|||
/* f0 */
|
||||
{ PREGRP87 },
|
||||
{ PREGRP88 },
|
||||
{ PREGRP105 },
|
||||
{ "(bad)", { XX } },
|
||||
{ "(bad)", { XX } },
|
||||
{ "(bad)", { XX } },
|
||||
{ "(bad)", { XX } },
|
||||
{ "(bad)", { XX } },
|
||||
{ "(bad)", { XX } },
|
||||
{ PREGRP106 },
|
||||
/* f8 */
|
||||
{ "(bad)", { XX } },
|
||||
{ "(bad)", { XX } },
|
||||
|
@ -3477,6 +3503,74 @@ ckprefix (void)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
ckvexprefix (void)
|
||||
{
|
||||
int op, vex2, vex3, newrex = 0, newpfx = prefixes;
|
||||
|
||||
if (address_mode == mode_16bit) {
|
||||
return;
|
||||
}
|
||||
|
||||
fetch_data(the_info, codep + 1);
|
||||
op = *codep;
|
||||
|
||||
if (op != 0xc4 && op != 0xc5) {
|
||||
return;
|
||||
}
|
||||
|
||||
fetch_data(the_info, codep + 2);
|
||||
vex2 = codep[1];
|
||||
|
||||
if (address_mode == mode_32bit && (vex2 & 0xc0) != 0xc0) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (op == 0xc4) {
|
||||
/* Three byte VEX prefix. */
|
||||
fetch_data(the_info, codep + 3);
|
||||
vex3 = codep[2];
|
||||
|
||||
newrex |= (vex2 & 0x80 ? 0 : REX_R);
|
||||
newrex |= (vex2 & 0x40 ? 0 : REX_X);
|
||||
newrex |= (vex2 & 0x20 ? 0 : REX_B);
|
||||
newrex |= (vex3 & 0x80 ? REX_W : 0);
|
||||
switch (vex2 & 0x1f) { /* VEX.m-mmmm */
|
||||
case 1:
|
||||
newpfx |= PREFIX_VEX_0F;
|
||||
break;
|
||||
case 2:
|
||||
newpfx |= PREFIX_VEX_0F | PREFIX_VEX_0F38;
|
||||
break;
|
||||
case 3:
|
||||
newpfx |= PREFIX_VEX_0F | PREFIX_VEX_0F3A;
|
||||
break;
|
||||
}
|
||||
vex2 = vex3;
|
||||
codep += 3;
|
||||
} else {
|
||||
/* Two byte VEX prefix. */
|
||||
newrex |= (vex2 & 0x80 ? 0 : REX_R);
|
||||
codep += 2;
|
||||
}
|
||||
|
||||
vex_reg = (~vex2 >> 3) & 15; /* VEX.vvvv */
|
||||
switch (vex2 & 3) { /* VEX.pp */
|
||||
case 1:
|
||||
newpfx |= PREFIX_DATA; /* 0x66 */
|
||||
break;
|
||||
case 2:
|
||||
newpfx |= PREFIX_REPZ; /* 0xf3 */
|
||||
break;
|
||||
case 3:
|
||||
newpfx |= PREFIX_REPNZ; /* 0xf2 */
|
||||
break;
|
||||
}
|
||||
|
||||
rex = newrex;
|
||||
prefixes = newpfx;
|
||||
}
|
||||
|
||||
/* Return the name of the prefix byte PREF, or NULL if PREF is not a
|
||||
prefix byte. */
|
||||
|
||||
|
@ -3598,6 +3692,7 @@ print_insn (bfd_vma pc, disassemble_info *info)
|
|||
const char *p;
|
||||
struct dis_private priv;
|
||||
unsigned char op;
|
||||
unsigned char threebyte;
|
||||
|
||||
if (info->mach == bfd_mach_x86_64_intel_syntax
|
||||
|| info->mach == bfd_mach_x86_64)
|
||||
|
@ -3752,6 +3847,7 @@ print_insn (bfd_vma pc, disassemble_info *info)
|
|||
|
||||
obufp = obuf;
|
||||
ckprefix ();
|
||||
ckvexprefix ();
|
||||
|
||||
insn_codep = codep;
|
||||
sizeflag = priv.orig_sizeflag;
|
||||
|
@ -3775,18 +3871,29 @@ print_insn (bfd_vma pc, disassemble_info *info)
|
|||
}
|
||||
|
||||
op = 0;
|
||||
if (prefixes & PREFIX_VEX_0F)
|
||||
{
|
||||
used_prefixes |= PREFIX_VEX_0F | PREFIX_VEX_0F38 | PREFIX_VEX_0F3A;
|
||||
if (prefixes & PREFIX_VEX_0F38)
|
||||
threebyte = 0x38;
|
||||
else if (prefixes & PREFIX_VEX_0F3A)
|
||||
threebyte = 0x3a;
|
||||
else
|
||||
threebyte = *codep++;
|
||||
goto vex_opcode;
|
||||
}
|
||||
if (*codep == 0x0f)
|
||||
{
|
||||
unsigned char threebyte;
|
||||
fetch_data(info, codep + 2);
|
||||
threebyte = *++codep;
|
||||
threebyte = codep[1];
|
||||
codep += 2;
|
||||
vex_opcode:
|
||||
dp = &dis386_twobyte[threebyte];
|
||||
need_modrm = twobyte_has_modrm[*codep];
|
||||
uses_DATA_prefix = twobyte_uses_DATA_prefix[*codep];
|
||||
uses_REPNZ_prefix = twobyte_uses_REPNZ_prefix[*codep];
|
||||
uses_REPZ_prefix = twobyte_uses_REPZ_prefix[*codep];
|
||||
uses_LOCK_prefix = (*codep & ~0x02) == 0x20;
|
||||
codep++;
|
||||
need_modrm = twobyte_has_modrm[threebyte];
|
||||
uses_DATA_prefix = twobyte_uses_DATA_prefix[threebyte];
|
||||
uses_REPNZ_prefix = twobyte_uses_REPNZ_prefix[threebyte];
|
||||
uses_REPZ_prefix = twobyte_uses_REPZ_prefix[threebyte];
|
||||
uses_LOCK_prefix = (threebyte & ~0x02) == 0x20;
|
||||
if (dp->name == NULL && dp->op[0].bytemode == IS_3BYTE_OPCODE)
|
||||
{
|
||||
fetch_data(info, codep + 2);
|
||||
|
@ -5291,6 +5398,17 @@ OP_G (int bytemode, int sizeflag)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
OP_vvvv (int bytemode, int sizeflags)
|
||||
{
|
||||
USED_REX (REX_W);
|
||||
if (rex & REX_W) {
|
||||
oappend(names64[vex_reg]);
|
||||
} else {
|
||||
oappend(names32[vex_reg]);
|
||||
}
|
||||
}
|
||||
|
||||
static bfd_vma
|
||||
get64 (void)
|
||||
{
|
||||
|
|
|
@ -1866,7 +1866,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
|||
SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
|
||||
SHIFT_IMM_LSL(0));
|
||||
} else {
|
||||
tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[1], 0x20);
|
||||
tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
|
||||
tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
|
||||
SHIFT_REG_ROR(TCG_REG_TMP));
|
||||
}
|
||||
|
|
|
@ -88,6 +88,11 @@ static const int tcg_target_call_oarg_regs[] = {
|
|||
#endif
|
||||
};
|
||||
|
||||
/* Constants we accept. */
|
||||
#define TCG_CT_CONST_S32 0x100
|
||||
#define TCG_CT_CONST_U32 0x200
|
||||
#define TCG_CT_CONST_I32 0x400
|
||||
|
||||
/* Registers used with L constraint, which are the first argument
|
||||
registers on x86_64, and two random call clobbered registers on
|
||||
i386. */
|
||||
|
@ -124,6 +129,16 @@ static bool have_movbe;
|
|||
# define have_movbe 0
|
||||
#endif
|
||||
|
||||
/* We need this symbol in tcg-target.h, and we can't properly conditionalize
|
||||
it there. Therefore we always define the variable. */
|
||||
bool have_bmi1;
|
||||
|
||||
#if defined(CONFIG_CPUID_H) && defined(bit_BMI2)
|
||||
static bool have_bmi2;
|
||||
#else
|
||||
# define have_bmi2 0
|
||||
#endif
|
||||
|
||||
static uint8_t *tb_ret_addr;
|
||||
|
||||
static void patch_reloc(uint8_t *code_ptr, int type,
|
||||
|
@ -166,6 +181,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
|
|||
tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
|
||||
break;
|
||||
case 'c':
|
||||
case_c:
|
||||
ct->ct |= TCG_CT_REG;
|
||||
tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
|
||||
break;
|
||||
|
@ -194,6 +210,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
|
|||
tcg_regset_set32(ct->u.regs, 0, 0xf);
|
||||
break;
|
||||
case 'r':
|
||||
case_r:
|
||||
ct->ct |= TCG_CT_REG;
|
||||
if (TCG_TARGET_REG_BITS == 64) {
|
||||
tcg_regset_set32(ct->u.regs, 0, 0xffff);
|
||||
|
@ -201,6 +218,13 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
|
|||
tcg_regset_set32(ct->u.regs, 0, 0xff);
|
||||
}
|
||||
break;
|
||||
case 'C':
|
||||
/* With SHRX et al, we need not use ECX as shift count register. */
|
||||
if (have_bmi2) {
|
||||
goto case_r;
|
||||
} else {
|
||||
goto case_c;
|
||||
}
|
||||
|
||||
/* qemu_ld/st address constraint */
|
||||
case 'L':
|
||||
|
@ -220,6 +244,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
|
|||
case 'Z':
|
||||
ct->ct |= TCG_CT_CONST_U32;
|
||||
break;
|
||||
case 'I':
|
||||
ct->ct |= TCG_CT_CONST_I32;
|
||||
break;
|
||||
|
||||
default:
|
||||
return -1;
|
||||
|
@ -243,6 +270,9 @@ static inline int tcg_target_const_match(tcg_target_long val,
|
|||
if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
|
||||
return 1;
|
||||
}
|
||||
if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -268,10 +298,13 @@ static inline int tcg_target_const_match(tcg_target_long val,
|
|||
# define P_REXB_RM 0
|
||||
# define P_GS 0
|
||||
#endif
|
||||
#define P_SIMDF3 0x10000 /* 0xf3 opcode prefix */
|
||||
#define P_SIMDF2 0x20000 /* 0xf2 opcode prefix */
|
||||
|
||||
#define OPC_ARITH_EvIz (0x81)
|
||||
#define OPC_ARITH_EvIb (0x83)
|
||||
#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
|
||||
#define OPC_ANDN (0xf2 | P_EXT38)
|
||||
#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
|
||||
#define OPC_BSWAP (0xc8 | P_EXT)
|
||||
#define OPC_CALL_Jz (0xe8)
|
||||
|
@ -309,6 +342,9 @@ static inline int tcg_target_const_match(tcg_target_long val,
|
|||
#define OPC_SHIFT_1 (0xd1)
|
||||
#define OPC_SHIFT_Ib (0xc1)
|
||||
#define OPC_SHIFT_cl (0xd3)
|
||||
#define OPC_SARX (0xf7 | P_EXT38 | P_SIMDF3)
|
||||
#define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16)
|
||||
#define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2)
|
||||
#define OPC_TESTL (0x85)
|
||||
#define OPC_XCHG_ax_r32 (0x90)
|
||||
|
||||
|
@ -398,9 +434,9 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
|
|||
|
||||
rex = 0;
|
||||
rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */
|
||||
rex |= (r & 8) >> 1; /* REX.R */
|
||||
rex |= (x & 8) >> 2; /* REX.X */
|
||||
rex |= (rm & 8) >> 3; /* REX.B */
|
||||
rex |= (r & 8) >> 1; /* REX.R */
|
||||
rex |= (x & 8) >> 2; /* REX.X */
|
||||
rex |= (rm & 8) >> 3; /* REX.B */
|
||||
|
||||
/* P_REXB_{R,RM} indicates that the given register is the low byte.
|
||||
For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
|
||||
|
@ -449,6 +485,48 @@ static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
|
|||
tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
|
||||
}
|
||||
|
||||
/* Emit a VEX-encoded instruction with a register-direct ModRM byte.

   OPC is the opcode byte combined with P_* flags: P_EXT / P_EXT38 select
   the opcode map, P_REXW selects VEX.W, and P_DATA16 / P_SIMDF3 /
   P_SIMDF2 select the implied legacy prefix (VEX.pp).
   R is placed in ModRM.reg, RM in ModRM.rm, and V in VEX.vvvv.

   The two-byte prefix (0xc5) can express neither VEX.W, VEX.B nor any
   opcode map other than 0F, so it is used only when OPC is a plain P_EXT
   opcode without P_REXW and RM is a low register.  Everything else uses
   the three-byte prefix (0xc4); an opcode carrying no escape flag at all
   cannot be VEX-encoded and aborts.  */
static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
{
    int tmp;

    if ((opc & (P_EXT | P_EXT38 | P_REXW)) == P_EXT && (rm & 8) == 0) {
        /* Two byte VEX prefix.  */
        tcg_out8(s, 0xc5);

        tmp = (r & 8 ? 0 : 0x80);              /* VEX.R */
    } else {
        /* Three byte VEX prefix.  */
        tcg_out8(s, 0xc4);

        /* VEX.m-mmmm */
        if (opc & P_EXT38) {
            tmp = 2;
        } else if (opc & P_EXT) {
            tmp = 1;
        } else {
            tcg_abort();
        }
        /* R, X and B are stored inverted.  There is no index register in
           the register-direct form, so VEX.X is always set.  */
        tmp |= 0x40;                           /* VEX.X */
        tmp |= (r & 8 ? 0 : 0x80);             /* VEX.R */
        tmp |= (rm & 8 ? 0 : 0x20);            /* VEX.B */
        tcg_out8(s, tmp);

        tmp = (opc & P_REXW ? 0x80 : 0);       /* VEX.W */
    }

    /* VEX.pp */
    if (opc & P_DATA16) {
        tmp |= 1;                              /* 0x66 */
    } else if (opc & P_SIMDF3) {
        tmp |= 2;                              /* 0xf3 */
    } else if (opc & P_SIMDF2) {
        tmp |= 3;                              /* 0xf2 */
    }
    tmp |= (~v & 15) << 3;                     /* VEX.vvvv, stored inverted */
    tcg_out8(s, tmp);

    /* Opcode byte (tcg_out8 keeps only the low 8 bits' worth of OPC),
       then ModRM with mod == 3 (register-direct).  */
    tcg_out8(s, opc);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
|
||||
|
||||
/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
|
||||
We handle either RM and INDEX missing with a negative value. In 64-bit
|
||||
mode for absolute addresses, ~RM is the size of the immediate operand
|
||||
|
@ -1638,7 +1716,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
|
|||
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
||||
const TCGArg *args, const int *const_args)
|
||||
{
|
||||
int c, rexw = 0;
|
||||
int c, vexop, rexw = 0;
|
||||
|
||||
#if TCG_TARGET_REG_BITS == 64
|
||||
# define OP_32_64(x) \
|
||||
|
@ -1774,6 +1852,16 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
|||
}
|
||||
break;
|
||||
|
||||
OP_32_64(andc):
|
||||
if (const_args[2]) {
|
||||
tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32,
|
||||
args[0], args[1]);
|
||||
tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0);
|
||||
} else {
|
||||
tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]);
|
||||
}
|
||||
break;
|
||||
|
||||
OP_32_64(mul):
|
||||
if (const_args[2]) {
|
||||
int32_t val;
|
||||
|
@ -1799,19 +1887,28 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
|||
|
||||
OP_32_64(shl):
|
||||
c = SHIFT_SHL;
|
||||
goto gen_shift;
|
||||
vexop = OPC_SHLX;
|
||||
goto gen_shift_maybe_vex;
|
||||
OP_32_64(shr):
|
||||
c = SHIFT_SHR;
|
||||
goto gen_shift;
|
||||
vexop = OPC_SHRX;
|
||||
goto gen_shift_maybe_vex;
|
||||
OP_32_64(sar):
|
||||
c = SHIFT_SAR;
|
||||
goto gen_shift;
|
||||
vexop = OPC_SARX;
|
||||
goto gen_shift_maybe_vex;
|
||||
OP_32_64(rotl):
|
||||
c = SHIFT_ROL;
|
||||
goto gen_shift;
|
||||
OP_32_64(rotr):
|
||||
c = SHIFT_ROR;
|
||||
goto gen_shift;
|
||||
gen_shift_maybe_vex:
|
||||
if (have_bmi2 && !const_args[2]) {
|
||||
tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]);
|
||||
break;
|
||||
}
|
||||
/* FALLTHRU */
|
||||
gen_shift:
|
||||
if (const_args[2]) {
|
||||
tcg_out_shifti(s, c + rexw, args[0], args[2]);
|
||||
|
@ -2002,10 +2099,11 @@ static const TCGTargetOpDef x86_op_defs[] = {
|
|||
{ INDEX_op_and_i32, { "r", "0", "ri" } },
|
||||
{ INDEX_op_or_i32, { "r", "0", "ri" } },
|
||||
{ INDEX_op_xor_i32, { "r", "0", "ri" } },
|
||||
{ INDEX_op_andc_i32, { "r", "r", "ri" } },
|
||||
|
||||
{ INDEX_op_shl_i32, { "r", "0", "ci" } },
|
||||
{ INDEX_op_shr_i32, { "r", "0", "ci" } },
|
||||
{ INDEX_op_sar_i32, { "r", "0", "ci" } },
|
||||
{ INDEX_op_shl_i32, { "r", "0", "Ci" } },
|
||||
{ INDEX_op_shr_i32, { "r", "0", "Ci" } },
|
||||
{ INDEX_op_sar_i32, { "r", "0", "Ci" } },
|
||||
{ INDEX_op_rotl_i32, { "r", "0", "ci" } },
|
||||
{ INDEX_op_rotr_i32, { "r", "0", "ci" } },
|
||||
|
||||
|
@ -2059,10 +2157,11 @@ static const TCGTargetOpDef x86_op_defs[] = {
|
|||
{ INDEX_op_and_i64, { "r", "0", "reZ" } },
|
||||
{ INDEX_op_or_i64, { "r", "0", "re" } },
|
||||
{ INDEX_op_xor_i64, { "r", "0", "re" } },
|
||||
{ INDEX_op_andc_i64, { "r", "r", "rI" } },
|
||||
|
||||
{ INDEX_op_shl_i64, { "r", "0", "ci" } },
|
||||
{ INDEX_op_shr_i64, { "r", "0", "ci" } },
|
||||
{ INDEX_op_sar_i64, { "r", "0", "ci" } },
|
||||
{ INDEX_op_shl_i64, { "r", "0", "Ci" } },
|
||||
{ INDEX_op_shr_i64, { "r", "0", "Ci" } },
|
||||
{ INDEX_op_sar_i64, { "r", "0", "Ci" } },
|
||||
{ INDEX_op_rotl_i64, { "r", "0", "ci" } },
|
||||
{ INDEX_op_rotr_i64, { "r", "0", "ci" } },
|
||||
|
||||
|
@ -2196,25 +2295,34 @@ static void tcg_target_qemu_prologue(TCGContext *s)
|
|||
|
||||
static void tcg_target_init(TCGContext *s)
|
||||
{
|
||||
#if !(defined(have_cmov) && defined(have_movbe))
|
||||
{
|
||||
unsigned a, b, c, d;
|
||||
int ret = __get_cpuid(1, &a, &b, &c, &d);
|
||||
unsigned a, b, c, d;
|
||||
int max = __get_cpuid_max(0, 0);
|
||||
|
||||
# ifndef have_cmov
|
||||
if (max >= 1) {
|
||||
__cpuid(1, a, b, c, d);
|
||||
#ifndef have_cmov
|
||||
/* For 32-bit, 99% certainty that we're running on hardware that
|
||||
supports cmov, but we still need to check. In case cmov is not
|
||||
available, we'll use a small forward branch. */
|
||||
have_cmov = ret && (d & bit_CMOV);
|
||||
# endif
|
||||
|
||||
# ifndef have_movbe
|
||||
have_cmov = (d & bit_CMOV) != 0;
|
||||
#endif
|
||||
#ifndef have_movbe
|
||||
/* MOVBE is only available on Intel Atom and Haswell CPUs, so we
|
||||
need to probe for it. */
|
||||
have_movbe = ret && (c & bit_MOVBE);
|
||||
# endif
|
||||
}
|
||||
have_movbe = (c & bit_MOVBE) != 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (max >= 7) {
|
||||
/* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */
|
||||
__cpuid_count(7, 0, a, b, c, d);
|
||||
#ifdef bit_BMI
|
||||
have_bmi1 = (b & bit_BMI) != 0;
|
||||
#endif
|
||||
#ifndef have_bmi2
|
||||
have_bmi2 = (b & bit_BMI2) != 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (TCG_TARGET_REG_BITS == 64) {
|
||||
tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
|
||||
|
|
|
@ -64,9 +64,6 @@ typedef enum {
|
|||
TCG_REG_RDI = TCG_REG_EDI,
|
||||
} TCGReg;
|
||||
|
||||
#define TCG_CT_CONST_S32 0x100
|
||||
#define TCG_CT_CONST_U32 0x200
|
||||
|
||||
/* used for function call generation */
|
||||
#define TCG_REG_CALL_STACK TCG_REG_ESP
|
||||
#define TCG_TARGET_STACK_ALIGN 16
|
||||
|
@ -76,6 +73,8 @@ typedef enum {
|
|||
#define TCG_TARGET_CALL_STACK_OFFSET 0
|
||||
#endif
|
||||
|
||||
extern bool have_bmi1;
|
||||
|
||||
/* optional instructions */
|
||||
#define TCG_TARGET_HAS_div2_i32 1
|
||||
#define TCG_TARGET_HAS_rot_i32 1
|
||||
|
@ -87,7 +86,7 @@ typedef enum {
|
|||
#define TCG_TARGET_HAS_bswap32_i32 1
|
||||
#define TCG_TARGET_HAS_neg_i32 1
|
||||
#define TCG_TARGET_HAS_not_i32 1
|
||||
#define TCG_TARGET_HAS_andc_i32 0
|
||||
#define TCG_TARGET_HAS_andc_i32 have_bmi1
|
||||
#define TCG_TARGET_HAS_orc_i32 0
|
||||
#define TCG_TARGET_HAS_eqv_i32 0
|
||||
#define TCG_TARGET_HAS_nand_i32 0
|
||||
|
@ -115,7 +114,7 @@ typedef enum {
|
|||
#define TCG_TARGET_HAS_bswap64_i64 1
|
||||
#define TCG_TARGET_HAS_neg_i64 1
|
||||
#define TCG_TARGET_HAS_not_i64 1
|
||||
#define TCG_TARGET_HAS_andc_i64 0
|
||||
#define TCG_TARGET_HAS_andc_i64 have_bmi1
|
||||
#define TCG_TARGET_HAS_orc_i64 0
|
||||
#define TCG_TARGET_HAS_eqv_i64 0
|
||||
#define TCG_TARGET_HAS_nand_i64 0
|
||||
|
|
165
tcg/optimize.c
165
tcg/optimize.c
|
@ -655,11 +655,68 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
|
|||
}
|
||||
}
|
||||
break;
|
||||
CASE_OP_32_64(xor):
|
||||
CASE_OP_32_64(nand):
|
||||
if (temps[args[1]].state != TCG_TEMP_CONST
|
||||
&& temps[args[2]].state == TCG_TEMP_CONST
|
||||
&& temps[args[2]].val == -1) {
|
||||
i = 1;
|
||||
goto try_not;
|
||||
}
|
||||
break;
|
||||
CASE_OP_32_64(nor):
|
||||
if (temps[args[1]].state != TCG_TEMP_CONST
|
||||
&& temps[args[2]].state == TCG_TEMP_CONST
|
||||
&& temps[args[2]].val == 0) {
|
||||
i = 1;
|
||||
goto try_not;
|
||||
}
|
||||
break;
|
||||
CASE_OP_32_64(andc):
|
||||
if (temps[args[2]].state != TCG_TEMP_CONST
|
||||
&& temps[args[1]].state == TCG_TEMP_CONST
|
||||
&& temps[args[1]].val == -1) {
|
||||
i = 2;
|
||||
goto try_not;
|
||||
}
|
||||
break;
|
||||
CASE_OP_32_64(orc):
|
||||
CASE_OP_32_64(eqv):
|
||||
if (temps[args[2]].state != TCG_TEMP_CONST
|
||||
&& temps[args[1]].state == TCG_TEMP_CONST
|
||||
&& temps[args[1]].val == 0) {
|
||||
i = 2;
|
||||
goto try_not;
|
||||
}
|
||||
break;
|
||||
try_not:
|
||||
{
|
||||
TCGOpcode not_op;
|
||||
bool have_not;
|
||||
|
||||
if (def->flags & TCG_OPF_64BIT) {
|
||||
not_op = INDEX_op_not_i64;
|
||||
have_not = TCG_TARGET_HAS_not_i64;
|
||||
} else {
|
||||
not_op = INDEX_op_not_i32;
|
||||
have_not = TCG_TARGET_HAS_not_i32;
|
||||
}
|
||||
if (!have_not) {
|
||||
break;
|
||||
}
|
||||
s->gen_opc_buf[op_index] = not_op;
|
||||
reset_temp(args[0]);
|
||||
gen_args[0] = args[0];
|
||||
gen_args[1] = args[i];
|
||||
args += 3;
|
||||
gen_args += 2;
|
||||
continue;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* Simplify expression for "op r, a, 0 => mov r, a" cases */
|
||||
/* Simplify expression for "op r, a, const => mov r, a" cases */
|
||||
switch (op) {
|
||||
CASE_OP_32_64(add):
|
||||
CASE_OP_32_64(sub):
|
||||
|
@ -670,28 +727,38 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
|
|||
CASE_OP_32_64(rotr):
|
||||
CASE_OP_32_64(or):
|
||||
CASE_OP_32_64(xor):
|
||||
if (temps[args[1]].state == TCG_TEMP_CONST) {
|
||||
/* Proceed with possible constant folding. */
|
||||
break;
|
||||
}
|
||||
if (temps[args[2]].state == TCG_TEMP_CONST
|
||||
CASE_OP_32_64(andc):
|
||||
if (temps[args[1]].state != TCG_TEMP_CONST
|
||||
&& temps[args[2]].state == TCG_TEMP_CONST
|
||||
&& temps[args[2]].val == 0) {
|
||||
if (temps_are_copies(args[0], args[1])) {
|
||||
s->gen_opc_buf[op_index] = INDEX_op_nop;
|
||||
} else {
|
||||
s->gen_opc_buf[op_index] = op_to_mov(op);
|
||||
tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
|
||||
gen_args += 2;
|
||||
}
|
||||
args += 3;
|
||||
continue;
|
||||
goto do_mov3;
|
||||
}
|
||||
break;
|
||||
CASE_OP_32_64(and):
|
||||
CASE_OP_32_64(orc):
|
||||
CASE_OP_32_64(eqv):
|
||||
if (temps[args[1]].state != TCG_TEMP_CONST
|
||||
&& temps[args[2]].state == TCG_TEMP_CONST
|
||||
&& temps[args[2]].val == -1) {
|
||||
goto do_mov3;
|
||||
}
|
||||
break;
|
||||
do_mov3:
|
||||
if (temps_are_copies(args[0], args[1])) {
|
||||
s->gen_opc_buf[op_index] = INDEX_op_nop;
|
||||
} else {
|
||||
s->gen_opc_buf[op_index] = op_to_mov(op);
|
||||
tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
|
||||
gen_args += 2;
|
||||
}
|
||||
args += 3;
|
||||
continue;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* Simplify using known-zero bits */
|
||||
/* Simplify using known-zero bits. Currently only ops with a single
|
||||
output argument are supported. */
|
||||
mask = -1;
|
||||
affected = -1;
|
||||
switch (op) {
|
||||
|
@ -726,16 +793,36 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
|
|||
mask = temps[args[1]].mask & mask;
|
||||
break;
|
||||
|
||||
CASE_OP_32_64(sar):
|
||||
CASE_OP_32_64(andc):
|
||||
/* Known-zeros does not imply known-ones. Therefore unless
|
||||
args[2] is constant, we can't infer anything from it. */
|
||||
if (temps[args[2]].state == TCG_TEMP_CONST) {
|
||||
mask = ((tcg_target_long)temps[args[1]].mask
|
||||
>> temps[args[2]].val);
|
||||
mask = ~temps[args[2]].mask;
|
||||
goto and_const;
|
||||
}
|
||||
/* But we certainly know nothing outside args[1] may be set. */
|
||||
mask = temps[args[1]].mask;
|
||||
break;
|
||||
|
||||
case INDEX_op_sar_i32:
|
||||
if (temps[args[2]].state == TCG_TEMP_CONST) {
|
||||
mask = (int32_t)temps[args[1]].mask >> temps[args[2]].val;
|
||||
}
|
||||
break;
|
||||
case INDEX_op_sar_i64:
|
||||
if (temps[args[2]].state == TCG_TEMP_CONST) {
|
||||
mask = (int64_t)temps[args[1]].mask >> temps[args[2]].val;
|
||||
}
|
||||
break;
|
||||
|
||||
CASE_OP_32_64(shr):
|
||||
case INDEX_op_shr_i32:
|
||||
if (temps[args[2]].state == TCG_TEMP_CONST) {
|
||||
mask = temps[args[1]].mask >> temps[args[2]].val;
|
||||
mask = (uint32_t)temps[args[1]].mask >> temps[args[2]].val;
|
||||
}
|
||||
break;
|
||||
case INDEX_op_shr_i64:
|
||||
if (temps[args[2]].state == TCG_TEMP_CONST) {
|
||||
mask = (uint64_t)temps[args[1]].mask >> temps[args[2]].val;
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -769,10 +856,40 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
|
|||
mask = temps[args[3]].mask | temps[args[4]].mask;
|
||||
break;
|
||||
|
||||
CASE_OP_32_64(ld8u):
|
||||
case INDEX_op_qemu_ld8u:
|
||||
mask = 0xff;
|
||||
break;
|
||||
CASE_OP_32_64(ld16u):
|
||||
case INDEX_op_qemu_ld16u:
|
||||
mask = 0xffff;
|
||||
break;
|
||||
case INDEX_op_ld32u_i64:
|
||||
#if TCG_TARGET_REG_BITS == 64
|
||||
case INDEX_op_qemu_ld32u:
|
||||
#endif
|
||||
mask = 0xffffffffu;
|
||||
break;
|
||||
|
||||
CASE_OP_32_64(qemu_ld):
|
||||
{
|
||||
TCGMemOp mop = args[def->nb_oargs + def->nb_iargs];
|
||||
if (!(mop & MO_SIGN)) {
|
||||
mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* 32-bit ops (non 64-bit ops and non load/store ops) generate 32-bit
|
||||
results */
|
||||
if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_64BIT))) {
|
||||
mask &= 0xffffffffu;
|
||||
}
|
||||
|
||||
if (mask == 0) {
|
||||
assert(def->nb_oargs == 1);
|
||||
s->gen_opc_buf[op_index] = op_to_movi(op);
|
||||
|
@ -839,6 +956,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
|
|||
|
||||
/* Simplify expression for "op r, a, a => movi r, 0" cases */
|
||||
switch (op) {
|
||||
CASE_OP_32_64(andc):
|
||||
CASE_OP_32_64(sub):
|
||||
CASE_OP_32_64(xor):
|
||||
if (temps_are_copies(args[1], args[2])) {
|
||||
|
@ -1140,6 +1258,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
|
|||
} else {
|
||||
for (i = 0; i < def->nb_oargs; i++) {
|
||||
reset_temp(args[i]);
|
||||
/* Save the corresponding known-zero bits mask for the
|
||||
first output argument (only one supported so far). */
|
||||
if (i == 0) {
|
||||
temps[args[i]].mask = mask;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (i = 0; i < def->nb_args; i++) {
|
||||
|
|
Loading…
Reference in New Issue