tcg/optimize: rework copy progagation

The copy propagation pass tries to keep track what is a copy of what
and what has copy of what, and in addition it keep a circular list of
of all the copies. Unfortunately this doesn't fully work: a mov from
a temp which has a state "COPY" changed it into a state "HAS_COPY".
Later when this temp is used again, it is considered has not having
copy and thus no propagation is done.

This patch fixes that by removing the hiearchy between copies, and thus
only keeping a "COPY" state both meaning "is a copy" and "has a copy".
The decision of which copy to use is deferred to the actual temp
replacement. At this stage there is not one best choice to do, but only
better choices than others. For doing the best choice the operation
would have to be parsed in reversed to know if a temp is going to be
used later or not. That what is done by the liveness analysis. At this
stage it is known that globals will be always live, that local temps
will be dead at the end of the translation block, and that the temps
will be dead at the end of the basic block. This means that this stage
should try to replace temps by local temps or globals and local temps
by globals.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
This commit is contained in:
Aurelien Jarno 2012-09-11 12:31:21 +02:00
parent b80bb016d8
commit e590d4e6b3
1 changed files with 92 additions and 75 deletions

View File

@ -39,7 +39,6 @@ typedef enum {
TCG_TEMP_UNDEF = 0,
TCG_TEMP_CONST,
TCG_TEMP_COPY,
TCG_TEMP_HAS_COPY
} tcg_temp_state;
struct tcg_temp_info {
@ -51,39 +50,19 @@ struct tcg_temp_info {
static struct tcg_temp_info temps[TCG_MAX_TEMPS];
/* Reset TEMP's state to TCG_TEMP_UNDEF. If TEMP was a representative of some
class of equivalent temp's, a new representative should be chosen in this
class. */
static void reset_temp(TCGArg temp, int nb_temps, int nb_globals)
/* Reset TEMP's state to TCG_TEMP_UNDEF. If TEMP only had one copy, remove
the copy flag from the left temp. */
static void reset_temp(TCGArg temp)
{
int i;
TCGArg new_base = (TCGArg)-1;
if (temps[temp].state == TCG_TEMP_HAS_COPY) {
for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
if (i >= nb_globals) {
temps[i].state = TCG_TEMP_HAS_COPY;
new_base = i;
break;
}
if (temps[temp].state == TCG_TEMP_COPY) {
if (temps[temp].prev_copy == temps[temp].next_copy) {
temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
} else {
temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
}
for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
if (new_base == (TCGArg)-1) {
temps[i].state = TCG_TEMP_UNDEF;
} else {
temps[i].val = new_base;
}
}
temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
} else if (temps[temp].state == TCG_TEMP_COPY) {
temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
new_base = temps[temp].val;
}
temps[temp].state = TCG_TEMP_UNDEF;
if (new_base != (TCGArg)-1 && temps[new_base].next_copy == new_base) {
temps[new_base].state = TCG_TEMP_UNDEF;
}
}
static int op_bits(TCGOpcode op)
@ -106,34 +85,83 @@ static TCGOpcode op_to_movi(TCGOpcode op)
}
}
static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
{
TCGArg i;
/* If this is already a global, we can't do better. */
if (temp < s->nb_globals) {
return temp;
}
/* Search for a global first. */
for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
if (i < s->nb_globals) {
return i;
}
}
/* If it is a temp, search for a temp local. */
if (!s->temps[temp].temp_local) {
for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
if (s->temps[i].temp_local) {
return i;
}
}
}
/* Failure to find a better representation, return the same temp. */
return temp;
}
static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
{
TCGArg i;
if (arg1 == arg2) {
return true;
}
if (temps[arg1].state != TCG_TEMP_COPY
|| temps[arg2].state != TCG_TEMP_COPY) {
return false;
}
for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
if (i == arg2) {
return true;
}
}
return false;
}
static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
TCGArg dst, TCGArg src)
{
reset_temp(dst, s->nb_temps, s->nb_globals);
assert(temps[src].state != TCG_TEMP_COPY);
/* Only consider temps with the same type (width) as copies. */
if (src >= s->nb_globals && s->temps[dst].type == s->temps[src].type) {
assert(temps[src].state != TCG_TEMP_CONST);
if (temps[src].state != TCG_TEMP_HAS_COPY) {
temps[src].state = TCG_TEMP_HAS_COPY;
reset_temp(dst);
assert(temps[src].state != TCG_TEMP_CONST);
if (s->temps[src].type == s->temps[dst].type) {
if (temps[src].state != TCG_TEMP_COPY) {
temps[src].state = TCG_TEMP_COPY;
temps[src].next_copy = src;
temps[src].prev_copy = src;
}
temps[dst].state = TCG_TEMP_COPY;
temps[dst].val = src;
temps[dst].next_copy = temps[src].next_copy;
temps[dst].prev_copy = src;
temps[temps[dst].next_copy].prev_copy = dst;
temps[src].next_copy = dst;
}
gen_args[0] = dst;
gen_args[1] = src;
}
static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val,
int nb_temps, int nb_globals)
static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
{
reset_temp(dst, nb_temps, nb_globals);
reset_temp(dst);
temps[dst].state = TCG_TEMP_CONST;
temps[dst].val = val;
gen_args[0] = dst;
@ -324,7 +352,6 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
tcg_abort();
}
/* Propagate constants and copies, fold constant expressions. */
static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
TCGArg *args, TCGOpDef *tcg_op_defs)
@ -338,10 +365,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
/* Array VALS has an element for each temp.
If this temp holds a constant then its value is kept in VALS' element.
If this temp is a copy of other ones then this equivalence class'
representative is kept in VALS' element.
If this temp is neither copy nor constant then corresponding VALS'
element is unused. */
If this temp is a copy of other ones then the other copies are
available through the doubly linked circular list. */
nb_temps = s->nb_temps;
nb_globals = s->nb_globals;
@ -357,7 +382,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
assert(op != INDEX_op_call);
for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
if (temps[args[i]].state == TCG_TEMP_COPY) {
args[i] = temps[args[i]].val;
args[i] = find_better_copy(s, args[i]);
}
}
}
@ -429,7 +454,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
if (temps[args[1]].state == TCG_TEMP_CONST
&& temps[args[1]].val == 0) {
gen_opc_buf[op_index] = op_to_movi(op);
tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
tcg_opt_gen_movi(gen_args, args[0], 0);
args += 3;
gen_args += 2;
continue;
@ -456,9 +481,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
if (temps[args[2]].state == TCG_TEMP_CONST
&& temps[args[2]].val == 0) {
if ((temps[args[0]].state == TCG_TEMP_COPY
&& temps[args[0]].val == args[1])
|| args[0] == args[1]) {
if (temps_are_copies(args[0], args[1])) {
gen_opc_buf[op_index] = INDEX_op_nop;
} else {
gen_opc_buf[op_index] = op_to_mov(op);
@ -480,7 +503,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
if ((temps[args[2]].state == TCG_TEMP_CONST
&& temps[args[2]].val == 0)) {
gen_opc_buf[op_index] = op_to_movi(op);
tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
tcg_opt_gen_movi(gen_args, args[0], 0);
args += 3;
gen_args += 2;
continue;
@ -495,7 +518,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
CASE_OP_32_64(or):
CASE_OP_32_64(and):
if (args[1] == args[2]) {
if (args[1] == args[0]) {
if (temps_are_copies(args[0], args[1])) {
gen_opc_buf[op_index] = INDEX_op_nop;
} else {
gen_opc_buf[op_index] = op_to_mov(op);
@ -515,9 +538,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
allocator where needed and possible. Also detect copies. */
switch (op) {
CASE_OP_32_64(mov):
if ((temps[args[1]].state == TCG_TEMP_COPY
&& temps[args[1]].val == args[0])
|| args[0] == args[1]) {
if (temps_are_copies(args[0], args[1])) {
args += 2;
gen_opc_buf[op_index] = INDEX_op_nop;
break;
@ -535,7 +556,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
args[1] = temps[args[1]].val;
/* fallthrough */
CASE_OP_32_64(movi):
tcg_opt_gen_movi(gen_args, args[0], args[1], nb_temps, nb_globals);
tcg_opt_gen_movi(gen_args, args[0], args[1]);
gen_args += 2;
args += 2;
break;
@ -550,9 +571,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
if (temps[args[1]].state == TCG_TEMP_CONST) {
gen_opc_buf[op_index] = op_to_movi(op);
tmp = do_constant_folding(op, temps[args[1]].val, 0);
tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
tcg_opt_gen_movi(gen_args, args[0], tmp);
} else {
reset_temp(args[0], nb_temps, nb_globals);
reset_temp(args[0]);
gen_args[0] = args[0];
gen_args[1] = args[1];
}
@ -580,10 +601,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
gen_opc_buf[op_index] = op_to_movi(op);
tmp = do_constant_folding(op, temps[args[1]].val,
temps[args[2]].val);
tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
tcg_opt_gen_movi(gen_args, args[0], tmp);
gen_args += 2;
} else {
reset_temp(args[0], nb_temps, nb_globals);
reset_temp(args[0]);
gen_args[0] = args[0];
gen_args[1] = args[1];
gen_args[2] = args[2];
@ -597,10 +618,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
gen_opc_buf[op_index] = op_to_movi(op);
tmp = do_constant_folding_cond(op, temps[args[1]].val,
temps[args[2]].val, args[3]);
tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
tcg_opt_gen_movi(gen_args, args[0], tmp);
gen_args += 2;
} else {
reset_temp(args[0], nb_temps, nb_globals);
reset_temp(args[0]);
gen_args[0] = args[0];
gen_args[1] = args[1];
gen_args[2] = args[2];
@ -623,7 +644,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
} else {
memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
reset_temp(args[0], nb_temps, nb_globals);
reset_temp(args[0]);
gen_args[0] = args[0];
gen_args[1] = args[1];
gen_args[2] = args[2];
@ -637,23 +658,19 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
&& temps[args[2]].state == TCG_TEMP_CONST) {
tmp = do_constant_folding_cond(op, temps[args[1]].val,
temps[args[2]].val, args[5]);
if (args[0] == args[4-tmp]
|| (temps[args[4-tmp]].state == TCG_TEMP_COPY
&& temps[args[4-tmp]].val == args[0])) {
if (temps_are_copies(args[0], args[4-tmp])) {
gen_opc_buf[op_index] = INDEX_op_nop;
} else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
gen_opc_buf[op_index] = op_to_movi(op);
tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val,
nb_temps, nb_globals);
tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val);
gen_args += 2;
} else {
gen_opc_buf[op_index] = op_to_mov(op);
tcg_opt_gen_mov(gen_args, args[0], args[4-tmp],
nb_temps, nb_globals);
tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
gen_args += 2;
}
} else {
reset_temp(args[0], nb_temps, nb_globals);
reset_temp(args[0]);
gen_args[0] = args[0];
gen_args[1] = args[1];
gen_args[2] = args[2];
@ -668,11 +685,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
for (i = 0; i < nb_globals; i++) {
reset_temp(i, nb_temps, nb_globals);
reset_temp(i);
}
}
for (i = 0; i < (args[0] >> 16); i++) {
reset_temp(args[i + 1], nb_temps, nb_globals);
reset_temp(args[i + 1]);
}
i = nb_call_args + 3;
while (i) {
@ -691,7 +708,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
} else {
for (i = 0; i < def->nb_oargs; i++) {
reset_temp(args[i], nb_temps, nb_globals);
reset_temp(args[i]);
}
}
for (i = 0; i < def->nb_args; i++) {