• R/O
  • HTTP
  • SSH
  • HTTPS

Commit

Tags
Aucun tag

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中(planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer

Commit MetaInfo

Révision: 5a18407f55ade924aa6397c9a043a9ffd59645fe (tree)
l'heure: 2016-08-06 01:14:40
Auteur: Richard Henderson <rth@twid...>
Commiter: Richard Henderson

Message de Log

tcg: Lower indirect registers in a separate pass

Rather than rely on recursion during the middle of register allocation,
lower indirect registers to loads and stores off the indirect base into
plain temps.

For an x86_64 host, with sufficient registers, this results in identical
code, modulo the actual register assignments.

For an i686 host, with insufficient registers, this means that temps can
be (temporarily) spilled to the stack in order to satisfy an allocation.
This as opposed to the possibility of not being able to spill, to allocate
a register for the indirect base, in order to perform a spill.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>

Change Summary

Modification

--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -42,6 +42,7 @@ static inline bool qemu_log_separate(void)
4242 #define CPU_LOG_TB_NOCHAIN (1 << 13)
4343 #define CPU_LOG_PAGE (1 << 14)
4444 #define LOG_TRACE (1 << 15)
45+#define CPU_LOG_TB_OP_IND (1 << 16)
4546
4647 /* Returns true if a bit is set in the current loglevel mask
4748 */
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -82,33 +82,6 @@ static void init_temp_info(TCGArg temp)
8282 }
8383 }
8484
85-static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op,
86- TCGOpcode opc, int nargs)
87-{
88- int oi = s->gen_next_op_idx;
89- int pi = s->gen_next_parm_idx;
90- int prev = old_op->prev;
91- int next = old_op - s->gen_op_buf;
92- TCGOp *new_op;
93-
94- tcg_debug_assert(oi < OPC_BUF_SIZE);
95- tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
96- s->gen_next_op_idx = oi + 1;
97- s->gen_next_parm_idx = pi + nargs;
98-
99- new_op = &s->gen_op_buf[oi];
100- *new_op = (TCGOp){
101- .opc = opc,
102- .args = pi,
103- .prev = prev,
104- .next = next
105- };
106- s->gen_op_buf[prev].next = oi;
107- old_op->prev = oi;
108-
109- return new_op;
110-}
111-
11285 static int op_bits(TCGOpcode op)
11386 {
11487 const TCGOpDef *def = &tcg_op_defs[op];
@@ -1116,7 +1089,7 @@ void tcg_optimize(TCGContext *s)
11161089 uint64_t a = ((uint64_t)ah << 32) | al;
11171090 uint64_t b = ((uint64_t)bh << 32) | bl;
11181091 TCGArg rl, rh;
1119- TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
1092+ TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
11201093 TCGArg *args2 = &s->gen_opparam_buf[op2->args];
11211094
11221095 if (opc == INDEX_op_add2_i32) {
@@ -1142,7 +1115,7 @@ void tcg_optimize(TCGContext *s)
11421115 uint32_t b = temps[args[3]].val;
11431116 uint64_t r = (uint64_t)a * b;
11441117 TCGArg rl, rh;
1145- TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
1118+ TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
11461119 TCGArg *args2 = &s->gen_opparam_buf[op2->args];
11471120
11481121 rl = args[0];
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -531,8 +531,12 @@ int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
531531 #endif
532532
533533 if (!base_ts->fixed_reg) {
534- indirect_reg = 1;
534+ /* We do not support double-indirect registers. */
535+ tcg_debug_assert(!base_ts->indirect_reg);
535536 base_ts->indirect_base = 1;
537+ s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
538+ ? 2 : 1);
539+ indirect_reg = 1;
536540 }
537541
538542 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
@@ -1336,9 +1340,66 @@ void tcg_op_remove(TCGContext *s, TCGOp *op)
13361340 #endif
13371341 }
13381342
1343+TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
1344+ TCGOpcode opc, int nargs)
1345+{
1346+ int oi = s->gen_next_op_idx;
1347+ int pi = s->gen_next_parm_idx;
1348+ int prev = old_op->prev;
1349+ int next = old_op - s->gen_op_buf;
1350+ TCGOp *new_op;
1351+
1352+ tcg_debug_assert(oi < OPC_BUF_SIZE);
1353+ tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
1354+ s->gen_next_op_idx = oi + 1;
1355+ s->gen_next_parm_idx = pi + nargs;
1356+
1357+ new_op = &s->gen_op_buf[oi];
1358+ *new_op = (TCGOp){
1359+ .opc = opc,
1360+ .args = pi,
1361+ .prev = prev,
1362+ .next = next
1363+ };
1364+ s->gen_op_buf[prev].next = oi;
1365+ old_op->prev = oi;
1366+
1367+ return new_op;
1368+}
1369+
1370+TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
1371+ TCGOpcode opc, int nargs)
1372+{
1373+ int oi = s->gen_next_op_idx;
1374+ int pi = s->gen_next_parm_idx;
1375+ int prev = old_op - s->gen_op_buf;
1376+ int next = old_op->next;
1377+ TCGOp *new_op;
1378+
1379+ tcg_debug_assert(oi < OPC_BUF_SIZE);
1380+ tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
1381+ s->gen_next_op_idx = oi + 1;
1382+ s->gen_next_parm_idx = pi + nargs;
1383+
1384+ new_op = &s->gen_op_buf[oi];
1385+ *new_op = (TCGOp){
1386+ .opc = opc,
1387+ .args = pi,
1388+ .prev = prev,
1389+ .next = next
1390+ };
1391+ s->gen_op_buf[next].prev = oi;
1392+ old_op->next = oi;
1393+
1394+ return new_op;
1395+}
1396+
13391397 #define TS_DEAD 1
13401398 #define TS_MEM 2
13411399
1400+#define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
1401+#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
1402+
13421403 /* liveness analysis: end of function: all temps are dead, and globals
13431404 should be in memory. */
13441405 static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state)
@@ -1364,13 +1425,11 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state)
13641425 /* Liveness analysis : update the opc_arg_life array to tell if a
13651426 given input arguments is dead. Instructions updating dead
13661427 temporaries are removed. */
1367-static void tcg_liveness_analysis(TCGContext *s)
1428+static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
13681429 {
1369- uint8_t *temp_state;
1370- int oi, oi_prev;
13711430 int nb_globals = s->nb_globals;
1431+ int oi, oi_prev;
13721432
1373- temp_state = tcg_malloc(s->nb_temps);
13741433 tcg_la_func_end(s, temp_state);
13751434
13761435 for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
@@ -1593,6 +1652,165 @@ static void tcg_liveness_analysis(TCGContext *s)
15931652 }
15941653 }
15951654
1655+/* Liveness analysis: Convert indirect regs to direct temporaries. */
1656+static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
1657+{
1658+ int nb_globals = s->nb_globals;
1659+ int16_t *dir_temps;
1660+ int i, oi, oi_next;
1661+ bool changes = false;
1662+
1663+ dir_temps = tcg_malloc(nb_globals * sizeof(int16_t));
1664+ memset(dir_temps, 0, nb_globals * sizeof(int16_t));
1665+
1666+ /* Create a temporary for each indirect global. */
1667+ for (i = 0; i < nb_globals; ++i) {
1668+ TCGTemp *its = &s->temps[i];
1669+ if (its->indirect_reg) {
1670+ TCGTemp *dts = tcg_temp_alloc(s);
1671+ dts->type = its->type;
1672+ dts->base_type = its->base_type;
1673+ dir_temps[i] = temp_idx(s, dts);
1674+ }
1675+ }
1676+
1677+ memset(temp_state, TS_DEAD, nb_globals);
1678+
1679+ for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
1680+ TCGOp *op = &s->gen_op_buf[oi];
1681+ TCGArg *args = &s->gen_opparam_buf[op->args];
1682+ TCGOpcode opc = op->opc;
1683+ const TCGOpDef *def = &tcg_op_defs[opc];
1684+ TCGLifeData arg_life = op->life;
1685+ int nb_iargs, nb_oargs, call_flags;
1686+ TCGArg arg, dir;
1687+
1688+ oi_next = op->next;
1689+
1690+ if (opc == INDEX_op_call) {
1691+ nb_oargs = op->callo;
1692+ nb_iargs = op->calli;
1693+ call_flags = args[nb_oargs + nb_iargs + 1];
1694+ } else {
1695+ nb_iargs = def->nb_iargs;
1696+ nb_oargs = def->nb_oargs;
1697+
1698+ /* Set flags similar to how calls require. */
1699+ if (def->flags & TCG_OPF_BB_END) {
1700+ /* Like writing globals: save_globals */
1701+ call_flags = 0;
1702+ } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
1703+ /* Like reading globals: sync_globals */
1704+ call_flags = TCG_CALL_NO_WRITE_GLOBALS;
1705+ } else {
1706+ /* No effect on globals. */
1707+ call_flags = (TCG_CALL_NO_READ_GLOBALS |
1708+ TCG_CALL_NO_WRITE_GLOBALS);
1709+ }
1710+ }
1711+
1712+ /* Make sure that input arguments are available. */
1713+ for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
1714+ arg = args[i];
1715+ /* Note this unsigned test catches TCG_CALL_ARG_DUMMY too. */
1716+ if (arg < nb_globals) {
1717+ dir = dir_temps[arg];
1718+ if (dir != 0 && temp_state[arg] == TS_DEAD) {
1719+ TCGTemp *its = &s->temps[arg];
1720+ TCGOpcode lopc = (its->type == TCG_TYPE_I32
1721+ ? INDEX_op_ld_i32
1722+ : INDEX_op_ld_i64);
1723+ TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
1724+ TCGArg *largs = &s->gen_opparam_buf[lop->args];
1725+
1726+ largs[0] = dir;
1727+ largs[1] = temp_idx(s, its->mem_base);
1728+ largs[2] = its->mem_offset;
1729+
1730+ /* Loaded, but synced with memory. */
1731+ temp_state[arg] = TS_MEM;
1732+ }
1733+ }
1734+ }
1735+
1736+ /* Perform input replacement, and mark inputs that became dead.
1737+ No action is required except keeping temp_state up to date
1738+ so that we reload when needed. */
1739+ for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
1740+ arg = args[i];
1741+ if (arg < nb_globals) {
1742+ dir = dir_temps[arg];
1743+ if (dir != 0) {
1744+ args[i] = dir;
1745+ changes = true;
1746+ if (IS_DEAD_ARG(i)) {
1747+ temp_state[arg] = TS_DEAD;
1748+ }
1749+ }
1750+ }
1751+ }
1752+
1753+ /* Liveness analysis should ensure that the following are
1754+ all correct, for call sites and basic block end points. */
1755+ if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
1756+ /* Nothing to do */
1757+ } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
1758+ for (i = 0; i < nb_globals; ++i) {
1759+ /* Liveness should see that globals are synced back,
1760+ that is, either TS_DEAD or TS_MEM. */
1761+ tcg_debug_assert(dir_temps[i] == 0
1762+ || temp_state[i] != 0);
1763+ }
1764+ } else {
1765+ for (i = 0; i < nb_globals; ++i) {
1766+ /* Liveness should see that globals are saved back,
1767+ that is, TS_DEAD, waiting to be reloaded. */
1768+ tcg_debug_assert(dir_temps[i] == 0
1769+ || temp_state[i] == TS_DEAD);
1770+ }
1771+ }
1772+
1773+ /* Outputs become available. */
1774+ for (i = 0; i < nb_oargs; i++) {
1775+ arg = args[i];
1776+ if (arg >= nb_globals) {
1777+ continue;
1778+ }
1779+ dir = dir_temps[arg];
1780+ if (dir == 0) {
1781+ continue;
1782+ }
1783+ args[i] = dir;
1784+ changes = true;
1785+
1786+ /* The output is now live and modified. */
1787+ temp_state[arg] = 0;
1788+
1789+ /* Sync outputs upon their last write. */
1790+ if (NEED_SYNC_ARG(i)) {
1791+ TCGTemp *its = &s->temps[arg];
1792+ TCGOpcode sopc = (its->type == TCG_TYPE_I32
1793+ ? INDEX_op_st_i32
1794+ : INDEX_op_st_i64);
1795+ TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
1796+ TCGArg *sargs = &s->gen_opparam_buf[sop->args];
1797+
1798+ sargs[0] = dir;
1799+ sargs[1] = temp_idx(s, its->mem_base);
1800+ sargs[2] = its->mem_offset;
1801+
1802+ temp_state[arg] = TS_MEM;
1803+ }
1804+ /* Drop outputs that are dead. */
1805+ if (IS_DEAD_ARG(i)) {
1806+ temp_state[arg] = TS_DEAD;
1807+ }
1808+ }
1809+ }
1810+
1811+ return changes;
1812+}
1813+
15961814 #ifdef CONFIG_DEBUG_TCG
15971815 static void dump_regs(TCGContext *s)
15981816 {
@@ -1723,14 +1941,6 @@ static void temp_sync(TCGContext *s, TCGTemp *ts,
17231941 if (!ts->mem_allocated) {
17241942 temp_allocate_frame(s, temp_idx(s, ts));
17251943 }
1726- if (ts->indirect_reg) {
1727- if (ts->val_type == TEMP_VAL_REG) {
1728- tcg_regset_set_reg(allocated_regs, ts->reg);
1729- }
1730- temp_load(s, ts->mem_base,
1731- tcg_target_available_regs[TCG_TYPE_PTR],
1732- allocated_regs);
1733- }
17341944 switch (ts->val_type) {
17351945 case TEMP_VAL_CONST:
17361946 /* If we're going to free the temp immediately, then we won't
@@ -1821,12 +2031,6 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
18212031 break;
18222032 case TEMP_VAL_MEM:
18232033 reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
1824- if (ts->indirect_reg) {
1825- tcg_regset_set_reg(allocated_regs, reg);
1826- temp_load(s, ts->mem_base,
1827- tcg_target_available_regs[TCG_TYPE_PTR],
1828- allocated_regs);
1829- }
18302034 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
18312035 ts->mem_coherent = 1;
18322036 break;
@@ -1843,14 +2047,9 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
18432047 temporary registers needs to be allocated to store a constant. */
18442048 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
18452049 {
1846- /* ??? Liveness does not yet incorporate indirect bases. */
1847- if (!ts->indirect_base) {
1848- /* The liveness analysis already ensures that globals are back
1849- in memory. Keep an tcg_debug_assert for safety. */
1850- tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
1851- return;
1852- }
1853- temp_sync(s, ts, allocated_regs, 1);
2050+ /* The liveness analysis already ensures that globals are back
2051+ in memory. Keep an tcg_debug_assert for safety. */
2052+ tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
18542053 }
18552054
18562055 /* save globals to their canonical location and assume they can be
@@ -1874,14 +2073,9 @@ static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
18742073
18752074 for (i = 0; i < s->nb_globals; i++) {
18762075 TCGTemp *ts = &s->temps[i];
1877- /* ??? Liveness does not yet incorporate indirect bases. */
1878- if (!ts->indirect_base) {
1879- tcg_debug_assert(ts->val_type != TEMP_VAL_REG
1880- || ts->fixed_reg
1881- || ts->mem_coherent);
1882- continue;
1883- }
1884- temp_sync(s, ts, allocated_regs, 0);
2076+ tcg_debug_assert(ts->val_type != TEMP_VAL_REG
2077+ || ts->fixed_reg
2078+ || ts->mem_coherent);
18852079 }
18862080 }
18872081
@@ -1896,23 +2090,15 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
18962090 if (ts->temp_local) {
18972091 temp_save(s, ts, allocated_regs);
18982092 } else {
1899- /* ??? Liveness does not yet incorporate indirect bases. */
1900- if (!ts->indirect_base) {
1901- /* The liveness analysis already ensures that temps are dead.
1902- Keep an tcg_debug_assert for safety. */
1903- tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
1904- continue;
1905- }
1906- temp_dead(s, ts);
2093+ /* The liveness analysis already ensures that temps are dead.
2094+ Keep an tcg_debug_assert for safety. */
2095+ tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
19072096 }
19082097 }
19092098
19102099 save_globals(s, allocated_regs);
19112100 }
19122101
1913-#define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
1914-#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
1915-
19162102 static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
19172103 TCGLifeData arg_life)
19182104 {
@@ -1975,12 +2161,6 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
19752161 if (!ots->mem_allocated) {
19762162 temp_allocate_frame(s, args[0]);
19772163 }
1978- if (ots->indirect_reg) {
1979- tcg_regset_set_reg(allocated_regs, ts->reg);
1980- temp_load(s, ots->mem_base,
1981- tcg_target_available_regs[TCG_TYPE_PTR],
1982- allocated_regs);
1983- }
19842164 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
19852165 if (IS_DEAD_ARG(1)) {
19862166 temp_dead(s, ts);
@@ -2385,7 +2565,27 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
23852565 s->la_time -= profile_getclock();
23862566 #endif
23872567
2388- tcg_liveness_analysis(s);
2568+ {
2569+ uint8_t *temp_state = tcg_malloc(s->nb_temps + s->nb_indirects);
2570+
2571+ liveness_pass_1(s, temp_state);
2572+
2573+ if (s->nb_indirects > 0) {
2574+#ifdef DEBUG_DISAS
2575+ if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
2576+ && qemu_log_in_addr_range(tb->pc))) {
2577+ qemu_log("OP before indirect lowering:\n");
2578+ tcg_dump_ops(s);
2579+ qemu_log("\n");
2580+ }
2581+#endif
2582+ /* Replace indirect temps with direct temps. */
2583+ if (liveness_pass_2(s, temp_state)) {
2584+ /* If changes were made, re-run liveness. */
2585+ liveness_pass_1(s, temp_state);
2586+ }
2587+ }
2588+ }
23892589
23902590 #ifdef CONFIG_PROFILER
23912591 s->la_time += profile_getclock();
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -617,6 +617,7 @@ struct TCGContext {
617617 int nb_labels;
618618 int nb_globals;
619619 int nb_temps;
620+ int nb_indirects;
620621
621622 /* goto_tb support */
622623 tcg_insn_unit *code_buf;
@@ -898,6 +899,9 @@ void tcg_gen_callN(TCGContext *s, void *func,
898899 TCGArg ret, int nargs, TCGArg *args);
899900
900901 void tcg_op_remove(TCGContext *s, TCGOp *op);
902+TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg);
903+TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg);
904+
901905 void tcg_optimize(TCGContext *s);
902906
903907 /* only used for debugging purposes */
--- a/util/log.c
+++ b/util/log.c
@@ -247,8 +247,9 @@ const QEMULogItem qemu_log_items[] = {
247247 { CPU_LOG_TB_OP, "op",
248248 "show micro ops for each compiled TB" },
249249 { CPU_LOG_TB_OP_OPT, "op_opt",
250- "show micro ops (x86 only: before eflags optimization) and\n"
251- "after liveness analysis" },
250+ "show micro ops after optimization" },
251+ { CPU_LOG_TB_OP_IND, "op_ind",
252+ "show micro ops before indirect lowering" },
252253 { CPU_LOG_INT, "int",
253254 "show interrupts/exceptions in short format" },
254255 { CPU_LOG_EXEC, "exec",