tcg/s390: Use constant pool for movi

Split out maybe_out_small_movi for use with other operations
that want to add to the constant pool.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Author: Richard Henderson, 2017-07-31 19:16:02 -07:00
parent e692a3492d
commit 28eef8aaec
3 changed files with 81 additions and 55 deletions

View File

@@ -942,8 +942,9 @@ typedef struct {
#define R_390_TLS_DTPOFF 55 /* Offset in TLS block. */
#define R_390_TLS_TPOFF 56 /* Negate offset in static TLS
block. */
#define R_390_20 57
/* Keep this the last entry. */
#define R_390_NUM 57
#define R_390_NUM 58
/* x86-64 relocation types */
#define R_X86_64_NONE 0 /* No reloc */

View File

@@ -158,5 +158,6 @@ static inline void tb_target_set_jmp_target(uintptr_t tc_ptr,
#ifdef CONFIG_SOFTMMU
#define TCG_TARGET_NEED_LDST_LABELS
#endif
#define TCG_TARGET_NEED_POOL_LABELS
#endif

View File

@@ -29,6 +29,7 @@
#error "unsupported code generation mode"
#endif
#include "tcg-pool.inc.c"
#include "elf.h"
/* ??? The translation blocks produced by TCG are generally small enough to
@@ -361,6 +362,7 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
intptr_t value, intptr_t addend)
{
intptr_t pcrel2;
uint32_t old;
value += addend;
pcrel2 = (tcg_insn_unit *)value - code_ptr;
@@ -374,6 +376,12 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
assert(pcrel2 == (int32_t)pcrel2);
tcg_patch32(code_ptr, pcrel2);
break;
case R_390_20:
assert(value == sextract64(value, 0, 20));
old = *(uint32_t *)code_ptr & 0xf00000ff;
old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4);
tcg_patch32(code_ptr, old);
break;
default:
g_assert_not_reached();
}
@@ -562,14 +570,16 @@ static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
}
}
/* load a register with an immediate value */
static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
tcg_target_long sval, bool in_prologue)
{
static const S390Opcode lli_insns[4] = {
RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
};
static const S390Opcode ii_insns[4] = {
RI_IILL, RI_IILH, RI_IIHL, RI_IIHH
};
/*
 * Attempt to load SVAL into register RET using a single instruction.
 * Returns true on success; false means the caller must fall back to a
 * longer sequence (e.g. the constant pool).
 *
 * NOTE(review): this span is a rendered diff, not live code — the
 * adjacent "return;" / "return true;" lines below are the removed and
 * added sides of one hunk, and the "@ -581,..." line is a hunk header.
 */
static bool maybe_out_small_movi(TCGContext *s, TCGType type,
TCGReg ret, tcg_target_long sval)
{
tcg_target_ulong uval = sval;
int i;
@ -581,15 +591,35 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
/* Try all 32-bit insns that can load it in one go. */
if (sval >= -0x8000 && sval < 0x8000) {
/* Value fits in a signed 16-bit immediate: use LGHI. */
tcg_out_insn(s, RI, LGHI, ret, sval);
return;
return true;
}
/* Try each 16-bit "load logical immediate" halfword position:
all bits outside one halfword must be zero for lli_insns[i]. */
for (i = 0; i < 4; i++) {
tcg_target_long mask = 0xffffull << i*16;
if ((uval & mask) == uval) {
tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16);
return true;
}
}
/* No single-instruction form matched. */
return false;
}
/* load a register with an immediate value */
static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
tcg_target_long sval, bool in_prologue)
{
tcg_target_ulong uval;
/* Try all 32-bit insns that can load it in one go. */
if (maybe_out_small_movi(s, type, ret, sval)) {
return;
}
uval = sval;
if (type == TCG_TYPE_I32) {
uval = (uint32_t)sval;
sval = (int32_t)sval;
}
/* Try all 48-bit insns that can load it in one go. */
@@ -603,7 +633,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
return;
}
if ((uval & 0xffffffff) == 0) {
tcg_out_insn(s, RIL, LLIHF, ret, uval >> 31 >> 1);
tcg_out_insn(s, RIL, LLIHF, ret, uval >> 32);
return;
}
}
@@ -626,11 +656,8 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
}
}
/* If extended immediates are not present, then we may have to issue
several instructions to load the low 32 bits. */
if (!(s390_facilities & FACILITY_EXT_IMM)) {
/* A 32-bit unsigned value can be loaded in 2 insns. And given
that the lli_insns loop above did not succeed, we know that
that LLILL, LLIHL, LLILF above did not succeed, we know that
both insns are required. */
if (uval <= 0xffffffff) {
tcg_out_insn(s, RI, LLILL, ret, uval);
@@ -638,43 +665,35 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
return;
}
/* If all high bits are set, the value can be loaded in 2 or 3 insns.
We first want to make sure that all the high bits get set. With
luck the low 16-bits can be considered negative to perform that for
free, otherwise we load an explicit -1. */
if (sval >> 31 >> 1 == -1) {
if (uval & 0x8000) {
tcg_out_insn(s, RI, LGHI, ret, uval);
/* When allowed, stuff it in the constant pool. */
if (!in_prologue) {
if (USE_REG_TB) {
tcg_out_insn(s, RXY, LG, ret, TCG_REG_TB, TCG_REG_NONE, 0);
new_pool_label(s, sval, R_390_20, s->code_ptr - 2,
-(intptr_t)s->code_gen_ptr);
} else {
tcg_out_insn(s, RI, LGHI, ret, -1);
tcg_out_insn(s, RI, IILL, ret, uval);
tcg_out_insn(s, RIL, LGRL, ret, 0);
new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
}
tcg_out_insn(s, RI, IILH, ret, uval >> 16);
return;
}
}
/* If we get here, both the high and low parts have non-zero bits. */
/* Recurse to load the lower 32-bits. */
tcg_out_movi(s, TCG_TYPE_I64, ret, uval & 0xffffffff);
/* Insert data into the high 32-bits. */
uval = uval >> 31 >> 1;
/* What's left is for the prologue, loading GUEST_BASE, and because
it failed to match above, is known to be a full 64-bit quantity.
We could try more than this, but it probably wouldn't pay off. */
if (s390_facilities & FACILITY_EXT_IMM) {
if (uval < 0x10000) {
tcg_out_insn(s, RI, IIHL, ret, uval);
} else if ((uval & 0xffff) == 0) {
tcg_out_insn(s, RI, IIHH, ret, uval >> 16);
tcg_out_insn(s, RIL, LLILF, ret, uval);
tcg_out_insn(s, RIL, IIHF, ret, uval >> 32);
} else {
tcg_out_insn(s, RIL, IIHF, ret, uval);
const S390Opcode *insns = lli_insns;
int i;
for (i = 0; i < 4; i++) {
uint16_t part = uval >> (16 * i);
if (part) {
tcg_out_insn_RI(s, insns[i], ret, part);
insns = ii_insns;
}
} else {
if (uval & 0xffff) {
tcg_out_insn(s, RI, IIHL, ret, uval);
}
if (uval & 0xffff0000) {
tcg_out_insn(s, RI, IIHH, ret, uval >> 16);
}
}
}
@@ -2573,6 +2592,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
}
/*
 * Pad a span of COUNT insn units starting at P with nop filler.
 * Every byte is set to 0x07 — presumably the s390 BCR opcode, so the
 * filler decodes as branch-never nops; NOTE(review): confirm against
 * the s390 Principles of Operation.
 */
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    unsigned char *q = (unsigned char *)p;
    size_t remain = (size_t)count * sizeof(*p);

    while (remain--) {
        *q++ = 0x07;
    }
}
typedef struct {
DebugFrameHeader h;
uint8_t fde_def_cfa[4];