diff --git a/cpu-i386.h b/cpu-i386.h
index c056a46771..4eac51fb50 100644
--- a/cpu-i386.h
+++ b/cpu-i386.h
@@ -85,7 +85,7 @@ typedef long double CPU86_LDouble;
 typedef double CPU86_LDouble;
 #endif
 
-typedef struct CPU86State {
+typedef struct CPUX86State {
     /* standard registers */
     uint32_t regs[8];
     uint32_t pc; /* cs_case + eip value */
@@ -109,11 +109,8 @@ typedef struct CPU86State {
     unsigned int fpuc;
 
     /* emulator internal variables */
-    uint32_t t0; /* temporary t0 storage */
-    uint32_t t1; /* temporary t1 storage */
-    uint32_t a0; /* temporary a0 storage (address) */
     CPU86_LDouble ft0;
-} CPU86State;
+} CPUX86State;
 
 static inline int ldub(void *ptr)
 {
@@ -188,12 +185,20 @@ static inline void stfq(void *ptr, double v)
 }
 
 #ifndef IN_OP_I386
-void port_outb(int addr, int val);
-void port_outw(int addr, int val);
-void port_outl(int addr, int val);
-int port_inb(int addr);
-int port_inw(int addr);
-int port_inl(int addr);
+void cpu_x86_outb(int addr, int val);
+void cpu_x86_outw(int addr, int val);
+void cpu_x86_outl(int addr, int val);
+int cpu_x86_inb(int addr);
+int cpu_x86_inw(int addr);
+int cpu_x86_inl(int addr);
 #endif
 
+CPUX86State *cpu_x86_init(void); /* returns NULL if the allocation fails */
+int cpu_x86_exec(CPUX86State *s);
+void cpu_x86_close(CPUX86State *s); /* frees s */
+
+/* internal functions; *gen_code_size_ptr receives the generated code size */
+int cpu_x86_gen_code(uint8_t *gen_code_buf, int *gen_code_size_ptr,
+                     uint8_t *pc_start);
+
 #endif /* CPU_I386_H */
diff --git a/linux-user/main.c b/linux-user/main.c
index 1d76d4d7cc..552ce006c7 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -193,34 +193,34 @@ void INT_handler(int num, void *env)
 /***********************************************************/
 /* new CPU core */
 
-void port_outb(int addr, int val)
+void cpu_x86_outb(int addr, int val)
 {
     fprintf(stderr, "outb: port=0x%04x, data=%02x\n", addr, val);
 }
 
-void port_outw(int addr, int val)
+void cpu_x86_outw(int addr, int val)
 {
     fprintf(stderr, "outw: port=0x%04x, data=%04x\n", addr, val);
 }
 
-void port_outl(int addr, int val)
+void cpu_x86_outl(int addr, int val)
 {
     fprintf(stderr, "outl: port=0x%04x, data=%08x\n", addr, val);
 }
 
-int port_inb(int addr)
+int cpu_x86_inb(int addr)
 {
     fprintf(stderr, "inb: port=0x%04x\n", addr);
     return 0;
 }
 
-int port_inw(int addr)
+int cpu_x86_inw(int addr)
 {
     fprintf(stderr, "inw: port=0x%04x\n", addr);
     return 0;
 }
 
-int port_inl(int addr)
+int cpu_x86_inl(int addr)
 {
     fprintf(stderr, "inl: port=0x%04x\n", addr);
     return 0;
diff --git a/op-i386.c b/op-i386.c
index 9fd4accd57..9443e172c7 100644
--- a/op-i386.c
+++ b/op-i386.c
@@ -10,39 +10,44 @@ typedef signed long long int64_t;
 
 #define NULL 0
 
+typedef struct FILE FILE;
+
+extern FILE *stderr;
+extern int fprintf(FILE *, const char *, ...);
+
 #ifdef __i386__
 register int T0 asm("esi");
 register int T1 asm("ebx");
 register int A0 asm("edi");
-register struct CPU86State *env asm("ebp");
+register struct CPUX86State *env asm("ebp");
 #define FORCE_RET() asm volatile ("ret");
 #endif
 #ifdef __powerpc__
 register int T0 asm("r24");
 register int T1 asm("r25");
 register int A0 asm("r26");
-register struct CPU86State *env asm("r27");
+register struct CPUX86State *env asm("r27");
 #define FORCE_RET() asm volatile ("blr");
 #endif
 #ifdef __arm__
 register int T0 asm("r4");
 register int T1 asm("r5");
 register int A0 asm("r6");
-register struct CPU86State *env asm("r7");
+register struct CPUX86State *env asm("r7");
 #define FORCE_RET() asm volatile ("mov pc, lr");
 #endif
 #ifdef __mips__
 register int T0 asm("s0");
 register int T1 asm("s1");
 register int A0 asm("s2");
-register struct CPU86State *env asm("s3");
+register struct CPUX86State *env asm("s3");
 #define FORCE_RET() asm volatile ("jr $31");
 #endif
 #ifdef __sparc__
 register int T0 asm("l0");
 register int T1 asm("l1");
 register int A0 asm("l2");
-register struct CPU86State *env asm("l3");
+register struct CPUX86State *env asm("l3");
 #define FORCE_RET() asm volatile ("retl ; nop");
 #endif
 
@@ -465,17 +470,17 @@ void OPPROTO op_idivl_EAX_T0(void)
 
 /* constant load */
 
-void OPPROTO op1_movl_T0_im(void)
+void OPPROTO op_movl_T0_im(void)
 {
     T0 = PARAM1;
 }
 
-void OPPROTO op1_movl_T1_im(void)
+void OPPROTO op_movl_T1_im(void)
 {
     T1 = PARAM1;
 }
 
-void OPPROTO op1_movl_A0_im(void)
+void OPPROTO op_movl_A0_im(void)
 {
     A0 = PARAM1;
 }
@@ -1592,3 +1597,35 @@ void OPPROTO op_fcos(void)
     helper_fcos();
 }
 
+/* main execution loop */
+uint8_t code_gen_buffer[65536]; /* output buffer handed to cpu_x86_gen_code() */
+
+/* Make env1 the current CPU state, then translate and run code at env->pc. */
+int cpu_x86_exec(CPUX86State *env1)
+{
+    int saved_T0, saved_T1, saved_A0;
+    CPUX86State *saved_env;
+    int code_gen_size;
+    void (*gen_func)(void);
+    
+    /* first we save global registers */
+    saved_T0 = T0;
+    saved_T1 = T1;
+    saved_A0 = A0;
+    saved_env = env;
+    env = env1;
+    /* NOTE(review): no break/return in this loop - confirm how it is left */
+    for(;;) {
+        cpu_x86_gen_code(code_gen_buffer, &code_gen_size, (uint8_t *)env->pc);
+        /* execute the generated code */
+        gen_func = (void *)code_gen_buffer;
+        gen_func();
+    }
+    /* NOTE(review): unreachable as written, the loop above has no exit */
+    /* restore global registers */
+    T0 = saved_T0;
+    T1 = saved_T1;
+    A0 = saved_A0;
+    env = saved_env;
+    return 0;
+}
diff --git a/ops_template.h b/ops_template.h
index 4032472947..18b2ffb492 100644
--- a/ops_template.h
+++ b/ops_template.h
@@ -575,12 +575,14 @@ void OPPROTO glue(op_repnz_cmps, SUFFIX)(void)
     }
 }
 
+/* port I/O */
+
 void OPPROTO glue(op_outs, SUFFIX)(void)
 {
     int v, dx;
     dx = EDX & 0xffff;
     v = glue(ldu, SUFFIX)((void *)ESI);
-    glue(port_out, SUFFIX)(dx, v);
+    glue(cpu_x86_out, SUFFIX)(dx, v);
     ESI += (DF << SHIFT);
 }
 
@@ -591,7 +593,7 @@ void OPPROTO glue(op_rep_outs, SUFFIX)(void)
     dx = EDX & 0xffff;
     while (ECX != 0) {
         v = glue(ldu, SUFFIX)((void *)ESI);
-        glue(port_out, SUFFIX)(dx, v);
+        glue(cpu_x86_out, SUFFIX)(dx, v);
         ESI += inc;
         ECX--;
     }
@@ -601,7 +603,7 @@ void OPPROTO glue(op_ins, SUFFIX)(void)
 {
     int v, dx;
     dx = EDX & 0xffff;
-    v = glue(port_in, SUFFIX)(dx);
+    v = glue(cpu_x86_in, SUFFIX)(dx);
     glue(st, SUFFIX)((void *)EDI, v);
     EDI += (DF << SHIFT);
 }
@@ -612,13 +614,23 @@ void OPPROTO glue(op_rep_ins, SUFFIX)(void)
     inc = (DF << SHIFT);
     dx = EDX & 0xffff;
     while (ECX != 0) {
-        v = glue(port_in, SUFFIX)(dx);
+        v = glue(cpu_x86_in, SUFFIX)(dx);
         glue(st, SUFFIX)((void *)EDI, v);
         EDI += (DF << SHIFT);
         ECX--;
     }
 }
 
+void OPPROTO glue(glue(op_out, SUFFIX), _T0_T1)(void)
+{ /* port number: low 16 bits of T0; data: T1 masked with DATA_MASK */
+    glue(cpu_x86_out, SUFFIX)(T0 & 0xffff, T1 & DATA_MASK);
+}
+
+void OPPROTO glue(glue(op_in, SUFFIX), _T0_T1)(void)
+{ /* read from the port given by the low 16 bits of T0; result goes to T1 */
+    T1 = glue(cpu_x86_in, SUFFIX)(T0 & 0xffff);
+}
+
 #undef DATA_BITS
 #undef SHIFT_MASK
 #undef SIGN_MASK
diff --git a/translate-i386.c b/translate-i386.c
index 0ad24e3a40..9ebc81e2ce 100644
--- a/translate-i386.c
+++ b/translate-i386.c
@@ -495,6 +495,18 @@ static GenOpFunc *gen_op_outs[6] = {
     gen_op_rep_outsl,
 };
 
+static GenOpFunc *gen_op_in[3] = { /* indexed by ot: byte, word, long variants */
+    gen_op_inb_T0_T1,
+    gen_op_inw_T0_T1,
+    gen_op_inl_T0_T1,
+};
+
+static GenOpFunc *gen_op_out[3] = { /* indexed by ot: byte, word, long variants */
+    gen_op_outb_T0_T1,
+    gen_op_outw_T0_T1,
+    gen_op_outl_T0_T1,
+};
+
 enum {
     JCC_O,
     JCC_B,
@@ -632,7 +644,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d, int s)
 
 static void gen_opi(DisasContext *s1, int op, int ot, int d, int c)
 {
-    gen_op1_movl_T1_im(c);
+    gen_op_movl_T1_im(c);
     gen_op(s1, op, ot, d, OR_TMP0);
 }
 
@@ -678,7 +690,7 @@ static void gen_shift(DisasContext *s1, int op, int ot, int d, int s)
 static void gen_shifti(DisasContext *s1, int op, int ot, int d, int c)
 {
     /* currently not optimized */
-    gen_op1_movl_T1_im(c);
+    gen_op_movl_T1_im(c);
     gen_shift(s1, op, ot, d, OR_TMP1);
 }
 
@@ -746,7 +758,7 @@ static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_
         if (reg2 == OR_ZERO) {
             /* op: disp + (reg1 << scale) */
             if (reg1 == OR_ZERO) {
-                gen_op1_movl_A0_im(disp);
+                gen_op_movl_A0_im(disp);
             } else if (scale == 0 && disp == 0) {
                 gen_op_movl_A0_reg[reg1]();
             } else {
@@ -755,7 +767,7 @@ static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_
         } else {
             /* op: disp + reg1 + (reg2 << scale) */
             if (disp != 0) {
-                gen_op1_movl_A0_im(disp);
+                gen_op_movl_A0_im(disp);
                 gen_op_addl_A0_reg_sN[0][reg1]();
             } else {
                 gen_op_movl_A0_reg[reg1]();
@@ -1149,7 +1161,7 @@ int disas_insn(DisasContext *s, uint8_t *pc_start)
         switch(op) {
         case 0: /* test */
             val = insn_get(s, ot);
-            gen_op1_movl_T1_im(val);
+            gen_op_movl_T1_im(val);
             gen_op_testl_T0_T1_cc();
             s->cc_op = CC_OP_LOGICB + ot;
             break;
@@ -1266,7 +1278,7 @@ int disas_insn(DisasContext *s, uint8_t *pc_start)
                 gen_op_st_T0_A0[ot]();
             break;
         case 2: /* call Ev */
-            gen_op1_movl_T1_im((long)s->pc);
+            gen_op_movl_T1_im((long)s->pc);
             gen_op_pushl_T1();
             gen_op_jmp_T0();
             break;
@@ -1309,7 +1321,7 @@ int disas_insn(DisasContext *s, uint8_t *pc_start)
         val = insn_get(s, ot);
 
         gen_op_mov_TN_reg[ot][0][OR_EAX]();
-        gen_op1_movl_T1_im(val);
+        gen_op_movl_T1_im(val);
         gen_op_testl_T0_T1_cc();
         s->cc_op = CC_OP_LOGICB + ot;
         break;
@@ -1336,10 +1348,10 @@ int disas_insn(DisasContext *s, uint8_t *pc_start)
         gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
         if (b == 0x69) {
             val = insn_get(s, ot);
-            gen_op1_movl_T1_im(val);
+            gen_op_movl_T1_im(val);
         } else if (b == 0x6b) {
             val = insn_get(s, OT_BYTE);
-            gen_op1_movl_T1_im(val);
+            gen_op_movl_T1_im(val);
         } else {
             gen_op_mov_TN_reg[ot][1][reg]();
         }
@@ -1369,7 +1381,7 @@ int disas_insn(DisasContext *s, uint8_t *pc_start)
             val = insn_get(s, ot);
         else
             val = (int8_t)insn_get(s, OT_BYTE);
-        gen_op1_movl_T0_im(val);
+        gen_op_movl_T0_im(val);
         gen_op_pushl_T0();
         break;
     case 0x8f: /* pop Ev */
@@ -1408,7 +1420,7 @@ int disas_insn(DisasContext *s, uint8_t *pc_start)
         mod = (modrm >> 6) & 3;
 
         val = insn_get(s, ot);
-        gen_op1_movl_T0_im(val);
+        gen_op_movl_T0_im(val);
         gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
         break;
     case 0x8a:
@@ -1502,14 +1514,14 @@ int disas_insn(DisasContext *s, uint8_t *pc_start)
 
     case 0xb0 ... 0xb7: /* mov R, Ib */
         val = insn_get(s, OT_BYTE);
-        gen_op1_movl_T0_im(val);
+        gen_op_movl_T0_im(val);
         gen_op_mov_reg_T0[OT_BYTE][b & 7]();
         break;
     case 0xb8 ... 0xbf: /* mov R, Iv */
         ot = dflag ? OT_LONG : OT_WORD;
         val = insn_get(s, ot);
         reg = OR_EAX + (b & 7);
-        gen_op1_movl_T0_im(val);
+        gen_op_movl_T0_im(val);
         gen_op_mov_reg_T0[ot][reg]();
         break;
 
@@ -1978,6 +1990,8 @@ int disas_insn(DisasContext *s, uint8_t *pc_start)
         }
         break;
         
+        /************************/
+        /* port I/O */
     case 0x6c: /* insS */
     case 0x6d:
         if ((b & 1) == 0)
@@ -2002,6 +2016,48 @@ int disas_insn(DisasContext *s, uint8_t *pc_start)
             gen_op_outs[ot]();
         }
         break;
+    case 0xe4:
+    case 0xe5:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        val = ldub(s->pc++);
+        gen_op_movl_T0_im(val);
+        gen_op_in[ot]();
+        gen_op_mov_reg_T1[ot][R_EAX]();
+        break;
+    case 0xe6:
+    case 0xe7:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        val = ldub(s->pc++);
+        gen_op_movl_T0_im(val);
+        gen_op_mov_TN_reg[ot][1][R_EAX]();
+        gen_op_out[ot]();
+        break;
+    case 0xec:
+    case 0xed:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        gen_op_mov_TN_reg[OT_WORD][0][R_EDX]();
+        gen_op_in[ot]();
+        gen_op_mov_reg_T1[ot][R_EAX]();
+        break;
+    case 0xee:
+    case 0xef:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        gen_op_mov_TN_reg[OT_WORD][0][R_EDX]();
+        gen_op_mov_TN_reg[ot][1][R_EAX]();
+        gen_op_out[ot]();
+        break;
 
         /************************/
         /* control */
@@ -2020,7 +2076,7 @@ int disas_insn(DisasContext *s, uint8_t *pc_start)
     case 0xe8: /* call */
         val = insn_get(s, OT_LONG);
         val += (long)s->pc;
-        gen_op1_movl_T1_im((long)s->pc);
+        gen_op_movl_T1_im((long)s->pc);
         gen_op_pushl_T1();
         gen_op_jmp_im(val);
         break;
@@ -2121,3 +2177,44 @@ int disas_insn(DisasContext *s, uint8_t *pc_start)
     return (long)s->pc;
 }
 
+/* Translate the instruction at pc_start into gen_code_buf and store the
+   generated size in *gen_code_size_ptr. Always returns 0, not the next pc. */
+int cpu_x86_gen_code(uint8_t *gen_code_buf, int *gen_code_size_ptr,
+                     uint8_t *pc_start)
+{
+    DisasContext dc1, *dc = &dc1;
+    dc->cc_op = CC_OP_DYNAMIC;
+    gen_code_ptr = gen_code_buf;
+    gen_start();
+    /* pc_start is a pointer: cast it so the argument matches the format */
+    if (disas_insn(dc, pc_start) == -1)
+        error("unknown instruction at PC=0x%08lx", (long)pc_start);
+    gen_end();
+    *gen_code_size_ptr = gen_code_ptr - gen_code_buf;
+    printf("0x%08lx: code_size = %d\n", (long)pc_start, *gen_code_size_ptr);
+    return 0;
+}
+
+/* Allocate and reset a CPU state (free with cpu_x86_close); NULL on failure */
+CPUX86State *cpu_x86_init(void)
+{
+    CPUX86State *s = malloc(sizeof(*s));
+    int n;
+
+    if (s == NULL)
+        return NULL;
+    memset(s, 0, sizeof(*s));
+    /* flags setup */
+    s->cc_op = CC_OP_EFLAGS;
+    s->df = 1;
+    /* basic FPU init: every tag set to 1, control word 0x37f */
+    s->fpuc = 0x37f;
+    for (n = 7; n >= 0; n--)
+        s->fptags[n] = 1;
+    return s;
+}
+
+void cpu_x86_close(CPUX86State *env)
+{
+    free(env); /* release the state; cpu_x86_init() obtains it from malloc() */
+}