target-i386: make xmm_regs 512-bit wide
Right now, the AVX512 registers are split across many different fields: xmm_regs holds the low 128 bits of the first 16 registers, ymmh_regs the next 128 bits of those same 16 registers, zmmh_regs the next 256 bits of the same 16 registers, and finally hi16_zmm_regs holds the full 512 bits of the second set of 16 registers. This makes it simple to move data in and out of the xsave region, but it would be a nightmare for a hypothetical TCG implementation and it leads to a proliferation of [XYZ]MM_[BWLSQD] macros. Instead, this patch marshals data manually from the xsave region into a single 32x512-bit array, simplifying the macro jungle and clarifying which bits live in which vmstate subsection. The migration format is unaffected.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent a03c3e90e1
commit b7711471f5
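Before the diff, a small illustration of the marshalling described above. This is a standalone sketch, not QEMU code: ZMMRegSketch, gather() and ldq_le() are invented names, ldq_le() stands in for QEMU's ldq_p() and assumes a little-endian buffer, and NB_REGS is fixed at 16 (the first sixteen registers; registers 16-31 come from the Hi16_ZMM area). The loop mirrors the one kvm_get_xsave() gains below: the per-register XMM (bits 0-127), YMMH (bits 128-255) and ZMM_Hi256 (bits 256-511) xsave areas are gathered into one 512-bit-wide register array.

/* Standalone illustration only -- not QEMU code.  ZMMRegSketch, gather()
 * and ldq_le() are invented names; ldq_le() is a simplified stand-in for
 * QEMU's ldq_p() and assumes little-endian data. */
#include <stdint.h>
#include <string.h>
#include <stdio.h>

typedef union {
    uint8_t  _b[64];
    uint16_t _w[32];
    uint32_t _l[16];
    uint64_t _q[8];
} ZMMRegSketch;                     /* one 512-bit register */

#define NB_REGS 16                  /* first 16 registers; regs 16-31 come from Hi16_ZMM */

static uint64_t ldq_le(const uint8_t *p)
{
    uint64_t v;
    memcpy(&v, p, sizeof(v));       /* little-endian host assumed for the sketch */
    return v;
}

/* Gather the XMM (bits 0-127), YMMH (128-255) and ZMM_Hi256 (256-511)
 * xsave areas into a single 512-bit-wide register array. */
static void gather(ZMMRegSketch *regs, const uint8_t *xmm,
                   const uint8_t *ymmh, const uint8_t *zmmh)
{
    for (int i = 0; i < NB_REGS; i++, xmm += 16, ymmh += 16, zmmh += 32) {
        regs[i]._q[0] = ldq_le(xmm);
        regs[i]._q[1] = ldq_le(xmm + 8);
        regs[i]._q[2] = ldq_le(ymmh);
        regs[i]._q[3] = ldq_le(ymmh + 8);
        regs[i]._q[4] = ldq_le(zmmh);
        regs[i]._q[5] = ldq_le(zmmh + 8);
        regs[i]._q[6] = ldq_le(zmmh + 16);
        regs[i]._q[7] = ldq_le(zmmh + 24);
    }
}

int main(void)
{
    uint8_t xmm[16 * NB_REGS] = { 1 };
    uint8_t ymmh[16 * NB_REGS] = { 2 };
    uint8_t zmmh[32 * NB_REGS] = { 3 };
    ZMMRegSketch regs[NB_REGS];     /* fully overwritten by gather() */

    gather(regs, xmm, ymmh, zmmh);
    printf("reg0: q0=%llx q2=%llx q4=%llx\n",
           (unsigned long long)regs[0]._q[0],
           (unsigned long long)regs[0]._q[2],
           (unsigned long long)regs[0]._q[4]);
    return 0;
}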
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
@@ -712,24 +712,6 @@ typedef struct SegmentCache {
     uint32_t flags;
 } SegmentCache;
 
-typedef union {
-    uint8_t _b[16];
-    uint16_t _w[8];
-    uint32_t _l[4];
-    uint64_t _q[2];
-    float32 _s[4];
-    float64 _d[2];
-} XMMReg;
-
-typedef union {
-    uint8_t _b[32];
-    uint16_t _w[16];
-    uint32_t _l[8];
-    uint64_t _q[4];
-    float32 _s[8];
-    float64 _d[4];
-} YMMReg;
-
 typedef union {
     uint8_t _b[64];
     uint16_t _w[32];
@@ -737,7 +719,7 @@ typedef union {
     uint64_t _q[8];
     float32 _s[16];
     float64 _d[8];
-} ZMMReg;
+} XMMReg; /* really zmm */
 
 typedef union {
     uint8_t _b[8];
@@ -758,46 +740,18 @@ typedef struct BNDCSReg {
 } BNDCSReg;
 
 #ifdef HOST_WORDS_BIGENDIAN
-#define ZMM_B(n) _b[63 - (n)]
-#define ZMM_W(n) _w[31 - (n)]
-#define ZMM_L(n) _l[15 - (n)]
-#define ZMM_S(n) _s[15 - (n)]
-#define ZMM_Q(n) _q[7 - (n)]
-#define ZMM_D(n) _d[7 - (n)]
-
-#define YMM_B(n) _b[31 - (n)]
-#define YMM_W(n) _w[15 - (n)]
-#define YMM_L(n) _l[7 - (n)]
-#define YMM_S(n) _s[7 - (n)]
-#define YMM_Q(n) _q[3 - (n)]
-#define YMM_D(n) _d[3 - (n)]
-
-#define XMM_B(n) _b[15 - (n)]
-#define XMM_W(n) _w[7 - (n)]
-#define XMM_L(n) _l[3 - (n)]
-#define XMM_S(n) _s[3 - (n)]
-#define XMM_Q(n) _q[1 - (n)]
-#define XMM_D(n) _d[1 - (n)]
+#define XMM_B(n) _b[63 - (n)]
+#define XMM_W(n) _w[31 - (n)]
+#define XMM_L(n) _l[15 - (n)]
+#define XMM_S(n) _s[15 - (n)]
+#define XMM_Q(n) _q[7 - (n)]
+#define XMM_D(n) _d[7 - (n)]
 
 #define MMX_B(n) _b[7 - (n)]
 #define MMX_W(n) _w[3 - (n)]
 #define MMX_L(n) _l[1 - (n)]
 #define MMX_S(n) _s[1 - (n)]
 #else
-#define ZMM_B(n) _b[n]
-#define ZMM_W(n) _w[n]
-#define ZMM_L(n) _l[n]
-#define ZMM_S(n) _s[n]
-#define ZMM_Q(n) _q[n]
-#define ZMM_D(n) _d[n]
-
-#define YMM_B(n) _b[n]
-#define YMM_W(n) _w[n]
-#define YMM_L(n) _l[n]
-#define YMM_S(n) _s[n]
-#define YMM_Q(n) _q[n]
-#define YMM_D(n) _d[n]
-
 #define XMM_B(n) _b[n]
 #define XMM_W(n) _w[n]
 #define XMM_L(n) _l[n]
@@ -896,17 +850,11 @@ typedef struct CPUX86State {
     float_status mmx_status; /* for 3DNow! float ops */
     float_status sse_status;
     uint32_t mxcsr;
-    XMMReg xmm_regs[CPU_NB_REGS];
+    XMMReg xmm_regs[CPU_NB_REGS == 8 ? 8 : 32];
     XMMReg xmm_t0;
    MMXReg mmx_t0;
 
-    XMMReg ymmh_regs[CPU_NB_REGS];
-
     uint64_t opmask_regs[NB_OPMASK_REGS];
-    YMMReg zmmh_regs[CPU_NB_REGS];
-#ifdef TARGET_X86_64
-    ZMMReg hi16_zmm_regs[CPU_NB_REGS];
-#endif
 
     /* sysenter registers */
     uint32_t sysenter_cs;
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
@@ -1048,7 +1048,7 @@ static int kvm_put_xsave(X86CPU *cpu)
     CPUX86State *env = &cpu->env;
     struct kvm_xsave* xsave = env->kvm_xsave_buf;
     uint16_t cwd, swd, twd;
-    uint8_t *xmm;
+    uint8_t *xmm, *ymmh, *zmmh;
     int i, r;
 
     if (!kvm_has_xsave()) {
@@ -1071,26 +1071,30 @@ static int kvm_put_xsave(X86CPU *cpu)
             sizeof env->fpregs);
     xsave->region[XSAVE_MXCSR] = env->mxcsr;
     *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV] = env->xstate_bv;
-    memcpy(&xsave->region[XSAVE_YMMH_SPACE], env->ymmh_regs,
-            sizeof env->ymmh_regs);
     memcpy(&xsave->region[XSAVE_BNDREGS], env->bnd_regs,
             sizeof env->bnd_regs);
     memcpy(&xsave->region[XSAVE_BNDCSR], &env->bndcs_regs,
             sizeof(env->bndcs_regs));
     memcpy(&xsave->region[XSAVE_OPMASK], env->opmask_regs,
             sizeof env->opmask_regs);
-    memcpy(&xsave->region[XSAVE_ZMM_Hi256], env->zmmh_regs,
-            sizeof env->zmmh_regs);
 
     xmm = (uint8_t *)&xsave->region[XSAVE_XMM_SPACE];
-    for (i = 0; i < CPU_NB_REGS; i++, xmm += 16) {
+    ymmh = (uint8_t *)&xsave->region[XSAVE_YMMH_SPACE];
+    zmmh = (uint8_t *)&xsave->region[XSAVE_ZMM_Hi256];
+    for (i = 0; i < CPU_NB_REGS; i++, xmm += 16, ymmh += 16, zmmh += 32) {
         stq_p(xmm, env->xmm_regs[i].XMM_Q(0));
         stq_p(xmm+8, env->xmm_regs[i].XMM_Q(1));
+        stq_p(ymmh, env->xmm_regs[i].XMM_Q(2));
+        stq_p(ymmh+8, env->xmm_regs[i].XMM_Q(3));
+        stq_p(zmmh, env->xmm_regs[i].XMM_Q(4));
+        stq_p(zmmh+8, env->xmm_regs[i].XMM_Q(5));
+        stq_p(zmmh+16, env->xmm_regs[i].XMM_Q(6));
+        stq_p(zmmh+24, env->xmm_regs[i].XMM_Q(7));
     }
 
 #ifdef TARGET_X86_64
-    memcpy(&xsave->region[XSAVE_Hi16_ZMM], env->hi16_zmm_regs,
-            sizeof env->hi16_zmm_regs);
+    memcpy(&xsave->region[XSAVE_Hi16_ZMM], &env->xmm_regs[16],
+            16 * sizeof env->xmm_regs[16]);
 #endif
     r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave);
     return r;
@@ -1407,7 +1411,7 @@ static int kvm_get_xsave(X86CPU *cpu)
     CPUX86State *env = &cpu->env;
     struct kvm_xsave* xsave = env->kvm_xsave_buf;
     int ret, i;
-    const uint8_t *xmm;
+    const uint8_t *xmm, *ymmh, *zmmh;
     uint16_t cwd, swd, twd;
 
     if (!kvm_has_xsave()) {
@@ -1435,26 +1439,30 @@ static int kvm_get_xsave(X86CPU *cpu)
     memcpy(env->fpregs, &xsave->region[XSAVE_ST_SPACE],
            sizeof env->fpregs);
     env->xstate_bv = *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV];
-    memcpy(env->ymmh_regs, &xsave->region[XSAVE_YMMH_SPACE],
-            sizeof env->ymmh_regs);
     memcpy(env->bnd_regs, &xsave->region[XSAVE_BNDREGS],
             sizeof env->bnd_regs);
     memcpy(&env->bndcs_regs, &xsave->region[XSAVE_BNDCSR],
             sizeof(env->bndcs_regs));
     memcpy(env->opmask_regs, &xsave->region[XSAVE_OPMASK],
             sizeof env->opmask_regs);
-    memcpy(env->zmmh_regs, &xsave->region[XSAVE_ZMM_Hi256],
-            sizeof env->zmmh_regs);
 
     xmm = (const uint8_t *)&xsave->region[XSAVE_XMM_SPACE];
-    for (i = 0; i < CPU_NB_REGS; i++, xmm += 16) {
+    ymmh = (const uint8_t *)&xsave->region[XSAVE_YMMH_SPACE];
+    zmmh = (const uint8_t *)&xsave->region[XSAVE_ZMM_Hi256];
+    for (i = 0; i < CPU_NB_REGS; i++, xmm += 16, ymmh += 16, zmmh += 32) {
         env->xmm_regs[i].XMM_Q(0) = ldq_p(xmm);
         env->xmm_regs[i].XMM_Q(1) = ldq_p(xmm+8);
+        env->xmm_regs[i].XMM_Q(2) = ldq_p(ymmh);
+        env->xmm_regs[i].XMM_Q(3) = ldq_p(ymmh+8);
+        env->xmm_regs[i].XMM_Q(4) = ldq_p(zmmh);
+        env->xmm_regs[i].XMM_Q(5) = ldq_p(zmmh+8);
+        env->xmm_regs[i].XMM_Q(6) = ldq_p(zmmh+16);
+        env->xmm_regs[i].XMM_Q(7) = ldq_p(zmmh+24);
     }
 
 #ifdef TARGET_X86_64
-    memcpy(env->hi16_zmm_regs, &xsave->region[XSAVE_Hi16_ZMM],
-            sizeof env->hi16_zmm_regs);
+    memcpy(&env->xmm_regs[16], &xsave->region[XSAVE_Hi16_ZMM],
+            16 * sizeof env->xmm_regs[16]);
 #endif
     return 0;
 }
diff --git a/target-i386/machine.c b/target-i386/machine.c
@@ -46,14 +46,14 @@ static const VMStateDescription vmstate_xmm_reg = {
     VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, CPU_NB_REGS, 0, \
                              vmstate_xmm_reg, XMMReg)
 
-/* YMMH format is the same as XMM */
+/* YMMH format is the same as XMM, but for bits 128-255 */
 static const VMStateDescription vmstate_ymmh_reg = {
     .name = "ymmh_reg",
     .version_id = 1,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
-        VMSTATE_UINT64(XMM_Q(0), XMMReg),
-        VMSTATE_UINT64(XMM_Q(1), XMMReg),
+        VMSTATE_UINT64(XMM_Q(2), XMMReg),
+        VMSTATE_UINT64(XMM_Q(3), XMMReg),
         VMSTATE_END_OF_LIST()
     }
 };
@@ -67,17 +67,17 @@ static const VMStateDescription vmstate_zmmh_reg = {
     .version_id = 1,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
-        VMSTATE_UINT64(YMM_Q(0), YMMReg),
-        VMSTATE_UINT64(YMM_Q(1), YMMReg),
-        VMSTATE_UINT64(YMM_Q(2), YMMReg),
-        VMSTATE_UINT64(YMM_Q(3), YMMReg),
+        VMSTATE_UINT64(XMM_Q(4), XMMReg),
+        VMSTATE_UINT64(XMM_Q(5), XMMReg),
+        VMSTATE_UINT64(XMM_Q(6), XMMReg),
+        VMSTATE_UINT64(XMM_Q(7), XMMReg),
         VMSTATE_END_OF_LIST()
     }
 };
 
 #define VMSTATE_ZMMH_REGS_VARS(_field, _state, _start) \
     VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, CPU_NB_REGS, 0, \
-                             vmstate_zmmh_reg, YMMReg)
+                             vmstate_zmmh_reg, XMMReg)
 
 #ifdef TARGET_X86_64
 static const VMStateDescription vmstate_hi16_zmm_reg = {
@@ -85,21 +85,21 @@ static const VMStateDescription vmstate_hi16_zmm_reg = {
     .version_id = 1,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
-        VMSTATE_UINT64(ZMM_Q(0), ZMMReg),
-        VMSTATE_UINT64(ZMM_Q(1), ZMMReg),
-        VMSTATE_UINT64(ZMM_Q(2), ZMMReg),
-        VMSTATE_UINT64(ZMM_Q(3), ZMMReg),
-        VMSTATE_UINT64(ZMM_Q(4), ZMMReg),
-        VMSTATE_UINT64(ZMM_Q(5), ZMMReg),
-        VMSTATE_UINT64(ZMM_Q(6), ZMMReg),
-        VMSTATE_UINT64(ZMM_Q(7), ZMMReg),
+        VMSTATE_UINT64(XMM_Q(0), XMMReg),
+        VMSTATE_UINT64(XMM_Q(1), XMMReg),
+        VMSTATE_UINT64(XMM_Q(2), XMMReg),
+        VMSTATE_UINT64(XMM_Q(3), XMMReg),
+        VMSTATE_UINT64(XMM_Q(4), XMMReg),
+        VMSTATE_UINT64(XMM_Q(5), XMMReg),
+        VMSTATE_UINT64(XMM_Q(6), XMMReg),
+        VMSTATE_UINT64(XMM_Q(7), XMMReg),
         VMSTATE_END_OF_LIST()
     }
 };
 
 #define VMSTATE_Hi16_ZMM_REGS_VARS(_field, _state, _start) \
     VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, CPU_NB_REGS, 0, \
-                             vmstate_hi16_zmm_reg, ZMMReg)
+                             vmstate_hi16_zmm_reg, XMMReg)
 #endif
 
 static const VMStateDescription vmstate_bnd_regs = {
@@ -658,17 +658,16 @@ static bool avx512_needed(void *opaque)
     }
 
     for (i = 0; i < CPU_NB_REGS; i++) {
-#define ENV_ZMMH(reg, field) (env->zmmh_regs[reg].YMM_Q(field))
-        if (ENV_ZMMH(i, 0) || ENV_ZMMH(i, 1) ||
-            ENV_ZMMH(i, 2) || ENV_ZMMH(i, 3)) {
+#define ENV_XMM(reg, field) (env->xmm_regs[reg].XMM_Q(field))
+        if (ENV_XMM(i, 4) || ENV_XMM(i, 6) ||
+            ENV_XMM(i, 5) || ENV_XMM(i, 7)) {
             return true;
         }
 #ifdef TARGET_X86_64
-#define ENV_Hi16_ZMM(reg, field) (env->hi16_zmm_regs[reg].ZMM_Q(field))
-        if (ENV_Hi16_ZMM(i, 0) || ENV_Hi16_ZMM(i, 1) ||
-            ENV_Hi16_ZMM(i, 2) || ENV_Hi16_ZMM(i, 3) ||
-            ENV_Hi16_ZMM(i, 4) || ENV_Hi16_ZMM(i, 5) ||
-            ENV_Hi16_ZMM(i, 6) || ENV_Hi16_ZMM(i, 7)) {
+        if (ENV_XMM(i+16, 0) || ENV_XMM(i+16, 1) ||
+            ENV_XMM(i+16, 2) || ENV_XMM(i+16, 3) ||
+            ENV_XMM(i+16, 4) || ENV_XMM(i+16, 5) ||
+            ENV_XMM(i+16, 6) || ENV_XMM(i+16, 7)) {
             return true;
         }
 #endif
@@ -683,9 +682,9 @@ static const VMStateDescription vmstate_avx512 = {
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64_ARRAY(env.opmask_regs, X86CPU, NB_OPMASK_REGS),
-        VMSTATE_ZMMH_REGS_VARS(env.zmmh_regs, X86CPU, 0),
+        VMSTATE_ZMMH_REGS_VARS(env.xmm_regs, X86CPU, 0),
 #ifdef TARGET_X86_64
-        VMSTATE_Hi16_ZMM_REGS_VARS(env.hi16_zmm_regs, X86CPU, 0),
+        VMSTATE_Hi16_ZMM_REGS_VARS(env.xmm_regs, X86CPU, 16),
 #endif
         VMSTATE_END_OF_LIST()
     }
@@ -807,7 +806,7 @@ VMStateDescription vmstate_x86_cpu = {
         /* XSAVE related fields */
         VMSTATE_UINT64_V(env.xcr0, X86CPU, 12),
         VMSTATE_UINT64_V(env.xstate_bv, X86CPU, 12),
-        VMSTATE_YMMH_REGS_VARS(env.ymmh_regs, X86CPU, 0, 12),
+        VMSTATE_YMMH_REGS_VARS(env.xmm_regs, X86CPU, 0, 12),
         VMSTATE_END_OF_LIST()
         /* The above list is not sorted /wrt version numbers, watch out! */
     },
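As a reading aid for the machine.c hunks above, the sketch below restates which registers and which 64-bit quadwords of the widened array each migration subsection element now serializes, assuming a 64-bit target where CPU_NB_REGS is 16. The struct and array names are invented for this summary; it is not QEMU code.

/* Invented helper, not QEMU code: a compact restatement of the
 * subsection-to-quadword mapping established by the machine.c changes. */
#include <stdio.h>

struct subsection_map {
    const char *name;   /* vmstate subsection element        */
    int first_reg;      /* first register covered            */
    int nregs;          /* number of registers covered       */
    int first_q;        /* first 64-bit quadword, XMM_Q(n)   */
    int nquads;         /* quadwords serialized per register */
};

static const struct subsection_map avx_state_map[] = {
    { "ymmh_reg",      0, 16, 2, 2 },  /* bits 128-255 of regs 0-15           */
    { "zmmh_reg",      0, 16, 4, 4 },  /* bits 256-511 of regs 0-15           */
    { "hi16_zmm_reg", 16, 16, 0, 8 },  /* all 512 bits of regs 16-31 (x86_64) */
};

int main(void)
{
    for (size_t i = 0; i < sizeof(avx_state_map) / sizeof(avx_state_map[0]); i++) {
        const struct subsection_map *m = &avx_state_map[i];
        printf("%-14s regs %2d-%2d, XMM_Q(%d)..XMM_Q(%d)\n",
               m->name, m->first_reg, m->first_reg + m->nregs - 1,
               m->first_q, m->first_q + m->nquads - 1);
    }
    return 0;
}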