mirror of https://gitee.com/openkylin/qemu.git
target/arm: Update contiguous first-fault and no-fault loads
With sve_cont_ldst_pages, the differences between first-fault and no-fault are minimal, so unify the routines. With cpu_probe_watchpoint, we are able to make progress through pages with TLB_WATCHPOINT set when the watchpoint does not actually fire. Reviewed-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20200508154359.7494-15-richard.henderson@linaro.org Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent
5c9b8458a0
commit
c647673ce4
|
@ -4101,18 +4101,6 @@ static intptr_t find_next_active(uint64_t *vg, intptr_t reg_off,
|
|||
return reg_off;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the maximum offset <= @mem_max which is still within the page
|
||||
* referenced by @base + @mem_off.
|
||||
*/
|
||||
static intptr_t max_for_page(target_ulong base, intptr_t mem_off,
                             intptr_t mem_max)
{
    /* Guest virtual address of the element located at @mem_off. */
    const target_ulong vaddr = base + mem_off;

    /*
     * Distance from vaddr to the end of the page containing it.
     * NOTE(review): this presumes TARGET_PAGE_MASK has every bit above the
     * in-page offset set, so that negating (vaddr | mask) yields the number
     * of bytes left in the page -- confirm against the macro's definition.
     */
    const intptr_t to_page_end = -(intptr_t)(vaddr | TARGET_PAGE_MASK);

    /* Distance from @mem_off to the overall limit @mem_max. */
    const intptr_t to_limit = mem_max - mem_off;

    /* Advance from @mem_off by whichever bound is reached first. */
    if (to_limit < to_page_end) {
        return mem_off + to_limit;
    }
    return mem_off + to_page_end;
}
|
||||
|
||||
/*
|
||||
* Resolve the guest virtual address to info->host and info->flags.
|
||||
* If @nofault, return false if the page is invalid, otherwise
|
||||
|
@ -4435,19 +4423,6 @@ static void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env,
|
|||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* The result of tlb_vaddr_to_host for user-only is just g2h(x),
|
||||
* which is always non-null. Elide the useless test.
|
||||
*/
|
||||
static inline bool test_host_page(void *host)
{
#ifndef CONFIG_USER_ONLY
    /* System emulation: the lookup may have produced no host pointer. */
    return likely(host != NULL);
#else
    /* User-only build: skip the NULL test and always report success. */
    return true;
#endif
}
|
||||
|
||||
/*
|
||||
* Common helper for all contiguous 1,2,3,4-register predicated stores.
|
||||
*/
|
||||
|
@ -4705,167 +4680,167 @@ static void record_fault(CPUARMState *env, uintptr_t i, uintptr_t oprsz)
|
|||
}
|
||||
|
||||
/*
|
||||
* Common helper for all contiguous first-fault loads.
|
||||
* Common helper for all contiguous no-fault and first-fault loads.
|
||||
*/
|
||||
static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr,
|
||||
uint32_t desc, const uintptr_t retaddr,
|
||||
const int esz, const int msz,
|
||||
sve_ldst1_host_fn *host_fn,
|
||||
sve_ldst1_tlb_fn *tlb_fn)
|
||||
static inline QEMU_ALWAYS_INLINE
|
||||
void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr,
|
||||
uint32_t desc, const uintptr_t retaddr,
|
||||
const int esz, const int msz, const SVEContFault fault,
|
||||
sve_ldst1_host_fn *host_fn,
|
||||
sve_ldst1_tlb_fn *tlb_fn)
|
||||
{
|
||||
const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
|
||||
const int mmu_idx = get_mmuidx(oi);
|
||||
const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
|
||||
void *vd = &env->vfp.zregs[rd];
|
||||
const int diffsz = esz - msz;
|
||||
const intptr_t reg_max = simd_oprsz(desc);
|
||||
const intptr_t mem_max = reg_max >> diffsz;
|
||||
intptr_t split, reg_off, mem_off, i;
|
||||
intptr_t reg_off, mem_off, reg_last;
|
||||
SVEContLdSt info;
|
||||
int flags;
|
||||
void *host;
|
||||
|
||||
/* Skip to the first active element. */
|
||||
reg_off = find_next_active(vg, 0, reg_max, esz);
|
||||
if (unlikely(reg_off == reg_max)) {
|
||||
/* Find the active elements. */
|
||||
if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, 1 << msz)) {
|
||||
/* The entire predicate was false; no load occurs. */
|
||||
memset(vd, 0, reg_max);
|
||||
return;
|
||||
}
|
||||
mem_off = reg_off >> diffsz;
|
||||
reg_off = info.reg_off_first[0];
|
||||
|
||||
/*
|
||||
* If the (remaining) load is entirely within a single page, then:
|
||||
* For softmmu, and the tlb hits, then no faults will occur;
|
||||
* For user-only, either the first load will fault or none will.
|
||||
* We can thus perform the load directly to the destination and
|
||||
* Vd will be unmodified on any exception path.
|
||||
*/
|
||||
split = max_for_page(addr, mem_off, mem_max);
|
||||
if (likely(split == mem_max)) {
|
||||
host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx);
|
||||
if (test_host_page(host)) {
|
||||
i = reg_off;
|
||||
host -= mem_off;
|
||||
do {
|
||||
host_fn(vd, i, host + (i >> diffsz));
|
||||
i = find_next_active(vg, i + (1 << esz), reg_max, esz);
|
||||
} while (i < reg_max);
|
||||
/* After any fault, zero any leading inactive elements. */
|
||||
/* Probe the page(s). */
|
||||
if (!sve_cont_ldst_pages(&info, fault, env, addr, MMU_DATA_LOAD, retaddr)) {
|
||||
/* Fault on first element. */
|
||||
tcg_debug_assert(fault == FAULT_NO);
|
||||
memset(vd, 0, reg_max);
|
||||
goto do_fault;
|
||||
}
|
||||
|
||||
mem_off = info.mem_off_first[0];
|
||||
flags = info.page[0].flags;
|
||||
|
||||
if (fault == FAULT_FIRST) {
|
||||
/*
|
||||
* Special handling of the first active element,
|
||||
* if it crosses a page boundary or is MMIO.
|
||||
*/
|
||||
bool is_split = mem_off == info.mem_off_split;
|
||||
/* TODO: MTE check. */
|
||||
if (unlikely(flags != 0) || unlikely(is_split)) {
|
||||
/*
|
||||
* Use the slow path for cross-page handling.
|
||||
* Might trap for MMIO or watchpoints.
|
||||
*/
|
||||
tlb_fn(env, vd, reg_off, addr + mem_off, retaddr);
|
||||
|
||||
/* After any fault, zero the other elements. */
|
||||
swap_memzero(vd, reg_off);
|
||||
return;
|
||||
reg_off += 1 << esz;
|
||||
mem_off += 1 << msz;
|
||||
swap_memzero(vd + reg_off, reg_max - reg_off);
|
||||
|
||||
if (is_split) {
|
||||
goto second_page;
|
||||
}
|
||||
} else {
|
||||
memset(vd, 0, reg_max);
|
||||
}
|
||||
} else {
|
||||
memset(vd, 0, reg_max);
|
||||
if (unlikely(mem_off == info.mem_off_split)) {
|
||||
/* The first active element crosses a page boundary. */
|
||||
flags |= info.page[1].flags;
|
||||
if (unlikely(flags & TLB_MMIO)) {
|
||||
/* Some page is MMIO, see below. */
|
||||
goto do_fault;
|
||||
}
|
||||
if (unlikely(flags & TLB_WATCHPOINT) &&
|
||||
(cpu_watchpoint_address_matches
|
||||
(env_cpu(env), addr + mem_off, 1 << msz)
|
||||
& BP_MEM_READ)) {
|
||||
/* Watchpoint hit, see below. */
|
||||
goto do_fault;
|
||||
}
|
||||
/* TODO: MTE check. */
|
||||
/*
|
||||
* Use the slow path for cross-page handling.
|
||||
* This is RAM, without a watchpoint, and will not trap.
|
||||
*/
|
||||
tlb_fn(env, vd, reg_off, addr + mem_off, retaddr);
|
||||
goto second_page;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform one normal read, which will fault or not.
|
||||
* But it is likely to bring the page into the tlb.
|
||||
* From this point on, all memory operations are MemSingleNF.
|
||||
*
|
||||
* Per the MemSingleNF pseudocode, a no-fault load from Device memory
|
||||
* must not actually hit the bus -- it returns (UNKNOWN, FAULT) instead.
|
||||
*
|
||||
* Unfortunately we do not have access to the memory attributes from the
|
||||
* PTE to tell Device memory from Normal memory. So we make a mostly
|
||||
* correct check, and indicate (UNKNOWN, FAULT) for any MMIO.
|
||||
* This gives the right answer for the common cases of "Normal memory,
|
||||
* backed by host RAM" and "Device memory, backed by MMIO".
|
||||
* The architecture allows us to suppress an NF load and return
|
||||
* (UNKNOWN, FAULT) for any reason, so our behaviour for the corner
|
||||
* case of "Normal memory, backed by MMIO" is permitted. The case we
|
||||
* get wrong is "Device memory, backed by host RAM", for which we
|
||||
* should return (UNKNOWN, FAULT) but do not.
|
||||
*
|
||||
* Similarly, CPU_BP breakpoints would raise exceptions, and so
|
||||
* return (UNKNOWN, FAULT). For simplicity, we consider gdb and
|
||||
* architectural breakpoints the same.
|
||||
*/
|
||||
tlb_fn(env, vd, reg_off, addr + mem_off, retaddr);
|
||||
if (unlikely(flags & TLB_MMIO)) {
|
||||
goto do_fault;
|
||||
}
|
||||
|
||||
/* After any fault, zero any leading predicated false elts. */
|
||||
swap_memzero(vd, reg_off);
|
||||
mem_off += 1 << msz;
|
||||
reg_off += 1 << esz;
|
||||
reg_last = info.reg_off_last[0];
|
||||
host = info.page[0].host;
|
||||
|
||||
/* Try again to read the balance of the page. */
|
||||
split = max_for_page(addr, mem_off - 1, mem_max);
|
||||
if (split >= (1 << msz)) {
|
||||
host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx);
|
||||
if (host) {
|
||||
host -= mem_off;
|
||||
do {
|
||||
do {
|
||||
uint64_t pg = *(uint64_t *)(vg + (reg_off >> 3));
|
||||
do {
|
||||
if ((pg >> (reg_off & 63)) & 1) {
|
||||
if (unlikely(flags & TLB_WATCHPOINT) &&
|
||||
(cpu_watchpoint_address_matches
|
||||
(env_cpu(env), addr + mem_off, 1 << msz)
|
||||
& BP_MEM_READ)) {
|
||||
goto do_fault;
|
||||
}
|
||||
/* TODO: MTE check. */
|
||||
host_fn(vd, reg_off, host + mem_off);
|
||||
reg_off += 1 << esz;
|
||||
reg_off = find_next_active(vg, reg_off, reg_max, esz);
|
||||
mem_off = reg_off >> diffsz;
|
||||
} while (split - mem_off >= (1 << msz));
|
||||
}
|
||||
}
|
||||
|
||||
record_fault(env, reg_off, reg_max);
|
||||
}
|
||||
|
||||
/*
|
||||
* Common helper for all contiguous no-fault loads.
|
||||
*/
|
||||
static void sve_ldnf1_r(CPUARMState *env, void *vg, const target_ulong addr,
|
||||
uint32_t desc, const int esz, const int msz,
|
||||
sve_ldst1_host_fn *host_fn)
|
||||
{
|
||||
const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
|
||||
void *vd = &env->vfp.zregs[rd];
|
||||
const int diffsz = esz - msz;
|
||||
const intptr_t reg_max = simd_oprsz(desc);
|
||||
const intptr_t mem_max = reg_max >> diffsz;
|
||||
const int mmu_idx = cpu_mmu_index(env, false);
|
||||
intptr_t split, reg_off, mem_off;
|
||||
void *host;
|
||||
|
||||
#ifdef CONFIG_USER_ONLY
|
||||
host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_idx);
|
||||
if (likely(page_check_range(addr, mem_max, PAGE_READ) == 0)) {
|
||||
/* The entire operation is valid and will not fault. */
|
||||
reg_off = 0;
|
||||
do {
|
||||
mem_off = reg_off >> diffsz;
|
||||
host_fn(vd, reg_off, host + mem_off);
|
||||
}
|
||||
reg_off += 1 << esz;
|
||||
reg_off = find_next_active(vg, reg_off, reg_max, esz);
|
||||
} while (reg_off < reg_max);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
mem_off += 1 << msz;
|
||||
} while (reg_off <= reg_last && (reg_off & 63));
|
||||
} while (reg_off <= reg_last);
|
||||
|
||||
/* There will be no fault, so we may modify in advance. */
|
||||
memset(vd, 0, reg_max);
|
||||
|
||||
/* Skip to the first active element. */
|
||||
reg_off = find_next_active(vg, 0, reg_max, esz);
|
||||
if (unlikely(reg_off == reg_max)) {
|
||||
/* The entire predicate was false; no load occurs. */
|
||||
return;
|
||||
}
|
||||
mem_off = reg_off >> diffsz;
|
||||
|
||||
#ifdef CONFIG_USER_ONLY
|
||||
if (page_check_range(addr + mem_off, 1 << msz, PAGE_READ) == 0) {
|
||||
/* At least one load is valid; take the rest of the page. */
|
||||
split = max_for_page(addr, mem_off + (1 << msz) - 1, mem_max);
|
||||
do {
|
||||
host_fn(vd, reg_off, host + mem_off);
|
||||
reg_off += 1 << esz;
|
||||
reg_off = find_next_active(vg, reg_off, reg_max, esz);
|
||||
mem_off = reg_off >> diffsz;
|
||||
} while (split - mem_off >= (1 << msz));
|
||||
}
|
||||
#else
|
||||
/*
|
||||
* If the address is not in the TLB, we have no way to bring the
|
||||
* entry into the TLB without also risking a fault. Note that
|
||||
* the corollary is that we never load from an address not in RAM.
|
||||
*
|
||||
* This last is out of spec, in a weird corner case.
|
||||
* Per the MemNF/MemSingleNF pseudocode, a NF load from Device memory
|
||||
* must not actually hit the bus -- it returns UNKNOWN data instead.
|
||||
* But if you map non-RAM with Normal memory attributes and do a NF
|
||||
* load then it should access the bus. (Nobody ought actually do this
|
||||
* in the real world, obviously.)
|
||||
*
|
||||
* Then there are the annoying special cases with watchpoints...
|
||||
* TODO: Add a form of non-faulting loads using cc->tlb_fill(probe=true).
|
||||
* MemSingleNF is allowed to fail for any reason. We have special
|
||||
* code above to handle the first element crossing a page boundary.
|
||||
* As an implementation choice, decline to handle a cross-page element
|
||||
* in any other position.
|
||||
*/
|
||||
host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx);
|
||||
split = max_for_page(addr, mem_off, mem_max);
|
||||
if (host && split >= (1 << msz)) {
|
||||
host -= mem_off;
|
||||
do {
|
||||
host_fn(vd, reg_off, host + mem_off);
|
||||
reg_off += 1 << esz;
|
||||
reg_off = find_next_active(vg, reg_off, reg_max, esz);
|
||||
mem_off = reg_off >> diffsz;
|
||||
} while (split - mem_off >= (1 << msz));
|
||||
reg_off = info.reg_off_split;
|
||||
if (reg_off >= 0) {
|
||||
goto do_fault;
|
||||
}
|
||||
#endif
|
||||
|
||||
second_page:
|
||||
reg_off = info.reg_off_first[1];
|
||||
if (likely(reg_off < 0)) {
|
||||
/* No active elements on the second page. All done. */
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* MemSingleNF is allowed to fail for any reason. As an implementation
|
||||
* choice, decline to handle elements on the second page. This should
|
||||
* be low frequency as the guest walks through memory -- the next
|
||||
* iteration of the guest's loop should be aligned on the page boundary,
|
||||
* and then all following iterations will stay aligned.
|
||||
*/
|
||||
|
||||
do_fault:
|
||||
record_fault(env, reg_off, reg_max);
|
||||
}
|
||||
|
||||
|
@ -4873,58 +4848,61 @@ static void sve_ldnf1_r(CPUARMState *env, void *vg, const target_ulong addr,
|
|||
void HELPER(sve_ldff1##PART##_r)(CPUARMState *env, void *vg, \
|
||||
target_ulong addr, uint32_t desc) \
|
||||
{ \
|
||||
sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, 0, \
|
||||
sve_ld1##PART##_host, sve_ld1##PART##_tlb); \
|
||||
sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_FIRST, \
|
||||
sve_ld1##PART##_host, sve_ld1##PART##_tlb); \
|
||||
} \
|
||||
void HELPER(sve_ldnf1##PART##_r)(CPUARMState *env, void *vg, \
|
||||
target_ulong addr, uint32_t desc) \
|
||||
{ \
|
||||
sve_ldnf1_r(env, vg, addr, desc, ESZ, 0, sve_ld1##PART##_host); \
|
||||
sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_NO, \
|
||||
sve_ld1##PART##_host, sve_ld1##PART##_tlb); \
|
||||
}
|
||||
|
||||
#define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \
|
||||
void HELPER(sve_ldff1##PART##_le_r)(CPUARMState *env, void *vg, \
|
||||
target_ulong addr, uint32_t desc) \
|
||||
{ \
|
||||
sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \
|
||||
sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \
|
||||
sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \
|
||||
sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \
|
||||
} \
|
||||
void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState *env, void *vg, \
|
||||
target_ulong addr, uint32_t desc) \
|
||||
{ \
|
||||
sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_le_host); \
|
||||
sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \
|
||||
sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \
|
||||
} \
|
||||
void HELPER(sve_ldff1##PART##_be_r)(CPUARMState *env, void *vg, \
|
||||
target_ulong addr, uint32_t desc) \
|
||||
{ \
|
||||
sve_ldff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, \
|
||||
sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \
|
||||
sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \
|
||||
sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \
|
||||
} \
|
||||
void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState *env, void *vg, \
|
||||
target_ulong addr, uint32_t desc) \
|
||||
{ \
|
||||
sve_ldnf1_r(env, vg, addr, desc, ESZ, MSZ, sve_ld1##PART##_be_host); \
|
||||
sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \
|
||||
sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \
|
||||
}
|
||||
|
||||
DO_LDFF1_LDNF1_1(bb, 0)
|
||||
DO_LDFF1_LDNF1_1(bhu, 1)
|
||||
DO_LDFF1_LDNF1_1(bhs, 1)
|
||||
DO_LDFF1_LDNF1_1(bsu, 2)
|
||||
DO_LDFF1_LDNF1_1(bss, 2)
|
||||
DO_LDFF1_LDNF1_1(bdu, 3)
|
||||
DO_LDFF1_LDNF1_1(bds, 3)
|
||||
DO_LDFF1_LDNF1_1(bb, MO_8)
|
||||
DO_LDFF1_LDNF1_1(bhu, MO_16)
|
||||
DO_LDFF1_LDNF1_1(bhs, MO_16)
|
||||
DO_LDFF1_LDNF1_1(bsu, MO_32)
|
||||
DO_LDFF1_LDNF1_1(bss, MO_32)
|
||||
DO_LDFF1_LDNF1_1(bdu, MO_64)
|
||||
DO_LDFF1_LDNF1_1(bds, MO_64)
|
||||
|
||||
DO_LDFF1_LDNF1_2(hh, 1, 1)
|
||||
DO_LDFF1_LDNF1_2(hsu, 2, 1)
|
||||
DO_LDFF1_LDNF1_2(hss, 2, 1)
|
||||
DO_LDFF1_LDNF1_2(hdu, 3, 1)
|
||||
DO_LDFF1_LDNF1_2(hds, 3, 1)
|
||||
DO_LDFF1_LDNF1_2(hh, MO_16, MO_16)
|
||||
DO_LDFF1_LDNF1_2(hsu, MO_32, MO_16)
|
||||
DO_LDFF1_LDNF1_2(hss, MO_32, MO_16)
|
||||
DO_LDFF1_LDNF1_2(hdu, MO_64, MO_16)
|
||||
DO_LDFF1_LDNF1_2(hds, MO_64, MO_16)
|
||||
|
||||
DO_LDFF1_LDNF1_2(ss, 2, 2)
|
||||
DO_LDFF1_LDNF1_2(sdu, 3, 2)
|
||||
DO_LDFF1_LDNF1_2(sds, 3, 2)
|
||||
DO_LDFF1_LDNF1_2(ss, MO_32, MO_32)
|
||||
DO_LDFF1_LDNF1_2(sdu, MO_64, MO_32)
|
||||
DO_LDFF1_LDNF1_2(sds, MO_64, MO_32)
|
||||
|
||||
DO_LDFF1_LDNF1_2(dd, 3, 3)
|
||||
DO_LDFF1_LDNF1_2(dd, MO_64, MO_64)
|
||||
|
||||
#undef DO_LDFF1_LDNF1_1
|
||||
#undef DO_LDFF1_LDNF1_2
|
||||
|
|
Loading…
Reference in New Issue