Merge eccc876724 ("Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs") into android-mainline

Steps on the way to 5.10-rc4

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I9e0fa89c0f6f306fe802ae95c8d01d9ba558e111
Documentation/filesystems/ext4/journal.rst
@@ -256,6 +256,10 @@ which is 1024 bytes long:
      - s_padding2
      -
+   * - 0x54
+     - __be32
+     - s_num_fc_blocks
+     - Number of fast commit blocks in the journal.
    * - 0x58
      - __u32
      - s_padding[42]
      -
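
The field added above is stored big-endian on disk; the ext4_fc_init() hunk near the end of this merge reads it (it is named s_num_fc_blks in the C struct) with a plain be32_to_cpu() conversion. An illustrative one-liner, assuming jbd2's journal_t with a mapped j_superblock:

	/* Sketch: fetch the fast-commit area size recorded in the journal
	 * superblock; 0 means the filesystem never set one. */
	int num_fc_blocks = be32_to_cpu(journal->j_superblock->s_num_fc_blks);
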
@@ -310,6 +314,8 @@ The journal incompat features are any combination of the following:
      - This journal uses v3 of the checksum on-disk format. This is the same as
        v2, but the journal block tag size is fixed regardless of the size of
        block numbers. (JBD2_FEATURE_INCOMPAT_CSUM_V3)
+   * - 0x20
+     - Journal has fast commit blocks. (JBD2_FEATURE_INCOMPAT_FAST_COMMIT)
 
 .. _jbd2_checksum_type:

Documentation/filesystems/ext4/super.rst
@@ -596,6 +596,13 @@ following:
      - Sparse Super Block, v2. If this flag is set, the SB field s_backup_bgs
        points to the two block groups that contain backup superblocks
        (COMPAT_SPARSE_SUPER2).
+   * - 0x400
+     - Fast commits supported. Although fast commits blocks are
+       backward incompatible, fast commit blocks are not always
+       present in the journal. If fast commit blocks are present in
+       the journal, JBD2 incompat feature
+       (JBD2_FEATURE_INCOMPAT_FAST_COMMIT) gets
+       set (COMPAT_FAST_COMMIT).
 
 .. _super_incompat:
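
A small illustrative check of this compat bit against an in-memory ext4 superblock. The constant matches the EXT4_FEATURE_COMPAT_FAST_COMMIT define in the fs/ext4/ext4.h hunk later in this merge; the helper name is hypothetical:

/* Hypothetical helper: true if the on-disk superblock advertises fast
 * commits (s_feature_compat is little-endian on disk). */
static bool fs_supports_fast_commit(const struct ext4_super_block *es)
{
	return le32_to_cpu(es->s_feature_compat) &
	       EXT4_FEATURE_COMPAT_FAST_COMMIT;	/* 0x0400 */
}
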

Documentation/filesystems/journalling.rst
@@ -136,10 +136,8 @@ Fast commits
 ~~~~~~~~~~~~
 
 JBD2 to also allows you to perform file-system specific delta commits known as
-fast commits. In order to use fast commits, you first need to call
-:c:func:`jbd2_fc_init` and tell how many blocks at the end of journal
-area should be reserved for fast commits. Along with that, you will also need
-to set following callbacks that perform correspodning work:
+fast commits. In order to use fast commits, you will need to set following
+callbacks that perform correspodning work:
 
 `journal->j_fc_cleanup_cb`: Cleanup function called after every full commit and
 fast commit.
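
As a rough sketch of the registration this documentation describes — a filesystem wiring up the fast-commit hooks on its journal_t, modeled on the ext4_fc_init() hunk later in this merge. The myfs_* names and the policy check are placeholders, not kernel API:

static void myfs_fc_setup(struct super_block *sb, journal_t *journal)
{
	/* Replay hook: invoked for fast commit blocks found during journal
	 * recovery, even if fast commits are disabled for new mounts. */
	journal->j_fc_replay_callback = myfs_fc_replay;

	if (!myfs_wants_fast_commit(sb))	/* placeholder policy check */
		return;
	/* Cleanup hook: runs after every full commit and fast commit. */
	journal->j_fc_cleanup_callback = myfs_fc_cleanup;
}
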

Documentation/virt/kvm/api.rst
@@ -6367,7 +6367,7 @@ accesses that would usually trigger a #GP by KVM into the guest will
 instead get bounced to user space through the KVM_EXIT_X86_RDMSR and
 KVM_EXIT_X86_WRMSR exit notifications.
 
-8.25 KVM_X86_SET_MSR_FILTER
+8.27 KVM_X86_SET_MSR_FILTER
 ---------------------------
 
 :Architectures: x86
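
For reference, a hedged sketch of the userspace side of these exits. The field names follow struct kvm_run's "msr" union member; treat the exact layout, and the vmm_* helpers, as assumptions:

	/* Fragment of a hypothetical VMM's KVM_RUN loop. */
	switch (run->exit_reason) {
	case KVM_EXIT_X86_RDMSR:
		/* run->msr.reason distinguishes unknown/invalid/filtered MSRs. */
		run->msr.data = vmm_emulate_rdmsr(run->msr.index); /* placeholder */
		run->msr.error = 0;	/* nonzero makes KVM inject a #GP */
		break;
	case KVM_EXIT_X86_WRMSR:
		run->msr.error = vmm_emulate_wrmsr(run->msr.index, run->msr.data);
		break;
	}
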
@@ -6381,8 +6381,7 @@ In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to
 trap and emulate MSRs that are outside of the scope of KVM as well as
 limit the attack surface on KVM's MSR emulation code.
 
-
-8.26 KVM_CAP_ENFORCE_PV_CPUID
+8.28 KVM_CAP_ENFORCE_PV_CPUID
 -----------------------------
 
 Architectures: x86

MAINTAINERS
@@ -6614,6 +6614,7 @@ Q:	http://patchwork.ozlabs.org/project/linux-ext4/list/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git
+F:	Documentation/filesystems/ext4/
 F:	fs/ext4/
 F:	include/trace/events/ext4.h
 
 Extended Verification Module (EVM)
 M:	Mimi Zohar <zohar@linux.ibm.com>

arch/arm64/kvm/mmu.c
@@ -788,10 +788,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	}
 
 	switch (vma_shift) {
+#ifndef __PAGETABLE_PMD_FOLDED
 	case PUD_SHIFT:
 		if (fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE))
 			break;
 		fallthrough;
+#endif
 	case CONT_PMD_SHIFT:
 		vma_shift = PMD_SHIFT;
 		fallthrough;

arch/arm64/kvm/sys_regs.c
@@ -1069,7 +1069,7 @@ static bool trap_ptrauth(struct kvm_vcpu *vcpu,
 static unsigned int ptrauth_visibility(const struct kvm_vcpu *vcpu,
 				       const struct sys_reg_desc *rd)
 {
-	return vcpu_has_ptrauth(vcpu) ? 0 : REG_HIDDEN_USER | REG_HIDDEN_GUEST;
+	return vcpu_has_ptrauth(vcpu) ? 0 : REG_HIDDEN;
 }
 
 #define __PTRAUTH_KEY(k) \
@@ -1162,6 +1162,22 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
 	return val;
 }
 
+static unsigned int id_visibility(const struct kvm_vcpu *vcpu,
+				  const struct sys_reg_desc *r)
+{
+	u32 id = sys_reg((u32)r->Op0, (u32)r->Op1,
+			 (u32)r->CRn, (u32)r->CRm, (u32)r->Op2);
+
+	switch (id) {
+	case SYS_ID_AA64ZFR0_EL1:
+		if (!vcpu_has_sve(vcpu))
+			return REG_RAZ;
+		break;
+	}
+
+	return 0;
+}
+
 /* cpufeature ID register access trap handlers */
 
 static bool __access_id_reg(struct kvm_vcpu *vcpu,
@@ -1180,7 +1196,9 @@ static bool access_id_reg(struct kvm_vcpu *vcpu,
 			  struct sys_reg_params *p,
 			  const struct sys_reg_desc *r)
 {
-	return __access_id_reg(vcpu, p, r, false);
+	bool raz = sysreg_visible_as_raz(vcpu, r);
+
+	return __access_id_reg(vcpu, p, r, raz);
 }
 
 static bool access_raz_id_reg(struct kvm_vcpu *vcpu,
@@ -1201,72 +1219,7 @@ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu,
 	if (vcpu_has_sve(vcpu))
 		return 0;
 
-	return REG_HIDDEN_USER | REG_HIDDEN_GUEST;
-}
-
-/* Visibility overrides for SVE-specific ID registers */
-static unsigned int sve_id_visibility(const struct kvm_vcpu *vcpu,
-				      const struct sys_reg_desc *rd)
-{
-	if (vcpu_has_sve(vcpu))
-		return 0;
-
-	return REG_HIDDEN_USER;
-}
-
-/* Generate the emulated ID_AA64ZFR0_EL1 value exposed to the guest */
-static u64 guest_id_aa64zfr0_el1(const struct kvm_vcpu *vcpu)
-{
-	if (!vcpu_has_sve(vcpu))
-		return 0;
-
-	return read_sanitised_ftr_reg(SYS_ID_AA64ZFR0_EL1);
-}
-
-static bool access_id_aa64zfr0_el1(struct kvm_vcpu *vcpu,
-				   struct sys_reg_params *p,
-				   const struct sys_reg_desc *rd)
-{
-	if (p->is_write)
-		return write_to_read_only(vcpu, p, rd);
-
-	p->regval = guest_id_aa64zfr0_el1(vcpu);
-	return true;
-}
-
-static int get_id_aa64zfr0_el1(struct kvm_vcpu *vcpu,
-		const struct sys_reg_desc *rd,
-		const struct kvm_one_reg *reg, void __user *uaddr)
-{
-	u64 val;
-
-	if (WARN_ON(!vcpu_has_sve(vcpu)))
-		return -ENOENT;
-
-	val = guest_id_aa64zfr0_el1(vcpu);
-	return reg_to_user(uaddr, &val, reg->id);
-}
-
-static int set_id_aa64zfr0_el1(struct kvm_vcpu *vcpu,
-		const struct sys_reg_desc *rd,
-		const struct kvm_one_reg *reg, void __user *uaddr)
-{
-	const u64 id = sys_reg_to_index(rd);
-	int err;
-	u64 val;
-
-	if (WARN_ON(!vcpu_has_sve(vcpu)))
-		return -ENOENT;
-
-	err = reg_from_user(&val, uaddr, id);
-	if (err)
-		return err;
-
-	/* This is what we mean by invariant: you can't change it. */
-	if (val != guest_id_aa64zfr0_el1(vcpu))
-		return -EINVAL;
-
-	return 0;
+	return REG_HIDDEN;
 }
 
 /*
@@ -1308,13 +1261,17 @@ static int __set_id_reg(const struct kvm_vcpu *vcpu,
 static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 		      const struct kvm_one_reg *reg, void __user *uaddr)
 {
-	return __get_id_reg(vcpu, rd, uaddr, false);
+	bool raz = sysreg_visible_as_raz(vcpu, rd);
+
+	return __get_id_reg(vcpu, rd, uaddr, raz);
 }
 
 static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 		      const struct kvm_one_reg *reg, void __user *uaddr)
 {
-	return __set_id_reg(vcpu, rd, uaddr, false);
+	bool raz = sysreg_visible_as_raz(vcpu, rd);
+
+	return __set_id_reg(vcpu, rd, uaddr, raz);
 }
 
 static int get_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
@@ -1406,6 +1363,7 @@ static bool access_mte_regs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	.access = access_id_reg,		\
 	.get_user = get_id_reg,			\
 	.set_user = set_id_reg,			\
+	.visibility = id_visibility,		\
 }
 
 /*
@@ -1527,7 +1485,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	ID_SANITISED(ID_AA64PFR1_EL1),
 	ID_UNALLOCATED(4,2),
 	ID_UNALLOCATED(4,3),
-	{ SYS_DESC(SYS_ID_AA64ZFR0_EL1), access_id_aa64zfr0_el1, .get_user = get_id_aa64zfr0_el1, .set_user = set_id_aa64zfr0_el1, .visibility = sve_id_visibility },
+	ID_SANITISED(ID_AA64ZFR0_EL1),
 	ID_UNALLOCATED(4,5),
 	ID_UNALLOCATED(4,6),
 	ID_UNALLOCATED(4,7),
@@ -2194,7 +2152,7 @@ static void perform_access(struct kvm_vcpu *vcpu,
 	trace_kvm_sys_access(*vcpu_pc(vcpu), params, r);
 
 	/* Check for regs disabled by runtime config */
-	if (sysreg_hidden_from_guest(vcpu, r)) {
+	if (sysreg_hidden(vcpu, r)) {
 		kvm_inject_undefined(vcpu);
 		return;
 	}
@@ -2693,7 +2651,7 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
 		return get_invariant_sys_reg(reg->id, uaddr);
 
 	/* Check for regs disabled by runtime config */
-	if (sysreg_hidden_from_user(vcpu, r))
+	if (sysreg_hidden(vcpu, r))
 		return -ENOENT;
 
 	if (r->get_user)
@@ -2718,7 +2676,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
 		return set_invariant_sys_reg(reg->id, uaddr);
 
 	/* Check for regs disabled by runtime config */
-	if (sysreg_hidden_from_user(vcpu, r))
+	if (sysreg_hidden(vcpu, r))
 		return -ENOENT;
 
 	if (r->set_user)
@@ -2789,7 +2747,7 @@ static int walk_one_sys_reg(const struct kvm_vcpu *vcpu,
 	if (!(rd->reg || rd->get_user))
 		return 0;
 
-	if (sysreg_hidden_from_user(vcpu, rd))
+	if (sysreg_hidden(vcpu, rd))
 		return 0;
 
 	if (!copy_reg_to_user(rd, uind))

arch/arm64/kvm/sys_regs.h
@@ -59,8 +59,8 @@ struct sys_reg_desc {
 			const struct sys_reg_desc *rd);
 };
 
-#define REG_HIDDEN_USER		(1 << 0) /* hidden from userspace ioctls */
-#define REG_HIDDEN_GUEST	(1 << 1) /* hidden from guest */
+#define REG_HIDDEN		(1 << 0) /* hidden from userspace and guest */
+#define REG_RAZ			(1 << 1) /* RAZ from userspace and guest */
 
 static __printf(2, 3)
 inline void print_sys_reg_msg(const struct sys_reg_params *p,
@@ -111,22 +111,22 @@ static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r
 	__vcpu_sys_reg(vcpu, r->reg) = r->val;
 }
 
-static inline bool sysreg_hidden_from_guest(const struct kvm_vcpu *vcpu,
-					    const struct sys_reg_desc *r)
+static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu,
+				 const struct sys_reg_desc *r)
 {
 	if (likely(!r->visibility))
 		return false;
 
-	return r->visibility(vcpu, r) & REG_HIDDEN_GUEST;
+	return r->visibility(vcpu, r) & REG_HIDDEN;
 }
 
-static inline bool sysreg_hidden_from_user(const struct kvm_vcpu *vcpu,
-					   const struct sys_reg_desc *r)
+static inline bool sysreg_visible_as_raz(const struct kvm_vcpu *vcpu,
+					 const struct sys_reg_desc *r)
 {
 	if (likely(!r->visibility))
 		return false;
 
-	return r->visibility(vcpu, r) & REG_HIDDEN_USER;
+	return r->visibility(vcpu, r) & REG_RAZ;
 }
 
 static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
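
To illustrate the consolidated scheme, a hypothetical visibility callback using the new flag names (the feature predicates are placeholders). A register is now either hidden outright — UNDEF in the guest, -ENOENT from the ioctls — or exposed as RAZ, which is exactly how SYS_ID_AA64ZFR0_EL1 is handled above when SVE is absent:

static unsigned int myreg_visibility(const struct kvm_vcpu *vcpu,
				     const struct sys_reg_desc *rd)
{
	if (!vcpu_has_feature_x(vcpu))		/* placeholder predicate */
		return REG_HIDDEN;		/* invisible to guest and userspace */
	if (vcpu_feature_x_is_limited(vcpu))	/* placeholder predicate */
		return REG_RAZ;			/* reads as zero everywhere */
	return 0;				/* fully visible */
}
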

arch/x86/kvm/cpuid.c
@@ -90,6 +90,20 @@ static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent)
 	return 0;
 }
 
+void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
+
+	/*
+	 * save the feature bitmap to avoid cpuid lookup for every PV
+	 * operation
+	 */
+	if (best)
+		vcpu->arch.pv_cpuid.features = best->eax;
+}
+
 void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
@@ -124,13 +138,6 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
 		    (best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
 			best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
 
-	/*
-	 * save the feature bitmap to avoid cpuid lookup for every PV
-	 * operation
-	 */
-	if (best)
-		vcpu->arch.pv_cpuid.features = best->eax;
-
 	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
 		best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
 		if (best)
@@ -162,6 +169,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	vcpu->arch.guest_supported_xcr0 =
 		(best->eax | ((u64)best->edx << 32)) & supported_xcr0;
 
+	kvm_update_pv_runtime(vcpu);
+
 	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
 	kvm_mmu_reset_context(vcpu);

arch/x86/kvm/cpuid.h
@@ -11,6 +11,7 @@ extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
 void kvm_set_cpu_caps(void);
 
 void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
+void kvm_update_pv_runtime(struct kvm_vcpu *vcpu);
 struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
 					      u32 function, u32 index);
 int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,

arch/x86/kvm/mmu/mmu.c
@@ -856,12 +856,14 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
 	} else {
 		rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte);
 		desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
-		while (desc->sptes[PTE_LIST_EXT-1] && desc->more) {
-			desc = desc->more;
+		while (desc->sptes[PTE_LIST_EXT-1]) {
 			count += PTE_LIST_EXT;
-		}
-		if (desc->sptes[PTE_LIST_EXT-1]) {
-			desc->more = mmu_alloc_pte_list_desc(vcpu);
+
+			if (!desc->more) {
+				desc->more = mmu_alloc_pte_list_desc(vcpu);
+				desc = desc->more;
+				break;
+			}
 			desc = desc->more;
 		}
 		for (i = 0; desc->sptes[i]; ++i)

arch/x86/kvm/x86.c
@@ -255,11 +255,10 @@ static struct kmem_cache *x86_emulator_cache;
 
 /*
  * When called, it means the previous get/set msr reached an invalid msr.
- * Return 0 if we want to ignore/silent this failed msr access, or 1 if we want
- * to fail the caller.
+ * Return true if we want to ignore/silent this failed msr access.
  */
-static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
-				 u64 data, bool write)
+static bool kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
+				  u64 data, bool write)
 {
 	const char *op = write ? "wrmsr" : "rdmsr";
@@ -268,11 +267,11 @@ static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
 		kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n",
 			      op, msr, data);
 		/* Mask the error */
-		return 0;
+		return true;
 	} else {
 		kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n",
 				      op, msr, data);
-		return -ENOENT;
+		return false;
 	}
 }
 
@@ -1416,7 +1415,8 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
 	if (r == KVM_MSR_RET_INVALID) {
 		/* Unconditionally clear the output for simplicity */
 		*data = 0;
-		r = kvm_msr_ignored_check(vcpu, index, 0, false);
+		if (kvm_msr_ignored_check(vcpu, index, 0, false))
+			r = 0;
 	}
 
 	if (r)
@@ -1540,7 +1540,7 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
 	struct msr_data msr;
 
 	if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE))
-		return -EPERM;
+		return KVM_MSR_RET_FILTERED;
 
 	switch (index) {
 	case MSR_FS_BASE:
@@ -1581,7 +1581,8 @@ static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
 	int ret = __kvm_set_msr(vcpu, index, data, host_initiated);
 
 	if (ret == KVM_MSR_RET_INVALID)
-		ret = kvm_msr_ignored_check(vcpu, index, data, true);
+		if (kvm_msr_ignored_check(vcpu, index, data, true))
+			ret = 0;
 
 	return ret;
 }
@@ -1599,7 +1600,7 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
 	int ret;
 
 	if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
-		return -EPERM;
+		return KVM_MSR_RET_FILTERED;
 
 	msr.index = index;
 	msr.host_initiated = host_initiated;
@@ -1618,7 +1619,8 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
 	if (ret == KVM_MSR_RET_INVALID) {
 		/* Unconditionally clear *data for simplicity */
 		*data = 0;
-		ret = kvm_msr_ignored_check(vcpu, index, 0, false);
+		if (kvm_msr_ignored_check(vcpu, index, 0, false))
+			ret = 0;
 	}
 
 	return ret;
@@ -1662,9 +1664,9 @@ static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu)
 static u64 kvm_msr_reason(int r)
 {
 	switch (r) {
-	case -ENOENT:
+	case KVM_MSR_RET_INVALID:
 		return KVM_MSR_EXIT_REASON_UNKNOWN;
-	case -EPERM:
+	case KVM_MSR_RET_FILTERED:
 		return KVM_MSR_EXIT_REASON_FILTER;
 	default:
 		return KVM_MSR_EXIT_REASON_INVAL;
@@ -1965,7 +1967,7 @@ static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,
 	struct kvm_arch *ka = &vcpu->kvm->arch;
 
 	if (vcpu->vcpu_id == 0 && !host_initiated) {
-		if (ka->boot_vcpu_runs_old_kvmclock && old_msr)
+		if (ka->boot_vcpu_runs_old_kvmclock != old_msr)
 			kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
 
 		ka->boot_vcpu_runs_old_kvmclock = old_msr;
@@ -3063,9 +3065,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			/* Values other than LBR and BTF are vendor-specific,
 			   thus reserved and should throw a #GP */
 			return 1;
-		}
-		vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
-			    __func__, data);
+		} else if (report_ignored_msrs)
+			vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
+				    __func__, data);
 		break;
 	case 0x200 ... 0x2ff:
 		return kvm_mtrr_set_msr(vcpu, msr, data);
@@ -3463,29 +3465,63 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = vcpu->arch.efer;
 		break;
 	case MSR_KVM_WALL_CLOCK:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
+			return 1;
+
 		msr_info->data = vcpu->kvm->arch.wall_clock;
 		break;
 	case MSR_KVM_WALL_CLOCK_NEW:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
+			return 1;
+
 		msr_info->data = vcpu->kvm->arch.wall_clock;
 		break;
 	case MSR_KVM_SYSTEM_TIME:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
+			return 1;
+
 		msr_info->data = vcpu->arch.time;
 		break;
 	case MSR_KVM_SYSTEM_TIME_NEW:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
+			return 1;
+
 		msr_info->data = vcpu->arch.time;
 		break;
 	case MSR_KVM_ASYNC_PF_EN:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+			return 1;
+
 		msr_info->data = vcpu->arch.apf.msr_en_val;
 		break;
 	case MSR_KVM_ASYNC_PF_INT:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
+			return 1;
+
 		msr_info->data = vcpu->arch.apf.msr_int_val;
 		break;
 	case MSR_KVM_ASYNC_PF_ACK:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+			return 1;
+
 		msr_info->data = 0;
 		break;
 	case MSR_KVM_STEAL_TIME:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME))
+			return 1;
+
 		msr_info->data = vcpu->arch.st.msr_val;
 		break;
 	case MSR_KVM_PV_EOI_EN:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
+			return 1;
+
 		msr_info->data = vcpu->arch.pv_eoi.msr_val;
 		break;
 	case MSR_KVM_POLL_CONTROL:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL))
+			return 1;
+
 		msr_info->data = vcpu->arch.msr_kvm_poll_control;
 		break;
 	case MSR_IA32_P5_MC_ADDR:
@@ -4575,6 +4611,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 
 	case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
 		vcpu->arch.pv_cpuid.enforce = cap->args[0];
+		if (vcpu->arch.pv_cpuid.enforce)
+			kvm_update_pv_runtime(vcpu);
 
 		return 0;

arch/x86/kvm/x86.h
@@ -376,7 +376,13 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
 int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva);
 bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type);
 
-#define  KVM_MSR_RET_INVALID  2
+/*
+ * Internal error codes that are used to indicate that MSR emulation encountered
+ * an error that should result in #GP in the guest, unless userspace
+ * handles it.
+ */
+#define  KVM_MSR_RET_INVALID	2	/* in-kernel MSR emulation #GP condition */
+#define  KVM_MSR_RET_FILTERED	3	/* #GP due to userspace MSR filter */
 
 #define __cr4_reserved_bits(__cpu_has, __c)             \
 ({                                                      \

drivers/powercap/powercap_sys.c
@@ -367,9 +367,9 @@ static void create_power_zone_common_attributes(
 					&dev_attr_max_energy_range_uj.attr;
 	if (power_zone->ops->get_energy_uj) {
 		if (power_zone->ops->reset_energy_uj)
-			dev_attr_energy_uj.attr.mode = S_IWUSR | S_IRUGO;
+			dev_attr_energy_uj.attr.mode = S_IWUSR | S_IRUSR;
 		else
-			dev_attr_energy_uj.attr.mode = S_IRUGO;
+			dev_attr_energy_uj.attr.mode = S_IRUSR;
 		power_zone->zone_dev_attrs[count++] =
 					&dev_attr_energy_uj.attr;
 	}

fs/btrfs/block-rsv.c
@@ -511,7 +511,8 @@ struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans,
 					/*DEFAULT_RATELIMIT_BURST*/ 1);
 		if (__ratelimit(&_rs))
 			WARN(1, KERN_DEBUG
-				"BTRFS: block rsv returned %d\n", ret);
+				"BTRFS: block rsv %d returned %d\n",
+				block_rsv->type, ret);
 	}
 try_reserve:
 	ret = btrfs_reserve_metadata_bytes(root, block_rsv, blocksize,

fs/btrfs/dev-replace.c
@@ -91,6 +91,17 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info)
 	ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
 	if (ret) {
 no_valid_dev_replace_entry_found:
+		/*
+		 * We don't have a replace item or it's corrupted.  If there is
+		 * a replace target, fail the mount.
+		 */
+		if (btrfs_find_device(fs_info->fs_devices,
+				      BTRFS_DEV_REPLACE_DEVID, NULL, NULL, false)) {
+			btrfs_err(fs_info,
+"found replace target device without a valid replace item");
+			ret = -EUCLEAN;
+			goto out;
+		}
 		ret = 0;
 		dev_replace->replace_state =
 			BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED;
@@ -143,8 +154,19 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info)
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
-		dev_replace->srcdev = NULL;
-		dev_replace->tgtdev = NULL;
+		/*
+		 * We don't have an active replace item but if there is a
+		 * replace target, fail the mount.
+		 */
+		if (btrfs_find_device(fs_info->fs_devices,
+				      BTRFS_DEV_REPLACE_DEVID, NULL, NULL, false)) {
+			btrfs_err(fs_info,
+"replace devid present without an active replace item");
+			ret = -EUCLEAN;
+		} else {
+			dev_replace->srcdev = NULL;
+			dev_replace->tgtdev = NULL;
+		}
 		break;
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:

fs/btrfs/ioctl.c
@@ -1274,6 +1274,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
 	u64 page_start;
 	u64 page_end;
 	u64 page_cnt;
+	u64 start = (u64)start_index << PAGE_SHIFT;
 	int ret;
 	int i;
 	int i_done;
@@ -1290,8 +1291,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
 	page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);
 
 	ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
-			start_index << PAGE_SHIFT,
-			page_cnt << PAGE_SHIFT);
+			start, page_cnt << PAGE_SHIFT);
 	if (ret)
 		return ret;
 	i_done = 0;
@@ -1380,8 +1380,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
 		btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
 		spin_unlock(&BTRFS_I(inode)->lock);
 		btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
-				start_index << PAGE_SHIFT,
-				(page_cnt - i_done) << PAGE_SHIFT, true);
+				start, (page_cnt - i_done) << PAGE_SHIFT, true);
 	}
 
@@ -1408,8 +1407,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
 		put_page(pages[i]);
 	}
 	btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
-			start_index << PAGE_SHIFT,
-			page_cnt << PAGE_SHIFT, true);
+			start, page_cnt << PAGE_SHIFT, true);
 	btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
 	extent_changeset_free(data_reserved);
 	return ret;

fs/btrfs/qgroup.c
@@ -3435,24 +3435,20 @@ static int qgroup_unreserve_range(struct btrfs_inode *inode,
 {
 	struct rb_node *node;
 	struct rb_node *next;
-	struct ulist_node *entry = NULL;
+	struct ulist_node *entry;
 	int ret = 0;
 
 	node = reserved->range_changed.root.rb_node;
+	if (!node)
+		return 0;
 	while (node) {
 		entry = rb_entry(node, struct ulist_node, rb_node);
 		if (entry->val < start)
 			node = node->rb_right;
-		else if (entry)
-			node = node->rb_left;
-		else
-			break;
+		else
+			node = node->rb_left;
 	}
 
-	/* Empty changeset */
-	if (!entry)
-		return 0;
-
 	if (entry->val > start && rb_prev(&entry->rb_node))
 		entry = rb_entry(rb_prev(&entry->rb_node), struct ulist_node,
 				 rb_node);

fs/btrfs/ref-verify.c
@@ -860,6 +860,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
 "dropping a ref for a root that doesn't have a ref on the block");
 			dump_block_entry(fs_info, be);
 			dump_ref_action(fs_info, ra);
+			kfree(ref);
 			kfree(ra);
 			goto out_unlock;
 		}

fs/btrfs/relocation.c
@@ -1648,6 +1648,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
 	struct btrfs_root_item *root_item;
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
+	int reserve_level;
 	int level;
 	int max_level;
 	int replaced = 0;
@@ -1696,7 +1697,8 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
 	 * Thus the needed metadata size is at most root_level * nodesize,
 	 * and * 2 since we have two trees to COW.
 	 */
-	min_reserved = fs_info->nodesize * btrfs_root_level(root_item) * 2;
+	reserve_level = max_t(int, 1, btrfs_root_level(root_item));
+	min_reserved = fs_info->nodesize * reserve_level * 2;
 	memset(&next_key, 0, sizeof(next_key));
 
 	while (1) {

fs/btrfs/scrub.c
@@ -3866,8 +3866,9 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
 	if (!is_dev_replace && !readonly &&
 	    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
 		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
-		btrfs_err_in_rcu(fs_info, "scrub: device %s is not writable",
-				 rcu_str_deref(dev->name));
+		btrfs_err_in_rcu(fs_info,
+			"scrub on devid %llu: filesystem on %s is not writable",
+				 devid, rcu_str_deref(dev->name));
 		ret = -EROFS;
 		goto out;
 	}

fs/btrfs/volumes.c
@@ -1056,22 +1056,13 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices,
 			continue;
 		}
 
-		if (device->devid == BTRFS_DEV_REPLACE_DEVID) {
-			/*
-			 * In the first step, keep the device which has
-			 * the correct fsid and the devid that is used
-			 * for the dev_replace procedure.
-			 * In the second step, the dev_replace state is
-			 * read from the device tree and it is known
-			 * whether the procedure is really active or
-			 * not, which means whether this device is
-			 * used or whether it should be removed.
-			 */
-			if (step == 0 || test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
-						  &device->dev_state)) {
-				continue;
-			}
-		}
+		/*
+		 * We have already validated the presence of BTRFS_DEV_REPLACE_DEVID,
+		 * in btrfs_init_dev_replace() so just continue.
+		 */
+		if (device->devid == BTRFS_DEV_REPLACE_DEVID)
+			continue;
 
 		if (device->bdev) {
 			blkdev_put(device->bdev, device->mode);
 			device->bdev = NULL;
@@ -1080,9 +1071,6 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices,
 		if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
 			list_del_init(&device->dev_alloc_list);
 			clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
-			if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
-				      &device->dev_state))
-				fs_devices->rw_devices--;
+			fs_devices->rw_devices--;
 		}
 		list_del_init(&device->dev_list);
 		fs_devices->num_devices--;

fs/crypto/keysetup.c
@@ -300,9 +300,7 @@ static int fscrypt_setup_iv_ino_lblk_32_key(struct fscrypt_info *ci,
 	 * New inodes may not have an inode number assigned yet.
 	 * Hashing their inode number is delayed until later.
 	 */
-	if (ci->ci_inode->i_ino == 0)
-		WARN_ON(!(ci->ci_inode->i_state & I_CREATING));
-	else
+	if (ci->ci_inode->i_ino)
 		fscrypt_hash_inode_number(ci, mk);
 	return 0;
 }

fs/erofs/inode.c
@@ -107,11 +107,9 @@ static struct page *erofs_read_inode(struct inode *inode,
 		i_gid_write(inode, le32_to_cpu(die->i_gid));
 		set_nlink(inode, le32_to_cpu(die->i_nlink));
 
-		/* ns timestamp */
-		inode->i_mtime.tv_sec = inode->i_ctime.tv_sec =
-			le64_to_cpu(die->i_ctime);
-		inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec =
-			le32_to_cpu(die->i_ctime_nsec);
+		/* extended inode has its own timestamp */
+		inode->i_ctime.tv_sec = le64_to_cpu(die->i_ctime);
+		inode->i_ctime.tv_nsec = le32_to_cpu(die->i_ctime_nsec);
 
 		inode->i_size = le64_to_cpu(die->i_size);
 
@@ -149,11 +147,9 @@ static struct page *erofs_read_inode(struct inode *inode,
 		i_gid_write(inode, le16_to_cpu(dic->i_gid));
 		set_nlink(inode, le16_to_cpu(dic->i_nlink));
 
-		/* use build time to derive all file time */
-		inode->i_mtime.tv_sec = inode->i_ctime.tv_sec =
-			sbi->build_time;
-		inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec =
-			sbi->build_time_nsec;
+		/* use build time for compact inodes */
+		inode->i_ctime.tv_sec = sbi->build_time;
+		inode->i_ctime.tv_nsec = sbi->build_time_nsec;
 
 		inode->i_size = le32_to_cpu(dic->i_size);
 		if (erofs_inode_is_data_compressed(vi->datalayout))
@@ -167,6 +163,11 @@ static struct page *erofs_read_inode(struct inode *inode,
 		goto err_out;
 	}
 
+	inode->i_mtime.tv_sec = inode->i_ctime.tv_sec;
+	inode->i_atime.tv_sec = inode->i_ctime.tv_sec;
+	inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec;
+	inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec;
+
 	if (!nblks)
 		/* measure inode.i_blocks as generic filesystems */
 		inode->i_blocks = roundup(inode->i_size, EROFS_BLKSIZ) >> 9;

fs/erofs/zdata.c
@@ -1078,8 +1078,11 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
 		cond_resched();
 		goto repeat;
 	}
-	set_page_private(page, (unsigned long)pcl);
-	SetPagePrivate(page);
+
+	if (tocache) {
+		set_page_private(page, (unsigned long)pcl);
+		SetPagePrivate(page);
+	}
 out:	/* the only exit (for tracing and debugging) */
 	return page;
 }

fs/ext4/ext4.h
@@ -1028,9 +1028,6 @@ struct ext4_inode_info {
 	 * protected by sbi->s_fc_lock.
 	 */
 
-	/* Fast commit subtid when this inode was committed */
-	unsigned int i_fc_committed_subtid;
-
 	/* Start of lblk range that needs to be committed in this fast commit */
 	ext4_lblk_t i_fc_lblk_start;
@@ -1422,16 +1419,6 @@ struct ext4_super_block {
 
 #ifdef __KERNEL__
 
-/*
- * run-time mount flags
- */
-#define EXT4_MF_MNTDIR_SAMPLED		0x0001
-#define EXT4_MF_FS_ABORTED		0x0002	/* Fatal error detected */
-#define EXT4_MF_FC_INELIGIBLE		0x0004	/* Fast commit ineligible */
-#define EXT4_MF_FC_COMMITTING		0x0008	/* File system underoing a fast
-						 * commit.
-						 */
-
 #ifdef CONFIG_FS_ENCRYPTION
 #define DUMMY_ENCRYPTION_ENABLED(sbi) ((sbi)->s_dummy_enc_policy.policy != NULL)
 #else
@@ -1466,7 +1453,7 @@ struct ext4_sb_info {
 	struct buffer_head * __rcu *s_group_desc;
 	unsigned int s_mount_opt;
 	unsigned int s_mount_opt2;
-	unsigned int s_mount_flags;
+	unsigned long s_mount_flags;
 	unsigned int s_def_mount_opt;
 	ext4_fsblk_t s_sb_block;
 	atomic64_t s_resv_clusters;
@@ -1694,6 +1681,34 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 	_v;							\
 })
 
+/*
+ * run-time mount flags
+ */
+enum {
+	EXT4_MF_MNTDIR_SAMPLED,
+	EXT4_MF_FS_ABORTED,	/* Fatal error detected */
+	EXT4_MF_FC_INELIGIBLE,	/* Fast commit ineligible */
+	EXT4_MF_FC_COMMITTING	/* File system underoing a fast
+				 * commit.
+				 */
+};
+
+static inline void ext4_set_mount_flag(struct super_block *sb, int bit)
+{
+	set_bit(bit, &EXT4_SB(sb)->s_mount_flags);
+}
+
+static inline void ext4_clear_mount_flag(struct super_block *sb, int bit)
+{
+	clear_bit(bit, &EXT4_SB(sb)->s_mount_flags);
+}
+
+static inline int ext4_test_mount_flag(struct super_block *sb, int bit)
+{
+	return test_bit(bit, &EXT4_SB(sb)->s_mount_flags);
+}
+
+
 /*
  * Simulate_fail codes
  */
@@ -1863,6 +1878,13 @@ static inline bool ext4_verity_in_progress(struct inode *inode)
 #define EXT4_FEATURE_COMPAT_RESIZE_INODE	0x0010
 #define EXT4_FEATURE_COMPAT_DIR_INDEX		0x0020
 #define EXT4_FEATURE_COMPAT_SPARSE_SUPER2	0x0200
+/*
+ * The reason why "FAST_COMMIT" is a compat feature is that, FS becomes
+ * incompatible only if fast commit blocks are present in the FS. Since we
+ * clear the journal (and thus the fast commit blocks), we don't mark FS as
+ * incompatible. We also have a JBD2 incompat feature, which gets set when
+ * there are fast commit blocks present in the journal.
+ */
 #define EXT4_FEATURE_COMPAT_FAST_COMMIT		0x0400
 #define EXT4_FEATURE_COMPAT_STABLE_INODES	0x0800
@@ -2776,12 +2798,16 @@ extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
 int ext4_fc_info_show(struct seq_file *seq, void *v);
 void ext4_fc_init(struct super_block *sb, journal_t *journal);
 void ext4_fc_init_inode(struct inode *inode);
-void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
+void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
 			 ext4_lblk_t end);
-void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry);
-void ext4_fc_track_link(struct inode *inode, struct dentry *dentry);
-void ext4_fc_track_create(struct inode *inode, struct dentry *dentry);
-void ext4_fc_track_inode(struct inode *inode);
+void __ext4_fc_track_unlink(handle_t *handle, struct inode *inode,
+	struct dentry *dentry);
+void __ext4_fc_track_link(handle_t *handle, struct inode *inode,
+	struct dentry *dentry);
+void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry);
+void ext4_fc_track_link(handle_t *handle, struct dentry *dentry);
+void ext4_fc_track_create(handle_t *handle, struct dentry *dentry);
+void ext4_fc_track_inode(handle_t *handle, struct inode *inode);
 void ext4_fc_mark_ineligible(struct super_block *sb, int reason);
 void ext4_fc_start_ineligible(struct super_block *sb, int reason);
 void ext4_fc_stop_ineligible(struct super_block *sb);
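
A sketch of how a caller threads the journal handle through the reworked tracking API — the tracking code now takes the transaction ID from the running handle instead of guessing from j_commit_sequence (see the ext4_fc_track_template() hunks below). "credits", "start_lblk" and "end_lblk" are placeholders:

	handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	ext4_fc_track_inode(handle, inode);
	ext4_fc_track_range(handle, inode, start_lblk, end_lblk);

	ext4_journal_stop(handle);
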
@@ -3495,7 +3521,7 @@ extern void ext4_initialize_dirent_tail(struct buffer_head *bh,
 					unsigned int blocksize);
 extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode,
 				      struct buffer_head *bh);
-extern int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
+extern int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name,
 			 struct inode *inode);
 extern int __ext4_link(struct inode *dir, struct inode *inode,
 		       struct dentry *dentry);

fs/ext4/extents.c
@@ -3724,7 +3724,6 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
 	err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 out:
 	ext4_ext_show_leaf(inode, path);
-	ext4_fc_track_range(inode, ee_block, ee_block + ee_len - 1);
 	return err;
 }
 
@@ -3796,7 +3795,6 @@ convert_initialized_extent(handle_t *handle, struct inode *inode,
 	if (*allocated > map->m_len)
 		*allocated = map->m_len;
 	map->m_len = *allocated;
-	ext4_fc_track_range(inode, ee_block, ee_block + ee_len - 1);
 	return 0;
 }
 
@@ -4329,7 +4327,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	map->m_len = ar.len;
 	allocated = map->m_len;
 	ext4_ext_show_leaf(inode, path);
-	ext4_fc_track_range(inode, map->m_lblk, map->m_lblk + map->m_len - 1);
 out:
 	ext4_ext_drop_refs(path);
 	kfree(path);
@@ -4602,7 +4599,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 		ret = ext4_mark_inode_dirty(handle, inode);
 		if (unlikely(ret))
 			goto out_handle;
-		ext4_fc_track_range(inode, offset >> inode->i_sb->s_blocksize_bits,
+		ext4_fc_track_range(handle, inode, offset >> inode->i_sb->s_blocksize_bits,
 				(offset + len - 1) >> inode->i_sb->s_blocksize_bits);
 		/* Zero out partial block at the edges of the range */
 		ret = ext4_zero_partial_blocks(handle, inode, offset, len);
@@ -4651,8 +4648,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 		     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
 		     FALLOC_FL_INSERT_RANGE))
 		return -EOPNOTSUPP;
-	ext4_fc_track_range(inode, offset >> blkbits,
-			(offset + len - 1) >> blkbits);
 
 	ext4_fc_start_update(inode);
 

fs/ext4/fast_commit.c
@@ -83,7 +83,7 @@
  *
  * Atomicity of commits
  * --------------------
- * In order to gaurantee atomicity during the commit operation, fast commit
+ * In order to guarantee atomicity during the commit operation, fast commit
  * uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail
  * tag contains CRC of the contents and TID of the transaction after which
  * this fast commit should be applied. Recovery code replays fast commit
@@ -152,7 +152,31 @@ void ext4_fc_init_inode(struct inode *inode)
 	INIT_LIST_HEAD(&ei->i_fc_list);
 	init_waitqueue_head(&ei->i_fc_wait);
 	atomic_set(&ei->i_fc_updates, 0);
-	ei->i_fc_committed_subtid = 0;
+}
+
+/* This function must be called with sbi->s_fc_lock held. */
+static void ext4_fc_wait_committing_inode(struct inode *inode)
+__releases(&EXT4_SB(inode->i_sb)->s_fc_lock)
+{
+	wait_queue_head_t *wq;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+
+#if (BITS_PER_LONG < 64)
+	DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
+			EXT4_STATE_FC_COMMITTING);
+	wq = bit_waitqueue(&ei->i_state_flags,
+			   EXT4_STATE_FC_COMMITTING);
+#else
+	DEFINE_WAIT_BIT(wait, &ei->i_flags,
+			EXT4_STATE_FC_COMMITTING);
+	wq = bit_waitqueue(&ei->i_flags,
+			   EXT4_STATE_FC_COMMITTING);
+#endif
+	lockdep_assert_held(&EXT4_SB(inode->i_sb)->s_fc_lock);
+	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
+	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
+	schedule();
+	finish_wait(wq, &wait.wq_entry);
 }
 
 /*
@@ -176,22 +200,7 @@ void ext4_fc_start_update(struct inode *inode)
 		goto out;
 
 	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
-		wait_queue_head_t *wq;
-#if (BITS_PER_LONG < 64)
-		DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
-				EXT4_STATE_FC_COMMITTING);
-		wq = bit_waitqueue(&ei->i_state_flags,
-				   EXT4_STATE_FC_COMMITTING);
-#else
-		DEFINE_WAIT_BIT(wait, &ei->i_flags,
-				EXT4_STATE_FC_COMMITTING);
-		wq = bit_waitqueue(&ei->i_flags,
-				   EXT4_STATE_FC_COMMITTING);
-#endif
-		prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
-		spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
-		schedule();
-		finish_wait(wq, &wait.wq_entry);
+		ext4_fc_wait_committing_inode(inode);
 		goto restart;
 	}
 out:
@@ -234,26 +243,10 @@ void ext4_fc_del(struct inode *inode)
 	}
 
 	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
-		wait_queue_head_t *wq;
-#if (BITS_PER_LONG < 64)
-		DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
-				EXT4_STATE_FC_COMMITTING);
-		wq = bit_waitqueue(&ei->i_state_flags,
-				   EXT4_STATE_FC_COMMITTING);
-#else
-		DEFINE_WAIT_BIT(wait, &ei->i_flags,
-				EXT4_STATE_FC_COMMITTING);
-		wq = bit_waitqueue(&ei->i_flags,
-				   EXT4_STATE_FC_COMMITTING);
-#endif
-		prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
-		spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
-		schedule();
-		finish_wait(wq, &wait.wq_entry);
+		ext4_fc_wait_committing_inode(inode);
 		goto restart;
 	}
-	if (!list_empty(&ei->i_fc_list))
-		list_del_init(&ei->i_fc_list);
+	list_del_init(&ei->i_fc_list);
 	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
 }
@@ -269,7 +262,7 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
 	    (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
 		return;
 
-	sbi->s_mount_flags |= EXT4_MF_FC_INELIGIBLE;
+	ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
 	WARN_ON(reason >= EXT4_FC_REASON_MAX);
 	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
 }
@@ -302,14 +295,14 @@ void ext4_fc_stop_ineligible(struct super_block *sb)
 	    (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
 		return;
 
-	EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FC_INELIGIBLE;
+	ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
 	atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates);
 }
 
 static inline int ext4_fc_is_ineligible(struct super_block *sb)
 {
-	return (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FC_INELIGIBLE) ||
-		atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates);
+	return (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE) ||
+		atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates));
 }
 
 /*
@@ -323,13 +316,14 @@ static inline int ext4_fc_is_ineligible(struct super_block *sb)
  * If enqueue is set, this function enqueues the inode in fast commit list.
  */
 static int ext4_fc_track_template(
-	struct inode *inode, int (*__fc_track_fn)(struct inode *, void *, bool),
+	handle_t *handle, struct inode *inode,
+	int (*__fc_track_fn)(struct inode *, void *, bool),
 	void *args, int enqueue)
 {
-	tid_t running_txn_tid;
 	bool update = false;
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	tid_t tid = 0;
 	int ret;
 
 	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
@@ -339,15 +333,13 @@ static int ext4_fc_track_template(
 	if (ext4_fc_is_ineligible(inode->i_sb))
 		return -EINVAL;
 
-	running_txn_tid = sbi->s_journal ?
-		sbi->s_journal->j_commit_sequence + 1 : 0;
-
+	tid = handle->h_transaction->t_tid;
 	mutex_lock(&ei->i_fc_lock);
-	if (running_txn_tid == ei->i_sync_tid) {
+	if (tid == ei->i_sync_tid) {
 		update = true;
 	} else {
 		ext4_fc_reset_inode(inode);
-		ei->i_sync_tid = running_txn_tid;
+		ei->i_sync_tid = tid;
 	}
 	ret = __fc_track_fn(inode, args, update);
 	mutex_unlock(&ei->i_fc_lock);
@@ -358,7 +350,7 @@ static int ext4_fc_track_template(
 	spin_lock(&sbi->s_fc_lock);
 	if (list_empty(&EXT4_I(inode)->i_fc_list))
 		list_add_tail(&EXT4_I(inode)->i_fc_list,
-				(sbi->s_mount_flags & EXT4_MF_FC_COMMITTING) ?
+				(ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ?
 				&sbi->s_fc_q[FC_Q_STAGING] :
 				&sbi->s_fc_q[FC_Q_MAIN]);
 	spin_unlock(&sbi->s_fc_lock);
@@ -384,7 +376,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
 	mutex_unlock(&ei->i_fc_lock);
 	node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
 	if (!node) {
-		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_MEM);
+		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
 		mutex_lock(&ei->i_fc_lock);
 		return -ENOMEM;
 	}
@@ -397,7 +389,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
 	if (!node->fcd_name.name) {
 		kmem_cache_free(ext4_fc_dentry_cachep, node);
 		ext4_fc_mark_ineligible(inode->i_sb,
-			EXT4_FC_REASON_MEM);
+			EXT4_FC_REASON_NOMEM);
 		mutex_lock(&ei->i_fc_lock);
 		return -ENOMEM;
 	}
@@ -411,7 +403,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
 	node->fcd_name.len = dentry->d_name.len;
 
 	spin_lock(&sbi->s_fc_lock);
-	if (sbi->s_mount_flags & EXT4_MF_FC_COMMITTING)
+	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING))
 		list_add_tail(&node->fcd_list,
 				&sbi->s_fc_dentry_q[FC_Q_STAGING]);
 	else
@@ -422,7 +414,8 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
 	return 0;
 }
 
-void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry)
+void __ext4_fc_track_unlink(handle_t *handle,
+		struct inode *inode, struct dentry *dentry)
 {
 	struct __track_dentry_update_args args;
 	int ret;
@@ -430,12 +423,18 @@ void ext4_fc_track_unlink(struct inode *inode, struct dentry *dentry)
 	args.dentry = dentry;
 	args.op = EXT4_FC_TAG_UNLINK;
 
-	ret = ext4_fc_track_template(inode, __track_dentry_update,
+	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
 			(void *)&args, 0);
 	trace_ext4_fc_track_unlink(inode, dentry, ret);
 }
 
-void ext4_fc_track_link(struct inode *inode, struct dentry *dentry)
+void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
+{
+	__ext4_fc_track_unlink(handle, d_inode(dentry), dentry);
+}
+
+void __ext4_fc_track_link(handle_t *handle,
+		struct inode *inode, struct dentry *dentry)
 {
 	struct __track_dentry_update_args args;
 	int ret;
@@ -443,20 +442,26 @@ void ext4_fc_track_link(struct inode *inode, struct dentry *dentry)
 	args.dentry = dentry;
 	args.op = EXT4_FC_TAG_LINK;
 
-	ret = ext4_fc_track_template(inode, __track_dentry_update,
+	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
 			(void *)&args, 0);
 	trace_ext4_fc_track_link(inode, dentry, ret);
 }
 
-void ext4_fc_track_create(struct inode *inode, struct dentry *dentry)
+void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
+{
+	__ext4_fc_track_link(handle, d_inode(dentry), dentry);
+}
+
+void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
 {
 	struct __track_dentry_update_args args;
+	struct inode *inode = d_inode(dentry);
 	int ret;
 
 	args.dentry = dentry;
 	args.op = EXT4_FC_TAG_CREAT;
 
-	ret = ext4_fc_track_template(inode, __track_dentry_update,
+	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
 			(void *)&args, 0);
 	trace_ext4_fc_track_create(inode, dentry, ret);
 }
@@ -472,14 +477,20 @@ static int __track_inode(struct inode *inode, void *arg, bool update)
 	return 0;
 }
 
-void ext4_fc_track_inode(struct inode *inode)
+void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
 {
 	int ret;
 
 	if (S_ISDIR(inode->i_mode))
 		return;
 
-	ret = ext4_fc_track_template(inode, __track_inode, NULL, 1);
+	if (ext4_should_journal_data(inode)) {
+		ext4_fc_mark_ineligible(inode->i_sb,
+					EXT4_FC_REASON_INODE_JOURNAL_DATA);
+		return;
+	}
+
+	ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1);
 	trace_ext4_fc_track_inode(inode, ret);
 }
 
@@ -515,7 +526,7 @@ static int __track_range(struct inode *inode, void *arg, bool update)
 	return 0;
 }
 
-void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
+void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
 			 ext4_lblk_t end)
 {
 	struct __track_range_args args;
@@ -527,7 +538,7 @@ void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
 	args.start = start;
 	args.end = end;
 
-	ret = ext4_fc_track_template(inode, __track_range, &args, 1);
+	ret = ext4_fc_track_template(handle, inode, __track_range, &args, 1);
 
 	trace_ext4_fc_track_range(inode, start, end, ret);
 }
@@ -537,10 +548,11 @@ static void ext4_fc_submit_bh(struct super_block *sb)
 	int write_flags = REQ_SYNC;
 	struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;
 
+	/* TODO: REQ_FUA | REQ_PREFLUSH is unnecessarily expensive. */
 	if (test_opt(sb, BARRIER))
 		write_flags |= REQ_FUA | REQ_PREFLUSH;
 	lock_buffer(bh);
-	clear_buffer_dirty(bh);
+	set_buffer_dirty(bh);
 	set_buffer_uptodate(bh);
 	bh->b_end_io = ext4_end_buffer_io_sync;
 	submit_bh(REQ_OP_WRITE, write_flags, bh);
@@ -846,7 +858,7 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal)
 	int ret = 0;
 
 	spin_lock(&sbi->s_fc_lock);
-	sbi->s_mount_flags |= EXT4_MF_FC_COMMITTING;
+	ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING);
 	list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
 		ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
 		ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
@@ -900,6 +912,8 @@ static int ext4_fc_wait_inode_data_all(journal_t *journal)
 
 /* Commit all the directory entry updates */
 static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
+__acquires(&sbi->s_fc_lock)
+__releases(&sbi->s_fc_lock)
 {
 	struct super_block *sb = (struct super_block *)(journal->j_private);
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -996,6 +1010,13 @@ static int ext4_fc_perform_commit(journal_t *journal)
 	if (ret)
 		return ret;
 
+	/*
+	 * If file system device is different from journal device, issue a cache
+	 * flush before we start writing fast commit blocks.
+	 */
+	if (journal->j_fs_dev != journal->j_dev)
+		blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS);
+
 	blk_start_plug(&plug);
 	if (sbi->s_fc_bytes == 0) {
 		/*
@@ -1031,8 +1052,6 @@ static int ext4_fc_perform_commit(journal_t *journal)
 		if (ret)
 			goto out;
 		spin_lock(&sbi->s_fc_lock);
-		EXT4_I(inode)->i_fc_committed_subtid =
-			atomic_read(&sbi->s_fc_subtid);
 	}
 	spin_unlock(&sbi->s_fc_lock);
 
@@ -1131,7 +1150,7 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
 		"Fast commit ended with blks = %d, reason = %d, subtid - %d",
 		nblks, reason, subtid);
 	if (reason == EXT4_FC_REASON_FC_FAILED)
-		return jbd2_fc_end_commit_fallback(journal, commit_tid);
+		return jbd2_fc_end_commit_fallback(journal);
 	if (reason == EXT4_FC_REASON_FC_START_FAILED ||
 		reason == EXT4_FC_REASON_INELIGIBLE)
 		return jbd2_complete_transaction(journal, commit_tid);
@@ -1190,8 +1209,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
 	list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
 				&sbi->s_fc_q[FC_Q_STAGING]);
 
-	sbi->s_mount_flags &= ~EXT4_MF_FC_COMMITTING;
-	sbi->s_mount_flags &= ~EXT4_MF_FC_INELIGIBLE;
+	ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
+	ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
 
 	if (full)
 		sbi->s_fc_bytes = 0;
@@ -1263,7 +1282,7 @@ static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl)
 		return 0;
 	}
 
-	ret = __ext4_unlink(old_parent, &entry, inode);
+	ret = __ext4_unlink(NULL, old_parent, &entry, inode);
 	/* -ENOENT ok coz it might not exist anymore. */
 	if (ret == -ENOENT)
 		ret = 0;
@@ -2079,8 +2098,6 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
 
 void ext4_fc_init(struct super_block *sb, journal_t *journal)
 {
-	int num_fc_blocks;
-
 	/*
 	 * We set replay callback even if fast commit disabled because we may
 	 * could still have fast commit blocks that need to be replayed even if
@@ -2090,21 +2107,9 @@ void ext4_fc_init(struct super_block *sb, journal_t *journal)
 	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
 		return;
 	journal->j_fc_cleanup_callback = ext4_fc_cleanup;
-	if (!buffer_uptodate(journal->j_sb_buffer)
-		&& ext4_read_bh_lock(journal->j_sb_buffer, REQ_META | REQ_PRIO,
-					true)) {
-		ext4_msg(sb, KERN_ERR, "I/O error on journal");
-		return;
-	}
-	num_fc_blocks = be32_to_cpu(journal->j_superblock->s_num_fc_blks);
-	if (jbd2_fc_init(journal, num_fc_blocks ? num_fc_blocks :
-				 EXT4_NUM_FC_BLKS)) {
-		pr_warn("Error while enabling fast commits, turning off.");
-		ext4_clear_feature_fast_commit(sb);
-	}
 }
 
-const char *fc_ineligible_reasons[] = {
+static const char *fc_ineligible_reasons[] = {
 	"Extended attributes changed",
 	"Cross rename",
 	"Journal flag changed",
@@ -2113,6 +2118,7 @@ const char *fc_ineligible_reasons[] = {
 	"Resize",
 	"Dir renamed",
 	"Falloc range op",
+	"Data journalling",
 	"FC Commit Failed"
 };

fs/ext4/fast_commit.h
@@ -3,9 +3,6 @@
 #ifndef __FAST_COMMIT_H__
 #define __FAST_COMMIT_H__
 
-/* Number of blocks in journal area to allocate for fast commits */
-#define EXT4_NUM_FC_BLKS		256
-
 /* Fast commit tags */
 #define EXT4_FC_TAG_ADD_RANGE		0x0001
 #define EXT4_FC_TAG_DEL_RANGE		0x0002
@@ -100,11 +97,12 @@ enum {
 	EXT4_FC_REASON_XATTR = 0,
 	EXT4_FC_REASON_CROSS_RENAME,
 	EXT4_FC_REASON_JOURNAL_FLAG_CHANGE,
-	EXT4_FC_REASON_MEM,
+	EXT4_FC_REASON_NOMEM,
 	EXT4_FC_REASON_SWAP_BOOT,
 	EXT4_FC_REASON_RESIZE,
 	EXT4_FC_REASON_RENAME_DIR,
 	EXT4_FC_REASON_FALLOC_RANGE,
+	EXT4_FC_REASON_INODE_JOURNAL_DATA,
 	EXT4_FC_COMMIT_FAILED,
 	EXT4_FC_REASON_MAX
 };
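
Note that fc_ineligible_reasons[] in fast_commit.c (see the hunk above adding "Data journalling") is indexed by these enum values, so the string table and the enum must stay in lockstep. An illustrative build-time guard — not code from this merge, and it would have to live inside a function in the same translation unit as the array:

	/* Illustrative: catches a string table that drifts out of sync
	 * with the reason enum. */
	BUILD_BUG_ON(ARRAY_SIZE(fc_ineligible_reasons) != EXT4_FC_REASON_MAX);
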

fs/ext4/file.c
@@ -763,7 +763,6 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
 	if (!daxdev_mapping_supported(vma, dax_dev))
 		return -EOPNOTSUPP;
 
-	ext4_fc_start_update(inode);
 	file_accessed(file);
 	if (IS_DAX(file_inode(file))) {
 		vma->vm_ops = &ext4_dax_vm_ops;
@@ -771,7 +770,6 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
 	} else {
 		vma->vm_ops = &ext4_file_vm_ops;
 	}
-	ext4_fc_stop_update(inode);
 	return 0;
 }
 
@ -784,13 +782,13 @@ static int ext4_sample_last_mounted(struct super_block *sb,
|
|||
handle_t *handle;
|
||||
int err;
|
||||
|
||||
if (likely(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED))
|
||||
if (likely(ext4_test_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED)))
|
||||
return 0;
|
||||
|
||||
if (sb_rdonly(sb) || !sb_start_intwrite_trylock(sb))
|
||||
return 0;
|
||||
|
||||
sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
|
||||
ext4_set_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED);
|
||||
/*
|
||||
* Sample where the filesystem has been mounted and
|
||||
* store it in the superblock for sysadmin convenience
|
||||
|
|
|
diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c
@@ -280,7 +280,7 @@ static int ext4_getfsmap_logdev(struct super_block *sb, struct ext4_fsmap *keys,

/* Fabricate an rmap entry for the external log device. */
irec.fmr_physical = journal->j_blk_offset;
- irec.fmr_length = journal->j_maxlen;
+ irec.fmr_length = journal->j_total_len;
irec.fmr_owner = EXT4_FMR_OWN_LOG;
irec.fmr_flags = 0;
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
@@ -143,7 +143,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
if (sb_rdonly(inode->i_sb)) {
/* Make sure that we read updated s_mount_flags value */
smp_rmb();
- if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
+ if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))
ret = -EROFS;
goto out;
}
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
@@ -1899,6 +1899,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline)

ext4_write_lock_xattr(inode, &no_expand);
if (!ext4_has_inline_data(inode)) {
ext4_write_unlock_xattr(inode, &no_expand);
*has_inline = 0;
+ ext4_journal_stop(handle);
return 0;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
@@ -328,6 +328,8 @@ void ext4_evict_inode(struct inode *inode)
ext4_xattr_inode_array_free(ea_inode_array);
return;
no_delete:
+ if (!list_empty(&EXT4_I(inode)->i_fc_list))
+ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
}

@@ -731,7 +733,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
if (ret)
return ret;
}
- ext4_fc_track_range(inode, map->m_lblk,
+ ext4_fc_track_range(handle, inode, map->m_lblk,
map->m_lblk + map->m_len - 1);
}

@@ -2453,7 +2455,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
struct super_block *sb = inode->i_sb;

if (ext4_forced_shutdown(EXT4_SB(sb)) ||
- EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+ ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
goto invalidate_dirty_pages;
/*
* Let the uper layers retry transient errors.

@@ -2687,7 +2689,7 @@ static int ext4_writepages(struct address_space *mapping,
* the stack trace.
*/
if (unlikely(ext4_forced_shutdown(EXT4_SB(mapping->host->i_sb)) ||
- sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) {
+ ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))) {
ret = -EROFS;
goto out_writepages;
}

@@ -3334,8 +3336,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
EXT4_I(inode)->i_datasync_tid))
return false;
if (test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT))
- return atomic_read(&EXT4_SB(inode->i_sb)->s_fc_subtid) <
- EXT4_I(inode)->i_fc_committed_subtid;
+ return !list_empty(&EXT4_I(inode)->i_fc_list);
return true;
}

@@ -4133,7 +4134,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)

up_write(&EXT4_I(inode)->i_data_sem);
}
- ext4_fc_track_range(inode, first_block, stop_block);
+ ext4_fc_track_range(handle, inode, first_block, stop_block);
if (IS_SYNC(inode))
ext4_handle_sync(handle);

@@ -5466,14 +5467,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
}

if (shrink)
- ext4_fc_track_range(inode,
+ ext4_fc_track_range(handle, inode,
(attr->ia_size > 0 ? attr->ia_size - 1 : 0) >>
inode->i_sb->s_blocksize_bits,
(oldsize > 0 ? oldsize - 1 : 0) >>
inode->i_sb->s_blocksize_bits);
else
ext4_fc_track_range(
- inode,
+ handle, inode,
(oldsize > 0 ? oldsize - 1 : oldsize) >>
inode->i_sb->s_blocksize_bits,
(attr->ia_size > 0 ? attr->ia_size - 1 : 0) >>

@@ -5723,7 +5724,7 @@ int ext4_mark_iloc_dirty(handle_t *handle,
put_bh(iloc->bh);
return -EIO;
}
- ext4_fc_track_inode(inode);
+ ext4_fc_track_inode(handle, inode);

if (IS_I_VERSION(inode))
inode_inc_iversion(inode);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
@@ -4477,7 +4477,7 @@ static inline void ext4_mb_show_pa(struct super_block *sb)
{
ext4_group_t i, ngroups;

- if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+ if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
return;

ngroups = ext4_get_groups_count(sb);

@@ -4508,7 +4508,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{
struct super_block *sb = ac->ac_sb;

- if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+ if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
return;

mb_debug(sb, "Can't allocate:"

@@ -5167,7 +5167,7 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle,
struct super_block *sb = ar->inode->i_sb;
ext4_group_t group;
ext4_grpblk_t blkoff;
- int i;
+ int i = sb->s_blocksize;
ext4_fsblk_t goal, block;
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
@@ -2717,7 +2717,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
bool excl)
{
handle_t *handle;
- struct inode *inode, *inode_save;
+ struct inode *inode;
int err, credits, retries = 0;

err = dquot_initialize(dir);

@@ -2735,11 +2735,9 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
inode->i_op = &ext4_file_inode_operations;
inode->i_fop = &ext4_file_operations;
ext4_set_aops(inode);
- inode_save = inode;
- ihold(inode_save);
err = ext4_add_nondir(handle, dentry, &inode);
- ext4_fc_track_create(inode_save, dentry);
- iput(inode_save);
+ if (!err)
+ ext4_fc_track_create(handle, dentry);
}
if (handle)
ext4_journal_stop(handle);

@@ -2754,7 +2752,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
umode_t mode, dev_t rdev)
{
handle_t *handle;
- struct inode *inode, *inode_save;
+ struct inode *inode;
int err, credits, retries = 0;

err = dquot_initialize(dir);

@@ -2771,12 +2769,9 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev);
inode->i_op = &ext4_special_inode_operations;
- inode_save = inode;
- ihold(inode_save);
err = ext4_add_nondir(handle, dentry, &inode);
if (!err)
- ext4_fc_track_create(inode_save, dentry);
- iput(inode_save);
+ ext4_fc_track_create(handle, dentry);
}
if (handle)
ext4_journal_stop(handle);

@@ -2941,7 +2936,6 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
iput(inode);
goto out_retry;
}
- ext4_fc_track_create(inode, dentry);
ext4_inc_count(dir);

ext4_update_dx_flag(dir);

@@ -2949,6 +2943,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (err)
goto out_clear_inode;
d_instantiate_new(dentry, inode);
+ ext4_fc_track_create(handle, dentry);
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);

@@ -3286,7 +3281,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
goto end_rmdir;
ext4_dec_count(dir);
ext4_update_dx_flag(dir);
- ext4_fc_track_unlink(inode, dentry);
+ ext4_fc_track_unlink(handle, dentry);
retval = ext4_mark_inode_dirty(handle, dir);

#ifdef CONFIG_UNICODE

@@ -3307,13 +3302,12 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
return retval;
}

- int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
+ int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name,
struct inode *inode)
{
int retval = -ENOENT;
struct buffer_head *bh;
struct ext4_dir_entry_2 *de;
- handle_t *handle = NULL;
int skip_remove_dentry = 0;
ext4_lblk_t lblk;

@@ -3333,14 +3327,7 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
skip_remove_dentry = 1;
else
- goto out_bh;
+ goto out;
}

- handle = ext4_journal_start(dir, EXT4_HT_DIR,
- EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
- if (IS_ERR(handle)) {
- retval = PTR_ERR(handle);
- goto out_bh;
- }

if (IS_DIRSYNC(dir))

@@ -3349,12 +3336,12 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
if (!skip_remove_dentry) {
retval = ext4_delete_entry(handle, dir, de, lblk, bh);
if (retval)
- goto out_handle;
+ goto out;
dir->i_ctime = dir->i_mtime = current_time(dir);
ext4_update_dx_flag(dir);
retval = ext4_mark_inode_dirty(handle, dir);
if (retval)
- goto out_handle;
+ goto out;
} else {
retval = 0;
}

@@ -3368,15 +3355,14 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
inode->i_ctime = current_time(inode);
retval = ext4_mark_inode_dirty(handle, inode);

- out_handle:
- ext4_journal_stop(handle);
- out_bh:
+ out:
brelse(bh);
return retval;
}

static int ext4_unlink(struct inode *dir, struct dentry *dentry)
{
handle_t *handle;
int retval;

if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))

@@ -3394,9 +3380,16 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
if (retval)
goto out_trace;

- retval = __ext4_unlink(dir, &dentry->d_name, d_inode(dentry));
+ handle = ext4_journal_start(dir, EXT4_HT_DIR,
+ EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
+ if (IS_ERR(handle)) {
+ retval = PTR_ERR(handle);
+ goto out_trace;
+ }
+
+ retval = __ext4_unlink(handle, dir, &dentry->d_name, d_inode(dentry));
if (!retval)
- ext4_fc_track_unlink(d_inode(dentry), dentry);
+ ext4_fc_track_unlink(handle, dentry);
#ifdef CONFIG_UNICODE
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want avoid
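The refactor above inverts ownership of the transaction handle: ext4_unlink() now starts the handle itself and threads it through both __ext4_unlink() and the fast-commit tracking call, instead of __ext4_unlink() starting its own handle. A condensed sketch of the new calling convention, mirroring the hunk above with error paths trimmed:

    handle = ext4_journal_start(dir, EXT4_HT_DIR,
                                EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
    if (IS_ERR(handle))
            return PTR_ERR(handle);

    retval = __ext4_unlink(handle, dir, &dentry->d_name, d_inode(dentry));
    if (!retval)
            /* fast-commit tracking uses the same handle as the unlink */
            ext4_fc_track_unlink(handle, dentry);

    ext4_journal_stop(handle);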
@@ -3407,6 +3400,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
if (IS_CASEFOLDED(dir))
d_invalidate(dentry);
#endif
+ if (handle)
+ ext4_journal_stop(handle);

out_trace:
trace_ext4_unlink_exit(dentry, retval);

@@ -3563,7 +3558,6 @@ int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry)

err = ext4_add_entry(handle, dentry, inode);
if (!err) {
- ext4_fc_track_link(inode, dentry);
err = ext4_mark_inode_dirty(handle, inode);
/* this can happen only for tmpfile being
* linked the first time

@@ -3571,6 +3565,7 @@ int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry)
if (inode->i_nlink == 1)
ext4_orphan_del(handle, inode);
d_instantiate(dentry, inode);
+ ext4_fc_track_link(handle, dentry);
} else {
drop_nlink(inode);
iput(inode);

@@ -4035,9 +4030,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
EXT4_FC_REASON_RENAME_DIR);
} else {
if (new.inode)
- ext4_fc_track_unlink(new.inode, new.dentry);
- ext4_fc_track_link(old.inode, new.dentry);
- ext4_fc_track_unlink(old.inode, old.dentry);
+ ext4_fc_track_unlink(handle, new.dentry);
+ __ext4_fc_track_link(handle, old.inode, new.dentry);
+ __ext4_fc_track_unlink(handle, old.inode, old.dentry);
}

if (new.inode) {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
@@ -686,7 +686,7 @@ static void ext4_handle_error(struct super_block *sb)
if (!test_opt(sb, ERRORS_CONT)) {
journal_t *journal = EXT4_SB(sb)->s_journal;

- EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
+ ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
if (journal)
jbd2_journal_abort(journal, -EIO);
}

@@ -904,7 +904,7 @@ void __ext4_abort(struct super_block *sb, const char *function,
va_end(args);

if (sb_rdonly(sb) == 0) {
- EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
+ ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
if (EXT4_SB(sb)->s_journal)
jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);

@@ -1716,11 +1716,10 @@ enum {
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
- Opt_prefetch_block_bitmaps, Opt_no_fc,
+ Opt_prefetch_block_bitmaps,
#ifdef CONFIG_EXT4_DEBUG
- Opt_fc_debug_max_replay,
+ Opt_fc_debug_max_replay, Opt_fc_debug_force
#endif
- Opt_fc_debug_force
};

static const match_table_t tokens = {

@@ -1807,9 +1806,8 @@ static const match_table_t tokens = {
{Opt_init_itable, "init_itable=%u"},
{Opt_init_itable, "init_itable"},
{Opt_noinit_itable, "noinit_itable"},
- {Opt_no_fc, "no_fc"},
- {Opt_fc_debug_force, "fc_debug_force"},
#ifdef CONFIG_EXT4_DEBUG
+ {Opt_fc_debug_force, "fc_debug_force"},
{Opt_fc_debug_max_replay, "fc_debug_max_replay=%u"},
#endif
{Opt_max_dir_size_kb, "max_dir_size_kb=%u"},

@@ -2027,8 +2025,8 @@ static const struct mount_opts {
{Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
MOPT_CLEAR | MOPT_Q},
- {Opt_usrjquota, 0, MOPT_Q},
- {Opt_grpjquota, 0, MOPT_Q},
+ {Opt_usrjquota, 0, MOPT_Q | MOPT_STRING},
+ {Opt_grpjquota, 0, MOPT_Q | MOPT_STRING},
{Opt_offusrjquota, 0, MOPT_Q},
{Opt_offgrpjquota, 0, MOPT_Q},
{Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},

@@ -2039,11 +2037,9 @@ static const struct mount_opts {
{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
{Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS,
MOPT_SET},
- {Opt_no_fc, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
- MOPT_CLEAR | MOPT_2 | MOPT_EXT4_ONLY},
+ #ifdef CONFIG_EXT4_DEBUG
{Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
- #ifdef CONFIG_EXT4_DEBUG
{Opt_fc_debug_max_replay, 0, MOPT_GTE0},
#endif
{Opt_err, 0, 0}

@@ -2153,7 +2149,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
return 1;
case Opt_abort:
- sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
+ ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
return 1;
case Opt_i_version:
sb->s_flags |= SB_I_VERSION;

@@ -3976,7 +3972,7 @@ int ext4_calculate_overhead(struct super_block *sb)
* loaded or not
*/
if (sbi->s_journal && !sbi->s_journal_bdev)
- overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
+ overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len);
else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
/* j_inum for internal journal is non-zero */
j_inode = ext4_get_journal_inode(sb, j_inum);

@@ -4334,9 +4330,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
#endif

if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
- printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, and O_DIRECT support!\n");
+ printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, O_DIRECT and fast_commit support!\n");
/* can't mount with both data=journal and dioread_nolock. */
clear_opt(sb, DIOREAD_NOLOCK);
+ clear_opt2(sb, JOURNAL_FAST_COMMIT);
if (test_opt2(sb, EXPLICIT_DELALLOC)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"both data=journal and delalloc");

@@ -4771,8 +4768,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
sbi->s_fc_bytes = 0;
- sbi->s_mount_flags &= ~EXT4_MF_FC_INELIGIBLE;
- sbi->s_mount_flags &= ~EXT4_MF_FC_COMMITTING;
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
spin_lock_init(&sbi->s_fc_lock);
memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
sbi->s_fc_replay_state.fc_regions = NULL;

@@ -4851,6 +4848,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount_wq;
}

+ if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
+ !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
+ JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) {
+ ext4_msg(sb, KERN_ERR,
+ "Failed to set fast commit journal feature");
+ goto failed_mount_wq;
+ }
+
/* We have now updated the journal if required, so we can
* validate the data journaling mode. */
switch (test_opt(sb, DATA_FLAGS)) {
@@ -5861,7 +5866,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}

- if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
+ if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user");

sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |

@@ -5875,7 +5880,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}

if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
- if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
+ if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) {
err = -EROFS;
goto restore_opts;
}

@@ -6549,10 +6554,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
brelse(bh);
out:
if (inode->i_size < off + len) {
- ext4_fc_track_range(inode,
- (inode->i_size > 0 ? inode->i_size - 1 : 0)
- >> inode->i_sb->s_blocksize_bits,
- (off + len) >> inode->i_sb->s_blocksize_bits);
i_size_write(inode, off + len);
EXT4_I(inode)->i_disksize = inode->i_size;
err2 = ext4_mark_inode_dirty(handle, inode);
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
@@ -106,6 +106,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
* for a checkpoint to free up some space in the log.
*/
void __jbd2_log_wait_for_space(journal_t *journal)
+ __acquires(&journal->j_state_lock)
+ __releases(&journal->j_state_lock)
{
int nblocks, space_left;
/* assert_spin_locked(&journal->j_state_lock); */
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
@@ -450,6 +450,15 @@ void jbd2_journal_commit_transaction(journal_t *journal)
schedule();
write_lock(&journal->j_state_lock);
finish_wait(&journal->j_fc_wait, &wait);
+ /*
+ * TODO: by blocking fast commits here, we are increasing
+ * fsync() latency slightly. Strictly speaking, we don't need
+ * to block fast commits until the transaction enters T_FLUSH
+ * state. So an optimization is possible where we block new fast
+ * commits here and wait for existing ones to complete
+ * just before we enter T_FLUSH. That way, the existing fast
+ * commits and this full commit can proceed parallely.
+ */
}
write_unlock(&journal->j_state_lock);

@@ -801,7 +810,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
if (first_block < journal->j_tail)
freed += journal->j_last - journal->j_first;
/* Update tail only if we free significant amount of space */
- if (freed < journal->j_maxlen / 4)
+ if (freed < jbd2_journal_get_max_txn_bufs(journal))
update_tail = 0;
}
J_ASSERT(commit_transaction->t_state == T_COMMIT);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
@@ -727,6 +727,8 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
*/
int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
{
+ if (unlikely(is_journal_aborted(journal)))
+ return -EIO;
/*
* Fast commits only allowed if at least one full commit has
* been processed.

@@ -734,10 +736,12 @@ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
if (!journal->j_stats.ts_tid)
return -EINVAL;

- if (tid <= journal->j_commit_sequence)
- return -EALREADY;
-
write_lock(&journal->j_state_lock);
+ if (tid <= journal->j_commit_sequence) {
+ write_unlock(&journal->j_state_lock);
+ return -EALREADY;
+ }
+
if (journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
(journal->j_flags & JBD2_FAST_COMMIT_ONGOING)) {
DEFINE_WAIT(wait);

@@ -777,13 +781,19 @@ static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)

int jbd2_fc_end_commit(journal_t *journal)
{
- return __jbd2_fc_end_commit(journal, 0, 0);
+ return __jbd2_fc_end_commit(journal, 0, false);
}
EXPORT_SYMBOL(jbd2_fc_end_commit);

- int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid)
+ int jbd2_fc_end_commit_fallback(journal_t *journal)
{
- return __jbd2_fc_end_commit(journal, tid, 1);
+ tid_t tid;
+
+ read_lock(&journal->j_state_lock);
+ tid = journal->j_running_transaction ?
+ journal->j_running_transaction->t_tid : 0;
+ read_unlock(&journal->j_state_lock);
+ return __jbd2_fc_end_commit(journal, tid, true);
}
EXPORT_SYMBOL(jbd2_fc_end_commit_fallback);
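Taken together, jbd2_fc_begin_commit(), jbd2_fc_end_commit() and the reworked jbd2_fc_end_commit_fallback() give a client filesystem a fast-commit path with the following shape. This is a hedged sketch: fs_write_deltas() is a hypothetical stand-in for the filesystem-specific delta writing (done with jbd2_fc_get_buf()/jbd2_fc_wait_bufs()), not a real API:

    static int fs_fast_commit(journal_t *journal, tid_t tid)
    {
            int ret = jbd2_fc_begin_commit(journal, tid);

            if (ret == -EALREADY)
                    return 0;       /* tid already covered by a full commit */
            if (ret)
                    return ret;

            if (fs_write_deltas(journal))
                    /* On failure, fall back to a full commit. Note the new
                     * signature: the fallback derives the tid from the
                     * running transaction itself. */
                    return jbd2_fc_end_commit_fallback(journal);

            return jbd2_fc_end_commit(journal);
    }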
@@ -865,7 +875,6 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
int fc_off;

*bh_out = NULL;
- write_lock(&journal->j_state_lock);

if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) {
fc_off = journal->j_fc_off;

@@ -874,7 +883,6 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
} else {
ret = -EINVAL;
}
- write_unlock(&journal->j_state_lock);

if (ret)
return ret;

@@ -887,11 +895,7 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
if (!bh)
return -ENOMEM;

- lock_buffer(bh);
-
- clear_buffer_uptodate(bh);
- set_buffer_dirty(bh);
- unlock_buffer(bh);
journal->j_fc_wbuf[fc_off] = bh;

*bh_out = bh;

@@ -909,9 +913,7 @@ int jbd2_fc_wait_bufs(journal_t *journal, int num_blks)
struct buffer_head *bh;
int i, j_fc_off;

- read_lock(&journal->j_state_lock);
j_fc_off = journal->j_fc_off;
- read_unlock(&journal->j_state_lock);

/*
* Wait in reverse order to minimize chances of us being woken up before

@@ -939,9 +941,7 @@ int jbd2_fc_release_bufs(journal_t *journal)
struct buffer_head *bh;
int i, j_fc_off;

- read_lock(&journal->j_state_lock);
j_fc_off = journal->j_fc_off;
- read_unlock(&journal->j_state_lock);

/*
* Wait in reverse order to minimize chances of us being woken up before
@@ -1348,23 +1348,16 @@ static journal_t *journal_init_common(struct block_device *bdev,
journal->j_dev = bdev;
journal->j_fs_dev = fs_dev;
journal->j_blk_offset = start;
- journal->j_maxlen = len;
+ journal->j_total_len = len;
/* We need enough buffers to write out full descriptor block. */
n = journal->j_blocksize / jbd2_min_tag_size();
journal->j_wbufsize = n;
+ journal->j_fc_wbuf = NULL;
journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *),
GFP_KERNEL);
if (!journal->j_wbuf)
goto err_cleanup;

- if (journal->j_fc_wbufsize > 0) {
- journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize,
- sizeof(struct buffer_head *),
- GFP_KERNEL);
- if (!journal->j_fc_wbuf)
- goto err_cleanup;
- }
-
bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize);
if (!bh) {
pr_err("%s: Cannot get buffer for journal superblock\n",

@@ -1378,23 +1371,11 @@ static journal_t *journal_init_common(struct block_device *bdev,

err_cleanup:
kfree(journal->j_wbuf);
- kfree(journal->j_fc_wbuf);
jbd2_journal_destroy_revoke(journal);
kfree(journal);
return NULL;
}

- int jbd2_fc_init(journal_t *journal, int num_fc_blks)
- {
- journal->j_fc_wbufsize = num_fc_blks;
- journal->j_fc_wbuf = kmalloc_array(journal->j_fc_wbufsize,
- sizeof(struct buffer_head *), GFP_KERNEL);
- if (!journal->j_fc_wbuf)
- return -ENOMEM;
- return 0;
- }
- EXPORT_SYMBOL(jbd2_fc_init);
-
/* jbd2_journal_init_dev and jbd2_journal_init_inode:
*
* Create a journal structure assigned some fixed set of disk blocks to
@@ -1512,16 +1493,7 @@ static int journal_reset(journal_t *journal)
}

journal->j_first = first;
- if (jbd2_has_feature_fast_commit(journal) &&
- journal->j_fc_wbufsize > 0) {
- journal->j_fc_last = last;
- journal->j_last = last - journal->j_fc_wbufsize;
- journal->j_fc_first = journal->j_last + 1;
- journal->j_fc_off = 0;
- } else {
- journal->j_last = last;
- }
+ journal->j_last = last;

journal->j_head = journal->j_first;
journal->j_tail = journal->j_first;

@@ -1531,7 +1503,14 @@ static int journal_reset(journal_t *journal)
journal->j_commit_sequence = journal->j_transaction_sequence - 1;
journal->j_commit_request = journal->j_commit_sequence;

- journal->j_max_transaction_buffers = journal->j_maxlen / 4;
+ journal->j_max_transaction_buffers = jbd2_journal_get_max_txn_bufs(journal);
+
+ /*
+ * Now that journal recovery is done, turn fast commits off here. This
+ * way, if fast commit was enabled before the crash but if now FS has
+ * disabled it, we don't enable fast commits.
+ */
+ jbd2_clear_feature_fast_commit(journal);

/*
* As a special case, if the on-disk copy is already marked as needing
@@ -1792,15 +1771,15 @@ static int journal_get_superblock(journal_t *journal)
goto out;
}

- if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen)
- journal->j_maxlen = be32_to_cpu(sb->s_maxlen);
- else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) {
+ if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len)
+ journal->j_total_len = be32_to_cpu(sb->s_maxlen);
+ else if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) {
printk(KERN_WARNING "JBD2: journal file too short\n");
goto out;
}

if (be32_to_cpu(sb->s_first) == 0 ||
- be32_to_cpu(sb->s_first) >= journal->j_maxlen) {
+ be32_to_cpu(sb->s_first) >= journal->j_total_len) {
printk(KERN_WARNING
"JBD2: Invalid start block of journal: %u\n",
be32_to_cpu(sb->s_first));

@@ -1872,6 +1851,7 @@ static int load_superblock(journal_t *journal)
{
int err;
journal_superblock_t *sb;
+ int num_fc_blocks;

err = journal_get_superblock(journal);
if (err)

@@ -1883,15 +1863,17 @@ static int load_superblock(journal_t *journal)
journal->j_tail = be32_to_cpu(sb->s_start);
journal->j_first = be32_to_cpu(sb->s_first);
journal->j_errno = be32_to_cpu(sb->s_errno);
+ journal->j_last = be32_to_cpu(sb->s_maxlen);

- if (jbd2_has_feature_fast_commit(journal) &&
- journal->j_fc_wbufsize > 0) {
+ if (jbd2_has_feature_fast_commit(journal)) {
journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
- journal->j_last = journal->j_fc_last - journal->j_fc_wbufsize;
+ num_fc_blocks = be32_to_cpu(sb->s_num_fc_blks);
+ if (!num_fc_blocks)
+ num_fc_blocks = JBD2_MIN_FC_BLOCKS;
+ if (journal->j_last - num_fc_blocks >= JBD2_MIN_JOURNAL_BLOCKS)
+ journal->j_last = journal->j_fc_last - num_fc_blocks;
journal->j_fc_first = journal->j_last + 1;
journal->j_fc_off = 0;
- } else {
- journal->j_last = be32_to_cpu(sb->s_maxlen);
}

return 0;
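The load_superblock() change carves the fast-commit area out of the tail of the journal, but only when doing so still leaves a usable full-commit area. A worked example with assumed numbers -- a 32768-block journal whose superblock has s_num_fc_blks == 0:

    j_last        = 32768;     /* be32_to_cpu(sb->s_maxlen) */
    num_fc_blocks = 256;       /* s_num_fc_blks == 0, so JBD2_MIN_FC_BLOCKS */
    /* 32768 - 256 >= JBD2_MIN_JOURNAL_BLOCKS (1024), so shrink j_last: */
    j_fc_last     = 32768;     /* fast-commit area ends where the journal ends */
    j_last        = 32512;     /* 32768 - 256: new end of the full-commit area */
    j_fc_first    = 32513;     /* j_last + 1: first fast-commit block */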
@@ -1954,9 +1936,6 @@ int jbd2_journal_load(journal_t *journal)
*/
journal->j_flags &= ~JBD2_ABORT;

- if (journal->j_fc_wbufsize > 0)
- jbd2_journal_set_features(journal, 0, 0,
- JBD2_FEATURE_INCOMPAT_FAST_COMMIT);
/* OK, we've finished with the dynamic journal bits:
* reinitialise the dynamic contents of the superblock in memory
* and reset them on disk. */

@@ -2040,8 +2019,7 @@ int jbd2_journal_destroy(journal_t *journal)
jbd2_journal_destroy_revoke(journal);
if (journal->j_chksum_driver)
crypto_free_shash(journal->j_chksum_driver);
- if (journal->j_fc_wbufsize > 0)
- kfree(journal->j_fc_wbuf);
+ kfree(journal->j_fc_wbuf);
kfree(journal->j_wbuf);
kfree(journal);
@@ -2116,6 +2094,37 @@ int jbd2_journal_check_available_features(journal_t *journal, unsigned long comp
return 0;
}

+ static int
+ jbd2_journal_initialize_fast_commit(journal_t *journal)
+ {
+ journal_superblock_t *sb = journal->j_superblock;
+ unsigned long long num_fc_blks;
+
+ num_fc_blks = be32_to_cpu(sb->s_num_fc_blks);
+ if (num_fc_blks == 0)
+ num_fc_blks = JBD2_MIN_FC_BLOCKS;
+ if (journal->j_last - num_fc_blks < JBD2_MIN_JOURNAL_BLOCKS)
+ return -ENOSPC;
+
+ /* Are we called twice? */
+ WARN_ON(journal->j_fc_wbuf != NULL);
+ journal->j_fc_wbuf = kmalloc_array(num_fc_blks,
+ sizeof(struct buffer_head *), GFP_KERNEL);
+ if (!journal->j_fc_wbuf)
+ return -ENOMEM;
+
+ journal->j_fc_wbufsize = num_fc_blks;
+ journal->j_fc_last = journal->j_last;
+ journal->j_last = journal->j_fc_last - num_fc_blks;
+ journal->j_fc_first = journal->j_last + 1;
+ journal->j_fc_off = 0;
+ journal->j_free = journal->j_last - journal->j_first;
+ journal->j_max_transaction_buffers =
+ jbd2_journal_get_max_txn_bufs(journal);
+
+ return 0;
+ }

/**
* int jbd2_journal_set_features() - Mark a given journal feature in the superblock
* @journal: Journal to act on.

@@ -2159,6 +2168,13 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat,

sb = journal->j_superblock;

+ if (incompat & JBD2_FEATURE_INCOMPAT_FAST_COMMIT) {
+ if (jbd2_journal_initialize_fast_commit(journal)) {
+ pr_err("JBD2: Cannot enable fast commits.\n");
+ return 0;
+ }
+ }
+
/* Load the checksum driver if necessary */
if ((journal->j_chksum_driver == NULL) &&
INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
@@ -74,8 +74,8 @@ static int do_readahead(journal_t *journal, unsigned int start)

/* Do up to 128K of readahead */
max = start + (128 * 1024 / journal->j_blocksize);
- if (max > journal->j_maxlen)
- max = journal->j_maxlen;
+ if (max > journal->j_total_len)
+ max = journal->j_total_len;

/* Do the readahead itself. We'll submit MAXBUF buffer_heads at
* a time to the block device IO layer. */

@@ -134,7 +134,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal,

*bhp = NULL;

- if (offset >= journal->j_maxlen) {
+ if (offset >= journal->j_total_len) {
printk(KERN_ERR "JBD2: corrupted journal superblock\n");
return -EFSCORRUPTED;
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
@@ -195,8 +195,10 @@ static void wait_transaction_switching(journal_t *journal)
DEFINE_WAIT(wait);

if (WARN_ON(!journal->j_running_transaction ||
- journal->j_running_transaction->t_state != T_SWITCH))
+ journal->j_running_transaction->t_state != T_SWITCH)) {
+ read_unlock(&journal->j_state_lock);
return;
+ }
prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
TASK_UNINTERRUPTIBLE);
read_unlock(&journal->j_state_lock);
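The transaction.c fix closes a lock leak: wait_transaction_switching() runs with j_state_lock held for read, and the WARN_ON early return previously left the lock held. The rule being enforced, as a minimal generic sketch (bad_state is a stand-in condition, not real code from this function):

    read_lock(&journal->j_state_lock);
    if (WARN_ON(bad_state)) {
            read_unlock(&journal->j_state_lock);   /* was missing before */
            return;
    }
    /* ... prepare_to_wait(), drop the lock, schedule() ... */
    read_unlock(&journal->j_state_lock);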
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
@@ -316,10 +316,6 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp)
fh_copy(&resp->dirfh, &argp->fh);
fh_init(&resp->fh, NFS3_FHSIZE);

- if (argp->ftype == 0 || argp->ftype >= NF3BAD) {
- resp->status = nfserr_inval;
- goto out;
- }
if (argp->ftype == NF3CHR || argp->ftype == NF3BLK) {
rdev = MKDEV(argp->major, argp->minor);
if (MAJOR(rdev) != argp->major ||

@@ -328,7 +324,7 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp)
goto out;
}
} else if (argp->ftype != NF3SOCK && argp->ftype != NF3FIFO) {
- resp->status = nfserr_inval;
+ resp->status = nfserr_badtype;
goto out;
}

diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
@@ -1114,6 +1114,7 @@ nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p)
{
struct nfsd3_pathconfres *resp = rqstp->rq_resp;

*p++ = resp->status;
+ *p++ = xdr_zero; /* no post_op_attr */

if (resp->status == 0) {

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
@@ -1299,7 +1299,7 @@ nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
struct nfsd_file *dst)
{
nfs42_ssc_close(src->nf_file);
- nfsd_file_put(src);
+ /* 'src' is freed by nfsd4_do_async_copy */
nfsd_file_put(dst);
mntput(ss_mnt);
}

@@ -1486,6 +1486,7 @@ static int nfsd4_do_async_copy(void *data)
cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
if (!cb_copy)
goto out;
+ refcount_set(&cb_copy->refcount, 1);
memcpy(&cb_copy->cp_res, &copy->cp_res, sizeof(copy->cp_res));
cb_copy->cp_clp = copy->cp_clp;
cb_copy->nfserr = copy->nfserr;
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
@@ -877,7 +877,7 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
goto done;
}

- trace_ocfs2_journal_init_maxlen(j_journal->j_maxlen);
+ trace_ocfs2_journal_init_maxlen(j_journal->j_total_len);

*dirty = (le32_to_cpu(di->id1.journal1.ij_flags) &
OCFS2_JOURNAL_DIRTY_FL);
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
@@ -68,6 +68,7 @@ extern void *jbd2_alloc(size_t size, gfp_t flags);
extern void jbd2_free(void *ptr, size_t size);

#define JBD2_MIN_JOURNAL_BLOCKS 1024
+ #define JBD2_MIN_FC_BLOCKS 256

#ifdef __KERNEL__

@@ -944,8 +945,9 @@ struct journal_s
/**
* @j_fc_off:
*
- * Number of fast commit blocks currently allocated.
- * [j_state_lock].
+ * Number of fast commit blocks currently allocated. Accessed only
+ * during fast commit. Currently only process can do fast commit, so
+ * this field is not protected by any lock.
*/
unsigned long j_fc_off;

@@ -988,9 +990,9 @@ struct journal_s
struct block_device *j_fs_dev;

/**
- * @j_maxlen: Total maximum capacity of the journal region on disk.
+ * @j_total_len: Total maximum capacity of the journal region on disk.
*/
- unsigned int j_maxlen;
+ unsigned int j_total_len;

/**
* @j_reserved_credits:

@@ -1108,8 +1110,9 @@ struct journal_s
struct buffer_head **j_wbuf;

/**
- * @j_fc_wbuf: Array of fast commit bhs for
- * jbd2_journal_commit_transaction.
+ * @j_fc_wbuf: Array of fast commit bhs for fast commit. Accessed only
+ * during a fast commit. Currently only process can do fast commit, so
+ * this field is not protected by any lock.
*/
struct buffer_head **j_fc_wbuf;

@@ -1614,16 +1617,20 @@ extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *);
extern int jbd2_cleanup_journal_tail(journal_t *);

/* Fast commit related APIs */
- int jbd2_fc_init(journal_t *journal, int num_fc_blks);
int jbd2_fc_begin_commit(journal_t *journal, tid_t tid);
int jbd2_fc_end_commit(journal_t *journal);
- int jbd2_fc_end_commit_fallback(journal_t *journal, tid_t tid);
+ int jbd2_fc_end_commit_fallback(journal_t *journal);
int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out);
int jbd2_submit_inode_data(struct jbd2_inode *jinode);
int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode);
int jbd2_fc_wait_bufs(journal_t *journal, int num_blks);
int jbd2_fc_release_bufs(journal_t *journal);

+ static inline int jbd2_journal_get_max_txn_bufs(journal_t *journal)
+ {
+ return (journal->j_total_len - journal->j_fc_wbufsize) / 4;
+ }
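Because j_total_len now counts the fast-commit blocks too, the quarter-of-the-journal transaction sizing has to subtract j_fc_wbufsize first. A worked example with assumed values:

    /* A 32768-block journal with a 256-block fast-commit area: */
    max_txn_bufs = (32768 - 256) / 4;   /* = 8128 */
    /* versus the old j_maxlen / 4 formula: 32768 / 4 = 8192, which would
     * have counted fast-commit blocks as usable transaction space. */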

/*
* is_journal_abort
*
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
@@ -100,11 +100,12 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B);
{ EXT4_FC_REASON_XATTR, "XATTR"}, \
{ EXT4_FC_REASON_CROSS_RENAME, "CROSS_RENAME"}, \
{ EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, "JOURNAL_FLAG_CHANGE"}, \
- { EXT4_FC_REASON_MEM, "NO_MEM"}, \
+ { EXT4_FC_REASON_NOMEM, "NO_MEM"}, \
{ EXT4_FC_REASON_SWAP_BOOT, "SWAP_BOOT"}, \
{ EXT4_FC_REASON_RESIZE, "RESIZE"}, \
{ EXT4_FC_REASON_RENAME_DIR, "RENAME_DIR"}, \
- { EXT4_FC_REASON_FALLOC_RANGE, "FALLOC_RANGE"})
+ { EXT4_FC_REASON_FALLOC_RANGE, "FALLOC_RANGE"}, \
+ { EXT4_FC_REASON_INODE_JOURNAL_DATA, "INODE_JOURNAL_DATA"})

TRACE_EVENT(ext4_other_inode_update_time,
TP_PROTO(struct inode *inode, ino_t orig_ino),

@@ -2917,17 +2918,18 @@ TRACE_EVENT(ext4_fc_stats,
),

TP_printk("dev %d:%d fc ineligible reasons:\n"
- "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s,%d; "
+ "%s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d, %s:%d; "
"num_commits:%ld, ineligible: %ld, numblks: %ld",
MAJOR(__entry->dev), MINOR(__entry->dev),
FC_REASON_NAME_STAT(EXT4_FC_REASON_XATTR),
FC_REASON_NAME_STAT(EXT4_FC_REASON_CROSS_RENAME),
FC_REASON_NAME_STAT(EXT4_FC_REASON_JOURNAL_FLAG_CHANGE),
- FC_REASON_NAME_STAT(EXT4_FC_REASON_MEM),
+ FC_REASON_NAME_STAT(EXT4_FC_REASON_NOMEM),
FC_REASON_NAME_STAT(EXT4_FC_REASON_SWAP_BOOT),
FC_REASON_NAME_STAT(EXT4_FC_REASON_RESIZE),
FC_REASON_NAME_STAT(EXT4_FC_REASON_RENAME_DIR),
FC_REASON_NAME_STAT(EXT4_FC_REASON_FALLOC_RANGE),
+ FC_REASON_NAME_STAT(EXT4_FC_REASON_INODE_JOURNAL_DATA),
__entry->sbi->s_fc_stats.fc_num_commits,
__entry->sbi->s_fc_stats.fc_ineligible_commits,
__entry->sbi->s_fc_stats.fc_numblks)
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
@@ -655,10 +655,10 @@ TRACE_EVENT(rpc_xdr_overflow,
__field(size_t, tail_len)
__field(unsigned int, page_len)
__field(unsigned int, len)
- __string(progname,
- xdr->rqst->rq_task->tk_client->cl_program->name)
- __string(procedure,
- xdr->rqst->rq_task->tk_msg.rpc_proc->p_name)
+ __string(progname, xdr->rqst ?
+ xdr->rqst->rq_task->tk_client->cl_program->name : "unknown")
+ __string(procedure, xdr->rqst ?
+ xdr->rqst->rq_task->tk_msg.rpc_proc->p_name : "unknown")
),

TP_fast_assign(
diff --git a/kernel/exit.c b/kernel/exit.c
@@ -454,7 +454,10 @@ static void exit_mm(void)
mmap_read_unlock(mm);

self.task = current;
- self.next = xchg(&core_state->dumper.next, &self);
+ if (self.task->flags & PF_SIGNALED)
+ self.next = xchg(&core_state->dumper.next, &self);
+ else
+ self.task = NULL;
/*
* Implies mb(), the result of xchg() must be visible
* to core_state->dumper.
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
@@ -63,19 +63,20 @@ static int proc_do_xprt(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
char tmpbuf[256];
- size_t len;
+ ssize_t len;

- if ((*ppos && !write) || !*lenp) {
+ if (write || *ppos) {
*lenp = 0;
return 0;
}
len = svc_print_xprts(tmpbuf, sizeof(tmpbuf));
- *lenp = memory_read_from_buffer(buffer, *lenp, ppos, tmpbuf, len);
+ len = memory_read_from_buffer(buffer, *lenp, ppos, tmpbuf, len);

- if (*lenp < 0) {
+ if (len < 0) {
*lenp = 0;
return -EINVAL;
}
+ *lenp = len;
return 0;
}
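The sysctl fix is a signedness bug: len was a size_t, so a negative return from memory_read_from_buffer() stored through the unsigned *lenp could never be caught by a less-than-zero test. A standalone illustration, independent of the sysctl code:

    #include <stdio.h>
    #include <sys/types.h>

    int main(void)
    {
            size_t ulen = (size_t)-22;      /* -EINVAL forced into size_t */
            ssize_t slen = -22;

            /* for unsigned size_t the comparison below is always false */
            printf("size_t  < 0 ? %d\n", ulen < (size_t)0);   /* prints 0 */
            printf("ssize_t < 0 ? %d\n", slen < 0);           /* prints 1 */
            return 0;
    }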
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
@@ -12,11 +12,12 @@ turbostat : turbostat.c
override CFLAGS += -O2 -Wall -I../../../include
override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
override CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"'
+ override CFLAGS += -D_FILE_OFFSET_BITS=64
override CFLAGS += -D_FORTIFY_SOURCE=2

%: %.c
	@mkdir -p $(BUILD_OUTPUT)
-	$(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) -lcap
+	$(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) -lcap -lrt

.PHONY : clean
clean :
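-lrt is added because the new periodic MSR-snapshot code (msr_sum_record(), later in this diff) uses POSIX interval timers, which live in librt on older glibc. A minimal standalone sketch of the same mechanism, using thread-based notification rather than a signal handler; names here are illustrative:

    /* build: cc timer_demo.c -lrt */
    #include <signal.h>
    #include <stdio.h>
    #include <string.h>
    #include <time.h>
    #include <unistd.h>

    static void tick(union sigval v)
    {
            printf("timer fired\n");   /* turbostat snapshots MSRs here */
    }

    int main(void)
    {
            timer_t timerid;
            struct sigevent sev;
            struct itimerspec its;

            memset(&sev, 0, sizeof(sev));
            sev.sigev_notify = SIGEV_THREAD;   /* callback thread, no signal */
            sev.sigev_notify_function = tick;
            if (timer_create(CLOCK_MONOTONIC, &sev, &timerid))
                    return 1;

            memset(&its, 0, sizeof(its));
            its.it_value.tv_sec = 1;
            its.it_interval.tv_sec = 1;        /* fire every second */
            timer_settime(timerid, 0, &its, NULL);

            sleep(3);
            return 0;
    }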
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
@@ -335,7 +335,7 @@ that they count at TSC rate, which is true on all processors tested to date.

.SH REFERENCES
Volume 3B: System Programming Guide"
- http://www.intel.com/products/processor/manuals/
+ https://www.intel.com/products/processor/manuals/

.SH FILES
.ta
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
@@ -79,6 +79,7 @@ unsigned long long gfx_cur_rc6_ms;
unsigned long long cpuidle_cur_cpu_lpi_us;
unsigned long long cpuidle_cur_sys_lpi_us;
unsigned int gfx_cur_mhz;
+ unsigned int gfx_act_mhz;
unsigned int tcc_activation_temp;
unsigned int tcc_activation_temp_override;
double rapl_power_units, rapl_time_units;

@@ -210,13 +211,14 @@ struct pkg_data {
unsigned long long pkg_both_core_gfxe_c0;
long long gfx_rc6_ms;
unsigned int gfx_mhz;
+ unsigned int gfx_act_mhz;
unsigned int package_id;
- unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */
- unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */
- unsigned int energy_cores; /* MSR_PP0_ENERGY_STATUS */
- unsigned int energy_gfx; /* MSR_PP1_ENERGY_STATUS */
- unsigned int rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */
- unsigned int rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */
+ unsigned long long energy_pkg; /* MSR_PKG_ENERGY_STATUS */
+ unsigned long long energy_dram; /* MSR_DRAM_ENERGY_STATUS */
+ unsigned long long energy_cores; /* MSR_PP0_ENERGY_STATUS */
+ unsigned long long energy_gfx; /* MSR_PP1_ENERGY_STATUS */
+ unsigned long long rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */
+ unsigned long long rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */
unsigned int pkg_temp_c;
unsigned long long counter[MAX_ADDED_COUNTERS];
} *package_even, *package_odd;

@@ -259,6 +261,113 @@ struct msr_counter {
#define SYSFS_PERCPU (1 << 1)
};

+ /*
+ * The accumulated sum of MSR is defined as a monotonic
+ * increasing MSR, it will be accumulated periodically,
+ * despite its register's bit width.
+ */
+ enum {
+ IDX_PKG_ENERGY,
+ IDX_DRAM_ENERGY,
+ IDX_PP0_ENERGY,
+ IDX_PP1_ENERGY,
+ IDX_PKG_PERF,
+ IDX_DRAM_PERF,
+ IDX_COUNT,
+ };
+
+ int get_msr_sum(int cpu, off_t offset, unsigned long long *msr);
+
+ struct msr_sum_array {
+ /* get_msr_sum() = sum + (get_msr() - last) */
+ struct {
+ /*The accumulated MSR value is updated by the timer*/
+ unsigned long long sum;
+ /*The MSR footprint recorded in last timer*/
+ unsigned long long last;
+ } entries[IDX_COUNT];
+ };
+
+ /* The percpu MSR sum array.*/
+ struct msr_sum_array *per_cpu_msr_sum;
+
+ int idx_to_offset(int idx)
+ {
+ int offset;
+
+ switch (idx) {
+ case IDX_PKG_ENERGY:
+ offset = MSR_PKG_ENERGY_STATUS;
+ break;
+ case IDX_DRAM_ENERGY:
+ offset = MSR_DRAM_ENERGY_STATUS;
+ break;
+ case IDX_PP0_ENERGY:
+ offset = MSR_PP0_ENERGY_STATUS;
+ break;
+ case IDX_PP1_ENERGY:
+ offset = MSR_PP1_ENERGY_STATUS;
+ break;
+ case IDX_PKG_PERF:
+ offset = MSR_PKG_PERF_STATUS;
+ break;
+ case IDX_DRAM_PERF:
+ offset = MSR_DRAM_PERF_STATUS;
+ break;
+ default:
+ offset = -1;
+ }
+ return offset;
+ }
+
+ int offset_to_idx(int offset)
+ {
+ int idx;
+
+ switch (offset) {
+ case MSR_PKG_ENERGY_STATUS:
+ idx = IDX_PKG_ENERGY;
+ break;
+ case MSR_DRAM_ENERGY_STATUS:
+ idx = IDX_DRAM_ENERGY;
+ break;
+ case MSR_PP0_ENERGY_STATUS:
+ idx = IDX_PP0_ENERGY;
+ break;
+ case MSR_PP1_ENERGY_STATUS:
+ idx = IDX_PP1_ENERGY;
+ break;
+ case MSR_PKG_PERF_STATUS:
+ idx = IDX_PKG_PERF;
+ break;
+ case MSR_DRAM_PERF_STATUS:
+ idx = IDX_DRAM_PERF;
+ break;
+ default:
+ idx = -1;
+ }
+ return idx;
+ }
+
+ int idx_valid(int idx)
+ {
+ switch (idx) {
+ case IDX_PKG_ENERGY:
+ return do_rapl & RAPL_PKG;
+ case IDX_DRAM_ENERGY:
+ return do_rapl & RAPL_DRAM;
+ case IDX_PP0_ENERGY:
+ return do_rapl & RAPL_CORES_ENERGY_STATUS;
+ case IDX_PP1_ENERGY:
+ return do_rapl & RAPL_GFX;
+ case IDX_PKG_PERF:
+ return do_rapl & RAPL_PKG_PERF_STATUS;
+ case IDX_DRAM_PERF:
+ return do_rapl & RAPL_DRAM_PERF_STATUS;
+ default:
+ return 0;
+ }
+ }
struct sys_counters {
unsigned int added_thread_counters;
unsigned int added_core_counters;
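The sum/last pair in struct msr_sum_array implements a software widening of the 32-bit RAPL energy counters: a periodic timer folds each wrapped 32-bit delta into a 64-bit running sum, and get_msr_sum() (defined near the end of this diff) reports sum + (current - last). A worked example with assumed raw readings around a wrap:

    unsigned long long last = 0xFFFFFFF0;   /* raw MSR at the last tick */
    unsigned long long cur  = 0x00000010;   /* counter wrapped past 2^32 */

    /* DELTA_WRAP32(cur, last) leaves the modulo-2^32 delta in 'last':
     * (0x00000010 - 0xFFFFFFF0) mod 2^32 = 0x20 */
    unsigned long long delta = (cur - last) & 0xFFFFFFFFULL;   /* = 0x20 */

    /* the timer callback then does: sum += delta; last = cur & 0xffffffff; */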
@@ -451,6 +560,7 @@ struct msr_counter bic[] = {
{ 0x0, "APIC" },
{ 0x0, "X2APIC" },
{ 0x0, "Die" },
+ { 0x0, "GFXAMHz" },
};

#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))

@@ -505,6 +615,7 @@ struct msr_counter bic[] = {
#define BIC_APIC (1ULL << 48)
#define BIC_X2APIC (1ULL << 49)
#define BIC_Die (1ULL << 50)
+ #define BIC_GFXACTMHz (1ULL << 51)

#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)

@@ -724,6 +835,9 @@ void print_header(char *delim)
if (DO_BIC(BIC_GFXMHz))
outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));

+ if (DO_BIC(BIC_GFXACTMHz))
+ outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : ""));
+
if (DO_BIC(BIC_Totl_c0))
outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
if (DO_BIC(BIC_Any_c0))
@@ -858,13 +972,13 @@ int dump_counters(struct thread_data *t, struct core_data *c,
outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
- outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
- outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
- outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
- outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram);
- outp += sprintf(outp, "Throttle PKG: %0X\n",
+ outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg);
+ outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores);
+ outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx);
+ outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram);
+ outp += sprintf(outp, "Throttle PKG: %0llX\n",
p->rapl_pkg_perf_status);
- outp += sprintf(outp, "Throttle RAM: %0X\n",
+ outp += sprintf(outp, "Throttle RAM: %0llX\n",
p->rapl_dram_perf_status);
outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);

@@ -1062,14 +1176,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
}
}

- /*
- * If measurement interval exceeds minimum RAPL Joule Counter range,
- * indicate that results are suspect by printing "**" in fraction place.
- */
- if (interval_float < rapl_joule_counter_range)
- fmt8 = "%s%.2f";
- else
- fmt8 = "%6.0f**";
+ fmt8 = "%s%.2f";

if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);

@@ -1098,6 +1205,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (DO_BIC(BIC_GFXMHz))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);

+ /* GFXACTMHz */
+ if (DO_BIC(BIC_GFXACTMHz))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz);
+
/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
if (DO_BIC(BIC_Totl_c0))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc);
@@ -1210,11 +1321,7 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_
}

#define DELTA_WRAP32(new, old) \
- if (new > old) { \
- old = new - old; \
- } else { \
- old = 0x100000000 + new - old; \
- }
+ old = ((((unsigned long long)new << 32) - ((unsigned long long)old << 32)) >> 32);

int
delta_package(struct pkg_data *new, struct pkg_data *old)
@@ -1253,13 +1360,14 @@ delta_package(struct pkg_data *new, struct pkg_data *old)
old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;

old->gfx_mhz = new->gfx_mhz;
+ old->gfx_act_mhz = new->gfx_act_mhz;

- DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
- DELTA_WRAP32(new->energy_cores, old->energy_cores);
- DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
- DELTA_WRAP32(new->energy_dram, old->energy_dram);
- DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
- DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
+ old->energy_pkg = new->energy_pkg - old->energy_pkg;
+ old->energy_cores = new->energy_cores - old->energy_cores;
+ old->energy_gfx = new->energy_gfx - old->energy_gfx;
+ old->energy_dram = new->energy_dram - old->energy_dram;
+ old->rapl_pkg_perf_status = new->rapl_pkg_perf_status - old->rapl_pkg_perf_status;
+ old->rapl_dram_perf_status = new->rapl_dram_perf_status - old->rapl_dram_perf_status;

for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
@@ -1469,6 +1577,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data

p->gfx_rc6_ms = 0;
p->gfx_mhz = 0;
+ p->gfx_act_mhz = 0;
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
t->counter[i] = 0;

@@ -1564,6 +1673,7 @@ int sum_counters(struct thread_data *t, struct core_data *c,

average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
average.packages.gfx_mhz = p->gfx_mhz;
+ average.packages.gfx_act_mhz = p->gfx_act_mhz;

average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);

@@ -1784,7 +1894,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
int i;

if (cpu_migrate(cpu)) {
- fprintf(outf, "Could not migrate to CPU %d\n", cpu);
+ fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu);
return -1;
}
@@ -1966,39 +2076,39 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
p->sys_lpi = cpuidle_cur_sys_lpi_us;

if (do_rapl & RAPL_PKG) {
- if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
+ if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr))
return -13;
- p->energy_pkg = msr & 0xFFFFFFFF;
+ p->energy_pkg = msr;
}
if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
- if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
+ if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr))
return -14;
- p->energy_cores = msr & 0xFFFFFFFF;
+ p->energy_cores = msr;
}
if (do_rapl & RAPL_DRAM) {
- if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
+ if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
return -15;
- p->energy_dram = msr & 0xFFFFFFFF;
+ p->energy_dram = msr;
}
if (do_rapl & RAPL_GFX) {
- if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
+ if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr))
return -16;
- p->energy_gfx = msr & 0xFFFFFFFF;
+ p->energy_gfx = msr;
}
if (do_rapl & RAPL_PKG_PERF_STATUS) {
- if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
+ if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr))
return -16;
- p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
+ p->rapl_pkg_perf_status = msr;
}
if (do_rapl & RAPL_DRAM_PERF_STATUS) {
- if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
+ if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr))
return -16;
- p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
+ p->rapl_dram_perf_status = msr;
}
if (do_rapl & RAPL_AMD_F17H) {
- if (get_msr(cpu, MSR_PKG_ENERGY_STAT, &msr))
+ if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr))
return -13;
- p->energy_pkg = msr & 0xFFFFFFFF;
+ p->energy_pkg = msr;
}
if (DO_BIC(BIC_PkgTmp)) {
if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))

@@ -2012,6 +2122,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (DO_BIC(BIC_GFXMHz))
p->gfx_mhz = gfx_cur_mhz;

+ if (DO_BIC(BIC_GFXACTMHz))
+ p->gfx_act_mhz = gfx_act_mhz;
+
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (get_mp(cpu, mp, &p->counter[i]))
return -10;

@@ -2173,6 +2286,7 @@ int has_turbo_ratio_group_limits(int family, int model)
case INTEL_FAM6_ATOM_GOLDMONT:
case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_ATOM_GOLDMONT_D:
+ case INTEL_FAM6_ATOM_TREMONT_D:
return 1;
}
return 0;
@@ -2650,7 +2764,12 @@ int get_thread_siblings(struct cpu_topology *thiscpu)

sprintf(path,
"/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
- filep = fopen_or_die(path, "r");
+ filep = fopen(path, "r");
+
+ if (!filep) {
+ warnx("%s: open failed", path);
+ return -1;
+ }
do {
offset -= BITMASK_SIZE;
if (fscanf(filep, "%lx%c", &map, &character) != 2)

@@ -2763,18 +2882,25 @@ void re_initialize(void)
{
free_all_buffers();
setup_all_buffers();
- printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
+ fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
}

void set_max_cpu_num(void)
{
FILE *filep;
+ int base_cpu;
unsigned long dummy;
+ char pathname[64];

+ base_cpu = sched_getcpu();
+ if (base_cpu < 0)
+ err(1, "cannot find calling cpu ID");
+ sprintf(pathname,
+ "/sys/devices/system/cpu/cpu%d/topology/thread_siblings",
+ base_cpu);
+
+ filep = fopen_or_die(pathname, "r");
topo.max_cpu_num = 0;
- filep = fopen_or_die(
- "/sys/devices/system/cpu/cpu0/topology/thread_siblings",
- "r");
while (fscanf(filep, "%lx,", &dummy) == 1)
topo.max_cpu_num += BITMASK_SIZE;
fclose(filep);
@ -2915,6 +3041,33 @@ int snapshot_gfx_mhz(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * snapshot_gfx_act_mhz()
 *
 * record snapshot of
 * /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
 *
 * return 1 if config change requires a restart, else return 0
 */
int snapshot_gfx_act_mhz(void)
{
	static FILE *fp;
	int retval;

	if (fp == NULL)
		fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
	else {
		rewind(fp);
		fflush(fp);
	}

	retval = fscanf(fp, "%d", &gfx_act_mhz);
	if (retval != 1)
		err(1, "GFX ACT MHz");

	return 0;
}

/*
 * snapshot_cpu_lpi()
 *

@ -2980,6 +3133,9 @@ int snapshot_proc_sysfs_files(void)
	if (DO_BIC(BIC_GFXMHz))
		snapshot_gfx_mhz();

	if (DO_BIC(BIC_GFXACTMHz))
		snapshot_gfx_act_mhz();

	if (DO_BIC(BIC_CPU_LPI))
		snapshot_cpu_lpi_us();

@ -3057,6 +3213,111 @@ void do_sleep(void)
	}
}

int get_msr_sum(int cpu, off_t offset, unsigned long long *msr)
{
	int ret, idx;
	unsigned long long msr_cur, msr_last;

	if (!per_cpu_msr_sum)
		return 1;

	idx = offset_to_idx(offset);
	if (idx < 0)
		return idx;
	/* get_msr_sum() = sum + (get_msr() - last) */
	ret = get_msr(cpu, offset, &msr_cur);
	if (ret)
		return ret;
	msr_last = per_cpu_msr_sum[cpu].entries[idx].last;
	DELTA_WRAP32(msr_cur, msr_last);
	*msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum;

	return 0;
}
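get_msr_sum() rebuilds a monotonically growing energy value from a 32-bit MSR: it takes the wrap-safe delta since the last periodic snapshot and adds it to the accumulated sum. The standalone sketch below only illustrates the kind of delta DELTA_WRAP32 is relied on to compute; the macro's real definition lives elsewhere in turbostat.c, so treat this helper as an illustrative assumption, not the source.

/*
 * Illustrative sketch (not the turbostat source): a 32-bit
 * wrap-safe delta. If the counter wrapped, add back 2^32.
 */
#include <stdio.h>

static unsigned long long delta_wrap32(unsigned long long new,
				       unsigned long long old)
{
	if (new >= old)
		return new - old;
	return 0x100000000ULL + new - old;
}

int main(void)
{
	/* Wrapped case: counter went 0xFFFFFFF0 -> 0x10, delta is 0x20. */
	printf("0x%llx\n", delta_wrap32(0x10, 0xFFFFFFF0));
	return 0;
}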

timer_t timerid;

/* Timer callback, update the sum of MSRs periodically. */
static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int i, ret;
	int cpu = t->cpu_id;

	for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
		unsigned long long msr_cur, msr_last;
		int offset;

		if (!idx_valid(i))
			continue;
		offset = idx_to_offset(i);
		if (offset < 0)
			continue;
		ret = get_msr(cpu, offset, &msr_cur);
		if (ret) {
			fprintf(outf, "Can not update msr(0x%x)\n", offset);
			continue;
		}

		msr_last = per_cpu_msr_sum[cpu].entries[i].last;
		per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff;

		DELTA_WRAP32(msr_cur, msr_last);
		per_cpu_msr_sum[cpu].entries[i].sum += msr_last;
	}
	return 0;
}

static void
msr_record_handler(union sigval v)
{
	for_all_cpus(update_msr_sum, EVEN_COUNTERS);
}

void msr_sum_record(void)
{
	struct itimerspec its;
	struct sigevent sev;

	per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array));
	if (!per_cpu_msr_sum) {
		fprintf(outf, "Can not allocate memory for long time MSR.\n");
		return;
	}
	/*
	 * Signal handler might be restricted, so use thread notifier instead.
	 */
	memset(&sev, 0, sizeof(struct sigevent));
	sev.sigev_notify = SIGEV_THREAD;
	sev.sigev_notify_function = msr_record_handler;

	sev.sigev_value.sival_ptr = &timerid;
	if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) {
		fprintf(outf, "Can not create timer.\n");
		goto release_msr;
	}

	its.it_value.tv_sec = 0;
	its.it_value.tv_nsec = 1;
	/*
	 * A wraparound time has been calculated early.
	 * Some sources state that the peak power for a
	 * microprocessor is usually 1.5 times the TDP rating,
	 * use 2 * TDP for safety.
	 */
	its.it_interval.tv_sec = rapl_joule_counter_range / 2;
	its.it_interval.tv_nsec = 0;

	if (timer_settime(timerid, 0, &its, NULL) == -1) {
		fprintf(outf, "Can not set timer.\n");
		goto release_timer;
	}
	return;

release_timer:
	timer_delete(timerid);
release_msr:
	free(per_cpu_msr_sum);
}
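The interval arithmetic above is worth making concrete. With an assumed RAPL energy unit of 1/2^16 J and an assumed 280 W TDP doubled for safety (hypothetical numbers, not taken from this diff), a 32-bit joules counter wraps in roughly two minutes, so sampling every rapl_joule_counter_range / 2 seconds guarantees at most one wrap between snapshots:

/* Back-of-the-envelope check with assumed numbers. */
#include <stdio.h>

int main(void)
{
	double energy_unit_j = 1.0 / 65536;	/* assumed RAPL unit   */
	double max_power_w = 2 * 280.0;		/* 2 * TDP for safety  */
	double range_s = 4294967296.0 * energy_unit_j / max_power_w;

	/* ~117 s counter range, ~58 s timer interval */
	printf("counter range: %.0f s, timer interval: %.0f s\n",
	       range_s, range_s / 2);
	return 0;
}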

void turbostat_loop()
{

@ -3075,7 +3336,7 @@ void turbostat_loop()
	if (retval < -1) {
		exit(retval);
	} else if (retval == -1) {
		if (restarted > 1) {
		if (restarted > 10) {
			exit(retval);
		}
		re_initialize();

@ -3279,6 +3540,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
	case INTEL_FAM6_ATOM_GOLDMONT_D:	/* DNV */
	case INTEL_FAM6_ATOM_TREMONT:		/* EHL */
	case INTEL_FAM6_ATOM_TREMONT_D:		/* JVL */
		pkg_cstate_limits = glm_pkg_cstate_limits;
		break;
	default:

@ -3361,6 +3623,17 @@ int is_ehl(unsigned int family, unsigned int model)
	}
	return 0;
}
int is_jvl(unsigned int family, unsigned int model)
{
	if (!genuine_intel)
		return 0;

	switch (model) {
	case INTEL_FAM6_ATOM_TREMONT_D:
		return 1;
	}
	return 0;
}

int has_turbo_ratio_limit(unsigned int family, unsigned int model)
{

@ -3474,6 +3747,20 @@ int has_config_tdp(unsigned int family, unsigned int model)
	}
}

static void
remove_underbar(char *s)
{
	char *to = s;

	while (*s) {
		if (*s != '_')
			*to++ = *s;
		s++;
	}

	*to = 0;
}
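remove_underbar() is a classic in-place filter: the write pointer trails the read pointer, so the string is compacted without a scratch buffer. A minimal standalone usage sketch follows (the driver and sample string are hypothetical, not part of the commit):

#include <stdio.h>

static void remove_underbar(char *s)
{
	char *to = s;	/* write pointer trails the read pointer */

	while (*s) {
		if (*s != '_')
			*to++ = *s;
		s++;
	}
	*to = 0;
}

int main(void)
{
	char name[] = "C1E_ACPI";

	remove_underbar(name);
	printf("%s\n", name);	/* prints "C1EACPI" */
	return 0;
}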

static void
dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
{

@ -3530,9 +3817,6 @@ dump_sysfs_cstate_config(void)
	int state;
	char *sp;

	if (!DO_BIC(BIC_sysfs))
		return;

	if (access("/sys/devices/system/cpu/cpuidle", R_OK)) {
		fprintf(outf, "cpuidle not loaded\n");
		return;

@ -3559,6 +3843,8 @@ dump_sysfs_cstate_config(void)
	*sp = '\0';
	fclose(input);

	remove_underbar(name_buf);

	sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc",
		base_cpu, state);
	input = fopen(path, "r");

@ -3645,7 +3931,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
	return 0;

	if (cpu_migrate(cpu)) {
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
		fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu);
		return -1;
	}

@ -3689,7 +3975,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
	return 0;

	if (cpu_migrate(cpu)) {
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
		fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu);
		return -1;
	}

@ -3777,7 +4063,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
	return 0;

	if (cpu_migrate(cpu)) {
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
		fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu);
		return -1;
	}

@ -3881,13 +4167,8 @@ double get_tdp_intel(unsigned int model)

double get_tdp_amd(unsigned int family)
{
	switch (family) {
	case 0x17:
	case 0x18:
	default:
		/* This is the max stock TDP of HEDT/Server Fam17h chips */
		return 250.0;
	}
	/* This is the max stock TDP of HEDT/Server Fam17h+ chips */
	return 280.0;
}

/*

@ -3959,6 +4240,14 @@ void rapl_probe_intel(unsigned int family, unsigned int model)
			BIC_PRESENT(BIC_GFXWatt);
		}
		break;
	case INTEL_FAM6_ATOM_TREMONT_D:	/* JVL */
		do_rapl = RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
		BIC_PRESENT(BIC_PKG__);
		if (rapl_joules)
			BIC_PRESENT(BIC_Pkg_J);
		else
			BIC_PRESENT(BIC_PkgWatt);
		break;
	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;

@ -4069,27 +4358,20 @@ void rapl_probe_amd(unsigned int family, unsigned int model)

	if (max_extended_level >= 0x80000007) {
		__cpuid(0x80000007, eax, ebx, ecx, edx);
		/* RAPL (Fam 17h) */
		/* RAPL (Fam 17h+) */
		has_rapl = edx & (1 << 14);
	}

	if (!has_rapl)
	if (!has_rapl || family < 0x17)
		return;

	switch (family) {
	case 0x17: /* Zen, Zen+ */
	case 0x18: /* Hygon Dhyana */
		do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
		if (rapl_joules) {
			BIC_PRESENT(BIC_Pkg_J);
			BIC_PRESENT(BIC_Cor_J);
		} else {
			BIC_PRESENT(BIC_PkgWatt);
			BIC_PRESENT(BIC_CorWatt);
		}
		break;
	default:
		return;
	do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
	if (rapl_joules) {
		BIC_PRESENT(BIC_Pkg_J);
		BIC_PRESENT(BIC_Cor_J);
	} else {
		BIC_PRESENT(BIC_PkgWatt);
		BIC_PRESENT(BIC_CorWatt);
	}

	if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))

@ -4162,7 +4444,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
	return 0;

	if (cpu_migrate(cpu)) {
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
		fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
		return -1;
	}

@ -4234,7 +4516,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)

	cpu = t->cpu_id;
	if (cpu_migrate(cpu)) {
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
		fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu);
		return -1;
	}

@ -4361,6 +4643,7 @@ int has_snb_msrs(unsigned int family, unsigned int model)
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
	case INTEL_FAM6_ATOM_GOLDMONT_D:	/* DNV */
	case INTEL_FAM6_ATOM_TREMONT:		/* EHL */
	case INTEL_FAM6_ATOM_TREMONT_D:		/* JVL */
		return 1;
	}
	return 0;

@ -4507,12 +4790,33 @@ double discover_bclk(unsigned int family, unsigned int model)
 * below this value, including the Digital Thermal Sensor (DTS),
 * Package Thermal Management Sensor (PTM), and thermal event thresholds.
 */
int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
int read_tcc_activation_temp()
{
	unsigned long long msr;
	unsigned int target_c_local;
	int cpu;
	unsigned int tcc, target_c, offset_c;

	/* Temperature Target MSR is Nehalem and newer only */
	if (!do_nhm_platform_info)
		return 0;

	if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
		return 0;

	target_c = (msr >> 16) & 0xFF;

	offset_c = (msr >> 24) & 0xF;

	tcc = target_c - offset_c;

	if (!quiet)
		fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
			base_cpu, msr, tcc, target_c, offset_c);

	return tcc;
}

int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	/* tcc_activation_temp is used only for dts or ptm */
	if (!(do_dts || do_ptm))
		return 0;

@ -4521,43 +4825,18 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
		return 0;

	cpu = t->cpu_id;
	if (cpu_migrate(cpu)) {
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	if (tcc_activation_temp_override != 0) {
		tcc_activation_temp = tcc_activation_temp_override;
		fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
			cpu, tcc_activation_temp);
		fprintf(outf, "Using cmdline TCC Target (%d C)\n", tcc_activation_temp);
		return 0;
	}

	/* Temperature Target MSR is Nehalem and newer only */
	if (!do_nhm_platform_info)
		goto guess;
	tcc_activation_temp = read_tcc_activation_temp();
	if (tcc_activation_temp)
		return 0;

	if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
		goto guess;

	target_c_local = (msr >> 16) & 0xFF;

	if (!quiet)
		fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
			cpu, msr, target_c_local);

	if (!target_c_local)
		goto guess;

	tcc_activation_temp = target_c_local;

	return 0;

guess:
	tcc_activation_temp = TJMAX_DEFAULT;
	fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
		cpu, tcc_activation_temp);
	fprintf(outf, "Guessing tjMax %d C, Please use -T to specify\n", tcc_activation_temp);

	return 0;
}

@ -4685,19 +4964,46 @@ unsigned int intel_model_duplicates(unsigned int model)
	case INTEL_FAM6_ICELAKE_NNPI:
	case INTEL_FAM6_TIGERLAKE_L:
	case INTEL_FAM6_TIGERLAKE:
	case INTEL_FAM6_ROCKETLAKE:
	case INTEL_FAM6_LAKEFIELD:
	case INTEL_FAM6_ALDERLAKE:
		return INTEL_FAM6_CANNONLAKE_L;

	case INTEL_FAM6_ATOM_TREMONT_D:
		return INTEL_FAM6_ATOM_GOLDMONT_D;

	case INTEL_FAM6_ATOM_TREMONT_L:
		return INTEL_FAM6_ATOM_TREMONT;

	case INTEL_FAM6_ICELAKE_X:
	case INTEL_FAM6_SAPPHIRERAPIDS_X:
		return INTEL_FAM6_SKYLAKE_X;
	}
	return model;
}

void print_dev_latency(void)
{
	char *path = "/dev/cpu_dma_latency";
	int fd;
	int value;
	int retval;

	fd = open(path, O_RDONLY);
	if (fd < 0) {
		warn("fopen %s\n", path);
		return;
	}

	retval = read(fd, (void *)&value, sizeof(int));
	if (retval != sizeof(int)) {
		warn("read %s\n", path);
		close(fd);
		return;
	}
	fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n",
		value, value == 2000000000 ? "default" : "constrained");

	close(fd);
}
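print_dev_latency() only reads the current PM QoS value. For context, the same character device accepts a 32-bit write to request a wakeup-latency bound, and the request stays in force only while the file descriptor is held open. A hedged companion sketch of that write side (not part of this diff):

#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>

int main(void)
{
	int32_t max_latency_us = 10;	/* desired latency bound, assumed value */
	int fd = open("/dev/cpu_dma_latency", O_WRONLY);

	if (fd < 0)
		return 1;
	if (write(fd, &max_latency_us, sizeof(max_latency_us)) !=
	    sizeof(max_latency_us)) {
		close(fd);
		return 1;
	}
	pause();	/* constraint is dropped when the fd closes */
	close(fd);
	return 0;
}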

void process_cpuid()
{
	unsigned int eax, ebx, ecx, edx;

@ -4916,6 +5222,14 @@ void process_cpuid()
		BIC_PRESENT(BIC_Mod_c6);
		use_c1_residency_msr = 1;
	}
	if (is_jvl(family, model)) {
		BIC_NOT_PRESENT(BIC_CPU_c3);
		BIC_NOT_PRESENT(BIC_CPU_c7);
		BIC_NOT_PRESENT(BIC_Pkgpc2);
		BIC_NOT_PRESENT(BIC_Pkgpc3);
		BIC_NOT_PRESENT(BIC_Pkgpc6);
		BIC_NOT_PRESENT(BIC_Pkgpc7);
	}
	if (is_dnv(family, model)) {
		BIC_PRESENT(BIC_CPU_c1);
		BIC_NOT_PRESENT(BIC_CPU_c3);

@ -4935,9 +5249,12 @@ void process_cpuid()
		BIC_NOT_PRESENT(BIC_Pkgpc7);
	}
	if (has_c8910_msrs(family, model)) {
		BIC_PRESENT(BIC_Pkgpc8);
		BIC_PRESENT(BIC_Pkgpc9);
		BIC_PRESENT(BIC_Pkgpc10);
		if (pkg_cstate_limit >= PCL__8)
			BIC_PRESENT(BIC_Pkgpc8);
		if (pkg_cstate_limit >= PCL__9)
			BIC_PRESENT(BIC_Pkgpc9);
		if (pkg_cstate_limit >= PCL_10)
			BIC_PRESENT(BIC_Pkgpc10);
	}
	do_irtl_hsw = has_c8910_msrs(family, model);
	if (has_skl_msrs(family, model)) {

@ -4966,6 +5283,8 @@ void process_cpuid()
	if (!quiet)
		dump_cstate_pstate_config_info(family, model);

	if (!quiet)
		print_dev_latency();
	if (!quiet)
		dump_sysfs_cstate_config();
	if (!quiet)

@ -4980,6 +5299,9 @@ void process_cpuid()
	if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
		BIC_PRESENT(BIC_GFXMHz);

	if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
		BIC_PRESENT(BIC_GFXACTMHz);

	if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
		BIC_PRESENT(BIC_CPU_LPI);
	else

@ -5390,7 +5712,7 @@ int get_and_dump_counters(void)
}

void print_version() {
	fprintf(outf, "turbostat version 20.03.20"
	fprintf(outf, "turbostat version 20.09.30"
		" - Len Brown <lenb@kernel.org>\n");
}

@ -5597,6 +5919,8 @@ void probe_sysfs(void)
	*sp = '%';
	*(sp + 1) = '\0';

	remove_underbar(name_buf);

	fclose(input);

	sprintf(path, "cpuidle/state%d/time", state);

@ -5624,6 +5948,8 @@ void probe_sysfs(void)
	*sp = '\0';
	fclose(input);

	remove_underbar(name_buf);

	sprintf(path, "cpuidle/state%d/usage", state);

	if (is_deferred_skip(name_buf))

@ -5868,6 +6194,7 @@ int main(int argc, char **argv)
		return 0;
	}

	msr_sum_record();
	/*
	 * if any params left, it must be a command to fork
	 */


@ -622,6 +622,57 @@ void cmdline(int argc, char **argv)
	}
}

/*
 * Open a file, and exit on failure
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *filep = fopen(path, "r");

	if (!filep)
		err(1, "%s: open failed", path);
	return filep;
}

void err_on_hypervisor(void)
{
	FILE *cpuinfo;
	char *flags, *hypervisor;
	char *buffer;

	/* On VMs /proc/cpuinfo contains a "flags" entry for hypervisor */
	cpuinfo = fopen_or_die("/proc/cpuinfo", "ro");

	buffer = malloc(4096);
	if (!buffer) {
		fclose(cpuinfo);
		err(-ENOMEM, "buffer malloc fail");
	}

	if (!fread(buffer, 1024, 1, cpuinfo)) {
		fclose(cpuinfo);
		free(buffer);
		err(1, "Reading /proc/cpuinfo failed");
	}

	flags = strstr(buffer, "flags");
	rewind(cpuinfo);
	fseek(cpuinfo, flags - buffer, SEEK_SET);
	if (!fgets(buffer, 4096, cpuinfo)) {
		fclose(cpuinfo);
		free(buffer);
		err(1, "Reading /proc/cpuinfo failed");
	}
	fclose(cpuinfo);

	hypervisor = strstr(buffer, "hypervisor");

	free(buffer);

	if (hypervisor)
		err(-1,
		    "not supported on this virtual machine");
}

int get_msr(int cpu, int offset, unsigned long long *msr)
{

@ -635,8 +686,10 @@ int get_msr(int cpu, int offset, unsigned long long *msr)
		err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);

	retval = pread(fd, msr, sizeof(*msr), offset);
	if (retval != sizeof(*msr))
	if (retval != sizeof(*msr)) {
		err_on_hypervisor();
		err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset);
	}

	if (debug > 1)
		fprintf(stderr, "get_msr(cpu%d, 0x%X, 0x%llX)\n", cpu, offset, *msr);

@ -1086,18 +1139,6 @@ int update_cpu_msrs(int cpu)
	return 0;
}

/*
 * Open a file, and exit on failure
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *filep = fopen(path, "r");

	if (!filep)
		err(1, "%s: open failed", path);
	return filep;
}

unsigned int get_pkg_num(int cpu)
{
	FILE *fp;


@ -1,10 +1,13 @@
# SPDX-License-Identifier: GPL-2.0-only
/aarch64/get-reg-list
/aarch64/get-reg-list-sve
/s390x/memop
/s390x/resets
/s390x/sync_regs_test
/x86_64/cr4_cpuid_sync_test
/x86_64/debug_regs
/x86_64/evmcs_test
/x86_64/kvm_pv_test
/x86_64/hyperv_cpuid
/x86_64/mmio_warning_test
/x86_64/platform_info_test

@ -24,6 +27,7 @@
/clear_dirty_log_test
/demand_paging_test
/dirty_log_test
/dirty_log_perf_test
/kvm_create_max_vcpus
/set_memory_region_test
/steal_time


@ -34,13 +34,14 @@ ifeq ($(ARCH),s390)
endif

LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c
LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c
LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c

TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test

@ -58,14 +59,15 @@ TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test
TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
TEST_GEN_PROGS_x86_64 += set_memory_region_test
TEST_GEN_PROGS_x86_64 += steal_time

TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve
TEST_GEN_PROGS_aarch64 += demand_paging_test
TEST_GEN_PROGS_aarch64 += dirty_log_test
TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus

@ -111,14 +113,21 @@ LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
include ../lib.mk

STATIC_LIBS := $(OUTPUT)/libkvm.a
LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM))
EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS) cscope.*
LIBKVM_C := $(filter %.c,$(LIBKVM))
LIBKVM_S := $(filter %.S,$(LIBKVM))
LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
EXTRA_CLEAN += $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(STATIC_LIBS) cscope.*

x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ))))
$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@

$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S
	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@

LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)
$(OUTPUT)/libkvm.a: $(LIBKVM_OBJS)
	$(AR) crs $@ $^

x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))


@ -0,0 +1,3 @@
// SPDX-License-Identifier: GPL-2.0
#define REG_LIST_SVE
#include "get-reg-list.c"

@ -0,0 +1,841 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Check for KVM_GET_REG_LIST regressions.
 *
 * Copyright (C) 2020, Red Hat, Inc.
 *
 * When attempting to migrate from a host with an older kernel to a host
 * with a newer kernel we allow the newer kernel on the destination to
 * list new registers with get-reg-list. We assume they'll be unused, at
 * least until the guest reboots, and so they're relatively harmless.
 * However, if the destination host with the newer kernel is missing
 * registers which the source host with the older kernel has, then that's
 * a regression in get-reg-list. This test checks for that regression by
 * checking the current list against a blessed list. We should never have
 * missing registers, but if new ones appear then they can probably be
 * added to the blessed list. A completely new blessed list can be created
 * by running the test with the --list command line argument.
 *
 * Note, the blessed list should be created from the oldest possible
 * kernel. We can't go older than v4.15, though, because that's the first
 * release to expose the ID system registers in KVM_GET_REG_LIST, see
 * commit 93390c0a1b20 ("arm64: KVM: Hide unsupported AArch64 CPU features
 * from guests"). Also, one must use the --core-reg-fixup command line
 * option when running on an older kernel that doesn't include df205b5c6328
 * ("KVM: arm64: Filter out invalid core register IDs in KVM_GET_REG_LIST")
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "kvm_util.h"
#include "test_util.h"
#include "processor.h"

#ifdef REG_LIST_SVE
#define reg_list_sve() (true)
#else
#define reg_list_sve() (false)
#endif

#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)

#define for_each_reg(i)								\
	for ((i) = 0; (i) < reg_list->n; ++(i))

#define for_each_missing_reg(i)							\
	for ((i) = 0; (i) < blessed_n; ++(i))					\
		if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i]))

#define for_each_new_reg(i)							\
	for ((i) = 0; (i) < reg_list->n; ++(i))					\
		if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
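These macros fuse a for loop with a filtering if, so the single statement written after a for_each_new_reg(i) or for_each_missing_reg(i) runs only for registers that fail the find_reg() lookup. A toy standalone demo of the same idiom (illustrative data, not KVM code):

#include <stdio.h>

static int data[] = { 1, 2, 3, 4, 5, 6 };

#define for_each_even(i)					\
	for ((i) = 0; (i) < 6; ++(i))				\
		if (data[i] % 2 == 0)

int main(void)
{
	int i, n = 0;

	for_each_even(i)	/* body runs only for even elements */
		++n;
	printf("%d even values\n", n);	/* prints "3 even values" */
	return 0;
}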

static struct kvm_reg_list *reg_list;

static __u64 base_regs[], vregs[], sve_regs[], rejects_set[];
static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n;
static __u64 *blessed_reg, blessed_n;

static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg)
{
	int i;

	for (i = 0; i < nr_regs; ++i)
		if (reg == regs[i])
			return true;
	return false;
}

static const char *str_with_index(const char *template, __u64 index)
{
	char *str, *p;
	int n;

	str = strdup(template);
	p = strstr(str, "##");
	n = sprintf(p, "%lld", index);
	strcat(p + n, strstr(template, "##") + 2);

	return (const char *)str;
}
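str_with_index() splices a decimal index into the "##" placeholder of a template; the strdup() buffer has exactly enough room as long as the index stays at two digits or fewer, and the allocation is deliberately never freed in this short-lived test. A self-contained sketch of the routine in use (duplicated here so it runs on its own):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static const char *str_with_index(const char *template, unsigned long long index)
{
	char *str, *p;
	int n;

	/* "%lld" of a <= 2 digit index fits in the 2-char "##" slot. */
	str = strdup(template);
	p = strstr(str, "##");
	n = sprintf(p, "%lld", index);
	strcat(p + n, strstr(template, "##") + 2);
	return str;
}

int main(void)
{
	/* prints "KVM_REG_ARM_CORE_REG(regs.regs[5])" */
	puts(str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", 5));
	return 0;
}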

#define CORE_REGS_XX_NR_WORDS	2
#define CORE_SPSR_XX_NR_WORDS	2
#define CORE_FPREGS_XX_NR_WORDS	4

static const char *core_id_to_str(__u64 id)
{
	__u64 core_off = id & ~REG_MASK, idx;

	/*
	 * core_off is the offset into struct kvm_regs
	 */
	switch (core_off) {
	case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
	     KVM_REG_ARM_CORE_REG(regs.regs[30]):
		idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS;
		TEST_ASSERT(idx < 31, "Unexpected regs.regs index: %lld", idx);
		return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx);
	case KVM_REG_ARM_CORE_REG(regs.sp):
		return "KVM_REG_ARM_CORE_REG(regs.sp)";
	case KVM_REG_ARM_CORE_REG(regs.pc):
		return "KVM_REG_ARM_CORE_REG(regs.pc)";
	case KVM_REG_ARM_CORE_REG(regs.pstate):
		return "KVM_REG_ARM_CORE_REG(regs.pstate)";
	case KVM_REG_ARM_CORE_REG(sp_el1):
		return "KVM_REG_ARM_CORE_REG(sp_el1)";
	case KVM_REG_ARM_CORE_REG(elr_el1):
		return "KVM_REG_ARM_CORE_REG(elr_el1)";
	case KVM_REG_ARM_CORE_REG(spsr[0]) ...
	     KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
		idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS;
		TEST_ASSERT(idx < KVM_NR_SPSR, "Unexpected spsr index: %lld", idx);
		return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx);
	case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
	     KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
		idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS;
		TEST_ASSERT(idx < 32, "Unexpected fp_regs.vregs index: %lld", idx);
		return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx);
	case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
		return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)";
	case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
		return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)";
	}

	TEST_FAIL("Unknown core reg id: 0x%llx", id);
	return NULL;
}

static const char *sve_id_to_str(__u64 id)
{
	__u64 sve_off, n, i;

	if (id == KVM_REG_ARM64_SVE_VLS)
		return "KVM_REG_ARM64_SVE_VLS";

	sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1));
	i = id & (KVM_ARM64_SVE_MAX_SLICES - 1);

	TEST_ASSERT(i == 0, "Currently we don't expect slice > 0, reg id 0x%llx", id);

	switch (sve_off) {
	case KVM_REG_ARM64_SVE_ZREG_BASE ...
	     KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1:
		n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1);
		TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0),
			    "Unexpected bits set in SVE ZREG id: 0x%llx", id);
		return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n);
	case KVM_REG_ARM64_SVE_PREG_BASE ...
	     KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1:
		n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1);
		TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0),
			    "Unexpected bits set in SVE PREG id: 0x%llx", id);
		return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n);
	case KVM_REG_ARM64_SVE_FFR_BASE:
		TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0),
			    "Unexpected bits set in SVE FFR id: 0x%llx", id);
		return "KVM_REG_ARM64_SVE_FFR(0)";
	}

	return NULL;
}

static void print_reg(__u64 id)
{
	unsigned op0, op1, crn, crm, op2;
	const char *reg_size = NULL;

	TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64,
		    "KVM_REG_ARM64 missing in reg id: 0x%llx", id);

	switch (id & KVM_REG_SIZE_MASK) {
	case KVM_REG_SIZE_U8:
		reg_size = "KVM_REG_SIZE_U8";
		break;
	case KVM_REG_SIZE_U16:
		reg_size = "KVM_REG_SIZE_U16";
		break;
	case KVM_REG_SIZE_U32:
		reg_size = "KVM_REG_SIZE_U32";
		break;
	case KVM_REG_SIZE_U64:
		reg_size = "KVM_REG_SIZE_U64";
		break;
	case KVM_REG_SIZE_U128:
		reg_size = "KVM_REG_SIZE_U128";
		break;
	case KVM_REG_SIZE_U256:
		reg_size = "KVM_REG_SIZE_U256";
		break;
	case KVM_REG_SIZE_U512:
		reg_size = "KVM_REG_SIZE_U512";
		break;
	case KVM_REG_SIZE_U1024:
		reg_size = "KVM_REG_SIZE_U1024";
		break;
	case KVM_REG_SIZE_U2048:
		reg_size = "KVM_REG_SIZE_U2048";
		break;
	default:
		TEST_FAIL("Unexpected reg size: 0x%llx in reg id: 0x%llx",
			  (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
	}

	switch (id & KVM_REG_ARM_COPROC_MASK) {
	case KVM_REG_ARM_CORE:
		printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(id));
		break;
	case KVM_REG_ARM_DEMUX:
		TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)),
			    "Unexpected bits set in DEMUX reg id: 0x%llx", id);
		printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n",
		       reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK);
		break;
	case KVM_REG_ARM64_SYSREG:
		op0 = (id & KVM_REG_ARM64_SYSREG_OP0_MASK) >> KVM_REG_ARM64_SYSREG_OP0_SHIFT;
		op1 = (id & KVM_REG_ARM64_SYSREG_OP1_MASK) >> KVM_REG_ARM64_SYSREG_OP1_SHIFT;
		crn = (id & KVM_REG_ARM64_SYSREG_CRN_MASK) >> KVM_REG_ARM64_SYSREG_CRN_SHIFT;
		crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT;
		op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT;
		TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2),
			    "Unexpected bits set in SYSREG reg id: 0x%llx", id);
		printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2);
		break;
	case KVM_REG_ARM_FW:
		TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff),
			    "Unexpected bits set in FW reg id: 0x%llx", id);
		printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff);
		break;
	case KVM_REG_ARM64_SVE:
		if (reg_list_sve())
			printf("\t%s,\n", sve_id_to_str(id));
		else
			TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id);
		break;
	default:
		TEST_FAIL("Unexpected coproc type: 0x%llx in reg id: 0x%llx",
			  (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
	}
}

/*
 * Older kernels listed each 32-bit word of CORE registers separately.
 * For 64 and 128-bit registers we need to ignore the extra words. We
 * also need to fixup the sizes, because the older kernels stated all
 * registers were 64-bit, even when they weren't.
 */
static void core_reg_fixup(void)
{
	struct kvm_reg_list *tmp;
	__u64 id, core_off;
	int i;

	tmp = calloc(1, sizeof(*tmp) + reg_list->n * sizeof(__u64));

	for (i = 0; i < reg_list->n; ++i) {
		id = reg_list->reg[i];

		if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM_CORE) {
			tmp->reg[tmp->n++] = id;
			continue;
		}

		core_off = id & ~REG_MASK;

		switch (core_off) {
		case 0x52: case 0xd2: case 0xd6:
			/*
			 * These offsets are pointing at padding.
			 * We need to ignore them too.
			 */
			continue;
		case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
		     KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
			if (core_off & 3)
				continue;
			id &= ~KVM_REG_SIZE_MASK;
			id |= KVM_REG_SIZE_U128;
			tmp->reg[tmp->n++] = id;
			continue;
		case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
		case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
			id &= ~KVM_REG_SIZE_MASK;
			id |= KVM_REG_SIZE_U32;
			tmp->reg[tmp->n++] = id;
			continue;
		default:
			if (core_off & 1)
				continue;
			tmp->reg[tmp->n++] = id;
			break;
		}
	}

	free(reg_list);
	reg_list = tmp;
}

static void prepare_vcpu_init(struct kvm_vcpu_init *init)
{
	if (reg_list_sve())
		init->features[0] |= 1 << KVM_ARM_VCPU_SVE;
}

static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
{
	int feature;

	if (reg_list_sve()) {
		feature = KVM_ARM_VCPU_SVE;
		vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature);
	}
}

static void check_supported(void)
{
	if (reg_list_sve() && !kvm_check_cap(KVM_CAP_ARM_SVE)) {
		fprintf(stderr, "SVE not available, skipping tests\n");
		exit(KSFT_SKIP);
	}
}

int main(int ac, char **av)
{
	struct kvm_vcpu_init init = { .target = -1, };
	int new_regs = 0, missing_regs = 0, i;
	int failed_get = 0, failed_set = 0, failed_reject = 0;
	bool print_list = false, fixup_core_regs = false;
	struct kvm_vm *vm;
	__u64 *vec_regs;

	check_supported();

	for (i = 1; i < ac; ++i) {
		if (strcmp(av[i], "--core-reg-fixup") == 0)
			fixup_core_regs = true;
		else if (strcmp(av[i], "--list") == 0)
			print_list = true;
		else
			fprintf(stderr, "Ignoring unknown option: %s\n", av[i]);
	}

	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
	prepare_vcpu_init(&init);
	aarch64_vcpu_add_default(vm, 0, &init, NULL);
	finalize_vcpu(vm, 0);

	reg_list = vcpu_get_reg_list(vm, 0);

	if (fixup_core_regs)
		core_reg_fixup();

	if (print_list) {
		putchar('\n');
		for_each_reg(i)
			print_reg(reg_list->reg[i]);
		putchar('\n');
		return 0;
	}

	/*
	 * We only test that we can get the register and then write back the
	 * same value. Some registers may allow other values to be written
	 * back, but others only allow some bits to be changed, and at least
	 * for ID registers set will fail if the value does not exactly match
	 * what was returned by get. If registers that allow other values to
	 * be written need to have the other values tested, then we should
	 * create a new set of tests for those in a new independent test
	 * executable.
	 */
	for_each_reg(i) {
		uint8_t addr[2048 / 8];
		struct kvm_one_reg reg = {
			.id = reg_list->reg[i],
			.addr = (__u64)&addr,
		};
		int ret;

		ret = _vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, &reg);
		if (ret) {
			puts("Failed to get ");
			print_reg(reg.id);
			putchar('\n');
			++failed_get;
		}

		/* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */
		if (find_reg(rejects_set, rejects_set_n, reg.id)) {
			ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
			if (ret != -1 || errno != EPERM) {
				printf("Failed to reject (ret=%d, errno=%d) ", ret, errno);
				print_reg(reg.id);
				putchar('\n');
				++failed_reject;
			}
			continue;
		}

		ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
		if (ret) {
			puts("Failed to set ");
			print_reg(reg.id);
			putchar('\n');
			++failed_set;
		}
	}

	if (reg_list_sve()) {
		blessed_n = base_regs_n + sve_regs_n;
		vec_regs = sve_regs;
	} else {
		blessed_n = base_regs_n + vregs_n;
		vec_regs = vregs;
	}

	blessed_reg = calloc(blessed_n, sizeof(__u64));
	for (i = 0; i < base_regs_n; ++i)
		blessed_reg[i] = base_regs[i];
	for (i = 0; i < blessed_n - base_regs_n; ++i)
		blessed_reg[base_regs_n + i] = vec_regs[i];

	for_each_new_reg(i)
		++new_regs;

	for_each_missing_reg(i)
		++missing_regs;

	if (new_regs || missing_regs) {
		printf("Number blessed registers: %5lld\n", blessed_n);
		printf("Number registers:         %5lld\n", reg_list->n);
	}

	if (new_regs) {
		printf("\nThere are %d new registers.\n"
		       "Consider adding them to the blessed reg "
		       "list with the following lines:\n\n", new_regs);
		for_each_new_reg(i)
			print_reg(reg_list->reg[i]);
		putchar('\n');
	}

	if (missing_regs) {
		printf("\nThere are %d missing registers.\n"
		       "The following lines are missing registers:\n\n", missing_regs);
		for_each_missing_reg(i)
			print_reg(blessed_reg[i]);
		putchar('\n');
	}

	TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject,
		    "There are %d missing registers; "
		    "%d registers failed get; %d registers failed set; %d registers failed reject",
		    missing_regs, failed_get, failed_set, failed_reject);

	return 0;
}

/*
 * The current blessed list was primed with the output of kernel version
 * v4.15 with --core-reg-fixup and then later updated with new registers.
 */
static __u64 base_regs[] = {
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[3]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[4]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[5]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[6]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[7]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[8]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[9]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[10]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[11]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[12]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[13]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[14]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[15]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[16]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[17]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[18]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[19]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[20]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[21]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[22]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[23]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[24]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[25]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[26]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[27]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[28]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[29]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[30]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.sp),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pstate),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(sp_el1),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(elr_el1),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[0]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[1]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr),
	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr),
	KVM_REG_ARM_FW_REG(0),
	KVM_REG_ARM_FW_REG(1),
	KVM_REG_ARM_FW_REG(2),
	ARM64_SYS_REG(3, 3, 14, 3, 1),	/* CNTV_CTL_EL0 */
	ARM64_SYS_REG(3, 3, 14, 3, 2),	/* CNTV_CVAL_EL0 */
	ARM64_SYS_REG(3, 3, 14, 0, 2),
	ARM64_SYS_REG(3, 0, 0, 0, 0),	/* MIDR_EL1 */
	ARM64_SYS_REG(3, 0, 0, 0, 6),	/* REVIDR_EL1 */
	ARM64_SYS_REG(3, 1, 0, 0, 1),	/* CLIDR_EL1 */
	ARM64_SYS_REG(3, 1, 0, 0, 7),	/* AIDR_EL1 */
	ARM64_SYS_REG(3, 3, 0, 0, 1),	/* CTR_EL0 */
	ARM64_SYS_REG(2, 0, 0, 0, 4),
	ARM64_SYS_REG(2, 0, 0, 0, 5),
	ARM64_SYS_REG(2, 0, 0, 0, 6),
	ARM64_SYS_REG(2, 0, 0, 0, 7),
	ARM64_SYS_REG(2, 0, 0, 1, 4),
	ARM64_SYS_REG(2, 0, 0, 1, 5),
	ARM64_SYS_REG(2, 0, 0, 1, 6),
	ARM64_SYS_REG(2, 0, 0, 1, 7),
	ARM64_SYS_REG(2, 0, 0, 2, 0),	/* MDCCINT_EL1 */
	ARM64_SYS_REG(2, 0, 0, 2, 2),	/* MDSCR_EL1 */
	ARM64_SYS_REG(2, 0, 0, 2, 4),
	ARM64_SYS_REG(2, 0, 0, 2, 5),
	ARM64_SYS_REG(2, 0, 0, 2, 6),
	ARM64_SYS_REG(2, 0, 0, 2, 7),
	ARM64_SYS_REG(2, 0, 0, 3, 4),
	ARM64_SYS_REG(2, 0, 0, 3, 5),
	ARM64_SYS_REG(2, 0, 0, 3, 6),
	ARM64_SYS_REG(2, 0, 0, 3, 7),
	ARM64_SYS_REG(2, 0, 0, 4, 4),
	ARM64_SYS_REG(2, 0, 0, 4, 5),
	ARM64_SYS_REG(2, 0, 0, 4, 6),
	ARM64_SYS_REG(2, 0, 0, 4, 7),
	ARM64_SYS_REG(2, 0, 0, 5, 4),
	ARM64_SYS_REG(2, 0, 0, 5, 5),
	ARM64_SYS_REG(2, 0, 0, 5, 6),
	ARM64_SYS_REG(2, 0, 0, 5, 7),
	ARM64_SYS_REG(2, 0, 0, 6, 4),
	ARM64_SYS_REG(2, 0, 0, 6, 5),
	ARM64_SYS_REG(2, 0, 0, 6, 6),
	ARM64_SYS_REG(2, 0, 0, 6, 7),
	ARM64_SYS_REG(2, 0, 0, 7, 4),
	ARM64_SYS_REG(2, 0, 0, 7, 5),
	ARM64_SYS_REG(2, 0, 0, 7, 6),
	ARM64_SYS_REG(2, 0, 0, 7, 7),
	ARM64_SYS_REG(2, 0, 0, 8, 4),
	ARM64_SYS_REG(2, 0, 0, 8, 5),
	ARM64_SYS_REG(2, 0, 0, 8, 6),
	ARM64_SYS_REG(2, 0, 0, 8, 7),
	ARM64_SYS_REG(2, 0, 0, 9, 4),
	ARM64_SYS_REG(2, 0, 0, 9, 5),
	ARM64_SYS_REG(2, 0, 0, 9, 6),
	ARM64_SYS_REG(2, 0, 0, 9, 7),
	ARM64_SYS_REG(2, 0, 0, 10, 4),
	ARM64_SYS_REG(2, 0, 0, 10, 5),
	ARM64_SYS_REG(2, 0, 0, 10, 6),
	ARM64_SYS_REG(2, 0, 0, 10, 7),
	ARM64_SYS_REG(2, 0, 0, 11, 4),
	ARM64_SYS_REG(2, 0, 0, 11, 5),
	ARM64_SYS_REG(2, 0, 0, 11, 6),
	ARM64_SYS_REG(2, 0, 0, 11, 7),
	ARM64_SYS_REG(2, 0, 0, 12, 4),
	ARM64_SYS_REG(2, 0, 0, 12, 5),
	ARM64_SYS_REG(2, 0, 0, 12, 6),
	ARM64_SYS_REG(2, 0, 0, 12, 7),
	ARM64_SYS_REG(2, 0, 0, 13, 4),
	ARM64_SYS_REG(2, 0, 0, 13, 5),
	ARM64_SYS_REG(2, 0, 0, 13, 6),
	ARM64_SYS_REG(2, 0, 0, 13, 7),
	ARM64_SYS_REG(2, 0, 0, 14, 4),
	ARM64_SYS_REG(2, 0, 0, 14, 5),
	ARM64_SYS_REG(2, 0, 0, 14, 6),
	ARM64_SYS_REG(2, 0, 0, 14, 7),
	ARM64_SYS_REG(2, 0, 0, 15, 4),
	ARM64_SYS_REG(2, 0, 0, 15, 5),
	ARM64_SYS_REG(2, 0, 0, 15, 6),
	ARM64_SYS_REG(2, 0, 0, 15, 7),
	ARM64_SYS_REG(2, 4, 0, 7, 0),	/* DBGVCR32_EL2 */
	ARM64_SYS_REG(3, 0, 0, 0, 5),	/* MPIDR_EL1 */
	ARM64_SYS_REG(3, 0, 0, 1, 0),	/* ID_PFR0_EL1 */
	ARM64_SYS_REG(3, 0, 0, 1, 1),	/* ID_PFR1_EL1 */
	ARM64_SYS_REG(3, 0, 0, 1, 2),	/* ID_DFR0_EL1 */
	ARM64_SYS_REG(3, 0, 0, 1, 3),	/* ID_AFR0_EL1 */
	ARM64_SYS_REG(3, 0, 0, 1, 4),	/* ID_MMFR0_EL1 */
	ARM64_SYS_REG(3, 0, 0, 1, 5),	/* ID_MMFR1_EL1 */
	ARM64_SYS_REG(3, 0, 0, 1, 6),	/* ID_MMFR2_EL1 */
	ARM64_SYS_REG(3, 0, 0, 1, 7),	/* ID_MMFR3_EL1 */
	ARM64_SYS_REG(3, 0, 0, 2, 0),	/* ID_ISAR0_EL1 */
	ARM64_SYS_REG(3, 0, 0, 2, 1),	/* ID_ISAR1_EL1 */
	ARM64_SYS_REG(3, 0, 0, 2, 2),	/* ID_ISAR2_EL1 */
	ARM64_SYS_REG(3, 0, 0, 2, 3),	/* ID_ISAR3_EL1 */
	ARM64_SYS_REG(3, 0, 0, 2, 4),	/* ID_ISAR4_EL1 */
	ARM64_SYS_REG(3, 0, 0, 2, 5),	/* ID_ISAR5_EL1 */
	ARM64_SYS_REG(3, 0, 0, 2, 6),	/* ID_MMFR4_EL1 */
	ARM64_SYS_REG(3, 0, 0, 2, 7),	/* ID_ISAR6_EL1 */
	ARM64_SYS_REG(3, 0, 0, 3, 0),	/* MVFR0_EL1 */
	ARM64_SYS_REG(3, 0, 0, 3, 1),	/* MVFR1_EL1 */
	ARM64_SYS_REG(3, 0, 0, 3, 2),	/* MVFR2_EL1 */
	ARM64_SYS_REG(3, 0, 0, 3, 3),
	ARM64_SYS_REG(3, 0, 0, 3, 4),	/* ID_PFR2_EL1 */
	ARM64_SYS_REG(3, 0, 0, 3, 5),	/* ID_DFR1_EL1 */
	ARM64_SYS_REG(3, 0, 0, 3, 6),	/* ID_MMFR5_EL1 */
	ARM64_SYS_REG(3, 0, 0, 3, 7),
	ARM64_SYS_REG(3, 0, 0, 4, 0),	/* ID_AA64PFR0_EL1 */
	ARM64_SYS_REG(3, 0, 0, 4, 1),	/* ID_AA64PFR1_EL1 */
	ARM64_SYS_REG(3, 0, 0, 4, 2),
	ARM64_SYS_REG(3, 0, 0, 4, 3),
	ARM64_SYS_REG(3, 0, 0, 4, 4),	/* ID_AA64ZFR0_EL1 */
	ARM64_SYS_REG(3, 0, 0, 4, 5),
	ARM64_SYS_REG(3, 0, 0, 4, 6),
	ARM64_SYS_REG(3, 0, 0, 4, 7),
	ARM64_SYS_REG(3, 0, 0, 5, 0),	/* ID_AA64DFR0_EL1 */
	ARM64_SYS_REG(3, 0, 0, 5, 1),	/* ID_AA64DFR1_EL1 */
	ARM64_SYS_REG(3, 0, 0, 5, 2),
	ARM64_SYS_REG(3, 0, 0, 5, 3),
	ARM64_SYS_REG(3, 0, 0, 5, 4),	/* ID_AA64AFR0_EL1 */
	ARM64_SYS_REG(3, 0, 0, 5, 5),	/* ID_AA64AFR1_EL1 */
	ARM64_SYS_REG(3, 0, 0, 5, 6),
	ARM64_SYS_REG(3, 0, 0, 5, 7),
	ARM64_SYS_REG(3, 0, 0, 6, 0),	/* ID_AA64ISAR0_EL1 */
	ARM64_SYS_REG(3, 0, 0, 6, 1),	/* ID_AA64ISAR1_EL1 */
	ARM64_SYS_REG(3, 0, 0, 6, 2),
	ARM64_SYS_REG(3, 0, 0, 6, 3),
	ARM64_SYS_REG(3, 0, 0, 6, 4),
	ARM64_SYS_REG(3, 0, 0, 6, 5),
	ARM64_SYS_REG(3, 0, 0, 6, 6),
	ARM64_SYS_REG(3, 0, 0, 6, 7),
	ARM64_SYS_REG(3, 0, 0, 7, 0),	/* ID_AA64MMFR0_EL1 */
	ARM64_SYS_REG(3, 0, 0, 7, 1),	/* ID_AA64MMFR1_EL1 */
	ARM64_SYS_REG(3, 0, 0, 7, 2),	/* ID_AA64MMFR2_EL1 */
	ARM64_SYS_REG(3, 0, 0, 7, 3),
	ARM64_SYS_REG(3, 0, 0, 7, 4),
	ARM64_SYS_REG(3, 0, 0, 7, 5),
	ARM64_SYS_REG(3, 0, 0, 7, 6),
	ARM64_SYS_REG(3, 0, 0, 7, 7),
	ARM64_SYS_REG(3, 0, 1, 0, 0),	/* SCTLR_EL1 */
	ARM64_SYS_REG(3, 0, 1, 0, 1),	/* ACTLR_EL1 */
	ARM64_SYS_REG(3, 0, 1, 0, 2),	/* CPACR_EL1 */
	ARM64_SYS_REG(3, 0, 2, 0, 0),	/* TTBR0_EL1 */
	ARM64_SYS_REG(3, 0, 2, 0, 1),	/* TTBR1_EL1 */
	ARM64_SYS_REG(3, 0, 2, 0, 2),	/* TCR_EL1 */
	ARM64_SYS_REG(3, 0, 5, 1, 0),	/* AFSR0_EL1 */
	ARM64_SYS_REG(3, 0, 5, 1, 1),	/* AFSR1_EL1 */
	ARM64_SYS_REG(3, 0, 5, 2, 0),	/* ESR_EL1 */
	ARM64_SYS_REG(3, 0, 6, 0, 0),	/* FAR_EL1 */
	ARM64_SYS_REG(3, 0, 7, 4, 0),	/* PAR_EL1 */
	ARM64_SYS_REG(3, 0, 9, 14, 1),	/* PMINTENSET_EL1 */
	ARM64_SYS_REG(3, 0, 9, 14, 2),	/* PMINTENCLR_EL1 */
	ARM64_SYS_REG(3, 0, 10, 2, 0),	/* MAIR_EL1 */
	ARM64_SYS_REG(3, 0, 10, 3, 0),	/* AMAIR_EL1 */
	ARM64_SYS_REG(3, 0, 12, 0, 0),	/* VBAR_EL1 */
	ARM64_SYS_REG(3, 0, 12, 1, 1),	/* DISR_EL1 */
	ARM64_SYS_REG(3, 0, 13, 0, 1),	/* CONTEXTIDR_EL1 */
	ARM64_SYS_REG(3, 0, 13, 0, 4),	/* TPIDR_EL1 */
	ARM64_SYS_REG(3, 0, 14, 1, 0),	/* CNTKCTL_EL1 */
	ARM64_SYS_REG(3, 2, 0, 0, 0),	/* CSSELR_EL1 */
	ARM64_SYS_REG(3, 3, 9, 12, 0),	/* PMCR_EL0 */
	ARM64_SYS_REG(3, 3, 9, 12, 1),	/* PMCNTENSET_EL0 */
	ARM64_SYS_REG(3, 3, 9, 12, 2),	/* PMCNTENCLR_EL0 */
	ARM64_SYS_REG(3, 3, 9, 12, 3),	/* PMOVSCLR_EL0 */
	ARM64_SYS_REG(3, 3, 9, 12, 4),	/* PMSWINC_EL0 */
	ARM64_SYS_REG(3, 3, 9, 12, 5),	/* PMSELR_EL0 */
	ARM64_SYS_REG(3, 3, 9, 13, 0),	/* PMCCNTR_EL0 */
	ARM64_SYS_REG(3, 3, 9, 14, 0),	/* PMUSERENR_EL0 */
	ARM64_SYS_REG(3, 3, 9, 14, 3),	/* PMOVSSET_EL0 */
	ARM64_SYS_REG(3, 3, 13, 0, 2),	/* TPIDR_EL0 */
	ARM64_SYS_REG(3, 3, 13, 0, 3),	/* TPIDRRO_EL0 */
	ARM64_SYS_REG(3, 3, 14, 8, 0),
	ARM64_SYS_REG(3, 3, 14, 8, 1),
	ARM64_SYS_REG(3, 3, 14, 8, 2),
	ARM64_SYS_REG(3, 3, 14, 8, 3),
	ARM64_SYS_REG(3, 3, 14, 8, 4),
	ARM64_SYS_REG(3, 3, 14, 8, 5),
	ARM64_SYS_REG(3, 3, 14, 8, 6),
	ARM64_SYS_REG(3, 3, 14, 8, 7),
	ARM64_SYS_REG(3, 3, 14, 9, 0),
	ARM64_SYS_REG(3, 3, 14, 9, 1),
	ARM64_SYS_REG(3, 3, 14, 9, 2),
	ARM64_SYS_REG(3, 3, 14, 9, 3),
	ARM64_SYS_REG(3, 3, 14, 9, 4),
	ARM64_SYS_REG(3, 3, 14, 9, 5),
	ARM64_SYS_REG(3, 3, 14, 9, 6),
	ARM64_SYS_REG(3, 3, 14, 9, 7),
	ARM64_SYS_REG(3, 3, 14, 10, 0),
	ARM64_SYS_REG(3, 3, 14, 10, 1),
	ARM64_SYS_REG(3, 3, 14, 10, 2),
	ARM64_SYS_REG(3, 3, 14, 10, 3),
	ARM64_SYS_REG(3, 3, 14, 10, 4),
	ARM64_SYS_REG(3, 3, 14, 10, 5),
	ARM64_SYS_REG(3, 3, 14, 10, 6),
	ARM64_SYS_REG(3, 3, 14, 10, 7),
	ARM64_SYS_REG(3, 3, 14, 11, 0),
	ARM64_SYS_REG(3, 3, 14, 11, 1),
	ARM64_SYS_REG(3, 3, 14, 11, 2),
	ARM64_SYS_REG(3, 3, 14, 11, 3),
	ARM64_SYS_REG(3, 3, 14, 11, 4),
	ARM64_SYS_REG(3, 3, 14, 11, 5),
	ARM64_SYS_REG(3, 3, 14, 11, 6),
	ARM64_SYS_REG(3, 3, 14, 12, 0),
	ARM64_SYS_REG(3, 3, 14, 12, 1),
	ARM64_SYS_REG(3, 3, 14, 12, 2),
	ARM64_SYS_REG(3, 3, 14, 12, 3),
	ARM64_SYS_REG(3, 3, 14, 12, 4),
	ARM64_SYS_REG(3, 3, 14, 12, 5),
	ARM64_SYS_REG(3, 3, 14, 12, 6),
	ARM64_SYS_REG(3, 3, 14, 12, 7),
	ARM64_SYS_REG(3, 3, 14, 13, 0),
	ARM64_SYS_REG(3, 3, 14, 13, 1),
	ARM64_SYS_REG(3, 3, 14, 13, 2),
	ARM64_SYS_REG(3, 3, 14, 13, 3),
	ARM64_SYS_REG(3, 3, 14, 13, 4),
	ARM64_SYS_REG(3, 3, 14, 13, 5),
	ARM64_SYS_REG(3, 3, 14, 13, 6),
	ARM64_SYS_REG(3, 3, 14, 13, 7),
	ARM64_SYS_REG(3, 3, 14, 14, 0),
	ARM64_SYS_REG(3, 3, 14, 14, 1),
	ARM64_SYS_REG(3, 3, 14, 14, 2),
	ARM64_SYS_REG(3, 3, 14, 14, 3),
	ARM64_SYS_REG(3, 3, 14, 14, 4),
	ARM64_SYS_REG(3, 3, 14, 14, 5),
	ARM64_SYS_REG(3, 3, 14, 14, 6),
	ARM64_SYS_REG(3, 3, 14, 14, 7),
	ARM64_SYS_REG(3, 3, 14, 15, 0),
	ARM64_SYS_REG(3, 3, 14, 15, 1),
	ARM64_SYS_REG(3, 3, 14, 15, 2),
	ARM64_SYS_REG(3, 3, 14, 15, 3),
	ARM64_SYS_REG(3, 3, 14, 15, 4),
	ARM64_SYS_REG(3, 3, 14, 15, 5),
	ARM64_SYS_REG(3, 3, 14, 15, 6),
	ARM64_SYS_REG(3, 3, 14, 15, 7),	/* PMCCFILTR_EL0 */
	ARM64_SYS_REG(3, 4, 3, 0, 0),	/* DACR32_EL2 */
	ARM64_SYS_REG(3, 4, 5, 0, 1),	/* IFSR32_EL2 */
	ARM64_SYS_REG(3, 4, 5, 3, 0),	/* FPEXC32_EL2 */
	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 0,
	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 1,
	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 2,
};
static __u64 base_regs_n = ARRAY_SIZE(base_regs);

static __u64 vregs[] = {
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]),
	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]),
};
static __u64 vregs_n = ARRAY_SIZE(vregs);

static __u64 sve_regs[] = {
	KVM_REG_ARM64_SVE_VLS,
	KVM_REG_ARM64_SVE_ZREG(0, 0),
	KVM_REG_ARM64_SVE_ZREG(1, 0),
	KVM_REG_ARM64_SVE_ZREG(2, 0),
	KVM_REG_ARM64_SVE_ZREG(3, 0),
	KVM_REG_ARM64_SVE_ZREG(4, 0),
	KVM_REG_ARM64_SVE_ZREG(5, 0),
	KVM_REG_ARM64_SVE_ZREG(6, 0),
	KVM_REG_ARM64_SVE_ZREG(7, 0),
	KVM_REG_ARM64_SVE_ZREG(8, 0),
	KVM_REG_ARM64_SVE_ZREG(9, 0),
	KVM_REG_ARM64_SVE_ZREG(10, 0),
	KVM_REG_ARM64_SVE_ZREG(11, 0),
	KVM_REG_ARM64_SVE_ZREG(12, 0),
	KVM_REG_ARM64_SVE_ZREG(13, 0),
	KVM_REG_ARM64_SVE_ZREG(14, 0),
	KVM_REG_ARM64_SVE_ZREG(15, 0),
	KVM_REG_ARM64_SVE_ZREG(16, 0),
	KVM_REG_ARM64_SVE_ZREG(17, 0),
	KVM_REG_ARM64_SVE_ZREG(18, 0),
	KVM_REG_ARM64_SVE_ZREG(19, 0),
	KVM_REG_ARM64_SVE_ZREG(20, 0),
	KVM_REG_ARM64_SVE_ZREG(21, 0),
	KVM_REG_ARM64_SVE_ZREG(22, 0),
	KVM_REG_ARM64_SVE_ZREG(23, 0),
	KVM_REG_ARM64_SVE_ZREG(24, 0),
	KVM_REG_ARM64_SVE_ZREG(25, 0),
	KVM_REG_ARM64_SVE_ZREG(26, 0),
	KVM_REG_ARM64_SVE_ZREG(27, 0),
	KVM_REG_ARM64_SVE_ZREG(28, 0),
	KVM_REG_ARM64_SVE_ZREG(29, 0),
	KVM_REG_ARM64_SVE_ZREG(30, 0),
	KVM_REG_ARM64_SVE_ZREG(31, 0),
	KVM_REG_ARM64_SVE_PREG(0, 0),
	KVM_REG_ARM64_SVE_PREG(1, 0),
	KVM_REG_ARM64_SVE_PREG(2, 0),
	KVM_REG_ARM64_SVE_PREG(3, 0),
	KVM_REG_ARM64_SVE_PREG(4, 0),
	KVM_REG_ARM64_SVE_PREG(5, 0),
	KVM_REG_ARM64_SVE_PREG(6, 0),
	KVM_REG_ARM64_SVE_PREG(7, 0),
	KVM_REG_ARM64_SVE_PREG(8, 0),
	KVM_REG_ARM64_SVE_PREG(9, 0),
	KVM_REG_ARM64_SVE_PREG(10, 0),
	KVM_REG_ARM64_SVE_PREG(11, 0),
	KVM_REG_ARM64_SVE_PREG(12, 0),
	KVM_REG_ARM64_SVE_PREG(13, 0),
	KVM_REG_ARM64_SVE_PREG(14, 0),
	KVM_REG_ARM64_SVE_PREG(15, 0),
	KVM_REG_ARM64_SVE_FFR(0),
	ARM64_SYS_REG(3, 0, 1, 2, 0),	/* ZCR_EL1 */
};
static __u64 sve_regs_n = ARRAY_SIZE(sve_regs);

static __u64 rejects_set[] = {
|
||||
#ifdef REG_LIST_SVE
|
||||
KVM_REG_ARM64_SVE_VLS,
|
||||
#endif
|
||||
};
|
||||
static __u64 rejects_set_n = ARRAY_SIZE(rejects_set);
|
|
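
Note: these blessed lists are the test's source of truth. As a rough sketch of how they are meant to be checked against what KVM actually reports (vcpu_get_reg_list() is added to kvm_util.c later in this diff; find_reg() is a hypothetical helper shown only to illustrate the comparison):

	/* Hypothetical helper: linear scan of a kvm_reg_list for one register ID. */
	static bool find_reg(struct kvm_reg_list *list, __u64 id)
	{
		__u64 i;

		for (i = 0; i < list->n; i++)
			if (list->reg[i] == id)
				return true;
		return false;
	}

	/* Every blessed register must show up in KVM_GET_REG_LIST. */
	struct kvm_reg_list *reg_list = vcpu_get_reg_list(vm, 0);
	__u64 i;

	for (i = 0; i < base_regs_n; i++)
		TEST_ASSERT(find_reg(reg_list, base_regs[i]),
			    "blessed reg 0x%llx missing from KVM_GET_REG_LIST",
			    (unsigned long long)base_regs[i]);
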
@@ -1,6 +0,0 @@
#define USE_CLEAR_DIRTY_LOG
#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE	(1 << 0)
#define KVM_DIRTY_LOG_INITIALLY_SET		(1 << 1)
#define KVM_DIRTY_LOG_MANUAL_CAPS	(KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
					 KVM_DIRTY_LOG_INITIALLY_SET)
#include "dirty_log_test.c"

@@ -21,20 +21,12 @@
#include <linux/bitops.h>
#include <linux/userfaultfd.h>

#include "test_util.h"
#include "kvm_util.h"
#include "perf_test_util.h"
#include "processor.h"
#include "test_util.h"

#ifdef __NR_userfaultfd

/* The memory slot index demand page */
#define TEST_MEM_SLOT_INDEX		1

/* Default guest test virtual memory offset */
#define DEFAULT_GUEST_TEST_MEM		0xc0000000

#define DEFAULT_GUEST_TEST_MEM_SIZE	(1 << 30) /* 1G */

#ifdef PRINT_PER_PAGE_UPDATES
#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__)
#else

@@ -47,77 +39,17 @@
#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
#endif

#define MAX_VCPUS 512

/*
 * Guest/Host shared variables. Ensure addr_gva2hva() and/or
 * sync_global_to/from_guest() are used when accessing from
 * the host. READ/WRITE_ONCE() should also be used with anything
 * that may change.
 */
static uint64_t host_page_size;
static uint64_t guest_page_size;

static char *guest_data_prototype;

/*
 * Guest physical memory offset of the testing memory slot.
 * This will be set to the topmost valid physical address minus
 * the test memory size.
 */
static uint64_t guest_test_phys_mem;

/*
 * Guest virtual memory offset of the testing memory slot.
 * Must not conflict with identity mapped test code.
 */
static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;

struct vcpu_args {
	uint64_t gva;
	uint64_t pages;

	/* Only used by the host userspace part of the vCPU thread */
	int vcpu_id;
	struct kvm_vm *vm;
};

static struct vcpu_args vcpu_args[MAX_VCPUS];

/*
 * Continuously write to the first 8 bytes of each page in the demand paging
 * memory region.
 */
static void guest_code(uint32_t vcpu_id)
{
	uint64_t gva;
	uint64_t pages;
	int i;

	/* Make sure vCPU args data structure is not corrupt. */
	GUEST_ASSERT(vcpu_args[vcpu_id].vcpu_id == vcpu_id);

	gva = vcpu_args[vcpu_id].gva;
	pages = vcpu_args[vcpu_id].pages;

	for (i = 0; i < pages; i++) {
		uint64_t addr = gva + (i * guest_page_size);

		addr &= ~(host_page_size - 1);
		*(uint64_t *)addr = 0x0123456789ABCDEF;
	}

	GUEST_SYNC(1);
}

static void *vcpu_worker(void *data)
{
	int ret;
	struct vcpu_args *args = (struct vcpu_args *)data;
	struct kvm_vm *vm = args->vm;
	int vcpu_id = args->vcpu_id;
	struct vcpu_args *vcpu_args = (struct vcpu_args *)data;
	int vcpu_id = vcpu_args->vcpu_id;
	struct kvm_vm *vm = perf_test_args.vm;
	struct kvm_run *run;
	struct timespec start, end, ts_diff;
	struct timespec start;
	struct timespec ts_diff;

	vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
	run = vcpu_state(vm, vcpu_id);

@@ -133,52 +65,18 @@ static void *vcpu_worker(void *data)
			    exit_reason_str(run->exit_reason));
	}

	clock_gettime(CLOCK_MONOTONIC, &end);
	ts_diff = timespec_sub(end, start);
	ts_diff = timespec_diff_now(start);
	PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id,
		       ts_diff.tv_sec, ts_diff.tv_nsec);

	return NULL;
}

#define PAGE_SHIFT_4K  12
#define PTES_PER_4K_PT 512

static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus,
				uint64_t vcpu_memory_bytes)
{
	struct kvm_vm *vm;
	uint64_t pages = DEFAULT_GUEST_PHY_PAGES;

	/* Account for a few pages per-vCPU for stacks */
	pages += DEFAULT_STACK_PGS * vcpus;

	/*
	 * Reserve twice the amount of memory needed to map the test region and
	 * the page table / stacks region, at 4k, for page tables. Do the
	 * calculation with 4K page size: the smallest of all archs. (e.g., 64K
	 * page size guest will need even less memory for page tables).
	 */
	pages += (2 * pages) / PTES_PER_4K_PT;
	pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) /
		 PTES_PER_4K_PT;
	pages = vm_adjust_num_guest_pages(mode, pages);

	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));

	vm = _vm_create(mode, pages, O_RDWR);
	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
#ifdef __x86_64__
	vm_create_irqchip(vm);
#endif
	return vm;
}

static int handle_uffd_page_request(int uffd, uint64_t addr)
{
	pid_t tid;
	struct timespec start;
	struct timespec end;
	struct timespec ts_diff;
	struct uffdio_copy copy;
	int r;

@@ -186,7 +84,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr)

	copy.src = (uint64_t)guest_data_prototype;
	copy.dst = addr;
	copy.len = host_page_size;
	copy.len = perf_test_args.host_page_size;
	copy.mode = 0;

	clock_gettime(CLOCK_MONOTONIC, &start);

@@ -198,12 +96,12 @@ static int handle_uffd_page_request(int uffd, uint64_t addr)
		return r;
	}

	clock_gettime(CLOCK_MONOTONIC, &end);
	ts_diff = timespec_diff_now(start);

	PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid,
		       timespec_to_ns(timespec_sub(end, start)));
		       timespec_to_ns(ts_diff));
	PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
		       host_page_size, addr, tid);
		       perf_test_args.host_page_size, addr, tid);

	return 0;
}

@@ -223,7 +121,8 @@ static void *uffd_handler_thread_fn(void *arg)
	int pipefd = uffd_args->pipefd;
	useconds_t delay = uffd_args->delay;
	int64_t pages = 0;
	struct timespec start, end, ts_diff;
	struct timespec start;
	struct timespec ts_diff;

	clock_gettime(CLOCK_MONOTONIC, &start);
	while (!quit_uffd_thread) {

@@ -292,8 +191,7 @@ static void *uffd_handler_thread_fn(void *arg)
			pages++;
	}

	clock_gettime(CLOCK_MONOTONIC, &end);
	ts_diff = timespec_sub(end, start);
	ts_diff = timespec_diff_now(start);
	PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
		       pages, ts_diff.tv_sec, ts_diff.tv_nsec,
		       pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 1000000000.0));

@@ -351,99 +249,54 @@ static int setup_demand_paging(struct kvm_vm *vm,
}

static void run_test(enum vm_guest_mode mode, bool use_uffd,
		     useconds_t uffd_delay, int vcpus,
		     uint64_t vcpu_memory_bytes)
		     useconds_t uffd_delay)
{
	pthread_t *vcpu_threads;
	pthread_t *uffd_handler_threads = NULL;
	struct uffd_handler_args *uffd_args = NULL;
	struct timespec start, end, ts_diff;
	struct timespec start;
	struct timespec ts_diff;
	int *pipefds = NULL;
	struct kvm_vm *vm;
	uint64_t guest_num_pages;
	int vcpu_id;
	int r;

	vm = create_vm(mode, vcpus, vcpu_memory_bytes);
	vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size);

	guest_page_size = vm_get_page_size(vm);
	perf_test_args.wr_fract = 1;

	TEST_ASSERT(vcpu_memory_bytes % guest_page_size == 0,
		    "Guest memory size is not guest page size aligned.");

	guest_num_pages = (vcpus * vcpu_memory_bytes) / guest_page_size;
	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);

	/*
	 * If there should be more memory in the guest test region than there
	 * can be pages in the guest, it will definitely cause problems.
	 */
	TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
		    "Requested more guest memory than address space allows.\n"
		    "    guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n",
		    guest_num_pages, vm_get_max_gfn(vm), vcpus,
		    vcpu_memory_bytes);

	host_page_size = getpagesize();
	TEST_ASSERT(vcpu_memory_bytes % host_page_size == 0,
		    "Guest memory size is not host page size aligned.");

	guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *
			      guest_page_size;
	guest_test_phys_mem &= ~(host_page_size - 1);

#ifdef __s390x__
	/* Align to 1M (segment size) */
	guest_test_phys_mem &= ~((1 << 20) - 1);
#endif

	pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);

	/* Add an extra memory slot for testing demand paging */
	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
				    guest_test_phys_mem,
				    TEST_MEM_SLOT_INDEX,
				    guest_num_pages, 0);

	/* Do mapping for the demand paging memory slot */
	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);

	ucall_init(vm, NULL);

	guest_data_prototype = malloc(host_page_size);
	guest_data_prototype = malloc(perf_test_args.host_page_size);
	TEST_ASSERT(guest_data_prototype,
		    "Failed to allocate buffer for guest data pattern");
	memset(guest_data_prototype, 0xAB, host_page_size);
	memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size);

	vcpu_threads = malloc(vcpus * sizeof(*vcpu_threads));
	vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
	TEST_ASSERT(vcpu_threads, "Memory allocation failed");

	add_vcpus(vm, nr_vcpus, guest_percpu_mem_size);

	if (use_uffd) {
		uffd_handler_threads =
			malloc(vcpus * sizeof(*uffd_handler_threads));
			malloc(nr_vcpus * sizeof(*uffd_handler_threads));
		TEST_ASSERT(uffd_handler_threads, "Memory allocation failed");

		uffd_args = malloc(vcpus * sizeof(*uffd_args));
		uffd_args = malloc(nr_vcpus * sizeof(*uffd_args));
		TEST_ASSERT(uffd_args, "Memory allocation failed");

		pipefds = malloc(sizeof(int) * vcpus * 2);
		pipefds = malloc(sizeof(int) * nr_vcpus * 2);
		TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd");
	}

	for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
		vm_paddr_t vcpu_gpa;
		void *vcpu_hva;
	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
		vm_paddr_t vcpu_gpa;
		void *vcpu_hva;

		vm_vcpu_add_default(vm, vcpu_id, guest_code);
		vcpu_gpa = guest_test_phys_mem + (vcpu_id * guest_percpu_mem_size);
		PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
			       vcpu_id, vcpu_gpa, vcpu_gpa + guest_percpu_mem_size);

		vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes);
		PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
			       vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes);
		/* Cache the HVA pointer of the region */
		vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);

		/* Cache the HVA pointer of the region */
		vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);

		if (use_uffd) {
			/*
			 * Set up user fault fd to handle demand paging
			 * requests.

@@ -456,53 +309,41 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
					    &uffd_handler_threads[vcpu_id],
					    pipefds[vcpu_id * 2],
					    uffd_delay, &uffd_args[vcpu_id],
					    vcpu_hva, vcpu_memory_bytes);
					    vcpu_hva, guest_percpu_mem_size);
			if (r < 0)
				exit(-r);
		}

#ifdef __x86_64__
		vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid());
#endif

		vcpu_args[vcpu_id].vm = vm;
		vcpu_args[vcpu_id].vcpu_id = vcpu_id;
		vcpu_args[vcpu_id].gva = guest_test_virt_mem +
					 (vcpu_id * vcpu_memory_bytes);
		vcpu_args[vcpu_id].pages = vcpu_memory_bytes / guest_page_size;
	}

	/* Export the shared variables to the guest */
	sync_global_to_guest(vm, host_page_size);
	sync_global_to_guest(vm, guest_page_size);
	sync_global_to_guest(vm, vcpu_args);
	sync_global_to_guest(vm, perf_test_args);

	pr_info("Finished creating vCPUs and starting uffd threads\n");

	clock_gettime(CLOCK_MONOTONIC, &start);

	for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
		pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
			       &vcpu_args[vcpu_id]);
			       &perf_test_args.vcpu_args[vcpu_id]);
	}

	pr_info("Started all vCPUs\n");

	/* Wait for the vcpu threads to quit */
	for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
		pthread_join(vcpu_threads[vcpu_id], NULL);
		PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id);
	}

	pr_info("All vCPU threads joined\n");
	ts_diff = timespec_diff_now(start);

	clock_gettime(CLOCK_MONOTONIC, &end);
	pr_info("All vCPU threads joined\n");

	if (use_uffd) {
		char c;

		/* Tell the user fault fd handler threads to quit */
		for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
		for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
			r = write(pipefds[vcpu_id * 2 + 1], &c, 1);
			TEST_ASSERT(r == 1, "Unable to write to pipefd");

@@ -510,11 +351,11 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
		}
	}

	ts_diff = timespec_sub(end, start);
	pr_info("Total guest execution time: %ld.%.9lds\n",
		ts_diff.tv_sec, ts_diff.tv_nsec);
	pr_info("Overall demand paging rate: %f pgs/sec\n",
		guest_num_pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 1000000000.0));
		perf_test_args.vcpu_args[0].pages * nr_vcpus /
		((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 1000000000.0));

	ucall_uninit(vm);
	kvm_vm_free(vm);

@@ -568,9 +409,8 @@ static void help(char *name)

int main(int argc, char *argv[])
{
	int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
	bool mode_selected = false;
	uint64_t vcpu_memory_bytes = DEFAULT_GUEST_TEST_MEM_SIZE;
	int vcpus = 1;
	unsigned int mode;
	int opt, i;
	bool use_uffd = false;

@@ -619,15 +459,12 @@ int main(int argc, char *argv[])
				    "A negative UFFD delay is not supported.");
			break;
		case 'b':
			vcpu_memory_bytes = parse_size(optarg);
			guest_percpu_mem_size = parse_size(optarg);
			break;
		case 'v':
			vcpus = atoi(optarg);
			TEST_ASSERT(vcpus > 0,
				    "Must have a positive number of vCPUs");
			TEST_ASSERT(vcpus <= MAX_VCPUS,
				    "This test does not currently support\n"
				    "more than %d vCPUs.", MAX_VCPUS);
			nr_vcpus = atoi(optarg);
			TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
				    "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
			break;
		case 'h':
		default:

@@ -642,7 +479,7 @@ int main(int argc, char *argv[])
		TEST_ASSERT(guest_modes[i].supported,
			    "Guest mode ID %d (%s) not supported.",
			    i, vm_guest_mode_string(i));
		run_test(i, use_uffd, uffd_delay, vcpus, vcpu_memory_bytes);
		run_test(i, use_uffd, uffd_delay);
	}

	return 0;
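
The heart of the userfaultfd handler above is the UFFDIO_COPY ioctl, which atomically installs a page and wakes the faulting vCPU thread. A minimal, self-contained sketch of that resolution step (error handling trimmed; uffd and addr would come from reading a struct uffd_msg off the userfaultfd):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/userfaultfd.h>

	static int resolve_fault(int uffd, uint64_t addr, void *src, uint64_t page_size)
	{
		struct uffdio_copy copy = {
			.src = (uint64_t)src,
			.dst = addr & ~(page_size - 1),	/* UFFDIO_COPY requires a page-aligned dst */
			.len = page_size,
			.mode = 0,
		};

		/* Installs the page and wakes the faulting thread in one step. */
		return ioctl(uffd, UFFDIO_COPY, &copy);
	}
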
@@ -0,0 +1,376 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * KVM dirty page logging performance test
 *
 * Based on dirty_log_test.c
 *
 * Copyright (C) 2018, Red Hat, Inc.
 * Copyright (C) 2020, Google, Inc.
 */

#define _GNU_SOURCE /* for program_invocation_name */

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include <pthread.h>
#include <linux/bitmap.h>
#include <linux/bitops.h>

#include "kvm_util.h"
#include "perf_test_util.h"
#include "processor.h"
#include "test_util.h"

/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/
#define TEST_HOST_LOOP_N		2UL

/* Host variables */
static bool host_quit;
static uint64_t iteration;
static uint64_t vcpu_last_completed_iteration[MAX_VCPUS];

static void *vcpu_worker(void *data)
{
	int ret;
	struct kvm_vm *vm = perf_test_args.vm;
	uint64_t pages_count = 0;
	struct kvm_run *run;
	struct timespec start;
	struct timespec ts_diff;
	struct timespec total = (struct timespec){0};
	struct timespec avg;
	struct vcpu_args *vcpu_args = (struct vcpu_args *)data;
	int vcpu_id = vcpu_args->vcpu_id;

	vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
	run = vcpu_state(vm, vcpu_id);

	while (!READ_ONCE(host_quit)) {
		uint64_t current_iteration = READ_ONCE(iteration);

		clock_gettime(CLOCK_MONOTONIC, &start);
		ret = _vcpu_run(vm, vcpu_id);
		ts_diff = timespec_diff_now(start);

		TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
		TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC,
			    "Invalid guest sync status: exit_reason=%s\n",
			    exit_reason_str(run->exit_reason));

		pr_debug("Got sync event from vCPU %d\n", vcpu_id);
		vcpu_last_completed_iteration[vcpu_id] = current_iteration;
		pr_debug("vCPU %d updated last completed iteration to %lu\n",
			 vcpu_id, vcpu_last_completed_iteration[vcpu_id]);

		if (current_iteration) {
			pages_count += vcpu_args->pages;
			total = timespec_add(total, ts_diff);
			pr_debug("vCPU %d iteration %lu dirty memory time: %ld.%.9lds\n",
				vcpu_id, current_iteration, ts_diff.tv_sec,
				ts_diff.tv_nsec);
		} else {
			pr_debug("vCPU %d iteration %lu populate memory time: %ld.%.9lds\n",
				vcpu_id, current_iteration, ts_diff.tv_sec,
				ts_diff.tv_nsec);
		}

		while (current_iteration == READ_ONCE(iteration) &&
		       !READ_ONCE(host_quit)) {}
	}

	avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_id]);
	pr_debug("\nvCPU %d dirtied 0x%lx pages over %lu iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
		vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id],
		total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec);

	return NULL;
}

#ifdef USE_CLEAR_DIRTY_LOG
static u64 dirty_log_manual_caps;
#endif

static void run_test(enum vm_guest_mode mode, unsigned long iterations,
		     uint64_t phys_offset, int wr_fract)
{
	pthread_t *vcpu_threads;
	struct kvm_vm *vm;
	unsigned long *bmap;
	uint64_t guest_num_pages;
	uint64_t host_num_pages;
	int vcpu_id;
	struct timespec start;
	struct timespec ts_diff;
	struct timespec get_dirty_log_total = (struct timespec){0};
	struct timespec vcpu_dirty_total = (struct timespec){0};
	struct timespec avg;
#ifdef USE_CLEAR_DIRTY_LOG
	struct kvm_enable_cap cap = {};
	struct timespec clear_dirty_log_total = (struct timespec){0};
#endif

	vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size);

	perf_test_args.wr_fract = wr_fract;

	guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm);
	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
	host_num_pages = vm_num_host_pages(mode, guest_num_pages);
	bmap = bitmap_alloc(host_num_pages);

#ifdef USE_CLEAR_DIRTY_LOG
	cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
	cap.args[0] = dirty_log_manual_caps;
	vm_enable_cap(vm, &cap);
#endif

	vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
	TEST_ASSERT(vcpu_threads, "Memory allocation failed");

	add_vcpus(vm, nr_vcpus, guest_percpu_mem_size);

	sync_global_to_guest(vm, perf_test_args);

	/* Start the iterations */
	iteration = 0;
	host_quit = false;

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
		pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
			       &perf_test_args.vcpu_args[vcpu_id]);
	}

	/* Allow the vCPU to populate memory */
	pr_debug("Starting iteration %lu - Populating\n", iteration);
	while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration)
		pr_debug("Waiting for vcpu_last_completed_iteration == %lu\n",
			iteration);

	ts_diff = timespec_diff_now(start);
	pr_info("Populate memory time: %ld.%.9lds\n",
		ts_diff.tv_sec, ts_diff.tv_nsec);

	/* Enable dirty logging */
	clock_gettime(CLOCK_MONOTONIC, &start);
	vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX,
				KVM_MEM_LOG_DIRTY_PAGES);
	ts_diff = timespec_diff_now(start);
	pr_info("Enabling dirty logging time: %ld.%.9lds\n\n",
		ts_diff.tv_sec, ts_diff.tv_nsec);

	while (iteration < iterations) {
		/*
		 * Incrementing the iteration number will start the vCPUs
		 * dirtying memory again.
		 */
		clock_gettime(CLOCK_MONOTONIC, &start);
		iteration++;

		pr_debug("Starting iteration %lu\n", iteration);
		for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
			while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration)
				pr_debug("Waiting for vCPU %d vcpu_last_completed_iteration == %lu\n",
					 vcpu_id, iteration);
		}

		ts_diff = timespec_diff_now(start);
		vcpu_dirty_total = timespec_add(vcpu_dirty_total, ts_diff);
		pr_info("Iteration %lu dirty memory time: %ld.%.9lds\n",
			iteration, ts_diff.tv_sec, ts_diff.tv_nsec);

		clock_gettime(CLOCK_MONOTONIC, &start);
		kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);

		ts_diff = timespec_diff_now(start);
		get_dirty_log_total = timespec_add(get_dirty_log_total,
						   ts_diff);
		pr_info("Iteration %lu get dirty log time: %ld.%.9lds\n",
			iteration, ts_diff.tv_sec, ts_diff.tv_nsec);

#ifdef USE_CLEAR_DIRTY_LOG
		clock_gettime(CLOCK_MONOTONIC, &start);
		kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
				       host_num_pages);

		ts_diff = timespec_diff_now(start);
		clear_dirty_log_total = timespec_add(clear_dirty_log_total,
						     ts_diff);
		pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n",
			iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
#endif
	}

	/* Tell the vcpu thread to quit */
	host_quit = true;
	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
		pthread_join(vcpu_threads[vcpu_id], NULL);

	/* Disable dirty logging */
	clock_gettime(CLOCK_MONOTONIC, &start);
	vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0);
	ts_diff = timespec_diff_now(start);
	pr_info("Disabling dirty logging time: %ld.%.9lds\n",
		ts_diff.tv_sec, ts_diff.tv_nsec);

	avg = timespec_div(get_dirty_log_total, iterations);
	pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
		iterations, get_dirty_log_total.tv_sec,
		get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);

#ifdef USE_CLEAR_DIRTY_LOG
	avg = timespec_div(clear_dirty_log_total, iterations);
	pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
		iterations, clear_dirty_log_total.tv_sec,
		clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
#endif

	free(bmap);
	free(vcpu_threads);
	ucall_uninit(vm);
	kvm_vm_free(vm);
}

struct guest_mode {
	bool supported;
	bool enabled;
};
static struct guest_mode guest_modes[NUM_VM_MODES];

#define guest_mode_init(mode, supported, enabled) ({ \
	guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
})

static void help(char *name)
{
	int i;

	puts("");
	printf("usage: %s [-h] [-i iterations] [-p offset] "
	       "[-m mode] [-b vcpu bytes] [-v vcpus]\n", name);
	puts("");
	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
	       TEST_HOST_LOOP_N);
	printf(" -p: specify guest physical test memory offset\n"
	       "     Warning: a low offset can conflict with the loaded test code.\n");
	printf(" -m: specify the guest mode ID to test "
	       "(default: test all supported modes)\n"
	       "     This option may be used multiple times.\n"
	       "     Guest mode IDs:\n");
	for (i = 0; i < NUM_VM_MODES; ++i) {
		printf("         %d:    %s%s\n", i, vm_guest_mode_string(i),
		       guest_modes[i].supported ? " (supported)" : "");
	}
	printf(" -b: specify the size of the memory region which should be\n"
	       "     dirtied by each vCPU. e.g. 10M or 3G.\n"
	       "     (default: 1G)\n");
	printf(" -f: specify the fraction of pages which should be written to\n"
	       "     as opposed to simply read, in the form\n"
	       "     1/<fraction of pages to write>.\n"
	       "     (default: 1 i.e. all pages are written to.)\n");
	printf(" -v: specify the number of vCPUs to run.\n");
	puts("");
	exit(0);
}

int main(int argc, char *argv[])
{
	unsigned long iterations = TEST_HOST_LOOP_N;
	bool mode_selected = false;
	uint64_t phys_offset = 0;
	unsigned int mode;
	int opt, i;
	int wr_fract = 1;

#ifdef USE_CLEAR_DIRTY_LOG
	dirty_log_manual_caps =
		kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
	if (!dirty_log_manual_caps) {
		print_skip("KVM_CLEAR_DIRTY_LOG not available");
		exit(KSFT_SKIP);
	}
	dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
				  KVM_DIRTY_LOG_INITIALLY_SET);
#endif

#ifdef __x86_64__
	guest_mode_init(VM_MODE_PXXV48_4K, true, true);
#endif
#ifdef __aarch64__
	guest_mode_init(VM_MODE_P40V48_4K, true, true);
	guest_mode_init(VM_MODE_P40V48_64K, true, true);

	{
		unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);

		if (limit >= 52)
			guest_mode_init(VM_MODE_P52V48_64K, true, true);
		if (limit >= 48) {
			guest_mode_init(VM_MODE_P48V48_4K, true, true);
			guest_mode_init(VM_MODE_P48V48_64K, true, true);
		}
	}
#endif
#ifdef __s390x__
	guest_mode_init(VM_MODE_P40V48_4K, true, true);
#endif

	while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:")) != -1) {
		switch (opt) {
		case 'i':
			iterations = strtol(optarg, NULL, 10);
			break;
		case 'p':
			phys_offset = strtoull(optarg, NULL, 0);
			break;
		case 'm':
			if (!mode_selected) {
				for (i = 0; i < NUM_VM_MODES; ++i)
					guest_modes[i].enabled = false;
				mode_selected = true;
			}
			mode = strtoul(optarg, NULL, 10);
			TEST_ASSERT(mode < NUM_VM_MODES,
				    "Guest mode ID %d too big", mode);
			guest_modes[mode].enabled = true;
			break;
		case 'b':
			guest_percpu_mem_size = parse_size(optarg);
			break;
		case 'f':
			wr_fract = atoi(optarg);
			TEST_ASSERT(wr_fract >= 1,
				    "Write fraction cannot be less than one");
			break;
		case 'v':
			nr_vcpus = atoi(optarg);
			TEST_ASSERT(nr_vcpus > 0,
				    "Must have a positive number of vCPUs");
			TEST_ASSERT(nr_vcpus <= MAX_VCPUS,
				    "This test does not currently support\n"
				    "more than %d vCPUs.", MAX_VCPUS);
			break;
		case 'h':
		default:
			help(argv[0]);
			break;
		}
	}

	TEST_ASSERT(iterations >= 2, "The test should have at least two iterations");

	pr_info("Test iterations: %"PRIu64"\n", iterations);

	for (i = 0; i < NUM_VM_MODES; ++i) {
		if (!guest_modes[i].enabled)
			continue;
		TEST_ASSERT(guest_modes[i].supported,
			    "Guest mode ID %d (%s) not supported.",
			    i, vm_guest_mode_string(i));
		run_test(i, iterations, phys_offset, wr_fract);
	}

	return 0;
}
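
The main thread and the vCPU workers in the new test synchronize without locks: the host bumps a shared iteration counter, and each worker publishes the iteration it last completed. A condensed sketch of the host side of that handshake, reusing the globals above:

	/* Start the next pass, then wait for every vCPU to finish it. */
	static void start_and_wait_for_iteration(int vcpus)
	{
		int vcpu_id;

		iteration++;	/* observed by the workers via READ_ONCE(iteration) */

		for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++)
			while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration)
				;	/* busy-wait; workers store their progress */
	}
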
@@ -128,6 +128,78 @@ static uint64_t host_dirty_count;
static uint64_t host_clear_count;
static uint64_t host_track_next_count;

enum log_mode_t {
	/* Only use KVM_GET_DIRTY_LOG for logging */
	LOG_MODE_DIRTY_LOG = 0,

	/* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */
	LOG_MODE_CLEAR_LOG = 1,

	LOG_MODE_NUM,

	/* Run all supported modes */
	LOG_MODE_ALL = LOG_MODE_NUM,
};

/* Mode of logging to test. Default is to run all supported modes */
static enum log_mode_t host_log_mode_option = LOG_MODE_ALL;
/* Logging mode for current run */
static enum log_mode_t host_log_mode;

static bool clear_log_supported(void)
{
	return kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
}

static void clear_log_create_vm_done(struct kvm_vm *vm)
{
	struct kvm_enable_cap cap = {};
	u64 manual_caps;

	manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
	TEST_ASSERT(manual_caps, "MANUAL_CAPS is zero!");
	manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
			KVM_DIRTY_LOG_INITIALLY_SET);
	cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
	cap.args[0] = manual_caps;
	vm_enable_cap(vm, &cap);
}

static void dirty_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
					  void *bitmap, uint32_t num_pages)
{
	kvm_vm_get_dirty_log(vm, slot, bitmap);
}

static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
					  void *bitmap, uint32_t num_pages)
{
	kvm_vm_get_dirty_log(vm, slot, bitmap);
	kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages);
}

struct log_mode {
	const char *name;
	/* Return true if this mode is supported, otherwise false */
	bool (*supported)(void);
	/* Hook when the vm creation is done (before vcpu creation) */
	void (*create_vm_done)(struct kvm_vm *vm);
	/* Hook to collect the dirty pages into the bitmap provided */
	void (*collect_dirty_pages) (struct kvm_vm *vm, int slot,
				     void *bitmap, uint32_t num_pages);
} log_modes[LOG_MODE_NUM] = {
	{
		.name = "dirty-log",
		.collect_dirty_pages = dirty_log_collect_dirty_pages,
	},
	{
		.name = "clear-log",
		.supported = clear_log_supported,
		.create_vm_done = clear_log_create_vm_done,
		.collect_dirty_pages = clear_log_collect_dirty_pages,
	},
};

/*
 * We use this bitmap to track some pages that should have its dirty
 * bit set in the _next_ iteration. For example, if we detected the

@@ -137,6 +209,44 @@ static uint64_t host_track_next_count;
 */
static unsigned long *host_bmap_track;

static void log_modes_dump(void)
{
	int i;

	printf("all");
	for (i = 0; i < LOG_MODE_NUM; i++)
		printf(", %s", log_modes[i].name);
	printf("\n");
}

static bool log_mode_supported(void)
{
	struct log_mode *mode = &log_modes[host_log_mode];

	if (mode->supported)
		return mode->supported();

	return true;
}

static void log_mode_create_vm_done(struct kvm_vm *vm)
{
	struct log_mode *mode = &log_modes[host_log_mode];

	if (mode->create_vm_done)
		mode->create_vm_done(vm);
}

static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot,
					 void *bitmap, uint32_t num_pages)
{
	struct log_mode *mode = &log_modes[host_log_mode];

	TEST_ASSERT(mode->collect_dirty_pages != NULL,
		    "collect_dirty_pages() is required for any log mode!");
	mode->collect_dirty_pages(vm, slot, bitmap, num_pages);
}

static void generate_random_array(uint64_t *guest_array, uint64_t size)
{
	uint64_t i;

@@ -195,7 +305,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
			    page);
	}

	if (test_bit_le(page, bmap)) {
	if (test_and_clear_bit_le(page, bmap)) {
		host_dirty_count++;
		/*
		 * If the bit is set, the value written onto

@@ -252,11 +362,12 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,

	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));

	vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
	vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
#ifdef __x86_64__
	vm_create_irqchip(vm);
#endif
	log_mode_create_vm_done(vm);
	vm_vcpu_add_default(vm, vcpuid, guest_code);
	return vm;
}

@@ -264,10 +375,6 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
#define DIRTY_MEM_BITS 30 /* 1G */
#define PAGE_SHIFT_4K  12

#ifdef USE_CLEAR_DIRTY_LOG
static u64 dirty_log_manual_caps;
#endif

static void run_test(enum vm_guest_mode mode, unsigned long iterations,
		     unsigned long interval, uint64_t phys_offset)
{

@@ -275,6 +382,12 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
	struct kvm_vm *vm;
	unsigned long *bmap;

	if (!log_mode_supported()) {
		print_skip("Log mode '%s' not supported",
			   log_modes[host_log_mode].name);
		return;
	}

	/*
	 * We reserve page table for 2 times of extra dirty mem which
	 * will definitely cover the original (1G+) test range.  Here

@@ -317,14 +430,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
	bmap = bitmap_alloc(host_num_pages);
	host_bmap_track = bitmap_alloc(host_num_pages);

#ifdef USE_CLEAR_DIRTY_LOG
	struct kvm_enable_cap cap = {};

	cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
	cap.args[0] = dirty_log_manual_caps;
	vm_enable_cap(vm, &cap);
#endif

	/* Add an extra memory slot for testing dirty logging */
	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
				    guest_test_phys_mem,

@@ -362,11 +467,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
	while (iteration < iterations) {
		/* Give the vcpu thread some time to dirty some pages */
		usleep(interval * 1000);
		kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
#ifdef USE_CLEAR_DIRTY_LOG
		kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
				       host_num_pages);
#endif
		log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX,
					     bmap, host_num_pages);
		vm_dirty_log_verify(mode, bmap);
		iteration++;
		sync_global_to_guest(vm, iteration);

@@ -410,6 +512,9 @@ static void help(char *name)
	       TEST_HOST_LOOP_INTERVAL);
	printf(" -p: specify guest physical test memory offset\n"
	       "     Warning: a low offset can conflict with the loaded test code.\n");
	printf(" -M: specify the host logging mode "
	       "(default: run all log modes). Supported modes: \n\t");
	log_modes_dump();
	printf(" -m: specify the guest mode ID to test "
	       "(default: test all supported modes)\n"
	       "     This option may be used multiple times.\n"

@@ -429,18 +534,7 @@ int main(int argc, char *argv[])
	bool mode_selected = false;
	uint64_t phys_offset = 0;
	unsigned int mode;
	int opt, i;

#ifdef USE_CLEAR_DIRTY_LOG
	dirty_log_manual_caps =
		kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
	if (!dirty_log_manual_caps) {
		print_skip("KVM_CLEAR_DIRTY_LOG not available");
		exit(KSFT_SKIP);
	}
	dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
				  KVM_DIRTY_LOG_INITIALLY_SET);
#endif
	int opt, i, j;

#ifdef __x86_64__
	guest_mode_init(VM_MODE_PXXV48_4K, true, true);

@@ -464,7 +558,7 @@ int main(int argc, char *argv[])
	guest_mode_init(VM_MODE_P40V48_4K, true, true);
#endif

	while ((opt = getopt(argc, argv, "hi:I:p:m:")) != -1) {
	while ((opt = getopt(argc, argv, "hi:I:p:m:M:")) != -1) {
		switch (opt) {
		case 'i':
			iterations = strtol(optarg, NULL, 10);

@@ -486,6 +580,26 @@ int main(int argc, char *argv[])
				    "Guest mode ID %d too big", mode);
			guest_modes[mode].enabled = true;
			break;
		case 'M':
			if (!strcmp(optarg, "all")) {
				host_log_mode_option = LOG_MODE_ALL;
				break;
			}
			for (i = 0; i < LOG_MODE_NUM; i++) {
				if (!strcmp(optarg, log_modes[i].name)) {
					pr_info("Setting log mode to: '%s'\n",
						optarg);
					host_log_mode_option = i;
					break;
				}
			}
			if (i == LOG_MODE_NUM) {
				printf("Log mode '%s' invalid. Please choose "
				       "from: ", optarg);
				log_modes_dump();
				exit(1);
			}
			break;
		case 'h':
		default:
			help(argv[0]);

@@ -507,7 +621,18 @@ int main(int argc, char *argv[])
		TEST_ASSERT(guest_modes[i].supported,
			    "Guest mode ID %d (%s) not supported.",
			    i, vm_guest_mode_string(i));
		run_test(i, iterations, interval, phys_offset);
		if (host_log_mode_option == LOG_MODE_ALL) {
			/* Run each log mode */
			for (j = 0; j < LOG_MODE_NUM; j++) {
				pr_info("Testing Log Mode '%s'\n",
					log_modes[j].name);
				host_log_mode = j;
				run_test(i, iterations, interval, phys_offset);
			}
		} else {
			host_log_mode = host_log_mode_option;
			run_test(i, iterations, interval, phys_offset);
		}
	}

	return 0;
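
The log_modes table above makes the collection strategy pluggable: a new mode needs only a name and whichever of the three hooks it cares about. A hypothetical additional entry might look like the following (the dirty-ring names are placeholders, not functions added by this diff):

	{
		.name = "dirty-ring",	/* hypothetical mode */
		.supported = dirty_ring_supported,
		.create_vm_done = dirty_ring_create_vm_done,
		.collect_dirty_pages = dirty_ring_collect_dirty_pages,
	},
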
@@ -63,9 +63,11 @@ enum vm_mem_backing_src_type {

int kvm_check_cap(long cap);
int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
		    struct kvm_enable_cap *cap);
void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);

struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
void kvm_vm_free(struct kvm_vm *vmp);
void kvm_vm_restart(struct kvm_vm *vmp, int perm);
void kvm_vm_release(struct kvm_vm *vmp);

@@ -149,6 +151,7 @@ void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
			  struct kvm_guest_debug *debug);
void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
		       struct kvm_mp_state *mp_state);
struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);

@@ -294,6 +297,8 @@ int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
	memcpy(&(g), _p, sizeof(g));				\
})

void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid);

/* Common ucalls */
enum {
	UCALL_NONE,
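
The new vcpu_enable_cap() mirrors the existing vm_enable_cap(); only the target of the KVM_ENABLE_CAP ioctl changes from the VM fd to a vCPU fd. A short usage sketch for a per-vCPU capability (the capability and its argument here are illustrative, borrowed from how enlightened VMCS is typically enabled):

	uint16_t evmcs_version;
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
		.args[0] = (unsigned long)&evmcs_version,	/* KVM reports the supported version here */
	};

	vcpu_enable_cap(vm, vcpu_id, &cap);
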
@@ -0,0 +1,198 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * tools/testing/selftests/kvm/include/perf_test_util.h
 *
 * Copyright (C) 2020, Google LLC.
 */

#ifndef SELFTEST_KVM_PERF_TEST_UTIL_H
#define SELFTEST_KVM_PERF_TEST_UTIL_H

#include "kvm_util.h"
#include "processor.h"

#define MAX_VCPUS 512

#define PAGE_SHIFT_4K  12
#define PTES_PER_4K_PT 512

#define TEST_MEM_SLOT_INDEX		1

/* Default guest test virtual memory offset */
#define DEFAULT_GUEST_TEST_MEM		0xc0000000

#define DEFAULT_PER_VCPU_MEM_SIZE	(1 << 30) /* 1G */

/*
 * Guest physical memory offset of the testing memory slot.
 * This will be set to the topmost valid physical address minus
 * the test memory size.
 */
static uint64_t guest_test_phys_mem;

/*
 * Guest virtual memory offset of the testing memory slot.
 * Must not conflict with identity mapped test code.
 */
static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;

/* Number of VCPUs for the test */
static int nr_vcpus = 1;

struct vcpu_args {
	uint64_t gva;
	uint64_t pages;

	/* Only used by the host userspace part of the vCPU thread */
	int vcpu_id;
};

struct perf_test_args {
	struct kvm_vm *vm;
	uint64_t host_page_size;
	uint64_t guest_page_size;
	int wr_fract;

	struct vcpu_args vcpu_args[MAX_VCPUS];
};

static struct perf_test_args perf_test_args;

/*
 * Continuously write to the first 8 bytes of each page in the
 * specified region.
 */
static void guest_code(uint32_t vcpu_id)
{
	struct vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
	uint64_t gva;
	uint64_t pages;
	int i;

	/* Make sure vCPU args data structure is not corrupt. */
	GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id);

	gva = vcpu_args->gva;
	pages = vcpu_args->pages;

	while (true) {
		for (i = 0; i < pages; i++) {
			uint64_t addr = gva + (i * perf_test_args.guest_page_size);

			if (i % perf_test_args.wr_fract == 0)
				*(uint64_t *)addr = 0x0123456789ABCDEF;
			else
				READ_ONCE(*(uint64_t *)addr);
		}

		GUEST_SYNC(1);
	}
}

static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus,
				uint64_t vcpu_memory_bytes)
{
	struct kvm_vm *vm;
	uint64_t pages = DEFAULT_GUEST_PHY_PAGES;
	uint64_t guest_num_pages;

	/* Account for a few pages per-vCPU for stacks */
	pages += DEFAULT_STACK_PGS * vcpus;

	/*
	 * Reserve twice the amount of memory needed to map the test region and
	 * the page table / stacks region, at 4k, for page tables. Do the
	 * calculation with 4K page size: the smallest of all archs. (e.g., 64K
	 * page size guest will need even less memory for page tables).
	 */
	pages += (2 * pages) / PTES_PER_4K_PT;
	pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) /
		 PTES_PER_4K_PT;
	pages = vm_adjust_num_guest_pages(mode, pages);

	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));

	vm = vm_create(mode, pages, O_RDWR);
	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
#ifdef __x86_64__
	vm_create_irqchip(vm);
#endif

	perf_test_args.vm = vm;
	perf_test_args.guest_page_size = vm_get_page_size(vm);
	perf_test_args.host_page_size = getpagesize();

	TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0,
		    "Guest memory size is not guest page size aligned.");

	guest_num_pages = (vcpus * vcpu_memory_bytes) /
			  perf_test_args.guest_page_size;
	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);

	/*
	 * If there should be more memory in the guest test region than there
	 * can be pages in the guest, it will definitely cause problems.
	 */
	TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
		    "Requested more guest memory than address space allows.\n"
		    "    guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n",
		    guest_num_pages, vm_get_max_gfn(vm), vcpus,
		    vcpu_memory_bytes);

	TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0,
		    "Guest memory size is not host page size aligned.");

	guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *
			      perf_test_args.guest_page_size;
	guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1);

#ifdef __s390x__
	/* Align to 1M (segment size) */
	guest_test_phys_mem &= ~((1 << 20) - 1);
#endif

	pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);

	/* Add an extra memory slot for testing */
	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
				    guest_test_phys_mem,
				    TEST_MEM_SLOT_INDEX,
				    guest_num_pages, 0);

	/* Do mapping for the demand paging memory slot */
	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);

	ucall_init(vm, NULL);

	return vm;
}

static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes)
{
	vm_paddr_t vcpu_gpa;
	struct vcpu_args *vcpu_args;
	int vcpu_id;

	for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
		vcpu_args = &perf_test_args.vcpu_args[vcpu_id];

		vm_vcpu_add_default(vm, vcpu_id, guest_code);

#ifdef __x86_64__
		vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid());
#endif

		vcpu_args->vcpu_id = vcpu_id;
		vcpu_args->gva = guest_test_virt_mem +
				 (vcpu_id * vcpu_memory_bytes);
		vcpu_args->pages = vcpu_memory_bytes /
				   perf_test_args.guest_page_size;

		vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes);
		pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
			 vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes);
	}
}

#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */
@@ -64,5 +64,7 @@ int64_t timespec_to_ns(struct timespec ts);
struct timespec timespec_add_ns(struct timespec ts, int64_t ns);
struct timespec timespec_add(struct timespec ts1, struct timespec ts2);
struct timespec timespec_sub(struct timespec ts1, struct timespec ts2);
struct timespec timespec_diff_now(struct timespec start);
struct timespec timespec_div(struct timespec ts, int divisor);

#endif /* SELFTEST_KVM_TEST_UTIL_H */

@@ -36,6 +36,8 @@
#define X86_CR4_SMAP		(1ul << 21)
#define X86_CR4_PKE		(1ul << 22)

#define UNEXPECTED_VECTOR_PORT 0xfff0u

/* General Registers in 64-Bit Mode */
struct gpr64_regs {
	u64 rax;

@@ -59,7 +61,7 @@ struct gpr64_regs {
struct desc64 {
	uint16_t limit0;
	uint16_t base0;
	unsigned base1:8, s:1, type:4, dpl:2, p:1;
	unsigned base1:8, type:4, s:1, dpl:2, p:1;
	unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
	uint32_t base3;
	uint32_t zero1;

@@ -239,6 +241,11 @@ static inline struct desc_ptr get_idt(void)
	return idt;
}

static inline void outl(uint16_t port, uint32_t value)
{
	__asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value));
}

#define SET_XMM(__var, __xmm) \
	asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)

@@ -338,6 +345,35 @@ uint32_t kvm_get_cpuid_max_basic(void);
uint32_t kvm_get_cpuid_max_extended(void);
void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);

struct ex_regs {
	uint64_t rax, rcx, rdx, rbx;
	uint64_t rbp, rsi, rdi;
	uint64_t r8, r9, r10, r11;
	uint64_t r12, r13, r14, r15;
	uint64_t vector;
	uint64_t error_code;
	uint64_t rip;
	uint64_t cs;
	uint64_t rflags;
};

void vm_init_descriptor_tables(struct kvm_vm *vm);
void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
void vm_handle_exception(struct kvm_vm *vm, int vector,
			 void (*handler)(struct ex_regs *));

/*
 * set_cpuid() - overwrites a matching cpuid entry with the provided value.
 *		 matches based on ent->function && ent->index. returns true
 *		 if a match was found and successfully overwritten.
 * @cpuid: the kvm cpuid list to modify.
 * @ent: cpuid entry to insert
 */
bool set_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *ent);

uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
		       uint64_t a3);

/*
 * Basic CPU control in CR0
 */
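
Together these declarations form the guest exception fixture: vm_init_descriptor_tables() builds the IDT/GDT, vcpu_init_descriptor_tables() points the vCPU at them, and vm_handle_exception() routes one vector to a C handler; vectors without a handler surface through assert_on_unhandled_exception(). A plausible usage sketch (the handler body is hypothetical):

	/* Hypothetical #UD handler: skip the faulting instruction. */
	static void guest_ud_handler(struct ex_regs *regs)
	{
		regs->rip += 2;	/* assumes a 2-byte undefined opcode */
	}

	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vm, 0);
	vm_handle_exception(vm, 6 /* #UD */, guest_ud_handler);
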
@ -350,3 +350,7 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
|
|||
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
|
||||
{
|
||||
}
|
||||
|
|
|
@ -94,6 +94,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
|
|||
struct kvm_run *run = vcpu_state(vm, vcpu_id);
|
||||
struct ucall ucall = {};
|
||||
|
||||
if (uc)
|
||||
memset(uc, 0, sizeof(*uc));
|
||||
|
||||
if (run->exit_reason == KVM_EXIT_MMIO &&
|
||||
run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) {
|
||||
vm_vaddr_t gva;
|
||||
|
|
|
@ -86,6 +86,34 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
|
|||
return ret;
|
||||
}
|
||||
|
||||
/* VCPU Enable Capability
|
||||
*
|
||||
* Input Args:
|
||||
* vm - Virtual Machine
|
||||
* vcpu_id - VCPU
|
||||
* cap - Capability
|
||||
*
|
||||
* Output Args: None
|
||||
*
|
||||
* Return: On success, 0. On failure a TEST_ASSERT failure is produced.
|
||||
*
|
||||
* Enables a capability (KVM_CAP_*) on the VCPU.
|
||||
*/
|
||||
int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
|
||||
struct kvm_enable_cap *cap)
|
||||
{
|
||||
struct vcpu *vcpu = vcpu_find(vm, vcpu_id);
|
||||
int r;
|
||||
|
||||
TEST_ASSERT(vcpu, "cannot find vcpu %d", vcpu_id);
|
||||
|
||||
r = ioctl(vcpu->fd, KVM_ENABLE_CAP, cap);
|
||||
TEST_ASSERT(!r, "KVM_ENABLE_CAP vCPU ioctl failed,\n"
|
||||
" rc: %i, errno: %i", r, errno);
|
||||
|
||||
return r;
|
||||
}
|
||||

static void vm_open(struct kvm_vm *vm, int perm)
{
        vm->kvm_fd = open(KVM_DEV_PATH, perm);

@ -152,7 +180,7 @@ _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params)
 * descriptor to control the created VM is created with the permissions
 * given by perm (e.g. O_RDWR).
 */
struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
{
        struct kvm_vm *vm;

@ -243,11 +271,6 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
        return vm;
}

struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
{
        return _vm_create(mode, phy_pages, perm);
}

/*
 * VM Restart
 *

@ -1204,6 +1227,9 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
        do {
                rc = ioctl(vcpu->fd, KVM_RUN, NULL);
        } while (rc == -1 && errno == EINTR);

        assert_on_unhandled_exception(vm, vcpuid);

        return rc;
}

@ -1260,6 +1286,35 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
                "rc: %i errno: %i", ret, errno);
}

/*
 * VM VCPU Get Reg List
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *
 * Output Args:
 *   None
 *
 * Return:
 *   A pointer to an allocated struct kvm_reg_list
 *
 * Get the list of guest registers which are supported for
 * KVM_GET_ONE_REG/KVM_SET_ONE_REG calls
 */
struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid)
{
        struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
        int ret;

        ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, &reg_list_n);
        TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");
        reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
        reg_list->n = reg_list_n.n;
        vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, reg_list);
        return reg_list;
}
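
The n == 0 probe works because KVM_GET_REG_LIST fails with E2BIG while writing the required count back into n; the second call then fills the properly sized buffer. A caller-side sketch (editor's illustration; the caller owns and frees the allocation):

        struct kvm_reg_list *list = vcpu_get_reg_list(vm, vcpuid);
        __u64 i;

        for (i = 0; i < list->n; i++)
                pr_info("supported reg: 0x%llx\n", (unsigned long long)list->reg[i]);
        free(list);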

/*
 * VM VCPU Regs Get
 *

@ -50,6 +50,8 @@ struct kvm_vm {
        vm_paddr_t pgd;
        vm_vaddr_t gdt;
        vm_vaddr_t tss;
        vm_vaddr_t idt;
        vm_vaddr_t handlers;
};

struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid);

@ -241,3 +241,7 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
        fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
                indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr);
}

void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
{
}

@ -38,6 +38,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
        struct kvm_run *run = vcpu_state(vm, vcpu_id);
        struct ucall ucall = {};

        if (uc)
                memset(uc, 0, sizeof(*uc));

        if (run->exit_reason == KVM_EXIT_S390_SIEIC &&
            run->s390_sieic.icptcode == 4 &&
            (run->s390_sieic.ipa >> 8) == 0x83 &&    /* 0x83 means DIAGNOSE */

@ -4,10 +4,13 @@
 *
 * Copyright (C) 2020, Google LLC.
 */
#include <stdlib.h>

#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <assert.h>
#include <stdlib.h>
#include <time.h>

#include "test_util.h"

/*

@ -81,6 +84,21 @@ struct timespec timespec_sub(struct timespec ts1, struct timespec ts2)
        return timespec_add_ns((struct timespec){0}, ns1 - ns2);
}

struct timespec timespec_diff_now(struct timespec start)
{
        struct timespec end;

        clock_gettime(CLOCK_MONOTONIC, &end);
        return timespec_sub(end, start);
}

struct timespec timespec_div(struct timespec ts, int divisor)
{
        int64_t ns = timespec_to_ns(ts) / divisor;

        return timespec_add_ns((struct timespec){0}, ns);
}
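
A short sketch of how the two new helpers compose (editor's illustration; run_iteration() is a hypothetical workload):

        struct timespec start, total, avg;
        int i;

        clock_gettime(CLOCK_MONOTONIC, &start);
        for (i = 0; i < 100; i++)
                run_iteration();                /* hypothetical workload */
        total = timespec_diff_now(start);       /* elapsed since start */
        avg = timespec_div(total, 100);         /* mean cost per iteration */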

void print_skip(const char *fmt, ...)
{
        va_list ap;

@ -0,0 +1,81 @@
handle_exception:
        push %r15
        push %r14
        push %r13
        push %r12
        push %r11
        push %r10
        push %r9
        push %r8

        push %rdi
        push %rsi
        push %rbp
        push %rbx
        push %rdx
        push %rcx
        push %rax
        mov %rsp, %rdi

        call route_exception

        pop %rax
        pop %rcx
        pop %rdx
        pop %rbx
        pop %rbp
        pop %rsi
        pop %rdi
        pop %r8
        pop %r9
        pop %r10
        pop %r11
        pop %r12
        pop %r13
        pop %r14
        pop %r15

        /* Discard vector and error code. */
        add $16, %rsp
        iretq
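
The push sequence above is what makes the pointer handed to route_exception() line up with struct ex_regs: the stack grows down, so the last push (rax) becomes the first member, followed by the vector and error code pushed by the per-vector stubs below, then the CPU-saved frame. The implied layout (editor's sketch inferred from the pushes; processor.h holds the authoritative definition):

        struct ex_regs {
                uint64_t rax, rcx, rdx, rbx;
                uint64_t rbp, rsi, rdi;
                uint64_t r8, r9, r10, r11;
                uint64_t r12, r13, r14, r15;
                uint64_t vector;
                uint64_t error_code;
                uint64_t rip;           /* saved by the CPU on exception entry */
                uint64_t cs;
                uint64_t rflags;
        };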
/*
 * Build the handle_exception wrappers which push the vector/error code on the
 * stack and an array of pointers to those wrappers.
 */
.pushsection .rodata
.globl idt_handlers
idt_handlers:
.popsection

.macro HANDLERS has_error from to
        vector = \from
        .rept \to - \from + 1
        .align 8

        /* Fetch current address and append it to idt_handlers. */
        current_handler = .
.pushsection .rodata
.quad current_handler
.popsection

        .if ! \has_error
        pushq $0
        .endif
        pushq $vector
        jmp handle_exception
        vector = vector + 1
        .endr
.endm

.global idt_handler_code
idt_handler_code:
        HANDLERS has_error=0 from=0 to=7
        HANDLERS has_error=1 from=8 to=8
        HANDLERS has_error=0 from=9 to=9
        HANDLERS has_error=1 from=10 to=14
        HANDLERS has_error=0 from=15 to=16
        HANDLERS has_error=1 from=17 to=17
        HANDLERS has_error=0 from=18 to=255

.section .note.GNU-stack, "", %progbits

@ -12,9 +12,18 @@
#include "../kvm_util_internal.h"
#include "processor.h"

#ifndef NUM_INTERRUPTS
#define NUM_INTERRUPTS 256
#endif

#define DEFAULT_CODE_SELECTOR 0x8
#define DEFAULT_DATA_SELECTOR 0x10

/* Minimum physical address used for virtual translation tables. */
#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000

vm_vaddr_t exception_handlers;

/* Virtual translation table structure declarations */
struct pageMapL4Entry {
        uint64_t present:1;

@ -392,11 +401,12 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
        desc->limit0 = segp->limit & 0xFFFF;
        desc->base0 = segp->base & 0xFFFF;
        desc->base1 = segp->base >> 16;
        desc->s = segp->s;
        desc->type = segp->type;
        desc->s = segp->s;
        desc->dpl = segp->dpl;
        desc->p = segp->present;
        desc->limit1 = segp->limit >> 16;
        desc->avl = segp->avl;
        desc->l = segp->l;
        desc->db = segp->db;
        desc->g = segp->g;

@ -556,9 +566,9 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m
        sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);

        kvm_seg_set_unusable(&sregs.ldt);
        kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs);
        kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds);
        kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es);
        kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs);
        kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds);
        kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es);
        kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
        break;

@ -1118,3 +1128,131 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
                *va_bits = (entry->eax >> 8) & 0xff;
        }
}

struct idt_entry {
        uint16_t offset0;
        uint16_t selector;
        uint16_t ist : 3;
        uint16_t : 5;
        uint16_t type : 4;
        uint16_t : 1;
        uint16_t dpl : 2;
        uint16_t p : 1;
        uint16_t offset1;
        uint32_t offset2;
        uint32_t reserved;
};

static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
                          int dpl, unsigned short selector)
{
        struct idt_entry *base =
                (struct idt_entry *)addr_gva2hva(vm, vm->idt);
        struct idt_entry *e = &base[vector];

        memset(e, 0, sizeof(*e));
        e->offset0 = addr;
        e->selector = selector;
        e->ist = 0;
        e->type = 14;
        e->dpl = dpl;
        e->p = 1;
        e->offset1 = addr >> 16;
        e->offset2 = addr >> 32;
}
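
The gate's three offset fields split the 64-bit stub address into bits [15:0], [31:16], and [63:32]; type = 14 marks a 64-bit interrupt gate. A worked example (editor's illustration with a hypothetical address):

        /* addr = 0x0000000000401a2c (hypothetical) */
        e->offset0 = 0x1a2c;            /* addr & 0xffff */
        e->offset1 = 0x0040;            /* (addr >> 16) & 0xffff */
        e->offset2 = 0x00000000;        /* addr >> 32 */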

void kvm_exit_unexpected_vector(uint32_t value)
{
        outl(UNEXPECTED_VECTOR_PORT, value);
}

void route_exception(struct ex_regs *regs)
{
        typedef void(*handler)(struct ex_regs *);
        handler *handlers = (handler *)exception_handlers;

        if (handlers && handlers[regs->vector]) {
                handlers[regs->vector](regs);
                return;
        }

        kvm_exit_unexpected_vector(regs->vector);
}

void vm_init_descriptor_tables(struct kvm_vm *vm)
{
        extern void *idt_handlers;
        int i;

        vm->idt = vm_vaddr_alloc(vm, getpagesize(), 0x2000, 0, 0);
        vm->handlers = vm_vaddr_alloc(vm, 256 * sizeof(void *), 0x2000, 0, 0);
        /* Handlers have the same address in both address spaces. */
        for (i = 0; i < NUM_INTERRUPTS; i++)
                set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0,
                              DEFAULT_CODE_SELECTOR);
}

void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
{
        struct kvm_sregs sregs;

        vcpu_sregs_get(vm, vcpuid, &sregs);
        sregs.idt.base = vm->idt;
        sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
        sregs.gdt.base = vm->gdt;
        sregs.gdt.limit = getpagesize() - 1;
        kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs);
        vcpu_sregs_set(vm, vcpuid, &sregs);
        *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
}

void vm_handle_exception(struct kvm_vm *vm, int vector,
                         void (*handler)(struct ex_regs *))
{
        vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);

        handlers[vector] = (vm_vaddr_t)handler;
}
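
Wiring a guest handler is then a three-call setup, exactly as the new test at the end of this diff does (editor's recap):

        vm_init_descriptor_tables(vm);                  /* build the IDT and handler table */
        vcpu_init_descriptor_tables(vm, VCPU_ID);       /* point the vCPU's IDT/GDT at them */
        vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);  /* install the #GP handler */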

void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
{
        if (vcpu_state(vm, vcpuid)->exit_reason == KVM_EXIT_IO
                && vcpu_state(vm, vcpuid)->io.port == UNEXPECTED_VECTOR_PORT
                && vcpu_state(vm, vcpuid)->io.size == 4) {
                /* Grab pointer to io data */
                uint32_t *data = (void *)vcpu_state(vm, vcpuid)
                        + vcpu_state(vm, vcpuid)->io.data_offset;

                TEST_ASSERT(false,
                            "Unexpected vectored event in guest (vector:0x%x)",
                            *data);
        }
}

bool set_cpuid(struct kvm_cpuid2 *cpuid,
               struct kvm_cpuid_entry2 *ent)
{
        int i;

        for (i = 0; i < cpuid->nent; i++) {
                struct kvm_cpuid_entry2 *cur = &cpuid->entries[i];

                if (cur->function != ent->function || cur->index != ent->index)
                        continue;

                memcpy(cur, ent, sizeof(struct kvm_cpuid_entry2));
                return true;
        }

        return false;
}

uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
                       uint64_t a3)
{
        uint64_t r;

        asm volatile("vmcall"
                     : "=a"(r)
                     : "b"(a0), "c"(a1), "d"(a2), "S"(a3));
        return r;
}

@ -40,6 +40,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
        struct kvm_run *run = vcpu_state(vm, vcpu_id);
        struct ucall ucall = {};

        if (uc)
                memset(uc, 0, sizeof(*uc));

        if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) {
                struct kvm_regs regs;
@ -0,0 +1,234 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020, Google LLC.
 *
 * Tests for KVM paravirtual feature disablement
 */
#include <asm/kvm_para.h>
#include <linux/kvm_para.h>
#include <stdint.h>

#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"

extern unsigned char rdmsr_start;
extern unsigned char rdmsr_end;

static u64 do_rdmsr(u32 idx)
{
        u32 lo, hi;

        asm volatile("rdmsr_start: rdmsr;"
                     "rdmsr_end:"
                     : "=a"(lo), "=c"(hi)
                     : "c"(idx));

        return (((u64) hi) << 32) | lo;
}

extern unsigned char wrmsr_start;
extern unsigned char wrmsr_end;

static void do_wrmsr(u32 idx, u64 val)
{
        u32 lo, hi;

        lo = val;
        hi = val >> 32;

        asm volatile("wrmsr_start: wrmsr;"
                     "wrmsr_end:"
                     : : "a"(lo), "c"(idx), "d"(hi));
}

static int nr_gp;

static void guest_gp_handler(struct ex_regs *regs)
{
        unsigned char *rip = (unsigned char *)regs->rip;
        bool r, w;

        r = rip == &rdmsr_start;
        w = rip == &wrmsr_start;
        GUEST_ASSERT(r || w);

        nr_gp++;

        if (r)
                regs->rip = (uint64_t)&rdmsr_end;
        else
                regs->rip = (uint64_t)&wrmsr_end;
}

struct msr_data {
        uint32_t idx;
        const char *name;
};

#define TEST_MSR(msr) { .idx = msr, .name = #msr }
#define UCALL_PR_MSR 0xdeadbeef
#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr)

/*
 * KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or
 * written, as the KVM_CPUID_FEATURES leaf is cleared.
 */
static struct msr_data msrs_to_test[] = {
        TEST_MSR(MSR_KVM_SYSTEM_TIME),
        TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW),
        TEST_MSR(MSR_KVM_WALL_CLOCK),
        TEST_MSR(MSR_KVM_WALL_CLOCK_NEW),
        TEST_MSR(MSR_KVM_ASYNC_PF_EN),
        TEST_MSR(MSR_KVM_STEAL_TIME),
        TEST_MSR(MSR_KVM_PV_EOI_EN),
        TEST_MSR(MSR_KVM_POLL_CONTROL),
        TEST_MSR(MSR_KVM_ASYNC_PF_INT),
        TEST_MSR(MSR_KVM_ASYNC_PF_ACK),
};

static void test_msr(struct msr_data *msr)
{
        PR_MSR(msr);
        do_rdmsr(msr->idx);
        GUEST_ASSERT(READ_ONCE(nr_gp) == 1);

        nr_gp = 0;
        do_wrmsr(msr->idx, 0);
        GUEST_ASSERT(READ_ONCE(nr_gp) == 1);
        nr_gp = 0;
}

struct hcall_data {
        uint64_t nr;
        const char *name;
};

#define TEST_HCALL(hc) { .nr = hc, .name = #hc }
#define UCALL_PR_HCALL 0xdeadc0de
#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc)

/*
 * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding
 * features have been cleared in KVM_CPUID_FEATURES.
 */
static struct hcall_data hcalls_to_test[] = {
        TEST_HCALL(KVM_HC_KICK_CPU),
        TEST_HCALL(KVM_HC_SEND_IPI),
        TEST_HCALL(KVM_HC_SCHED_YIELD),
};

static void test_hcall(struct hcall_data *hc)
{
        uint64_t r;

        PR_HCALL(hc);
        r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
        GUEST_ASSERT(r == -KVM_ENOSYS);
}

static void guest_main(void)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) {
                test_msr(&msrs_to_test[i]);
        }

        for (i = 0; i < ARRAY_SIZE(hcalls_to_test); i++) {
                test_hcall(&hcalls_to_test[i]);
        }

        GUEST_DONE();
}

static void clear_kvm_cpuid_features(struct kvm_cpuid2 *cpuid)
{
        struct kvm_cpuid_entry2 ent = {0};

        ent.function = KVM_CPUID_FEATURES;
        TEST_ASSERT(set_cpuid(cpuid, &ent),
                    "failed to clear KVM_CPUID_FEATURES leaf");
}

static void pr_msr(struct ucall *uc)
{
        struct msr_data *msr = (struct msr_data *)uc->args[0];

        pr_info("testing msr: %s (%#x)\n", msr->name, msr->idx);
}

static void pr_hcall(struct ucall *uc)
{
        struct hcall_data *hc = (struct hcall_data *)uc->args[0];

        pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr);
}

static void handle_abort(struct ucall *uc)
{
        TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0],
                  __FILE__, uc->args[1]);
}

#define VCPU_ID 0

static void enter_guest(struct kvm_vm *vm)
{
        struct kvm_run *run;
        struct ucall uc;
        int r;

        run = vcpu_state(vm, VCPU_ID);

        while (true) {
                r = _vcpu_run(vm, VCPU_ID);
                TEST_ASSERT(!r, "vcpu_run failed: %d\n", r);
                TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
                            "unexpected exit reason: %u (%s)",
                            run->exit_reason, exit_reason_str(run->exit_reason));

                switch (get_ucall(vm, VCPU_ID, &uc)) {
                case UCALL_PR_MSR:
                        pr_msr(&uc);
                        break;
                case UCALL_PR_HCALL:
                        pr_hcall(&uc);
                        break;
                case UCALL_ABORT:
                        handle_abort(&uc);
                        return;
                case UCALL_DONE:
                        return;
                }
        }
}

int main(void)
{
        struct kvm_enable_cap cap = {0};
        struct kvm_cpuid2 *best;
        struct kvm_vm *vm;

        if (!kvm_check_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID)) {
                pr_info("will skip kvm paravirt restriction tests.\n");
                return 0;
        }

        vm = vm_create_default(VCPU_ID, 0, guest_main);

        cap.cap = KVM_CAP_ENFORCE_PV_FEATURE_CPUID;
        cap.args[0] = 1;
        vcpu_enable_cap(vm, VCPU_ID, &cap);

        best = kvm_get_supported_cpuid();
        clear_kvm_cpuid_features(best);
        vcpu_set_cpuid(vm, VCPU_ID, best);

        vm_init_descriptor_tables(vm);
        vcpu_init_descriptor_tables(vm, VCPU_ID);
        vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);

        enter_guest(vm);
        kvm_vm_free(vm);
}