qemu/hw/mem/pc-dimm.c

449 lines
13 KiB
C
Raw Normal View History

/*
* Dimm device for Memory Hotplug
*
* Copyright ProfitBricks GmbH 2012
* Copyright (C) 2014 Red Hat Inc
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>
*/
#include "qemu/osdep.h"
#include "hw/mem/pc-dimm.h"
2016-03-14 16:01:28 +08:00
#include "qapi/error.h"
#include "qemu/config-file.h"
#include "qapi/visitor.h"
#include "qemu/range.h"
#include "sysemu/numa.h"
#include "sysemu/kvm.h"
#include "trace.h"
#include "hw/virtio/vhost.h"
typedef struct pc_dimms_capacity {
uint64_t size;
Error **errp;
} pc_dimms_capacity;
void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
MemoryRegion *mr, uint64_t align, Error **errp)
{
int slot;
MachineState *machine = MACHINE(qdev_get_machine());
PCDIMMDevice *dimm = PC_DIMM(dev);
Error *local_err = NULL;
uint64_t existing_dimms_capacity = 0;
uint64_t addr;
addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err);
if (local_err) {
goto out;
}
addr = pc_dimm_get_free_addr(hpms->base,
memory_region_size(&hpms->mr),
!addr ? NULL : &addr, align,
memory_region_size(mr), &local_err);
if (local_err) {
goto out;
}
existing_dimms_capacity = pc_existing_dimms_capacity(&local_err);
if (local_err) {
goto out;
}
if (existing_dimms_capacity + memory_region_size(mr) >
machine->maxram_size - machine->ram_size) {
error_setg(&local_err, "not enough space, currently 0x%" PRIx64
" in use of total hot pluggable 0x" RAM_ADDR_FMT,
existing_dimms_capacity,
machine->maxram_size - machine->ram_size);
goto out;
}
object_property_set_int(OBJECT(dev), addr, PC_DIMM_ADDR_PROP, &local_err);
if (local_err) {
goto out;
}
trace_mhp_pc_dimm_assigned_address(addr);
slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP, &local_err);
if (local_err) {
goto out;
}
slot = pc_dimm_get_free_slot(slot == PC_DIMM_UNASSIGNED_SLOT ? NULL : &slot,
machine->ram_slots, &local_err);
if (local_err) {
goto out;
}
object_property_set_int(OBJECT(dev), slot, PC_DIMM_SLOT_PROP, &local_err);
if (local_err) {
goto out;
}
trace_mhp_pc_dimm_assigned_slot(slot);
if (kvm_enabled() && !kvm_has_free_slot(machine)) {
error_setg(&local_err, "hypervisor has no free memory slots left");
goto out;
}
if (!vhost_has_free_slot()) {
error_setg(&local_err, "a used vhost backend has no free"
" memory slots left");
goto out;
}
memory_region_add_subregion(&hpms->mr, addr - hpms->base, mr);
vmstate_register_ram(mr, dev);
numa_set_mem_node_id(addr, memory_region_size(mr), dimm->node);
out:
error_propagate(errp, local_err);
}
void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms,
MemoryRegion *mr)
{
PCDIMMDevice *dimm = PC_DIMM(dev);
numa_unset_mem_node_id(dimm->addr, memory_region_size(mr), dimm->node);
memory_region_del_subregion(&hpms->mr, mr);
vmstate_unregister_ram(mr, dev);
}
static int pc_existing_dimms_capacity_internal(Object *obj, void *opaque)
{
pc_dimms_capacity *cap = opaque;
uint64_t *size = &cap->size;
if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
DeviceState *dev = DEVICE(obj);
if (dev->realized) {
(*size) += object_property_get_int(obj, PC_DIMM_SIZE_PROP,
cap->errp);
}
if (cap->errp && *cap->errp) {
return 1;
}
}
object_child_foreach(obj, pc_existing_dimms_capacity_internal, opaque);
return 0;
}
uint64_t pc_existing_dimms_capacity(Error **errp)
{
pc_dimms_capacity cap;
cap.size = 0;
cap.errp = errp;
pc_existing_dimms_capacity_internal(qdev_get_machine(), &cap);
return cap.size;
}
int qmp_pc_dimm_device_list(Object *obj, void *opaque)
{
MemoryDeviceInfoList ***prev = opaque;
if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
DeviceState *dev = DEVICE(obj);
if (dev->realized) {
MemoryDeviceInfoList *elem = g_new0(MemoryDeviceInfoList, 1);
MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1);
PCDIMMDeviceInfo *di = g_new0(PCDIMMDeviceInfo, 1);
DeviceClass *dc = DEVICE_GET_CLASS(obj);
PCDIMMDevice *dimm = PC_DIMM(obj);
if (dev->id) {
di->has_id = true;
di->id = g_strdup(dev->id);
}
di->hotplugged = dev->hotplugged;
di->hotpluggable = dc->hotpluggable;
di->addr = dimm->addr;
di->slot = dimm->slot;
di->node = dimm->node;
di->size = object_property_get_int(OBJECT(dimm), PC_DIMM_SIZE_PROP,
NULL);
di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem));
qapi: Don't special-case simple union wrappers Simple unions were carrying a special case that hid their 'data' QMP member from the resulting C struct, via the hack method QAPISchemaObjectTypeVariant.simple_union_type(). But by using the work we started by unboxing flat union and alternate branches, coupled with the ability to visit the members of an implicit type, we can now expose the simple union's implicit type in qapi-types.h: | struct q_obj_ImageInfoSpecificQCow2_wrapper { | ImageInfoSpecificQCow2 *data; | }; | | struct q_obj_ImageInfoSpecificVmdk_wrapper { | ImageInfoSpecificVmdk *data; | }; ... | struct ImageInfoSpecific { | ImageInfoSpecificKind type; | union { /* union tag is @type */ | void *data; |- ImageInfoSpecificQCow2 *qcow2; |- ImageInfoSpecificVmdk *vmdk; |+ q_obj_ImageInfoSpecificQCow2_wrapper qcow2; |+ q_obj_ImageInfoSpecificVmdk_wrapper vmdk; | } u; | }; Doing this removes asymmetry between QAPI's QMP side and its C side (both sides now expose 'data'), and means that the treatment of a simple union as sugar for a flat union is now equivalent in both languages (previously the two approaches used a different layer of dereferencing, where the simple union could be converted to a flat union with equivalent C layout but different {} on the wire, or to an equivalent QMP wire form but with different C representation). Using the implicit type also lets us get rid of the simple_union_type() hack. Of course, now all clients of simple unions have to adjust from using su->u.member to using su->u.member.data; while this touches a number of files in the tree, some earlier cleanup patches helped minimize the change to the initialization of a temporary variable rather than every single member access. The generated qapi-visit.c code is also affected by the layout change: |@@ -7393,10 +7393,10 @@ void visit_type_ImageInfoSpecific_member | } | switch (obj->type) { | case IMAGE_INFO_SPECIFIC_KIND_QCOW2: |- visit_type_ImageInfoSpecificQCow2(v, "data", &obj->u.qcow2, &err); |+ visit_type_q_obj_ImageInfoSpecificQCow2_wrapper_members(v, &obj->u.qcow2, &err); | break; | case IMAGE_INFO_SPECIFIC_KIND_VMDK: |- visit_type_ImageInfoSpecificVmdk(v, "data", &obj->u.vmdk, &err); |+ visit_type_q_obj_ImageInfoSpecificVmdk_wrapper_members(v, &obj->u.vmdk, &err); | break; | default: | abort(); Signed-off-by: Eric Blake <eblake@redhat.com> Message-Id: <1458254921-17042-13-git-send-email-eblake@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-03-18 06:48:37 +08:00
info->u.dimm.data = di;
elem->value = info;
elem->next = NULL;
**prev = elem;
*prev = &elem->next;
}
}
object_child_foreach(obj, qmp_pc_dimm_device_list, opaque);
return 0;
}
static int pc_dimm_slot2bitmap(Object *obj, void *opaque)
{
unsigned long *bitmap = opaque;
if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
DeviceState *dev = DEVICE(obj);
if (dev->realized) { /* count only realized DIMMs */
PCDIMMDevice *d = PC_DIMM(obj);
set_bit(d->slot, bitmap);
}
}
object_child_foreach(obj, pc_dimm_slot2bitmap, opaque);
return 0;
}
int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp)
{
unsigned long *bitmap = bitmap_new(max_slots);
int slot = 0;
object_child_foreach(qdev_get_machine(), pc_dimm_slot2bitmap, bitmap);
/* check if requested slot is not occupied */
if (hint) {
if (*hint >= max_slots) {
error_setg(errp, "invalid slot# %d, should be less than %d",
*hint, max_slots);
} else if (!test_bit(*hint, bitmap)) {
slot = *hint;
} else {
error_setg(errp, "slot %d is busy", *hint);
}
goto out;
}
/* search for free slot */
slot = find_first_zero_bit(bitmap, max_slots);
if (slot == max_slots) {
error_setg(errp, "no free slots available");
}
out:
g_free(bitmap);
return slot;
}
static gint pc_dimm_addr_sort(gconstpointer a, gconstpointer b)
{
PCDIMMDevice *x = PC_DIMM(a);
PCDIMMDevice *y = PC_DIMM(b);
Int128 diff = int128_sub(int128_make64(x->addr), int128_make64(y->addr));
if (int128_lt(diff, int128_zero())) {
return -1;
} else if (int128_gt(diff, int128_zero())) {
return 1;
}
return 0;
}
static int pc_dimm_built_list(Object *obj, void *opaque)
{
GSList **list = opaque;
if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
DeviceState *dev = DEVICE(obj);
if (dev->realized) { /* only realized DIMMs matter */
*list = g_slist_insert_sorted(*list, dev, pc_dimm_addr_sort);
}
}
object_child_foreach(obj, pc_dimm_built_list, opaque);
return 0;
}
uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
uint64_t address_space_size,
uint64_t *hint, uint64_t align, uint64_t size,
Error **errp)
{
GSList *list = NULL, *item;
uint64_t new_addr, ret = 0;
uint64_t address_space_end = address_space_start + address_space_size;
g_assert(QEMU_ALIGN_UP(address_space_start, align) == address_space_start);
if (!address_space_size) {
error_setg(errp, "memory hotplug is not enabled, "
"please add maxmem option");
goto out;
}
if (hint && QEMU_ALIGN_UP(*hint, align) != *hint) {
error_setg(errp, "address must be aligned to 0x%" PRIx64 " bytes",
align);
goto out;
}
if (QEMU_ALIGN_UP(size, align) != size) {
error_setg(errp, "backend memory size must be multiple of 0x%"
PRIx64, align);
goto out;
}
assert(address_space_end > address_space_start);
object_child_foreach(qdev_get_machine(), pc_dimm_built_list, &list);
if (hint) {
new_addr = *hint;
} else {
new_addr = address_space_start;
}
/* find address range that will fit new DIMM */
for (item = list; item; item = g_slist_next(item)) {
PCDIMMDevice *dimm = item->data;
uint64_t dimm_size = object_property_get_int(OBJECT(dimm),
PC_DIMM_SIZE_PROP,
errp);
if (errp && *errp) {
goto out;
}
if (ranges_overlap(dimm->addr, dimm_size, new_addr, size)) {
if (hint) {
DeviceState *d = DEVICE(dimm);
error_setg(errp, "address range conflicts with '%s'", d->id);
goto out;
}
new_addr = QEMU_ALIGN_UP(dimm->addr + dimm_size, align);
}
}
ret = new_addr;
if (new_addr < address_space_start) {
error_setg(errp, "can't add memory [0x%" PRIx64 ":0x%" PRIx64
"] at 0x%" PRIx64, new_addr, size, address_space_start);
} else if ((new_addr + size) > address_space_end) {
error_setg(errp, "can't add memory [0x%" PRIx64 ":0x%" PRIx64
"] beyond 0x%" PRIx64, new_addr, size, address_space_end);
}
out:
g_slist_free(list);
return ret;
}
static Property pc_dimm_properties[] = {
DEFINE_PROP_UINT64(PC_DIMM_ADDR_PROP, PCDIMMDevice, addr, 0),
DEFINE_PROP_UINT32(PC_DIMM_NODE_PROP, PCDIMMDevice, node, 0),
DEFINE_PROP_INT32(PC_DIMM_SLOT_PROP, PCDIMMDevice, slot,
PC_DIMM_UNASSIGNED_SLOT),
DEFINE_PROP_END_OF_LIST(),
};
static void pc_dimm_get_size(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
{
int64_t value;
MemoryRegion *mr;
PCDIMMDevice *dimm = PC_DIMM(obj);
PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(obj);
mr = ddc->get_memory_region(dimm);
value = memory_region_size(mr);
qapi: Swap visit_* arguments for consistent 'name' placement JSON uses "name":value, but many of our visitor interfaces were called with visit_type_FOO(v, &value, name, errp). This can be a bit confusing to have to mentally swap the parameter order to match JSON order. It's particularly bad for visit_start_struct(), where the 'name' parameter is smack in the middle of the otherwise-related group of 'obj, kind, size' parameters! It's time to do a global swap of the parameter ordering, so that the 'name' parameter is always immediately after the Visitor argument. Additional reason in favor of the swap: the existing include/qjson.h prefers listing 'name' first in json_prop_*(), and I have plans to unify that file with the qapi visitors; listing 'name' first in qapi will minimize churn to the (admittedly few) qjson.h clients. Later patches will then fix docs, object.h, visitor-impl.h, and those clients to match. Done by first patching scripts/qapi*.py by hand to make generated files do what I want, then by running the following Coccinelle script to affect the rest of the code base: $ spatch --sp-file script `git grep -l '\bvisit_' -- '**/*.[ch]'` I then had to apply some touchups (Coccinelle insisted on TAB indentation in visitor.h, and botched the signature of visit_type_enum() by rewriting 'const char *const strings[]' to the syntactically invalid 'const char*const[] strings'). The movement of parameters is sufficient to provoke compiler errors if any callers were missed. // Part 1: Swap declaration order @@ type TV, TErr, TObj, T1, T2; identifier OBJ, ARG1, ARG2; @@ void visit_start_struct -(TV v, TObj OBJ, T1 ARG1, const char *name, T2 ARG2, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp) { ... } @@ type bool, TV, T1; identifier ARG1; @@ bool visit_optional -(TV v, T1 ARG1, const char *name) +(TV v, const char *name, T1 ARG1) { ... } @@ type TV, TErr, TObj, T1; identifier OBJ, ARG1; @@ void visit_get_next_type -(TV v, TObj OBJ, T1 ARG1, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, TErr errp) { ... } @@ type TV, TErr, TObj, T1, T2; identifier OBJ, ARG1, ARG2; @@ void visit_type_enum -(TV v, TObj OBJ, T1 ARG1, T2 ARG2, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, T1 ARG1, T2 ARG2, TErr errp) { ... } @@ type TV, TErr, TObj; identifier OBJ; identifier VISIT_TYPE =~ "^visit_type_"; @@ void VISIT_TYPE -(TV v, TObj OBJ, const char *name, TErr errp) +(TV v, const char *name, TObj OBJ, TErr errp) { ... } // Part 2: swap caller order @@ expression V, NAME, OBJ, ARG1, ARG2, ERR; identifier VISIT_TYPE =~ "^visit_type_"; @@ ( -visit_start_struct(V, OBJ, ARG1, NAME, ARG2, ERR) +visit_start_struct(V, NAME, OBJ, ARG1, ARG2, ERR) | -visit_optional(V, ARG1, NAME) +visit_optional(V, NAME, ARG1) | -visit_get_next_type(V, OBJ, ARG1, NAME, ERR) +visit_get_next_type(V, NAME, OBJ, ARG1, ERR) | -visit_type_enum(V, OBJ, ARG1, ARG2, NAME, ERR) +visit_type_enum(V, NAME, OBJ, ARG1, ARG2, ERR) | -VISIT_TYPE(V, OBJ, NAME, ERR) +VISIT_TYPE(V, NAME, OBJ, ERR) ) Signed-off-by: Eric Blake <eblake@redhat.com> Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com> Message-Id: <1454075341-13658-19-git-send-email-eblake@redhat.com> Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-01-29 21:48:54 +08:00
visit_type_int(v, name, &value, errp);
}
static void pc_dimm_check_memdev_is_busy(Object *obj, const char *name,
Object *val, Error **errp)
{
MemoryRegion *mr;
Error *local_err = NULL;
mr = host_memory_backend_get_memory(MEMORY_BACKEND(val), &local_err);
if (local_err) {
goto out;
}
if (memory_region_is_mapped(mr)) {
char *path = object_get_canonical_path_component(val);
error_setg(&local_err, "can't use already busy memdev: %s", path);
g_free(path);
} else {
qdev_prop_allow_set_link_before_realize(obj, name, val, &local_err);
}
out:
error_propagate(errp, local_err);
}
static void pc_dimm_init(Object *obj)
{
PCDIMMDevice *dimm = PC_DIMM(obj);
object_property_add(obj, PC_DIMM_SIZE_PROP, "int", pc_dimm_get_size,
NULL, NULL, NULL, &error_abort);
object_property_add_link(obj, PC_DIMM_MEMDEV_PROP, TYPE_MEMORY_BACKEND,
(Object **)&dimm->hostmem,
pc_dimm_check_memdev_is_busy,
OBJ_PROP_LINK_UNREF_ON_RELEASE,
&error_abort);
}
static void pc_dimm_realize(DeviceState *dev, Error **errp)
{
PCDIMMDevice *dimm = PC_DIMM(dev);
if (!dimm->hostmem) {
error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property is not set");
return;
}
if (((nb_numa_nodes > 0) && (dimm->node >= nb_numa_nodes)) ||
(!nb_numa_nodes && dimm->node)) {
error_setg(errp, "'DIMM property " PC_DIMM_NODE_PROP " has value %"
PRIu32 "' which exceeds the number of numa nodes: %d",
dimm->node, nb_numa_nodes ? nb_numa_nodes : 1);
return;
}
}
static MemoryRegion *pc_dimm_get_memory_region(PCDIMMDevice *dimm)
{
return host_memory_backend_get_memory(dimm->hostmem, &error_abort);
}
static void pc_dimm_class_init(ObjectClass *oc, void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
PCDIMMDeviceClass *ddc = PC_DIMM_CLASS(oc);
dc->realize = pc_dimm_realize;
dc->props = pc_dimm_properties;
dc->desc = "DIMM memory module";
ddc->get_memory_region = pc_dimm_get_memory_region;
}
static TypeInfo pc_dimm_info = {
.name = TYPE_PC_DIMM,
.parent = TYPE_DEVICE,
.instance_size = sizeof(PCDIMMDevice),
.instance_init = pc_dimm_init,
.class_init = pc_dimm_class_init,
.class_size = sizeof(PCDIMMDeviceClass),
};
static void pc_dimm_register_types(void)
{
type_register_static(&pc_dimm_info);
}
type_init(pc_dimm_register_types)