From 77791dc0e162463f3d885ea6481d07bae29d2494 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Fri, 22 Jul 2011 13:02:51 +0100 Subject: [PATCH] Allow use of block devices for guest filesystem Currently the LXC driver can only populate filesystems from host filesystems, using bind mounts. This patch allows host block devices to be mounted. It autodetects the filesystem format at mount time, and adds the block device to the cgroups ACL. Example usage is * src/lxc/lxc_container.c: Mount block device filesystems * src/lxc/lxc_controller.c: Add block device filesystems to cgroups ACL --- src/lxc/lxc_container.c | 181 ++++++++++++++++++++++++++++++++++++++- src/lxc/lxc_controller.c | 17 ++++ 2 files changed, 196 insertions(+), 2 deletions(-) diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index 52ef35149b..4e84344e2e 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -35,6 +35,7 @@ #include #include #include +#include /* Yes, we want linux private one, for _syscall2() macro */ #include @@ -608,13 +609,185 @@ static int lxcContainerMountFSBind(virDomainFSDefPtr fs, virReportSystemError(errno, _("Failed to make directory %s readonly"), fs->dst); - goto cleanup; } - } ret = 0; +cleanup: + VIR_FREE(src); + return ret; +} + + + +/* + * This functions attempts to do automatic detection of filesystem + * type following the same rules as the util-linux 'mount' binary. + * + * The main difference is that we don't (currently) try to use + * libblkid to detect the format first. We go straight to using + * /etc/filesystems, and then /proc/filesystems + */ +static int lxcContainerMountFSBlockAuto(virDomainFSDefPtr fs, + int fsflags, + const char *src, + const char *srcprefix) +{ + FILE *fp = NULL; + int ret = -1; + bool tryProc = false; + bool gotStar = false; + char *fslist = NULL; + char *line = NULL; + const char *type; + + VIR_DEBUG("src=%s srcprefix=%s dst=%s", src, srcprefix, fs->dst); + + /* First time around we use /etc/filesystems */ +retry: + if (virAsprintf(&fslist, "%s%s", + srcprefix, tryProc ? "/proc/filesystems" : "/etc/filesystems") < 0) { + virReportOOMError(); + goto cleanup; + } + + VIR_DEBUG("Open fslist %s", fslist); + if (!(fp = fopen(fslist, "r"))) { + /* If /etc/filesystems does not exist, then we need to retry + * with /proc/filesystems next + */ + if (errno == ENOENT && + !tryProc) { + tryProc = true; + VIR_FREE(fslist); + goto retry; + } + + virReportSystemError(errno, + _("Unable to read %s"), + fslist); + goto cleanup; + } + + while (!feof(fp)) { + size_t n; + VIR_FREE(line); + if (getline(&line, &n, fp) <= 0) { + if (feof(fp)) + break; + + goto cleanup; + } + + if (strstr(line, "nodev")) + continue; + + type = strchr(line, '\n'); + if (type) + line[type-line] = '\0'; + + type = line; + virSkipSpaces(&type); + + /* + * /etc/filesystems is only allowed to contain '*' on the last line + */ + if (gotStar) { + lxcError(VIR_ERR_INTERNAL_ERROR, + _("%s has unexpected '*' before last line"), + fslist); + goto cleanup; + } + + /* An '*' on the last line in /etc/filesystems + * means try /proc/filesystems next. We don't + * jump immediately though, since we need to see + * if any more lines follow + */ + if (!tryProc && + STREQ(type, "*")) + gotStar = true; + + VIR_DEBUG("Trying mount %s with %s", src, type); + if (mount(src, fs->dst, type, fsflags, NULL) < 0) { + /* These errnos indicate a bogus filesystem type for + * the image we have, so skip to the next type + */ + if (errno == EINVAL || errno == ENODEV) + continue; + + virReportSystemError(errno, + _("Failed to bind mount directory %s to %s"), + src, fs->dst); + goto cleanup; + } + + ret = 0; + break; + } + + /* We've got to the end of /etc/filesystems and saw + * a '*', so we must try /proc/filesystems next + */ + if (ret != 0 && + !tryProc && + gotStar) { + tryProc = true; + VIR_FREE(fslist); + VIR_FORCE_FCLOSE(fp); + goto retry; + } + + VIR_DEBUG("Done mounting filesystem ret=%d tryProc=%d", ret, tryProc); + +cleanup: + VIR_FREE(line); + VIR_FORCE_FCLOSE(fp); + return ret; +} + + +/* + * Mount a block device 'src' on fs->dst, automatically + * probing for filesystem type + */ +static int lxcContainerMountFSBlockHelper(virDomainFSDefPtr fs, + const char *src, + const char *srcprefix) +{ + int fsflags = 0; + int ret = -1; + if (fs->readonly) + fsflags |= MS_RDONLY; + + if (virFileMakePath(fs->dst) < 0) { + virReportSystemError(errno, + _("Failed to create %s"), + fs->dst); + goto cleanup; + } + + ret = lxcContainerMountFSBlockAuto(fs, fsflags, src, srcprefix); + +cleanup: + return ret; +} + + +static int lxcContainerMountFSBlock(virDomainFSDefPtr fs, + const char *srcprefix) +{ + char *src = NULL; + int ret = -1; + + if (virAsprintf(&src, "%s%s", srcprefix, fs->src) < 0) { + virReportOOMError(); + goto cleanup; + } + + ret = lxcContainerMountFSBlockHelper(fs, src, srcprefix); + VIR_DEBUG("Done mounting filesystem ret=%d", ret); cleanup: @@ -631,6 +804,10 @@ static int lxcContainerMountFS(virDomainFSDefPtr fs, if (lxcContainerMountFSBind(fs, srcprefix) < 0) return -1; break; + case VIR_DOMAIN_FS_TYPE_BLOCK: + if (lxcContainerMountFSBlock(fs, srcprefix) < 0) + return -1; + break; default: lxcError(VIR_ERR_CONFIG_UNSUPPORTED, _("Cannot mount filesystem type %s"), diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index ff42aa569a..bc5ee2524d 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -188,6 +188,23 @@ static int lxcSetContainerResources(virDomainDefPtr def) } } + for (i = 0 ; i < def->nfss ; i++) { + if (def->fss[i]->type != VIR_DOMAIN_FS_TYPE_BLOCK) + continue; + + rc = virCgroupAllowDevicePath(cgroup, + def->fss[i]->src, + def->fss[i]->readonly ? + VIR_CGROUP_DEVICE_READ : + VIR_CGROUP_DEVICE_RW); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to allow device %s for domain %s"), + def->fss[i]->src, def->name); + goto cleanup; + } + } + rc = virCgroupAllowDeviceMajor(cgroup, 'c', LXC_DEV_MAJ_PTY, VIR_CGROUP_DEVICE_RWM); if (rc != 0) {