mirror of https://gitee.com/openkylin/libvirt.git
save: let iohelper work on O_DIRECT fds
Required for a coming patch where iohelper will operate on O_DIRECT fds. There, the user-space memory must be aligned to file system boundaries (at least 512, but using page-aligned works better, and some file systems prefer 64k). Made tougher by the fact that VIR_ALLOC won't work on void *, but posix_memalign won't work on char * and isn't available everywhere. This patch makes some simplifying assumptions - namely, output to an O_DIRECT fd will only be attempted on an empty seekable file (hence, no need to worry about preserving existing data on a partial block, and ftruncate will work to undo the effects of having to round up the size of the last block written), and input from an O_DIRECT fd will only be attempted on a complete seekable file with the only possible short read at EOF. * configure.ac (AC_CHECK_FUNCS_ONCE): Check for posix_memalign. * src/util/iohelper.c (runIO): Use aligned memory, and handle quirks of O_DIRECT on last write.
This commit is contained in:
parent
1eb6647979
commit
12291656b1
|
@ -121,9 +121,9 @@ AC_CHECK_SIZEOF([long])
|
||||||
|
|
||||||
dnl Availability of various common functions (non-fatal if missing),
|
dnl Availability of various common functions (non-fatal if missing),
|
||||||
dnl and various less common threadsafe functions
|
dnl and various less common threadsafe functions
|
||||||
AC_CHECK_FUNCS_ONCE([cfmakeraw regexec sched_getaffinity getuid getgid \
|
AC_CHECK_FUNCS_ONCE([cfmakeraw geteuid getgid getgrnam_r getmntent_r \
|
||||||
geteuid initgroups posix_fallocate mmap kill \
|
getpwuid_r getuid initgroups kill mmap posix_fallocate posix_memalign \
|
||||||
getmntent_r getgrnam_r getpwuid_r])
|
regexec sched_getaffinity])
|
||||||
|
|
||||||
dnl Availability of pthread functions (if missing, win32 threading is
|
dnl Availability of pthread functions (if missing, win32 threading is
|
||||||
dnl assumed). Because of $LIB_PTHREAD, we cannot use AC_CHECK_FUNCS_ONCE.
|
dnl assumed). Because of $LIB_PTHREAD, we cannot use AC_CHECK_FUNCS_ONCE.
|
||||||
|
|
|
@ -74,17 +74,32 @@ cleanup:
|
||||||
static int
|
static int
|
||||||
runIO(const char *path, int fd, int oflags, unsigned long long length)
|
runIO(const char *path, int fd, int oflags, unsigned long long length)
|
||||||
{
|
{
|
||||||
char *buf = NULL;
|
void *base = NULL; /* Location to be freed */
|
||||||
|
char *buf = NULL; /* Aligned location within base */
|
||||||
size_t buflen = 1024*1024;
|
size_t buflen = 1024*1024;
|
||||||
|
intptr_t alignMask = 64*1024 - 1;
|
||||||
int ret = -1;
|
int ret = -1;
|
||||||
int fdin, fdout;
|
int fdin, fdout;
|
||||||
const char *fdinname, *fdoutname;
|
const char *fdinname, *fdoutname;
|
||||||
unsigned long long total = 0;
|
unsigned long long total = 0;
|
||||||
|
bool direct = O_DIRECT && ((oflags & O_DIRECT) != 0);
|
||||||
|
bool shortRead = false; /* true if we hit a short read */
|
||||||
|
off_t end = 0;
|
||||||
|
|
||||||
if (VIR_ALLOC_N(buf, buflen) < 0) {
|
#if HAVE_POSIX_MEMALIGN
|
||||||
|
if (posix_memalign(&base, alignMask + 1, buflen)) {
|
||||||
virReportOOMError();
|
virReportOOMError();
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
buf = base;
|
||||||
|
#else
|
||||||
|
if (VIR_ALLOC_N(buf, buflen + alignMask) < 0) {
|
||||||
|
virReportOOMError();
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
base = buf;
|
||||||
|
buf = (char *) (((intptr_t) base + alignMask) & alignMask);
|
||||||
|
#endif
|
||||||
|
|
||||||
switch (oflags & O_ACCMODE) {
|
switch (oflags & O_ACCMODE) {
|
||||||
case O_RDONLY:
|
case O_RDONLY:
|
||||||
|
@ -92,12 +107,26 @@ runIO(const char *path, int fd, int oflags, unsigned long long length)
|
||||||
fdinname = path;
|
fdinname = path;
|
||||||
fdout = STDOUT_FILENO;
|
fdout = STDOUT_FILENO;
|
||||||
fdoutname = "stdout";
|
fdoutname = "stdout";
|
||||||
|
/* To make the implementation simpler, we give up on any
|
||||||
|
* attempt to use O_DIRECT in a non-trivial manner. */
|
||||||
|
if (direct && ((end = lseek(fd, 0, SEEK_CUR)) != 0 || length)) {
|
||||||
|
virReportSystemError(end < 0 ? errno : EINVAL, "%s",
|
||||||
|
_("O_DIRECT read needs entire seekable file"));
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case O_WRONLY:
|
case O_WRONLY:
|
||||||
fdin = STDIN_FILENO;
|
fdin = STDIN_FILENO;
|
||||||
fdinname = "stdin";
|
fdinname = "stdin";
|
||||||
fdout = fd;
|
fdout = fd;
|
||||||
fdoutname = path;
|
fdoutname = path;
|
||||||
|
/* To make the implementation simpler, we give up on any
|
||||||
|
* attempt to use O_DIRECT in a non-trivial manner. */
|
||||||
|
if (direct && (end = lseek(fd, 0, SEEK_END)) != 0) {
|
||||||
|
virReportSystemError(end < 0 ? errno : EINVAL, "%s",
|
||||||
|
_("O_DIRECT write needs empty seekable file"));
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case O_RDWR:
|
case O_RDWR:
|
||||||
|
@ -124,12 +153,29 @@ runIO(const char *path, int fd, int oflags, unsigned long long length)
|
||||||
}
|
}
|
||||||
if (got == 0)
|
if (got == 0)
|
||||||
break; /* End of file before end of requested data */
|
break; /* End of file before end of requested data */
|
||||||
|
if (got < buflen || (buflen & alignMask)) {
|
||||||
|
/* O_DIRECT can handle at most one short read, at end of file */
|
||||||
|
if (direct && shortRead) {
|
||||||
|
virReportSystemError(EINVAL, "%s",
|
||||||
|
_("Too many short reads for O_DIRECT"));
|
||||||
|
}
|
||||||
|
shortRead = true;
|
||||||
|
}
|
||||||
|
|
||||||
total += got;
|
total += got;
|
||||||
|
if (fdout == fd && direct && shortRead) {
|
||||||
|
end = total;
|
||||||
|
memset(buf + got, 0, buflen - got);
|
||||||
|
got = (got + alignMask) & ~alignMask;
|
||||||
|
}
|
||||||
if (safewrite(fdout, buf, got) < 0) {
|
if (safewrite(fdout, buf, got) < 0) {
|
||||||
virReportSystemError(errno, _("Unable to write %s"), fdoutname);
|
virReportSystemError(errno, _("Unable to write %s"), fdoutname);
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
if (end && ftruncate(fd, end) < 0) {
|
||||||
|
virReportSystemError(errno, _("Unable to truncate %s"), fdoutname);
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = 0;
|
ret = 0;
|
||||||
|
@ -141,7 +187,7 @@ cleanup:
|
||||||
ret = -1;
|
ret = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
VIR_FREE(buf);
|
VIR_FREE(base);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue