mirror of
https://github.com/libguestfs/libguestfs.git
synced 2026-03-21 22:53:37 +00:00
We tested for QEMU >= 2.10 support for mandatory locking. I believe this is for all practical purposes always enabled now (and qemu 2.10 is ancient history) so simply assume it's true always.
1156 lines
34 KiB
C
1156 lines
34 KiB
C
/* libguestfs
|
|
* Copyright (C) 2009-2023 Red Hat Inc.
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
/**
|
|
* Implementation of the C<direct> backend.
|
|
*
|
|
* For more details see L<guestfs(3)/BACKENDS>.
|
|
*/
|
|
|
|
#include <config.h>
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <stdbool.h>
|
|
#include <inttypes.h>
|
|
#include <unistd.h>
|
|
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <sys/types.h>
|
|
#include <sys/wait.h>
|
|
#include <sys/time.h>
|
|
#include <sys/resource.h>
|
|
#include <sys/stat.h>
|
|
#include <signal.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/un.h>
|
|
#include <assert.h>
|
|
#include <string.h>
|
|
#include <libintl.h>
|
|
|
|
#include "cloexec.h"
|
|
|
|
#include "guestfs.h"
|
|
#include "guestfs-internal.h"
|
|
#include "guestfs_protocol.h"
|
|
#include "qemuopts.h"
|
|
#include "ignore-value.h"
|
|
|
|
/* Per-handle data. */
|
|
struct backend_direct_data {
|
|
pid_t pid; /* Qemu PID. */
|
|
pid_t recoverypid; /* Recovery process PID. */
|
|
|
|
struct version qemu_version; /* qemu version (0 if unable to parse). */
|
|
struct qemu_data *qemu_data; /* qemu -help output etc. */
|
|
|
|
char guestfsd_sock[UNIX_PATH_MAX]; /* Path to daemon socket. */
|
|
};
|
|
|
|
static char *
|
|
create_cow_overlay_direct (guestfs_h *g, void *datav, struct drive *drv)
|
|
{
|
|
char *overlay;
|
|
CLEANUP_FREE char *backing_drive = NULL;
|
|
struct guestfs_disk_create_argv optargs;
|
|
|
|
backing_drive = guestfs_int_drive_source_qemu_param (g, &drv->src);
|
|
if (!backing_drive)
|
|
return NULL;
|
|
|
|
overlay = guestfs_int_make_temp_path (g, "overlay", "qcow2");
|
|
if (!overlay)
|
|
return NULL;
|
|
|
|
optargs.bitmask = GUESTFS_DISK_CREATE_BACKINGFILE_BITMASK;
|
|
optargs.backingfile = backing_drive;
|
|
if (drv->src.format) {
|
|
optargs.bitmask |= GUESTFS_DISK_CREATE_BACKINGFORMAT_BITMASK;
|
|
optargs.backingformat = drv->src.format;
|
|
}
|
|
|
|
if (guestfs_disk_create_argv (g, overlay, "qcow2", -1, &optargs) == -1) {
|
|
free (overlay);
|
|
return NULL;
|
|
}
|
|
|
|
/* Caller sets g->overlay in the handle to this, and then manages
|
|
* the memory.
|
|
*/
|
|
return overlay;
|
|
}
|
|
|
|
/* On Debian, /dev/kvm is mode 0660 and group kvm, so users need to
|
|
* add themselves to the kvm group otherwise things are going to be
|
|
* very slow (this is Debian bug 640328). Warn about this.
|
|
*/
|
|
static void
|
|
debian_kvm_warning (guestfs_h *g)
|
|
{
|
|
#ifdef __linux__
|
|
uid_t euid = geteuid ();
|
|
gid_t egid = getegid ();
|
|
struct stat statbuf;
|
|
gid_t kvm_group;
|
|
CLEANUP_FREE gid_t *groups = NULL;
|
|
int ngroups;
|
|
size_t i;
|
|
|
|
/* Doesn't apply if running as root. */
|
|
if (euid == 0)
|
|
return;
|
|
|
|
if (stat ("/dev/kvm", &statbuf) == -1)
|
|
return;
|
|
if ((statbuf.st_mode & 0777) != 0660)
|
|
return;
|
|
|
|
/* They might be running libguestfs as root or have chowned /dev/kvm, so: */
|
|
if (geteuid () == statbuf.st_uid)
|
|
return;
|
|
|
|
kvm_group = statbuf.st_gid;
|
|
|
|
/* Is the current process a member of the KVM group? */
|
|
if (egid == kvm_group)
|
|
return;
|
|
|
|
ngroups = getgroups (0, NULL);
|
|
if (ngroups > 0) {
|
|
groups = safe_malloc (g, ngroups * sizeof (gid_t));
|
|
if (getgroups (ngroups, groups) == -1) {
|
|
warning (g, "getgroups: %m (ignored)");
|
|
return;
|
|
}
|
|
for (i = 0; i < (size_t) ngroups; ++i) {
|
|
if (groups[i] == kvm_group)
|
|
return;
|
|
}
|
|
}
|
|
|
|
/* No, so emit the warning. Note that \n characters cannot appear
|
|
* in warnings.
|
|
*/
|
|
warning (g,
|
|
_("current user is not a member of the KVM group (group ID %d). "
|
|
"This user cannot access /dev/kvm, so libguestfs may run very slowly. "
|
|
"It is recommended that you 'chmod 0666 /dev/kvm' or add the current user "
|
|
"to the KVM group (you might need to log out and log in again)."),
|
|
(int) kvm_group);
|
|
#endif /* __linux__ */
|
|
}
|
|
|
|
/* Convenience wrappers around the qemuopts API.
 *
 * Every macro expects a variable called 'qopts' (struct qemuopts *)
 * to be in scope, and a label called 'qemuopts_error' in the
 * enclosing function, which is jumped to when the underlying
 * qemuopts call returns -1.
 *
 * start_list / end_list bracket a braced block, eg:
 *
 *   start_list ("-device") {
 *     append_list ("scsi-hd");
 *   } end_list ();
 */
#define flag(opt)                                                       \
  do {                                                                  \
    if (qemuopts_add_flag (qopts, (opt)) == -1)                         \
      goto qemuopts_error;                                              \
  } while (0)
#define arg(opt, val)                                                   \
  do {                                                                  \
    if (qemuopts_add_arg (qopts, (opt), (val)) == -1)                   \
      goto qemuopts_error;                                              \
  } while (0)
#define arg_format(opt, fmt, ...)                                       \
  do {                                                                  \
    if (qemuopts_add_arg_format (qopts, (opt), (fmt), ##__VA_ARGS__) == -1) \
      goto qemuopts_error;                                              \
  } while (0)
#define arg_noquote(opt, val)                                           \
  do {                                                                  \
    if (qemuopts_add_arg_noquote (qopts, (opt), (val)) == -1)           \
      goto qemuopts_error;                                              \
  } while (0)
/* Deliberately ends with an open 'do' which end_list() closes. */
#define start_list(opt)                                                 \
  if (qemuopts_start_arg_list (qopts, (opt)) == -1)                     \
    goto qemuopts_error;                                                \
  do
#define append_list(val)                                                \
  do {                                                                  \
    if (qemuopts_append_arg_list (qopts, (val)) == -1)                  \
      goto qemuopts_error;                                              \
  } while (0)
#define append_list_format(fmt, ...)                                    \
  do {                                                                  \
    if (qemuopts_append_arg_list_format (qopts, (fmt), ##__VA_ARGS__) == -1) \
      goto qemuopts_error;                                              \
  } while (0)
/* Closes the 'do' opened by start_list(), then finishes the list. */
#define end_list()                                                      \
  while (0);                                                            \
  do {                                                                  \
    if (qemuopts_end_arg_list (qopts) == -1)                            \
      goto qemuopts_error;                                              \
  } while (0)
|
|
|
|
/**
|
|
* Add the standard elements of the C<-drive> parameter.
|
|
*/
|
|
static int
|
|
add_drive_standard_params (guestfs_h *g, struct backend_direct_data *data,
|
|
struct qemuopts *qopts,
|
|
size_t i, struct drive *drv)
|
|
{
|
|
if (!drv->overlay) {
|
|
CLEANUP_FREE char *file = NULL;
|
|
|
|
/* file= parameter. */
|
|
file = guestfs_int_drive_source_qemu_param (g, &drv->src);
|
|
append_list_format ("file=%s", file);
|
|
|
|
if (drv->readonly)
|
|
append_list ("snapshot=on");
|
|
append_list_format ("cache=%s",
|
|
drv->cachemode ? drv->cachemode : "writeback");
|
|
if (drv->src.format)
|
|
append_list_format ("format=%s", drv->src.format);
|
|
if (drv->copyonread)
|
|
append_list ("copy-on-read=on");
|
|
|
|
/* Discard mode. */
|
|
switch (drv->discard) {
|
|
case discard_disable:
|
|
/* Since the default is always discard=ignore, don't specify it
|
|
* on the command line. This also avoids unnecessary breakage
|
|
* with qemu < 1.5 which didn't have the option at all.
|
|
*/
|
|
break;
|
|
case discard_enable:
|
|
if (!guestfs_int_discard_possible (g, drv, &data->qemu_version))
|
|
return -1;
|
|
/*FALLTHROUGH*/
|
|
case discard_besteffort:
|
|
/* I believe from reading the code that this is always safe as
|
|
* long as qemu >= 1.5.
|
|
*/
|
|
if (guestfs_int_version_ge (&data->qemu_version, 1, 5, 0))
|
|
append_list ("discard=unmap");
|
|
break;
|
|
}
|
|
}
|
|
else {
|
|
/* Writable qcow2 overlay on top of read-only drive.
|
|
*
|
|
* Add the file-specific locking option only for files, as
|
|
* qemu won't accept options unknown to the block driver in
|
|
* use.
|
|
*/
|
|
if (drv->src.protocol == drive_protocol_file) {
|
|
append_list_format ("file.file.filename=%s", drv->overlay);
|
|
append_list ("file.driver=qcow2");
|
|
append_list ("file.backing.file.locking=off");
|
|
}
|
|
else {
|
|
/* Ancient qemu (esp. qemu 1.5 in RHEL 7) didn't understand the
|
|
* file.file.filename= parameter, so use the safer old-style
|
|
* form of parameters unless we actually want to specify the
|
|
* locking flag above.
|
|
*/
|
|
append_list_format ("file=%s", drv->overlay);
|
|
append_list ("format=qcow2");
|
|
}
|
|
append_list ("cache=unsafe");
|
|
}
|
|
|
|
append_list_format ("id=hd%zu", i);
|
|
|
|
return 0;
|
|
|
|
/* This label is called implicitly from the qemuopts macros on error. */
|
|
qemuopts_error:
|
|
perrorf (g, "qemuopts");
|
|
return -1;
|
|
}
|
|
|
|
/**
|
|
* Add the physical_block_size and logical_block_size elements of the C<-device>
|
|
* parameter.
|
|
*/
|
|
static int
|
|
add_device_blocksize_params (guestfs_h *g, struct qemuopts *qopts,
|
|
struct drive *drv)
|
|
{
|
|
if (drv->blocksize) {
|
|
append_list_format ("physical_block_size=%d", drv->blocksize);
|
|
append_list_format ("logical_block_size=%d", drv->blocksize);
|
|
}
|
|
|
|
return 0;
|
|
|
|
/* This label is called implicitly from the qemuopts macros on error. */
|
|
qemuopts_error:
|
|
perrorf (g, "qemuopts");
|
|
return -1;
|
|
}
|
|
|
|
static int
|
|
add_drive (guestfs_h *g, struct backend_direct_data *data,
|
|
struct qemuopts *qopts, size_t i, struct drive *drv)
|
|
{
|
|
start_list ("-drive") {
|
|
if (add_drive_standard_params (g, data, qopts, i, drv) == -1)
|
|
return -1;
|
|
append_list ("if=none");
|
|
} end_list ();
|
|
start_list ("-device") {
|
|
append_list ("scsi-hd");
|
|
append_list_format ("drive=hd%zu", i);
|
|
if (drv->disk_label)
|
|
append_list_format ("serial=%s", drv->disk_label);
|
|
if (add_device_blocksize_params (g, qopts, drv) == -1)
|
|
return -1;
|
|
} end_list ();
|
|
|
|
return 0;
|
|
|
|
/* This label is called implicitly from the qemuopts macros on error. */
|
|
qemuopts_error:
|
|
perrorf (g, "qemuopts");
|
|
return -1;
|
|
}
|
|
|
|
static int
|
|
add_drives (guestfs_h *g, struct backend_direct_data *data,
|
|
struct qemuopts *qopts)
|
|
{
|
|
size_t i;
|
|
struct drive *drv;
|
|
|
|
ITER_DRIVES (g, i, drv) {
|
|
if (add_drive (g, data, qopts, i, drv) == -1)
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Launch passt such that it daemonizes.
|
|
*
|
|
* On error, C<-1> is returned; C<passt_pid> and C<sockpath> are not modified.
|
|
*
|
|
* On success, C<0> is returned. C<passt_pid> contains the PID of the passt
|
|
* background process. C<sockpath> contains the pathname of the unix domain
|
|
* socket where passt will accept a single connection.
|
|
*/
|
|
static int
|
|
launch_passt (guestfs_h *g, long *passt_pid, char (*sockpath)[UNIX_PATH_MAX])
|
|
{
|
|
int rc;
|
|
char sockpath_local[sizeof *sockpath];
|
|
char *pid_path;
|
|
struct command *cmd;
|
|
int passt_status;
|
|
int passt_exit;
|
|
char *pid_str;
|
|
long passt_pid_local;
|
|
char *endptr;
|
|
|
|
rc = -1;
|
|
if (guestfs_int_create_socketname (g, "passt.sock", &sockpath_local) == -1)
|
|
return rc;
|
|
|
|
pid_path = guestfs_int_make_pid_path (g, "passt");
|
|
if (pid_path == NULL)
|
|
return rc;
|
|
|
|
cmd = guestfs_int_new_command (g);
|
|
if (cmd == NULL)
|
|
goto free_pid_path;
|
|
|
|
guestfs_int_cmd_add_arg (cmd, "passt");
|
|
guestfs_int_cmd_add_arg (cmd, "--one-off");
|
|
guestfs_int_cmd_add_arg (cmd, "--socket");
|
|
guestfs_int_cmd_add_arg (cmd, sockpath_local);
|
|
guestfs_int_cmd_add_arg (cmd, "--pid");
|
|
guestfs_int_cmd_add_arg (cmd, pid_path);
|
|
guestfs_int_cmd_add_arg (cmd, "--address");
|
|
guestfs_int_cmd_add_arg (cmd, NETWORK_ADDRESS);
|
|
guestfs_int_cmd_add_arg (cmd, "--netmask");
|
|
guestfs_int_cmd_add_arg (cmd, NETWORK_PREFIX);
|
|
guestfs_int_cmd_add_arg (cmd, "--mac-addr");
|
|
guestfs_int_cmd_add_arg (cmd, NETWORK_GW_MAC);
|
|
guestfs_int_cmd_add_arg (cmd, "--gateway");
|
|
guestfs_int_cmd_add_arg (cmd, NETWORK_GW_IP);
|
|
|
|
passt_status = guestfs_int_cmd_run (cmd);
|
|
if (passt_status == -1)
|
|
/* guestfs_int_cmd_run() reports errors internally, so just bail here */
|
|
goto close_cmd;
|
|
|
|
if (WIFSIGNALED (passt_status)) {
|
|
error (g, _("passt was killed with signal %d"), WTERMSIG (passt_status));
|
|
goto close_cmd;
|
|
}
|
|
|
|
if (!WIFEXITED (passt_status)) {
|
|
error (g, _("internal error: unexpected exit status from passt (%d)"),
|
|
passt_status);
|
|
goto close_cmd;
|
|
}
|
|
|
|
passt_exit = WEXITSTATUS (passt_status);
|
|
if (passt_exit != 0) {
|
|
error (g, _("passt exited with status %d"), passt_exit);
|
|
goto close_cmd;
|
|
}
|
|
|
|
/* At this point passt has forked into the background, dropped privileges, and
|
|
* written a PID file. Due to "--one-off", passt will exit once our QEMU
|
|
* appliance disappears (forcibly or cleanly); however, we still need the
|
|
* passt PID *temporarily*, so we can kill passt in case we encounter an error
|
|
* *before* starting the appliance.
|
|
*/
|
|
if (guestfs_int_read_whole_file (g, pid_path, &pid_str, NULL) == -1)
|
|
/* Any error has been reported internally, so just bail. We can't kill
|
|
* passt here because we've failed to get its PID in the first place...
|
|
*/
|
|
goto close_cmd;
|
|
|
|
errno = 0;
|
|
passt_pid_local = strtol (pid_str, &endptr, 10);
|
|
if (endptr == pid_str || (*endptr != '\0' && *endptr != '\n') || errno != 0 ||
|
|
passt_pid_local <= 1) {
|
|
/* Same thing, we can't kill passt just yet. */
|
|
error (g, _("failed to parse passt PID from '%s'"), pid_path);
|
|
goto free_pid_str;
|
|
}
|
|
|
|
/* We're done. */
|
|
*passt_pid = passt_pid_local;
|
|
ignore_value (strcpy (*sockpath, sockpath_local));
|
|
rc = 0;
|
|
|
|
free_pid_str:
|
|
free (pid_str);
|
|
|
|
close_cmd:
|
|
guestfs_int_cmd_close (cmd);
|
|
|
|
free_pid_path:
|
|
free (pid_path);
|
|
|
|
return rc;
|
|
}
|
|
|
|
static int
|
|
launch_direct (guestfs_h *g, void *datav, const char *arg)
|
|
{
|
|
struct backend_direct_data *data = datav;
|
|
struct qemuopts *qopts = NULL;
|
|
int daemon_accept_sock = -1, console_sock = -1;
|
|
int r;
|
|
long passt_pid = -1;
|
|
int flags;
|
|
int sv[2];
|
|
struct sockaddr_un addr;
|
|
CLEANUP_FREE char *uefi_code = NULL, *uefi_vars = NULL;
|
|
int uefi_flags;
|
|
CLEANUP_FREE char *kernel = NULL, *initrd = NULL, *appliance = NULL;
|
|
int has_appliance_drive;
|
|
uint32_t size;
|
|
CLEANUP_FREE void *buf = NULL;
|
|
struct hv_param *hp;
|
|
bool has_kvm;
|
|
int force_tcg;
|
|
int force_kvm;
|
|
const char *accel_val = "kvm:tcg";
|
|
const char *cpu_model;
|
|
CLEANUP_FREE char *append = NULL;
|
|
CLEANUP_FREE_STRING_LIST char **argv = NULL;
|
|
CLEANUP_FREE_STRING_LIST char **env = NULL;
|
|
|
|
if (!g->nr_drives) {
|
|
error (g, _("you must call guestfs_add_drive before guestfs_launch"));
|
|
return -1;
|
|
}
|
|
|
|
guestfs_int_launch_send_progress (g, 0);
|
|
|
|
/* Locate and/or build the appliance. */
|
|
if (guestfs_int_build_appliance (g, &kernel, &initrd, &appliance) == -1)
|
|
return -1;
|
|
has_appliance_drive = appliance != NULL;
|
|
|
|
guestfs_int_launch_send_progress (g, 3);
|
|
|
|
debug (g, "begin testing qemu features");
|
|
|
|
/* Get qemu help text and version. */
|
|
if (data->qemu_data == NULL) {
|
|
data->qemu_data = guestfs_int_test_qemu (g);
|
|
if (data->qemu_data == NULL)
|
|
goto cleanup0;
|
|
data->qemu_version = guestfs_int_qemu_version (g, data->qemu_data);
|
|
debug (g, "qemu version: %d.%d",
|
|
data->qemu_version.v_major, data->qemu_version.v_minor);
|
|
}
|
|
|
|
/* Work out if KVM is supported or if the user wants to force TCG. */
|
|
has_kvm = guestfs_int_platform_has_kvm (g, data->qemu_data);
|
|
debug (g, "qemu KVM: %s", has_kvm ? "enabled" : "disabled");
|
|
|
|
force_tcg = guestfs_int_get_backend_setting_bool (g, "force_tcg");
|
|
if (force_tcg == -1)
|
|
return -1;
|
|
else if (force_tcg)
|
|
accel_val = "tcg";
|
|
|
|
force_kvm = guestfs_int_get_backend_setting_bool (g, "force_kvm");
|
|
if (force_kvm == -1)
|
|
return -1;
|
|
else if (force_kvm)
|
|
accel_val = "kvm";
|
|
|
|
if (force_kvm && force_tcg) {
|
|
error (g, "Both force_kvm and force_tcg backend settings supplied.");
|
|
return -1;
|
|
}
|
|
if (!has_kvm) {
|
|
if (!force_tcg)
|
|
debian_kvm_warning (g);
|
|
if (force_kvm) {
|
|
error (g, "force_kvm supplied but kvm not available.");
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/* Using virtio-serial, we need to create a local Unix domain socket
|
|
* for qemu to connect to.
|
|
*/
|
|
if (guestfs_int_create_socketname (g, "guestfsd.sock",
|
|
&data->guestfsd_sock) == -1)
|
|
goto cleanup0;
|
|
|
|
daemon_accept_sock = socket (AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
|
|
if (daemon_accept_sock == -1) {
|
|
perrorf (g, "socket");
|
|
goto cleanup0;
|
|
}
|
|
|
|
addr.sun_family = AF_UNIX;
|
|
strncpy (addr.sun_path, data->guestfsd_sock, UNIX_PATH_MAX);
|
|
addr.sun_path[UNIX_PATH_MAX-1] = '\0';
|
|
|
|
if (bind (daemon_accept_sock, (struct sockaddr *) &addr,
|
|
sizeof addr) == -1) {
|
|
perrorf (g, "bind");
|
|
goto cleanup0;
|
|
}
|
|
|
|
if (listen (daemon_accept_sock, 1) == -1) {
|
|
perrorf (g, "listen");
|
|
goto cleanup0;
|
|
}
|
|
|
|
if (!g->direct_mode) {
|
|
if (socketpair (AF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC, 0, sv) == -1) {
|
|
perrorf (g, "socketpair");
|
|
goto cleanup0;
|
|
}
|
|
}
|
|
|
|
debug (g, "finished testing qemu features");
|
|
|
|
/* Construct the qemu command line. We have to do this before
|
|
* forking, because after fork we are not allowed to use
|
|
* non-signal-safe functions such as malloc.
|
|
*/
|
|
qopts = qemuopts_create ();
|
|
if (qopts == NULL) {
|
|
qemuopts_error:
|
|
perrorf (g, "qemuopts");
|
|
goto cleanup0;
|
|
}
|
|
if (qemuopts_set_binary (qopts, g->hv) == -1) goto qemuopts_error;
|
|
|
|
/* CVE-2011-4127 mitigation: Disable SCSI ioctls on virtio-blk
|
|
* devices.
|
|
*/
|
|
arg ("-global", VIRTIO_DEVICE_NAME ("virtio-blk") ".scsi=off");
|
|
|
|
if (guestfs_int_qemu_supports (g, data->qemu_data, "-no-user-config"))
|
|
flag ("-no-user-config");
|
|
|
|
/* Newer versions of qemu (from around 2009/12) changed the
|
|
* behaviour of monitors so that an implicit '-monitor stdio' is
|
|
* assumed if we are in -nographic mode and there is no other
|
|
* -monitor option. Only a single stdio device is allowed, so
|
|
* this broke the '-serial stdio' option. There is a new flag
|
|
* called -nodefaults which gets rid of all this default crud, so
|
|
* let's use that to avoid this and any future surprises.
|
|
*/
|
|
if (guestfs_int_qemu_supports (g, data->qemu_data, "-nodefaults"))
|
|
flag ("-nodefaults");
|
|
|
|
/* This disables the host-side display (SDL, Gtk). */
|
|
arg ("-display", "none");
|
|
|
|
/* See guestfs.pod / gdb */
|
|
if (guestfs_int_get_backend_setting_bool (g, "gdb") > 0) {
|
|
flag ("-S");
|
|
flag ("-s");
|
|
warning (g, "qemu debugging is enabled, connect gdb to tcp::1234 to begin");
|
|
}
|
|
|
|
start_list ("-machine") {
|
|
#ifdef MACHINE_TYPE
|
|
append_list (MACHINE_TYPE);
|
|
#endif
|
|
#ifdef __aarch64__
|
|
if (has_kvm && !force_tcg)
|
|
append_list ("gic-version=host");
|
|
#endif
|
|
append_list_format ("accel=%s", accel_val);
|
|
#if defined(__i386__) || defined(__x86_64__)
|
|
/* Tell seabios to send debug messages to the serial port.
|
|
* This used to be done by sgabios.
|
|
*/
|
|
if (g->verbose)
|
|
append_list ("graphics=off");
|
|
#endif
|
|
} end_list ();
|
|
|
|
cpu_model = guestfs_int_get_cpu_model (has_kvm && !force_tcg);
|
|
if (cpu_model) {
|
|
#if defined(__x86_64__)
|
|
/* Temporary workaround for RHBZ#2082806 */
|
|
if (STREQ (cpu_model, "max")) {
|
|
start_list ("-cpu") {
|
|
append_list (cpu_model);
|
|
append_list ("la57=off");
|
|
} end_list ();
|
|
}
|
|
else
|
|
#endif
|
|
arg ("-cpu", cpu_model);
|
|
}
|
|
|
|
if (g->smp > 1)
|
|
arg_format ("-smp", "%d", g->smp);
|
|
|
|
arg_format ("-m", "%d", g->memsize);
|
|
|
|
/* Force exit instead of reboot on panic */
|
|
flag ("-no-reboot");
|
|
|
|
/* These are recommended settings, see RHBZ#1053847. */
|
|
arg ("-rtc", "driftfix=slew");
|
|
if (guestfs_int_qemu_supports (g, data->qemu_data, "-no-hpet"))
|
|
flag ("-no-hpet");
|
|
#if defined(__i386__) || defined(__x86_64__)
|
|
if (guestfs_int_version_ge (&data->qemu_version, 1, 3, 0))
|
|
arg ("-global", "kvm-pit.lost_tick_policy=discard");
|
|
#endif
|
|
|
|
/* UEFI (firmware) if required. */
|
|
if (guestfs_int_get_uefi (g, NULL, NULL, &uefi_code, &uefi_vars,
|
|
&uefi_flags) == -1)
|
|
goto cleanup0;
|
|
if (uefi_flags & UEFI_FLAG_SECURE_BOOT_REQUIRED) {
|
|
/* Implementing this requires changes to the qemu command line.
|
|
* See RHBZ#1367615 for details. As the guestfs_int_get_uefi
|
|
* function is only implemented for aarch64, and UEFI secure boot
|
|
* is some way off on aarch64 (2017/2018), we only need to worry
|
|
* about this later.
|
|
*/
|
|
error (g, "internal error: direct backend "
|
|
"does not implement UEFI secure boot, "
|
|
"see comments in the code");
|
|
goto cleanup0;
|
|
}
|
|
if (uefi_code) {
|
|
start_list ("-drive") {
|
|
append_list ("if=pflash");
|
|
append_list ("format=raw");
|
|
append_list_format ("file=%s", uefi_code);
|
|
append_list ("readonly");
|
|
} end_list ();
|
|
if (uefi_vars) {
|
|
start_list ("-drive") {
|
|
append_list ("if=pflash");
|
|
append_list ("format=raw");
|
|
append_list_format ("file=%s", uefi_vars);
|
|
} end_list ();
|
|
}
|
|
}
|
|
|
|
/* Kernel and initrd. */
|
|
arg ("-kernel", kernel);
|
|
arg ("-initrd", initrd);
|
|
|
|
/* Add a random number generator (backend for virtio-rng). This
|
|
* isn't strictly necessary but means we won't need to hang around
|
|
* when needing entropy.
|
|
*/
|
|
if (guestfs_int_qemu_supports_device (g, data->qemu_data,
|
|
VIRTIO_DEVICE_NAME ("virtio-rng"))) {
|
|
start_list ("-object") {
|
|
append_list ("rng-random");
|
|
append_list ("filename=/dev/urandom");
|
|
append_list ("id=rng0");
|
|
} end_list ();
|
|
start_list ("-device") {
|
|
append_list (VIRTIO_DEVICE_NAME ("virtio-rng"));
|
|
append_list ("rng=rng0");
|
|
} end_list ();
|
|
}
|
|
|
|
/* Create the virtio-scsi bus. */
|
|
start_list ("-device") {
|
|
append_list (VIRTIO_DEVICE_NAME ("virtio-scsi"));
|
|
append_list ("id=scsi");
|
|
} end_list ();
|
|
|
|
/* Add drives (except for the appliance drive). */
|
|
if (add_drives (g, data, qopts) == -1)
|
|
goto cleanup0;
|
|
|
|
/* Add the ext2 appliance drive (after all the drives). */
|
|
if (has_appliance_drive) {
|
|
start_list ("-drive") {
|
|
append_list_format ("file=%s", appliance);
|
|
append_list ("snapshot=on");
|
|
append_list ("id=appliance");
|
|
append_list ("cache=unsafe");
|
|
append_list ("if=none");
|
|
#ifndef APPLIANCE_FORMAT_AUTO
|
|
append_list ("format=raw");
|
|
#endif
|
|
} end_list ();
|
|
start_list ("-device") {
|
|
append_list ("scsi-hd");
|
|
append_list ("drive=appliance");
|
|
} end_list ();
|
|
}
|
|
|
|
/* Create the virtio serial bus. */
|
|
arg ("-device", VIRTIO_DEVICE_NAME ("virtio-serial"));
|
|
|
|
/* Create the serial console. */
|
|
#ifndef __s390x__
|
|
arg ("-serial", "stdio");
|
|
#else
|
|
start_list ("-chardev") {
|
|
append_list ("stdio");
|
|
append_list ("id=charconsole0");
|
|
} end_list ();
|
|
start_list ("-device") {
|
|
append_list ("sclpconsole");
|
|
append_list ("chardev=charconsole0");
|
|
} end_list ();
|
|
#endif
|
|
|
|
/* Set up virtio-serial for the communications channel. */
|
|
start_list ("-chardev") {
|
|
append_list ("socket");
|
|
append_list_format ("path=%s", data->guestfsd_sock);
|
|
append_list ("id=channel0");
|
|
} end_list ();
|
|
start_list ("-device") {
|
|
append_list ("virtserialport");
|
|
append_list ("chardev=channel0");
|
|
append_list ("name=org.libguestfs.channel.0");
|
|
} end_list ();
|
|
|
|
/* Enable user networking. */
|
|
if (g->enable_network) {
|
|
/* If qemu is 7.2.0+ and "passt" is available, ask for passt rather
|
|
* than SLIRP. RHBZ#2184967.
|
|
*/
|
|
if (guestfs_int_version_ge (&data->qemu_version, 7, 2, 0) &&
|
|
guestfs_int_passt_runnable (g)) {
|
|
char passt_sock[UNIX_PATH_MAX];
|
|
|
|
if (launch_passt (g, &passt_pid, &passt_sock) == -1)
|
|
goto cleanup0;
|
|
|
|
start_list ("-netdev") {
|
|
append_list ("stream");
|
|
append_list ("id=usernet");
|
|
append_list ("addr.type=unix");
|
|
append_list_format ("addr.path=%s", passt_sock);
|
|
} end_list ();
|
|
}
|
|
else {
|
|
start_list ("-netdev") {
|
|
append_list ("user");
|
|
append_list ("id=usernet");
|
|
append_list ("net=" NETWORK_ADDRESS "/" NETWORK_PREFIX);
|
|
} end_list ();
|
|
}
|
|
start_list ("-device") {
|
|
append_list (VIRTIO_DEVICE_NAME ("virtio-net"));
|
|
append_list ("netdev=usernet");
|
|
} end_list ();
|
|
}
|
|
|
|
flags = 0;
|
|
if (!has_kvm || force_tcg)
|
|
flags |= APPLIANCE_COMMAND_LINE_IS_TCG;
|
|
append = guestfs_int_appliance_command_line (g, appliance, flags);
|
|
arg ("-append", append);
|
|
|
|
/* Note: custom command line parameters must come last so that
|
|
* qemu -set parameters can modify previously added options.
|
|
*/
|
|
|
|
/* Add any qemu parameters. */
|
|
for (hp = g->hv_params; hp; hp = hp->next) {
|
|
if (!hp->hv_value)
|
|
flag (hp->hv_param);
|
|
else
|
|
arg_noquote (hp->hv_param, hp->hv_value);
|
|
}
|
|
|
|
/* Get the argv list from the command line. */
|
|
argv = qemuopts_to_argv (qopts);
|
|
|
|
/* Create the environ for the child process. */
|
|
env = guestfs_int_copy_environ (environ,
|
|
"LC_ALL", "C",
|
|
/* Prevents qemu opening /dev/dsp */
|
|
"QEMU_AUDIO_DRV", "none",
|
|
NULL);
|
|
if (env == NULL)
|
|
goto cleanup0;
|
|
|
|
r = fork ();
|
|
if (r == -1) {
|
|
perrorf (g, "fork");
|
|
if (!g->direct_mode) {
|
|
close (sv[0]);
|
|
close (sv[1]);
|
|
}
|
|
goto cleanup0;
|
|
}
|
|
|
|
if (r == 0) { /* Child (qemu). */
|
|
if (!g->direct_mode) {
|
|
/* Set up stdin, stdout, stderr. */
|
|
close (0);
|
|
close (1);
|
|
close (sv[0]);
|
|
|
|
/* We set the FD_CLOEXEC flag on the socket above, but now (in
|
|
* the child) it's safe to unset this flag so qemu can use the
|
|
* socket.
|
|
*/
|
|
set_cloexec_flag (sv[1], 0);
|
|
|
|
/* Stdin. */
|
|
if (dup (sv[1]) == -1) {
|
|
dup_failed:
|
|
perror ("dup failed");
|
|
_exit (EXIT_FAILURE);
|
|
}
|
|
/* Stdout. */
|
|
if (dup (sv[1]) == -1)
|
|
goto dup_failed;
|
|
|
|
/* Particularly since qemu 0.15, qemu spews all sorts of debug
|
|
* information on stderr. It is useful to both capture this and
|
|
* not confuse casual users, so send stderr to the pipe as well.
|
|
*/
|
|
close (2);
|
|
if (dup (sv[1]) == -1)
|
|
goto dup_failed;
|
|
|
|
close (sv[1]);
|
|
|
|
/* Close any other file descriptors that we don't want to pass
|
|
* to qemu. This prevents file descriptors which didn't have
|
|
* O_CLOEXEC set properly from leaking into the subprocess. See
|
|
* RHBZ#1123007.
|
|
*/
|
|
close_file_descriptors (fd > 2);
|
|
}
|
|
|
|
/* Unblock the SIGTERM signal since we will need to send that to
|
|
* the subprocess (RHBZ#1460338).
|
|
*/
|
|
guestfs_int_unblock_sigterm ();
|
|
|
|
/* Dump the command line (after setting up stderr above). */
|
|
if (g->verbose)
|
|
qemuopts_to_channel (qopts, stderr);
|
|
|
|
/* Put qemu in a new process group. */
|
|
if (g->pgroup)
|
|
setpgid (0, 0);
|
|
|
|
execve (g->hv, argv, env); /* Run qemu. */
|
|
perror (g->hv);
|
|
_exit (EXIT_FAILURE);
|
|
}
|
|
|
|
/* Parent (library). */
|
|
data->pid = r;
|
|
|
|
qemuopts_free (qopts);
|
|
qopts = NULL;
|
|
|
|
/* Fork the recovery process off which will kill qemu if the parent
|
|
* process fails to do so (eg. if the parent segfaults).
|
|
*/
|
|
data->recoverypid = -1;
|
|
if (g->recovery_proc) {
|
|
r = fork ();
|
|
if (r == 0) {
|
|
size_t i;
|
|
struct sigaction sa;
|
|
pid_t qemu_pid = data->pid;
|
|
pid_t parent_pid = getppid ();
|
|
|
|
/* Remove all signal handlers. See the justification here:
|
|
* https://www.redhat.com/archives/libvir-list/2008-August/msg00303.html
|
|
* We don't mask signal handlers yet, so this isn't completely
|
|
* race-free, but better than not doing it at all.
|
|
*/
|
|
memset (&sa, 0, sizeof sa);
|
|
sa.sa_handler = SIG_DFL;
|
|
sa.sa_flags = 0;
|
|
sigemptyset (&sa.sa_mask);
|
|
for (i = 1; i < NSIG; ++i)
|
|
sigaction (i, &sa, NULL);
|
|
|
|
/* Close all other file descriptors. This ensures that we don't
|
|
* hold open (eg) pipes from the parent process.
|
|
*/
|
|
close_file_descriptors (1);
|
|
|
|
/* Unblock the SIGTERM signal since we will need to respond to
|
|
* SIGTERM from the parent (RHBZ#1460338).
|
|
*/
|
|
guestfs_int_unblock_sigterm ();
|
|
|
|
/* It would be nice to be able to put this in the same process
|
|
* group as qemu (ie. setpgid (0, qemu_pid)). However this is
|
|
* not possible because we don't have any guarantee here that
|
|
* the qemu process has started yet.
|
|
*/
|
|
if (g->pgroup)
|
|
setpgid (0, 0);
|
|
|
|
/* Writing to argv is hideously complicated and error prone. See:
|
|
* http://git.postgresql.org/gitweb/?p=postgresql.git;a=blob;f=src/backend/utils/misc/ps_status.c;hb=HEAD
|
|
*/
|
|
|
|
/* Loop around waiting for one or both of the other processes to
|
|
* disappear. It's fair to say this is very hairy. The PIDs that
|
|
* we are looking at might be reused by another process. We are
|
|
* effectively polling. Is the cure worse than the disease?
|
|
*/
|
|
for (;;) {
|
|
if (kill (qemu_pid, 0) == -1) /* qemu's gone away, we aren't needed */
|
|
_exit (EXIT_SUCCESS);
|
|
if (kill (parent_pid, 0) == -1) {
|
|
/* Parent's gone away, qemu still around, so kill qemu. */
|
|
kill (qemu_pid, 9);
|
|
_exit (EXIT_SUCCESS);
|
|
}
|
|
sleep (2);
|
|
}
|
|
}
|
|
|
|
/* Don't worry, if the fork failed, this will be -1. The recovery
|
|
* process isn't essential.
|
|
*/
|
|
data->recoverypid = r;
|
|
}
|
|
|
|
if (!g->direct_mode) {
|
|
/* Close the other end of the socketpair. */
|
|
close (sv[1]);
|
|
|
|
console_sock = sv[0]; /* stdin of child */
|
|
sv[0] = -1;
|
|
}
|
|
|
|
g->state = LAUNCHING;
|
|
|
|
/* Wait for qemu to start and to connect back to us via
|
|
* virtio-serial and send the GUESTFS_LAUNCH_FLAG message.
|
|
*/
|
|
g->conn =
|
|
guestfs_int_new_conn_socket_listening (g, daemon_accept_sock, console_sock);
|
|
if (!g->conn)
|
|
goto cleanup1;
|
|
|
|
/* g->conn now owns these sockets. */
|
|
daemon_accept_sock = console_sock = -1;
|
|
|
|
r = g->conn->ops->accept_connection (g, g->conn);
|
|
if (r == -1)
|
|
goto cleanup1;
|
|
if (r == 0) {
|
|
guestfs_int_launch_failed_error (g);
|
|
goto cleanup1;
|
|
}
|
|
|
|
/* NB: We reach here just because qemu has opened the socket. It
|
|
* does not mean the daemon is up until we read the
|
|
* GUESTFS_LAUNCH_FLAG below. Failures in qemu startup can still
|
|
* happen even if we reach here, even early failures like not being
|
|
* able to open a drive.
|
|
*/
|
|
|
|
r = guestfs_int_recv_from_daemon (g, &size, &buf);
|
|
|
|
if (r == -1) {
|
|
guestfs_int_launch_failed_error (g);
|
|
goto cleanup1;
|
|
}
|
|
|
|
if (size != GUESTFS_LAUNCH_FLAG) {
|
|
guestfs_int_launch_failed_error (g);
|
|
goto cleanup1;
|
|
}
|
|
|
|
debug (g, "appliance is up");
|
|
|
|
/* From this point onward, even if we fail, QEMU terminating (forcefully or
|
|
* gracefully) will cause passt to go away as well. Note that we can't
|
|
* precisely tell whether QEMU managed to open the passt socket before QEMU
|
|
* failed. Therefore, err on the side of killing passt needlessly, rather
|
|
* than not killing it when needed -- that's why we re-set "passt_pid" to (-1)
|
|
* only this late during QEMU startup verification.
|
|
*/
|
|
passt_pid = -1;
|
|
|
|
/* This is possible in some really strange situations, such as
|
|
* guestfsd starts up OK but then qemu immediately exits. Check for
|
|
* it because the caller is probably expecting to be able to send
|
|
* commands after this function returns.
|
|
*/
|
|
if (g->state != READY) {
|
|
error (g, _("qemu launched and contacted daemon, but state != READY"));
|
|
goto cleanup1;
|
|
}
|
|
|
|
guestfs_int_launch_send_progress (g, 12);
|
|
|
|
if (has_appliance_drive)
|
|
guestfs_int_add_dummy_appliance_drive (g);
|
|
|
|
return 0;
|
|
|
|
cleanup1:
|
|
if (!g->direct_mode && sv[0] >= 0)
|
|
close (sv[0]);
|
|
if (data->pid > 0) kill (data->pid, 9);
|
|
if (data->recoverypid > 0) kill (data->recoverypid, 9);
|
|
if (data->pid > 0) guestfs_int_waitpid_noerror (data->pid);
|
|
if (data->recoverypid > 0) guestfs_int_waitpid_noerror (data->recoverypid);
|
|
data->pid = 0;
|
|
data->recoverypid = 0;
|
|
memset (&g->launch_t, 0, sizeof g->launch_t);
|
|
guestfs_int_free_qemu_data (data->qemu_data);
|
|
data->qemu_data = NULL;
|
|
|
|
cleanup0:
|
|
if (passt_pid != -1)
|
|
kill (passt_pid, SIGTERM);
|
|
if (qopts != NULL)
|
|
qemuopts_free (qopts);
|
|
if (daemon_accept_sock >= 0)
|
|
close (daemon_accept_sock);
|
|
if (console_sock >= 0)
|
|
close (console_sock);
|
|
if (g->conn) {
|
|
g->conn->ops->free_connection (g, g->conn);
|
|
g->conn = NULL;
|
|
}
|
|
g->state = CONFIG;
|
|
return -1;
|
|
}
|
|
|
|
static int
|
|
shutdown_direct (guestfs_h *g, void *datav, int check_for_errors)
|
|
{
|
|
struct backend_direct_data *data = datav;
|
|
int ret = 0;
|
|
int status;
|
|
struct rusage rusage;
|
|
|
|
/* Signal qemu to shutdown cleanly, and kill the recovery process. */
|
|
if (data->pid > 0) {
|
|
debug (g, "sending SIGTERM to process %d", data->pid);
|
|
kill (data->pid, SIGTERM);
|
|
}
|
|
if (data->recoverypid > 0) kill (data->recoverypid, 9);
|
|
|
|
/* Wait for subprocess(es) to exit. */
|
|
if (g->recovery_proc /* RHBZ#998482 */ && data->pid > 0) {
|
|
if (guestfs_int_wait4 (g, data->pid, &status, &rusage, "qemu") == -1)
|
|
ret = -1;
|
|
else if (!WIFEXITED (status) || WEXITSTATUS (status) != 0) {
|
|
guestfs_int_external_command_failed (g, status, g->hv, NULL);
|
|
ret = -1;
|
|
}
|
|
else
|
|
/* Print the actual memory usage of qemu, useful for seeing
|
|
* if techniques like DAX are having any effect.
|
|
*/
|
|
debug (g, "qemu maxrss %ldK", rusage.ru_maxrss);
|
|
}
|
|
if (data->recoverypid > 0) guestfs_int_waitpid_noerror (data->recoverypid);
|
|
|
|
data->pid = data->recoverypid = 0;
|
|
|
|
if (data->guestfsd_sock[0] != '\0') {
|
|
unlink (data->guestfsd_sock);
|
|
data->guestfsd_sock[0] = '\0';
|
|
}
|
|
|
|
guestfs_int_free_qemu_data (data->qemu_data);
|
|
data->qemu_data = NULL;
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int
|
|
get_pid_direct (guestfs_h *g, void *datav)
|
|
{
|
|
struct backend_direct_data *data = datav;
|
|
|
|
if (data->pid > 0)
|
|
return data->pid;
|
|
else {
|
|
error (g, "get_pid: no qemu subprocess");
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/* Maximum number of disks. */
|
|
static int
|
|
max_disks_direct (guestfs_h *g, void *datav)
|
|
{
|
|
return 255;
|
|
}
|
|
|
|
static struct backend_ops backend_direct_ops = {
|
|
.data_size = sizeof (struct backend_direct_data),
|
|
.create_cow_overlay = create_cow_overlay_direct,
|
|
.launch = launch_direct,
|
|
.shutdown = shutdown_direct,
|
|
.get_pid = get_pid_direct,
|
|
.max_disks = max_disks_direct,
|
|
};
|
|
|
|
void
|
|
guestfs_int_init_direct_backend (void)
|
|
{
|
|
guestfs_int_register_backend ("direct", &backend_direct_ops);
|
|
}
|