/* libguestfs * Copyright (C) 2009-2017 Red Hat Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ /** * Implementation of the C backend. * * For more details see L. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "cloexec.h" #include "guestfs.h" #include "guestfs-internal.h" #include "guestfs_protocol.h" /* Per-handle data. */ struct backend_direct_data { pid_t pid; /* Qemu PID. */ pid_t recoverypid; /* Recovery process PID. */ struct version qemu_version; /* qemu version (0 if unable to parse). */ struct qemu_data *qemu_data; /* qemu -help output etc. */ char guestfsd_sock[UNIX_PATH_MAX]; /* Path to daemon socket. */ }; static int is_openable (guestfs_h *g, const char *path, int flags); static char *make_appliance_dev (guestfs_h *g, int virtio_scsi); static void print_qemu_command_line (guestfs_h *g, char **argv); static char * create_cow_overlay_direct (guestfs_h *g, void *datav, struct drive *drv) { char *overlay; CLEANUP_FREE char *backing_drive = NULL; struct guestfs_disk_create_argv optargs; backing_drive = guestfs_int_drive_source_qemu_param (g, &drv->src); if (!backing_drive) return NULL; if (guestfs_int_lazy_make_tmpdir (g) == -1) return NULL; overlay = safe_asprintf (g, "%s/overlay%d", g->tmpdir, ++g->unique); optargs.bitmask = GUESTFS_DISK_CREATE_BACKINGFILE_BITMASK; optargs.backingfile = backing_drive; if (drv->src.format) { optargs.bitmask |= GUESTFS_DISK_CREATE_BACKINGFORMAT_BITMASK; optargs.backingformat = drv->src.format; } if (guestfs_disk_create_argv (g, overlay, "qcow2", -1, &optargs) == -1) { free (overlay); return NULL; } /* Caller sets g->overlay in the handle to this, and then manages * the memory. */ return overlay; } #ifdef QEMU_OPTIONS /* Like 'add_cmdline' but allowing a shell-quoted string of zero or * more options. XXX The unquoting is not very clever. */ static void add_cmdline_shell_unquoted (guestfs_h *g, struct stringsbuf *sb, const char *options) { char quote; const char *startp, *endp, *nextp; while (*options) { quote = *options; if (quote == '\'' || quote == '"') startp = options+1; else { startp = options; quote = ' '; } endp = strchr (options, quote); if (endp == NULL) { if (quote != ' ') { fprintf (stderr, _("unclosed quote character (%c) in command line near: %s"), quote, options); _exit (EXIT_FAILURE); } endp = options + strlen (options); } if (quote == ' ') { if (endp[0] == '\0') nextp = endp; else nextp = endp+1; } else { if (!endp[1]) nextp = endp+1; else if (endp[1] == ' ') nextp = endp+2; else { fprintf (stderr, _("cannot parse quoted string near: %s"), options); _exit (EXIT_FAILURE); } } while (*nextp && *nextp == ' ') nextp++; guestfs_int_add_string_nodup (g, sb, safe_strndup (g, startp, endp-startp)); options = nextp; } } #endif /* defined QEMU_OPTIONS */ /* On Debian, /dev/kvm is mode 0660 and group kvm, so users need to * add themselves to the kvm group otherwise things are going to be * very slow (this is Debian bug 640328). Warn about this. */ static void debian_kvm_warning (guestfs_h *g) { #ifdef __linux__ uid_t euid = geteuid (); gid_t egid = getegid (); struct stat statbuf; gid_t kvm_group; CLEANUP_FREE gid_t *groups = NULL; int ngroups; size_t i; /* Doesn't apply if running as root. */ if (euid == 0) return; if (stat ("/dev/kvm", &statbuf) == -1) return; if ((statbuf.st_mode & 0777) != 0660) return; /* They might be running libguestfs as root or have chowned /dev/kvm, so: */ if (geteuid () == statbuf.st_uid) return; kvm_group = statbuf.st_gid; /* Is the current process a member of the KVM group? */ if (egid == kvm_group) return; ngroups = getgroups (0, NULL); if (ngroups > 0) { groups = safe_malloc (g, ngroups * sizeof (gid_t)); if (getgroups (ngroups, groups) == -1) { warning (g, "getgroups: %m (ignored)"); return; } for (i = 0; i < (size_t) ngroups; ++i) { if (groups[i] == kvm_group) return; } } /* No, so emit the warning. Note that \n characters cannot appear * in warnings. */ warning (g, _("current user is not a member of the KVM group (group ID %d). " "This user cannot access /dev/kvm, so libguestfs may run very slowly. " "It is recommended that you 'chmod 0666 /dev/kvm' or add the current user " "to the KVM group (you might need to log out and log in again)."), (int) kvm_group); #endif /* __linux__ */ } static int launch_direct (guestfs_h *g, void *datav, const char *arg) { struct backend_direct_data *data = datav; CLEANUP_FREE_STRINGSBUF DECLARE_STRINGSBUF (cmdline); int daemon_accept_sock = -1, console_sock = -1; int r; int flags; int sv[2]; struct sockaddr_un addr; CLEANUP_FREE char *uefi_code = NULL, *uefi_vars = NULL; int uefi_flags; CLEANUP_FREE char *kernel = NULL, *initrd = NULL, *appliance = NULL; int has_appliance_drive; CLEANUP_FREE char *appliance_dev = NULL; uint32_t size; CLEANUP_FREE void *buf = NULL; struct drive *drv; size_t i; int virtio_scsi; struct hv_param *hp; bool has_kvm; int force_tcg; const char *cpu_model; /* At present you must add drives before starting the appliance. In * future when we enable hotplugging you won't need to do this. */ if (!g->nr_drives) { error (g, _("you must call guestfs_add_drive before guestfs_launch")); return -1; } /* Try to guess if KVM is available. We are just checking that * /dev/kvm is openable. That's not reliable, since /dev/kvm * might be openable by qemu but not by us (think: SELinux) in * which case the user would not get hardware virtualization, * although at least shouldn't fail. */ has_kvm = is_openable (g, "/dev/kvm", O_RDWR|O_CLOEXEC); force_tcg = guestfs_int_get_backend_setting_bool (g, "force_tcg"); if (force_tcg == -1) return -1; if (!has_kvm && !force_tcg) debian_kvm_warning (g); guestfs_int_launch_send_progress (g, 0); TRACE0 (launch_build_appliance_start); /* Locate and/or build the appliance. */ if (guestfs_int_build_appliance (g, &kernel, &initrd, &appliance) == -1) return -1; has_appliance_drive = appliance != NULL; TRACE0 (launch_build_appliance_end); guestfs_int_launch_send_progress (g, 3); debug (g, "begin testing qemu features"); /* Get qemu help text and version. */ if (data->qemu_data == NULL) { data->qemu_data = guestfs_int_test_qemu (g, &data->qemu_version); if (data->qemu_data == NULL) goto cleanup0; } /* Using virtio-serial, we need to create a local Unix domain socket * for qemu to connect to. */ if (guestfs_int_create_socketname (g, "guestfsd.sock", &data->guestfsd_sock) == -1) goto cleanup0; daemon_accept_sock = socket (AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0); if (daemon_accept_sock == -1) { perrorf (g, "socket"); goto cleanup0; } addr.sun_family = AF_UNIX; strncpy (addr.sun_path, data->guestfsd_sock, UNIX_PATH_MAX); addr.sun_path[UNIX_PATH_MAX-1] = '\0'; if (bind (daemon_accept_sock, (struct sockaddr *) &addr, sizeof addr) == -1) { perrorf (g, "bind"); goto cleanup0; } if (listen (daemon_accept_sock, 1) == -1) { perrorf (g, "listen"); goto cleanup0; } if (!g->direct_mode) { if (socketpair (AF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC, 0, sv) == -1) { perrorf (g, "socketpair"); goto cleanup0; } } debug (g, "finished testing qemu features"); /* Construct the qemu command line. We have to do this before * forking, because after fork we are not allowed to use * non-signal-safe functions such as malloc. */ #define ADD_CMDLINE(str) \ guestfs_int_add_string (g, &cmdline, (str)) #define ADD_CMDLINE_STRING_NODUP(str) \ guestfs_int_add_string_nodup (g, &cmdline, (str)) #define ADD_CMDLINE_PRINTF(fs,...) \ guestfs_int_add_sprintf (g, &cmdline, (fs), ##__VA_ARGS__) ADD_CMDLINE (g->hv); /* CVE-2011-4127 mitigation: Disable SCSI ioctls on virtio-blk * devices. The -global option must exist, but you can pass any * strings to it so we don't need to check for the specific virtio * feature. */ if (guestfs_int_qemu_supports (g, data->qemu_data, "-global")) { ADD_CMDLINE ("-global"); ADD_CMDLINE (VIRTIO_BLK ".scsi=off"); } if (guestfs_int_qemu_supports (g, data->qemu_data, "-nodefconfig")) ADD_CMDLINE ("-nodefconfig"); /* This oddly named option doesn't actually enable FIPS. It just * causes qemu to do the right thing if FIPS is enabled in the * kernel. So like libvirt, we pass it unconditionally. */ if (guestfs_int_qemu_supports (g, data->qemu_data, "-enable-fips")) ADD_CMDLINE ("-enable-fips"); /* Newer versions of qemu (from around 2009/12) changed the * behaviour of monitors so that an implicit '-monitor stdio' is * assumed if we are in -nographic mode and there is no other * -monitor option. Only a single stdio device is allowed, so * this broke the '-serial stdio' option. There is a new flag * called -nodefaults which gets rid of all this default crud, so * let's use that to avoid this and any future surprises. */ if (guestfs_int_qemu_supports (g, data->qemu_data, "-nodefaults")) ADD_CMDLINE ("-nodefaults"); /* This disables the host-side display (SDL, Gtk). */ ADD_CMDLINE ("-display"); ADD_CMDLINE ("none"); /* See guestfs.pod / gdb */ if (guestfs_int_get_backend_setting_bool (g, "gdb") > 0) { ADD_CMDLINE ("-S"); ADD_CMDLINE ("-s"); warning (g, "qemu debugging is enabled, connect gdb to tcp::1234 to begin"); } ADD_CMDLINE ("-machine"); ADD_CMDLINE_PRINTF ( #ifdef MACHINE_TYPE MACHINE_TYPE "," #endif #ifdef __aarch64__ "%s" /* gic-version */ #endif "accel=%s", #ifdef __aarch64__ has_kvm && !force_tcg ? "gic-version=host," : "", #endif !force_tcg ? "kvm:tcg" : "tcg"); cpu_model = guestfs_int_get_cpu_model (has_kvm && !force_tcg); if (cpu_model) { ADD_CMDLINE ("-cpu"); ADD_CMDLINE (cpu_model); } if (g->smp > 1) { ADD_CMDLINE ("-smp"); ADD_CMDLINE_PRINTF ("%d", g->smp); } ADD_CMDLINE ("-m"); ADD_CMDLINE_PRINTF ("%d", g->memsize); /* Force exit instead of reboot on panic */ ADD_CMDLINE ("-no-reboot"); /* These are recommended settings, see RHBZ#1053847. */ ADD_CMDLINE ("-rtc"); ADD_CMDLINE ("driftfix=slew"); if (guestfs_int_qemu_supports (g, data->qemu_data, "-no-hpet")) { ADD_CMDLINE ("-no-hpet"); } if (!guestfs_int_version_ge (&data->qemu_version, 1, 3, 0)) ADD_CMDLINE ("-no-kvm-pit-reinjection"); else { /* New non-deprecated way, added in qemu >= 1.3. */ ADD_CMDLINE ("-global"); ADD_CMDLINE ("kvm-pit.lost_tick_policy=discard"); } /* UEFI (firmware) if required. */ if (guestfs_int_get_uefi (g, &uefi_code, &uefi_vars, &uefi_flags) == -1) goto cleanup0; if (uefi_flags & UEFI_FLAG_SECURE_BOOT_REQUIRED) { /* Implementing this requires changes to the qemu command line. * See RHBZ#1367615 for details. As the guestfs_int_get_uefi * function is only implemented for aarch64, and UEFI secure boot * is some way off on aarch64 (2017/2018), we only need to worry * about this later. */ error (g, "internal error: direct backend " "does not implement UEFI secure boot, " "see comments in the code"); goto cleanup0; } if (uefi_code) { ADD_CMDLINE ("-drive"); ADD_CMDLINE_PRINTF ("if=pflash,format=raw,file=%s,readonly", uefi_code); if (uefi_vars) { ADD_CMDLINE ("-drive"); ADD_CMDLINE_PRINTF ("if=pflash,format=raw,file=%s", uefi_vars); } } /* Kernel and initrd. */ ADD_CMDLINE ("-kernel"); ADD_CMDLINE (kernel); ADD_CMDLINE ("-initrd"); ADD_CMDLINE (initrd); /* Add a random number generator (backend for virtio-rng). This * isn't strictly necessary but means we won't need to hang around * when needing entropy. */ if (guestfs_int_qemu_supports_device (g, data->qemu_data, "virtio-rng-pci")) { ADD_CMDLINE ("-object"); ADD_CMDLINE ("rng-random,filename=/dev/urandom,id=rng0"); ADD_CMDLINE ("-device"); ADD_CMDLINE ("virtio-rng-pci,rng=rng0"); } /* Add drives */ virtio_scsi = guestfs_int_qemu_supports_virtio_scsi (g, data->qemu_data, &data->qemu_version); if (virtio_scsi) { /* Create the virtio-scsi bus. */ ADD_CMDLINE ("-device"); ADD_CMDLINE (VIRTIO_SCSI ",id=scsi"); } ITER_DRIVES (g, i, drv) { CLEANUP_FREE char *file = NULL, *escaped_file = NULL, *param = NULL; if (!drv->overlay) { const char *discard_mode = ""; switch (drv->discard) { case discard_disable: /* Since the default is always discard=ignore, don't specify it * on the command line. This also avoids unnecessary breakage * with qemu < 1.5 which didn't have the option at all. */ break; case discard_enable: if (!guestfs_int_discard_possible (g, drv, &data->qemu_version)) goto cleanup0; /*FALLTHROUGH*/ case discard_besteffort: /* I believe from reading the code that this is always safe as * long as qemu >= 1.5. */ if (guestfs_int_version_ge (&data->qemu_version, 1, 5, 0)) discard_mode = ",discard=unmap"; break; } /* Make the file= parameter. */ file = guestfs_int_drive_source_qemu_param (g, &drv->src); escaped_file = guestfs_int_qemu_escape_param (g, file); /* Make the first part of the -drive parameter, everything up to * the if=... at the end. */ param = safe_asprintf (g, "file=%s%s,cache=%s%s%s%s%s%s%s,id=hd%zu", escaped_file, drv->readonly ? ",snapshot=on" : "", drv->cachemode ? drv->cachemode : "writeback", discard_mode, drv->src.format ? ",format=" : "", drv->src.format ? drv->src.format : "", drv->disk_label ? ",serial=" : "", drv->disk_label ? drv->disk_label : "", drv->copyonread ? ",copy-on-read=on" : "", i); } else { /* Writable qcow2 overlay on top of read-only drive. */ escaped_file = guestfs_int_qemu_escape_param (g, drv->overlay); param = safe_asprintf (g, "file=%s,cache=unsafe,format=qcow2%s%s,id=hd%zu", escaped_file, drv->disk_label ? ",serial=" : "", drv->disk_label ? drv->disk_label : "", i); } /* If there's an explicit 'iface', use it. Otherwise default to * virtio-scsi if available. Otherwise default to virtio-blk. */ if (drv->iface && STREQ (drv->iface, "virtio")) /* virtio-blk */ goto virtio_blk; #if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) else if (drv->iface && STREQ (drv->iface, "ide")) { error (g, "'ide' interface does not work on ARM or PowerPC"); goto cleanup0; } #endif else if (drv->iface) { ADD_CMDLINE ("-drive"); ADD_CMDLINE_PRINTF ("%s,if=%s", param, drv->iface); } else if (virtio_scsi) { ADD_CMDLINE ("-drive"); ADD_CMDLINE_PRINTF ("%s,if=none" /* sic */, param); ADD_CMDLINE ("-device"); ADD_CMDLINE_PRINTF ("scsi-hd,drive=hd%zu", i); } else { virtio_blk: ADD_CMDLINE ("-drive"); ADD_CMDLINE_PRINTF ("%s,if=none" /* sic */, param); ADD_CMDLINE ("-device"); ADD_CMDLINE_PRINTF (VIRTIO_BLK ",drive=hd%zu", i); } } /* Add the ext2 appliance drive (after all the drives). */ if (has_appliance_drive) { ADD_CMDLINE ("-drive"); ADD_CMDLINE_PRINTF ("file=%s,snapshot=on,id=appliance," "cache=unsafe,if=none,format=raw", appliance); if (virtio_scsi) { ADD_CMDLINE ("-device"); ADD_CMDLINE ("scsi-hd,drive=appliance"); } else { ADD_CMDLINE ("-device"); ADD_CMDLINE (VIRTIO_BLK ",drive=appliance"); } appliance_dev = make_appliance_dev (g, virtio_scsi); } /* Create the virtio serial bus. */ ADD_CMDLINE ("-device"); ADD_CMDLINE (VIRTIO_SERIAL); /* Create the serial console. */ ADD_CMDLINE ("-serial"); ADD_CMDLINE ("stdio"); if (g->verbose && guestfs_int_qemu_supports_device (g, data->qemu_data, "Serial Graphics Adapter")) { /* Use sgabios instead of vgabios. This means we'll see BIOS * messages on the serial port, and also works around this bug * in qemu 1.1.0: * https://bugs.launchpad.net/qemu/+bug/1021649 * QEmu has included sgabios upstream since just before 1.0. */ ADD_CMDLINE ("-device"); ADD_CMDLINE ("sga"); } /* Set up virtio-serial for the communications channel. */ ADD_CMDLINE ("-chardev"); ADD_CMDLINE_PRINTF ("socket,path=%s,id=channel0", data->guestfsd_sock); ADD_CMDLINE ("-device"); ADD_CMDLINE ("virtserialport,chardev=channel0,name=org.libguestfs.channel.0"); /* Enable user networking. */ if (g->enable_network) { ADD_CMDLINE ("-netdev"); ADD_CMDLINE ("user,id=usernet,net=169.254.0.0/16"); ADD_CMDLINE ("-device"); ADD_CMDLINE (VIRTIO_NET ",netdev=usernet"); } ADD_CMDLINE ("-append"); flags = 0; if (!has_kvm || force_tcg) flags |= APPLIANCE_COMMAND_LINE_IS_TCG; ADD_CMDLINE_STRING_NODUP (guestfs_int_appliance_command_line (g, appliance_dev, flags)); /* Note: custom command line parameters must come last so that * qemu -set parameters can modify previously added options. */ /* Add the extra options for the qemu command line specified * at configure time. */ #ifdef QEMU_OPTIONS if (STRNEQ (QEMU_OPTIONS, "")) add_cmdline_shell_unquoted (g, &cmdline, QEMU_OPTIONS); #endif /* Add any qemu parameters. */ for (hp = g->hv_params; hp; hp = hp->next) { ADD_CMDLINE (hp->hv_param); if (hp->hv_value) ADD_CMDLINE (hp->hv_value); } /* Finish off the command line. */ guestfs_int_end_stringsbuf (g, &cmdline); r = fork (); if (r == -1) { perrorf (g, "fork"); if (!g->direct_mode) { close (sv[0]); close (sv[1]); } goto cleanup0; } if (r == 0) { /* Child (qemu). */ if (!g->direct_mode) { /* Set up stdin, stdout, stderr. */ close (0); close (1); close (sv[0]); /* We set the FD_CLOEXEC flag on the socket above, but now (in * the child) it's safe to unset this flag so qemu can use the * socket. */ set_cloexec_flag (sv[1], 0); /* Stdin. */ if (dup (sv[1]) == -1) { dup_failed: perror ("dup failed"); _exit (EXIT_FAILURE); } /* Stdout. */ if (dup (sv[1]) == -1) goto dup_failed; /* Particularly since qemu 0.15, qemu spews all sorts of debug * information on stderr. It is useful to both capture this and * not confuse casual users, so send stderr to the pipe as well. */ close (2); if (dup (sv[1]) == -1) goto dup_failed; close (sv[1]); /* Close any other file descriptors that we don't want to pass * to qemu. This prevents file descriptors which didn't have * O_CLOEXEC set properly from leaking into the subprocess. See * RHBZ#1123007. */ close_file_descriptors (fd > 2); } /* Dump the command line (after setting up stderr above). */ if (g->verbose) print_qemu_command_line (g, cmdline.argv); /* Put qemu in a new process group. */ if (g->pgroup) setpgid (0, 0); setenv ("LC_ALL", "C", 1); setenv ("QEMU_AUDIO_DRV", "none", 1); /* Prevents qemu opening /dev/dsp */ TRACE0 (launch_run_qemu); execv (g->hv, cmdline.argv); /* Run qemu. */ perror (g->hv); _exit (EXIT_FAILURE); } /* Parent (library). */ data->pid = r; /* Fork the recovery process off which will kill qemu if the parent * process fails to do so (eg. if the parent segfaults). */ data->recoverypid = -1; if (g->recovery_proc) { r = fork (); if (r == 0) { struct sigaction sa; pid_t qemu_pid = data->pid; pid_t parent_pid = getppid (); /* Remove all signal handlers. See the justification here: * https://www.redhat.com/archives/libvir-list/2008-August/msg00303.html * We don't mask signal handlers yet, so this isn't completely * race-free, but better than not doing it at all. */ memset (&sa, 0, sizeof sa); sa.sa_handler = SIG_DFL; sa.sa_flags = 0; sigemptyset (&sa.sa_mask); for (i = 1; i < NSIG; ++i) sigaction (i, &sa, NULL); /* Close all other file descriptors. This ensures that we don't * hold open (eg) pipes from the parent process. */ close_file_descriptors (1); /* It would be nice to be able to put this in the same process * group as qemu (ie. setpgid (0, qemu_pid)). However this is * not possible because we don't have any guarantee here that * the qemu process has started yet. */ if (g->pgroup) setpgid (0, 0); /* Writing to argv is hideously complicated and error prone. See: * http://git.postgresql.org/gitweb/?p=postgresql.git;a=blob;f=src/backend/utils/misc/ps_status.c;hb=HEAD */ /* Loop around waiting for one or both of the other processes to * disappear. It's fair to say this is very hairy. The PIDs that * we are looking at might be reused by another process. We are * effectively polling. Is the cure worse than the disease? */ for (;;) { if (kill (qemu_pid, 0) == -1) /* qemu's gone away, we aren't needed */ _exit (EXIT_SUCCESS); if (kill (parent_pid, 0) == -1) { /* Parent's gone away, qemu still around, so kill qemu. */ kill (qemu_pid, 9); _exit (EXIT_SUCCESS); } sleep (2); } } /* Don't worry, if the fork failed, this will be -1. The recovery * process isn't essential. */ data->recoverypid = r; } if (!g->direct_mode) { /* Close the other end of the socketpair. */ close (sv[1]); console_sock = sv[0]; /* stdin of child */ sv[0] = -1; } g->state = LAUNCHING; /* Wait for qemu to start and to connect back to us via * virtio-serial and send the GUESTFS_LAUNCH_FLAG message. */ g->conn = guestfs_int_new_conn_socket_listening (g, daemon_accept_sock, console_sock); if (!g->conn) goto cleanup1; /* g->conn now owns these sockets. */ daemon_accept_sock = console_sock = -1; r = g->conn->ops->accept_connection (g, g->conn); if (r == -1) goto cleanup1; if (r == 0) { guestfs_int_launch_failed_error (g); goto cleanup1; } /* NB: We reach here just because qemu has opened the socket. It * does not mean the daemon is up until we read the * GUESTFS_LAUNCH_FLAG below. Failures in qemu startup can still * happen even if we reach here, even early failures like not being * able to open a drive. */ r = guestfs_int_recv_from_daemon (g, &size, &buf); if (r == -1) { guestfs_int_launch_failed_error (g); goto cleanup1; } if (size != GUESTFS_LAUNCH_FLAG) { guestfs_int_launch_failed_error (g); goto cleanup1; } debug (g, "appliance is up"); /* This is possible in some really strange situations, such as * guestfsd starts up OK but then qemu immediately exits. Check for * it because the caller is probably expecting to be able to send * commands after this function returns. */ if (g->state != READY) { error (g, _("qemu launched and contacted daemon, but state != READY")); goto cleanup1; } TRACE0 (launch_end); guestfs_int_launch_send_progress (g, 12); if (has_appliance_drive) guestfs_int_add_dummy_appliance_drive (g); return 0; cleanup1: if (!g->direct_mode && sv[0] >= 0) close (sv[0]); if (data->pid > 0) kill (data->pid, 9); if (data->recoverypid > 0) kill (data->recoverypid, 9); if (data->pid > 0) guestfs_int_waitpid_noerror (data->pid); if (data->recoverypid > 0) guestfs_int_waitpid_noerror (data->recoverypid); data->pid = 0; data->recoverypid = 0; memset (&g->launch_t, 0, sizeof g->launch_t); guestfs_int_free_qemu_data (data->qemu_data); data->qemu_data = NULL; cleanup0: if (daemon_accept_sock >= 0) close (daemon_accept_sock); if (console_sock >= 0) close (console_sock); if (g->conn) { g->conn->ops->free_connection (g, g->conn); g->conn = NULL; } g->state = CONFIG; return -1; } /* Calculate the appliance device name. * * The easy thing would be to use g->nr_drives (indeed, that's what we * used to do). However this breaks if some of the drives being added * use the deprecated 'iface' parameter. To further add confusion, * the format of the 'iface' parameter has never been defined, but * given existing usage we can assume it has one of only three values: * NULL, "ide" or "virtio" (which means virtio-blk). See RHBZ#975797. */ static char * make_appliance_dev (guestfs_h *g, int virtio_scsi) { size_t i, index = 0; struct drive *drv; char dev[64] = "/dev/Xd"; /* Calculate the index of the drive. */ ITER_DRIVES (g, i, drv) { if (virtio_scsi) { if (drv->iface == NULL || STREQ (drv->iface, "ide")) index++; } else /* virtio-blk */ { if (drv->iface == NULL || STRNEQ (drv->iface, "virtio")) index++; } } dev[5] = virtio_scsi ? 's' : 'v'; guestfs_int_drive_name (index, &dev[7]); return safe_strdup (g, dev); /* Caller frees. */ } /* This is called from the forked subprocess just before qemu runs, so * it can just print the message straight to stderr, where it will be * picked up and funnelled through the usual appliance event API. */ static void print_qemu_command_line (guestfs_h *g, char **argv) { int i = 0; int needs_quote; struct timeval tv; gettimeofday (&tv, NULL); fprintf (stderr, "[%05" PRIi64 "ms] ", guestfs_int_timeval_diff (&g->launch_t, &tv)); while (argv[i]) { if (argv[i][0] == '-') /* -option starts a new line */ fprintf (stderr, " \\\n "); if (i > 0) fputc (' ', stderr); /* Does it need shell quoting? This only deals with simple cases. */ needs_quote = strcspn (argv[i], " ") != strlen (argv[i]); if (needs_quote) fputc ('\'', stderr); fprintf (stderr, "%s", argv[i]); if (needs_quote) fputc ('\'', stderr); i++; } fputc ('\n', stderr); } /* Check if a file can be opened. */ static int is_openable (guestfs_h *g, const char *path, int flags) { int fd = open (path, flags); if (fd == -1) { debug (g, "is_openable: %s: %m", path); return 0; } close (fd); return 1; } static int shutdown_direct (guestfs_h *g, void *datav, int check_for_errors) { struct backend_direct_data *data = datav; int ret = 0; int status; struct rusage rusage; /* Signal qemu to shutdown cleanly, and kill the recovery process. */ if (data->pid > 0) { debug (g, "sending SIGTERM to process %d", data->pid); kill (data->pid, SIGTERM); } if (data->recoverypid > 0) kill (data->recoverypid, 9); /* Wait for subprocess(es) to exit. */ if (g->recovery_proc /* RHBZ#998482 */ && data->pid > 0) { if (guestfs_int_wait4 (g, data->pid, &status, &rusage, "qemu") == -1) ret = -1; else if (!WIFEXITED (status) || WEXITSTATUS (status) != 0) { guestfs_int_external_command_failed (g, status, g->hv, NULL); ret = -1; } else /* Print the actual memory usage of qemu, useful for seeing * if techniques like DAX are having any effect. */ debug (g, "qemu maxrss %ldK", rusage.ru_maxrss); } if (data->recoverypid > 0) guestfs_int_waitpid_noerror (data->recoverypid); data->pid = data->recoverypid = 0; if (data->guestfsd_sock[0] != '\0') { unlink (data->guestfsd_sock); data->guestfsd_sock[0] = '\0'; } guestfs_int_free_qemu_data (data->qemu_data); data->qemu_data = NULL; return ret; } static int get_pid_direct (guestfs_h *g, void *datav) { struct backend_direct_data *data = datav; if (data->pid > 0) return data->pid; else { error (g, "get_pid: no qemu subprocess"); return -1; } } /* Maximum number of disks. */ static int max_disks_direct (guestfs_h *g, void *datav) { struct backend_direct_data *data = datav; /* Get qemu help text and version. */ if (data->qemu_data == NULL) { data->qemu_data = guestfs_int_test_qemu (g, &data->qemu_version); if (data->qemu_data == NULL) return -1; } if (guestfs_int_qemu_supports_virtio_scsi (g, data->qemu_data, &data->qemu_version)) return 255; else return 27; /* conservative estimate */ } static struct backend_ops backend_direct_ops = { .data_size = sizeof (struct backend_direct_data), .create_cow_overlay = create_cow_overlay_direct, .launch = launch_direct, .shutdown = shutdown_direct, .get_pid = get_pid_direct, .max_disks = max_disks_direct, }; void guestfs_int_init_direct_backend (void) { guestfs_int_register_backend ("direct", &backend_direct_ops); }