mirror of
https://github.com/libguestfs/libguestfs.git
synced 2026-03-21 22:53:37 +00:00
daemon: Translate device names if Linux device ordering is unstable (RHBZ#1804207).
Linux from around 5.6 now enumerates individual disks in any order (whereas previously it enumerated only drivers in parallel). This means that /dev/sdX ordering is no longer stable - in particular we cannot be sure that /dev/sda inside the guest is the first disk that was attached to the appliance, /dev/sdb the second disk and so on. However we can still use SCSI PCI device numbering as found in /dev/disk/by-path. Use this to translate device names in and out of the appliance. Thanks: Vitaly Kuznetsov, Paolo Bonzini, Dan Berrangé.
This commit is contained in:
13
TODO
13
TODO
@@ -189,6 +189,19 @@ might be replaced by direct use of the library (if this is easier).
|
||||
But it is very hard to be compatible between RHEL6 and RHEL5 when
|
||||
using the library directly.
|
||||
|
||||
Properly fix device name translation
|
||||
------------------------------------
|
||||
|
||||
Currently for Device parameters we pass a string name like "/dev/sda"
|
||||
or "/dev/sdb2" or "/dev/VG/LV" to the daemon. Since Linux broke
|
||||
device enumeration (RHBZ#1804207) this works less well, requiring
|
||||
non-trivial translation within the daemon.
|
||||
|
||||
It would be far better if in the generator we mapped "/dev/XXX" to a
|
||||
proper structure. Device index + partition | LV | MD | ...
|
||||
This would be then used everywhere inside the daemon and mean we would
|
||||
no longer need device name translation at all.
|
||||
|
||||
Visualization
|
||||
-------------
|
||||
|
||||
|
||||
@@ -215,6 +215,7 @@ extern void notify_progress_no_ratelimit (uint64_t position, uint64_t total, con
|
||||
|
||||
/* device-name-translation.c */
|
||||
extern char *device_name_translation (const char *device);
|
||||
extern void device_name_translation_init (void);
|
||||
extern char *reverse_device_name_translation (const char *device);
|
||||
|
||||
/* stubs.c (auto-generated) */
|
||||
|
||||
@@ -27,12 +27,121 @@
|
||||
#include <dirent.h>
|
||||
#include <limits.h>
|
||||
#include <sys/stat.h>
|
||||
#include <errno.h>
|
||||
#include <error.h>
|
||||
|
||||
#include "c-ctype.h"
|
||||
|
||||
#include "daemon.h"
|
||||
|
||||
static char **cache;
|
||||
static size_t cache_size;
|
||||
|
||||
/**
|
||||
* Cache daemon disk mapping.
|
||||
*
|
||||
* When the daemon starts up, populate a cache with the contents
|
||||
* of /dev/disk/by-path. It's easiest to use C<ls -lv> here
|
||||
* since the names are sorted awkwardly.
|
||||
*/
|
||||
void
|
||||
device_name_translation_init (void)
|
||||
{
|
||||
const char *by_path = "/dev/disk/by-path";
|
||||
CLEANUP_FREE char *out = NULL, *err = NULL;
|
||||
CLEANUP_FREE_STRING_LIST char **lines = NULL;
|
||||
size_t i, j;
|
||||
int r;
|
||||
|
||||
r = command (&out, &err, "ls", "-1v", by_path, NULL);
|
||||
if (r == -1)
|
||||
error (EXIT_FAILURE, 0,
|
||||
"failed to initialize device name translation cache: %s", err);
|
||||
|
||||
lines = split_lines (out);
|
||||
if (lines == NULL)
|
||||
error (EXIT_FAILURE, errno, "split_lines");
|
||||
|
||||
cache_size = guestfs_int_count_strings (lines);
|
||||
cache = calloc (cache_size, sizeof (char *));
|
||||
if (cache == NULL)
|
||||
error (EXIT_FAILURE, errno, "calloc");
|
||||
|
||||
/* Look up each device name. It should be a symlink to /dev/sdX. */
|
||||
for (i = j = 0; i < cache_size; ++i) {
|
||||
/* Ignore entries for partitions. */
|
||||
if (strstr (lines[i], "-part") == NULL) {
|
||||
CLEANUP_FREE char *full;
|
||||
char *device;
|
||||
|
||||
if (asprintf (&full, "%s/%s", by_path, lines[i]) == -1)
|
||||
error (EXIT_FAILURE, errno, "asprintf");
|
||||
|
||||
device = realpath (full, NULL);
|
||||
if (device == NULL)
|
||||
error (EXIT_FAILURE, errno, "realpath: %s", full);
|
||||
|
||||
/* Ignore the root device. */
|
||||
if (is_root_device (device)) {
|
||||
free (device);
|
||||
continue;
|
||||
}
|
||||
|
||||
cache[j++] = device;
|
||||
}
|
||||
}
|
||||
|
||||
/* This is the final cache size because we deleted entries above. */
|
||||
cache_size = j;
|
||||
}
|
||||
|
||||
/* Free the cache on program exit. */
|
||||
static void device_name_translation_free (void) __attribute__((destructor));
|
||||
|
||||
static void
|
||||
device_name_translation_free (void)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < cache_size; ++i)
|
||||
free (cache[i]);
|
||||
free (cache);
|
||||
cache = NULL;
|
||||
cache_size = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform device name translation.
|
||||
*
|
||||
* Libguestfs defines a few standard formats for device names.
|
||||
* (see also L<guestfs(3)/BLOCK DEVICE NAMING> and
|
||||
* L<guestfs(3)/guestfs_canonical_device_name>). They are:
|
||||
*
|
||||
* =over 4
|
||||
*
|
||||
* =item F</dev/sdX[N]>
|
||||
*
|
||||
* =item F</dev/hdX[N]>
|
||||
*
|
||||
* =item F</dev/vdX[N]>
|
||||
*
|
||||
* These mean the Nth partition on the Xth device. Because
|
||||
* Linux no longer enumerates devices in the order they are
|
||||
* passed to qemu, we must translate these by looking up
|
||||
* the actual device using /dev/disk/by-path/
|
||||
*
|
||||
* =item F</dev/mdX>
|
||||
*
|
||||
* =item F</dev/VG/LV>
|
||||
*
|
||||
* =item F</dev/mapper/...>
|
||||
*
|
||||
* =item F</dev/dm-N>
|
||||
*
|
||||
* These are not translated here.
|
||||
*
|
||||
* =back
|
||||
*
|
||||
* It returns a newly allocated string which the caller must free.
|
||||
*
|
||||
* It returns C<NULL> on error. B<Note> it does I<not> call
|
||||
@@ -45,18 +154,59 @@ char *
|
||||
device_name_translation (const char *device)
|
||||
{
|
||||
int fd;
|
||||
char *ret;
|
||||
char *ret = NULL;
|
||||
size_t len;
|
||||
|
||||
fd = open (device, O_RDONLY|O_CLOEXEC);
|
||||
if (fd >= 0) {
|
||||
close (fd);
|
||||
return strdup (device);
|
||||
/* /dev/sdX[N] and aliases like /dev/vdX[N]. */
|
||||
if (STRPREFIX (device, "/dev/") &&
|
||||
strchr (device+5, '/') == NULL && /* not an LV name */
|
||||
device[5] != 'm' && /* not /dev/md - RHBZ#1414682 */
|
||||
((len = strcspn (device+5, "d")) > 0 && len <= 2)) {
|
||||
ssize_t i;
|
||||
const char *start;
|
||||
char dev[16];
|
||||
|
||||
/* Translate to a disk index in /dev/disk/by-path sorted numerically. */
|
||||
start = &device[5+len+1];
|
||||
len = strspn (start, "abcdefghijklmnopqrstuvwxyz");
|
||||
if (len >= sizeof dev - 1) {
|
||||
fprintf (stderr, "unparseable device name: %s\n", device);
|
||||
return NULL;
|
||||
}
|
||||
strcpy (dev, start);
|
||||
dev[len] = '\0';
|
||||
|
||||
i = guestfs_int_drive_index (dev);
|
||||
if (i >= 0 && i < (ssize_t) cache_size) {
|
||||
/* Append the partition name if present. */
|
||||
if (asprintf (&ret, "%s%s", cache[i], start+len) == -1)
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (errno != ENXIO && errno != ENOENT)
|
||||
return NULL;
|
||||
/* If we didn't translate it above, continue with the same name. */
|
||||
if (ret == NULL) {
|
||||
ret = strdup (device);
|
||||
if (ret == NULL)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* If the name begins with "/dev/sd" then try the alternatives. */
|
||||
/* Now check the device is openable. */
|
||||
fd = open (ret, O_RDONLY|O_CLOEXEC);
|
||||
if (fd >= 0) {
|
||||
close (fd);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (errno != ENXIO && errno != ENOENT) {
|
||||
perror (ret);
|
||||
free (ret);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
free (ret);
|
||||
|
||||
/* If the original name begins with "/dev/sd" then try the alternatives. */
|
||||
if (!STRPREFIX (device, "/dev/sd"))
|
||||
return NULL;
|
||||
device += 7; /* device == "a1" etc. */
|
||||
@@ -97,13 +247,34 @@ device_name_translation (const char *device)
|
||||
char *
|
||||
reverse_device_name_translation (const char *device)
|
||||
{
|
||||
char *ret;
|
||||
char *ret = NULL;
|
||||
size_t i;
|
||||
|
||||
/* Look it up in the cache, and if found return the canonical name.
|
||||
* If not found return a copy of the original string.
|
||||
*/
|
||||
for (i = 0; i < cache_size; ++i) {
|
||||
const size_t len = strlen (cache[i]);
|
||||
|
||||
if (STREQ (device, cache[i]) ||
|
||||
(STRPREFIX (device, cache[i]) && c_isdigit (device[len]))) {
|
||||
char drv[16];
|
||||
|
||||
guestfs_int_drive_name (i, drv);
|
||||
if (asprintf (&ret, "/dev/sd%s%s", drv, &device[len]) == -1) {
|
||||
reply_with_perror ("asprintf");
|
||||
return NULL;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Currently a no-op. */
|
||||
ret = strdup (device);
|
||||
if (ret == NULL) {
|
||||
reply_with_perror ("strdup");
|
||||
return NULL;
|
||||
ret = strdup (device);
|
||||
if (ret == NULL) {
|
||||
reply_with_perror ("strdup");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
||||
@@ -23,18 +23,33 @@ open Std_utils
|
||||
|
||||
open Utils
|
||||
|
||||
let map_block_devices ~return_md f =
|
||||
let devs = Sys.readdir "/sys/block" in
|
||||
let devs = Array.to_list devs in
|
||||
let map_block_devices f =
|
||||
(* We need to get the devices in translated order. Use the
|
||||
* same command that we use in device-name-translation.c.
|
||||
*)
|
||||
let path = "/dev/disk/by-path" in
|
||||
let devs = command "ls" ["-1v"; path] in
|
||||
let devs = String.trimr devs in
|
||||
let devs = String.nsplit "\n" devs in
|
||||
(* Delete entries for partitions. *)
|
||||
let devs = List.filter (fun dev -> String.find dev "-part" = (-1)) devs in
|
||||
let devs =
|
||||
List.filter_map (
|
||||
fun file ->
|
||||
let dev = Unix_utils.Realpath.realpath (sprintf "%s/%s" path file) in
|
||||
(* Ignore non-/dev devices, and return without /dev/ prefix. *)
|
||||
if String.is_prefix dev "/dev/" then
|
||||
Some (String.sub dev 5 (String.length dev - 5))
|
||||
else
|
||||
None
|
||||
) devs in
|
||||
let devs = List.filter (
|
||||
fun dev ->
|
||||
String.is_prefix dev "sd" ||
|
||||
String.is_prefix dev "hd" ||
|
||||
String.is_prefix dev "ubd" ||
|
||||
String.is_prefix dev "vd" ||
|
||||
String.is_prefix dev "sr" ||
|
||||
(return_md && String.is_prefix dev "md" &&
|
||||
String.length dev >= 3 && Char.isdigit dev.[2])
|
||||
String.is_prefix dev "sr"
|
||||
) devs in
|
||||
|
||||
(* Ignore the root device. *)
|
||||
@@ -57,6 +72,20 @@ let map_block_devices ~return_md f =
|
||||
(* Call the map function for the devices left in the list. *)
|
||||
List.map f devs
|
||||
|
||||
(* md devices are not listed under /dev/disk/by-path, so won't *
|
||||
* appear in the list above. Instead we look for them in /sys/block.
|
||||
* We don't have to do anything special about device name translation.
|
||||
*)
|
||||
let map_md_devices f =
|
||||
let devs = Sys.readdir "/sys/block" in
|
||||
let devs = Array.to_list devs in
|
||||
let devs = List.filter (
|
||||
fun dev ->
|
||||
String.is_prefix dev "md" &&
|
||||
String.length dev >= 3 && Char.isdigit dev.[2]
|
||||
) devs in
|
||||
List.map f devs
|
||||
|
||||
let list_devices () =
|
||||
(* For backwards compatibility, don't return MD devices in the list
|
||||
* returned by guestfs_list_devices. This is because most API users
|
||||
@@ -67,14 +96,12 @@ let list_devices () =
|
||||
* by QEMU, and there is a special API for them,
|
||||
* guestfs_list_md_devices.
|
||||
*)
|
||||
let devices =
|
||||
map_block_devices ~return_md:false ((^) "/dev/") in
|
||||
sort_device_names devices
|
||||
map_block_devices ((^) "/dev/")
|
||||
|
||||
let rec list_partitions () =
|
||||
let partitions = map_block_devices ~return_md:true add_partitions in
|
||||
let partitions = List.flatten partitions in
|
||||
sort_device_names partitions
|
||||
let partitions = map_block_devices add_partitions in
|
||||
let md_partitions = map_md_devices add_partitions in
|
||||
List.flatten partitions @ List.flatten md_partitions
|
||||
|
||||
and add_partitions dev =
|
||||
(* Open the device's directory under /sys/block *)
|
||||
@@ -85,7 +112,8 @@ and add_partitions dev =
|
||||
* <device>, eg. /sys/block/sda/sda1.
|
||||
*)
|
||||
let parts = List.filter (fun part -> String.is_prefix part dev) parts in
|
||||
List.map ((^) "/dev/") parts
|
||||
let parts = List.map ((^) "/dev/") parts in
|
||||
sort_device_names parts
|
||||
|
||||
let nr_devices () = List.length (list_devices ())
|
||||
|
||||
|
||||
@@ -233,6 +233,9 @@ main (int argc, char *argv[])
|
||||
_umask (0);
|
||||
#endif
|
||||
|
||||
/* Initialize device name translations cache. */
|
||||
device_name_translation_init ();
|
||||
|
||||
/* Connect to virtio-serial channel. */
|
||||
if (!channel)
|
||||
channel = VIRTIO_SERIAL_CHANNEL;
|
||||
|
||||
@@ -30,11 +30,6 @@
|
||||
*/
|
||||
#define RESOLVE_DEVICE(path,path_out,is_filein) \
|
||||
do { \
|
||||
if (!is_device_parameter ((path))) { \
|
||||
if (is_filein) cancel_receive (); \
|
||||
reply_with_error ("%s: %s: expecting a device name", __func__, (path)); \
|
||||
return; \
|
||||
} \
|
||||
(path_out) = device_name_translation ((path)); \
|
||||
if ((path_out) == NULL) { \
|
||||
const int err = errno; \
|
||||
@@ -43,6 +38,11 @@
|
||||
reply_with_perror ("%s: %s", __func__, path); \
|
||||
return; \
|
||||
} \
|
||||
if (!is_device_parameter ((path_out))) { \
|
||||
if (is_filein) cancel_receive (); \
|
||||
reply_with_error ("%s: %s: expecting a device name", __func__, (path)); \
|
||||
return; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/* All functions that take a mountable argument must call this macro.
|
||||
|
||||
@@ -37,6 +37,11 @@ guestfs_impl_canonical_device_name (guestfs_h *g, const char *device)
|
||||
strchr (device+5, '/') == NULL && /* not an LV name */
|
||||
device[5] != 'm' && /* not /dev/md - RHBZ#1414682 */
|
||||
((len = strcspn (device+5, "d")) > 0 && len <= 2)) {
|
||||
/* NB! These do not need to be translated by
|
||||
* device_name_translation. They will be translated if necessary
|
||||
* when the caller uses them in APIs which go through to the
|
||||
* daemon.
|
||||
*/
|
||||
ret = safe_asprintf (g, "/dev/sd%s", &device[5+len+1]);
|
||||
}
|
||||
else if (STRPREFIX (device, "/dev/mapper/") ||
|
||||
|
||||
Reference in New Issue
Block a user