daemon: Translate device names if Linux device ordering is unstable (RHBZ#1804207).

Linux from around 5.6 now enumerates individual disks in any order
(whereas previously it enumerated only drivers in parallel).  This
means that /dev/sdX ordering is no longer stable - in particular we
cannot be sure that /dev/sda inside the guest is the first disk that
was attached to the appliance, /dev/sdb the second disk and so on.

However we can still use SCSI PCI device numbering as found in
/dev/disk/by-path.  Use this to translate device names in and out of
the appliance.

Thanks: Vitaly Kuznetsov, Paolo Bonzini, Dan Berrangé.
This commit is contained in:
Richard W.M. Jones
2020-02-20 13:05:46 +00:00
parent 92fd5d5d40
commit bca9b94fc5
7 changed files with 252 additions and 31 deletions

13
TODO
View File

@@ -189,6 +189,19 @@ might be replaced by direct use of the library (if this is easier).
But it is very hard to be compatible between RHEL6 and RHEL5 when
using the library directly.
Properly fix device name translation
------------------------------------
Currently for Device parameters we pass a string name like "/dev/sda"
or "/dev/sdb2" or "/dev/VG/LV" to the daemon. Since Linux broke
device enumeration (RHBZ#1804207) this works less well, requiring
non-trivial translation within the daemon.
It would be far better if in the generator we mapped "/dev/XXX" to a
proper structure. Device index + partition | LV | MD | ...
This would be then used everywhere inside the daemon and mean we would
no longer need device name translation at all.
Visualization
-------------

View File

@@ -215,6 +215,7 @@ extern void notify_progress_no_ratelimit (uint64_t position, uint64_t total, con
/* device-name-translation.c */
extern char *device_name_translation (const char *device);
extern void device_name_translation_init (void);
extern char *reverse_device_name_translation (const char *device);
/* stubs.c (auto-generated) */

View File

@@ -27,12 +27,121 @@
#include <dirent.h>
#include <limits.h>
#include <sys/stat.h>
#include <errno.h>
#include <error.h>
#include "c-ctype.h"
#include "daemon.h"
static char **cache;
static size_t cache_size;
/**
* Cache daemon disk mapping.
*
* When the daemon starts up, populate a cache with the contents
* of /dev/disk/by-path. It's easiest to use C<ls -lv> here
* since the names are sorted awkwardly.
*/
void
device_name_translation_init (void)
{
const char *by_path = "/dev/disk/by-path";
CLEANUP_FREE char *out = NULL, *err = NULL;
CLEANUP_FREE_STRING_LIST char **lines = NULL;
size_t i, j;
int r;
r = command (&out, &err, "ls", "-1v", by_path, NULL);
if (r == -1)
error (EXIT_FAILURE, 0,
"failed to initialize device name translation cache: %s", err);
lines = split_lines (out);
if (lines == NULL)
error (EXIT_FAILURE, errno, "split_lines");
cache_size = guestfs_int_count_strings (lines);
cache = calloc (cache_size, sizeof (char *));
if (cache == NULL)
error (EXIT_FAILURE, errno, "calloc");
/* Look up each device name. It should be a symlink to /dev/sdX. */
for (i = j = 0; i < cache_size; ++i) {
/* Ignore entries for partitions. */
if (strstr (lines[i], "-part") == NULL) {
CLEANUP_FREE char *full;
char *device;
if (asprintf (&full, "%s/%s", by_path, lines[i]) == -1)
error (EXIT_FAILURE, errno, "asprintf");
device = realpath (full, NULL);
if (device == NULL)
error (EXIT_FAILURE, errno, "realpath: %s", full);
/* Ignore the root device. */
if (is_root_device (device)) {
free (device);
continue;
}
cache[j++] = device;
}
}
/* This is the final cache size because we deleted entries above. */
cache_size = j;
}
/* Free the cache on program exit. */
static void device_name_translation_free (void) __attribute__((destructor));
static void
device_name_translation_free (void)
{
size_t i;
for (i = 0; i < cache_size; ++i)
free (cache[i]);
free (cache);
cache = NULL;
cache_size = 0;
}
/**
* Perform device name translation.
*
* Libguestfs defines a few standard formats for device names.
* (see also L<guestfs(3)/BLOCK DEVICE NAMING> and
* L<guestfs(3)/guestfs_canonical_device_name>). They are:
*
* =over 4
*
* =item F</dev/sdX[N]>
*
* =item F</dev/hdX[N]>
*
* =item F</dev/vdX[N]>
*
* These mean the Nth partition on the Xth device. Because
* Linux no longer enumerates devices in the order they are
* passed to qemu, we must translate these by looking up
* the actual device using /dev/disk/by-path/
*
* =item F</dev/mdX>
*
* =item F</dev/VG/LV>
*
* =item F</dev/mapper/...>
*
* =item F</dev/dm-N>
*
* These are not translated here.
*
* =back
*
* It returns a newly allocated string which the caller must free.
*
* It returns C<NULL> on error. B<Note> it does I<not> call
@@ -45,18 +154,59 @@ char *
device_name_translation (const char *device)
{
int fd;
char *ret;
char *ret = NULL;
size_t len;
fd = open (device, O_RDONLY|O_CLOEXEC);
if (fd >= 0) {
close (fd);
return strdup (device);
/* /dev/sdX[N] and aliases like /dev/vdX[N]. */
if (STRPREFIX (device, "/dev/") &&
strchr (device+5, '/') == NULL && /* not an LV name */
device[5] != 'm' && /* not /dev/md - RHBZ#1414682 */
((len = strcspn (device+5, "d")) > 0 && len <= 2)) {
ssize_t i;
const char *start;
char dev[16];
/* Translate to a disk index in /dev/disk/by-path sorted numerically. */
start = &device[5+len+1];
len = strspn (start, "abcdefghijklmnopqrstuvwxyz");
if (len >= sizeof dev - 1) {
fprintf (stderr, "unparseable device name: %s\n", device);
return NULL;
}
strcpy (dev, start);
dev[len] = '\0';
i = guestfs_int_drive_index (dev);
if (i >= 0 && i < (ssize_t) cache_size) {
/* Append the partition name if present. */
if (asprintf (&ret, "%s%s", cache[i], start+len) == -1)
return NULL;
}
}
if (errno != ENXIO && errno != ENOENT)
return NULL;
/* If we didn't translate it above, continue with the same name. */
if (ret == NULL) {
ret = strdup (device);
if (ret == NULL)
return NULL;
}
/* If the name begins with "/dev/sd" then try the alternatives. */
/* Now check the device is openable. */
fd = open (ret, O_RDONLY|O_CLOEXEC);
if (fd >= 0) {
close (fd);
return ret;
}
if (errno != ENXIO && errno != ENOENT) {
perror (ret);
free (ret);
return NULL;
}
free (ret);
/* If the original name begins with "/dev/sd" then try the alternatives. */
if (!STRPREFIX (device, "/dev/sd"))
return NULL;
device += 7; /* device == "a1" etc. */
@@ -97,13 +247,34 @@ device_name_translation (const char *device)
char *
reverse_device_name_translation (const char *device)
{
char *ret;
char *ret = NULL;
size_t i;
/* Look it up in the cache, and if found return the canonical name.
* If not found return a copy of the original string.
*/
for (i = 0; i < cache_size; ++i) {
const size_t len = strlen (cache[i]);
if (STREQ (device, cache[i]) ||
(STRPREFIX (device, cache[i]) && c_isdigit (device[len]))) {
char drv[16];
guestfs_int_drive_name (i, drv);
if (asprintf (&ret, "/dev/sd%s%s", drv, &device[len]) == -1) {
reply_with_perror ("asprintf");
return NULL;
}
break;
}
}
/* Currently a no-op. */
ret = strdup (device);
if (ret == NULL) {
reply_with_perror ("strdup");
return NULL;
ret = strdup (device);
if (ret == NULL) {
reply_with_perror ("strdup");
return NULL;
}
}
return ret;

View File

@@ -23,18 +23,33 @@ open Std_utils
open Utils
let map_block_devices ~return_md f =
let devs = Sys.readdir "/sys/block" in
let devs = Array.to_list devs in
let map_block_devices f =
(* We need to get the devices in translated order. Use the
* same command that we use in device-name-translation.c.
*)
let path = "/dev/disk/by-path" in
let devs = command "ls" ["-1v"; path] in
let devs = String.trimr devs in
let devs = String.nsplit "\n" devs in
(* Delete entries for partitions. *)
let devs = List.filter (fun dev -> String.find dev "-part" = (-1)) devs in
let devs =
List.filter_map (
fun file ->
let dev = Unix_utils.Realpath.realpath (sprintf "%s/%s" path file) in
(* Ignore non-/dev devices, and return without /dev/ prefix. *)
if String.is_prefix dev "/dev/" then
Some (String.sub dev 5 (String.length dev - 5))
else
None
) devs in
let devs = List.filter (
fun dev ->
String.is_prefix dev "sd" ||
String.is_prefix dev "hd" ||
String.is_prefix dev "ubd" ||
String.is_prefix dev "vd" ||
String.is_prefix dev "sr" ||
(return_md && String.is_prefix dev "md" &&
String.length dev >= 3 && Char.isdigit dev.[2])
String.is_prefix dev "sr"
) devs in
(* Ignore the root device. *)
@@ -57,6 +72,20 @@ let map_block_devices ~return_md f =
(* Call the map function for the devices left in the list. *)
List.map f devs
(* md devices are not listed under /dev/disk/by-path, so won't *
* appear in the list above. Instead we look for them in /sys/block.
* We don't have to do anything special about device name translation.
*)
let map_md_devices f =
let devs = Sys.readdir "/sys/block" in
let devs = Array.to_list devs in
let devs = List.filter (
fun dev ->
String.is_prefix dev "md" &&
String.length dev >= 3 && Char.isdigit dev.[2]
) devs in
List.map f devs
let list_devices () =
(* For backwards compatibility, don't return MD devices in the list
* returned by guestfs_list_devices. This is because most API users
@@ -67,14 +96,12 @@ let list_devices () =
* by QEMU, and there is a special API for them,
* guestfs_list_md_devices.
*)
let devices =
map_block_devices ~return_md:false ((^) "/dev/") in
sort_device_names devices
map_block_devices ((^) "/dev/")
let rec list_partitions () =
let partitions = map_block_devices ~return_md:true add_partitions in
let partitions = List.flatten partitions in
sort_device_names partitions
let partitions = map_block_devices add_partitions in
let md_partitions = map_md_devices add_partitions in
List.flatten partitions @ List.flatten md_partitions
and add_partitions dev =
(* Open the device's directory under /sys/block *)
@@ -85,7 +112,8 @@ and add_partitions dev =
* <device>, eg. /sys/block/sda/sda1.
*)
let parts = List.filter (fun part -> String.is_prefix part dev) parts in
List.map ((^) "/dev/") parts
let parts = List.map ((^) "/dev/") parts in
sort_device_names parts
let nr_devices () = List.length (list_devices ())

View File

@@ -233,6 +233,9 @@ main (int argc, char *argv[])
_umask (0);
#endif
/* Initialize device name translations cache. */
device_name_translation_init ();
/* Connect to virtio-serial channel. */
if (!channel)
channel = VIRTIO_SERIAL_CHANNEL;

View File

@@ -30,11 +30,6 @@
*/
#define RESOLVE_DEVICE(path,path_out,is_filein) \
do { \
if (!is_device_parameter ((path))) { \
if (is_filein) cancel_receive (); \
reply_with_error ("%s: %s: expecting a device name", __func__, (path)); \
return; \
} \
(path_out) = device_name_translation ((path)); \
if ((path_out) == NULL) { \
const int err = errno; \
@@ -43,6 +38,11 @@
reply_with_perror ("%s: %s", __func__, path); \
return; \
} \
if (!is_device_parameter ((path_out))) { \
if (is_filein) cancel_receive (); \
reply_with_error ("%s: %s: expecting a device name", __func__, (path)); \
return; \
} \
} while (0)
/* All functions that take a mountable argument must call this macro.

View File

@@ -37,6 +37,11 @@ guestfs_impl_canonical_device_name (guestfs_h *g, const char *device)
strchr (device+5, '/') == NULL && /* not an LV name */
device[5] != 'm' && /* not /dev/md - RHBZ#1414682 */
((len = strcspn (device+5, "d")) > 0 && len <= 2)) {
/* NB! These do not need to be translated by
* device_name_translation. They will be translated if necessary
* when the caller uses them in APIs which go through to the
* daemon.
*/
ret = safe_asprintf (g, "/dev/sd%s", &device[5+len+1]);
}
else if (STRPREFIX (device, "/dev/mapper/") ||