From 1ee879f3e5baa398edc76dc02492662412faf2d7 Mon Sep 17 00:00:00 2001 From: "Richard W.M. Jones" Date: Mon, 16 Dec 2013 14:45:29 +0000 Subject: [PATCH] New tool: virt-diff. This tool can be used to show the differences between two disk images. --- .gitignore | 4 + Makefile.am | 3 +- cat/virt-ls.pod | 21 +- configure.ac | 1 + contrib/make-check-on-installed.pl | 1 + diff/Makefile.am | 88 +++ diff/diff.c | 1121 ++++++++++++++++++++++++++++ diff/test-virt-diff.sh | 62 ++ diff/virt-diff.pod | 269 +++++++ examples/guestfs-recipes.pod | 11 +- fish/guestfish.pod | 1 + po/POTFILES | 1 + run.in | 2 +- src/guestfs.pod | 5 + 14 files changed, 1563 insertions(+), 27 deletions(-) create mode 100644 diff/Makefile.am create mode 100644 diff/diff.c create mode 100755 diff/test-virt-diff.sh create mode 100755 diff/virt-diff.pod diff --git a/.gitignore b/.gitignore index a660fddee..d22086798 100644 --- a/.gitignore +++ b/.gitignore @@ -109,6 +109,9 @@ Makefile.in /df/stamp-virt-df.pod /df/virt-df /df/virt-df.1 +/diff/stamp-virt-diff.pod +/diff/virt-diff +/diff/virt-diff.1 /edit/stamp-virt-*.pod /edit/virt-edit /edit/virt-edit.1 @@ -228,6 +231,7 @@ Makefile.in /html/virt-copy-in.1.html /html/virt-copy-out.1.html /html/virt-df.1.html +/html/virt-diff.1.html /html/virt-edit.1.html /html/virt-filesystems.1.html /html/virt-format.1.html diff --git a/Makefile.am b/Makefile.am index 203c06f9d..247cb45e0 100644 --- a/Makefile.am +++ b/Makefile.am @@ -72,7 +72,7 @@ SUBDIRS += test-tool SUBDIRS += fish # virt-tools in C. 
-SUBDIRS += align cat df edit format inspector rescue +SUBDIRS += align cat diff df edit format inspector rescue # bash-completion SUBDIRS += bash @@ -223,6 +223,7 @@ HTMLFILES = \ html/virt-copy-in.1.html \ html/virt-copy-out.1.html \ html/virt-df.1.html \ + html/virt-diff.1.html \ html/virt-edit.1.html \ html/virt-filesystems.1.html \ html/virt-format.1.html \ diff --git a/cat/virt-ls.pod b/cat/virt-ls.pod index 0b93a0800..482a7a4bf 100755 --- a/cat/virt-ls.pod +++ b/cat/virt-ls.pod @@ -68,24 +68,8 @@ Find regular files modified in the last 24 hours: =head2 DIFFERENCES IN SNAPSHOTS AND BACKING FILES -Find the differences between files in a guest and an earlier snapshot -of the same guest. - - virt-ls -lR -a snapshot.img / --uids > old - virt-ls -lR -a current.img / --uids > new - diff -u old new | less - -The commands above won't find files where the content has changed but -the metadata (eg. file size and modification date) is the same. To do -that, you need to add the I<--checksum> parameter to both C<virt-ls> -commands. I<--checksum> can be quite slow since it has to read and -compute a checksum of every regular file in the virtual machine. - -The commands above won't show changes in file times. Add I<--time-t> -to both C<virt-ls> commands if you want to show all time changes (that -includes file access times). - -To diff the content of files, use L or L. +Although it is possible to use virt-ls to look for differences, since +libguestfs E<ge> 1.26 a new tool is available called L<virt-diff(1)>. =head1 OUTPUT MODES @@ -510,6 +494,7 @@ L, L, L, L, +L<virt-diff(1)>, L, L. 
diff --git a/configure.ac b/configure.ac index 1955b5acc..384658697 100644 --- a/configure.ac +++ b/configure.ac @@ -1680,6 +1680,7 @@ AC_CONFIG_FILES([Makefile csharp/Makefile daemon/Makefile df/Makefile + diff/Makefile edit/Makefile erlang/Makefile erlang/examples/Makefile diff --git a/contrib/make-check-on-installed.pl b/contrib/make-check-on-installed.pl index d2337e800..a6030eda3 100755 --- a/contrib/make-check-on-installed.pl +++ b/contrib/make-check-on-installed.pl @@ -70,6 +70,7 @@ my %mapping = ( '/bin/virt-copy-in$' => "fish", '/bin/virt-copy-out$' => "fish", '/bin/virt-df$' => "df", + '/bin/virt-diff$' => "diff", '/bin/virt-edit$' => "edit", '/bin/virt-filesystems$' => "cat", '/bin/virt-format$' => "format", diff --git a/diff/Makefile.am b/diff/Makefile.am new file mode 100644 index 000000000..5b38e21b3 --- /dev/null +++ b/diff/Makefile.am @@ -0,0 +1,88 @@ +# libguestfs virt-diff +# Copyright (C) 2013 Red Hat Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +include $(top_srcdir)/subdir-rules.mk + +EXTRA_DIST = \ + test-virt-diff.sh \ + virt-diff.pod + +CLEANFILES = stamp-virt-diff.pod + +bin_PROGRAMS = virt-diff + +SHARED_SOURCE_FILES = \ + ../cat/visit.h \ + ../cat/visit.c \ + ../fish/domain.c \ + ../fish/inspect.c \ + ../fish/keys.c \ + ../fish/options.h \ + ../fish/options.c \ + ../fish/uri.h \ + ../fish/uri.c + +virt_diff_SOURCES = \ + $(SHARED_SOURCE_FILES) \ + diff.c + +virt_diff_CPPFLAGS = \ + -DGUESTFS_WARN_DEPRECATED=1 \ + -DLOCALEBASEDIR=\""$(datadir)/locale"\" \ + -I$(top_srcdir)/src -I$(top_builddir)/src \ + -I$(top_srcdir)/cat -I$(top_srcdir)/fish \ + -I$(srcdir)/../gnulib/lib -I../gnulib/lib + +virt_diff_CFLAGS = \ + $(WARN_CFLAGS) $(WERROR_CFLAGS) \ + $(GPROF_CFLAGS) $(GCOV_CFLAGS) \ + $(LIBXML2_CFLAGS) + +virt_diff_LDADD = \ + $(top_builddir)/src/libutils.la \ + $(top_builddir)/src/libguestfs.la \ + $(LIBXML2_LIBS) \ + $(LIBVIRT_LIBS) \ + ../gnulib/lib/libgnu.la + +# Manual pages and HTML files for the website. +man_MANS = virt-diff.1 + +noinst_DATA = \ + $(top_builddir)/html/virt-diff.1.html + +virt-diff.1 $(top_builddir)/html/virt-diff.1.html: stamp-virt-diff.pod + +stamp-virt-diff.pod: virt-diff.pod + $(PODWRAPPER) \ + --man virt-diff.1 \ + --html $(top_builddir)/html/virt-diff.1.html \ + --license GPLv2+ \ + $< + touch $@ + +# Tests. + +TESTS_ENVIRONMENT = $(top_builddir)/run --test + +if ENABLE_APPLIANCE +TESTS = \ + test-virt-diff.sh +endif ENABLE_APPLIANCE + +check-valgrind: + $(MAKE) VG="$(top_builddir)/run @VG@" check diff --git a/diff/diff.c b/diff/diff.c new file mode 100644 index 000000000..b2444171e --- /dev/null +++ b/diff/diff.c @@ -0,0 +1,1121 @@ +/* virt-diff + * Copyright (C) 2013 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "c-ctype.h" +#include "human.h" + +#include "guestfs.h" + +#include "options.h" +#include "visit.h" + +/* Internal tree structure built for each guest. */ +struct tree; +static struct tree *visit_guest (guestfs_h *g); +static int diff_guests (struct tree *t1, struct tree *t2); +static void free_tree (struct tree *); + +/* Libguestfs handles for two source guests. */ +guestfs_h *g, *g2; + +int read_only = 1; +int live = 0; +int verbose = 0; +int keys_from_stdin = 0; +int echo_keys = 0; +const char *libvirt_uri = NULL; +int inspector = 1; + +static int atime = 0; +static int csv = 0; +static int dir_links = 0; +static int dir_times = 0; +static int human = 0; +static int enable_extra_stats = 0; +static int enable_times = 0; +static int enable_uids = 0; +static int enable_xattrs = 0; +static int time_t_output = 0; +static int time_relative = 0; /* 1 = seconds, 2 = days */ +static const char *checksum = NULL; + +static time_t now; + +static void output_start_line (void); +static void output_end_line (void); +static void output_flush (void); +static void output_int64 (int64_t); +static void output_int64_dev (int64_t); +static void output_int64_perms (int64_t); +static void output_int64_size (int64_t); +static void output_int64_time (int64_t); +static void output_int64_uid (int64_t); +static void output_string (const char *); +static void output_string_link 
(const char *); +static void output_binary (const char *, size_t len); + +static void __attribute__((noreturn)) +usage (int status) +{ + if (status != EXIT_SUCCESS) + fprintf (stderr, _("Try `%s --help' for more information.\n"), + program_name); + else { + fprintf (stdout, + _("%s: list differences between virtual machines\n" + "Copyright (C) 2010-2013 Red Hat Inc.\n" + "Usage:\n" + " %s [--options] -d domain1 -D domain2\n" + " %s [--options] -a disk1.img -A disk2.img [-a|-A ...]\n" + "Options:\n" + " -a|--add image Add image from first guest\n" + " --all Same as: --extra-stats --times --uids --xattrs\n" + " --atime Don't ignore access time changes\n" + " -A image Add image from second guest\n" + " --checksum[=...] Use checksum of file content\n" + " -c|--connect uri Specify libvirt URI for -d option\n" + " --csv Comma-Separated Values output\n" + " --dir-links Don't ignore directory nlink changes\n" + " --dir-times Don't ignore directory time changes\n" + " -d|--domain guest Add disks from first libvirt guest\n" + " -D guest Add disks from second libvirt guest\n" + " --echo-keys Don't turn off echo for passphrases\n" + " --extra-stats Display extra stats\n" + " --format[=raw|..] 
Force disk format for -a or -A option\n" + " --help Display brief help\n" + " -h|--human-readable Human-readable sizes in output\n" + " --keys-from-stdin Read passphrases from stdin\n" + " --times Display file times\n" + " --time-days Display file times as days before now\n" + " --time-relative Display file times as seconds before now\n" + " --time-t Display file times as time_t's\n" + " --uids Display UID, GID\n" + " -v|--verbose Verbose messages\n" + " -V|--version Display version and exit\n" + " -x Trace libguestfs API calls\n" + " --xattrs Display extended attributes\n" + "For more information, see the manpage %s(1).\n"), + program_name, program_name, program_name, + program_name); + } + exit (status); +} + +int +main (int argc, char *argv[]) +{ + /* Current time for --time-days, --time-relative output. */ + time (&now); + + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEBASEDIR); + textdomain (PACKAGE); + + enum { HELP_OPTION = CHAR_MAX + 1 }; + + static const char *options = "a:A:c:d:D:hvVx"; + static const struct option long_options[] = { + { "add", 1, 0, 'a' }, + { "all", 0, 0, 0 }, + { "atime", 0, 0, 0 }, + { "checksum", 2, 0, 0 }, + { "checksums", 2, 0, 0 }, + { "csv", 0, 0, 0 }, + { "connect", 1, 0, 'c' }, + { "dir-link", 0, 0, 0 }, + { "dir-links", 0, 0, 0 }, + { "dir-nlink", 0, 0, 0 }, + { "dir-nlinks", 0, 0, 0 }, + { "dir-time", 0, 0, 0 }, + { "dir-times", 0, 0, 0 }, + { "domain", 1, 0, 'd' }, + { "echo-keys", 0, 0, 0 }, + { "extra-stat", 0, 0, 0 }, + { "extra-stats", 0, 0, 0 }, + { "format", 2, 0, 0 }, + { "help", 0, 0, HELP_OPTION }, + { "human-readable", 0, 0, 'h' }, + { "keys-from-stdin", 0, 0, 0 }, + { "time", 0, 0, 0 }, + { "times", 0, 0, 0 }, + { "time-days", 0, 0, 0 }, + { "time-relative", 0, 0, 0 }, + { "time-t", 0, 0, 0 }, + { "uid", 0, 0, 0 }, + { "uids", 0, 0, 0 }, + { "verbose", 0, 0, 'v' }, + { "version", 0, 0, 'V' }, + { "xattr", 0, 0, 0 }, + { "xattrs", 0, 0, 0 }, + { 0, 0, 0, 0 } + }; + struct drv *drvs = NULL; /* First 
guest. */ + struct drv *drvs2 = NULL; /* Second guest. */ + const char *format = NULL; + int c; + int option_index; + struct tree *tree1, *tree2; + + g = guestfs_create (); + if (g == NULL) { + fprintf (stderr, _("guestfs_create: failed to create handle\n")); + exit (EXIT_FAILURE); + } + + g2 = guestfs_create (); + if (g2 == NULL) { + fprintf (stderr, _("guestfs_create: failed to create handle\n")); + exit (EXIT_FAILURE); + } + + for (;;) { + c = getopt_long (argc, argv, options, long_options, &option_index); + if (c == -1) break; + + switch (c) { + case 0: /* options which are long only */ + if (STREQ (long_options[option_index].name, "long-options")) + display_long_options (long_options); + else if (STREQ (long_options[option_index].name, "keys-from-stdin")) { + keys_from_stdin = 1; + } else if (STREQ (long_options[option_index].name, "echo-keys")) { + echo_keys = 1; + } else if (STREQ (long_options[option_index].name, "format")) { + if (!optarg || STREQ (optarg, "")) + format = NULL; + else + format = optarg; + } else if (STREQ (long_options[option_index].name, "all")) { + enable_extra_stats = enable_times = enable_uids = enable_xattrs = 1; + } else if (STREQ (long_options[option_index].name, "atime")) { + atime = 1; + } else if (STREQ (long_options[option_index].name, "csv")) { + csv = 1; + } else if (STREQ (long_options[option_index].name, "checksum") || + STREQ (long_options[option_index].name, "checksums")) { + if (!optarg || STREQ (optarg, "")) + checksum = "md5"; + else + checksum = optarg; + } else if (STREQ (long_options[option_index].name, "dir-link") || + STREQ (long_options[option_index].name, "dir-links") || + STREQ (long_options[option_index].name, "dir-nlink") || + STREQ (long_options[option_index].name, "dir-nlinks")) { + dir_links = 1; + } else if (STREQ (long_options[option_index].name, "dir-time") || + STREQ (long_options[option_index].name, "dir-times")) { + dir_times = 1; + } else if (STREQ (long_options[option_index].name, "extra-stat") || + 
STREQ (long_options[option_index].name, "extra-stats")) { + enable_extra_stats = 1; + } else if (STREQ (long_options[option_index].name, "time") || + STREQ (long_options[option_index].name, "times")) { + enable_times = 1; + } else if (STREQ (long_options[option_index].name, "time-t")) { + enable_times = 1; + time_t_output = 1; + } else if (STREQ (long_options[option_index].name, "time-relative")) { + enable_times = 1; + time_t_output = 1; + time_relative = 1; + } else if (STREQ (long_options[option_index].name, "time-days")) { + enable_times = 1; + time_t_output = 1; + time_relative = 2; + } else if (STREQ (long_options[option_index].name, "uid") || + STREQ (long_options[option_index].name, "uids")) { + enable_uids = 1; + } else if (STREQ (long_options[option_index].name, "xattr") || + STREQ (long_options[option_index].name, "xattrs")) { + enable_xattrs = 1; + } else { + fprintf (stderr, _("%s: unknown long option: %s (%d)\n"), + program_name, long_options[option_index].name, option_index); + exit (EXIT_FAILURE); + } + break; + + case 'a': + OPTION_a; + break; + + case 'A': + OPTION_A; + break; + + case 'c': + OPTION_c; + break; + + case 'd': + OPTION_d; + break; + + case 'D': + OPTION_D; + break; + + case 'h': + human = 1; + break; + + case 'v': + /* OPTION_v; */ + verbose++; + guestfs_set_verbose (g, verbose); + guestfs_set_verbose (g2, verbose); + break; + + case 'V': + OPTION_V; + break; + + case 'x': + /* OPTION_x; */ + guestfs_set_trace (g, 1); + guestfs_set_trace (g2, 1); + break; + + case HELP_OPTION: + usage (EXIT_SUCCESS); + + default: + usage (EXIT_FAILURE); + } + } + + if (drvs == NULL || drvs2 == NULL) { + fprintf (stderr, + _("%s: you must specify some -a|-A|-d|-D options, see %s(1)\n"), + program_name, program_name); + usage (EXIT_FAILURE); + } + + /* CSV && human is unsafe because spreadsheets fail to parse these + * fields correctly. (RHBZ#600977). 
+ */ + if (human && csv) { + fprintf (stderr, _("%s: you cannot use -h and --csv options together.\n"), + program_name); + exit (EXIT_FAILURE); + } + + if (optind != argc) { + fprintf (stderr, _("%s: extra arguments on the command line\n"), + program_name); + usage (EXIT_FAILURE); + } + + /* These are really constants, but they have to be variables for the + * options parsing code. Assert here that they have known-good + * values. + */ + assert (read_only == 1); + assert (inspector == 1); + assert (live == 0); + + unsigned errors = 0; + + /* Mount up first guest. */ + add_drives (drvs, 'a'); + + if (guestfs_launch (g) == -1) + exit (EXIT_FAILURE); + + inspect_mount (); + + if ((tree1 = visit_guest (g)) == NULL) + errors++; + + /* Mount up second guest. */ + add_drives_handle (g2, drvs2, 'a'); + + if (guestfs_launch (g2) == -1) + exit (EXIT_FAILURE); + + inspect_mount_handle (g2); + + if ((tree2 = visit_guest (g2)) == NULL) + errors++; + + if (errors == 0) { + if (diff_guests (tree1, tree2) == -1) + errors++; + } + + free_tree (tree1); + free_tree (tree2); + + free_drives (drvs); + free_drives (drvs2); + + guestfs_close (g); + guestfs_close (g2); + + exit (errors == 0 ? EXIT_SUCCESS : EXIT_FAILURE); +} + +struct tree { + /* We store the handle here in case we need to go and dig into + * the disk to get file content. + */ + guestfs_h *g; + + /* List of files found, sorted by path. */ + struct file *files; + size_t nr_files, allocated; +}; + +struct file { + char *path; + struct guestfs_stat *stat; + struct guestfs_xattr_list *xattrs; + char *csum; /* Checksum. If NULL, use file times and size. 
*/ +}; + +static void +free_tree (struct tree *t) +{ + size_t i; + if (t == NULL) return; /* main() frees both trees even when visit_guest() failed and returned NULL */ + for (i = 0; i < t->nr_files; ++i) { + free (t->files[i].path); + guestfs_free_stat (t->files[i].stat); + guestfs_free_xattr_list (t->files[i].xattrs); + free (t->files[i].csum); + } + + free (t->files); + + free (t); +} + +static int visit_entry (const char *dir, const char *name, const struct guestfs_stat *stat, const struct guestfs_xattr_list *xattrs, void *vt); + +static struct tree * +visit_guest (guestfs_h *g) +{ + struct tree *t = malloc (sizeof *t); + + if (t == NULL) { + perror ("malloc"); + return NULL; + } + + t->g = g; + t->files = NULL; + t->nr_files = t->allocated = 0; + + if (visit (g, "/", visit_entry, t) == -1) { + free_tree (t); + return NULL; + } + + if (verbose) + fprintf (stderr, "read %zu entries from guest\n", t->nr_files); + + return t; +} + +/* Visit each directory/file/etc entry in the tree. This just stores + * the data in the tree. Note we don't store file content, but we + * keep the guestfs handle open so we can pull that out later if we + * need to. + */ +static int +visit_entry (const char *dir, const char *name, + const struct guestfs_stat *stat_orig, + const struct guestfs_xattr_list *xattrs_orig, + void *vt) +{ + struct tree *t = vt; + char *path, *csum = NULL; + struct guestfs_stat *stat; + struct guestfs_xattr_list *xattrs; + size_t i; + + path = full_path (dir, name); + + /* Copy the stats and xattrs because the visit function will + * free them after we return. + */ + stat = guestfs_copy_stat (stat_orig); + if (stat == NULL) { + perror ("guestfs_copy_stat"); + return -1; + } + xattrs = guestfs_copy_xattr_list (xattrs_orig); + if (xattrs == NULL) { + perror ("guestfs_copy_xattr_list"); + return -1; + } + + if (checksum && is_reg (stat->mode)) { + csum = guestfs_checksum (t->g, checksum, path); + if (!csum) + return -1; + } + + /* If --atime option was NOT passed, flatten the atime field. 
*/ + if (!atime) + stat->atime = 0; + + /* If --dir-links option was NOT passed, flatten nlink field in + * directories. + */ + if (!dir_links && is_dir (stat->mode)) + stat->nlink = 0; + + /* If --dir-times option was NOT passed, flatten time fields in + * directories. + */ + if (!dir_times && is_dir (stat->mode)) + stat->atime = stat->mtime = stat->ctime = 0; + + /* Add the pathname and stats to the list. */ + i = t->nr_files; /* not incremented yet: a failed realloc must not leave a phantom slot for free_tree */ + if (i >= t->allocated) { + struct file *old_files = t->files; + size_t old_allocated = t->allocated; + + /* Number of entries in an F15 guest was 111524, and in a + * Windows guest was 10709. + */ + if (old_allocated == 0) + t->allocated = 1024; + else + t->allocated = old_allocated * 2; + + t->files = realloc (old_files, t->allocated * sizeof (struct file)); + if (t->files == NULL) { + perror ("realloc"); + t->files = old_files; + t->allocated = old_allocated; + return -1; + } + } + + t->files[i].path = path; + t->files[i].stat = stat; + t->files[i].xattrs = xattrs; + t->files[i].csum = csum; + t->nr_files++; /* count the entry only once every field of the slot has been stored */ + return 0; +} + +static void deleted (guestfs_h *, struct file *); +static void added (guestfs_h *, struct file *); +static int compare_stats (struct file *, struct file *); +static void changed (guestfs_h *, struct file *, guestfs_h *, struct file *, int st, int cst); +static void diff (struct file *, guestfs_h *, struct file *, guestfs_h *); +static void output_file (guestfs_h *, struct file *); + +static int +diff_guests (struct tree *t1, struct tree *t2) +{ + struct file *i1 = &t1->files[0]; + struct file *i2 = &t2->files[0]; + struct file *end1 = &t1->files[t1->nr_files]; + struct file *end2 = &t2->files[t2->nr_files]; + + while (i1 < end1 || i2 < end2) { + if (i1 < end1 && i2 < end2) { + int comp = strcmp (i1->path, i2->path); + + /* i1->path < i2->path. i1 catches up with i2 (files deleted) */ + if (comp < 0) { + deleted (t1->g, i1); + i1++; + } + /* i1->path > i2->path. 
i2 catches up with i1 (files added) */ + else if (comp > 0) { + added (t2->g, i2); + i2++; + } + /* Otherwise i1->path == i2->path, compare in detail. */ + else { + int st = compare_stats (i1, i2); + if (st != 0) + changed (t1->g, i1, t2->g, i2, st, 0); + else if (i1->csum && i2->csum) { + int cst = strcmp (i1->csum, i2->csum); + changed (t1->g, i1, t2->g, i2, 0, cst); + } + i1++; + i2++; + } + } + /* Reached end of i2 list (files deleted). */ + else if (i1 < end1) { + deleted (t1->g, i1); + i1++; + } + /* Reached end of i1 list (files added). */ + else { + added (t2->g, i2); + i2++; + } + } + + output_flush (); + + return 0; +} + +static void +deleted (guestfs_h *g, struct file *file) +{ + output_start_line (); + output_string ("-"); /* entries only in the first guest are marked '-', the counterpart of added()'s '+' */ + output_file (g, file); + output_end_line (); +} + +static void +added (guestfs_h *g, struct file *file) +{ + output_start_line (); + output_string ("+"); + output_file (g, file); + output_end_line (); +} + +static int +compare_stats (struct file *file1, struct file *file2) +{ + int r; + + r = guestfs_compare_stat (file1->stat, file2->stat); + if (r != 0) + return r; + + r = guestfs_compare_xattr_list (file1->xattrs, file2->xattrs); + if (r != 0) + return r; + + return 0; +} + +static void +changed (guestfs_h *g1, struct file *file1, + guestfs_h *g2, struct file *file2, + int st, int cst) +{ + /* Did file content change? */ + if (cst != 0 || + (is_reg (file1->stat->mode) && is_reg (file2->stat->mode) && + (file1->stat->mtime != file2->stat->mtime || + file1->stat->ctime != file2->stat->ctime || + file1->stat->size != file2->stat->size))) { + output_start_line (); + output_string ("="); + output_file (g1, file1); + output_end_line (); + + if (!csv) { + /* Display file changes. */ + output_flush (); + diff (file1, g1, file2, g2); + } + } + + /* Did just stats change? 
*/ + else if (st != 0) { + output_start_line (); + output_string ("-"); + output_file (g1, file1); + output_end_line (); + output_start_line (); + output_string ("+"); + output_file (g2, file2); /* the '+' line must show the entry as it is in the second guest */ + output_end_line (); + + /* Display stats fields that changed. */ + output_start_line (); + output_string ("#"); + output_string ("changed:"); +#define COMPARE_STAT(n) \ + if (file1->stat->n != file2->stat->n) output_string (#n) + COMPARE_STAT (dev); + COMPARE_STAT (ino); + COMPARE_STAT (mode); + COMPARE_STAT (nlink); + COMPARE_STAT (uid); + COMPARE_STAT (gid); + COMPARE_STAT (rdev); + COMPARE_STAT (size); + COMPARE_STAT (blksize); + COMPARE_STAT (blocks); + COMPARE_STAT (atime); + COMPARE_STAT (mtime); + COMPARE_STAT (ctime); +#undef COMPARE_STAT + if (guestfs_compare_xattr_list (file1->xattrs, file2->xattrs)) + output_string ("xattrs"); + output_end_line (); + } +} + +/* Run a diff on two files. */ +static void +diff (struct file *file1, guestfs_h *g1, struct file *file2, guestfs_h *g2) +{ + CLEANUP_FREE char *tmpdir = guestfs_get_tmpdir (g1); + CLEANUP_FREE char *tmpd, *tmpda = NULL, *tmpdb = NULL, *cmd = NULL; + int r; + + assert (is_reg (file1->stat->mode)); + assert (is_reg (file2->stat->mode)); + + if (asprintf (&tmpd, "%s/virtdiffXXXXXX", tmpdir) < 0) { + perror ("asprintf"); + exit (EXIT_FAILURE); + } + if (mkdtemp (tmpd) == NULL) { + perror ("mkdtemp"); + exit (EXIT_FAILURE); + } + + if (asprintf (&tmpda, "%s/a", tmpd) < 0 || + asprintf (&tmpdb, "%s/b", tmpd) < 0) { + perror ("asprintf"); + exit (EXIT_FAILURE); + } + + if (guestfs_download (g1, file1->path, tmpda) == -1) + goto out; + if (guestfs_download (g2, file2->path, tmpdb) == -1) + goto out; + + /* Note that the tmpdir is safe, and the rest of the path + * should not need quoting. 
+ */ + if (asprintf (&cmd, "diff -u '%s' '%s' | tail -n +3", tmpda, tmpdb) < 0) { + perror ("asprintf"); + exit (EXIT_FAILURE); + } + + if (verbose) + fprintf (stderr, "%s\n", cmd); + r = system (cmd); + if (!WIFEXITED (r) || WEXITSTATUS (r) != 0) { + fprintf (stderr, _("%s: external diff command failed\n"), program_name); + goto out; + } + + printf ("@@ %s @@\n", _("End of diff")); + + out: + unlink (tmpda); + unlink (tmpdb); + rmdir (tmpd); +} + +static void +output_file (guestfs_h *g, struct file *file) +{ + const char *filetype; + size_t i; + CLEANUP_FREE char *link = NULL; + + if (is_reg (file->stat->mode)) + filetype = "-"; + else if (is_dir (file->stat->mode)) + filetype = "d"; + else if (is_chr (file->stat->mode)) + filetype = "c"; + else if (is_blk (file->stat->mode)) + filetype = "b"; + else if (is_fifo (file->stat->mode)) + filetype = "p"; + else if (is_lnk (file->stat->mode)) + filetype = "l"; + else if (is_sock (file->stat->mode)) + filetype = "s"; + else + filetype = "u"; + + output_string (filetype); + output_int64_perms (file->stat->mode & 07777); + + output_int64_size (file->stat->size); + + /* Display extra fields when enabled. */ + if (enable_uids) { + output_int64_uid (file->stat->uid); + output_int64_uid (file->stat->gid); + } + + if (enable_times) { + if (atime) + output_int64_time (file->stat->atime); + output_int64_time (file->stat->mtime); + output_int64_time (file->stat->ctime); + } + + if (enable_extra_stats) { + output_int64_dev (file->stat->dev); + output_int64 (file->stat->ino); + output_int64 (file->stat->nlink); + output_int64_dev (file->stat->rdev); + output_int64 (file->stat->blocks); + } + + if (file->csum) + output_string (file->csum); + + output_string (file->path); + + if (is_lnk (file->stat->mode)) { + /* XXX Fix this for NTFS. 
*/ + link = guestfs_readlink (g, file->path); + if (link) + output_string_link (link); + } + + if (enable_xattrs) { + for (i = 0; i < file->xattrs->len; ++i) { + output_string (file->xattrs->val[i].attrname); + output_binary (file->xattrs->val[i].attrval, + file->xattrs->val[i].attrval_len); + } + } +} + +/* Output functions. + * + * Note that we have to be careful to check return values from printf + * in these functions, because we want to catch ENOSPC errors. + */ +static int field; +static void +next_field (void) +{ + int c = csv ? ',' : ' '; + + field++; + if (field == 1) return; + + if (putchar (c) == EOF) { + perror ("putchar"); + exit (EXIT_FAILURE); + } +} + +static void +output_start_line (void) +{ + field = 0; +} + +static void +output_end_line (void) +{ + if (printf ("\n") < 0) { + perror ("printf"); + exit (EXIT_FAILURE); + } +} + +static void +output_flush (void) +{ + if (fflush (stdout) == EOF) { + perror ("fflush"); + exit (EXIT_FAILURE); + } +} + +static void +output_string (const char *s) +{ + next_field (); + + if (!csv) { + print_no_quoting: + if (printf ("%s", s) < 0) { + perror ("printf"); + exit (EXIT_FAILURE); + } + } + else { + /* Quote CSV string without requiring an external module. */ + size_t i, len; + int needs_quoting = 0; + + len = strlen (s); + + for (i = 0; i < len; ++i) { + if (s[i] == ' ' || s[i] == '"' || + s[i] == '\n' || s[i] == ',') { + needs_quoting = 1; + break; + } + } + + if (!needs_quoting) + goto print_no_quoting; + + /* Quoting for CSV fields. 
*/ + if (putchar ('"') == EOF) { + perror ("putchar"); + exit (EXIT_FAILURE); + } + for (i = 0; i < len; ++i) { + if (s[i] == '"') { + if (putchar ('"') == EOF || putchar ('"') == EOF) { + perror ("putchar"); + exit (EXIT_FAILURE); + } + } else { + if (putchar (s[i]) == EOF) { + perror ("putchar"); + exit (EXIT_FAILURE); + } + } + } + if (putchar ('"') == EOF) { + perror ("putchar"); + exit (EXIT_FAILURE); + } + } +} + +static void +output_string_link (const char *link) +{ + if (csv) + output_string (link); + else { + next_field (); + + if (printf ("-> %s", link) < 0) { + perror ("printf"); + exit (EXIT_FAILURE); + } + } +} + +static void +output_binary (const char *s, size_t len) +{ + size_t i; + + next_field (); + + if (!csv) { + print_no_quoting: + for (i = 0; i < len; ++i) { + if (c_isprint (s[i])) { + if (putchar (s[i]) == EOF) { + perror ("putchar"); + exit (EXIT_FAILURE); + } + } else { + if (printf ("\\x%02x", (unsigned char) s[i]) < 0) { + perror ("printf"); + exit (EXIT_FAILURE); + } + } + } + } + else { + /* Quote CSV string without requiring an external module. */ + int needs_quoting = 0; + + for (i = 0; i < len; ++i) { + if (!c_isprint (s[i]) || s[i] == ' ' || s[i] == '"' || + s[i] == '\n' || s[i] == ',') { + needs_quoting = 1; + break; + } + } + + if (!needs_quoting) + goto print_no_quoting; + + /* Quoting for CSV fields. 
*/ + if (putchar ('"') == EOF) { + perror ("putchar"); + exit (EXIT_FAILURE); + } + for (i = 0; i < len; ++i) { + if (s[i] == '"') { + if (putchar ('"') == EOF || putchar ('"') == EOF) { + perror ("putchar"); + exit (EXIT_FAILURE); + } + } else { + if (c_isprint (s[i])) { + if (putchar (s[i]) == EOF) { + perror ("putchar"); + exit (EXIT_FAILURE); + } + } else { + if (printf ("\\x%02x", (unsigned char) s[i]) < 0) { /* zero-pad and avoid sign extension, same escape as the non-CSV branch */ + perror ("printf"); + exit (EXIT_FAILURE); + } + } + } + } + if (putchar ('"') == EOF) { + perror ("putchar"); + exit (EXIT_FAILURE); + } + } +} + +static void +output_int64 (int64_t i) +{ + next_field (); + /* csv doesn't need escaping */ + if (printf ("%" PRIi64, i) < 0) { + perror ("printf"); + exit (EXIT_FAILURE); + } +} + +static void +output_int64_size (int64_t size) +{ + char buf[LONGEST_HUMAN_READABLE]; + int hopts = human_round_to_nearest|human_autoscale|human_base_1024|human_SI; + int r; + + next_field (); + + if (!csv) { + if (!human) + r = printf ("%10" PRIi64, size); + else + r = printf ("%10s", + human_readable ((uintmax_t) size, buf, hopts, 1, 1)); + } else { + /* CSV is the same as non-CSV but we don't need to right-align. 
*/ + if (!human) + r = printf ("%" PRIi64, size); + else + r = printf ("%s", + human_readable ((uintmax_t) size, buf, hopts, 1, 1)); + } + + if (r < 0) { + perror ("printf"); + exit (EXIT_FAILURE); + } +} + +static void +output_int64_perms (int64_t i) +{ + next_field (); + /* csv doesn't need escaping */ + if (printf ("%04" PRIo64, i) < 0) { + perror ("printf"); + exit (EXIT_FAILURE); + } +} + +static void +output_int64_time (int64_t i) +{ + int r; + + next_field (); + + /* csv doesn't need escaping */ + if (time_t_output) { + switch (time_relative) { + case 0: /* --time-t */ + r = printf ("%10" PRIi64, i); + break; + case 1: /* --time-relative */ + r = printf ("%8" PRIi64, now - i); + break; + case 2: /* --time-days */ + default: + r = printf ("%3" PRIi64, (now - i) / 86400); + break; + } + } + else { + time_t t = (time_t) i; + char buf[64]; + struct tm *tm; + + tm = localtime (&t); + if (tm == NULL) { + perror ("localtime"); + exit (EXIT_FAILURE); + } + + if (strftime (buf, sizeof buf, "%F %T", tm) == 0) { + perror ("strftime"); + exit (EXIT_FAILURE); + } + + r = printf ("%s", buf); + } + + if (r < 0) { + perror ("printf"); + exit (EXIT_FAILURE); + } +} + +static void +output_int64_uid (int64_t i) +{ + next_field (); + /* csv doesn't need escaping */ + if (printf ("%4" PRIi64, i) < 0) { + perror ("printf"); + exit (EXIT_FAILURE); + } +} + +static void +output_int64_dev (int64_t i) +{ + dev_t dev = i; + + next_field (); + + /* csv doesn't need escaping */ + if (printf ("%d:%d", major (dev), minor (dev)) < 0) { + perror ("printf"); + exit (EXIT_FAILURE); + } +} diff --git a/diff/test-virt-diff.sh b/diff/test-virt-diff.sh new file mode 100755 index 000000000..95778d810 --- /dev/null +++ b/diff/test-virt-diff.sh @@ -0,0 +1,62 @@ +#!/bin/bash - +# libguestfs +# Copyright (C) 2013 Red Hat Inc. 
+# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +export LANG=C +set -e + +if [ ! -f ../tests/guests/fedora.img ]; then + echo "$0: test skipped because there is no phony fedora test image" + exit 77 +fi + +if [ "$(../fish/guestfish get-backend)" = "uml" ]; then + echo "$0: test skipped because backend is UML" + exit 77 +fi + +rm -f fedora.qcow2 + +# Modify a copy of the image. +qemu-img create -f qcow2 -b ../tests/guests/fedora.img fedora.qcow2 + +../fish/guestfish -a fedora.qcow2 -i < lists the differences between files in two virtual +machines or disk images. The usual use case is to show the +changes in a VM after it has been running for a while, by taking +a snapshot, running the VM, and then using this tool to show +what changed between the new VM state and the old snapshot. + +This tool will find differences in filenames, file sizes, checksums, +extended attributes, file content and more from a virtual machine or +disk image. However it B<does not> look at the boot loader, unused +space between partitions or within filesystems, "hidden" sectors and +so on. In other words, it is not a security or forensics tool. + +To specify two guests, you have to use the I<-a> or I<-d> option(s) +for the first guest, and the I<-A> or I<-D> option(s) for the second +guest.
The common case is: + + virt-diff -a old.img -A new.img + +or using names known to libvirt: + + virt-diff -d oldguest -D newguest + +=head1 OPTIONS + +=over 4 + +=item B<--help> + +Display brief help. + +=item B<-a> file + +=item B<--add> file + +Add I<file> which should be a disk image from the first virtual +machine. If the virtual machine has multiple block devices, you must +supply all of them with separate I<-a> options. + +The format of the disk image is auto-detected. To override this and +force a particular format use the I<--format=..> option. + +=item B<-a> URI + +=item B<--add> URI + +Add a remote disk. See L<guestfish(1)/ADDING REMOTE STORAGE>. + +=item B<--all> + +Same as I<--extra-stats> I<--times> I<--uids> I<--xattrs>. + +=item B<--atime> + +The default is to ignore changes in file access times, since those are +unlikely to be interesting. Using this flag shows atime differences +as well. + +=item B<-A> file + +=item B<-A> URI + +Add a disk image from the second virtual machine. + +=item B<--checksum> + +=item B<--checksum=crc|md5|sha1|sha224|sha256|sha384|sha512> + +Use a checksum over file contents to detect when regular files have +changed content. + +With no argument, this defaults to using I<md5>. Using an argument, +you can select the checksum type to use. If the flag is omitted then +file times and size are used to determine if a file has changed. + +=item B<-c> URI + +=item B<--connect> URI + +If using libvirt, connect to the given I<URI>. If omitted, then we +connect to the default libvirt hypervisor. + +If you specify guest block devices directly (I<-a>), then libvirt is +not used at all. + +=item B<--csv> + +Write out the results in CSV format (comma-separated values). This +format can be imported easily into databases and spreadsheets, but +read L</NOTE ABOUT CSV FORMAT> below. + +=item B<--dir-links> + +The default is to ignore changes in the number of links in directory +entries, since those are unlikely to be interesting. Using this flag +shows changes to the nlink field of directories.
+ +=item B<--dir-times> + +The default is to ignore changed times on directory entries, since +those are unlikely to be interesting. Using this flag shows changes +to the time fields of directories. + +=item B<-d> guest + +=item B<--domain> guest + +Add all the disks from the named libvirt guest, as the first guest. +Domain UUIDs can be used instead of names. + +=item B<-D> guest + +Add all the disks from the named libvirt guest, as the second guest. +Domain UUIDs can be used instead of names. + +=item B<--echo-keys> + +When prompting for keys and passphrases, virt-diff normally turns +echoing off so you cannot see what you are typing. If you are not +worried about Tempest attacks and there is no one else in the room you +can specify this flag to see what you are typing. + +=item B<--extra-stats> + +Display extra stats. + +=item B<--format=raw|qcow2|..> + +=item B<--format> + +The default for the I<-a>/I<-A> option is to auto-detect the format of +the disk image. Using this forces the disk format for I<-a>/I<-A> +options which follow on the command line. Using I<--format> with no +argument switches back to auto-detection for subsequent I<-a>/I<-A> +options. + +For example: + + virt-diff --format=raw -a disk.img [...] + +forces raw format (no auto-detection) for C<disk.img>. + + virt-diff --format=raw -a disk.img --format -a another.img [...] + +forces raw format (no auto-detection) for C<disk.img> and reverts to +auto-detection for C<another.img>. + +If you have untrusted raw-format guest disk images, you should use +this option to specify the disk format. This avoids a possible +security problem with malicious guests (CVE-2010-3851). + +=item B<-h> + +=item B<--human-readable> + +Display file sizes in human-readable format. + +This option only has effect in I<-lR> output mode. See +L above. + +=item B<--keys-from-stdin> + +Read key or passphrase parameters from stdin. The default is +to try to read passphrases from the user by opening C</dev/tty>. + +=item B<--times> + +Display time fields.
+ +=item B<--time-days> + +Display time fields as days before now (negative if in the future). + +Note that C<0> in output means "up to 1 day before now", or that the +age of the file is between 0 and 86399 seconds. + +=item B<--time-relative> + +Display time fields as seconds before now (negative if in the future). + +=item B<--time-t> + +Display time fields as seconds since the Unix epoch. + +=item B<--uids> + +Display UID and GID fields. + +=item B<-v> + +=item B<--verbose> + +Enable verbose messages for debugging. + +=item B<-V> + +=item B<--version> + +Display version number and exit. + +=item B<-x> + +Enable tracing of libguestfs API calls. + +=item B<--xattrs> + +Display extended attributes. + +=back + +=head1 NOTE ABOUT CSV FORMAT + +Comma-separated values (CSV) is a deceptive format. It I<seems> like +it should be easy to parse, but it is definitely not easy to parse. + +Myth: Just split fields at commas. Reality: This does I<not> work +reliably. This example has two columns: + + "foo,bar",baz + +Myth: Read the file one line at a time. Reality: This does I<not> +work reliably. This example has one row: + + "foo + bar",baz + +For shell scripts, use C<csvtool> (L +also packaged in major Linux distributions). + +For other languages, use a CSV processing library (eg. C<Text::CSV> +for Perl or Python's built-in csv library). + +Most spreadsheets and databases can import CSV directly. + +=head1 EXIT STATUS + +This program returns 0 if successful, or non-zero if there was an +error. + +=head1 SEE ALSO + +L, +L, +L, +L, +L, +L, +L. + +=head1 AUTHOR + +Richard W.M. Jones L + +=head1 COPYRIGHT + +Copyright (C) 2009-2013 Red Hat Inc.
diff --git a/examples/guestfs-recipes.pod b/examples/guestfs-recipes.pod index c0b01b4e4..0dd2e642d 100644 --- a/examples/guestfs-recipes.pod +++ b/examples/guestfs-recipes.pod @@ -180,13 +180,10 @@ of C =head1 Diff two guests; compare a snapshot to the current version -L provides a simple way to find the differences between -two guests (for example if they were originally cloned from the same -source), or between two snapshots from the same guest. See -L. - -There are also experimental patches on the mailing list for a -"virt-diff" tool. +Since libguestfs E<ge> 1.26, use L<virt-diff(1)> to look for +differences between two guests (for example if they were originally +cloned from the same source), or between two snapshots from the same +guest. In earlier versions of libguestfs, use L<virt-ls(1)>. =head1 Disable a systemd service diff --git a/fish/guestfish.pod b/fish/guestfish.pod index 2463d839c..903e63982 100644 --- a/fish/guestfish.pod +++ b/fish/guestfish.pod @@ -1563,6 +1563,7 @@ L, L, L, L, +L<virt-diff(1)>, L, L, L, diff --git a/po/POTFILES b/po/POTFILES index 4e1e7de7a..514e5a7f1 100644 --- a/po/POTFILES +++ b/po/POTFILES @@ -112,6 +112,7 @@ df/estimate-max-threads.c df/main.c df/output.c df/parallel.c +diff/diff.c edit/edit.c erlang/erl-guestfs-proto.c erlang/erl-guestfs.c diff --git a/run.in b/run.in index 07e90885b..0ab3569e7 100755 --- a/run.in +++ b/run.in @@ -74,7 +74,7 @@ fi # Set the PATH to contain all the libguestfs binaries. There are a # lot of binaries, so a lot of path entries. -PATH="$b/align:$b/builder:$b/cat:$b/df:$b/edit:$b/erlang:$b/fish:$b/format:$b/fuse:$b/rescue:$b/resize:$b/sparsify:$b/sysprep:$b/test-tool:$b/tools:$PATH" +PATH="$b/align:$b/builder:$b/cat:$b/df:$b/diff:$b/edit:$b/erlang:$b/fish:$b/format:$b/fuse:$b/rescue:$b/resize:$b/sparsify:$b/sysprep:$b/test-tool:$b/tools:$PATH" export PATH # Set LD_LIBRARY_PATH to contain library.
diff --git a/src/guestfs.pod b/src/guestfs.pod index 441ba933f..1e4b16c1d 100644 --- a/src/guestfs.pod +++ b/src/guestfs.pod @@ -4259,6 +4259,10 @@ actions. L command and documentation. +=item C<diff> + +L<virt-diff(1)> command and documentation. + =item C L command and documentation. @@ -4697,6 +4701,7 @@ L, L, L, L, +L<virt-diff(1)>, L, L, L,