Files
libguestfs/appliance/init
Richard W.M. Jones 7a81df902a appliance/init: Don't set impossible "noop" disk scheduler
Since RHEL 7.4, the noop scheduler is no longer a thing.  Trying to
set it results in the error:

  + echo noop
  /init: line 108: echo: write error: Invalid argument

The current recommendation (https://access.redhat.com/solutions/5427)
is to use mq-deadline, but that's also the default so we don't have to
do anything.

A bigger reason to remove these lines is that kernel 6.11.0 has
introduced a hang where -- rarely -- the ext4 filesystem hangs if you
try to change the scheduler while handling a page fault, even if you're
setting a scheduler that doesn't exist.  I couldn't get much detail
except for a couple of stack traces from different VMs:

  crash> set 234
      PID: 234
  COMMAND: "modprobe"
     TASK: ffff9f5ec3a22f40  [THREAD_INFO: ffff9f5ec3a22f40]
      CPU: 0
    STATE: TASK_UNINTERRUPTIBLE
  crash> bt
  PID: 234      TASK: ffff9f5ec3a22f40  CPU: 0    COMMAND: "modprobe"
   #0 [ffffb21e002e7840] __schedule at ffffffffa718f6d0
   #1 [ffffb21e002e78f8] schedule at ffffffffa7190a27
   #2 [ffffb21e002e7908] __bio_queue_enter at ffffffffa67e121c
   #3 [ffffb21e002e7968] blk_mq_submit_bio at ffffffffa67f358c
   #4 [ffffb21e002e79f0] __submit_bio at ffffffffa67e1e3c
   #5 [ffffb21e002e7a58] submit_bio_noacct_nocheck at ffffffffa67e2326
   #6 [ffffb21e002e7ac0] ext4_mpage_readpages at ffffffffa65ceafc
   #7 [ffffb21e002e7be0] read_pages at ffffffffa6381d17
   #8 [ffffb21e002e7c40] page_cache_ra_unbounded at ffffffffa6381ff5
   #9 [ffffb21e002e7ca8] filemap_fault at ffffffffa63761b5
  #10 [ffffb21e002e7d48] __do_fault at ffffffffa63d1892
  #11 [ffffb21e002e7d70] do_fault at ffffffffa63d2425
  #12 [ffffb21e002e7da0] __handle_mm_fault at ffffffffa63d8c6b
  #13 [ffffb21e002e7e88] handle_mm_fault at ffffffffa63d95c2
  #14 [ffffb21e002e7ec8] do_user_addr_fault at ffffffffa60b34ea
  #15 [ffffb21e002e7f28] exc_page_fault at ffffffffa7186e4e
  #16 [ffffb21e002e7f50] asm_exc_page_fault at ffffffffa72012a6
      RIP: 000055d16159f8d8  RSP: 00007ffdd4c1f340  RFLAGS: 00010206
      RAX: 00000000000bec82  RBX: 00007ff2fd00dc82  RCX: 000055d1615b492a
      RDX: 00007ffdd4c216b0  RSI: 00000000200bec82  RDI: 000055d185725960
      RBP: 00007ffdd4c1f5a0   R8: 0000000000000000   R9: 0000000000000000
      R10: 0000000000000000  R11: 0000000000000202  R12: 00000000200bec82
      R13: 000055d185725960  R14: 00007ffdd4c216b0  R15: 000055d1615b9708
      ORIG_RAX: ffffffffffffffff  CS: 0033  SS: 002b

  crash> set 230
      PID: 230
  COMMAND: "modprobe"
     TASK: ffff98ce03ca3040  [THREAD_INFO: ffff98ce03ca3040]
      CPU: 0
    STATE: TASK_UNINTERRUPTIBLE
  crash> bt
  PID: 230      TASK: ffff98ce03ca3040  CPU: 0    COMMAND: "modprobe"
   #0 [ffffaf9940307840] __schedule at ffffffff9618f6d0
   #1 [ffffaf99403078f8] schedule at ffffffff96190a27
   #2 [ffffaf9940307908] __bio_queue_enter at ffffffff957e121c
   #3 [ffffaf9940307968] blk_mq_submit_bio at ffffffff957f358c
   #4 [ffffaf99403079f0] __submit_bio at ffffffff957e1e3c
   #5 [ffffaf9940307a58] submit_bio_noacct_nocheck at ffffffff957e2326
   #6 [ffffaf9940307ac0] ext4_mpage_readpages at ffffffff955ceafc
   #7 [ffffaf9940307be0] read_pages at ffffffff95381d1a
   #8 [ffffaf9940307c40] page_cache_ra_unbounded at ffffffff95381ff5
   #9 [ffffaf9940307ca8] filemap_fault at ffffffff953761b5
  #10 [ffffaf9940307d48] __do_fault at ffffffff953d1895
  #11 [ffffaf9940307d70] do_fault at ffffffff953d2425
  #12 [ffffaf9940307da0] __handle_mm_fault at ffffffff953d8c6b
  #13 [ffffaf9940307e88] handle_mm_fault at ffffffff953d95c2
  #14 [ffffaf9940307ec8] do_user_addr_fault at ffffffff950b34ea
  #15 [ffffaf9940307f28] exc_page_fault at ffffffff96186e4e
  #16 [ffffaf9940307f50] asm_exc_page_fault at ffffffff962012a6
      RIP: 0000556b7a7468d8  RSP: 00007ffde2ffb560  RFLAGS: 00000206
      RAX: 00000000000bec82  RBX: 00007f5331a0dc82  RCX: 0000556b7a75b92a
      RDX: 00007ffde2ffd8d0  RSI: 00000000200bec82  RDI: 0000556ba8edf960
      RBP: 00007ffde2ffb7c0   R8: 0000000000000000   R9: 0000000000000000
      R10: 0000000000000000  R11: 0000000000000202  R12: 00000000200bec82
      R13: 0000556ba8edf960  R14: 00007ffde2ffd8d0  R15: 0000556b7a760708
      ORIG_RAX: ffffffffffffffff  CS: 0033  SS: 002b

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2303267
(cherry picked from commit b2d682a473)
2024-08-16 15:21:34 +01:00

271 lines
7.3 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash

# Appliance /init: announce start-up, then pin down the environment that
# every later command inherits.
echo "Starting /init script ..."

# Search only the standard system binary directories.
export PATH=/sbin:/usr/sbin:/bin:/usr/bin

# Debian bug 606622.
export RUNLEVEL=S PREVLEVEL=N
# Make sure /tmp /var/tmp are real directories, not symlinks.
# When either path fails the directory test, any non-directory entries
# are removed (rm -f without -r leaves real directories alone) and both
# paths are created with the sticky, world-writable mode 1777.
# NOTE(review): -d follows symlinks, so a symlink that points at an
# existing directory passes this test and is left in place.
if ! [[ -d /tmp && -d /var/tmp ]]; then
    rm -f /tmp /var/tmp
    mkdir /tmp /var/tmp
    chmod 1777 /tmp /var/tmp
fi
# Create the mount points and mount the kernel's virtual filesystems.
# /proc must be mounted before /proc/cmdline is read further down.
mkdir -p /proc /sys
mount -t proc /proc /proc
mount -t sysfs /sys /sys
# devtmpfs is required since udev 176
mount -t devtmpfs /dev /dev
# Make /dev/fd a symlink to the per-process descriptor directory in /proc.
ln -s /proc/self/fd /dev/fd
# Parse the kernel command line early (must be after /proc is mounted).
# Each "guestfs_<name>=1" substring found on the command line sets the
# shell variable of the same name to 1.  Variables for flags that are
# absent are not assigned here.
cmdline=$(</proc/cmdline)

case $cmdline in
    *guestfs_verbose=1*)
        guestfs_verbose=1
        # Verbose mode also traces every command that follows.
        set -x
        ;;
esac

case $cmdline in
    *guestfs_network=1*) guestfs_network=1 ;;
esac

case $cmdline in
    *guestfs_rescue=1*) guestfs_rescue=1 ;;
esac

case $cmdline in
    *guestfs_noreboot=1*) guestfs_noreboot=1 ;;
esac

case $cmdline in
    *guestfs_boot_analysis=1*) guestfs_boot_analysis=1 ;;
esac
# Mount points and filesystems for pseudo-terminals (devpts) and shared
# memory (a tmpfs with mode 1777).
mkdir -p /dev/pts /dev/shm
mount -t devpts /dev/pts /dev/pts
mount -t tmpfs -o mode=1777 shmfs /dev/shm
# /sysroot is only created here; nothing is mounted on it by this step.
mkdir -p /sysroot
# taken from initramfs-tools/init --Hilko Bengen
# /run is a tmpfs limited to 20% of memory, mounted nosuid with mode 0755.
mkdir -p /run
mount -t tmpfs -o "nosuid,size=20%,mode=0755" tmpfs /run
mkdir -p /run/lock
# Relative link: from /var it resolves to /run/lock.
ln -s ../run/lock /var/lock
# Mount selinuxfs only when the kernel command line contains selinux=1.
if [[ $cmdline == *selinux=1* ]]; then
mount -t selinuxfs none /sys/fs/selinux
fi
# On Fedora 23, util-linux creates /etc/mtab in %post .. stupid
# and e2fsprogs fails if the link doesn't exist .. stupid stupid
# Only create the link when nothing exists at /etc/mtab yet.
[[ -e /etc/mtab ]] || ln -s /proc/mounts /etc/mtab

# For openssl (RHBZ#2133884).
# If the crypto-policies back-ends directory exists but has no openssl
# config, and the DEFAULT policy file is available, link the DEFAULT
# policy file in as the config.
if [[ -d /etc/crypto-policies/back-ends &&
      ! -f /etc/crypto-policies/back-ends/opensslcnf.config &&
      -f /usr/share/crypto-policies/DEFAULT/opensslcnf.txt ]]; then
    ln -sf /usr/share/crypto-policies/DEFAULT/opensslcnf.txt \
        /etc/crypto-policies/back-ends/opensslcnf.config
fi
# Static nodes must happen before udev is started.
# Set up kmod static-nodes (RHBZ#1011907).
# kmod writes the node list in tmpfiles format to /run/tmpfiles.d/kmod.conf.
mkdir -p /run/tmpfiles.d
kmod static-nodes --format=tmpfiles --output=/run/tmpfiles.d/kmod.conf
# Create a machine-id with a random UUID
# 16 bytes are read from /dev/urandom and dumped as hex by od ("-A n"
# suppresses the offset column).  The ${machine_id// /} expansion then
# removes every space before the value is written to /etc/machine-id.
machine_id=$(dd if=/dev/urandom bs=16 count=1 status=none | od -x -A n)
echo "${machine_id// /}" > /etc/machine-id
# Set up tmpfiles (must run after kmod.conf is created above).
systemd-tmpfiles --prefix=/dev --prefix=/run --prefix=/var/run --create --boot
# Find udevd and run it directly.
# The first candidate that exists and is executable is used.  UDEVD is
# not cleared beforehand, so a value inherited from the environment
# survives when no candidate matches.
for candidate in \
    /lib/systemd/systemd-udevd \
    /usr/lib/systemd/systemd-udevd \
    /sbin/udevd \
    /lib/udev/udevd \
    /usr/lib/udev/udevd
do
    if [[ -x $candidate ]]; then
        UDEVD=$candidate
        break
    fi
done

if [[ -z $UDEVD ]]; then
    echo "error: udev not found! Things will probably not work ..."
else
    # Only launch the daemon when a binary was found: expanding an empty,
    # unquoted $UDEVD would try to execute "--daemon" as a command.
    "$UDEVD" --daemon #--debug
fi

# Replay device events, then wait (up to 600 seconds) for the udev event
# queue to drain.
udevadm trigger
udevadm settle --timeout=600
# Disk optimizations.
# Increase the SCSI timeout so we can read remote images.
# nullglob is enabled just for this loop so that the body is skipped
# entirely when no /sys/block/sd* entry exists.
shopt -s nullglob
for timeout_file in /sys/block/sd*/device/timeout; do
    echo 300 > "$timeout_file"
done
shopt -u nullglob
# Set up the network.
# The loopback interface is always configured; everything else happens
# only when guestfs_network is 1.
ip addr add 127.0.0.1/8 brd + dev lo scope host
ip link set dev lo up

if [[ $guestfs_network == 1 ]]; then
    # Every entry under conf/ except the "all", "default" and "lo" ones.
    iface=$(ls -I all -I default -I lo /proc/sys/net/ipv4/conf)

    # Two workarounds for Ubuntu:
    touch /etc/fstab
    rm -f /etc/dhcp/dhclient-enter-hooks.d/resolved

    # $iface stays unquoted so that several names are passed to the DHCP
    # client as separate arguments.
    if dhclient --version >/dev/null 2>&1; then
        dhclient $iface
    elif dhcpcd $iface; then
        # https://github.com/NetworkConfiguration/dhcpcd/issues/258
        # Poll resolv.conf (11 attempts, one second apart) until a
        # nameserver line shows up.
        for i in {0..10}; do
            grep nameserver /etc/resolv.conf && break
            sleep 1
        done
    fi
fi
# Scan for MDs but don't run arrays unless all expected drives are present
mdadm -As --auto=yes --no-degraded

# Set up a clean LVM environment.
# Empty LVM configuration file means "all defaults".
mkdir -p /tmp/lvm
touch /tmp/lvm/lvm.conf

# If lvm2 supports a "devices file", we need to disable its use
# (RHBZ#1965941).
# Support is detected by the presence of either helper command; note that
# "command -v" also prints the path it finds.
if command -v lvmdevices || command -v vgimportdevices; then
    printf 'devices {\n\tuse_devicesfile = 0\n}\n' >> /tmp/lvm/lvm.conf
fi

# Point every following LVM command at the private configuration.
export LVM_SYSTEM_DIR=/tmp/lvm
lvmetad

# Scan for LVM.
# A failure to load dm_mod is ignored.
modprobe dm_mod || true
lvm pvscan --cache --activate ay

# Scan for MDs and run all found arrays even if they are in degraded state
mdadm -As --auto=yes --run

# Scan for Windows dynamic disks.
ldmtool create all
# These are useful when debugging.
# The dump is printed only in verbose mode, and is skipped when boot
# analysis is enabled.
if [[ $guestfs_verbose == 1 && $guestfs_boot_analysis != 1 ]]; then
    # Kernel, devices and mounts.
    uname -a
    ls -lR /dev
    cat /proc/mounts
    cat /proc/mdstat

    # LVM state.
    lvm config
    lvm pvs
    lvm vgs
    lvm lvs

    # Network state.
    ip a
    ip r
    cat /etc/resolv.conf

    lsmod
    #hwclock -r
    date
    printf '%s' "clocksource: "
    cat /sys/devices/system/clocksource/clocksource0/current_clocksource
    #ping -n -v -c 5 8.8.8.8
    printf '%s' "uptime: "
    cat /proc/uptime
fi
# Run the daemon.
# Build the guestfsd command line in $cmd as a space-separated string;
# it is word-split when it is executed later in this script.
cmd="guestfsd"

# Take guestfs_channel=VALUE from the kernel command line.  The value is
# extracted with parameter expansion rather than eval, so shell
# metacharacters in the command line text are never executed.
channel_matches=$(grep -Eo 'guestfs_channel=[^[:space:]]+' /proc/cmdline)
if [[ -n $channel_matches ]]; then
    # grep -o prints one match per line; the last occurrence wins.
    channel_match=${channel_matches##*$'\n'}
    guestfs_channel=${channel_match#guestfs_channel=}
fi

if [[ -n $guestfs_channel ]]; then
    cmd="$cmd --channel $guestfs_channel"
fi
if [[ $guestfs_verbose == 1 ]]; then
    cmd="$cmd --verbose"
fi
if [[ $guestfs_network == 1 ]]; then
    cmd="$cmd --network"
fi
if false; then
    # To get a stack trace if the daemon crashes:
    # (1) change this section to 'if true'
    # (2) add 'gdb' to 'appliance/packagelist.in'
    unset LD_PRELOAD
    # Write the gdb batch script in one go, one command per line.
    printf '%s\n' \
        'set pagination off' \
        'run' \
        'info registers' \
        'x/16i $pc' \
        't a a bt' \
        'quit' > /tmp/gdb-script
    # Wrap the daemon command so that it runs under gdb.
    cmd="gdb -batch -x /tmp/gdb-script --args $cmd"
fi
if [[ $guestfs_rescue != 1 ]]; then
    # Normal mode: print the command, then run the daemon in the
    # foreground.  $cmd is unquoted so that it splits into words.
    echo $cmd
    $cmd
else
    # Run virt-rescue shell.
    # We need a daemon, even in virt-rescue.
    $cmd &
    # XXX This gives a bit of time for virt-rescue to connect to the
    # daemon and mount any filesystems.
    sleep 2

    # Get name of the serial port, from console= passed by libguestfs.
    # XXX Consider using /proc/consoles
    guestfs_serial=$(grep -Eo 'console=[^[:space:]]+' /proc/cmdline | sed s/console=//)

    # Remove LD_PRELOAD=libSegFault set above.
    # NOTE(review): no LD_PRELOAD assignment is visible earlier in this
    # script - confirm whether this is still needed.
    unset LD_PRELOAD

    # Recreate the rescue shell's .bashrc from scratch: the TERM=...
    # setting copied from the kernel command line, the prompt, and the
    # export of both.
    {
        grep -Eo 'TERM=[^[:space:]]+' /proc/cmdline
        echo "PS1='><rescue> '"
        echo "export TERM PS1"
    } > "$HOME/.bashrc"

    # The shell is opened by default on /dev/console, which (on Linux)
    # is not a controlling terminal, causing job control to fail.  For
    # how we work around this, see:
    # https://busybox.net/FAQ.html#job_control
    #
    # Starts bash in a new session (setsid) with stdin, stdout and stderr
    # attached to the serial device named by $guestfs_serial.
    run_bash_with_ctty ()
    {
        local serial_dev=/dev/$guestfs_serial
        setsid bash -c "exec bash <$serial_dev >$serial_dev 2>&1"
    }

    printf '%s\n' \
        "" \
        "------------------------------------------------------------" \
        "" \
        "Welcome to virt-rescue, the libguestfs rescue shell." \
        "" \
        "Note: The contents of / (root) are the rescue appliance."
    if [[ -d /sysroot/dev ]]; then
        echo "Use 'cd /sysroot' or 'chroot /sysroot' to see guest filesystems."
    else
        echo "You have to mount the guests partitions under /sysroot"
        echo "before you can examine them."
    fi
    echo

    run_bash_with_ctty

    printf '%s\n' \
        "" \
        "virt-rescue: Syncing the disk now before exiting ..." \
        ""
fi
# Flush pending writes to disk before shutting down.
sync

# qemu has the -no-reboot flag, so issuing a reboot here actually
# causes qemu to exit gracefully.
# The reboot is skipped when guestfs_noreboot is 1.
[[ $guestfs_noreboot == 1 ]] || reboot -f