Skip to content

Commit df143a7

Browse files
committed
Refactored VM launch
- Modular QEMU command-line construction groups related options together
- vCPU PMU now set to off (-cpu pmu=off)
- VGA now set to none (-vga none)
- virtiofs tag changed to 'runcvmfs' for clarity
- virtconsole device replacing serial console by default
- SMP explicitly configures CPU sockets == vCPUs
- New kernel cmdline options optimise kernel behaviour
- New virtio-rng-pci device
1 parent 38a1d0f commit df143a7

File tree

3 files changed

+112
-49
lines changed

3 files changed

+112
-49
lines changed

runcvm-scripts/runcvm-ctr-qemu

Lines changed: 102 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,8 @@
99
QEMU_IFUP="$RUNCVM/scripts/runcvm-ctr-qemu-ifup"
1010
QEMU_IFDOWN="$RUNCVM/scripts/runcvm-ctr-qemu-ifdown"
1111

12-
# BREAK="break=mountroot"
13-
PANIC="panic=-1"
14-
1512
INIT="init=/opt/runcvm/scripts/runcvm-vm-init"
1613

17-
SERIAL="mon:stdio"
18-
# SERIAL="stdio"
19-
2014
error() {
2115
echo "$1" >&2
2216
exit 1
@@ -65,7 +59,7 @@ do_disk() {
6559
local UUID=$(blkid -o value "$src" | head -n 1)
6660
mkdir -p "$RUNCVM_VM_MOUNTPOINT/$dst" >&2
6761
echo "UUID=$UUID $dst $fs defaults,noatime 0 0" >>/.runcvm/fstab
68-
DISKS+=("-drive file=$src,format=raw,if=virtio,media=disk,cache=directsync,aio=native")
62+
DISKS+=(-drive file=$src,format=raw,if=virtio,media=disk,cache=directsync,aio=native)
6963
}
7064

7165
# Argument e.g. /disk1,/home,ext4,5G;/disk2,/var,ext4,1G
@@ -122,20 +116,17 @@ fi
122116

123117
if [ "$RUNCVM_ARCH" = "arm64" ]; then
124118
CMD="qemu-system-aarch64"
125-
OPTS=(-cpu max -machine virt,gic-version=max,usb=off)
119+
MACHINE+=(-cpu max -machine virt,gic-version=max,usb=off)
126120
else
127121
CMD="qemu-system-x86_64"
128-
OPTS=(-enable-kvm -cpu host -machine q35,accel=kvm,usb=off,sata=off -device isa-debug-exit)
122+
MACHINE+=(-enable-kvm -cpu host,pmu=off -machine q35,accel=kvm,usb=off,sata=off -device isa-debug-exit)
129123
fi
130124

131125
if [ -n "$RUNCVM_QEMU_DISPLAY" ]; then
132-
OPTS+=(-display $RUNCVM_QEMU_DISPLAY)
126+
DISPLAY+=(-display $RUNCVM_QEMU_DISPLAY)
133127
else
134-
OPTS+=(-nographic)
135-
fi
136-
137-
if [ "$RUNCVM_KERNEL_DEBUG" = "1" ]; then
138-
APPEND=("console=ttyS0")
128+
DISPLAY+=(-nographic)
129+
DISPLAY+=(-vga none)
139130
fi
140131

141132
if [ "$RUNCVM_BIOS_DEBUG" != "1" ]; then
@@ -145,48 +136,117 @@ if [ "$RUNCVM_BIOS_DEBUG" != "1" ]; then
145136
OPTS+=(-fw_cfg opt/org.seabios/etc/sercon-port,string=0)
146137
fi
147138

148-
# Disable IPv6, which is currently unsupported, at kernel boot time
149-
APPEND+=(ipv6.disable=1)
139+
MEM_BACKEND=(-numa node,memdev=mem)
140+
if [ "$RUNCVM_HUGETLB" != "1" ]; then
141+
# Tests suggest prealloc=on slows down mem-path=/dev/shm
142+
MEM_PATH="/dev/shm" MEM_PREALLOC="off"
143+
MEM_BACKEND+=(-object memory-backend-file,id=mem,size=$RUNCVM_MEM_SIZE,mem-path=$MEM_PATH,share=on,prealloc=$MEM_PREALLOC)
144+
else
145+
# Fastest performance: +15% CPU/net intensive; 3.5x disk intensive.
146+
MEM_BACKEND+=(-object memory-backend-memfd,id=mem,size=$RUNCVM_MEM_SIZE,share=on,prealloc=on,hugetlb=on)
147+
fi
150148

151149
# 16-64 works well and is more performant than 1024 in some scenarios.
152150
# For now, stick with original figure.
153151
VIRTIOFS_QUEUE_SIZE=1024
152+
VIRTIOFS+=(
153+
-chardev socket,id=virtiofs,path=$QEMU_VIRTIOFSD_SOCKET
154+
-device vhost-user-fs-pci,queue-size=$VIRTIOFS_QUEUE_SIZE,chardev=virtiofs,tag=runcvmfs,ats=off
155+
)
154156

155-
if [ "$RUNCVM_HUGETLB" != "1" ]; then
156-
# Tests suggests prealloc=on slows down mem-path=/dev/shm
157-
MEM_PATH=/dev/shm MEM_PREALLOC=off
158-
MEM_BACKEND="-object memory-backend-file,id=mem,size=$RUNCVM_MEM_SIZE,mem-path=$MEM_PATH,share=on,prealloc=$MEM_PREALLOC"
157+
CONSOLE=()
158+
CONSOLE_MONITOR="0"
159+
if [ "$CONSOLE_MONITOR" = "1" ]; then
160+
# Creates a multiplexed stdio backend connected to the serial port (and the QEMU monitor).
161+
# Use with /dev/ttyS0
162+
CONSOLE+=(
163+
-chardev stdio,id=char0,mux=on,signal=off
164+
-serial chardev:char0 -mon chardev=char0
165+
)
166+
167+
# Set monitor escape key to CTRL-T to reduce risk of conflict (as default, CTRL-A, is commonly used)
168+
CONSOLE+=(-echr 20)
169+
170+
CONSOLE_DEV="ttyS0"
159171
else
160-
# Fastest performance: +15% CPU/net intensive; 3.5x disk intensive.
161-
MEM_BACKEND="-object memory-backend-memfd,id=mem,size=$RUNCVM_MEM_SIZE,share=on,prealloc=on,hugetlb=on"
172+
# Creates a stdio backend connected to the virtual console.
173+
# Use with /dev/hvc0
174+
CONSOLE+=(
175+
-chardev stdio,id=char0,mux=off,signal=off
176+
-device virtconsole,chardev=char0,id=console0
177+
)
178+
179+
CONSOLE_DEV="hvc0"
180+
fi
181+
182+
# Save choice of console device
183+
echo "$CONSOLE_DEV" >/.runcvm/console
184+
185+
# Experimental: Enable to specify a dedicated PCI bridge
186+
# OPTS+=(-device pci-bridge,bus=pcie.0,id=pci-bridge-0,chassis_nr=1,shpc=off,addr=2,io-reserve=4k,mem-reserve=1m,pref64-reserve=1m)
187+
188+
# Experimental: Enable for a SCSI bus
189+
# OPTS+=(-device virtio-scsi-pci,id=scsi0,disable-modern=true)
190+
191+
# Disable IPv6, which is currently unsupported, at kernel boot time; and reboot immediately on kernel panic (panic=-1)
192+
APPEND+=(ipv6.disable=1 panic=-1)
193+
194+
# Disable unneeded functionality
195+
APPEND+=(scsi_mod.scan=none tsc=reliable no_timer_check rcupdate.rcu_expedited=1 i8042.direct=1 i8042.dumbkbd=1 i8042.nopnp=1 i8042.noaux=1 noreplace-smp reboot=k cryptomgr.notests pci=lastbus=0 selinux=0)
196+
197+
if [ "$RUNCVM_KERNEL_DEBUG" = "1" ]; then
198+
APPEND+=(console=$CONSOLE_DEV)
199+
else
200+
APPEND+=(quiet)
162201
fi
163202

164203
ARGS=(
165-
"${OPTS[@]}"
166204
-no-user-config
167205
-nodefaults
168-
-serial $SERIAL
206+
-no-reboot
207+
208+
-action panic=none
209+
-action reboot=shutdown
210+
211+
"${MACHINE[@]}"
212+
"${DISPLAY[@]}"
213+
"${OPTS[@]}"
214+
215+
# N.B. There is a counterintuitive relationship between cpus and memory, and performance:
216+
# - more cpus need more memory to maintain the same virtiofs disk I/O performance.
169217
-m "$RUNCVM_MEM_SIZE"
170-
-chardev socket,id=char0,path=$QEMU_VIRTIOFSD_SOCKET
171-
-device vhost-user-fs-pci,queue-size=$VIRTIOFS_QUEUE_SIZE,chardev=char0,tag=myfs,ats=on
172-
-kernel $RUNCVM_KERNEL_PATH
173-
-initrd $RUNCVM_KERNEL_INITRAMFS_PATH
174-
-append "$RUNCVM_KERNEL_ROOT $INIT rw ${APPEND[*]} $PANIC $RUNCVM_KERNEL_APPEND $BREAK"
175-
$MEM_BACKEND
176-
-numa node,memdev=mem
177-
-smp $RUNCVM_CPUS
218+
-smp $RUNCVM_CPUS,cores=1,threads=1,sockets=$RUNCVM_CPUS,maxcpus=$RUNCVM_CPUS
219+
220+
# Creates a virtio-serial bus on the PCI bus; this is used for the guest agent (virtserialport) and the virtual console (virtconsole)
221+
-device virtio-serial-pci,id=serial0
222+
223+
# Creates an RNG on the PCI bus
224+
-object rng-random,id=rng0,filename=/dev/urandom -device virtio-rng-pci,rng=rng0
225+
226+
# Memory backend
227+
"${MEM_BACKEND[@]}"
228+
229+
# virtiofs socket and interface
230+
"${VIRTIOFS[@]}"
231+
178232
# Configure host/container tap device with PXE roms disabled
179-
${IFACES[@]}
180-
-no-reboot
181-
${DISKS[@]}
182-
-action panic=none -action reboot=shutdown
233+
"${IFACES[@]}"
234+
"${DISKS[@]}"
235+
236+
# Configure console
237+
"${CONSOLE[@]}"
238+
239+
# Support for guest agent
240+
-chardev socket,id=qemuguest0,path=$QEMU_GUEST_AGENT,server=on,wait=off
241+
-device virtserialport,chardev=qemuguest0,name=org.qemu.guest_agent.0
242+
243+
# Creates a unix socket for the QEMU monitor
183244
-monitor unix:$QEMU_MONITOR_SOCKET,server,nowait
184-
-chardev socket,id=charchannel0,path=$QEMU_GUEST_AGENT,server=on,wait=off
185-
-device virtio-serial
186-
-device virtserialport,chardev=charchannel0,name=org.qemu.guest_agent.0
187245

188-
# Set monitor escape key to CTRL-T to reduce risk of conflict (as default, CTRL-A, is commonly used)
189-
-echr 20
246+
# Kernel and initrd and kernel cmdline
247+
-kernel $RUNCVM_KERNEL_PATH
248+
-initrd $RUNCVM_KERNEL_INITRAMFS_PATH
249+
-append "$RUNCVM_KERNEL_ROOT $INIT rw ${APPEND[*]} $RUNCVM_KERNEL_APPEND"
190250
)
191251

192252
if [[ "$RUNCVM_BREAK" =~ preqemu ]]; then echo Preparing to run: $CMD "${ARGS[@]@Q}"; bash; fi

runcvm-scripts/runcvm-runtime

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -349,19 +349,19 @@ if [ "$COMMAND" = "create" ]; then
349349
case "$RUNCVM_KERNEL_ID" in
350350
debian) RUNCVM_KERNEL_OS_KERNEL_PATH="/vmlinuz"
351351
RUNCVM_KERNEL_OS_INITRAMFS_PATH="/initrd.img"
352-
RUNCVM_KERNEL_ROOT="rootfstype=virtiofs root=myfs noresume nomodeset net.ifnames=1"
352+
RUNCVM_KERNEL_ROOT="rootfstype=virtiofs root=runcvmfs noresume nomodeset net.ifnames=1"
353353
;;
354354
ubuntu) RUNCVM_KERNEL_OS_KERNEL_PATH="/boot/vmlinuz"
355355
RUNCVM_KERNEL_OS_INITRAMFS_PATH="/boot/initrd.img"
356-
RUNCVM_KERNEL_ROOT="rootfstype=virtiofs root=myfs noresume nomodeset net.ifnames=1"
356+
RUNCVM_KERNEL_ROOT="rootfstype=virtiofs root=runcvmfs noresume nomodeset net.ifnames=1"
357357
;;
358358
ol) RUNCVM_KERNEL_OS_KERNEL_PATH="/boot/vmlinuz"
359359
RUNCVM_KERNEL_OS_INITRAMFS_PATH="/boot/initramfs"
360-
RUNCVM_KERNEL_ROOT="root=virtiofs:myfs noresume nomodeset net.ifnames=1"
360+
RUNCVM_KERNEL_ROOT="root=virtiofs:runcvmfs noresume nomodeset net.ifnames=1"
361361
;;
362362
alpine) RUNCVM_KERNEL_OS_KERNEL_PATH="/boot/vmlinuz-virt"
363363
RUNCVM_KERNEL_OS_INITRAMFS_PATH="/boot/initramfs-virt"
364-
RUNCVM_KERNEL_ROOT="rootfstype=virtiofs root=myfs resume= nomodeset"
364+
RUNCVM_KERNEL_ROOT="rootfstype=virtiofs root=runcvmfs resume= nomodeset"
365365
;;
366366

367367
*) error "Unrecognised image O/S '$RUNCVM_KERNEL'; specify --env=RUNCVM_KERNEL=<dist> or --env=RUNCVM_KERNEL=<dist>/<version>"; ;;

runcvm-scripts/runcvm-vm-init

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -124,11 +124,14 @@ _EOE_
124124
# Load original environment
125125
. /.runcvm/config
126126

127+
# Load choice of console device
128+
read -r CONSOLE_DEVICE </.runcvm/console
129+
127130
if [ "$RUNCVM_INIT" = "1" ]; then
128131
# If launched with '--init' (or --env=RUNCVM_INIT=1) then run our own init in place of Docker's/Podman's.
129132

130133
$RUNCVM/bin/cat >/etc/inittab <<_EOE_
131-
ttyS0::respawn:-/opt/runcvm/scripts/runcvm-vm-start
134+
$CONSOLE_DEVICE::respawn:-/opt/runcvm/scripts/runcvm-vm-start
132135
null::respawn:/opt/runcvm/scripts/runcvm-vm-qemu-ga
133136
null::respawn:/opt/runcvm/usr/sbin/dropbear -REF -p $SSHD_PORT -A /opt/runcvm/lib64/tmp/dropbear/libepka_file.so,/.runcvm/dropbear/epka.json -P /.runcvm/dropbear/dropbear.pid
134137
null::ctrlaltdel:/opt/runcvm/bin/poweroff
@@ -154,8 +157,8 @@ else
154157
$RUNCVM/usr/sbin/dropbear -REF -p $SSHD_PORT -A /opt/runcvm/lib64/tmp/dropbear/libepka_file.so,/.runcvm/dropbear/epka.json -P /.runcvm/dropbear/dropbear.pid &>/dev/null &
155158

156159
# Run init from the image
157-
# Pipe input/output from/to serial console
158-
exec </dev/ttyS0 &>/dev/ttyS0
160+
# Pipe input/output from/to console device
161+
exec </dev/$CONSOLE_DEVICE &>/dev/$CONSOLE_DEVICE
159162

160163
# Invoke runcvm-init with --no-fork purely to create controlling tty,
161164
# then exec runcvm-vm-start

0 commit comments

Comments
 (0)