VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/docs/testbox-maintenance.sh

Last change on this file was 98103, checked in by vboxsync, 17 months ago

Copyright year updates by scm.

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 13.6 KB
Line 
1#!/bin/bash
2# $Id: testbox-maintenance.sh 98103 2023-01-17 14:15:46Z vboxsync $
3## @file
4# VirtualBox Validation Kit - testbox maintenance service
5#
6
7#
8# Copyright (C) 2006-2023 Oracle and/or its affiliates.
9#
10# This file is part of VirtualBox base platform packages, as
11# available from https://www.virtualbox.org.
12#
13# This program is free software; you can redistribute it and/or
14# modify it under the terms of the GNU General Public License
15# as published by the Free Software Foundation, in version 3 of the
16# License.
17#
18# This program is distributed in the hope that it will be useful, but
19# WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21# General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, see <https://www.gnu.org/licenses>.
25#
26# The contents of this file may alternatively be used under the terms
27# of the Common Development and Distribution License Version 1.0
28# (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
29# in the VirtualBox distribution, in which case the provisions of the
30# CDDL are applicable instead of those of the GPL.
31#
32# You may elect to license modified versions of this file under the
33# terms and conditions of either the GPL or the CDDL or both.
34#
35# SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
36#
37
38
39#
40# Global Variables (config first).
41#
# Non-empty: remove the PXE config and reboot when done (and reboot after
# errors). Empty: just exit, which is handy when debugging the script.
MY_REBOOT_WHEN_DONE="yes"
#MY_REBOOT_WHEN_DONE="" # enable this for debugging the script

# Mount points of the TFTP and backup shares.
MY_TFTP_ROOT="/mnt/testbox-tftp"
MY_BACKUP_ROOT="/mnt/testbox-backup"
# Regular file whose presence is used to tell whether the backup share is
# mounted. Derived from MY_BACKUP_ROOT so the two cannot drift apart.
MY_BACKUP_MNT_TEST_FILE="${MY_BACKUP_ROOT}/testbox-backup"
# Log file on the backup share that all testboxes append to.
MY_GLOBAL_LOG_FILE="${MY_BACKUP_ROOT}/maintenance.log"
# Block size handed to dd for both backing up and restoring.
MY_DD_BLOCK_SIZE=256K

# Filled in further down once the IP address and shares are known.
MY_IP=""
MY_BACKUP_DIR=""
MY_LOG_FILE=""
MY_PXELINUX_CFG_FILE=""
55
56
##
# Info message.
#
# Prints the message on stdout and, once MY_LOG_FILE has been set, appends a
# timestamped copy (prefixed with MY_IP) to that log file.
#
# All arguments are joined into one message. Runs of whitespace (including
# newlines) are collapsed into single spaces so each message stays on one
# line, but the text is never subject to pathname expansion, so messages
# containing '*', '?' or '[...]' are logged literally.
#
InfoMsg()
{
    local -a MY_MSG_WORDS=()
    local MY_MSG

    # Split on whitespace without globbing. read returns non-zero here because
    # it hits end-of-input before the NUL delimiter; that is expected.
    read -r -d '' -a MY_MSG_WORDS <<< "$*"
    MY_MSG="${MY_MSG_WORDS[*]}"

    printf '%s\n' "${MY_MSG}"
    if test -n "${MY_LOG_FILE}"; then
        printf '%s\n' "$(date -uIsec): ${MY_IP}: info: ${MY_MSG}" >> "${MY_LOG_FILE}"
    fi
}
67
68
##
# Error message and reboot+exit. First argument is exit code.
#
# The remaining arguments are joined into one message (whitespace runs are
# collapsed, no pathname expansion is performed) which is written to stderr,
# to the per-testbox log when MY_LOG_FILE is set, and to the global log when
# the backup share's test file is present.
#
# This function never returns.
#
ErrorMsgExit()
{
    local -a MY_MSG_WORDS=()
    local MY_MSG

    MY_RET=$1
    shift

    # Split on whitespace without globbing. read returns non-zero here because
    # it hits end-of-input before the NUL delimiter; that is expected.
    read -r -d '' -a MY_MSG_WORDS <<< "$*"
    MY_MSG="${MY_MSG_WORDS[*]}"

    printf '%s\n' "testbox-maintenance.sh: error: ${MY_MSG}" >&2
    # Append to the testbox log.
    if test -n "${MY_LOG_FILE}"; then
        printf '%s\n' "$(date -uIsec): ${MY_IP}: error: ${MY_MSG}" >> "${MY_LOG_FILE}"
    fi
    # Append to the global log.
    if test -f "${MY_BACKUP_MNT_TEST_FILE}"; then
        printf '%s\n' "$(date -uIsec): ${MY_IP}: error: ${MY_MSG}" >> "${MY_GLOBAL_LOG_FILE}"
    fi

    #
    # On error we normally wait 5min before rebooting to avoid repeating the
    # same error too many times before the admin finds out. We choose NOT to
    # remove the PXE config file here because (a) the admin might otherwise
    # not notice something went wrong, (b) the system could easily be in a
    # weird unbootable state, (c) the problem might be temporary.
    #
    # While debugging, we just exit here.
    #
    if test -n "${MY_REBOOT_WHEN_DONE}"; then
        sleep 5m
        echo "testbox-maintenance.sh: rebooting (after error)" >&2
        reboot
    fi
    # Fall back to a generic failure status if no exit code was given.
    exit "${MY_RET:-1}"
}
102
#
# Work out the IP address of the box, then look the host name up from it.
#
# The address is the first one printed by 'hostname -I'. It is rejected when
# it is empty, is not exactly one word, or is the loopback address.
#
MY_IP=$(hostname -I | cut -f1 -d' ' | head -1)
if [ -z "${MY_IP}" ] \
   || [ "$(echo "${MY_IP}" | wc -w)" -ne "1" ] \
   || [ "${MY_IP}" = "127.0.0.1" ]; then
    ErrorMsgExit 10 "Failed to get a good IP! (MY_IP=${MY_IP})"
fi

# Second field of the 'getent hosts' output, after squeezing whitespace runs.
MY_HOSTNAME=$(getent hosts "${MY_IP}" | sed -s 's/[[:space:]][[:space:]]*/ /g' | cut -d' ' -f2)
# Fall back to a placeholder when the lookup yields nothing.
: "${MY_HOSTNAME:=unknown}"
114
# Make sure the backup share is mounted (probed via its test file), retrying
# the mount once after 15 seconds before giving up.
if [ ! -f "${MY_BACKUP_MNT_TEST_FILE}" ]; then
    mount "${MY_BACKUP_ROOT}"
    if [ ! -f "${MY_BACKUP_MNT_TEST_FILE}" ]; then
        echo "Retrying mounting '${MY_BACKUP_ROOT}' in 15 seconds..." >&2
        sleep 15
        mount "${MY_BACKUP_ROOT}"
    fi
    [ -f "${MY_BACKUP_MNT_TEST_FILE}" ] || ErrorMsgExit 11 "Backup directory is not mounted."
fi

# The per-testbox backup directory and log file are named after the IP.
MY_BACKUP_DIR="${MY_BACKUP_ROOT}/${MY_IP}"
MY_LOG_FILE="${MY_BACKUP_DIR}/maintenance.log"
mkdir -p "${MY_BACKUP_DIR}"

# Announce the new session in the testbox log and in the global log.
echo "================ $(date -uIsec): ${MY_IP}: ${MY_HOSTNAME} starts a new session ================" >> "${MY_LOG_FILE}"
echo "$(date -uIsec): ${MY_IP}: ${MY_HOSTNAME} says hi." >> "${MY_GLOBAL_LOG_FILE}"
InfoMsg "MY_IP=${MY_IP}<eol>"
133
#
# From here on, send both stdout and stderr through tee so that everything is
# also appended to an output log in the testbox's backup directory.
#
MY_OUTPUT_LOG_FILE="${MY_BACKUP_DIR}/maintenance-output.log"
{
    echo ""
    echo "================ $(date -uIsec): ${MY_IP}: ${MY_HOSTNAME} starts a new session ================"
} >> "${MY_OUTPUT_LOG_FILE}"
exec > >(tee -a "${MY_OUTPUT_LOG_FILE}") 2>&1
141
#
# Convert the IP address to PXELINUX hex format, then check that we've got
# a config file on the TFTP share that we later can remove. We consider it a
# fatal failure if we don't because we've probably got the wrong IP and we'll
# be stuck doing the same stuff over and over again.
#
MY_TMP=$(echo "${MY_IP}" | sed -e 's/\./ /g')
# MY_TMP is deliberately unquoted: each octet must be a separate printf argument.
MY_IP_HEX=$(printf "%02X%02X%02X%02X" ${MY_TMP})
InfoMsg "MY_IP_HEX=${MY_IP_HEX}<eol>"

# The TFTP share counts as mounted when pxelinux.0 is visible in its root.
if test ! -f "${MY_TFTP_ROOT}/pxelinux.0"; then
    mount "${MY_TFTP_ROOT}"
    if test ! -f "${MY_TFTP_ROOT}/pxelinux.0"; then
        echo "Retrying mounting '${MY_TFTP_ROOT}' in 15 seconds..." >&2
        sleep 15
        # Retry the TFTP share itself (this used to remount the backup share).
        mount "${MY_TFTP_ROOT}"
    fi
    if test ! -f "${MY_TFTP_ROOT}/pxelinux.0"; then
        ErrorMsgExit 12 "TFTP share not mounted or missing pxelinux.0 in the root."
    fi
fi

MY_PXELINUX_CFG_FILE="${MY_TFTP_ROOT}/pxelinux.cfg/${MY_IP_HEX}"
if test ! -f "${MY_PXELINUX_CFG_FILE}"; then
    ErrorMsgExit 13 "No pxelinux.cfg file found (${MY_PXELINUX_CFG_FILE}) - wrong IP?"
fi
168
#
# Dig the action out of the kernel command line. When debugging
# (MY_REBOOT_WHEN_DONE is empty) the script's own arguments are used instead.
#
if test -n "${MY_REBOOT_WHEN_DONE}"; then
    InfoMsg "/proc/cmdline: $(cat /proc/cmdline)"
    # Split into words without pathname expansion, and use 'set --' so that an
    # empty command line clears the positional parameters (a bare 'set' would
    # dump all variables) and a leading '-word' is not taken as a shell option.
    MY_CMDLINE_WORDS=()
    read -r -a MY_CMDLINE_WORDS < /proc/cmdline
    set -- "${MY_CMDLINE_WORDS[@]}"
else
    InfoMsg "Using script command line: $*"
fi

# Scan all words; if several testbox-action-* words are given, the last wins.
MY_ACTION=not-found
while test $# -ge 1; do
    case "$1" in
        testbox-action-*)
            MY_ACTION="$1"
            ;;
    esac
    shift
done
if test "${MY_ACTION}" = "not-found"; then
    ErrorMsgExit 14 "No action given. Expected testbox-action-backup, testbox-action-backup-again, testbox-action-restore," \
        "testbox-action-refresh-info, or testbox-action-rescue on the kernel command line."
fi

# Validate and shorten the action.
case "${MY_ACTION}" in
    testbox-action-backup)
        MY_ACTION="backup"
        ;;
    testbox-action-backup-again)
        MY_ACTION="backup-again"
        ;;
    testbox-action-restore)
        MY_ACTION="restore"
        ;;
    testbox-action-refresh-info)
        MY_ACTION="refresh-info"
        ;;
    testbox-action-rescue)
        MY_ACTION="rescue"
        ;;
    *)  ErrorMsgExit 15 "Invalid action '${MY_ACTION}'"
        ;;
esac

# Log the action in both logs: InfoMsg covers the testbox log (and stdout),
# the echo covers the global log using the same line format.
InfoMsg "Executing '${MY_ACTION}'."
echo "$(date -uIsec): ${MY_IP}: info: Executing '${MY_ACTION}'." >> "${MY_GLOBAL_LOG_FILE}"
215
#
# Generate missing info for this testbox if backing up.
#
# The info file is (re)written when it does not exist yet, or when the action
# is backup, backup-again or refresh-info.
#
MY_INFO_FILE="${MY_BACKUP_DIR}/testbox-info.txt"

##
# Writes one section of the info file to stdout: a blank line, a banner
# naming the command (repeated three times), then the command's output.
# The arguments are the command and its arguments; its stderr is captured
# together with its stdout.
#
InfoFileSection()
{
    echo ""
    echo "**** $* ****"
    echo "**** $* ****"
    echo "**** $* ****"
    "$@" 2>&1
}

if test ! -f "${MY_INFO_FILE}" \
   || test "${MY_ACTION}" = "backup" \
   || test "${MY_ACTION}" = "backup-again" \
   || test "${MY_ACTION}" = "refresh-info";
then
    # Open (and truncate) the info file once, with the path quoted.
    {
        echo "IP: ${MY_IP}"
        echo "HEX-IP: ${MY_IP_HEX}"
        echo "Hostname: ${MY_HOSTNAME}"
        InfoFileSection cat /proc/cpuinfo
        InfoFileSection lspci -vvv
        InfoFileSection biosdecode
        InfoFileSection dmidecode
        InfoFileSection fdisk -l
        InfoFileSection dmesg
    } > "${MY_INFO_FILE}"

    #
    # Get the raw ACPI tables and whatnot since we can. Use zip as tar will
    # zero pad virtual files due to the misleading size returned by stat (4K).
    #
    # Note! /sys/firmware/dmi/entries/15-0/system_event_log/raw_event_log has been
    #       seen causing fatal I/O errors, so skip all raw_event_log files.
    #
    zip -qr9 "${MY_BACKUP_DIR}/testbox-info.zip" \
        /proc/cpuinfo \
        /sys/firmware/ \
        -x "*/raw_event_log"
fi
271
# Drop a marker file named after the host name (holding the host name) into
# the backup directory, unless the name is unknown or the file already exists.
if [ ! -f "${MY_BACKUP_DIR}/${MY_HOSTNAME}" ] && [ "${MY_HOSTNAME}" != "unknown" ]; then
    echo "${MY_HOSTNAME}" > "${MY_BACKUP_DIR}/${MY_HOSTNAME}"
fi

# Likewise a marker file named after the hex IP, holding the dotted IP.
if [ ! -f "${MY_BACKUP_DIR}/${MY_IP_HEX}" ]; then
    echo "${MY_IP}" > "${MY_BACKUP_DIR}/${MY_IP_HEX}"
fi
279
#
# Assemble the list of block devices to work on. A disk-devices.lst file in
# the backup directory takes precedence; otherwise /sys/block is scanned and
# only sd* and hd* devices are kept.
#
MY_DISK_LIST_FILE="${MY_BACKUP_DIR}/disk-devices.lst"
if test -f "${MY_DISK_LIST_FILE}"; then
    # Squeeze whitespace runs and trim each line. The second bracket expression
    # used to be the malformed '[::space::]]*', which matched characters of the
    # device names instead of whitespace.
    MY_BLOCK_DEVS=$(sed -e 's/[[:space:]][[:space:]]*/ /g' \
                        -e 's/^[[:space:]]*//' \
                        -e 's/[[:space:]]*$//' \
                        < "${MY_DISK_LIST_FILE}")
    if test -z "${MY_BLOCK_DEVS}"; then
        ErrorMsgExit 17 "No block devices listed in ${MY_DISK_LIST_FILE}."
    fi
    InfoMsg "disk-device.lst: MY_BLOCK_DEVS=${MY_BLOCK_DEVS}"
else
    MY_BLOCK_DEVS=""
    # Glob instead of parsing ls output.
    for MY_DEV_PATH in /sys/block/*; do
        # An unmatched glob stays literal; skip it.
        test -e "${MY_DEV_PATH}" || continue
        MY_DEV="${MY_DEV_PATH##*/}"
        case "${MY_DEV}" in
            [sh]d*)
                MY_BLOCK_DEVS="${MY_BLOCK_DEVS} ${MY_DEV}"
                ;;
            *)  InfoMsg "Ignoring /sys/block/${MY_DEV}"
                ;;
        esac
    done
    if test -z "${MY_BLOCK_DEVS}"; then
        ErrorMsgExit 17 "No block devices found via /sys/block."
    fi
    InfoMsg "/sys/block: MY_BLOCK_DEVS=${MY_BLOCK_DEVS}"
fi
306
#
# Carry out the requested action.
#
case "${MY_ACTION}" in
    #
    # Back up every device to <backup dir>/<device>.gz. Plain 'backup' aborts
    # if any target already exists; 'backup-again' only notes it and carries on.
    #
    backup|backup-again)
        # First pass: check for existing backups before touching anything.
        for MY_DEV in ${MY_BLOCK_DEVS}; do
            MY_DST="${MY_BACKUP_DIR}/${MY_DEV}.gz"
            [ -f "${MY_DST}" ] || continue
            if [ "${MY_ACTION}" != 'backup-again' ]; then
                ErrorMsgExit 18 "${MY_DST} already exists"
            fi
            InfoMsg "${MY_DST} already exists"
        done

        # Second pass: the actual backing up.
        for MY_DEV in ${MY_BLOCK_DEVS}; do
            MY_SRC="/dev/${MY_DEV}"
            MY_DST="${MY_BACKUP_DIR}/${MY_DEV}.gz"

            # Keep any previous backup around as <name>.gz.old.
            if [ -f "${MY_DST}" ]; then
                mv -f "${MY_DST}" "${MY_DST}.old"
            fi

            if [ ! -b "${MY_SRC}" ]; then
                InfoMsg "Skipping ${MY_SRC} as it either doesn't exist or isn't a block device"
                continue
            fi

            InfoMsg "Backing up ${MY_SRC} to ${MY_DST}..."
            dd if="${MY_SRC}" "bs=${MY_DD_BLOCK_SIZE}" | gzip -c > "${MY_DST}"
            # Must be captured right away: [0] is dd, [1] is gzip.
            MY_RCS=("${PIPESTATUS[@]}")
            if [ "${MY_RCS[0]}" -eq 0 ] && [ "${MY_RCS[1]}" -eq 0 ]; then
                InfoMsg "Successfully backed up ${MY_SRC} to ${MY_DST}"
            else
                # Do not leave a partial image behind.
                rm -f "${MY_DST}"
                ErrorMsgExit 19 "There was a problem backing up ${MY_SRC} to ${MY_DST}: dd => ${MY_RCS[0]}; gzip => ${MY_RCS[1]}"
            fi
        done
        ;;

    #
    # Write each existing <device>.gz image back onto its block device.
    #
    restore)
        for MY_DEV in ${MY_BLOCK_DEVS}; do
            MY_SRC="${MY_BACKUP_DIR}/${MY_DEV}.gz"
            MY_DST="/dev/${MY_DEV}"

            if [ ! -b "${MY_DST}" ]; then
                InfoMsg "Skipping ${MY_DST} as it either doesn't exist or isn't a block device."
                continue
            fi
            if [ ! -f "${MY_SRC}" ]; then
                InfoMsg "Skipping ${MY_DST} because ${MY_SRC} does not exist."
                continue
            fi

            InfoMsg "Restoring ${MY_SRC} onto ${MY_DST}..."
            gunzip -c "${MY_SRC}" | dd of="${MY_DST}" "bs=${MY_DD_BLOCK_SIZE}" iflag=fullblock
            # Must be captured right away: [0] is gunzip, [1] is dd.
            MY_RCS=("${PIPESTATUS[@]}")
            if [ "${MY_RCS[0]}" -eq 0 ] && [ "${MY_RCS[1]}" -eq 0 ]; then
                InfoMsg "Successfully restored ${MY_SRC} onto ${MY_DST}"
            else
                ErrorMsgExit 20 "There was a problem restoring ${MY_SRC} onto ${MY_DST}: dd => ${MY_RCS[1]}; gunzip => ${MY_RCS[0]}"
            fi
        done
        ;;

    #
    # refresh-info needs no further work at this point.
    #
    refresh-info)
        ;;

    #
    # For rescue we exit right here, leaving the PXE config in place and not
    # rebooting. The admin does both by hand once the system has been rescued.
    #
    rescue)
        InfoMsg "rescue: exiting. Admin must remove PXE config and reboot manually when done."
        exit 0
        ;;

    *)  ErrorMsgExit 98 "Huh? MY_ACTION='${MY_ACTION}'"
        ;;
esac
393
#
# The action completed. Unless debugging, remove the PXE config file and
# reboot straight away.
#
InfoMsg "'${MY_ACTION}' - done"
if [ -z "${MY_REBOOT_WHEN_DONE}" ]; then
    exit 0
fi

sync
if ! rm -f "${MY_PXELINUX_CFG_FILE}"; then
    ErrorMsgExit 99 "failed to remove ${MY_PXELINUX_CFG_FILE}"
else
    InfoMsg "removed ${MY_PXELINUX_CFG_FILE}"
fi
sync
InfoMsg "rebooting"
reboot
exit 0
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
ContactPrivacy policyTerms of Use