VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/docs/testbox-maintenance.sh@ 67954

Last change on this file since 67954 was 64613, checked in by vboxsync, 8 years ago

testbox-maintenance.sh: Reduce the block size so it's below the max pipe size and use iflag=fullblock when reading from the gunzip pipe.

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 13.3 KB
Line 
1#!/bin/bash
2# $Id: testbox-maintenance.sh 64613 2016-11-09 14:02:31Z vboxsync $
3## @file
4# VirtualBox Validation Kit - testbox maintenance service
5#
6
7#
8# Copyright (C) 2006-2016 Oracle Corporation
9#
10# This file is part of VirtualBox Open Source Edition (OSE), as
11# available from http://www.virtualbox.org. This file is free software;
12# you can redistribute it and/or modify it under the terms of the GNU
13# General Public License (GPL) as published by the Free Software
14# Foundation, in version 2 as it comes in the "COPYING" file of the
15# VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16# hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17#
18# The contents of this file may alternatively be used under the terms
19# of the Common Development and Distribution License Version 1.0
20# (CDDL) only, as it comes in the "COPYING.CDDL" file of the
21# VirtualBox OSE distribution, in which case the provisions of the
22# CDDL are applicable instead of those of the GPL.
23#
24# You may elect to license modified versions of this file under the
25# terms and conditions of either the GPL or the CDDL or both.
26#
27
28
#
# Global Variables (config first).
#

# Non-empty: take the action from /proc/cmdline and reboot when finished
# (after a 5 minute delay on errors).  Empty: take the action from the
# script's own command line and just exit - use that for debugging.
MY_REBOOT_WHEN_DONE="yes"
#MY_REBOOT_WHEN_DONE="" # enable this for debugging the script

# Mount point of the TFTP share; must contain pxelinux.0 and pxelinux.cfg/.
MY_TFTP_ROOT="/mnt/testbox-tftp"
# Mount point of the backup share.
MY_BACKUP_ROOT="/mnt/testbox-backup"
# File whose presence tells us that the backup share is really mounted.
MY_BACKUP_MNT_TEST_FILE="${MY_BACKUP_ROOT}/testbox-backup"
# Log file shared by all testboxes.
MY_GLOBAL_LOG_FILE="${MY_BACKUP_ROOT}/maintenance.log"
# Block size handed to dd (bs=) when backing up and restoring disks.
MY_DD_BLOCK_SIZE=256K

# Filled in further down once the IP address and the shares are known.
MY_IP=""
MY_BACKUP_DIR=""
MY_LOG_FILE=""
MY_PXELINUX_CFG_FILE=""
45
46
##
# Info message.
#
# Prints the message on stdout and, once MY_LOG_FILE is non-empty, appends a
# timestamped copy of it to that file.
#
# Globals:   MY_LOG_FILE (read), MY_IP (read)
# Arguments: the message; multiple arguments are joined by single spaces.
#
InfoMsg()
{
    # Quoted "$*": the message is emitted verbatim, i.e. without pathname
    # expansion of '*' / '?' and without collapsing runs of whitespace.
    printf '%s\n' "$*";
    if test -n "${MY_LOG_FILE}"; then
        printf '%s\n' "$(date -uIsec): ${MY_IP}: info: $*" >> "${MY_LOG_FILE}";
    fi
}
57
58
##
# Error message and reboot+exit.  First argument is exit code.
#
# The message is written to stderr, to the testbox log (when MY_LOG_FILE is
# set) and to the global log (when the backup share test file exists).
#
# Globals:   MY_LOG_FILE, MY_IP, MY_BACKUP_MNT_TEST_FILE, MY_GLOBAL_LOG_FILE,
#            MY_REBOOT_WHEN_DONE (all read); MY_RET (written)
# Arguments: $1 - exit code; remaining arguments - the message.
# Returns:   never; the script exits with the given code (after sleeping
#            and issuing a reboot when MY_REBOOT_WHEN_DONE is non-empty).
#
ErrorMsgExit()
{
    MY_RET=$1
    shift
    # Quoted "$*" so the message is not glob-expanded or re-split.
    printf '%s\n' "testbox-maintenance.sh: error: $*" >&2;
    # Append to the testbox log.
    if test -n "${MY_LOG_FILE}"; then
        printf '%s\n' "$(date -uIsec): ${MY_IP}: error: $*" >> "${MY_LOG_FILE}";
    fi
    # Append to the global log.
    if test -f "${MY_BACKUP_MNT_TEST_FILE}"; then
        printf '%s\n' "$(date -uIsec): ${MY_IP}: error: $*" >> "${MY_GLOBAL_LOG_FILE}";
    fi

    #
    # On error we normally wait 5min before rebooting to avoid repeating the
    # same error too many times before the admin finds out.  We choose NOT to
    # remove the PXE config file here because (a) the admin might otherwise
    # not notice something went wrong, (b) the system could easily be in a
    # weird unbootable state, (c) the problem might be temporary.
    #
    # While debugging, we just exit here.
    #
    if test -n "${MY_REBOOT_WHEN_DONE}"; then
        sleep 5m
        echo "testbox-maintenance.sh: rebooting (after error)" >&2;
        reboot
    fi
    exit "${MY_RET}"
}
92
#
# Try figure out the IP address of the box and the hostname from it again.
#
# The first address reported by 'hostname -I' is used.  An empty result, a
# result that is not exactly one word, or the loopback address is fatal.
#
MY_IP=$(hostname -I | cut -f1 -d' ' | head -1)
if [[ -z "${MY_IP}" || "$(echo "${MY_IP}" | wc -w)" -ne 1 || "${MY_IP}" == "127.0.0.1" ]]; then
    ErrorMsgExit 10 "Failed to get a good IP! (MY_IP=${MY_IP})"
fi

# Look the IP up and take the second whitespace separated field of the
# 'getent hosts' output as the hostname; fall back on "unknown".
MY_HOSTNAME=$(getent hosts "${MY_IP}" | sed -e 's/[[:space:]][[:space:]]*/ /g' | cut -d' ' -f2)
if test -z "${MY_HOSTNAME}"; then
    MY_HOSTNAME="unknown";
fi
104
#
# Make sure the backup share is mounted (one retry after 15 seconds), then
# derive the backup dir and log file name from the IP address.
#
if test ! -f "${MY_BACKUP_MNT_TEST_FILE}"; then
    mount "${MY_BACKUP_ROOT}"
    if test ! -f "${MY_BACKUP_MNT_TEST_FILE}"; then
        echo "Retrying mounting '${MY_BACKUP_ROOT}' in 15 seconds..." >&2
        sleep 15
        mount "${MY_BACKUP_ROOT}"
    fi
    if test ! -f "${MY_BACKUP_MNT_TEST_FILE}"; then
        ErrorMsgExit 11 "Backup directory is not mounted."
    fi
fi
MY_BACKUP_DIR="${MY_BACKUP_ROOT}/${MY_IP}"
# Fail right away if the per-testbox directory cannot be created, instead of
# carrying on and tripping over every later redirection into it.  MY_LOG_FILE
# is only set afterwards so the error goes to the global log alone.
if ! mkdir -p "${MY_BACKUP_DIR}"; then
    ErrorMsgExit 16 "Failed to create backup directory '${MY_BACKUP_DIR}'."
fi
MY_LOG_FILE="${MY_BACKUP_DIR}/maintenance.log"
echo "================ $(date -uIsec): ${MY_IP}: ${MY_HOSTNAME} starts a new session ================" >> "${MY_LOG_FILE}"
echo "$(date -uIsec): ${MY_IP}: ${MY_HOSTNAME} says hi." >> "${MY_GLOBAL_LOG_FILE}"
InfoMsg "MY_IP=${MY_IP}<eol>"
123
#
# From here on everything written to stdout or stderr is passed through tee,
# which appends a copy to a per-testbox output log on the server.  A blank
# line and a session header are appended to that log first.
#
MY_OUTPUT_LOG_FILE="${MY_BACKUP_DIR}/maintenance-output.log"
printf '\n' >> "${MY_OUTPUT_LOG_FILE}"
printf '%s\n' "================ $(date -uIsec): ${MY_IP}: ${MY_HOSTNAME} starts a new session ================" \
    >> "${MY_OUTPUT_LOG_FILE}"
exec > >(tee -a "${MY_OUTPUT_LOG_FILE}") 2>&1
131
#
# Convert the IP address to PXELINUX hex format, then check that we've got
# a config file on the TFTP share that we later can remove.  We consider it a
# fatal failure if we don't because we've probably got the wrong IP and we'll
# be stuck doing the same stuff over and over again.
#
MY_TMP=${MY_IP//./ }
# MY_TMP is intentionally unquoted: each octet must be its own printf argument.
MY_IP_HEX=$(printf "%02X%02X%02X%02X" ${MY_TMP})
InfoMsg "MY_IP_HEX=${MY_IP_HEX}<eol>"

# Make sure the TFTP share is mounted (one retry after 15 seconds).
if test ! -f "${MY_TFTP_ROOT}/pxelinux.0"; then
    mount "${MY_TFTP_ROOT}"
    if test ! -f "${MY_TFTP_ROOT}/pxelinux.0"; then
        echo "Retrying mounting '${MY_TFTP_ROOT}' in 15 seconds..." >&2
        sleep 15
        # Retry the TFTP share itself (not the backup share).
        mount "${MY_TFTP_ROOT}"
    fi
    if test ! -f "${MY_TFTP_ROOT}/pxelinux.0"; then
        ErrorMsgExit 12 "TFTP share not mounted or missing pxelinux.0 in the root."
    fi
fi

MY_PXELINUX_CFG_FILE="${MY_TFTP_ROOT}/pxelinux.cfg/${MY_IP_HEX}"
if test ! -f "${MY_PXELINUX_CFG_FILE}"; then
    ErrorMsgExit 13 "No pxelinux.cfg file found (${MY_PXELINUX_CFG_FILE}) - wrong IP?"
fi
158
#
# Dig the action out of the kernel command line.
#
# In debug mode (MY_REBOOT_WHEN_DONE empty) the script's own arguments are
# searched instead.  The last testbox-action-* word found wins.
#
if test -n "${MY_REBOOT_WHEN_DONE}"; then
    InfoMsg "/proc/cmdline: $(cat /proc/cmdline)"
    # Split the kernel command line into words without pathname expansion and
    # install them as positional parameters.  The '--' makes sure an empty
    # command line or a word starting with '-' isn't interpreted by 'set'.
    MY_CMDLINE_WORDS=()
    read -r -a MY_CMDLINE_WORDS < /proc/cmdline
    set -- "${MY_CMDLINE_WORDS[@]}"
else
    InfoMsg "Using script command line: $*"
fi
MY_ACTION=not-found
while test $# -ge 1; do
    case "$1" in
        testbox-action-*)
            MY_ACTION="$1"
            ;;
    esac
    shift
done
if test "${MY_ACTION}" = "not-found"; then
    ErrorMsgExit 14 "No action given. Expected testbox-action-backup, testbox-action-backup-again, testbox-action-restore," \
        "testbox-action-refresh-info, or testbox-action-rescue on the kernel command line.";
fi

# Validate and shorten the action.
case "${MY_ACTION}" in
    testbox-action-backup)
        MY_ACTION="backup";
        ;;
    testbox-action-backup-again)
        MY_ACTION="backup-again";
        ;;
    testbox-action-restore)
        MY_ACTION="restore";
        ;;
    testbox-action-refresh-info)
        MY_ACTION="refresh-info";
        ;;
    testbox-action-rescue)
        MY_ACTION="rescue";
        ;;
    *)  ErrorMsgExit 15 "Invalid action '${MY_ACTION}'";
        ;;
esac

# Log the action in both logs: InfoMsg covers the testbox log (and stdout),
# the echo covers the global log.
InfoMsg "Executing '${MY_ACTION}'."
echo "$(date -uIsec): ${MY_IP}: info: Executing '${MY_ACTION}'." >> "${MY_GLOBAL_LOG_FILE}";
205
#
# Generate missing info for this testbox if backing up.
#
# The info file is (re)written when it does not exist yet, or when the action
# is backup, backup-again or refresh-info.
#
MY_INFO_FILE="${MY_BACKUP_DIR}/testbox-info.txt"
if [[ ! -f "${MY_INFO_FILE}" \
      || "${MY_ACTION}" == "backup" \
      || "${MY_ACTION}" == "backup-again" \
      || "${MY_ACTION}" == "refresh-info" ]]; then

    # Emits the separator starting each section of the info file: an empty
    # line followed by the section title ($1) repeated three times.
    InfoFileBanner()
    {
        echo ""
        echo "**** $1 ****"
        echo "**** $1 ****"
        echo "**** $1 ****"
    }

    # One (quoted) redirection for the whole file.  The 2>&1 on the tools
    # sends their error output into the info file as well.
    {
        echo "IP: ${MY_IP}"
        echo "HEX-IP: ${MY_IP_HEX}"
        echo "Hostname: ${MY_HOSTNAME}"
        InfoFileBanner "cat /proc/cpuinfo"
        cat /proc/cpuinfo
        InfoFileBanner "lspci -vvv"
        lspci -vvv 2>&1
        InfoFileBanner "biosdecode"
        biosdecode 2>&1
        InfoFileBanner "dmidecode"
        dmidecode 2>&1
        InfoFileBanner "fdisk -l"
        fdisk -l 2>&1
        InfoFileBanner "dmesg"
        dmesg 2>&1
    } > "${MY_INFO_FILE}"

    #
    # Get the raw ACPI tables and whatnot since we can.  Use zip as tar will
    # zero pad virtual files due to the misleading size returned by stat (4K).
    #
    # Note! /sys/firmware/dmi/entries/15-0/system_event_log/raw_event_log has been
    #       seen causing fatal I/O errors, so skip all raw_event_log files.
    #
    zip -qr9 "${MY_BACKUP_DIR}/testbox-info.zip" \
        /proc/cpuinfo \
        /sys/firmware/ \
        -x "*/raw_event_log"
fi
261
# Leave two small marker files in the backup directory unless they already
# exist: one named after the hostname (skipped when the hostname is
# "unknown") holding the hostname, and one named after the hex IP holding the
# dotted IP address.
if [[ "${MY_HOSTNAME}" != "unknown" && ! -f "${MY_BACKUP_DIR}/${MY_HOSTNAME}" ]]; then
    echo "${MY_HOSTNAME}" > "${MY_BACKUP_DIR}/${MY_HOSTNAME}"
fi

if [[ ! -f "${MY_BACKUP_DIR}/${MY_IP_HEX}" ]]; then
    echo "${MY_IP}" > "${MY_BACKUP_DIR}/${MY_IP_HEX}"
fi
269
#
# Assemble a list of block devices.  A disk-devices.lst file in the backup
# directory takes precedence; otherwise /sys/block/* is used with some
# filtering (only sd* and hd* devices are kept).
#
# The result, MY_BLOCK_DEVS, is a space separated list of device names
# without the /dev/ prefix.
#
if test -f "${MY_BACKUP_DIR}/disk-devices.lst"; then
    # Read all whitespace separated words of the file (any mix of spaces, tabs
    # and newlines) and join them with single spaces.  read returns non-zero
    # at end-of-file here, which is expected and ignored.
    MY_DEV_LIST=()
    read -r -d '' -a MY_DEV_LIST < "${MY_BACKUP_DIR}/disk-devices.lst"
    MY_BLOCK_DEVS="${MY_DEV_LIST[*]}"
    if test -z "${MY_BLOCK_DEVS}"; then
        ErrorMsgExit 17 "No block devices listed in ${MY_BACKUP_DIR}/disk-devices.lst."
    fi
    InfoMsg "disk-devices.lst: MY_BLOCK_DEVS=${MY_BLOCK_DEVS}";
else
    MY_BLOCK_DEVS="";
    for MY_DEV_PATH in /sys/block/*; do
        # An unmatched glob stays literal; skip it.
        test -e "${MY_DEV_PATH}" || continue
        MY_DEV="${MY_DEV_PATH##*/}"
        case "${MY_DEV}" in
            [sh]d*)
                MY_BLOCK_DEVS="${MY_BLOCK_DEVS:+${MY_BLOCK_DEVS} }${MY_DEV}"
                ;;
            *)  InfoMsg "Ignoring /sys/block/${MY_DEV}";
                ;;
        esac
    done
    if test -z "${MY_BLOCK_DEVS}"; then
        ErrorMsgExit 17 "No block devices found via /sys/block."
    fi
    InfoMsg "/sys/block: MY_BLOCK_DEVS=${MY_BLOCK_DEVS}";
fi
296
#
# Take action
#
# MY_BLOCK_DEVS is intentionally expanded unquoted in the loops below: it is
# a space separated list of device names.
#
case "${MY_ACTION}" in
    #
    # Create a backup.  The 'backup' action refuses to overwrite an
    # existing backup, but is otherwise identical to 'backup-again'.
    #
    backup|backup-again)
        # Check for existing backups before touching anything.
        for MY_DEV in ${MY_BLOCK_DEVS}; do
            MY_DST="${MY_BACKUP_DIR}/${MY_DEV}.gz"
            if test -f "${MY_DST}"; then
                if test "${MY_ACTION}" != 'backup-again'; then
                    ErrorMsgExit 18 "${MY_DST} already exists"
                fi
                InfoMsg "${MY_DST} already exists"
            fi
        done

        # Do the backing up.
        for MY_DEV in ${MY_BLOCK_DEVS}; do
            MY_SRC="/dev/${MY_DEV}"
            MY_DST="${MY_BACKUP_DIR}/${MY_DEV}.gz"
            if test -b "${MY_SRC}"; then
                # Keep the previous backup as .old.  This is only done when we
                # are actually going to write a new one, so a listed device
                # that is absent doesn't make its existing backup disappear
                # under the name 'restore' looks for.
                if test -f "${MY_DST}"; then
                    mv -f "${MY_DST}" "${MY_DST}.old";
                fi
                InfoMsg "Backing up ${MY_SRC} to ${MY_DST}...";
                dd if="${MY_SRC}" bs="${MY_DD_BLOCK_SIZE}" | gzip -c > "${MY_DST}";
                # Snapshot the exit codes of both pipeline stages right away;
                # PIPESTATUS is overwritten by the next command.
                MY_RCS=("${PIPESTATUS[@]}");
                if test "${MY_RCS[0]}" -eq 0 -a "${MY_RCS[1]}" -eq 0; then
                    InfoMsg "Successfully backed up ${MY_SRC} to ${MY_DST}";
                else
                    # Don't leave a partial archive behind.
                    rm -f "${MY_DST}";
                    ErrorMsgExit 19 "There was a problem backing up ${MY_SRC} to ${MY_DST}: dd => ${MY_RCS[0]}; gzip => ${MY_RCS[1]}";
                fi
            else
                InfoMsg "Skipping ${MY_SRC} as it either doesn't exist or isn't a block device";
            fi
        done
        ;;

    #
    # Restore existing.
    #
    restore)
        for MY_DEV in ${MY_BLOCK_DEVS}; do
            MY_SRC="${MY_BACKUP_DIR}/${MY_DEV}.gz"
            MY_DST="/dev/${MY_DEV}"
            if test -b "${MY_DST}"; then
                if test -f "${MY_SRC}"; then
                    InfoMsg "Restoring ${MY_SRC} onto ${MY_DST}...";
                    # iflag=fullblock: dd reads from a pipe here and must
                    # accumulate full blocks before writing.
                    gunzip -c "${MY_SRC}" | dd of="${MY_DST}" bs="${MY_DD_BLOCK_SIZE}" iflag=fullblock;
                    # [0] is gunzip, [1] is dd.
                    MY_RCS=("${PIPESTATUS[@]}");
                    if test "${MY_RCS[0]}" -eq 0 -a "${MY_RCS[1]}" -eq 0; then
                        InfoMsg "Successfully restored ${MY_SRC} onto ${MY_DST}";
                    else
                        ErrorMsgExit 20 "There was a problem restoring ${MY_SRC} onto ${MY_DST}: dd => ${MY_RCS[1]}; gunzip => ${MY_RCS[0]}";
                    fi
                else
                    InfoMsg "Skipping ${MY_DST} because ${MY_SRC} does not exist.";
                fi
            else
                InfoMsg "Skipping ${MY_DST} as it either doesn't exist or isn't a block device.";
            fi
        done
        ;;

    #
    # Nothing else to do for refresh-info.
    #
    refresh-info)
        ;;

    #
    # For the rescue action, we just quit without removing the PXE config or
    # rebooting the box.  The admin will do that once the system has been rescued.
    #
    rescue)
        InfoMsg "rescue: exiting. Admin must remove PXE config and reboot manually when done."
        exit 0;
        ;;

    *)  ErrorMsgExit 98 "Huh? MY_ACTION='${MY_ACTION}'"
        ;;
esac
383
#
# The action completed.  Unless we are in debug mode (MY_REBOOT_WHEN_DONE
# empty), flush disk buffers, remove the PXE config file and reboot
# immediately.
#
InfoMsg "'${MY_ACTION}' - done";
if [[ -z "${MY_REBOOT_WHEN_DONE}" ]]; then
    exit 0
fi

sync
# ErrorMsgExit does not return, so the "removed" message is only logged on
# success.
rm -f "${MY_PXELINUX_CFG_FILE}" \
    || ErrorMsgExit 99 "failed to remove ${MY_PXELINUX_CFG_FILE}";
InfoMsg "removed ${MY_PXELINUX_CFG_FILE}";
sync
InfoMsg "rebooting";
reboot
exit 0
400
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette