From mboxrd@z Thu Jan 1 00:00:00 1970 From: Michael Tremer To: development@lists.ipfire.org Subject: [PATCH 2/2] core168: Add script to automatically repair MDRAID arrays Date: Thu, 19 May 2022 08:56:34 +0000 Message-ID: <20220519085634.197389-2-michael.tremer@ipfire.org> In-Reply-To: <20220519085634.197389-1-michael.tremer@ipfire.org> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="===============6429077900047214497==" List-Id: --===============6429077900047214497== Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Please see the header of the script for more details. Signed-off-by: Michael Tremer --- config/rootfiles/common/aarch64/stage2 | 1 + config/rootfiles/common/armv6l/stage2 | 1 + config/rootfiles/common/x86_64/stage2 | 1 + config/rootfiles/core/168/update.sh | 3 + src/scripts/repair-mdraid | 169 +++++++++++++++++++++++++ 5 files changed, 175 insertions(+) create mode 100644 src/scripts/repair-mdraid diff --git a/config/rootfiles/common/aarch64/stage2 b/config/rootfiles/common= /aarch64/stage2 index 352c704d4..e328a4526 100644 --- a/config/rootfiles/common/aarch64/stage2 +++ b/config/rootfiles/common/aarch64/stage2 @@ -99,6 +99,7 @@ usr/local/bin/ipsec-interfaces usr/local/bin/makegraphs usr/local/bin/qosd usr/local/bin/readhash +usr/local/bin/repair-mdraid usr/local/bin/run-parts usr/local/bin/scanhd usr/local/bin/settime diff --git a/config/rootfiles/common/armv6l/stage2 b/config/rootfiles/common/= armv6l/stage2 index 198461a01..2bd00d968 100644 --- a/config/rootfiles/common/armv6l/stage2 +++ b/config/rootfiles/common/armv6l/stage2 @@ -97,6 +97,7 @@ usr/local/bin/ipsec-interfaces usr/local/bin/makegraphs usr/local/bin/qosd usr/local/bin/readhash +usr/local/bin/repair-mdraid usr/local/bin/run-parts usr/local/bin/scanhd usr/local/bin/settime diff --git a/config/rootfiles/common/x86_64/stage2 b/config/rootfiles/common/= x86_64/stage2 index b03a7fecf..586b88e3d 100644 --- a/config/rootfiles/common/x86_64/stage2 
+++ b/config/rootfiles/common/x86_64/stage2 @@ -99,6 +99,7 @@ usr/local/bin/ipsec-interfaces usr/local/bin/makegraphs usr/local/bin/qosd usr/local/bin/readhash +usr/local/bin/repair-mdraid usr/local/bin/run-parts usr/local/bin/scanhd usr/local/bin/settime diff --git a/config/rootfiles/core/168/update.sh b/config/rootfiles/core/168/= update.sh index c4005dba9..84dec941c 100644 --- a/config/rootfiles/core/168/update.sh +++ b/config/rootfiles/core/168/update.sh @@ -125,6 +125,9 @@ if ! grep -q rd.auto /etc/default/grub; then sed -e "s/panic=3D10/& rd.auto/" -i /etc/default/grub fi =20 +# Repair any broken MDRAID arrays +/usr/local/bin/repair-mdraid + # Start services /etc/init.d/fcron restart /etc/init.d/sshd restart diff --git a/src/scripts/repair-mdraid b/src/scripts/repair-mdraid new file mode 100644 index 000000000..a622ff71d --- /dev/null +++ b/src/scripts/repair-mdraid @@ -0,0 +1,169 @@ +#!/bin/bash +############################################################################= ### +# = # +# IPFire.org - A linux based firewall = # +# Copyright (C) 2022 IPFire Team = # +# = # +# This program is free software: you can redistribute it and/or modify = # +# it under the terms of the GNU General Public License as published by = # +# the Free Software Foundation, either version 3 of the License, or = # +# (at your option) any later version. = # +# = # +# This program is distributed in the hope that it will be useful, = # +# but WITHOUT ANY WARRANTY; without even the implied warranty of = # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the = # +# GNU General Public License for more details. = # +# = # +# You should have received a copy of the GNU General Public License = # +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
= # +# = # +############################################################################= ### +# +# This script is supposed to repair any broken RAID installations +# where the system has been booted from only one of the RAID devices +# without the software RAID being activated first. +# +# This script works as follows: +# +# * It tries to find an inactive RAID called "ipfire:0" +# * It will then destroy any devices that are still part of this RAID. +# This is required because if the RAID is being assembled correctly, +# data from the disk that has NOT been mounted will be replicated +# back to the device that has been changed. That would cause any +# data that has been written to the mounted disk to be lost. +# To avoid this, we will partially destroy the RAID. +# * We will then erase any partition tables and destroy any filesystems +# on the devices so that they do not get accidentally mounted again. +# * The system will then need to be rebooted where the RAID will be +# mounted again in a degraded state which might take some extra +# time at boot (the system stands still for about a minute). +# * After the system has been booted up correctly, we will re-add +# the devices back to the RAID which will resync and the system +# will be back to its intended configuration. 
# Prints the device node of the inactive RAID array named "ipfire:0".
#
# Reads the output of "mdadm --detail --scan" line by line. The first field
# is the array status, the second the device node, and the remaining fields
# are key=value pairs.
#
# Returns 0 after printing the device, or 1 if no such array was found.
find_inactive_raid() {
	local status
	local device
	local arg
	local args

	while read -r status device args; do
		# Only inactive arrays are of interest
		[ "${status}" = "INACTIVE-ARRAY" ] || continue

		# Word splitting of the key=value pairs is intended here
		for arg in ${args}; do
			case "${arg}" in
				name=ipfire:0)
					echo "${device}"
					return 0
					;;
			esac
		done
	done <<< "$(mdadm --detail --scan)"

	return 1
}

# Prints the disk that a partition device node belongs to.
#
#   /dev/sda3      -> /dev/sda
#   /dev/sdb12     -> /dev/sdb
#   /dev/nvme0n1p3 -> /dev/nvme0n1
#   /dev/mmcblk0p2 -> /dev/mmcblk0
#
# Names that do not look like a partition are printed unchanged.
partition_to_disk() {
	local device="${1}"

	# Disks whose own name ends in a digit separate the partition
	# number with a "p" (NVMe, MMC, MD, ...)
	local with_separator='^(.+[0-9])p[0-9]+$'

	# Classic lettered disks simply append the partition number
	local lettered='^(/dev/(sd|hd|vd|xvd)[a-z]+)[0-9]+$'

	if [[ "${device}" =~ ${with_separator} ]]; then
		echo "${BASH_REMATCH[1]}"
	elif [[ "${device}" =~ ${lettered} ]]; then
		echo "${BASH_REMATCH[1]}"
	else
		echo "${device}"
	fi
}

# Prints the disk that holds the currently mounted root filesystem.
#
# Scans /proc/mounts for the entry mounted on "/" and converts its
# partition node into the disk node, because that is what gets compared
# against the RAID member devices.
#
# Returns 0 after printing the disk, or 1 if no block device is mounted
# on "/".
find_root() {
	local device
	local mp
	local fs
	local args

	while read -r device mp fs args; do
		[ "${mp}" = "/" ] || continue

		# Ignore pseudo entries (e.g. "rootfs") which are not block devices
		case "${device}" in
			/dev/*)
				;;
			*)
				continue
				;;
		esac

		partition_to_disk "${device}"
		return 0
	done < /proc/mounts

	return 1
}

# Prints all member devices of the given RAID, one per line.
#
#   $1 - the RAID device node
#
# The verbose scan output carries a line of the form
# "   devices=/dev/a,/dev/b" which is split at the commas.
find_raid_devices() {
	local raid="${1}"

	mdadm -v --detail --scan "${raid}" | \
		awk -F= '/^[ ]+devices/ { n = split($2, d, ","); for (i = 1; i <= n; i++) print d[i] }'

	return 0
}

# Makes a former RAID member unusable as a data source.
#
#   $1 - the disk to destroy
#
# Removes the RAID superblock, all signatures on the partitions of the
# disk and finally the partition table itself.
destroy_everything() {
	local device="${1}"
	local part
	local parts

	# Destroy the RAID superblock
	mdadm --zero-superblock "${device}"

	# Collect the partitions of exactly this disk. A plain "${device}*"
	# would also match other disks that merely share the prefix
	# (e.g. /dev/sdaa for /dev/sda).
	case "${device}" in
		*[0-9])
			parts=( "${device}"p[0-9]* )
			;;
		*)
			parts=( "${device}"[0-9]* )
			;;
	esac

	# Wipe any partition signatures. This has to happen before the
	# partition table is erased, because wipefs asks the kernel to re-read
	# the partition table afterwards and the partition nodes disappear.
	for part in "${parts[@]}"; do
		# Skip the literal pattern if nothing matched
		[ -b "${part}" ] || continue

		wipefs -a "${part}"
	done

	# Wipe the partition table
	wipefs -a "${device}"
}

# Installs a one-shot boot script which re-adds the given devices to the
# RAID once the system has come up with the degraded array.
#
#   $@ - the devices to add back
#
# NOTE(review): In the archived copy of this patch everything between the
# here-document operator and the redirection of the "stop the RAID" command
# in main() was lost. The here-document body and the chmod below have been
# reconstructed from the description in the header of this script - confirm
# against the upstream script before shipping.
raid_rebuild() {
	local devices=( "$@" )

	# ${devices[@]} is expanded now, the escaped variables at boot time
	cat > /etc/rc.d/rcsysinit.d/S99fix-raid <<EOF
#!/bin/bash

case "\${1}" in
	start)
		if [ -e "/dev/md/ipfire:0" ]; then
			for device in ${devices[@]}; do
				mdadm --add "/dev/md/ipfire:0" "\${device}"
			done

			# Delete this script
			rm "\${0}"
		fi
		;;
esac
EOF

	chmod a+x /etc/rc.d/rcsysinit.d/S99fix-raid
}

# Entry point: finds the inactive RAID, destroys every member except the
# disk the system is running from and schedules the rebuild.
#
# Returns 0 if there was nothing to do or the repair has been prepared,
# and 1 if the repair was refused because the running disk could not be
# identified as a RAID member.
#
# NOTE(review): The beginning of this function (up to and including the
# command that stops the RAID) was lost in the archived copy of the patch
# and has been reconstructed from the variables used by the surviving code.
main() {
	local raid="$(find_inactive_raid)"

	# Nothing to do if no inactive RAID device was found
	if [ -z "${raid}" ]; then
		return 0
	fi

	local root="$(find_root)"

	# Find all devices in this RAID
	local devices=(
		$(find_raid_devices "${raid}")
	)

	# Refuse to destroy anything unless the disk we are running from is
	# one of the members. Otherwise no device would be skipped below and
	# the live system could be wiped.
	local device
	local found=0
	for device in "${devices[@]}"; do
		if [ -n "${root}" ] && [ "${device}" = "${root}" ]; then
			found=1
		fi
	done

	if [ "${found}" -eq 0 ]; then
		echo "Could not find the root device in RAID ${raid}. Aborting." >&2
		return 1
	fi

	# Stop the RAID
	mdadm --stop "${raid}" &>/dev/null

	# Destroy any useful data on all remaining RAID devices
	local wiped=()
	for device in "${devices[@]}"; do
		# Skip root
		[ "${device}" = "${root}" ] && continue

		destroy_everything "${device}"
		wiped+=( "${device}" )
	done &>/dev/null

	# Nothing to re-add if root was the only member
	if [ "${#wiped[@]}" -eq 0 ]; then
		return 0
	fi

	# Re-add all wiped devices to the RAID (not just the last one that
	# the loop happened to look at)
	raid_rebuild "${wiped[@]}"

	return 0
}

# "return" is only valid inside functions or sourced files, so the status
# of main has to be propagated using "exit"
main "$@" || exit $?