Daniel A. Maierhofer
2018-07-13 16:45:05 +02:00
parent ac34166557
commit e347803f62
6 changed files with 299 additions and 0 deletions


@@ -107,3 +107,4 @@ common_iptables_v4: "iptables_default_v4.j2"
common_iptables_v6: "iptables_default_v6.j2"
common_snapper: False
common_smartd: False
common_zfs: False
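The new common_zfs switch defaults to off; a host that should get the ZFS tasks below would override it in its inventory vars, for example (hypothetical file name):

# host_vars/storage01.yml
common_zfs: True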

files/scripts/zfs_health.sh Executable file

@@ -0,0 +1,122 @@
#! /bin/sh
#
# Calomel.org
# https://calomel.org/zfs_health_check_script.html
# FreeBSD ZFS Health Check script
# zfs_health.sh @ Version 0.17
# Check health of ZFS volumes and drives. On any faults send email.
# 99 problems but ZFS ain't one
problems=0
# Health - Check if all zfs volumes are in good condition. We are looking for
# any keyword signifying a degraded or broken array.
condition=$(/sbin/zpool status | egrep -i '(DEGRADED|FAULTED|OFFLINE|UNAVAIL|REMOVED|FAIL|DESTROYED|corrupt|cannot|unrecover)')
if [ "${condition}" ]; then
emailSubject="`hostname` - ZFS pool - HEALTH fault"
problems=1
fi
# Capacity - Make sure the pool capacity is below 80% for best performance. The
# percentage really depends on how large your volume is. If you have a 128GB
# SSD then 80% is reasonable. If you have a 60TB raid-z2 array then you can
# probably set the warning closer to 95%.
#
# ZFS uses a copy-on-write scheme. The file system writes new data to
# sequential free blocks first and when the uberblock has been updated the new
# inode pointers become valid. This approach works only when the pool has
# enough free sequential blocks. If the pool is at capacity and space limited,
# ZFS will have to write blocks randomly. This means ZFS cannot create an
# optimal set of sequential writes and write performance is severely impacted.
maxCapacity=80
if [ ${problems} -eq 0 ]; then
    capacity=$(/sbin/zpool list -H -o capacity | cut -d'%' -f1)
    for line in ${capacity}
    do
        if [ $line -ge $maxCapacity ]; then
            emailSubject="`hostname` - ZFS pool - Capacity Exceeded"
            problems=1
        fi
    done
fi

# Errors - Check the columns for READ, WRITE and CKSUM (checksum) drive errors
# on all volumes and all drives using "zpool status". If any non-zero errors
# are reported an email will be sent out. You should then look to replace the
# faulty drive and run "zpool scrub" on the affected volume after resilvering.
if [ ${problems} -eq 0 ]; then
    errors=$(/sbin/zpool status | grep ONLINE | grep -v state | awk '{print $3 $4 $5}' | grep -v 000)
    if [ "${errors}" ]; then
        emailSubject="`hostname` - ZFS pool - Drive Errors"
        problems=1
    fi
fi

# Scrub Expired - Check if all volumes have been scrubbed in at least the last
# 8 days. The general guide is to scrub volumes on desktop quality drives once
# a week and volumes on enterprise class drives once a month. You can always
# use cron to schedule "zpool scrub" in off hours. We scrub our volumes every
# Sunday morning for example.
#
# Scrubbing traverses all the data in the pool once and verifies all blocks can
# be read. Scrubbing proceeds as fast as the devices allow, though the
# priority of any I/O remains below that of normal calls. This operation might
# negatively impact performance, but the file system will remain usable and
# responsive while scrubbing occurs. To initiate an explicit scrub, use the
# "zpool scrub" command.
#
# The scrubExpire variable is in seconds. So for 8 days we calculate 8 days
# times 24 hours times 3600 seconds to equal 691200 seconds.
scrubExpire=691200
if [ ${problems} -eq 0 ]; then
    currentDate=$(date +%s)
    zfsVolumes=$(/sbin/zpool list -H -o name)
    for volume in ${zfsVolumes}
    do
        if [ $(/sbin/zpool status $volume | egrep -c "none requested") -ge 1 ]; then
            printf "ERROR: You need to run \"zpool scrub $volume\" before this script can monitor the scrub expiration time."
            break
        fi
        if [ $(/sbin/zpool status $volume | egrep -c "scrub in progress|resilver") -ge 1 ]; then
            break
        fi
        ### Ubuntu with GNU supported date format
        scrubRawDate=$(/sbin/zpool status $volume | grep scrub | awk '{print $11" "$12" " $13" " $14" "$15}')
        scrubDate=$(date -d "$scrubRawDate" +%s)
        ### FreeBSD with *nix supported date format
        #scrubRawDate=$(/sbin/zpool status $volume | grep scrub | awk '{print $15 $12 $13}')
        #scrubDate=$(date -j -f '%Y%b%e-%H%M%S' $scrubRawDate'-000000' +%s)
        if [ $(($currentDate - $scrubDate)) -ge $scrubExpire ]; then
            emailSubject="`hostname` - ZFS pool - Scrub Time Expired. Scrub Needed on Volume(s)"
            problems=1
        fi
    done
fi

# Email - On any problems send email with drive status information and
# capacities including a helpful subject line. Also use logger to write the
# email subject to the local logs. This is also the place you may want to put
# any other notifications like playing a sound file, beeping the internal
# speaker, paging someone or updating Nagios or even BigBrother.
if [ "$problems" -ne 0 ]; then
printf '%s\n' "$emailSubject" "" "`/sbin/zpool list`" "" "`/sbin/zpool status`" | /usr/bin/mail -s "$emailSubject" root@localhost
logger $emailSubject
fi

files/scripts/zfs_mount.sh Executable file

@@ -0,0 +1,121 @@
#!/bin/bash
# https://bitbucket.org/dewoodruff/zfs-on-linux-luks-mountvolumes/src/5836def278a3e462f1f508ba02b7fa236dd28717/mountVolumes.sh
. /etc/zfs_mount_settings.sh
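# the settings file is expected to define: LOG (logfile path), pools (array of
# pool names) and devs (associative array mapping block devices to LUKS mapper names)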
# the real work happens below
activePools=()
date >> $LOG
function getPoolStatus {
    echo "Checking pool status:" | tee -a $LOG
    for pool in "${pools[@]}"
    do
        echo -en "\t$pool: " | tee -a $LOG
        status=`zpool status $pool 2>&1 | grep "state:" | cut -f2 -d:`
        if [ -z "$status" ];
        then
            echo "unknown - not imported" | tee -a $LOG
        else
            echo $status | tee -a $LOG
            activePools+=($pool)
        fi
    done
}

function exportActivePools {
    if [ -n "$activePools" ];
    then
        echo -n "Exporting pools... " | tee -a $LOG
        for pool in "${activePools[@]}"
        do
            zpool export -f $pool 2>&1 1>>$LOG || { echo "Problem exporting $pool!" | tee -a $LOG; exit 0; }
        done
        echo " done."
    fi
}

function importPools {
    echo -n "Importing pools..."
    for pool in "${pools[@]}"
    do
        zpool import $pool 2>&1 1>>$LOG || { echo "Problem importing $pool!" | tee -a $LOG; exit 0; }
    done
    echo " done."
}

function closeAllLUKS {
    echo "Making sure all LUKS disks are closed..."
    for dev in "${devs[@]}"
    do
        #echo $dev
        cryptsetup close $dev 2>&1 1>>$LOG || { echo "Problem closing $dev!" | tee -a $LOG; exit 0; }
    done
    echo "Done."
}

function openAllLUKS {
    read -s -p "Enter LUKS passphrase: " pass1
    echo ""
    read -s -p "Confirm LUKS passphrase: " pass2
    echo ""
    if [ "$pass1" = "$pass2" ];
    then
        for dev in "${!devs[@]}"
        do
            echo "Opening $dev to ${devs["$dev"]}" | tee -a $LOG
            echo "$pass1" | cryptsetup luksOpen $dev ${devs[$dev]} 2>&1 1>>$LOG || { echo "Problem opening $dev!" | tee -a $LOG; exit 0; }
        done
    else
        echo "ERROR: passphrases don't match!"
    fi
    pass1=""
    pass2=""
}

function LUKSStatus {
    for dev in "${devs[@]}"
    do
        cryptsetup status $dev | head -1 | tee -a $LOG
    done | sort
}

function unmount {
    zfs unshare -a
    getPoolStatus
    exportActivePools
    closeAllLUKS
    getPoolStatus
}

if [ "$1" = "status" ];
then
LUKSStatus
getPoolStatus
elif [ "$1" = "mount" ];
then
getPoolStatus
exportActivePools
closeAllLUKS
openAllLUKS
importPools
getPoolStatus
zfs share -a
elif [ "$1" = "unmount" ];
then
unmount
elif [ "$1" = "reboot" ];
then
unmount
reboot
elif [ "$1" = "shutdown" ];
then
unmount
shutdown -h now
elif [ "$1" = "freespace" ];
then
zfs list
else
echo "Usage: ./mountVolumes.sh [status|mount|unmount|reboot|shutdown|freespace]"
fi
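The mount script relies on /etc/zfs_mount_settings.sh, which is not among the files changed in this commit. A minimal sketch of what that file could contain, using hypothetical pool and disk names:

#!/bin/bash
# /etc/zfs_mount_settings.sh -- example values only, adjust to the actual hardware
LOG=/var/log/zfs_mount.log
# pools to import/export, in order
pools=(tank)
# LUKS devices: underlying block device -> mapper name the pool sits on
declare -A devs=(
    [/dev/disk/by-id/ata-EXAMPLE-DISK1]=luks-disk1
    [/dev/disk/by-id/ata-EXAMPLE-DISK2]=luks-disk2
)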


@@ -40,3 +40,12 @@
- name: restart smartd
  service: name=smartd state=restarted

- name: zfs - start services
  service: "name={{ item }} state=restarted"
  with_items:
    - zfs-import-cache
    - zfs-import-scan
    - zfs-mount
    - zfs-share
  listen: zfs restart


@@ -73,3 +73,6 @@
  when: common_smartd
  tags: ['common', 'smartd']

- import_tasks: zfs.yml
  when: common_zfs
  tags: ['common', 'zfs']

tasks/zfs.yml Normal file

@@ -0,0 +1,43 @@
---
- name: zfs - linux-headers
  package: name=linux-headers-{{ ansible_kernel }}

- name: zfs - install ZoL dkms
  package: name=zfs-dkms

- name: zfs - install ZoL utils
  package: name=zfsutils-linux

- name: zfs - install zfs-auto-snapshot
  package: name=zfs-auto-snapshot

- name: zfs - zfs-auto-snapshot find cron files
  shell: find /etc/cron* -type f -name zfs-auto-snapshot
  register: snapshot_cron
  changed_when: False

- name: zfs - zfs-auto-snapshot prefix
  lineinfile:
    path: "{{ item }}"
    regexp: (.*zfs-auto-snapshot.*\d+)\ \ \/\/
    line: \1 --prefix= //
    backrefs: yes
  with_items: "{{ snapshot_cron.stdout_lines }}"
  when: snapshot_cron.stdout_lines is defined

- name: zfs - load module
  modprobe: name=zfs
  notify: zfs restart

- name: zfs - zfs_mount.sh
  copy: src=scripts/zfs_mount.sh dest=/usr/local/bin/ owner=root group=root mode=0755

- name: zfs - zfs_health.sh
  copy: src=scripts/zfs_health.sh dest=/usr/local/bin/ owner=root group=root mode=0755

- name: zfs - zfs_health cronjob
  cron:
    name: zfs check health
    minute: 0
    hour: "7,11,16"
    job: "/usr/local/bin/zfs_health.sh"
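For illustration, the zfs-auto-snapshot prefix task rewrites the packaged cron entries roughly as follows (the exact stock line varies by distribution release; shown abridged):

# before, e.g. in /etc/cron.d/zfs-auto-snapshot
... zfs-auto-snapshot --quiet --syslog --label=frequent --keep=4  //
# after
... zfs-auto-snapshot --quiet --syslog --label=frequent --keep=4 --prefix= //

With an empty --prefix the snapshots lose the default zfs-auto-snap name prefix, which keeps snapshot names short.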