Daniel A. Maierhofer
2018-07-13 16:45:05 +02:00
parent ac34166557
commit e347803f62
6 changed files with 299 additions and 0 deletions


@@ -107,3 +107,4 @@ common_iptables_v4: "iptables_default_v4.j2"
common_iptables_v6: "iptables_default_v6.j2"
common_snapper: False
common_smartd: False
common_zfs: False
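The new common_zfs switch defaults to off; a host that should get the ZFS tasks below would override it in its inventory vars, for example (hypothetical file name):

# host_vars/storage01.yml
common_zfs: True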

files/scripts/zfs_health.sh Executable file

@@ -0,0 +1,122 @@
#! /bin/sh
#
# Calomel.org
# https://calomel.org/zfs_health_check_script.html
# FreeBSD ZFS Health Check script
# zfs_health.sh @ Version 0.17
# Check health of ZFS volumes and drives. On any faults send email.
# 99 problems but ZFS ain't one
problems=0
# Health - Check if all zfs volumes are in good condition. We are looking for
# any keyword signifying a degraded or broken array.
condition=$(/sbin/zpool status | egrep -i '(DEGRADED|FAULTED|OFFLINE|UNAVAIL|REMOVED|FAIL|DESTROYED|corrupt|cannot|unrecover)')
if [ "${condition}" ]; then
emailSubject="`hostname` - ZFS pool - HEALTH fault"
problems=1
fi
# Capacity - Make sure the pool capacity is below 80% for best performance. The
# percentage really depends on how large your volume is. If you have a 128GB
# SSD then 80% is reasonable. If you have a 60TB raid-z2 array then you can
# probably set the warning closer to 95%.
#
# ZFS uses a copy-on-write scheme. The file system writes new data to
# sequential free blocks first and when the uberblock has been updated the new
# inode pointers become valid. This approach works only when the pool has
# enough free sequential blocks. If the pool is at capacity and space limited,
# ZFS will have to write blocks randomly. This means ZFS cannot create an
# optimal set of sequential writes and write performance is severely impacted.
maxCapacity=80
if [ ${problems} -eq 0 ]; then
    capacity=$(/sbin/zpool list -H -o capacity | cut -d'%' -f1)
    for line in ${capacity}
    do
        if [ $line -ge $maxCapacity ]; then
            emailSubject="`hostname` - ZFS pool - Capacity Exceeded"
            problems=1
        fi
    done
fi

# Errors - Check the columns for READ, WRITE and CKSUM (checksum) drive errors
# on all volumes and all drives using "zpool status". If any non-zero errors
# are reported an email will be sent out. You should then look to replace the
# faulty drive and run "zpool scrub" on the affected volume after resilvering.
if [ ${problems} -eq 0 ]; then
    errors=$(/sbin/zpool status | grep ONLINE | grep -v state | awk '{print $3 $4 $5}' | grep -v 000)
    if [ "${errors}" ]; then
        emailSubject="`hostname` - ZFS pool - Drive Errors"
        problems=1
    fi
fi

# Scrub Expired - Check if all volumes have been scrubbed in at least the last
# 8 days. The general guide is to scrub volumes on desktop quality drives once
# a week and volumes on enterprise class drives once a month. You can always
# use cron to schedule "zpool scrub" in off hours. We scrub our volumes every
# Sunday morning for example.
#
# Scrubbing traverses all the data in the pool once and verifies all blocks can
# be read. Scrubbing proceeds as fast as the devices allow, though the
# priority of any I/O remains below that of normal calls. This operation might
# negatively impact performance, but the file system will remain usable and
# responsive while scrubbing occurs. To initiate an explicit scrub, use the
# "zpool scrub" command.
#
# The scrubExpire variable is in seconds. So for 8 days we calculate 8 days
# times 24 hours times 3600 seconds to equal 691200 seconds.
scrubExpire=691200
if [ ${problems} -eq 0 ]; then
    currentDate=$(date +%s)
    zfsVolumes=$(/sbin/zpool list -H -o name)
    for volume in ${zfsVolumes}
    do
        if [ $(/sbin/zpool status $volume | egrep -c "none requested") -ge 1 ]; then
            printf "ERROR: You need to run \"zpool scrub $volume\" before this script can monitor the scrub expiration time."
            break
        fi
        if [ $(/sbin/zpool status $volume | egrep -c "scrub in progress|resilver") -ge 1 ]; then
            break
        fi
        ### Ubuntu with GNU supported date format
        scrubRawDate=$(/sbin/zpool status $volume | grep scrub | awk '{print $11" "$12" " $13" " $14" "$15}')
        scrubDate=$(date -d "$scrubRawDate" +%s)
        ### FreeBSD with *nix supported date format
        #scrubRawDate=$(/sbin/zpool status $volume | grep scrub | awk '{print $15 $12 $13}')
        #scrubDate=$(date -j -f '%Y%b%e-%H%M%S' $scrubRawDate'-000000' +%s)
        if [ $(($currentDate - $scrubDate)) -ge $scrubExpire ]; then
            emailSubject="`hostname` - ZFS pool - Scrub Time Expired. Scrub Needed on Volume(s)"
            problems=1
        fi
    done
fi

# Email - On any problems send email with drive status information and
# capacities including a helpful subject line. Also use logger to write the
# email subject to the local logs. This is also the place you may want to put
# any other notifications like playing a sound file, beeping the internal
# speaker, paging someone or updating Nagios or even BigBrother.
if [ "$problems" -ne 0 ]; then
printf '%s\n' "$emailSubject" "" "`/sbin/zpool list`" "" "`/sbin/zpool status`" | /usr/bin/mail -s "$emailSubject" root@localhost
logger $emailSubject
fi

files/scripts/zfs_mount.sh Executable file

@@ -0,0 +1,121 @@
#!/bin/bash
# https://bitbucket.org/dewoodruff/zfs-on-linux-luks-mountvolumes/src/5836def278a3e462f1f508ba02b7fa236dd28717/mountVolumes.sh
. /etc/zfs_mount_settings.sh
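# the settings file is expected to define: LOG (logfile path), pools (array of
# pool names) and devs (associative array mapping block devices to LUKS mapper names)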
# the real work happens below
activePools=()
date >> $LOG
function getPoolStatus {
    echo "Checking pool status:" | tee -a $LOG
    for pool in "${pools[@]}"
    do
        echo -en "\t$pool: " | tee -a $LOG
        status=`zpool status $pool 2>&1 | grep "state:" | cut -f2 -d:`
        if [ -z "$status" ];
        then
            echo "unknown - not imported" | tee -a $LOG
        else
            echo $status | tee -a $LOG
            activePools+=($pool)
        fi
    done
}

function exportActivePools {
    if [ -n "$activePools" ];
    then
        echo -n "Exporting pools... " | tee -a $LOG
        for pool in "${activePools[@]}"
        do
            zpool export -f $pool 2>&1 1>>$LOG || { echo "Problem exporting $pool!" | tee -a $LOG; exit 0; }
        done
        echo " done."
    fi
}

function importPools {
    echo -n "Importing pools..."
    for pool in "${pools[@]}"
    do
        zpool import $pool 2>&1 1>>$LOG || { echo "Problem importing $pool!" | tee -a $LOG; exit 0; }
    done
    echo " done."
}

function closeAllLUKS {
    echo "Making sure all LUKS disks are closed..."
    for dev in "${devs[@]}"
    do
        #echo $dev
        cryptsetup close $dev 2>&1 1>>$LOG || { echo "Problem closing $dev!" | tee -a $LOG; exit 0; }
    done
    echo "Done."
}

function openAllLUKS {
    read -s -p "Enter LUKS passphrase: " pass1
    echo ""
    read -s -p "Confirm LUKS passphrase: " pass2
    echo ""
    if [ "$pass1" = "$pass2" ];
    then
        for dev in "${!devs[@]}"
        do
            echo "Opening $dev to ${devs["$dev"]}" | tee -a $LOG
            echo "$pass1" | cryptsetup luksOpen $dev ${devs[$dev]} 2>&1 1>>$LOG || { echo "Problem opening $dev!" | tee -a $LOG; exit 0; }
        done
    else
        echo "ERROR: passphrases don't match!"
    fi
    pass1=""
    pass2=""
}

function LUKSStatus {
    for dev in "${devs[@]}"
    do
        cryptsetup status $dev | head -1 | tee -a $LOG
    done | sort
}

function unmount {
    zfs unshare -a
    getPoolStatus
    exportActivePools
    closeAllLUKS
    getPoolStatus
}

if [ "$1" = "status" ];
then
LUKSStatus
getPoolStatus
elif [ "$1" = "mount" ];
then
getPoolStatus
exportActivePools
closeAllLUKS
openAllLUKS
importPools
getPoolStatus
zfs share -a
elif [ "$1" = "unmount" ];
then
unmount
elif [ "$1" = "reboot" ];
then
unmount
reboot
elif [ "$1" = "shutdown" ];
then
unmount
shutdown -h now
elif [ "$1" = "freespace" ];
then
zfs list
else
echo "Usage: ./mountVolumes.sh [status|mount|unmount|reboot|shutdown|freespace]"
fi
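The mount script relies on /etc/zfs_mount_settings.sh, which is not among the files changed in this commit. A minimal sketch of what that file could contain, using hypothetical pool and disk names:

#!/bin/bash
# /etc/zfs_mount_settings.sh -- example values only, adjust to the actual hardware
LOG=/var/log/zfs_mount.log
# pools to import/export, in order
pools=(tank)
# LUKS devices: underlying block device -> mapper name the pool sits on
declare -A devs=(
    [/dev/disk/by-id/ata-EXAMPLE-DISK1]=luks-disk1
    [/dev/disk/by-id/ata-EXAMPLE-DISK2]=luks-disk2
)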


@@ -40,3 +40,12 @@
- name: restart smartd
  service: name=smartd state=restarted

- name: zfs - start services
  service: "name={{ item }} state=restarted"
  with_items:
    - zfs-import-cache
    - zfs-import-scan
    - zfs-mount
    - zfs-share
  listen: zfs restart


@@ -73,3 +73,6 @@
  when: common_smartd
  tags: ['common', 'smartd']

- import_tasks: zfs.yml
  when: common_zfs
  tags: ['common', 'zfs']

tasks/zfs.yml Normal file

@@ -0,0 +1,43 @@
---
- name: zfs - linux-headers
  package: name=linux-headers-{{ ansible_kernel }}

- name: zfs - install ZoL dkms
  package: name=zfs-dkms

- name: zfs - install ZoL utils
  package: name=zfsutils-linux

- name: zfs - install zfs-auto-snapshot
  package: name=zfs-auto-snapshot

- name: zfs - zfs-auto-snapshot find cron files
  shell: find /etc/cron* -type f -name zfs-auto-snapshot
  register: snapshot_cron
  changed_when: False

- name: zfs - zfs-auto-snapshot prefix
  lineinfile:
    path: "{{ item }}"
    regexp: (.*zfs-auto-snapshot.*\d+)\ \ \/\/
    line: \1 --prefix= //
    backrefs: yes
  with_items: "{{ snapshot_cron.stdout_lines }}"
  when: snapshot_cron.stdout_lines is defined

- name: zfs - load module
  modprobe: name=zfs
  notify: zfs restart

- name: zfs - zfs_mount.sh
  copy: src=scripts/zfs_mount.sh dest=/usr/local/bin/ owner=root group=root mode=0755

- name: zfs - zfs_health.sh
  copy: src=scripts/zfs_health.sh dest=/usr/local/bin/ owner=root group=root mode=0755

- name: zfs - zfs_health cronjob
  cron:
    name: zfs check health
    minute: 0
    hour: "7,11,16"
    job: "/usr/local/bin/zfs_health.sh"
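For illustration, the zfs-auto-snapshot prefix task rewrites the packaged cron entries roughly as follows (the exact stock line varies by distribution release; shown abridged):

# before, e.g. in /etc/cron.d/zfs-auto-snapshot
... zfs-auto-snapshot --quiet --syslog --label=frequent --keep=4  //
# after
... zfs-auto-snapshot --quiet --syslog --label=frequent --keep=4 --prefix= //

With an empty --prefix the snapshots lose the default zfs-auto-snap name prefix, which keeps snapshot names short.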