diff --git a/lib/facter/nagios_smartmon.rb b/lib/facter/nagios_smartmon.rb
new file mode 100644
index 00000000..abf0dfd4
--- /dev/null
+++ b/lib/facter/nagios_smartmon.rb
@@ -0,0 +1,188 @@
+# This facter script adds the fact "nagios_smartmon" and puts there the hash
+# that contain information about connected disks, controllers, their ports, etc.
+# This information could be passed to the smartctl utility for checking the
+# SMART status.
+#
+# Example of hashes:
+#
+# The SATA disks attached directly
+# {
+#   0=>{"dev"=>"sda", "controller"=>"ata"},
+#   1=>{"dev"=>"sdb", "controller"=>"ata"}
+# }
+#
+# The SATA disk connected to the LSI MegaRAID controller
+# {
+#   5=>{"interface"=>"SATA", "controller"=>"megaraid", "port"=>"1", "dev"=>"sda"},
+#   0=>{"interface"=>"SATA", "controller"=>"megaraid", "port"=>"2", "dev"=>"sda"},
+#   1=>{"interface"=>"SATA", "controller"=>"megaraid", "port"=>"0", "dev"=>"sda"},
+#   2=>{"interface"=>"SATA", "controller"=>"megaraid", "port"=>"3", "dev"=>"sda"},
+#   3=>{"interface"=>"SATA", "controller"=>"megaraid", "port"=>"4", "dev"=>"sda"},
+#   4=>{"interface"=>"SATA", "controller"=>"megaraid", "port"=>"5", "dev"=>"sda"}
+# }
+#
+# The SAS disk connected to the LSI MegaRAID controller
+# {0=>{"dev"=>"sda", "interface"=>"SAS", "port"=>"8", "controller"=>"megaraid"}}
+
+
+# Check if the RAID controller utility is present and get the full path to it.
+#
+# utilNames - Array of absolute paths that are probed.
+#
+# Returns the path that exists (the last one when several exist) or nil when
+# none of them is present.
+def getRaidUtil(utilNames)
+  # File.exist? is used because FileTest.exists? was removed in Ruby 3.2.
+  utilNames.select { |name| File.exist?(name) }.last
+end
+
+# Controller megaraid
+#
+# Returns the output of the MegaCli pipeline (one "Device Id" per line), or
+# nil when no MegaCli binary is installed.
+def getPhysicalDisksPorts_megaraid
+  # List of possible names of RAID utility
+  utilNames = [
+    '/usr/sbin/megacli',
+    '/usr/sbin/MegaCli',
+    '/usr/sbin/MegaCli64'
+  ]
+
+  # Get the full path to RAID utility
+  raidUtil = getRaidUtil(utilNames)
+
+  # The connected ports can be checked only if the RAID utility is present.
+  return nil unless raidUtil
+
+  # Get the list of connected ports.
+  Facter::Core::Execution.exec("#{raidUtil} -PDList -Aall | awk '/Device\ Id/{print $3}'")
+end
+
+# This method checks the interface to which the disk is connected.
+# This needed with the MegaRAID controllers in CentOS 6. The smartctl 5.43
+# requires the "sat+megaraid,N" in case of SATA disk and just "megaraid,N" in
+# case of SAS.
+#
+# port - The "Device Id" of the disk as printed by MegaCli.
+#
+# Returns the output of the MegaCli pipeline (the "PD Type" of the disk), or
+# nil when no MegaCli binary is installed.
+def checkDiskInterface(port)
+  # List of possible names of RAID utility
+  utilNames = [
+    '/usr/sbin/megacli',
+    '/usr/sbin/MegaCli',
+    '/usr/sbin/MegaCli64'
+  ]
+
+  # Get the full path to RAID utility
+  raidUtil = getRaidUtil(utilNames)
+
+  # Without the utility the command below would start with " -PDList".
+  return nil unless raidUtil
+
+  # Get the disk interface (SATA/SAS). The pattern is anchored at the end of
+  # the line, otherwise port 1 would also match "Device Id: 10", "Device Id: 11"
+  # and so on, and several interfaces would be returned.
+  Facter::Core::Execution.exec("#{raidUtil} -PDList -aALL | grep -e '^Device Id: #{port}[[:space:]]*$' -A 10 | awk '/PD Type:/{print $3}'")
+end
+
+
+# Controller hpsa
+#
+# Returns the output of the hpssacli/hpacucli pipeline (one port per line), or
+# nil when neither utility is installed.
+def getPhysicalDisksPorts_cciss
+  # List of possible names of RAID utility
+  utilNames = [
+    '/usr/sbin/hpssacli',
+    '/usr/sbin/hpacucli'
+  ]
+
+  # Get the full path to RAID utility
+  raidUtil = getRaidUtil(utilNames)
+
+  # The connected ports can be checked only if the RAID utility is present.
+  return nil unless raidUtil
+
+  # Get slot of SmartArray controller. This required for checking the connected ports.
+  hpsaSlot = Facter::Core::Execution.exec("#{raidUtil} controller all show status | awk '/Slot/{print $6}'")
+
+  # Get the list of connected ports.
+  Facter::Core::Execution.exec("#{raidUtil} controller slot=#{hpsaSlot} physicaldrive all show status | awk '/bay/{ gsub(\",\",\"\"); print (\$6-1)}'")
+end
+
+# Get the list of connected disks and their attributes (name, port, interface).
+#
+# controller - "ata", "megaraid" or "cciss"; for the last two the matching
+#              getPhysicalDisksPorts_<controller> method is called.
+#
+# Returns a Hash that maps a running index to the attributes of one disk. It
+# is empty when the ports of a RAID controller could not be listed.
+def getDisks(controller)
+  # Get the list of block devices. The fact is a string divided by comma;
+  # to_s guards against a nil value so that split can always be called.
+  blockdevices = Facter.value(:blockdevices).to_s.split(",")
+
+  # Delete all CD-drives (sr*) and virtual drives (vd*) from the array of
+  # blockdevices.
+  blockdevices.reject! { |blockdevice| blockdevice =~ /\A(sr|vd)/ }
+
+  disks = {}
+
+  # Controller "ata" in smartmontools terminology means that there are no
+  # hardware RAID controllers and the disks are connected directly to the
+  # (S)ATA ports
+  if controller == "ata"
+    # Add all blockdevices to the "disks" hash.
+    blockdevices.each_with_index do |blockdevice, i|
+      disks[i] = {
+        "dev" => blockdevice,
+        "controller" => controller
+      }
+    end
+    return disks
+  end
+
+  # Check the connected ports using the RAID controller utility (if present)
+  ports = send("getPhysicalDisksPorts_#{controller}")
+
+  # A nil means that there is no RAID controller utility, so there is no way
+  # to check to which ports the disks are connected. Return the empty hash
+  # instead of calling split on nil.
+  return disks if ports.nil?
+
+  # Add all disks connected to the RAID controller as separate devices to the
+  # "disks" hash. The smartctl requires the blockdevice for checking the
+  # SMART status. Let's pass the first blockdevice what we have to the
+  # smartctl utility.
+  ports.split("\n").each_with_index do |port, i|
+    # For the LSI MegaRAID controller we have to check the interface of the
+    # disk. It may be SAS or SATA
+    diskInterface = (controller == "megaraid") ? checkDiskInterface(port) : nil
+
+    disks[i] = {
+      "dev" => blockdevices[0],
+      "controller" => controller,
+      "port" => port,
+      "interface" => diskInterface
+    }
+  end
+
+  disks
+end
+
+Facter.add(:nagios_smartmon) do
+  setcode do
+    # Check if there is LSI MegaRAID controller
+    if Facter.value(:nagios_pci_megaraid_sas)
+      getDisks("megaraid")
+    # Check if there is HP SmartArray controller
+    elsif Facter.value(:nagios_pci_hpsa)
+      getDisks("cciss")
+    # Else use the "ata" driver
+    else
+      getDisks("ata")
+    end
+  end
+end
diff --git a/manifests/check/smartmon.pp b/manifests/check/smartmon.pp
new file mode 100644
index 00000000..c64ed61a
--- /dev/null
+++ b/manifests/check/smartmon.pp
@@ -0,0 +1,49 @@
+class nagios::check::smartmon (
+  $package                  = "smartmontools",
+  $ensure                   = undef,
+  $args                     = '',
+  $check_title              = $::nagios::client::host_name,
+  $servicegroups            = undef,
+  $check_period             = $::nagios::client::service_check_period,
+  $contact_groups           = $::nagios::client::service_contact_groups,
+  $first_notification_delay = $::nagios::client::first_notification_delay,
+  $max_check_attempts       = $::nagios::client::service_max_check_attempts,
+  $notification_period      = $::nagios::client::service_notification_period,
+  $use                      = $::nagios::client::service_use,
+) {
+
+  # Service specific script, taken from:
+  file { "${nagios::client::plugin_dir}/check_smartmon":
+    ensure  => $ensure,
+    owner   => 'root',
+    group   => 'root',
+    mode    => '0755',
+    content => template("${module_name}/plugins/check_smartmon"),
+  }
+
+  # The check is being executed via sudo
+  file { "/etc/sudoers.d/nagios_check_smartmon":
+    ensure  => $ensure,
+    owner   => 'root',
+    group   => 'root',
+    mode    => '0440',
+    # We customize the user, the nagios plugin dir and few other things
+    content => template("${module_name}/plugins/smartmon-sudoers.erb"),
+  }
+
+  ensure_packages($package)
+
+  nagios::client::nrpe_file { "check_smartmon":
+    ensure => $ensure,
+    plugin => "check_smartmon",
+    args   => '-d /dev/$ARG1$ -i $ARG2$',
+  }
+
+  $disks = $::nagios_smartmon
+  $defaults = {
+    ensure => $ensure,
+  }
+  # Generate resources for each physical disk
+  create_resources(nagios::check::smartmon::disk, $disks, $defaults)
+
+}
diff --git a/manifests/client.pp b/manifests/client.pp
index 53838e3c..b4d11348 100644
--- a/manifests/client.pp
+++ b/manifests/client.pp
@@ -172,6 +172,9 @@
       }
     }
     if $::nagios_postgres { class { '::nagios::check::postgres': } }
+    if $::nagios_smartmon and !str2bool("${::is_virtual}") {
+      class { '::nagios::check::smartmon': }
+    }
   }
 
   # With selinux, some nrpe plugins require additional rules to work
diff --git a/manifests/server.pp b/manifests/server.pp
index 9ceb0b60..1729bd29 100644
--- a/manifests/server.pp
+++ b/manifests/server.pp
@@ -819,6 +819,9 @@
   nagios_command {'check_nrpe_mountpoints':
     command_line => "${nrpe} -c check_mountpoints",
   }
+  nagios_command {'check_nrpe_smartmon':
+    command_line => "${nrpe} -c check_smartmon -a \$ARG1\$ \$ARG2\$",
+  }
 
   # Nagios contacts and contactgroups
   # Taken from contacts.cfg
diff --git a/templates/plugins/check_smartmon b/templates/plugins/check_smartmon
new file mode 100644
index 00000000..971a57a4
--- /dev/null
+++ b/templates/plugins/check_smartmon
@@ -0,0 +1,446 @@
+#!/usr/bin/perl -w
+# Check SMART status of ATA/SCSI disks, returning any usable metrics as perfdata.
+# For usage information, run ./check_smart -h
+#
+# This script was created under contract for the US Government and is therefore Public Domain
+#
+# Changes and Modifications
+# =========================
+# Feb 3, 2009: Kurt Yoder - initial version of script (rev 1.0)
+# Jul 8, 2013: Claudio Kuenzler - support hardware raids like megaraid (rev 2.0)
+# Jul 9, 2013: Claudio Kuenzler - update help output (rev 2.1)
+# Oct 11, 2013: Claudio Kuenzler - making the plugin work on FreeBSD (rev 3.0)
+# Oct 11, 2013: Claudio Kuenzler - allowing -i sat (SATA on FreeBSD) (rev 3.1)
+# Nov 4, 2013: Claudio Kuenzler - works now with CCISS on FreeBSD (rev 3.2)
+# Nov 4, 2013: Claudio Kuenzler - elements in grown defect list causes warning (rev 3.3)
+# Nov 6, 2013: Claudio Kuenzler - add threshold option "bad" (-b) (rev 4.0)
+# Nov 7, 2013: Claudio Kuenzler - modified help (rev 4.0)
+# Nov 7, 2013: Claudio Kuenzler - bugfix in threshold logic (rev 4.1)
+# Mar 19, 2014: Claudio Kuenzler - bugfix in defect list perfdata (rev 4.2)
+# Apr 22, 2014: Jerome Lauret - implemented -g to do a global lookup (rev 5.0)
+# Apr 25, 2014: Claudio Kuenzler - cleanup, merge Jeromes code, perfdata output fix (rev 5.1)
+# May 5, 2014: Caspar Smit - Fixed output bug in global check / issue #3 (rev 5.2)
+# Feb 4, 2015: Caspar Smit and cguadall - Allow detection of more than 26 devices / issue #5 (rev 5.3)
+# Feb 5, 2015: Bastian de Groot - Different ATA vs. SCSI lookup (rev 5.4)
+# Feb 11, 2015: Josh Behrends - Allow script to run outside of nagios plugins dir / wiki url update (rev 5.5)
+# Feb 11, 2015: Claudio Kuenzler - Allow script to run outside of nagios plugins dir for FreeBSD too (rev 5.5)
+# Mar 12, 2015: Claudio Kuenzler - Change syntax of -g parameter (regex is now awaited from input) (rev 5.6)
+
+use strict;
+use Getopt::Long;
+
+use File::Basename qw(basename);
+my $basename = basename($0);
+
+my $revision = '$Revision: 5.6 $';
+
+use FindBin;
+use lib $FindBin::Bin;
+BEGIN {
+    push @INC,'/usr/lib/nagios/plugins','/usr/lib64/nagios/plugins','/usr/local/libexec/nagios';
+}
+use utils qw(%ERRORS &print_revision &support &usage);
+
+$ENV{'PATH'}='/bin:/usr/bin:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin';
+$ENV{'BASH_ENV'}='';
+$ENV{'ENV'}='';
+
+use vars qw($opt_b $opt_d $opt_g $opt_debug $opt_h $opt_i $opt_v);
+Getopt::Long::Configure('bundling');
+GetOptions(
+    "debug" => \$opt_debug,
+    "b=i" => \$opt_b, "bad=i" => \$opt_b,
+    "d=s" => \$opt_d, "device=s" => \$opt_d,
+    "g=s" => \$opt_g, "global=s" => \$opt_g,
+    "h" => \$opt_h, "help" => \$opt_h,
+    "i=s" => \$opt_i, "interface=s" => \$opt_i,
+    "v" => \$opt_v, "version" => \$opt_v,
+);
+
+if ($opt_v) {
+    print_revision($basename,$revision);
+    exit $ERRORS{'OK'};
+}
+
+if ($opt_h) {
+    print_help();
+    exit $ERRORS{'OK'};
+}
+
+my ($device, $interface) = qw//;
+if ($opt_d || $opt_g ) {
+    unless($opt_i){
+        print "must specify an interface for $opt_d using -i/--interface!\n\n";
+        print_help();
+        exit $ERRORS{'UNKNOWN'};
+    }
+
+    # list of devices for a loop
+    my(@dev);
+
+    if ( $opt_d ){
+        # normal mode - push opt_d on the list of devices
+        push(@dev,$opt_d);
+    } else {
+        # glob all devices - try '?' first
+        @dev =glob($opt_g);
+    }
+
+    foreach my $opt_dl (@dev){
+        warn "Found $opt_dl\n" if $opt_debug;
+        if (-b $opt_dl || -c $opt_dl){
+            $device .= $opt_dl.":";
+
+        } else {
+            warn "$opt_dl is not a valid block/character special device!\n\n" if $opt_debug;
+        }
+    }
+
+    if ( ! defined($device) ){
+        print "Could not find any valid block/character special device for ".
+              ($opt_d?"device $opt_d ":"pattern $opt_g")." !\n\n";
+        exit $ERRORS{'UNKNOWN'};
+    }
+
+    # Allow all device types currently supported by smartctl; the whole value must consist of [\w,+/] because it is put into a shell command
+    # See http://www.smartmontools.org/wiki/Supported_RAID-Controllers
+    if ($opt_i =~ m{\A[\w,+/]*(?:ata|scsi|3ware|areca|hpt|cciss|megaraid|sat)[\w,+/]*\z}) {
+        $interface = $opt_i;
+    } else {
+        print "invalid interface $opt_i for $opt_d!\n\n";
+        print_help();
+        exit $ERRORS{'UNKNOWN'};
+    }
+}
+
+
+if (!defined($device) || $device eq "") {
+    print "must specify a device!\n\n";
+    print_help();
+    exit $ERRORS{'UNKNOWN'};
+}
+
+
+my $smart_command = 'sudo smartctl';
+my $exit_status = 'OK';
+my $exit_status_local = 'OK';
+my $status_string = '';
+my $perf_string = '';
+my $Terminator = ' --- ';
+
+
+foreach $device ( split(":",$device) ){
+    my @error_messages = qw//;
+    my($status_string_local)='';
+    my($tag,$label);
+    $exit_status_local = 'OK';
+
+    if ($opt_g){
+        # we had a pattern based on $opt_g
+        $tag = $device;
+        $tag =~ s/$opt_g//;
+        $label = "[$device] - ";
+    } else {
+        # we had a device specified using $opt_d (traditional)
+        $label = "";
+        $tag = $device;
+    }
+
+
+    warn "###########################################################\n" if $opt_debug;
+    warn "(debug) CHECK 1: getting overall SMART health status for $tag \n" if $opt_debug;
+    warn "###########################################################\n\n\n" if $opt_debug;
+
+    my $full_command = "$smart_command -d $interface -H $device";
+    warn "(debug) executing:\n$full_command\n\n" if $opt_debug;
+
+    my @output = `$full_command`;
+    warn "(debug) output:\n@output\n\n" if $opt_debug;
+
+    my $output_mode = "";
+    # parse ata output, looking for "health status: passed"
+    my $found_status = 0;
+    my $line_str_ata = 'SMART overall-health self-assessment test result: '; # ATA SMART line
+    my $ok_str_ata = 'PASSED'; # ATA SMART OK string
+
+    my $line_str_scsi = 'SMART Health Status: '; # SCSI and CCISS SMART line
+    my $ok_str_scsi = 'OK'; #SCSI and CCISS SMART OK string
+
+    foreach my $line (@output){
+        if($line =~ /$line_str_scsi(.+)/){
+            $found_status = 1;
+            $output_mode = "scsi";
+            warn "(debug) parsing line:\n$line\n\n" if $opt_debug;
+            if ($1 eq $ok_str_scsi) {
+                warn "(debug) found string '$ok_str_scsi'; status OK\n\n" if $opt_debug;
+            }
+            else {
+                warn "(debug) no '$ok_str_scsi' status; failing\n\n" if $opt_debug;
+                push(@error_messages, "Health status: $1");
+                escalate_status('CRITICAL');
+            }
+        }
+        if($line =~ /$line_str_ata(.+)/){
+            $found_status = 1;
+            $output_mode = "ata";
+            warn "(debug) parsing line:\n$line\n\n" if $opt_debug;
+            if ($1 eq $ok_str_ata) {
+                warn "(debug) found string '$ok_str_ata'; status OK\n\n" if $opt_debug;
+            }
+            else {
+                warn "(debug) no '$ok_str_ata' status; failing\n\n" if $opt_debug;
+                push(@error_messages, "Health status: $1");
+                escalate_status('CRITICAL');
+            }
+        }
+    }
+
+    unless ($found_status) {
+        push(@error_messages, 'No health status line found');
+        escalate_status('UNKNOWN');
+    }
+
+
+    warn "###########################################################\n" if $opt_debug;
+    warn "(debug) CHECK 2: getting silent SMART health check\n" if $opt_debug;
+    warn "###########################################################\n\n\n" if $opt_debug;
+
+    $full_command = "$smart_command -d $interface -q silent -A $device";
+    warn "(debug) executing:\n$full_command\n\n" if $opt_debug;
+
+    system($full_command);
+    # $? is the raw wait status: the smartctl exit bits are in its high byte; a non-zero low byte (or -1) means smartctl did not exit normally
+    my $return_code = ($? & 0xff) ? 0x100 : ($? >> 8);
+    warn "(debug) exit code:\n$return_code\n\n" if $opt_debug;
+    if ($return_code & 0x01) {
+        push(@error_messages, 'Commandline parse failure');
+        escalate_status('UNKNOWN');
+    }
+    if ($return_code & 0x02) {
+        push(@error_messages, 'Device could not be opened');
+        escalate_status('UNKNOWN');
+    }
+    if ($return_code & 0x04) {
+        push(@error_messages, 'Checksum failure');
+        escalate_status('WARNING');
+    }
+    if ($return_code & 0x08) {
+        push(@error_messages, 'Disk is failing');
+        escalate_status('CRITICAL');
+    }
+    if ($return_code & 0x10) {
+        push(@error_messages, 'Disk is in prefail');
+        escalate_status('WARNING');
+    }
+    if ($return_code & 0x20) {
+        push(@error_messages, 'Disk may be close to failure');
+        escalate_status('WARNING');
+    }
+    if ($return_code & 0x40) {
+        push(@error_messages, 'Error log contains errors');
+        escalate_status('WARNING');
+    }
+    if ($return_code & 0x80) {
+        push(@error_messages, 'Self-test log contains errors');
+        escalate_status('WARNING');
+    }
+    if ($return_code > 0xff) {
+        push(@error_messages, 'Unknown return code');
+        escalate_status('CRITICAL');
+    }
+
+    if ($return_code) {
+        warn "(debug) non-zero exit code, generating error condition\n\n" if $opt_debug;
+    } else {
+        warn "(debug) zero exit code, status OK\n\n" if $opt_debug;
+    }
+
+
+    warn "###########################################################\n" if $opt_debug;
+    warn "(debug) CHECK 3: getting detailed statistics\n" if $opt_debug;
+    warn "(debug) information contains a few more potential trouble spots\n" if $opt_debug;
+    warn "(debug) plus, we can also use the information for perfdata/graphing\n" if $opt_debug;
+    warn "###########################################################\n\n\n" if $opt_debug;
+
+    $full_command = "$smart_command -d $interface -A $device";
+    warn "(debug) executing:\n$full_command\n\n" if $opt_debug;
+    @output = `$full_command`;
+    warn "(debug) output:\n@output\n\n" if $opt_debug;
+    my @perfdata = qw//;
+
+    # separate metric-gathering and output analysis for ATA vs SCSI SMART output
+    # Yeah - but megaraid is the same output as ata
+    if ($output_mode =~ "ata") {
+        foreach my $line(@output){
+            # get lines that look like this:
+            # 9 Power_On_Minutes 0x0032 241 241 000 Old_age Always - 113h+12m
+            next unless $line =~ /^\s*\d+\s(\S+)\s+(?:\S+\s+){6}(\S+)\s+(\d+)/;
+            my ($attribute_name, $when_failed, $raw_value) = ($1, $2, $3);
+            if ($when_failed ne '-'){
+                push(@error_messages, "Attribute $attribute_name failed at $when_failed");
+                escalate_status('WARNING');
+                warn "(debug) parsed SMART attribute $attribute_name with error condition:\n$when_failed\n\n" if $opt_debug;
+            }
+            # some attributes produce questionable data; no need to graph them
+            if (grep {$_ eq $attribute_name} ('Unknown_Attribute', 'Power_On_Minutes') ){
+                next;
+            }
+            push (@perfdata, "$attribute_name=$raw_value") if $opt_d;
+
+            # do some manual checks
+            if ( ($attribute_name eq 'Current_Pending_Sector') && $raw_value ) {
+                if ($opt_b) {
+                    if (($raw_value > 0) && ($raw_value >= $opt_b)) {
+                        push(@error_messages, "$raw_value Sectors pending re-allocation");
+                        escalate_status('WARNING');
+                        warn "(debug) Current_Pending_Sector is non-zero ($raw_value)\n\n" if $opt_debug;
+                    }
+                    elsif (($raw_value > 0) && ($raw_value < $opt_b)) {
+                        push(@error_messages, "$raw_value Sectors pending re-allocation (but less than threshold $opt_b)");
+                        warn "(debug) Current_Pending_Sector is non-zero ($raw_value) but less than $opt_b\n\n" if $opt_debug;
+                    }
+                } else {
+                    push(@error_messages, "Sectors pending re-allocation");
+                    escalate_status('WARNING');
+                    warn "(debug) Current_Pending_Sector is non-zero ($raw_value)\n\n" if $opt_debug;
+                }
+            }
+        }
+
+    } else {
+        my ($current_temperature, $max_temperature, $current_start_stop, $max_start_stop) = qw//;
+        foreach my $line(@output){
+            if ($line =~ /Current Drive Temperature:\s+(\d+)/){
+                $current_temperature = $1;
+            }
+            elsif ($line =~ /Drive Trip Temperature:\s+(\d+)/){
+                $max_temperature = $1;
+            }
+            elsif ($line =~ /Current start stop count:\s+(\d+)/){
+                $current_start_stop = $1;
+            }
+            elsif ($line =~ /Recommended maximum start stop count:\s+(\d+)/){
+                $max_start_stop = $1;
+            }
+            elsif ($line =~ /Elements in grown defect list:\s+(\d+)/){
+                my $defectlist = $1;
+                # check for elements in grown defect list
+                if ($opt_b) {
+                    push (@perfdata, "defect_list=$defectlist;;$opt_b") if $opt_d;
+                    if (($defectlist > 0) && ($defectlist >= $opt_b)) {
+                        push(@error_messages, "$defectlist Elements in grown defect list (threshold $opt_b)");
+                        escalate_status('WARNING');
+                        warn "(debug) Elements in grown defect list is non-zero ($defectlist)\n\n" if $opt_debug;
+                    }
+                    elsif (($defectlist > 0) && ($defectlist < $opt_b)) {
+                        push(@error_messages, "Note: $defectlist Elements in grown defect list");
+                        warn "(debug) Elements in grown defect list is non-zero ($defectlist) but less than $opt_b\n\n" if $opt_debug;
+                    }
+                }
+                else {
+                    if ($defectlist > 0) {
+                        push (@perfdata, "defect_list=$defectlist") if $opt_d;
+                        push(@error_messages, "$defectlist Elements in grown defect list");
+                        escalate_status('WARNING');
+                        warn "(debug) Elements in grown defect list is non-zero ($defectlist)\n\n" if $opt_debug;
+                    }
+                }
+            }
+            elsif ($line =~ /Blocks sent to initiator =\s+(\d+)/){
+                push (@perfdata, "sent_blocks=$1") if $opt_d;
+            }
+        }
+        if($current_temperature){
+            if($max_temperature){
+                push (@perfdata, "temperature=$current_temperature;;$max_temperature") if $opt_d;
+                if($current_temperature > $max_temperature){
+                    warn "(debug) Disk temperature is greater than max ($current_temperature > $max_temperature)\n\n" if $opt_debug;
+                    push(@error_messages, 'Disk temperature is higher than maximum');
+                    escalate_status('CRITICAL');
+                }
+            }
+            else{
+                push (@perfdata, "temperature=$current_temperature") if $opt_d;
+            }
+        }
+        if($current_start_stop){
+            if($max_start_stop){
+                push (@perfdata, "start_stop=$current_start_stop;$max_start_stop") if $opt_d;
+                if($current_start_stop > $max_start_stop){
+                    warn "(debug) Disk start_stop is greater than max ($current_start_stop > $max_start_stop)\n\n" if $opt_debug;
+                    push(@error_messages, 'Disk start_stop is higher than maximum');
+                    escalate_status('WARNING');
+                }
+            }
+            else{
+                push (@perfdata, "start_stop=$current_start_stop") if $opt_d;
+            }
+        }
+    }
+    warn "(debug) gathered perfdata:\n@perfdata\n\n" if $opt_debug;
+    $perf_string = join(' ', @perfdata);
+
+    warn "###########################################################\n" if $opt_debug;
+    warn "(debug) LOCAL STATUS: $exit_status_local, FINAL STATUS: $exit_status\n" if $opt_debug;
+    warn "###########################################################\n\n\n" if $opt_debug;
+
+    if($exit_status_local ne 'OK'){
+        if ($opt_g) {
+            $status_string_local = $label.join(', ', @error_messages);
+            $status_string .= $status_string_local.$Terminator;
+        }
+        else {
+            $status_string = join(', ', @error_messages);
+        }
+    }
+    else {
+        if ($opt_g) {
+            $status_string_local = $label."Device is clean";
+            $status_string .= $status_string_local.$Terminator;
+        }
+        else {
+            $status_string = "no SMART errors detected. ".join(', ', @error_messages);
+        }
+    }
+
+}
+
+    warn "(debug) final status/output: $exit_status\n" if $opt_debug;
+
+$status_string =~ s/$Terminator$//;
+print "$exit_status: $status_string|$perf_string\n";
+exit $ERRORS{$exit_status};
+
+
+sub print_help {
+    print_revision($basename,$revision);
+    print "\nUsage: $basename {-d=|-g=} -i=(ata|scsi|3ware,N|areca,N|hpt,L/M/N|cciss,N|megaraid,N) [-b N] [--debug]\n\n";
+    print "At least one of the below. -d supersedes -g\n";
+    print " -d/--device: a physical block device to be SMART monitored, eg /dev/sda\n";
+    print " -g/--global: a regular expression name of physical devices to be SMART monitored\n";
+    print " Example: '/dev/sd[a-z]' will search for all /dev/sda until /dev/sdz devices and report errors globally.\n";
+    print "Note that -g only works with a fixed interface input (e.g. scsi, ata), not with special interface ids like cciss,1\n";
+    print "\n";
+    print "Other options\n";
+    print " -i/--interface: device's interface type\n";
+    print " (See http://www.smartmontools.org/wiki/Supported_RAID-Controllers for interface convention)\n";
+    print " -b/--bad: Threshold value (integer) when to warn for N bad entries\n";
+    print " -h/--help: this help\n";
+    print " --debug: show debugging information\n";
+    print " -v/--version: Version number\n";
+}
+
+# escalate an exit status IFF it's more severe than the previous exit status
+sub escalate_status {
+    my $requested_status = shift;
+    # Severity order is OK < UNKNOWN < WARNING < CRITICAL.
+    my %severity = ('OK' => 0, 'UNKNOWN' => 1, 'WARNING' => 2, 'CRITICAL' => 3);
+    # The global and the per-device status are compared separately. The former
+    # "($exit_status|$exit_status_local) eq ..." test was a bitwise string OR,
+    # which let a later device downgrade a global CRITICAL when using -g.
+    foreach my $status (\$exit_status, \$exit_status_local) {
+        next if $severity{$requested_status} <= $severity{$$status};
+        $$status = $requested_status;
+    }
+    return;
+}
diff --git a/templates/plugins/smartmon-sudoers.erb b/templates/plugins/smartmon-sudoers.erb
new file mode 100644
index 00000000..9f3ccc6c
--- /dev/null
+++ b/templates/plugins/smartmon-sudoers.erb
@@ -0,0 +1,2 @@
+Defaults:<%= scope['nagios::client::nrpe_user'] %> !requiretty
+<%= scope['nagios::client::nrpe_user'] %> ALL = (root) NOPASSWD: /usr/sbin/smartctl