#!/usr/bin/perl -w


use strict;
use Monitoring::Plugin qw(%ERRORS %STATUS_TEXT);

# needs libyaml-pp-perl
use YAML::PP;

# Configuration lookup: the current location is preferred, the legacy
# location is used only when the current one does not exist.
my $default_conf_file = "/etc/nagios-plugins/check_ssd.yaml";
my $legacy_conf_file = "/etc/nagios-plugins/config/ssd_config.yaml";

my $conf_file = (-e $default_conf_file ? $default_conf_file : $legacy_conf_file);

# Report configuration problems as a plugin result (UNKNOWN) instead of
# dying, and name the file that was actually selected.
if ( ! -e $conf_file || -z $conf_file ) {
    print "UNKNOWN: Config file $conf_file missing or empty (looked for $default_conf_file, then $legacy_conf_file)\n";
    exit ($ERRORS{UNKNOWN});
}

my $ypp = YAML::PP->new( schema => [qw/ + Merge Perl /] );

# load_file() throws on unreadable or malformed YAML; catch that so the
# monitoring system gets a readable one-line status.
my $vendors_config = eval { $ypp->load_file($conf_file) };
if ( ! defined $vendors_config ) {
    my $reason = $@ ? "$@" : "empty document";
    $reason =~ s/\s*\n.*//s;    # keep only the first line of the error
    print "UNKNOWN: cannot load config file $conf_file: $reason\n";
    exit ($ERRORS{UNKNOWN});
}

# The rest of the script expects a top-level 'vendors' mapping.
unless ( ref $vendors_config eq 'HASH' && ref $vendors_config->{'vendors'} eq 'HASH' ) {
    print "UNKNOWN: config file $conf_file has no 'vendors' mapping\n";
    exit ($ERRORS{UNKNOWN});
}
my %vendors_list = %{$vendors_config->{'vendors'}};


# Thresholds, list of devices to check, and per-device results.
my $WARN_LVL;
my $CRIT_LVL;
my @devices;
my %disk;

# Split a command line into (warning level, critical level, \@devices).
#
# Options are matched as whole arguments (-w / --warn / --warning and
# -c / --crit / --critical, case-insensitive). An unanchored match would
# treat a device path such as /dev/disk/by-id/ata-Crucial_... as "-c"
# and silently drop the device.
#
# The argument following an option is consumed as its value only when it
# is a plain non-negative number; otherwise the option is ignored and the
# default applies. Defaults are 30 (warning) and 10 (critical); an
# explicit 0 is honoured.
sub _parse_args {
    my @args = @_;
    my ( $warn, $crit, @devs );

    for ( my $i = 0; $i <= $#args; $i++ ) {
        my $arg = $args[$i];
        next unless defined $arg;

        my $next = $i < $#args ? $args[ $i + 1 ] : undef;
        my $next_is_level = ( defined $next && $next =~ m/^\d+(?:\.\d+)?$/ ) ? 1 : 0;

        if ( $arg =~ m/^--?w(?:arn(?:ing)?)?$/i ) {
            if ($next_is_level) {
                $warn = $next;
                $i++;    # the value has been consumed
            }
        } elsif ( $arg =~ m/^--?c(?:rit(?:ical)?)?$/i ) {
            if ($next_is_level) {
                $crit = $next;
                $i++;    # the value has been consumed
            }
        } elsif ( $arg =~ m/\/dev\/[\w+\/]/ ) {
            push @devs, $arg;
        }
    }

    $warn = 30 unless defined $warn;
    $crit = 10 unless defined $crit;

    return ( $warn, $crit, \@devs );
}

{
    my ( $warn, $crit, $devs ) = _parse_args(@ARGV);
    $WARN_LVL = $warn;
    $CRIT_LVL = $crit;
    @devices  = @{$devs};
}


# Names of the SMART attributes to read for the device being checked.
# They are filled in from the vendor configuration inside the device loop.
my $ON_TIME_TAG;
my $LBAS_WRITTEN_TAG;
my $WEAR_COUNT_TAG;
my $PROGRAM_FAIL_COUNT;
my $ERASE_FAIL_COUNT;
my $attribute;

# Factor to convert to GB
my $LBAS_WRITTEN_GB_FACTOR;

# Every vendor key of the configuration counts as a supported model pattern.
my @supported_models = keys %vendors_list;

# Detail lines of the plugin output and the overall exit status.
my @status_msg;
my $status_code = $ERRORS{OK};

# Show the usage when there is nothing to check. Testing @devices rather
# than @ARGV also covers a command line that only carries options, which
# would otherwise print an empty result and exit OK.
unless ( scalar @devices ) {
    print "UNKNOWN: Usage: check_ssd [-w <WARNING LEVEL>] [-c <CRITICAL LEVEL>] device1 [device2] [...]\n";
    print "Default: WARNING LEVEL=30 - CRITICAL LEVEL=10\n";
    exit ($ERRORS{UNKNOWN});
}

# Run an external command without going through a shell and return its
# standard output as one string ('' when the command cannot be started or
# prints nothing). The list form of the pipe open keeps user-supplied
# device names from being interpreted by a shell.
sub _capture_cmd {
    my @cmd = @_;
    my $pid = open( my $pipe, '-|', @cmd );
    return '' unless $pid;
    local $/;
    my $out = <$pipe>;
    close $pipe;
    return defined $out ? $out : '';
}

# Check every requested device: resolve it, look up the SMART attribute
# names configured for its model, parse `smartctl -A` and grade the
# wear / program-fail / erase-fail values against the thresholds.
# Results go to %disk (keyed by resolved device path) and $status_code.
foreach my $requested (@devices) {
    # Resolve symlinks (e.g. /dev/disk/by-id/...) to the real device node.
    my $device = _capture_cmd( 'readlink', '-f', '--', $requested );
    chomp($device);
    $device = $requested if $device eq '';    # keep a usable name in messages

    my @dev_msg;
    my $dev_status = $ERRORS{OK};
    my $device_name = $device;
    $device_name =~ s!^/dev/!!;

    # Store the outcome for this device and fold it into the global status
    # (the numerically highest status wins, as before).
    my $record = sub {
        $disk{$device}{status} = $dev_status;
        @{$disk{$device}{msg}} = @dev_msg;
        $status_code = $dev_status unless ( $dev_status < $status_code );
    };

    my $model_file = "/sys/block/$device_name/device/model";
    unless ( -f $model_file ) {
        push @dev_msg, "device $device not found";
        $dev_status = $ERRORS{CRITICAL};    # a missing device is CRITICAL, not UNKNOWN
        $record->();
        next;
    }

    my $model = '';
    if ( open( my $model_fh, '<', $model_file ) ) {
        local $/;
        my $content = <$model_fh>;
        close $model_fh;
        $model = $content if defined $content;
    }
    chomp $model;

    unless ($model && grep { $model =~ m/$_/i } @supported_models) {
        push @dev_msg, "$device, unsupported model $model";
        $dev_status = $ERRORS{UNKNOWN};
        $record->();
        next;
    }

    # Start from a clean slate so that tags configured for a previous
    # device never leak into this one.
    ( $WEAR_COUNT_TAG, $LBAS_WRITTEN_TAG, $LBAS_WRITTEN_GB_FACTOR,
      $PROGRAM_FAIL_COUNT, $ERASE_FAIL_COUNT, $ON_TIME_TAG ) = ();

    # Keys are sorted so that, when several entries match the same model,
    # the result does not depend on hash ordering.
    for my $vendor ( sort keys %vendors_list ) {
        next unless ref $vendors_list{$vendor} eq 'HASH';
        my $models = $vendors_list{$vendor}{'disk_id'};
        next unless ref $models eq 'HASH';
        for my $model_name ( sort keys %{$models} ) {
            next unless $model =~ m/$vendor $model_name/i;
            my $tags = $models->{$model_name};
            next unless ref $tags eq 'HASH';
            $WEAR_COUNT_TAG = "$tags->{'WEAR_COUNT_TAG'}" if defined $tags->{'WEAR_COUNT_TAG'};
            $LBAS_WRITTEN_TAG = "$tags->{'LBAS_WRITTEN_TAG'}" if defined $tags->{'LBAS_WRITTEN_TAG'};
            $LBAS_WRITTEN_GB_FACTOR = $tags->{'LBAS_WRITTEN_GB_FACTOR'} if defined $tags->{'LBAS_WRITTEN_GB_FACTOR'};
            $PROGRAM_FAIL_COUNT = "$tags->{'PROGRAM_FAIL_COUNT'}" if defined $tags->{'PROGRAM_FAIL_COUNT'};
            $ERASE_FAIL_COUNT = "$tags->{'ERASE_FAIL_COUNT'}" if defined $tags->{'ERASE_FAIL_COUNT'};
            $ON_TIME_TAG = "$tags->{'ON_TIME_TAG'}" if defined $tags->{'ON_TIME_TAG'};
        }
    }

    my $smart_info = _capture_cmd( '/usr/sbin/smartctl', '-A', "/dev/$device_name" );

    unless ( $smart_info ) {
        push @dev_msg, "$device: no SMART informations";
        $dev_status = $ERRORS{UNKNOWN};
        $record->();
        next;
    }

    # Parse the attribute table: one hash per attribute, keyed by name.
    my %smart_attributes;
    for my $line ( split /^/ , $smart_info ) {
        chomp($line);
        next unless ( $line =~ m/^\s*(\d+)\s+(\w+)\s+(.*)\s+(\d+)\s+(\d+)\s+(\d+)\s+(.*)\s+(.*)\s+(.*)\s+(.+)\s*$/ );
        my @attr=($1,$2,$3,$4,$5,$6,$7,$8,$9,$10);

        # smartctl reports IDs 171/172 without a name ("unknown attribute")
        # on Intel disks, so they are identified by ID and stored under the
        # configured tag names (when those are configured).
        my $name = $attr[1];
        if ( $model =~ m/intel/i ) {
            if ( $attr[0] == 171 && defined $PROGRAM_FAIL_COUNT ) {
                $name = $PROGRAM_FAIL_COUNT;
            } elsif ( $attr[0] == 172 && defined $ERASE_FAIL_COUNT ) {
                $name = $ERASE_FAIL_COUNT;
            }
        }

        $smart_attributes{$name} = {
            id          => $attr[0],
            flag        => $attr[2],
            value       => $attr[3],
            worst       => $attr[4],
            thresh      => $attr[5],
            type        => $attr[6],
            updated     => $attr[7],
            when_failed => $attr[8],
            raw_value   => $attr[9],
        };

        # Keep only the hour count of a raw value like "1234h+56m+07.890s".
        if ( $name eq "Power_On_Hours_and_Msec" ) {
            $smart_attributes{$name}{raw_value} =~ s/h.*$//;
        }

        # On OCZ disks the fail counters are derived from the raw value:
        # 100 minus the raw count, clamped at 0.
        my $is_fail_counter =
               ( defined $PROGRAM_FAIL_COUNT && $name eq $PROGRAM_FAIL_COUNT )
            || ( defined $ERASE_FAIL_COUNT   && $name eq $ERASE_FAIL_COUNT );
        if ( $model =~ m/ocz/i && $is_fail_counter ) {
            my $raw = $smart_attributes{$name}{raw_value};
            if ( $raw == 0 ) {
                $smart_attributes{$name}{value} = 100;
            } elsif ( $raw < 100 ) {
                $smart_attributes{$name}{value} = 100 - $raw;
            } else {
                $smart_attributes{$name}{value} = 0;
            }
        }
    }

    unless ( %smart_attributes ) {
        push @dev_msg, "$device: no SMART attributes";
        $dev_status = $ERRORS{UNKNOWN};
        $record->();
        next;
    }

    # Numeric value of the attribute stored under $tag, or undef when the
    # tag is not configured or the attribute was not reported. Adding 0
    # drops leading zeros ("099" -> 99) while keeping "000" as 0.
    my $attr_value = sub {
        my ($tag) = @_;
        return undef unless defined $tag && exists $smart_attributes{$tag};
        my $value = $smart_attributes{$tag}{value};
        return undef unless defined $value;
        return 0 + $value;
    };

    if ( $model =~ m/sandisk/i
        && defined $WEAR_COUNT_TAG
        && defined $ON_TIME_TAG
        && exists $smart_attributes{$ON_TIME_TAG} ) {
        # Use warranty as a base ... FIXME
        my $warranty_hours = 3*365*24;
        $smart_attributes{$WEAR_COUNT_TAG}{value} = int(100 - ( $smart_attributes{$ON_TIME_TAG}{raw_value} * 100 / $warranty_hours ));
    }

    my $wear_leveling_count = "unknown";
    my $wear_value = $attr_value->($WEAR_COUNT_TAG);
    if ( ! defined $wear_value ) {
        push @dev_msg, "$device: no WEAR_COUNT_TAG";
        $dev_status = $ERRORS{UNKNOWN};
    } else {
        $wear_leveling_count = $wear_value;
        if ( $wear_leveling_count <= $CRIT_LVL ) {
            push @dev_msg, "$device: WEAR_COUNT_TAG=$wear_leveling_count";
            $dev_status = $ERRORS{CRITICAL} ;
        } elsif ( $wear_leveling_count <= $WARN_LVL ) {
            push @dev_msg, "$device: WEAR_COUNT_TAG=$wear_leveling_count";
            $dev_status = $ERRORS{WARNING} unless ( $dev_status > $ERRORS{WARNING} );
        } elsif ( $wear_leveling_count == 1 ) {
            # Only reachable when both thresholds are below 1.
            push @dev_msg, "$device: WEAR_COUNT_TAG=$wear_leveling_count Disk down";
            $dev_status = $ERRORS{CRITICAL} ;
        }
    }

    my $program_fail="unknown";
    my $program_value = $attr_value->($PROGRAM_FAIL_COUNT);
    if ( ! defined $program_value ) {
        push @dev_msg, "$device: no PROGRAM_FAIL_COUNT attribute";
        $dev_status = $ERRORS{UNKNOWN};
    } else {
        $program_fail = $program_value;
        if ( $program_fail <= $CRIT_LVL ) {
            push @dev_msg, "$device: PROGRAM_FAIL_COUNT $program_fail % remaining.";
            $dev_status = $ERRORS{CRITICAL} ;
        } elsif ( $program_fail <= $WARN_LVL ) {
            push @dev_msg, "$device: PROGRAM_FAIL_COUNT $program_fail % remaining.";
            $dev_status = $ERRORS{WARNING} unless ( $dev_status > $ERRORS{WARNING} );
        }
    }

    my $erase_fail = "unknown";
    my $erase_value = $attr_value->($ERASE_FAIL_COUNT);
    if ( ! defined $erase_value ) {
        push @dev_msg, "$device: no ERASE_FAIL_COUNT attribute";
        $dev_status = $ERRORS{UNKNOWN};
    } else {
        $erase_fail = $erase_value;
        # No dedicated message here: the erase count is part of the
        # summary line pushed below.
        if ( $erase_fail <= $CRIT_LVL ) {
            $dev_status = $ERRORS{CRITICAL} ;
        } elsif ( $erase_fail <= $WARN_LVL ) {
            $dev_status = $ERRORS{WARNING} unless ( $dev_status > $ERRORS{WARNING} );
        }
    }

    # Power-on time as reported (shown in the message) and its numeric
    # part (used for the GB/day rate). Falling back to 1 avoids a division
    # by zero for missing, zero or non-numeric raw values such as "0h+00m".
    my $power_on_hours = 1;
    if ( defined $ON_TIME_TAG
        && exists $smart_attributes{$ON_TIME_TAG}
        && $smart_attributes{$ON_TIME_TAG}{raw_value} ) {
        $power_on_hours = $smart_attributes{$ON_TIME_TAG}{raw_value};
    }
    my $hours_for_rate = 1;
    if ( $power_on_hours =~ m/^(\d+(?:\.\d+)?)/ && $1 > 0 ) {
        $hours_for_rate = $1;
    }

    my $GB_written = 'Unknown';
    my $GB_per_day = 'Unknown';
    if ( defined $LBAS_WRITTEN_TAG
        && defined $LBAS_WRITTEN_GB_FACTOR
        && exists $smart_attributes{$LBAS_WRITTEN_TAG}
        && $smart_attributes{$LBAS_WRITTEN_TAG}{raw_value} ) {
        $GB_written = sprintf("%0.2f", $smart_attributes{$LBAS_WRITTEN_TAG}{raw_value} * $LBAS_WRITTEN_GB_FACTOR );
        $GB_per_day = sprintf("%0.2f", $GB_written / ($hours_for_rate / 24 ) );
    }

    push @dev_msg, "$device: $power_on_hours Hours / $GB_written GB written / $GB_per_day GB/day / Wear_count: $wear_leveling_count% / Erase_count: $erase_fail%";

    $record->();
    $disk{$device}{wear_level} = $wear_leveling_count;
    $disk{$device}{program_fail} = $program_fail;
    $disk{$device}{erase_fail} = $erase_fail;
}

# Build the plugin output: the first line summarises every device, the
# following lines carry the per-device details collected in %disk.
my @firstline;

foreach my $device ( sort keys %disk ) {
    my $details = join(' ', @{ $disk{$device}{msg} || [] });
    my $out;
    if ( $details =~ m/unsupported model|not found/ ) {
        # Nothing was measured for this device: repeat the error itself.
        $out = "$device: " . $details;
        push @status_msg, "$device: " . $details;
    } else {
        # Devices that stopped early (e.g. no SMART data) have no
        # wear/erase figures; print "unknown" rather than interpolating
        # undef.
        my $wear  = defined $disk{$device}{wear_level} ? $disk{$device}{wear_level} : 'unknown';
        my $erase = defined $disk{$device}{erase_fail} ? $disk{$device}{erase_fail} : 'unknown';
        my $status = $disk{$device}{status};
        my $status_text = ( defined $status && defined $STATUS_TEXT{$status} )
            ? $STATUS_TEXT{$status}
            : 'UNKNOWN';
        $out = "$device: $status_text, Wear_count $wear%, Erase_fail_count $erase%";
        push @status_msg, $details;
    }
    push @firstline, $out;
}

print join(" - ",@firstline)."\n".join("\n",@status_msg)."\n";

exit $status_code;

