#!/bin/bash
#
# Author Raphaël Laguerre (rlaguerre@easter-eggs.com)
# Desc systemd journal pattern detector plugin for monitoring
#
# Usage:
# 	check_journal_ee -u <unit> -p <pattern> -l <last>
#
#	Search the systemd journal of the specified unit for the specified pattern in entries written during the last <last> seconds.
#
# 	check_journal_ee -u <unit> -p <pattern> -i
#
#       Initialize the state files for this unit, thus reactivating the check the next time it is executed.
#
#	check_journal_ee -h
#
# Options:
# 	-u <unit>	name of systemd unit.
# 	-p <pattern>	pattern to search for ( must conform to the pattern expected by the -g option of journalctl).
# 	-l <last>	search is made for entries written in the last <last> seconds.
# 	-i 		init the state of the plugin for the specified unit.
#
# Description:
#
# This plugin will search the systemd journal of the <unit> systemd unit for
# <pattern> since <last> seconds ago. If the pattern is matched, the plugin will
# return a CRITICAL state and update the beginning time of the search (specified
# by the <last> option) to now. Otherwise it returns an OK state. When in a
# CRITICAL state the plugin stays in the CRITICAL state until an init command is
# executed (parameter -i).
#
# Example:
#
# Check for promtail error in promtail.service unit in the last 2 minutes
#
#       check_journal_ee -u promtail -p error -l 120

# Exit statuses handed back to the monitoring system by this plugin.
STATE_OK=0 STATE_CRITICAL=2 STATE_UNKNOWN=3

# Name of this script without its directory part (shown in the usage text).
PROGNAME="${0##*/}"

#######################################
# Print the help text of the plugin.
# Globals:
#   PROGNAME (read) - script name shown in the usage lines
# Arguments:
#   None
# Outputs:
#   Writes the usage text to stdout
#######################################
print_usage() {
    # $'...' strings are used wherever the text contains a tab so that the
    # layout does not depend on literal tab characters in this file.
    printf '%s\n' \
        "Systemd journal pattern detector plugin for monitoring." \
        "" \
        "Usage: $PROGNAME -u <unit> -p <pattern> -l <last>" \
        "" \
        $'\tSearch the systemd journal of the unit specified for the pattern specified in entries written <last> seconds ago.' \
        "" \
        "Usage: $PROGNAME -u <unit> -p <pattern> -i" \
        "" \
        $'\tInitialize the states files for this unit thus reactivating the check the next time it is executed.' \
        "" \
        "Usage: $PROGNAME -h" \
        "" \
        "Options:" \
        $'-u <unit>\tname of systemd unit' \
        $'-p <pattern>\tpattern to search for ( must conform to the pattern expected by the -g option of journalctl)' \
        $'-l <last>\tsearch is made for entries written in the last <last> seconds' \
        $'-i\t\tinit the state of the plugin for the specified unit' \
        $'-h\t\tprint this help and exit'
}

#######################################
# Search the journal of the unit for entries matching the pattern.
#
# The search starts at the most recent of two instants: "<last> seconds ago"
# and the epoch timestamp stored in TS_FILE.
# Globals:
#   last    (read) - size of the search window, in seconds
#   unit    (read) - systemd unit passed to journalctl -u
#   pattern (read) - pattern passed to journalctl -g and to grep -P
#   TS_FILE (read) - path of the timestamp file; the file is (re)created
#                    when it is missing, empty or not a plain integer
# Arguments:
#   None
# Outputs:
#   Writes the matching journal lines to stdout (nothing when no match)
#######################################
search_journal() {
    local last_ts
    local stored_ts=""
    local ts

    # Declared separately from the assignment so a failing date is not masked.
    last_ts=$(date -d "$last seconds ago" +%s) || return

    # Init TS_FILE if it is missing, empty or does not hold an epoch value.
    # The content is validated because it is fed to an arithmetic comparison.
    if [[ -f "$TS_FILE" ]]; then
        stored_ts=$(<"$TS_FILE")
    fi
    if [[ ! "$stored_ts" =~ ^[0-9]+$ ]]; then
        stored_ts=$last_ts
        echo "$stored_ts" > "$TS_FILE"
    fi

    # Determine the starting date of search: take the most recent date between
    # the timestamp obtained from <last> and the one stored in TS_FILE.
    if [[ "$stored_ts" -lt "$last_ts" ]]; then
        ts=$last_ts
    else
        ts=$stored_ts
    fi

    # Search the journal. The start is passed with its date: a bare HH:MM:SS
    # is interpreted by journalctl as "today", which points to the future
    # whenever the search window crosses midnight.
    # NOTE(review): the grep stage presumably drops journalctl's informational
    # lines (e.g. "-- No entries --"); it is case sensitive whereas
    # journalctl -g may not be for all-lowercase patterns - confirm intent.
    journalctl -u "$unit" -g "$pattern" \
        --since "$(date -d "@$ts" '+%Y-%m-%d %H:%M:%S')" --no-pager \
        | grep -P -- "$pattern"
}

#######################################
# Print the message shown while the check is in the CRITICAL state.
# Globals:
#   pattern   (read) - pattern that was searched for
#   unit      (read) - systemd unit that was searched
#   CRIT_FILE (read) - path of the file holding the matched entries
# Arguments:
#   None
# Outputs:
#   Writes the report, including the command that re-arms the check, to stdout
#######################################
report_critical_state() {
    local script_path

    # Fall back to $0 as-is if realpath cannot resolve it.
    script_path=$(realpath -- "$0" 2>/dev/null) || script_path=$0

    echo "Found entries matching \"$pattern\" in \"$unit\""
    echo ""
    echo "Check $CRIT_FILE to see the matched entries"
    echo ""
    echo "When the errors are corrected, you can reactivate the check with the following command: "
    # %q shell-quotes each word so the command can be pasted unchanged even
    # when the path, unit or pattern contains spaces or metacharacters.
    printf '%q -u %q -p %q -i\n' "$script_path" "$unit" "$pattern"
}

#
# Check parameters
#
# Parse the command line. The leading ':' of the option string selects
# getopts' silent mode, so unknown options and missing arguments are reported
# by the '?' and ':' arms below.
while getopts ":hu:p:l:i" opt; do
    case $opt in
        h)  print_usage; exit 0 ;;
        u)  unit="$OPTARG" ;;
        p)  pattern="$OPTARG" ;;
        l)
            last="$OPTARG"
            # <last> must be a non-negative integer number of seconds.
            if [[ ! "$last" =~ ^[0-9]+$ ]]; then
                print_usage
                exit $STATE_UNKNOWN
            fi
            ;;
        i)  init=1 ;;
        \?)
            echo "Invalid option: -$OPTARG" >&2
            exit $STATE_UNKNOWN
            ;;
        :)
            echo "Option -$OPTARG requires an argument." >&2
            exit $STATE_UNKNOWN
            ;;
    esac
done

# -u and -p are mandatory in every mode.
if [[ -z $unit ]]; then
    echo "Missing -u parameter"
    print_usage
    exit $STATE_UNKNOWN
fi

if [[ -z $pattern ]]; then
    echo "Missing -p parameter"
    print_usage
    exit $STATE_UNKNOWN
fi

# At least one of -l (search mode) or -i (init mode) has to be given.
if [[ -z $last && -z $init ]]; then
    echo "Missing parameters"
    print_usage
    exit $STATE_UNKNOWN
fi

#
# Prepare state
#
# Per-unit state directory and the two state files kept inside it:
#   TS_FILE   - epoch timestamp used as lower bound of the next search
#   CRIT_FILE - journal entries that matched; while this file exists the
#               check reports the CRITICAL state
CACHE_DIR="/var/lib/nagios/check_journal_ee/$unit"
TS_FILE="$CACHE_DIR/timestamp"
CRIT_FILE="$CACHE_DIR/critical"

if [[ ! -d "$CACHE_DIR" ]]; then
    # The path is quoted (and preceded by --) because it embeds the value of
    # -u: unquoted, a unit such as 'foo*' would be glob-expanded to existing
    # sibling directories and the literal directory would never be created.
    if ! err_msg=$(mkdir -p -- "$CACHE_DIR" 2>&1); then
        echo "CRITICAL: unable to create cache dir $CACHE_DIR: $err_msg"
        exit $STATE_UNKNOWN
    fi
fi


#
# Compute new state
#
# Init mode (-i): clear the stored state so that the next run searches again.
if [[ "$init" == "1" ]]; then
    if [[ -f "$CRIT_FILE" ]]; then
        # Leaving the CRITICAL state: drop the matched entries and restart
        # the search window from now.
        rm -f -- "$CRIT_FILE"
        date +%s > "$TS_FILE"
    else
        # -f: the timestamp file does not exist yet when -i is used before
        # the first check, which must not produce an error message.
        rm -f -- "$TS_FILE"
    fi
    exit $STATE_OK
fi

# A previous run found the pattern: stay CRITICAL until an init is done.
if [[ -f "$CRIT_FILE" ]]; then
    report_critical_state
    exit $STATE_CRITICAL
fi

res=$(search_journal)

# Pattern found: move the search window to now, record the matched entries
# and enter the CRITICAL state.
if [[ -n "$res" ]]; then
    date +%s > "$TS_FILE"
    printf '%s\n' "$res" >> "$CRIT_FILE"
    report_critical_state
    exit $STATE_CRITICAL
fi

# Pattern not found.
echo "OK"
exit $STATE_OK
