#!/usr/bin/env python3
#
# Nagios plugin to check Ceph cluster usage
#
#     Usage: check_ceph_usage [options]
#
#     Options:
#       -h, --help            show this help message and exit
#       -d, --debug
#       -b BIN, --bin=BIN     Ceph binary (default: /usr/bin/ceph)
#       --conf=CONF           Ceph configuration file
#       -m MON, --mon=MON     Ceph monitor address[:port]
#       -i ID, --id=ID        Ceph client id
#       -k KEYRING, --keyring=KEYRING
#                             Ceph client keyring file
#       -w WARNDATA, --warning-data=WARNDATA
#                             Warning data threshold (default: 70%)
#       -c CRITDATA, --critical-data=CRITDATA
#                             Critical data threshold (default: 85%)
#       -W WARNALLOC, --warning-allocated=WARNALLOC
#                             Warning allocated threshold (default: 80%)
#       -C CRITALLOC, --critical-allocated=CRITALLOC
#                             Critical allocated threshold (default: 90%)
#
# Copyright (c) 2013 Benjamin Renard <brenard@zionetrix.net>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License version 2
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#

"""Nagios plugin to check Ceph cluster usage"""

import argparse
import json
import os
import subprocess
import sys

# default ceph values
CEPH_COMMAND = "/usr/bin/ceph"
WARN_DATA = 70
CRIT_DATA = 85
WARN_ALLOC = 80
CRIT_ALLOC = 90

# nagios exit code
STATUS = {"OK": 0, "WARNING": 1, "CRITICAL": 2, "UNKNOWN": 3}


def _exit_unknown(message):
    """Print *message* and terminate with the Nagios UNKNOWN exit code."""
    print(message)
    sys.exit(STATUS["UNKNOWN"])


def build_parser():
    """Return the command line parser of the plugin.

    The ``%%`` sequences in the help strings are intentional: argparse
    %-formats help texts, so ``%%`` is rendered as a single ``%``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("-d", "--debug", action="store_true", dest="debug", default=False)
    parser.add_argument(
        "-b",
        "--bin",
        dest="bin",
        help=f"Ceph binary (default: {CEPH_COMMAND})",
        type=str,
        default=CEPH_COMMAND,
    )
    parser.add_argument(
        "--conf", dest="conf", help="Ceph configuration file", type=str, default=None
    )
    parser.add_argument(
        "-m", "--mon", dest="mon", help="Ceph monitor address[:port]", type=str, default=None
    )
    parser.add_argument("-i", "--id", dest="id", help="Ceph client id", type=str, default=None)
    parser.add_argument(
        "-k",
        "--keyring",
        dest="keyring",
        help="Ceph client keyring file",
        type=str,
        default=None,
    )
    parser.add_argument(
        "-w",
        "--warning-data",
        dest="warndata",
        help=f"Warning data threshold (default: {WARN_DATA}%%)",
        type=int,
        default=WARN_DATA,
    )
    parser.add_argument(
        "-c",
        "--critical-data",
        dest="critdata",
        help=f"Critical data threshold (default: {CRIT_DATA}%%)",
        type=int,
        default=CRIT_DATA,
    )
    parser.add_argument(
        "-W",
        "--warning-allocated",
        dest="warnalloc",
        help=f"Warning allocated threshold (default: {WARN_ALLOC}%%)",
        type=int,
        default=WARN_ALLOC,
    )
    parser.add_argument(
        "-C",
        "--critical-allocated",
        dest="critalloc",
        help=f"Critical allocated threshold (default: {CRIT_ALLOC}%%)",
        type=int,
        default=CRIT_ALLOC,
    )
    return parser


def validate_paths(options):
    """Exit with UNKNOWN if a file named on the command line does not exist."""
    if not os.path.exists(options.bin):
        _exit_unknown(f"ERROR: ceph executable '{options.bin}' doesn't exist")

    if options.conf and not os.path.exists(options.conf):
        _exit_unknown(f"ERROR: ceph conf file '{options.conf}' doesn't exist")

    if options.keyring and not os.path.exists(options.keyring):
        _exit_unknown(f"ERROR: keyring file '{options.keyring}' doesn't exist")


def build_ceph_command(options):
    """Return the argument list running ``ceph status --format=json``.

    Only the connection options that were actually provided (monitor,
    configuration file, client id, keyring) are forwarded to the binary.
    """
    ceph_cmd = [options.bin]
    for flag, value in (
        ("-m", options.mon),
        ("-c", options.conf),
        ("--id", options.id),
        ("--keyring", options.keyring),
    ):
        if value:
            ceph_cmd.extend((flag, value))
    ceph_cmd.extend(("status", "--format=json"))
    return ceph_cmd


def run_ceph_status(ceph_cmd):
    """Run *ceph_cmd* and return the ``pgmap`` section of its JSON output.

    Every failure (binary cannot be started, empty output, output that is
    not valid JSON, no ``pgmap`` section) ends the plugin with the UNKNOWN
    exit code instead of a Python traceback.
    """
    try:
        proc = subprocess.run(
            ceph_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=False
        )
    except OSError as err:
        # e.g. the file exists but is not executable
        _exit_unknown(f"UNKNOWN: fail to execute ceph status command ({err})")

    if not proc.stdout:
        _exit_unknown("UNKNOWN: fail to execute ceph status command")

    try:
        data = json.loads(proc.stdout.decode(sys.getdefaultencoding()))
    except ValueError as err:
        # covers both UnicodeDecodeError and json.JSONDecodeError
        _exit_unknown(f"UNKNOWN: fail to parse ceph status output ({err})")

    if not isinstance(data, dict) or "pgmap" not in data:
        _exit_unknown("UNKNOWN: fail to read pgmap status")

    return data["pgmap"]


def evaluate_usage(
    pgmap,
    warndata=WARN_DATA,
    critdata=CRIT_DATA,
    warnalloc=WARN_ALLOC,
    critalloc=CRIT_ALLOC,
    debug=False,
):
    """Compare the usage counters of *pgmap* with the thresholds.

    *pgmap* is expected to provide ``data_bytes``, ``bytes_used`` and
    ``bytes_total``. The thresholds are percentages of ``bytes_total``.
    When *debug* is true, the raw counters and the computed percentages
    are printed.

    Returns a ``(status, message)`` tuple: *status* is a key of ``STATUS``
    and *message* is the line to print (status text, then performance data
    after the ``|``). Unusable counters or a total of zero yield
    ``"UNKNOWN"`` instead of raising.
    """
    try:
        data_bytes = int(pgmap["data_bytes"])
        bytes_used = int(pgmap["bytes_used"])
        bytes_total = int(pgmap["bytes_total"])
    except (KeyError, TypeError, ValueError):
        return "UNKNOWN", "UNKNOWN: fail to read pgmap usage counters"

    if debug:
        print(f"data: {data_bytes}")
        print(f"allocated: {bytes_used}")
        print(f"total: {bytes_total}")

    if bytes_total <= 0:
        # No capacity reported: percentages are undefined (division by zero).
        return "UNKNOWN", f"UNKNOWN: ceph reports no storage capacity (total: {bytes_total})"

    per_data = round(data_bytes * 100 / bytes_total, 1)
    per_alloc = round(bytes_used * 100 / bytes_total, 1)

    # Thresholds converted to bytes, as expected in the performance data.
    data_warn_t = int(bytes_total * warndata / 100)
    data_crit_t = int(bytes_total * critdata / 100)
    alloc_warn_t = int(bytes_total * warnalloc / 100)
    alloc_crit_t = int(bytes_total * critalloc / 100)

    if debug:
        print(f"% data: {per_data}")
        print(f"% allocated: {per_alloc}")

    if per_data > critdata or per_alloc > critalloc:
        status = "CRITICAL"
    elif per_data > warndata or per_alloc > warnalloc:
        status = "WARNING"
    else:
        status = "OK"

    # Performance data items are separated by a space; each item is
    # label=value[UOM];warn;crit;min;max
    message = (
        f"{status} - {per_alloc}% allocated / {per_data}% really used"
        f"|allocated={bytes_used}B;{alloc_warn_t};{alloc_crit_t};0;{bytes_total} "
        f"used={data_bytes}B;{data_warn_t};{data_crit_t};0;{bytes_total}"
    )
    return status, message


def main(argv=None):
    """Run the check and exit with the matching Nagios exit code.

    *argv* is the list of command line arguments; ``None`` makes argparse
    read ``sys.argv[1:]``.
    """
    options = build_parser().parse_args(argv)
    validate_paths(options)

    pgmap = run_ceph_status(build_ceph_command(options))
    status, message = evaluate_usage(
        pgmap,
        warndata=options.warndata,
        critdata=options.critdata,
        warnalloc=options.warnalloc,
        critalloc=options.critalloc,
        debug=options.debug,
    )
    print(message)
    sys.exit(STATUS[status])


if __name__ == "__main__":
    main()
