#!/usr/bin/env python
#
# Nagios plugin to check Ceph cluster usage
#
#     Usage: check_ceph_usage [options]
#
#     Options:
#       -h, --help            show this help message and exit
#       -d, --debug
#       -b BIN, --bin=BIN     Ceph binary (default: /usr/bin/ceph)
#       --conf=CONF           Ceph configuration file
#       -m MON, --mon=MON     Ceph monitor address[:port]
#       -i ID, --id=ID        Ceph client id
#       -k KEYRING, --keyring=KEYRING
#                             Ceph client keyring file
#       -w WARNDATA, --warning-data=WARNDATA
#                             Warning data threshold (default: 70%)
#       -c CRITDATA, --critical-data=CRITDATA
#                             Critical data threshold (default: 85%)
#       -W WARNALLOC, --warning-allocated=WARNALLOC
#                             Warning allocated threshold (default: 80%)
#       -C CRITALLOC, --critical-allocated=CRITALLOC
#                             Critical allocated threshold (default: 90%)
#
# Copyright (c) 2013 Benjamin Renard <brenard@zionetrix.net>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License version 2
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#

import sys
import os
import json
import subprocess
import argparse

# Built-in defaults; each one can be overridden by a command-line option.
# The four thresholds are percentages of the cluster's total capacity.
CEPH_COMMAND = '/usr/bin/ceph'
WARN_DATA, CRIT_DATA = 70, 85
WARN_ALLOC, CRIT_ALLOC = 80, 90

# Nagios state name -> process exit code reported for that state.
STATUS = dict(OK=0, WARNING=1, CRITICAL=2, UNKNOWN=3)

# Command-line interface. Help texts are expanded by argparse with the "%"
# operator, hence the doubled percent sign after each threshold default.
parser = argparse.ArgumentParser()

# Verbose mode: print the raw counters and computed percentages.
parser.add_argument('-d', '--debug', dest='debug', action='store_true', default=False)

# How to invoke the ceph client and reach the cluster.
parser.add_argument(
    '-b', '--bin', dest='bin', type=str, default=CEPH_COMMAND,
    help="Ceph binary (default: {0})".format(CEPH_COMMAND))
parser.add_argument(
    '--conf', dest='conf', type=str,
    help="Ceph configuration file")
parser.add_argument(
    '-m', '--mon', dest='mon', type=str,
    help="Ceph monitor address[:port]")
parser.add_argument(
    '-i', '--id', dest='id', type=str,
    help="Ceph client id")
parser.add_argument(
    '-k', '--keyring', dest='keyring', type=str,
    help="Ceph client keyring file")

# Thresholds applied to the "data" percentage (lowercase flags).
parser.add_argument(
    '-w', '--warning-data', dest='warndata', type=int, default=WARN_DATA,
    help="Warning data threshold (default: {0}%%)".format(WARN_DATA))
parser.add_argument(
    '-c', '--critical-data', dest='critdata', type=int, default=CRIT_DATA,
    help="Critical data threshold (default: {0}%%)".format(CRIT_DATA))

# Thresholds applied to the "allocated" percentage (uppercase flags).
parser.add_argument(
    '-W', '--warning-allocated', dest='warnalloc', type=int, default=WARN_ALLOC,
    help="Warning allocated threshold (default: {0}%%)".format(WARN_ALLOC))
parser.add_argument(
    '-C', '--critical-allocated', dest='critalloc', type=int, default=CRIT_ALLOC,
    help="Critical allocated threshold (default: {0}%%)".format(CRIT_ALLOC))

options = parser.parse_args()

# validate command-line arguments
# The ceph executable is always needed, so its path is checked unconditionally.
if not os.path.exists(options.bin):
    print("ERROR: ceph executable '{0}' doesn't exist".format(options.bin))
    raise SystemExit(STATUS['UNKNOWN'])

# The configuration file is optional: only verify a path that was supplied.
if options.conf:
    if not os.path.exists(options.conf):
        print("ERROR: ceph conf file '{0}' doesn't exist".format(options.conf))
        raise SystemExit(STATUS['UNKNOWN'])

# Same rule for the optional client keyring.
if options.keyring:
    if not os.path.exists(options.keyring):
        print("ERROR: keyring file '{0}' doesn't exist".format(options.keyring))
        raise SystemExit(STATUS['UNKNOWN'])

# Assemble the ceph invocation as an argument list: the binary first, then
# only the connection options that were actually given, and finally the
# "status" subcommand with JSON output requested.
ceph_cmd = [options.bin]
if options.mon:
    ceph_cmd.extend(['-m', options.mon])
if options.conf:
    ceph_cmd.extend(['-c', options.conf])
if options.id:
    ceph_cmd.extend(['--id', options.id])
if options.keyring:
    ceph_cmd.extend(['--keyring', options.keyring])
ceph_cmd.extend(['status', '--format=json'])
    
# Run "ceph status", derive the usage percentages and report them to Nagios.
def _exit_unknown(message):
    """Print a Nagios UNKNOWN status line built from `message` and exit.

    Every failure to obtain usable numbers must end here: an uncaught
    exception makes the interpreter exit with code 1, which is the WARNING
    code in the STATUS table.
    """
    print("UNKNOWN: %s" % message)
    sys.exit(STATUS['UNKNOWN'])


def _percent_of(part, total):
    """Return `part` as a percentage of `total`, rounded to one decimal.

    The float literal forces true division, so the decimal is kept on
    Python 2 as well. `total` must be non-zero.
    """
    return round(part * 100.0 / total, 1)


def _bytes_at(total, percent):
    """Return how many bytes correspond to `percent` percent of `total`.

    Floor division keeps the computation in exact integer arithmetic.
    """
    return total * percent // 100


# exec command
try:
    p = subprocess.Popen(ceph_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, err = p.communicate()
except OSError as exc:
    # Raised when the binary cannot be started, e.g. it is not executable.
    _exit_unknown("fail to execute ceph status command (%s)" % exc)

if not output:
    _exit_unknown("fail to execute ceph status command")

try:
    data = json.loads(output.decode(sys.getdefaultencoding()))
except ValueError:
    # Covers undecodable bytes (UnicodeDecodeError) and malformed JSON.
    _exit_unknown("fail to parse ceph status output")

if not isinstance(data, dict) or 'pgmap' not in data:
    _exit_unknown("fail to read pgmap status")

# Extract the three counters once, so a missing or non-numeric value is
# reported as UNKNOWN instead of raising further down.
try:
    pgmap = data['pgmap']
    data_bytes = int(pgmap['data_bytes'])
    bytes_used = int(pgmap['bytes_used'])
    bytes_total = int(pgmap['bytes_total'])
except (KeyError, TypeError, ValueError):
    _exit_unknown("fail to read usage counters from pgmap status")

# A zero capacity would otherwise cause a division by zero below.
if bytes_total <= 0:
    _exit_unknown("ceph reports a total capacity of %s bytes" % bytes_total)

if options.debug:
    print("data: %s" % data_bytes)
    print("allocated: %s" % bytes_used)
    print("total: %s" % bytes_total)

# Percentages drive the status; the byte thresholds go into the perfdata.
PER_DATA = _percent_of(data_bytes, bytes_total)
DATA_WARN_T = _bytes_at(bytes_total, options.warndata)
DATA_CRIT_T = _bytes_at(bytes_total, options.critdata)
PER_ALLOC = _percent_of(bytes_used, bytes_total)
ALLOC_WARN_T = _bytes_at(bytes_total, options.warnalloc)
ALLOC_CRIT_T = _bytes_at(bytes_total, options.critalloc)

if options.debug:
    print("%% data: %s" % PER_DATA)
    print("%% allocated: %s" % PER_ALLOC)

# The worst of the two measurements decides the reported state.
if PER_DATA > options.critdata or PER_ALLOC > options.critalloc:
    STATUS_TXT = 'CRITICAL'
elif PER_DATA > options.warndata or PER_ALLOC > options.warnalloc:
    STATUS_TXT = 'WARNING'
else:
    STATUS_TXT = 'OK'

# Text before "|" is the human-readable status; after it comes the perfdata
# as label=value[UOM];warn;crit;min;max entries. Entries are separated by a
# space, as the Nagios perfdata format requires (a comma would be read as
# part of the preceding "max" field).
print(
    "%s - %s%% allocated / %s%% really used|allocated=%sB;%s;%s;0;%s used=%sB;%s;%s;0;%s" % (
        STATUS_TXT, PER_ALLOC, PER_DATA,
        bytes_used, ALLOC_WARN_T, ALLOC_CRIT_T, bytes_total,
        data_bytes, DATA_WARN_T, DATA_CRIT_T, bytes_total
    )
)
sys.exit(STATUS[STATUS_TXT])
