HEX
Server: Apache
System: Linux pdx1-shared-a1-38 6.6.104-grsec-jammy+ #3 SMP Tue Sep 16 00:28:11 UTC 2025 x86_64
User: mmickelson (3396398)
PHP: 8.1.31
Disabled: NONE
Upload Files
File: //usr/local/bin/zabbix/ceph-status.py
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import os
import sys
import commands
import json
def main():
    """Print the value of the item named by ``sys.argv[1]`` on stdout.

    Per-pool and per-OSD items take the pool name / OSD id from
    ``sys.argv[2]``.  Whatever goes wrong while collecting a value (including
    a missing second argument), a numeric fallback is printed instead so the
    caller always receives one line: 255 for ``health``, 0 for everything
    else.  An unrecognised item key prints nothing.

    Exits with status 1 and a usage line on stderr when no item key is given.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s <item-key> [pool-name|osd-id]\n" % sys.argv[0])
        sys.exit(1)

    # Each handler is a zero-argument callable so that sys.argv[2] is only
    # touched (and may only fail) inside the try block below.
    handlers = {
        'health': lambda: get_cluster_health(),
        'used_percent': lambda: get_cluster_used_percent(),
        'total_objects': lambda: get_cluster_total_objects(),
        'total_pgs': lambda: get_cluster_total_pgs(),
        'commit_latency': lambda: get_cluster_latency("ave_commit"),
        'apply_latency': lambda: get_cluster_latency("ave_apply"),
        'throughput_write': lambda: get_cluster_throughput("write"),
        'throughput_read': lambda: get_cluster_throughput("read"),
        'total_ops': lambda: get_cluster_total_ops(),
        'total_pools': lambda: get_cluster_total_pools(),
        'pools': lambda: get_cluster_pools(),
        'osds': lambda: get_host_osds(),
        'osds_mem_use_virt': lambda: get_osd_mem_virt(sys.argv[2], "virt"),
        'osds_mem_use_res': lambda: get_osd_mem_virt(sys.argv[2], "res"),
        'osds_cpu_use': lambda: get_osd_cpu(sys.argv[2]),
        # fio write / read speed (KB/s)
        'fio_write_speed': lambda: get_fio_write_speed(),
        'fio_read_speed': lambda: get_fio_read_speed(),
        'pool_objects': lambda: get_pool_stats(sys.argv[2], "objects"),
        'pool_bytes_used': lambda: get_pool_stats(sys.argv[2], "used"),
        'pool_throughput_write': lambda: get_pool_stats(sys.argv[2], "throughput_write"),
        'pool_throughput_read': lambda: get_pool_stats(sys.argv[2], "throughput_read"),
        'pool_op_write': lambda: get_pool_stats(sys.argv[2], "op_write"),
        'pool_op_read': lambda: get_pool_stats(sys.argv[2], "op_read"),
        'pool_id': lambda: get_pool_config(sys.argv[2], "id"),
        'pool_size': lambda: get_pool_config(sys.argv[2], "size"),
        'pool_min_size': lambda: get_pool_config(sys.argv[2], "min_size"),
        'pool_pg_num': lambda: get_pool_config(sys.argv[2], "pg_num"),
        'pool_pgp_num': lambda: get_pool_config(sys.argv[2], "pgp_num"),
    }

    key = sys.argv[1]
    handler = handlers.get(key)
    if handler is None:
        return

    # 255 is the "unknown" health code; every other item falls back to 0.
    fallback = 255 if key == 'health' else 0
    try:
        value = handler()
    except Exception:
        # Deliberate best-effort: always emit a number rather than a traceback.
        value = fallback
    # Single-argument print(...) behaves the same as a Python 2 print statement.
    print(value)
##get ceph cluster status
def get_cluster_health() :
    """Return the cluster health as a numeric code.

    Runs ``ceph health`` in JSON form and maps the reported state:
    HEALTH_OK -> 1, HEALTH_WARN -> 2, HEALTH_ERR -> 3.  Any other state, or
    any failure to run the command or parse its output, yields 255.

    The state is read from the ``status`` field when present and from the
    older ``overall_status`` field otherwise, so both JSON layouts work.
    """
    codes = {"HEALTH_OK": 1, "HEALTH_WARN": 2, "HEALTH_ERR": 3}
    try:
        raw = commands.getoutput('timeout 10 ceph health -f json-pretty 2>/dev/null')
        report = json.loads(raw)
        # Prefer the newer field; when both exist the older one is not the
        # real cluster state.
        state = report.get("status", report.get("overall_status"))
        return codes.get(state, 255)
    except Exception:
        # Best-effort: the caller must always get a number back.
        return 255
##get cluster used percent
def get_cluster_used_percent():
    """Return used/total cluster capacity as a 3-decimal string.

    The value is the plain ratio ``bytes_used / bytes_total`` taken from the
    ``pgmap`` section of ``ceph -s`` (it is not multiplied by 100).
    Returns 0 when the status cannot be obtained or parsed.
    """
    try:
        status_text = commands.getoutput('timeout 10 ceph -s -f json-pretty 2>/dev/null')
        pgmap = json.loads(status_text)["pgmap"]
        used_bytes = int(pgmap["bytes_used"])
        total_bytes = int(pgmap["bytes_total"])
        ratio = used_bytes / float(total_bytes)
        return "%.3f" % ratio
    except:
        return 0
##get cluster total objects
def get_cluster_total_objects():
    """Return the total number of objects in the cluster, or 0 on failure.

    The count is taken from ``pgmap.num_objects`` in the JSON status when
    that field exists.  When the JSON status parses but has no such field,
    the function falls back to scraping the 10th field of the ``pgmap`` line
    of the plain-text status, and only accepts it if it is purely numeric.
    """
    try:
        raw = commands.getoutput('timeout 10 ceph -s -f json-pretty 2>/dev/null')
        pgmap = json.loads(raw)["pgmap"]
        if "num_objects" in pgmap:
            return pgmap["num_objects"]
        # Only reached when the cluster answered but lacks the JSON field, so
        # an unreachable cluster does not cost a second 10 s timeout.
        scraped = commands.getoutput('timeout 10 ceph -s  2> /dev/null|grep pgmap|awk \'{print $10}\'').strip()
        return scraped if scraped.isdigit() else 0
    except Exception:
        # Best-effort: the caller must always get a number back.
        return 0
#get cluster total pg
def get_cluster_total_pgs():
    """Return ``pgmap.num_pgs`` from the JSON cluster status, or 0 on failure."""
    try:
        status_text = commands.getoutput('timeout 10 ceph -s -f json-pretty 2>/dev/null')
        return json.loads(status_text)["pgmap"]["num_pgs"]
    except:
        return 0
#get cluster average latency
def get_cluster_latency(arg):
    """Return the average OSD latency in whole milliseconds.

    arg -- "ave_commit" averages ``commit_latency_ms`` and "ave_apply"
           averages ``apply_latency_ms`` over every entry of
           ``osd_perf_infos`` reported by ``ceph osd perf``.

    The list is looked up under an ``osdstats`` wrapper when the output has
    one, and at the top level otherwise.  Returns 0 for an unknown ``arg``,
    when no OSDs are listed, or when the command output cannot be parsed.
    """
    fields = {"ave_commit": "commit_latency_ms", "ave_apply": "apply_latency_ms"}
    if arg not in fields:
        return 0
    try:
        raw = commands.getoutput('timeout 10 ceph osd perf -f json-pretty 2>/dev/null')
        report = json.loads(raw)
        perf_infos = report.get("osdstats", report)["osd_perf_infos"]
        latencies = [int(osd["perf_stats"][fields[arg]]) for osd in perf_infos]
        # Floor division keeps the integer result regardless of interpreter
        # version; an empty list raises ZeroDivisionError and yields 0 below.
        return sum(latencies) // len(latencies)
    except Exception:
        # Best-effort: the caller must always get a number back.
        return 0
#get cluster throughput write and read
def get_cluster_throughput(arg):
    """Return the cluster-wide client throughput in bytes per second.

    arg -- "write" reads ``pgmap.write_bytes_sec`` and "read" reads
           ``pgmap.read_bytes_sec`` from the JSON cluster status.

    Returns 0 when the field is absent or the status cannot be obtained or
    parsed.  Any other ``arg`` returns None.
    """
    if arg == "write":
        rate_key = 'write_bytes_sec'
    elif arg == "read":
        rate_key = 'read_bytes_sec'
    else:
        return None
    try:
        status_text = commands.getoutput('timeout 10 ceph -s -f json-pretty 2>/dev/null ')
        pgmap = json.loads(status_text)["pgmap"]
        # The rate field is only present while there is client traffic.
        if rate_key in pgmap:
            return pgmap[rate_key]
        return 0
    except:
        return 0
# get cluster ops (read ,write,promote)
def get_cluster_total_ops():
    """Return the sum of write, read and promote ops per second.

    Each rate is read from the ``pgmap`` section of the JSON cluster status
    and only counted when present.  Returns 0 on any failure.
    """
    try:
        status_text = commands.getoutput('timeout 10 ceph -s -f json-pretty 2>/dev/null')
        pgmap = json.loads(status_text)["pgmap"]
        total = 0
        for rate_key in ('write_op_per_sec', 'read_op_per_sec', 'promote_op_per_sec'):
            if rate_key in pgmap:
                total += int(pgmap[rate_key])
        return total
    except:
        return 0
# get cluster total pools (has bug for get pools)
def get_cluster_total_pools():
    """Return the number of entries in ``ceph osd lspools``, or 0 on failure."""
    try:
        listing = commands.getoutput('timeout 10 ceph osd lspools  -f json-pretty 2>/dev/null')
        pools = json.loads(listing)
    except:
        return 0
    try:
        return len(pools)
    except:
        return 0
#get all pool name
def get_cluster_pools():
    """Return the pool names as a compact JSON list for discovery.

    The result is a JSON array with one ``{"{#POOL}": <name>}`` object per
    pool listed by ``ceph df``.  Returns 0 on any failure.
    """
    try:
        df_text = commands.getoutput('timeout 10 ceph df -f json-pretty 2>/dev/null')
        discovered = [{'{#POOL}': str(pool["name"])} for pool in json.loads(df_text)["pools"]]
        return json.dumps(discovered, separators=(',', ':'))
    except:
        return 0

def get_host_osds():
    """Return the OSD ids mounted on this host as a compact JSON list.

    Ids are derived from the mount points of ``mount`` lines containing
    "osd" (excluding "lockbox" ones): the text after the first ``-`` of the
    mount path.  Each id becomes a ``{"{#OSD}": <id>}`` object.  Returns 0 on
    any failure.
    """
    try:
        mount_text = commands.getoutput("mount|grep osd|grep -v lockbox|awk '{print $3}'|cut -f2 -d - 2>/dev/null")
        discovered = [{'{#OSD}': str(osd_id)} for osd_id in mount_text.splitlines()]
        return json.dumps(discovered, separators=(',', ':'))
    except:
        return 0


def get_osd_mem_virt(osd,memtype):
    """Return the memory use of one OSD daemon as reported by ``ps``.

    osd     -- OSD id; the pid is read from ``/var/run/ceph/osd.<osd>.pid``.
    memtype -- "virt" for the ``vsz`` column, "res" for the ``rsz`` column.

    Returns the ``ps`` value as a stripped string, or 0 when ``memtype`` is
    unknown, the pid file is missing or does not hold a plain number, the
    process is gone, or anything else fails.
    """
    ps_columns = {"virt": "vsz", "res": "rsz"}
    if memtype not in ps_columns:
        return 0
    try:
        # Read the pid file directly so the caller-supplied id never reaches
        # a shell command line.
        with open("/var/run/ceph/osd.%s.pid" % osd) as pidfile:
            osdpid = pidfile.read().strip()
        # Only a purely numeric pid is interpolated into the command below.
        if not osdpid.isdigit():
            return 0
        # "-o <column>=" suppresses the header line; stderr of ps itself is
        # discarded so an error message is never returned as the value.
        usage = commands.getoutput('ps -p %s -o %s= 2>/dev/null' % (osdpid, ps_columns[memtype])).strip()
        return usage if usage else 0
    except Exception:
        # Best-effort: the caller must always get a number back.
        return 0
def get_osd_cpu(osd):
    """Return the CPU usage (``ps`` ``pcpu`` column) of one OSD daemon.

    osd -- OSD id; the pid is read from ``/var/run/ceph/osd.<osd>.pid``.

    Returns the ``ps`` value as a stripped string, or 0 when the pid file is
    missing or does not hold a plain number, the process is gone, or
    anything else fails.
    """
    try:
        # Read the pid file directly so the caller-supplied id never reaches
        # a shell command line.
        with open("/var/run/ceph/osd.%s.pid" % osd) as pidfile:
            osdpid = pidfile.read().strip()
        # Only a purely numeric pid is interpolated into the command below.
        if not osdpid.isdigit():
            return 0
        # "-o pcpu=" suppresses the header line; stderr of ps itself is
        # discarded so an error message is never returned as the value.
        osd_cpu = commands.getoutput('ps -p %s -o pcpu= 2>/dev/null' % osdpid).strip()
        return osd_cpu if osd_cpu else 0
    except Exception:
        # Best-effort: the caller must always get a number back.
        return 0

def get_fio_write_speed():
    """Return field 6 of the ``iotop`` line(s) for ``fio`` processes.

    One batch iteration of ``iotop -P -k`` is filtered for "fio", excluding
    lines that mention this item's own key or grep.  Returns 0 when nothing
    matches or the command cannot be run.
    """
    try:
        write_kbps = commands.getoutput('''iotop --batch --iter 1 -P -k |grep fio|grep -v fio_write_speed|grep -v grep |awk '{print $6}'  2>/dev/null''')
        return write_kbps if write_kbps else 0
    except:
        return 0

def get_fio_read_speed():
    """Return field 4 of the ``iotop`` line(s) for ``fio`` processes.

    One batch iteration of ``iotop -P -k`` is filtered for "fio", excluding
    lines that mention this item's own key or grep.  Returns 0 when nothing
    matches or the command cannot be run.
    """
    try:
        read_kbps = commands.getoutput('''iotop --batch --iter 1 -P -k |grep fio|grep -v fio_read_speed|grep -v grep |awk '{print $4}'  2>/dev/null''')
        return read_kbps if read_kbps else 0
    except:
        return 0





#get every pool object,used, throughput,ops
def get_pool_stats(poolname,stats):
    """Return one statistic for the pool called ``poolname``.

    stats selects the value:
      "objects", "used"          -- ``stats.objects`` / ``stats.bytes_used``
                                    of the pool's entry in ``ceph df``
      "throughput_write",
      "throughput_read",
      "op_write", "op_read"      -- the matching ``client_io_rate`` field of
                                    the pool's entry in
                                    ``ceph osd pool stats`` (0 when the field
                                    is absent)
      "size"                     -- the pool's ``size`` setting, obtained via
                                    ``get_pool_config``

    Returns 0 when the pool is not listed, ``stats`` is unknown, or the
    command output cannot be obtained or parsed.
    """
    df_fields = {"objects": "objects", "used": "bytes_used"}
    io_fields = {
        "throughput_write": "write_bytes_sec",
        "throughput_read": "read_bytes_sec",
        "op_write": "write_op_per_sec",
        "op_read": "read_op_per_sec",
    }
    try:
        if stats == "size":
            return get_pool_config(poolname, "size")
        if stats in df_fields:
            raw = commands.getoutput('timeout 10 ceph df -f json-pretty 2>/dev/null')
            for pool in json.loads(raw)["pools"]:
                if pool["name"] == poolname:
                    return pool["stats"][df_fields[stats]]
        elif stats in io_fields:
            raw = commands.getoutput("timeout 10 ceph osd pool stats -f json-pretty 2>/dev/null")
            for pool in json.loads(raw):
                if pool["pool_name"] == poolname:
                    return pool["client_io_rate"].get(io_fields[stats], 0)
    except Exception:
        # Best-effort: the caller must always get a number back.
        return 0
    # Pool not listed, or unknown statistic name.
    return 0
#get cluster pool config
def get_pool_config(poolname,config):
    """Return one setting of the pool called ``poolname``.

    config is one of "size", "id", "min_size", "pg_num" or "pgp_num".  Each
    is fetched with ``ceph osd pool get <pool> <option>`` in JSON form; "id"
    queries the ``size`` option and reads ``pool_id`` from that reply.

    Returns 0 for an unknown ``config`` or when the command output cannot be
    obtained or parsed.
    """
    # config name -> (option passed to "pool get", key read from the reply)
    lookups = {
        "size": ("size", "size"),
        "id": ("size", "pool_id"),
        "min_size": ("min_size", "min_size"),
        "pg_num": ("pg_num", "pg_num"),
        "pgp_num": ("pgp_num", "pgp_num"),
    }
    if config not in lookups:
        return 0
    # shlex.quote on newer interpreters, pipes.quote on older ones.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote
    option, reply_key = lookups[config]
    try:
        # The pool name comes from the command line, so it is shell-quoted
        # before being placed into the command string.
        raw = commands.getoutput("timeout 10 ceph   osd pool get %s %s -f json-pretty 2>/dev/null" % (quote(poolname), option))
        return json.loads(raw)[reply_key]
    except Exception:
        # Best-effort: the caller must always get a number back.
        return 0

# Dispatch on the command-line arguments only when run as a script,
# not when this file is imported as a module.
if __name__ == '__main__':
    main()