[checkmk-commits] Check_MK Git: check_mk: check_cluster_nodes: added cluster check from Karl-Heinz Fiebig

Mathias Kettner mk at mathias-kettner.de
Fri Feb 20 14:10:41 CET 2015


Module: check_mk
Branch: master
Commit: bf04855b1fd1f64882229a25559fa04e5074b805
URL:    http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=bf04855b1fd1f64882229a25559fa04e5074b805

Author: Mathias Kettner <mk at mathias-kettner.de>
Date:   Fri Feb 20 14:10:38 2015 +0100

check_cluster_nodes: added cluster check from Karl-Heinz Fiebig

---

 doc/treasures/check_cluster_nodes |  145 +++++++++++++++++++++++++++++++++++++
 1 file changed, 145 insertions(+)

diff --git a/doc/treasures/check_cluster_nodes b/doc/treasures/check_cluster_nodes
new file mode 100755
index 0000000..40b8f47
--- /dev/null
+++ b/doc/treasures/check_cluster_nodes
@@ -0,0 +1,145 @@
+#!/usr/bin/python
+# -*- encoding: utf-8; py-indent-offset: 4 -*-
+###############################################################################
+# name       : ClusterNodesState.py
+# to do      : check state of all cluster nodes
+# start      : nagios plugin
+# docu       : needs a check_mk environment
+# parameter  : -n Clustername (use -h to see all possibilities)
+# created    :
+# created by :
+#
+# modified by       date         comment
+###############################################################################
+
+import os, sys, getopt
+import livestatus
+
+def usage():
+    """Print the command line options of this plugin to stdout."""
+    print """
+    -h --help                 Prints this
+    -n --clustername          cluster name
+    -w --warning              warning at % of lost hosts
+    -c --critical             critical at % of  lost hosts
+    """
+
+def main(argv):
+    """Nagios plugin entry point: report how many nodes of a cluster are lost.
+
+    The nodes are the Livestatus "parents" of the cluster host.  A node is
+    "up" only if its host hard state is UP and its Check_MK service is OK;
+    every other node counts as lost.  The percentage of lost nodes is then
+    compared with the warning and critical thresholds.
+
+    argv -- command line options without the program name (sys.argv[1:])
+
+    Always leaves through sys.exit(): 0 = OK (also --help), 1 = WARN (also
+    OMD_ROOT not set), 2 = CRIT (also usage errors), 3 = UNKNOWN (cluster
+    not found or without nodes, Livestatus or other unexpected error).
+    """
+    clustername = 'NoClusterName'
+    warning     = float(5)
+    critical    = float(50)
+
+    # Inspect the argv we were handed rather than sys.argv directly.
+    if len(argv) == 0 or not argv[0].startswith('-'):
+        usage()
+        sys.exit(2)
+
+    try:
+        # "h" takes no value; the former "h:" made getopt reject a bare -h.
+        opt, args = getopt.getopt(argv, "hn:w:c:", [ 'help', 'clustername=', 'warning=', 'critical=' ] )
+        for o, a in opt:
+            if o in ("-h", "--help"):
+                usage()
+                sys.exit()
+            elif o in ("-n", "--clustername"):
+                clustername = a
+            elif o in ("-w", "--warning"):
+                warning = float(a)
+            elif o in ("-c", "--critical"):
+                critical = float(a)
+    except (getopt.GetoptError, ValueError), err:
+        # ValueError: a threshold that is not a number
+        print str(err)
+        usage()
+        sys.exit(2)
+
+    # Explicit check instead of a bare except around the concatenation.
+    omd_root = os.getenv("OMD_ROOT")
+    if omd_root is None:
+        sys.stderr.write("This example is indented to run in an OMD site\n")
+        sys.stderr.write("Please change socket_path in this example, if you are\n")
+        sys.stderr.write("not using OMD.\n")
+        sys.exit(1)
+    socket_path = "unix:" + omd_root + "/tmp/run/live"
+
+    try:
+        # One connection serves all queries instead of a new one per query.
+        live = livestatus.SingleSiteConnection(socket_path)
+        parents = live.query_table("GET hosts\nColumns: parents\nFilter: host_name = %s\n" % clustername)
+        if not parents or not parents[0][0]:
+            # Without nodes the percentage below would divide by zero.
+            print "UNKNOWN - cluster %s not found or without parents" % clustername
+            sys.exit(3)
+
+        clusternodes = {}
+        for parent in parents[0][0]:
+            state = live.query_table("GET hosts\nColumns: name hard_state\nFilter: host_name = %s\n" % parent)
+            cmk_state = live.query_table("GET services\nColumns: host_name description state\nFilter: description ~ Check_MK$\nFilter: host_name = %s\n" % parent)
+            # Fallback for everything not matched below (missing rows,
+            # Check_MK service not OK).  Formerly nodestate stayed unset
+            # or kept the value of the previous node in these cases.
+            nodestate = "unknown"
+            if state:
+                hard_state = state[0][1]
+                # Livestatus host states: 0 = up, 1 = down, 2 = unreachable
+                if hard_state == 0 and cmk_state and cmk_state[0][2] == 0:
+                    nodestate = "up"
+                elif hard_state == 1:
+                    nodestate = "down"
+                elif hard_state == 2:
+                    nodestate = "unreachable"
+            clusternodes[str(parent)] = nodestate
+
+        # Sorted so that the plugin output is stable between runs.
+        up_list          = sorted(n for n in clusternodes if clusternodes[n] == "up")
+        unreachable_list = sorted(n for n in clusternodes if clusternodes[n] == "unreachable")
+        down_list        = sorted(n for n in clusternodes if clusternodes[n] == "down")
+        unknown_list     = sorted(n for n in clusternodes if clusternodes[n] == "unknown")
+
+        lost_nodes = len(down_list) + len(unreachable_list) + len(unknown_list)
+        failedpercent = float(lost_nodes) / len(clusternodes) * 100.0
+
+        str_up = str_down = str_unreachable = str_unknown = ""
+        if up_list:
+            str_up = " %s " % " ".join(up_list)
+        if down_list:
+            str_down = "(!!)%s " % " ".join(down_list)
+        if unreachable_list:
+            str_unreachable = "(ur)%s " % " ".join(unreachable_list)
+        if unknown_list:
+            str_unknown = "(?)%s " % " ".join(unknown_list)
+
+        if warning < failedpercent < critical:
+            print "WARN - (.)" + str_up + str_down + str_unreachable + str_unknown
+            sys.exit(1)
+        elif failedpercent >= critical:
+            if str_up == "":
+                print "CRIT - " + str_up + str_down + str_unreachable + str_unknown
+            else:
+                print "CRIT - (.)" + str_up + str_down + str_unreachable + str_unknown
+            sys.exit(2)
+        else:
+            print "OK - (.)" + str_up
+            sys.exit(0)
+
+    except Exception, e: # livestatus.MKLivestatusException, e:
+        print "Livestatus error: %s" % str(e)
+        # Formerly the plugin fell through here and ended with status 0,
+        # so Nagios showed OK.  Report UNKNOWN instead.
+        sys.exit(3)
+
+if __name__ == "__main__":
+    main(sys.argv[1:]) # pass only the options; sys.argv[0] is the program name



More information about the checkmk-commits mailing list