Checking NT Services with Nagios

From Nagios Wiki

Jump to: navigation, search

[edit] NSclient

First, NSClient (or equivalent) must be installed on the Windows server that Nagios will be polling.

Nagios FAQ on NSClient

(NSClient listens on port 1248 or 12489, depending on your client version and/or configuration.)

[edit] commands.cfg

Next, add the following definitions to your commands.cfg file.


#taken from CentOS 4.x

# check_nt_disk: USEDDISKSPACE check via check_nt.
#   $ARG1$ = value passed to -l (drive), $ARG2$ = warning threshold, $ARG3$ = critical threshold.
define command {
    command_name  check_nt_disk
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v USEDDISKSPACE -l $ARG1$ -w $ARG2$ -c $ARG3$
}

# check_nt_cpuload: CPULOAD check via check_nt.
#   $ARG1$ = value passed to -l (the CPULOAD parameter list).
define command {
    command_name  check_nt_cpuload
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v CPULOAD -l $ARG1$
}

# check_nt_uptime: UPTIME query via check_nt; takes no arguments.
define command {
    command_name  check_nt_uptime
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v UPTIME
}

# check_nt_clientversion: CLIENTVERSION query via check_nt; takes no arguments.
define command {
    command_name  check_nt_clientversion
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v CLIENTVERSION
}

# check_nt_process: PROCSTATE check via check_nt.
#   $ARG1$ = value passed to -l (process name(s) to check).
define command {
    command_name  check_nt_process
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v PROCSTATE -l $ARG1$
}

# check_nt_service: SERVICESTATE check via check_nt, run with "-d SHOWALL".
#   $ARG1$ = value passed to -l (service name(s) to check).
define command {
    command_name  check_nt_service
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v SERVICESTATE -d SHOWALL -l $ARG1$
}

# check_nt_memuse: MEMUSE check via check_nt.
#   $ARG1$ = warning threshold, $ARG2$ = critical threshold.
define command {
    command_name  check_nt_memuse
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v MEMUSE -w $ARG1$ -c $ARG2$
}

# check_nt_pagingfile: COUNTER check of \Paging File(_Total)\% Usage.
#   $ARG1$ = warning threshold, $ARG2$ = critical threshold.
define command {
    command_name  check_nt_pagingfile
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\Paging File(_Total)\\% Usage","Paging File usage is %.2f %%" -w $ARG1$ -c $ARG2$
}

# DHCP - conflict-check queue length.
# COUNTER check of \DHCP Server\Conflict Check Queue Length; warning 2, critical 5.
define command {
    command_name  check_nt_DHCP_queue_length
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\DHCP Server\\Conflict Check Queue Length","Waiting in DHCP Queue due to Conflict is %.f" -w 2 -c 5
}

# DHCP - active queue length.
# COUNTER check of \DHCP Server\Active Queue Length; warning 15, critical 30.
define command {
    command_name  check_nt_DHCP_active_queue_length
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\DHCP Server\\Active Queue Length","Waiting in DHCP Queue for Normal Processing is %.f" -w 15 -c 30
}

# DHCP - average response time.
# COUNTER check of \DHCP Server\Milliseconds per packet (Avg); warning 70, critical 250.
define command {
    command_name  check_nt_DHCP_average_response_time
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER  -l "\\DHCP Server\\Milliseconds per packet (Avg)","Average DHCP Server Response in is %.f" -w 70 -c 250
}

# DNS - recursive query failures.
# COUNTER check of \DNS\Recursive Query Failure/sec; warning 5, critical 80.
define command {
    command_name  check_nt_DNS_recursive_query_failures
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER  -l "\\DNS\\Recursive Query Failure/sec","DNS Recursive Queries are failing at %.f per second" -w 5 -c 80
}

# DNS - recursive query timeouts.
# COUNTER check of \DNS\Recursive Query TimeOut/sec; warning 5, critical 80.
define command {
    command_name  check_nt_DNS_recursive_query_timeouts
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER  -l "\\DNS\\Recursive Query TimeOut/sec","DNS Recursive Queries are failing because Timed Out at %.f per second" -w 5 -c 80
}

# DNS - secure update failures.
# COUNTER check of \DNS\Secure Update Failure; warning 1, critical 15.
define command {
    command_name  check_nt_DNS_secure_update_failures
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\DNS\\Secure Update Failure","DNS Secure Update Failures since last Service Restart is %.f" -w 1 -c 15
}

# DNS - total queries received per second.
# COUNTER check of \DNS\Total Query Received/sec; warning 3, critical 5.
# The counter previously pointed at "\WINS Server\Failed Queries/sec", which
# contradicts the command name and the status message.
# NOTE(review): the 3/5 thresholds were carried over unchanged and are probably
# too low for a total-query rate - tune for your environment.
define command {
    command_name  check_nt_DNS_total_queries_per_sec
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\DNS\\Total Query Received/sec","Total Queries received per second is %.f" -w 3 -c 5
}

# Logon errors.
# COUNTER check of \Server\Errors Logon; warning 50, critical 150.
define command {
    command_name  check_nt_logon_errors
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\Server\\Errors Logon","Logon Errors since last reboot is %.f" -w 50 -c 150
}

# SMB general system errors.
# COUNTER check of \Server\Errors System; warning 2, critical 20.
define command {
    command_name  check_nt_SMB_general_system_errors
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\Server\\Errors System","SMB Errors due to Server problems is %.f" -w 2 -c 20
}

# SMB blocking requests rejected.
# COUNTER check of \Server\Blocking Requests Rejected; warning 10, critical 100.
# The status message previously misspelled "Blocking" as "Blockiing".
define command {
    command_name  check_nt_SMB_blocking_requests_rejected
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\Server\\Blocking Requests Rejected","SMB Blocking requests rejected due to insufficient free resources is %.f Server Parameters need adjusting" -w 10 -c 100
}

# CPU load averaged over 10 min, 60 min and 24 hrs (1440 min).
# CPULOAD parameter list is three <minutes>,<warning>,<critical> triplets, each 60/95.
# "-s password -p 12489" added to match every other NSClient check on this page;
# without them the plugin sends no password and falls back to its default port.
define command {
    command_name  check_nt_cpu_avg
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v CPULOAD -l 10,60,95,60,60,95,1440,60,95
}

# Memory pool nonpaged peak.
# COUNTER check of \Server\Pool Nonpaged Peak. No -w/-c thresholds are given,
# so the command only reports the value.
# The status message previously misspelled "Maximum" as "Maxium".
define command {
    command_name  check_nt_memory_pool_nonpaged_peak
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER  -l "\\Server\\Pool Nonpaged Peak","Maximum number of bytes of nonpaged pool which should be same as installed physical memory is %.f"
}

# Memory pool paged failures.
# COUNTER check of \Server\Pool Paged Failures; warning 2, critical 50.
define command {
    command_name  check_nt_memory_pool_paged_failures
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\Server\\Pool Paged Failures","Number of times allocation from the page pool have failed is %.f Physical RAM or paging file too small" -w 2 -c 50
}

# Paging file usage (fixed thresholds).
# COUNTER check of \Paging File(_Total)\% Usage; warning 30, critical 60.
define command {
    command_name  check_nt_paging_file_useage
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER  -l "\\Paging File(_Total)\\% Usage","Paging file usage is %.2f %%" -w 30 -c 60
}

# Paging file peak usage.
# COUNTER check of \Paging File(_Total)\% Usage Peak; warning 80, critical 90.
define command {
    command_name  check_nt_paging_file_peak_usage
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER  -l "\\Paging File(_Total)\\% Usage Peak","Paging file Peak usage is %.2f %%" -w 80 -c 90
}

# Free system page table entries (thresholds chosen around the /3GB switch).
# COUNTER check of \Memory\Free System Page Table Entries; warning 8000, critical 5000.
# Note that the warning value is deliberately higher than the critical value here.
define command {
    command_name  check_nt_system_PTE_with_3GB
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\Memory\\Free System Page Table Entries","Number of Page Table Entries not being used is %.f Thresholds set for testing /3GB switch on or off" -w 8000 -c 5000
}

# Registry quota in use (percent).
# COUNTER check of \System\% Registry Quota In Use; warning 60, critical 85.
define command {
    command_name  check_nt_registry_quota_in_use
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\System\\% Registry Quota In Use","Percent Quota in use is %.2f %%" -w 60 -c 85
}

# Server work queue length (instance 0).
# COUNTER check of \Server Work Queues(0)\Queue Length; warning 4, critical 7.
define command {
    command_name  check_nt_server_work_queues
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\Server Work Queues(0)\\Queue Length","Current work queue which is an indication of Processing Load is %.f " -w 4 -c 7
}

# Disk usage of drive C with fixed thresholds.
# USEDDISKSPACE check with -l c; warning 80, critical 90.
# Changes from the earlier definition:
#   - targets $HOSTADDRESS$ rather than $HOSTNAME$, like every other command
#     here; $HOSTNAME$ is the Nagios object name and need not resolve in DNS.
#   - passes "-s password -p 12489" to match the other NSClient checks.
define command {
    command_name  check_nt_disk_usage
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v USEDDISKSPACE -l c -w 80 -c 90
}

# Disk queue length.
# COUNTER check of \PhysicalDisk(_Total)\Avg. Disk Queue Length; warning 1, critical 5.
define command {
    command_name  check_nt_queue_length
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\PhysicalDisk(_Total)\\Avg. Disk Queue Length","Average number of both read and write requests queued is %.2f Consider a faster disk array" -w 1 -c 5
}

# Pool nonpaged bytes, for hosts running without the /3GB switch.
# COUNTER check of \Memory\Pool Nonpaged Bytes; warning 200000000, critical 220000000.
# The definition previously ended in a bare "-c" with no value.
# NOTE(review): 220000000 was chosen to mirror the warning/critical spacing of
# the sibling pool checks on this page - confirm it suits your servers.
define command {
    command_name  check_nt_pool_nonpaged_bytes_no3GB_switch
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\Memory\\Pool Nonpaged Bytes","Pool Nonpaged Bytes is %.f  Thresholds set for testing /3GB switch off" -w 200000000 -c 220000000
}

# Pool nonpaged bytes, for hosts running with the /3GB switch.
# COUNTER check of \Memory\Pool Nonpaged Bytes; warning 100000000, critical 110000000.
define command {
    command_name  check_nt_pool_nonpaged_bytes_3GB_switch
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\Memory\\Pool Nonpaged Bytes","Pool Nonpaged Bytes is %.f Thresholds set for testing /3GB switch on" -w 100000000 -c 110000000
}

# Pool paged bytes, for hosts running without the /3GB switch.
# COUNTER check of \Memory\Pool Paged Bytes; warning 300000000, critical 320000000.
define command {
    command_name  check_nt_pool_paged_bytes_no3GB_switch
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\Memory\\Pool Paged Bytes","Pool Paged Bytes is %.f Thresholds set for testing /3GB switch off" -w 300000000 -c 320000000
}

# Pool paged bytes, for hosts running with the /3GB switch.
# COUNTER check of \Memory\Pool Paged Bytes; warning 200000000, critical 220000000.
define command {
    command_name  check_nt_pool_paged_bytes_3GB_switch
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\Memory\\Pool Paged Bytes","Pool Paged Bytes is %.f  Thresholds set for testing /3GB switch on" -w 200000000 -c 220000000
}

# Printer not-ready errors since last restart.
# COUNTER check of \Print Queue(_Total)\Not Ready Errors; warning 1, critical 3.
# This definition previously duplicated the "Out of Paper Errors" counter and
# message from check_nt_printer_out_of_paper_errors.
define command {
    command_name  check_nt_printer_not_ready
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\Print Queue(_Total)\\Not Ready Errors","Not Ready Printer Errors since last Service restart is %.f" -w 1 -c 3
}

# Printer out-of-paper errors since last restart.
# COUNTER check of \Print Queue(_Total)\Out of Paper Errors; warning 1, critical 3.
define command {
    command_name  check_nt_printer_out_of_paper_errors
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\Print Queue(_Total)\\Out of Paper Errors","Out of Paper Printer Errors since last Service restart is %.f" -w 1 -c 3
}

# SMTP local delivery queue.
# COUNTER check of \SMTP Server(_Total)\Local Queue Length; warning 5, critical 15.
define command {
    command_name  check_nt_smtp_local_delivery
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\SMTP Server(_Total)\\Local Queue Length","Number of Messages waiting in queue for Local Recipients is %.f" -w 5 -c 15
}

# SMTP remote delivery queue.
# COUNTER check of \SMTP Server(_Total)\Remote Queue Length; warning 25, critical 50.
define command {
    command_name  check_nt_smtp_remote_delivery_queue
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\SMTP Server(_Total)\\Remote Queue Length","Number of Messages waiting in queue for Remote Recipients is %.f" -w 25 -c 50
}

# Exchange active user count.
# COUNTER check of \MSExchangeIS\Active User Count; warning 4, critical 10.
# No description format string is supplied after the counter name.
define command {
    command_name  check_nt_exchange_active_user_count
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\MSExchangeIS\\Active User Count" -w 4 -c 10
}

# Exchange connection count.
# COUNTER check of \MSExchangeIS\Connection Count; warning 100, critical 250.
define command {
    command_name  check_nt_exchange_connection_count
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\MSExchangeIS\\Connection Count" -w 100 -c 250
}

# Exchange average delivery time.
# COUNTER check; warning 2, critical 10.
# The previous path "\MSExchangeIS(_Average Delivery Time)\%%Usage" put the
# counter name in the instance slot and used "%%Usage" as the counter, so it
# was not a valid \Object(Instance)\Counter path. The value is not a
# percentage, so the trailing "%%" was removed from the message.
# NOTE(review): path assumes the "MSExchangeIS Mailbox" object with the
# _Total instance - confirm with perfmon/typeperf on the Exchange host.
define command {
    command_name  check_nt_exchange_delivery_time
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\MSExchangeIS Mailbox(_Total)\\Average Delivery Time","Average Delivery Time is %.2f" -w 2 -c 10
}

# Exchange maximum users.
# COUNTER check of \MSExchangeIS\Maximum Users; warning 100, critical 250.
# This definition previously queried "\MSExchangeIS\Connection Count", which
# made it an exact duplicate of check_nt_exchange_connection_count.
# NOTE(review): confirm the counter name with perfmon on your Exchange version.
define command {
    command_name  check_nt_exchange_maximum_users
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\MSExchangeIS\\Maximum Users" -w 100 -c 250
}

# Exchange messages delivered per minute.
# COUNTER check of \MSExchangeIS Mailbox(_Total)\Messages Delivered/min; warning 25, critical 120.
# The counter path previously lacked its leading "\\", unlike every other
# counter path on this page (including the sibling Messages Submitted check).
define command {
    command_name  check_nt_exchange_messages_delivered_per_min
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\MSExchangeIS Mailbox(_Total)\\Messages Delivered/min" -w 25 -c 120
}

# Exchange messages submitted per minute.
# COUNTER check of \MSExchangeIS Mailbox(_Total)\Messages Submitted/min; warning 5, critical 35.
define command {
    command_name  check_nt_exchange_messages_submitted_per_min
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l  "\\MSExchangeIS Mailbox(_Total)\\Messages Submitted/min" -w 5 -c 35
}

# Exchange receive queue.
# COUNTER check; warning 3, critical 15.
# The previous path "\MSExchangeIS Mailbox(_Receive Queue Size)\%%Usage" put
# the counter name in the instance slot and used "%%Usage" as the counter, so
# it was not a valid \Object(Instance)\Counter path.
# NOTE(review): assumes the _Total instance - confirm with perfmon/typeperf.
define command {
    command_name  check_nt_exchange_receive_queue
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\MSExchangeIS Mailbox(_Total)\\Receive Queue Size","Queue Length is %.2f" -w 3 -c 15
}

# Exchange send queue.
# COUNTER check; warning 3, critical 15.
# Changes from the earlier definition:
#   - "\MSExchangeIS(_Send Queue Size)\%%Usage" was not a valid
#     \Object(Instance)\Counter path.
#   - the description used "$.2f%%"; the format specifier must start with "%",
#     and a lone "$" in command_line is treated by Nagios as a macro delimiter.
# NOTE(review): assumes the "MSExchangeIS Mailbox" object with the _Total
# instance, mirroring the receive-queue check - confirm with perfmon/typeperf.
define command {
    command_name  check_nt_exchange_send_queue
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\MSExchangeIS Mailbox(_Total)\\Send Queue Size","Queue Length is %.2f" -w 3 -c 15
}

# Exchange user count.
# This definition was previously a stub with empty command_name and
# command_line, which Nagios rejects when it verifies the configuration.
#   $ARG1$ = warning threshold, $ARG2$ = critical threshold.
# NOTE(review): the counter path below is an assumption modelled on the other
# MSExchangeIS checks on this page - confirm it with perfmon/typeperf first.
define command {
    command_name  check_nt_exchange_user_count
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\MSExchangeIS\\User Count","Exchange User Count is %.f" -w $ARG1$ -c $ARG2$
}

# SQL database data files size (total).
# COUNTER check of \SQLServer:Databases(_Total)\Data File(s) Size (KB); warning 5, critical 30.
define command {
    command_name  check_nt_sql_database_files_size
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\SQLServer:Databases(_Total)\\Data File(s) Size (KB)","SQL Server Databases Datafile size total is %.f" -w 5 -c 30
}

# SQL database log files size (total).
# COUNTER check of \SQLServer:Databases(_Total)\Log File(s) Size (KB); warning 10, critical 100.
define command {
    command_name  check_nt_sql_database_log_files_size
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l  "\\SQLServer:Databases(_Total)\\Log File(s) Size (KB)","SQL Server Databases Logfile size total %.f" -w 10 -c 100
}

# SQL database data file size for the named instance SBSMONITORING.
# COUNTER check of \MSSQL$SBSMONITORING:Databases(_Total)\Data File(s) Size (KB);
# warning 5, critical 30.
# The literal "$" in the instance name is written "$$" below: Nagios treats a
# single "$" in command_line as a macro delimiter, and "$$" yields one "$".
define command {
    command_name  check_nt_sql_database_data_file_size_individual
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\MSSQL$$SBSMONITORING:Databases(_Total)\\Data File(s) Size (KB)","SQL Server Databases Datafile size total is %.f" -w 5 -c 30
}

# SQL database log file size for the named instance SBSMONITORING.
# COUNTER check of \MSSQL$SBSMONITORING:Databases(_Total)\Log File(s) Size (KB);
# warning 10, critical 100.
# The literal "$" in the instance name is written "$$" below: Nagios treats a
# single "$" in command_line as a macro delimiter, and "$$" yields one "$".
define command {
    command_name  check_nt_sql_database_log_file_size_individual
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\MSSQL$$SBSMONITORING:Databases(_Total)\\Log File(s) Size (KB)","SQL Server Databases Logfile size total %.f" -w 10 -c 100
}

# SQL Server service state.
# SERVICESTATE check with -l mssqlserver.
define command {
    command_name  check_nt_sql_server_service
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v SERVICESTATE -l mssqlserver
}

# SQL database size.
# COUNTER check of \SQLServer:Databases(_Total)\Data File(s) Size (KB);
# warning 30000, critical 70000.
# Changes from the earlier definition:
#   - "\SQLServerDatabases(_Data Files() Size (KB)\%%Usage" was a garbled path;
#     it now uses the same counter as the other data-file-size checks here.
#   - the description used "$.2f%%"; the format specifier must start with "%",
#     and a lone "$" in command_line is treated by Nagios as a macro delimiter.
#     The value is a size, not a percentage, so the "%%" suffix was dropped.
define command {
    command_name  check_nt_sql_database_size
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\SQLServer:Databases(_Total)\\Data File(s) Size (KB)","Databases Size is %.2f" -w 30000 -c 70000
}


# SQL Server deadlocks per second.
# COUNTER check of \SQLServer:Locks(_Total)\Number of Deadlocks/sec; warning 1, critical 5.
define command {
    command_name  check_nt_sql_server_deadlocks_per_sec
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\SQLServer:Locks(_Total)\\Number of Deadlocks/sec","SQL Server Deadlocks per second total %.f" -w 1 -c 5
}

# SQL Server user connections.
# COUNTER check of \SQLServer:General Statistics\User Connections; warning 20, critical 40.
define command {
    command_name  check_nt_sql_server_connections
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\SQLServer:General Statistics\\User Connections","SQL Server Connections are %.f" -w 20 -c 40
}

# SQL database data size (total).
# COUNTER check of \SQLServer:Databases(_Total)\Data File(s) Size (KB); warning 5, critical 30.
define command {
    command_name  check_nt_sql_database_data_size
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\SQLServer:Databases(_Total)\\Data File(s) Size (KB)","SQL Server Databases Datafile size total is %.f" -w 5 -c 30
}

# SQL database log size (total).
# COUNTER check of \SQLServer:Databases(_Total)\Log File(s) Size (KB); warning 10, critical 100.
# Renamed from check_nt_sql_database_data_size: that name is already used by
# the data-size command on this page, and this command measures log files.
define command {
    command_name  check_nt_sql_database_log_size
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\SQLServer:Databases(_Total)\\Log File(s) Size (KB)","SQL Server Databases Logfile size total %.f" -w 10 -c 100
}

# SQL replication agents running (SharePoint instance).
# COUNTER check of \MSSQL$Sharepoint:Replication Agents\Running. No -w/-c
# thresholds are given, so the command only reports the value.
# Changes from the earlier definition:
#   - the closing quote was a typographic curly quote, which the shell does not
#     treat as a quote character; it is now a plain ASCII double quote.
#   - the literal "$" in the instance name is written "$$", because Nagios
#     treats a single "$" in command_line as a macro delimiter.
define command {
    command_name  check_nt_sql_replication_agents
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\MSSQL$$Sharepoint:Replication Agents\\Running","Number of SQL Server Replication Agents running are %.f"
}

# SQL database log size (SharePoint instance).
# COUNTER check of \MSSQL$Sharepoint:Databases(_Total)\Log File(s) Size (KB);
# warning 10, critical 30.
# The literal "$" in the instance name is written "$$" below: Nagios treats a
# single "$" in command_line as a macro delimiter, and "$$" yields one "$".
define command {
    command_name  check_nt_sql_database_log_size_sharepoint
    command_line  $USER1$/check_nt -H $HOSTADDRESS$ -s password -p 12489 -v COUNTER -l "\\MSSQL$$Sharepoint:Databases(_Total)\\Log File(s) Size (KB)","SQL Server Databases Logfile size total %.f" -w 10 -c 30
}



[edit] notes

  • some checks from Gentoo wiki
  • others taken from misc places on the web
Personal tools
Advertisement