nagios 监控配置

/usr/local/nagios/etc/objects
commands.cfg  contacts.cfg  host.cfg  localhost.cfg  printer.cfg  services.cfg  switch.cfg  templates.cfg  timeperiods.cfg  windows.cfg
host.cfg services.cfg需要自己创建
其中host.cfg用来指定监控的主机地址,以及相关属性信息,
[root@web- objects]# cat host.cfg
define host {
use                     linux-server,hosts-pnp #引用在templates.cfg中定义的linux-server和hosts-pnp模板
host_name               1.222 #主机名
alias                   1.222 #主机别名
address                 192.168.1.222 #主机具体的ip地址,域名也可以
}
define host {
use                     linux-server,hosts-pnp
host_name               1.223
alias                   1.223
address                 192.168.1.223
}
define host {
use                     linux-server,hosts-pnp
host_name               1.233
alias                   1.233
address                 192.168.1.233
}
define host {
use                     linux-server,hosts-pnp
host_name               1.215
alias                   1.215
address                 192.168.1.215
}
define host {
use                     linux-server,hosts-pnp
host_name               2.202
alias                   2.202
address                 192.168.2.202
}
define host {
use                     linux-server,hosts-pnp
host_name               2.203
alias                   2.203
address                 192.168.2.203
}
define host {
use                     linux-server,hosts-pnp
host_name               2.214
alias                   2.214
address                 192.168.2.214
}
define host {
use                     linux-server,hosts-pnp
host_name               2.200
alias                   2.200
address                 192.168.2.200
}
define host {
use                     linux-server,hosts-pnp
host_name               2.210
alias                   2.210
address                 192.168.2.210
}
define host {
use                     linux-server,hosts-pnp
host_name               2.211
alias                   2.211
address                 192.168.2.211
}
define host {
use                     linux-server,hosts-pnp
host_name               2.252
alias                   2.252
address                 192.168.2.252
}
define host {
use                     linux-server,hosts-pnp
host_name               2.212
alias                   2.212
address                 192.168.2.212
}
define host {
use                     linux-server,hosts-pnp
host_name               2.213
alias                   2.213
address                 192.168.2.213
}
define hostgroup {
hostgroup_name          sa-servers #定义一个主机组
alias                   sa-servers #主机组的别名
members                 1.222,1.223,1.233,1.215,2.202,2.203,2.214,2.200,2.210,2.252,2.211,2.212,2.213 #主机组的成员,为上面host_name中定义的名字
}
#####################################
services.cfg用于定义监控的服务和主机资源,如http服务、ftp服务、磁盘空间、系统负载等
[root@web- objects]# cat services.cfg
##########################web###################################
define service {
use                     local-service,services-pnp #引用templates.cfg中定义的local-service和services-pnp的属性值
host_name               1.222,1.223,1.233,1.215,2.202,2.203,2.214,2.200,2.210,2.252,2.211,2.212,2.213 #指定要监控哪台主机,主机名必须是在host.cfg中定义过的
service_description     ping #服务的描述
check_command           check_ping!100.0,20%!500.0,60% #指定检查命令check_ping(在commands.cfg中定义),后面接两个参数,命令和参数之间用!隔开
}
define service {
use                     local-service,services-pnp
host_name               1.222,1.223,1.233,1.215,2.202,2.203,2.214,2.200,2.210,2.252,2.211,2.212,2.213
service_description     ssh
check_command           check_ssh
}
define service {
use                     local-service,services-pnp
host_name               1.222,1.223,1.233,1.215,2.202,2.203,2.214,2.200,2.210,2.252,2.211,2.212,2.213
service_description     sshd
check_command           check_tcp!22
}
define service {
use                     local-service,services-pnp
host_name             1.222,1.215,2.214,2.200,2.210,2.252,2.211,2.212,2.213
service_description     mysqlport
check_command           check_tcp!3306
}
define service {
use                     local-service,services-pnp
host_name               1.215
service_description     mailserver-users
check_command           check_nrpe!check_users
}
define service {
use                     local-service,services-pnp
host_name               1.215
service_description     mailserver-load
check_command           check_nrpe!check_load
}
define service {
use                     local-service,services-pnp
host_name               1.215
service_description     mailserver-sdb1
check_command           check_nrpe!check_sdb1
}
define service {
use                     local-service,services-pnp
host_name               1.215
service_description     mailserver-sda2
check_command           check_nrpe!check_sda2
}
define service {
use                     local-service,services-pnp
host_name               1.215
service_description     mailserver-zombie_procs
check_command           check_nrpe!check_zombie_procs
}
define service {
use                     local-service,services-pnp
host_name               1.215
service_description     mailserver-total_procs
check_command           check_nrpe!check_total_procs
}
contacts.cfg 用来定义联系人,和联系人组等 timeperiods.cfg用来定义监控时间段
cgi.cfg用来控制相关cgi脚本,如想在web监控界面中执行cgi脚本(如restart nagios、shutdown nagios、stop nagios),需要配置cgi.cfg文件
[root@web-musicxx etc]# cat cgi.cfg  | grep -v ^# |grep -v ^$
main_config_file=/usr/local/nagios/etc/nagios.cfg
physical_html_path=/usr/local/nagios/share
url_html_path=/nagios
show_context_help=0
use_pending_states=1
use_authentication=0
use_ssl_authentication=0
 
authorized_for_system_information=nagios
authorized_for_configuration_information=nagios
authorized_for_system_commands=nagios
authorized_for_all_services=nagios
authorized_for_all_hosts=nagios
authorized_for_all_service_commands=nagios
authorized_for_all_host_commands=nagios
default_statusmap_layout=5
default_statuswrl_layout=4
ping_syntax=/bin/ping -n -U -c 5 $HOSTADDRESS$
refresh_rate=90
escape_html_tags=1
action_url_target=_blank
notes_url_target=_blank
lock_author_names=1
nagios.cfg是nagios的核心配置文件,所有对象配置文件必须在这个文件中进行定义才能发挥作用
[root@web-musicxx etc]# cat nagios.cfg  | grep -v ^#  | grep -v ^$
log_file=/usr/local/nagios/var/nagios.log #日志文件的路径
cfg_file=/usr/local/nagios/etc/objects/commands.cfg
cfg_file=/usr/local/nagios/etc/objects/contacts.cfg
cfg_file=/usr/local/nagios/etc/objects/timeperiods.cfg
cfg_file=/usr/local/nagios/etc/objects/templates.cfg
cfg_file=/usr/local/nagios/etc/objects/host.cfg
cfg_file=/usr/local/nagios/etc/objects/services.cfg
cfg_file=/usr/local/nagios/etc/objects/localhost.cfg #引用对象配置文件
object_cache_file=/usr/local/nagios/var/objects.cache #对象缓冲文件,用于保存所有对象配置文件的一个副本
precached_object_file=/usr/local/nagios/var/objects.precache
resource_file=/usr/local/nagios/etc/resource.cfg #指定nagios资源文件路径
status_file=/usr/local/nagios/var/status.dat #定义一个状态文件,保存nagios当前状态,注释和宕机信息
status_update_interval=10
nagios_user=nagios
nagios_group=nagios
check_external_commands=1 #定义是否允许nagios在web页面运行cgi命令,1为允许,0为不允许
command_check_interval=-1
command_file=/usr/local/nagios/var/rw/nagios.cmd
external_command_buffer_slots=4096
lock_file=/usr/local/nagios/var/nagios.lock
temp_file=/usr/local/nagios/var/nagios.tmp
temp_path=/tmp
event_broker_options=-1
log_rotation_method=d
log_archive_path=/usr/local/nagios/var/archives
use_syslog=1
log_notifications=1
log_service_retries=1
log_host_retries=1
log_event_handlers=1
log_initial_states=0
log_external_commands=1
log_passive_checks=1
service_inter_check_delay_method=s
max_service_check_spread=30
service_interleave_factor=s
host_inter_check_delay_method=s
max_host_check_spread=30
max_concurrent_checks=0
check_result_reaper_frequency=10
max_check_result_reaper_time=30
check_result_path=/usr/local/nagios/var/spool/checkresults
max_check_result_file_age=3600
cached_host_check_horizon=15
cached_service_check_horizon=15
enable_predictive_host_dependency_checks=1
enable_predictive_service_dependency_checks=1
soft_state_dependencies=0
auto_reschedule_checks=0
auto_rescheduling_interval=30
auto_rescheduling_window=180
sleep_time=0.25
service_check_timeout=60
host_check_timeout=30
event_handler_timeout=30
notification_timeout=30
ocsp_timeout=5
perfdata_timeout=5
retain_state_information=1
state_retention_file=/usr/local/nagios/var/retention.dat
retention_update_interval=60
use_retained_program_state=1
use_retained_scheduling_info=1
retained_host_attribute_mask=0
retained_service_attribute_mask=0
retained_process_host_attribute_mask=0
retained_process_service_attribute_mask=0
retained_contact_host_attribute_mask=0
retained_contact_service_attribute_mask=0
interval_length=60
check_for_updates=1
bare_update_check=0
use_aggressive_host_checking=0
execute_service_checks=1
accept_passive_service_checks=1
execute_host_checks=1
accept_passive_host_checks=1
enable_notifications=1
enable_event_handlers=1
process_performance_data=1
host_perfdata_command=process-host-perfdata
service_perfdata_command=process-service-perfdata
obsess_over_services=0
obsess_over_hosts=0
translate_passive_host_checks=0
passive_host_checks_are_soft=0
check_for_orphaned_services=1
check_for_orphaned_hosts=1
check_service_freshness=1
service_freshness_check_interval=60
check_host_freshness=0
host_freshness_check_interval=60
additional_freshness_latency=15
enable_flap_detection=1
low_service_flap_threshold=5.0
high_service_flap_threshold=20.0
low_host_flap_threshold=5.0
high_host_flap_threshold=20.0
date_format=us
p1_file=/usr/local/nagios/bin/p1.pl
enable_embedded_perl=1
use_embedded_perl_implicitly=1
illegal_object_name_chars=`~!$%^&*|'"<>?,()=
illegal_macro_output_chars=`~$&|'"<>
use_regexp_matching=0
use_true_regexp_matching=0
admin_email=nagios@localhost
admin_pager=pagenagios@localhost
daemon_dumps_core=0
use_large_installation_tweaks=0
enable_environment_macros=1
debug_level=0
debug_verbosity=1
debug_file=/usr/local/nagios/var/nagios.debug
max_debug_file_size=1000000
验证nagios配置文件的正确性
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
Nagios Core 3.2.3
Copyright (c) 2009-2010 Nagios Core Development Team and Community Contributors
Copyright (c) 1999-2009 Ethan Galstad
Last Modified: 10-03-2010
License: GPL
Website: http://www.nagios.org
Reading configuration data...
   Read main config file okay...
Processing object config file '/usr/local/nagios/etc/objects/commands.cfg'...
Processing object config file '/usr/local/nagios/etc/objects/contacts.cfg'...
Processing object config file '/usr/local/nagios/etc/objects/timeperiods.cfg'...
Processing object config file '/usr/local/nagios/etc/objects/templates.cfg'...
Processing object config file '/usr/local/nagios/etc/objects/host.cfg'...
Processing object config file '/usr/local/nagios/etc/objects/services.cfg'...
Processing object config file '/usr/local/nagios/etc/objects/localhost.cfg'...
   Read object config files okay...
略若干字。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。
。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。。
Checking obsessive compulsive processor commands...
Checking misc settings...
Total Warnings: 0
Total Errors:   0
Things look okay - No serious problems were detected during the pre-flight check
2)
安装nagios性能分析图表
nagios对服务或主机的监控是一个瞬时状态,有时候系统管理员需要了解主机在一段时间内的性能,以及服务的响应情况,并且要生成图表。
可以利用pnp来实现,它是基于php和perl,利用rrdtool工具将nagios采集的数据绘制成相关的图表,然后显示主机或者服务在一段时间内的运行状态。
下载pnp rrdtool最新版
在安装之前先安装依赖包和rrdtool
yum -y install apr-devel apr-util check-devel cairo-devel pango-devel libxml2-devel rpmbuild glib2-devel dbus-devel freetype-devel fontconfig-devel gcc-c++ expat-devel python-devel libXrender-devel zlib libpng freetype libjpeg fontconfig gd libxml2 pcre pcre-devel libpcre libconfuse libart_lgpl-devel
安装pnp
tar -xf pnp.tar.gz
cd pnp
./configure --with-nagios-user=nagios --with-nagios-group=nagios --with-rrdtool=/usr/bin/rrdtool --with-perfdata-dir=/usr/local/nagios/share/perfdata
make all
make install
make install-config
make install-init
可能会有报错 什么ssl 啥的,我的做法是安装openssl-devel包!
另外映射
ln -s /usr/lib64/libssl.so /usr/lib64/libssl.so.0.9.8
ln -s /usr/lib64/libcrypto.so /usr/lib64/libcrypto.so.0.9.8
配置pnp
1:创建默认配置文件
cd /usr/local/nagios/etc/pnp/
cp process_perfdata.cfg-sample process_perfdata.cfg
cp npcd.cfg-sample npcd.cfg
cp rra.cfg-sample rra.cfg
chown -R nagios.nagios /usr/local/nagios/etc/pnp/
2:修改process_perfdata.cfg文件
将日志修改为2级别
# Loglevel 0=silent 1=normal 2=debug
#
LOG_LEVEL = 2
3:修改nagios配置文件
找到如下几项去掉注释,贴
process_performance_data=1
host_perfdata_command=process-host-perfdata
service_perfdata_command=process-service-perfdata
4:增加小太阳图表,修改templates.cfg 增加一个定义pnp的host和service
define host{
name            hosts-pnp
register        0
action_url      /nagios/pnp/index.php?host=$HOSTNAME$
process_perf_data       1
}
define service{
name            services-pnp
register        0
action_url      /nagios/pnp/index.php?host=$HOSTNAME$&srv=$SERVICEDESC$
process_perf_data       1
}
5:修改commands.cfg 贴
# 'process-host-perfdata' command definition
define command{
        command_name    process-host-perfdata
        #command_line   /usr/bin/printf "%b" "$LASTHOSTCHECK$\t$HOSTNAME$\t$HOSTSTATE$\t$HOSTATTEMPT$\t$HOSTSTATETYPE$\t$HOSTEXECUTIONTIME$\t$HOSTOUTPUT$\t$HOSTPERFDATA$\n" >> /usr/local/nagios/var/host-perfdata.out
         command_line   /usr/local/nagios/libexec/process_perfdata.pl
        }
# 'process-service-perfdata' command definition
define command{
        command_name    process-service-perfdata
        #command_line   /usr/bin/printf "%b" "$LASTSERVICECHECK$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICESTATE$\t$SERVICEATTEMPT$\t$SERVICESTATETYPE$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$\n" >> /usr/local/nagios/var/service-perfdata.out
         command_line   /usr/local/nagios/libexec/process_perfdata.pl
        }
6:修改host.cfg和services.cfg,在use中添加hosts-pnp和services-pnp,具体略,见上面
restart nagios
如配置正确,会生成若干文件
/usr/local/nagios/share/perfdata/1.215
mailserver-load.rrd  mailserver-sda2.rrd  mailserver-sdb1.rrd  mailserver-users.rrd  mysqlport.rrd  ping.rrd  sshd.rrd
mailserver-load.xml  mailserver-sda2.xml  mailserver-sdb1.xml  mailserver-users.xml  mysqlport.xml  ping.xml  sshd.xml

利用插件扩展nagios的监控功能
利用nrpe外部插件监控远程主机

配置nagios客户端
1:安装nagios-plugins
tar -xf nagios-plugins-1.4.15.tar.gz
cd nagios-plugins-1.4.15
./configure
make
make install
useradd -s /sbin/nologin nagios
chown nagios.nagios /usr/local/nagios/ -R
chown nagios.nagios /usr/local/nagios/
chown nagios.nagios /usr/local/nagios/libexec/ -R
安装nrpe
cd nrpe-2.13
./configure
make all
make all
yum install openssl-devel
./configure
make all
make install-plugin && make install-daemon && make install-daemon-config
安装完成后在/usr/local/nagios/libexec下会生成check_nrpe 命令
配置nrpe:修改nrpe.cfg中的allowed_hosts=nagiosserveripaddress(即nagios服务器的ip地址)
启动nrpe守护进程
/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
[root@mail ~]# netstat -lantp | grep 5666
tcp        0      0 0.0.0.0:5666                0.0.0.0:*                   LISTEN      2832/nrpe  
服务端测试
[root@web- 1.215]# /usr/local/nagios/libexec/check_nrpe -H 192.168.1.215
NRPE v2.13
客户端上定义监控服务器内容
# The following examples use hardcoded command arguments...
command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_sdb1]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/sdb1
command[check_sda2]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/sda2
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200
nagios服务器配置
1:安装nrpe插件
./configure
make all
make install-plugin
2:commands.cfg中定义一个check_nrpe监控命令
define command{
        command_name    check_nrpe
        command_line    $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
        }
在services.cfg中添加监控内容,略,见上面
测试nagios配置文件没问题,则restart nagios