我正在尝试实现一个自定义 perl nagios 脚本来使用 nrpe 远程检查流氓 dhcp 服务器。在我运行时在中央服务器上:
/usr/local/nagios/libexec/check_nrpe -H 10.9.0.25 -c check_roguedhcp
在我的调试日志中,我看到了这个:
Host is asking for command 'check_roguedhcp' to be run...
Running command: sudo /usr/lib64/nagios/plugins/check_roguedhcp.pl
Command completed with return code 1 and output:
Return Code: 1, Output: NRPE: Unable to read output
如果我在本地运行脚本(即使作为 nrpe 用户),我也会得到预期的输出。
在本地服务器上,我的 /etc/nagios/nrpe.cfg 具有以下设置:
command[check_roguedhcp]=sudo /usr/lib64/nagios/plugins/check_roguedhcp.pl
command[check_dhcp]=sudo /usr/lib64/nagios/plugins/check_dhcp -v
nrpe_user=nrpe
nrpe_group=nagios
ps aux 显示 nrpe 以用户 nrpe 运行(nrpe 在组 nagios 中)
nrpe 5941 0.0 0.1 52804 2384 ? Ss 08:25 0:00 /usr/sbin/nrpe -c /etc/nagios/nrpe.cfg -d
我已将命令添加到 /etc/sudoers
%nagios ALL=(ALL) NOPASSWD: /usr/lib/nagios64/plugins/check_dhcp, /usr/lib64/nagios/plugins/check_roguedhcp.pl
在执行 nrpe 调用的中央服务器上,我有以下服务组和配置:
define servicegroup{
servicegroup_name rogue_dhcp
alias All dhcp monitors
}
define service{
name security-service
servicegroups rogue_dhcp
register 0
max_check_attempts 1
}
Nagios 可以通过 nrpe 在此服务器上运行任何其他 check_users 等脚本。
这是 perl 脚本本身,尽管我们知道该文件在本地执行得很好。
1 #!/usr/bin/perl -w
2 # nagios: -epn
3 # the above makes nagios run the script separately.
4 use POSIX;
5 use lib "/usr/lib64/nagios/plugins";
6 use utils qw(%ERRORS);
7
8 sub fail_usage {
9 if (scalar @_) {
10 print "$0: error: \n";
11 map { print " $_\n"; } @_;
12 }
13 print "$0: Usage: \n";
14 print "$0 [-v [-v [-v]]] [ []] \n";
15 print "$0 [-v [-v [-v]]] [-s] [[-s] [[-s] ]] \n";
16 print " \n";
17 exit 3 ;
18 }
19
20 my $verbose = 0;
21 my %servers=(
22 "x", "10.x.x.x",
23 "x", "10.x.x.x",
24 "x", "10.x.x.x",
25 "x", "10.x.x.x"
26 );
27
28 # examine commandline args
29 while ($ARGV=$ARGV[0]) {
30 my $myarg = $ARGV;
31 if ($ARGV eq '-s') {
32 shift @ARGV;
33 if (!($ARGV = $ARGV[0])) { fail_usage ("$myarg needs an argument"); }
34 if ($ARGV =~ /^-/) { fail_usage ("$myarg must be followed by an argument"); }
35 if (!defined($servers{$ARGV})) { $servers{$ARGV}=1; }
36 }
37 elsif ($ARGV eq '-v' ) { $verbose++; }
38 elsif ($ARGV eq '-h' or $ARGV eq '--help' ) { fail_usage ; }
39 elsif ($ARGV =~ /^-/ ) { fail_usage " invalid option ($ARGV)"; }
40 elsif ($ARGV =~ /^\d+\.\d+\.\d+\.\d+$/)
41 # servers should be ip addresses. I'm not doing detailed checks for this.
42 { if (!defined($servers{$ARGV})) { $servers{$ARGV}=1; } }
43 else { last; }
44 shift @ARGV;
45 }
46 # for some reason I can't test for empty ARGs in the while loop
47 @ARGV = grep {!/^\s*$/} @ARGV;
48 if (scalar @ARGV) { fail_usage "didn't understand arguments: (".join (" ",@ARGV).")"; }
49
50 my $serversn = scalar keys %servers;
51
52 if ($verbose > 2) {
53 print "verbosity=($verbose)\n";
54 print "servers = ($serversn)\n";
55 if ($serversn) { for my $i (keys %servers) { print "server ($i)\n"; } }
56 }
57
58 if (!$serversn) { fail_usage "no servers"; }
59 my $responses=0;
60 my $responders="";
61 my @check_dhcp = qx{/usr/lib64/nagios/plugins/check_dhcp -v};
62 foreach my $value (@check_dhcp) {
63 if ($value =~ /Added offer from server \@ /i){
64 $value =~ m/(\d+\.\d+\.\d+\.\d+)/i;
65 my $host = $1;
66 # we find a server in our list
67 if (defined($servers{$host})) { $responses++; $responders.="$host "; }
68 # we find a rogue DHCP server. Danger Will Robinson!
69 else {
70 print "DHCP:CRITICAL: DHCP service running on $host";
71 exit $ERRORS{'OK'}
72 }
73 }
74 }
75 # we saw all the servers in our list. All is good.
76 if ($responses == $serversn) {
77 print "DHCP:OK: $responses of $serversn Expected Responses to DHCP Broadcast";
78 exit $ERRORS{'OK'};
79 }
80 # we found no DHCP responses.
81 if ($responses == 0) {
82 print "DHCP:OK: no rogue servers detected!!!!#!@#";
83 exit $ERRORS{'OK'}
84 }
85 # we found less DHCP servers than we should have. Oh Nos!
86 $responders =~ s/ $//;
87 print "DHCP:OK: $responses of $serversn Responses to DHCP Broadcast. ($responders) responded. ";
88 exit $ERRORS{'OK'};
这是我在对 nrpe 过程进行 strace 时看到的(相关的)内容。
955 6950 stat("/usr/lib64/nagios/plugins/check_roguedhcp.pl", {st_mode=S_IFREG|S_ISUID|S_ISGID|0755, st_size=2799, ...}) = 0
956 6950 setresuid(4294967295, 4294967295, 4294967295) = 0
957 6950 setresgid(4294967295, 536347864, 4294967295) = 0
958 6950 setgroups(3, [536347864, 536347137, 536353632]) = 0
959 6950 open("/dev/tty", O_RDWR|O_NOCTTY) = -1 ENXIO (No such device or address)
960 6950 socket(PF_NETLINK, SOCK_RAW, 9) = 3
961 6950 fcntl(3, F_SETFD, FD_CLOEXEC) = 0
962 6950 fcntl(3, F_SETFD, FD_CLOEXEC) = 0
963 6950 ioctl(0, SNDCTL_TMR_TIMEBASE or TCGETS, 0x7fff3de81ac0) = -1 ENOTTY (Inappropriate ioctl for device)
964 6950 ioctl(1, SNDCTL_TMR_TIMEBASE or TCGETS, 0x7fff3de81ac0) = -1 EINVAL (Invalid argument)
965 6950 ioctl(2, SNDCTL_TMR_TIMEBASE or TCGETS, 0x7fff3de81ac0) = -1 ENOTTY (Inappropriate ioctl for device)
966 6950 getcwd("/", 4096) = 2
967 6950 sendto(3, "d\0\0\0c\4\5\0\1\0\0\0\0\0\0\0cwd=\"/\" cmd=\"/us"..., 100, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 100
968 6950 poll([{fd=3, events=POLLIN}], 1, 500) = 1 ([{fd=3, revents=POLLIN}])
969 6950 recvfrom(3, "$\0\0\0\2\0\0\0\1\0\0\0&\33\0\0\0\0\0\0d\0\0\0c\4\5\0\1\0\0\0"..., 8988, MSG_PEEK|MSG_DONTWAIT, {sa_family=AF_NE TLINK, pid=0, groups=00000000}, [12]) = 36
970 6950 recvfrom(3, "$\0\0\0\2\0\0\0\1\0\0\0&\33\0\0\0\0\0\0d\0\0\0c\4\5\0\1\0\0\0"..., 8988, MSG_DONTWAIT, {sa_family=AF_NETLINK, pi d=0, groups=00000000}, [12]) = 36
971 6950 write(2, "sudo", 4) = 4
972 6950 write(2, ": ", 2) = 2
973 6950 write(2, "sorry, you must have a tty to ru"..., 38) = 38
974 6950 write(2, "\n", 1) = 1
975 6950 setresuid(4294967295, 4294967295, 4294967295) = 0
976 6950 setresgid(4294967295, 4294967295, 4294967295) = 0
977 6950 exit_group(1) = ?
978 6949 <... read resumed> "", 4096) = 0
979 6949 --- SIGCHLD (Child exited) @ 0 (0) ---
980 6949 close(5) = 0
981 6949 wait4(6950, [{WIFEXITED(s) && WEXITSTATUS(s) == 1}], 0, NULL) = 6950
970 6950 recvfrom(3, "$\0\0\0\2\0\0\0\1\0\0\0&\33\0\0\0\0\0\0d\0\0\0c\4\5\0\1\0\0\0"..., 8988, MSG_DONTWAIT, {sa_family=AF_NETLINK, pi d=0, groups=00000000}, [12]) = 36
971 6950 write(2, "sudo", 4) = 4
972 6950 write(2, ": ", 2) = 2
973 6950 write(2, "sorry, you must have a tty to ru"..., 38) = 38
974 6950 write(2, "\n", 1) = 1
975 6950 setresuid(4294967295, 4294967295, 4294967295) = 0
976 6950 setresgid(4294967295, 4294967295, 4294967295) = 0
977 6950 exit_group(1) = ?