1

我正在尝试正确配置安装在 FreeBSD 10.1 中的 collectd (5.4.2 ) 插件。我希望根据下面包含的配置,在我的 /var/log/messages 文件中查看警告事件,该文件由 rsyslog 管理,该文件将这个文件写入任何处于警告级别及以上的设施报告。

我没有收到任何阈值警告。我使用了诸如“stress -c”之类的工具来强制关闭空闲的“jiffies”。

我似乎成功地收集了基于使用 rrdtool lastupdate 显示的信息

  • /var/db/collectd/rrd/localhost/cpu-average/cpu-idle.rrd
  • /var/db/collectd/rrd/localhost/tail-messages/counter-os.rrd

我怀疑这是我在关于插件、类型和实例关键字的尾部、聚合、链或阈值的配置声明中不太正确的事情。

在我从各种设施中寻找问题的尾部,我认为 GaugeInc 将是更合适的 DSType,但我当前的 collectd 修订版不支持。

欣赏有关可能是设置问题的任何见解。

猫 /usr/local/etc/collectd.conf

Hostname    "localhost"
FQDNLookup   true
BaseDir     "/var/db/collectd"
PIDFile     "/var/run/collectd.pid"
TypesDB     "/usr/local/share/collectd/types.db"

#ReadThreads 5
#WriteThreads 5

#https://collectd.org/wiki/index.php/Main_Page
#https://collectd.org/wiki/index.php/Naming_schema

#A value is identified by a unique name, which we usually call The "identifier" consists of five parts, two of which are optional:
#host
#plugin
#plugin instance (optional)
#type
#type instance (optional)

# e.g. host "/" plugin ["-" plugin instance] "/" type ["-" type instance]
#  localhost/cpu-0/cpu-idle


LoadPlugin syslog
<plugin syslog>
  LogLevel warning
  NotifyLevel "OKAY"
</plugin>

LoadPlugin cpu
LoadPlugin aggregation

<LoadPlugin df >
  Interval 300
</LoadPlugin>

LoadPlugin interface
LoadPlugin load
LoadPlugin memory
LoadPlugin match_regex

LoadPlugin rrdtool
LoadPlugin threshold
<LoadPlugin tail >
  Interval 60
</LoadPlugin>


<plugin "df">
  FSType zfs
  MountPoint "/"
#ReportInodes false
  ValuesPercentage true
</plugin>


<plugin rrdtool>
  DataDir "/var/db/collectd/rrd"
  CacheTimeout 120
  CacheFlush 900
</plugin>


<plugin aggregation>
 <Aggregation>
   Plugin "cpu"
   Type "cpu"

   SetPlugin "cpu"
   SetPluginInstance "%{aggregation}"

   GroupBy "Host"
   GroupBy "TypeInstance"

   CalculateAverage true

 </Aggregation>
</plugin>

<Chain "PostCache">
  <Rule> # send cpu values for aggregation
    <Match regex>
       Plugin "^cpu$"
       PluginInstance "[0-9]+$"
    </Match>
    <Target write>
      Plugin "aggregation"
    </Target>
    Target stop
  </Rule>
  <Target write> # Write everything else via rrdtool.
    Plugin "rrdtool"
  </Target>
</Chain>

<plugin "tail">
 <File "/var/log/messages">
  Instance "messages"
  <Match>
# localhost/tail-messages/counter-ace
   Regex "local1.(err|warn|alert|crit)"
   DSType "CounterInc"
   Type "counter"
   Instance "ace"
  </Match>
  <Match>
   Regex "local0.(err|warn|alert|crit)"
   ExcludeRegex "smdr:"
   DSType "CounterInc"
   Type "counter"
   Instance "postgres"
  </Match>
  <Match>
   Regex "local4.(err|warn|alert|crit)"
   DSType "CounterInc"
   Type "counter"
   Instance "mec"
  </Match>
  <Match>
   Regex "local5.(err|warn|alert|crit)"
   DSType "CounterInc"
   Type "counter"
   Instance "web"
  </Match>
  <Match>
   Regex "(local6|local7).(err|warn|alert|crit)"
   DSType "CounterInc"
   Type "counter"
   Instance "apache"
  </Match>
   <Match>
   Regex "^.*$"
   ExcludeRegex " local[0-7] "
   DSType "CounterInc"
   Type "counter"
   Instance "os"
  </Match>
 </File>
</plugin>



#https://collectd.org/documentation/manpages/collectd-threshold.5.shtml
<Plugin "threshold">

   <Plugin "interface">
     Instance "eth0"
     <Type "if_octets">
       FailureMax 10000000
       DataSource "rx"
     </Type>
   </Plugin>

   <plugin "df">
     <type "df">
      Instance "/zroot/ROOT/default"
      WarningMax 75
     </type>
   </plugin>

  <Host "Hostname">
   <plugin "aggregation">
     <type "cpu-average">
       Instance "idle"
       WarningMin 17000
       FailureMin 15000
       Hits 1
     </type>
   </plugin>

   <Plugin "memory">
     <Type "memory">
         Instance "free"
         WarningMin 10000000
     </Type>
   </Plugin>

   <plugin "load">
    <type "load">
     DataSource "midterm"
     FailureMax 4
     Hits 3
     Hysteresis 3
    </type>
   </plugin>

   <Plugin "tail">
    Instance "messages"
    <type "counter">
     Instance "os"
     WarningMax .001
    </type>
    <type"counter">
     Instance "ace"
     WarningMax .001
    </type>
   </Plugin>

  </Host>
 </Plugin>
4

1 回答 1

0

阈值最常见的错误是过滤器过于严格。尝试删除 等部分HostInstance直到您可以看到通知。您也可以使用该unixsock插件来PUTVAL伪造值,而不是尝试烧毁您的系统。

于 2015-08-06T18:03:44.407 回答