在使用大约 500 个 scollector 运行 bosun 后,我在日志中看到了各种问题。这来自 opentsdb 标准输出日志。
org.hbase.async.RemoteException: org.apache.hadoop.hbase.regionserver.RegionServerAbortedException: Server localhost,39157,1475044082464 aborting
at org.apache.hadoop.hbase.regionserver.RSRpcServices.checkOpen(RSRpcServices.java:980)
at org.apache.hadoop.hbase.regionserver.RSRpcServices.get(RSRpcServices.java:1895)
at org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:32201)
at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2114)
at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:101)
at org.apache.hadoop.hbase.ipc.RpcExecutor.consumerLoop(RpcExecutor.java:130)
at org.apache.hadoop.hbase.ipc.RpcExecutor$1.run(RpcExecutor.java:107)
at java.lang.Thread.run(Thread.java:745)
at org.hbase.async.RegionClient.makeException(RegionClient.java:1738) [asynchbase-1.7.1.jar:na]
at org.hbase.async.RegionClient.decodeException(RegionClient.java:1756) [asynchbase-1.7.1.jar:na]
at org.hbase.async.RegionClient.decode(RegionClient.java:1468) [asynchbase-1.7.1.jar:na]
at org.hbase.async.RegionClient.decode(RegionClient.java:88) [asynchbase-1.7.1.jar:na]
at org.jboss.netty.handler.codec.replay.ReplayingDecoder.callDecode(ReplayingDecoder.java:500) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.handler.codec.replay.ReplayingDecoder.messageReceived(ReplayingDecoder.java:435) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.channel.SimpleChannelUpstreamHandler.handleUpstream(SimpleChannelUpstreamHandler.java:70) [netty-3.9.4.Final.jar:na]
at org.hbase.async.RegionClient.handleUpstream(RegionClient.java:1206) [asynchbase-1.7.1.jar:na]
at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:564) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.channel.DefaultChannelPipeline$DefaultChannelHandlerContext.sendUpstream(DefaultChannelPipeline.java:791) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.channel.SimpleChannelHandler.messageReceived(SimpleChannelHandler.java:142) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.channel.SimpleChannelHandler.handleUpstream(SimpleChannelHandler.java:88) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.handler.timeout.IdleStateAwareChannelHandler.handleUpstream(IdleStateAwareChannelHandler.java:36) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:564) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.channel.DefaultChannelPipeline$DefaultChannelHandlerContext.sendUpstream(DefaultChannelPipeline.java:791) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.handler.timeout.IdleStateHandler.messageReceived(IdleStateHandler.java:294) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.channel.SimpleChannelUpstreamHandler.handleUpstream(SimpleChannelUpstreamHandler.java:70) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:564) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:559) [netty-3.9.4.Final.jar:na]
at org.hbase.async.HBaseClient$RegionClientPipeline.sendUpstream(HBaseClient.java:3108) [asynchbase-1.7.1.jar:na]
at org.jboss.netty.channel.Channels.fireMessageReceived(Channels.java:268) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.channel.Channels.fireMessageReceived(Channels.java:255) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.channel.socket.nio.NioWorker.read
java.lang.OutOfMemoryError: Java heap space
我有时也会看到错误。
我最近更改的一件事是使用我自己的 zk 集群,并tsd.storage.hbase.zk_quorum=ip1,ip2,ip3
在 opentsdb.conf 中设置。
使用容器化 bosun 可以调整任何配置以使其适用于大约 1200 个 scollector 实例。这是针对 PoC 的,目前我不打算单独配置和扩展模块。