我正在使用一个用作 Hibernate Search 后端的 10 节点 Infinispan 集群。我们的服务器在 Java 1.6_24 上运行 TC server 2.5 (tomcat 6.0.32)。我们使用 jGroups 2.12.1.3 来处理来自每个节点的集群缓存写入,以及多播 UDP 传输。
当我们在集群中启动 3 个以上的节点时,最终其中一个节点开始记录复制超时。无论我们将 Infinispan 配置为复制还是分发缓存模式,我们都观察到了相同的结果。尽管集群的其余部分保持稳定,但故障节点基本上无法进行搜索。
我们的配置:
英菲尼斯潘:
<?xml version="1.0" encoding="UTF-8"?>
<infinispan
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="urn:infinispan:config:5.0 http://www.infinispan.org/schemas/infinispan-config-5.0.xsd"
xmlns="urn:infinispan:config:5.0">
<global>
<globalJmxStatistics
enabled="true"
cacheManagerName="HibernateSearch"
allowDuplicateDomains="true" />
<transport
clusterName="HibernateSearch-Infinispan-cluster-MT"
distributedSyncTimeout="50000">
<properties>
<property name="configurationFile" value="infinispan-udp.cfg.xml" />
</properties>
</transport>
<shutdown
hookBehavior="DONT_REGISTER" />
</global>
<default>
<locking
lockAcquisitionTimeout="20000"
writeSkewCheck="false"
concurrencyLevel="5000"
useLockStriping="false" />
<storeAsBinary storeKeysAsBinary="false" storeValuesAsBinary="true"
enabled="false" />
<invocationBatching
enabled="true" />
<clustering
mode="replication">
<stateRetrieval
timeout="60000"
logFlushTimeout="65000"
fetchInMemoryState="true"
alwaysProvideInMemoryState="true" />
<sync
replTimeout="50000" />
<l1 enabled="false" />
</clustering>
<jmxStatistics
enabled="true" />
<eviction
maxEntries="-1"
strategy="NONE" />
<expiration
maxIdle="-1" />
</default>
<namedCache
name="LuceneIndexesMetadata">
<clustering
mode="replication">
<stateRetrieval
fetchInMemoryState="true"
logFlushTimeout="30000" />
<sync
replTimeout="50000" />
<l1 enabled="false" />
</clustering>
<locking
lockAcquisitionTimeout="20000"
writeSkewCheck="false"
concurrencyLevel="5000"
useLockStriping="false" />
<loaders shared="true" preload="true">
<loader class="org.infinispan.loaders.jdbm.JdbmCacheStore" fetchPersistentState="false" ignoreModifications="false" purgeOnStartup="false">
<properties>
<property name="location" value="/usr/local/tc/.index/metadata" />
</properties>
</loader>
</loaders>
</namedCache>
<namedCache
name="LuceneIndexesData">
<clustering
mode="replication">
<stateRetrieval
fetchInMemoryState="true"
logFlushTimeout="30000" />
<sync
replTimeout="50000" />
<l1 enabled="false" />
</clustering>
<locking
lockAcquisitionTimeout="20000"
writeSkewCheck="false"
concurrencyLevel="5000"
useLockStriping="false" />
<loaders shared="true" preload="true">
<loader class="org.infinispan.loaders.jdbm.JdbmCacheStore" fetchPersistentState="false" ignoreModifications="false" purgeOnStartup="false">
<properties>
<property name="location" value="/usr/local/tc/.index/data" />
</properties>
</loader>
</loaders>
</namedCache>
<namedCache
name="LuceneIndexesLocking">
<clustering
mode="replication">
<stateRetrieval
fetchInMemoryState="true"
logFlushTimeout="30000" />
<sync
replTimeout="50000" />
<l1 enabled="false" />
</clustering>
<locking
lockAcquisitionTimeout="20000"
writeSkewCheck="false"
concurrencyLevel="5000"
useLockStriping="false" />
</namedCache>
jGroups(UDP):
<config xmlns="urn:org:jgroups"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="urn:org:jgroups http://www.jgroups.org/schema/JGroups-2.12.xsd">
<UDP
mcast_addr="${jgroups.udp.mcast_addr:228.10.10.9}"
mcast_port="${jgroups.udp.mcast_port:45599}"
tos="8"
ucast_recv_buf_size="20000000"
ucast_send_buf_size="640000"
mcast_recv_buf_size="25000000"
mcast_send_buf_size="640000"
loopback="true"
discard_incompatible_packets="true"
max_bundle_size="64000"
max_bundle_timeout="30"
ip_ttl="${jgroups.udp.ip_ttl:2}"
enable_bundling="true"
enable_diagnostics="false"
thread_naming_pattern="pl"
thread_pool.enabled="true"
thread_pool.min_threads="2"
thread_pool.max_threads="30"
thread_pool.keep_alive_time="5000"
thread_pool.queue_enabled="false"
thread_pool.queue_max_size="100"
thread_pool.rejection_policy="Discard"
oob_thread_pool.enabled="true"
oob_thread_pool.min_threads="2"
oob_thread_pool.max_threads="30"
oob_thread_pool.keep_alive_time="5000"
oob_thread_pool.queue_enabled="false"
oob_thread_pool.queue_max_size="100"
oob_thread_pool.rejection_policy="Discard"
/>
我们观察到的错误:
10-31-2011 13:53:02 ERROR Hibernate Search: Directory writer-3 interceptors.InvocationContextInterceptor: ISPN000136: Execution error
org.infinispan.util.concurrent.TimeoutException: Replication timeout for tc-cluster-0105-21082
at org.infinispan.remoting.transport.AbstractTransport.parseResponseAndAddToResponseList(AbstractTransport.java:71)
at org.infinispan.remoting.transport.jgroups.JGroupsTransport.invokeRemotely(JGroupsTransport.java:452)
at org.infinispan.remoting.rpc.RpcManagerImpl.invokeRemotely(RpcManagerImpl.java:132)
at org.infinispan.remoting.rpc.RpcManagerImpl.invokeRemotely(RpcManagerImpl.java:156)
at org.infinispan.remoting.rpc.RpcManagerImpl.invokeRemotely(RpcManagerImpl.java:265)
at org.infinispan.remoting.rpc.RpcManagerImpl.invokeRemotely(RpcManagerImpl.java:252)
at org.infinispan.remoting.rpc.RpcManagerImpl.broadcastRpcCommand(RpcManagerImpl.java:235)
at org.infinispan.remoting.rpc.RpcManagerImpl.broadcastRpcCommand(RpcManagerImpl.java:228)
at org.infinispan.interceptors.ReplicationInterceptor.handleCrudMethod(ReplicationInterceptor.java:116)
at org.infinispan.interceptors.ReplicationInterceptor.visitPutKeyValueCommand(ReplicationInterceptor.java:79)
at org.infinispan.commands.write.PutKeyValueCommand.acceptVisitor(PutKeyValueCommand.java:77)
at org.infinispan.interceptors.base.CommandInterceptor.invokeNextInterceptor(CommandInterceptor.java:119)
at org.infinispan.interceptors.LockingInterceptor.visitPutKeyValueCommand(LockingInterceptor.java:294)
at org.infinispan.commands.write.PutKeyValueCommand.acceptVisitor(PutKeyValueCommand.java:77)
at org.infinispan.interceptors.base.CommandInterceptor.invokeNextInterceptor(CommandInterceptor.java:119)
at org.infinispan.interceptors.base.CommandInterceptor.handleDefault(CommandInterceptor.java:133)
at org.infinispan.commands.AbstractVisitor.visitPutKeyValueCommand(AbstractVisitor.java:60)
at org.infinispan.commands.write.PutKeyValueCommand.acceptVisitor(PutKeyValueCommand.java:77)
at org.infinispan.interceptors.base.CommandInterceptor.invokeNextInterceptor(CommandInterceptor.java:119)
at org.infinispan.interceptors.TxInterceptor.enlistWriteAndInvokeNext(TxInterceptor.java:214)
at org.infinispan.interceptors.TxInterceptor.visitPutKeyValueCommand(TxInterceptor.java:162)
at org.infinispan.commands.write.PutKeyValueCommand.acceptVisitor(PutKeyValueCommand.java:77)
at org.infinispan.interceptors.base.CommandInterceptor.invokeNextInterceptor(CommandInterceptor.java:119)
at org.infinispan.interceptors.CacheMgmtInterceptor.visitPutKeyValueCommand(CacheMgmtInterceptor.java:114)
at org.infinispan.commands.write.PutKeyValueCommand.acceptVisitor(PutKeyValueCommand.java:77)
at org.infinispan.interceptors.base.CommandInterceptor.invokeNextInterceptor(CommandInterceptor.java:119)
at org.infinispan.interceptors.InvocationContextInterceptor.handleAll(InvocationContextInterceptor.java:104)
at org.infinispan.interceptors.InvocationContextInterceptor.handleDefault(InvocationContextInterceptor.java:64)
at org.infinispan.commands.AbstractVisitor.visitPutKeyValueCommand(AbstractVisitor.java:60)
at org.infinispan.commands.write.PutKeyValueCommand.acceptVisitor(PutKeyValueCommand.java:77)
at org.infinispan.interceptors.base.CommandInterceptor.invokeNextInterceptor(CommandInterceptor.java:119)
at org.infinispan.interceptors.BatchingInterceptor.handleDefault(BatchingInterceptor.java:77)
at org.infinispan.commands.AbstractVisitor.visitPutKeyValueCommand(AbstractVisitor.java:60)
at org.infinispan.commands.write.PutKeyValueCommand.acceptVisitor(PutKeyValueCommand.java:77)
at org.infinispan.interceptors.InterceptorChain.invoke(InterceptorChain.java:274)
at org.infinispan.CacheImpl.putIfAbsent(CacheImpl.java:524)
at org.infinispan.CacheSupport.putIfAbsent(CacheSupport.java:74)
at org.infinispan.lucene.locking.BaseLuceneLock.obtain(BaseLuceneLock.java:65)
at org.apache.lucene.store.Lock.obtain(Lock.java:72)
at org.apache.lucene.index.IndexWriter.<init>(IndexWriter.java:1097)
at org.hibernate.search.backend.Workspace.createNewIndexWriter(Workspace.java:202)
at org.hibernate.search.backend.Workspace.getIndexWriter(Workspace.java:180)
at org.hibernate.search.backend.impl.lucene.PerDPQueueProcessor.run(PerDPQueueProcessor.java:103)
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)
因为无论我们的拓扑或缓存模式如何,这个错误都非常普遍,我们相信我们一定是在某个地方配置错误。任何人都可以推荐一个修复?