
I'm running a 10-node Infinispan cluster used as a Hibernate Search backend. Our servers run TC Server 2.5 (Tomcat 6.0.32) on Java 1.6.0_24. We use jGroups 2.12.1.3 with multicast UDP transport to handle the clustered cache writes from each node.
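For reference, the Infinispan directory provider is wired into Hibernate Search roughly as follows (a minimal sketch, not our exact bootstrap code; the property keys are the standard Hibernate Search ones, and the resource name "infinispan.xml" is an assumption that should match the file shown below):

import org.hibernate.cfg.Configuration;

public class SearchBackendWiring {
    public static void configure(Configuration cfg) {
        // Store the Lucene indexes in Infinispan rather than on a local filesystem.
        cfg.setProperty("hibernate.search.default.directory_provider", "infinispan");
        // Resource name of the Infinispan configuration shown below
        // (hypothetical name; adjust to wherever the file lives on the classpath).
        cfg.setProperty("hibernate.search.infinispan.configuration_resourcename", "infinispan.xml");
    }
}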

Once we start more than three nodes in the cluster, one of them eventually begins logging replication timeouts. We observe the same behavior whether we configure Infinispan for replicated or distributed cache mode. The failing node becomes essentially unable to serve searches, although the rest of the cluster remains stable.

Our configuration:

Infinispan:

<?xml version="1.0" encoding="UTF-8"?>
 <infinispan
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="urn:infinispan:config:5.0 http://www.infinispan.org/schemas/infinispan-config-5.0.xsd"
    xmlns="urn:infinispan:config:5.0">
<global>
    <globalJmxStatistics
        enabled="true"
        cacheManagerName="HibernateSearch"
        allowDuplicateDomains="true" />
    <transport
        clusterName="HibernateSearch-Infinispan-cluster-MT"
        distributedSyncTimeout="50000">
        <properties>
            <property name="configurationFile" value="infinispan-udp.cfg.xml" />
        </properties>
    </transport>
    <shutdown
        hookBehavior="DONT_REGISTER" />
</global>


<default>
    <locking
        lockAcquisitionTimeout="20000"
        writeSkewCheck="false"
        concurrencyLevel="5000"
        useLockStriping="false" />
    <storeAsBinary storeKeysAsBinary="false" storeValuesAsBinary="true"
        enabled="false" />
    <invocationBatching
        enabled="true" />
    <clustering
        mode="replication">
        <stateRetrieval
            timeout="60000"
            logFlushTimeout="65000"
            fetchInMemoryState="true"
            alwaysProvideInMemoryState="true" />
        <sync
            replTimeout="50000" />
        <l1 enabled="false" />
    </clustering>
    <jmxStatistics
        enabled="true" />
    <eviction
        maxEntries="-1"
        strategy="NONE" />
    <expiration
        maxIdle="-1" />
</default>


<namedCache
    name="LuceneIndexesMetadata">
    <clustering
        mode="replication">
        <stateRetrieval
            fetchInMemoryState="true"
            logFlushTimeout="30000" />
        <sync
            replTimeout="50000" />
        <l1 enabled="false" />
    </clustering>
    <locking
        lockAcquisitionTimeout="20000"
        writeSkewCheck="false"
        concurrencyLevel="5000"
        useLockStriping="false" />
    <loaders shared="true" preload="true">
        <loader class="org.infinispan.loaders.jdbm.JdbmCacheStore" fetchPersistentState="false" ignoreModifications="false" purgeOnStartup="false">
            <properties>
                <property name="location" value="/usr/local/tc/.index/metadata" />
            </properties>
        </loader>
    </loaders>
</namedCache>
<namedCache
    name="LuceneIndexesData">
    <clustering
        mode="replication">
        <stateRetrieval
            fetchInMemoryState="true"
            logFlushTimeout="30000" />
        <sync
            replTimeout="50000" />
        <l1 enabled="false" />
    </clustering>
    <locking
        lockAcquisitionTimeout="20000"
        writeSkewCheck="false"
        concurrencyLevel="5000"
        useLockStriping="false" />
    <loaders shared="true" preload="true">
        <loader class="org.infinispan.loaders.jdbm.JdbmCacheStore" fetchPersistentState="false" ignoreModifications="false" purgeOnStartup="false">
            <properties>
                <property name="location" value="/usr/local/tc/.index/data" />
            </properties>
        </loader>
    </loaders>
</namedCache>
<namedCache
    name="LuceneIndexesLocking">
    <clustering
        mode="replication">
        <stateRetrieval
            fetchInMemoryState="true"
            logFlushTimeout="30000" />
        <sync
            replTimeout="50000" />
        <l1 enabled="false" />
    </clustering>
    <locking
        lockAcquisitionTimeout="20000"
        writeSkewCheck="false"
        concurrencyLevel="5000"
        useLockStriping="false" />
</namedCache>
</infinispan>
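To take Tomcat out of the picture, the same configuration can be exercised from a standalone harness; a synchronous put on a replicated cache reproduces exactly the kind of RPC that is timing out (a sketch, assuming the file above is on the classpath as "infinispan.xml"):

import org.infinispan.Cache;
import org.infinispan.manager.DefaultCacheManager;

public class ClusterSmokeTest {
    public static void main(String[] args) throws Exception {
        // Load the Infinispan configuration shown above.
        DefaultCacheManager cm = new DefaultCacheManager("infinispan.xml");
        Cache<String, String> cache = cm.getCache("LuceneIndexesMetadata");
        // With <sync replTimeout="50000"/>, this synchronous replicated write
        // throws TimeoutException if any member fails to acknowledge within 50 s.
        cache.put("smoke-test-key", "smoke-test-value");
        System.out.println("Cluster members: " + cm.getMembers());
        cm.stop();
    }
}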

jGroups (UDP):

<config xmlns="urn:org:jgroups"
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="urn:org:jgroups http://www.jgroups.org/schema/JGroups-2.12.xsd">
<UDP
     mcast_addr="${jgroups.udp.mcast_addr:228.10.10.9}"
     mcast_port="${jgroups.udp.mcast_port:45599}"
     tos="8"
     ucast_recv_buf_size="20000000"
     ucast_send_buf_size="640000"
     mcast_recv_buf_size="25000000"
     mcast_send_buf_size="640000"
     loopback="true"
     discard_incompatible_packets="true"
     max_bundle_size="64000"
     max_bundle_timeout="30"
     ip_ttl="${jgroups.udp.ip_ttl:2}"
     enable_bundling="true"
     enable_diagnostics="false"
     thread_naming_pattern="pl"
     thread_pool.enabled="true"
     thread_pool.min_threads="2"
     thread_pool.max_threads="30"
     thread_pool.keep_alive_time="5000"
     thread_pool.queue_enabled="false"
     thread_pool.queue_max_size="100"
     thread_pool.rejection_policy="Discard"
     oob_thread_pool.enabled="true"
     oob_thread_pool.min_threads="2"
     oob_thread_pool.max_threads="30"
     oob_thread_pool.keep_alive_time="5000"
     oob_thread_pool.queue_enabled="false"
     oob_thread_pool.queue_max_size="100"
     oob_thread_pool.rejection_policy="Discard"
     />
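The posted file shows only the UDP transport element; the rest of the protocol stack (PING, FD, NAKACK, GMS, and so on) is presumably omitted from the snippet. Because replication timeouts that appear only past a certain cluster size often come down to lossy multicast, a quick sanity check is to join a bare JChannel with this same stack on every node and confirm that each one sees the full membership view (a sketch against the jGroups 2.12 API; the cluster name here is arbitrary):

import org.jgroups.JChannel;

public class MulticastProbe {
    public static void main(String[] args) throws Exception {
        // Use the same stack file that Infinispan's transport points at.
        JChannel channel = new JChannel("infinispan-udp.cfg.xml");
        channel.connect("multicast-probe");
        Thread.sleep(60000); // give the other nodes time to join
        // Every node should print the same view listing all probe members;
        // a node that only ever sees itself is not receiving multicast.
        System.out.println("View: " + channel.getView());
        channel.close();
    }
}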


The error we observe:

10-31-2011 13:53:02 ERROR Hibernate Search: Directory writer-3 interceptors.InvocationContextInterceptor: ISPN000136: Execution error
org.infinispan.util.concurrent.TimeoutException: Replication timeout for tc-cluster-0105-21082
          at org.infinispan.remoting.transport.AbstractTransport.parseResponseAndAddToResponseList(AbstractTransport.java:71)
          at org.infinispan.remoting.transport.jgroups.JGroupsTransport.invokeRemotely(JGroupsTransport.java:452)
          at org.infinispan.remoting.rpc.RpcManagerImpl.invokeRemotely(RpcManagerImpl.java:132)
          at org.infinispan.remoting.rpc.RpcManagerImpl.invokeRemotely(RpcManagerImpl.java:156)
          at org.infinispan.remoting.rpc.RpcManagerImpl.invokeRemotely(RpcManagerImpl.java:265)
          at org.infinispan.remoting.rpc.RpcManagerImpl.invokeRemotely(RpcManagerImpl.java:252)
          at org.infinispan.remoting.rpc.RpcManagerImpl.broadcastRpcCommand(RpcManagerImpl.java:235)
          at org.infinispan.remoting.rpc.RpcManagerImpl.broadcastRpcCommand(RpcManagerImpl.java:228)
          at org.infinispan.interceptors.ReplicationInterceptor.handleCrudMethod(ReplicationInterceptor.java:116)
          at org.infinispan.interceptors.ReplicationInterceptor.visitPutKeyValueCommand(ReplicationInterceptor.java:79)
          at org.infinispan.commands.write.PutKeyValueCommand.acceptVisitor(PutKeyValueCommand.java:77)
          at org.infinispan.interceptors.base.CommandInterceptor.invokeNextInterceptor(CommandInterceptor.java:119)
          at org.infinispan.interceptors.LockingInterceptor.visitPutKeyValueCommand(LockingInterceptor.java:294)
          at org.infinispan.commands.write.PutKeyValueCommand.acceptVisitor(PutKeyValueCommand.java:77)
          at org.infinispan.interceptors.base.CommandInterceptor.invokeNextInterceptor(CommandInterceptor.java:119)
          at org.infinispan.interceptors.base.CommandInterceptor.handleDefault(CommandInterceptor.java:133)
          at org.infinispan.commands.AbstractVisitor.visitPutKeyValueCommand(AbstractVisitor.java:60)
          at org.infinispan.commands.write.PutKeyValueCommand.acceptVisitor(PutKeyValueCommand.java:77)
          at org.infinispan.interceptors.base.CommandInterceptor.invokeNextInterceptor(CommandInterceptor.java:119)
          at org.infinispan.interceptors.TxInterceptor.enlistWriteAndInvokeNext(TxInterceptor.java:214)
          at org.infinispan.interceptors.TxInterceptor.visitPutKeyValueCommand(TxInterceptor.java:162)
          at org.infinispan.commands.write.PutKeyValueCommand.acceptVisitor(PutKeyValueCommand.java:77)
          at org.infinispan.interceptors.base.CommandInterceptor.invokeNextInterceptor(CommandInterceptor.java:119)
          at org.infinispan.interceptors.CacheMgmtInterceptor.visitPutKeyValueCommand(CacheMgmtInterceptor.java:114)
          at org.infinispan.commands.write.PutKeyValueCommand.acceptVisitor(PutKeyValueCommand.java:77)
          at org.infinispan.interceptors.base.CommandInterceptor.invokeNextInterceptor(CommandInterceptor.java:119)
          at org.infinispan.interceptors.InvocationContextInterceptor.handleAll(InvocationContextInterceptor.java:104)
          at org.infinispan.interceptors.InvocationContextInterceptor.handleDefault(InvocationContextInterceptor.java:64)
          at org.infinispan.commands.AbstractVisitor.visitPutKeyValueCommand(AbstractVisitor.java:60)
          at org.infinispan.commands.write.PutKeyValueCommand.acceptVisitor(PutKeyValueCommand.java:77)
          at org.infinispan.interceptors.base.CommandInterceptor.invokeNextInterceptor(CommandInterceptor.java:119)
          at org.infinispan.interceptors.BatchingInterceptor.handleDefault(BatchingInterceptor.java:77)
          at org.infinispan.commands.AbstractVisitor.visitPutKeyValueCommand(AbstractVisitor.java:60)
          at org.infinispan.commands.write.PutKeyValueCommand.acceptVisitor(PutKeyValueCommand.java:77)
          at org.infinispan.interceptors.InterceptorChain.invoke(InterceptorChain.java:274)
          at org.infinispan.CacheImpl.putIfAbsent(CacheImpl.java:524)
          at org.infinispan.CacheSupport.putIfAbsent(CacheSupport.java:74)
          at org.infinispan.lucene.locking.BaseLuceneLock.obtain(BaseLuceneLock.java:65)
          at org.apache.lucene.store.Lock.obtain(Lock.java:72)
          at org.apache.lucene.index.IndexWriter.<init>(IndexWriter.java:1097)
          at org.hibernate.search.backend.Workspace.createNewIndexWriter(Workspace.java:202)
          at org.hibernate.search.backend.Workspace.getIndexWriter(Workspace.java:180)
          at org.hibernate.search.backend.impl.lucene.PerDPQueueProcessor.run(PerDPQueueProcessor.java:103)
          at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
          at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
          at java.lang.Thread.run(Thread.java:662)

The stack trace suggests the timeout happens while acquiring the Lucene index write lock: BaseLuceneLock.obtain issues a putIfAbsent whose synchronous replication is never acknowledged in time. Because this error is so pervasive regardless of our topology or cache mode, we believe we must have misconfigured something somewhere. Can anyone recommend a fix?


1 Answer


It turned out we had a version conflict between Infinispan and Hibernate Search. If you use Hibernate Search 3.4.1, you must use Infinispan 4.2.1; later versions may not work.
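One way to confirm which pairing actually ends up on a node's classpath is to read the jar manifests at runtime; this uses only plain JDK reflection, no Infinispan- or Hibernate-specific API (a sketch; getImplementationVersion() returns null if a jar's manifest lacks the attribute):

import org.hibernate.search.Search;
import org.infinispan.manager.DefaultCacheManager;

public class VersionCheck {
    public static void main(String[] args) {
        // Implementation-Version is read from each jar's MANIFEST.MF.
        System.out.println("Hibernate Search: "
                + Search.class.getPackage().getImplementationVersion());
        System.out.println("Infinispan: "
                + DefaultCacheManager.class.getPackage().getImplementationVersion());
    }
}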

Answered 2011-11-07T17:47:41.330