11

我们使用 apache ignite v2.2 作为 grails 应用程序中的休眠二级缓存。我们有 4 个节点集群,每个集群有 10G RAM。第一个节点启动正常。但随后挂起。有时是第 2 有时是第 3 或第 4。也有成功的创业公司发生,但非常罕见。应用程序总是挂在同一个地方:

"host-startStop-1" #45 daemon prio=5 os_prio=0 tid=0x00007f7cac004800 nid=0x3d44 waiting on condition [0x00007f7cfdd81000]
   java.lang.Thread.State: TIMED_WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:338)
        at org.apache.ignite.internal.util.future.GridFutureAdapter.get0(GridFutureAdapter.java:216)
        at org.apache.ignite.internal.util.future.GridFutureAdapter.get(GridFutureAdapter.java:158)
        at org.apache.ignite.internal.util.future.GridFutureAdapter.get(GridFutureAdapter.java:150)
        at org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager.onKernalStart(GridCachePartitionExchangeManager.java:551)
        at org.apache.ignite.internal.processors.cache.GridCacheProcessor.onKernalStart(GridCacheProcessor.java:843)
        at org.apache.ignite.internal.IgniteKernal.start(IgniteKernal.java:1040)
        at org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start0(IgnitionEx.java:1896)
        at org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance.start(IgnitionEx.java:1648)
        - locked <0x00000007890a1198> (a org.apache.ignite.internal.IgnitionEx$IgniteNamedInstance)
        at org.apache.ignite.internal.IgnitionEx.start0(IgnitionEx.java:1076)
        at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:596)
        at org.apache.ignite.internal.IgnitionEx.start(IgnitionEx.java:520)
        at org.apache.ignite.Ignition.start(Ignition.java:322)

在此过程中,所有其他节点都被锁定。配置:

IgniteConfiguration configuration = new IgniteConfiguration()
        List<CacheConfiguration> cacheConfigurations = []
        for (String name : caches) {
            CacheConfiguration cacheConfiguration = new CacheConfiguration<>()
            cacheConfiguration.setCacheMode(CacheMode.REPLICATED)
            cacheConfiguration.setAtomicityMode(CacheAtomicityMode.TRANSACTIONAL)
            cacheConfiguration.setWriteSynchronizationMode(CacheWriteSynchronizationMode.FULL_ASYNC)
            cacheConfiguration.setName(name)
            cacheConfiguration.onheapCacheEnabled =  true
            cacheConfiguration.evictionPolicy = new LruEvictionPolicy()
            cacheConfiguration.memoryPolicyName = MEMORY_POLICY
            cacheConfigurations.add(cacheConfiguration)
        }
        for (String name : ['org.hibernate.cache.spi.UpdateTimestampsCache',
                            'org.hibernate.cache.internal.StandardQueryCache']) {
            CacheConfiguration cacheConfiguration = new CacheConfiguration<>()
            cacheConfiguration.setCacheMode(CacheMode.REPLICATED)
            cacheConfiguration.setAtomicityMode(CacheAtomicityMode.ATOMIC)
            cacheConfiguration.setWriteSynchronizationMode(CacheWriteSynchronizationMode.FULL_ASYNC)
            cacheConfiguration.setName(name)
            cacheConfiguration.onheapCacheEnabled =  true
            cacheConfiguration.evictionPolicy = new LruEvictionPolicy()
            cacheConfiguration.memoryPolicyName = MEMORY_POLICY
            cacheConfigurations.add(cacheConfiguration)
        }
        configuration.setCacheConfiguration(cacheConfigurations.toArray(new CacheConfiguration[cacheConfigurations.size()]))
        configuration.peerClassLoadingEnabled = true
        configuration.igniteInstanceName = Constants.IGNITE_GRID
        configuration.gridLogger = new Slf4jLogger()
        MemoryConfiguration memoryConfiguration = new MemoryConfiguration()
        memoryConfiguration.defaultMemoryPolicySize = 1 * 1024 * 1024 * 1024l
        MemoryPolicyConfiguration l2CachePolicy = new MemoryPolicyConfiguration()
        l2CachePolicy.name = MEMORY_POLICY
        l2CachePolicy.setMaxSize(4 * 1024 * 1024 * 1024l)
        l2CachePolicy.pageEvictionMode = DataPageEvictionMode.RANDOM_LRU
        memoryConfiguration.setMemoryPolicies(l2CachePolicy)
        configuration.memoryConfiguration = memoryConfiguration
        int[] eventTypes = new int[1]
        eventTypes[0] = EventType.EVT_NODE_FAILED
        configuration.includeEventTypes = eventTypes
        Map<IgnitePredicate<? extends Event>, int[]> listeners = new HashedMap()
        listeners.put(new NodeFailedEventListener(), eventTypes)
        configuration.localEventListeners = listeners
        TcpCommunicationSpi commSpi = new TcpCommunicationSpi()
        commSpi.slowClientQueueLimit = 1000
        commSpi.messageQueueLimit = 5000
        configuration.communicationSpi = commSpi
        TcpDiscoverySpi discoverySpi = new TcpDiscoverySpi()
        configuration.discoverySpi = discoverySpi
        if (grailsApplication.config.grails?.plugin?.awssdk?.accessKey && Env.igniteS3Bucket) {
            TcpDiscoveryS3IpFinder awsIpFinder = new TcpDiscoveryS3IpFinder()
            awsIpFinder.setBucketName(Env.igniteS3Bucket)
            AWSCredentials awsCredentials = new BasicAWSCredentials(grailsApplication.config.grails.plugin.awssdk.accessKey,
                    grailsApplication.config.grails.plugin.awssdk.secretKey)
            awsIpFinder.setAwsCredentials(awsCredentials)
            discoverySpi.ipFinder = awsIpFinder
        } else {
            TcpDiscoveryVmIpFinder ipFinder = new TcpDiscoveryVmIpFinder()
            ipFinder.setAddresses(["127.0.0.1:47500"])
            discoverySpi.ipFinder = ipFinder
        }
        configuration.classLoader = grailsApplication.classLoader
        ignite = Ignition.start(configuration)

编辑

失败节点的完整线程转储

成功节点的全线程转储

4

2 回答 2

0

如果你想在一台物理机器上运行超过 1 个节点,我建议配置MemoryConfiguration(因为默认情况下,在 2.2 版本中,Ignite 将需要 80% 的物理 RAM 用于一个节点)或更新到 2.3 版本(默认值降低到20%)

于 2017-12-26T08:30:18.447 回答
0

您能否尝试使用不包含句点“。”的缓存?以他们的名义?众所周知,这会导致延迟问题。

于 2018-01-17T17:54:06.660 回答