我正在使用 JDBC river 插件将一张 MySQL 表(约 50 万条记录)中的所有记录索引到 Elasticsearch 中。起初一切正常,但后来 river 突然开始在每次尝试索引文档时抛出奇怪的索引错误。
错误:
[2012-10-10 16:20:11,867][DEBUG][action.bulk] [Man-Wolf] [monitoring][0] failed to bulk item (index) index {[monitoring][mention_reports][70138], source[{"location":null,"wday":6,"monitoring_profile_id":22,"stream_type":"facebook","score":0,"tweet_count":0,"city":null,"to_user_id":0,"sentiment":-1,"post_message":null,"description":null,"location_data_source":null,"video_id":null,"year":2012,"post_name":"Danny Waddell's Photos","video_thumb_url":null,"user_follower":0,"tweet_text":null,"country":null,"content":null,"picture":"http://photos-e.ak.fbcdn.net/hphotos-ak-prn1/547714_507439249282865_1395188712_s.jpg","updated_at":"2012-08-04T16:06:59Z","model_id":82884,"month":8,"day":4,"profile_image_url":null,"result_type":null,"from_user_id":1407391780,"stream":null,"geo_coordinates_long":0.0,"tweet_id":0,"link":null,"state":null,"post_id":"1407391780_227741774015103","model_name":"FacebookSearchResult","id":70138,"title":null,"sub_location_1":null,"user_friends":0,"sub_location_2":null,"from_user":"Stephanie Morris","post_type":"photo","creation_time":"2012-08-04T16:05:20Z","created_at":"2012-08-04T16:06:59Z","to_user":null,"user_pic":"https://graph.facebook.com/1407391780/picture","video_type":"youtube","is_active":1,"caption":"obama will let them die for america but not vote","hour":16,"country_code":null,"duration":0,"positive_keyword":"obama","geo_coordinates_lat":0.0}]}
org.elasticsearch.index.engine.IndexFailedEngineException: [monitoring][0] Index failed for [mention_reports#70138]
at org.elasticsearch.index.engine.robin.RobinEngine.index(RobinEngine.java:482)
at org.elasticsearch.index.shard.service.InternalIndexShard.index(InternalIndexShard.java:323)
at org.elasticsearch.action.bulk.TransportShardBulkAction.shardOperationOnPrimary(TransportShardBulkAction.java:158)
at org.elasticsearch.action.support.replication.TransportShardReplicationOperationAction$AsyncShardOperationAction.performOnPrimary(TransportShardReplicationOperationAction.java:529)
at org.elasticsearch.action.support.replication.TransportShardReplicationOperationAction$AsyncShardOperationAction$1.run(TransportShardReplicationOperationAction.java:427)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1110)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:603)
at java.lang.Thread.run(Thread.java:636)
Caused by: java.io.FileNotFoundException: /var/www/elasticsearch-0.19.0/data/elasticsearch/nodes/0/indices/monitoring/0/index/_1pr.tvd (Too many open files)
at java.io.RandomAccessFile.open(Native Method)
at java.io.RandomAccessFile.<init>(RandomAccessFile.java:233)
at org.apache.lucene.store.FSDirectory$FSIndexOutput.<init>(FSDirectory.java:441)
at org.apache.lucene.store.FSDirectory.createOutput(FSDirectory.java:306)
at org.elasticsearch.index.store.Store$StoreDirectory.createOutput(Store.java:418)
at org.elasticsearch.index.store.Store$StoreDirectory.createOutput(Store.java:390)
at org.apache.lucene.index.TermVectorsTermsWriter.initTermVectorsWriter(TermVectorsTermsWriter.java:123)
at org.apache.lucene.index.TermVectorsTermsWriter.finishDocument(TermVectorsTermsWriter.java:143)
at org.apache.lucene.index.TermVectorsTermsWriter$PerDoc.finish(TermVectorsTermsWriter.java:250)
at org.apache.lucene.index.DocFieldProcessorPerThread$PerDoc.finish(DocFieldProcessorPerThread.java:348)
at org.apache.lucene.index.DocumentsWriter$WaitQueue.writeDocument(DocumentsWriter.java:1404)
at org.apache.lucene.index.DocumentsWriter$WaitQueue.add(DocumentsWriter.java:1424)
at org.apache.lucene.index.DocumentsWriter.finishDocument(DocumentsWriter.java:1043)
at org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:772)
at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:2066)
at org.elasticsearch.index.engine.robin.RobinEngine.innerIndex(RobinEngine.java:565)
at org.elasticsearch.index.engine.robin.RobinEngine.index(RobinEngine.java:477)
... 7 more
JDBC river 配置:
curl -XPUT 'localhost:9200/_river/river_mention_reports/_meta' -d '{
"type" : "jdbc",
"jdbc" : {
"driver" : "com.mysql.jdbc.Driver",
"url" : "jdbc:mysql://localhost:3306/monitoring",
"user" : "USERNAME",
"password" : "PASSWORD",
"sql" : "select * from mention_reports",
"poll" : "2h",
"versioning" : false
},
"index" : {
"index" : "monitoring",
"type" : "mention_reports",
"bulk_size" : 200,
"bulk_timeout" : "60s"
}
}'
我的索引设置和映射:
curl -XPOST http://localhost:9200/monitoring/ -d '
{
"settings":{
"index":{
"number_of_shards":5,
"number_of_replicas":1
},
"analysis":{
"filter":{
"myCustomShingle":{
"type":"shingle",
"max_shingle_size":3,
"output_unigrams":true
},
"myCustomStop":{
"type":"stop",
"stopwords":["a","about","abov ... ]
}
},
"analyzer":{
"myAnalyzer":{
"type":"custom",
"tokenizer":"standard",
"filter":[
"lowercase",
"myCustomShingle",
"stop",
"myCustomStop"
]
}
}
}
},
"mappings":{
"mention_reports":{
"_source":{
"enabled":true
},
"_all":{
"enabled":false
},
"index.query.default_field":"post_message",
"properties":{
"id":{
"type":"string",
"index":"not_analyzed",
"include_in_all" : "false",
"null_value" : "null"
},
"creation_time":{
"type":"date"
},
"from_user":{
"type":"string",
"analyzer":"standard",
"include_in_all":"false",
"null_value":0
},
"from_user_id":{
"type":"string",
"index":"not_analyzed",
"include_in_all":"false",
"null_value":"null"
},
. . .
"title":{
"type":"string",
"analyzer":"myAnalyzer",
"term_vector":"with_positions_offsets",
"null_value" : "null"
}
}
}
}
}
'
我该如何解决?