3

我试图通过在 config/elasticsearch.yml 文件中设置 index.cache.field.max_size: NUMBER 来限制字段缓存(常驻,resident)。我有大约 100 万条记录,对 7 个字段(所有字段都包含大量文本数据)执行分面(facet)操作,以构建“词云”。

# Search request used to build the word cloud.
# POSTs a JSON body to the _search endpoint of monitoring/mention_reports on
# localhost:9200 with pretty=true. The body:
#   - sets "size" to "0";
#   - runs a "filtered" query whose inner "text" query looks for "quora" in
#     the positive_keyword field (the filter clause is elided in this excerpt);
#   - defines one terms facet named "tagcloud" over field1..field7 with
#     "size" set to "300".
curl -X POST 'http://localhost:9200/monitoring/mention_reports/_search?&pretty=true' -d '
{
  "size":"0",

  "query": {
    "filtered":{
      "query":{
        "text": {
          "positive_keyword": {
            "query": "quora"
          }
        }
      },
      "filter":{

                    . . .


      }
    }
  },


  "facets": {
    "tagcloud": {
      "terms": {
        "fields":["field1","field2","field3","field4","field5","field6","field7"],
        "size":"300"
      }
    }
  }
}
'

无论为 index.cache.field.max_size 指定什么值(1000 或 100000),堆内存(分配的 15gb)都会一直被吃掉。我究竟做错了什么?还有没有更好的方法来构建词云,而不是对如此大量的文本数据进行分面?

映射:

 # Index creation request for the "monitoring" index.
 # POSTs settings and mappings to http://localhost:9200/monitoring/:
 #   - settings.index: 5 shards, 1 replica;
 #   - settings.analysis: a shingle filter "myCustomShingle" (max_shingle_size 3,
 #     output_unigrams true), a stop filter "myCustomStop" (stopword list is
 #     truncated in this excerpt), and a custom analyzer "myAnalyzer" that uses
 #     the standard tokenizer followed by lowercase, myCustomShingle, stop and
 #     myCustomStop;
 #   - mappings.mention_reports: _source enabled, _all disabled, and the field
 #     definitions below (the fields between field2 and field7 are elided).
 # NOTE(review): field1 is a string field but declares a numeric null_value (0),
 # while the other visible string fields use the string "null" -- confirm this
 # difference is intentional.
 curl -XPOST http://localhost:9200/monitoring/ -d '
{
  "settings":{
    "index":{
      "number_of_shards":5,
      "number_of_replicas":1
    },
    "analysis":{
      "filter":{
        "myCustomShingle":{
          "type":"shingle",
          "max_shingle_size":3,
          "output_unigrams":true
        },
        "myCustomStop":{
          "type":"stop",
          "stopwords":["a","about","abov ... ]
        }
      },
      "analyzer":{
        "myAnalyzer":{
          "type":"custom",
          "tokenizer":"standard",
          "filter":[
            "lowercase",
            "myCustomShingle",
            "stop",
            "myCustomStop"
          ]
        }
      }
    }
  },
  "mappings":{
    "mention_reports":{
      "_source":{
        "enabled":true
      },
      "_all":{
        "enabled":false
      },
      "index.query.default_field":"post_message",
      "properties":{
      "id":{
        "type":"string",
        "index":"not_analyzed",
        "include_in_all" : "false",
        "null_value" : "null"
      },
      "creation_time":{
        "type":"date"
      },
      "field1":{
        "type":"string",
        "analyzer":"standard",
        "include_in_all":"false",
        "null_value":0
      },
      "field2":{
        "type":"string",
        "index":"not_analyzed",
        "include_in_all":"false",
        "null_value":"null"
      },

            . . .


        "field7":{
          "type":"string",
          "analyzer":"myAnalyzer",
          "term_vector":"with_positions_offsets",
          "null_value" : "null"
        }                                           

      }
    }
  }
}
'
4

0 回答 0