您的方法从直觉上说是合理的,但是规范化器(normalizer)只能应用于 `keyword` 字段,而分析器(analyzer)只能应用于 `text` 字段。
另一种方法是利用摄取处理器(ingest processor),使用 `script` 处理器在索引时进行映射。
您可以在下面找到此类处理器的模拟:该 `script` 处理器将根据 `month` 字段中存在的月份,创建一个名为 `monthNum` 的新字段。
POST _ingest/pipeline/_simulate
{
  "pipeline": {
    "processors": [
      {
        "script": {
          "source": """
            // Month abbreviations in calendar order: list position + 1 is the month number.
            def monthNames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'];
            // indexOf yields -1 for a value that is not in the list, so such documents end up with monthNum 0.
            def position = monthNames.indexOf(ctx.month);
            ctx.monthNum = position + 1;
          """
        }
      }
    ]
  },
  "docs": [
    { "_source": { "month": "feb" } },
    { "_source": { "month": "mar" } },
    { "_source": { "month": "jul" } },
    { "_source": { "month": "aug" } },
    { "_source": { "month": "nov" } },
    { "_source": { "month": "dec" } },
    { "_source": { "month": "xyz" } }
  ]
}
结果文档:
{
"docs" : [
{
"doc" : {
"_index" : "_index",
"_type" : "_type",
"_id" : "_id",
"_source" : {
"monthNum" : 2,
"month" : "feb"
},
"_ingest" : {
"timestamp" : "2019-05-08T12:28:27.006Z"
}
}
},
{
"doc" : {
"_index" : "_index",
"_type" : "_type",
"_id" : "_id",
"_source" : {
"monthNum" : 3,
"month" : "mar"
},
"_ingest" : {
"timestamp" : "2019-05-08T12:28:27.006Z"
}
}
},
{
"doc" : {
"_index" : "_index",
"_type" : "_type",
"_id" : "_id",
"_source" : {
"monthNum" : 7,
"month" : "jul"
},
"_ingest" : {
"timestamp" : "2019-05-08T12:28:27.006Z"
}
}
},
{
"doc" : {
"_index" : "_index",
"_type" : "_type",
"_id" : "_id",
"_source" : {
"monthNum" : 8,
"month" : "aug"
},
"_ingest" : {
"timestamp" : "2019-05-08T12:28:27.006Z"
}
}
},
{
"doc" : {
"_index" : "_index",
"_type" : "_type",
"_id" : "_id",
"_source" : {
"monthNum" : 11,
"month" : "nov"
},
"_ingest" : {
"timestamp" : "2019-05-08T12:28:27.006Z"
}
}
},
{
"doc" : {
"_index" : "_index",
"_type" : "_type",
"_id" : "_id",
"_source" : {
"monthNum" : 12,
"month" : "dec"
},
"_ingest" : {
"timestamp" : "2019-05-08T12:28:27.006Z"
}
}
},
{
"doc" : {
"_index" : "_index",
"_type" : "_type",
"_id" : "_id",
"_source" : {
"monthNum" : 0,
"month" : "xyz"
},
"_ingest" : {
"timestamp" : "2019-05-08T12:28:27.006Z"
}
}
}
]
}