0

我对 Elasticsearch 完全陌生。所以如果这是一个愚蠢的问题,请原谅我;我的问题可能已经在其他地方得到了回答,但我没有找到。我想在我的网络中使用 Elasticsearch 作为 PDF 和 docx 文件的搜索引擎。我使用 fscrawler 将 PDF 提取到 Elasticsearch 中。由于我要摄取的文档包含多种语言,因此我想使用 n-gram 进行词干提取。为此,我想像这样更新我的映射:

PUT test/_mappings/_all
{
"mappings": {
    "title": {
      "properties": {
        "title": {
          "type": "text",
          "fields": {
            "de": {
              "type":     "string",
              "analyzer": "german"
            },
            "en": {
              "type":     "string",
              "analyzer": "english"
            },
             "general": { 
              "type":     "string",
              "analyzer": "trigrams"
          }
        }
      }
    }
  }
}
}

现在我得到了这个错误消息

{ "error": { "root_cause": [ { "type": "mapper_parsing_exception", "reason": "Root mapping definition has unsupported parameters: [mappings : {title={properties={title={type=text, fields={de={type=string, analyzer=german}, en={type=string, analyzer=english}, general={type=string, analyzer=trigrams}}}}}}]" } ], "type": "mapper_parsing_exception", "reason": "Root mapping definition has unsupported parameters: [mappings : {title={properties={title={type=text, fields={de={type=string, analyzer=german}, en={type=string, analyzer=english}, general={type=string, analyzer=trigrams}}}}}}]"
}, "status": 400 }

你知道我该如何解决这个问题吗?或者,你知道如何在不使用 fscrawler 的情况下,用正确的映射来摄取文件吗?

4

2 回答 2

0

我的映射

{
      "test": {
        "mappings": {
          "_doc": {
            "dynamic_templates": [
              {
                "raw_as_text": {
                  "path_match": "meta.raw.*",
                  "mapping": {
                    "fields": {
                      "keyword": {
                        "ignore_above": 256,
                        "type": "keyword"
                      }
                    },
                    "type": "text"
                  }
                }
              }
            ],
            "properties": {
              "attachment": {
                "type": "binary"
              },
              "attributes": {
                "properties": {
                  "group": {
                    "type": "keyword"
                  },
                  "owner": {
                    "type": "keyword"
                  }
                }
              },
              "content": {
                "type": "text"
              },
              "file": {
                "properties": {
                  "checksum": {
                    "type": "keyword"
                  },
                  "content_type": {
                    "type": "keyword"
                  },
                  "created": {
                    "type": "date",
                    "format": "dateOptionalTime"
                  },
                  "extension": {
                    "type": "keyword"
                  },
                  "filename": {
                    "type": "keyword",
                    "store": true
                  },
                  "filesize": {
                    "type": "long"
                  },
                  "indexed_chars": {
                    "type": "long"
                  },
                  "indexing_date": {
                    "type": "date",
                    "format": "dateOptionalTime"
                  },
                  "last_accessed": {
                    "type": "date",
                    "format": "dateOptionalTime"
                  },
                  "last_modified": {
                    "type": "date",
                    "format": "dateOptionalTime"
                  },
                  "url": {
                    "type": "keyword",
                    "index": false
                  }
                }
              },
              "meta": {
                "properties": {
                  "altitude": {
                    "type": "text"
                  },
                  "author": {
                    "type": "text"
                  },
                  "comments": {
                    "type": "text"
                  },
                  "contributor": {
                    "type": "text"
                  },
                  "coverage": {
                    "type": "text"
                  },
                  "created": {
                    "type": "date",
                    "format": "dateOptionalTime"
                  },
                  "creator_tool": {
                    "type": "keyword"
                  },
                  "date": {
                    "type": "date",
                    "format": "dateOptionalTime"
                  },
                  "description": {
                    "type": "text"
                  },
                  "format": {
                    "type": "text"
                  },
                  "identifier": {
                    "type": "text"
                  },
                  "keywords": {
                    "type": "text"
                  },
                  "language": {
                    "type": "keyword"
                  },
                  "latitude": {
                    "type": "text"
                  },
                  "longitude": {
                    "type": "text"
                  },
                  "metadata_date": {
                    "type": "date",
                    "format": "dateOptionalTime"
                  },
                  "modifier": {
                    "type": "text"
                  },
                  "print_date": {
                    "type": "date",
                    "format": "dateOptionalTime"
                  },
                  "publisher": {
                    "type": "text"
                  },
                  "rating": {
                    "type": "byte"
                  },
                  "relation": {
                    "type": "text"
                  },
                  "rights": {
                    "type": "text"
                  },
                  "source": {
                    "type": "text"
                  },
                  "title": {
                    "type": "text"
                  },
                  "type": {
                    "type": "text"
                  }
                }
              },
              "path": {
                "properties": {
                  "real": {
                    "type": "keyword",
                    "fields": {
                      "fulltext": {
                        "type": "text"
                      },
                      "tree": {
                        "type": "text",
                        "analyzer": "fscrawler_path",
                        "fielddata": true
                      }
                    }
                  },
                  "root": {
                    "type": "keyword"
                  },
                  "virtual": {
                    "type": "keyword",
                    "fields": {
                      "fulltext": {
                        "type": "text"
                      },
                      "tree": {
                        "type": "text",
                        "analyzer": "fscrawler_path",
                        "fielddata": true
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
于 2018-11-13T16:05:08.650 回答
0

这些是我的设置

    {
  "test": {
    "settings": {
      "index": {
        "mapping": {
          "total_fields": {
            "limit": "2000"
          }
        },
        "number_of_shards": "5",
        "provided_name": "test",
        "creation_date": "1542031632596",
        "analysis": {
          "filter": {
            "trigrams_filter": {
              "type": "ngram",
              "min_gram": "3",
              "max_gram": "3"
            }
          },
          "analyzer": {
            "fscrawler_path": {
              "tokenizer": "fscrawler_path"
            },
            "trigrams": {
              "filter": [
                "lowercase",
                "trigrams_filter"
              ],
              "type": "custom",
              "tokenizer": "standard"
            }
          },
          "tokenizer": {
            "fscrawler_path": {
              "type": "path_hierarchy"
            }
          }
        },
        "number_of_replicas": "1",
        "uuid": "7L3QE5_xRACECVbTFlFY-Q",
        "version": {
          "created": "6040399"
        }
      }
    }
  }
}
于 2018-11-13T16:01:49.423 回答