9

我正在尝试使用 elasticsearch/NEST 索引 pdf 文档。

该文件已编入索引,但搜索结果返回 0 个匹配项。

我需要搜索结果只返回文档 ID 和突出显示结果

(不含 base64 内容)

这是代码:

我会很感激这里的任何帮助,

谢谢,

class Program
{
    /// <summary>
    /// Recreates the "myindex" index, indexes test.pdf as a base64 string, and
    /// runs a highlighted query-string search for a word the PDF contains.
    /// </summary>
    static void Main(string[] args)
    {
        // Client whose default index is "myindex".
        string indexName = "myindex";
        var connection = new ConnectionSettings("localhost", 9200).SetDefaultIndex(indexName);
        var client = new ElasticClient(connection);

        // Drop any previous copy of the index.
        client.DeleteIndex(indexName);

        // Build the document from the PDF's base64-encoded bytes.
        string pdfPath = "test.pdf";
        var document = new Document()
        {
            Id = 1,
            Title = "test",
            Content = Convert.ToBase64String(File.ReadAllBytes(pdfPath))
        };

        // Index with Refresh = true; stop here if the index call did not report OK.
        var indexParameters = new IndexParameters() { Refresh = true };
        bool indexed = client.Index<Document>(document, indexParameters).OK;
        if (!indexed)
        {
            return;
        }

        // test.pdf contains the string "semantic"
        string searchText = "semantic";

        // Query-string search with highlighting requested on the Content field.
        var searchResult = client.Search<Document>(s => s
            .Query(q => q.QueryString(qs => qs.Query(searchText)))
            .Highlight(h => h
                .PreTags("<b>")
                .PostTags("</b>")
                .OnFields(f => f
                    .OnField(e => e.Content)
                    .PreTags("<em>")
                    .PostTags("</em>")
                )
            )
        );

        if (searchResult.Hits.Total == 0)
        {
            // No matches: nothing is done here.
        }
    }
}

/// <summary>
/// Document indexed under the type name "document", declaring the "standard"
/// analyzer for both indexing and searching.
/// </summary>
[ElasticType(
    Name = "document",
    SearchAnalyzer = "standard",
    IndexAnalyzer = "standard"
)]
public class Document
{
    /// <summary>Document identifier.</summary>
    public int Id { get; set; }

    /// <summary>Document title; mapped with Store = true.</summary>
    [ElasticProperty(Store = true)]
    public string Title { get; set; }

    /// <summary>
    /// File content as a base64 string (Main assigns the base64-encoded file bytes).
    /// Declared as an attachment field with term vectors including positions and offsets.
    /// </summary>
    [ElasticProperty(Type = FieldType.attachment,
        TermVector = TermVectorOption.with_positions_offsets)]
    public string Content { get; set; }
}
4

4 回答 4

9

安装附件插件并重启ES

bin/plugin -install elasticsearch/elasticsearch-mapper-attachments/2.3.2

创建一个映射到附件插件文档的附件类

  /// <summary>
  /// Attachment payload whose properties are serialized under the field names
  /// "_content", "_content_type" and "_name".
  /// </summary>
  public class Attachment
  {
      /// <summary>File content; the indexing example assigns the base64-encoded file bytes.</summary>
      [ElasticProperty(Name = "_content")]
      public string Content { get; set; }

      /// <summary>Content type of the file, e.g. "application/pdf".</summary>
      [ElasticProperty(Name = "_content_type")]
      public string ContentType { get; set; }

      /// <summary>File name, e.g. "test.pdf".</summary>
      [ElasticProperty(Name = "_name")]
      public string Name { get; set; }
  }

在您正在索引的 Document 类上添加一个名为“File”的属性和正确的映射

  /// <summary>
  /// The attached file; mapped as an attachment field, stored, with term vectors
  /// including positions and offsets.
  /// </summary>
  [ElasticProperty(Type = FieldType.Attachment, TermVector = TermVectorOption.WithPositionsOffsets, Store = true)]
  public Attachment File { get; set; }

在为该类的任何实例建立索引之前,请先显式创建索引。如果不这样做,Elasticsearch 将使用动态映射,并忽略您通过特性(attribute)定义的映射。如果将来更改了映射,请务必重新创建索引。

  // Create the index explicitly, adding the Document mapping built from its attributes.
  client.CreateIndex("index-name", c => c
     .AddMapping<Document>(m => m.MapFromAttributes())
  );

索引您的项目

  string path = "test.pdf";

  // The attachment carries the file as a base64 string plus its content type and name.
  var attachment = new Attachment();
  attachment.Content = Convert.ToBase64String(File.ReadAllBytes(path));
  attachment.ContentType = "application/pdf";
  attachment.Name = "test.pdf";

  var doc = new Document()
  {
      Id = 1,
      Title = "test",
      File = attachment
  };

  // Index the document built above. The original snippet passed `item`, a
  // variable that is never defined here, so it would not compile.
  client.Index<Document>(doc);

在 File 属性上搜索

  // Term query against the "file" field; replace "searchTerm" with the text to look for.
  var query = Query<Document>.Term("file", "searchTerm");

  // NOTE(review): `start` and `count` are not defined in this snippet — presumably
  // the paging offset and page size supplied by the caller; confirm before use.
  var searchResults = client.Search<Document>(s => s
          .From(start)
          .Size(count)
          .Query(query)
  );
于 2014-09-25T16:30:42.870 回答
1

// 我正在使用 FSRiver 插件 - https://github.com/dadoonet/fsriver/

void Main()
{
    // test.pdf contains the string "directly"
    string searchText = "directly";

    // Client for the "mydocs" index, with Doc registered under the type name "doc".
    var settings = new ConnectionSettings(new Uri("http://*.*.*.*:9200"))
        .SetDefaultIndex("mydocs")
        .MapDefaultTypeNames(s => s.Add(typeof(Doc), "doc"));
    var es = new ElasticClient(settings);

    // Request the Title and Name fields for up to 10000 hits, with
    // highlighting requested on the File field.
    var result = es.Search<Doc>(s => s
        .Fields(f => f.Title, f => f.Name)
        .From(0)
        .Size(10000)
        .Query(q => q.QueryString(qs => qs.Query(searchText)))
        .Highlight(h => h
            .PreTags("<b>")
            .PostTags("</b>")
            .OnFields(f => f
                .OnField(e => e.File)
                .PreTags("<em>")
                .PostTags("</em>")
            )
        )
    );
}

/// <summary>
/// Document indexed under the type name "doc", declaring the "standard"
/// analyzer for both indexing and searching.
/// </summary>
[ElasticType(Name = "doc",  SearchAnalyzer = "standard", IndexAnalyzer = "standard")]
public class Doc
{
    /// <summary>Document identifier.</summary>
    public int Id { get; set; }

     /// <summary>Document title; mapped with Store = true.</summary>
     [ElasticProperty(Store = true)]
     public string Title { get; set; }

    /// <summary>
    /// File field, declared as an attachment with term vectors including positions and offsets.
    /// </summary>
    [ElasticProperty(Type = FieldType.attachment, TermVector = TermVectorOption.with_positions_offsets)]
    public string File { get; set; }

    /// <summary>Name field; no explicit mapping attribute is declared.</summary>
    public string Name { get; set; }
}
于 2013-09-21T11:58:42.553 回答
0

I am working on the same problem, so I am now trying this: http://www.elasticsearch.cn/tutorials/2011/07/18/attachment-type-in-action.html

This article explains the issue.

Pay attention: you need to define the correct mapping:

 "title" : { "store" : "yes" },
 "file" : { "term_vector":"with_positions_offsets", "store":"yes" }

I will try to figure out how to do that with the NEST API and update this post.

于 2013-12-06T09:38:39.910 回答
-1

您需要在索引项目之前添加如下映射。

// Create the index (0 replicas, 12 shards) with the mapping built from the
// model's attributes.
client.CreateIndex("yourindex", c => c
    .NumberOfReplicas(0)
    .NumberOfShards(12)
    .AddMapping<AssetSearchEntryModels>(m => m.MapFromAttributes())
);
于 2014-05-01T04:06:48.143 回答