0

我正在尝试搜索属于特定项目的文档模型 has_and_belongs_to_many 项目。

模型/doc.rb 需要“弹性搜索/模型”

class Doc < ActiveRecord::Base
  include Elasticsearch::Model
  include Elasticsearch::Model::Callbacks

  settings index: {
    analysis: {
      tokenizer: {
        ngram_tokenizer: {
          type: "nGram",
          min_gram: "2",
          max_gram: "3",
          token_chars: [
            "letter",
            "digit",
            "punctuation"
          ]
        }
      },
      analyzer: {
        ngram_analyzer: {
          tokenizer: "ngram_tokenizer"
        }
      },
    },
  } do
      mappings do
        indexes :sourcedb, type: 'string', analyzer: 'ngram_analyzer'
        indexes :sourceid, type: 'string', analyzer: 'ngram_analyzer'
        indexes :body, type: 'string', analyzer: 'ngram_analyzer'
        indexes :docs_projects do
          indexes :doc_id
          indexes :project_id
          indexes :projects do
            indexes :id, index: :not_analyzed
          end
        end
      end
  end

  def as_indexed_json(options={})
    as_json(
      only: [:id, :sourcedb, :sourceid, :body],
      include: { projects: {only: :id} }  
    )
  end
end

搜索方法如下

  search_docs = docs.search(
    query: {
      bool:{
        must: [
          {match: {
              'projects.id' => project_id
            }
          }
        ]
      }
    },
    size: 5000,
  ).records.order('sourcedb ASC, sourceid ASC').paginate(page:params[:page], per_page: 10)

此搜索方法完成时没有错误,但没有返回任何内容。

  Doc Load (4.6ms)  SELECT "docs".* FROM "docs" INNER JOIN "docs_projects" ON "docs"."id" = "docs_projects"."doc_id" WHERE "docs_projects"."project_id" = 56
   (0.4ms)  SELECT COUNT(*) FROM "docs" WHERE 1=0
  Doc Load (0.3ms)  SELECT "docs".* FROM "docs" WHERE 1=0 ORDER BY sourcedb ASC, sourceid ASC LIMIT 10 OFFSET 0
  CACHE (0.0ms)  SELECT COUNT(*) FROM "docs" WHERE 1=0

我尝试使用 where([projects.id IN (?)], project_ids) 进行搜索,但它无法搜索属于具有最大大小的项目的文档。
如何通过与关联匹配进行搜索?
提前致谢。

4

1 回答 1

0

现在,我找到了解决方案。我就是这样解决的。

文档.rb

  settings index: {
    analysis: {
      tokenizer: {
        ngram_tokenizer: {
          type: "nGram",
          min_gram: "2",
          max_gram: "3",
          token_chars: [
            "letter",
            "digit",
            "punctuation"
          ]
        }
      },
      analyzer: {
        ngram_analyzer: {
          tokenizer: "ngram_tokenizer"
        }
      },
    },
  } do
      mappings do
        indexes :sourcedb, type: 'string', analyzer: 'ngram_analyzer'
        indexes :sourceid, type: 'string', analyzer: 'ngram_analyzer'
        indexes :body, type: 'string', analyzer: 'ngram_analyzer'

        # indexes :docs_projects, type: 'nested' do
        indexes :docs_projects do
          indexes :doc_id
          indexes :project_id
        end

        indexes :projects do
          indexes :id, index: :not_analyzed
        end
      end
  end

搜索方法

  def self.search_docs(attributes = {})
    minimum_should_match = 0
    minimum_should_match += 1 if attributes[:sourcedb].present?
    minimum_should_match += 1 if attributes[:sourceid].present?
    minimum_should_match += 1 if attributes[:body].present?

    if attributes[:project_id].present?
      must_array = [
        {match: {
            'projects.id' => attributes[:project_id]
          }
        }
      ] 
    end

    docs = search(
      query: {
        bool:{
          must: must_array,
          should: [ 
            {match: {
              sourcedb: {
                query: attributes[:sourcedb],
                fuzziness: 0
              }
            }
            },
            {match: {
              sourceid: {
                query: attributes[:sourceid],
                fuzziness: 0
              }
            }
            },
            {match: {
              body: {
                query: attributes[:body],
                fuzziness: 'AUTO'
              }
            }
            },
          ],
          minimum_should_match: minimum_should_match
        }
      },
      size: SEARCH_SIZE,
    )
    return {
      total: docs.results.total,
      docs: docs.records.order('sourcedb ASC, sourceid ASC')
    }
  end
于 2016-01-04T03:29:25.207 回答