0

我正在使用 Solr 7.3,我有一个“索引作业”,它是一个黑盒(无法查看其内部实现)。使用 _default 配置时,“索引作业”运行后的查询输出如下:

{
  "responseHeader":{
    "zkConnected":true,
    "status":0,
    "QTime":31,
    "params":{
      "q":"*:*"}},
  "response":{"numFound":18,"start":0,"maxScore":1.0,"docs":[
      {
        "lastaccesstime":["Thu Aug 09 16:37:52 UTC 2018"],
        "accessflags":[136],
        "id":"/ifs/language/Hin1",
        "readable_to":"S-1-22-1-0",
        "objecttype":["file"],
        "path":"/ifslanguage/Hin1",
        "fileattributes":[128],
        "dacls":"S-1-22-1-0:1442207, S-1-22-2-0:1179785, S-1-1-0:1179785",
        "groupsid":["S-1-22-2-0"],
        "ownersid":["S-1-22-1-0"],
        "lastwritetime":["Thu Aug 09 16:37:52 UTC 2018"],
        "creationtime":["Thu Aug 09 16:37:52 UTC 2018"],
        "filename":"Hin1",
        "allocationsize":[1048576],
        "filemode":[0],
        "clustername":["ISL-TEST-8-1-0-0-40"],
        "changetime":["Thu Aug 09 16:37:52 UTC 2018"],
        "clusterip":["172.19.2.140"],
        "stream_size":[3220],
        "x_parsed_by":["org.apache.tika.parser.DefaultParser",
          "org.apache.tika.parser.txt.TXTParser"],
        "stream_content_type":["smb"],
        "content_encoding":["UTF-8"],
        "content_type":["text/plain; charset=UTF-8"],
        "lastwritetime_str":["Thu Aug 09 16:37:52 UTC 2018"],
        "x_parsed_by_str":["org.apache.tika.parser.DefaultParser",
          "org.apache.tika.parser.txt.TXTParser"],
        "content_type_str":["text/plain; charset=UTF-8"],
        "stream_content_type_str":["smb"],
        "objecttype_str":["file"],
        "lastaccesstime_str":["Thu Aug 09 16:37:52 UTC 2018"],
        "_version_":1608340155838496768,
        "clusterip_str":["172.19.2.140"],
        "groupsid_str":["S-1-22-2-0"],
        "content_encoding_str":["UTF-8"],
        "ownersid_str":["S-1-22-1-0"],
        "changetime_str":["Thu Aug 09 16:37:52 UTC 2018"],
        "creationtime_str":["Thu Aug 09 16:37:52 UTC 2018"],
        "clustername_str":["ISL-TEST-8-1-0-0-40"]},
      {
        "lastaccesstime":["Thu Aug 09 16:37:56 UTC 2018"],
        "accessflags":[136],
        "id":"/ifs/language/Hin2",
        "readable_to":"S-1-22-1-0",
        "objecttype":["file"],
        "path":"/ifslanguage/Hin2",
        "fileattributes":[128],
        "dacls":"S-1-22-1-0:1442207, S-1-22-2-0:1179785, S-1-1-0:1179785",
        "groupsid":["S-1-22-2-0"],
        "ownersid":["S-1-22-1-0"],
        "lastwritetime":["Thu Aug 09 16:37:56 UTC 2018"],
        "creationtime":["Thu Aug 09 16:37:56 UTC 2018"],
        "filename":"Hin2",
        "allocationsize":[1048576],
        "filemode":[0],
        "clustername":["ISL-TEST-8-1-0-0-40"],
        "changetime":["Thu Aug 09 16:37:56 UTC 2018"],
        "clusterip":["172.19.2.140"],
        "stream_size":[2649],
        "x_parsed_by":["org.apache.tika.parser.DefaultParser",
          "org.apache.tika.parser.txt.TXTParser"],
        "stream_content_type":["smb"],
        "content_encoding":["UTF-8"],
        "content_type":["text/plain; charset=UTF-8"],
        "lastwritetime_str":["Thu Aug 09 16:37:56 UTC 2018"],
        "x_parsed_by_str":["org.apache.tika.parser.DefaultParser",
          "org.apache.tika.parser.txt.TXTParser"],
        "content_type_str":["text/plain; charset=UTF-8"],
        "stream_content_type_str":["smb"],
        "objecttype_str":["file"],
        "lastaccesstime_str":["Thu Aug 09 16:37:56 UTC 2018"],
        "_version_":1608340160634683392,
        "clusterip_str":["172.19.2.140"],
        "groupsid_str":["S-1-22-2-0"],
        "content_encoding_str":["UTF-8"],
        "ownersid_str":["S-1-22-1-0"],
        "changetime_str":["Thu Aug 09 16:37:56 UTC 2018"],
        "creationtime_str":["Thu Aug 09 16:37:56 UTC 2018"],
        "clustername_str":["ISL-TEST-8-1-0-0-40"]},
      {
        "lastaccesstime":["Thu Aug 09 16:37:47 UTC 2018"],
        "accessflags":[136],
        "id":"/ifs/language/Ar2",
        "readable_to":"S-1-22-1-0",
        "objecttype":["file"],
        "path":"/ifslanguage/Ar2",
        "fileattributes":[128],
        "dacls":"S-1-22-1-0:1442207, S-1-22-2-0:1179785, S-1-1-0:1179785",
        "groupsid":["S-1-22-2-0"],
        "ownersid":["S-1-22-1-0"],
        "lastwritetime":["Thu Aug 09 16:37:47 UTC 2018"],
        "creationtime":["Thu Aug 09 16:37:47 UTC 2018"],
        "filename":"Ar2",
        "allocationsize":[1048576],
        "filemode":[0],
        "clustername":["ISL-TEST-8-1-0-0-40"],
        "changetime":["Thu Aug 09 16:37:47 UTC 2018"],
        "clusterip":["172.19.2.140"],
        "stream_size":[2649],
        "x_parsed_by":["org.apache.tika.parser.DefaultParser",
          "org.apache.tika.parser.txt.TXTParser"],
        "stream_content_type":["smb"],
        "content_encoding":["UTF-8"],
        "content_type":["text/plain; charset=UTF-8"],
        "lastwritetime_str":["Thu Aug 09 16:37:47 UTC 2018"],
        "x_parsed_by_str":["org.apache.tika.parser.DefaultParser",
          "org.apache.tika.parser.txt.TXTParser"],
        "content_type_str":["text/plain; charset=UTF-8"],
        "stream_content_type_str":["smb"],
        "objecttype_str":["file"],
        "lastaccesstime_str":["Thu Aug 09 16:37:47 UTC 2018"],
        "_version_":1608340160124026880,
        "clusterip_str":["172.19.2.140"],
        "groupsid_str":["S-1-22-2-0"],
        "content_encoding_str":["UTF-8"],
        "ownersid_str":["S-1-22-1-0"],
        "changetime_str":["Thu Aug 09 16:37:47 UTC 2018"],
        "creationtime_str":["Thu Aug 09 16:37:47 UTC 2018"],
        "clustername_str":["ISL-TEST-8-1-0-0-40"]},
      {
        "lastaccesstime":["Thu Aug 09 16:37:52 UTC 2018"],
        "accessflags":[136],
        "id":"/ifs/language/Rus2",
        "readable_to":"S-1-22-1-0",
        "objecttype":["file"],
        "path":"/ifslanguage/Rus2",
        "fileattributes":[128],
        "dacls":"S-1-22-1-0:1442207, S-1-22-2-0:1179785, S-1-1-0:1179785",
        "groupsid":["S-1-22-2-0"],
        "ownersid":["S-1-22-1-0"],
        "lastwritetime":["Thu Aug 09 16:37:52 UTC 2018"],
        "creationtime":["Thu Aug 09 16:37:52 UTC 2018"],
        "filename":"Rus2",
        "allocationsize":[1048576],
        "filemode":[0],
        "clustername":["ISL-TEST-8-1-0-0-40"],
        "changetime":["Thu Aug 09 16:37:52 UTC 2018"],
        "clusterip":["172.19.2.140"],
        "stream_size":[852],
        "x_parsed_by":["org.apache.tika.parser.DefaultParser",
          "org.apache.tika.parser.txt.TXTParser"],
        "stream_content_type":["smb"],
        "content_encoding":["UTF-8"],
        "content_type":["text/plain; charset=UTF-8"],
        "lastwritetime_str":["Thu Aug 09 16:37:52 UTC 2018"],
        "x_parsed_by_str":["org.apache.tika.parser.DefaultParser",
          "org.apache.tika.parser.txt.TXTParser"],
        "content_type_str":["text/plain; charset=UTF-8"],
        "stream_content_type_str":["smb"],
        "objecttype_str":["file"],
        "lastaccesstime_str":["Thu Aug 09 16:37:52 UTC 2018"],
        "_version_":1608340160584351744,
        "clusterip_str":["172.19.2.140"],
        "groupsid_str":["S-1-22-2-0"],
        "content_encoding_str":["UTF-8"],
        "ownersid_str":["S-1-22-1-0"],
        "changetime_str":["Thu Aug 09 16:37:52 UTC 2018"],
        "creationtime_str":["Thu Aug 09 16:37:52 UTC 2018"],
        "clustername_str":["ISL-TEST-8-1-0-0-40"]},
      {
        "lastaccesstime":["Thu Aug 09 16:37:49 UTC 2018"],
        "accessflags":[136],
        "id":"/ifs/language/Ar1",
        "readable_to":"S-1-22-1-0",
        "objecttype":["file"],
        "path":"/ifslanguage/Ar1",
        "fileattributes":[128],
        "dacls":"S-1-22-1-0:1442207, S-1-22-2-0:1179785, S-1-1-0:1179785",
        "groupsid":["S-1-22-2-0"],
        "ownersid":["S-1-22-1-0"],
        "lastwritetime":["Thu Aug 09 16:37:49 UTC 2018"],
        "creationtime":["Thu Aug 09 16:37:49 UTC 2018"],
        "filename":"Ar1",
        "allocationsize":[1048576],
        "filemode":[0],
        "clustername":["ISL-TEST-8-1-0-0-40"],
        "changetime":["Thu Aug 09 16:37:49 UTC 2018"],
        "clusterip":["172.19.2.140"],
        "stream_size":[2678],
        "x_parsed_by":["org.apache.tika.parser.DefaultParser",
          "org.apache.tika.parser.txt.TXTParser"],
        "stream_content_type":["smb"],
        "content_encoding":["UTF-8"],
        "content_type":["text/plain; charset=UTF-8"],
        "lastwritetime_str":["Thu Aug 09 16:37:49 UTC 2018"],
        "x_parsed_by_str":["org.apache.tika.parser.DefaultParser",
          "org.apache.tika.parser.txt.TXTParser"],
        "content_type_str":["text/plain; charset=UTF-8"],
        "stream_content_type_str":["smb"],
        "objecttype_str":["file"],
        "lastaccesstime_str":["Thu Aug 09 16:37:49 UTC 2018"],
        "_version_":1608340155296382976,
        "clusterip_str":["172.19.2.140"],
        "groupsid_str":["S-1-22-2-0"],
        "content_encoding_str":["UTF-8"],
        "ownersid_str":["S-1-22-1-0"],
        "changetime_str":["Thu Aug 09 16:37:49 UTC 2018"],
        "creationtime_str":["Thu Aug 09 16:37:49 UTC 2018"],
        "clustername_str":["ISL-TEST-8-1-0-0-40"]},
      {
        "lastaccesstime":["Thu Aug 09 16:37:48 UTC 2018"],
        "accessflags":[136],
        "id":"/ifs/language/Sp1",
        "readable_to":"S-1-22-1-0",
        "objecttype":["file"],
        "path":"/ifslanguage/Sp1",
        "fileattributes":[128],
        "dacls":"S-1-22-1-0:1442207, S-1-22-2-0:1179785, S-1-1-0:1179785",
        "groupsid":["S-1-22-2-0"],
        "ownersid":["S-1-22-1-0"],
        "lastwritetime":["Thu Aug 09 16:37:48 UTC 2018"],
        "creationtime":["Thu Aug 09 16:37:48 UTC 2018"],
        "filename":"Sp1",
        "allocationsize":[1048576],
        "filemode":[0],
        "clustername":["ISL-TEST-8-1-0-0-40"],
        "changetime":["Thu Aug 09 16:37:48 UTC 2018"],
        "clusterip":["172.19.2.140"],
        "stream_size":[320],
        "x_parsed_by":["org.apache.tika.parser.DefaultParser",
          "org.apache.tika.parser.txt.TXTParser"],
        "stream_content_type":["smb"],
        "content_encoding":["UTF-8"],
        "content_type":["text/plain; charset=UTF-8"],
        "lastwritetime_str":["Thu Aug 09 16:37:48 UTC 2018"],
        "x_parsed_by_str":["org.apache.tika.parser.DefaultParser",
          "org.apache.tika.parser.txt.TXTParser"],
        "content_type_str":["text/plain; charset=UTF-8"],
        "stream_content_type_str":["smb"],
        "objecttype_str":["file"],
        "lastaccesstime_str":["Thu Aug 09 16:37:48 UTC 2018"],
        "_version_":1608340159144656896,
        "clusterip_str":["172.19.2.140"],
        "groupsid_str":["S-1-22-2-0"],
        "content_encoding_str":["UTF-8"],
        "ownersid_str":["S-1-22-1-0"],
        "changetime_str":["Thu Aug 09 16:37:48 UTC 2018"],
        "creationtime_str":["Thu Aug 09 16:37:48 UTC 2018"],
        "clustername_str":["ISL-TEST-8-1-0-0-40"]},
      {
        "lastaccesstime":["Thu Aug 09 16:37:53 UTC 2018"],
        "accessflags":[136],
        "id":"/ifs/language/Ger2",
        "readable_to":"S-1-22-1-0",
        "objecttype":["file"],
        "path":"/ifslanguage/Ger2",
        "fileattributes":[128],
        "dacls":"S-1-22-1-0:1442207, S-1-22-2-0:1179785, S-1-1-0:1179785",
        "groupsid":["S-1-22-2-0"],
        "ownersid":["S-1-22-1-0"],
        "lastwritetime":["Thu Aug 09 16:37:53 UTC 2018"],
        "creationtime":["Thu Aug 09 16:37:53 UTC 2018"],
        "filename":"Ger2",
        "allocationsize":[1048576],
        "filemode":[0],
        "clustername":["ISL-TEST-8-1-0-0-40"],
        "changetime":["Thu Aug 09 16:37:53 UTC 2018"],
        "clusterip":["172.19.2.140"],
        "stream_size":[626],
        "x_parsed_by":["org.apache.tika.parser.DefaultParser",
          "org.apache.tika.parser.txt.TXTParser"],
        "stream_content_type":["smb"],
        "content_encoding":["UTF-8"],
        "content_type":["text/plain; charset=UTF-8"],
        "lastwritetime_str":["Thu Aug 09 16:37:53 UTC 2018"],
        "x_parsed_by_str":["org.apache.tika.parser.DefaultParser",
          "org.apache.tika.parser.txt.TXTParser"],
        "content_type_str":["text/plain; charset=UTF-8"],
        "stream_content_type_str":["smb"],
        "objecttype_str":["file"],
        "lastaccesstime_str":["Thu Aug 09 16:37:53 UTC 2018"],
        "_version_":1608340161147437056,
        "clusterip_str":["172.19.2.140"],
        "groupsid_str":["S-1-22-2-0"],
        "content_encoding_str":["UTF-8"],
        "ownersid_str":["S-1-22-1-0"],
        "changetime_str":["Thu Aug 09 16:37:53 UTC 2018"],
        "creationtime_str":["Thu Aug 09 16:37:53 UTC 2018"],
        "clustername_str":["ISL-TEST-8-1-0-0-40"]},
      {
        "lastaccesstime":["Thu Aug 09 16:37:50 UTC 2018"],
        "accessflags":[136],
        "id":"/ifs/language/Rus1",
        "readable_to":"S-1-22-1-0",
        "objecttype":["file"],
        "path":"/ifslanguage/Rus1",
        "fileattributes":[128],
        "dacls":"S-1-22-1-0:1442207, S-1-22-2-0:1179785, S-1-1-0:1179785",
        "groupsid":["S-1-22-2-0"],
        "ownersid":["S-1-22-1-0"],
        "lastwritetime":["Thu Aug 09 16:37:50 UTC 2018"],
        "creationtime":["Thu Aug 09 16:37:50 UTC 2018"],
        "filename":"Rus1",
        "allocationsize":[1048576],
        "filemode":[0],
        "clustername":["ISL-TEST-8-1-0-0-40"],
        "changetime":["Thu Aug 09 16:37:50 UTC 2018"],
        "clusterip":["172.19.2.140"],
        "stream_size":[715],
        "x_parsed_by":["org.apache.tika.parser.DefaultParser",
          "org.apache.tika.parser.txt.TXTParser"],
        "stream_content_type":["smb"],
        "content_encoding":["UTF-8"],
        "content_type":["text/plain; charset=UTF-8"],
        "lastwritetime_str":["Thu Aug 09 16:37:50 UTC 2018"],
        "x_parsed_by_str":["org.apache.tika.parser.DefaultParser",
          "org.apache.tika.parser.txt.TXTParser"],
        "content_type_str":["text/plain; charset=UTF-8"],
        "stream_content_type_str":["smb"],
        "objecttype_str":["file"],
        "lastaccesstime_str":["Thu Aug 09 16:37:50 UTC 2018"],
        "_version_":1608340156644851712,
        "clusterip_str":["172.19.2.140"],
        "groupsid_str":["S-1-22-2-0"],
        "content_encoding_str":["UTF-8"],
        "ownersid_str":["S-1-22-1-0"],
        "changetime_str":["Thu Aug 09 16:37:50 UTC 2018"],
        "creationtime_str":["Thu Aug 09 16:37:50 UTC 2018"],
        "clustername_str":["ISL-TEST-8-1-0-0-40"]},
      {
        "lastaccesstime":["Thu Aug 09 16:37:55 UTC 2018"],
        "accessflags":[136],
        "id":"/ifs/language/Fr1",
        "readable_to":"S-1-22-1-0",
        "objecttype":["file"],
        "path":"/ifslanguage/Fr1",
        "fileattributes":[128],
        "dacls":"S-1-22-1-0:1442207, S-1-22-2-0:1179785, S-1-1-0:1179785",
        "groupsid":["S-1-22-2-0"],
        "ownersid":["S-1-22-1-0"],
        "lastwritetime":["Thu Aug 09 16:37:55 UTC 2018"],
        "creationtime":["Thu Aug 09 16:37:55 UTC 2018"],
        "filename":"Fr1",
        "allocationsize":[1048576],
        "filemode":[0],
        "clustername":["ISL-TEST-8-1-0-0-40"],
        "changetime":["Thu Aug 09 16:37:55 UTC 2018"],
        "clusterip":["172.19.2.140"],
        "stream_size":[1211],
        "x_parsed_by":["org.apache.tika.parser.DefaultParser",
          "org.apache.tika.parser.txt.TXTParser"],
        "stream_content_type":["smb"],
        "content_encoding":["UTF-8"],
        "content_type":["text/plain; charset=UTF-8"],
        "lastwritetime_str":["Thu Aug 09 16:37:55 UTC 2018"],
        "x_parsed_by_str":["org.apache.tika.parser.DefaultParser",
          "org.apache.tika.parser.txt.TXTParser"],
        "content_type_str":["text/plain; charset=UTF-8"],
        "stream_content_type_str":["smb"],
        "objecttype_str":["file"],
        "lastaccesstime_str":["Thu Aug 09 16:37:55 UTC 2018"],
        "_version_":1608340158946476032,
        "clusterip_str":["172.19.2.140"],
        "groupsid_str":["S-1-22-2-0"],
        "content_encoding_str":["UTF-8"],
        "ownersid_str":["S-1-22-1-0"],
        "changetime_str":["Thu Aug 09 16:37:55 UTC 2018"],
        "creationtime_str":["Thu Aug 09 16:37:55 UTC 2018"],
        "clustername_str":["ISL-TEST-8-1-0-0-40"]},
      {
        "lastaccesstime":["Thu Aug 09 16:37:50 UTC 2018"],
        "accessflags":[136],
        "id":"/ifs/language/Beng2",
        "readable_to":"S-1-22-1-0",
        "objecttype":["file"],
        "path":"/ifslanguage/Beng2",
        "fileattributes":[128],
        "dacls":"S-1-22-1-0:1442207, S-1-22-2-0:1179785, S-1-1-0:1179785",
        "groupsid":["S-1-22-2-0"],
        "ownersid":["S-1-22-1-0"],
        "lastwritetime":["Thu Aug 09 16:37:50 UTC 2018"],
        "creationtime":["Thu Aug 09 16:37:50 UTC 2018"],
        "filename":"Beng2",
        "allocationsize":[1048576],
        "filemode":[0],
        "clustername":["ISL-TEST-8-1-0-0-40"],
        "changetime":["Thu Aug 09 16:37:50 UTC 2018"],
        "clusterip":["172.19.2.140"],
        "stream_size":[2301],
        "x_parsed_by":["org.apache.tika.parser.DefaultParser",
          "org.apache.tika.parser.txt.TXTParser"],
        "stream_content_type":["smb"],
        "content_encoding":["UTF-8"],
        "content_type":["text/plain; charset=UTF-8"],
        "lastwritetime_str":["Thu Aug 09 16:37:50 UTC 2018"],
        "x_parsed_by_str":["org.apache.tika.parser.DefaultParser",
          "org.apache.tika.parser.txt.TXTParser"],
        "content_type_str":["text/plain; charset=UTF-8"],
        "stream_content_type_str":["smb"],
        "objecttype_str":["file"],
        "lastaccesstime_str":["Thu Aug 09 16:37:50 UTC 2018"],
        "_version_":1608340161101299712,
        "clusterip_str":["172.19.2.140"],
        "groupsid_str":["S-1-22-2-0"],
        "content_encoding_str":["UTF-8"],
        "ownersid_str":["S-1-22-1-0"],
        "changetime_str":["Thu Aug 09 16:37:50 UTC 2018"],
        "creationtime_str":["Thu Aug 09 16:37:50 UTC 2018"],
        "clustername_str":["ISL-TEST-8-1-0-0-40"]}]
  }}

我在 _default 配置中添加了 LangDetectLanguageIdentifierUpdateProcessorFactory 类,用来检测语言,并将检测结果作为 language_s 字段与其他字段一起显示:

   <updateRequestProcessorChain name="langid">
     <processor class="org.apache.solr.update.processor.LangDetectLanguageIdentifierUpdateProcessorFactory">
       <str name="langid.fl">text_general</str>
       <str name="langid.langField">language_s</str>
       <str name="langid.fallback">generic</str>
       <str name="langid.threshold">0.2</str>
       <bool name="langid.enforceSchema">false</bool>

我还将“langid”添加到了 initParams 中:

  <initParams path="/update/**,/query,/select,/tvrh,/elevate,/spell,/browse">
    <lst name="defaults">
       <str name="df">_text_</str>
       <str name="update.chain">langid</str>
    </lst>
  </initParams>

总而言之,在添加 langid 并重新运行“索引作业”之后,查询不再返回任何文档(numFound 为 0):

{
  "responseHeader":{
    "zkConnected":true,
    "status":0,
    "QTime":16,
    "params":{
      "q":"*:*"}},
  "response":{"numFound":0,"start":0,"maxScore":0.0,"docs":[]
  }}

有没有办法在不修改“索引作业”的情况下解决这个问题?我是不是在配置文件中遗漏了什么?

4

0 回答 0