0

我想使用 Lucene 将 RDBMS SQL 查询返回的 320 万条记录存储到文本文件中,然后对其进行搜索。我在问题《如何在 Lucene 中将 RAMDirectory 集成到 FSDirectory 中》里看到了一个示例。

下面这段代码对我有用:

  /**
   * Small demo that copies rows from a Teradata table into an on-disk Lucene
   * index and then runs a sample query against that index.
   *
   * Rows are written straight from the JDBC ResultSet into the index; no
   * intermediate text file is produced.
   */
  public class lucetest {
        public static void main(String args[]) {
            lucetest lucetestObj = new lucetest();
            lucetestObj.main1(lucetestObj);
        }

        /**
         * Builds the index on disk from the database and then searches it for
         * the term "india" in the "name" field.
         *
         * Any failure is printed to stderr; the JDBC connection and the Lucene
         * directory are released whether or not indexing succeeded.
         *
         * @param lucetestObj instance whose indexDocs/searchDocs are invoked
         */
        public void main1(lucetest lucetestObj) {
            final File INDEX_DIR = new File(
                    "C:\\Documents and Settings\\44444\\workspace\\lucenbase\\bin\\org\\lucenesample\\index");

            Connection conn = null;
            Directory index = null;
            try {
                Class.forName("com.teradata.jdbc.TeraDriver").newInstance();
                conn = DriverManager.getConnection(
                        "jdbc:teradata://x.x.x.x/CHARSET=UTF16", "aaa", "bbb");
                StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);

                // FSDirectory keeps the index on disk; a RAMDirectory could be
                // used here instead to hold it entirely in memory.
                index = FSDirectory.open(INDEX_DIR);

                // NOTE(review): the config's open mode is left at its default, so
                // re-running against an existing index presumably appends the same
                // rows again - confirm whether OpenMode.CREATE is wanted.
                IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,
                        analyzer);
                IndexWriter writer = new IndexWriter(index, config);
                System.out.println("Indexing to directory '" + INDEX_DIR + "'...");

                boolean indexed = false;
                try {
                    lucetestObj.indexDocs(writer, conn);
                    writer.optimize();
                    indexed = true;
                } finally {
                    if (indexed) {
                        writer.close();
                    } else {
                        // Discard the half-written batch instead of committing it,
                        // and release the writer so it is not left open.
                        writer.rollback();
                    }
                }

                System.out.println("pepsi");
                lucetestObj.searchDocs(index, analyzer, "india");
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                if (index != null) {
                    try {
                        index.close();
                    } catch (java.io.IOException e1) {
                        e1.printStackTrace();
                    }
                }
                if (conn != null) {
                    try {
                        conn.close();
                    } catch (SQLException e2) {
                        e2.printStackTrace();
                    }
                }
            }
        }

        /**
         * Runs the extraction query and adds one Lucene document per row.
         *
         * Each document stores "id" (stored only, not searchable), and "name" and
         * "color" (stored and analyzed for full-text search).
         *
         * @param writer open index writer that receives the documents
         * @param conn   open JDBC connection used to run the query
         * @throws Exception if the query or the index write fails
         */
        void indexDocs(IndexWriter writer, Connection conn) throws Exception {
            // CFMASTERID has to be in the select list because it is read below,
            // and the table name needs a separating space before "sample".
            String queryy = "  SELECT  CFMASTERID, CFMASTERNAME, " + "  ULTIMATEPARENTID,"
                    + "ULTIMATEPARENT, LONG_NAMEE FROM  XCUST_SRCH_SRCH "
                    + "sample 100000;";
            Statement stmt = conn.createStatement();
            try {
                ResultSet rs = stmt.executeQuery(queryy);
                try {
                    while (rs.next()) {
                        Document d = new Document();
                        d.add(new Field("id", nullToEmpty(rs.getString("CFMASTERID")),
                                Field.Store.YES, Field.Index.NO));
                        d.add(new Field("name", nullToEmpty(rs.getString("CFMASTERNAME")),
                                Field.Store.YES, Field.Index.ANALYZED));
                        d.add(new Field("color", nullToEmpty(rs.getString("LONG_NAMEE")),
                                Field.Store.YES, Field.Index.ANALYZED));
                        writer.addDocument(d);
                    }
                } finally {
                    rs.close();
                }
            } finally {
                stmt.close();
            }
        }

        /**
         * Maps a SQL NULL column value to an empty string.
         *
         * ResultSet.getString returns null for SQL NULL, and the Lucene 3.x Field
         * constructor is documented to reject a null value.
         */
        private static String nullToEmpty(String value) {
            return value == null ? "" : value;
        }

        /**
         * Searches the "name" field and prints up to ten hits to stdout.
         *
         * @param index        directory holding the index to search
         * @param analyzer     analyzer used to parse the query text
         * @param searchstring query text; "lucene" is used when null or empty
         * @throws Exception if the query cannot be parsed or the index cannot be read
         */
        void searchDocs(Directory index, StandardAnalyzer analyzer,
                String searchstring) throws Exception {

            String querystr = (searchstring != null && searchstring.length() > 0)
                    ? searchstring : "lucene";
            Query q = new QueryParser(Version.LUCENE_35, "name", analyzer)
                    .parse(querystr);

            int hitsPerPage = 10;
            IndexReader reader = IndexReader.open(index);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    TopScoreDocCollector collector = TopScoreDocCollector.create(
                            hitsPerPage, true);
                    searcher.search(q, collector);
                    ScoreDoc[] hits = collector.topDocs().scoreDocs;
                    System.out.println("Found " + hits.length + " hits.");
                    for (int i = 0; i < hits.length; ++i) {
                        int docId = hits[i].doc;
                        Document d = searcher.doc(docId);
                        System.out.println((i + 1) + ".CFMASTERNAME " + d.get("name")
                                + " ****LONG_NAMEE**" + d.get("color") + "****ID******"
                                + d.get("id"));
                    }
                } finally {
                    searcher.close();
                }
            } finally {
                // Per the Lucene 3.x Javadoc, a searcher built from a caller-supplied
                // reader does not close that reader, so close it here.
                reader.close();
            }
        }
    }

如何修改此代码,以便将 SQL 结果表保存到硬盘上指定路径的目录中,而不是保存在 RAM 目录中?我自己想不出解决方案。我的要求是:通过 Lucene 查询这份存储在磁盘上的表数据时,能够非常快地返回结果。因此,我希望借助 Lucene 索引把数据保存在磁盘上。

4

1 回答 1

1
Directory index = FSDirectory.open(INDEX_DIR);

您提到将 sql 结果保存到文本文件,但这是不必要的开销。在遍历 ResultSet 时,将行直接保存到 Lucene 索引。

顺便说一句,虽然这并不重要,但用全大写来命名局部变量(无论是否为 final)不符合惯例,应使用驼峰式命名。全大写只适用于类级常量(即类的 static final 成员)。

于 2012-04-06T14:30:33.847 回答