有两个问题:
- 使用 LIKE 和 CONTAINS 时得到的结果不同
- 使用 CONTAINS 搜索单个字符时速度极慢
出于性能考虑,我想摆脱 LIKE 搜索,但 Oracle Text 在我这里无法正常工作。
我不确定这是否有影响,但 CLOB 中主要是俄语文本。在我看来这应该无关紧要,因为 CONTAINS 搜索并不使用词典和词素分析。
表中的总行数:215577
-- Index creation: Oracle Text CONTEXT index over the CLOB column.
-- The parameter string names the default datastore and requests sync on commit.
CREATE INDEX schema_name.idx_01
    ON schema_name.t_searchable_table (clob_value)
    INDEXTYPE IS ctxsys.context
    PARAMETERS ('DATASTORE CTXSYS.DEFAULT_DATASTORE sync (on commit)');
-- Index sync: explicitly synchronize the text index created as IDX_01.
BEGIN
    ctx_ddl.sync_index('SCHEMA_NAME.IDX_01');
END;
据我所知,索引会在提交时成功刷新,但我找不到任何可以直观确认这一点的办法。
下面是我尝试过的一些查询,其中 LIKE 搜索的结果用作性能和行数的对比基准。我测试的是把数据插入表的性能(查询时间足够稳定)。
--- String search
-- Baseline: the original, untuned query using LIKE '%...%'.
-- Reported result: 116643 rows inserted in 8,653 seconds
INSERT INTO schema_name.t_search_results (session_id, entity_id)
SELECT
    'a',
    src.entity_id
FROM schema_name.t_searchable_table src
WHERE LOWER(src.clob_value) LIKE '%про%';
-- Author's note: not the form the documentation recommends, but it is fast
-- and sql%rowcount matches the LIKE query.
-- Reported result: 116643 rows inserted in 2,959 seconds
INSERT INTO schema_name.t_search_results (session_id, entity_id)
SELECT
    'a',
    src.entity_id
FROM schema_name.t_searchable_table src
WHERE CONTAINS(src.clob_value, '%про%', 1) > 0;
-- Author's note: the form the Oracle docs describe as correct, yet the
-- number of rows returned is completely wrong.
-- Reported result: 11 rows inserted in 0,081 seconds
INSERT INTO schema_name.t_search_results (session_id, entity_id)
SELECT
    'a',
    src.entity_id
FROM schema_name.t_searchable_table src
WHERE CONTAINS(src.clob_value, 'про', 1) > 0;
--- Number search
-- Baseline: the original, untuned query using LIKE '%...%'.
-- Reported result: 121918 rows inserted in 8,045 seconds
INSERT INTO schema_name.t_search_results (session_id, entity_id)
SELECT
    'a',
    src.entity_id
FROM schema_name.t_searchable_table src
WHERE LOWER(src.clob_value) LIKE '%1%';
-- Author's note: row count differs slightly from the LIKE query, but it is fast.
-- Reported result: 117228 rows inserted in 2,065 seconds
INSERT INTO schema_name.t_search_results (session_id, entity_id)
SELECT
    'a',
    src.entity_id
FROM schema_name.t_searchable_table src
WHERE CONTAINS(src.clob_value, '1', 1) > 0;
-- Author's note: one row fewer than the LIKE query (cause unknown) and
-- extremely slow.
-- Reported result: 121917 rows inserted in 97,760 seconds
INSERT INTO schema_name.t_search_results (session_id, entity_id)
SELECT
    'a',
    src.entity_id
FROM schema_name.t_searchable_table src
WHERE CONTAINS(src.clob_value, '%1%', 1) > 0;
-- Single character
-- Baseline: the original, untuned query using LIKE '%...%'.
-- Reported result: 124095 rows inserted in 9,112 seconds
INSERT INTO schema_name.t_search_results (session_id, entity_id)
SELECT
    'a',
    src.entity_id
FROM schema_name.t_searchable_table src
WHERE LOWER(src.clob_value) LIKE '%а%';
-- Author's note: syntax considered incorrect; the row count is right but
-- performance is awful.
-- Reported result: 124095 rows inserted in 94,927 seconds
INSERT INTO schema_name.t_search_results (session_id, entity_id)
SELECT
    'a',
    src.entity_id
FROM schema_name.t_searchable_table src
WHERE CONTAINS(src.clob_value, '%а%', 1) > 0;
-- Author's note: syntax considered correct and the query is fast, but
-- roughly half of the rows are missing.
-- Reported result: 60345 rows inserted in 1,215 seconds
INSERT INTO schema_name.t_search_results (session_id, entity_id)
SELECT
    'a',
    src.entity_id
FROM schema_name.t_searchable_table src
WHERE CONTAINS(src.clob_value, 'а', 1) > 0;