8

I want a query that returns the number of sequentially matching words in two strings. Example:

Table

Id  column1               column2     result   
1   'foo bar live'        'foo bar'       2  
2   'foo live tele'       'foo tele'      1  
3   'bar foo live'        'foo bar live'  0 

To get the total number of occurrences, I am using:

-- Counts, for each row, the words that column1 and column2 have in common
-- (word position is not taken into account).
-- A query text is assembled per row by string concatenation: each lower-cased
-- column is turned into a sys.dbms_debug_vc2coll('w1','w2',...) constructor by
-- replacing every space with a quote-comma-quote sequence; the two collections
-- are combined with MULTISET INTERSECT and CARDINALITY counts the elements of
-- the intersection.
-- dbms_xmlgen.getxmltype runs the generated text and
-- extractvalue(..., '//text()') reads the single value back as cnt.
-- NOTE(review): column values are spliced into the generated SQL unescaped, so
-- a value containing a single quote would presumably break or alter the
-- generated statement -- confirm before using on untrusted data.
-- NOTE(review): "table." looks like a placeholder for the real table name.
select id, column1,column2,
extractvalue(dbms_xmlgen.getxmltype('select cardinality (
  sys.dbms_debug_vc2coll(''' || replace(lower(column1), ' ', ''',''' ) || ''') multiset intersect
  sys.dbms_debug_vc2coll('''||replace(lower(column2), ' ', ''',''' )||'''))  x from dual'), '//text()') cnt
from table.

Can anyone please suggest a similar query for sequential matches as well? I want the number of sequential matches and the number of occurrences shown together.

4

3 回答 3

3

为什么要放弃纯查询的方式呢?我知道下面的查询有点复杂,也希望有人能改进它;不过这是我利用空闲时间写的,靠它撑过了一个下午的电话会议……

可在 SQLFiddle 上查看演示:

-- Returns, for every row of Table1, the number of leading words that column1
-- and column2 share in the same order (RESULT), highest count first.
--
-- How it works:
--   * A trailing space is appended to both columns so that every word,
--     including the last one, is terminated by a space.
--   * CONNECT BY generates one candidate row per word of column1 (LEVEL = n).
--   * The WHERE clause keeps level n only when the first n words of both
--     columns are identical, so the highest surviving level is the answer.
--   * The LEFT JOIN from Table1 keeps rows without any matching prefix and
--     reports them as 0.
SELECT Table1.id,
       Table1.column1,
       Table1.column2,
       max(nvl(t.l, 0)) RESULT
FROM Table1
LEFT JOIN (
  SELECT id,
         column1,
         column2,
         LEVEL l
    FROM (SELECT id,
                 column1 || ' ' column1,
                 column2 || ' ' column2
            FROM Table1)
   -- Level 1 compares the bare first word; deeper levels compare the prefix up
   -- to and including the n-th space. When column2 has fewer than n words,
   -- instr returns 0, substr returns NULL and the comparison fails.
   WHERE CASE LEVEL
           WHEN 1 THEN substr(column1, 1, instr(column1, ' ', 1, LEVEL) - 1)
           ELSE substr(column1, 1, instr(column1, ' ', 1, LEVEL))
         END =
         CASE LEVEL
           WHEN 1 THEN substr(column2, 1, instr(column2, ' ', 1, LEVEL) - 1)
           ELSE substr(column2, 1, instr(column2, ' ', 1, LEVEL))
         END
   START WITH column1 IS NOT NULL
   CONNECT BY instr(column1, ' ', 1, LEVEL) > 0
          -- Keep each hierarchy inside its own source row. Without this, every
          -- level is joined to every row of Table1 and the intermediate result
          -- grows exponentially with the row count.
          -- Assumes id identifies a row of Table1 -- TODO confirm.
          AND PRIOR id = id
          -- Non-deterministic term so Oracle does not report a CONNECT BY loop
          -- when a row is connected to itself.
          AND PRIOR sys_guid() IS NOT NULL
  ) t
  ON trim(t.column1) = Table1.column1
 AND trim(t.column2) = Table1.column2
 AND t.id = Table1.id
GROUP BY Table1.id,
         Table1.column1,
         Table1.column2
ORDER BY max(nvl(t.l, 0)) DESC
于 2013-10-09T16:12:14.720 回答
3

就个人而言,在这种情况下,我会选择 PL/SQL 代码而不是纯 SQL。例如:

包规范:

-- Public interface of PKG.
create or replace package PKG as

  -- Returns how many leading space-separated words of p_str1 and p_str2
  -- are equal when compared position by position.
  function NumOfSeqWords(p_str1 in varchar2, p_str2 in varchar2) return number;

end PKG;

包体:

create or replace package body PKG is
  -- Returns the number of leading words that p_str1 and p_str2 have in common,
  -- comparing them position by position. Words are separated by a single
  -- space; comparison is case-sensitive. Counting stops at the first pair of
  -- words that differ or as soon as either string runs out of words.
  --
  -- Parameters:
  --   p_str1, p_str2 - the two strings to compare (NULL yields 0).
  -- Returns:
  --   the count of matching leading words (0 when the first words differ).
  function NumOfSeqWords(
    p_str1 in varchar2,
    p_str2 in varchar2
  ) return number
  is
    -- All buffers use the PL/SQL VARCHAR2 maximum so that neither a long input
    -- nor a single long word raises ORA-06502 (the word buffers used to be
    -- limited to 1000 characters and the string buffers to 4000).
    l_str1     varchar2(32767) := p_str1;
    l_str2     varchar2(32767) := p_str2;
    l_res      number  default 0;
    l_del_pos1 number;
    l_del_pos2 number;
    l_word1    varchar2(32767);
    l_word2    varchar2(32767);
  begin
    loop
      -- Position of the next delimiter; NULL once the string is exhausted.
      l_del_pos1 := instr(l_str1, ' ');
      l_del_pos2 := instr(l_str2, ' ');

      -- No delimiter left: the remainder is the last word. A NULL position
      -- (exhausted string) falls into ELSE and yields a NULL word.
      case l_del_pos1
        when 0
        then l_word1 := l_str1;
             l_str1 := '';
        else l_word1 := substr(l_str1, 1, l_del_pos1 - 1);
      end case;
      case l_del_pos2
        when 0
        then l_word2 := l_str2;
             l_str2 := '';
        else l_word2 := substr(l_str2, 1, l_del_pos2 - 1);
      end case;

      -- Stop on the first mismatch or when either side has no word left.
      exit when (l_word1 <> l_word2) or
                ((l_word1 is null) or (l_word2 is null));

      l_res := l_res + 1;

      -- Drop the word just compared (and its delimiter) from both strings.
      l_str1 := substr(l_str1, l_del_pos1 + 1);
      l_str2 := substr(l_str2, l_del_pos2 + 1);
    end loop;
    return l_res;
  end NumOfSeqWords;
end PKG;

测试用例:

 -- Runs pkg.NumOfSeqWords over the three sample rows from the question.
 with samples(id1, col1, col2) as (
   select 1, 'foo bar live', 'foo bar' from dual
   union all
   select 2, 'foo live tele', 'foo tele' from dual
   union all
   select 3, 'bar foo live', 'foo bar live' from dual
 )
 select s.id1,
        s.col1,
        s.col2,
        pkg.NumOfSeqWords(s.col1, s.col2) as res
   from samples s;

结果:

       ID1 COL1          COL2                RES
---------- ------------- ------------ ----------
         1 foo bar live  foo bar               2
         2 foo live tele foo tele              1
         3 bar foo live  foo bar live          0
于 2013-10-09T15:52:52.527 回答
0

我知道这个问题很老,但我找到了一个很好的解决方案:

您可以从这里测试https://rextester.com/l/oracle_online_compiler

-- For each sample row, Test is the number of (w1, w2) pairs where a word of
-- col1 equals a word of col2 after trimming and upper-casing.
-- Word position is not considered, so this counts common words rather than
-- sequential matches; a word repeated on either side adds one pair per
-- combination.
select
id1,
col1,
col2,


(
 Select Count(*)

 From

 -- c1: words of col1. Spaces are first replaced by commas, then every comma
 -- by "," and the whole text is wrapped in double quotes, giving
 -- "w1","w2",... -- so both spaces and commas act as separators.
 -- NOTE(review): xmltable presumably evaluates that text as a sequence of
 -- string literals, one row per word -- confirm on the target Oracle version.
 -- NOTE(review): a value containing a double quote would presumably break the
 -- generated expression -- confirm.
 -- Substr(Column_Value,0,Length(Column_Value)) appears to take the whole value;
 -- empty items are filtered out by the Is Not Null test.
 (Select Upper(To_Char(Trim(Substr(Column_Value,0,Length(Column_Value))))) w1
  From xmltable(('"' || Replace(Replace(col1,' ', ','), ',', '","') || '"'))
      Where Upper(To_Char(Trim(Substr(Column_Value,0,Length(Column_Value))))) Is Not Null) c1,

 -- c2: words of col2, produced the same way as c1.
 (Select Upper(To_Char(Trim(Substr(Column_Value,0,Length(Column_Value))))) w2
  From xmltable(('"' || Replace(Replace(col2,' ', ','), ',', '","') || '"'))
  Where Upper(To_Char(Trim(Substr(Column_Value,0,Length(Column_Value))))) Is Not Null) c2

 -- Comma join of c1 and c2 restricted to equal words.
 Where c1.w1 = c2.w2


 ) Test


-- Inline sample data; replace t1 with the real table to use the query.
From 
(select 1 Id1, 'foo bar live' col1, 'foo bar' col2  from dual union all
 select 2, 'foo live tele pepe gato coche' ,'bar foo live tele perro gato' from dual union all
 select 3, 'bar foo live tele perro gato'  ,'foo bar live'from dual) t1
于 2021-02-25T13:22:02.210 回答