oracle - 使用 pl\sql 逐行读取 clob

Question

在我的项目中，我使用 oracle 作为主数据库，我遇到了解析 clob 的问题。所以假设我们有一个有价值的 clob

   aaaaaa
   cccccc
   bbbbbb

它存储在表测试中...

我需要编写 plsql 程序来获取这个 clob 并将其拆分，以便我将拥有包含三个项目的数组 [aaaaaa，cccccccc，bbbbbbb]。

有没有可能的解决方案？

score 15 · Accepted Answer

这是一段有效的代码。出于性能目的，我建议您使用显式游标而不是隐式游标（FOR i IN (select...)）。

首先是创建测试用例的脚本。

create table test (c clob);

insert into test (c) values (
'azertyuiop
qsdfghjklm
wxcvbn
');

然后这里是逐行读取 Clob 的脚本：

/* Formatted on 28/08/2012 14:16:52 (QP5 v5.115.810.9015) */
declare
    nStartIndex number := 1;
    nEndIndex number := 1;
    nLineIndex number := 0;
    vLine varchar2(2000);

    cursor c_clob is
    select c from test;

    c clob;
    -------------------------------
    procedure printout
       (p_clob in out nocopy clob) is
      offset number := 1;
      amount number := 32767;
      len    number := dbms_lob.getlength(p_clob);
      lc_buffer varchar2(32767);
      i pls_integer := 1;
    begin
      if ( dbms_lob.isopen(p_clob) != 1 ) then
        dbms_lob.open(p_clob, 0);
      end if;
      amount := instr(p_clob, chr(10), offset);
      while ( offset < len )
      loop
        dbms_lob.read(p_clob, amount, offset, lc_buffer);
        dbms_output.put_line('Line #'||i||':'||lc_buffer);
       offset := offset + amount;
       i := i + 1;
      end loop; 
          if ( dbms_lob.isopen(p_clob) = 1 ) then
        dbms_lob.close(p_clob);
      end if; 
    exception
      when others then
         dbms_output.put_line('Error : '||sqlerrm);
    end printout;
    ---------------------------
begin
    dbms_output.put_line('-----------');
    open c_clob;
    loop
       fetch c_clob into c;
       exit when c_clob%notfound;
       printout(c);
    end loop;
    close c_clob;
end;

'amount' 变量用于检测行尾位置。请注意，在某些情况下，行尾是 CHR(10)||CHR(13) (CR + LF)，而在其他一些情况下，它只是 CHR(10)。

score 9 · Accepted Answer

虽然 SQL 正则表达式/按级别连接方法可能是最优雅的，但它在性能方面相当糟糕（对于我在 11.2.0.3.0 上的测试用例）。像这样的简单解析要快得多。

procedure parse_clob(p_clob in clob) is
l_offset pls_integer:=1;
l_line varchar2(32767);
l_total_length pls_integer:=length(p_clob);
l_line_length pls_integer;
begin
  while l_offset<=l_total_length loop
    l_line_length:=instr(p_clob,chr(10),l_offset)-l_offset;
    if l_line_length<0 then
      l_line_length:=l_total_length+1-l_offset;
    end if;
    l_line:=substr(p_clob,l_offset,l_line_length);
    dbms_output.put_line(l_line); --do line processing
    l_offset:=l_offset+l_line_length+1;
  end loop;
end parse_clob;

score 4 · Accepted Answer

如果... - 您安装了 APEX - 而且 clob 小于 32K，您可能还需要查看以下代码：

declare
  l_text varchar2(32767) := '...';
  l_rows wwv_flow_global.vc_arr2;
begin
  l_rows := apex_util.string_to_table(l_text, chr(10));
  for i in 1 .. l_rows.count loop
    dbms_output.put_line(l_rows(i));
  end loop;
end;
/

score 2 · Accepted Answer

一个流水线函数，带有一些额外的选项来驱动行为。在 Windows、Oracle 11g 上测试/工作（我怀疑它在 *nix 环境中可能会失败，因为行终止方式）。

CREATE OR REPLACE FUNCTION ETL_HELPER_PARSE
   (P_CLOB NCLOB, P_LINES_TO_SKIP INT DEFAULT 0, P_PUT_EMPTY_LINES CHAR DEFAULT 'N') RETURN SYS.ODCIVarchar2List PIPELINED
AS
  c_top_lines_to_skip  CONSTANT NUMBER  NOT NULL := P_LINES_TO_SKIP;
  c_output_empty_lines CONSTANT CHAR(1) NOT NULL := P_PUT_EMPTY_LINES; 
  --
  l_len     INT := DBMS_LOB.GETLENGTH(P_CLOB);
  l_hit     INT := 0;
  l_offset  INT := 1;
  l_amount  INT;  
  l_buffer  VARCHAR2(32767);
  l_cnt     INT := 1;  
BEGIN  
  WHILE ( l_offset < l_len )
  LOOP
    l_hit := DBMS_LOB.INSTR (
     lob_loc    => P_CLOB           -- IN   CLOB      CHARACTER SET ANY_CS
    ,pattern    => CHR(13)||CHR(10) -- IN   VARCHAR2  CHARACTER SET lob_loc%CHARSET
    ,offset     => l_offset         -- IN   INTEGER := 1
    ,nth        => 1                -- IN   INTEGER := 1
    );
    l_amount := CASE WHEN COALESCE(l_hit, 0) > 0 THEN l_hit - l_offset ELSE l_len - l_offset + 1 END;
    -- `l_amount=0` means a new empty line has been encountered
    IF l_cnt > c_top_lines_to_skip       
    THEN
      IF l_amount > 0
      THEN
        DBMS_LOB.READ(P_CLOB, l_amount, l_offset, l_buffer);
        PIPE ROW (l_buffer);
      ELSIF UPPER(c_output_empty_lines) = 'Y'
      THEN
        PIPE ROW ('');
      END IF;
    END IF;

    l_offset := CASE WHEN COALESCE(l_hit, 0) > 0  THEN l_hit + 2 ELSE l_len END;    
    l_cnt := l_cnt + 1;
  end loop;
EXCEPTION
  WHEN OTHERS THEN
     DBMS_OUTPUT.PUT_LINE('Error : '||SQLERRM);
END ETL_HELPER_PARSE;

score 1 · Accepted Answer

动态行长示例

以及 UNIX 和 WIN 文件的替代品

和 CR/LF 在文件末尾或没有它

为 TEST 创建表

drop table pbrev.test_SVT_tmp;
create table pbrev.test_SVT_tmp (xc clob);
insert into pbrev.test_SVT_tmp (xc) values (
--'azertyuiop;11' || chr(13) || chr(10) ||'qsdfghjklm;7878' || chr(13) || chr(10) ||'wxcvbn;0' || chr(13) || chr(10) );
'azertyuiop;11' || chr(13) || chr(10) ||'qsdfghjklm;7878' || chr(13) || chr(10) ||'wxcvbn;0' );
'azerty jhjh  huiop;11
qsdfgkj  hjklhhhhhhhhhhhm;7878
wxcvbn;0
dkjsk kjdsk5456 4654 5646 54645
FINISH'
);
delete from pbrev.test_SVT_tmp ;
select xc from pbrev.test_SVT_tmp;

--SET SERVEROUTPUT ON;
--SET SERVEROUTPUT OFF;
declare
    nStartIndex number := 1;
    nEndIndex number := 1;
    nLineIndex number := 0;
    vLine varchar2(2000);
    cursor c_clob is
    select xc from pbrev.test_SVT_tmp;
    c clob;
    procedure printout
       (p_clob in out nocopy clob) is
      offset number := 1;
      amount number := 32767;
      amount_last number := 0;
      len    number := dbms_lob.getlength(p_clob);
      lc_buffer varchar2(32767);
      line_seq pls_integer := 1;
      -- For UNIX type file - replace CHR(13) to NULL
      CR char := chr(13);
      --CR char := NULL;
      LF char := chr(10);      
      nCRLF number;
      sCRLF varchar2(2);
      b_finish boolean := true;
begin
      sCRLF := CR || LF;
      nCRLF := Length(sCRLF);
      if ( dbms_lob.isopen(p_clob) != 1 ) then
        dbms_lob.open(p_clob, 0);
      end if;
      amount := instr(p_clob, sCRLF, offset);
      while ( offset < len )
      loop
        -- For without CR/LF on end file
        If amount < 0 then
          amount := len - offset + 1;
          b_finish := false;
        End If;
        dbms_lob.read(p_clob, amount, offset, lc_buffer);
        If b_finish then
          lc_buffer := SUBSTR(lc_buffer,1,Length(lc_buffer)-1);  
        End If;
        if (line_seq-1) > 0 then
          amount_last := amount_last + amount;
          offset := offset + amount; 
        else
          amount_last := amount;
          offset := amount + nCRLF;
        end if;
        amount := instr(p_clob, sCRLF, offset);
        amount := amount - amount_last;
        dbms_output.put_line('Line #'||line_seq||': '||lc_buffer);
        line_seq := line_seq + 1;
      end loop; 
      if ( dbms_lob.isopen(p_clob) = 1 ) then
        dbms_lob.close(p_clob);
      end if; 
    exception
      when others then
         dbms_output.put_line('Error : '||sqlerrm);
    end printout;
begin
    open c_clob;
    loop
       fetch c_clob into c;
       exit when c_clob%notfound;
       printout(c);
    end loop;
    close c_clob;
end;

score 1 · Accepted Answer

这是基于@Pierre-Gilles Levallois 他的回答的后续回答。因为我认为它包含一些错误，所以我试图修复它们。

空行导致错误
仍在打印输出中的换行符（可能需要）
"amount :=" 不在 while 循环内，我认为如果任何值比 CLOB 中的第一行短，这会导致一个大错误

我已经将其实现为相当快速和肮脏的修复。我敢肯定应该有更优雅的解决方案......来吧。示例表：

create table test (c clob);

insert into test (c) values (
-- line 1 (empty)
chr(10)||'line 2'
||chr(10) -- line 3 (empty)
||chr(10)||'line 4'
||chr(10)||'line 5'
||chr(10)); -- line 6 (empty)

以及修改后的代码：

set serveroutput on;
declare
    cursor c_clob is
    select c from test;

    c clob;
    -------------------------------
    procedure printout
       (p_clob in out nocopy clob) is
      offset number := 1;
      amount number := 32767;
      len    number := dbms_lob.getlength(p_clob);
      lc_buffer varchar2(32767);
      i pls_integer := 1;
    begin
      if ( dbms_lob.isopen(p_clob) != 1 ) then
        dbms_lob.open(p_clob, 0);
      end if;
      while ( offset < len )
          loop
            -- If no more newlines are found, read till end of CLOB
            if (instr(p_clob, chr(10), offset) = 0) then
                amount := len - offset + 1;
            else
                amount := instr(p_clob, chr(10), offset) - offset;
            end if;

            -- This is to catch empty lines, otherwise we get a NULL error
            if ( amount = 0 ) then
                lc_buffer := '';
            else
                dbms_lob.read(p_clob, amount, offset, lc_buffer);
            end if;
            dbms_output.put_line('Line #'||i||':'||lc_buffer);

            -- This is to catch a newline on the last line with 0 characters behind it
            i := i + 1;
            if (instr(p_clob, chr(10), offset) = len) then
                lc_buffer := '';
                dbms_output.put_line('Line #'||i||':'||lc_buffer);
            end if;

            offset := offset + amount + 1;
          end loop; 
     if ( dbms_lob.isopen(p_clob) = 1 ) then
        dbms_lob.close(p_clob);
      end if; 
    exception
      when others then
         dbms_output.put_line('Error : '||sqlerrm);
    end printout;
    ---------------------------
begin
    dbms_output.put_line('-----------');
    open c_clob;
    loop
       fetch c_clob into c;
       exit when c_clob%notfound;
       printout(c);
    end loop;
    close c_clob;
end;

score 0 · Accepted Answer

伊曼纽尔的回答

这是一个优雅的解决方案，可以很好地处理超过 32767 个字符或超过 4K 字符的行。

ANSI 标准查询：

DECLARE
  v_tmp clob :='aaaa'||chr(10)||
               'bbb'||chr(10)||
               'ccccc';
BEGIN
  FOR rec IN (WITH clob_table(c) as (SELECT v_tmp c FROM DUAL),
                   recurse(text,line) as (SELECT regexp_substr(c, '.+', 1, 1) text,1 line
                                            FROM clob_table
                                           UNION ALL
                                          SELECT regexp_substr(c, '.+', 1, line+1),line+1
                                            FROM recurse r,clob_table
                                           WHERE line<regexp_count(c, '.+'))
            SELECT text,line FROM recurse) LOOP
    dbms_output.put_line(rec.text);
  END LOOP;
END;

Oracle 特定查询（原帖）：

DECLARE
  v_tmp clob :='aaaa'||chr(10)||
               'bbb'||chr(10)||
               'ccccc';
BEGIN
  FOR rec IN (WITH clob_table(c) as (SELECT v_tmp c FROM DUAL)
            SELECT regexp_substr(c, '.+', 1, level) text,level line
             FROM clob_table
          CONNECT BY LEVEL <= regexp_count(c, '.+')) LOOP
      dbms_output.put_line(rec.text);
  END LOOP;
END;

score 0 · Accepted Answer

我创建了一个名为lixo_mq的表：

CREATE TABLE LIXO_MQ (CAMPO1 VARCHAR2(4000))

我复制了打印输出程序并将其更改为不同的工作方式：

PROCEDURE PRINTOUT (P_CLOB IN OUT NOCOPY CLOB) IS
   V_APARTIR                     NUMBER (20);
   V_CONTAR                      NUMBER (20);
   V_LINHA                       VARCHAR2 (4000);
   V_REG                         NUMBER (20);
   V_CORINGA                     VARCHAR2 (10) := CHR (10);
   V_ERRO                        VARCHAR2 (4000);
BEGIN
   IF (DBMS_LOB.ISOPEN (P_CLOB) != 1) THEN
      DBMS_LOB.OPEN (P_CLOB, 0);
   END IF;

   V_APARTIR                  := 1;
   V_REG                      := 1;

   WHILE DBMS_LOB.INSTR (LOB_LOC                       => P_CLOB
                        ,PATTERN                       => V_CORINGA
                        ,OFFSET                        => 1
                        ,NTH                           => V_REG
                        ) > 0
   LOOP
      V_CONTAR                   :=
                 DBMS_LOB.INSTR (LOB_LOC                       => P_CLOB
                                ,PATTERN                       => V_CORINGA
                                ,OFFSET                        => 1
                                ,NTH                           => V_REG
                                )
               - V_APARTIR;

      IF V_APARTIR > 1 THEN
         V_LINHA                    :=
                         DBMS_LOB.SUBSTR (LOB_LOC                       => P_CLOB
                                         ,AMOUNT                        =>   V_CONTAR
                                                                           - 1
                                         ,OFFSET                        =>   V_APARTIR
                                                                           + 1
                                         );
      ELSE
         V_LINHA                    :=
                                 DBMS_LOB.SUBSTR (LOB_LOC                       => P_CLOB
                                                 ,AMOUNT                        => V_CONTAR
                                                 ,OFFSET                        => V_APARTIR
                                                 );
      END IF;

      INSERT INTO LIXO_MQ
                  (CAMPO1
                  )
           VALUES (   V_REG
                   || ':'
                   || V_LINHA
                  );

      COMMIT;
      V_APARTIR                  :=
                           DBMS_LOB.INSTR (LOB_LOC                       => P_CLOB
                                          ,PATTERN                       => V_CORINGA
                                          ,OFFSET                        => 1
                                          ,NTH                           => V_REG
                                          );
      V_REG                      :=   V_REG
                                    + 1;
   END LOOP;

   IF (DBMS_LOB.ISOPEN (P_CLOB) = 1) THEN
      DBMS_LOB.CLOSE (P_CLOB);
   END IF;
EXCEPTION
   WHEN OTHERS THEN
      V_ERRO                     :=    'Error : '
                                    || SQLERRM;

      INSERT INTO LIXO_MQ
                  (CAMPO1
                  )
           VALUES (V_ERRO
                  );

      COMMIT;
END PRINTOUT;

score -1 · Accepted Answer

    declare
    c_clob clob := empty_clob();
    c_offset number;
    c_len number;
    read_cnt number;
    prev_buf number := 0;
    read_str varchar2(32000);
    BEGIN
   -- Read the clob in to the local variable
        select c into c_clob from test;
        c_offset := 1;
   -- Get the length of the clob
        c_len := dbms_lob.getlength(c_clob);
   -- Read till the current offset is less the length of clob
    while(c_offset <= c_len)
        loop
   -- Get the index of the next new line character
           read_cnt := instr(c_clob, CHR(10), c_offset, 1);
           exit when read_cnt = 0;
   -- Read the clob in the index
           read_str := dbms_lob.substr(c_clob, read_cnt-c_offset, c_offset);                                          
           dbms_output.put_line('Line#' || read_str);
   -- Now the current offset should point after the read line
           c_offset := read_cnt+1;
           end loop;
        END;
    /

oracle - 使用 pl\sql 逐行读取 clob

9 回答 9

为 TEST 创建表

Related

Reference