在我的项目中,我使用 oracle 作为主数据库,我遇到了解析 clob 的问题。所以假设我们有一个有价值的 clob
aaaaaa
cccccc
bbbbbb
它存储在表测试中...
我需要编写 plsql 程序来获取这个 clob 并将其拆分,以便我将拥有包含三个项目的数组 [aaaaaa,cccccccc,bbbbbbb]。
有没有可能的解决方案?
在我的项目中,我使用 oracle 作为主数据库,我遇到了解析 clob 的问题。所以假设我们有一个有价值的 clob
aaaaaa
cccccc
bbbbbb
它存储在表测试中...
我需要编写 plsql 程序来获取这个 clob 并将其拆分,以便我将拥有包含三个项目的数组 [aaaaaa,cccccccc,bbbbbbb]。
有没有可能的解决方案?
这是一段有效的代码。出于性能目的,我建议您使用显式游标而不是隐式游标(FOR i IN (select...))。
首先是创建测试用例的脚本。
create table test (c clob);
insert into test (c) values (
'azertyuiop
qsdfghjklm
wxcvbn
');
然后这里是逐行读取 Clob 的脚本:
/* Formatted on 28/08/2012 14:16:52 (QP5 v5.115.810.9015) */
declare
nStartIndex number := 1;
nEndIndex number := 1;
nLineIndex number := 0;
vLine varchar2(2000);
cursor c_clob is
select c from test;
c clob;
-------------------------------
procedure printout
(p_clob in out nocopy clob) is
offset number := 1;
amount number := 32767;
len number := dbms_lob.getlength(p_clob);
lc_buffer varchar2(32767);
i pls_integer := 1;
begin
if ( dbms_lob.isopen(p_clob) != 1 ) then
dbms_lob.open(p_clob, 0);
end if;
amount := instr(p_clob, chr(10), offset);
while ( offset < len )
loop
dbms_lob.read(p_clob, amount, offset, lc_buffer);
dbms_output.put_line('Line #'||i||':'||lc_buffer);
offset := offset + amount;
i := i + 1;
end loop;
if ( dbms_lob.isopen(p_clob) = 1 ) then
dbms_lob.close(p_clob);
end if;
exception
when others then
dbms_output.put_line('Error : '||sqlerrm);
end printout;
---------------------------
begin
dbms_output.put_line('-----------');
open c_clob;
loop
fetch c_clob into c;
exit when c_clob%notfound;
printout(c);
end loop;
close c_clob;
end;
'amount' 变量用于检测行尾位置。请注意,在某些情况下,行尾是 CHR(10)||CHR(13) (CR + LF),而在其他一些情况下,它只是 CHR(10)。
虽然 SQL 正则表达式/按级别连接方法可能是最优雅的,但它在性能方面相当糟糕(对于我在 11.2.0.3.0 上的测试用例)。像这样的简单解析要快得多。
procedure parse_clob(p_clob in clob) is
l_offset pls_integer:=1;
l_line varchar2(32767);
l_total_length pls_integer:=length(p_clob);
l_line_length pls_integer;
begin
while l_offset<=l_total_length loop
l_line_length:=instr(p_clob,chr(10),l_offset)-l_offset;
if l_line_length<0 then
l_line_length:=l_total_length+1-l_offset;
end if;
l_line:=substr(p_clob,l_offset,l_line_length);
dbms_output.put_line(l_line); --do line processing
l_offset:=l_offset+l_line_length+1;
end loop;
end parse_clob;
如果... - 您安装了 APEX - 而且 clob 小于 32K,您可能还需要查看以下代码:
declare
l_text varchar2(32767) := '...';
l_rows wwv_flow_global.vc_arr2;
begin
l_rows := apex_util.string_to_table(l_text, chr(10));
for i in 1 .. l_rows.count loop
dbms_output.put_line(l_rows(i));
end loop;
end;
/
一个流水线函数,带有一些额外的选项来驱动行为。在 Windows、Oracle 11g 上测试/工作(我怀疑它在 *nix 环境中可能会失败,因为行终止方式)。
CREATE OR REPLACE FUNCTION ETL_HELPER_PARSE
(P_CLOB NCLOB, P_LINES_TO_SKIP INT DEFAULT 0, P_PUT_EMPTY_LINES CHAR DEFAULT 'N') RETURN SYS.ODCIVarchar2List PIPELINED
AS
c_top_lines_to_skip CONSTANT NUMBER NOT NULL := P_LINES_TO_SKIP;
c_output_empty_lines CONSTANT CHAR(1) NOT NULL := P_PUT_EMPTY_LINES;
--
l_len INT := DBMS_LOB.GETLENGTH(P_CLOB);
l_hit INT := 0;
l_offset INT := 1;
l_amount INT;
l_buffer VARCHAR2(32767);
l_cnt INT := 1;
BEGIN
WHILE ( l_offset < l_len )
LOOP
l_hit := DBMS_LOB.INSTR (
lob_loc => P_CLOB -- IN CLOB CHARACTER SET ANY_CS
,pattern => CHR(13)||CHR(10) -- IN VARCHAR2 CHARACTER SET lob_loc%CHARSET
,offset => l_offset -- IN INTEGER := 1
,nth => 1 -- IN INTEGER := 1
);
l_amount := CASE WHEN COALESCE(l_hit, 0) > 0 THEN l_hit - l_offset ELSE l_len - l_offset + 1 END;
-- `l_amount=0` means a new empty line has been encountered
IF l_cnt > c_top_lines_to_skip
THEN
IF l_amount > 0
THEN
DBMS_LOB.READ(P_CLOB, l_amount, l_offset, l_buffer);
PIPE ROW (l_buffer);
ELSIF UPPER(c_output_empty_lines) = 'Y'
THEN
PIPE ROW ('');
END IF;
END IF;
l_offset := CASE WHEN COALESCE(l_hit, 0) > 0 THEN l_hit + 2 ELSE l_len END;
l_cnt := l_cnt + 1;
end loop;
EXCEPTION
WHEN OTHERS THEN
DBMS_OUTPUT.PUT_LINE('Error : '||SQLERRM);
END ETL_HELPER_PARSE;
动态行长示例
以及 UNIX 和 WIN 文件的替代品
和 CR/LF 在文件末尾或没有它
为 TEST 创建表
drop table pbrev.test_SVT_tmp; create table pbrev.test_SVT_tmp (xc clob); insert into pbrev.test_SVT_tmp (xc) values ( --'azertyuiop;11' || chr(13) || chr(10) ||'qsdfghjklm;7878' || chr(13) || chr(10) ||'wxcvbn;0' || chr(13) || chr(10) ); 'azertyuiop;11' || chr(13) || chr(10) ||'qsdfghjklm;7878' || chr(13) || chr(10) ||'wxcvbn;0' ); 'azerty jhjh huiop;11 qsdfgkj hjklhhhhhhhhhhhm;7878 wxcvbn;0 dkjsk kjdsk5456 4654 5646 54645 FINISH' ); delete from pbrev.test_SVT_tmp ; select xc from pbrev.test_SVT_tmp;
--SET SERVEROUTPUT ON;
--SET SERVEROUTPUT OFF;
declare
nStartIndex number := 1;
nEndIndex number := 1;
nLineIndex number := 0;
vLine varchar2(2000);
cursor c_clob is
select xc from pbrev.test_SVT_tmp;
c clob;
procedure printout
(p_clob in out nocopy clob) is
offset number := 1;
amount number := 32767;
amount_last number := 0;
len number := dbms_lob.getlength(p_clob);
lc_buffer varchar2(32767);
line_seq pls_integer := 1;
-- For UNIX type file - replace CHR(13) to NULL
CR char := chr(13);
--CR char := NULL;
LF char := chr(10);
nCRLF number;
sCRLF varchar2(2);
b_finish boolean := true;
begin
sCRLF := CR || LF;
nCRLF := Length(sCRLF);
if ( dbms_lob.isopen(p_clob) != 1 ) then
dbms_lob.open(p_clob, 0);
end if;
amount := instr(p_clob, sCRLF, offset);
while ( offset < len )
loop
-- For without CR/LF on end file
If amount < 0 then
amount := len - offset + 1;
b_finish := false;
End If;
dbms_lob.read(p_clob, amount, offset, lc_buffer);
If b_finish then
lc_buffer := SUBSTR(lc_buffer,1,Length(lc_buffer)-1);
End If;
if (line_seq-1) > 0 then
amount_last := amount_last + amount;
offset := offset + amount;
else
amount_last := amount;
offset := amount + nCRLF;
end if;
amount := instr(p_clob, sCRLF, offset);
amount := amount - amount_last;
dbms_output.put_line('Line #'||line_seq||': '||lc_buffer);
line_seq := line_seq + 1;
end loop;
if ( dbms_lob.isopen(p_clob) = 1 ) then
dbms_lob.close(p_clob);
end if;
exception
when others then
dbms_output.put_line('Error : '||sqlerrm);
end printout;
begin
open c_clob;
loop
fetch c_clob into c;
exit when c_clob%notfound;
printout(c);
end loop;
close c_clob;
end;
这是基于@Pierre-Gilles Levallois 他的回答的后续回答。因为我认为它包含一些错误,所以我试图修复它们。
我已经将其实现为相当快速和肮脏的修复。我敢肯定应该有更优雅的解决方案......来吧。示例表:
create table test (c clob);
insert into test (c) values (
-- line 1 (empty)
chr(10)||'line 2'
||chr(10) -- line 3 (empty)
||chr(10)||'line 4'
||chr(10)||'line 5'
||chr(10)); -- line 6 (empty)
以及修改后的代码:
set serveroutput on;
declare
cursor c_clob is
select c from test;
c clob;
-------------------------------
procedure printout
(p_clob in out nocopy clob) is
offset number := 1;
amount number := 32767;
len number := dbms_lob.getlength(p_clob);
lc_buffer varchar2(32767);
i pls_integer := 1;
begin
if ( dbms_lob.isopen(p_clob) != 1 ) then
dbms_lob.open(p_clob, 0);
end if;
while ( offset < len )
loop
-- If no more newlines are found, read till end of CLOB
if (instr(p_clob, chr(10), offset) = 0) then
amount := len - offset + 1;
else
amount := instr(p_clob, chr(10), offset) - offset;
end if;
-- This is to catch empty lines, otherwise we get a NULL error
if ( amount = 0 ) then
lc_buffer := '';
else
dbms_lob.read(p_clob, amount, offset, lc_buffer);
end if;
dbms_output.put_line('Line #'||i||':'||lc_buffer);
-- This is to catch a newline on the last line with 0 characters behind it
i := i + 1;
if (instr(p_clob, chr(10), offset) = len) then
lc_buffer := '';
dbms_output.put_line('Line #'||i||':'||lc_buffer);
end if;
offset := offset + amount + 1;
end loop;
if ( dbms_lob.isopen(p_clob) = 1 ) then
dbms_lob.close(p_clob);
end if;
exception
when others then
dbms_output.put_line('Error : '||sqlerrm);
end printout;
---------------------------
begin
dbms_output.put_line('-----------');
open c_clob;
loop
fetch c_clob into c;
exit when c_clob%notfound;
printout(c);
end loop;
close c_clob;
end;
这是一个优雅的解决方案,可以很好地处理超过 32767 个字符或超过 4K 字符的行。
ANSI 标准查询:
DECLARE
v_tmp clob :='aaaa'||chr(10)||
'bbb'||chr(10)||
'ccccc';
BEGIN
FOR rec IN (WITH clob_table(c) as (SELECT v_tmp c FROM DUAL),
recurse(text,line) as (SELECT regexp_substr(c, '.+', 1, 1) text,1 line
FROM clob_table
UNION ALL
SELECT regexp_substr(c, '.+', 1, line+1),line+1
FROM recurse r,clob_table
WHERE line<regexp_count(c, '.+'))
SELECT text,line FROM recurse) LOOP
dbms_output.put_line(rec.text);
END LOOP;
END;
Oracle 特定查询(原帖):
DECLARE
v_tmp clob :='aaaa'||chr(10)||
'bbb'||chr(10)||
'ccccc';
BEGIN
FOR rec IN (WITH clob_table(c) as (SELECT v_tmp c FROM DUAL)
SELECT regexp_substr(c, '.+', 1, level) text,level line
FROM clob_table
CONNECT BY LEVEL <= regexp_count(c, '.+')) LOOP
dbms_output.put_line(rec.text);
END LOOP;
END;
我创建了一个名为lixo_mq的表:
CREATE TABLE LIXO_MQ (CAMPO1 VARCHAR2(4000))
我复制了打印输出程序并将其更改为不同的工作方式:
PROCEDURE PRINTOUT (P_CLOB IN OUT NOCOPY CLOB) IS
V_APARTIR NUMBER (20);
V_CONTAR NUMBER (20);
V_LINHA VARCHAR2 (4000);
V_REG NUMBER (20);
V_CORINGA VARCHAR2 (10) := CHR (10);
V_ERRO VARCHAR2 (4000);
BEGIN
IF (DBMS_LOB.ISOPEN (P_CLOB) != 1) THEN
DBMS_LOB.OPEN (P_CLOB, 0);
END IF;
V_APARTIR := 1;
V_REG := 1;
WHILE DBMS_LOB.INSTR (LOB_LOC => P_CLOB
,PATTERN => V_CORINGA
,OFFSET => 1
,NTH => V_REG
) > 0
LOOP
V_CONTAR :=
DBMS_LOB.INSTR (LOB_LOC => P_CLOB
,PATTERN => V_CORINGA
,OFFSET => 1
,NTH => V_REG
)
- V_APARTIR;
IF V_APARTIR > 1 THEN
V_LINHA :=
DBMS_LOB.SUBSTR (LOB_LOC => P_CLOB
,AMOUNT => V_CONTAR
- 1
,OFFSET => V_APARTIR
+ 1
);
ELSE
V_LINHA :=
DBMS_LOB.SUBSTR (LOB_LOC => P_CLOB
,AMOUNT => V_CONTAR
,OFFSET => V_APARTIR
);
END IF;
INSERT INTO LIXO_MQ
(CAMPO1
)
VALUES ( V_REG
|| ':'
|| V_LINHA
);
COMMIT;
V_APARTIR :=
DBMS_LOB.INSTR (LOB_LOC => P_CLOB
,PATTERN => V_CORINGA
,OFFSET => 1
,NTH => V_REG
);
V_REG := V_REG
+ 1;
END LOOP;
IF (DBMS_LOB.ISOPEN (P_CLOB) = 1) THEN
DBMS_LOB.CLOSE (P_CLOB);
END IF;
EXCEPTION
WHEN OTHERS THEN
V_ERRO := 'Error : '
|| SQLERRM;
INSERT INTO LIXO_MQ
(CAMPO1
)
VALUES (V_ERRO
);
COMMIT;
END PRINTOUT;
declare
c_clob clob := empty_clob();
c_offset number;
c_len number;
read_cnt number;
prev_buf number := 0;
read_str varchar2(32000);
BEGIN
-- Read the clob in to the local variable
select c into c_clob from test;
c_offset := 1;
-- Get the length of the clob
c_len := dbms_lob.getlength(c_clob);
-- Read till the current offset is less the length of clob
while(c_offset <= c_len)
loop
-- Get the index of the next new line character
read_cnt := instr(c_clob, CHR(10), c_offset, 1);
exit when read_cnt = 0;
-- Read the clob in the index
read_str := dbms_lob.substr(c_clob, read_cnt-c_offset, c_offset);
dbms_output.put_line('Line#' || read_str);
-- Now the current offset should point after the read line
c_offset := read_cnt+1;
end loop;
END;
/