1

我需要将 .CSV 文件加载到 Oracle 表中。但问题是,单个 CSV 文件包含多个表的数据。关键在于:我们需要根据每行第一列的值来确定该行应插入哪个表。即,如果第一列的值为“16”,则应将整行插入到 TABLE_16 中(该表有 16 列);如果值为 21,则插入到 TABLE_21(该表有 21 列),依此类推。另外需要注意的是,我的 CSV 文件会有数百万条记录,因此还必须考虑处理性能,所以我认为 BULK COLLECT 和 FORALL 是快速插入数据的最佳方法。

当我尝试运行以下块时,我收到此错误:

01403. 00000 - "no data found" *Cause: No data was found from the objects. *Action: There was no data from the objects which may be due to end of fetch.


.CSV 数据示例

16,"Laura","Bissot","LBISSOT","650.124.5234",20-08-05,"ST_CLERK",3300,,121,50,"aaa",234,"asdf","ssedf","wsdrftd"
21,"Mozhe","Atkinson","MATKINSO","650.124.6234",30-10-05,"ST_CLERK",2800,,121,50,"aaa",234,"asdf","ssedf","wsdrftd","aaa",234,"asdf","ssedf","wsdrftd"
11,"James","Marlow","JAMRLOW","650.124.7234",16-02-05,"ST_CLERK",2500,,121,50
16,"TJ","Olson","TJOLSON","650.124.8234",10-04-07,"ST_CLERK",2100,,121,50,"aaa",234,"asdf","ssedf","wsdrftd"
19,"Jason","Mallin","JMALLIN","650.127.1934",14-06-04,"ST_CLERK",3300,,122,50,"aaa",234,"asdf","ssedf","wsdrftd","aaa",234,"asdf",
12,"Michael","Rogers","MROGERS","650.127.1834",26-08-06,"ST_CLERK",2900,,122,50,"aaa"
14,"Ki","Gee","KGEE","650.127.1734",12-12-07,"ST_CLERK",2400,,122,50,"aaa",234,"asdf"
30,"Ki","Gee","KGEE","650.127.1734",12-12-07,"ST_CLERK",2400,,122,50,"aaa",234,"asdf",11,"dd",23,43,789,9086,"1DRFtf","PST","RTF%$",123,"dsda",5656,"dsed",123,4333,112

-- Object type holding one parsed CSV line: up to 30 positional fields, all
-- kept as text. The first field (c001) is limited to 50 characters, every
-- other field to 150 characters.
CREATE OR REPLACE TYPE t_csv_data AS OBJECT
(
    c001 VARCHAR2(50),
    c002 VARCHAR2(150),
    c003 VARCHAR2(150),
    c004 VARCHAR2(150),
    c005 VARCHAR2(150),
    c006 VARCHAR2(150),
    c007 VARCHAR2(150),
    c008 VARCHAR2(150),
    c009 VARCHAR2(150),
    c010 VARCHAR2(150),
    c011 VARCHAR2(150),
    c012 VARCHAR2(150),
    c013 VARCHAR2(150),
    c014 VARCHAR2(150),
    c015 VARCHAR2(150),
    c016 VARCHAR2(150),
    c017 VARCHAR2(150),
    c018 VARCHAR2(150),
    c019 VARCHAR2(150),
    c020 VARCHAR2(150),
    c021 VARCHAR2(150),
    c022 VARCHAR2(150),
    c023 VARCHAR2(150),
    c024 VARCHAR2(150),
    c025 VARCHAR2(150),
    c026 VARCHAR2(150),
    c027 VARCHAR2(150),
    c028 VARCHAR2(150),
    c029 VARCHAR2(150),
    c030 VARCHAR2(150)
);

-- Nested-table collection type whose elements are T_CSV_DATA objects.
CREATE OR REPLACE TYPE t_csv_val AS TABLE OF t_csv_data;

DECLARE
    -- Reads the uploaded CSV (FILE_UPLOAD.ID = 7), splits it into lines and
    -- fields, stores every line as a T_CSV_DATA element and then routes each
    -- element to a target table according to the value of its first field.
    --
    -- Fix for ORA-01403 "no data found": v_curr_row is an associative array
    -- that only holds as many elements as the line has fields. Reading
    -- v_curr_row(17) for a 16-field line raised NO_DATA_FOUND. All field reads
    -- now go through field_at(), which returns NULL for missing positions.
    -- NOTE: the SELECT ... INTO below raises the same error when FILE_UPLOAD
    -- has no row with ID = 7.

    -- variables used to copy the BLOB into a CLOB
    v_blob              BLOB;
    v_clob              CLOB;
    v_dest_offset       INTEGER := 1;
    v_src_offset        INTEGER := 1;
    v_lang_context      INTEGER := dbms_lob.default_lang_ctx;
    v_warning           INTEGER;
    -- variables used to iterate over each row of the CLOB
    v_new_line_pos      NUMBER;
    v_start_pos         NUMBER := 1;
    v_current_line      VARCHAR2(4000);
    v_total_len         NUMBER;
    v_curr_row          apex_application_global.vc_arr2;
    V_DATA_ASSIGN       T_CSV_VAL :=T_CSV_VAL();

    -- Returns field number p_idx of the line currently held in v_curr_row,
    -- or NULL when the line has fewer than p_idx fields.
    FUNCTION field_at(p_idx IN PLS_INTEGER) RETURN VARCHAR2
    IS
    BEGIN
        IF v_curr_row.EXISTS(p_idx) THEN
            RETURN v_curr_row(p_idx);
        END IF;
        RETURN NULL;
    END field_at;
BEGIN --t_csv_line
    SELECT FILE_BLOB INTO v_blob FROM  FILE_UPLOAD  WHERE ID=7;

    dbms_lob.createtemporary(v_clob,true);
    dbms_lob.converttoclob(dest_lob => v_clob,src_blob => v_blob,amount => dbms_lob.lobmaxsize,dest_offset => v_dest_offset,src_offset
    => v_src_offset,blob_csid => dbms_lob.default_csid,lang_context => v_lang_context,warning => v_warning);

    v_total_len := dbms_lob.getlength(v_clob);
    WHILE ( v_start_pos <= v_total_len ) LOOP
        v_new_line_pos := instr(v_clob,chr(10),v_start_pos);
        IF v_new_line_pos = 0 THEN
            -- last line has no trailing line feed
            v_new_line_pos := v_total_len + 1;
        END IF;
        -- strip a trailing carriage return so CRLF files do not leave CHR(13)
        -- in the last field
        v_current_line := rtrim(substr(v_clob,v_start_pos,v_new_line_pos - v_start_pos),chr(13));

        -- blank lines (e.g. after the final line feed) carry no record
        IF v_current_line IS NOT NULL THEN
            v_curr_row := apex_util.string_to_table(v_current_line,',');

            V_DATA_ASSIGN.EXTEND;
            V_DATA_ASSIGN(V_DATA_ASSIGN.count) := T_CSV_DATA(field_at(1),field_at(2),field_at(3),field_at(4),field_at(5),
                                                             field_at(6),field_at(7),field_at(8),field_at(9),field_at(10),
                                                             field_at(11),field_at(12),field_at(13),field_at(14),field_at(15),
                                                             field_at(16),field_at(17),field_at(18),field_at(19),field_at(20),
                                                             field_at(21),field_at(22),field_at(23),field_at(24),field_at(25),
                                                             field_at(26),field_at(27),field_at(28),field_at(29),field_at(30));
        END IF;
        v_start_pos := v_new_line_pos + 1;
    END LOOP;

    -- the temporary CLOB is no longer needed once every line has been parsed
    dbms_lob.freetemporary(v_clob);

    -- 1 .. count is safe for an empty collection; first .. last would be
    -- NULL .. NULL and raise ORA-06502
    FOR rec IN 1..V_DATA_ASSIGN.count LOOP
        IF V_DATA_ASSIGN(rec).c001 = 16 THEN -- If first value is 16, then insert into TABLE_16(will have 16 columns).
            INSERT INTO TABLE_16.....
        ELSIF  V_DATA_ASSIGN(rec).c001 = 21 THEN -- If first value is 21, then insert into TABLE_21(will have 21 columns).
            INSERT INTO TABLE_21.....
        ELSIF  V_DATA_ASSIGN(rec).c001 = 11 THEN -- If first value is 11, then insert into TABLE_11(will have 11 columns).
            INSERT INTO TABLE_11.....
        ...
            ...
        ELSIF  V_DATA_ASSIGN(rec).c001 = 30 THEN -- If first value is 30, then insert into TABLE_30(will have 30 columns). and so on...
            INSERT INTO TABLE_30.....
        END IF;
    END LOOP;
END;
4

3 回答 3

1

在我看来,你走错了路。您选择了 PL/SQL,而它通常比不过 SQL*Loader,尤其是在启用了直接路径和并行执行的情况下。

这是一个简单的例子,展示了如何做到这一点。

创建表:我懒得创建 16 或 21 列的表,所以改用 4 列的表(t1)和 5 列的表(t2)代替。

SQL> create table t1 (id number, fname varchar2(20), lname varchar2(20), salary number);

Table created.

SQL> create table t2 (id number, fname varchar2(20), lname varchar2(20), salary number,hiredate date);

Table created.

控制文件:

-- SQL*Loader control file: routes each in-line record to t1 or t2 depending
-- on the first character of the record. Direct-path, parallel load.
OPTIONS (DIRECT=TRUE, PARALLEL=TRUE)
LOAD DATA
INFILE *

-- Records whose first character is '4' are appended to t1.
INTO TABLE t1
  APPEND
  WHEN (1) = '4'
  FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"'
  TRAILING NULLCOLS
  (
    id     POSITION(1) INTEGER EXTERNAL,
    fname  CHAR,
    lname  CHAR,
    salary INTEGER EXTERNAL
  )

-- Records whose first character is '5' are appended to t2.
-- POSITION(1) on the first field makes field scanning start again at the
-- beginning of the record for this second INTO TABLE clause.
INTO TABLE t2
  APPEND
  WHEN (1) = '5'
  FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"'
  TRAILING NULLCOLS
  (
    id       POSITION(1) INTEGER EXTERNAL,
    fname    CHAR,
    lname    CHAR,
    salary   INTEGER EXTERNAL,
    hiredate "to_date(:hiredate, 'dd-mm-rr')"
  )

BEGINDATA
4,"Laura","Bissot",6506,14-06-04
5,"Mozhe","Atkinson",1202,20-08-05
5,"James","Marlow",1244,30-10-05
4,"TJ","Olson",4345,16-02-05

加载会话和结果:

SQL> $sqlldr scott/tiger control=test05.ctl log=test05.log

SQL*Loader: Release 11.2.0.2.0 - Production on Pon Kol 27 15:21:17 2018

Copyright (c) 1982, 2009, Oracle and/or its affiliates.  All rights reserved.


Load completed - logical record count 4.

SQL> select * From t1;

        ID FNAME                LNAME                    SALARY
---------- -------------------- -------------------- ----------
         4 Laura                Bissot                     6506
         4 TJ                   Olson                      4345

SQL> select * From t2;

        ID FNAME                LNAME                    SALARY HIREDATE
---------- -------------------- -------------------- ---------- ----------
         5 Mozhe                Atkinson                   1202 20/08/2005
         5 James                Marlow                     1244 30/10/2005

SQL>
于 2018-08-27T13:21:57.950 回答
0

我认为最简单的解决方案是首先使用 Shell Script/SQL Loader 将所有数据转储到 PRE-Staging 表中。将数据转储到预登台后,您就可以编写一个 PLSQL 块/包来将特定行插入到所需的表中。

在 PLSQL 块中,您可以使用批量收集来最大化您的性能。

谢谢, 伊德里斯

于 2018-08-27T07:08:32.837 回答
0

我同意@Littlefoot SQL-Loader 是自然而快速的方式。
另一种方法是使用 Oracle 的“外部表”。

有了这个,您可以对驻留在操作系统(OS)文件系统上的文件使用 SELECT 语句。
例如 CSV 格式的文本文件。

参考:《数据库实用程序》(Database Utilities)中的“外部表”章节 https://docs.oracle.com/cd/E11882_01/server.112/e22490/part_et.htm#i436567
外部表功能是对现有 SQL*Loader 功能的补充。它使您能够像访问数据库中的表一样访问外部源中的数据。
请注意,在需要对临时(暂存)表建立额外索引的数据加载场景中,SQL*Loader 可能是更好的选择。

基本步骤:

1 - 在运行 Oracle 实例的操作系统 (OS) 的文件系统中创建一个子目录。
例如:如果操作系统是 Windows,则在“C:”盘的子目录“DATA”内创建一个子目录“IN_FILES”

如果操作系统是 Linux/Unix 风格,则在根目录下的子目录“data”内创建一个子目录“in_files”。

2 - 在操作系统上,为运行 Oracle 实例的操作系统用户授予子目录步骤 (1) 的读取和写入权限。

3 - 在 Oracle 上创建一个目录对象
您根据文件系统的规则使用完整路径。
如果操作系统是 Windows,则完整路径类似于“C:\DATA\IN_FILES”

-- Directory object that maps the name EXTERNAL_INFO to the Windows folder.
CREATE DIRECTORY external_info AS 'C:\DATA\IN_FILES';

如果操作系统是 Linux/Unix 风格,那么完整路径类似于“/data/in_files”

-- Directory object that maps the name EXTERNAL_INFO to the Linux/Unix folder.
CREATE DIRECTORY external_info AS '/data/in_files';

4 - 在 Oracle 将步骤 3 的目录对象的读/写权限授予 PUBLIC。

Grant read, write on directory EXTERNAL_INFO to public;

5 - 创建一个外部表来访问 CSV 文件:

在此示例中,文件为:
a.CSV 格式
b.End-of-record 是 CARRIAGE_RETURN 后跟 LINE_FEED ( records delimited by '\r\n')
c.第一个记录是标题列名, ( skip 1)
d.Fields 分隔符是逗号 ( fields terminated by ',')
e. 数据可以用双引号(ASCII 字符 34)括起来 ( optionally enclosed by '"')
f. 缺少的字段将具有空值 ( missing field values are null)
g.文件名为“data01.csv”,此名称符合操作系统规则
请记住:Windows 的文件名不区分大小写,但 Linux/Unix 区分大小写。

-- External table exposing the CSV file 'data01.csv' in directory object
-- EXTERNAL_INFO as rows. Records end with CR+LF, the first record (header)
-- is skipped, fields are comma-separated and may be enclosed in double
-- quotes, and missing trailing fields are read as NULL. Rejected rows never
-- abort a query (REJECT LIMIT UNLIMITED).
CREATE TABLE data01_external
  (
    id       NUMBER,
    fname    VARCHAR2(20),
    lname    VARCHAR2(20),
    salary   NUMBER,
    hiredate DATE
  )
  ORGANIZATION EXTERNAL
  (
    TYPE oracle_loader
    DEFAULT DIRECTORY external_info
    ACCESS PARAMETERS
    (
      RECORDS DELIMITED BY '\r\n'
      BADFILE     'data01_%p.bad'
      DISCARDFILE 'data01_%p.dis'
      LOGFILE     'data01_%p.log'
      SKIP 1
      FIELDS TERMINATED BY ','
             OPTIONALLY ENCLOSED BY '"'
             MISSING FIELD VALUES ARE NULL
      (
        id        INTEGER EXTERNAL,
        fname     CHAR,
        lname     CHAR,
        salary    DECIMAL EXTERNAL,
        hiredate  CHAR DATE_FORMAT DATE MASK 'dd-mm-rr'
      )
    )
    LOCATION ('data01.csv')
  )
  REJECT LIMIT UNLIMITED;

6 - 现在你可以编写一个 PL/SQL 包来向每个表插入数据
对于这个例子,我使用@Littlefoot 的表 T1 和 T2:

-- Package specification: loader that copies rows from the external table
-- into the target tables.
CREATE OR REPLACE PACKAGE pk_load_info
IS
  -- Loads the given file.
  --   p_isbFile_name : name of the file to load
  --   p_onuErrCode   : out, error code (the body sets 0 on success)
  --   p_osbErrDesc   : out, error description (the body sets NULL on success)
  PROCEDURE pr_load
  (
    p_isbFile_name     VARCHAR2,
    p_onuErrCode   OUT NUMBER,
    p_osbErrDesc   OUT VARCHAR2
  );
END pk_load_info;
/


Create or replace package body pk_load_info
is
  -- Points the external table DATA01_EXTERNAL at the given file and copies
  -- its rows into T1 (id = 4) and T2 (id = 5) with direct-path inserts.
  --
  --   p_isbFile_name : name of the CSV file inside directory EXTERNAL_INFO
  --   p_onuErrCode   : out, 0 on success, 101 when the file name is empty,
  --                    otherwise the SQLCODE of the failing step
  --   p_osbErrDesc   : out, NULL on success, otherwise a description that
  --                    names the failing step
  --
  -- No exception leaves the procedure; every error is reported through the
  -- out parameters. The procedure does not commit or roll back the inserts.
  -- NOTE: ALTER TABLE is DDL, so Oracle commits any open transaction of the
  -- session when it runs.
  procedure pr_load(p_isbFile_name     varchar2,
                    p_onuErrCode   out number,
                    p_osbErrDesc    out varchar2
                   )
  is
    sbEvent    varchar2(20);
    -- sized for long file names; the previous 200-character buffer overflowed
    sbSentence varchar2(4000);
  Begin
    p_onuErrCode:=0;
    p_osbErrDesc:=null;
    --
    if trim(p_isbFile_name) is null then
       p_onuErrCode:=101;
       p_osbErrDesc:='The name of the file is not to be null';
       return;
    End if;
    --
    -- The same file name can be used on every run, or a different one each time.
    sbEvent:='alter table';
    -- The file name is caller-supplied text placed inside dynamic DDL.
    -- dbms_assert.enquote_literal wraps it in single quotes and raises an
    -- error when it contains an unpaired quote, so the literal cannot be
    -- closed early to inject extra SQL.
    sbSentence:='alter table data01_external default directory EXTERNAL_INFO location ('
                ||sys.dbms_assert.enquote_literal(trim(p_isbFile_name))||')';
    Dbms_Output.Put_Line('sbSentence='||sbSentence);
    execute immediate sbSentence;
    --
    -- hint "append": direct-path INSERT, data is appended to the end of the table
    sbEvent:='insert table T1';
    insert /*+ append */ into t1
    select a.id,
           a.fname,
           a.lname,
           a.salary
    from data01_external a
    where a.id=4;
    --
    sbEvent:='insert table T2';
    insert /*+ append */ into t2
    select a.id,
           a.fname,
           a.lname,
           a.salary,
           a.hiredate
    from data01_external a
    where a.id=5;
  Exception
    when others then
         -- deliberate catch-all: the caller decides on commit/rollback from
         -- the returned code; sbEvent identifies the step that failed
         p_onuErrCode:=sqlcode;
         p_osbErrDesc:='Event "'||sbEvent||'" '||sqlerrm;
  End pr_load;
End pk_load_info;
/

7 - 将文件“data02.csv”复制到步骤(1)的子目录中
数据为:

ID,FNAME,LNAME,SALARY,HIREDATE
4,"Laura","Bissot",6506
5,"Mozhe","Atkinson",1202,20-08-05
5,"James","Marlow",1244,30-10-05
4,"TJ","Olson",4345

8 - 测试

-- Test driver: calls pk_load_info.pr_load for 'data02.csv', prints the
-- returned error code and description, then commits on success (code 0) or
-- rolls back otherwise.
Declare
  nuErrcode      number;
  sbErrdesc      varchar2(2000);
  -- Prints p_isbTexto through dbms_output in pieces of at most 255
  -- characters. Every piece that is followed by more text is cut at 254
  -- characters and ends with the continuation marker '¬'.
  -- Fix: the previous version printed two pieces at most, so anything past
  -- roughly 509 characters was silently dropped; all of the text is now
  -- printed. Output for shorter text is unchanged.
  Procedure print(p_isbTexto varchar2)
  is
    nuLen constant pls_integer := nvl(length(p_isbTexto),0);
    nuPos pls_integer := 1;
  Begin
    -- more than 255 characters left: emit 254 plus the marker and continue
    while nuLen - nuPos + 1 > 255 loop
       dbms_output.put_line(substr(p_isbTexto,nuPos,254)||'¬');
       nuPos := nuPos + 254;
    end loop;
    -- final (or only) piece, at most 255 characters; NULL prints an empty line
    dbms_output.put_line(substr(p_isbTexto,nuPos));
  End print;
Begin
  print(to_char(systimestamp,'yyyy-mm-dd hh24:mi:ss.ff4')||'|Begin');
  dbms_application_info.set_module('SQL','Inicio');
  --
  pk_load_info.pr_load('data02.csv',
                       nuErrcode,
                       sbErrdesc
                      );
  print('nuErrcode='||nuErrcode);
  print('sbErrdesc='||sbErrdesc);
  --
  if nuErrcode=0 then
     commit;
  Else
     rollback;
  end if;
  print(to_char(systimestamp,'yyyy-mm-dd hh24:mi:ss.ff4')||'|End');
  dbms_application_info.set_action('Fin');
End;
/

9 - 查看数据

select *
from t1;
ID|FNAME|LNAME |SALARY|
 4|Laura|Bissot|6506  |
 4|TJ   |Olson |4345  |

select *
from t2;
ID|FNAME|LNAME   |SALARY|HIREDATE           |
5 |Mozhe|Atkinson|  1202|2005-08-20 00:00:00|
5 |James|Marlow  |  1244|2005-10-30 00:00:00|

再见

于 2018-08-28T15:01:39.900 回答