我必须更新多个表格，每个表格至少有 3500 万（35M）行数据。我编写了一个批量更新脚本，如下所示。我想知道需要关注哪些指标，来调整脚本中的 LIMIT 10000
参数，以便让脚本运行得更好。
DECLARE
    -- Source rows to be copied into the backup table.
    CURSOR c_users IS
        SELECT id, sirname
          FROM table_user;

    TYPE t_id_list      IS TABLE OF NUMBER;
    TYPE t_sirname_list IS TABLE OF VARCHAR2(15);

    v_ids      t_id_list;
    v_sirnames t_sirname_list;
BEGIN
    OPEN c_users;
    LOOP
        -- Fetch the next batch; LIMIT bounds the PGA memory held per fetch.
        FETCH c_users BULK COLLECT INTO v_ids, v_sirnames LIMIT 10000;

        -- Exit on an empty batch, not on %NOTFOUND: with LIMIT, %NOTFOUND
        -- becomes TRUE on the final short batch and would skip processing it.
        EXIT WHEN v_ids.COUNT = 0;

        -- One context switch updates the entire batch.
        FORALL i IN 1 .. v_ids.COUNT
            UPDATE table_user_backup
               SET sirname      = v_sirnames(i),
                   sirname_date = SYSDATE
             WHERE id = v_ids(i);
    END LOOP;
    CLOSE c_users;
END;
我已经测试了三种可能的批量更新方式，并在下面给出了每种方式的运行时间。结果表明纯 SQL 优于 PL/SQL 的 FORALL 批量更新。把 LIMIT 参数在 1000–100000 范围内调整之后，我看不出运行时间有任何差别。
-- Bulk update comparison -- for 2.5M rows
-- Takes 4 minutes
DECLARE
    -- Row-by-row baseline for the benchmark. Only `id` drives the update,
    -- so fetch just that column; the original also fetched `name`, pulling
    -- unused data through the cursor in a test that measures exactly that cost.
    CURSOR rec_cur IS
        SELECT id
          FROM table_user;
BEGIN
    -- Cursor FOR loop: Oracle array-fetches rows behind the scenes,
    -- but the UPDATE still round-trips to SQL once per row, which is
    -- why this variant is the slowest of the three.
    FOR sub IN rec_cur LOOP
        UPDATE table_user_backup
           SET name_date = SYSDATE
         WHERE id = sub.id;
    END LOOP;
END;
-- Takes 2.5 minutes
DECLARE
    -- Rows whose name/name_date should be propagated to the backup table.
    CURSOR src_cur IS
        SELECT id, name
          FROM table_user;

    TYPE id_tab_t   IS TABLE OF NUMBER;
    TYPE name_tab_t IS TABLE OF VARCHAR2(20);

    l_ids   id_tab_t;
    l_names name_tab_t;
BEGIN
    OPEN src_cur;
    LOOP
        -- Batch size of 10000 caps per-fetch memory; tune via LIMIT.
        FETCH src_cur BULK COLLECT INTO l_ids, l_names LIMIT 10000;

        -- A zero-row batch means the cursor is exhausted.
        EXIT WHEN l_ids.COUNT = 0;

        -- Bind the whole batch into a single bulk UPDATE.
        FORALL idx IN l_ids.FIRST .. l_ids.LAST
            UPDATE table_user_backup
               SET name      = l_names(idx),
                   name_date = SYSDATE
             WHERE id = l_ids(idx);
    END LOOP;
    CLOSE src_cur;
END;
-- SQL is faster than PL/SQL
-- Takes 2.0 minutes
-- Single set-based statement: let the optimizer join the two tables once
-- instead of iterating rows in PL/SQL. The EXISTS guard restricts the
-- update to backup rows that actually have a matching source row.
UPDATE table_user_backup b
   SET (name, name_date) = (SELECT u.name, SYSDATE
                              FROM table_user u
                             WHERE u.id = b.id)
 WHERE EXISTS (SELECT 1
                 FROM table_user u
                WHERE u.id = b.id);