我目前正在将一些数据库从 excel 格式迁移到 PostgreSQL。其中一些记录在一行中,由分隔符分隔。我正在尝试编写一个函数,该函数通过以下方式将所选列与所选分隔符分开来转换表格:
create table janek.temp (a integer, b text);
insert into janek.temp values (1, 'cat');
insert into janek.temp values (2, 'dog;hound');
该表包含 2 行,我想要一个函数,当我执行时:
select * from janek.split_table ('janek', 'temp', 'b', ';')
我得到 3 行返回:
1;'cat'
2;'dog'
2;'hound'
目前我遇到的问题是 - 不知道如何将模式名称和表名传递给RETURNS setof $1.$2 AS
- 第一次执行的查询由于我不明白的原因不起作用,错误粘贴在下面
我希望你们清楚我的逻辑。我提供了示例数据和所需的结果。我是 PGSQL 函数的新手,知道的不多,但我希望我编写的代码很容易处理。
我希望这个功能会被重用,对我来说似乎对数据迁移很有帮助。
这是我的功能的代码:
create or replace function janek.split_table (table_schema text, table_name text, column_name text, separator text)
RETURNS setof $1.$2 AS
-- don't know how to pass table_schema.table_name as variable - need help here, this line doesn't work, but after manually entering schema and table name it works
$BODY$
DECLARE
-- string variable containing sql queries to execute
execute_query text;
-- string variable where column list of target table will be assigned, except for one column - column_name
column_list text;
-- variable where maximal count of separator will be assigned
separator_count integer;
BEGIN
-- assigning variable column_list
-- first error here, that I don't understand - when I execute this select statement manually it returns one column containing one string:
-- ERROR: query "SELECT 'array_to_string(array(select column_name::text
-- from information_schema.columns
-- where table_name = '''||table_name||'''
-- and table_schema = '''||table_schema||'''
-- and column_name <> '''||column_name||'''
-- ), ', ');'" returned 2 columns
-- CONTEXT: PL/pgSQL function "split_table" line 15 at assignment
execute_query := 'SELECT array_to_string(array(select column_name::text
from information_schema.columns
where table_name = '''||table_name||'''
and table_schema = '''||table_schema||'''
and column name <> '''||column_name||'''
), ', ');';
execute execute_query into column_list;
-- assigning variable separator_count - code of function janek.cointinstring below, it returns number of occurences of separator in string
execute_query := 'select max(janek.countinstring('''||column_name||''', '''||separator||''')) from '||table_schema||'.'||table_name||'''';
execute execute_query into separator_count;
-- redefining table we're transforming - splitting column_name to an array on delimiter + deleting spaces. If you guys know a way to remove spaces only 'touching' the separator it would be better:
execute_query := 'with t as (
select '||column_list||',
case when position('''||separator||''' in '||table_name||') > 0 then
regexp_split_to_array(replace('||column_name||', '' '', ''''), '''||separator||''')
elsif length('||table_name||') > 0 then array['||column_name||']
else null
as arr
from '||table_schema||'.'||table_name||')';
-- main loop, iterates on table in order to transform column 'arr' to separate strings instead of array using union all:
for i in 1..separator_count loop
execute_query := execute_query||'
select
'||column_list||', arr['||separator||'] as '||column_name||'
from t
where arr['||separator||'] is not null
union all';
end loop;
-- removing last union all
execute_query := substr(execute_query, 1, length(execute_query) - 9);
-- executing the main query
RETURN QUERY EXECUTE execute_query;
END $BODY$
LANGUAGE plpgsql VOLATILE
COST 100;
ALTER FUNCTION janek.split_table (table_schema text, table_name text, column_name text, separator text)
OWNER TO jsiekierski;
GRANT EXECUTE ON FUNCTION janek.split_table (table_schema text, table_name text, column_name text, separator text) TO jsiekierski;
GRANT EXECUTE ON FUNCTION janek.split_table (table_schema text, table_name text, column_name text, separator text) TO wsd_users;
-- janek.countinstring function code:
CREATE OR REPLACE FUNCTION janek.countinstring(text, text)
RETURNS integer AS
$BODY$
SELECT(Length($1) - Length(REPLACE($1, $2, ''))) / Length($2) ;
$BODY$
LANGUAGE sql IMMUTABLE
COST 100;
ALTER FUNCTION janek.countinstring(text, text)
OWNER TO jsiekierski;