1

我正在尝试将数据从多值数据库(Unidata)导出到 MySQL。假设我的源数据是一个人的 ID 号、他们的名字和他们居住的所有州。states 字段是一个多值字段,我正在导出它们,以便该字段中的不同值由一个分隔~。示例提取如下所示:

"1234","Sally","NY~NJ~CT"
"1235","Dave","ME~MA~FL"
"3245","Fred","UT~CA"
"2344","Sue","OR"

我已将此数据加载到临时表中

Table:staging
Column 1: personId
Column 2: name
Column 3: states

我想要做的是使用一个过程将这些数据分成两个表:一个人员表和一个状态表。一个人在状态表中可以有许多条目:

Table 1: persons
Column 1: id
Column 2: name

Table 2: states
Column 1: personId
Column 2: state

我的过程从临时表中获取数据并将其转储到表 1 中就好了。但是,我有点迷失如何拆分数据并将其发送到表 2。Sally 需要在 states 表(NY、NJ、CT)中有三个条目,Dave 有 3 个,Fred 有 2 个而 Sue 将有 1 (OR)。关于如何做到这一点的任何想法?

4

1 回答 1

5

尝试这样的事情:http: //pastie.org/1213943

-- TABLES

drop table if exists staging;
create table staging
(
person_id int unsigned not null primary key,
name varchar(255) not null,
states_csv varchar(1024)
)
engine=innodb;

drop table if exists persons;
create table persons
(
person_id int unsigned not null primary key,
name varchar(255) not null
)
engine=innodb;

drop table if exists states;
create table states
(
state_id tinyint unsigned not null auto_increment primary key, -- i want a nice new integer based PK
state_code varchar(3) not null unique, -- original state code from staging
name varchar(255) null
)
engine=innodb;

/*
you might want to make the person_states primary key (person_id, state_id) depending on 
your queries as this is currently optimised for queries like - select all the people from NY
*/

drop table if exists person_states;
create table person_states
(
state_id tinyint unsigned not null,
person_id int unsigned not null,
primary key(state_id, person_id),
key (person_id)
)
engine=innodb;


-- STORED PROCEDURES

drop procedure if exists load_staging_data;

delimiter #

create procedure load_staging_data()
proc_main:begin

truncate table staging;

-- assume this is done by load data infile...

set autocommit = 0;

insert into staging values
(1234,'Sally','NY~NJ~CT'),
(1235,'Dave','ME~MA~FL'),
(3245,'Fred','UT~CA'),
(2344,'Sue','OR'),
(5555,'f00','OR~NY');

commit;

end proc_main #

delimiter ;


drop procedure if exists cleanse_map_staging_data;

delimiter #

create procedure cleanse_map_staging_data()
proc_main:begin

declare v_cursor_done tinyint unsigned default 0;

-- watch out for variable names that have the same names as fields !!

declare v_person_id int unsigned;

declare v_states_csv varchar(1024);
declare v_state_code varchar(3);
declare v_state_id tinyint unsigned;

declare v_states_done tinyint unsigned;
declare v_states_idx int unsigned;

declare v_staging_cur cursor for select person_id, states_csv from staging order by person_id;
declare continue handler for not found set v_cursor_done = 1;

-- do the person data

set autocommit = 0;

insert ignore into persons (person_id, name)
  select person_id, name from staging order by person_id;

commit;

-- ok now we have to use the cursor !!

set autocommit = 0; 

open v_staging_cur;
repeat

  fetch v_staging_cur into v_person_id, v_states_csv;

  -- clean up the data (for example)

  set v_states_csv = upper(trim(v_states_csv));

  -- split the out the v_states_csv and insert

  set v_states_done = 0;       
  set v_states_idx = 1;

  while not v_states_done do

    set v_state_code = substring(v_states_csv, v_states_idx, 
      if(locate('~', v_states_csv, v_states_idx) > 0, 
        locate('~', v_states_csv, v_states_idx) - v_states_idx, 
        length(v_states_csv)));

      set v_state_code = trim(v_state_code);

      if length(v_state_code) > 0 then

        set v_states_idx = v_states_idx + length(v_state_code) + 1;

        -- add the state if it doesnt already exist
        insert ignore into states (state_code) values (v_state_code);

        select state_id into v_state_id from states where state_code = v_state_code;

        -- add the person state
        insert ignore into person_states (state_id, person_id) values (v_state_id, v_person_id);

      else
        set v_states_done = 1;
      end if;

  end while;

until v_cursor_done end repeat;

close v_staging_cur;

commit;

end proc_main #


delimiter ;


-- TESTING


call load_staging_data();

select * from staging;

call cleanse_map_staging_data();

select * from states order by state_id;
select * from persons order by person_id;
select * from person_states order by state_id, person_id;
于 2010-10-11T20:54:13.733 回答