mysql将一个值拆分成多行,MYSQL – 将数据拆分为多行

我使用从IMDB收集信息并将它们传输到MYSQL数据库的应用程序导入了一些数据.

看起来字段没有被标准化并且在1个字段内包含许多值

例如:

Table Movie

MovieID Movie_Title Written_By

1 Movie1 Person1, Person2

2 Movie2 Person3

3 Movie3 Person4, Person2, Person6

有没有办法分离值,并将它们插入到另一个表中这样的东西,没有任何重复?

Table Writers

WriterID Written_By MovieId

1 Person1 1

2 Person2 1

3 Person3 3

我做了一些谷歌搜索,发现我应该使用PHP处理这些数据.

但我根本不了解PHP.

无论如何只使用MYSQL转换这些数据?

解决方法:

你可以使用一个使用游标来解决这个问题的存储过程,但它不是很优雅,但也不是以逗号分隔的编写器列表!

如果以下代码存在于类似问题中,但您最好彻底检查一下.

希望能帮助到你 :)

mysql> select * from movies_unf;

+---------+-------------+------------------------------------------------------+

| movieID | movie_title | written_by |

+---------+-------------+------------------------------------------------------+

| 1 | movie1 | person1, person2 |

| 2 | movie2 | person3 |

| 3 | movie3 | person4, person2, person6 |

| 4 | movie4 | person4, person4, person1, person2, person1,person8, |

| 5 | movie1 | person1, person2 |

+---------+-------------+------------------------------------------------------+

5 rows in set (0.00 sec)

call normalise_movies_unf();

mysql> select * from movies;

+----------+--------+

| movie_id | title |

+----------+--------+

| 1 | movie1 |

| 2 | movie2 |

| 3 | movie3 |

| 4 | movie4 |

+----------+--------+

4 rows in set (0.00 sec)

mysql> select * from writers;

+-----------+---------+

| writer_id | name |

+-----------+---------+

| 1 | person1 |

| 2 | person2 |

| 3 | person3 |

| 4 | person4 |

| 6 | person6 |

| 12 | person8 |

+-----------+---------+

6 rows in set (0.00 sec)

mysql> select * from movie_writers;

+----------+-----------+

| movie_id | writer_id |

+----------+-----------+

| 1 | 1 |

| 1 | 2 |

| 2 | 3 |

| 3 | 2 |

| 3 | 4 |

| 3 | 6 |

| 4 | 1 |

| 4 | 2 |

| 4 | 4 |

| 4 | 12 |

+----------+-----------+

10 rows in set (0.00 sec)

示例表

drop table if exists movies_unf;

create table movies_unf

(

movieID int unsigned not null primary key,

movie_title varchar(255) not null,

written_by varchar(1024) not null

)engine=innodb;

insert into movies_unf values

(1,'movie1','person1, person2'),

(2,'movie2','person3'),

(3,'movie3','person4, person2, person6'),

(4,'movie4','person4, person4, person1, person2, person1,person8,'), -- dodgy writers

(5,'movie1','person1, person2'); -- dodgy movie

drop table if exists movies;

create table movies

(

movie_id int unsigned not null auto_increment primary key,

title varchar(255) unique not null

)engine=innodb;

drop table if exists writers;

create table writers

(

writer_id int unsigned not null auto_increment primary key,

name varchar(255) unique not null

)engine=innodb;

drop table if exists movie_writers;

create table movie_writers

(

movie_id int unsigned not null,

writer_id int unsigned not null,

primary key (movie_id, writer_id)

)engine=innodb;

存储过程

drop procedure if exists normalise_movies_unf;

delimiter #

create procedure normalise_movies_unf()

begin

declare v_movieID int unsigned default 0;

declare v_movie_title varchar(255);

declare v_writers varchar(1024);

declare v_movie_id int unsigned default 0;

declare v_writer_id int unsigned default 0;

declare v_name varchar(255);

declare v_csv_done tinyint unsigned default 0;

declare v_csv_idx int unsigned default 0;

declare v_done tinyint default 0;

declare v_cursor cursor for

select distinct movieID, movie_title, written_by from movies_unf;

declare continue handler for not found set v_done = 1;

start transaction;

open v_cursor;

repeat

fetch v_cursor into v_movieID, v_movie_title, v_writers;

set v_movie_title = trim(v_movie_title);

set v_writers = replace(v_writers,' ', '');

-- insert the movie

insert ignore into movies (title) values (v_movie_title);

select movie_id into v_movie_id from movies where title = v_movie_title;

-- split the out the writers and insert

set v_csv_done = 0;

set v_csv_idx = 1;

while not v_csv_done do

set v_name = substring(v_writers, v_csv_idx,

if(locate(',', v_writers, v_csv_idx) > 0,

locate(',', v_writers, v_csv_idx) - v_csv_idx,

length(v_writers)));

set v_name = trim(v_name);

if length(v_name) > 0 then

set v_csv_idx = v_csv_idx + length(v_name) + 1;

insert ignore into writers (name) values (v_name);

select writer_id into v_writer_id from writers where name = v_name;

insert ignore into movie_writers (movie_id, writer_id) values (v_movie_id, v_writer_id);

else

set v_csv_done = 1;

end if;

end while;

until v_done end repeat;

close v_cursor;

commit;

truncate table movies_unf;

end#

delimiter ;

编辑

修改过的sproc,以便它不会跳过关键值!

drop procedure if exists normalise_movies_unf;

delimiter #

create procedure normalise_movies_unf()

begin

declare v_movieID int unsigned default 0;

declare v_movie_title varchar(255);

declare v_writers varchar(1024);

declare v_movie_id int unsigned default 0;

declare v_writer_id int unsigned default 0;

declare v_name varchar(255);

declare v_csv_done tinyint unsigned default 0;

declare v_csv_idx int unsigned default 0;

declare v_done tinyint default 0;

declare v_cursor cursor for

select distinct movieID, movie_title, written_by from movies_unf;

declare continue handler for not found set v_done = 1;

start transaction;

open v_cursor;

repeat

fetch v_cursor into v_movieID, v_movie_title, v_writers;

set v_movie_title = trim(v_movie_title);

set v_writers = replace(v_writers,' ', '');

-- insert the movie

if not exists (select 1 from movies where title = v_movie_title) then

insert ignore into movies (title) values (v_movie_title);

end if;

select movie_id into v_movie_id from movies where title = v_movie_title;

-- split the out the writers and insert

set v_csv_done = 0;

set v_csv_idx = 1;

while not v_csv_done do

set v_name = substring(v_writers, v_csv_idx,

if(locate(',', v_writers, v_csv_idx) > 0,

locate(',', v_writers, v_csv_idx) - v_csv_idx,

length(v_writers)));

set v_name = trim(v_name);

if length(v_name) > 0 then

set v_csv_idx = v_csv_idx + length(v_name) + 1;

if not exists (select 1 from writers where name = v_name) then

insert ignore into writers (name) values (v_name);

end if;

select writer_id into v_writer_id from writers where name = v_name;

insert ignore into movie_writers (movie_id, writer_id) values (v_movie_id, v_writer_id);

else

set v_csv_done = 1;

end if;

end while;

until v_done end repeat;

close v_cursor;

commit;

truncate table movies_unf;

end#

delimiter ;

标签:mysql,split

来源: https://codeday.me/bug/20190926/1819395.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值