由于很多业务表因为历史原因或者性能原因,都使用了违反第一范式的设计模式。即同一个列中存储了多个属性值
具体结构见下表。这种模式下,应用常常需要将这个列依据分隔符进行分割,并得到列转行的结果。
表A:
那我们所期望的得到的结果是什么呢?如下所示:
这时候就需要用到列转行来处理表了
一、创建待处理的表
二、创建最终输出的表
三、利用mysql内部连续列表help_topic_id作为循环递增表
;
简要说明:
这个join最基本原理是笛卡尔积。通过这个方式来实现循环。
以下是具体问题分析:
length(a.Size) - length(replace(a.mSize,',',''))+1 表示了,按照逗号分割后,改列拥有的数值数量,下面简称n
join过程的伪代码:
根据ID进行循环
{
判断:i 是否 <= n
{
获取最靠近第 i 个逗号之前的数据, 即 substring_index(substring_index(a.mSize,',',b.ID),',',-1)
i = i +1
}
ID = ID +1
}
以下给出一个实际环境中,拆分表格的demo
具体结构见下表。这种模式下,应用常常需要将这个列依据分隔符进行分割,并得到列转行的结果。
表A:
id value
1 tiny,small,big
2 small,medium
3 tiny,big
那我们所期望的得到的结果是什么呢?如下所示:
表B:
id value
1 tiny
1 small
1 big
2 small
2 medium
3 tiny
3 big
这时候就需要用到列转行来处理表了
一、创建待处理的表
create table A (id int ,value varchar(100));
insert into A values (1,'tiny,small,big');
insert into A values (2,'small,medium');
insert into A values (3,'tiny,big');
二、创建最终输出的表
create table B (id int ,value varchar(100));
三、利用mysql内部连续列表help_topic_id作为循环递增表
select a.id,substring_index(substring_index(a.value,',',b.help_topic_id+1),',',-1)
from
A a
join
mysql.help_topic b
on b.help_topic_id < (length(a.value) - length(replace(a.value,',',''))+1)
;
简要说明:
这个join最基本原理是笛卡尔积。通过这个方式来实现循环。
以下是具体问题分析:
length(a.Size) - length(replace(a.mSize,',',''))+1 表示了,按照逗号分割后,改列拥有的数值数量,下面简称n
join过程的伪代码:
根据ID进行循环
{
判断:i 是否 <= n
{
获取最靠近第 i 个逗号之前的数据, 即 substring_index(substring_index(a.mSize,',',b.ID),',',-1)
i = i +1
}
ID = ID +1
}
以下给出一个实际环境中,拆分表格的demo
#==================0. 创建表====================================
DROP TABLE IF EXISTS `t_actor_tag`;
CREATE TABLE `t_actor_tag` (
`id` int(20) NOT NULL AUTO_INCREMENT,
`actor` varchar(100) DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `actor` (`actor`)
) ENGINE=MyISAM AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
DROP TABLE IF EXISTS `t_director_tag`;
CREATE TABLE `t_director_tag` (
`id` int(20) NOT NULL AUTO_INCREMENT,
`director` varchar(50) DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `director` (`director`)
) ENGINE=MyISAM AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
DROP TABLE IF EXISTS `t_district_tag`;
CREATE TABLE `t_district_tag` (
`id` int(20) NOT NULL AUTO_INCREMENT,
`district` varchar(50) DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `district` (`district`)
) ENGINE=MyISAM AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
DROP TABLE IF EXISTS `t_style_tag`;
CREATE TABLE `t_style_tag` (
`id` int(20) NOT NULL AUTO_INCREMENT,
`style` varchar(50) DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `style` (`style`)
) ENGINE=MyISAM AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
DROP TABLE IF EXISTS `t_years_tag`;
CREATE TABLE `t_years_tag` (
`id` int(20) NOT NULL AUTO_INCREMENT,
`years` varchar(50) DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `years` (`years`)
) ENGINE=MyISAM AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
DROP TABLE IF EXISTS `t_video_tag_relations`;
CREATE TABLE `t_video_tag_relations` (
`id` int(20) NOT NULL,
`tagid` varchar(50) DEFAULT NULL,
`tagtype` int(20) NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
DROP TABLE IF EXISTS `tmp_relations`;
CREATE TABLE `tmp_relations` (
`id` int(20) NOT NULL,
`tag` varchar(50) DEFAULT NULL,
`tagtype` varchar(10) DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
#==================1. 将标签存入对应的标签表中====================================
INSERT INTO t_actor_tag(actor) (select distinct substring_index(substring_index(a.actor,',',b.help_topic_id + 1),',',-1) from
douban3 a
join
mysql.help_topic b
on b.help_topic_id < (length(a.actor) - length(replace(a.actor,',',''))+1)
);
INSERT INTO t_director_tag(director) (select distinct substring_index(substring_index(a.director,',',b.help_topic_id + 1),',',-1) from
douban3 a
join
mysql.help_topic b
on b.help_topic_id < (length(a.director) - length(replace(a.director,',',''))+1)
);
INSERT INTO t_district_tag(district) (select distinct substring_index(substring_index(a.district,',',b.help_topic_id + 1),',',-1) from
douban3 a
join
mysql.help_topic b
on b.help_topic_id < (length(a.district) - length(replace(a.district,',',''))+1)
);
INSERT INTO t_style_tag(style) (select distinct substring_index(substring_index(a.style,',',b.help_topic_id + 1),',',-1) from
douban3 a
join
mysql.help_topic b
on b.help_topic_id < (length(a.style) - length(replace(a.style,',',''))+1)
);
INSERT INTO t_years_tag(years) (select distinct substring_index(substring_index(a.years,',',b.help_topic_id + 1),',',-1) from
douban3 a
join
mysql.help_topic b
on b.help_topic_id < (length(a.years) - length(replace(a.years,',',''))+1)
);
#==================2. 依据标签将关联关系表存放到一个临时表中====================================
INSERT INTO tmp_relations(tagtype, id, tag) (select 1, a.id,substring_index(substring_index(a.actor,',',b.help_topic_id + 1),',',-1) from
douban3 a
join
mysql.help_topic b
on b.help_topic_id < (length(a.actor) - length(replace(a.actor,',',''))+1)
);
INSERT INTO tmp_relations(tagtype, id, tag) (select 2, a.id,substring_index(substring_index(a.director,',',b.help_topic_id + 1),',',-1) from
douban3 a
join
mysql.help_topic b
on b.help_topic_id < (length(a.director) - length(replace(a.director,',',''))+1)
);
INSERT INTO tmp_relations(tagtype, id, tag) (select 3, a.id,substring_index(substring_index(a.district,',',b.help_topic_id + 1),',',-1) from
douban3 a
join
mysql.help_topic b
on b.help_topic_id < (length(a.district) - length(replace(a.district,',',''))+1)
);
INSERT INTO tmp_relations(tagtype, id, tag) (select 4, a.id,substring_index(substring_index(a.style,',',b.help_topic_id + 1),',',-1) from
douban3 a
join
mysql.help_topic b
on b.help_topic_id < (length(a.style) - length(replace(a.style,',',''))+1)
);
INSERT INTO tmp_relations(tagtype, id, tag) (select 5, a.id,substring_index(substring_index(a.years,',',b.help_topic_id + 1),',',-1) from
douban3 a
join
mysql.help_topic b
on b.help_topic_id < (length(a.years) - length(replace(a.years,',',''))+1)
);
#==================3. 将临时表中的内容转移到关联关系表====================================
drop procedure if exists sp_name;
create procedure sp_name()
BEGIN
DECLARE Done INT DEFAULT 0;
DECLARE t_id INT;
DECLARE t_tag VARCHAR(30);
DECLARE t_tagtype INT(10);
/* 声明游标 */
DECLARE rs CURSOR FOR SELECT id, tag , tagtype FROM tmp_relations;
/* 异常处理 */
DECLARE CONTINUE HANDLER FOR SQLSTATE '02000' SET Done = 1;
/* 打开游标 */
OPEN rs;
/* 逐个取出当前记录字段的值,需要进行类型判断 */
FETCH NEXT FROM rs INTO t_id, t_tag, t_tagtype;
/* 遍历数据表 */
REPEAT
IF NOT Done THEN
IF t_tagtype=1 THEN
insert into t_video_tag_relations(id, tagid, tagtype) select t_id,id, 1 from t_actor_tag where actor = concat('',t_tag,'');
ELSEIF t_tagtype=2 THEN
insert into t_video_tag_relations(id, tagid, tagtype) select t_id,id, 2 from t_director_tag where director = concat('',t_tag,'');
ELSEIF t_tagtype=3 THEN
insert into t_video_tag_relations(id, tagid, tagtype) select t_id,id, 3 from t_district_tag where district = concat('',t_tag,'');
ELSEIF t_tagtype=4 THEN
insert into t_video_tag_relations(id, tagid, tagtype) select t_id,id, 4 from t_style_tag where style = concat('',t_tag,'');
ELSEIF t_tagtype=5 THEN
insert into t_video_tag_relations(id, tagid, tagtype) select t_id,id, 5 from t_years_tag where years = concat('',t_tag,'');
END IF;
END IF;
FETCH NEXT FROM rs INTO t_id, t_tag, t_tagtype;
UNTIL Done END REPEAT;
/* 关闭游标 */
CLOSE rs;
END;
call sp_name();
#==================4. 删除临时表格====================================
#DROP TABLE IF EXISTS `tmp_relations`;
#当作为sql脚本用于mysql调用时,存储过程需要写成这样:
delimiter //
create procedure sp_name()
BEGIN
DECLARE Done INT DEFAULT 0;
DECLARE t_id INT;
DECLARE t_tag VARCHAR(30);
DECLARE t_tagtype INT(10);
/* 声明游标 */
DECLARE rs CURSOR FOR SELECT id, tag , tagtype FROM tmp_relations;
/* 异常处理 */
DECLARE CONTINUE HANDLER FOR SQLSTATE '02000' SET Done = 1;
/* 打开游标 */
OPEN rs;
/* 逐个取出当前记录字段的值,需要进行类型判断 */
FETCH NEXT FROM rs INTO t_id, t_tag, t_tagtype;
/* 遍历数据表 */
REPEAT
IF NOT Done THEN
IF t_tagtype=1 THEN
insert into t_video_tag_relations(id, tagid, tagtype) select t_id,id, 1 from t_actor_tag where actor = concat('',t_tag,'');
ELSEIF t_tagtype=2 THEN
insert into t_video_tag_relations(id, tagid, tagtype) select t_id,id, 2 from t_director_tag where director = concat('',t_tag,'');
ELSEIF t_tagtype=3 THEN
insert into t_video_tag_relations(id, tagid, tagtype) select t_id,id, 3 from t_district_tag where district = concat('',t_tag,'');
ELSEIF t_tagtype=4 THEN
insert into t_video_tag_relations(id, tagid, tagtype) select t_id,id, 4 from t_style_tag where style = concat('',t_tag,'');
ELSEIF t_tagtype=5 THEN
insert into t_video_tag_relations(id, tagid, tagtype) select t_id,id, 5 from t_years_tag where years = concat('',t_tag,'');
END IF;
END IF;
FETCH NEXT FROM rs INTO t_id, t_tag, t_tagtype;
UNTIL Done END REPEAT;
/* 关闭游标 */
CLOSE rs;
END;//
delimiter ;
call sp_name();