今天要做个紧急需求,算法算出了400万的数据(只包含用户nick),需要导入定制化的数据库
mysql上32个数据库(icluster_1到icluster_32,根据nick的md5值求余然后分库的)
表结构:
-- Per-user preference store. The same table exists in each of the 32 shard
-- databases (icluster_1..icluster_32); the shard is chosen by the application
-- from md5(nick), so this DDL describes all 32 copies.
CREATE TABLE `preferences` (
-- nick is the natural primary key; gbk_bin (binary collation) makes lookups
-- byte-exact / case-sensitive.
`nick` char(32) CHARACTER SET gbk COLLATE gbk_bin NOT NULL,
-- soft on/off flag for the row; 1 = enabled by default
`enable` tinyint(1) DEFAULT '1',
-- semicolon-separated "key:value" preference string (importer appends ";switch:1")
`preference` varchar(1024) DEFAULT '',
PRIMARY KEY (`nick`)
) ENGINE=MyISAM DEFAULT CHARSET=gbk
由于分库的业务，貌似无法用mysql 的load命令或者mysqldump去导入,只能写php脚本去循环insert啦
<?php
// One-off import: reads a GBK-encoded list of user nicks (one per line) and
// upserts each into the sharded preferences tables. The target database
// (icluster_1..icluster_32) is derived from the last two hex digits of the
// nick's MD5, matching the application's sharding rule.
header("content-type:text/html; charset=gbk");
set_time_limit(0);                      // ~4M rows: disable the script timeout
ini_set('memory_limit', '1122M');

$con = mysql_connect('ip', 'username', 'password');
if (!$con)
    exit("fail");
// Table charset is gbk; make the connection charset match so nicks round-trip.
mysql_query("set names 'gbk'", $con);

$file = fopen("/home/admin/personal_combo_nick_gbk", "r");
if (!$file)
    exit("fail");

$i = 0;
$failed = 0;
// fgets() returns false at EOF — avoids the feof() extra-iteration pitfall.
while (($raw = fgets($file)) !== false) {
    $line = trim($raw);
    if ($line === '')
        continue;
    $i++;
    // Shard: last 2 hex chars of md5 -> 0..255, % 32 -> icluster_1..icluster_32.
    $shard = 1 + (hexdec(substr(md5($line), -2)) % 32);
    mysql_select_db("icluster_" . $shard, $con);
    // Escape the nick: a quote or backslash in the data would otherwise break
    // (or inject into) the statement.
    $nick = mysql_real_escape_string($line, $con);
    $result = mysql_query("INSERT INTO preferences(nick,enable,preference) VALUES('{$nick}',1,'your data;switch:1') ON DUPLICATE KEY UPDATE enable=1,preference=concat(preference,';switch:1')");
    if (!$result) {
        // Don't abort the whole run on one bad row; count and log it instead
        // of silently dropping it like the unchecked call used to.
        $failed++;
        error_log("insert failed for nick {$line}: " . mysql_error($con));
    }
    // Progress mail at a few milestones so the operator can watch the run.
    // (Checked only on non-empty lines so a milestone mail cannot repeat.)
    if (in_array($i, array(1, 1000, 10000, 50000, 100000, 1000000, 3000000))) {
        shell_exec("/home/admin/mail/bin/email -V -smtp-server server_ip -smtp-port 25 -html -from-addr your_mail -from-name taozi -no-encoding -subject \"已经处理{$i}条\" your_mail <today.html");
    }
}
fclose($file);
mysql_close($con);
?>
时间还是比较长的,用了40分钟:(