PHP批量导出100万条数据解决思路:
1、从数据库中读取要进行数据量分批读取,以防变量内存溢出
2、选择数据保存文件格式是csv文件,以方便导出之后的阅读、导入数据库等操作
3、以防不方便excel读取csv文件,需要104W之前就得把数据分割进行多个csv文件保存
4、多个csv文件输出给用户下载是不友好的,还需要把多个csv文件进行压缩,最后提供给一个ZIP格式的压缩包给用户下载就好。
<?php
/*
*
* @param string $sql 需要导出的数据SQL
* @param string $mark 生成文件的名字前缀
* @param bool $is_multiple 是否要生成多个CSV文件
* @param int $limit 每隔$limit行,刷新输出buffer,以及每个CSV文件行数限制
*
*/
function putCsv($sql, $mark, $is_multiple=0, $limit=100000)
{
set_time_limit(0);
header('Content-Type: application/vnd.ms-excel;charset=utf-8');
header('Content-Disposition: attachment;filename="' . $mark . '"');
header('Cache-Control: max-age=0');
// 每隔$limit行,刷新一下输出buffer,也可以控制多csv文件生成的行数限制
// buffer计数器
$file_num = 0; //文件名计数器
$row_num = 0;
$fileNameArr = array();
// 逐行取出数据,不浪费内存
$fp = fopen($mark .'_'.$file_num .'.csv', 'w'); //生成临时文件
$fileNameArr[] = $mark .'_'.$file_num .'.csv';
fwrite($fp, chr(0xEF).chr(0xBB).chr(0xBF));//转码,防止乱码
foreach (query($sql) as $a) {
$row_num++;
if ($limit <= $row_num) {
//刷新一下输出buffer,防止由于数据过多造成问题
if(ob_get_level()>0) ob_flush(); flush();
$row_num = 0;
$file_num++;
if($is_multiple>0){
fclose($fp); //每生成一个文件关闭
$fp = fopen($mark .'_'.$file_num .'.csv', 'w');
$fileNameArr[] = $mark .'_'.$file_num .'.csv';
fwrite($fp, chr(0xEF).chr(0xBB).chr(0xBF));//转码,防止乱码
}
}
fputcsv($fp, $a);
}
fclose($fp); //每生成一个文件关闭
getZip('test',$fileNameArr); //生成zip文件
}
$sql = 'SELECT id FROM `tablename` where id < 1200000';
$mark = 'test';//生成文件前缀
putCsv($sql,$mark);
?>
excel单元格数字变成字符串方法 '1001010002400000,数值前加单引号
当excel文件中的数据设置过,导入的数据和excel显示的不同,单个修改数据个单元格格式又很麻烦,这时把excel另存为csv格式即可,用文本方式打开看数据
<?php
require 'vendor/autoload.php';
use League\Csv\Reader;
use League\Csv\Writer;
/**
* Read CSV file sample.
*/
//load the CSV document from a file path
$csv = Reader::createFromPath('D:\wamp\www\yixun\test.csv', 'r');
$csv->setHeaderOffset(0);
$csv->stripBom(true);
$header = $csv->getHeader();
$records = $csv->getRecords();
echo $csv->getContent(); //returns the CSV document as a string
foreach ($records as $row) {
}
/**
* Write CSV file sample.
*/
$header = [ 'Site', 'Description', 'URL' ];
$records = [
[ 'League/Csv', 'The PHP League composer official site', 'https://csv.thephpleague.com/' ],
[ 'Symfony', 'Symfony official site', 'https://symfony.com' ],
[ 'Laravel', 'Laravel official site', 'https://laravel.com' ],
];
$writer = Writer::createFromPath('/data/www/html/out.csv', 'w');
//-------------utf8中文乱码解决 begin-------------------
$defaultNewline = $writer->getNewline();
$writer->setNewline("");
$writer->insertOne([Writer::BOM_UTF8]);
$writer->setNewline($defaultNewline);
//-------------utf8中文乱码解决 end-------------------
$writer->insertOne($header);
$writer->insertAll($records);
?>
解决PHP生成UTF-8编码的CSV文件用Excel打开乱码的问题
$tempFileName = '/data/www/html/a.csv';
$fp = fopen($tempFileName, 'a+');
fwrite($fp, chr(0xEF).chr(0xBB).chr(0xBF));
php读取淘宝数据包csv文件 unicode
public static function fopen_utf8($filename)
{
$encoding = '';
$handle = fopen($filename, 'r');
$bom = fread($handle, 2);
rewind($handle);
if ($bom === chr(0xff) . chr(0xfe) || $bom === chr(0xfe) . chr(0xff)) {
// UTF16 Byte Order Mark present
$encoding = 'UTF-16';
} else {
$file_sample = fread($handle, 1000) + 'e'; //read first 1000 bytes
rewind($handle);
$encoding = mb_detect_encoding($file_sample, 'UTF-8, UTF-7, ASCII, EUC-JP,SJIS, eucJP-win, SJIS-win, JIS, ISO-2022-JP');
}
if ($encoding) {
stream_filter_append($handle, 'convert.iconv.' . $encoding . '/UTF-8');
}
return ($handle);
}
//得到csv data
public function getCsvFileData($filename)
{
$handle = Helper::fopen_utf8($filename, "r");
$csvData = array();
for ($j = 1; !feof($handle); $j++) {
$line = fgets($handle);
$val = explode("\t", $line);
if ($j > 1) {
$line = array(
'title' => $val[0],
//'cate' => $val[1],
'price' => $val[2], //价格
'inventory' => trim($val [3]), //数量
'descript' => trim($val [4]), //描述
'import_images' => trim($val[5]), //图片
'sku' => trim($val[6]), //商家编码
'weight' => trim($val[7]), //重量
);
$csvData[] = $line;
}
}
return $csvData;
}
//校验数据
public function checkProductImportData($adminId, $csvData)
{
$checkArray = array();
foreach ($csvData as $product) {
$checkArray[] = $product['sku'];
}
$list = 'db中的sku列表在文件sku中的'
$result = array('status' => 10000, 'msg' => '');
if (count($list) == count($checkArray)) {
$result['status'] = 2;
$result['msg'] = '文件里的商品已经导入过';
} elseif (count($list) > 0) {
$dbhasskulist = array();
$alllist = self::model()->findAllSkuFromDb();
foreach ($alllist as $info) {
if ($info->sku && in_array($info->sku, $checkArray)) {
$dbhasskulist[] = $info->sku;
}
}
$result['status'] = 1;
$result['msg'] = "商品货号重复:" . implode(",", array_unique($dbhasskulist));
}
return $result;
}