/*
* 通过xml解析Excel,合并文件
* 支持大文件合并,经测试共计100m的三个文件合并需要7分钟,12m内存
* 要求:所有被合并的Excel header结构必须一致
*/
public static function combineXlsx($source_xlsx_files, $dest_xlsx_file_name)
{
if (empty($source_xlsx_files)) throw new \Exception('文件不能为空');
$files = [];
if (!is_array($source_xlsx_files) && file_exists($source_xlsx_files)) $files = [$source_xlsx_files];
//如果是数组,取所有文件(支持多维数组)
if (is_array($source_xlsx_files)) {
array_walk_recursive($source_xlsx_files, function($v) use (&$files){
if (file_exists($v)) {
$files[] = $v;
}
});
}
if (empty($files)) throw new \Exception('文件不能为空');
if (file_exists($dest_xlsx_file_name)) unlink($dest_xlsx_file_name);
$writer = new \XLSXWriter();
$sheet_name = 'sheet1';
// 取header_types
$first_file = $files[0];
copy($first_file, $first_file.'.zip');
system("rm -rf {$first_file}_dir");
system("unzip -o {$first_file}.zip -d {$first_file}_dir");
$line = stream_get_line(fopen($first_file.'_dir/xl/worksheets/sheet1.xml', 'r'), 102400, '</row>');
preg_match_all('/\<[a-z]\>(.*?)\<\/[a-z]\>/', $line, $row);
$header_types = array_combine($row[1], array_fill(0, count($row[1]), 'GENERAL'));
unlink($first_file.'.zip');
system("rm -rf {$first_file}_dir");
$writer->writeSheetHeader($sheet_name, $header_types);
/*
* 1、将Excel文件后缀改为zip
* 2、将zip解压到当前目录
* 3、按行取出sheet1文件内容并解析。对于内容为空的单元格,Excel处理为单闭合标签,如<c/>,需要替换为对称闭合标签,如<c></c>。否则会损失一部分值为空的单元格,造成列对不齐
* 4、写Excel
* 5、清理解压的zip文件和文件夹
*/
foreach ($files as $path) {
$pathinfo = pathinfo($path);
$zip_path = $pathinfo['dirname'] . "/{$pathinfo['filename']}.zip";
copy($path, $zip_path);
$xml_path = $pathinfo['dirname'] . "/{$pathinfo['filename']}";
if (is_dir($xml_path)) system("rm -rf {$xml_path}");
system("unzip -o {$zip_path} -d {$xml_path}");
$sheet_path = $xml_path . "/xl/worksheets/sheet1.xml";
$handle = fopen($sheet_path, 'r');
if ($handle) {
$i = 0;
while (!feof($handle)) {
$line = stream_get_line($handle, 102400, '<row');
if ($i > 1) {
$line = preg_replace('/<c\s\w+="\w+"\s\w+="\d+"\/>/', '<c></c>', $line);
preg_match_all('/<[a-z]>(.*?)<\/[a-z]>/', $line, $row);
foreach ($row[1] as $k => &$r) {
if ($k >= count($header_types)) {
unset($row[$k]);
} else {
if (is_numeric($r)) $r *= 1;
}
}
$writer->writeSheetRow($sheet_name, $row[1]);
}
$i++;
}
fclose($handle);
unlink($zip_path);
system("rm -rf {$xml_path}");
}
}
$writer->writeToFile($dest_xlsx_file_name);
return $dest_xlsx_file_name;
}
通过xml解析、合并百兆Excel大文件,消耗内存低
最新推荐文章于 2023-03-14 01:39:32 发布