php
的phpspreadsheet
解析excel
是真的垃圾…非常耗内存,比如一个10W
条的excel
文件,内存一定溢出,现在的解决方案是用python
的xlrd
(比openpyxl
快的多) 解析excel
到文件系统,php读取文件按照业务配置文件校验单元格并写入数据库。
ajax
上传excel
文件。php
执行python
脚本解析excel到文件系统 (>/dev/null &
为了立即执行exec
的下一行代码),ajax
立刻返回。
exec(PYTHON." " . realpath(ROOT_PATH . '/../') . '/shell/excel.py' . " {$file} >/dev/null &");
- 设置定时器每
3秒
请求一次批量导入的状态。
效果如下:业务导入代码继承Product
抽象类即可。
解析excel并生成文件
#解析excel并生成文件
import xlrd,time,json,os,sys
# if(len(sys.argv)!=2) :
# print('需要1个参数')
# print('***********************')
# print('用法: python3 '+sys.argv[0] + ' 文件名 \n')
# sys.exit(2)
# 数字转字符
def changeNumToChar(s):
'''
把数字转换成相应的字符,1-->'A' 27-->'AA'
'''
a = [chr(i) for i in range(65, 91)]
ss = ''
b = []
if s <= 26:
b = [s]
else:
while s > 26:
c = s // 26 # 商
d = s % 26 # 余数
b = b + [d]
s = c
b = b + [s]
b.reverse()
for i in b:
ss = ss + a[i - 1]
return ss
full = sys.argv[1] #文件名
if not os.path.exists(full):
sys.exit(3)
filename = os.path.basename(full)
statusname = sys.path[0] + '/status/lock' #锁文件 保证只有一个请求处理excel
with open(statusname, 'w') as fo:
fo.write(json.dumps({'file':filename,'python': 'start'}))
start = time.time()
wb = xlrd.open_workbook(full)
sheet = wb.sheets()[0]
num = 0
txtname = sys.path[0]+'/txt/' + filename[0:filename.rfind('.')]
if os.path.exists(txtname):
os.remove(txtname)
with open(txtname, 'a',encoding='utf8') as file_object:
for i in range(1,sheet.nrows):
sub_data = {} #行数据
for j in range(0,sheet.ncols):
sub_data[changeNumToChar(j+1)] = sheet.cell_value(i, j)
# print(sub_data)
file_object.write(json.dumps(sub_data)+"\r\n")
end = time.time()
#导入成功,生成txt文件之后 python解析excel成功 状态写入lock文件
with open(statusname, 'w') as foa:
foa.write(json.dumps({'file':filename,'python': 'ok'}))
sys.exit(1)
php处理文件parse.php
<?php
include_once realpath(__DIR__.'/bootstrap.php');
include_once realpath(__DIR__.'/Factory.php');
set_time_limit(0);
// if(count($argv)!==3){
// $msg=<<<eof
// 需要2个参数
// ***********************
// 用法: php $argv[0] 文件名 类型
// eof;
// echo $msg.PHP_EOL;
// die;
// }
$file = $argv[1]; //文件名
$type = strtolower($argv[2]); //类型 site等
$full = realpath(__DIR__).'/txt/'.substr($file,0,strrpos($file,".")); //文件的路径+文件名
if (!file_exists($full)) die(4); //txt文件不存在
$fileError = realpath(__DIR__).'/error/'.substr($file,0,strrpos($file,".")); //错误日志
$fileStatus = realpath(__DIR__).'/status/lock'; //完成状态log
if(file_exists($fileError)){
unlink($fileError);
}
$product = Factory::getFactory($type);
if(!$product) die(5);
$product->setDb($db);
$config = $product->getImportConfig();
if(!$config) die(6);
//唯一的字段
$unique = [];
foreach ($config['col'] as $v) {
if(!isset($v['check']) || !is_array($v['check'])) continue;
if(in_array($product::check_is_exist_no,$v['check'])){
$unique[$v['name']] = [];
}
}
file_put_contents($fileStatus,json_encode(['file'=>$file,'python'=> 'ok','php'=>'start']));
$flag = true;
$num = 1; //实际是从第二行开始的
$allData = []; //总数据
$file_handle = fopen($full, "r");
while (!feof($file_handle)) {
$line = fgets($file_handle);
$num++;
if(is_null($line)||$line =='') continue;
$tmpRowData = json_decode($line,true);
if(!is_array($tmpRowData)) continue;
$rowData = []; //数组键值格式化成配置文件中键值
foreach ($config['col'] as $v) {
$rowData[$v['name']] = $tmpRowData[$v['index']]; //数组键值格式化成配置文件中键值
if(!isset($v['check']) || !is_array($v['check'])) continue;
$result = $product->check($tmpRowData[$v['index']],$v['name'],$v['check']);
if(count($result)>0){
// 本行校验未通过 写入数据库
if($flag) $flag = false;
file_put_contents($fileError, '第'.$num.'行 '.$v['title'].': '.$tmpRowData[$v['index']].' '.implode('、',$result).PHP_EOL, FILE_APPEND);
}
//文件系统校验唯一字段
if(in_array($product::check_is_exist_no,$v['check'])){
if(in_array($tmpRowData[$v['index']],$unique[$v['name']])){
if(count($result)==0 || ($count($result)>0 &&!in_array($product::check_is_exist_text,$result))){
// 本行校验未通过 写入数据库
if($flag) $flag = false;
file_put_contents($fileError, '第'.$num.'行 '.$v['title'].': '.$tmpRowData[$v['index']].' '.$product::check_is_exist_text.PHP_EOL, FILE_APPEND);
}
}
}
if(in_array($v['name'],array_keys($unique))){
array_push($unique[$v['name']],$tmpRowData[$v['index']]);
}
}
array_push($allData,$rowData);
}
fclose($file_handle);
if($flag){
$tmp = array_chunk($allData,50);
// $db->query('BEGIN');
foreach ($tmp as $v) {
$st = $product->import($v);
if(!$st){
// $db->query('ROLLBACK');
file_put_contents($fileStatus,json_encode(['file'=>$file,'python'=> 'ok','php'=>'ok','check'=>true,'import'=>false]));
die(25);
}
}
// $db->query('COMMIT');
file_put_contents($fileStatus,json_encode(['file'=>$file,'python'=> 'ok','php'=>'ok','check'=>true,'import'=>true]));
die(1);
}else{
//校验失败
file_put_contents($fileStatus,json_encode(['file'=>$file,'python'=> 'ok','php'=>'ok','check'=>false]));
die(25);
}
Factory.php
<?php
class Factory
{
const dir = 'code';
static function getFactory($product)
{
$file = realpath(__DIR__.'/'.self::dir.'/'.$product.'/Import'.$product.'.php');
if(!$file) return false;
require_once $file;
$cla = 'Import'.$product;
return new $cla;
}
}
Product.php
<?php
abstract class Product
{
public $dir = 'code';
const check_not_empty_no = 1;
const check_not_empty_text = '不能为空';
const check_is_exist_no = 2;
const check_is_exist_text = '信息已存在'; //唯一
const check_no_relevance_no = 3;
const check_no_relevance_text = '关联信息不存在';
const check_num_no = 4;
const check_num_text = '格式必须是数字';
const check_date_no = 5;
const check_date_text = '格式必须是 年-月-日';
const check_time_no = 6;
const check_time_text = '格式必须是 年-月-日 时:分:秒';
abstract function import($rows);
/**
* 检查唯一
*
* @param [type] $val 值
* @param [type] $col 列名
* @return boolean
*/
abstract function checkUnique($val, $col);
/**
* 检查关联信息是否存在
*
* @param [type] $val 值
* @param [type] $col 列名
* @return boolean
*/
abstract function checkRelevance($val, $col);
/**
* 校验单元格
*
* @param [type] $val 值
* @param [type] $col 列名
* @param [type] $rule 规则
* @return array
*/
function check($val,$col,$rule){
$result = [];
if (!isset($rule) || !is_array($rule)) return $result;
foreach ($rule as $v) {
switch ($v) {
case self::check_not_empty_no:
if ($val == '') array_push($result, self::check_not_empty_text);
break;
case self::check_is_exist_no:
if (!$this->checkUnique($val, $col)) array_push($result, self::check_is_exist_text);
break;
case self::check_no_relevance_no:
if (!$this->checkRelevance($val, $col)) array_push($result, self::check_no_relevance_text);
break;
case self::check_num_no:
if (!is_numeric($val)) array_push($result, self::check_num_text);
break;
// case self::check_date_no:
// if (!is_numeric($val)) array_push($result, self::check_date_text);
// break;
// case self::check_time_no:
// if (!is_numeric($val)) array_push($result, self::check_time_text);
// break;
}
}
return $result;
}
function getImportConfig(){
$file = realpath(__DIR__.'/'.$this->dir.'/'.$this->product.'/import.json');
if(!$file) return false;
$c = file_get_contents($file);
if(!$c) return false;
$j = json_decode($c,true);
return is_null($j)?false:$j;
}
function getExportConfig(){
$file = realpath(__DIR__.'/'.$this->dir.'/'.$this->product.'/export.json');
if(!$file) return false;
$c = file_get_contents($file);
if(!$c) return false;
$j = json_decode($c,true);
return is_null($j)?false:$j;
}
function setDb($db){
CW::SetDb('',$db);
}
}
importBase.vue
<template>
<div>
<b-overlay :show="loading">
<template v-slot:overlay>
<div class="text-center"><b-spinner></b-spinner></div>
<div><span style="font-size: 20px;font-weight: 500;color: #007bff;">{{ msg }}</span></div>
</template>
<b-row>
<b-col cols="12">
<iq-card>
<template v-slot:headerTitle>
<h4 class="card-title">{{ title }}</h4>
</template>
<template v-slot:body>
<b-row>
<b-col offset="1" cols="3">
<b-file id="upload" v-model="file" placeholder="未选择文件" accept="" browse-text="选择文件"></b-file>
</b-col>
<b-col cols="8">
<div>
<b-button variant="success" class="mb-3 mr-1" @click="upload">导入</b-button>
<!-- <b-button variant="danger" class="mb-3 mr-1">清除</b-button> -->
</div>
</b-col>
</b-row>
<b-row>
<b-col offset="1">
<b-alert v-if="msg" style="width: max-content;" show variant="dark">{{ msg }}</b-alert>
</b-col>
</b-row>
<b-row>
<b-col offset="1">
<span v-for="(v,i) in error" :key="i" class="error-span">{{ v }}</span>
</b-col>
</b-row>
</template>
</iq-card>
</b-col>
</b-row>
</b-overlay>
</div>
</template>
<script>
import { vito } from '@/config/pluginInit'
import { uploadAndParse, checkStatus } from '@/api/excel.js'
export default {
name: 'ImportBase',
components: { },
props: {
title: {
type: String,
default: '导入'
},
type: {
type: String,
default: ''
}
},
mounted() {
vito.index()
},
data() {
return {
loading: false,
file: null,
msg: '',
error: []
}
},
methods: {
async upload() {
this.loading = true
this.msg = '正在上传,请稍后...'
try {
const fd = new FormData()
fd.append('file', this.file)
const { data } = await uploadAndParse(fd)
if (data.status === 1) {
this.msg = '上传成功,正在解析...'
this.a = setInterval(() => {
this.checkStatus(data.data.src)
}, 3000)
} else {
this.msg = data.msg
this.loading = false
}
} catch (err) {
this.msg = '出错了,请重试'
this.loading = false
}
},
async checkStatus(src) {
const { data } = await checkStatus(this.type, src)
this.msg = data.msg
this.error = data.data
if (!data.reRequest) {
clearInterval(this.a)
this.loading = false
}
}
}
}
</script>
<style scoped>
.error-span{
display: block;
margin: 5px;
color: #b82e2e;
}
</style>
控制器主要方法
/**
* 上传excel
*
* @return void
*/
function uploadAndParse()
{
if(!isset($_FILES['file'])){
return $this->response([
'status' => 2,
'msg' => '系统错误',
'data' => []
], 1, '成功');
}
$fileArr = $_FILES['file'];
if( !isset($fileArr['type']) || $fileArr['type'] != 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' ){
return $this->response([
'status' => 2,
'msg' => '文件类型错误',
'data' => []
], 1, '成功');
}
$file = CW_Unit::UploadExcel($fileArr);
$path = realpath(ROOT_PATH . '/../') . "/shell/status/lock";
if (file_exists($path)) {
return $this->response([
'status' => 2,
'msg' => '有正在导入的文件,请稍后重试!',
'data' => []
], 1, '成功');
}
exec(PYTHON." " . realpath(ROOT_PATH . '/../') . '/shell/excel.py' . " {$file} >/dev/null &");
return $this->response([
'status' => 1,
'msg' => '成功',
'data' => ['src'=>$file]
], 1, '成功');
}
/**
* 每3秒钟发送请求,查看上传进度
*
* @return void
*/
function checkStatus()
{
$src = $this->input->post('src');
$type = $this->input->post('type');
if(!isset($src)||$src == ''||!isset($type)||$type==''){
return $this->response([
'status' => 8,
'msg' => '文件或者导入类型不能为空',
'data' => [],
'reRequest' => false
], 1, '成功');
}
$file = basename($src);
// $file = '2.xlsx';
// $type = 'site';
$a = substr($file, 0, strrpos($file, "."));
$path = realpath(ROOT_PATH . '/../') . "/shell/status/lock";
if (file_exists($path)) {
$content = file_get_contents($path);
$res = json_decode($content, true);
if (isset($res['file']) && $res['file'] == $file) {
if (isset($res['python']) && $res['python'] == 'ok') {
if (isset($res['php'])) {
if ($res['php'] == 'ok') {
if (isset($res['check']) && $res['check']) {
if (isset($res['import']) && $res['import']) {
//导入成功
//删除锁文件 txt error
$this->delLockTxtError($file);
return $this->response([
'status' => 1,
'msg' => '导入成功',
'data' => [],
'reRequest' => false
], 1, '成功');
} else {
//导入失败
//删除锁文件 txt error
$this->delLockTxtError($file);
return $this->response([
'status' => 5,
'msg' => '导入数据库失败!请重试!',
'data' => [],
'reRequest' => false
], 1, '成功');
}
} else {
//校验未通过
$error = [];
$errPath = realpath(ROOT_PATH . '/../') . "/shell/error/{$a}";
$file_handle = fopen($errPath, "r");
while (!feof($file_handle)) {
$line = fgets($file_handle);
if (is_null($line) || $line == '') continue;
array_push($error, $line);
}
fclose($file_handle);
//删除锁文件 txt error
$this->delLockTxtError($file);
return $this->response([
'status' => 3,
'msg' => '校验未通过!请修改后重试!',
'data' => $error,
'reRequest' => false
], 1, '成功');
}
} else {
return $this->response([
'status' => 6,
'msg' => '正在校验excel单元格...',
'data' => [],
'reRequest' => true
], 1, '成功');
}
} else {
//执行php脚本
exec(PHP." " . realpath(ROOT_PATH . '/../') . '/shell/parse.php' . " {$file} {$type} >/dev/null &");
return $this->response([
'status' => 6,
'msg' => '正在校验excel单元格...',
'data' => [],
'reRequest' => true
], 1, '成功');
}
} else {
// python暂时还没解析完成 返回
return $this->response([
'status' => 2,
'msg' => '正在解析excel...',
'data' => [],
'reRequest' => true
], 1, '成功');
}
} else {
//删除锁文件 txt error
$this->delLockTxtError($file);
return $this->response([
'status' => 4,
'msg' => '锁异常',
'data' => [],
'reRequest' => false
], 1, '成功');
}
} else {
// python暂时还没解析完成 返回
return $this->response([
'status' => 2,
'msg' => '正在解析excel...',
'data' => [],
'reRequest' => true
], 1, '成功');
}
}
/**
* 删除锁文件 临时txt error文件
*
* @param [type] $filename 文件名 1.xlsx
* @return void
*/
private function delLockTxtError($filename){
$file = substr($filename, 0, strrpos($filename, "."));
//删除excel
@unlink(ROOT_PATH."/".CWConfig::_upload_path . "/excel/{$filename}");
//删除lock
@unlink(realpath(ROOT_PATH . "/../") . "/shell/status/lock");
//删除临时txt
@unlink(realpath(ROOT_PATH . "/../") . "/shell/txt/{$file}");
//删除error
$errorPath = realpath(ROOT_PATH . "/../") . "/shell/error/{$file}";
if(file_exists($errorPath)){
@unlink($errorPath);
}
return true;
}