本人最近在药房网采集了 3 万多条数据,但药品的批准文号是图片格式,经 OCR 解析之后准确率达不到 100%,于是想到把图片插入到 Excel 中,交给产品人员人工校对。为此写了一个 PHP 程序,下面切入正题:
1. 首先去官网下载 PHPExcel 库;
2. 然后写入如下代码(完整代码见附件):
<?php
/**
 * Embed approval-number images into Excel workbooks for manual proofreading.
 *
 * For every workbook found in ./split_approve, each row is inspected:
 *   - column C is read as the path of an image file,
 *   - column D is read as a flag; rows whose flag equals 2 are skipped.
 * When the image file exists it is anchored at column E of the same row, on
 * the same worksheet the row belongs to. The modified workbook is written
 * under the same file name to ./split_approve_pic.
 *
 * Progress (every 1000 inserted images per workbook) and errors are appended
 * to excel.log. Any exception aborts the whole run with a non-zero status.
 */

require 'Classes/PHPExcel.php';

set_time_limit(0);
ini_set("memory_limit", "512M"); // not required unless you are hitting a limit

$dir = "./split_approve";
$outDir = "./split_approve_pic";
$logFile = 'excel.log';
$fileType = 'Excel2007';

// scandir() returns false (and warns) on an unreadable directory; fail loudly
// instead of letting foreach choke on a boolean.
$files = is_dir($dir) ? scandir($dir) : false;
if ($files === false) {
    $message = "cannot read input directory " . $dir;
    echo $message;
    file_put_contents($logFile, $message . PHP_EOL, FILE_APPEND);
    exit(1);
}

// Make sure the destination exists up front, otherwise the failure would only
// surface at save time, after all images of the first workbook were processed.
if (!is_dir($outDir) && !mkdir($outDir, 0777, true)) {
    $message = "cannot create output directory " . $outDir;
    echo $message;
    file_put_contents($logFile, $message . PHP_EOL, FILE_APPEND);
    exit(1);
}

foreach ($files as $file) {
    $fileName = $dir . "/" . $file;

    // Skips ".", ".." and any sub-directory; only regular files are workbooks.
    if (!is_file($fileName)) {
        continue;
    }

    try {
        // Load the workbook
        $objPHPExcelReader = PHPExcel_IOFactory::createReader($fileType);
        $objPHPExcel = $objPHPExcelReader->load($fileName);

        $inserted = 0; // images added to this workbook so far

        foreach ($objPHPExcel->getWorksheetIterator() as $worksheet) {
            foreach ($worksheet->getRowIterator() as $row) {
                $rowIndex = $row->getRowIndex();

                $picPath = (string) $worksheet->getCell('C' . $rowIndex)->getCalculatedValue();
                $picValid = $worksheet->getCell('D' . $rowIndex)->getCalculatedValue();

                // Loose comparison on purpose: the flag may come back from the
                // sheet as int 2, float 2.0 or the string "2".
                if ($picValid == 2) {
                    continue;
                }

                // is_file() also rejects empty values and directories, which
                // file_exists() would have let through.
                if ($picPath === '' || !is_file($picPath)) {
                    continue;
                }

                // Add the image to the worksheet that owns this row (not the
                // workbook's active sheet, which may be a different one).
                $objDrawing = new PHPExcel_Worksheet_Drawing();
                $objDrawing->setPath($picPath);
                $objDrawing->setCoordinates('E' . $rowIndex);
                $objDrawing->setWorksheet($worksheet);

                $inserted++;
                if ($inserted % 1000 === 0) {
                    file_put_contents(
                        $logFile,
                        "count_" . $inserted . "_index_" . $rowIndex . PHP_EOL,
                        FILE_APPEND
                    );
                }
            }
        }

        // Save the workbook
        $objPHPExcelWriter = PHPExcel_IOFactory::createWriter($objPHPExcel, $fileType);
        $objPHPExcelWriter->save($outDir . "/" . $file);

        // PHPExcel objects hold cyclic references; break them explicitly so
        // memory is released before the next workbook is loaded.
        $objPHPExcel->disconnectWorksheets();
        unset($objPHPExcelWriter, $objPHPExcel, $objPHPExcelReader);
    } catch (Exception $e) {
        echo $e->getMessage();
        file_put_contents($logFile, $e->getMessage() . PHP_EOL, FILE_APPEND);
        exit(1);
    }

    echo "finished!";
}