需求分析
在最近的项目开发中,笔者大大遇到了这样一个小需求:自动读取某一固定文件夹中Excel文件的数据并插入到数据库。实现这个需求的两个步骤是清晰可见的:1.读取Excel文件数据,2.将数据插入到数据库。本文主要将重心放在读取文件数据上。
常规解决方案
一个常规的思路是利用定时器定时扫描文件夹中的所有Excel文件并依次对文件进行处理,本文作者大大准备讨论另一种思路。
另一种解决方案
.NET提供了一个非常有用的类FileSystemWatcher用于监视文件系统的更改,它可以监视指定目录中的文件或子目录,并在发生创建、删除、更改或重命名等事件时通知你,适用于许多场景,比如自动处理文件、日志监控、备份等。所以作者打算使用FileSystemWatcher来实现自动读取文件的需求。
起初,笔者大大认为只需要监控指定目录下的文件创建事件即可,于是诞生了如下代码:
using System;
using System.Collections.Concurrent;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using NPOI.SS.UserModel;
using NPOI.XSSF.UserModel;
using NPOI.HSSF.UserModel;
using LineCartonMachine.Common;
using System.Collections.Generic;
using LineCartonMachine.IDAL;
using LineCartonMachine.Model;
using System.Configuration;
using System.Linq;
namespace LineCartonMachine
{
public class ImportUtil
{
private readonly string folderPath;
private readonly string historyFolderPath;
private FileSystemWatcher watcher;
private ConcurrentQueue<string> fileQueue;
private CancellationTokenSource cts;
private string[] supportFileTypes;
public bool Enabled { get; set; }
public ImportUtil(string folderPath, string historyFolderPath)
{
this.folderPath = folderPath;
this.historyFolderPath = historyFolderPath;
watcher = new FileSystemWatcher(this.folderPath);
fileQueue = new ConcurrentQueue<string>();
cts = new CancellationTokenSource();
supportFileTypes = new string[] { ".xls", ".xlsx" };
}
private int InitFileQueue()
{
var files = Directory.GetFiles(folderPath, "*.xls*");
foreach (var file in files)
fileQueue.Enqueue(file);
return fileQueue.Count;
}
private void OnNewFileCreated(object sender, FileSystemEventArgs e)
{
string newFilePath = e.FullPath;
if (File.Exists(newFilePath))
{
if (supportFileTypes.Contains(Path.GetExtension(newFilePath).ToLower()))
{
Log4NetHelper.Log.Info($"文件{newFilePath}已创建,准备导入数据!");
fileQueue.Enqueue(newFilePath);
}
else
Log4NetHelper.Log.Info($"文件{newFilePath}使用了不受支持的文件格式,将不会导入该文件数据!");
}
}
private bool IsFileReady(string filePath)
{
try
{
using(FileStream fileStream = File.Open(filePath, FileMode.Open, FileAccess.Read, FileShare.None))
{
if (fileStream.Length > 0)
return true;
else
Log4NetHelper.Log.Info($"文件{filePath}未就绪!");
}
}
catch(Exception e)
{
Log4NetHelper.Log.Error(e);
Log4NetHelper.Log.Warn(e.Message);
}
return false;
}
private void MoveFileToHistoryFolder(string filePath)
{
try
{
string fileName = Path.GetFileName(filePath);
string destinationPath = Path.Combine(historyFolderPath, fileName);
if (File.Exists(destinationPath))
File.Delete(destinationPath);
File.Move(filePath, destinationPath);
}
catch (Exception e)
{
Log4NetHelper.Log.Error($"无法移动板件数据文件{filePath}!", e);
Log4NetHelper.Log.Warn($"移动板件数据文件{filePath}失败:{e.Message}");
}
}
private void ProcessQueue()
{
while (!cts.Token.IsCancellationRequested)
{
if (fileQueue.TryDequeue(out string filePath))
{
if (!File.Exists(filePath))
{
Log4NetHelper.Log.Info($"文件{filePath}已被移除,跳过该文件的数据导入!");
continue;
}
if (!IsFileReady(filePath))
fileQueue.Enqueue(filePath);
else
{
string fileName = Path.GetFileNameWithoutExtension(filePath);
if (!CDBFactory.PackageItemInfoDao.Exist(fileName))
{
using (FileStream fileStream = File.Open(filePath, FileMode.Open, FileAccess.Read))
{
IWorkbook workbook;
if (filePath.EndsWith(".xls"))
{
workbook = new HSSFWorkbook(fileStream);
}
else
{
workbook = new XSSFWorkbook(fileStream);
}
ISheet sheet = workbook.GetSheetAt(0);
List<BoardItemInfo> boardItemInfos = new List<BoardItemInfo>();
decimal maxLength = 0;
decimal maxWidth = 0;
for (int row = 1; row <= sheet.LastRowNum; row++)
{
IRow currentRow = sheet.GetRow(row);
if (currentRow == null) continue;
string itemCode = currentRow.GetCell(int.Parse(ConfigurationManager.AppSettings["BarcodeColumn"])).StringCellValue;
decimal length = (decimal)currentRow.GetCell(int.Parse(ConfigurationManager.AppSettings["LengthColumn"])).NumericCellValue;
decimal width = (decimal)currentRow.GetCell(int.Parse(ConfigurationManager.AppSettings["WidthColumn"])).NumericCellValue;
if (length < width)
(length, width) = (width, length);
maxLength = Math.Max(maxLength, length);
maxWidth = Math.Max(maxWidth, width);
boardItemInfos.Add(new BoardItemInfo
{
ItemCode = itemCode,
PackageCode = fileName,
ImportTime = DateTime.Now
});
}
int importedCount = CDBFactory.BoardItemInfoDao.Insert(boardItemInfos);
Log4NetHelper.Log.Info($"文件{filePath}成功导入{importedCount}条板件信息");
CDBFactory.PackageItemInfoDao.Insert(new PackageItemInfo
{
PackageCode = fileName,
Length = maxLength,
Width = maxWidth,
ImportTime = DateTime.Now
});
}
}
else
Log4NetHelper.Log.Info($"已跳过对重复文件{filePath}的数据导入");
MoveFileToHistoryFolder(filePath);
}
}
else
Thread.Sleep(1000);
}
}
public void Start()
{
if (Enabled)
return;
Log4NetHelper.Log.Info($"文件夹{InitFileQueue()}个初始文件的数据导入已准备就绪!");
watcher.NotifyFilter = NotifyFilters.FileName;
watcher.Created += OnNewFileCreated;
watcher.EnableRaisingEvents = true;
Task.Factory.StartNew(ProcessQueue, cts.Token, TaskCreationOptions.LongRunning, TaskScheduler.Default);
Enabled = true;
Log4NetHelper.Log.Info($"文件自动导入服务已启动,监控文件夹{folderPath}!");
}
public void Stop()
{
watcher.EnableRaisingEvents = false;
cts.Cancel();
Enabled = false;
Log4NetHelper.Log.Info("文件自动导入服务已停止!");
}
}
}
解决FileSystemWatcher监控文件创建时的临时文件问题
但在项目的实际应用中,笔者大大遇到了一个有趣的小问题。在使用FileSystemWatcher监控文件夹中的Excel文件创建事件时,笔者大大发现当从网页下载Excel文件时,监控程序首先捕捉到的是一个临时文件(以.tmp扩展名),这导致笔者大大的程序在文件尚未下载完成时就试图处理它,结果自然是失败的。经过一番研究,笔者大大又找到了几个有效的解决方案,决定记录下来,以备日后参考。
方法一:监视文件的重命名事件(笔者大大正在使用)
大多数浏览器在下载文件时,会先创建一个.tmp文件,然后在下载完成后将其重命名为目标文件。因此,笔者大大决定监视文件的重命名事件,确保只在文件被重命名为.xls或.xlsx后才进行处理。
using System;
using System.Collections.Concurrent;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using NPOI.SS.UserModel;
using NPOI.XSSF.UserModel;
using NPOI.HSSF.UserModel;
using LineCartonMachine.Common;
using System.Collections.Generic;
using LineCartonMachine.IDAL;
using LineCartonMachine.Model;
using System.Configuration;
using System.Linq;
namespace LineCartonMachine
{
public class ImportUtil
{
private readonly string folderPath;
private readonly string historyFolderPath;
private FileSystemWatcher watcher;
private ConcurrentQueue<string> fileQueue;
private CancellationTokenSource cts;
private string[] supportFileTypes;
public bool Enabled { get; set; }
public ImportUtil(string folderPath, string historyFolderPath)
{
this.folderPath = folderPath;
this.historyFolderPath = historyFolderPath;
watcher = new FileSystemWatcher(this.folderPath);
fileQueue = new ConcurrentQueue<string>();
cts = new CancellationTokenSource();
supportFileTypes = new string[] { ".xls", ".xlsx" };
}
private int InitFileQueue()
{
var files = Directory.GetFiles(folderPath, "*.xls*");
foreach (var file in files)
fileQueue.Enqueue(file);
return fileQueue.Count;
}
private void Watcher_Renamed(object sender, RenamedEventArgs e)
{
string newFilePath = e.FullPath;
if (File.Exists(newFilePath))
{
if (supportFileTypes.Contains(Path.GetExtension(newFilePath).ToLower()))
{
Log4NetHelper.Log.Info($"文件{newFilePath}已下载完成,准备导入数据!");
fileQueue.Enqueue(newFilePath);
}
}
}
private bool IsFileReady(string filePath)
{
try
{
using(FileStream fileStream = File.Open(filePath, FileMode.Open, FileAccess.Read, FileShare.None))
{
if (fileStream.Length > 0)
return true;
else
Log4NetHelper.Log.Info($"文件{filePath}未就绪!");
}
}
catch(Exception e)
{
Log4NetHelper.Log.Error(e);
Log4NetHelper.Log.Warn(e.Message);
}
return false;
}
private void MoveFileToHistoryFolder(string filePath)
{
try
{
string fileName = Path.GetFileName(filePath);
string destinationPath = Path.Combine(historyFolderPath, fileName);
if (File.Exists(destinationPath))
File.Delete(destinationPath);
File.Move(filePath, destinationPath);
}
catch (Exception e)
{
Log4NetHelper.Log.Error($"无法移动板件数据文件{filePath}!", e);
Log4NetHelper.Log.Warn($"移动板件数据文件{filePath}失败:{e.Message}");
}
}
private void ProcessQueue()
{
while (!cts.Token.IsCancellationRequested)
{
if (fileQueue.TryDequeue(out string filePath))
{
if (!File.Exists(filePath))
{
Log4NetHelper.Log.Info($"文件{filePath}已被移除,跳过该文件的数据导入!");
continue;
}
if (!IsFileReady(filePath))
fileQueue.Enqueue(filePath);
else
{
string fileName = Path.GetFileNameWithoutExtension(filePath);
if (!CDBFactory.PackageItemInfoDao.Exist(fileName))
{
using (FileStream fileStream = File.Open(filePath, FileMode.Open, FileAccess.Read))
{
IWorkbook workbook;
if (filePath.EndsWith(".xls"))
{
workbook = new HSSFWorkbook(fileStream);
}
else
{
workbook = new XSSFWorkbook(fileStream);
}
ISheet sheet = workbook.GetSheetAt(0);
List<BoardItemInfo> boardItemInfos = new List<BoardItemInfo>();
decimal maxLength = 0;
decimal maxWidth = 0;
for (int row = 1; row <= sheet.LastRowNum; row++)
{
IRow currentRow = sheet.GetRow(row);
if (currentRow == null) continue;
string itemCode = currentRow.GetCell(int.Parse(ConfigurationManager.AppSettings["BarcodeColumn"])).StringCellValue;
decimal length = (decimal)currentRow.GetCell(int.Parse(ConfigurationManager.AppSettings["LengthColumn"])).NumericCellValue;
decimal width = (decimal)currentRow.GetCell(int.Parse(ConfigurationManager.AppSettings["WidthColumn"])).NumericCellValue;
if (length < width)
(length, width) = (width, length);
maxLength = Math.Max(maxLength, length);
maxWidth = Math.Max(maxWidth, width);
boardItemInfos.Add(new BoardItemInfo
{
ItemCode = itemCode,
PackageCode = fileName,
ImportTime = DateTime.Now
});
}
int importedCount = CDBFactory.BoardItemInfoDao.Insert(boardItemInfos);
Log4NetHelper.Log.Info($"文件{filePath}成功导入{importedCount}条板件信息");
CDBFactory.PackageItemInfoDao.Insert(new PackageItemInfo
{
PackageCode = fileName,
Length = maxLength,
Width = maxWidth,
ImportTime = DateTime.Now
});
}
}
else
Log4NetHelper.Log.Info($"已跳过对重复文件{filePath}的数据导入");
MoveFileToHistoryFolder(filePath);
}
}
else
Thread.Sleep(1000);
}
}
public void Start()
{
if (Enabled)
return;
Log4NetHelper.Log.Info($"文件夹{InitFileQueue()}个初始文件的数据导入已准备就绪!");
watcher.NotifyFilter = NotifyFilters.FileName;
watcher.Renamed += Watcher_Renamed; ;
watcher.EnableRaisingEvents = true;
Task.Factory.StartNew(ProcessQueue, cts.Token, TaskCreationOptions.LongRunning, TaskScheduler.Default);
Enabled = true;
Log4NetHelper.Log.Info($"文件自动导入服务已启动,监控文件夹{folderPath}!");
}
public void Stop()
{
watcher.EnableRaisingEvents = false;
cts.Cancel();
Enabled = false;
Log4NetHelper.Log.Info("文件自动导入服务已停止!");
}
}
}
方法二:等待文件解锁
另外一种方法是继续监控.xlsx文件的创建事件,但在处理文件之前先等待文件解锁。通过这种方式,程序可以等待文件完全写入完成后再进行处理,避免因为文件仍在被使用而导致的错误。
private static void OnCreated(object source, FileSystemEventArgs e)
{
Task.Run(() =>
{
// 等待文件完全写入完成
while (IsFileLocked(new FileInfo(e.FullPath)))
{
Thread.Sleep(500);
}
// 文件已解锁,进行处理
Console.WriteLine($"文件{e.FullPath}已创建!");
});
}
private static bool IsFileLocked(FileInfo file)
{
try
{
using (FileStream stream = file.Open(FileMode.Open, FileAccess.Read, FileShare.None))
{
stream.Close();
}
}
catch (IOException)
{
return true;
}
return false;
}
方法三:监视临时文件的创建并等待重命名
还有一种方法是监视.tmp文件的创建事件,等待它被重命名为最终的.xlsx文件后再处理,这种方法在文件的重命名有一定的时间延迟时特别有效。
private static void OnTmpCreated(object source, FileSystemEventArgs e)
{
Task.Run(() =>
{
string excelFilePath = Path.ChangeExtension(e.FullPath, ".xlsx");
// 等待文件重命名为Excel文件
while (!File.Exists(excelFilePath))
{
Thread.Sleep(500);
}
// 文件已重命名,进行处理
Console.WriteLine($"文件{excelFilePath}已创建!");
});
}
补充
通过配置FileSystemWatcher对象的Filter属性可以实现对指定格式的文件进行监控,如:
watcher.Filter = "*.xlsx";