今天发现了一个超级快速插入大量数据到数据库的方法SqlBulkCopy,由于我这里是读Excel文件,所以我这里会提及到处理空行的方法和读取Excel的方法。下面我们会通过数据库跟踪器跟踪数据库发生了什么。
我们开始创建一个模型
/// <summary>
/// Row model for the imported Excel data; the property names are expected to
/// mirror the worksheet's header row (name / phone / addr).
/// </summary>
public class TBModel
{
// NOTE(review): lower-case property names break .NET PascalCase convention,
// but renaming would break the callers that reference them — left as-is.
public string name { get; set; }
public string phone { get; set; }
public string addr { get; set; }
}
然后就是读取Excel的方法
#region 读取Excel文件到DataSet中
/// <summary>
/// Reads every worksheet of an Excel file into a DataSet (one DataTable per sheet).
/// </summary>
/// <param name="filePath">Directory (or full path) of the Excel file.</param>
/// <param name="fileName">File name; its extension selects the OLE DB provider.</param>
/// <returns>
/// A DataSet with one table per sheet; null when the extension is missing;
/// possibly partially filled when a sheet fails to load (best-effort).
/// </returns>
public static DataSet ExcelToDataSet(string filePath, string fileName)
{
    var fileType = Path.GetExtension(fileName);
    if (string.IsNullOrEmpty(fileType)) return null;

    // The only visible caller passes a directory and a file name separately,
    // but Data Source must be the full file path — combine defensively so a
    // caller that already passes a full path keeps working.
    var fullPath = File.Exists(filePath) ? filePath : Path.Combine(filePath, fileName);

    // Jet handles legacy .xls; ACE handles .xlsx and later.
    // HDR=YES -> first row is headers; IMEX=1 -> mixed-type columns read as text.
    // (Original built these with a stray double semicolon before "Extended".)
    var connStr = fileType == ".xls"
        ? "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + fullPath +
          ";Extended Properties=\"Excel 8.0;HDR=YES;IMEX=1\""
        : "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + fullPath +
          ";Extended Properties=\"Excel 12.0;HDR=YES;IMEX=1\"";

    var ds = new DataSet();
    try
    {
        // using guarantees disposal on every path; the original finally block
        // could NullReference and skipped da.Dispose when the connection was closed.
        using (var conn = new OleDbConnection(connStr))
        using (var da = new OleDbDataAdapter())
        {
            conn.Open();
            // Enumerate the workbook's sheets from the schema metadata.
            var dtSheetName = conn.GetOleDbSchemaTable(
                OleDbSchemaGuid.Tables, new object[] { null, null, null, "TABLE" });
            for (var i = 0; i < dtSheetName.Rows.Count; i++)
            {
                var sheetName = (string)dtSheetName.Rows[i]["TABLE_NAME"];
                // Skip named ranges / print areas: real sheet names end with '$'
                // (possibly wrapped in single quotes).
                if (sheetName.Contains("$") && !sheetName.Replace("'", "").EndsWith("$"))
                {
                    continue;
                }
                da.SelectCommand = new OleDbCommand(
                    string.Format("Select * FROM [{0}]", sheetName), conn);
                var dsItem = new DataSet();
                da.Fill(dsItem, sheetName);
                ds.Tables.Add(dsItem.Tables[0].Copy());
            }
        }
    }
    catch (Exception ex)
    {
        // Best-effort: log instead of silently swallowing (the original empty
        // catch hid every failure); callers still receive the sheets read so far.
        Console.Error.WriteLine("ExcelToDataSet failed: " + ex);
    }
    return ds;
}
#endregion
去除datatable里面的空行
#region 去除datatable里面的空行
/// <summary>
/// Removes rows whose cells are all null or whitespace from the DataTable, in place.
/// </summary>
/// <param name="dt">Table to clean; a null or empty table is a no-op.</param>
public static void RemoveEmpty(DataTable dt)
{
    if (dt == null || dt.Rows.Count == 0) return;

    var removeList = new List<DataRow>();
    foreach (DataRow row in dt.Rows)
    {
        var isEmpty = true;
        for (var j = 0; j < dt.Columns.Count; j++)
        {
            // One non-blank cell keeps the row — stop scanning early
            // (the original kept checking every remaining column).
            if (!string.IsNullOrEmpty(row[j].ToString().Trim()))
            {
                isEmpty = false;
                break;
            }
        }
        if (isEmpty)
        {
            removeList.Add(row);
        }
    }
    // Collect first, remove second: the row collection must not be
    // modified while it is being enumerated.
    foreach (var row in removeList)
    {
        dt.Rows.Remove(row);
    }
}
#endregion
然后就是使用SqlBulkCopy大批量导入数据
using System;
using System.Collections.Generic;
using System.Data;
using System.Data.OleDb;
using System.Data.SqlClient;
using System.Diagnostics;
using System.IO;
namespace ConsoleAppTest
{
/// <summary>
/// Reads an Excel sheet, validates its header against TBModel, strips blank
/// rows, and bulk-inserts the data into SQL Server via SqlBulkCopy.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        // Read the Excel workbook (one DataTable per sheet).
        var ds = ExcelToDataSet("D:/", "temp.xlsx");
        if (ds == null || ds.Tables.Count == 0)
        {
            // Guard: ExcelToDataSet returns null for a missing extension and an
            // empty set on failure — the original dereferenced ds.Tables[0] blindly.
            Console.WriteLine("表格有误");
            Console.Read();
            return;
        }
        var source = ds.Tables[0];

        // Expected header names, taken from the model so they cannot drift.
        // (The original filled titledt from a *fresh* TBModel whose properties
        // are all null, so the header comparison could never succeed.)
        var expectedTitles = new[]
        {
            nameof(TBModel.name), nameof(TBModel.phone), nameof(TBModel.addr)
        };

        // Check the sheet's columns match the expected header, in order.
        var headerOk = source.Columns.Count == expectedTitles.Length;
        for (int i = 0; headerOk && i < expectedTitles.Length; i++)
        {
            if (source.Columns[i].ColumnName != expectedTitles[i])
            {
                headerOk = false;
            }
        }
        if (!headerOk)
        {
            // Abort on a malformed sheet — the original printed the message
            // but imported the bad data anyway.
            Console.WriteLine("表格有误");
            Console.Read();
            return;
        }

        // Drop all-blank rows before the import.
        RemoveEmpty(source);

        // Time the bulk insert.
        var st = Stopwatch.StartNew();
        // NOTE(review): hard-coded sa credentials — move the connection string
        // into configuration / a secrets store before shipping.
        using (SqlConnection conn = new SqlConnection("server=.;uid=sa;pwd=1234;database=MangoDB"))
        {
            conn.Open();
            using (SqlBulkCopy bulkCopy = new SqlBulkCopy(conn))
            {
                try
                {
                    // Destination table in the target database.
                    bulkCopy.DestinationTableName = "tbs";
                    // Map in-memory columns to destination columns.
                    // NOTE(review): only "name" is mapped here — confirm whether
                    // phone/addr should be mapped to destination columns too.
                    bulkCopy.ColumnMappings.Add("name", "TempData");
                    bulkCopy.WriteToServer(source);
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex);
                }
            }
        }
        st.Stop();
        Console.WriteLine("成功!测试时间为:" + st.ElapsedMilliseconds);
        Console.Read();
    }
}
}
这里导入一百万条数据大概8秒9秒左右。。。测试了好几次,快的不行
打开Sqlserver Profiler跟踪,会发现执行的是如下语句:
insert bulk tbs ([name] VarChar(50), phone VarChar(50), addr VarChar(50))
百度了一下msdn
BULK INSERT
[ database_name . [ schema_name ] . | schema_name . ] [ table_name | view_name ]
FROM 'data_file'
[ WITH
(
[ [ , ] BATCHSIZE = batch_size ]
[ [ , ] CHECK_CONSTRAINTS ]
[ [ , ] CODEPAGE = { 'ACP' | 'OEM' | 'RAW' | 'code_page' } ]
[ [ , ] DATAFILETYPE =
{ 'char' | 'native'| 'widechar' | 'widenative' } ]
[ [ , ] DATASOURCE = 'data_source_name' ]
[ [ , ] ERRORFILE = 'file_name' ]
[ [ , ] ERRORFILE_DATASOURCE = 'data_source_name' ]
[ [ , ] FIRSTROW = first_row ]
[ [ , ] FIRE_TRIGGERS ]
[ [ , ] FORMATFILE_DATASOURCE = 'data_source_name' ]
[ [ , ] KEEPIDENTITY ]
[ [ , ] KEEPNULLS ]
[ [ , ] KILOBYTES_PER_BATCH = kilobytes_per_batch ]
[ [ , ] LASTROW = last_row ]
[ [ , ] MAXERRORS = max_errors ]
[ [ , ] ORDER ( { column [ ASC | DESC ] } [ ,...n ] ) ]
[ [ , ] ROWS_PER_BATCH = rows_per_batch ]
[ [ , ] ROWTERMINATOR = 'row_terminator' ]
[ [ , ] TABLOCK ]
-- input file format options
[ [ , ] FORMAT = 'CSV' ]
[ [ , ] FIELDQUOTE = 'quote_characters']
[ [ , ] FORMATFILE = 'format_file_path' ]
[ [ , ] FIELDTERMINATOR = 'field_terminator' ]
[ [ , ] ROWTERMINATOR = 'row_terminator' ]
)]
这里是MSDN的地址:https://docs.microsoft.com/zh-cn/sql/t-sql/statements/bulk-insert-transact-sql?view=sql-server-2017
看着很像insert into tbs select * from tb这种,然后试了几次这种插入数据的方式,发现还是上面的那种比这种快了好几秒,不过这两种方式使用的地方不同,所以使用的时候各有所需吧