奇怪的需求又来了。
MySql数据迁移可以使用Navicat自带的工具进行很方便的迁移,但是需要筛选数据,想到了用ETL工具进行数据迁移,但是组长说用C#代码进行数据迁移……
SqlServer有自带的SqlBulkCopy方法进行批量插入,我找到了MySql的MySqlBulkLoader。
废话不多说,上代码。
static void Main(string[] args)
{
// Build a 1,000,000-row, 5-column in-memory table; TableName must match the target MySQL table.
DataTable dt = new DataTable();
dt.TableName = "source";
dt.Columns.AddRange(new DataColumn[] {
new DataColumn("Id",typeof(int)),
new DataColumn("Name",typeof(string)),
new DataColumn("Age",typeof(int)),
new DataColumn("Address",typeof(string)),
new DataColumn("DataStatus",typeof(int))
});
for (int i = 0; i < 1000000; ++i)
{
DataRow dr = dt.NewRow();
dr["Id"] = i;
dr["Name"] = "张三." + i;
dr["Age"] = 18;
dr["Address"] = "广东省小溪路" + i + "号";
dr["DataStatus"] = 0;
dt.Rows.Add(dr);
}
string con = "database=sqlbulk;uid=root;pwd=root;server=localhost;";
var result = BulkInsert(con, dt);
// Fix: original referenced the undeclared variable `dt1`; the populated table is `dt`.
Console.WriteLine("完成了:" + result + "条。共:" + dt.Rows.Count);
Console.ReadKey();
}
/// <summary>
/// 将DataTable转换为标准的CSV
/// </summary>
/// <param name="table">数据表</param>
/// <returns>返回标准的CSV</returns>
private static string DataTableToCsv(DataTable table)
{
//以半角逗号(即,)作分隔符,列为空也要表达其存在。
//列内容如存在半角逗号(即,)则用半角引号(即"")将该字段值包含起来。
//列内容如存在半角引号(即")则应替换成半角双引号("")转义,并用半角引号(即"")将该字段值包含起来。
StringBuilder sb = new StringBuilder();
foreach (DataRow row in table.Rows)
{
for (int i = 0; i < table.Columns.Count; i++)
{
if (i != 0) sb.Append(",");
string value = row[table.Columns[i]].ToString();
// Fix: the original only quoted when the value contained a comma (and only for string
// columns), so a value with an embedded quote or newline — which the comment above says
// must be escaped — produced malformed CSV. Quote whenever any special character appears.
if (value.IndexOfAny(new[] { ',', '"', '\r', '\n' }) >= 0)
{
sb.Append("\"").Append(value.Replace("\"", "\"\"")).Append("\"");
}
else
{
sb.Append(value);
}
}
// AppendLine emits Environment.NewLine; the bulk loader's LineTerminator must agree.
sb.AppendLine();
}
return sb.ToString();
}
/// <summary>
/// 把 DataTable 先落地为 CSV 临时文件,再用 MySqlBulkLoader 批量导入到同名表。
/// </summary>
/// <param name="connectionString">MySql 连接字符串</param>
/// <param name="table">待导入的数据表,TableName 必须是目标表名</param>
/// <returns>成功写入的行数;空表直接返回 0</returns>
public static int BulkInsert(string connectionString, DataTable table)
{
if (string.IsNullOrEmpty(table.TableName)) throw new Exception("请给DataTable的TableName属性附上表名称");
if (table.Rows.Count == 0) return 0;
int insertCount = 0;
string tmpPath = Path.Combine(Directory.GetCurrentDirectory(), "Temp.csv"); //Path.GetTempFileName();
string csv = DataTableToCsv(table);
File.WriteAllText(tmpPath, csv);
// Fix: the original deleted the temp file only after the using-block, so any exception
// from Load() leaked the file; and its `catch (MySqlException ex) { throw ex; }` reset
// the stack trace while adding nothing — a try/finally covers both.
try
{
using (MySqlConnection conn = new MySqlConnection(connectionString))
{
Stopwatch stopwatch = Stopwatch.StartNew();
conn.Open();
MySqlBulkLoader bulk = new MySqlBulkLoader(conn)
{
FieldTerminator = ",",
FieldQuotationCharacter = '"',
EscapeCharacter = '"',
// NOTE(review): DataTableToCsv uses AppendLine (Environment.NewLine); on
// non-Windows that is "\n", not "\r\n" — confirm terminator matches the CSV.
LineTerminator = "\r\n",
FileName = tmpPath,
NumberOfLinesToSkip = 0,
TableName = table.TableName,
};
insertCount = bulk.Load();
stopwatch.Stop();
Console.WriteLine("耗时:{0}", stopwatch.ElapsedMilliseconds);
}
}
finally
{
File.Delete(tmpPath);
}
return insertCount;
}
是的,该方法是将DataTable转换为CSV文件,之后通过MySqlBulkLoader的Load方法批量写入数据库,100w行五列大概需要七秒钟左右,写入DataTable也比较吃内存,占用了四百兆左右。
……然后发现这个方法好像他喵的不可以筛选数据,遇到重复的数据就报错了。
于是乎改用另一种方式…
string path = "C:/MySql/data/temp.txt";
// Loop back here after deleting a stale export file so the else-branch runs.
mark:
if (File.Exists(path))
{
Console.WriteLine($"目录文件已存在,回Y删除");
string temp = Console.ReadLine();
if (temp == "y" || temp == "Y")
{
File.Delete(path);
goto mark;
}
}
else
{
Stopwatch stopwatch = Stopwatch.StartNew();
int flag;
// 复制源表A — export table A to a server-side text file.
// Fix: connections/commands were never disposed; using-blocks guarantee cleanup.
using (MySqlConnection conn = new MySqlConnection(con))
using (MySqlCommand cmd = new MySqlCommand($"select * INTO OUTFILE '{path}' FROM SourceA", conn))
{
conn.Open();
flag = cmd.ExecuteNonQuery();
}
stopwatch.Stop();
Console.WriteLine($"导出txt花费了{stopwatch.ElapsedMilliseconds}毫秒");
Console.WriteLine("--------------------------------------");
if (flag > 0)
{
Console.WriteLine($"导出了{flag}条数据到{path}文件下");
// Fix: Start() resumes the stopped watch, so the second message printed the
// cumulative time of both phases; Restart() times the import alone.
stopwatch.Restart();
//REPLACE 遇重更新 IGNORE 遇重跳过 判断以主键唯一依赖
//复制至目标表B 注意表结构需要完全相同
using (MySqlConnection conn1 = new MySqlConnection(con1))
using (MySqlCommand cmd1 = new MySqlCommand($"LOAD DATA INFILE '{path}' IGNORE INTO TABLE SourceB", conn1))
{
conn1.Open();
flag = cmd1.ExecuteNonQuery();
}
stopwatch.Stop();
Console.WriteLine($"复制txt花费了{stopwatch.ElapsedMilliseconds}毫秒");
if (flag > 0)
{
Console.WriteLine($"copy数据{flag}条至新的表中");
}
else
{
Console.WriteLine($"0条新数据无需copy");
}
Console.WriteLine("-----------------END---------------------");
}
}
Console.ReadKey();
大同小异,只不过这个方法只是用C#执行了两条MySql的语句,一切交给数据库引擎来操作,执行速度大概在9秒钟左右,虽然功能实现了,但是……感觉好弱智,不过需求如此。。