大数据量csv导入sql数据库

大数据量csv导入sql数据库

如题,百万级数据量csv入库

思路
读取csv文件转成DataTable,分批次步长1W批量入库,其中csv单元格内逗号做特殊处理防止串列

实现

using Dapper;
using System;
using System.Collections.Generic;
using System.Data;
using System.Data.SqlClient;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace CSVToSql
{
    class Program
    {
        static void Main(string[] args)
        {
            IDbConnection connection = ConnectionFactory.ASConnection;


            int count = 0;
            string readerPath = @"C:\Users\kbjh372\Desktop\CRM数据表结构\medical_examples\test\PROFESSION_20210604131046_001.csv";
            string writerPath = @"C:\Users\kbjh372\Desktop\CRM数据表结构\medical_examples\test\PROFESSION_20210604131046_write.csv";
            //if (File.Exists(writerPath))
            //{
            //    File.Delete(writerPath);
            //}
            //using (StreamReader reader = new StreamReader(readerPath, Encoding.UTF8))
            //{
            //    while (!reader.EndOfStream)
            //    {
            //        string line = reader.ReadLine();
            //        using (StreamWriter writer = new StreamWriter(writerPath, true, Encoding.Default))
            //        {
            //            writer.WriteLine(line);
            //        }
            //        count++;
            //        Console.WriteLine("正在转码第{0}行,请稍等", count);
            //    }
            //}
            //Console.WriteLine("转码完成,共转码{0}条数据", count);
            Console.WriteLine("开始导入数据,请稍等");



            /*
            //开始向数据库导入数据
            string sql = "BULK INSERT dbo.CRM_PROFESSION FROM 'C:\\Users\\kbjh372\\Desktop\\CRM数据表结构\\medical_examples\\test\\PROFESSION_20210604131046_write.csv' WITH(FIELDTERMINATOR=',',BATCHSIZE=100000,FIRSTROW=2)";
            try
            {
                //DbHelper.ExecuteSql(sql);//使用的是方法ExecuteNonQuery()方法,具体的Helper就不上传了。

                connection.Execute(sql);
            }
            catch (Exception ex)
            {
                using (StreamWriter writerLog = new StreamWriter(@"C:\Users\kbjh372\Desktop\CRM数据表结构\medical_examples\test\Log\Log.txt"))
                {
                    writerLog.WriteLine(ex.ToString());
                }
            }
            */

            var dt = CSVFileHelper.OpenCSV(writerPath);

            List<DataTable> dts = CSVFileHelper.SplitDataTable(dt, 10000);


            //声明一个事务对象
            SqlTransaction tran = null;//声明一个事务对象  
            try
            {
                using (SqlConnection conn = new SqlConnection("server=.;uid=sa;pwd=..;database=..;"))

                {
                    conn.Open();//打开链接  


                    for (int i = 0; i < dts.Count; i++)
                    {
                        using (tran = conn.BeginTransaction())
                        {
                            using (SqlBulkCopy copy = new SqlBulkCopy(conn, SqlBulkCopyOptions.Default, tran))
                            {
                                copy.DestinationTableName = "CRM_PROFESSION";  //指定服务器上目标表的名称  
                                copy.WriteToServer(dts[i]);                      //执行把DataTable中的数据写入DB  
                                tran.Commit();                                      //提交事务  
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                if (null != tran)
                    tran.Rollback();
                //LogHelper.Add(ex);  
            }
            
            /*

            for (int i = 0; i < dt.Rows.Count; i++)
            {
                var CODE = dt.Rows[i]["CODE"];
                var NAME = dt.Rows[i]["NAME"];
                var TYPE = dt.Rows[i]["TYPE"];
                var GENDER = dt.Rows[i]["GENDER"];
                var MOBILE = dt.Rows[i]["MOBILE"];
                var EMAIL = dt.Rows[i]["EMAIL"];
                var INSTITUTION_CODE = dt.Rows[i]["INSTITUTION_CODE"];
                var INSTITUTION_NAME = dt.Rows[i]["INSTITUTION_NAME"];
                var DEPARTMENT_CODE = dt.Rows[i]["DEPARTMENT_CODE"];
                var DEPARTMENT_NAME = dt.Rows[i]["DEPARTMENT_NAME"];
                var TITLE_CODE = dt.Rows[i]["TITLE_CODE"];
                var TITLE_NAME = dt.Rows[i]["TITLE_NAME"];
                var POSITION_CODE = dt.Rows[i]["POSITION_CODE"];
                var POSITION_NAME = dt.Rows[i]["POSITION_NAME"];
                var QUALIFICATION = dt.Rows[i]["QUALIFICATION"];
                var MODIFIED_DATE = dt.Rows[i]["MODIFIED_DATE"];
                string sql = @"insert into [CRM_PROFESSION] values (@CODE,@NAME,@TYPE,@GENDER,@MOBILE,@EMAIL,@INSTITUTION_CODE,
                                                                    @INSTITUTION_NAME,@DEPARTMENT_CODE,@DEPARTMENT_NAME,@TITLE_CODE,
                                                                    @TITLE_NAME,@POSITION_CODE,@POSITION_NAME,@QUALIFICATION,@MODIFIED_DATE)";

                connection.Execute(sql, new
                {
                    CODE = CODE.ToString().Trim('"'),
                    NAME = NAME.ToString().Trim('"'),
                    TYPE = TYPE.ToString().Trim('"'),
                    GENDER = GENDER.ToString().Trim('"'),
                    MOBILE = MOBILE.ToString().Trim('"'),
                    EMAIL = EMAIL.ToString().Trim('"'),
                    INSTITUTION_CODE = INSTITUTION_CODE.ToString().Trim('"'),
                    INSTITUTION_NAME = INSTITUTION_NAME.ToString().Trim('"'),
                    DEPARTMENT_CODE = DEPARTMENT_CODE.ToString().Trim('"'),
                    DEPARTMENT_NAME = DEPARTMENT_NAME.ToString().Trim('"'),
                    TITLE_CODE = TITLE_CODE.ToString().Trim('"'),
                    TITLE_NAME = TITLE_NAME.ToString().Trim('"'),
                    POSITION_CODE = POSITION_CODE.ToString().Trim('"'),
                    POSITION_NAME = POSITION_NAME.ToString().Trim('"'),
                    QUALIFICATION = QUALIFICATION.ToString().Trim('"'),
                    MODIFIED_DATE = Convert.ToDateTime(MODIFIED_DATE.ToString().Trim('"')),
                });


            }

            */

            Console.WriteLine("数据导入完毕");
            Console.ReadKey();
        }
    }
}

工具类

using System;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace CSVToSql
{
    public class CSVFileHelper
    {
        /// <summary>
        /// 将CSV文件的数据读取到DataTable中
        /// </summary>
        /// <param name="fileName">CSV文件路径</param>
        /// <returns>返回读取了CSV数据的DataTable</returns>
        public static DataTable OpenCSV(string filePath)
        {
            Encoding encoding = GetType(filePath); //Encoding.ASCII;//
            DataTable dt = new DataTable();

            //加一个ID列名
            DataColumn dc0 = new DataColumn("ID");
            dt.Columns.Add(dc0);

            FileStream fs = new FileStream(filePath, System.IO.FileMode.Open, System.IO.FileAccess.Read);

            //StreamReader sr = new StreamReader(fs, Encoding.UTF8);
            StreamReader sr = new StreamReader(fs, encoding);
            //string fileContent = sr.ReadToEnd();
            //encoding = sr.CurrentEncoding;
            //记录每次读取的一行记录
            string strLine = "";
            //记录每行记录中的各字段内容
            string[] aryLine = null;
            string[] tableHead = null;
            //标示列数
            int columnCount = 0;
            //标示是否是读取的第一行
            bool IsFirst = true;
            //逐行读取CSV中的数据
            while ((strLine = sr.ReadLine()) != null)
            {
                //strLine = Common.ConvertStringUTF8(strLine, encoding);
                //strLine = Common.ConvertStringUTF8(strLine);

                if (IsFirst == true)
                {
                    
                    tableHead = strLine.Split(',');
                    IsFirst = false;
                    columnCount = tableHead.Length;
                    //创建列
                    for (int i = 0; i < columnCount; i++)
                    {
                        DataColumn dc = new DataColumn(tableHead[i]);
                        dt.Columns.Add(dc);
                    }
                }
                else
                {
                    //aryLine = strLine.Split(',');
                    aryLine = CSVstrToArry(strLine);
                    DataRow dr = dt.NewRow();
                    for (int j = 0; j < columnCount; j++)
                    {
                        //dr[j] = aryLine[j];
                        dr[j + 1] = aryLine[j].Trim('"');     //串一列ID出来
                    }
                    dt.Rows.Add(dr);
                }
            }
            if (aryLine != null && aryLine.Length > 0)
            {
                dt.DefaultView.Sort = tableHead[0] + " " + "asc";
            }

            sr.Close();
            fs.Close();
            return dt;
        }

        /// 给定文件的路径,读取文件的二进制数据,判断文件的编码类型
        /// <param name="FILE_NAME">文件路径</param>
        /// <returns>文件的编码类型</returns>

        public static System.Text.Encoding GetType(string FILE_NAME)
        {
            System.IO.FileStream fs = new System.IO.FileStream(FILE_NAME, System.IO.FileMode.Open,
                System.IO.FileAccess.Read);
            System.Text.Encoding r = GetType(fs);
            fs.Close();
            return r;
        }

        /// 通过给定的文件流,判断文件的编码类型
        /// <param name="fs">文件流</param>
        /// <returns>文件的编码类型</returns>
        public static System.Text.Encoding GetType(System.IO.FileStream fs)
        {
            byte[] Unicode = new byte[] { 0xFF, 0xFE, 0x41 };
            byte[] UnicodeBIG = new byte[] { 0xFE, 0xFF, 0x00 };
            byte[] UTF8 = new byte[] { 0xEF, 0xBB, 0xBF }; //带BOM
            System.Text.Encoding reVal = System.Text.Encoding.Default;

            System.IO.BinaryReader r = new System.IO.BinaryReader(fs, System.Text.Encoding.Default);
            int i;
            int.TryParse(fs.Length.ToString(), out i);
            byte[] ss = r.ReadBytes(i);
            if (IsUTF8Bytes(ss) || (ss[0] == 0xEF && ss[1] == 0xBB && ss[2] == 0xBF))
            {
                reVal = System.Text.Encoding.UTF8;
            }
            else if (ss[0] == 0xFE && ss[1] == 0xFF && ss[2] == 0x00)
            {
                reVal = System.Text.Encoding.BigEndianUnicode;
            }
            else if (ss[0] == 0xFF && ss[1] == 0xFE && ss[2] == 0x41)
            {
                reVal = System.Text.Encoding.Unicode;
            }
            r.Close();
            return reVal;
        }

        /// 判断是否是不带 BOM 的 UTF8 格式
        /// <param name="data"></param>
        /// <returns></returns>
        private static bool IsUTF8Bytes(byte[] data)
        {
            int charByteCounter = 1;  //计算当前正分析的字符应还有的字节数
            byte curByte; //当前分析的字节.
            for (int i = 0; i < data.Length; i++)
            {
                curByte = data[i];
                if (charByteCounter == 1)
                {
                    if (curByte >= 0x80)
                    {
                        //判断当前
                        while (((curByte <<= 1) & 0x80) != 0)
                        {
                            charByteCounter++;
                        }
                        //标记位首位若为非0 则至少以2个1开始 如:110XXXXX...........1111110X 
                        if (charByteCounter == 1 || charByteCounter > 6)
                        {
                            return false;
                        }
                    }
                }
                else
                {
                    //若是UTF-8 此时第一位必须为1
                    if ((curByte & 0xC0) != 0x80)
                    {
                        return false;
                    }
                    charByteCounter--;
                }
            }
            if (charByteCounter > 1)
            {
                throw new Exception("非预期的byte格式");
            }
            return true;
        }


        /// <summary>
        /// 分解数据表
        /// </summary>
        /// <param name="originalTab">需要分解的表</param>
        /// <param name="rowsNum">每个表包含的数据量</param>
        /// <returns></returns>
        public static List<DataTable> SplitDataTable(DataTable originalTab, int rowsNum)
        {
            //获取所需创建的表数量
            int tableNum = originalTab.Rows.Count / rowsNum;
            //获取数据余数
            int remainder = originalTab.Rows.Count % rowsNum;
            List<DataTable> ds = new List<DataTable>();
            //如果只需要创建1个表,直接将原始表存入DataSet
            if (tableNum == 0)
            {
                ds.Add(originalTab);
            }
            else
            {
                DataTable[] tableSlice = new DataTable[tableNum];
                for (int c = 0; c < tableNum; c++)
                {
                    tableSlice[c] = new DataTable();
                    foreach (DataColumn dc in originalTab.Columns)
                    {
                        tableSlice[c].Columns.Add(dc.ColumnName, dc.DataType);
                    }
                }
                for (int i = 0; i < tableNum; i++)
                {
                    if (i != tableNum - 1)
                    {

                        for (int j = i * rowsNum; j < ((i + 1) * rowsNum); j++)
                        {
                            tableSlice[i].ImportRow(originalTab.Rows[j]);
                        }
                    }
                    else
                    {
                        for (int k = i * rowsNum; k < ((i + 1) * rowsNum + remainder); k++)
                        {
                            tableSlice[i].ImportRow(originalTab.Rows[k]);
                        }
                    }
                }
                foreach (DataTable dt in tableSlice)
                {
                    ds.Add(dt);
                }
            }
            return ds;
        }

        /// <summary>
        /// 跳过引号中的逗号,进行逗号分隔(字段内容中的逗号不参与分隔)
        /// </summary>
        /// <param name="strLine"></param>
        /// <returns></returns>
        public static string[] CSVstrToArry(string splitStr)
        {
            var newstr = string.Empty;
            List<string> sList = new List<string>();

            bool isSplice = false;
            string[] array = splitStr.Split(new char[] { ',' });
            foreach (var str in array)
            {
                if (!string.IsNullOrEmpty(str) && str.IndexOf('"') > -1)
                {
                    var firstchar = str.Substring(0, 1);
                    var lastchar = string.Empty;
                    if (str.Length > 0)
                    {
                        lastchar = str.Substring(str.Length - 1, 1);
                    }
                    if (firstchar.Equals("\"") && !lastchar.Equals("\""))
                    {
                        isSplice = true;
                    }
                    if (lastchar.Equals("\""))
                    {
                        if (!isSplice)
                            newstr += str;
                        else
                            newstr = newstr + "," + str;

                        isSplice = false;
                    }
                }
                else
                {
                    if (string.IsNullOrEmpty(newstr))
                        newstr += str;
                }

                if (isSplice)
                {
                    //添加因拆分时丢失的逗号
                    if (string.IsNullOrEmpty(newstr))
                        newstr += str;
                    else
                        newstr = newstr + "," + str;
                }
                else
                {
                    sList.Add(newstr.Replace("\"", "").Trim());//去除字符中的双引号和首尾空格
                    newstr = string.Empty;
                }
            }
            return sList.ToArray();
        }


    }
}

仅供学习参考,如有侵权联系我删除

  • 1
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值