C# 将bcp文件装换为csv文件
技术选型:C# + VS2015 + DevExpress + Sqlite
源文件:ZIP(xml文件+bcp文件)
前提:
XML文件:为集合标签,其中每个标签对应一个表,我们要拿该表的 表名,表头,对应的bcp文件名称。
bcp文件:单纯存放数据的文件,没有表头,只有数据,没有视频,音乐等数据,每列tab作为分隔,每行以换行符作为分隔。
在解析完XML文件后,根据每个表对应的bcp文件名称来解析,获取该表的对应数据。
思路:
1.解析ZIP文件,获取ZIP流,通过这个ZIP流对象获取XML流对象和bcp流对象(因为是未解压的情况,所以不可以直接通过路径来获取xml和bcp流对象)。
2.先解析XML流,获取 表名 、表头 、对应的bcp文件名 ,通过自定类MyDataTable 封装起来,如下所示,
// 表名
private string table_name;
// bcp文件路径
private List<string> bcp_file_name = new List<string>(); // 可能存在多个bcp文件
// 表头
private List<string> columns_field = new List<string>(); // 字段
private List<string> columns_alias= new List<string>(); // 字段別名
// 字段数据
private List<string[]> datas = new List<string[]>();
3.解析每个表对应的bcp文件,通过bcp流对象,转换为StreamReader对象,使用 ReadLine() 来读取每一行,然后使用 Split(’\t’) 拆分各列,形成数组。
有了string[]数据,就是有了每一列的数据,当数据量小时,你可以使用List<string[]>来封装所有行数据。
但是如果数据量太大,List集合承受不了,则需要使用分批插入思想。
后面的代码就是使用分批插入来入库的。(入库是为了用sql来清洗数据,不需要清洗的小伙伴可以直接导出为csv文件)
4.CSV文件:其实就是一个文本,每个列tab作为分隔,每个行以换行符作为分隔。
具体代码实现
一、解析ZIP文件
思路:使用ZipFile类解析ZIP文件。
// 解析zip
// 功能: 在不解压zip的情况下,获得对应的xml流和bcp流
public static void ZipPress(string ZipPath)
{
ZipEntry zipEntry_ = null;
FileStream fsFile_ = null;
ZipFile zipFile_ = null;
List<MyDataTable> list = null;
try
{
// 获取ZIP流
fsFile_ = new FileStream(ZipPath, FileMode.Open);
zipFile_ = new ZipFile(fsFile_);
// 解析XML文件
foreach (ZipEntry z in zipFile_)
{
if (z.Name.Contains(".xml"))
{
// 获取XML流
StreamReader s = new StreamReader(zipFile_.GetInputStream(z));
// 解析XML文件
list = XMLParsing(s);
}
}
// 解析bcp文件
foreach (MyDataTable table in list)
{
BCPFileParsing(zipFile_, table);
}
}
catch (Exception ex)
{
Console.WriteLine(ex.Message);
throw;
}
finally
{
if (zipFile_ != null)
{
if (zipFile_.IsUpdating)
zipFile_.CommitUpdate();
zipFile_.Close();
}
if (fsFile_ != null)
fsFile_.Close();
if (zipEntry_ != null)
zipEntry_ = null;
}
}
二、解析XML文件
思路:由于是在未解压的情况下进行解析,所以要在ZIP流对象基础上解析XML文件,形成对应的XML流对象,详情看上面的获取XML流代码。
// 解析xml文件 ====》 表名,bcp文件名,字段名
// in: 输入xml文件流
// out: 返回MyDataTable集合
public static List<MyDataTable> XMLParsing(StreamReader s)
{
XmlDocument doc = new XmlDocument();
XmlReaderSettings settings = new XmlReaderSettings();
settings.IgnoreComments = true;//忽略文档里面的注释
//XmlReader reader = XmlReader.Create(s, settings);
using (XmlReader reader = XmlReader.Create(s, settings))
{
doc.Load(reader);
XmlNode DATASET = doc.SelectSingleNode("MESSAGE").SelectSingleNode("DATASET").SelectSingleNode("DATA").SelectSingleNode("DATASET");
// Console.WriteLine("根节点名称:。。。。。。。。。。" + DATASET.Name);
// 得到根节点的所有子节点
XmlNodeList xnl = DATASET.ChildNodes;
// Console.WriteLine("数量 》》》》》》》》》》》" + xnl.Count);
List<MyDataTable> list = new List<MyDataTable>();
foreach (XmlNode xn1 in xnl)
{
bool flag = false;
// 存放数据
MyDataTable datatable = new MyDataTable();
// 获取表名
XmlNodeList tablelist = xn1.SelectNodes("ITEM");
// Console.WriteLine("tablelist 》》》》》》》》》》》" + tablelist.Count);
foreach (XmlNode tableNode in tablelist)
{
XmlElement tableEle = (XmlElement)tableNode;
if (tableEle.GetAttribute("key").ToString().Equals("A010004"))
{
datatable.tablename = tableEle.GetAttribute("val").ToString();
//Console.WriteLine("表名:" + tableEle.GetAttribute("val").ToString());
if (!tableNameLIst.ToList<String>().Contains(datatable.tablename.ToLower()))
{
flag = true;
continue;
}
}
}
if (flag)
{
continue;
}
XmlNodeList DateSetList = xn1.SelectNodes("DATASET");
foreach (XmlNode data in DateSetList)
{
XmlElement DateSetEle = (XmlElement)data;
// 获取bcp文件名称
if (DateSetEle.GetAttribute("name").ToString().Equals("WA_COMMON_010014"))
{
XmlNodeList bcpNameNodelist = data.SelectNodes("DATA");
foreach (XmlNode bcpNameNode in bcpNameNodelist)
{
XmlNodeList bcpItemList = bcpNameNode.SelectNodes("ITEM");
foreach (XmlNode bcpItem in bcpItemList)
{
XmlElement bcpEle = (XmlElement)bcpItem;
if (bcpEle.GetAttribute("key").ToString().Equals("H010020"))
{
datatable.bcpFileName.Add(bcpEle.GetAttribute("val").ToString());
//Console.WriteLine("对应文件名:" + bcpEle.GetAttribute("val").ToString());
}
}
}
}
// 获取表字段
else if (DateSetEle.GetAttribute("name").ToString().Equals("WA_COMMON_010015"))
{
XmlNode bcpColumnNode = data.SelectSingleNode("DATA");
XmlNodeList bcpItemList = bcpColumnNode.SelectNodes("ITEM");
foreach (XmlNode bcpItem in bcpItemList)
{
XmlElement bcpEle = (XmlElement)bcpItem;
datatable.columnField.Add(bcpEle.GetAttribute("eng").ToString());
datatable.columnAlias.Add(bcpEle.GetAttribute("chn").ToString() != "" ? bcpEle.GetAttribute("chn").ToString() : "未知名称");
//Console.WriteLine("字段名:"+bcpEle.GetAttribute("eng").ToString());
}
}
}
list.Add(datatable);
}
return list;
}
}
三、解析bcp文件并入库
思路:通过解析XML得到的表名,表头,bcp文件名来解析bcp文件,这里解析每个表都会进行入库操作
// 解析bcp文件
// 根据table中的表路径,获取对应的bcp文件,解析对应数据,入库!
private static void BCPFileParsing(ZipFile zipFile_, MyDataTable table)
{
SqLiteHelper helper = new SqLiteHelper(@"F:\sqlite\cap\dami.db");
helper.CreateTable(table); // 创建表
// 进入zip中的bcp文件
try
{
// 单个对应bcp文件
List<string> bcp_name_list = table.bcpFileName;
foreach (string bcp_name in bcp_name_list)
{
// 开始解析bcp文件
foreach (ZipEntry z in zipFile_)
{
if (z.Name.Equals(bcp_name))
{
int count = 0;
List<string[]> list = new List<string[]>();
StreamReader streamReader = new StreamReader(zipFile_.GetInputStream(z));
// 判断文件中是否有字符
while (streamReader.Peek() != -1)
{
// 读取文件中的一行字符
string str = streamReader.ReadLine(); // 行分割
if (str != null)
{
string[] content = str.Split('\t'); // 列分割
list.Add(content);
count++;
if (count >= 10000)
{
// 入库
helper.InsertValues(table, list);
sum += count;
count = 0;
list.Clear();
}
}
}
if (count != 0)
{
// 入库
helper.InsertValues(table, list);
sum += count;
count = 0;
list.Clear();
}
streamReader.Close();
}
}
}
}
catch (Exception e)
{
Console.WriteLine(e.Message);
throw;
}
finally
{
helper.CloseConnection();
}
}
四、数据入库sqlite,直接上工具类
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data.SQLite;
using System.Data.Common;
namespace WindowsFormsApplication1
{
/// <summary>
/// SQLite 操作类
/// </summary>
public class SqLiteHelper
{
/// <summary>
/// 数据库连接定义
/// </summary>
public SQLiteConnection dbConnection;
/// <summary>
/// SQL命令定义
/// </summary>
private SQLiteCommand dbCommand;
/// <summary>
/// 数据读取定义
/// </summary>
private SQLiteDataReader dataReader;
/// <summary>
/// 数据库连接字符串定义
/// </summary>
private SQLiteConnectionStringBuilder dbConnectionstr;
// 数据库
private string connectionString = @"F:\sqlite\cap\dami.db";
/// <summary>
/// 构造函数
public SqLiteHelper()
{
try
{
dbConnection = new SQLiteConnection();
dbConnectionstr = new SQLiteConnectionStringBuilder();
dbConnectionstr.DataSource = connectionString;
//设置密码,SQLite ADO.NET实现了数据库密码保护
dbConnection.ConnectionString = dbConnectionstr.ToString();
dbConnection.Open();
}
catch (Exception e)
{
Log(e.ToString());
}
}
/// <summary>
/// 执行SQL命令
/// <param name="queryString">SQL命令字符串</param>
public SQLiteDataReader ExecuteQuery(string queryString)
{
try
{
dbCommand = dbConnection.CreateCommand();
dbCommand.CommandText = queryString; //设置SQL语句
dataReader = dbCommand.ExecuteReader();
}
catch (Exception e)
{
Log(e.Message);
}
return dataReader;
}
/// <summary>
/// 关闭数据库连接
/// </summary>
public void CloseConnection()
{
//销毁Command
if (dbCommand != null)
{
dbCommand.Cancel();
}
dbCommand = null;
//销毁Reader
if (dataReader != null)
{
dataReader.Close();
}
dataReader = null;
//销毁Connection
if (dbConnection != null)
{
dbConnection.Close();
}
dbConnection = null;
}
/// <summary>
/// 创建数据表
/// </summary> +
/// <returns>The table.</returns>
public SQLiteDataReader CreateTable(MyDataTable table)
{
int i = table.columnField.ToArray().Count();
string[] colTypes = new string[i];
for (int j = 0; j < i; j++)
{
colTypes[j] = "text";
}
string tableName = table.tablename;
string[] colNames = table.columnField.ToArray();
string queryString = "CREATE TABLE IF NOT EXISTS " + tableName + "( " + colNames[0] + " " + colTypes[0];
for (int k = 1; k < colNames.Length; k++)
{
queryString += ", " + colNames[k] + " " + colTypes[k];
}
queryString += " ) ";
return ExecuteQuery(queryString);
}
/// <summary>
/// 删表
/// </summary>
/// <param name="tableName"></param>
/// <returns></returns>
public SQLiteDataReader dropTable(string tableName)
{
string queryString = "DROP TABLE IF EXISTS " + tableName;
Console.WriteLine("删除表:" + tableName);
return ExecuteQuery(queryString);
}
/// <summary>
/// 本类log
/// </summary>
/// <param name="s"></param>
static void Log(string s)
{
Console.WriteLine("class SqLiteHelper:::" + s);
}
/// <summary>
/// 添加数据
/// </summary>
/// <param name="table"></param>
/// <param name="valuelist"></param>
public void InsertValues(MyDataTable table, List<string[]> valuelist)
{
string tableName = table.tablename;
List<string> columns_field = table.columnField;
string sql = "Insert into " + tableName + " values(";
for (int i = 0; i < columns_field.Count(); i++)
{
if (i < columns_field.Count() - 1)
{
sql += "@" + columns_field[i] + ",";
}
else
{
sql += "@" + columns_field[i];
}
}
sql += ")";
int count = valuelist.Count();
int start = 0;
int end = 10000;
while (count > end)
{
using (SQLiteTransaction tx = dbConnection.BeginTransaction())
{
try
{
using (SQLiteCommand command = new SQLiteCommand(sql, dbConnection))
{
for (; start < end; start++)
{
string[] values = valuelist[start];
for (int i = 0; i < values.Count(); i++)
{
command.Parameters.AddWithValue("@" + columns_field[i], values[i] != null ? values[i] : "");
}
command.ExecuteNonQuery();
command.Parameters.Clear();
}
tx.Commit();
end += 10000;
}
}
catch
{
tx.Rollback();
throw;
}
}
}
using (SQLiteTransaction tx = dbConnection.BeginTransaction())
{
try
{
using (SQLiteCommand command = new SQLiteCommand(sql, dbConnection))
{
for (; start < count; start++)
{
string[] values = valuelist[start];
for (int i = 0; i < values.Count(); i++)
{
command.Parameters.AddWithValue("@" + columns_field[i], values[i] != null ? values[i] : "");
}
command.ExecuteNonQuery();
command.Parameters.Clear();
}
tx.Commit();
}
}
catch
{
tx.Rollback();
throw;
}
}
}
}
}
五、导出csv文件
/// <summary>
/// 导出为csv文件
/// </summary>
class BCPToCSVUtils
{
public static void TableToCsv(MyDataTable data, string filePath)
{
filePath = filePath + "\\" + data.tablename + ".csv";
FileInfo fi = new FileInfo(filePath);
string path = fi.DirectoryName;
string name = fi.Name;
//\/:*?"<>|
//把文件名和路径分别取出来处理
name = name.Replace(@"\", "\");
name = name.Replace(@"/", "/");
name = name.Replace(@":", ":");
name = name.Replace(@"*", "*");
name = name.Replace(@"?", "?");
name = name.Replace(@"<", "<");
name = name.Replace(@">", ">");
name = name.Replace(@"|", "|");
string title = "";
FileStream fs = new FileStream(path + "\\" + name, FileMode.Create);
StreamWriter sw = new StreamWriter(new BufferedStream(fs), System.Text.Encoding.Default);
// 表头定义
foreach (string alias in data.columnAlias)
{
title += alias + ",";
}
title = title.Substring(0, title.Length - 1) + "\n";
sw.Write(title);
// 表行
foreach (string[] datas in data.dataList)
{
string line = "";
for (int i = 0; i < datas.Count(); i++)
{
line += datas[i].ToString().Replace(",", "") + "\t" + ",";
}
line = line.Substring(0, line.Length - 1) + "\n";
sw.Write(line);
}
sw.Close();
fs.Close();
}
}