using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Xml;
namespace csv
{
/// <summary>
/// One element of an XML tree held in memory: its name, its attributes, its child
/// elements and a back-reference to its parent.
/// </summary>
/// <remarks>
/// XmlParser stores the colon-separated path from the root in <see cref="name"/>
/// (for example "root:item:sub") and stores text content in <see cref="attrList"/>
/// under the text node's own name (a key starting with '#').
/// </remarks>
public class XmlParseData
{
    /// <summary>Node name (XmlParser fills in the colon-separated path from the root).</summary>
    public string name = "";

    /// <summary>Attribute list: attribute name to attribute value.</summary>
    public Dictionary<string, string> attrList = new Dictionary<string, string>();

    /// <summary>Child node list, in document order.</summary>
    public List<XmlParseData> elemList = new List<XmlParseData>();

    /// <summary>Parent node, or null for the root.</summary>
    public XmlParseData parent = null;

    /// <summary>Follows the parent links upwards and returns the topmost node.</summary>
    /// <returns>The node whose <see cref="parent"/> is null; this node when it is the root.</returns>
    public XmlParseData FindRoot()
    {
        XmlParseData node = this;
        while (node.parent != null)
        {
            node = node.parent;
        }
        return node;
    }

    /// <summary>
    /// Tells whether <paramref name="other"/> describes the same element as this node:
    /// same name and exactly the same attribute names with the same values.
    /// Children are not compared.
    /// </summary>
    /// <param name="other">Node to compare with; null is never equal.</param>
    /// <returns>true when name and attributes match in both directions.</returns>
    public bool IsEqual(XmlParseData other)
    {
        if (other == null || name != other.name)
        {
            return false;
        }
        // Comparing only the value sets (as a one-directional set difference) would treat
        // {id=1} and {id=1, extra=2} as equal, so compare key/value pairs and the counts.
        if (attrList.Count != other.attrList.Count)
        {
            return false;
        }
        foreach (KeyValuePair<string, string> attr in attrList)
        {
            string otherValue;
            if (!other.attrList.TryGetValue(attr.Key, out otherValue) || otherValue != attr.Value)
            {
                return false;
            }
        }
        return true;
    }

    /// <summary>
    /// Merges <paramref name="other"/> into this node when both are equal (see
    /// <see cref="IsEqual"/>): the children of <paramref name="other"/> are appended to
    /// this node's children, then children that are equal to each other are merged
    /// recursively so each distinct child remains only once.
    /// </summary>
    /// <param name="other">Node to absorb. Its child list is left untouched, but the
    /// children themselves are re-parented to this node.</param>
    /// <returns>true when the nodes were equal and the merge happened; otherwise false
    /// and nothing is changed.</returns>
    public bool Merge(XmlParseData other)
    {
        if (!IsEqual(other))
        {
            return false;
        }

        // Merging a node with itself must not append its own list to itself
        // (the list would grow while being iterated).
        if (!ReferenceEquals(other, this))
        {
            for (var i = 0; i < other.elemList.Count; i++)
            {
                XmlParseData child = other.elemList[i];
                child.parent = this; // keep FindRoot/GetAttr consistent after the move
                elemList.Add(child);
            }
        }

        // Indices of children that were absorbed by an earlier, equal sibling.
        HashSet<int> absorbed = new HashSet<int>();
        for (var i = 0; i < elemList.Count - 1; i++)
        {
            if (absorbed.Contains(i))
            {
                continue;
            }
            for (var k = i + 1; k < elemList.Count; k++)
            {
                if (absorbed.Contains(k))
                {
                    continue;
                }
                if (elemList[i].Merge(elemList[k]))
                {
                    absorbed.Add(k);
                }
            }
        }

        // Absorbed indices are discovered out of order (e.g. 3 before 2), so remove by
        // walking the list from the end; that keeps the remaining indices valid.
        for (var i = elemList.Count - 1; i >= 0; i--)
        {
            if (absorbed.Contains(i))
            {
                elemList.RemoveAt(i);
            }
        }
        return true;
    }

    /// <summary>
    /// Adds the attributes of this node and of all its ancestors to <paramref name="line"/>,
    /// each under the key "&lt;node name&gt;.&lt;attribute name&gt;".
    /// </summary>
    /// <param name="line">Dictionary that receives the entries. A key that is already
    /// present makes Dictionary.Add throw an ArgumentException.</param>
    public void GetAttr(Dictionary<string, string> line)
    {
        foreach (KeyValuePair<string, string> attr in attrList)
        {
            line.Add(name + "." + attr.Key, attr.Value);
        }
        if (parent != null)
        {
            parent.GetAttr(line);
        }
    }

    /// <summary>
    /// Flattens the subtree rooted at this node into table form.
    /// </summary>
    /// <param name="headList">Receives every "&lt;node name&gt;.&lt;attribute name&gt;" column
    /// header once, in first-seen order.</param>
    /// <param name="lineList">Receives one row per leaf node; the row holds the attributes
    /// of the leaf and of all its ancestors.</param>
    public void DoWriteTxt(List<string> headList, List<Dictionary<string, string>> lineList)
    {
        foreach (string key in attrList.Keys)
        {
            string column = name + "." + key;
            if (!headList.Contains(column))
            {
                headList.Add(column);
            }
        }

        if (elemList.Count == 0)
        {
            // A leaf node occupies one row.
            Dictionary<string, string> line = new Dictionary<string, string>();
            GetAttr(line);
            lineList.Add(line);
            return;
        }

        for (var i = 0; i < elemList.Count; i++)
        {
            elemList[i].DoWriteTxt(headList, lineList);
        }
    }
}
/// <summary>
/// Converts an XML file into a tab-separated text table (one row per leaf element,
/// one column per "element path.attribute") and converts such a table back into XML.
/// </summary>
/// <remarks>
/// Element paths use ':' as separator and the column header uses the first '.' to split
/// the element path from the attribute name, so element names containing ':' or '.'
/// cannot be round-tripped. Values containing tabs or line breaks are written to the
/// table unchanged and will break its layout.
/// </remarks>
public class XmlParser
{
    // Placeholder written to the table for an attribute whose value is the empty string,
    // so it can be told apart from an absent attribute (an empty cell) when reading back.
    private const string EmptyValueMarker = "@#$989";

    /// <summary>
    /// Flattens <paramref name="data"/> into a tab-separated table and writes it as
    /// "&lt;filename&gt;.txt" (UTF-8) into the directory of <paramref name="filepath"/>.
    /// </summary>
    /// <param name="data">Root of the tree to export.</param>
    /// <param name="filepath">Path whose directory part receives the output file.</param>
    /// <param name="filename">Output file name without extension.</param>
    public void WriteToTxt(XmlParseData data, string filepath, string filename)
    {
        List<string> headList = new List<string>();
        List<Dictionary<string, string>> lineList = new List<Dictionary<string, string>>();
        data.DoWriteTxt(headList, lineList);

        StringBuilder strBuilder = new StringBuilder(10240);
        strBuilder.Append(string.Join("\t", headList));
        strBuilder.Append("\r\n");

        for (var i = 0; i < lineList.Count; i++)
        {
            Dictionary<string, string> line = lineList[i];
            for (var k = 0; k < headList.Count; k++)
            {
                string cell;
                if (line.TryGetValue(headList[k], out cell))
                {
                    strBuilder.Append(cell);
                }
                if (k != headList.Count - 1)
                {
                    strBuilder.Append("\t");
                }
            }
            // No line break after the last row.
            if (i != lineList.Count - 1)
            {
                strBuilder.Append("\r\n");
            }
        }

        File.WriteAllText(BuildOutputPath(filepath, filename, ".txt"), strBuilder.ToString(), Encoding.UTF8);
    }

    /// <summary>
    /// Recursively copies <paramref name="node"/> into <paramref name="data"/>: name
    /// (prefixed with the parent's path and ':'), attributes, text content and child elements.
    /// </summary>
    /// <param name="node">XML node to read.</param>
    /// <param name="data">Target node; its <c>parent</c> must already be set by the caller.</param>
    public void ParseXml(XmlNode node, XmlParseData data)
    {
        if (data.parent != null)
        {
            data.name = data.parent.name + ":" + node.Name;
        }
        else
        {
            data.name = node.Name;
        }

        if (node.Attributes != null)
        {
            for (var k = 0; k < node.Attributes.Count; k++)
            {
                XmlAttribute attr = node.Attributes[k];
                string value = attr.InnerText;
                data.attrList.Add(attr.Name, value == "" ? EmptyValueMarker : value);
            }
        }

        XmlNodeList nodelist = node.ChildNodes;
        for (var i = 0; i < nodelist.Count; i++)
        {
            XmlNode child = nodelist[i];
            if (child.NodeType == XmlNodeType.Element)
            {
                XmlParseData elem = new XmlParseData();
                elem.parent = data;
                data.elemList.Add(elem);
                ParseXml(child, elem);
            }
            else if (child.NodeType == XmlNodeType.Text)
            {
                // Text is stored like an attribute under the node's own name ("#text").
                // Mixed content can contain several text nodes; concatenate them instead
                // of failing on the duplicate key.
                string existing;
                if (data.attrList.TryGetValue(child.Name, out existing))
                {
                    data.attrList[child.Name] = existing + child.InnerText;
                }
                else
                {
                    data.attrList.Add(child.Name, child.InnerText);
                }
            }
        }
    }

    /// <summary>
    /// Rebuilds the chain of nodes described by one table row.
    /// </summary>
    /// <param name="tableHead">Column headers in the form "element path.attribute name".</param>
    /// <param name="line">Cells of the row; empty cells mean "attribute not present".</param>
    /// <returns>The root of the rebuilt chain, or null when every cell of the row is empty.</returns>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    /// <exception cref="FormatException">A header of a non-empty cell contains no '.'.</exception>
    public XmlParseData ParseLine(string[] tableHead, string[] line)
    {
        if (tableHead == null)
        {
            throw new ArgumentNullException("tableHead");
        }
        if (line == null)
        {
            throw new ArgumentNullException("line");
        }

        Dictionary<string, XmlParseData> data_list = new Dictionary<string, XmlParseData>();
        // A row may be shorter than the header; the missing cells count as empty.
        int columns = Math.Min(tableHead.Length, line.Length);
        for (var i = 0; i < columns; i++)
        {
            if (line[i] == "")
            {
                continue;
            }

            // Split at the first '.' only, so an attribute name that itself contains
            // '.' is kept whole.
            string head = tableHead[i];
            int dot = head.IndexOf('.');
            if (dot < 0)
            {
                throw new FormatException("Column header '" + head + "' is not of the form 'element.attribute'.");
            }
            string dataname = head.Substring(0, dot);
            string attrname = head.Substring(dot + 1);

            XmlParseData data;
            if (!data_list.TryGetValue(dataname, out data))
            {
                data = new XmlParseData();
                data.name = dataname;
                data_list.Add(dataname, data);
            }
            data.attrList.Add(attrname, line[i] == EmptyValueMarker ? "" : line[i]);
        }

        List<XmlParseData> list = data_list.Values.ToList<XmlParseData>();
        if (list.Count == 0)
        {
            // Blank row: nothing to build.
            return null;
        }

        // Link every node to its parent. Parents without attributes of their own do not
        // appear in the row, so they are created here and appended to the work list so
        // that their own parents get resolved as well.
        for (var i = 0; i < list.Count; i++)
        {
            XmlParseData node = list[i];
            int lastColon = node.name.LastIndexOf(':');
            if (lastColon < 0)
            {
                // No parent: this is a root.
                continue;
            }

            string parentName = node.name.Substring(0, lastColon);
            XmlParseData parentNode;
            if (!data_list.TryGetValue(parentName, out parentNode))
            {
                parentNode = new XmlParseData();
                parentNode.name = parentName;
                data_list.Add(parentName, parentNode);
                list.Add(parentNode);
            }
            node.parent = parentNode;
            parentNode.elemList.Add(node);
        }
        return list[0].FindRoot();
    }

    /// <summary>
    /// Reads a tab-separated table: the first line becomes the header, every further
    /// line becomes one row. The reader is closed afterwards.
    /// </summary>
    /// <param name="sr">Reader positioned at the start of the table; closed by this method.</param>
    /// <param name="tableHead">Receives the header cells, or null when the input is empty.</param>
    /// <param name="aryLine">Receives the cells of every data row.</param>
    public void ParseTxt(StreamReader sr, out string[] tableHead, out List<string[]> aryLine)
    {
        tableHead = null;
        aryLine = new List<string[]>();
        try
        {
            string strLine;
            while ((strLine = sr.ReadLine()) != null)
            {
                string[] cells = strLine.Split('\t');
                if (tableHead == null)
                {
                    // The first line is the header.
                    tableHead = cells;
                }
                else
                {
                    aryLine.Add(cells);
                }
            }
        }
        finally
        {
            sr.Close();
        }
    }

    /// <summary>
    /// Appends <paramref name="data"/> and its subtree to <paramref name="xmlBuilder"/> as
    /// XML text, one element per line, indented with one tab per nesting level.
    /// </summary>
    /// <param name="data">Node to serialize; its name is a ':'-separated path whose last
    /// part is the element name and whose length gives the nesting depth.</param>
    /// <param name="xmlBuilder">Receives the XML text.</param>
    public void WriteToXml(XmlParseData data, StringBuilder xmlBuilder)
    {
        string[] name_arr = data.name.Split(':');
        int deep = name_arr.Length;
        string name = name_arr[deep - 1];

        xmlBuilder.Append("\n");
        xmlBuilder.Append('\t', deep - 1);
        xmlBuilder.Append('<').Append(name);

        // Keys starting with '#' hold text content (see ParseXml); everything else is a
        // real attribute. Values are escaped so the output stays well-formed.
        bool hasText = false;
        StringBuilder text = new StringBuilder();
        foreach (KeyValuePair<string, string> attr in data.attrList)
        {
            if (attr.Key.StartsWith("#", StringComparison.Ordinal))
            {
                hasText = true;
                text.Append(attr.Value);
            }
            else
            {
                xmlBuilder.Append(' ').Append(attr.Key).Append("=\"").Append(EscapeXml(attr.Value)).Append('"');
            }
        }

        if (!hasText && data.elemList.Count == 0)
        {
            xmlBuilder.Append("/>");
            return;
        }

        xmlBuilder.Append('>');
        xmlBuilder.Append(EscapeXml(text.ToString()));
        if (data.elemList.Count > 0)
        {
            for (var i = 0; i < data.elemList.Count; i++)
            {
                WriteToXml(data.elemList[i], xmlBuilder);
            }
            // The closing tag goes on its own line, aligned with the opening tag.
            xmlBuilder.Append("\n");
            xmlBuilder.Append('\t', deep - 1);
        }
        xmlBuilder.Append("</").Append(name).Append('>');
    }

    /// <summary>
    /// Reads the tab-separated table at <paramref name="filepath"/>, rebuilds the XML tree
    /// and writes it next to the input as "&lt;name&gt;.xml" (UTF-8).
    /// </summary>
    /// <param name="filepath">Path of the table file.</param>
    /// <exception cref="InvalidDataException">The file contains no non-empty data row.</exception>
    public void ParseTxtFile(string filepath)
    {
        // File name without directory and extension.
        string filename = Path.GetFileNameWithoutExtension(filepath);

        string[] tableHead;
        List<string[]> aryLine;
        using (FileStream fs = new FileStream(filepath, FileMode.Open, FileAccess.Read))
        using (StreamReader sr = new StreamReader(fs, Encoding.UTF8))
        {
            ParseTxt(sr, out tableHead, out aryLine);
        }

        // Every row yields one root-to-leaf chain; fold them all into the first one.
        XmlParseData root_data = null;
        for (var i = 0; i < aryLine.Count; i++)
        {
            XmlParseData line_data = ParseLine(tableHead, aryLine[i]);
            if (line_data == null)
            {
                // Blank row.
                continue;
            }
            if (root_data == null)
            {
                root_data = line_data;
            }
            else
            {
                // NOTE: a row whose root differs from the first row's root cannot be
                // merged (Merge returns false) and is left out of the output.
                root_data.Merge(line_data);
            }
        }
        if (root_data == null)
        {
            throw new InvalidDataException("'" + filepath + "' contains no data rows.");
        }

        StringBuilder xmlBuilder = new StringBuilder();
        xmlBuilder.Append("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n");
        WriteToXml(root_data, xmlBuilder);
        File.WriteAllText(BuildOutputPath(filepath, filename, ".xml"), xmlBuilder.ToString(), Encoding.UTF8);
    }

    /// <summary>
    /// Loads the XML file at <paramref name="filepath"/> and writes its content as a
    /// tab-separated table next to it ("&lt;name&gt;.txt").
    /// </summary>
    /// <param name="filepath">Path of the XML file.</param>
    public void ParseXmlFile(string filepath)
    {
        XmlDocument xmlDoc = new XmlDocument();
        xmlDoc.Load(filepath);
        // File name without directory and extension.
        string filename = Path.GetFileNameWithoutExtension(filepath);

        // DocumentElement is the root element whether or not the file starts with an
        // XML declaration, comments or a DOCTYPE.
        XmlNode rootnode = xmlDoc.DocumentElement;
        XmlParseData data = new XmlParseData();
        ParseXml(rootnode, data);
        WriteToTxt(data, filepath, filename);
    }

    // Builds "<directory of filepath>/<filename><extension>"; a path without a directory
    // part yields a relative file name instead of a path in the root directory.
    private static string BuildOutputPath(string filepath, string filename, string extension)
    {
        string directory = Path.GetDirectoryName(filepath) ?? "";
        return Path.Combine(directory, filename + extension);
    }

    // Escapes the characters that are not allowed verbatim in XML text and attribute values.
    private static string EscapeXml(string value)
    {
        if (string.IsNullOrEmpty(value))
        {
            return "";
        }
        return System.Security.SecurityElement.Escape(value);
    }
}
}
// C#: converting between XML and Excel (tab-separated text).
// Source page note: latest recommended article published 2023-02-06 17:36:53.