最近做了几个读取文件信息的小工具,在这里记录一下过程。
C#读取文件
- 新建 Windows 窗体应用(WinForms)项目
- 设计界面,设置标签按钮等。
工具类
- 编写工具类获取目录下所有对应的文件
/// <summary>
/// Collects the full paths of every file under <paramref name="sSourcePath"/>, including all
/// nested subdirectories, whose name matches <paramref name="searchPattern"/>.
/// </summary>
/// <param name="sSourcePath">Root directory to scan.</param>
/// <param name="searchPattern">
/// File-name wildcard passed to <c>DirectoryInfo.GetFiles</c>. Defaults to <c>"*.xml*"</c>, the
/// pattern this helper originally hard-coded; pass e.g. <c>"*.json"</c> or <c>"*.txt"</c> to
/// reuse the helper for other file types.
/// </param>
/// <returns>
/// Full paths of the matching files: files directly inside the root first, then the files of
/// each top-level subdirectory (searched recursively). Empty when the directory does not exist.
/// </returns>
public static List<string> GetFileList(string sSourcePath, string searchPattern = "*.xml*")
{
    List<string> list = new List<string>();
    DirectoryInfo theFolder = new DirectoryInfo(sSourcePath);

    // A mistyped path should yield "no files" rather than an unhandled
    // DirectoryNotFoundException in the calling UI handler.
    if (!theFolder.Exists)
    {
        return list;
    }

    // Files directly inside the root directory.
    foreach (FileInfo file in theFolder.GetFiles(searchPattern, SearchOption.TopDirectoryOnly))
    {
        list.Add(file.FullName);
    }

    // Files inside each subdirectory, searched recursively.
    foreach (DirectoryInfo subFolder in theFolder.GetDirectories())
    {
        foreach (FileInfo file in subFolder.GetFiles(searchPattern, SearchOption.AllDirectories))
        {
            list.Add(file.FullName);
        }
    }

    return list;
}
读取xml文件
- 需求:有一些格式相似的xml文件,需要从中提取name字段的值和数量
- xml文件
<annotation>
<folder>1602748977507</folder>
<filename>1602748977507.mp4-72.jpg</filename>
<path>E:\photo\1602748977507\1602748977507.mp4-72.jpg</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>1080</width>
<height>1920</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>1</name> <!-- 要提取的字段 -->
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>246</xmin>
<ymin>1094</ymin>
<xmax>374</xmax>
<ymax>1125</ymax>
</bndbox>
</object>
<object>
<name>2</name> <!-- 要提取的字段 -->
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>949</xmin>
<ymin>1125</ymin>
<xmax>1067</xmax>
<ymax>1168</ymax>
</bndbox>
</object>
</annotation>
- 代码实现
/// <summary>
/// Click handler: scans the directory typed into <c>textBox1</c>, counts how often each
/// &lt;name&gt; value occurs inside the &lt;object&gt; elements of the XML files found there,
/// and writes one summary line per distinct name into <c>textBox2</c>.
/// Does nothing when the path box is empty.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    // Directory entered by the user.
    string path = textBox1.Text;
    if (string.IsNullOrEmpty(path))
    {
        return;
    }

    // All files to process.
    List<string> fileList = FileGet.GetFileList(path);

    XmlDocument doc = new XmlDocument();
    XmlReaderSettings xrst = new XmlReaderSettings();
    // Skip XML comments so they never show up as child nodes.
    xrst.IgnoreComments = true;

    // name value -> number of occurrences across all files.
    Dictionary<string, int> nameDicionary = new Dictionary<string, int>();

    foreach (string filePath in fileList)
    {
        // Dispose the reader right after loading so the file handle is released.
        using (XmlReader reader = XmlReader.Create(filePath, xrst))
        {
            doc.Load(reader);
        }

        // DocumentElement is the root element even when the file starts with an
        // <?xml ...?> declaration; FirstChild would be the declaration in that case.
        XmlElement root = doc.DocumentElement;
        if (root == null)
        {
            continue;
        }

        foreach (XmlNode child in root.ChildNodes)
        {
            if (child.Name != "object")
            {
                continue;
            }

            // Look the element up by name instead of assuming it is the first child.
            XmlElement nameElement = child["name"];
            if (nameElement == null)
            {
                continue;
            }

            string name = nameElement.InnerText.Trim();

            // TryGetValue leaves count at 0 for a name seen for the first time.
            int count;
            nameDicionary.TryGetValue(name, out count);
            nameDicionary[name] = count + 1;
        }
    }

    textBox2.Text = "";
    foreach (KeyValuePair<string, int> entry in nameDicionary)
    {
        textBox2.AppendText("name为" + entry.Key + "的有 " + entry.Value.ToString() + " 个\r\n");
    }
}
读取json文件
- 需求:有一些格式相似的json文件,需要从中提取label字段的值和数量
{
"imageHeight": 1080,
"imagePath": "VID_20201019_112256.mp4-14.jpg",
"shapes": [
{
"flags": {},
"points": [
[
5.414634146341463,
869.3170731707316
],
[
185.90243902439022,
809.560975609756
],
[
331.0243902439024,
763.2195121951219
]
],
"label": "1",
"shape_type": "linestrip",
"group_id": null
},
{
"flags": {},
"points": [
[
1359.0731707317073,
1072.9756097560976
],
[
1195.6585365853657,
848.5853658536585
],
[
1016.3902439024389,
602.2439024390244
]
],
"label": "1",
"shape_type": "linestrip",
"group_id": null
}
],
"flags": {},
"imageData": .....
- 代码实现
/// <summary>
/// Click handler: clears <c>textBox2</c>, then tallies the label strings returned by
/// <c>Tools.Readjs("shapes", file)</c> for every file that <c>Tools.GetFileList</c> reports
/// for the directory typed into <c>textBox1</c>, and prints one summary line per distinct label.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    // Always start from an empty result box.
    textBox2.Text = "";

    // Directory that holds the JSON files.
    string jsonDir = textBox1.Text;
    if (jsonDir == "")
    {
        return;
    }

    // label -> number of occurrences across all files.
    Dictionary<string, int> tally = new Dictionary<string, int>();

    foreach (string jsonFile in Tools.GetFileList(jsonDir))
    {
        // Labels found in this one file.
        foreach (string label in Tools.Readjs("shapes", jsonFile))
        {
            Console.WriteLine(label);

            // seen stays 0 when the label has not been counted yet.
            int seen;
            tally.TryGetValue(label, out seen);
            tally[label] = seen + 1;
        }
    }

    // Report the totals.
    foreach (KeyValuePair<string, int> pair in tally)
    {
        textBox2.AppendText("label为 " + pair.Key + " 的有 " + pair.Value.ToString() + " 个\r\n");
    }
}
/// <summary>
/// Reads a JSON file whose root is an object and collects the <c>"label"</c> value of every
/// element of the array stored under <paramref name="key"/>.
/// </summary>
/// <param name="key">Name of the top-level property that holds the array (e.g. <c>"shapes"</c>).</param>
/// <param name="jsPath">Path of the JSON file to read.</param>
/// <returns>
/// The label values as strings, in array order. Empty when <paramref name="key"/> is missing or
/// is not an array; array elements that are not objects or have no <c>"label"</c> property are skipped.
/// </returns>
public static List<string> Readjs(string key,string jsPath)
{
    List<string> lres = new List<string>();
    using (System.IO.StreamReader file = System.IO.File.OpenText(jsPath))
    using (JsonTextReader reader = new JsonTextReader(file))
    {
        JObject o = (JObject)JToken.ReadFrom(reader);

        // A file without the requested array simply contributes no labels.
        JArray items = o[key] as JArray;
        if (items == null)
        {
            return lres;
        }

        foreach (JToken item in items)
        {
            // Only JSON objects can carry a "label" property.
            JObject shape = item as JObject;
            JToken label = shape == null ? null : shape["label"];
            if (label == null)
            {
                continue;
            }

            lres.Add(label.ToString());
        }
    }

    return lres;
}
读取txt信息提取到excel
- 需求:有一些格式相似的txt文件,需要从中提取字段信息存到excel
拍摄人:默认用户
拍摄时间:2020-12-09 16:31:39
拍摄地GPS:121.46634637,31.20487431
- 引入NPOI包:
C#读写excel主要有三种方法,包括使用微软自带的包Microsoft.Office.Interop.Excel,但是这个方法需要电脑有office,并且版本要对应,我这里用的是NPOI,具体引入方法见链接:
VS2015引用NPOI2.4.1和NuGet的安装方法
- 代码实现
/// <summary>
/// Click handler: reads every file that <c>Tools.GetFileList</c> reports for the directory in
/// <c>textBox1</c> and writes one row per file (file name, capture time, X and Y coordinate)
/// into a new workbook saved at the path given in <c>textBox2</c>.
/// The second line of each file is treated as the capture-time line and the third as the GPS
/// line; each is expected to look like <c>label:value</c>, with an ASCII or full-width colon.
/// Does nothing when either text box is empty.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    // Directory that holds the TXT files.
    string tPath = textBox1.Text;
    // Destination path of the Excel file.
    string xPath = textBox2.Text;
    if (string.IsNullOrEmpty(tPath) || string.IsNullOrEmpty(xPath))
    {
        return;
    }

    List<string> fileList = Tools.GetFileList(tPath);

    var wk = new XSSFWorkbook();
    var st = wk.CreateSheet("拍摄信息");

    // Header row.
    var header = st.CreateRow(0);
    header.CreateCell(0).SetCellValue("文件名");
    header.CreateCell(1).SetCellValue("拍摄时间");
    header.CreateCell(2).SetCellValue("拍摄地X坐标");
    header.CreateCell(3).SetCellValue("拍摄地Y坐标");

    // One data row per input file.
    for (int fileIndex = 0; fileIndex < fileList.Count; fileIndex++)
    {
        string filePath = fileList[fileIndex];
        var row = st.CreateRow(fileIndex + 1);
        row.CreateCell(0).SetCellValue(System.IO.Path.GetFileName(filePath));
        Console.WriteLine(filePath);

        // Dispose the reader so the file handle is released before the next file.
        using (StreamReader sr = new StreamReader(filePath, Encoding.UTF8))
        {
            string line;
            int lineIndex = 0;
            while ((line = sr.ReadLine()) != null)
            {
                if (lineIndex == 1)
                {
                    // Capture-time line: keep everything after the first colon.
                    string time = ValueAfterColon(line);
                    if (time != null)
                    {
                        row.CreateCell(1).SetCellValue(time);
                    }
                }
                else if (lineIndex == 2)
                {
                    // GPS line: "x,y" after the first colon.
                    string gps = ValueAfterColon(line);
                    string[] xy = gps == null ? new string[0] : gps.Split(',');
                    if (xy.Length >= 2)
                    {
                        row.CreateCell(2).SetCellValue(xy[0]);
                        row.CreateCell(3).SetCellValue(xy[1]);
                    }
                }

                lineIndex++;
                Console.WriteLine(line);
            }
        }
    }

    // using guarantees the output stream is closed even if Write throws.
    using (var fs = new FileStream(xPath, FileMode.Create, FileAccess.Write, FileShare.ReadWrite))
    {
        wk.Write(fs);
    }
}

/// <summary>
/// Returns the text after the first ASCII (':') or full-width (':') colon in
/// <paramref name="line"/>, or <c>null</c> when the line contains neither.
/// </summary>
private static string ValueAfterColon(string line)
{
    int colon = line.IndexOfAny(new[] { ':', ':' });
    return colon < 0 ? null : line.Substring(colon + 1);
}
- 写入excel结果
打包程序
https://www.cnblogs.com/kiba/p/11052619.html
总结
在读取xml或者json文件的时候并没有很好的方法可以一次性取出所有的节点,必须要一层一层的处理,所以要具体问题具体分析。