一、概述
使用 C# 分段读取和处理超大文本文件,保证在操作大于 3GB 的 txt 文件时不会耗尽内存(内存溢出),内容包括获取 txt 文件总行数、按行分段读取等。
二、操作流程
1.读取文件总行数
/// <summary>
/// Counts the lines of a text file by streaming it one line at a time, so the
/// whole file is never held in memory.
/// </summary>
/// <param name="txtPath">Path of the text file to scan.</param>
/// <returns>
/// The number of lines in the file minus one (presumably to exclude a header
/// line — confirm against the data format), or 0 for an empty file.
/// </returns>
public int GetTunnelInfos(string txtPath)
{
    int count = 0;

    // 'using' releases the file handle even if reading throws part-way through.
    using (StreamReader sr = new StreamReader(txtPath))
    {
        // ReadLine returns null once the end of the stream is reached.
        while (sr.ReadLine() != null)
        {
            count++;
        }
    }

    // An empty file has no line to subtract; report 0 rather than -1.
    return count > 0 ? count - 1 : 0;
}
2.从txt中读一段数据
// Header-detection state shared across calls: 0 until the first non-blank line
// has been examined, 1 afterwards. This makes the header check apply to the
// first line of the whole file only, not to the first line of every segment.
int isFirst = 0;

/// <summary>
/// Reads one segment of lines and parses each line into an <c>Infos</c> value.
/// </summary>
/// <param name="totalNums">Upper bound for <paramref name="_currentLine"/>; nothing is read once the start index reaches it.</param>
/// <param name="segmentNums">Maximum number of lines to take for this segment.</param>
/// <param name="oneSegmentLines">Line sequence to read from; the segment is cut out of it with Skip/Take.</param>
/// <param name="_currentLine">Zero-based index of the first line of this segment.</param>
/// <returns>
/// The parsed rows of the segment. Empty when <paramref name="_currentLine"/> is not
/// below <paramref name="totalNums"/> or the segment contains no data rows.
/// </returns>
public List<Infos> ReadPointsSegmented(int totalNums, int segmentNums, IEnumerable<string> oneSegmentLines, int _currentLine)
{
    List<Infos> points = new List<Infos>();

    // Start index is at or past the caller-supplied bound: nothing to read.
    if (_currentLine >= totalNums)
    {
        return points;
    }

    // The input is machine-written data, so parse numbers independently of the
    // current UI culture (otherwise "1.5" is misread under comma-decimal locales).
    var culture = System.Globalization.CultureInfo.InvariantCulture;
    char[] separators = { ' ' };

    // Skip: start at _currentLine. Take: read at most segmentNums lines.
    foreach (var line in oneSegmentLines.Skip(_currentLine).Take(segmentNums))
    {
        // Blank lines carry no data and would otherwise crash the checks below.
        if (string.IsNullOrWhiteSpace(line))
        {
            continue;
        }

        // RemoveEmptyEntries tolerates leading or repeated spaces between columns.
        string[] tmp = line.Split(separators, StringSplitOptions.RemoveEmptyEntries);

        if (isFirst == 0)
        {
            // Clear the flag whether or not a header is present, so that a later
            // row starting with a letter (e.g. "NaN") is never mistaken for one.
            isFirst = 1;
            if (char.IsLetter(tmp[0][0]))
            {
                continue; // header line: starts with a letter instead of a number
            }
        }

        points.Add(new Infos
        {
            X = Convert.ToDouble(tmp[0], culture),
            Y = Convert.ToDouble(tmp[1], culture),
            Z = Convert.ToDouble(tmp[2], culture),
            W = Convert.ToDouble(tmp[3], culture),
        });
    }

    return points;
}
3.循环读取整个txt中的每一段
/// <summary>
/// Processes a large text file segment by segment: counts its lines, then
/// repeatedly parses up to 400000 lines at a time via ReadPointsSegmented and
/// stores the current segment in <c>points</c>.
/// </summary>
/// <param name="txtPath">Path of the text file to process.</param>
private void ReadLargeTxt(string txtPath)
{
    const int segmentNums = 400000; // lines per segment

    // GetTunnelInfos returns one less than the number of physical lines of a
    // non-empty file. Segments are addressed by physical line index, so add the
    // line back; otherwise the last line is dropped whenever the returned count
    // is an exact multiple of segmentNums.
    int totalNums = GetTunnelInfos(txtPath);
    int lineCount = totalNums + 1;

    // Restart header detection so a previously processed file does not prevent
    // this file's header line from being skipped.
    isFirst = 0;

    // Read from the same file that was counted (the txtPath argument).
    // NOTE: File.ReadLines is lazy and every Skip(...) below enumerates the file
    // from its beginning again, so later segments cost progressively more I/O.
    IEnumerable<string> lines = File.ReadLines(txtPath, Encoding.UTF8);

    for (int _currentLine = 0; _currentLine < lineCount; _currentLine += segmentNums)
    {
        points = ReadPointsSegmented(lineCount, segmentNums, lines, _currentLine); // fetch one segment
        /*
        ......... (add the code that processes each segment here)
        */
    }
}