1.如图,需要处理这样的中英混合文档,但是会出现单词跨行的情况:英文单词在行尾被连字符“-”断开,剩余部分出现在下一行的开头。
2.处理代码如下:
/// <summary>
/// Console program that reads the text file <c>D:\t.txt</c> line by line,
/// collects runs of ASCII letters (English words) and runs of characters in
/// the range U+4E00–U+9FA5 (Chinese text) into two separate lists, re-joins
/// English words that were split across two lines with a trailing hyphen,
/// and prints the results to the console.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point: extracts the words, prints every English word, every
    /// Chinese run, then every English word whose first character is a
    /// lower-case 'a', and finally waits for a key press.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        List<string> yList = new List<string>(); // English words, in reading order
        List<string> cList = new List<string>(); // Chinese runs, in reading order
        Regex regex = new Regex("[a-zA-Z]+");    // one English word
        Regex r2 = new Regex("[\u4e00-\u9fa5]+"); // one run of Chinese characters
        string sLine;
        string pre_Line = ""; // the previously read line

        // The using statement guarantees the file handle is released even if
        // reading or processing throws.
        using (StreamReader sr = new StreamReader("D:\\t.txt"))
        {
            while ((sLine = sr.ReadLine()) != null)
            {
                MatchCollection re = regex.Matches(sLine); // English words on this line
                MatchCollection c = r2.Matches(sLine);     // Chinese runs on this line

                // Decide afresh for every line whether it continues a broken
                // word. Keeping this state across iterations would make every
                // later line merge again with a stale index.
                bool isCon = EndsWithBrokenWord(pre_Line) && StartsWithWord(sLine, re);

                // Position of the last word collected so far, i.e. the head
                // fragment when this line is a continuation.
                int index = yList.Count - 1;

                if (isCon)
                {
                    Console.WriteLine(pre_Line);
                    Console.WriteLine(sLine);
                }

                foreach (Match m in re)
                {
                    yList.Add(m.ToString());
                }
                foreach (Match m in c)
                {
                    cList.Add(m.ToString());
                }

                if (isCon)
                {
                    // EndsWithBrokenWord guarantees the previous line ended
                    // with a word (so index >= 0) and StartsWithWord guarantees
                    // this line added at least one word (so index + 1 exists).
                    string temp2 = yList[index + 1];
                    string temp = yList[index] + temp2;
                    yList[index] = temp;
                    yList.RemoveAt(index + 1);
                    Console.WriteLine("{0}-{1}", temp, temp2);
                }

                pre_Line = sLine;
            }
        }

        foreach (string s in yList)
        {
            Console.WriteLine("{0} ", s);
        }
        foreach (string s in cList)
        {
            Console.WriteLine("{0} ", s);
        }

        // English words beginning with a lower-case 'a'. Every element of
        // yList is a non-empty regex match, so word[0] is always valid.
        var v = from word in yList
                where word[0] == 'a'
                select word;
        foreach (string s in v)
        {
            Console.WriteLine("{0};;;", s);
        }

        Console.Read(); // keep the console window open until a key is pressed
    }

    /// <summary>
    /// Returns true when <paramref name="line"/> ends with an ASCII letter
    /// immediately followed by a hyphen, i.e. an English word broken at the
    /// end of the line. A hyphen after any other character is not a word break.
    /// </summary>
    private static bool EndsWithBrokenWord(string line)
    {
        if (line.Length < 2 || line[line.Length - 1] != '-')
        {
            return false;
        }

        char last = line[line.Length - 2];
        return (last >= 'a' && last <= 'z') || (last >= 'A' && last <= 'Z');
    }

    /// <summary>
    /// Returns true when the first English word of <paramref name="line"/>
    /// is preceded by nothing but white space, i.e. the line can supply the
    /// tail of a word broken on the previous line.
    /// </summary>
    /// <param name="line">The line the matches were taken from.</param>
    /// <param name="words">The English-word matches found in that line.</param>
    private static bool StartsWithWord(string line, MatchCollection words)
    {
        return words.Count > 0
            && line.Substring(0, words[0].Index).Trim().Length == 0;
    }
}