/// <summary>
/// Reads the XML file <c>ec-news-2009-ref.xml</c>, collects the inner text of every
/// <c>seg</c> element, and writes the values (one per line) to a UTF-8 text file.
/// </summary>
/// <remarks>
/// XML comments are skipped while loading. Each value is trimmed of leading and
/// trailing whitespace before it is appended to the output.
/// </remarks>
static void xml2plain()
{
    string trg = "ec-news-2009-ref.xml";

    XmlReaderSettings settings = new XmlReaderSettings();
    settings.IgnoreComments = true;

    XmlDocument xml = new XmlDocument();
    // Dispose the reader as soon as the document is loaded so the underlying
    // input file is closed instead of staying open until finalization.
    using (XmlReader reader = XmlReader.Create(trg, settings))
    {
        xml.Load(reader);
    }

    StringBuilder buf = new StringBuilder();
    foreach (XmlNode n in xml.GetElementsByTagName("seg"))
    {
        buf.AppendLine(n.InnerText.Trim());
    }

    // NOTE(review): name1 is not declared inside this method; presumably it is a
    // member declared elsewhere that holds the output path - confirm.
    File.WriteAllText(name1, buf.ToString(), Encoding.UTF8);
}
这个函数比较简单:读取 XML 文件中所有名为 seg 的节点的文本值,并逐行写入输出文件。对于格式简单的文件,这种方式很方便。
下面这个函数从多个文件中读取数据,最终合并为一个文件。注意:默认情况下,文件夹中的文件是按文件名的字母顺序读取的(例如 10.txt 会排在 2.txt 之前);对于按数字编号的文件,需要自定义读取顺序。
/// <summary>
/// Merges the numbered text files <c>1.txt</c> through <c>11.txt</c> from a hard-coded
/// folder into a single UTF-8 file named <c>result.txt</c>.
/// </summary>
/// <remarks>
/// File names are built from the loop counter, so the files are read in numeric
/// order. Every line is trimmed, and lines that are empty after trimming are
/// dropped. Input files are decoded with <c>Encoding.Default</c>.
/// </remarks>
static void trans2cor()
{
    string folder = @"D:\Documents\Projects\xml2plain\xml2plain\bin\Debug\trans\google-src\";
    int length = 11;

    // Manual switch: when set to true, a single space is inserted between every
    // pair of adjacent characters of each line. It is off by default.
    bool insertSpaces = false;

    StringBuilder buf = new StringBuilder();
    for (int i = 0; i < length; i++)
    {
        string name = folder + (i + 1).ToString() + ".txt";
        string[] lines = File.ReadAllLines(name, Encoding.Default);

        foreach (string line in lines)
        {
            string tmp = line.Trim();

            if (insertSpaces)
            {
                // Build the spaced-out text in a StringBuilder rather than by
                // repeated string concatenation, which is quadratic in line length.
                StringBuilder spaced = new StringBuilder(tmp.Length * 2);
                foreach (char ch in tmp)
                {
                    if (spaced.Length > 0)
                    {
                        spaced.Append(' ');
                    }
                    spaced.Append(ch);
                }
                tmp = spaced.ToString();
            }

            if (tmp != "")
            {
                buf.AppendLine(tmp);
            }
        }
    }

    File.WriteAllText("result.txt", buf.ToString(), Encoding.UTF8);
}