关于C#中的正则表达式

我一直对正则表达式似懂非懂,每次用到都要弄半天,现在已经两次涉及到了,记录下来,以备后用:

A.将一个包含英语单词信息的文本文件转换为XML文档

//文本文件的每行格式为:[W]disastrous[T]di'z1:str3s[M]a.灾难性的;糟透的[v]2584-5544

// Each input line has the shape "[W]word[T]phonetic[M]meaning...". The three named
// groups capture the text that follows each literal tag. The square brackets must be
// escaped with a backslash; unescaped, "[W" would open a character class instead of
// matching the literal "[W]" tag.
string strReg = @"(\[W\](?<word>.*))(\[T\](?<phonetic>.*))(\[M\](?<interpret>.*))";

Regex regex = new Regex(strReg);

// Match once and reuse the result rather than running IsMatch and Match separately.
Match match = regex.Match(strLine);
if (match.Success) // does the current line of the text file fit the pattern?
{
    // ... (code elided in the original post)

    xeWord.InnerText = match.Groups["word"].Value; // the word between [W] and [T]

    // ... (code elided in the original post)
}

B.将一个包含公交信息的文本文件转换为XML文档

//文本文件的每行格式为:W1路E路林市场E华业大厦E健美皮肤病医院E孔浦医院E大通桥E通达塑机公司E火车北站E白沙路E第三医院E轮船码头E灵桥西E大世界家私市场E兴宁桥E望湖市场E南站

            string strReg = @"(W(?<Line>.*))";
            string strSplit = @"(E)";
            Regex regex, regLine, regE;
            string str;
            using (StreamWriter sw = new StreamWriter(@"test.txt", false))
            {
                using (StreamReader sr = new StreamReader(@"宁波.txt"))
                {
                    while (sr.Peek() >= 0)
                    {
                        string strLine = sr.ReadLine();
                        if (strLine != "")
                        {
                            regex = new Regex(strSplit);
                            for (int i = 0; i < regex.Split(strLine).Length; i++)
                            {
                                str = regex.Split(strLine)[i].ToString();
                                regLine = new Regex(strReg);
                                regE = new Regex(strSplit);
                                if (regLine.IsMatch(str))
                                    sw.WriteLine(regLine.Match(str).Result(@"${Line}"));
                                else
                                    if (!regE.IsMatch(str))
                                        sw.WriteLine(str);
                            }
                        }
                    }
                }
            }
          
            XmlDocument xmlDoc = new XmlDocument();
            XmlElement xeRoot = xmlDoc.CreateElement("BusLines");
            XmlElement xeLine = null,xeStation;
            int j=0,k = 0;//统计站点数
            using (StreamReader sr = new StreamReader(txtOpenFile.Text, Encoding.GetEncoding("gb2312")))
            {
                while (sr.Peek() >= 0)
                {
                    string strLine = sr.ReadLine();

                    if (strLine != "")
                    {
                        regex = new Regex(strReg);
                        if (regex.IsMatch(strLine))
                        {
                           
                            regex = new Regex(strSplit);
                            for (int i = 0; i < regex.Split(strLine).Length; i++)//将每行文本按照"E"字母进行分组字符串
                            {
                                str = regex.Split(strLine)[i].ToString();
                                regLine = new Regex(strReg);
                                regE = new Regex(strSplit);
                                if (regLine.IsMatch(str))//判断行中是否匹配到"W"及相关文字,如果是则表时是线路信息
                                {
                                    k++;
                                    xeLine = xmlDoc.CreateElement("Line");
                                    xeLine.SetAttribute("LineID", k.ToString());
                                    xeLine.SetAttribute("Number", regLine.Match(str).Result(@"${Line}"));
                                    xeLine.SetAttribute("DayOrNight", "日");
                                    j = 0;//加上此句后,就可统计每条线路的站点数,如果不加此条则是所有的站点数
                                }
                                else
                                    if (!regE.IsMatch(str))//判断行中是否匹配到"E",如果不是就表明是站点,就记录此站点名
                                    {
                                       
                                        xeStation = xmlDoc.CreateElement("Station");
                                        xeStation.SetAttribute("StaID",j.ToString());
                                        xeStation.SetAttribute("Name", str);
                                        xeStation.SetAttribute("SMorning", "0500");
                                        xeStation.SetAttribute("SAfternoon", "1900");
                                        xeStation.SetAttribute("WMorning", "0600");
                                        xeStation.SetAttribute("WAfternoon", "1800");
                                        xeLine.AppendChild(xeStation);
                                        j++;
                                    }
                            }
                            xeRoot.AppendChild(xeLine);

                        }
                    }                  
                }
            }
            // Create an XML declaration 防止在VS2005中出现乱码
            XmlDeclaration xmldecl;
            xmldecl = xmlDoc.CreateXmlDeclaration("1.0", null, null);
            xmldecl.Encoding = "GB2312";
            xmldecl.Standalone = "yes";
            XmlElement root = xmlDoc.DocumentElement;
            xmlDoc.InsertBefore(xmldecl, root);
            xmlDoc.AppendChild(xeRoot);
            xmlDoc.Save("txt.xml");

 

以上两个例子的区别在于:A有多种分隔符,并且分隔符的数目固定;而B只有一种分隔符,且数目不固定。因此,两者的解决方法有很大的区别。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值