C#正则表达式入门(下)

一、匹配邮政编码,邮政编码为6位数字组成。

// Validate a postal code: exactly six ASCII digits.
// Console.ReadLine() returns null at end of input, and Regex.IsMatch(null)
// throws ArgumentNullException, so fall back to an empty string.
string code = Console.ReadLine() ?? string.Empty;

// [0-9] is used instead of \d because \d in .NET also matches non-ASCII
// Unicode decimal digits, which are not valid in a postal code.
Regex reg = new Regex(@"^[0-9]{6}$", RegexOptions.None);

Console.WriteLine(reg.IsMatch(code) ? "匹配成功!" : "匹配失败!");

 

二、匹配数值

// Validate a non-negative number: an integer part with an optional
// fractional part (e.g. "42" or "3.14").
// Console.ReadLine() returns null at end of input; treat that as empty text
// so that IsMatch does not throw.
string code = Console.ReadLine() ?? string.Empty;

// The decimal point must be escaped: a bare '.' matches ANY character, so
// the previous pattern accepted inputs such as "1a2". The fractional part is
// grouped so that a dot is only accepted when digits follow it.
// [0-9] keeps the check to ASCII digits (\d also matches other Unicode digits).
Regex reg = new Regex(@"^[0-9]+(\.[0-9]+)?$", RegexOptions.None);

Console.WriteLine(reg.IsMatch(code) ? "匹配成功!" : "匹配失败!");

 

三、去掉字符串中的所有空格

// Demonstrates removing whitespace with regular expressions:
// everywhere, then only at the end, then only at the start.
// Console.ReadLine() returns null at end of input; Regex.Replace and
// code.Length would both fail on null, so use an empty string instead.
string code = Console.ReadLine() ?? string.Empty;

// \s+ matches every run of whitespace characters anywhere in the input.
Regex reg = new Regex(@"\s+", RegexOptions.None);
Console.WriteLine("去掉空格符后的字符串为:" + reg.Replace(code, ""));
Console.WriteLine("原始字符串长度为:" + code.Length);

// \s+$ matches only the whitespace run at the end of the input.
reg = new Regex(@"\s+$");
code = reg.Replace(code, "");
Console.WriteLine("去掉尾部空格符后的长度为:" + code.Length);

// ^\s+ matches only the whitespace run at the start of the input.
reg = new Regex(@"^\s+");
code = reg.Replace(code, "");
Console.WriteLine("去掉首部空格符后的长度为:" + code.Length);

注:上面所说的空格符(\s)包括空格、制表符、换行符等空白字符。

 

四、提取输入字符串中的所有合法的数值

下面程序用于提取所有的整数或者浮点数

// Extract every integer or floating-point number from one line of input.
// Console.ReadLine() returns null at end of input; Regex.Matches(null)
// throws, so fall back to an empty string.
string code = Console.ReadLine() ?? string.Empty;

// One pattern covers both cases: digits, optionally followed by an escaped
// decimal point and more digits.
// The previous pattern "\d+|\d+.?\d*" never produced a decimal: the .NET
// engine tries alternatives left to right, so the first branch "\d+" always
// won and "3.14" came out as "3" and "14". Its unescaped '.' would also have
// matched any character.
Regex reg = new Regex(@"\d+(\.\d+)?", RegexOptions.None);

MatchCollection mc = reg.Matches(code);

int counter = 0;
foreach (Match mt in mc)
{
    Console.WriteLine("数值【" + (++counter) + "】:" + mt.Value);
}

五、身份证号码验证

身份证号码为15位全数字,或者为18位:前17位为数字,最后一位为0-9的数字或者字符“x”、“X”。

// Validate an ID-card number: either 15 digits, or 17 digits followed by
// one final character that is a digit, 'x' or 'X'.
// Console.ReadLine() returns null at end of input; IsMatch(null) throws,
// so fall back to an empty string.
string code = Console.ReadLine() ?? string.Empty;

// [0-9] is used for every digit position so the whole pattern is restricted
// to ASCII digits (\d also matches other Unicode decimal digits), matching
// the [0-9xX] class already used for the last character.
// RegexOptions.Singleline was dropped: it only changes what '.' matches and
// this pattern contains no '.'.
Regex reg = new Regex(@"^([0-9]{15}|[0-9]{17}[0-9xX])$", RegexOptions.None);

Console.WriteLine(reg.IsMatch(code) ? "匹配成功!" : "匹配失败!");

 

六、获取一个网页中包括的所有URL

包含的名字空间如下:

using System;
using System.Collections;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;

 

/// <summary>
/// Downloads a web page, extracts every http/https URL found in it and
/// prints the total number of matches followed by the distinct URLs.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    string pageContent = GetWebPageContent(@"http://www.baidu.com");
    if (pageContent == null)
    {
        // GetWebPageContent returns null when the download fails (it has
        // already printed the error); Regex.Matches(null) would throw.
        return;
    }

    // Scheme, host label, then one or more ".label" parts. The pattern has
    // no unescaped '.', so no RegexOptions are needed.
    Regex reg = new Regex(@"http(s)?://[-\w]+(\.\w[-\w]*)+");
    MatchCollection mc = reg.Matches(pageContent);
    Console.WriteLine("提取网址数量:" + mc.Count);

    // Keyed by URL so the duplicate check is a hash lookup rather than a
    // linear scan over all values; the value is the URL's sequence number.
    Hashtable myUrl = new Hashtable();
    int counter = 0;
    foreach (Match mt in mc)
    {
        if (!myUrl.ContainsKey(mt.Value))
        {
            myUrl.Add(mt.Value, ++counter);
        }
    }

    Console.WriteLine("去重后网址数量:" + counter);
    foreach (DictionaryEntry url in myUrl)
    {
        Console.WriteLine("网址【" + url.Value + "】:" + url.Key);
    }
}

        /// <summary>

        /// 获取网页内容

        /// </summary>

        /// <param name="url">网址</param>

        /// <returns></returns>

        private static string GetWebPageContent(string url)

        {

            try

            {

                HttpWebRequest req = (HttpWebRequest)HttpWebRequest.Create(url);

                req.UserAgent = "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)";

                req.Accept = "*/*";

                req.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");

                req.ContentType = "text/xml";

 

                HttpWebResponse resp = (HttpWebResponse)req.GetResponse();

                Encoding enc;

                try

                {

                    if (resp.CharacterSet.ToUpper() != "ISO-8859-1")

                        enc = Encoding.GetEncoding(resp.CharacterSet);

                    else

                        enc = Encoding.UTF8;

                }

                catch

                {

                    // *** Invalid encoding passed

                    enc = Encoding.UTF8;

                }

                string sHTML = string.Empty;

                using (StreamReader read = new StreamReader(resp.GetResponseStream(), enc))

                {

                    sHTML = read.ReadToEnd();

                }

                return sHTML;

            }

            catch (Exception ex)

            {

                Console.WriteLine(ex.Message.ToString());

                return null;

            }

 

        }

七、获取图片链接

 

// First pass: find each <img ...> tag fragment that carries a src or
// data-src attribute whose value ends in a known image extension.
// Matching is case-insensitive.
Regex imgTagPattern = new Regex("<img\\s+[^<>]*(src|data-src)=[^<>\\s]+(\\.(png|gif|jpg|ico|bmp|jpeg|tiff|dxf|\\s))", RegexOptions.Singleline | RegexOptions.IgnoreCase);

for (Match tag = imgTagPattern.Match(strLine); tag.Success; tag = tag.NextMatch())
{
    // Second pass over the matched fragment: group 3 is the attribute value,
    // i.e. the text after an optional opening quote up to the next quote,
    // whitespace character or end of the fragment.
    Match attribute = Regex.Match(tag.Value, "(src|data-src)=(\")?(.*?)(\"|\\s|$)", RegexOptions.IgnoreCase);
    string link = attribute.Groups[3].Value;

    // Skip values that trim down to one character or less, and links
    // already stored in myPicture.
    if (link.Trim().Length <= 1 || myPicture.ContainsValue(link))
    {
        continue;
    }

    myPicture.Add(++counter, link);
}

八、分组的使用

// Sample text: a URL embedded between unrelated characters.
string content = @"vqwsdvasdhttp://202.38.193.153:8000/publishHtml/announcement_11024/announcement_11024.html asdf?gr";

// Group 1: optional "s" of https. Group 2: host (everything up to the first
// '/' or ':'). Group 3: the shortest remainder that is followed by ".html".
Match urlMatch = Regex.Match(content, @"http(s)?://([^/:]*)(.*?)\.html", RegexOptions.IgnoreCase);
Console.WriteLine(urlMatch.Value);            // prints: http://202.38.193.153:8000/publishHtml/announcement_11024/announcement_11024.html
Console.WriteLine(urlMatch.Groups[2].Value);  // prints: 202.38.193.153

// Group 1: one to six digits between ':' and '/', i.e. the port.
Match portMatch = Regex.Match(content, @":(\d{1,6})/", RegexOptions.IgnoreCase);
Console.WriteLine(portMatch.Groups[1].Value); // prints: 8000

content = "<p><img class=news-smallimg-img height=40 width=68 src=\"http://images.cnitblog.com/blog/347600/201304/27121407-76ee60d0689949668aed919e47f9b959.jpg\" alt=\"\" /></p>";

// Group 1: attribute name. Group 2: the link without its extension.
// Group 3: the last single character of group 2 (inner repeated group).
// Group 4: the extension including the dot. Group 5: the bare extension.
Match imgMatch = Regex.Match(
    content,
    @"<img\s+[^<>]*(src|data-src)=\s*""?(([^<>\s])+)(\.(png|gif|jpg|ico|bmp|jpeg|tiff|dxf|\s))",
    RegexOptions.Singleline | RegexOptions.IgnoreCase);
Console.WriteLine(imgMatch.Groups[1].Value);  // prints: src
Console.WriteLine(imgMatch.Groups[2].Value);  // prints: http://images.cnitblog.com/blog/347600/201304/27121407-76ee60d0689949668aed919e47f9b959
Console.WriteLine(imgMatch.Groups[4].Value);  // prints: .jpg
Console.WriteLine(imgMatch.Groups[5].Value);  // prints: jpg

 

九、环视的使用

【例1】要求匹配teacher,但是不能匹配teachers,teacher’s等形式。

string content = "you i last teacher * 800 <,?ojJOa sd teachers y628 TEACHER'S";

// (?=\s) is a positive lookahead: "teacher" only matches when the next
// character is whitespace, and that character is not part of the match.
// "teachers" and "TEACHER'S" are therefore skipped. Because a whitespace
// character is required, a "teacher" at the very end of the text would not
// match either.
for (Match hit = Regex.Match(content, @"teacher(?=\s)", RegexOptions.IgnoreCase);
     hit.Success;
     hit = hit.NextMatch())
{
    Console.WriteLine(hit.Value);
}

 

【例2】要求找出所有含teach的单词

string content = "you i last teacher * 800 teaching <,?ojJOa sd teachers y628 TEACHER'S";

// ((?!\s).)* consumes characters one at a time as long as the next one is
// not whitespace, so each match runs from "teach" to the end of that word.
Match word = Regex.Match(content, @"teach((?!\s).)*", RegexOptions.IgnoreCase);
while (word.Success)
{
    Console.WriteLine(word.Value);
    word = word.NextMatch();
}

【例3】将一个较大的数值每3位用一个逗号隔开

string content = "The population of 298444215 is growing";

// Zero-width match: a position preceded by a digit and followed by one or
// more complete groups of three digits that end at a non-digit or at the end
// of the text. Replacing each such position inserts the separators.
string formatted = Regex.Replace(
    content,
    @"(?<=\d)(?=(\d\d\d)+(\s|\D|$))",
    ",",
    RegexOptions.IgnoreCase);

Console.WriteLine(formatted);

十、其它

【例1】最左最长规则(.Net的传统NFA引擎并不遵循该规则:下面的结果是oneself,而不是最长的oneselfsufficient)

string source = "oneselfsufficient";

// The first optional group takes "self"; the second cannot match at
// "sufficient", so it matches nothing and the overall match stops there.
Match result = Regex.Match(source, "one(self)?(selfsufficient)?");
Console.WriteLine(result.Value); // prints: oneself

【例2】多分支结构按照顺序优先匹配

string source = "oneselfsufficient";

// Same two alternatives in opposite order: the alternative listed first is
// tried first, so the first pattern prints "oneself" and the second prints
// "oneselfsufficient".
string[] patterns = new string[]
{
    "one(self|selfsufficient)",
    "one(selfsufficient|self)"
};

foreach (string pattern in patterns)
{
    Console.WriteLine(Regex.Match(source, pattern).Value);
}

注:.Net使用的正则表达式引擎类型为传统NFA,对于多分支结构,按照顺序优先匹配。

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

数据之道

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值