C#正则表达式入门（下）

最新推荐文章于 2019-04-17 11:42:31 发布

数据之道

最新推荐文章于 2019-04-17 11:42:31 发布

阅读量295

点赞数

分类专栏： C# 程序人生文章标签： C# .NET 正则表达式

本文链接：https://blog.csdn.net/huzhizhewudi/article/details/84348024

版权

程序人生同时被 2 个专栏收录

56 篇文章 10 订阅

订阅专栏

32 篇文章 7 订阅

订阅专栏

一、匹配邮政编码，邮政编码为6位数字组成。

// Validate a postal code: exactly six ASCII digits.
// Console.ReadLine returns null at end of input; treat that as an empty (non-matching) string
// instead of letting Regex.IsMatch throw ArgumentNullException.
string code = Console.ReadLine() ?? string.Empty;

// [0-9] is used instead of \d because, in .NET, \d also matches non-ASCII Unicode decimal digits.
Regex reg = new Regex(@"^[0-9]{6}$", RegexOptions.None);

Console.WriteLine(reg.IsMatch(code) ? "匹配成功!" : "匹配失败!");

二、匹配数值

// Validate a non-negative number: digits, optionally followed by a decimal point and more digits.
// Console.ReadLine returns null at end of input; treat that as an empty (non-matching) string.
string code = Console.ReadLine() ?? string.Empty;

// The dot must be escaped: an unescaped '.' matches any character, so input such as "1a2" was accepted.
Regex reg = new Regex(@"^\d+\.?\d*$", RegexOptions.None);

Console.WriteLine(reg.IsMatch(code) ? "匹配成功!" : "匹配失败!");

三、去掉字符串中的所有空格

// Demonstrates removing all, trailing, and leading whitespace with regular expressions.
// Console.ReadLine returns null at end of input; fall back to an empty string so that
// Regex.Replace and code.Length below do not throw.
string code = Console.ReadLine() ?? string.Empty;

Regex reg = new Regex(@"\s+", RegexOptions.None); // every run of whitespace characters

Console.WriteLine("去掉空格符后的字符串为：" + reg.Replace(code, ""));

Console.WriteLine("原始字符串长度为：" + code.Length);

reg = new Regex(@"\s+$"); // whitespace at the end of the string

code = reg.Replace(code, "");

Console.WriteLine("去掉尾部空格符后的长度为：" + code.Length);

reg = new Regex(@"^\s+"); // whitespace at the start of the string

code = reg.Replace(code, "");

Console.WriteLine("去掉首部空格符后的长度为：" + code.Length);

注：上面所说的空格符（\s）包括空格、制表符、换行符等空白字符。

四、提取输入字符串中的所有合法的数值

下面程序用于提取所有的整数或者浮点数

// Extract every integer or decimal number from the input line and print them, numbered from 1.
// Console.ReadLine returns null at end of input; treat that as an empty string (no matches).
string code = Console.ReadLine() ?? string.Empty;

// One pattern with an optional fractional part. The previous alternation \d+|\d+.?\d* always
// took the first branch, so "3.14" was reported as "3" and "14", and its unescaped '.' matched
// any character. RegexOptions.Singleline only changes how '.' behaves, so it is no longer needed.
Regex reg = new Regex(@"\d+(\.\d+)?", RegexOptions.None);

MatchCollection mc = reg.Matches(code); // scan from the first character

int counter = 0;

foreach (Match mt in mc)
{
    Console.WriteLine("数值【" + (++counter) + "】:" + mt.Value);
}

五、身份证号码验证

身份证号码为15位全数字；或者为18位，其中前17位为数字，最后一位为0-9的数字或者字符“x”、“X”。

// Validate an ID-card number: either 15 digits, or 17 digits followed by a digit, 'x' or 'X'.
// Console.ReadLine returns null at end of input; treat that as an empty (non-matching) string.
string code = Console.ReadLine() ?? string.Empty;

// [0-9] is used instead of \d because, in .NET, \d also matches non-ASCII Unicode decimal digits.
// The two alternatives share a single pair of anchors via a non-capturing group.
Regex reg = new Regex(@"^(?:[0-9]{15}|[0-9]{17}[0-9xX])$", RegexOptions.None);

Console.WriteLine(reg.IsMatch(code) ? "匹配成功!" : "匹配失败!");

六、获取一个网页中包括的所有URL

包含的名字空间如下：

using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;

/// <summary>
/// Downloads a web page, extracts every http/https URL found in it, and prints the total
/// number of matches followed by the distinct URLs in order of first appearance.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    string pageContent = GetWebPageContent(@"http://www.baidu.com");

    if (pageContent == null)
    {
        // GetWebPageContent returns null when the download fails; Regex.Matches would throw on null.
        return;
    }

    Regex reg = new Regex(@"http(s)?://[-\w]+(\.\w[-\w]*)+", RegexOptions.Singleline);

    MatchCollection mc = reg.Matches(pageContent);

    Console.WriteLine("提取网址数量：" + mc.Count);

    // The set gives O(1) duplicate detection; the list keeps first-appearance order so the
    // numbering printed below is stable (Hashtable enumeration order is unspecified).
    HashSet<string> seen = new HashSet<string>();
    List<string> myUrl = new List<string>();

    foreach (Match mt in mc)
    {
        if (seen.Add(mt.Value))
        {
            myUrl.Add(mt.Value);
        }
    }

    Console.WriteLine("去重后网址数量：" + myUrl.Count);

    for (int i = 0; i < myUrl.Count; i++)
    {
        Console.WriteLine("网址【" + (i + 1) + "】:" + myUrl[i]);
    }
}

/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its body decoded as text.
/// </summary>
/// <param name="url">Absolute URL of the page to fetch.</param>
/// <returns>
/// The response body, or <c>null</c> if anything fails; in that case the exception message
/// is written to the console.
/// </returns>
private static string GetWebPageContent(string url)
{
    try
    {
        HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);

        // The property takes only the header value; the header name must not be repeated inside it.
        req.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)";
        req.Accept = "*/*";
        req.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");
        // NOTE(review): this request has no body, so a Content-Type is probably unnecessary — confirm before removing.
        req.ContentType = "text/xml";

        // Dispose the response so the underlying connection is released.
        using (HttpWebResponse resp = (HttpWebResponse)req.GetResponse())
        {
            // UTF-8 is used when the charset is missing, unknown, or ISO-8859-1.
            // NOTE(review): presumably ISO-8859-1 is what HttpWebResponse reports when the server
            // declares no charset, hence the special case — confirm.
            Encoding enc = Encoding.UTF8;
            string charset = resp.CharacterSet;

            if (!string.IsNullOrEmpty(charset)
                && !string.Equals(charset, "ISO-8859-1", StringComparison.OrdinalIgnoreCase))
            {
                try
                {
                    enc = Encoding.GetEncoding(charset);
                }
                catch (ArgumentException)
                {
                    // Charset name not recognised on this platform: keep the UTF-8 fallback.
                }
            }

            using (StreamReader read = new StreamReader(resp.GetResponseStream(), enc))
            {
                return read.ReadToEnd();
            }
        }
    }
    catch (Exception ex)
    {
        // Deliberate best effort: report the failure and let the caller handle the null result.
        Console.WriteLine(ex.Message);

        return null;
    }
}

七、获取图片链接

// Collects distinct image links from strLine. strLine, myPicture and counter are defined
// outside this excerpt (myPicture presumably a Hashtable like myUrl in the URL example — confirm).

// First pass: find <img> tags whose src/data-src value ends in a known image extension.
// Single-line mode, case-insensitive.
Regex reg = new Regex("<img\\s+[^<>]*(src|data-src)=[^<>\\s]+(\\.(png|gif|jpg|ico|bmp|jpeg|tiff|dxf|\\s))", RegexOptions.Singleline | RegexOptions.IgnoreCase);

MatchCollection mc = reg.Matches(strLine);

foreach (Match mt in mc)
{
    // Second pass on the matched tag text: group 3 is the attribute value without the opening quote.
    Match tmp = Regex.Match(mt.Value, "(src|data-src)=(\")?(.*?)(\"|\\s|$)", RegexOptions.IgnoreCase);

    string res = tmp.Groups[3].Value;

    // Skip empty or single-character values.
    if (res.Trim().Length > 1)
    {
        if (!myPicture.ContainsValue(res))
        {
            myPicture.Add(++counter, res);
        }
    }
}

八、分组的使用

// Capture-group demo, part 1: pull the host and the port out of a URL embedded in noise.
string content = @"vqwsdvasdhttp://202.38.193.153:8000/publishHtml/announcement_11024/announcement_11024.html asdf?gr";

// Group 2 is the host: everything after "://" up to the first '/' or ':'.
Match urlMatch = Regex.Match(content, @"http(s)?://([^/:]*)(.*?)\.html", RegexOptions.IgnoreCase);

// Prints: http://202.38.193.153:8000/publishHtml/announcement_11024/announcement_11024.html
Console.WriteLine(urlMatch.Value);

// Prints: 202.38.193.153
Console.WriteLine(urlMatch.Groups[2].Value);

// Group 1 is the run of 1-6 digits between ':' and '/'.
Match portMatch = Regex.Match(content, @":(\d{1,6})/", RegexOptions.IgnoreCase);

// Prints: 8000
Console.WriteLine(portMatch.Groups[1].Value);

// Capture-group demo, part 2: split an <img> source into attribute name, path and extension.
string imgHtml = "<p><img class=news-smallimg-img height=40 width=68 src=\"http://images.cnitblog.com/blog/347600/201304/27121407-76ee60d0689949668aed919e47f9b959.jpg\" alt=\"\" /></p>";

// Single-line mode, case-insensitive. Group 3 is the inner one-character group, which is why
// the extension groups below are numbered 4 and 5.
RegexOptions imgOptions = RegexOptions.Singleline | RegexOptions.IgnoreCase;
Match imgMatch = Regex.Match(imgHtml, @"<img\s+[^<>]*(src|data-src)=\s*""?(([^<>\s])+)(\.(png|gif|jpg|ico|bmp|jpeg|tiff|dxf|\s))", imgOptions);

// Prints: src
Console.WriteLine(imgMatch.Groups[1].Value);

// Prints: http://images.cnitblog.com/blog/347600/201304/27121407-76ee60d0689949668aed919e47f9b959
Console.WriteLine(imgMatch.Groups[2].Value);

// Prints: .jpg
Console.WriteLine(imgMatch.Groups[4].Value);

// Prints: jpg
Console.WriteLine(imgMatch.Groups[5].Value);

九、环视的使用

【例1】要求匹配teacher，但是不能匹配teachers，teacher’s等形式。

// Match the word "teacher" but not forms such as "teachers" or "teacher's".
string content = "you i last teacher * 800 <,?ojJOa sd teachers y628 TEACHER'S";

// \b stops matches inside longer words; the negative lookahead rejects a following word
// character or apostrophe. Unlike (?=\s), this also accepts "teacher" at the end of the
// input or directly before punctuation.
Regex reg = new Regex(@"\bteacher(?![\w'’])", RegexOptions.IgnoreCase);

MatchCollection mc = reg.Matches(content);

foreach (Match mt in mc)
{
    Console.WriteLine(mt.Value);
}

【例2】要求找出所有含teach的单词

// Print every run of text that starts with "teach" and continues up to the next whitespace.
string content = "you i last teacher * 800 teaching <,?ojJOa sd teachers y628 TEACHER'S";

// ((?!\s).)* consumes one character at a time, but only while the next character is not whitespace.
MatchCollection hits = Regex.Matches(content, @"teach((?!\s).)*", RegexOptions.IgnoreCase);

for (int i = 0; i < hits.Count; i++)
{
    Console.WriteLine(hits[i].Value);
}

【例3】将一个较大的数值每3位用一个逗号隔开

// Insert a comma between every group of three digits, counted from the right end of a number.
string content = "The population of 298444215 is growing";

// The pattern is zero-width: it matches a position that has a digit behind it and, ahead of it,
// one or more complete groups of three digits followed by a non-digit or the end of the input.
string res = Regex.Replace(content, @"(?<=\d)(?=(\d\d\d)+(\s|\D|$))", ",", RegexOptions.IgnoreCase);

Console.WriteLine(res);

十、其它

【例1】最左最长规则（传统NFA引擎并不遵循该规则：下面的结果是oneself，而不是最长的oneselfsufficient）

// Two optional groups in sequence: (self)? greedily takes "self", after which
// (selfsufficient)? cannot match at "sufficient" and is skipped.
string source = "oneselfsufficient";

Match optionalMatch = Regex.Match(source, "one(self)?(selfsufficient)?");

// Prints: oneself
Console.WriteLine(optionalMatch.Value);

【例2】多分支结构按照顺序优先匹配

// Alternatives are tried left to right and the first one that succeeds wins.
string source = "oneselfsufficient";

// "self" is listed first, so it is chosen even though a longer alternative exists.
// Prints: oneself
Match shortFirst = Regex.Match(source, "one(self|selfsufficient)");
Console.WriteLine(shortFirst.Value);

// With the longer alternative listed first, it is the one that matches.
// Prints: oneselfsufficient
Match longFirst = Regex.Match(source, "one(selfsufficient|self)");
Console.WriteLine(longFirst.Value);

注：.Net使用的正则表达式引擎类型为传统NFA，对于多分支结构，按照顺序优先匹配。

数据之道

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
打赏
0
评论
C#正则表达式入门（下）

一、匹配邮政编码，邮政编码为6位数字组成。string code;code = Console.ReadLine();Regex reg = new Regex(@"^\d{6}$",RegexOptions.None);Console.WriteLine(reg.IsMatch(code)?"匹配成功!":"匹配失败!"); 二、匹配数值string code;c...
复制链接

扫一扫