一、匹配邮政编码,邮政编码由6位数字组成。
// Validates a postal code read from the console: exactly six ASCII digits.
string code = Console.ReadLine();
// [0-9] is used instead of \d because, in .NET, \d also accepts non-ASCII
// Unicode decimal digits (for example full-width or Arabic-Indic digits),
// which are not valid in a postal code.
Regex reg = new Regex(@"^[0-9]{6}$", RegexOptions.None);
// Console.ReadLine() returns null at end of input and IsMatch(null) throws,
// so a missing line is reported as a failed match instead.
bool isPostalCode = code != null && reg.IsMatch(code);
Console.WriteLine(isPostalCode ? "匹配成功!" : "匹配失败!");
二、匹配数值
// Validates an unsigned number read from the console: one or more digits,
// optionally followed by a decimal point and further digits.
string code = Console.ReadLine();
// The decimal point is escaped: an unescaped '.' matches any character, so
// inputs such as "1a2" or "3-4" used to be accepted as numbers.
// [0-9] restricts the check to ASCII digits (\d also matches other Unicode
// decimal digits in .NET).
Regex reg = new Regex(@"^[0-9]+\.?[0-9]*$", RegexOptions.None);
// Console.ReadLine() returns null at end of input and IsMatch(null) throws,
// so a missing line is reported as a failed match instead.
bool isNumber = code != null && reg.IsMatch(code);
Console.WriteLine(isNumber ? "匹配成功!" : "匹配失败!");
三、去掉字符串中的所有空格
// Reads one line, prints it with every whitespace run removed, then reports
// its length before trimming, after trimming the tail and after trimming the head.
string code = Console.ReadLine();

Regex anyWhitespace = new Regex(@"\s+", RegexOptions.None); // every whitespace run
Regex trailingWhitespace = new Regex(@"\s+$");               // whitespace run at the end
Regex leadingWhitespace = new Regex(@"^\s+");                // whitespace run at the start

string withoutWhitespace = anyWhitespace.Replace(code, string.Empty);
Console.WriteLine("去掉空格符后的字符串为:" + withoutWhitespace);
Console.WriteLine("原始字符串长度为:" + code.Length);

code = trailingWhitespace.Replace(code, string.Empty);
Console.WriteLine("去掉尾部空格符后的长度为:" + code.Length);

code = leadingWhitespace.Replace(code, string.Empty);
Console.WriteLine("去掉首部空格符后的长度为:" + code.Length);
注:上面所说的空格符包括空格、制表符、换行符等空白字符。
四、提取输入字符串中的所有合法的数值
下面程序用于提取所有的整数或者浮点数
// Extracts every integer or decimal number contained in one line of console
// input and prints each one with a running index.
string code = Console.ReadLine();
if (code == null)
{
    // End of input: treat as an empty line so Matches() does not throw.
    code = string.Empty;
}

// A single pattern with an optional fractional part. The former alternation
// \d+|\d+.?\d* never used its second branch (the first alternative that
// succeeds is taken), so "3.14" was reported as "3" and "14"; its unescaped
// '.' would also have matched any character rather than a decimal point.
Regex reg = new Regex(@"[0-9]+(\.[0-9]+)?", RegexOptions.None);
MatchCollection mc = reg.Matches(code);
int counter = 0;
foreach (Match mt in mc)
{
    counter++;
    Console.WriteLine("数值【" + counter + "】:" + mt.Value);
}
五、身份证号码验证
身份证号码为15位全数字,或者为18位,前17位为数字,最后一位为0-9的数字或者字符“x”、“X”。
// Validates an ID number read from the console: either 15 digits, or 17
// digits followed by one more digit or the letter x/X.
string code = Console.ReadLine();
// [0-9] is used instead of \d because, in .NET, \d also accepts non-ASCII
// Unicode decimal digits. Both accepted shapes share one pair of anchors.
Regex reg = new Regex(@"^(?:[0-9]{15}|[0-9]{17}[0-9xX])$", RegexOptions.None);
// Console.ReadLine() returns null at end of input and IsMatch(null) throws,
// so a missing line is reported as a failed match instead.
bool isIdNumber = code != null && reg.IsMatch(code);
Console.WriteLine(isIdNumber ? "匹配成功!" : "匹配失败!");
六、获取一个网页中包括的所有URL
包含的名字空间如下:
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Text.RegularExpressions;
/// <summary>
/// Downloads a web page, extracts every http/https address found in it and
/// prints the total number of matches, the number of distinct addresses and
/// each distinct address in the order it first appeared.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    string pageContent = GetWebPageContent(@"http://www.baidu.com");
    if (pageContent == null)
    {
        // GetWebPageContent has already reported the failure; passing null
        // to Regex.Matches would throw ArgumentNullException.
        return;
    }

    // The pattern covers the scheme and host name only (no path or query).
    Regex reg = new Regex(@"http(s)?://[-\w]+(\.\w[-\w]*)+", RegexOptions.Singleline);
    MatchCollection mc = reg.Matches(pageContent, 0);
    Console.WriteLine("提取网址数量:" + mc.Count);

    // The set gives constant-time duplicate detection (Hashtable.ContainsValue
    // scans every entry), and the list keeps first-seen order so the numbered
    // output is sequential (Hashtable enumeration order is not defined).
    HashSet<string> seen = new HashSet<string>();
    List<string> myUrl = new List<string>();
    foreach (Match mt in mc)
    {
        if (seen.Add(mt.Value))
        {
            myUrl.Add(mt.Value);
        }
    }

    Console.WriteLine("去重后网址数量:" + myUrl.Count);
    for (int i = 0; i < myUrl.Count; i++)
    {
        Console.WriteLine("网址【" + (i + 1) + "】:" + myUrl[i]);
    }
}
/// <summary>
/// Downloads a web page and returns its body as text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>
/// The page content, or <c>null</c> when the request fails (the error
/// message is written to the console).
/// </returns>
private static string GetWebPageContent(string url)
{
    try
    {
        HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
        // The property holds the header value only; the "User-Agent: " prefix
        // that used to be in this string was sent as part of the value.
        req.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)";
        req.Accept = "*/*";
        req.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");
        // No Content-Type is set: that header describes a request body and
        // this request does not send one.

        // Dispose the response so the underlying connection is released.
        using (HttpWebResponse resp = (HttpWebResponse)req.GetResponse())
        {
            Encoding enc = ResolveResponseEncoding(resp.CharacterSet);
            using (StreamReader read = new StreamReader(resp.GetResponseStream(), enc))
            {
                return read.ReadToEnd();
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: report the problem and signal failure with null.
        Console.WriteLine(ex.Message);
        return null;
    }
}

/// <summary>
/// Chooses the text encoding for a response from its declared character set,
/// falling back to UTF-8 when the name is missing, is ISO-8859-1, or is not
/// a known encoding.
/// </summary>
/// <param name="charset">Character set name reported by the response; may be null or empty.</param>
/// <returns>The encoding to decode the response body with.</returns>
private static Encoding ResolveResponseEncoding(string charset)
{
    // NOTE(review): ISO-8859-1 is presumably what HttpWebResponse reports when
    // the server declares no charset, hence it is treated as "unknown" - confirm.
    // Ordinal comparison avoids culture-specific casing (e.g. Turkish 'i').
    if (string.IsNullOrEmpty(charset)
        || string.Equals(charset, "ISO-8859-1", StringComparison.OrdinalIgnoreCase))
    {
        return Encoding.UTF8;
    }

    try
    {
        return Encoding.GetEncoding(charset);
    }
    catch (ArgumentException)
    {
        // The name is not a valid encoding name.
        return Encoding.UTF8;
    }
    catch (NotSupportedException)
    {
        // The encoding is not available on this platform.
        return Encoding.UTF8;
    }
}
七、获取图片链接
// Collects distinct image addresses found in strLine into myPicture, keyed by
// a running counter. Options: single-line mode, case-insensitive.
// First pass: locate <img ...> tags carrying a src or data-src attribute
// followed by one of the listed extensions.
Regex reg = new Regex("<img\\s+[^<>]*(src|data-src)=[^<>\\s]+(\\.(png|gif|jpg|ico|bmp|jpeg|tiff|dxf|\\s))", RegexOptions.Singleline | RegexOptions.IgnoreCase);
foreach (Match imgTag in reg.Matches(strLine))
{
    // Second pass over the matched tag text: group 3 captures the attribute
    // value, i.e. the image link.
    Match attribute = Regex.Match(imgTag.Value, "(src|data-src)=(\")?(.*?)(\"|\\s|$)", RegexOptions.IgnoreCase);
    string link = attribute.Groups[3].Value;

    // Skip values that are (nearly) empty once trimmed.
    if (link.Trim().Length <= 1)
    {
        continue;
    }

    // Skip links that were already stored.
    if (myPicture.ContainsValue(link))
    {
        continue;
    }

    myPicture.Add(++counter, link);
}
八、分组的使用
// Demonstrates numbered capture groups on a URL embedded in text and on an
// <img> tag.
string content = @"vqwsdvasdhttp://202.38.193.153:8000/publishHtml/announcement_11024/announcement_11024.html asdf?gr";

// Group 2 captures the host: everything after "://" up to the first '/' or ':'.
Regex reg1 = new Regex(@"http(s)?://([^/:]*)(.*?)\.html", RegexOptions.IgnoreCase);
Match urlMatch = reg1.Match(content, 0);
string wholeUrl = urlMatch.Value;
string host = urlMatch.Groups[2].Value;
Console.WriteLine(wholeUrl); // prints: http://202.38.193.153:8000/publishHtml/announcement_11024/announcement_11024.html
Console.WriteLine(host);     // prints: 202.38.193.153

// Group 1 captures the port: one to six digits between ':' and '/'.
Regex reg2 = new Regex(@":(\d{1,6})/", RegexOptions.IgnoreCase);
Match portMatch = reg2.Match(content);
string port = portMatch.Groups[1].Value;
Console.WriteLine(port); // prints: 8000

content = "<p><img class=news-smallimg-img height=40 width=68 src=\"http://images.cnitblog.com/blog/347600/201304/27121407-76ee60d0689949668aed919e47f9b959.jpg\" alt=\"\" /></p>";
// Single-line mode, case-insensitive.
Regex reg = new Regex(@"<img\s+[^<>]*(src|data-src)=\s*""?(([^<>\s])+)(\.(png|gif|jpg|ico|bmp|jpeg|tiff|dxf|\s))", RegexOptions.Singleline | RegexOptions.IgnoreCase);
Match imgMatch = reg.Match(content);

// Printed in order:
//   group 1: src
//   group 2: http://images.cnitblog.com/blog/347600/201304/27121407-76ee60d0689949668aed919e47f9b959
//   group 4: .jpg
//   group 5: jpg
int[] groupIndexes = { 1, 2, 4, 5 };
foreach (int groupIndex in groupIndexes)
{
    Console.WriteLine(imgMatch.Groups[groupIndex].Value);
}
九、环视的使用
【例1】要求匹配teacher,但是不能匹配teachers,teacher’s等形式。
// Prints every occurrence of "teacher" (any letter case) that is not followed
// by further characters such as "s" or "'s".
string content = "you i last teacher * 800 <,?ojJOa sd teachers y628 TEACHER'S";
// The lookahead accepts whitespace OR the end of the input; with (?=\s) alone
// a "teacher" that is the last word of the text was never matched.
Regex reg = new Regex(@"teacher(?=\s|$)", RegexOptions.IgnoreCase);
MatchCollection mc = reg.Matches(content);
foreach (Match mt in mc)
{
    Console.WriteLine(mt.Value);
}
【例2】要求找出所有含teach的单词
// Prints every whitespace-delimited word that contains "teach" (any letter case).
string content = "you i last teacher * 800 teaching <,?ojJOa sd teachers y628 TEACHER'S";
// ((?!\s).)* consumes characters as long as the next one is not whitespace.
// It is applied on both sides of "teach" so the whole word is returned;
// with the trailing part only, a word such as "unteachable" was reported
// as "teachable".
Regex reg = new Regex(@"((?!\s).)*teach((?!\s).)*", RegexOptions.IgnoreCase);
MatchCollection mc = reg.Matches(content);
foreach (Match mt in mc)
{
    Console.WriteLine(mt.Value);
}
【例3】将一个较大的数值每3位用一个逗号隔开
// Inserts a comma between groups of three digits in the number inside the text.
const string separator = ",";
string content = "The population of 298444215 is growing";
// The pattern matches empty positions only: the lookbehind requires a digit
// before the position, and the lookahead requires one or more complete
// three-digit groups after it, followed by a non-digit or the end of the input.
string res = Regex.Replace(
    content,
    @"(?<=\d)(?=(\d\d\d)+(\s|\D|$))",
    separator,
    RegexOptions.IgnoreCase);
Console.WriteLine(res);
十、其它
【例1】传统NFA不遵循最左最长规则(下例匹配到的是oneself,而不是更长的oneselfsufficient)
// Two optional groups in sequence: the first one takes "self", after which
// the second one cannot match and is skipped.
string source = "oneselfsufficient";
Match firstMatch = Regex.Match(source, "one(self)?(selfsufficient)?");
Console.WriteLine(firstMatch.Value); // prints: oneself
【例2】多分支结构按照顺序优先匹配
// Alternatives are tried from left to right, so the order in which they are
// written decides which one matches.
string source = "oneselfsufficient";
string[] patterns =
{
    "one(self|selfsufficient)", // prints: oneself
    "one(selfsufficient|self)"  // prints: oneselfsufficient
};
foreach (string pattern in patterns)
{
    Match result = new Regex(pattern).Match(source);
    Console.WriteLine(result.Value);
}
注:.Net使用的正则表达式引擎类型为传统NFA,对于多分支结构,按照顺序优先匹配。