常用的正则表达式

[转]正则表达

注意:发现部分正则表达式有误,请思考后再用。另外,本文在转载过程中,正则里的反斜杠"\"大多被误写成了"/"(例如"/d"应为"\d","/."应为"\."),使用前请先还原。谢谢...

正则表达(所有内容)

Email : /^\w+([-+.]\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*$/, 
Phone : /^((\(\d{2,3}\))|(\d{3}\-))?(\(0\d{2,3}\)|0\d{2,3}-)?[1-9]\d{6,7}(\-\d{1,4})?$/, 
Mobile : /^((\(\d{2,3}\))|(\d{3}\-))?13\d{9}$/, 
Url : /^http:\/\/[A-Za-z0-9]+\.[A-Za-z0-9]+[\/=\?%\-&_~`@[\]\':+!]*([^<>\"\"])*$/, 
Currency : /^\d+(\.\d+)?$/, 
Number : /^\d+$/, 
Zip : /^[1-9]\d{5}$/, 
QQ : /^[1-9]\d{4,8}$/, 
Integer : /^[-\+]?\d+$/, 
Double : /^[-\+]?\d+(\.\d+)?$/, 
English : /^[A-Za-z]+$/, 
Chinese : /^[\u0391-\uFFE5]+$/, 

1、非负整数:^/d+$  

2、正整数:^[0-9]*[1-9][0-9]*$  

3、非正整数:^((-/d+)|(0+))$  

4、负整数:^-[0-9]*[1-9][0-9]*$  

5、整数:^-?/d+$  

6、非负浮点数:^/d+(/./d+)?$  

7、正浮点数:^(([0-9]+\.[0-9]*[1-9][0-9]*)|([0-9]*[1-9][0-9]*\.[0-9]+)|([0-9]*[1-9][0-9]*))$  

8、非正浮点数:^((-\d+(\.\d+)?)|(0+(\.0+)?))$  

9、负浮点数:^(-((正浮点数正则式)))$  

10、英文字符串:^[A-Za-z]+$  

11、英文大写串:^[A-Z]+$  

12、英文小写串:^[a-z]+$  

13、英文字符数字串:^[A-Za-z0-9]+$  

14、英数字加下划线串:^/w+$  

15、E-mail地址:^[/w-]+(/.[/w-]+)*@[/w-]+(/.[/w-]+)+$  

16、URL:^[a-zA-Z]+://(\w+(-\w+)*)(\.(\w+(-\w+)*))*(\?\S*)?$  
或:^http:[A-Za-z0-9]+/.[A-Za-z0-9]+[//=/?%/-&_~`@[/]/':+!]*([^<>/"/"])*$ 

17、邮政编码:^[1-9]/d{5}$ 

18、中文:^[/u0391-/uFFE5]+$ 

19、电话号码:^((/(/d{2,3}/))|(/d{3}/-))?(/(0/d{2,3}/)|0/d{2,3}-)?[1-9]/d{6,7}(/-/d{1,4})?$ 

20、手机号码:^((/(/d{2,3}/))|(/d{3}/-))?13/d{9}$ 

21、双字节字符(包括汉字在内):[^\x00-\xff] 

22、匹配首尾空格:(^/s*)|(/s*$)(像vbscript那样的trim函数) 

23、匹配HTML标记:<(.*)>.*<///1>|<(.*) //>  

24、匹配空行:/n[/s| ]*/r 

25、提取信息中的网络链接:(h|H)(r|R)(e|E)(f|F)  *=  *('|")?(/w|//|//|/.)+('|"|  *|>)? 

26、提取信息中的邮件地址:/w+([-+.]/w+)*@/w+([-.]/w+)*/./w+([-.]/w+)* 

27、提取信息中的图片链接:(s|S)(r|R)(c|C)  *=  *('|")?(/w|//|//|/.)+('|"|  *|>)? 

28、提取信息中的IP地址:(/d+)/.(/d+)/.(/d+)/.(/d+) 

29、提取信息中的中国手机号码:(86)*0*13/d{9} 

30、提取信息中的中国固定电话号码:(/(/d{3,4}/)|/d{3,4}-|/s)?/d{8} 

31、提取信息中的中国电话号码(包括移动和固定电话):(/(/d{3,4}/)|/d{3,4}-|/s)?/d{7,14} 

32、提取信息中的中国邮政编码:[1-9]{1}(/d+){5} 

33、提取信息中的浮点数(即小数):(-?/d*)/.?/d+ 

34、提取信息中的任何数字 :(-?/d*)(/./d+)?   

35、IP:(/d+)/.(/d+)/.(/d+)/.(/d+) 

36、电话区号:/^0/d{2,3}$/ 

37、腾讯QQ号:^[1-9]*[1-9][0-9]*$ 

38、帐号(字母开头,允许5-16字节,允许字母数字下划线):^[a-zA-Z][a-zA-Z0-9_]{4,15}$

39、中文、英文、数字及下划线:^[/u4e00-/u9fa5_a-zA-Z0-9]+$

匹配方法
一、JS: if(document.formname.xxx.value.match(/^[/u4e00-/u9fa5_a-zA-Z0-9]+$/) == null){
alert('不匹配');
return false;



二、PHP: if(!eregi("^[/u4e00-/u9fa5_a-zA-Z0-9]+$",$xxx)){
echo '不匹配';



三、ASP:
' CheckInput: tests strng against the regular expression patrn.
' Returns the result of RegExp.Test (True when the pattern matches, False otherwise).
' Matching is case-insensitive because IgnoreCase is switched on below.
Function CheckInput(patrn,strng)  
 Dim regEx,Match,Matches' declare locals (Match is declared but never used)
 SET regEx = New RegExp' create the regular-expression object
 regEx.Pattern = patrn ' pattern to test against
 regEx.IgnoreCase = true ' ignore letter case while matching
 regEx.Global = True ' global flag; NOTE(review): presumably irrelevant for Test - confirm
 Matches = regEx.test(strng)' run the match
 CheckInput = Matches 
 SET regEx = Nothing 
End Function  

If(CheckInput("^[/u4e00-/u9fa5_a-zA-Z0-9]+$",xxx)=False)Then Response.Write("不匹配") 



JS的正则表达式

//校验是否全由数字组成 


代码
/**
 * Reports whether the input is made up only of ASCII digits,
 * with a length between 1 and 20 characters.
 *
 * @param {*} s - value to check (converted to a string by the regex engine).
 * @returns {boolean} true when the whole input is 1-20 digits.
 */
function isDigit(s) {
  return /^[0-9]{1,20}$/.test(s);
}
 

//校验登录名:只能输入5-20个以字母开头、可带数字、“_”、“.”的字串 


代码
/**
 * Validates a login name: 5-20 characters, the first one a letter,
 * the rest letters, digits, "_" or ".".
 *
 * @param {*} s - candidate login name.
 * @returns {boolean} true when the name satisfies the rule above.
 */
function isRegisterUserName(s) {
  var found = /^[a-zA-Z]{1}([a-zA-Z0-9]|[._]){4,19}$/.exec(s);
  return found !== null;
}
 

//校验用户姓名:只能输入1-30个以字母开头的字串 


代码
/**
 * Validates a user's real name: 1-30 ASCII letters and nothing else.
 *
 * @param {*} s - candidate name.
 * @returns {boolean} true when the input is 1-30 letters.
 */
function isTrueName(s) {
  var lettersOnly = /^[a-zA-Z]{1,30}$/;
  if (lettersOnly.exec(s)) {
    return true;
  }
  return false;
}
}}    
   
//校验密码:只能输入6-20个字母、数字、下划线    
[code]    
/**
 * Validates a password: 6-20 characters, each a letter, digit or underscore.
 *
 * The scraped source had "/w" where "\w" was meant, which made the regex
 * literal a syntax error; the escape is restored here.
 *
 * @param {*} s - candidate password.
 * @returns {boolean} true when the input is 6-20 word characters.
 */
function isPasswd(s) {
  var patrn = /^(\w){6,20}$/;
  return patrn.test(s);
}
 

//校验普通电话、传真号码:可以“+”开头,除数字外,可含有“-” 


代码
/**
 * Validates an ordinary telephone or fax number.
 *
 * Accepted shape: an optional leading "+", 1-3 digits, an optional space,
 * then one or more digits/spaces where any "-" must be followed directly by
 * a digit or a space (so no "--" and no trailing "-").
 *
 * Two fixes over the scraped source:
 *  - "/d" is restored to "\d" (the literal did not parse before);
 *  - the tail "([-]?((\d)|[ ]){1,12})+" is written as "(?:-?[\d ])+", which
 *    accepts exactly the same strings but cannot backtrack exponentially on
 *    long non-matching input.
 *
 * @param {*} s - candidate number.
 * @returns {boolean} true when the input has the shape described above.
 */
function isTel(s) {
  var patrn = /^[+]?\d{1,3}[ ]?(?:-?[\d ])+$/;
  return patrn.test(s);
}
 

//校验手机号码:必须以数字开头,除数字外,可含有“-” 


代码
/**
 * Validates a mobile phone number.
 *
 * Accepted shape: an optional leading "+", 1-3 digits, an optional space,
 * then one or more digits/spaces where any "-" must be followed directly by
 * a digit or a space. The leading "+" is kept because the original pattern
 * allowed it.
 *
 * Two fixes over the scraped source:
 *  - "/d" is restored to "\d" (the literal did not parse before);
 *  - the tail "([-]?((\d)|[ ]){1,12})+" is written as "(?:-?[\d ])+", which
 *    accepts exactly the same strings but cannot backtrack exponentially on
 *    long non-matching input.
 *
 * @param {*} s - candidate number.
 * @returns {boolean} true when the input has the shape described above.
 */
function isMobil(s) {
  var patrn = /^[+]?\d{1,3}[ ]?(?:-?[\d ])+$/;
  return patrn.test(s);
}
 

//校验邮政编码 


代码
/**
 * Validates a postal code: 3-12 characters drawn from ASCII letters,
 * digits and the space character.
 *
 * @param {*} s - candidate postal code.
 * @returns {boolean} true when the input matches that rule.
 */
function isPostalCode(s) {
  return /^[a-zA-Z0-9 ]{3,12}$/.exec(s) ? true : false;
}
 

//校验搜索关键字 


代码
/**
 * Validates a search keyword: 1-20 characters, none of which is one of the
 * listed punctuation characters.
 *
 * The first character is checked against a slightly larger blacklist than
 * the rest: it additionally rejects "#", "*" and "/".
 *
 * The scraped source had this pattern split over two lines with its
 * backslashes turned into "/", so it did not parse. It is reconstructed here.
 * NOTE(review): whether "/" belongs in the first-character blacklist cannot be
 * told from the mangled text - it is kept here; confirm against the original.
 *
 * @param {*} s - candidate keyword.
 * @returns {boolean} true when the keyword is acceptable.
 */
function isSearch(s) {
  var patrn = /^[^`~!@#$%^&*()+=|\\\][\]{}:;',.<>\/?][^`~!@$%^&()+=|\\\][\]{}:;',.<>?]{0,19}$/;
  return patrn.test(s);
}
   
/**
 * Validates a dotted-quad IPv4 address (by zergling).
 *
 * The earlier pattern /^[0-9.]{1,20}$/ accepted any mix of digits and dots,
 * including "....", "1.2.3" and "999.1.1.1". This version requires exactly
 * four groups of 1-3 digits separated by dots, each with a value of 0-255.
 * Leading zeros ("010.001.1.1") are still accepted.
 *
 * @param {*} s - candidate address.
 * @returns {boolean} true when the input is a well-formed IPv4 address.
 */
function isIP(s) {
  var parts = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(s);
  if (!parts) {
    return false;
  }
  for (var i = 1; i <= 4; i++) {
    if (Number(parts[i]) > 255) {
      return false;
    }
  }
  return true;
}
 

正则表达式 


代码
"^//d+$"  //非负整数(正整数 + 0)    
"^[0-9]*[1-9][0-9]*$"  //正整数     
"^((-//d+)|(0+))$"  //非正整数(负整数 + 0)     
"^-[0-9]*[1-9][0-9]*$"  //负整数     
"^-?//d+$"    //整数     
"^//d+(//.//d+)?$"  //非负浮点数(正浮点数 + 0)     
"^(([0-9]+//.[0-9]*[1-9][0-9]*)|([0-9]*[1-9][0-9]*//.[0-9]+)|([0-9]*[1-9][0-9]*))$"     
//正浮点数     
"^((-//d+(//.//d+)?)|(0+(//.0+)?))$"  //非正浮点数(负浮点数 + 0)     
"^(-(([0-9]+//.[0-9]*[1-9][0-9]*)|([0-9]*[1-9][0-9]*//.[0-9]+)|([0-9]*[1-9][0-9]*)))$"     
//负浮点数     
"^(-?//d+)(//.//d+)?$"  //浮点数     
"^[A-Za-z]+$"  //由26个英文字母组成的字符串     
"^[A-Z]+$"  //由26个英文字母的大写组成的字符串     
"^[a-z]+$"  //由26个英文字母的小写组成的字符串     
"^[A-Za-z0-9]+$"  //由数字和26个英文字母组成的字符串     
"^//w+$"  //由数字、26个英文字母或者下划线组成的字符串     
"^[//w-]+(//.[//w-]+)*@[//w-]+(//.[//w-]+)+$"    //email地址     
"^[a-zA-Z]+://(//w+(-//w+)*)(//.(//w+(-//w+)*))*(//?//S*)?$"  //url    
"^[A-Za-z0-9_]*$"   
---------------------------------------------------------------------------------------------------------------------------------------

名称: sscanf() - 从一个字符串中读进与指定格式相符的数据.

语法: int sscanf( string str, string fmt, mixed var1, mixed var2 ... );

整数 sscanf( 字符串 str, 字符串 fmt, 混合 var1, 混合 var2 ... );

用法: 以指定的格式 fmt 去解读字符串 str. fmt 中除了 %d 和 %s 以外, 亦可包含其他的字符串作为格式. 每一个 %d 或 %s 都对应一个参数, 按顺序为 var1, var2 ... %d 读入一个整数到参数中, 而 %s 读入一个字符串. * 亦可用于格式中, (即 %*d 和 %*s) 加了星号 (*) 表示跳过此数据不读入. (也就是不把此数据读入参数中) LPC 的 sscanf() 与 C 的 sscanf() 虽然相似, 但仍有不同之处. LPC 的 sscanf() 不需要 (也不可) 提供变量的内存位址给 sscanf(), 只需要给予变量的名字. 另一个不同点是, LPC 的 sscanf() 对于: sscanf( str, "%s %s", str1, str2 ); 的语法, 将会把 str 中的第一个英文单字 (即第一个空白字符以前的内容) 读入 str1, 后面其余的内容读入 str2.

sscanf() 会返回符合格式的 %d 和 %s 总数.

以前曾经编写过这样的小程序:一个文本文件,每行是一条记录,每条记录中包含多个字段,每个字段之间以某种定界符分开,举例如下:

Notebook,IBM,ThinkPad X32,6,12000

(各字段以逗号分隔,内容依次是:物品名称,生产厂家,型号,数量,价格)

如果要对这样的一行记录进行处理,提取出各个字段,怎么做比较好呢?

我以前的做法是在一个循环中用strtok函数每次取一个字段,然后将内容保存到一个字符串数组中。这样做虽然可行,但我总感觉写出的代码有些啰嗦。

最近看到一段代码,用C的标准库函数sscanf,处理这样的数据,只需一行就可以了。我把代码整理了一下,去掉了无关的部分,核心部分如下:

float price;

int quantity;

char category[21], name[21];

char vendor[21], sku[21];

char buf[201];

fp = fopen(filename, "r");

fgets(buf, 200, fp);

sscanf(buf,

"%20[^#]#%20[^#]#%f#%i#%20[^#]#%20[^/n]",

name, sku, &price, &quantity, category, vendor);

下面简单做些解说:

%20[^#]# 最多读入20个字符,直到遇见定界符#,但不包含定界符

%f# 读入一个浮点数,直到遇见定界符#

%i# 读入一个整数,直到遇见定界符#

%20[^/n] 最多读入20个字符,忽略行尾的回车符

是不是很简洁明了呢?

#include <stdio.h>

/*
 * Demonstrates three ways of splitting text with sscanf():
 *   1. %d / %s conversions on a syslog-style line,
 *   2. two integers wrapped in literal text,
 *   3. scansets (%[^x]) that read up to a chosen delimiter.
 *
 * Changes from the scraped listing:
 *   - "/n" in every printf() format is restored to "\n";
 *   - every %s / %[ conversion carries a field width of 19 so a long token
 *     cannot overflow the 20-byte buffers;
 *   - each sscanf() result is checked before its outputs are printed;
 *   - unused locals are removed and success returns 0 instead of 1.
 *
 * Returns 0 on success, 1 when any sscanf() call converts fewer fields
 * than expected.
 */
int main(void)
{
    char line[] = "<14>2002-11-11 12:12:12 11.22.33.44 3 3 aaaa aaaaaa";
    char test[] = "<1111> 22";
    char str1[20];
    char str2[20];
    char str3[20];
    char str4[20];
    char str5[20];
    char str6[20];
    char str7[20];
    int a1, a5, a6;

    /* 1. whitespace-delimited tokens; %s stops at the first blank */
    if (sscanf(line, "<%d>%19s %19s %19s %d %d %19s",
               &a1, str2, str3, str4, &a5, &a6, str7) != 7) {
        return 1;
    }
    printf("%d\n", a1);
    printf("%s\n", str2);
    printf("%s\n", str3);
    printf("%s\n", str4);
    printf("%d\n", a5);
    printf("%d\n", a6);
    printf("%s\n", str7);

    /* 2. integers surrounded by literal characters */
    if (sscanf(test, "<%d> %d", &a5, &a6) != 2) {
        return 1;
    }
    printf("%d\n", a5);
    printf("%d\n", a6);

    /* 3. scansets: everything up to '>' / ' ' / '$' (or end of string) */
    if (sscanf(line,
               "<%19[^>]>%19[^ ] %19[^ ] %19[^ ] %19[^ ] %19[^ ] %19[^$]",
               str1, str2, str3, str4, str5, str6, str7) != 7) {
        return 1;
    }
    printf("%s\n", str1);
    printf("%s\n", str2);
    printf("%s\n", str3);
    printf("%s\n", str4);
    printf("%s\n", str5);
    printf("%s\n", str6);
    printf("%s\n", str7);

    return 0;
}

const char *str = "drw-rw-rw- 1 user group 0 Oct 28 2003 -====流行音乐专用FTP=====-";

上面是源串,我要分别得到drw-rw-rw-,group,-====流行音乐专用FTP=====-字段

注意:因为这几个字段的值会变化,所以我要用格式化输入,分别存入下面的a b c中,高手帮忙!

下面这个是我没成功的尝试

char a[20];

char b[50];

char c[20];

int ret = sscanf(str, "%[^'' '']* %[''u''] %[^'' '']"

, a, b, c);

masterz(www.fruitfruit.com) 于 2005-8-6 17:43:49

sscanf可以支持regular expression?或许你要用boost,下面有一个简单的例子,虽然离你的问题还比较远

http://www.fruitfruit.com/vc/boost/boost_regex_test.cpp

newbiestar 于 2005-8-6 18:29:18

楼主一个问题好几个地方问……

int ret = sscanf(str, "%s%*s%*s%s%*s%*s%*s%*s%s", a, b, c);

这样就可以了,不要的东西都抛弃掉了

参见这个帖子里面我的回复

http://community.csdn.net/Expert/topic/4192/4192979.xml?temp=.2922632

ding8125(丁丁) 于 2005-8-6 19:19:12

可以通过字符数组输出一个字符串!~

今天看到一个奇怪的scanf。其实这只是用了正则表达式。

sscanf(user, "%127[^:]:%127[^ ]", user_name, password);

"%127[^:]:%127[^ ]",是正则表达式

用scanf或者printf,可以在%后面跟%d,%s等东西,也可以跟一个正则表达式。

这里,127表示最多可以接受127个字符,[^:]是正则表达式,表示非":",到":"结束

后面,%127[^ ],同样,其中[^ ]是正则表达式,表示非" ",到" "结束

所以,如果user是"wpc:123456"的字符串,那么经过上面的sscanf后,

user_name是wpc,而password是123456

SCANF(3) Linux Programmers Manual SCANF(3)

NAME

scanf, fscanf, sscanf, vscanf, vsscanf, vfscanf - input format conver-

sion

SYNOPSIS

#include <stdio.h>

int scanf(const char *format, ...);

int fscanf(FILE *stream, const char *format, ...);

int sscanf(const char *str, const char *format, ...);

#include <stdarg.h>

int vscanf(const char *format, va_list ap);

int vsscanf(const char *str, const char *format, va_list ap);

int vfscanf(FILE *stream, const char *format, va_list ap);

DESCRIPTION

The scanf() family of functions scans input according to format as

described below. This format may contain conversion specifications;

the results from such conversions, if any, are stored in the locations

pointed to by the pointer arguments that follow format. Each pointer

argument must be of a type that is appropriate for the value returned

by the corresponding conversion specification.

If the number of conversion specifications in format exceeds the number

of pointer arguments, the results are undefined. If the number of

pointer arguments exceeds the number of conversion specifications, then

the excess pointer arguments are evaluated, but are otherwise ignored.

The scanf() function reads input from the standard input stream stdin,

fscanf() reads input from the stream pointer stream, and sscanf() reads

its input from the character string pointed to by str.

The vfscanf() function is analogous to vfprintf(3) and reads input from

the stream pointer stream using a variable argument list of pointers

(see stdarg(3)). The vscanf() function scans a variable argument list

from the standard input and the vsscanf() function scans it from a

string; these are analogous to the vprintf() and vsprintf() functions

respectively.

The format string consists of a sequence of directives which describe

how to process the sequence of input characters. If processing of a

directive fails, no further input is read, and scanf() returns. A

"failure" can be either of the following: input failure, meaning that

input characters were unavailable, or matching failure, meaning that

the input was inappropriate (see below).

A directive is one of the following:

A sequence of white-space characters (space, tab, newline, etc;

see isspace(3)). This directive matches any amount of white

space, including none, in the input.

An ordinary character (i.e., one other than white space or %).

This character must exactly match the next character of input.

A conversion specification, which commences with a % (percent)

character. A sequence of characters from the input is converted

according to this specification, and the result is placed in the

corresponding pointer argument. If the next item of input does

not match the conversion specification, the conversion fails --

this is a matching failure.

Each conversion specification in format begins with either the charac-

ter % or the character sequence "%n$" (see below for the distinction)

followed by:

An optional * assignment-suppression character: scanf() reads

input as directed by the conversion specification, but discards

the input. No corresponding pointer argument is required, and

this specification is not included in the count of successful

assignments returned by scanf().

An optional a character. This is used with string conver-

sions, and relieves the caller of the need to allocate a corre-

sponding buffer to hold the input: instead, scanf() allocates a

buffer of sufficient size, and assigns the address of this

buffer to the corresponding pointer argument, which should be a

pointer to a char * variable (this variable does not need to be

initialised before the call). The caller should subsequently

free(3) this buffer when it is no longer required. This is a

GNU extension; C99 employs the a character as a conversion

specifier (and it can also be used as such in the GNU implemen-

tation).

An optional decimal integer which specifies the maximum field

width. Reading of characters stops either when this maximum is

reached or when a non-matching character is found, whichever

happens first. Most conversions discard initial whitespace

characters (the exceptions are noted below), and these discarded

characters don't count towards the maximum field width. String

input conversions store a null terminator ('\0') to mark the end

of the input; the maximum field width does not include this ter-

minator.

An optional type modifier character. For example, the l type

modifier is used with integer conversions such as %d to specify

that the corresponding pointer argument refers to a long int

rather than a pointer to an int.

A conversion specifier that specifies the type of input conver-

sion to be performed.

The conversion specifications in format are of two forms, either begin-

ning with % or beginning with "%n$". The two forms should not be

mixed in the same format string, except that a string containing "%n$"

specifications can include %% and %*. If format contains % specifi-

cations then these correspond in order with successive pointer argu-

ments. In the "%n$" form (which is specified in SUSv3, but not C99), n

is a decimal integer that specifies that the converted input should be

placed in the location referred to by the n-th pointer argument follow-

ing format.

CONVERSIONS

The following type modifier characters can appear in a conversion spec-

ification:

h Indicates that the conversion will be one of diouxX or n and the

next pointer is a pointer to a short int or unsigned short int

(rather than int).

hh As for h, but the next pointer is a pointer to a signed char or

unsigned char.

j As for h, but the next pointer is a pointer to a intmax_t or

uintmax_t. This modifier was introduced in C99.

l Indicates either that the conversion will be one of diouxX or n

and the next pointer is a pointer to a long int or unsigned long

int (rather than int), or that the conversion will be one of efg

and the next pointer is a pointer to double (rather than float).

Specifying two l characters is equivalent to L. If used with %c

or %s the corresponding parameter is considered as a pointer to

a wide character or wide character string respectively.

L Indicates that the conversion will be either efg and the next

pointer is a pointer to long double or the conversion will be

dioux and the next pointer is a pointer to long long.

q equivalent to L. This specifier does not exist in ANSI C.

t As for h, but the next pointer is a pointer to a ptrdiff_t.

This modifier was introduced in C99.

z As for h, but the next pointer is a pointer to a size_t. This

modifier was introduced in C99.

The following conversion specifiers are available:

% Matches a literal %. That is, %% in the format string matches

a single input % character. No conversion is done, and

assignment does not occur.

d Matches an optionally signed decimal integer; the next pointer

must be a pointer to int.

D Equivalent to ld; this exists only for backwards compatibility.

(Note: thus only in libc4. In libc5 and glibc the %D is silently

ignored, causing old programs to fail mysteriously.)

i Matches an optionally signed integer; the next pointer must be a

pointer to int. The integer is read in base 16 if it begins

with 0x or 0X, in base 8 if it begins with 0, and in base 10

otherwise. Only characters that correspond to the base are

used.

o Matches an unsigned octal integer; the next pointer must be a

pointer to unsigned int.

u Matches an unsigned decimal integer; the next pointer must be a

pointer to unsigned int.

x Matches an unsigned hexadecimal integer; the next pointer must

be a pointer to unsigned int.

X Equivalent to x.

f Matches an optionally signed floating-point number; the next

pointer must be a pointer to float.

e Equivalent to f.

g Equivalent to f.

E Equivalent to f.

a (C99) Equivalent to f.

s Matches a sequence of non-white-space characters; the next

pointer must be a pointer to character array that is long enough

to hold the input sequence and the terminating null character

('\0'), which is added automatically. The input string stops at

white space or at the maximum field width, whichever occurs

first.

c Matches a sequence of characters whose length is specified by

the maximum field width (default 1); the next pointer must be a

pointer to char, and there must be enough room for all the char-

acters (no terminating null byte is added). The usual skip of

leading white space is suppressed. To skip white space first,

use an explicit space in the format.

[ Matches a nonempty sequence of characters from the specified set

of accepted characters; the next pointer must be a pointer to

char, and there must be enough room for all the characters in

the string, plus a terminating null byte. The usual skip of

leading white space is suppressed. The string is to be made up

of characters in (or not in) a particular set; the set is

defined by the characters between the open bracket [ character

and a close bracket ] character. The set excludes those charac-

ters if the first character after the open bracket is a circum-

flex (^). To include a close bracket in the set, make it the first

character after the open bracket or the circumflex; any other

position will end the set. The hyphen character - is also spe-

cial; when placed between two other characters, it adds all

intervening characters to the set. To include a hyphen, make it

the last character before the final close bracket. For

instance, [^]0-9-] means the set "everything except close

bracket, zero through nine, and hyphen". The string ends with

the appearance of a character not in the (or, with a circumflex,

in) set or when the field width runs out.

p Matches a pointer value (as printed by %p in printf(3); the next

pointer must be a pointer to a pointer to void.

n Nothing is expected; instead, the number of characters consumed

thus far from the input is stored through the next pointer,

which must be a pointer to int. This is not a conversion,

although it can be suppressed with the * assignment-suppression

character. The C standard says: "Execution of a %n directive

does not increment the assignment count returned at the comple-

tion of execution" but the Corrigendum seems to contradict this.

Probably it is wise not to make any assumptions on the effect of

%n conversions on the return value.

RETURN VALUE

These functions return the number of input items successfully matched

and assigned, which can be fewer than provided for, or even zero in the

event of an early matching failure.

The value EOF is returned if the end of input is reached before the

first successful conversion or matching failure occurs. EOF is also

returned if a read error occurs, in which case the error indicator for

the stream (see ferror(3)) is set, and errno is set to indicate the error.

SEE ALSO

getc(3), printf(3), strtod(3), strtol(3), strtoul(3)

CONFORMING TO

The functions fscanf(), scanf(), and sscanf() conform to ANSI

X3.159-1989 (ANSI C).

The q specifier is the 4.4BSD notation for long long, while ll or the

usage of L in integer conversions is the GNU notation.

The Linux version of these functions is based on the GNU libio library.

Take a look at the info documentation of GNU libc (glibc-1.08) for a

more concise description.

BUGS

All functions are fully ANSI X3.159-1989 conformant, but provide the

additional specifiers q and a as well as an additional behaviour of the

L and l specifiers. The latter may be considered to be a bug, as it

changes the behaviour of specifiers defined in ANSI X3.159-1989.

Some combinations of the type modifiers and conversion specifiers

defined by ANSI C do not make sense (e.g. %Ld). While they may have a

well-defined behaviour on Linux, this need not to be so on other archi-

tectures. Therefore it usually is better to use modifiers that are not

defined by ANSI C at all, i.e. use q instead of L in combination with

diouxX conversions or ll.

The usage of q is not the same as on 4.4BSD, as it may be used in float

conversions equivalently to L.

LINUX MANPAGE 1995-11-01 SCANF(3)

-------------------------------------------------------------------------------------------------------------------------------------


只能输入m~n位的数字:"^\d{m,n}$"。
只能输入零和非零开头的数字:"^(0|[1-9][0-9]*)$"。
只能输入有两位小数的正实数:"^[0-9]+(\.[0-9]{2})?$"。
只能输入有1~3位小数的正实数:"^[0-9]+(\.[0-9]{1,3})?$"。
只能输入非零的正整数:"^/+?[1-9][0-9]*$"。
只能输入非零的负整数:"^\-[1-9][0-9]*$"。
只能输入长度为3的字符:"^.{3}$"。
只能输入由26个英文字母组成的字符串:"^[A-Za-z]+$"。
只能输入由26个大写英文字母组成的字符串:"^[A-Z]+$"。
只能输入由26个小写英文字母组成的字符串:"^[a-z]+$"。
只能输入由数字和26个英文字母组成的字符串:"^[A-Za-z0-9]+$"。
只能输入由数字、26个英文字母或者下划线组成的字符串:"^/w+$"。
验证用户密码:"^[a-zA-Z]/w{5,17}$"正确格式为:以字母开头,长度在6~18之间,只能包含字符、数字和下划线。
验证是否含有^%&',;=?$/"等字符:"[^%&',;=?$/x22]+"。
只能输入汉字:"^[/u4e00-/u9fa5]{0,}$"
验证Email地址:"^/w+([-+.]/w+)*@/w+([-.]/w+)*/./w+([-.]/w+)*$"。
验证InternetURL:"^http://([/w-]+/.)+[/w-]+(/[/w-./?%&=]*)?$"。
验证电话号码:"^(\(\d{3,4}\)|\d{3,4}-)?\d{7,8}$"正确格式为:"XXXX-XXXXXXX"、"XXXX-XXXXXXXX"、"XXX-XXXXXXX"、"XXX-XXXXXXXX"、"XXXXXXX"和"XXXXXXXX"。
验证身份证号(15位或18位数字):"^(\d{15}|\d{18})$"。
验证一年的12个月:"^(0?[1-9]|1[0-2])$"正确格式为:"01"~"09"和"1"~"12"。
验证一个月的31天:"^((0?[1-9])|((1|2)[0-9])|30|31)$"正确格式为;"01"~"09"和"1"~"31"。

匹配中文字符的正则表达式: [/u4e00-/u9fa5]

匹配双字节字符(包括汉字在内):[^/x00-/xff]

应用:计算字符串的长度(一个双字节字符长度计2,ASCII字符计1)
/**
 * Display-width style length: every UTF-16 code unit above 0xFF counts as 2,
 * every other code unit counts as 1.
 *
 * The scraped source had "/x00-/xff", which made the regex literal invalid;
 * the "\x" escapes are restored. Characters outside the BMP are stored as two
 * code units and therefore count as 4.
 *
 * @returns {number} the weighted length of the string.
 */
String.prototype.len = function () {
  return this.replace(/[^\x00-\xff]/g, "aa").length;
};

匹配空行的正则表达式:/n[/s| ]*/r

匹配html标签的正则表达式:<(.*)>(.*)<//(.*)>|<(.*)//>

匹配首尾空格的正则表达式:(^/s*)|(/s*$)

应用:javascript中没有像vbscript那样的trim函数,我们就可以利用这个表达式来实现,如下:

/**
 * Removes leading and trailing whitespace from the string.
 *
 * The scraped source had "/s" where "\s" was meant, which broke the regex
 * literal. The pattern also uses "+" instead of "*" so that it never
 * performs empty replacements; the result is the same.
 *
 * NOTE(review): this assignment replaces the built-in String.prototype.trim
 * on engines that already provide one - confirm that is still wanted.
 *
 * @returns {string} the string without surrounding whitespace.
 */
String.prototype.trim = function () {
  return this.replace(/^\s+|\s+$/g, "");
};

利用正则表达式分解和转换IP地址:

下面是利用正则表达式匹配IP地址,并将IP地址转换成对应数值的Javascript程序:

/**
 * Converts a dotted-quad IPv4 address into its numeric value
 * (a * 256^3 + b * 256^2 + c * 256 + d).
 *
 * Fixes over the scraped listing:
 *  - the regex escapes ("/d", "/.") are restored to "\d" and "\.";
 *  - the return expression had unbalanced parentheses and did not parse;
 *  - each octet has 256 possible values, so the base is 256, not 255;
 *  - the match result is read from exec() instead of the deprecated
 *    RegExp.$1..$4 statics, and "re" is no longer an implicit global;
 *  - an octet above 255 is rejected instead of producing an overlapping value.
 *
 * As before, the pattern is not anchored: the first "n.n.n.n" run found in
 * the input is the one converted.
 *
 * @param {*} ip - text containing the address.
 * @returns {number} the address as an unsigned 32-bit value.
 * @throws {Error} "Not a valid IP address!" when no valid address is found.
 */
function IP2V(ip) {
  var parts = /(\d+)\.(\d+)\.(\d+)\.(\d+)/.exec(ip);
  if (!parts) {
    throw new Error("Not a valid IP address!");
  }
  var value = 0;
  for (var i = 1; i <= 4; i++) {
    var octet = Number(parts[i]);
    if (octet > 255) {
      throw new Error("Not a valid IP address!");
    }
    // Multiply instead of shifting: "<<" would wrap into negative numbers.
    value = value * 256 + octet;
  }
  return value;
}

不过上面的程序如果不用正则表达式,而直接用split函数来分解可能更简单,程序如下:

// Same conversion without a regular expression: split on "." and weight each
// octet by a power of 256 (each octet has 256 possible values, so the base
// must be 256 - the earlier listing used 255 and gave overlapping results).
var ip="10.100.20.168"
ip=ip.split(".")
alert("IP值是:"+(ip[0]*256*256*256+ip[1]*256*256+ip[2]*256+ip[3]*1))

匹配Email地址的正则表达式:/w+([-+.]/w+)*@/w+([-.]/w+)*/./w+([-.]/w+)*

匹配网址URL的正则表达式:http://([/w-]+/.)+[/w-]+(/[/w- ./?%&=]*)?

利用正则表达式去除字串中重复的字符的算法程序:[注:此程序不正确,原因见本贴回复]

// Removes repeated characters from s using a back-reference.
// Step 1: collapse every "x ... x" span to a single "x" (the scraped text had
//         "/1" where the back-reference "\1" was meant).
// Step 2: build a character class from what is left and strip those
//         characters from the original, keeping the ones step 1 swallowed.
// Step 3: concatenate both parts.
var s="abacabefgeeii"
var s1=s.replace(/(.).*\1/g,"$1")
// Escape the characters that are special inside [...] so that input such as
// "]" or "\" cannot break or change the generated pattern.
var re=new RegExp("["+s1.replace(/[\\\]\^\-]/g,"\\$&")+"]","g")
var s2=s.replace(re,"")
alert(s1+s2) // shows "abeicfg": each character once, but NOT in first-appearance order

我原来在CSDN上发贴寻求一个表达式来实现去除重复字符的方法,最终没有找到,这是我能想到的最简单的实现方法。思路是使用后向引用取出包括重复的字符,再以重复的字符建立第二个表达式,取到不重复的字符,两者串连。这个方法对于字符顺序有要求的字符串可能不适用。

得用正则表达式从URL地址中提取文件名的javascript程序,如下结果为page1

// Extracts the file name (without extension) from a URL.
// (.*\/){0,} greedily consumes everything up to the last "/", ([^\.]+)
// captures the characters before the first "." that follows, and the
// trailing .* swallows the extension. The scraped text had "/" in place of
// the "\" escapes, which made the literal invalid.
var s="http://www.9499.net/page1.htm"
s=s.replace(/(.*\/){0,}([^\.]+).*/ig,"$2")
alert(s) // shows "page1"

利用正则表达式限制网页表单里的文本框输入内容:

用 正则表达式限制只能输入中文:onkeyup="value=value.replace(/[^\u4E00-\u9FA5]/g,'')" onbeforepaste="clipboardData.setData('text',clipboardData.getData('text').replace(/[^\u4E00-\u9FA5]/g,''))"

用 正则表达式限制只能输入全角字符: onkeyup="value=value.replace(/[^\uFF00-\uFFFF]/g,'')" onbeforepaste="clipboardData.setData('text',clipboardData.getData('text').replace(/[^\uFF00-\uFFFF]/g,''))"

用 正则表达式限制只能输入数字:onkeyup="value=value.replace(/[^\d]/g,'')" onbeforepaste="clipboardData.setData('text',clipboardData.getData('text').replace(/[^\d]/g,''))"

用 正则表达式限制只能输入数字和英文:onkeyup="value=value.replace(/[\W]/g,'')" onbeforepaste="clipboardData.setData('text',clipboardData.getData('text').replace(/[\W]/g,''))"
---------------------------------------------------------------------------------------------------------------------

常用的C#正则表达

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值