常用到的类
java.lang.String
java.util.regex.Pattern
java.util.regex.Matcher
=====================================================================================
import java.util.logging.Handler;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
/**
 * Console walkthrough of {@code java.util.regex}.
 *
 * <p>Each section prints a header line followed by the outcome of a few small
 * regular-expression experiments, so the output can be read top to bottom next
 * to the source.
 */
public class Test {

    /**
     * Runs every demo section in order and prints the results to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        showBasics();
        showQuantifiers();
        showCharacterClasses();
        showPredefinedClasses();
        showPosixClasses();
        showBoundaries();
        showBlankLines();
        showEmail();
        showMatchesFindLookingAt();
        showReplacement();
        showGroups();
        showQuantifierModes();
        showLookahead();
        showBackReferences();
        showInlineFlags();
    }

    /** First contact: String.matches, String.replaceAll and an explicit Pattern/Matcher pair. */
    private static void showBasics() {
        p("简单认识正则表达式 ------------------");
        p("abc".matches("..."));
        p("a8729".replaceAll("\\d", "-"));
        Pattern threeLowercase = Pattern.compile("[a-z]{3}"); // compile the pattern once
        Matcher matcher = threeLowercase.matcher("fgh");      // bind it to an input string
        p(matcher.matches());
    }

    /** The quantifiers *, +, ? and {n,m}. */
    private static void showQuantifiers() {
        p("初步认识 。 * + ------------------");
        p("a".matches("a"));
        p("aaa".matches("a*"));
        p("aaa".matches("a+"));
        p("a".matches("a?"));
        p("".matches("a?"));
        p("214523145234532".matches("\\d{3,100}"));
        // Dotted-quad shape: four groups of 1-3 digits. The sample's last group is
        // letters, so the result is false.
        p("192.168.0.aaa".matches("\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}"));
        p("192".matches("[0-2][0-9][0-9]"));
    }

    /** Character classes: negation, ranges, union and intersection. */
    private static void showCharacterClasses() {
        p("范围---------------");
        p("a".matches("[abc]"));
        p("a".matches("[^abc]"));
        p("A".matches("[a-zA-Z]"));
        p("A".matches("[a-z]|[A-Z]"));
        p("A".matches("[a-z[A-Z]]"));    // nested class = union
        p("R".matches("[A-Z&&[RFG]]"));  // && = intersection
    }

    /** The predefined classes \s, \S, \w and \d, plus matching a literal backslash. */
    private static void showPredefinedClasses() {
        p("认识 \\s \\w \\d------------");
        p(" \n\r\t".matches("\\s{4}"));
        p(" ".matches("\\S"));
        p("a_8".matches("\\w{3}"));
        p("abc8888$^%".matches("[a-z]{1,3}\\d+[&^#%]+"));
        // One backslash is written "\\" in a Java string literal; the regex that
        // matches it needs the backslash escaped again, hence "\\\\".
        p("\\".matches("\\\\"));
    }

    /** POSIX-style named classes. */
    private static void showPosixClasses() {
        p("POSIX Style----------");
        p("a".matches("\\p{Lower}"));
    }

    /** The anchors ^ and $ and the word boundary \b. */
    private static void showBoundaries() {
        p("boundary--------------");
        p("hello sir".matches("^h.*"));
        p("hello sir".matches(".*ir$"));
        p("hello sir".matches("^h[a-z]{1,3}o\\b.*")); // \b is a word boundary: "hello" ends before the space
        p("hellosir".matches("^h[a-z]{1,3}o\\b.*"));  // no boundary between 'o' and 's'
    }

    /** Recognising a line made only of whitespace. */
    private static void showBlankLines() {
        p("white lines----------");
        // Any run of whitespace that is not a newline, terminated by exactly one newline.
        p(" \n".matches("^[\\s&&[^\\n]]*\\n$"));
    }

    /** A simple e-mail shape check. */
    private static void showEmail() {
        p("email---------------");
        p("asdfdsfds@fdsfdsfds.com".matches("[\\w[.-]]+@[\\w[.-]]+\\.[\\w]+"));
    }

    /** Contrasts Matcher.matches, Matcher.find and Matcher.lookingAt on one input. */
    private static void showMatchesFindLookingAt() {
        Pattern digits = Pattern.compile("\\d{3,5}");
        String input = "123-34345-234-00";
        Matcher matcher = digits.matcher(input);
        p(matcher.matches());                        // the whole input must match
        matcher.reset();                             // rewind to the start of the input
        p(matcher.find());                           // next matching subsequence
        p(matcher.start() + "-" + matcher.end());    // bounds of the match just found
        p(matcher.find());
        p(matcher.find());
        p(matcher.find());
        // lookingAt always anchors at the beginning of the input, so every call
        // gives the same answer regardless of earlier find() calls.
        p(matcher.lookingAt());
        p(matcher.lookingAt());
        p(matcher.lookingAt());
        p(matcher.lookingAt());
    }

    /** Case-insensitive search and replace, alternating the replacement text. */
    private static void showReplacement() {
        p("replacement--------------");
        Pattern java = Pattern.compile("java", Pattern.CASE_INSENSITIVE); // ignore case while matching
        Matcher matcher = java.matcher("java Java JAva IloveJava you hateJava adfd");
        StringBuffer result = new StringBuffer();
        int occurrence = 0;
        while (matcher.find()) {
            occurrence++;
            // Odd occurrences become upper case, even ones lower case.
            matcher.appendReplacement(result, occurrence % 2 == 0 ? "java" : "JAVA");
        }
        matcher.appendTail(result); // copy whatever follows the last match
        p(result);
    }

    /** Capturing groups. */
    private static void showGroups() {
        p("group----------");
        Pattern digitsThenLetters = Pattern.compile("(\\d{3,5})([a-z]{2})"); // parentheses create groups
        String input = "123aaa-34345bb-234cc-000";
        Matcher matcher = digitsThenLetters.matcher(input);
        while (matcher.find()) {
            p(matcher.group(1)); // only the digit part of each match
        }
    }

    /** Greedy quantifier in action, with the reluctant and possessive variants described. */
    private static void showQuantifierModes() {
        p("qulifiers");
        // Greedy: grabs as much as possible, then backs off -> matches 0-10.
        // Reluctant "(.{3,10}?)[0-9]" would grab as little as possible -> 0-5.
        // Possessive "(.{3,10}+)[0-9]" never gives characters back -> not found.
        Pattern greedy = Pattern.compile("(.{3,10})[0-9]");
        String input = "aaaa5bbbb6";
        Matcher matcher = greedy.matcher(input);
        if (matcher.find()) {
            p(matcher.start() + "-" + matcher.end());
        } else {
            p("not found");
        }
    }

    /** Zero-width positive lookahead. */
    private static void showLookahead() {
        p("non-capturing----------");
        // (?=a) requires an 'a' to follow but leaves it out of the match, so "444" is printed.
        Pattern threeBeforeA = Pattern.compile(".{3}(?=a)");
        String input = "444a66b";
        Matcher matcher = threeBeforeA.matcher(input);
        while (matcher.find()) {
            p(matcher.group());
        }
    }

    /** Back-references. */
    private static void showBackReferences() {
        p("back references-----------");
        // \1 must repeat exactly the text captured by group 1.
        Pattern repeatedPair = Pattern.compile("(\\d\\d)\\1");
        String input = "1212";
        Matcher matcher = repeatedPair.matcher(input);
        p(matcher.matches());
    }

    /** Embedded flag expressions. */
    private static void showInlineFlags() {
        // (?i) is the inline form of Pattern.CASE_INSENSITIVE.
        p("Java".matches("(?i)(java)"));
    }

    /**
     * Prints a value followed by a line break on standard output.
     *
     * @param o the value to print
     */
    public static void p(Object o) {
        System.out.println(o);
    }
}
==========================================================================================
抓取邮箱
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.bind.ParseConversionEvent;
/**
 * Scans a text file line by line and prints every e-mail-like token it finds,
 * one per output line.
 */
public class EmailSpider {

    /** File scanned when no path is given on the command line. */
    private static final String DEFAULT_SOURCE =
            "C:\\Users\\Public\\Documents\\workspace\\eclipse-workspace\\RegExp\\src\\Powered.html";

    /** Compiled once and reused for every line instead of being rebuilt per call. */
    private static final Pattern EMAIL = Pattern.compile("[\\w[.-]]+@[\\w[.-]]+\\.[\\w]+");

    /**
     * Reads the source file and prints each address found in it.
     *
     * @param args optional; if present, {@code args[0]} is the path of the file to
     *             scan, otherwise the built-in default path is used
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String source = (args != null && args.length > 0) ? args[0] : DEFAULT_SOURCE;
        // try-with-resources closes the reader even when reading fails part-way.
        try (BufferedReader reader = new BufferedReader(new FileReader(source))) {
            String line;
            while ((line = reader.readLine()) != null) {
                parse(line);
            }
        }
    }

    /**
     * Prints every substring of {@code line} that matches the e-mail pattern.
     *
     * @param line one line of input text
     */
    private static void parse(String line) {
        Matcher matcher = EMAIL.matcher(line);
        while (matcher.find()) {
            System.out.println(matcher.group());
        }
    }
}