正则表达式
1.什么是正则表达式
一个正则表达式就是由普通字符(例如字符 a 到 z)以及特殊字符(元字符)组成的文字模式,用来描述在查找文字主体时待匹配的一个或多个字符串。它也是一种对字符串格式的约束:例如在某些网站上填写邮箱时,如果乱写会提示输入不合法,这种验证就是用正则表达式做的。自 JDK 1.4 推出 java.util.regex 包起,Java 就为我们提供了很好的正则表达式应用平台。
2.匹配
String.matches() 验证一个字符串是否匹配指定的正则表达式
代码:
package cn.itcast.regex;
/**
 * Demonstrations of {@code String.matches(regex)}, which reports whether a
 * whole string matches a regular expression.
 *
 * <p>Every {@code demoN} method prints one {@code true}/{@code false} line per
 * sample input. Uncomment the call you want to run in {@link #main}.
 */
@SuppressWarnings("unused")
public class Demo1_Matches {

    public static void main(String[] args) {
        // demo1();
        // demo2();
        // demo3();
        // demo4();
        // demo5();
        // demo6();
        // demo7();
        // demo8();
        // demo9();
        // demo10();
        // demo11();
        // demo12();
        // demo13();
        // demo14();
        // demo15();
        // demo16();
        // demo17();
    }

    /**
     * Prints, one line per sample and in the given order, whether the sample
     * matches {@code regex} in its entirety.
     *
     * @param regex   the regular expression (written as a Java string)
     * @param samples the inputs to test
     */
    private static void report(String regex, String... samples) {
        for (String sample : samples) {
            System.out.println(sample.matches(regex));
        }
    }

    private static void demo1() {
        // In Java a regular expression is written as a plain string;
        // matches() tests whether the whole input matches it.
        report("[1-9]\\d{4,10}", "59937493", "1234", "123456789012", "123_45", "01234");
    }

    private static void demo2() {
        // One character: a, b or c.
        report("[abc]",
                "a",
                "b",
                "c",
                "abc", // false: three characters
                "aa",  // false: two characters
                "d",   // character is not in the set
                "A");  // case does not match
    }

    private static void demo3() {
        // One character: anything except a, b or c.
        report("[^abc]", "a", "b", "c", "A", "x", ".", " ", " ", "xyz", "中");
    }

    private static void demo4() {
        // One character: any letter in a-z or A-Z.
        report("[a-zA-Z]", "a", "b", "c", "A", "x", ".", "abc", "a-z", "1", "9");
    }

    private static void demo5() {
        // Nested character class inside the outer one.
        report("[a-d[m-p]]", "c", "o", "d");
    }

    private static void demo6() {
        // One character: intersection of a-d and c-f, i.e. c or d.
        report("[a-d&&c-f]", "b", "c", "d", "e");
    }

    private static void demo7() {
        // One character: any digit, same as [0-9].
        report("\\d", "1", "11", "a");
    }

    private static void demo8() {
        // \S with assorted whitespace samples.
        report("\\S",
                " ",
                "\t",
                "\r",
                "\n",
                " ",  // labelled "tab" in the original notes
                " ",  // labelled "4 spaces" in the original notes
                " "); // labelled "1 Chinese (full-width) space" in the original notes
    }

    private static void demo9() {
        report("\\W", " ", "a", "6", "_", "啊");
    }

    private static void demo10() {
        // "." stands for any character.
        report(".", " ", "a", "6", "_", "啊", "abc");
    }

    private static void demo11() {
        // An escaped dot stands for a literal ".".
        report("\\.", "a", "b", "c", ".");
    }

    private static void demo12() {
        report("[abc]{2,3}", "", "a", "ab", "abc", "abca", "d");
    }

    private static void demo13() {
        // 1 to 6 letters or digits.
        report("[a-zA-Z0-9]{1,6}", "1a", "12b456", "1234567890");
    }

    private static void demo14() {
        // Mobile phone number.
        report("1[3458]\\d{9}", "13912345678", "15912345678", "18600012345", "188987654321");
    }

    private static void demo15() {
        // Email address.
        report("[\\w-\\.]+@([\\w-]+\\.)+[a-zA-Z]{2,4}",
                "fd@itcast.cn",
                "59937493@qq.com",
                "donfoo@donfoo.com",
                "1234567890",
                "123@4567890.xxx");
    }

    private static void demo16() {
        // User name: letters, digits and underscores, at most 10 characters,
        // and the first character must be a letter.
        report("[a-zA-Z]\\w{0,9}", "abc", "abc2013", "abc2013def", "abc2013defxxx", "2013abc");
    }

    private static void demo17() {
        // Password: any characters, 6 to 16 of them.
        report(".{6,16}", "abc", "abcdef", "1234567890123456", "中文中文中文");
    }
}
----------------------------------------------------------------------------------------------------------------------------------
package cn.itcast.regex;
/**
 * Demonstrations of capturing groups and back-references with
 * {@code String.matches}. A back-reference such as {@code \1} requires the
 * text captured by the first parenthesised group to appear again.
 */
@SuppressWarnings("unused")
public class Demo2_Group {

    public static void main(String[] args) {
        // demo1();
        // demo2();
        demo3();
    }

    /** AABB-shaped words: each captured character is immediately repeated. */
    private static void demo1() {
        // \\1 repeats what the first group captured, \\2 what the second captured.
        final String aabb = "(.)\\1(.)\\2";
        final String[] words = {"AABB", "快快乐乐", "嘻嘻哈哈", "乐呵乐呵"};
        for (String word : words) {
            System.out.println(word.matches(aabb));
        }
    }

    /** ABAB-shaped words: a two-character group followed by itself. */
    private static void demo2() {
        final String abab = "(..)\\1";
        final String[] words = {"AABB", "快快乐乐", "嘻嘻哈哈", "乐呵乐呵", "死啦死啦"};
        for (String word : words) {
            System.out.println(word.matches(abab));
        }
    }

    /** Mobile numbers whose last five digits are all the same. */
    private static void demo3() {
        final String fiveAlike = "1[3458]\\d{4}(\\d)\\1{4}";
        final String[] numbers = {"13900088888", "13900088788"};
        for (String number : numbers) {
            System.out.println(number.matches(fiveAlike));
        }
    }
}
3.分割
String.split() 用指定正则表达式能匹配的字符作为分隔符, 分割字符串
示例:
package cn.itcast.regex;
/**
 * Demonstrations of {@code String.split(regex)}: whatever the regular
 * expression matches is treated as a separator, and the pieces between
 * separators are printed one per line.
 */
@SuppressWarnings("unused")
public class Demo3_Split {

    public static void main(String[] args) {
        // demo1();
        // demo2();
        // demo3();
        // demo4();
    }

    /**
     * Splits {@code text} around matches of {@code separatorRegex} and prints
     * each resulting piece on its own line.
     */
    private static void printPieces(String text, String separatorRegex) {
        String[] pieces = text.split(separatorRegex);
        for (int i = 0; i < pieces.length; i++) {
            System.out.println(pieces[i]);
        }
    }

    /** Plain comma as the separator. */
    private static void demo1() {
        printPieces("abc,def,xxx,ooo,xyz", ",");
    }

    /** A literal dot as the separator; it has to be escaped. */
    private static void demo2() {
        printPieces("abc.def.xxx.ooo.xyz", "\\.");
    }

    /** Any one of the characters c, e or y as the separator. */
    private static void demo3() {
        printPieces("abc.def.xxx.ooo.xyz", "[cey]");
    }

    /** A run of one or more dots as a single separator. */
    private static void demo4() {
        printPieces("abc....def..xxx...ooo.xyz", "\\.+");
    }
}
4.替换
String.replaceAll() 把字符串中能匹配正则表达式的部分替换为另一个字符串
示例:
package cn.itcast.regex;
/**
 * Demonstrations of {@code String.replaceAll(regex, replacement)}, which
 * replaces every part of the string matched by the regular expression, set
 * against {@code String.replace}, which treats its first argument literally.
 */
@SuppressWarnings("unused")
public class Demo4_Replace {

    public static void main(String[] args) {
        // demo1();
        // demo2();
        // demo3();
        demo4();
    }

    /** Literal replacement of every "a". */
    private static void demo1() {
        String replaced = "abcxxxabcoooabc".replace("a", "?");
        System.out.println(replaced);
    }

    /** Regex replacement: every character matched by the class becomes "?". */
    private static void demo2() {
        String replaced = "abcxxxabcoooabc".replaceAll("[ac]", "?");
        System.out.println(replaced);
    }

    /** The same argument text read as a regex (replaceAll) and as a literal (replace). */
    private static void demo3() {
        final String bracketed = "[abc]";
        String viaRegex = bracketed.replaceAll("[abc]", "?"); // [???]
        String viaLiteral = bracketed.replace("[abc]", "?"); // ?
        System.out.println(viaRegex);
        System.out.println(viaLiteral);
    }

    /** Collapses any character repeated two or more times in a row to one copy. */
    private static void demo4() {
        final String stuttered = "我要要要娶娶媳媳媳媳妇"; // intended result: 我要娶媳妇
        // $1 in the replacement refers to the character captured by group 1.
        String collapsed = stuttered.replaceAll("(.)\\1+", "$1");
        System.out.println(collapsed);
    }
}
5.查找
Pattern.compile() 创建正则表达式对象
Pattern.matcher() 用正则表达式匹配一个字符串, 得到匹配器
Matcher.find() 查找字符串中是否包含能匹配正则表达式的部分
Matcher.group() 获取匹配的部分
示例:
package cn.itcast.regex;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrations of searching inside a string with {@code Pattern} and
 * {@code Matcher}: {@code matches()} tests the whole input, {@code find()}
 * looks for the next matching part, and {@code group()} returns the part
 * that was just matched.
 */
@SuppressWarnings("unused")
public class Demo5_Find {

    /** Regular expression for a mobile number, shared by all demos. */
    private static final String MOBILE_REGEX = "1[3458]\\d{9}";

    /** Sample sentence containing three mobile numbers. */
    private static final String SENTENCE =
            "我的电话号码是: 18600012345, 以前使用过: 18611054321, 在天津用: 13920398765";

    public static void main(String[] args) {
        // demo1();
        // demo2();
        // demo3();
    }

    /** Whole-input match against a string that is exactly one number. */
    private static void demo1() {
        Pattern mobile = Pattern.compile(MOBILE_REGEX); // build the regex object
        Matcher matcher = mobile.matcher("13800099999"); // bind it to an input
        boolean wholeMatch = matcher.matches(); // does the entire input match?
        System.out.println(wholeMatch);
    }

    /** Steps through the sentence by hand: one find()/group() pair per number. */
    private static void demo2() {
        Matcher matcher = Pattern.compile(MOBILE_REGEX).matcher(SENTENCE);
        System.out.println(matcher.matches());
        // Each find() resumes after the previous hit; group() returns that hit.
        for (int round = 0; round < 3; round++) {
            System.out.println(matcher.find());
            System.out.println(matcher.group());
        }
        // One more search after the three find()/group() rounds above.
        System.out.println(matcher.find());
    }

    /** Prints every number found in the sentence. */
    private static void demo3() {
        for (Matcher matcher = Pattern.compile(MOBILE_REGEX).matcher(SENTENCE); matcher.find(); ) {
            System.out.println(matcher.group());
        }
    }
}
6.练习
1.提取一个文件里的所有手机号码,并存在另一个文件中。
代码:
package cn.itcast.regex.exercise;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Exercise: pull every mobile phone number out of {@code text.txt} and write
 * the distinct numbers, sorted, one per line, to {@code phone.txt}.
 *
 * <p>Both files are resolved against the current working directory.
 */
public class Exercise1 {

    /** A "1", then one of 3/4/5/8, then nine more digits; compiled once. */
    private static final Pattern PHONE = Pattern.compile("1[3458]\\d{9}");

    /**
     * Reads {@code text.txt}, collects the phone numbers it contains and
     * writes them to {@code phone.txt}.
     *
     * @param args ignored
     * @throws IOException if the input cannot be read or the output cannot be written
     */
    public static void main(String[] args) throws IOException {
        String content = readWholeFile("text.txt");

        // A TreeSet both removes duplicates and keeps the numbers sorted.
        Set<String> numbers = new TreeSet<>();
        Matcher matcher = PHONE.matcher(content);
        while (matcher.find()) {
            numbers.add(matcher.group());
        }

        // try-with-resources closes (and flushes) the writer even if a write fails.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter("phone.txt"))) {
            for (String number : numbers) {
                writer.write(number);
                writer.newLine();
            }
        }
    }

    /**
     * Loads the whole file into memory and decodes it with the platform
     * default charset.
     */
    private static String readWholeFile(String path) throws IOException {
        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        // try-with-resources guarantees the stream is closed if read() throws.
        try (FileInputStream in = new FileInputStream(path)) {
            byte[] buffer = new byte[1024];
            int len;
            while ((len = in.read(buffer)) != -1) {
                collected.write(buffer, 0, len);
            }
        }
        return new String(collected.toByteArray());
    }
}
2.把一个阿拉伯数字转为一个中文大写数字
代码:
package cn.itcast.regex.exercise;
/**
 * Exercise: convert an Arabic-numeral integer into Chinese upper-case
 * (financial) numerals, using regular expressions to tidy up the zeros.
 */
public class Exercise2 {

    /** Upper-case numeral for each decimal digit, indexed by the digit. */
    private static final char[] DIGITS = { '零', '壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖' };

    /** Unit character for each decimal position, indexed from the lowest digit. */
    private static final char[] UNITS = { '圆', '拾', '佰', '仟', '万', '拾', '佰', '仟', '亿', '拾', '佰', '仟'};

    public static void main(String[] args) {
        System.out.println(toChinese(123456789)); // 壹亿贰仟叁佰肆拾伍万陆仟柒佰捌拾玖圆
        System.out.println(toChinese(101));
        System.out.println(toChinese(1011));
        System.out.println(toChinese(10111));
        System.out.println(toChinese(10001));
        System.out.println(toChinese(10001));
        System.out.println(toChinese(1000000001));
        System.out.println(toChinese(10000));
        System.out.println(toChinese(0));
    }

    /**
     * Converts an integer to Chinese upper-case numerals ending in 圆.
     *
     * <p>Negative values are converted by magnitude and prefixed with 负.
     *
     * @param n the number to convert; any {@code int}, including
     *          {@code Integer.MIN_VALUE}
     * @return the upper-case Chinese numeral text, e.g. {@code 壹佰零壹圆} for 101
     */
    public static String toChinese(int n) {
        if (n == 0) {
            return "零圆";
        }
        // Work on the magnitude as a long so that -Integer.MIN_VALUE does not overflow.
        long remaining = Math.abs((long) n);
        StringBuilder sb = new StringBuilder();
        // Emit "digit + unit" for every decimal place, lowest place first,
        // prepending so the text ends up highest place first.
        for (int i = 0; remaining > 0; i++) {
            sb.insert(0, UNITS[i]).insert(0, DIGITS[(int) (remaining % 10)]);
            remaining /= 10;
        }
        String text = sb.toString()
                .replaceAll("零[仟佰拾]", "零")     // a zero digit drops its own unit
                .replaceAll("零+", "零")            // a run of zeros is read as one zero
                .replaceAll("零([亿万圆])", "$1")   // no zero right before a section unit
                .replace("亿万", "亿");             // an all-zero 万 section leaves no 万
        return n < 0 ? "负" + text : text;
    }
}