package com.regex;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates regex-based scraping of contact details from free text:
 * e-mail addresses, landline numbers, mobile numbers and hotline numbers
 * that start with 400.
 */
public class Crawler {

    /**
     * E-mail address: 1-30 word characters, an {@code @}, a 2-20 character
     * alphanumeric host label, then one or two dot-separated alphanumeric
     * suffixes of 2-20 characters each.
     */
    private static final String EMAIL =
            "(\\w{1,30}@[0-9a-zA-Z]{2,20}(\\.[0-9a-zA-Z]{2,20}){1,2})";

    /** Landline: a leading 0 plus 2-6 digits, an optional hyphen, then 5-20 digits. */
    private static final String LANDLINE = "(0\\d{2,6}-?\\d{5,20})";

    /** Mobile number: 1, a digit in the range 3-9, then nine more digits. */
    private static final String MOBILE = "(1[3-9]\\d{9})";

    /** Hotline: 400 followed by two digit groups of 3-20 digits, hyphens optional. */
    private static final String HOTLINE = "(400-?\\d{3,20}-?\\d{3,20})";

    /**
     * Combined scraping rule, compiled once.
     *
     * <p>Alternation is ordered: at a given start position the first
     * alternative that matches wins. The e-mail alternative therefore comes
     * first; otherwise an address whose local part looks like a phone number
     * (for example {@code 13812345678@163.com}) would be reported as the bare
     * number and the rest of the address would be dropped.
     */
    private static final Pattern CONTACT_PATTERN =
            Pattern.compile(EMAIL + "|" + LANDLINE + "|" + MOBILE + "|" + HOTLINE);

    /** Sample text scanned by {@link #main(String[])}. */
    private static final String SAMPLE_TEXT =
            "itcast@itcast.cn,电话18762832633,0203232323"
            + "邮箱bozai@itcast.cn,400-100-3233 ,4001003232"
            + "邮箱bozai@itcast.cn,400-100-3233 ,4001003232";

    /**
     * Scans the built-in sample text and prints every contact detail found,
     * one per line, in order of appearance.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        for (String contact : extract(SAMPLE_TEXT)) {
            System.out.println(contact);
        }
    }

    /**
     * Extracts every e-mail address, landline number, mobile number and
     * 400-prefixed hotline number from the given text.
     *
     * @param text the text to scan; must not be {@code null}
     * @return the matched substrings in order of appearance; empty when
     *         nothing matches
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public static List<String> extract(String text) {
        if (text == null) {
            throw new NullPointerException("text");
        }
        List<String> contacts = new ArrayList<String>();
        Matcher matcher = CONTACT_PATTERN.matcher(text);
        // find() resumes after the previous match, so matches never overlap.
        while (matcher.find()) {
            contacts.add(matcher.group());
        }
        return contacts;
    }
}
// Java 正则表达式实现爬虫套路,爬取手机号,电话,邮箱信息
// 最新推荐文章于 2024-05-14 10:21:56 发布