DEMO1:
// Regular expression: the input must begin with "Java", followed by any characters.
String regex = "^Java.*";
// Compile the expression into a reusable Pattern.
Pattern pattern = Pattern.compile(regex);
// Create a Matcher that applies the pattern to the input text.
Matcher matcher = pattern.matcher("Java不是人,是个啥啊!");
// matches() succeeds only when the ENTIRE input matches the pattern.
boolean b = matcher.matches();
// Print the result (true for this input).
System.out.println(b);
demo2:
/**
 * Small exercises demonstrating the three everyday uses of {@code java.util.regex}:
 * searching, replacing and splitting.
 */
public class LearnRegexp {

    /**
     * Shows the difference between {@code matches()} (whole-input match) and
     * {@code find()} (successive partial matches).
     */
    @Test
    public void testSearch() {
        final String text = "hello1997&&2000";
        // Compile the regex and bind it to the text in one step.
        final Matcher wordMatcher = Pattern.compile("\\w+").matcher(text);

        // matches() checks the whole sequence; "&&" is not a word character, so this prints false.
        final boolean wholeInputMatches = wordMatcher.matches();
        System.out.println(wholeInputMatches);

        // Rewind the matcher so that find() starts scanning from the beginning again.
        wordMatcher.reset();

        // Each successful find() advances to the next run of word characters.
        while (wordMatcher.find()) {
            System.out.println(wordMatcher.group());
        }
    }

    /**
     * Replaces every single digit in the input with '#'.
     */
    @Test
    public void testReplace() {
        final String original = "1j2h3h4g5o";
        final String masked = Pattern.compile("[0-9]").matcher(original).replaceAll("#");
        System.out.println(masked);
    }

    /**
     * Splits the input around runs of digits and prints each piece on its own line.
     * Because the input starts with a digit, the first piece is an empty string.
     */
    @Test
    public void testSplit() {
        final String text = "1j24h356h467g589o";
        final String[] pieces = Pattern.compile("\\d+").split(text);
        for (int i = 0; i < pieces.length; i++) {
            System.out.println(pieces[i]);
        }
    }
}
demo3:
/**
 * Imitates a tiny web crawler: downloads a page's HTML, extracts every hyperlink
 * target found in {@code href="..."} attributes, and writes the distinct URLs to a file.
 */
public class HtmlAnalyzer {

    /** Destination file for the extracted URLs, one URL per line. */
    private static final String FILE_PATH = "/Users/jifang/save.txt";

    /**
     * Downloads the 163.com home page (decoded as GBK), extracts its link targets
     * and saves them to {@link #FILE_PATH}.
     *
     * @throws IOException if the download or the file write fails
     */
    @Test
    public void client() throws IOException {
        String html = downloadHtml("http://www.163.com/", "gbk");
        // URL regex, shown without Java string escaping:
        //   (?<=(href=\"))(?:[\w.\/\:\?\=\&]+)(?=\")
        // i.e. a run of word characters and . / : ? = & that is preceded by href=" and followed by ".
        Set<String> urlSet = analyzeHtml(html, "(?<=(href=\\\"))(?:[\\w.\\/\\:\\?\\=\\&]+)(?=\\\")");
        saveToFile(urlSet);
        System.out.println();
    }

    /**
     * Fetches the resource at {@code url} and returns its full content as text.
     *
     * @param url     address of the page to download
     * @param charset name of the charset used to decode the response bytes
     * @return the complete response body
     * @throws IOException if the URL is malformed, the charset is unsupported, or reading fails
     */
    private String downloadHtml(String url, String charset) throws IOException {
        URL readUrl = new URL(url);
        // try-with-resources closes the reader (and the underlying URL stream) even when reading fails.
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(readUrl.openStream(), charset))) {
            return CharStreams.toString(reader);
        }
    }

    /**
     * Collects every distinct substring of {@code html} that matches {@code regex}.
     *
     * @param html  text to scan
     * @param regex regular expression describing one URL
     * @return the set of matched substrings (duplicates removed, iteration order unspecified)
     */
    private Set<String> analyzeHtml(String html, String regex) {
        Set<String> urlSet = new HashSet<>();
        Matcher matcher = Pattern.compile(regex).matcher(html);
        while (matcher.find()) {
            urlSet.add(matcher.group());
        }
        return urlSet;
    }

    /**
     * Writes each URL on its own line to {@link #FILE_PATH}, replacing any existing file content.
     *
     * @param urlSet URLs to write
     * @throws IOException if the file cannot be opened or a write error occurred
     */
    private void saveToFile(Set<String> urlSet) throws IOException {
        // try-with-resources guarantees the stream is closed even if the loop throws.
        try (PrintStream printer = new PrintStream(new FileOutputStream(FILE_PATH))) {
            for (String url : urlSet) {
                printer.println(url);
            }
            // PrintStream never throws on write failure; checkError() flushes and reports it.
            if (printer.checkError()) {
                throw new IOException("Failed to write URLs to " + FILE_PATH);
            }
        }
    }
}