package cn.dss.Studay.正则表达式;
/**
 * Regex warm-up: tests whether an entire string matches a one-character class.
 *
 * <p>Regular expressions are compact but comparatively hard to read, so the greedy
 * quantifiers are summarised here for reference (X is any sub-expression):
 *
 * <pre>
 *   X?      once or not at all
 *   X*      zero or more times
 *   X+      one or more times
 *   X{n}    exactly n times
 *   X{n,}   at least n times
 *   X{n,m}  at least n but not more than m times
 * </pre>
 */
public class Demo1 {

    /**
     * Prints {@code true} when the sample string is exactly one non-digit character,
     * {@code false} otherwise.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Earlier experiments with this demo used "[^1-9][0-9]{4,14}" and "ao*b".
        final String nonDigit = "[^0-9]"; // same class as the predefined \D
        final String sample = "a";

        // String.matches succeeds only if the whole string matches the pattern.
        System.out.println(sample.matches(nonDigit));
    }
}
package cn.dss.Studay.正则表达式;
/**
 * Regex matching demo: validates a whole string against a fixed-length digit pattern.
 */
public class Demo2 {

    /**
     * Prints whether the sample is an 11-character string made of a leading {@code 1},
     * then one of {@code 3}, {@code 5} or {@code 8}, then exactly nine more digits.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // \d is the predefined digit class, i.e. the same as writing [0-9].
        final String elevenDigitPattern = "1[358]\\d{9}";
        final String candidate = "18296842318";

        final boolean valid = candidate.matches(elevenDigitPattern);
        System.out.println(valid);
    }
}
package cn.dss.Studay.正则表达式;
/**
 * Regex splitting demo: uses a capturing group and a back-reference as the delimiter.
 */
public class Demo3 {

    /**
     * Splits the sample text wherever one character is repeated two or more times in a row
     * and prints each resulting piece on its own line.
     *
     * <p>Simpler delimiters tried with a space-separated sentence were {@code " "} and
     * {@code " +"}. With the current sample the runs "ss" and "ttt" are adjacent, so the
     * output contains one empty piece between them.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // (.) captures any single character; \1+ demands that same character at least once more.
        final String repeatedRun = "(.)\\1+";
        final String text = "dsstttlovesssbigdatavvvvvpython";

        String[] pieces = text.split(repeatedRun);
        for (int idx = 0; idx < pieces.length; idx++) {
            System.out.println(pieces[idx]);
        }
    }
}
package cn.dss.Studay.正则表达式;
/**
 * Regex replacement demo: rewrites a match using its captured groups.
 */
public class Demo4 {

    /**
     * Masks the middle four digits of an 11-digit sample with asterisks and prints the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Group 1 = first three digits, group 2 = last four; the four digits between are not
        // captured. An earlier variant of this demo used "(.)\\1+" on a letter string instead.
        final String threeFourFour = "(\\d{3})\\d{4}(\\d{4})";

        // $1 and $2 in the replacement refer back to the captured groups.
        final String masked = "18296842318".replaceAll(threeFourFour, "$1****$2");
        System.out.println(masked);
    }
}
package cn.dss.Studay.正则表达式;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Regex search demo: wraps the expression in a {@link Pattern} and walks its matches.
 */
public class Demo5 {

    /**
     * Prints every standalone three-letter lowercase word of the sample sentence, each
     * followed by a line of the form {@code start->end} giving the match offsets.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final String sentence = "ds love big data java python scala";

        // \b on both sides restricts matches to whole words of exactly three letters.
        final Pattern threeLetterWord = Pattern.compile("\\b[a-z]{3}\\b");
        final Matcher hits = threeLetterWord.matcher(sentence);

        // find() advances to the next match on every call and returns false when none is left.
        for (boolean more = hits.find(); more; more = hits.find()) {
            final String word = hits.group();
            final String span = hits.start() + "->" + hits.end();
            System.out.println(word);
            System.out.println(span);
        }
    }
}
// Java crawler exercise: start Tomcat locally and fetch your own page, 1.html.
package cn.dss.Studay.正则表达式;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small crawler exercise: collects e-mail-like tokens from an HTML page that is either
 * fetched from a local web server ({@code getDemo_2}) or read from a local file
 * ({@code getDemo_1}).
 */
public class Pacon {

    /**
     * Matches tokens of the form {@code word@word.word}, where a word is one or more of
     * {@code [a-zA-Z0-9_]}. Compiled once instead of once per chunk of input.
     */
    private static final Pattern EMAIL_PATTERN = Pattern.compile("\\w+@\\w+\\.\\w+");

    /**
     * Prints every address found on the page served at the local URL, one per line.
     * Swap in {@code getDemo_1()} to scan the local file instead.
     *
     * @param args unused
     * @throws Exception if the page cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        final List<String> demo_2 = getDemo_2();
        for (String d : demo_2) {
            System.out.println(d);
        }
    }

    /**
     * Collects the addresses found in {@code http://127.0.0.1:8080/myweb/1.html}.
     *
     * @return the matches in order of appearance; empty if there are none
     * @throws Exception if the URL cannot be opened or read
     */
    private static List<String> getDemo_2() throws Exception {
        URL url = new URL("http://127.0.0.1:8080/myweb/1.html");
        return extractEmails(url.openStream());
    }

    /**
     * Collects the addresses found in the file {@code 1.html}, resolved against the
     * current working directory.
     *
     * @return the matches in order of appearance; empty if there are none
     * @throws Exception if the file cannot be opened or read
     */
    private static List<String> getDemo_1() throws Exception {
        return extractEmails(new FileInputStream("1.html"));
    }

    /**
     * Scans a byte stream for addresses and always closes it, even when reading fails.
     *
     * <p>The input is decoded as UTF-8 and matched one line at a time. A match can never
     * contain a line terminator, so this finds exactly the matches of the complete text.
     * Matching fixed-size byte chunks instead would truncate or drop any address that
     * happens to straddle a chunk boundary and could split multi-byte characters.
     *
     * @param source stream to read; closed by this method
     * @return the matches in order of appearance; empty if there are none
     * @throws IOException if reading from {@code source} fails
     */
    private static List<String> extractEmails(InputStream source) throws IOException {
        List<String> found = new ArrayList<>();
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(source, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                Matcher matcher = EMAIL_PATTERN.matcher(line);
                while (matcher.find()) {
                    found.add(matcher.group());
                }
            }
        }
        return found;
    }
}