// Downloads the page at `url`, scans its text with a regular expression for
// anchor tags, and collects one post-processed string per match into `list`.
// NOTE(review): this fragment is truncated - the `try` block and the
// `while (matcher.find())` loop below are never closed in the visible text,
// and no catch/finally clause is shown.
List<String> list = new ArrayList<String>();
try {
// NOTE(review): no charset is passed to InputStreamReader, so the default
// charset is used for decoding; the reader is also not closed anywhere in the
// visible lines - confirm how the full source handles both.
InputStreamReader isr = new InputStreamReader(new URL(url)
.openStream());// download the web page and obtain an input stream
StringBuilder input = new StringBuilder();
// NOTE(review): the pattern literal below looks mangled - forward slashes
// appear where backslashes would be expected (`//s` for `\\s`, `/"` for `\"`,
// `//?` for `\\?`). As written, the string literal ends at the first `/"`.
// Presumably it is meant to match `<a href=...>...</a>` links whose href
// starts with `s?wd=`; verify against the original source before use.
String patternStr = "(<a//s+href//s*=//s*(/"s//?wd=[^/"]*/"|[^//s>])//s*>).+</a>";// capture the relevant links in the web page
int ch;
// Read the stream one character at a time until end of stream (-1),
// buffering the whole page text in `input`.
while ((ch = isr.read()) != -1)
input.append((char) ch);
// Matching ignores letter case.
Pattern pattern = Pattern.compile(patternStr,
Pattern.CASE_INSENSITIVE);
Matcher matcher = pattern.matcher(input);
// Visit every match found in the buffered page text.
while (matcher.find()) {
int start = matcher.start();
int end = matcher.end();
// Text between the match's start and end offsets, i.e. the matched text.
String match = input.substring(start, end);
System.out.println(match);
// `stringUtil` is not defined in the visible lines; its result is stored as-is.
String ss = stringUtil.getString(match);// apply a second filtering pass to the extracted link to obtain the needed data
list.add(ss);
正则匹配出数据
最新推荐文章于 2021-10-27 08:53:24 发布