工具类(待优化)
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small scraping helper: downloads the text behind a URL and extracts data
 * from it with a regular expression.
 *
 * <p>The class is a singleton; obtain the shared instance through
 * {@link #getIntance()}.
 */
public class CreeperUtil {

    /** Eagerly created shared instance, so no locking is needed on access. */
    private static final CreeperUtil INSTANCE = new CreeperUtil();

    private CreeperUtil() {
    }

    /**
     * Returns the shared instance.
     *
     * <p>The (misspelled) method name is kept for compatibility with existing callers.
     *
     * @return the single {@code CreeperUtil} instance
     */
    public static CreeperUtil getIntance() {
        return INSTANCE;
    }

    /**
     * Fetches {@code url} and applies {@code regex} to its content on a worker
     * thread, blocking until the result is available.
     *
     * @param url   location of the resource to download
     * @param regex regular expression applied to the downloaded text
     * @param group capture-group selector, see {@link #regular(String, String, String...)}
     * @return the formatted matches, or {@code null} if the task failed or the
     *         calling thread was interrupted while waiting
     */
    public String threadTool(String url, String regex, String... group) {
        // Only one task is ever submitted, so a single worker is enough.
        ExecutorService pool = Executors.newSingleThreadExecutor();
        try {
            Future<String> pending = pool.submit(new InitCallable(url, regex, group));
            return pending.get();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can react to it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (ExecutionException e) {
            e.printStackTrace();
        } finally {
            // Without this the non-daemon worker thread would keep the JVM alive.
            pool.shutdown();
        }
        return null;
    }

    /**
     * Task that downloads a resource and runs the regular expression over it.
     * Both steps happen in {@link #call()}, i.e. on the executing thread.
     */
    class InitCallable implements Callable<String> {
        private String url;
        private String regex;
        private String[] group;

        /** Creates a task with no URL, pattern or groups set. */
        InitCallable() {
        }

        /**
         * @param url   location of the resource to download
         * @param regex regular expression applied to the downloaded text
         * @param group capture-group selector passed on to {@code regular}
         */
        public InitCallable(String url, String regex, String... group) {
            this.url = url;
            this.regex = regex;
            this.group = group;
        }

        /**
         * Downloads the resource and extracts the matches.
         *
         * @return the formatted matches produced by {@code regular}
         */
        @Override
        public String call() throws Exception {
            return regular(mesh(url), regex, group);
        }
    }

    /**
     * Downloads the resource at {@code url} as text.
     *
     * <p>Lines are concatenated without their line terminators. Bytes are
     * decoded with the platform default charset. Failures are reported on
     * standard error and do not propagate; whatever was read up to that point
     * is returned.
     *
     * @param url location of the resource to read
     * @return the content read so far; empty if nothing could be read
     */
    public String mesh(String url) {
        StringBuilder content = new StringBuilder();
        // try-with-resources closes whatever was actually opened, even when
        // opening the connection itself fails.
        try (InputStream stream = new URL(url).openConnection().getInputStream();
             BufferedReader reader = new BufferedReader(new InputStreamReader(stream))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
        } catch (IOException | RuntimeException e) {
            // Best effort: report the problem and return what we have.
            e.printStackTrace();
        }
        return content.toString();
    }

    /**
     * Finds every match of {@code regex} in {@code data} and formats the
     * matches one per line (each terminated by {@code "\n"}).
     *
     * <p>When no {@code group} values are given the whole match is emitted.
     * Otherwise capture groups {@code 1..group.length} are emitted, each
     * followed by a tab. Only the <em>number</em> of {@code group} values is
     * used; their contents are ignored.
     *
     * @param data  text to search
     * @param regex regular expression to apply
     * @param group one value per capture group to output; may be empty
     * @return the formatted matches; empty if nothing matched
     * @throws IndexOutOfBoundsException if more group values are supplied than
     *         the pattern has capture groups
     */
    public String regular(String data, String regex, String... group) {
        // An explicit null array is treated the same as "no groups requested".
        int groupCount = (group == null) ? 0 : group.length;
        Matcher matcher = Pattern.compile(regex).matcher(data);
        StringBuilder out = new StringBuilder();
        while (matcher.find()) {
            if (groupCount == 0) {
                out.append(matcher.group());
            } else {
                for (int i = 1; i <= groupCount; i++) {
                    out.append(matcher.group(i)).append("\t");
                }
            }
            out.append("\n");
        }
        return out.toString();
    }
}
客户端调用
/**
 * Demo entry point: runs the scraper against a local HTML file and prints the
 * extracted capture groups together with the elapsed time in milliseconds.
 */
class Client {

    /**
     * Extracts six capture groups per matching list item from the sample page
     * and writes the result, followed by the elapsed time, to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final String pageUrl = "file:///C:/Users/Administrator/Desktop/HTML%E4%BB%A3%E7%A0%81/1.html";
        final String itemPattern = "<li.*?J_Cat a-all\">.*?<a.*?data-dataid=\"(.*?)\".*?>(.*?)</a>.*?<a.*?data-dataid=\"(.*?)\".*?>(.*?)</a>.*?<a.*?data-dataid=\"(.*?)\".*?>(.*?)</a>.*?<\\/li>";
        // One entry per capture group that should appear in the output.
        final String[] groups = {"1", "2", "3", "4", "5", "6"};

        CreeperUtil scraper = CreeperUtil.getIntance();

        long startedAt = System.currentTimeMillis();
        String extracted = scraper.threadTool(pageUrl, itemPattern, groups);
        long elapsed = System.currentTimeMillis() - startedAt;

        System.out.println(extracted + "\n花费时间:" + elapsed);
    }
}