1.[代码][Java]代码
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URL;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Reads URLs line by line from an input file and, for each one, fetches the page and
 * writes its title, keywords and description to an output file as one tab-separated line:
 * {@code url \t title \t keywords \t description}.
 *
 * <p>The reader and writer are static, so every instance (and therefore every worker
 * submitted by {@link #MultiThreadsGetTitle(int)}) shares the same input and output files.
 * Reads are serialized on the reader and writes on the writer.
 *
 * @author lujianfeng@miaozhen.com
 */
public class GetTitleRunnable implements Runnable
{
    /** Shared source of URLs, one per line; {@code null} until opened and again after {@link #close()}. */
    private static BufferedReader br;
    /** Shared sink for result lines; {@code null} until opened and again after {@link #close()}. */
    private static BufferedWriter bw;

    /**
     * Worker loop: repeatedly takes the next URL from the shared reader, fetches its
     * metadata and appends a result line to the shared writer, until the input is exhausted.
     * URLs for which {@link #getTitle(String)} returns {@code null} produce no output line.
     *
     * @throws IllegalStateException if the shared files have not been opened
     * @throws RuntimeException wrapping any {@link IOException} from reading or writing the files
     */
    @Override
    public void run() {
        // Snapshot the shared streams once so a concurrent close() cannot turn the
        // lock expressions below into a NullPointerException mid-loop.
        final BufferedReader reader = br;
        final BufferedWriter writer = bw;
        if (reader == null || writer == null) {
            throw new IllegalStateException(
                    "input/output files are not open; construct GetTitleRunnable(in, out) first");
        }
        try {
            while (true) {
                String url;
                synchronized (reader) {
                    url = reader.readLine();
                }
                if (url == null) {
                    break; // end of input
                }
                url = url.trim();
                if (url.length() == 0) {
                    continue; // blank line: nothing to fetch
                }
                String title = getTitle(url);
                if (title != null) {
                    synchronized (writer) {
                        writer.write(url + "\t" + title + "\n");
                    }
                }
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Closes the shared reader and writer (flushing buffered output) and clears the static
     * references so a later {@link #GetTitleRunnable(String, String)} can open new files.
     * The writer is closed even when closing the reader fails.
     *
     * @throws RuntimeException wrapping the {@link IOException} if closing either stream fails
     */
    public static void close() {
        BufferedReader reader = br;
        BufferedWriter writer = bw;
        br = null;
        bw = null;
        try {
            try {
                if (reader != null) {
                    reader.close();
                }
            } finally {
                // Must run regardless of the reader's fate, otherwise buffered results are lost.
                if (writer != null) {
                    writer.close();
                }
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Creates a worker that uses the already-opened shared input and output files. */
    public GetTitleRunnable() {}

    /**
     * Opens the shared input and output files unless both are already open.
     *
     * @param in  path of the file containing one URL per line
     * @param out path of the file that receives the result lines (overwritten)
     * @throws RuntimeException wrapping the {@link IOException} if either file cannot be opened
     */
    public GetTitleRunnable(String in, String out) {
        if (br != null && bw != null) {
            return;
        }
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(in));
            BufferedWriter writer = new BufferedWriter(new FileWriter(out));
            // Publish both together so the class is never left half-initialized.
            br = reader;
            bw = writer;
        } catch (IOException e) {
            if (reader != null) {
                try {
                    reader.close(); // do not leak the reader when the writer cannot be opened
                } catch (IOException ignored) {
                    // the original failure is the one worth reporting
                }
            }
            throw new RuntimeException(e);
        }
    }

    /**
     * Fetches {@code url} and extracts, from each {@code <head>} element, the text of its
     * {@code <title>} elements and the {@code content} attributes of the elements whose
     * {@code name} attribute is {@code keywords} or {@code description}.
     *
     * @param url address of the page to fetch
     * @return {@code title \t keywords \t description} with CR/LF characters removed, or
     *         {@code null} if the page could not be fetched (the URL is then printed to
     *         standard output) or if the extracted text is not longer than 3 characters
     */
    public static String getTitle(String url) {
        Document doc;
        try {
            new URL(url); // validation only: throws MalformedURLException for an unparsable URL
            Connection con = Jsoup.connect(url);
            con.userAgent("Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)");
            doc = con.get();
        } catch (IOException e) {
            System.out.println(url);
            return null;
        }
        Elements heads = doc.getElementsByTag("head");
        StringBuilder sb = new StringBuilder();
        for (Element head : heads) {
            Elements titles = head.getElementsByTag("title");
            for (Element title : titles) {
                sb.append(title.text());
            }
            sb.append("\t");
            Elements keys = head.getElementsByAttributeValue("name", "keywords");
            for (Element key : keys) {
                sb.append(key.attr("content"));
            }
            // Tab, not newline: line breaks are stripped below, so a newline here would
            // silently glue the keywords and the description into a single column.
            sb.append("\t");
            Elements descs = head.getElementsByAttributeValue("name", "description");
            for (Element desc : descs) {
                sb.append(desc.attr("content"));
            }
        }
        // The two separators alone account for 2 characters per <head>; results that are
        // not longer than 3 characters are treated as "nothing useful found".
        return sb.length() > 3 ? sb.toString().replaceAll("[\r\n]", "") : null;
    }

    /**
     * Runs {@code threadsNum} workers over the shared input file, waits up to 5 days for
     * them to finish, and then closes the shared files.
     *
     * @param threadsNum number of worker tasks to submit
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    public void MultiThreadsGetTitle(int threadsNum) throws InterruptedException {
        ExecutorService executor = Executors.newCachedThreadPool();
        try {
            for (int i = 0; i < threadsNum; i++) {
                executor.execute(new GetTitleRunnable());
                System.out.println("thread " + i + " started");
            }
            executor.shutdown();
            executor.awaitTermination(5, TimeUnit.DAYS);
        } finally {
            // Reached on normal completion, timeout, or interruption: stop any workers that
            // are still running, then flush and close the files so finished results are kept.
            if (!executor.isTerminated()) {
                executor.shutdownNow();
            }
            GetTitleRunnable.close();
        }
    }

    /**
     * Command-line entry point.
     *
     * @param args {@code args[0]} input file with one URL per line, {@code args[1]} output
     *             file, {@code args[2]} number of worker threads
     * @throws InterruptedException if interrupted while waiting for the workers
     * @throws IllegalArgumentException if fewer than three arguments are given
     */
    public static void main( String[] args ) throws InterruptedException
    {
        if (args.length < 3) {
            throw new IllegalArgumentException(
                    "usage: GetTitleRunnable <inputFile> <outputFile> <threads>");
        }
        // NOTE(review): looks like a leftover smoke test against a fixed page; kept so the
        // program's output is unchanged — consider removing.
        System.out.print(getTitle("http://www.oschina.net/code/snippet_1417577_48298"));
        new GetTitleRunnable(args[0], args[1]).MultiThreadsGetTitle(Integer.parseInt(args[2]));
    }
}