package 爬虫;
import 多线程爬虫.DownloadImage;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small image crawler entry point.
 *
 * <p>Fetches one listing page, scans it line by line for lazily loaded image
 * URLs, and submits one {@link DownloadImage} task per URL to a thread pool.
 */
public class pachong2 {
    /** Listing page that is scanned for image URLs. */
    private static final String LIST_PAGE_URL = "http://www.bookschina.com/kinder/27000000/";

    /** Marker identifying the lines that carry lazily loaded images. */
    private static final String LAZY_IMAGE_MARKER = "class=\"lazyImg\"";

    /**
     * Matches the value of a {@code data-original="..."} attribute that starts with
     * {@code http:} and ends with a literal {@code .jpg}. Compiled once because the
     * pattern never changes; the dot before {@code jpg} is escaped so that text such
     * as {@code "/myjpg"} cannot terminate a match early.
     */
    private static final Pattern IMAGE_URL =
            Pattern.compile("(?<=data-original=\")http:.*?\\.jpg");

    /**
     * Creates a fixed pool of 100 worker threads, crawls the listing page and then
     * requests an orderly shutdown of the pool. Already submitted tasks still run
     * to completion after {@code shutdown()}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        ExecutorService pool = null;
        try {
            pool = Executors.newFixedThreadPool(100);
            getUrl(pool);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Always release the worker threads, even if crawling failed unexpectedly.
            if (pool != null) {
                pool.shutdown();
            }
        }
    }

    /**
     * Downloads the listing page, decodes it as GBK and submits a
     * {@link DownloadImage} task to {@code pool} for every image URL found on a
     * line containing {@code class="lazyImg"}.
     *
     * <p>Any exception is caught and its stack trace printed; nothing is rethrown.
     *
     * @param pool executor that receives the download tasks
     */
    public static void getUrl(ExecutorService pool) {
        try {
            URL url = new URL(LIST_PAGE_URL);
            URLConnection com = url.openConnection();
            // try-with-resources closes the reader and the underlying stream on every path.
            try (InputStream word = com.getInputStream();
                 BufferedReader br = new BufferedReader(new InputStreamReader(word, "GBK"))) {
                System.out.println(com.getContentEncoding());
                String line;
                while ((line = br.readLine()) != null) {
                    // Cheap substring test first; only candidate lines go through the regex.
                    if (!line.contains(LAZY_IMAGE_MARKER)) {
                        continue;
                    }
                    Matcher m = IMAGE_URL.matcher(line);
                    while (m.find()) {
                        String imageUrl = m.group();
                        System.out.println(imageUrl);
                        pool.execute(new DownloadImage(imageUrl));
                        System.out.println(pool);
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
package 爬虫;
import java.io.*;
import java.net.*;
/**
 * Runnable task that downloads the resource at a URL and writes it to a file
 * under {@code E:\text\}, named after the last {@code /}-separated segment of
 * the URL.
 */
public class DownloadImage implements Runnable{
    /** Size of the copy buffer in bytes. */
    private static final int BUFFER_SIZE = 8192;

    /** URL of the resource to download. */
    String downUrl;

    /**
     * @param downUrl URL of the resource to download
     */
    public DownloadImage(String downUrl){
        this.downUrl = downUrl;
    }

    /**
     * Downloads {@link #downUrl} and stores its bytes on disk.
     *
     * <p>Any exception (malformed URL, network or file error) is caught and
     * printed to standard output; nothing is rethrown.
     */
    @Override
    public void run(){
        try{
            URL url = new URL(downUrl);
            URLConnection uc = url.openConnection();
            // Target file name is the last path segment of the URL.
            String[] p = downUrl.split("/");
            String path = "E:\\text\\"+p[p.length-1];
            // try-with-resources closes both streams on success and on failure,
            // so no socket or file handle is leaked per task.
            try (InputStream enter = new BufferedInputStream(uc.getInputStream());
                 OutputStream writer = new FileOutputStream(path)) {
                // Copy in chunks instead of one byte (and one console line) at a time.
                byte[] buffer = new byte[BUFFER_SIZE];
                int count;
                while ((count = enter.read(buffer)) != -1) {
                    writer.write(buffer, 0, count);
                }
            }
        }catch(Exception e){
            System.out.println(e);
        }
    }
}