package com.adolph.download;
/**
 * Command-line entry point of the picture downloader.
 *
 * <p>Runs the pipeline in a fixed order: save the page's HTML, extract the
 * candidate image links from it, normalise them, then download every image
 * through the {@code DownTask} thread pool.
 */
public class DownloadPicture {

    /**
     * Runs the whole download pipeline once.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Step 1: fetch the HTML file of the target page.
        GetHTML.getHtml();

        // Steps 2-3: together with step 1 these extract the image links. The
        // links are found by pattern-matching the markup rather than by
        // recognising img tags, because tag recognition finds nothing on many sites.
        new GetImage().getImage();
        Change.change();

        // Step 4: create the thread pool.
        DownTask.create();

        // Step 5: run the download tasks.
        DownTask.play();

        // Step 6: destroy the thread pool.
        DownTask.pause();
    }
}
package com.adolph.download;
import java.util.Scanner;
import java.net.*;
import java.io.*;
/**
 * Downloads the HTML of a user-supplied web page to {@code E:\web.html}.
 */
public class GetHTML {

    /**
     * Prompts on standard output for a page address, reads one line from
     * standard input and saves the response body to {@code E:\web.html}.
     *
     * <p>The body is copied byte-for-byte. Copying it line by line through a
     * reader would drop every line terminator (gluing neighbouring lines
     * together) and would re-encode the text with the platform charset before
     * it is parsed again later.
     *
     * <p>Any failure (malformed address, network or file error, missing
     * input) is reported with a stack trace and otherwise ignored.
     */
    public static void getHtml() {
        // Deliberately not closed: closing the Scanner would also close System.in.
        Scanner sc = new Scanner(System.in);
        print("*************");
        print("输入你要得到图片的网页地址");
        print("*************");
        try {
            URL url = new URL(sc.nextLine());
            // The output file is opened first so that a failed fetch leaves an
            // empty file behind instead of a stale page from an earlier run.
            try (OutputStream out = new FileOutputStream("E:\\web.html");
                 InputStream in = url.openStream()) {
                byte[] buffer = new byte[8192];
                int len;
                while ((len = in.read(buffer)) != -1) {
                    out.write(buffer, 0, len);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Prints one line to standard output. */
    private static void print(String s) {
        System.out.println(s);
    }
}
package com.adolph.download;
import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Extracts candidate image links from the saved page {@code E:\web.html}.
 *
 * <p>Links are found by pattern-matching the markup of every element, not by
 * selecting img tags.
 */
public class GetImage {

    /** A whitespace-free token that ends in a supported image extension. */
    private static final Pattern IMAGE_TOKEN = Pattern.compile("\\S*\\.(?:png|jpe?g|gif)");

    /**
     * Raw, de-duplicated image tokens found by the last {@link #getImage()}
     * call; empty until that method has run.
     */
    public static HashSet<String> Set = new HashSet<String>();

    public GetImage() {
    }

    /**
     * Parses {@code E:\web.html} as UTF-8 and stores every token that looks
     * like an image link in {@link #Set}, replacing the previous contents.
     *
     * <p>An I/O failure while reading the file is reported with a stack trace
     * and leaves {@link #Set} empty.
     */
    public void getImage() {
        try {
            Set = new HashSet<String>();
            print("正在解析...");
            Document document = Jsoup.parse(new File("E:\\web.html"), "utf-8");
            print("解析html成功!");
            for (Element element : document.getAllElements()) {
                Matcher m = IMAGE_TOKEN.matcher(element.html());
                // html() is the element's inner HTML, so one container can hold
                // many image links; collect all of them, not just the first.
                while (m.find()) {
                    Set.add(m.group(0));
                }
            }
            print("获得elements!");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Prints one line to standard output. */
    public static void print(String s) {
        System.out.println(s);
    }
}
package com.adolph.download;
import java.util.HashSet;
import java.util.regex.*;
import java.util.*;
/**
 * Turns the raw tokens collected in {@code GetImage.Set} into download
 * targets: {@link #list} holds each target without its leading {@code //},
 * and {@link #name} holds the matching file name at the same index.
 */
public class Change {

    /** Scheme-relative links ({@code //...ext}) found inside the raw tokens. */
    public static HashSet<String> Set = new HashSet<String>();

    /** The same links with the {@code //} separators split away. */
    public static HashSet<String> Set1 = new HashSet<String>();

    /** Download targets, in the iteration order of {@link #Set1}. */
    public static List<String> list = new ArrayList<String>();

    /** File name (text after the last {@code /}) of the target at the same index in {@link #list}. */
    public static List<String> name = new ArrayList<String>();

    public Change() {
    }

    /**
     * Refines {@code GetImage.Set} in two passes and appends the results to
     * {@link #list} and {@link #name}.
     */
    public static void change() {
        // Pass 1: cut every raw token at double quotes and keep the
        // scheme-relative image link found in each piece.
        Pattern schemeRelative = Pattern.compile("//\\S*\\.(?:png|jpe?g|gif)");
        for (String rawToken : GetImage.Set) {
            collectFirstMatches(schemeRelative, rawToken.split("\""), Set);
        }

        // Pass 2: cut every link at "//" and keep the part that ends in an
        // image extension.
        Pattern upToExtension = Pattern.compile(".*\\.(?:png|jpe?g|gif)");
        for (String link : Set) {
            collectFirstMatches(upToExtension, link.split("//"), Set1);
        }

        // Publish each target together with its file name.
        for (String target : Set1) {
            list.add(target);
            name.add(target.substring(target.lastIndexOf('/') + 1));
        }
    }

    /** Adds the first match of {@code pattern} in each piece, if any, to {@code sink}. */
    private static void collectFirstMatches(Pattern pattern, String[] pieces, HashSet<String> sink) {
        for (String piece : pieces) {
            Matcher matcher = pattern.matcher(piece);
            if (matcher.find()) {
                sink.add(matcher.group());
            }
        }
    }

    /** Prints one line to standard output. */
    public static void print(String s) {
        System.out.println(s);
    }
}
package com.adolph.download;
import java.util.concurrent.*;
/**
 * Runs the image downloads collected by {@code Change} on a fixed-size
 * thread pool.
 *
 * <p>Expected call order: {@link #create()}, then {@link #play()}.
 */
public class DownTask {

    /** Index into {@code Change.name} of the next image to schedule; reset by {@link #play()}. */
    static int i = 0;

    /** Pool that executes the download tasks; assigned by {@link #create()}. */
    public static ExecutorService excute;

    /** Creates the pool of 8 worker threads used by {@link #play()}. */
    public static void create() {
        excute = Executors.newFixedThreadPool(8);
    }

    /**
     * Downloads every entry of {@code Change.list} to
     * {@code E:\image\<file name>} and shuts the pool down afterwards.
     *
     * <p>All tasks are submitted before any of them is awaited, so up to 8
     * downloads run at the same time. A failure to schedule or run one image
     * is reported with a stack trace and does not stop the others. If the
     * calling thread is interrupted while waiting, waiting stops early and the
     * interrupt status is restored.
     */
    public static void play() {
        // Start from the first name on every call; a stale index from an
        // earlier call would run past the end of Change.name.
        i = 0;
        Future<?>[] pending = new Future<?>[Change.list.size()];
        for (String str : Change.list) {
            try {
                pending[i] = excute.submit(new Task("http://" + str, "E:\\image\\" + Change.name.get(i)));
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                i++;
            }
        }

        // Wait only after everything is queued; waiting right after each
        // submit would run the downloads one at a time.
        for (Future<?> future : pending) {
            if (future == null) {
                continue; // this image could not be scheduled
            }
            try {
                future.get();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                e.printStackTrace();
                break;
            } catch (ExecutionException e) {
                e.printStackTrace();
            }
        }

        pause();
        print("*************");
        print("下载结束!!!!!!");
        print("*************");
    }

    /**
     * Shuts the pool down so that it accepts no further tasks. Safe to call
     * more than once; any failure is reported with a stack trace.
     */
    public static void pause() {
        try {
            excute.shutdown();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Prints one line to standard output. */
    public static void print(String s) {
        System.out.println(s);
    }
}
package com.adolph.download;
import java.io.*;
import java.net.*;
/**
 * Downloads one picture from a URL into a local file.
 */
public class Task implements Runnable {

    private final String picSourceURL;
    private final String picDestPath;

    /**
     * @param picSourceURL address of the picture to download
     * @param picDestPath  path of the file the picture is written to
     */
    public Task(String picSourceURL, String picDestPath) {
        this.picSourceURL = picSourceURL;
        this.picDestPath = picDestPath;
    }

    /**
     * Copies the content behind the source URL to the destination file,
     * creating the destination directory when it does not exist yet.
     *
     * <p>Never throws: a failure is reported on standard output together with
     * a stack trace. Both streams are closed in every case.
     */
    @Override
    public void run() {
        try {
            URL url = new URL(picSourceURL);
            // The source is opened first so that an unreachable picture does
            // not leave an empty destination file behind.
            try (InputStream in = new BufferedInputStream(url.openStream());
                 OutputStream out = openDestination()) {
                byte[] buffer = new byte[8192];
                int len;
                print("正在下载" + this.picDestPath + "......");
                while ((len = in.read(buffer)) != -1) {
                    out.write(buffer, 0, len);
                }
            }
            // Reported only after both streams were flushed and closed cleanly.
            print("下载成功!!!");
        } catch (Exception e) {
            print("下载失败!!!");
            e.printStackTrace();
        }
    }

    /** Opens the destination file for writing, creating its parent directory if needed. */
    private OutputStream openDestination() throws IOException {
        File parent = new File(picDestPath).getAbsoluteFile().getParentFile();
        // mkdirs() returns false when the directory already exists (possibly
        // created by a concurrent task), hence the second check.
        if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
            throw new IOException("cannot create directory " + parent);
        }
        return new BufferedOutputStream(new FileOutputStream(picDestPath));
    }

    /** Prints one line to standard output. */
    public static void print(String s) {
        System.out.println(s);
    }
}
比如获取当当网首页图片