Java下载网络压缩包及解压

       Java可以实现简单的网络爬虫,既可以爬取网络上的静态图和GIF图,也可以爬取网络压缩包,如Zip压缩包和Rar压缩包。有些图片或者GIF图一张一张地下载比较麻烦,所以有的网站会提供一个包含本网页文件的压缩包,供用户下载。本文就下载网络Zip压缩包的几点内容进行分享,主要内容是讲如何从某一网址批量下载Zip压缩包到本地,并把它们进行批量解压。

一、任务及效果

1:如下图是一个网页,该网页提供了很多Jar包的下载,我现在想用程序把它们全部下载到本地。


2:下载到本地,如下所示,现在我想用程序把本地的这些压缩文件全部解压到一个文件夹里。


3:解压后最终效果如下图所示



二、代码实现

package com.kendy.spider;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Small collection of static helpers for fetching a web page, downloading
 * files over HTTP and extracting zip archives found in a local directory.
 *
 * <p>All I/O helpers are best-effort: failures are reported with
 * {@code printStackTrace()} instead of being propagated, unless stated
 * otherwise on the method.
 *
 * @author kendy
 * @version 1.0
 */
public class ZipSpider {

	/** Directory that {@link #unzipFromLoc(String)} extracts archives into. */
	private static final String DEFAULT_UNZIP_DIR = "E:\\myDownload\\unzipFileFromWeb\\";

	/** Size in bytes of the buffer used when copying between streams. */
	private static final int BUFFER_SIZE = 8192;

	/**
	 * Fetches the content behind a URL and returns it as text.
	 *
	 * @param url      address of the page to read
	 * @param encoding charset name used to decode the response bytes
	 * @return the lines read so far, each terminated by {@code '\n'}; empty
	 *         (or partial) if an error occurred, in which case the stack
	 *         trace is printed
	 */
	public static String getHtmlFromUrl(String url,String encoding){
		StringBuilder html = new StringBuilder();
		try {
			URLConnection con = new URL(url).openConnection();
			// The raw stream is declared as its own resource so it is closed
			// even when the reader cannot be created (e.g. unknown encoding).
			try (InputStream in = con.getInputStream();
					BufferedReader reader = new BufferedReader(new InputStreamReader(in, encoding))) {
				String line;
				while ((line = reader.readLine()) != null) {
					html.append(line).append('\n');
				}
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
		return html.toString();
	}

	/**
	 * Downloads a remote file (image, GIF, archive, ...) to the local disk.
	 *
	 * <p>The local file name is the part of {@code url} after its last
	 * {@code '/'}; it is appended directly to {@code path}, so {@code path}
	 * must already end with a file separator. URLs that do not start with
	 * {@code "http"} are ignored and no file is created for them. Errors are
	 * reported with {@code printStackTrace()}.
	 *
	 * @param url  address of the file to download
	 * @param path directory prefix (including trailing separator) to save into
	 */
	public static void download(String url,String path){
		if (!url.startsWith("http")) {
			// Nothing to fetch; return before touching the file system so no
			// empty placeholder file is left behind.
			return;
		}
		String downloadName = url.substring(url.lastIndexOf("/")+1);
		File file = new File(path+downloadName);
		HttpURLConnection httpCon = null;
		try {
			httpCon = (HttpURLConnection) new URL(url).openConnection();
			// Open the remote stream first: if the request fails, the local
			// file is never created.
			try (InputStream in = httpCon.getInputStream();
					FileOutputStream fos = new FileOutputStream(file)) {
				byte[] buffer = new byte[BUFFER_SIZE];
				int num;
				while ((num = in.read(buffer)) != -1) {
					fos.write(buffer, 0, num);
				}
			}
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			if (httpCon != null) {
				httpCon.disconnect();
			}
		}
	}

	/**
	 * Extracts every entry of a local zip archive into a target directory.
	 *
	 * <p>Directory entries are created as directories and extraction
	 * continues with the following entries. An entry whose name would resolve
	 * outside of {@code toFile} aborts the extraction. Errors are reported
	 * with {@code printStackTrace()}.
	 *
	 * @param fromFile path of the zip archive to read
	 * @param toFile   directory the entries are extracted into
	 */
	public static void unzip(String fromFile,String toFile){
		try (ZipInputStream zin = new ZipInputStream(
				new BufferedInputStream(new FileInputStream(fromFile)))) {
			// Resolve the root the same way entry names are resolved below,
			// so both canonical paths are directly comparable.
			File destRoot = new File(toFile, ".").getCanonicalFile();
			byte[] buffer = new byte[BUFFER_SIZE];
			ZipEntry entry;
			while ((entry = zin.getNextEntry()) != null) {
				File target = resolveEntry(destRoot, toFile, entry);
				if (entry.isDirectory()) {
					target.mkdirs();
					continue;
				}
				File parent = target.getParentFile();
				if (parent != null) {
					parent.mkdirs();
				}
				try (BufferedOutputStream out =
						new BufferedOutputStream(new FileOutputStream(target))) {
					int n;
					// ZipInputStream.read returns -1 at the end of the current entry.
					while ((n = zin.read(buffer)) != -1) {
						out.write(buffer, 0, n);
					}
				}
				System.out.println(target+"解压成功");
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Maps a zip entry to its output file and rejects names such as
	 * {@code ../../x} that would be written outside the target directory.
	 */
	private static File resolveEntry(File destRoot, String toFile, ZipEntry entry)
			throws IOException {
		File target = new File(toFile, entry.getName());
		String rootPath = destRoot.getPath();
		String prefix = rootPath.endsWith(File.separator) ? rootPath : rootPath + File.separator;
		String targetPath = target.getCanonicalPath();
		if (!targetPath.equals(rootPath) && !targetPath.startsWith(prefix)) {
			throw new IOException("Zip entry escapes target directory: " + entry.getName());
		}
		return target;
	}

	/**
	 * Extracts every regular file directly inside {@code filePath} into the
	 * default target directory.
	 *
	 * @param filePath directory containing the archives
	 * @throws Exception if {@code filePath} cannot be listed as a directory
	 */
	public static void unzipFromLoc(String filePath) throws Exception{
		unzipFromLoc(filePath, DEFAULT_UNZIP_DIR);
	}

	/**
	 * Extracts every regular file directly inside {@code filePath} into
	 * {@code toPath}. Sub-directories of {@code filePath} are not visited.
	 * The absolute path of each archive is printed before it is extracted.
	 *
	 * @param filePath directory containing the archives
	 * @param toPath   directory the archives are extracted into
	 * @throws Exception if {@code filePath} cannot be listed as a directory
	 */
	public static void unzipFromLoc(String filePath, String toPath) throws Exception{
		File[] children = new File(filePath).listFiles();
		if (children == null) {
			// listFiles() returns null for a missing path, a non-directory or an I/O error.
			throw new FileNotFoundException("Not a readable directory: " + filePath);
		}
		for (File f : children) {
			if (f.isFile()) {
				// The absolute path is used as-is; separators need no escaping at runtime.
				String from = f.getAbsolutePath();
				sop(from);
				unzip(from, toPath);
			}
		}
	}

	/** Prints {@code obj} followed by a line break to standard output. */
	public static void sop(Object obj){
		System.out.println(obj);
	}

	/** Prints a separator line made of 100 copies of {@code c}. */
	public static void seperate(char c){
		for(int x=0;x<100;x++){
			System.out.print(c);
		}
		sop("");
	}

	/**
	 * Demo entry point: fetches the index page, selects its download links,
	 * then extracts every archive already present in the local zip directory.
	 */
	public static void main(String[] args) throws Exception{
		List<String> list = new ArrayList<>();

		String url = "http://cn.jarfire.org/axis2.json.html";
		String encoding = "utf-8";
		String html = getHtmlFromUrl(url,encoding);

		Document doc = Jsoup.parse(html);
		// <a> elements that carry an href attribute.
		// NOTE(review): the selected links are never consumed here; the
		// download step appears to be missing from main - confirm.
		Elements elements = doc.select("div.main ul li a[href]");

		unzipFromLoc("E:\\myDownload\\zipFile");
		seperate('*');

		sop(list.size());

	}
}


阅读更多
版权声明:本文为博主原创文章,欢迎转载分享。 https://blog.csdn.net/greatkendy123/article/details/51504366
个人分类: java
上一篇Java实现网络爬虫
下一篇Oracle用Loop循环实现大量CRUD操作
想对作者说点什么? 我来说一句

没有更多推荐了,返回首页

关闭
关闭
关闭