Java 多线程超时下载单个网页中的图片

先上图:


这一次没有采用正则匹配,而是采用了最简单的 Java 字符串分割(split)和替换(replace)方法来筛选图片


它能够筛选如下的图片并保存到指定的文件夹

如:

"http://xxxx/xxxx/xxx.jpg"

'http://xxxx/xxxx/xxx.jpg'

如果中间的分隔符为\/而不是/,可进行替换

如将http:\/\/xxxxx\/xxx\/xxx.jpg替换为http://xxxxx/xxx/xxx.jpg

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
 
/**
 * Downloads the images referenced by a single web page.
 *
 * <p>The page is fetched as text, split on double and single quotes, and every
 * quoted token that starts with {@code http} and ends with one of the
 * configured image suffixes is treated as an image URL. The URLs are then
 * downloaded by a small number of worker threads, each request being subject
 * to a timeout.
 *
 * @author ITWANG
 */
public class CatchImage
{

    // Address of the page to scan.
    private static final String PAGE_URL = "http://image.baidu.com/channel?c=%E7%BE%8E%E5%A5%B3&t=%E5%B0%8F%E6%B8%85%E6%96%B0&s=0";
    // Charset used to decode the page.
    private static final String ECODING = "UTF-8";
    // Image suffixes that are accepted.
    private static final String[] PIC_SUFFIXES = { "jpg", "JPG", "gif", "GIF", "png", "PNG" };
    // Number of download threads.
    private static final int THREAD_COUNT = 3;
    // Connect/read timeout in milliseconds.
    private static final int TIMEOUT_MS = 4000;
    // User-Agent header sent with every image request.
    private static final String USER_AGENT = "Mozilla/5.0 (Linux; U; Android 4.0.2; en-us; Galaxy Nexus Build/ICL53F) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30";

    public static void main(String[] args) throws Exception
    {
        CatchImage cm = new CatchImage();
        // Fetch the page as text, extract the image URLs, normalise them, download.
        String html = cm.getHTML(PAGE_URL);
        List<String> imgSrc = cm.getttr(html, PIC_SUFFIXES);
        List<String> pList = cm.picFilter(imgSrc);
        cm.TOThreadDownload(pList, "E:\\Imagesave" + saveDiff(), THREAD_COUNT, TIMEOUT_MS);
    }

    /**
     * Un-escapes URLs of the form {@code http:\/\/host\/path\/x.jpg} into
     * {@code http://host/path/x.jpg}.
     *
     * @param picurl image URLs, possibly with escaped slashes
     * @return a new list with every {@code \/} replaced by {@code /}
     */
    public List<String> picFilter(List<String> picurl)
    {
        List<String> list = new ArrayList<>(picurl.size());
        for (String string : picurl)
        {
            list.add(string.replace("\\/", "/"));
        }
        return list;
    }

    /**
     * Extracts the image URLs of a single page.
     *
     * <p>The source is split once on {@code "} and once on {@code '}; a token
     * is kept when it starts with {@code http} and ends with one of the given
     * suffixes. Matches from the double-quote pass come first.
     *
     * @param htmlsource
     *            the HTML text; {@code null} yields an empty list
     * @param picstuffix
     *            accepted suffixes
     * @return the image URLs in order of appearance
     */
    public List<String> getttr(String htmlsource, String[] picstuffix)
    {
        List<String> listpic = new ArrayList<>();
        if (htmlsource == null)
        {
            return listpic;
        }
        String[] doubleQuoted = htmlsource.split("\"");
        System.out.println("双引号分割:" + doubleQuoted.length);
        collectImageUrls(doubleQuoted, picstuffix, listpic);

        String[] singleQuoted = htmlsource.split("\'");
        System.out.println("单引号分割:" + singleQuoted.length);
        collectImageUrls(singleQuoted, picstuffix, listpic);

        System.out.println(listpic.size());
        for (String string : listpic)
        {
            System.out.println(string);
        }
        return listpic;
    }

    /** Appends to {@code out} every token that looks like an image URL. */
    private static void collectImageUrls(String[] tokens, String[] suffixes, List<String> out)
    {
        for (String token : tokens)
        {
            if (!token.startsWith("http"))
            {
                continue;
            }
            for (String suffix : suffixes)
            {
                if (token.endsWith(suffix))
                {
                    out.add(token);
                    // One token is one URL: stop so overlapping suffixes cannot add it twice.
                    break;
                }
            }
        }
    }

    /**
     * Fetches a page and returns its content as a string.
     *
     * @param url
     *            page address
     * @return the page content decoded with {@link #ECODING}
     * @throws Exception
     *             if the connection or the read fails
     */
    private String getHTML(String url) throws Exception
    {
        URLConnection connection = new URL(url).openConnection();
        // Collect the raw bytes first and decode once at the end: decoding each
        // chunk separately would corrupt multi-byte characters that straddle two
        // reads, and decoding the whole buffer would append stale bytes.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        try (InputStream in = connection.getInputStream())
        {
            byte[] buf = new byte[1024];
            int length;
            while ((length = in.read(buf)) != -1)
            {
                bytes.write(buf, 0, length);
            }
        }
        return bytes.toString(ECODING);
    }

    /**
     * Downloads the images with several threads, each request bounded by a timeout.
     *
     * <p>The list is cut into at most {@code tnum} contiguous slices and one
     * thread is started per slice. The method returns without waiting for the
     * threads to finish.
     *
     * @param listImgSrc
     *            image URLs
     * @param savedir
     *            target directory
     * @param tnum
     *            maximum number of threads to start; must be positive
     * @param timeout
     *            connect/read timeout in milliseconds
     * @throws IllegalArgumentException
     *             if {@code tnum} is not positive
     */
    private void TOThreadDownload(List<String> listImgSrc, String savedir, int tnum, int timeout)
    {
        if (tnum <= 0)
        {
            throw new IllegalArgumentException("tnum must be positive: " + tnum);
        }
        if (listImgSrc == null || listImgSrc.isEmpty())
        {
            return;
        }
        int total = listImgSrc.size();
        // Ceiling division, so that tnum slices always cover the whole list.
        int sliceSize = total / tnum + (total % tnum == 0 ? 0 : 1);
        for (int start = 0; start < total; start += sliceSize)
        {
            new TODThread(savedir, sliceSize, listImgSrc, start, timeout).start();
        }
    }

    /**
     * Worker thread that downloads a contiguous slice of the URL list.
     *
     * @author ITWANG
     */
    class TODThread extends Thread
    {
        private final String savedir;
        private final int tnum;
        private final List<String> listImgSrc;
        private final int begin;
        private final int timeout;

        /**
         * @param savedir    target directory
         * @param tnum       maximum number of URLs this thread handles
         * @param listImgSrc complete URL list
         * @param bnum       index of the first URL handled by this thread
         * @param timeout    connect/read timeout in milliseconds
         */
        public TODThread(String savedir, int tnum, List<String> listImgSrc, int bnum, int timeout)
        {
            this.savedir = savedir;
            this.tnum = tnum;
            this.listImgSrc = listImgSrc;
            this.begin = bnum;
            this.timeout = timeout;
        }

        @Override
        public void run()
        {
            // Clamp the slice to the list so a short last slice (or an empty
            // list) does not run past the end.
            int first = Math.max(0, begin);
            int end = (int) Math.min((long) listImgSrc.size(), (long) begin + tnum);
            for (int i = first; i < end; i++)
            {
                String url = listImgSrc.get(i);
                try
                {
                    String imageName = UUID.randomUUID().toString() + extensionOf(url);
                    if (getPic(url, savedir, imageName, timeout))
                    {
                        System.out.println("*^_^*");
                    } else
                    {
                        System.out.println("-_-!");
                    }
                } catch (Exception e)
                {
                    // A failed image must not stop the remaining downloads.
                    System.out.println("下载异常:" + e);
                }
            }
        }
    }

    /**
     * Returns the extension of the last path segment including the dot, or an
     * empty string when that segment has none.
     */
    private static String extensionOf(String url)
    {
        int dot = url.lastIndexOf('.');
        if (dot < 0 || dot < url.lastIndexOf('/'))
        {
            return "";
        }
        return url.substring(dot);
    }

    /**
     * Downloads one image with an HTTP GET request.
     *
     * @param purl
     *            image URL
     * @param folder
     *            target directory; created when missing
     * @param filename
     *            name of the file to write inside {@code folder}
     * @param timeout
     *            timeout in milliseconds, applied to both connect and read
     * @return {@code true} when the server answered 200 and the file was
     *         written, {@code false} for any other response code
     * @throws Exception
     *             if the URL is invalid, the directory cannot be created, or
     *             the transfer fails or times out
     */
    public boolean getPic(String purl, String folder, String filename, int timeout) throws Exception
    {
        URL url = new URL(purl);
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try
        {
            conn.setConnectTimeout(timeout);
            // Without a read timeout a stalled server would block the thread forever.
            conn.setReadTimeout(timeout);
            conn.setRequestMethod("GET");
            conn.setRequestProperty("User-Agent", USER_AGENT);
            if (conn.getResponseCode() != HttpURLConnection.HTTP_OK)
            {
                System.out.println("失败:" + url);
                return false;
            }

            File dir = new File(folder);
            // mkdirs() returns false when another thread created the directory
            // first, so only fail when the directory really does not exist.
            if (!dir.mkdirs() && !dir.isDirectory())
            {
                throw new IOException("Cannot create directory: " + dir);
            }

            File target = new File(dir, filename);
            boolean complete = false;
            try (InputStream is = conn.getInputStream();
                 OutputStream os = new FileOutputStream(target))
            {
                byte[] bs = new byte[1024];
                int len;
                while ((len = is.read(bs)) != -1)
                {
                    os.write(bs, 0, len);
                }
                complete = true;
            } finally
            {
                if (!complete)
                {
                    // Do not leave a truncated image behind.
                    target.delete();
                }
            }
            System.out.println("成功:" + url);
            return true;
        } finally
        {
            conn.disconnect();
        }
    }

    /**
     * Builds a directory name from the current time.
     *
     * @return the current time formatted as {@code yyyy-MM-dd-HH-mm-ss},
     *         wrapped in backslashes
     */
    public static String saveDiff()
    {
        SimpleDateFormat formate = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");
        return "\\" + formate.format(System.currentTimeMillis()) + "\\";
    }

}


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值