Java Executor多线程框架

Java Executor多线程框架 Demo

package com.ws.springsplider.service;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

/**
 * Executor framework demo: simulates crawling a fixed number of pages on a
 * fixed-size thread pool.
 *
 * <p>Each task receives its own page number when it is submitted, so no mutable
 * state is shared between the submitting thread and the pool threads. Lines may
 * be printed out of numeric order because up to {@code POOL_SIZE} tasks run
 * concurrently, but every page number is printed exactly once.
 */
public class ExecutorTest {
	/** Number of pages to "crawl". */
	private static final int TOTAL_PAGES = 100;
	/** Number of worker threads in the pool. */
	private static final int POOL_SIZE = 10;
	/** Upper bound, in seconds, on how long main waits for the tasks to finish. */
	private static final long AWAIT_SECONDS = 60L;

	/**
	 * Submits one task per page, waits for all of them to finish, then prints a
	 * completion message.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		ExecutorService executorService = Executors.newFixedThreadPool(POOL_SIZE);
		for (int page = 1; page <= TOTAL_PAGES; page++) {
			// Capture the page number per task instead of reading and incrementing
			// a shared counter from several threads.
			final int pageNo = page;
			executorService.execute(new Runnable() {
				@Override
				public void run() {
					System.out.println("爬取了第" + pageNo + "网页...");
				}
			});
		}

		// No new tasks are accepted after shutdown(); already-queued tasks still run.
		executorService.shutdown();
		boolean finished = false;
		try {
			finished = executorService.awaitTermination(AWAIT_SECONDS, TimeUnit.SECONDS);
			if (!finished) {
				executorService.shutdownNow();
			}
		} catch (InterruptedException e) {
			executorService.shutdownNow();
			// Restore the interrupt flag so the caller can observe the interruption.
			Thread.currentThread().interrupt();
		}
		if (finished) {
			System.out.println("爬虫任务已经完成");
		}
	}
}


线程池的submit和execute方法区别

线程池中的execute方法,即开启线程执行池中的任务,
还有一个方法submit也可以做到,它的功能是提交指定的任务去执行并且返回Future对象,通过该对象可以获取任务的执行结果
下面简要介绍一下两者的三个区别:

1、接收的参数不一样:execute只接收Runnable,而submit既可以接收Runnable,也可以接收Callable

2、submit有返回值,而execute没有
用到返回值的例子,比如说我有很多个做validation的task,我希望所有的task执行完,然后每个task告诉我它的执行结果,是成功还是失败,如果是失败,原因是什么。
然后我就可以把所有失败的原因综合起来发给调用者。 个人觉得通过Future取消执行(cancel execution)这个用处不大,很少有需要去取消执行的,而最大的用处应该是第二点(获取返回值)

3、submit方便Exception处理
意思就是如果你在你的task里会抛出checked或者unchecked exception,
而你又希望外面的调用者能够感知这些exception并做出及时的处理,那么就需要用到submit,通过捕获Future.get抛出的异常。

下面一个小程序演示一下submit方法

/**
 * Demonstrates the difference between {@code execute} and {@code submit} on an
 * {@link ExecutorService}: only {@code submit} returns a {@link Future} through
 * which the caller can observe whether the task failed.
 */
public class RunnableTestMain {
    /**
     * Runs one task with {@code execute} and one with {@code submit}, then
     * reports the outcome of the submitted task.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        ExecutorService pool = Executors.newFixedThreadPool(2);
        try {
            // execute(Runnable) returns nothing: the task runs, but the caller
            // cannot tell whether it completed successfully.
            pool.execute(new RunnableTest("Task1"));

            // submit(Runnable) returns a Future that reports completion or failure.
            Future<?> future = pool.submit(new RunnableTest("Task2"));
            try {
                // For a Runnable, get() returns null once the task has completed normally.
                if (future.get() == null) {
                    System.out.println("任务完成");
                }
            } catch (InterruptedException e) {
                // Restore the interrupt flag instead of silently dropping it.
                Thread.currentThread().interrupt();
            } catch (ExecutionException e) {
                // The task threw; its exception is available as the cause.
                System.out.println(e.getCause().getMessage());
            }
        } finally {
            // Without this the pool's non-daemon worker threads keep the JVM alive.
            pool.shutdown();
        }
    }
}

/**
 * Sample task that prints its name and then always fails with a
 * {@link RuntimeException}, so callers can see how a pool reports task failures.
 */
public class RunnableTest implements Runnable {
    /** Name used in the printed line and in the exception message. */
    private String taskName;

    /**
     * @param taskName label identifying this task in its output
     */
    public RunnableTest(final String taskName) {
        this.taskName = taskName;
    }

    /**
     * Prints {@code Inside <taskName>} and then throws unconditionally.
     *
     * @throws RuntimeException always, with the task name in the message
     */
    @Override
    public void run() {
        final String label = this.taskName;
        final String banner = new StringBuilder("Inside ").append(label).toString();
        System.out.println(banner);
        final String failure =
                new StringBuilder("RuntimeException from inside ").append(label).toString();
        throw new RuntimeException(failure);
    }
}

Executor多线程框架使用

    /**
     * Crawls forum content with multiple threads.
     *
     * <p>Reads the spreadsheet at a fixed path, collects the distinct values of
     * the {@code code} column in first-seen order, and submits one
     * {@code EastMoneyContentThread} per code to a five-thread pool. The method
     * returns after submitting; it does not wait for the tasks to finish.
     *
     * <p>If the spreadsheet does not exist, the stack trace is printed and
     * nothing is submitted.
     */
    public void crawlingGbContentData() {
        //String path = "E:/localgbcontent/gbcontent.xlsx"; // Windows path
        String path = "/home/splider/gbcontent/gbcontent.xlsx"; // Linux path
        File f = new File(path);

        List<String> list = new ArrayList<String>();
        // try-with-resources so the stream is closed on every path.
        try (InputStream inputStream = new FileInputStream(f)) {
            ExcelLogs logs = new ExcelLogs();
            Collection<Map> excelList = ExcelUtil.importExcel(Map.class, inputStream, "yyyy/MM/dd HH:mm:ss", logs , 0);
            System.out.println(excelList.size());
            // Collect each code once, preserving first-seen order.
            for (Map<?, ?> m : excelList) {
                String code = String.valueOf(m.get("code"));
                if (!list.contains(code)) {
                    list.add(code);
                }
            }
        } catch (FileNotFoundException e) {
            // Without the input file there is nothing to crawl.
            e.printStackTrace();
            return;
        } catch (java.io.IOException e) {
            // Only closing the stream can raise this; rows already read stay usable.
            e.printStackTrace();
        }
        System.out.println(list.toString());

        // Thread pool that runs one crawl task per code.
        ExecutorService executorService = Executors.newFixedThreadPool(5);
        for (String code : list) {
            EastMoneyContentThread emThread = new EastMoneyContentThread(code,this);
            // submit returns a Future (execute does not); it is not used here.
            executorService.submit(emThread);
        }
        // Stop accepting new tasks; tasks already submitted still run.
        executorService.shutdown();
    }

实现Runnable 接口(线程池executorService.submit执行Runnable 接口的业务任务实现)

package com.ws.springsplider.thread;
import com.ws.springsplider.model.EastMoney_JMZY;
import com.ws.springsplider.service.CrawlingEastMoneyContentService;
import com.ws.springsplider.utils.TimeUtil;
import org.apache.commons.io.FileUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.logging.log4j.util.Strings;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.File;
import java.io.IOException;
import java.util.Date;
import java.util.List;

/**
 * Pool task that crawls the content pages of every record belonging to one code.
 *
 * <p>For each record with a content URL, the page is fetched through the service
 * and parsed. If its publish time passes the date check, the raw page is saved
 * to disk and the record is updated through the service.
 */
public class EastMoneyContentThread implements Runnable {

    public static Log logger = LogFactory.getLog(EastMoneyContentThread.class);

    /** Code whose records this task processes. */
    private String code;

    /** Service used to list records, fetch pages and persist updates. */
    private CrawlingEastMoneyContentService emcService;

    /**
     * @param code       code whose records should be crawled
     * @param emcService service used for lookups, HTTP fetches and updates
     */
    public EastMoneyContentThread(String code, CrawlingEastMoneyContentService emcService) {
        this.code = code;
        this.emcService = emcService;
    }

    /**
     * Processes all records of {@code code}, pausing 500 ms between records.
     *
     * <p>Any exception ends processing for this code and is printed. If the
     * thread is interrupted, the interrupt flag is restored before returning so
     * the owning executor can observe it.
     */
    @Override
    public void run() {
        try {
            System.out.println("进入 子线程 ********" + Thread.currentThread().getName());
            logger.info("进入 子线程 ********" + Thread.currentThread().getName());

            logger.info("开始获取数据库列表......" + Thread.currentThread().getName());
            System.out.println("开始获取数据库列表......" + Thread.currentThread().getName());
            System.out.println("code="+code+"------" + Thread.currentThread().getName());

            List<EastMoney_JMZY> emList = emcService.selectEastMoneyListByCode(code);

            logger.info("emList数据大小="+emList.size());
            System.out.println("emList数据大小="+emList.size());

            // Position of the record in the list; used in the saved file name.
            int i=0;
            for(EastMoney_JMZY jmzy: emList){
                if(Strings.isNotBlank(jmzy.getContentUrl())){
                    logger.info("开始爬取内容数据......"+ Thread.currentThread().getName());
                    System.out.println("开始爬取内容数据......"+ Thread.currentThread().getName());
                    // Fetch the content page.
                    String gbContent = emcService.postSubmit(jmzy.getContentUrl());
                    Document docTitle = Jsoup.parse(gbContent);
                    if (docTitle != null) {
                        if (docTitle.getElementById("zwcontent") != null) {
                            logger.info("进入解析内容流程********"+ Thread.currentThread().getName());
                            System.out.println("进入解析内容流程********" + Thread.currentThread().getName());
                            // Publish time shown on the content page.
                            String date = docTitle.getElementsByClass("zwfbtime").get(0).text().trim();
                            if(Strings.isNotBlank(date)){
                                // Keep the text between the first and the last space.
                                String dateSub = date.substring(date.indexOf(" "), date.lastIndexOf(" "));
                                String timeFormat = TimeUtil.formatDate(dateSub.trim(), "yyyy-MM-dd HH:mm:ss");
                                // NOTE(review): presumably a yyyyMMdd integer, given the
                                // comparison with 20170101 below - confirm against TimeUtil.
                                int ftime = TimeUtil.getTimeNumber(timeFormat);
                                int dtime = 20170101;
                                // Records that fail this check are skipped and the loop
                                // moves on to the next record.
                                if(ftime>dtime){
                                    // Save the crawled page locally.
                                    //String path = "E:/gbfiles/" + code + "/" + "_" + i + ".txt"; // Windows path
                                    String path = "/home/splider/gbfiles/" + code + "/" + "_" + i + ".txt"; // Linux path
                                    File file = new File(path);
                                    // Write only when the file is missing or empty.
                                    if (!file.exists() || file.length() == 0) {
                                        saveParseData(gbContent, file);
                                        logger.info("文件 :" + file + "保存" + "==" + new Date());
                                        System.out.println("文件 :" + file + "保存" + "==" + new Date());
                                    }
                                    // Copy the parsed fields onto the record before updating it.
                                    jmzy.setPublishTime(timeFormat);
                                    jmzy.setLastUpdateTime(TimeUtil.getDate(new Date()));
                                    jmzy.setContent(docTitle.getElementsByClass("stockcodec").text());
                                    // Also record where the local copy was saved.
                                    jmzy.setContentFileUrl(path);

                                    // Persist the update for this record.
                                    int mark = emcService.updateSelective(jmzy);
                                    if (mark != 0) {
                                        System.out.println("code="+code+"--"+"id="+jmzy.getId()+"--"+file + "--  更新成功     ==" + new Date());
                                        logger.info("code="+code+"--"+"id="+jmzy.getId()+"--"+file + "--  更新成功     ==" + new Date());
                                    } else {
                                        System.out.println("code="+code+"--"+"id="+jmzy.getId()+"--"+file + "--  更新失败     ==" + new Date());
                                        logger.info("code="+code+"--"+"id="+jmzy.getId()+"--"+file + "--  更新失败     ==" + new Date());
                                    }
                                }
                            }
                        } else {
                            // Content element missing (possibly the IP has been blocked):
                            // stop processing the remaining records of this code.
                            // NOTE(review): the original comment said to continue with the
                            // next record, but break ends the loop - confirm the intent.
                            break;
                        }
                    }
                }
                i++;
                // Pause between records.
                Thread.sleep(500);
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag; the catch-all below would otherwise swallow it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes the given text to a file as UTF-8. An I/O failure is printed, not
     * propagated.
     *
     * @param context text to write
     * @param file    destination file
     */
    public static void saveParseData(String context, File file) {
        try {
            FileUtils.writeStringToFile(file, context, "UTF-8");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

发送http请求,获取页面内容

    /**
     * Sends an HTTP GET request with browser-like headers and returns the
     * response body decoded as UTF-8.
     *
     * <p>Despite its name, this method issues a GET, not a POST. Every failure
     * is printed and reported by returning {@code null}; no exception is
     * propagated to the caller even though the signature declares one.
     *
     * @param URL address of the page to fetch
     * @return the response body, or {@code null} if the request failed
     * @throws Exception declared for compatibility with existing callers
     */
    public static String postSubmit(String URL) throws Exception {
        String responseContent = null;
        try {
            HttpGet request = new HttpGet(URL);

            // request.setHeader("Authorization", token);

            // NOTE(review): per the HttpClient 4.x RequestConfig documentation a
            // value of 0 means "no timeout", so an unresponsive server can block
            // the calling thread indefinitely - confirm this is intended.
            RequestConfig config = RequestConfig.custom().setSocketTimeout(0).setConnectTimeout(0)
                    .setConnectionRequestTimeout(0).build();
            // This applies the timeout settings only; no proxy is configured here.
            request.setConfig(config);

            request.setHeader("Accept",
                    "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
            request.setHeader("Accept-Encoding", "gzip, deflate");
            request.setHeader("Accept-Language", "zh-CN,zh;q=0.9");
            request.setHeader("Cache-Control", "no-cache");
            request.setHeader("Connection", "keep-alive");

            // request.setHeader("Cookie",
            // "st_pvi=13101359948035; st_si=73199158729108;
            // qgqp_b_id=a64c2248dd015b209aa8ea01d6bb0d30;
            // _adsame_fullscreen_16884=1");

            // The Host header is fixed, whatever host the URL points to.
            request.setHeader("Host", "guba.eastmoney.com");
            request.setHeader("Pragma", "no-cache");
            request.setHeader("Upgrade-Insecure-Requests", "1");
            request.setHeader("User-Agent",
                    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36");

            // try-with-resources closes the response first and then the client,
            // and still closes the client if closing the response fails.
            try (CloseableHttpClient httpClient = HttpClients.createDefault();
                 CloseableHttpResponse response = httpClient.execute(request)) {
                org.apache.http.HttpEntity entity = response.getEntity();
                responseContent = EntityUtils.toString(entity, "UTF-8");
                System.out.println("执行请求");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return responseContent;
    }
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

nizhengjia888

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值