java爬虫爬取360安全卫士对手机号的标识

废话不多说,直接上代码。

使用多线程:Future + Callable

因为360做了IP限制,本人只是凑合着用,就没有做IP切换(了解了一下,实现起来还挺麻烦)。

主要有2个类:TestThread 和 MyCallable

/**

 *   测试类
 */
package reachPhone;

import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import com.alibaba.fastjson.JSONObject;

/**
 * @author Geese
 *
 */
public class TestThread {
    public static void main(String[] args) throws ExecutionException, InterruptedException{
        System.out.println("----程序开始运行----");
        List<String> phoneList = new ArrayList<String>();
        phoneList.add("01053934604");
        phoneList.add("01053934610");
        phoneList.add("01053934612");
        phoneList.add("01053934613");
        phoneList.add("01053934614");
        phoneList.add("01053934615");
        phoneList.add("01053934616");
        phoneList.add("01053934634");
        phoneList.add("01053934635");
        phoneList.add("01053934636");
        List<Map<String, String>> list = findPhone(phoneList);
        for (int i = 0; i < list.size(); i++) {
            System.out.println(list.get(i));
        }
        System.out.println("----程序结束运行----");
    }
    
    private static List<Map<String, String>> findPhone(List<String> phoneList) throws ExecutionException, InterruptedException{
        Map<String, String> phoneMap = new HashMap<String, String>();
        List<Map<String, String>> resultList = new ArrayList<Map<String, String>>();

        int taskSize = 100;
        // 创建一个线程池
        ExecutorService pool = Executors.newFixedThreadPool(taskSize);
        // 创建多个有返回值的任务
        List<Future> list = new ArrayList<Future>();
        for (int i = 0; i < phoneList.size(); i++) {
            Callable c = new MyCallable(phoneList.get(i));
            // 执行任务并获取Future对象
            Future f = pool.submit(c);
            list.add(f);
        }
        // 关闭线程池
        pool.shutdown();

        // 获取所有并发任务的运行结果
        for (Future f : list) {
            // 从Future对象上获取任务的返回值,并输出到控制台
            String phoneFlag = f.get().toString();
            JSONObject object = JSONObject.parseObject(phoneFlag.replace("=", ":"));
            phoneMap.put(object.getString("phone"), object.getString("name"));
        }
        Map<String, String> contentMap = null;
        for (int i = 0; i < phoneList.size(); i++) {
            contentMap = new HashMap<>();
            contentMap.put("phone", phoneList.get(i));
               if(phoneMap.containsKey(phoneList.get(i))){
                   contentMap.put("name", phoneMap.get(phoneList.get(i)));
               }
               resultList.add(contentMap);
            }
        return resultList;
    }
    

}

 

/**
 *   请求实体类
 */
package reachPhone;

import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.Callable;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

/**
 * @author Geese
 *
 */
public class MyCallable implements Callable<Object>{
    private String phoneNum;
    MyCallable(String phoneNum) {
        this.phoneNum = phoneNum;
    }
 
    @Override
    public Map<String, String> call() throws Exception {
        Map<String, String> phoneMap = new HashMap<String, String>();
        phoneMap.put("\"phone\"", "\""+phoneNum+"\"");
        Document doc=Jsoup.parse(new URL("https://www.so.com/s?q="+phoneNum), 2000);  
        String phoneFlag = "无";
        Elements elements2=doc.getElementsByClass("mohe-ph-mark");  
        if (!elements2.isEmpty()) {  
            phoneFlag=elements2.get(0).text();  
        }  
        phoneMap.put("\"name\"", "\""+phoneFlag+"\"");
        return phoneMap;
    }
}

 

运行结果 返回list集合

打印结果:

----程序开始运行----
{phone=01053934604, name=无}
{phone=01053934610, name=骚扰电话}
{phone=01053934612, name=骚扰电话}
{phone=01053934613, name=骚扰电话}
{phone=01053934614, name=骚扰电话}
{phone=01053934615, name=骚扰电话}
{phone=01053934616, name=骚扰电话}
{phone=01053934634, name=骚扰电话}
{phone=01053934635, name=广告推销}
{phone=01053934636, name=广告推销}
----程序结束运行----

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值