Java 爬虫被禁 IP 问题:案例

1.开机自启动爬虫

@CrossOrigin
@RestController
@RequestMapping("/property")
public class PropertyController implements CommandLineRunner {

// Injected pipeline; property01() registers it on the spider via addPipeline(...).
@Autowired
private PropertyDaoPipeLine01 diYiPropertyDaoPipeLine;

/**
 * CommandLineRunner callback; starts the crawl by delegating to
 * {@link #property01()}.
 *
 * @param args arguments passed to the runner; not used by this method
 * @throws Exception declared to match the overridden signature
 */
@Override
public void run(String... args) throws Exception {
    this.property01();
}

/**
 * Builds a spider for the listing start page and launches it with
 * {@code runAsync()}.
 *
 * <p>Mapped to GET {@code /start01} under the controller's {@code /property}
 * prefix, and also invoked from {@code run(String...)}.
 */
@GetMapping("/start01")
public void property01() {
    final String startUrl =
            "http://wh.01fy.cn/sale/list_2_0_0_0-0_0_0-0_0_0_0-0_0_0-0_2_0_1_.html";
    final PropertyPageProcessor01 pageProcessor = new PropertyPageProcessor01();

    Spider.create(pageProcessor)
            .addUrl(startUrl)
            .addPipeline(diYiPropertyDaoPipeLine)
            .thread(1)
            .setExitWhenComplete(true)
            .setDownloader(Downloader.newIpDownloader())
            .runAsync();
}

2.实体类

import org.springframework.data.annotation.Id;

/**
 * Data holder for one property (real-estate) listing.
 *
 * <p>The snake_case name in each field comment is carried over from the
 * original comments; presumably it is the matching storage column name —
 * TODO confirm.
 */
@Data
public class Property {

/** Identifier of this record. */
@Id
private Long id;

/**
 * Contact person (user_name)
 */
private String userName;

/**
 * Contact phone number (user_mobile)
 */
private String userMobile;

/**
 * Estate / building name (estate_name)
 */
private String estateName;

/**
 * Listing title (property_title)
 */
private String propertyTitle;

/**
 * District / area (area_name)
 */
private String areaName;

/**
 * Layout: number of bedrooms, living rooms and bathrooms (house_type)
 */
private String houseType;

/**
 * Floor area (size)
 */
private String size;

/**
 * Total price (price)
 */
private String price;

/**
 * Unit price (ava_price)
 */
private String avaPrice;

/**
 * Link to the listing detail page (source_url)
 */
private String sourceUrl;

/**
 * Status code. Original comment, translated literally:
 * "not viewed 1 viewed 2 not synced 3 synced 4 deleted".
 * NOTE(review): it is unclear whether each number labels the state written
 * before or after it — confirm the mapping against the code that sets this
 * field.
 */
private Integer status;

}

3.自定义Agent

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.locks.ReentrantReadWriteLock;

    /**
     * @author wongH
     * @date 2019/5/7 9:51
     * @Version 1.0
     */
    public class Agent {
    private static final String AGENT_FILE_PATH = “user-agent/User-Agents.txt”;
    private static ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
    private static List agents;

    public static String getRandom() {
    String random = getRandom(null);
    System.err.println(“Agent======================>” + random);
    return random;
    }

    private static String getRandom(String agent) {
    try {
    lock.readLock().lock();
    int size = agents.size();
    if (size == 0)
    return “Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36”;
    Random random = new Random();
    if (null != agent) return agent;
    else return agents.get(random.nextInt(size));
    } catch (Exception e) {
    e.printStackTrace();
    return “Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36”;
    } finally {
    lock.readLock().unlock();
    }
    }

    static {
    agents = new ArrayList<>();
    InputStream resourceAsStream = null;
    InputStreamReader inputStreamReader = null;
    BufferedReader bufferedReader = null;
    try {
    resourceAsStream = Agent.class.getClassLoader().getResourceAsStream(AGENT_FILE_PATH);
    inputStreamReader = new InputStreamReader(resourceAsStream);
    bufferedReader = new BufferedReader(inputStreamReader);

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值