【爬虫用户代理和ip自动生成】

文章介绍了Java中的CrawlerHelper工具类:它可以随机生成用户代理(User-Agent,由浏览器内核标识、操作系统类型、浏览器版本等拼接而成)以及随机的国内IP地址,使爬虫发出的请求看起来更像真实用户的访问。
摘要由CSDN通过智能技术生成

爬虫用户代理和ip自动生成


辛辛苦苦搬砖真辛苦啊

package com.glodon.gbes.utils;

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;

/**
 * 爬虫辅助工具类
 *
 * @author luochao
 * @since 20240126
 */
/**
 * Crawler helper utilities.
 *
 * <p>Builds random browser User-Agent strings from fixed pools of fragments and picks random
 * IPv4 addresses from a fixed table of address blocks.
 */
public class CrawlerHelper {
    // Shared generator for the no-argument entry points, so a new Random is not allocated on
    // every call. java.util.Random may be used from several threads.
    private static final Random RANDOM = new Random();

    // Leading product token of the User-Agent, including the opening parenthesis.
    private static final List<String> CONSUMERS = Arrays.asList("Mozilla/5.0 (", "Mozilla/5.01 (", "Mozilla/4.0 (");
    // Operating-system / platform fragments. Repeated entries are kept as in the original pool;
    // they make those platforms proportionally more likely to be picked.
    private static final List<String> SYSTEM_TYPES = Arrays.asList(
            "Windows NT 10.0; WOW64", "Macintosh; Intel Mac OS X 10_12_6", "Macintosh; Intel Mac OS X 10.12; rv:65.0",
            "Windows NT 10.0; Win64; x64", "Windows NT 10.0; WOW64; Trident/7.0; rv:11.0", "Windows NT 6.1; WOW64",
            "Windows NT 6.3; Win64; x64", "Windows NT 10.0; WOW64", "Macintosh mips64", "Macintosh mips64",
            "Macintosh; Intel Mac OS X 11_2_3", "Macintosh; Intel Mac OS X 11_2_1", "Macintosh; Intel Mac OS X 11_2_1",
            "Macintosh; Intel Mac OS X 11_2_1", "Macintosh; Intel Mac OS X 10_15_4", "Macintosh; Intel Mac OS X 10_14_6",
            "Macintosh; Intel Mac OS X 10_14_6", "Macintosh; Intel Mac OS X 10_14_6", "Macintosh; Intel Mac OS X 10_14_6",
            "Macintosh; Intel Mac OS X 10_14_4", "Macintosh; Intel Mac OS X 10_14_4", "Macintosh; Intel Mac OS X 10_14_3",
            "Macintosh; Intel Mac OS X 10_14_2", "Macintosh; Intel Mac OS X 10_14_0", "Macintosh; Intel Mac OS X 10_13_6",
            "Macintosh; Intel Mac OS X 10_13_6", "Macintosh; Intel Mac OS X 10_13_6", "Macintosh; Intel Mac OS X 10_13_6",
            "Macintosh; Intel Mac OS X 10_12_6", "Macintosh; Intel Mac OS X 10_11_6", "Macintosh; Intel Mac OS X 10_11_6"
    );
    // Closes the platform parenthesis and starts the rendering-engine token.
    private static final String KERNEL_PREFIX = ") AppleWebKit";
    // Rendering-engine version suffixes; the chosen one is reused for the trailing "Safari" token.
    private static final List<String> BROWSER_KERNELS = Arrays.asList(
            "/537.36", "/607.3.10", "/13605.3.8", "/605.1.15", "/546.36", "/543.36", "/542.36", "/540.36", "/538.36",
            "/536.36", "/535.36", "/533.36", "/530.36", "/528.36", "/525.36", "/511.36", "/509.36", "/508.36",
            "/506.36", "/504.36", "/503.36", "/500.36", "/499.36", "/498.36", "/496.36", "/494.36", "/491.36",
            "/490.36", "/489.36", "/483.36", "/482.36", "/481.36", "/476.36", "/473.36", "/472.36", "/471.36",
            "/469.36", "/466.36"
    );
    private static final String KERNEL_SUFFIX = " (KHTML, like Gecko) ";
    // Browser product/version tokens. None may carry surrounding whitespace: the separators are
    // supplied by the neighbouring fragments.
    private static final List<String> BROWSER_VERSIONS = Arrays.asList(
            "Chrome/90.0.4430.72", "Chrome/72.0.3626.121", "Chrome/87.0.27523.82", "Chrome/88.0.30832.82", "Chrome/89.0.4389.128",
            "Chrome/88.0.48357.82", "Chrome/86.0.16571.82", "Chrome/80.0.3987.87", "Chrome/73.0.3683.75", "Chrome/108.0.5359.95",
            "Chrome/89.0.4350.7", "Chrome/89.0.4389.90", "Chrome/67.0.1762.3", "Chrome/91.0.4455.2", "Chrome/87.0.4280.88",
            "Chrome/87.0.34697.82", "Chrome/87.0.40937.82", "Chrome/89.0.20219.82", "Chrome/60.0.3112.90", "Chrome/86.0.33219.82",
            "Chrome/87.0.7030.82", "Chrome/64.0.3282.140", "Chrome/88.0.40585.82", "Chrome/89.0.5219.82", "Chrome/80.0.3987.122",
            "Chrome/87.0.48110.82", "Chrome/89.0.4389.82", "Chrome/86.0.49343.82", "Chrome/48.0.2564.116", "Chrome/86.0.27485.82",
            "Chrome/86.0.11902.82", "Chrome/89.0.4385.82", "Chrome/89.0.43907.82", "Chrome/87.0.32496.82", "Chrome/89.0.48906.82",
            "Chrome/91.0.4466.0", "Chrome/88.0.4324.192", "Chrome/89.0.45365.82", "Chrome/75.0.3770.80", "Chrome/87.0.17682.82",
            "Chrome/80.0.3987.132", "Chrome/86.0.5210.82", "Chrome/89.0.42050.82", "Chrome/76.0.3809.87", "Chrome/67.0.200.124",
            "Chrome/89.0.33519.82", "Chrome/78.0.3904.108", "Chrome/76.0.3809.100", "Chrome/88.0.46354.82", "Chrome/87.0.44790.82",
            "Chrome/88.0.4324.182", "Chrome/87.0.4280.67", "Chrome/82.0.4077.0", "Chrome/88.0.48271.82", "Chrome/81.0.4044.129",
            "Chrome/87.0.48788.82", "Chrome/86.0.36322.82", "Chrome/90.0.4430.11", "Chrome/87.0.7809.82", "Chrome/86.0.4240.198",
            "Chrome/88.0.35623.82", "Chrome/87.0.42434.82", "Chrome/89.0.4389.114", "Chrome/80.0.3987.100", "Chrome/89.0.34528.82",
            "Chrome/69.0.3497.100", "Chrome/53.0.2785.104", "Chrome/84.0.4147.105", "Chrome/87.0.37035.82", "Chrome/77.0.3865.90",
            "Chrome/83.0.4103.56", "Chrome/78.0.3872.0", "Chrome/81.0.4023.0", "Chrome/79.0.3945.136", "Chrome/86.0.10846.82",
            "Chrome/89.0.9922.82", "Chrome/80.0.3987.149", "Chrome/88.0.4324.50", "Chrome/80.0.3987.106", "Chrome/86.0.26591.82",
            "Chrome/89.0.14272.82", "Chrome/88.0.9787.82", "Chrome/87.0.28829.82", "Chrome/84.0.4147.89", "Chrome/41.0.2227.0"
    );
    // Optional trailing vendor token; each entry already starts with its separating space.
    private static final List<String> BROWSER_TYPES = Arrays.asList(
            " Edg/89.0.774.75", " Maxthon/5.1.60", " Explorer/10.15.0.21066", " Edg/90.0.818.39", " Edg/89.0.774.77",
            " Edg/88.0.705.29", " Edg/88.0.705.74", " Edg/87.0.664.66", " OPR/73.0.3856.260", " Edg/83.0.478.33",
            " Edg/78.0.244.0", " Edge/13.18362"
    );
    // IP pool: each row is {first, last} of an address block, both ends inclusive, stored as the
    // signed 32-bit value of the big-endian IPv4 address (addresses >= 128.0.0.0 are negative).
    // The original author describes these as domestic (Chinese) blocks; that is not verified here.
    private static final int[][] IP_RANGES = {
            { 607649792, 608174079 },// 36.56.0.0-36.63.255.255
            { 1038614528, 1039007743 },// 61.232.0.0-61.237.255.255
            { 1783627776, 1784676351 },// 106.80.0.0-106.95.255.255
            { 2035023872, 2035154943 },// 121.76.0.0-121.77.255.255
            { 2078801920, 2079064063 },// 123.232.0.0-123.235.255.255
            { -1950089216, -1948778497 },// 139.196.0.0-139.215.255.255
            { -1425539072, -1425014785 },// 171.8.0.0-171.15.255.255
            { -1236271104, -1235419137 },// 182.80.0.0-182.92.255.255
            { -770113536, -768606209 },// 210.25.0.0-210.47.255.255
            { -569376768, -564133889 }, // 222.16.0.0-222.95.255.255
    };

    /**
     * Builds a random User-Agent string using the shared random generator.
     *
     * @return a User-Agent string assembled from randomly chosen fragments
     */
    public static String getRandomAgent() {
        return getRandomAgent(RANDOM);
    }

    /**
     * Builds a random User-Agent string using the supplied generator, which allows
     * reproducible output when a seeded {@link Random} is passed.
     *
     * <p>The result has the shape
     * {@code <product> (<platform>) AppleWebKit<kernel> (KHTML, like Gecko) <browser> Safari<kernel>},
     * optionally followed by one extra vendor token.
     *
     * @param random source of randomness; must not be {@code null}
     * @return a User-Agent string assembled from randomly chosen fragments
     */
    public static String getRandomAgent(Random random) {
        // Method-local buffer, so the unsynchronized StringBuilder is sufficient.
        StringBuilder userAgent = new StringBuilder();
        userAgent.append(CONSUMERS.get(random.nextInt(CONSUMERS.size())));
        userAgent.append(SYSTEM_TYPES.get(random.nextInt(SYSTEM_TYPES.size())));
        userAgent.append(KERNEL_PREFIX);
        // The same engine version is emitted twice: after "AppleWebKit" and after "Safari".
        String kernelVersion = BROWSER_KERNELS.get(random.nextInt(BROWSER_KERNELS.size()));
        userAgent.append(kernelVersion);
        userAgent.append(KERNEL_SUFFIX);
        userAgent.append(BROWSER_VERSIONS.get(random.nextInt(BROWSER_VERSIONS.size())));
        userAgent.append(" Safari").append(kernelVersion);
        // Values 8 and 9 out of 0..9: roughly one in five agents gets the extra vendor token.
        int isAdditional = random.nextInt(10);
        if (isAdditional > 7) {
            userAgent.append(BROWSER_TYPES.get(random.nextInt(BROWSER_TYPES.size())));
        }
        return userAgent.toString();
    }

    /**
     * Picks a random IPv4 address from the built-in IP pool using the shared random generator.
     *
     * @return the address in dotted-decimal notation
     */
    public static String getRandomIp() {
        return getRandomIp(RANDOM);
    }

    /**
     * Picks a random IPv4 address from the built-in IP pool using the supplied generator.
     *
     * <p>A block is chosen uniformly from the pool first, then an address uniformly within that
     * block, so addresses in small blocks are individually more likely than those in large ones.
     *
     * @param random source of randomness; must not be {@code null}
     * @return the address in dotted-decimal notation
     */
    public static String getRandomIp(Random random) {
        // Bound the index by the table itself rather than a literal, so rows can be added safely.
        int[] block = IP_RANGES[random.nextInt(IP_RANGES.length)];
        // Both ends of a block are valid addresses; +1 makes the exclusive bound of nextInt
        // cover the last address as well.
        int intIp = block[0] + random.nextInt(block[1] - block[0] + 1);
        // Split the 32-bit value into its four octets, most significant first. Masking with 0xff
        // discards the sign extension produced by >> on negative values.
        int first = (intIp >> 24) & 0xff;
        int second = (intIp >> 16) & 0xff;
        int third = (intIp >> 8) & 0xff;
        int fourth = intIp & 0xff;
        return first + "." + second + "." + third + "." + fourth;
    }

    /**
     * Demo entry point: prints 10000 random IPs and 10000 random User-Agents, followed by a
     * separator line and the number of distinct values seen in each run.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Set<String> resSet = new HashSet<>();
        for (int i = 0; i < 10000; i++) {
            String randomIp = getRandomIp();
            resSet.add(randomIp);
            System.out.println(randomIp);
        }
        Set<String> agent = new HashSet<>();
        for (int i = 0; i < 10000; i++) {
            String userAgent = getRandomAgent();
            agent.add(userAgent);
            System.out.println(userAgent);
        }
        System.out.println("================================");
        // The sets were collected but never read in the original; report what they measured.
        System.out.println("distinct ips: " + resSet.size());
        System.out.println("distinct user agents: " + agent.size());
    }
}

  • 3
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值