代理IP 多线程 伪造表头 爬虫小框架

翻到一个两年前写的爬虫小框架

# coding=utf-8

import tushare as ts
import pandas as pd
import requests
import json
import re
import time
from retrying import retry
from concurrent.futures import ThreadPoolExecutor
import random

def get_pro():
    list = ['122.114.31.177:808', '61.135.217.7:80', '113.121.243.109:808', '171.39.40.5:8123', '121.31.199.30:8123',
            '111.155.116.240:8123', '125.121.121.171:808', '115.213.178.192:808']

    return list


start = time.clock()  # 计时-开始

urlnum = range(8)
listdo = urlnum


while True:
    listye = []
    listno = []
    event = []
    @retry(stop_max_attempt_number=8)  # 设置最大重试次数
    def crawl(n):

        pro_list = get_pro()

        header = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}

        proxies_l = {'http': pro_list[random.randint(0, len(pro_list))],

                     }
        print(proxies_l['http'])

        try:
            req = requests.get('http://httpbin.org/ip', headers=header, proxies=proxies_l)
            print('finish')
            listye.append(n)
            listdo.remove(n)
            print (listdo)

            return  req.text

        except:
            print('no proxies')
            listno.append(n)

    # 多线程
    def multithreading():

        number = listdo

        with ThreadPoolExecutor(max_workers=10) as executor:
            for result in executor.map(crawl, number, chunksize=10):
                event.append(result)

        return event


    event = multithreading()
    print ('listye')
    print (listye)
    print ('listno')
    print (listno)
    print ('listdo')
    print (listdo)




    if len(listdo) == 0:
        break

end = time.clock()  # 计时-结束
print ("爬取完成 用时:")
print (end - start)

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值