私人

最新推荐文章于 2024-06-17 13:58:05 发布

小白蒋博客

最新推荐文章于 2024-06-17 13:58:05 发布

阅读量195

点赞数

分类专栏： python爬虫学习

小白蒋

本文链接：https://blog.csdn.net/weixin_37413070/article/details/103948601

版权

python爬虫学习专栏收录该内容

6 篇文章 1 订阅

订阅专栏


import pandas as pd
import numpy as np
import requests
import time
from user_agent import getheaders
import urllib3
urllib3.disable_warnings()

class Add_Fresh():
    """Request one URL once through every proxy listed in ``xici.csv``.

    Typical use is ``Add_Fresh(url).read_csv()``, which loads the proxy
    list and then immediately issues the requests.
    """

    def __init__(self, url):
        """Remember the target URL and start with empty lists and counters.

        Args:
            url: Address that ``request_function`` fetches through each proxy.
        """
        self.ips = []    # raw values of the CSV's first column
        self.url = url
        self.li = []     # one ``proxies`` mapping per usable address
        self.m = 1       # number printed for the next successful request
        self.n = 1       # number printed for the next failed request

    @staticmethod
    def _build_proxies(address):
        """Turn one ``SCHEME://host:port`` address into a proxies mapping.

        Args:
            address: A value taken from the CSV; anything that is not a
                string of the form ``scheme://rest`` is rejected.

        Returns:
            ``{'http': address}`` when the scheme is HTTP (compared
            case-insensitively), ``{'https': address}`` for any other
            scheme, or ``None`` when the value cannot be parsed.
        """
        # Empty CSV cells are read as float NaN, which has no string methods.
        if not isinstance(address, str):
            return None
        address = address.strip()
        scheme, separator, rest = address.partition('://')
        if not separator or not rest:
            return None
        # Every non-HTTP scheme falls back to the 'https' key, as before.
        key = 'http' if scheme.upper() == 'HTTP' else 'https'
        return {key: address}

    def read_csv(self):
        """Load proxy addresses from ``xici.csv`` and request through them.

        The first column of every row is recorded in ``self.ips``; each
        parseable address becomes a mapping in ``self.li``.  Unparseable
        rows are skipped instead of aborting the whole run.  Finishes by
        calling ``request_function``.
        """
        data = pd.read_csv('xici.csv')

        # Start from scratch so a repeated call does not duplicate proxies.
        self.ips = [row[0] for row in data.values]
        self.li = []
        for address in self.ips:
            proxies = self._build_proxies(address)
            if proxies is not None:
                self.li.append(proxies)

        self.request_function()

    def request_function(self):
        """Fetch ``self.url`` once per entry of ``self.li`` and report it.

        Prints a numbered line for every success and every failure,
        advancing ``self.m`` and ``self.n`` respectively.  A failing proxy
        never stops the loop.
        """
        for proxies in self.li:
            headers = {
                # presumably a User-Agent string from the local helper
                # module -- TODO confirm against user_agent.getheaders
                'User-Agent': getheaders(),
            }
            # NOTE(review): requests picks a proxy by the scheme of the
            # *target* URL, so an 'http'-keyed entry is not used when
            # self.url is https -- confirm this is intended.
            try:
                # verify=False turns off TLS certificate verification.
                requests.get(url=self.url, proxies=proxies, headers=headers,
                             timeout=5, verify=False)
            except Exception:
                # Deliberately broad: dead or misbehaving proxies are
                # expected here and should only be counted.
                print('第%s次访问错误' % self.n)
                self.n += 1
            else:
                print('第%s次正常访问' % self.m)
                self.m += 1


def chongfu(url='https://www.bmlink.com/jsydgkw/news/1334966.html', rounds=20):
    """Run several full passes over the proxy list against one URL.

    Each pass builds a fresh ``Add_Fresh`` and calls its ``read_csv``, so
    the numbering printed by that object restarts on every pass.

    Args:
        url: Page to request; defaults to the address this script was
            written for.  Pass another URL instead of editing the code.
        rounds: Number of passes to run; defaults to 20.
    """
    for i in range(rounds):
        print('第%s次开始' % i)
        Add_Fresh(url).read_csv()


import threading

# Nine worker threads, each running chongfu() with its default arguments.
threads = [threading.Thread(target=chongfu) for _ in range(9)]

# Start them all; they are non-daemon threads, so the interpreter stays
# alive until every one has finished.
for t in threads:
    t.start()

小白蒋博客

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
打赏
0
评论
私人

import pandas as pd; import numpy as np; import requests; import time; from user_agent import getheaders; import urllib3; urllib3.disable_warnings(); class Add_Fresh(): def __init__(self,url): ...
复制链接

扫一扫