python-代理池

概念

将多个不同的用户代理(User-Agent)构建成一个池子,每次请求时从中随机选取一个使用
用户代理池简单应用

import urllib.request
import re
import random
# Pool of browser User-Agent strings that the request code below draws from.
urlpoors = [
    "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Mobile Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18362",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
]
def ua(urlpools):
    """Install a global urllib opener that sends a randomly chosen User-Agent.

    Args:
        urlpools: Non-empty sequence of User-Agent strings to choose from.

    Raises:
        IndexError: If ``urlpools`` is empty (raised by ``random.choice``).
    """
    # Choose from the pool supplied by the caller.
    thisua = random.choice(urlpools)
    print(thisua)
    # Browser disguise: present the chosen string as the User-Agent header.
    headers = ("User-Agent", thisua)
    opener = urllib.request.build_opener()
    opener.addheaders = [headers]
    # Make this opener the one used by later urllib.request.urlopen() calls.
    urllib.request.install_opener(opener)
# Switch to a freshly chosen User-Agent ten times in a row.
for i in range(10):
    ua(urlpoors)
    # Then simply add the actual crawling code here.

单个IP代理(没钱买代理,只写了个具体的形式,后面再补充)

import urllib.request
# Proxy address to use; left empty here as a placeholder to be filled in.
ip = ""
# Map the "http" scheme to the proxy and install the opener globally so that
# the urlopen() call below goes through it.
proxy = urllib.request.ProxyHandler({"http": ip})
opener = urllib.request.build_opener(proxy, urllib.request.HTTPHandler)
urllib.request.install_opener(opener)
url = "http://baidu.com"
# Close the HTTP response deterministically once the body has been read.
with urllib.request.urlopen(url) as response:
    data1 = response.read()
data = data1.decode("utf-8", "ignore")
# Save the raw bytes; the context manager closes the file even if write fails.
with open(r"D:/python/mj/baidu.html", "wb") as file:
    file.write(data1)

ip代理池实战
ip代理池构建的第一种方式(适用于稳定ip)

import urllib.request
import random
# Pool of proxy addresses; the blank strings are placeholders to be replaced
# with real proxy addresses before running.
ippools = [
    " ",
    " ",
]
def ip(ippools):
    """Install a global urllib opener that routes HTTP through a random proxy.

    Args:
        ippools: Non-empty sequence of proxy address strings to choose from.

    Raises:
        IndexError: If ``ippools`` is empty (raised by ``random.choice``).
    """
    thisip = random.choice(ippools)
    print(thisip)
    # ProxyHandler requires a mapping of scheme -> proxy, so this must be a
    # dict ({"http": ...}); a set literal ({"http", ...}) is rejected.
    proxy = urllib.request.ProxyHandler({"http": thisip})
    opener = urllib.request.build_opener(proxy, urllib.request.HTTPHandler)
    # Make this opener the one used by later urllib.request.urlopen() calls.
    urllib.request.install_opener(opener)
# Fetch the page five times, each time through a freshly chosen random proxy.
for i in range(0, 5):
    try:
        ip(ippools)
        url = "http://baidu.com"
        # Close the HTTP response deterministically once the body is read.
        with urllib.request.urlopen(url) as response:
            data1 = response.read()
        data = data1.decode("utf-8", "ignore")
        # Each iteration overwrites the same output file with the raw bytes.
        with open(r"D:/python/mj/baidu.html", "wb") as file:
            file.write(data1)
    except Exception as err:
        # Best effort: report the failure and continue with the next attempt.
        print(err)

ip代理池构建的第二种方式(接口,调用)

实战:爬取京东某商品图片

import urllib.request
import re
import random
# Pool of browser User-Agent strings that the crawl below passes to ua().
urlpools = [
    "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Mobile Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18362",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
]
def ua(urlpools):
    """Pick a random User-Agent from ``urlpools`` and install it globally.

    The chosen string is printed, set as the only default header on a new
    opener, and that opener is installed for later ``urlopen`` calls.
    """
    chosen_agent = random.choice(urlpools)
    print(chosen_agent)
    # Browser disguise: the opener's default headers carry only the User-Agent.
    spoofing_opener = urllib.request.build_opener()
    spoofing_opener.addheaders = [("User-Agent", chosen_agent)]
    urllib.request.install_opener(spoofing_opener)
# Capture the part of each image URL between "//img" and ".jpg". The dot is
# escaped so that only a literal ".jpg" suffix matches, and the pattern is
# compiled once for all pages.
pat = r'src="//img(.*?)\.jpg">'
imgpat = re.compile(pat)
for i in range(1, 3):
    url = "https://list.jd.com/list.html?cat=1315,1342,1349&page=" + str(i)
    # Switch to a randomly chosen User-Agent before requesting each page.
    ua(urlpools)
    with urllib.request.urlopen(url) as response:
        # Ignore undecodable bytes so one bad byte does not abort the crawl.
        data = response.read().decode("utf-8", "ignore")
    imglist = imgpat.findall(data)
    for j, thisimg in enumerate(imglist):
        thisimgurl = "https://img" + thisimg + ".jpg"
        # File name is the page number followed by the image index on that page.
        localfile = "D:\\python\\mj\\jd\\" + str(i) + str(j) + ".jpg"
        try:
            urllib.request.urlretrieve(thisimgurl, filename=localfile)
            print("successful")
            print(thisimg)
        except Exception as err:
            # Best effort: report the failed image and carry on with the rest.
            print(err)
            print(thisimg)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值