Python 爬虫入门 IP代理使用

最新推荐文章于 2024-05-27 08:52:59 发布

安果移不动

最新推荐文章于 2024-05-27 08:52:59 发布

阅读量1.5k

点赞数

分类专栏： 005python

独学而无友，则孤陋寡闻。q群582951247

本文链接：https://blog.csdn.net/mp624183768/article/details/80466628

版权

005python 专栏收录该内容

212 篇文章 5 订阅

订阅专栏

首先获取 IP 代理列表，并保存到 ip.txt：

#encoding=utf8
import urllib
from lxml import etree

import requests

from bs4 import BeautifulSoup

# Scrape the proxy table from xicidaili.com and save one "IP PORT" pair per
# line to ip.txt.
User_Agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0'
# Send a browser-like User-Agent header with the request.
header = {'User-Agent': User_Agent}

url = 'http://www.xicidaili.com/nn/1'

# Download the page exactly once. The earlier version also fetched it through
# urllib and then discarded that result, doubling the network traffic.
# A timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(url, headers=header, timeout=10)
# The removed urllib call raised on HTTP error statuses; keep failing loudly
# instead of silently parsing an error page.
response.raise_for_status()
res = response.text

# Every direct child of the element with id "ip_list" is treated as one row.
rows = etree.HTML(res).xpath('//*[@id="ip_list"]/*')

entries = []
# The first row is skipped (the original code skipped index 0 as well).
for row in rows[1:]:
    # Cell 1 is read as the IP address and cell 2 as the port; rows with
    # fewer than three cells carry no usable pair.
    if len(row) < 3:
        continue
    ip_text = row[1].text
    port_text = row[2].text
    # .text is None for an empty cell; skip such rows instead of crashing
    # on string concatenation.
    if not ip_text or not port_text:
        continue
    entries.append(ip_text.strip() + " " + port_text.strip())

# One "IP PORT" pair per line, each line terminated by a newline.
data = "".join(entry + "\n" for entry in entries)

with open("ip.txt", "w", encoding="utf-8") as f:
    f.write(data)

然后逐个检测 ip.txt 中的代理 IP 是否可用：

#encoding=utf8
import urllib
import socket
from urllib import request

# Try every proxy listed in ip.txt (one "IP PORT" pair per line) by fetching
# a test URL through it, printing either the response body or the failure.

# Default timeout, in seconds, for sockets created from here on, so a dead
# proxy cannot block the loop indefinitely.
socket.setdefaulttimeout(3)

proxys = []
# The context manager closes the file even if parsing fails part-way.
with open("ip.txt", encoding="utf-8") as f:
    for line in f:
        parts = line.split()
        # Skip blank or malformed lines instead of raising IndexError.
        if len(parts) < 2:
            continue
        proxy_host = "http://" + parts[0] + ":" + parts[1]
        proxys.append({"http": proxy_host})

url = "http://ip.chinaz.com/getip.aspx"
for proxy in proxys:
    try:
        proxy_support = request.ProxyHandler(proxy)
        opener = request.build_opener(proxy_support)
        # Close the response explicitly so connections are not leaked
        # across a long proxy list.
        with opener.open(url) as response:
            res = response.read()
        print(res)
    except Exception as e:
        # Best-effort check: any failure marks this proxy as unusable;
        # report it and move on to the next one.
        print(proxy)
        print(e)
        continue

安果移不动

关注

0
点赞
踩
3

收藏

觉得还不错? 一键收藏
打赏
0
评论
Python 爬虫入门 IP代理使用

首先获取Ip列表#encoding=utf8import urllibfrom lxml import etreeimport requestsfrom bs4 import BeautifulSoupUser_Agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0'hea...
复制链接

扫一扫