# coding:utf-8
import re
import urllib.request
import os
import random
import socket
"""
date:2017-08-03
version:2.0
"""
class GetIP:
    """Scrape a free proxy-list page and hand out proxies that accept connections.

    The list is downloaded once, in ``__init__``, and cached on ``iplist`` as a
    list of ``"ip:port"`` strings.
    """

    # Page that is scraped, plus the header value and timeout used to fetch it.
    PROXY_LIST_URL = "http://www.xicidaili.com/wt/"
    USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    FETCH_TIMEOUT = 1
    # Only entries whose scraped speed figure is strictly below this are kept
    # (presumably seconds -- confirm against the page).
    MAX_SPEED = 0.5

    # Compiled once; the capture groups pull out the bare port / speed text.
    _IP_RE = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
    _PORT_RE = re.compile(r'<td>(\d{2,5})</td>')
    _SPEED_RE = re.compile(r'title="(\d{1,3}\.\d{1,4})')

    def __init__(self):
        """Download the proxy list and cache it on ``self.iplist``."""
        self.iplist = self.get_ip_port()

    def get_ip_port(self):
        """Fetch the proxy-list page and return the fast proxies it lists.

        :return: list of ``"ip:port"`` strings whose speed figure is below
            ``MAX_SPEED``. If the page cannot be fetched or decoded, a
            diagnostic is printed and an empty list is returned, so the result
            is always a list.
        """
        url = self.PROXY_LIST_URL
        headers = {'User-Agent': self.USER_AGENT}
        req = urllib.request.Request(url=url, headers=headers)
        try:
            # ``with`` closes the HTTP response even if read/decode fails.
            with urllib.request.urlopen(req, timeout=self.FETCH_TIMEOUT) as resp:
                page = resp.read().decode('utf-8')
        except Exception:
            # Deliberate best-effort: any fetch/decode failure means "no
            # proxies", but Ctrl-C / SystemExit are no longer swallowed.
            print(url + "网站不可达")
            return []
        ip = self._parse_proxies(page)
        print("我被执行了")
        return ip

    @classmethod
    def _parse_proxies(cls, page):
        """Extract ``"ip:port"`` strings for fast entries from the page HTML."""
        ips = cls._IP_RE.findall(page)
        ports = cls._PORT_RE.findall(page)
        # Only every other title="x.y" value is used as the speed figure
        # (presumably the page has two such values per entry -- confirm).
        speeds = [float(text) for text in cls._SPEED_RE.findall(page)[::2]]
        # zip() stops at the shortest list, so a page where the three regexes
        # find different numbers of matches cannot raise IndexError.
        return [
            address + ":" + port
            for address, port, speed in zip(ips, ports, speeds)
            if speed < cls.MAX_SPEED
        ]

    def ping(self, ip, timeout=2):
        """Check whether a TCP connection to a proxy can be opened.

        :param ip: address in the form ``139.208.85.232:80``
        :param timeout: seconds to wait for the connection (default 2)
        :return: 1 if the connection succeeded, 0 otherwise (including
            malformed input and socket errors)
        """
        print(ip)
        parts = ip.split(":")
        if len(parts) < 2:
            return 0
        try:
            address = (parts[0], int(parts[-1]))
        except ValueError:
            return 0
        try:
            # The context manager closes the socket on every path, including
            # a failed connect.
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as cs:
                cs.settimeout(float(timeout))
                return 1 if cs.connect_ex(address) == 0 else 0
        except (OSError, OverflowError, ValueError, TypeError):
            # connect_ex still raises for e.g. unresolvable hosts or an
            # out-of-range port; treat all of these as "not usable".
            return 0

    def useful_ip(self):
        """Return a randomly chosen proxy from ``iplist`` that accepts connections.

        Each cached proxy is probed at most once, in random order.

        :return: an ``"ip:port"`` string, or ``None`` when the list is empty
            or no proxy in it is reachable
        """
        iplist = self.iplist
        print(iplist)
        candidates = list(iplist or [])
        random.shuffle(candidates)
        for ip in candidates:
            if self.ping(ip):
                return ip
        return None
if __name__ == '__main__':
    # GetIP() already downloads and filters the proxy list in __init__, so
    # show the cached result instead of requesting the page a second time.
    AA = GetIP()
    print(AA.iplist)
    # To draw a reachable proxy from the cached list, call AA.useful_ip().
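# 爬虫实战----从免费IP代理网站获取连接率较好的可用IP
# (Crawler in practice: fetching usable, well-connected IPs from a free proxy-list site.)
# 最新推荐文章于 2024-01-23 10:19:45 发布
# (Web-page residue: "latest recommended article published 2024-01-23 10:19:45".)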