使用 IP 117.135.250.134、端口 80 作为代理服务器,爬取百度首页的代码。
import urllib.request
import os
import sys
import re
def testArgument(url):
    """Run the proxy check for the given proxy address.

    Constructing ``TestProxy`` performs the whole check, so the instance
    itself is not kept.

    Args:
        url: Proxy address string, passed unchanged to ``TestProxy``.
    """
    TestProxy(url)
def tipUse():
    """Print the usage hint shown when the proxy argument is rejected.

    The examples use the ``scheme://ip:port`` form (with the ``://``
    separator), which is the shape the proxy format check in this file
    looks for.
    """
    print('该程序只能输入一个参数,这个参数必须是可用的proxy')
    print('usage:python test Urllib2WithProxy.py http://1.2.3.4:5')
    print('usage:python test Urllib2WithProxy.py https://1.2.3.4:5')
class TestProxy(object):
    """Validate a proxy address and fetch a test page through it.

    Constructing an instance runs the whole check: the proxy string is
    validated, then ``self.url`` is requested through the proxy and the
    decoded response body is printed. A rejected proxy string or a failed
    request prints a message and terminates the process via ``SystemExit``.
    """

    # Accepted shape: scheme://a.b.c.d:port, ASCII digits only. Numeric
    # ranges are checked separately in _is_valid_proxy.
    _PROXY_PATTERN = re.compile(
        r'(https?)://'
        r'([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})'
        r':([0-9]{1,5})'
    )

    def __init__(self, proxy):
        """Validate *proxy* and immediately run the fetch through it.

        Args:
            proxy: Proxy address of the form ``http://1.2.3.4:5`` or
                ``https://1.2.3.4:5``.

        Raises:
            SystemExit: If the address is malformed or the request fails.
        """
        self.proxy = proxy
        self.checkProxyFormat(self.proxy)
        self.url = 'http://www.baidu.com'
        self.timeout = 5
        # NOTE: flagWord is stored but not read by any method of this class.
        self.flagWord = '百度'
        self.useProxy(self.proxy)

    @staticmethod
    def _in_range(text, low, high):
        """Return True if *text* is a canonical decimal in ``[low, high]``.

        "Canonical" rejects forms with leading zeros such as ``'01'``.
        """
        number = int(text)
        return text == str(number) and low <= number <= high

    @classmethod
    def _is_valid_proxy(cls, proxy):
        """Return True if *proxy* is a well-formed ``scheme://ip:port`` string."""
        if not isinstance(proxy, str):
            return False
        # fullmatch: the whole string must fit, so trailing junk such as a
        # path, an extra ":port" or a newline is rejected.
        match = cls._PROXY_PATTERN.fullmatch(proxy)
        if match is None:
            return False
        first, second, third, last, port = match.groups()[1:]
        bounds = (
            (first, 1, 255),
            (second, 0, 255),
            (third, 0, 255),
            (last, 1, 254),
            (port, 1, 65535),  # 65535 is a valid port and must be accepted
        )
        return all(cls._in_range(text, low, high) for text, low, high in bounds)

    def checkProxyFormat(self, proxy):
        """Check that *proxy* looks like ``http(s)://a.b.c.d:port``.

        Prints a confirmation message when the address is accepted.

        Args:
            proxy: Proxy address string to validate.

        Raises:
            SystemExit: With status 1, after printing the usage hint via
                ``tipUse()``, when the address is rejected.
        """
        if not self._is_valid_proxy(proxy):
            tipUse()
            sys.exit(1)
        print('输入的http代理服务器符合标准')

    def useProxy(self, proxy):
        """Fetch ``self.url`` through *proxy* and print the decoded body.

        The opener built here is also installed as the process-wide default
        opener of ``urllib.request``.

        Args:
            proxy: Proxy address string containing ``//`` followed by
                ``host:port``.

        Raises:
            IndexError: If *proxy* contains no ``//`` separator.
            SystemExit: With status 1 if the request or the read fails.
        """
        address = proxy.split('//', 1)[1]
        # ProxyHandler keys are the scheme of the *requested* URL, not of the
        # proxy itself. Register the proxy for both schemes so the request to
        # self.url (and any redirect) really goes through it.
        handler = urllib.request.ProxyHandler({'http': address, 'https': address})
        opener = urllib.request.build_opener(handler)
        urllib.request.install_opener(opener)
        try:
            with urllib.request.urlopen(self.url, timeout=self.timeout) as response:
                data = response.read()
        except Exception:
            # Boundary handler: any network/protocol failure ends the program,
            # but KeyboardInterrupt and SystemExit are left to propagate.
            print('连接错误,退出程序')
            sys.exit(1)
        # errors='replace' keeps a stray invalid byte from aborting the output.
        print(data.decode('UTF-8', errors='replace'))
# Run the proxy check with a hard-coded proxy address.
testArgument('https://117.135.250.134:80')