# 示例1:Python 3.X HTTP代理调用·爬虫(动态)代理IP
'''
Python 3.x
描述:本DEMO演示了使用爬虫(动态)代理IP请求网页的过程,代码使用了多线程
逻辑:每隔5秒从API接口获取IP,对于每一个IP开启一个线程去抓取网页源码
'''
import requests
import time
import threading
from requests.packages import urllib3
ips = []  # shared proxy-IP pool; rebound (global) by GetIpThread.run() on each API fetch
class CrawlThread(threading.Thread):
    """Worker thread that fetches ``targetUrl`` through a single proxy IP.

    Each instance is bound to one ``ip:port`` proxy string; ``run`` downloads
    the target page through that proxy, times the request, and prints the
    elapsed time together with the fetched HTML.
    """

    def __init__(self, proxyip):
        """:param proxyip: proxy address in ``ip:port`` form (no scheme prefix)."""
        super(CrawlThread, self).__init__()
        self.proxyip = proxyip

    def run(self):
        start = time.time()
        # verify=False is used below, so suppress the InsecureRequestWarning
        # urllib3 would otherwise emit on every request.
        urllib3.disable_warnings()
        proxies = {
            "http": 'http://' + self.proxyip,
            # NOTE(review): for a plain HTTP forward proxy the https entry
            # normally also uses the http:// scheme; an https:// scheme makes
            # requests attempt TLS to the proxy itself — confirm with the
            # proxy provider before relying on HTTPS targets.
            "https": 'https://' + self.proxyip,
        }
        try:
            html = requests.get(
                url=targetUrl,  # module-level global, defined elsewhere in this file
                proxies=proxies,
                verify=False,
                timeout=15,
            ).content.decode()
        except requests.RequestException as exc:
            # A dead/slow proxy must not kill the whole demo: report and exit
            # this worker instead of letting the exception escape the thread.
            print(threading.current_thread().getName()
                  + " proxy " + self.proxyip + " failed: " + str(exc))
            return
        # BUG FIX: time.time() differences are in seconds, but the message
        # labels the value 毫秒 (milliseconds) — convert before printing.
        elapsed_ms = (time.time() - start) * 1000
        print(threading.current_thread().getName() + "使用代理IP, 耗时 "
              + str(elapsed_ms) + "毫秒 " + self.proxyip
              + " 获取到如下HTML内容:\n" + html + "\n*************")
class GetIpThread(threading.Thread):
def __init__(self,fetchSecond):
super(GetIpThread, self).__init__()
self.fetchSecond=fetchSecond
def run(self):
global ips
while True:
res = requests.get(apiUrl).content.decode()
ips = res.split('\n'