下面展示一些内联代码片。
#快代理IP爬取 并建立可用IP池
import requests
import time
from lxml import etree
from fake_useragent import UserAgent
# Shared module state used by ip_spider() and check_ip().
#
# Build the random User-Agent source. Older fake_useragent releases accept
# `use_cache_server`; newer ones no longer take that keyword and raise
# TypeError, so fall back to the default constructor in that case.
# NOTE(review): confirm against the fake_useragent version actually installed.
try:
    ua = UserAgent(use_cache_server=False)
except TypeError:
    ua = UserAgent()

# Request headers sent with every HTTP call in this script.
headers = {'User-Agent': ua.chrome}

http_proxies = []  # every proxy entry scraped from the listing pages
# The two pool names below are misspelled ("uesful"/"uesless") but are kept
# as-is because the rest of the script refers to them by these names.
uesful_ip_pool = []  # proxies that passed the check
uesless_ip_pool = []  # proxies that failed the check
# Format of one proxy entry: proxy = {'http': 'IP:PORT'}
def ip_spider():  # Prompts for the number of listing pages to scrape
    """Scrape proxy entries from the kuaidaili free list into ``http_proxies``.

    Reads the page count from stdin, downloads listing pages 1..n and, for
    every table row found, appends ``{type: 'IP:PORT'}`` to the module-level
    ``http_proxies`` list. ``type`` is the text of the "类型" column,
    lowercased so the entry can be used as a ``requests`` proxies mapping
    (requests only matches lowercase scheme keys).

    A page that cannot be downloaded, or that does not answer with status
    200, is reported and skipped instead of aborting the whole run.

    Raises:
        ValueError: if the value typed at the prompt is not an integer.
    """
    n = int(input('请输入需要爬取的页数:'))
    for a in range(1, n + 1):
        if a > 1:
            # Brief pause between pages so the listing site is not hit with
            # back-to-back requests.
            # NOTE(review): presumably the site throttles rapid requests;
            # tune or drop this delay after confirming.
            time.sleep(1)
        url = f'https://www.kuaidaili.com/free/inha/{a}/'
        try:
            # The previous hard-coded proxies={'HTTP': ...} argument was
            # dropped: its uppercase key never matched the https URL, so the
            # request always went out directly anyway.
            response = requests.get(url, headers=headers, timeout=10)
        except requests.RequestException as exc:
            print(f'第{a}页请求失败:{exc}')
            continue
        # Only parse pages that were served successfully.
        if response.status_code != 200:
            print(f'第{a}页返回状态码{response.status_code},已跳过')
            continue
        re_tree = etree.HTML(response.text)
        ips_list = re_tree.xpath('//td[@data-title="IP"]/text()')
        types_list = re_tree.xpath('//td[@data-title="类型"]/text()')
        ports_list = re_tree.xpath('//td[@data-title="PORT"]/text()')
        print(ips_list)
        print(types_list)
        print(ports_list)
        # The three columns are parallel: row k of each list describes the
        # same proxy.
        for kind, ip, port in zip(types_list, ips_list, ports_list):
            ip_port = f'{ip.strip()}:{port.strip()}'
            http_proxies.append({kind.strip().lower(): ip_port})
    print(http_proxies)
def check_ip():  # Probe every scraped proxy and sort it into a pool
    """Test each entry of ``http_proxies`` and file it as usable or not.

    For every proxy entry a request to Baidu is sent *through* that proxy.
    The entry is appended to ``uesful_ip_pool`` when the request succeeds
    with status 200, and to ``uesless_ip_pool`` otherwise, including when
    the request raises (dead proxy, timeout, connection refused).

    The stored entry is appended to the pools unchanged; only a normalized
    copy is handed to ``requests``.
    """
    for proxy in http_proxies:
        # requests selects a proxy by lowercase URL scheme, so an entry
        # keyed 'HTTP' would be ignored and the probe would go out directly.
        # Build a normalized copy with lowercase keys and an explicit scheme
        # on the proxy address.
        routed = {}
        for scheme, address in proxy.items():
            address = address.strip()
            if '://' not in address:
                address = f'http://{address}'
            routed[scheme.strip().lower()] = address

        # The probe URL must use a scheme the proxy entry covers, otherwise
        # the proxy is bypassed and the check proves nothing.
        if 'https' in routed:
            url = 'https://www.baidu.com/'
        elif 'http' in routed:
            url = 'http://www.baidu.com/'
        else:
            # No http/https key: this check cannot route through the entry.
            uesless_ip_pool.append(proxy)
            continue

        try:
            response = requests.get(url, headers=headers, proxies=routed,
                                    timeout=5)
        except requests.RequestException:
            # An unreachable proxy is the expected failure mode here.
            uesless_ip_pool.append(proxy)
            continue

        if response.status_code == 200:
            uesful_ip_pool.append(proxy)
        else:
            uesless_ip_pool.append(proxy)
# Step 1: scrape the listing pages into http_proxies.
ip_spider()

# Step 2: sort the scraped proxies into the usable / unusable pools.
check_ip()

# Step 3: report both pools, separated by a divider line.
print(f"有{len(uesful_ip_pool)}个可用IP,可用IP池列表为\n{uesful_ip_pool}")
print("**************")
print(f"有{len(uesless_ip_pool)}个无用IP,无用IP池列表为\n{uesless_ip_pool}")
下面展示一些内联代码片。
结果展示(以需要爬取3页为例):
// A highlighted block
请输入需要爬取的页数:3
['115.218.0.53', '113.195.17.248', '183.166.96.57', '139.155.41.15', '119.119.104.75', '115.218.2.38', '125.108.78.124', '60.167.102.241', '183.166.102.122', '118.212.104.154', '115.218.214.57', '182.34.33.116', '121.232.148.79', '171.35.169.70', '115.221.240.109']
['HTTP', 'HTTP', 'HTTP', 'HTTP', 'HTTP', 'HTTP', 'HTTP', 'HTTP', 'HTTP', 'HTTP', 'HTTP', 'HTTP', 'HTTP', 'HTTP', 'HTTP']
['9000', '9999', '9999', '8118', '9000', '9000', '9000', '9999', '9999', '9999', '9000', '9999', '9000', '9999', '9999']
[{'HTTP': '115.218.0.53:9000'}, {'HTTP': '113.195.17.248:9999'}, {'HTTP': '183.166.96.57:9999'}, {'HTTP': '139.155.41.15:8118'}, {'HTTP': '119.119.104.75:9000'}, {'HTTP': '115.218.2.38:9000'}, {'HTTP': '125.108.78.124:9000'}, {'HTTP': '60.167.102.241:9999'}, {'HTTP': '183.166.102.122:9999'}, {'HTTP': '118.212.104.154:9999'}, {'HTTP': '115.218.214.57:9000'}, {'HTTP': '182.34.33.116:9999'}, {'HTTP': '121.232.148.79:9000'}, {'HTTP': '171.35.169.70:9999'}, {'HTTP': '115.221.240.109:9999'}]
有15个可用IP,可用IP池列表为
[{'HTTP': '115.218.0.53:9000'}, {'HTTP': '113.195.17.248:9999'}, {'HTTP': '183.166.96.57:9999'}, {'HTTP': '139.155.41.15:8118'}, {'HTTP': '119.119.104.75:9000'}, {'HTTP': '115.218.2.38:9000'}, {'HTTP': '125.108.78.124:9000'}, {'HTTP': '60.167.102.241:9999'}, {'HTTP': '183.166.102.122:9999'}, {'HTTP': '118.212.104.154:9999'}, {'HTTP': '115.218.214.57:9000'}, {'HTTP': '182.34.33.116:9999'}, {'HTTP': '121.232.148.79:9000'}, {'HTTP': '171.35.169.70:9999'}, {'HTTP': '115.221.240.109:9999'}]
**************
有0个无用IP,无用IP池列表为
[]