# 获取免费代理池

import requests
from lxml import etree #HTML解析模块
import pandas as pd
# Shared accumulator: get_ip() appends every scraped "ip:port" string here.
ip_list = []


def get_ip(url, headers):
    """Fetch one proxy-listing page and collect its proxies into ``ip_list``.

    Args:
        url: Address of the page to download.
        headers: Mapping of HTTP request headers sent with the request.

    Returns:
        None. Each proxy found is appended to the module-level ``ip_list``
        as an ``"ip:port"`` string and echoed to stdout. Nothing is added
        when the response status is not 200.

    Raises:
        requests.RequestException: Propagated unchanged when the request
            itself fails (connection error, timeout, ...).
    """
    # Send the network request.  The retry count is configured on the
    # transport adapter and the request goes through the session, so the
    # setting actually applies; leaving the ``with`` block closes the
    # session's connections instead of keeping them alive.
    with requests.Session() as session:
        adapter = requests.adapters.HTTPAdapter(max_retries=5)
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        # NOTE(review): verify=False turns off TLS certificate verification.
        # Kept for compatibility with the existing behaviour; confirm it is
        # really wanted before relying on the scraped data.
        response = session.get(url, headers=headers, timeout=300,
                               verify=False)
        response.encoding = 'utf-8'
        if response.status_code != 200:
            return
        page_text = response.text

    html = etree.HTML(page_text)
    if html is None:
        # Defensive: presumably happens when the body has no parsable markup.
        return

    rows = html.xpath('//li[@class="f-list '
                      'col-lg-12 col-md-12 '
                      'col-sm-12 col-xs-12"]')
    for row in rows:
        addresses = row.xpath('span[@class="f-address"]/text()')  # IP text
        ports = row.xpath('span[@class="f-port"]/text()')  # port text
        if not addresses or not ports:
            # Skip rows lacking either field rather than raising IndexError
            # and losing every remaining row on the page.
            continue
        ip, port = addresses[0], ports[0]
        ip_list.append(ip + ':' + port)
        print('代理IP为:', ip, '对应端口为:', port)

# Request headers sent with every page fetch.
# NOTE(review): 'authority', 'method', 'path' and 'scheme' look like the
# HTTP/2 pseudo-headers shown by browser dev tools; they are kept as-is so
# the mapping's keys stay unchanged -- confirm whether the site needs them.
headers = {
    'authority': 'www.dieniao.com',
    'method': 'GET',
    'path': '/FreeProxy.html',
    'scheme': 'https',
    # The catch-all media range must be spelled "*/*"; a bare "/" is not a
    # valid media range in an Accept header.
    'accept': ('text/html,application/xhtml+xml,application/xml;q=0.9,'
               'image/webp,image/apng,*/*;q=0.8,'
               'application/signed-exchange;v=b3;q=0.9'),
    'referer': 'https://www.dieniao.com/About.html',
    'user-agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Chrome/88.0.4324.182 Safari/537.36 Edg/88.0.705.81'),
}
# Script entry point: scrape the first listing page and export the collected
# proxies to an Excel workbook.  The guard must compare the dunder names
# ``__name__`` / ``'__main__'``; plain ``name`` is undefined.
if __name__ == '__main__':
    ip_table = pd.DataFrame(columns=['ip'])
    # Disabled alternative: crawl listing pages 1-4 instead of only the
    # landing page.
    # for i in range(1, 5):
    #     url = 'https://www.dieniao.com/FreeProxy/{page}.html'.format(page=i)
    #     get_ip(url, headers)
    url = 'https://www.dieniao.com/FreeProxy.html'
    get_ip(url, headers)
    # One row per scraped "ip:port" string.
    ip_table['ip'] = ip_list
    ip_table.to_excel('ip.xlsx', sheet_name='data')

已标记关键词 清除标记
相关推荐
©️2020 CSDN 皮肤主题: 数字20 设计师:CSDN官方博客 返回首页