# Build a pool of free proxies (获取免费代理池)

import requests
from lxml import etree #HTML解析模块
import pandas as pd
# Module-level accumulator: get_ip() appends one "ip:port" string per proxy.
ip_list = []


def get_ip(url, headers):
    """Scrape one proxy-listing page and record every proxy found on it.

    The page is fetched with up to 5 connection retries. When the server
    answers with HTTP 200, each listing row is read and the proxy is
    appended to the module-level ``ip_list`` as ``"ip:port"`` and echoed
    to stdout. Any other status code leaves ``ip_list`` untouched.

    Args:
        url: Address of the listing page to download.
        headers: Mapping of HTTP request headers to send.

    Returns:
        None. Results are delivered through ``ip_list``.

    Raises:
        requests.RequestException: If the request ultimately fails
            (connection error, timeout, ...).
    """
    # Listing rows are matched on their exact class attribute value.
    row_xpath = ('//li[@class="f-list '
                 'col-lg-12 col-md-12 '
                 'col-sm-12 col-xs-12"]')

    # Send the network request.
    # Retries must be configured on a transport adapter; assigning
    # requests.DEFAULT_RETRIES or session.keep_alive has no effect.
    # Closing the session on exit also drops the connection instead of
    # keeping it alive.
    with requests.Session() as session:
        adapter = requests.adapters.HTTPAdapter(max_retries=5)
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        # NOTE(review): verify=False disables TLS certificate checking;
        # kept from the original script - confirm it is still required.
        response = session.get(url=url, headers=headers,
                               timeout=300, verify=False)

    response.encoding = 'utf-8'
    if response.status_code != 200:
        return

    html = etree.HTML(response.text)
    for row in html.xpath(row_xpath):
        address = row.xpath('span[@class="f-address"]/text()')  # proxy IP
        port = row.xpath('span[@class="f-port"]/text()')  # proxy port
        if not address or not port:
            # Row without both fields: skip it rather than crash on [0].
            continue
        ip_list.append(address[0] + ':' + port[0])
        print('代理IP为:', address[0], '对应端口为:', port[0])

# Request headers sent with every page fetch.
# NOTE(review): 'authority', 'method', 'path' and 'scheme' look like HTTP/2
# pseudo-header names copied from browser dev tools; here they are sent as
# ordinary header fields - confirm whether the site needs them at all.
headers = {
    'authority': 'www.dieniao.com',
    'method': 'GET',
    'path': '/FreeProxy.html',
    'scheme': 'https',
    # The wildcard media range must be "*/*"; a bare "/" is not valid.
    'accept': ('text/html,application/xhtml+xml,application/xml;q=0.9,'
               'image/webp,image/apng,*/*;q=0.8,'
               'application/signed-exchange;v=b3;q=0.9'),
    'referer': 'https://www.dieniao.com/About.html',
    'user-agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Chrome/88.0.4324.182 Safari/537.36 Edg/88.0.705.81'),
}
# Script entry point: scrape the first listing page and save the proxies
# to an Excel workbook. The guard must compare __name__ with '__main__';
# the bare names `name` / 'main' raise NameError.
if __name__ == '__main__':
    # To scrape several pages instead of only the first one:
    # for i in range(1, 5):
    #     url = 'https://www.dieniao.com/FreeProxy/{page}.html'.format(page=i)
    #     get_ip(url, headers)
    url = 'https://www.dieniao.com/FreeProxy.html'
    get_ip(url, headers)

    # One column named 'ip' holding the collected "ip:port" strings.
    ip_table = pd.DataFrame({'ip': ip_list})
    ip_table.to_excel('ip.xlsx', sheet_name='data')

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值