# 免费ip (free proxy IPs)
import requests  # HTTP client used to download the proxy-list pages
from lxml import etree  # HTML parser with XPath support
import pandas as pd  # writes the collected proxies to an Excel file

ip_list = []  # accumulates "ip:port" strings across all get_ip() calls


def get_ip(url, headers, timeout=10):
    """Scrape one proxy-list page and append its proxies to ``ip_list``.

    Each ``<li>`` row with the proxy-list class is expected to hold one
    ``span.f-address`` (the IP) and one ``span.f-port`` (the port).  Every
    complete row is appended to the module-level ``ip_list`` as
    ``"ip:port"`` and echoed to stdout.

    Args:
        url: Address of the page to download.
        headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up.  Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    response.encoding = 'utf-8'  # decode the body as UTF-8
    if response.status_code != 200:
        return  # unsuccessful request: nothing to parse

    html = etree.HTML(response.text)
    # Select every <li> row that carries a proxy entry.
    rows = html.xpath('//li[@class="f-list col-lg-12 col-md-12 col-sm-12 col-xs-12"]')
    for row in rows:
        address_text = row.xpath('span[@class="f-address"]/text()')
        port_text = row.xpath('span[@class="f-port"]/text()')
        if not address_text or not port_text:
            # Row without an address or port (e.g. a header row): skip it
            # instead of failing with IndexError.
            continue
        ip = address_text[0].strip()
        port = port_text[0].strip()
        ip_list.append(ip + ':' + port)
        print('代理ip为:', ip, '对应端口为:', port)


# Request headers: present the scraper as a desktop Chrome browser.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                         'AppleWebKit/537.36 (KHTML, like Gecko) '
                         'Chrome/72.0.3626.121 Safari/537.36'}

if __name__ == '__main__':
    for page in range(1, 5):  # pages 1 to 4 of the listing
        url = 'https://www.dieniao.com/FreeProxy/{page}.html'.format(page=page)
        try:
            get_ip(url, headers)
        except requests.exceptions.RequestException as exc:
            # One unreachable page must not discard the proxies that were
            # already collected from the other pages.
            print('请求失败,已跳过:', url, exc)
    # Store the collected proxies in the "ip" column and write the xlsx file.
    ip_table = pd.DataFrame({'ip': ip_list})
    ip_table.to_excel('ip.xlsx', sheet_name='data')
import requests  # HTTP client used to send the probe request through each proxy
import pandas  # reads the proxy list from the Excel file
from lxml import etree  # HTML parser with XPath support

ip_table = pandas.read_excel('ip.xlsx')  # load the saved proxy list
ip = ip_table['ip']  # "ip" column; each value is used below as the proxy address

# Request headers: present the client as a desktop Chrome browser.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                         'AppleWebKit/537.36 (KHTML, like Gecko) '
                         'Chrome/72.0.3626.121 Safari/537.36'}

# Probe every proxy in turn: send one request through it and, when the
# request succeeds, print the text of the first <p> of the returned page.
for i in ip:
    proxies = {'http': 'http://{ip}'.format(ip=i),
               'https': 'https://{ip}'.format(ip=i)}
    try:
        response = requests.get('http://202020.ip138.com/', headers=headers,
                                proxies=proxies, timeout=2)
    except requests.exceptions.RequestException:
        # Dead, slow or refusing proxy: expected for free proxies, so move
        # on quietly.  Only network errors are ignored here; programming
        # errors are no longer hidden by a blanket ``except Exception``.
        continue
    if response.status_code != 200:
        continue  # a non-200 answer means the proxy is treated as unusable
    response.encoding = 'utf-8'  # decode the body as UTF-8
    try:
        html = etree.HTML(response.text)
    except (etree.LxmlError, ValueError):
        continue  # body could not be parsed as HTML
    if html is None:
        continue  # parser produced no document tree
    info = html.xpath('/html/body/p[1]//text()')
    print(info)  # text nodes of the first paragraph of the response page