运行结果如下图所示。
代码如下:
import requests
from bs4 import BeautifulSoup
import re
import xlwt
# Scrape IP addresses from the 89ip.cn listing pages and save them to an
# Excel (.xls) workbook, one address per row under an "IP地址" header.

PAGE_URL_TEMPLATE = "https://www.89ip.cn/index_{}.html"
PAGE_INDEXES = range(0, 6)
REQUEST_HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36"}
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10
# Four dot-separated groups of 1-3 digits. Octet ranges are not validated.
IP_PATTERN = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
OUTPUT_PATH = "students5.xls"


def page_url(page_index):
    """Return the listing-page URL for the given page index."""
    return PAGE_URL_TEMPLATE.format(page_index)


def extract_ips(markup):
    """Return every IP-shaped substring of *markup*, in order of appearance.

    Returns an empty list when nothing matches.
    """
    return IP_PATTERN.findall(markup)


def fetch_page_ips(page_index):
    """Download one listing page and return the IPs found in its <td> cells.

    Raises whatever ``requests.get`` raises on connection failure or timeout.
    """
    response = requests.get(
        page_url(page_index),
        headers=REQUEST_HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    soup = BeautifulSoup(response.text, "html.parser")
    ips = []
    for cell in soup.find_all("td"):
        # Search the serialized tag (markup included), as the original did.
        # extend() keeps each address as its own entry even when a single
        # cell holds several matches.
        ips.extend(extract_ips(str(cell)))
    return ips


def save_ips(ips, path=OUTPUT_PATH):
    """Write *ips* to column 0 of a new workbook at *path*, below a header."""
    workbook = xlwt.Workbook(encoding="utf-8", style_compression=0)
    worksheet = workbook.add_sheet('sheet1', cell_overwrite_ok=True)
    worksheet.write(0, 0, "IP地址")
    # Row 0 is the header, so data rows start at 1.
    for row, ip in enumerate(ips, start=1):
        worksheet.write(row, 0, ip)
    workbook.save(path)


def main():
    """Scrape every configured page and save the collected addresses."""
    datalist = []
    for page_index in PAGE_INDEXES:
        datalist.extend(fetch_page_ips(page_index))
    save_ips(datalist)


if __name__ == "__main__":
    main()