import requests
from bs4 import BeautifulSoup
import bs4
import os
import xlwt
import csv
import codecs
def getHTMLtext(url):
    """Fetch *url* and return the decoded page text.

    Returns the literal string 'getHTMLtext错误' on any request failure
    (preserving the original error-reporting convention).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()  # turn HTTP 4xx/5xx into an exception
        r.encoding = r.apparent_encoding  # detect real charset from the body
        return r.text
    except requests.RequestException:  # narrower than the original bare except
        return 'getHTMLtext错误'


def fillUltramanList(ulist, html):
    """Parse *html* and append one [span0, span1, span2, span3] row per <li>."""
    soup = BeautifulSoup(html, 'html.parser')
    # The second <ul> in <body> holds the entries; each <li> carries 4 <span>s.
    for li in soup.body('ul')[1]('li'):
        if isinstance(li, bs4.element.Tag):
            spans = li.find_all('span')
            ulist.append([spans[0].string, spans[1].string,
                          spans[2].string, spans[3].string])


def printUnivList(ulist, num):
    """Print up to *num* numbered rows of *ulist*.

    Fix: the original hard-coded range(41), ignoring *num* and raising
    IndexError whenever the list held fewer entries; clamp to the real length.
    """
    for i in range(min(num, len(ulist))):
        u = ulist[i]
        print(i + 1, u[0], u[1], u[2], u[3])


def main():
    """Download the listing page, fill the global uinfo table and print it."""
    global uinfo  # module-level: the save code later in this file reads it
    uinfo = []
    url = 'http://www.4399dmw.com/atm/legend/'
    html = getHTMLtext(url)
    fillUltramanList(uinfo, html)
    printUnivList(uinfo, 70)


main()
# 数据保存 — data saving
# Save as Excel (.xls) --------------------------------------------------------
def data_write(file_path, datas):
    """Write *datas* (a list of rows) into an .xls workbook at *file_path*."""
    f = xlwt.Workbook()  # create the workbook
    sheet1 = f.add_sheet(u'sheet1', cell_overwrite_ok=True)  # create the sheet
    # Write each value into row i, column j.
    for i, data in enumerate(datas):
        for j in range(len(data)):
            sheet1.write(i, j, data[j])
    f.save(file_path)  # persist the file
data_write('C://Users//胡银洪//Desktop//picture//lili.xls', uinfo)  # extension must be 'xls'


# Save as txt -----------------------------------------------------------------
def text_save(filename, data):
    """Append *data* (a list of rows) to the text file *filename*.

    filename: path of the txt file to write to; data: list of rows to save.
    """
    # `with` guarantees the handle is closed even if a write fails
    # (the original leaked it on error); utf-8 keeps the Chinese names intact.
    with open(filename, 'a', encoding='utf-8') as file:
        for i in range(len(data)):
            # Strip the list brackets; these tweaks depend on the data shape.
            s = str(data[i]).replace('[', '').replace(']', '')
            num = str(i + 1)
            # Strip quotes and commas, terminate each row with a newline.
            s = num + s.replace("'", '').replace(',', '') + '\n'
            file.write(s)
    print("保存文件成功")


text_save('C://Users//胡银洪//Desktop//picture//ultrman.txt', uinfo)