# Scrape the bilibili bangumi (anime series) ranking page.
import requests
from bs4 import BeautifulSoup
import bs4
import re
def get_url(url):
    """Download *url* and return the response body as text.

    The request uses a 30-second timeout. HTTP error statuses are turned
    into exceptions via ``raise_for_status`` and the response encoding is
    set from the detected (apparent) encoding before decoding.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection error, timeout, or HTTP error status).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        # Decode with the encoding guessed from the content rather than
        # the one declared in the headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request-level failures map to the "" fallback; unrelated
        # errors (and Ctrl-C) are no longer silently swallowed.
        return ""
def fill(uList, dList, html):
    """Parse *html* and append the scraped links and statistics in place.

    Args:
        uList: List that receives one ``[href, text]`` pair for every
            ``<a>`` tag that has a non-empty ``.string`` and an ``href``
            attribute.
        dList: List that receives the text of every
            ``<span class="data-box">`` element, in document order.
        html: HTML source to parse.

    Returns:
        None. Both lists are mutated in place.
    """
    soup = BeautifulSoup(html, "html.parser")

    for anchor in soup.find_all('a'):
        title = anchor.string
        if not title:
            # Anchors without a single text child carry no usable title.
            continue
        if 'href' not in anchor.attrs:
            # Anchors without a link target are skipped.
            continue
        uList.append([anchor.attrs['href'], title])

    for span in soup.find_all(name='span', attrs={'class': "data-box"}):
        dList.append(span.text)
def print_html(uList, dList):
    """Print the ranking as a tab-separated, centre-aligned table.

    A header row is printed first, then one row per entry of *uList*.
    Row ``k`` (0-based) shows rank ``k + 1``, the entry's link and name,
    and the three consecutive values ``dList[3k : 3k + 3]``.

    Args:
        uList: Sequence of entries where index 0 is the link and index 1
            is the name.
        dList: Flat sequence of statistic strings, three per entry.
            If it is too short, the missing cells are printed empty
            instead of raising ``IndexError`` part-way through the table.

    Returns:
        None. Output goes to standard output.
    """
    tplt = "{0:^5}\t{1:{6}^20}\t{2:^15}\t{3:{6}^15}\t{4:{6}^15}\t{5:{6}^15}"
    # chr(12288) is the full-width (ideographic) space, passed to the
    # template as the fill character for the padded columns.
    fill_char = chr(12288)
    stats_per_row = 3

    print(tplt.format("排名", "链接", "总播放量", "评论人数", "喜欢", "名称", fill_char))

    for index, entry in enumerate(uList):
        start = index * stats_per_row
        stats = list(dList[start:start + stats_per_row])
        # Pad so a short dList yields blank cells rather than a crash.
        stats.extend([""] * (stats_per_row - len(stats)))
        print(tplt.format(index + 1, entry[0], stats[0], stats[1], stats[2],
                          entry[1], fill_char))
def main():
    """Fetch the bangumi ranking page, parse it, and print the table."""
    ranking_url = "https://www.bilibili.com/ranking/bangumi/13/0/3"
    links, stats = [], []
    page = get_url(ranking_url)
    # fill() appends to both lists in place.
    fill(links, stats, page)
    print_html(links, stats)
# Kick off the scrape. There is no __main__ guard, so this call runs
# whenever the module is loaded.
main()