python爬取b站403_python3爬虫-爬取B站排行榜信息

import os
import re
import time

import requests

# Ranking categories: URL key -> display name used in the output file name.
category_dic = {
    "all": "全站榜",
    "origin": "原创榜",
    "rookie": "新人榜",
}

# Ranking periods: value of the API's ``day`` parameter -> display name.
day_dic = {
    1: "日排行榜",
    3: "三日排行榜",
    7: "周排行榜",
    30: "月排行榜",
}

# Sub-categories for the "all" and "origin" rankings:
# value of the API's ``rid`` parameter -> display name.
all_or_origin_dic = {
    0: "全站",
    1: "动画",
    168: "国创相关",
    3: "音乐",
    129: "舞蹈",
    4: "游戏",
    36: "科技",
    188: "数码",
    160: "生活",
    119: "鬼畜",
    155: "时尚",
    5: "娱乐",
    181: "影视",
}

# Only referenced by the disabled "bangumi" entry of BaseDict below.
bangumi_dic = {
    "番剧": 1,
    "国产动画": 4,
}

# Only referenced by the disabled "cinema" entry of BaseDict below.
cinema_dic = {
    "记录篇": 177,
    "电影": 23,
    "电视剧": 11,
}

# Sub-categories for the "rookie" ranking: identical to the all/origin table
# (same order, same names) except that rid 168 is not listed.
rookie_dic = {
    rid: name for rid, name in all_or_origin_dic.items() if rid != 168
}

# Category key -> sub-category table to iterate for that category.
# The "bangumi" (bangumi_dic) and "cinema" (cinema_dic) categories are
# intentionally left out.
BaseDict = {
    "all": all_or_origin_dic,
    "origin": all_or_origin_dic,
    "rookie": rookie_dic,
}

# Category key -> value of the API's ``type`` parameter.
dic = {
    "all": 1,
    "origin": 2,
    "rookie": 3,
}

# Directory where the ranking text files are saved.
# Written as a raw string so the Windows backslashes are taken literally;
# the previous spelling relied on "\图" being an invalid escape sequence,
# which newer Python versions warn about.
base_path = r"D:\图片\bilibili_ranking"

def get_url():
    """Yield every ranking API URL together with the keys that produced it.

    Iterates over each category in ``category_dic`` (restricted to "all",
    "origin" and "rookie"), each sub-category id of that category's table in
    ``BaseDict``, and each period in ``day_dic``.

    Yields:
        A ``(url, [first, second, third])`` tuple where ``first`` is the
        category key, ``second`` the sub-category id (sent as ``rid``) and
        ``third`` the period (sent as ``day``). The ``type`` query parameter
        is looked up from ``dic`` by category key.
    """
    for first in category_dic:
        # Only these three categories have a sub-category table in BaseDict.
        if first not in ("all", "origin", "rookie"):
            continue
        for second in BaseDict[first]:
            for third in day_dic:
                url = "https://api.bilibili.com/x/web-interface/ranking?jsonp=jsonp&rid={}&day={}&type={}&arc_type=0&callback=__jp1".format(
                    second, third, dic.get(first))
                yield url, [first, second, third]

# Crawl every ranking URL produced by get_url() and append each ranking's
# entries to its own UTF-8 text file under base_path.
s = requests.Session()

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
    "Referer": "https://www.bilibili.com/ranking/all/0/0/3",
}

# Captures, in order: author, play count, score (pts) and title of one entry.
# It is applied to the response text after all double quotes are removed.
# NOTE(review): the group names had been stripped from the pattern (leaving
# the invalid "(?P.*?)"); they are restored here as a best guess. The names
# do not affect re.findall(), which returns plain tuples in group order.
pattern = r'.*?author:(?P<author>.*?),.*?play:(?P<play>.*?),.*?pts:(?P<pts>.*?),.*?title:(?P<title>.*?),'

# open() does not create missing directories, so make sure the target exists.
os.makedirs(base_path, exist_ok=True)

url_list = get_url()
for url in url_list:
    # url is (request_url, [category_key, sub_category_id, period]).
    print("向{}发请求".format(url[0]))

    response = s.get(url=url[0], headers=headers)

    # Strip the quotes so the pattern can match bare key:value pairs.
    data = response.text.replace('"', "")

    result_list = re.findall(pattern, data)

    # File name is "<category>-<sub-category>-<period>", built from the
    # display names of the three keys.
    path = os.path.join(base_path, "{}-{}-{}".format(
        category_dic.get(url[1][0]),
        rookie_dic.get(url[1][1]) or all_or_origin_dic.get(url[1][1]),
        day_dic.get(url[1][2])))

    # Append mode: re-running the script adds to existing files.
    with open(path + ".txt", "a", encoding="utf-8") as f:
        print('正在写入....{}'.format(path + ".txt"))
        for index, res in enumerate(result_list):
            f.write("排名:{}\n".format(index + 1))
            f.write("标题:{}\n".format(res[3]))
            f.write("作者:{}\n".format(res[0]))
            f.write("播放量:{}\n".format(res[1]))
            f.write("综合分数:{}\n".format(res[2]))
            f.write("-" * 90 + "\n")

    # Pause between requests.
    time.sleep(2)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值