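# Worked example: crawling a non-standard page (the comments are fetched from a JSON API rather than parsed from HTML).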
import requests
import lxml.html
import json
class Bili:
    """Fetch one page of comments from a reply JSON endpoint and print them."""

    # Browser-like User-Agent sent with every request.
    _HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.6788.400 QQBrowser/10.3.2727.400'}

    def __init__(self):
        pass

    def getMsg(self, url, timeout=10):
        """Download ``url``, parse it as JSON and print every reply it lists.

        For each entry in ``data.replies`` the author's name, sex and the
        message text are printed, followed by the same fields for each of
        the entry's nested replies.

        Args:
            url: Address of the JSON reply endpoint to query.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot block the crawl forever.

        Raises:
            requests.RequestException: On network failure or timeout.
            ValueError: If the response body is not valid JSON.
            KeyError, TypeError: If the payload lacks the expected structure.
        """
        response = requests.get(url=url, headers=self._HEADERS, timeout=timeout)
        payload = json.loads(response.content.decode('utf-8'))
        # ``replies`` may be null (e.g. an empty page); treat that as "nothing
        # to print", the same way nested null replies are treated.
        for reply in payload['data']['replies'] or []:
            self._print_reply(reply)
            for child in reply['replies'] or []:
                self._print_reply(child)

    @staticmethod
    def _print_reply(reply):
        """Print the author name, author sex and message text of one reply."""
        member = reply['member']
        print(member['uname'], member['sex'], ' ', reply['content']['message'])
if __name__ == '__main__':
    # Script entry point: print the replies from pages 0-19 of one comment thread.
    import sys

    bl = Bili()
    # Value substituted into the ``oid`` query parameter
    # (presumably the target video's id; verify against the API).
    oid = 46820799
    for page in range(20):
        url1 = 'https://api.bilibili.com/x/v2/reply?&jsonp=jsonp&pn={}&type=1&oid={}&sort=0'.format(page, oid)
        try:
            bl.getMsg(url1)
        except Exception as exc:
            # Best effort: one bad page must not stop the crawl, but report
            # the failure instead of silently discarding it.
            print('page {} failed: {!r}'.format(page, exc), file=sys.stderr)