import requests
from bs4 import BeautifulSoup
import time

sess = requests.Session()

# SUB cookie copied from a logged-in Weibo browser session.
cookies = {
    'SUB': '_2AkMuo0GNf8NxqwJRmPoTzG_gboR_wgHEieKY_7BWJRMxHRl-yT83qm4GtRC7VX-9bp2fn3Ia6oA8_vvznl5b7g..',
}

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36',
}


def crawl_wb():
    # Attach the cookie to the session so every request is sent as a logged-in user.
    sess.cookies = requests.utils.cookiejar_from_dict(cookies)
    # Walk the first 10 pages of the "load more" feed.
    for page in range(10):
        url = 'https://weibo.com/a/aj/transform/loadingmoreunlogin?ajwvr=6&category=1760&page={}&lefnav=0&__rnd=1511159753282'.format(page)
        # verify=False skips SSL certificate verification for this request.
        res = sess.get(url, headers=headers, verify=False)
        # The feed's HTML fragment is embedded in the 'data' field of the JSON response.
        data = res.json()['data']
        soup = BeautifulSoup(data, 'lxml')
        # Each entry's title anchor sits inside an element with class list_title_b.
        items = soup.select('.list_title_b a')
        for item in items:
            print(item.text, item['href'])
        # Pause between pages to avoid hammering the server.
        time.sleep(2)


if __name__ == '__main__':
    crawl_wb()