# coding=utf-8
"""Scrape highly voted text posts from qiushibaike.com.

Pages 1-9 of the text section are fetched one by one.  Every post whose vote
count is strictly greater than ``MIN_VOTES`` is printed and appended to
``OUTPUT_PATH`` together with a running sequence number.
"""
import time

import requests
from bs4 import BeautifulSoup

# Listing pages to visit (page numbers 1 through 9).
PAGE_URLS = ['https://www.qiushibaike.com/text/page/{}'.format(number)
             for number in range(1, 10)]

# Browser-like request headers.  Values carry no leading whitespace: requests
# rejects header values that start with a space.
REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,'
              'image/webp,image/apng,*/*;q=0.8',
    # 'br' is intentionally not advertised: decoding it needs an optional
    # brotli package, and without it the response text would be unreadable.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    # NOTE(review): this cookie value looks truncated; kept as found.
    'Cookie': '_qqq_uuid_="2|1',
    'Host': 'www.qiushibaike.com',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/66.0.3359.181 Safari/537.36',
}

MIN_VOTES = 3000            # a post is kept only when its votes exceed this
REQUEST_TIMEOUT = 10        # seconds before a stalled request is abandoned
REQUEST_DELAY = 1           # seconds to pause between page requests
OUTPUT_PATH = 'd:/python/myweb/qiubai.txt'


def fetch_page(page_url):
    """Download one listing page and return its decoded HTML text.

    Raises:
        requests.RequestException: on connection problems, timeouts or a
            non-success HTTP status.
    """
    # The header dict must go in ``headers``; passing it as ``params`` would
    # append it to the URL as a query string instead of sending it as headers.
    response = requests.get(page_url, headers=REQUEST_HEADERS,
                            timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.text


def parse_hot_posts(html_text, min_votes=MIN_VOTES):
    """Return ``(author, body, vote_text)`` tuples for posts above *min_votes*.

    The three selectors are matched independently and paired up positionally
    with ``zip``, so the pairing is only correct if every post on the page
    yields exactly one match per selector -- TODO confirm against the markup.
    """
    soup = BeautifulSoup(html_text, 'html.parser')
    author_tags = soup.select('h2')
    body_tags = soup.select('.content span')
    vote_tags = soup.select('div.stats > span.stats-vote > i')

    posts = []
    for author_tag, body_tag, vote_tag in zip(author_tags, body_tags, vote_tags):
        vote_text = vote_tag.get_text()
        try:
            votes = int(vote_text)
        except ValueError:
            # A non-numeric vote counter should not abort the whole run.
            continue
        if votes > min_votes:
            posts.append((author_tag.get_text().strip(),
                          body_tag.get_text().strip(),
                          vote_text))
    return posts


def main():
    """Walk all listing pages, printing and saving every hot post found."""
    sequence = 1
    for page_url in PAGE_URLS:
        try:
            html_text = fetch_page(page_url)
        except requests.RequestException as exc:
            print('skipping {}: {}'.format(page_url, exc))
            posts = []
        else:
            posts = parse_hot_posts(html_text)

        # Be polite to the server between consecutive requests.
        time.sleep(REQUEST_DELAY)

        if not posts:
            continue

        # Open the file once per page; the with-block closes it on exit.
        with open(OUTPUT_PATH, 'a', encoding='utf-8') as out_file:
            for author, body, vote_text in posts:
                title_line = str(sequence) + ':' + '<' + author + '>\n'
                votes_line = '热度:' + vote_text + '\n'
                body_line = body + '\n'
                print(title_line, votes_line, body_line)
                out_file.write(title_line + '>>>' + votes_line + '\n' + body_line)
                sequence += 1


if __name__ == '__main__':
    main()
抓取糗百文字版精华
最新推荐文章于 2019-05-05 10:01:41 发布