抓取糗百文字版精华

最新推荐文章于 2019-05-05 10:01:41 发布

Simo_Hank

最新推荐文章于 2019-05-05 10:01:41 发布

阅读量1k

点赞数

分类专栏： Python 文章标签： Python 糗百

本文链接：https://blog.csdn.net/youye0737/article/details/80487439

版权

Python 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

# coding=utf-8
import requests
from bs4 import BeautifulSoup
import time

# Scrape pages 1-9 of the text-only section of qiushibaike.com and append
# every post whose vote count exceeds 3000 to a local text file.
url = ['https://www.qiushibaike.com/text/page/{}'.format(number) for number in range(1, 10)]

# Browser-like request headers. Built once because they do not change per page.
# Values carry no leading whitespace: requests rejects header values that
# start with a space. 'br' is not advertised in Accept-Encoding because
# requests can only decode brotli when an optional package is installed.
header = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Cookie': '_qqq_uuid_="2|1',
    'Host': 'www.qiushibaike.com',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
}

i = 1  # running index of saved posts, continues across pages
for page_url in url:
    # The dict must go in `headers=`; `params=` would append it to the query
    # string instead of sending it as HTTP headers. The timeout keeps a
    # stalled connection from hanging the script forever.
    html = requests.get(page_url, headers=header, timeout=10)
    soup = BeautifulSoup(html.text, 'html.parser')
    soup_name = soup.select('h2')
    soup_txt = soup.select('.content span')
    soup_funny = soup.select('div.stats > span.stats-vote > i')
    # Pause between pages to avoid hammering the site.
    time.sleep(1)
    # NOTE(review): zip() pairs the three selections positionally, which
    # assumes each post yields exactly one match per selector - confirm
    # against the live markup.
    for name, txt, fun in zip(soup_name, soup_txt, soup_funny):
        try:
            funs = int(fun.getText())
        except ValueError:
            # Vote text was not a plain integer; skip this post rather than
            # aborting the whole run.
            continue
        if funs <= 3000:
            continue

        names = name.get_text().strip()
        txts = txt.get_text().strip()
        a = str(i) + ':' + '<' + names + '>\n'
        c = '热度:' + fun.getText() + '\n'
        b = txts + '\n'

        print(a, c, b)
        i += 1
        # The with-statement closes the file; no explicit close() is needed.
        with open('d:/python/myweb/qiubai.txt', 'a+', encoding='utf-8') as f:
            f.write(a + '>>>' + c + '\n' + b)