抓取糗百文字版精华

# coding=utf-8
import requests
from bs4 import BeautifulSoup
import time

# Listing pages 1-9 of the text section; each one is fetched in turn.
url = ['https://www.qiushibaike.com/text/page/{}'.format(number) for number in range(1, 10)]

# Browser-like request headers, built once because they are identical for
# every page.  Values must not start with whitespace: requests rejects such
# values with InvalidHeader when they are passed via ``headers=``.
# 'br' is intentionally not advertised in Accept-Encoding, because requests
# can only decode brotli when an optional brotli package is installed.
header = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Cookie': '_qqq_uuid_="2|1',
    'Host': 'www.qiushibaike.com',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
}

# Running index of the posts written so far, continued across pages.
i = 1
for urls in url:
    # Send the dict as HTTP headers (``headers=``), not as query-string
    # parameters (``params=``); the timeout keeps a stalled connection from
    # hanging the script forever.
    html = requests.get(urls, headers=header, timeout=10)
    soup = BeautifulSoup(html.text, 'html.parser')
    soup_name = soup.select('h2')
    soup_txt = soup.select('.content span')
    soup_funny = soup.select('div.stats > span.stats-vote > i')
    # Pause between page fetches.
    time.sleep(1)
    # The three selections are paired positionally; zip() stops at the
    # shortest of them.
    for name, txt, fun in zip(soup_name, soup_txt, soup_funny):
        vote_text = fun.get_text()
        try:
            funs = int(vote_text)
        except ValueError:
            # Non-numeric vote text: skip this entry instead of aborting
            # the whole run.
            continue
        # Only entries with more than 3000 votes are kept.
        if funs <= 3000:
            continue

        names = name.get_text().strip()
        txts = txt.get_text().strip()
        a = str(i) + ':' + '<' + names + '>\n'
        c = '热度:' + vote_text + '\n'
        b = txts + '\n'

        print(a, c, b)
        i += 1
        # The ``with`` statement closes the file itself; no explicit
        # close() call is needed.
        with open('d:/python/myweb/qiubai.txt', 'a+', encoding='utf-8') as f:
            f.write(a + '>>>' + c + '\n' + b)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值