糗事百科前十页保存到txt

import requests
from bs4 import BeautifulSoup
# -*-coding:utf-8 -*-
import io
import sys
# Change the default encoding of standard output: re-wrap the underlying
# byte buffer in a text wrapper that encodes everything printed as UTF-8.
sys.stdout=io.TextIOWrapper(sys.stdout.buffer, encoding= 'utf8')
# (Works around the problem of printing Chinese text in VS Code.)


# Scrape the first ten listing pages of qiushibaike.com and save the text of
# every joke found to 'qiubai.txt', grouped by page and labelled by section.

# Request headers: a desktop-browser User-Agent is sent with every page request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'}

# Fixed part of the listing URL; the page number is appended for each request.
base_url = 'https://www.qiushibaike.com/8hr/page/'

# Number of the last listing page to fetch (pages 1.._LAST_PAGE are scraped).
_LAST_PAGE = 10

# Seconds to wait for the server before giving up on a request, so that a
# stalled connection cannot hang the script indefinitely.
_REQUEST_TIMEOUT = 10

# One entry per section that is scraped, in the order the sections are written
# to the file: (class string passed to find_all, label written before a joke).
_SECTIONS = (
    ('article block untagged mb15 typs_hot', '--热门--'),
    ('article block untagged mb15 typs_long', '--长篇--'),
    ('article block untagged mb15 typs_recent', '--近期--'),
    ('article block untagged mb15 typs_old', '--经典--'),
)

# The `with` block guarantees the file is flushed and closed even if a request
# or parse step raises. The explicit UTF-8 encoding keeps the Chinese text
# writable regardless of the platform's default locale encoding.
with open('qiubai.txt', 'w', encoding='utf-8') as file:
    for num in range(1, _LAST_PAGE + 1):
        # Page header line, e.g. "第1页".
        file.write('第' + str(num) + '页' + ' \n ')

        r = requests.get(base_url + str(num), headers=headers,
                         timeout=_REQUEST_TIMEOUT)
        soup = BeautifulSoup(r.text, 'lxml')

        for css_class, label in _SECTIONS:
            for div in soup.find_all(class_=css_class):
                # Skip entries containing an element with class 'thumb'
                # (presumably image posts, whose text alone is incomplete
                # -- TODO confirm against the page markup).
                if div.find_all(class_='thumb'):
                    continue

                # The joke text is taken from the first <span> inside the
                # entry; an entry without any <span> has nothing to save.
                span = div.span
                if span is None:
                    continue

                file.write(label)
                file.write(span.get_text() + ' \n ')
                file.write('------' + ' \n ')

        # Progress message on standard output after each page.
        print('第 {} 页,完成!'.format(num))
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值