# -*- coding: utf-8 -*-
import io
import sys

import requests
from bs4 import BeautifulSoup

# Re-wrap stdout so Chinese text prints correctly in consoles whose
# default encoding is not UTF-8 (e.g. the VS Code terminal on Windows).
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
# Browser-like User-Agent so the site serves the normal HTML page.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
}
# Fixed prefix of the listing URL; the page number is appended per request.
base_url = 'https://www.qiushibaike.com/8hr/page/'
# Explicit UTF-8: the script writes Chinese text, and the platform default
# encoding (e.g. cp936/gbk on some Windows setups, ascii elsewhere) is not
# guaranteed to handle it.
file = open('qiubai.txt', 'w', encoding='utf-8')
# Current page number, starting from page 1.
num = 1
# CSS class of each joke category on the listing page, paired with the
# Chinese section label written to the file before every joke of that
# category. Replaces four duplicated copies of the same loop body.
_SECTIONS = (
    ('article block untagged mb15 typs_hot', '--热门--'),
    ('article block untagged mb15 typs_long', '--长篇--'),
    ('article block untagged mb15 typs_recent', '--近期--'),
    ('article block untagged mb15 typs_old', '--经典--'),
)


def _write_jokes(out, soup, css_class, label):
    """Write every text-only joke of one category to *out*.

    Entries that contain a thumbnail image (class 'thumb') are image
    jokes whose text is meaningless on its own, so they are skipped.
    """
    for div in soup.find_all(class_=css_class):
        if div.find_all(class_='thumb'):
            continue
        joke = div.span.get_text()
        out.write(label)
        out.write(joke + '\n')
        out.write('------' + '\n')


# Scrape listing pages 1..10. (The original `while 1` loop incremented
# num manually and broke once num exceeded 10 — i.e. exactly pages 1-10.)
for num in range(1, 11):
    file.write('第' + str(num) + '页' + '\n')
    r = requests.get(base_url + str(num), headers=headers)
    soup = BeautifulSoup(r.text, 'lxml')
    for css_class, label in _SECTIONS:
        _write_jokes(file, soup, css_class, label)
    print('第{}页,完成!'.format(num))
# Bug fix: the original said `file.close` (no parentheses), which merely
# references the bound method and never closes the file.
file.close()