先看看执行结果
运行后,当前工作目录下会生成一个“风云榜.csv”文件,下面是文件内容:
下面是完整代码,安装好 requests 和 lxml 之后,复制粘贴就可以运行。有问题请留言。
# -*- coding:utf-8 -*
import requests, csv
from lxml import etree
class baidu(object):
    """Scrape Baidu "top" ranking pages and append the entries to a CSV file.

    Each page listed in ``self.urls`` is downloaded, the entry titles and the
    text of the ``<td class="last">`` spans are extracted with XPath, and every
    (title, value) pair is appended as one row to ``self.output_path``.

    Args:
        timeout: Seconds to wait for each HTTP request before giving up.
            ``None`` waits indefinitely.
        output_path: CSV file that rows are appended to.
    """

    def __init__(self, timeout=10, output_path='风云榜.csv'):
        self.urls = [
            'http://top.baidu.com/buzz?b=274&c=17&fr=topbuzz_b277_c17',
            'http://top.baidu.com/buzz?b=277&c=17&fr=topbuzz_b274_c17',
            'http://top.baidu.com/buzz?b=276&c=17&fr=topbuzz_b277_c17',
        ]
        # Browser-like User-Agent sent with every request.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.3945.79 Safari/537.36'}
        self.timeout = timeout
        self.output_path = output_path

    def get_all_data(self):
        """Download every page in ``self.urls`` and append its rows to the CSV.

        Raises:
            requests.RequestException: If a request times out, the connection
                fails, or the server answers with an HTTP error status.
        """
        for url in self.urls:
            # A timeout keeps a stalled server from blocking the run forever.
            response = requests.get(url, headers=self.headers,
                                    timeout=self.timeout)
            # Fail loudly on 4xx/5xx instead of parsing an error page and
            # silently writing nothing.
            response.raise_for_status()
            # Decode with the encoding detected from the body.
            response.encoding = response.apparent_encoding
            tree = etree.HTML(response.text)
            names = tree.xpath('//a[@class="list-title"]/text()')  # entry titles
            # Text of the span in each row's "last" cell (the original author
            # calls this the entry id).
            name_ids = tree.xpath('//td[@class="last"]/span/text()')
            # zip() stops at the shorter list, so unpaired items are dropped.
            self._save_rows(zip(names, name_ids))

    def _save_rows(self, rows):
        """Append ``rows`` (an iterable of sequences) to the CSV in one open."""
        # newline='' lets the csv module control line endings itself.
        with open(self.output_path, 'a', newline='', encoding='utf8') as f:
            csv.writer(f).writerows(rows)

    def Save(self, name, name_id):
        """Append a single ``[name, name_id]`` row to the CSV file."""
        self._save_rows([(name, name_id)])

    def run(self):
        """Run the full scrape; equivalent to calling ``get_all_data()``."""
        self.get_all_data()
if __name__ == '__main__':
    # Script entry point: build the scraper and perform one full scrape.
    scraper = baidu()
    scraper.run()