import csv
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent: mafengwo rejects the default requests UA.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.146 Safari/537.36'
}
LIST_URL = 'https://www.mafengwo.cn/jd/10065/gonglve.html'


def fetch_soup(url):
    """GET *url* with browser headers and return the parsed BeautifulSoup tree.

    Raises requests.HTTPError on a non-2xx response instead of silently
    parsing an error page — a common cause of "the script runs but the
    CSV has no rows".
    """
    response = requests.get(url, headers=HEADERS, timeout=10)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


def collect_attractions(soup, base_url):
    """Return [(absolute_link, name), ...] for every attraction on the list page."""
    attractions = []
    for item in soup.select('.hot-list .item a.img'):
        # hrefs on mafengwo are often site-relative; resolve against the page URL
        # so the per-attraction request does not silently fail.
        attractions.append((urljoin(base_url, item['href']), item['title']))
    return attractions


def parse_attraction(soup):
    """Extract (lng, lat, visitor_num, consume_amount, comment_num) from a detail page.

    Any field the page does not expose comes back as None; only the
    exceptions that the lookups can actually raise are caught.
    """
    try:
        lng, lat = soup.select('.bd-map-js')[0]['data-point'].split(',')
    except (IndexError, KeyError, ValueError):
        lng, lat = None, None

    visitor_num = None
    consume_amount = None
    for info_item in soup.select('.info-item'):
        nums = info_item.select('.num')
        if not nums:
            continue
        if '历史人流量' in info_item.text:
            visitor_num = nums[0].text
        elif '人均消费' in info_item.text:
            consume_amount = nums[0].text

    try:
        comment_num = soup.select('.rev-total')[0].text.split('条')[0]
    except IndexError:
        comment_num = None

    return lng, lat, visitor_num, consume_amount, comment_num


def main():
    """Crawl the Qingdao attraction list and write one CSV row per attraction."""
    list_soup = fetch_soup(LIST_URL)
    attractions = collect_attractions(list_soup, LIST_URL)
    if not attractions:
        # The reported symptom — script "succeeds" but the CSV only has a
        # header — happens exactly here: the selector matched nothing
        # (layout changed, or the site served an anti-bot/login page),
        # so the write loop below never runs. Surface that explicitly.
        print('警告:未匹配到任何景点,页面结构可能已变化或被反爬拦截!')

    filename = '青岛景点信息.csv'
    with open(filename, mode='w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['景点名称', '经度', '纬度', '历史人流量', '消费金额', '评论数'])
        for link, name in attractions:
            print(f'正在爬取景点:{name}...')
            lng, lat, visitors, spend, comments = parse_attraction(fetch_soup(link))
            writer.writerow([name, lng, lat, visitors, spend, comments])
            # Throttle to avoid an IP ban from rapid-fire requests.
            time.sleep(1)

    print('数据爬取完成!')


if __name__ == '__main__':
    main()
哪位大佬帮忙看一下代码:为什么代码运行成功,但是 CSV 文件里面没有内容?
最新推荐文章于 2023-12-16 23:53:26 发布