以下代码的作用是爬取去哪儿旅行的攻略,据最新测试,2024年3月14日可用
源码可加qq:3100716242领取
核心代码如下
# Crawl the travel-guide listing on travel.qunar.com (pages 1-200, query
# `order=hot_heat`) and append one CSV row per listed guide to 攻略.csv.
#
# NOTE(review): `csv`, `parsel` and `requests` are used below but are not
# imported in this excerpt; they must already be imported earlier in the file.

# Class names of the <span> elements under p.user_info whose own text is read.
info_span_classes = ('date', 'days', 'photo_nums', 'fee', 'people', 'trip')
# Class names of the icon <span> elements whose nested <span> text is read.
count_span_classes = ('icon_view', 'icon_love', 'icon_comment')

# 4. Save the data.
# The output file is opened once for the whole crawl rather than once per row.
# Append mode keeps rows written by earlier runs; newline='' lets the csv
# module control line endings itself.
with open('攻略.csv', mode='a', encoding='utf-8', newline='') as f:
    csv_writer = csv.writer(f)
    for page in range(1, 201):
        url = f'https://travel.qunar.com/travelbook/list.htm?page={page}&order=hot_heat'
        # 1. Send the request.
        # The timeout keeps a single stalled connection from hanging the whole
        # crawl; a failed page is reported and skipped so the rest still runs.
        try:
            response = requests.get(url=url, timeout=10)
            response.raise_for_status()
        except requests.exceptions.RequestException as exc:
            print(f'page {page} skipped: {exc}')
            continue
        # 2. Get the response body.
        html_data = response.text
        # 3. Extract the data (options: re / css / xpath).
        #    css:   ul.b_strategy_list > li
        #    xpath: //ul[@class="b_strategy_list "]/li
        # NOTE(review): the trailing space inside the class value is kept from
        # the original selector; presumably it mirrors the site's markup.
        select = parsel.Selector(html_data)
        lis = select.xpath('//ul[@class="b_strategy_list "]/li')
        # Second-level extraction: one <li> per guide.
        for li in lis:
            # CSS equivalent for the title: li.css('h2 > a::text').get()
            title = li.xpath('./h2/a/text()').get()
            # Missing spans fall back to "" so every row has the same columns.
            date, days, photo_nums, fee, people, trip = [
                li.xpath(f'./p[@class="user_info"]//span[@class="{name}"]/text()').get("")
                for name in info_span_classes
            ]
            view, love, comment = [
                li.xpath(f'./p[@class="user_info"]//span[@class="{name}"]/span/text()').get("")
                for name in count_span_classes
            ]
            href = li.xpath('./h2/a/@href').get()
            row = [title, date, days, photo_nums, fee, people, trip, view, love, comment, href]
            print(*row)
            csv_writer.writerow(row)