请求去哪儿网品质一日游中的门票信息,链接为:https://piao.qunar.com/daytrip/list.htm,请求前5页数据。在每次获得的html中利用xpath提取①一日游门票标题;②出发地;③已售数量;④用户评分;⑤景点图片链接+价格。将结果保存到csv文件中。
(提示:考查知识点包括
①利用requests库发起带参数的get请求,多次发起请求
②应对反爬的策略:在请求中添加头部信息,包括user-agent和cookies
③xpath解析网页
④正则表达式精细化提取数据
⑤以追加的方式将数据写入csv文件)
一、主要代码
import requests
from lxml import etree
import csv
# Request headers: a desktop Chrome User-Agent string sent with every request.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/87.0.4280.141 Safari/537.36'
    ),
}

# Base address of the day-trip listing page.
url = 'http://piao.qunar.com/daytrip/list.htm'

# Cookies sent with every request.
# NOTE(review): these look like values captured from a browser session
# (JSESSIONID, csrfToken, timestamps) - presumably they expire; confirm and
# refresh them if the site starts rejecting requests.
cookie = {
    "QN1": "000083002eb456c744c01b0e",
    "QN300": "organic",
    "qunar-assist": "{%22version%22:%2220211215173359.925%22%2C%22show%22:false%2C%22audio%22:false%2C%22speed%22:%22middle%22%2C%22zomm%22:1%2C%22cursor%22:false%2C%22pointer%22:false%2C%22bigtext%22:false%2C%22overead%22:false%2C%22readscreen%22:false%2C%22theme%22:%22default%22}",
    "QN205": "organic",
    "QN277": "organic",
    "csrfToken": "rlFKrtkgxXI9gqRNu33Yl4gus33KLS7u",
    "QN57": "16967522898110.5473583056798739",
    "QN269": "5AB6CBD065B111EEA7FBFA163E48E4E0",
    "_i": "VInJOy9d1YpCO0xxY6sppuTcmm2q",
    "_vi": "xI4nTiivCvB4U_sjkyYMWuM7f3SHjDHp69MSbdIDwd94DZwL60y-0wDrSk5LFuOEMkJXE76jkQg_-tdDV02oMcm1S8ZrQFqVffFO99gviwl6Sp2Chv2q539vbPtjmv1L6XcGUFpPSAU4P5B-ixG3dyI9ElT9k45omS-0GDyVx-Us",
    "Hm_lvt_15577700f8ecddb1a927813c81166ade": "1696752290",
    "QN267": "01580970872c7349744",
    "QN58": "1696752289810%7C1696752292786%7C2",
    "JSESSIONID": "F89597DFF7EA343FA8B9AA119508992E",
    "__qt": "v1%7CVTJGc2RHVmtYMTlCYnd0WVhvU0puY2Q2SmZFaHFVS3QySC9OV0NlL2RxL0RvalpqbHlIc3gralhzUFRGNlNBblBudHBoYVo4eTJLNlpXUEtvZ2pNTHFZRG9OaW1VV1ZPZFhla29vV0FHTXVyRE1nTG8wWFZsZWFnY2pqKzIweVFwQzFjS2lkc3A2UVhsR1ptZFlIR0RBPT0%3D%7C1696752293319%7CVTJGc2RHVmtYMTltS0htdGJNak82TUdjZnppMTNzUHJad2MwWVJBM1drSGJWYUQ0S214TVNNZHF4a3BMQm9RazVrZzVyNCtJcmYweUU2Vm92eStxa1E9PQ%3D%3D%7CVTJGc2RHVmtYMTllRUJRSjFQaG50TzJZYjlSWCtTWmZDVWZZQXhlQjBTempEcXFwMXVkNDBuM3hEQ1hZUXgwTWdtTFllaGdHTm9SSEpBNXc4eU1wTDEyZlQxVEFUSXQ1K21nd3RiS1B2bG5oNjB6VDNGTDRwOU9peWZ3RGg2ejlvNm42MlFld3ZqanJiTkNzZ0FjL3NoWitkaExPS25GYzgxM2NIZlcwYUJSUEFqYU83cHo1dkVhTmJLM1k3akdSNEZjT2pzTFNVekplQm14cnYzcFg5elIrdGxwaFZXZVY4OVFHaFdKVzhlQnJxWk85VVJYN2o5ai9mVTlkVWVmZW5BU1BMQW55eVg3aHNuQlkzOHBwekxJM1U4MkUvWVJrNWZVd3lBVWlGa3Vkc3dhZ1J3T0NDR094UTVVK3BvVkN1eGpnOTVqQ044cTJXNlV5Uld5SFpLYm14dk1ncTczVWd5VDl6UWJSMEMrMWZBbXlHamlRYVJhRWJPcnc5RHgxa3ROenl3QkJRU1c0TEdqNmg3V2RQZm52UjJOWWtYZDBYMXhheTJWVzd6eGtQb2RQNkRJOUh4YnpqK2Y5cFYxVHRJdGdpMWZONktRYmxRNDRZMkZMR3ZzcnhSTDl0YlhsUFJLQSswaURSak1nRnUwUEk1dXZ6clBGbEdYTklraTZiSUdRWnc4aWh1QWprR3JRQit1R3l3cnU5Q0VEcE5KYzh5L2YyMUwzbVhkakk5QmhLTkN1TU5PL3FBN1Q3SGdtOUd1RS9sMkJubG4zdExEZUN6emtvOEtqMWxiczVGRHRtWUlReXpRL1ZPK1lma294SlFxTnFmVEZGK3ZUTXRBdXFqOEs5QmZZZmpnRDdhMlUrMzNGZ2s0NTcvRkVreEl0Y0ZOcHlHbFQ5MVE9",
    "Hm_lpvt_15577700f8ecddb1a927813c81166ade": "1696752293",
    "fid": "cff91dd2-ce39-4833-8540-0167df462ecd",
    "QN271": "693a3dfd-06ae-4a67-9096-e361c5322f1a",
}
# Fetch listing pages 1-5 and append one CSV row per listing, in the column
# order: title, departure place, sold count, rating, image URL, price.
# The output file is opened once in append mode (rows from earlier runs are
# kept) instead of being reopened for every single row.
with open(r'一日游.csv', 'a', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    for page in range(1, 6):
        url = f'http://piao.qunar.com/daytrip/list.htm?keyword=&page={page}'
        # A timeout keeps a stalled connection from hanging the script forever.
        response = requests.get(url, headers=headers, cookies=cookie, timeout=10)
        # Let requests guess the charset from the body before decoding it.
        response.encoding = response.apparent_encoding
        html = etree.HTML(response.text)
        if html is None:
            # etree.HTML may yield None when the body has no parseable markup;
            # skip the page rather than fail on `.xpath`.
            continue

        # Every expression below starts with `//`, so it searches the whole
        # document no matter which element it is called on. The queries are
        # therefore run once on the root: looping over the root's children
        # returned the same lists for each child and wrote every row once per
        # child.
        title = html.xpath('//h3[@class="sight_item_caption"]/a[@class="name"]/text()')  # ticket title
        place = html.xpath('//span[@class="area"]/text()')  # departure place
        sold = html.xpath('//span[@class="relation_cap"]/text()')  # sold count
        rating = html.xpath('//span[@class="relation_count"]/text()')  # user rating
        picture_url = html.xpath('//img/@data-original')  # image link
        price = html.xpath('//span[@class="sight_item_price"]/em//text()')  # price

        # zip stops at the shortest list, so a listing that lacks any one
        # field shortens (and can misalign) the rows of that page.
        for row in zip(title, place, sold, rating, picture_url, price):
            print(row)
            writer.writerow(row)