import time

import xlwt
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
class QunarSpider():
    """Scrape hotel names and prices from hotel.qunar.com into an .xls workbook."""

    def theSpider(self, city: str, startdate: str, enddate: str, totalpagenum,
                  output_path: str = 'D:/HInfo.xls') -> None:
        """Search hotels for a city and date range, then export the results.

        Opens a Firefox session, submits the search form, walks through up to
        ``totalpagenum`` result pages, prints each hotel as it is found and
        finally writes every collected row to an Excel workbook.

        Args:
            city: Text typed into the destination field.
            startdate: Check-in date typed into the form and stored in each row.
            enddate: Check-out date typed into the form and stored in each row.
            totalpagenum: Maximum number of result pages to read. Accepts an
                int or a numeric string (e.g. the raw result of ``input()``).
            output_path: Where the workbook is saved.

        Raises:
            ValueError: If ``totalpagenum`` is not an integer >= 1.
        """
        # Validate before launching a browser so bad input fails fast.
        total_pages = int(totalpagenum)
        if total_pages < 1:
            raise ValueError(
                "totalpagenum must be at least 1, got {!r}".format(totalpagenum))

        driver = webdriver.Firefox()
        try:
            self._submit_search(driver, city, startdate, enddate)
            rows = self._collect_rows(driver, total_pages, startdate, enddate)
        finally:
            # Always release the browser, even when scraping fails midway.
            driver.quit()

        self._save_rows(rows, output_path)

    def _submit_search(self, driver, city, startdate, enddate):
        """Fill in the city and stay dates on the landing page and start the search."""
        driver.get("https://hotel.qunar.com")

        # Destination city.
        city_box = driver.find_element(By.XPATH, "//input[@tabindex='1']")
        city_box.clear()
        city_box.send_keys(city)
        # Wait until an element with class "hot_key_list" is present before
        # tabbing out (presumably the city suggestion dropdown - TODO confirm).
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "hot_key_list")))
        city_box.send_keys(Keys.TAB)
        time.sleep(2)

        # Check-out date is entered first, then check-in date.
        self._replace_text(
            driver.find_element(By.XPATH, "//input[@tabindex='3']"), enddate)
        self._replace_text(
            driver.find_element(By.XPATH, "//input[@tabindex='2']"), startdate)

        # Run the search.
        driver.find_element(By.XPATH, "//a[@tabindex='5']").click()

    @staticmethod
    def _replace_text(field, text):
        """Select the whole content of *field*, delete it and type *text*."""
        field.click()
        field.send_keys(Keys.CONTROL, "a")
        field.send_keys(Keys.DELETE)
        field.send_keys(text)

    def _collect_rows(self, driver, total_pages, startdate, enddate):
        """Read up to *total_pages* result pages and return the sheet rows.

        Each row is ``[sequence number, hotel name, price, startdate, enddate]``.
        """
        rows = []
        for page_index in range(total_pages):
            # Fixed pause to give the result list time to render.
            time.sleep(2)
            # page_source is already a str, so no encoding hint is needed.
            soup = BeautifulSoup(driver.page_source, "html.parser")
            for info in soup.find_all('div', class_="inner clearfix"):
                links = info.select("div > div > p > a")
                if len(links) < 2:
                    # Not a complete hotel card; skip it instead of crashing.
                    continue
                # The second link is recorded as the name, the first as the price.
                name, price = links[1].text, links[0].text
                print(name, "——————————", price)
                rows.append([len(rows) + 1, name, price, startdate, enddate])

            if page_index == total_pages - 1:
                # Last requested page: nothing further to load.
                break
            try:
                next_page = driver.find_element(
                    By.XPATH, "//p[@class='next fl_right cur able']")
            except NoSuchElementException:
                # No clickable "next" control: keep what was collected so far.
                break
            next_page.click()
        return rows

    @staticmethod
    def _save_rows(rows, output_path):
        """Write a header line followed by *rows* to a workbook at *output_path*."""
        workbook = xlwt.Workbook()
        # cell_overwrite_ok=True lets a cell be written more than once.
        sheet = workbook.add_sheet('酒店信息', cell_overwrite_ok=True)
        # Header: number, hotel name, price, check-in date, check-out date.
        header = ['序号', '酒店名称', '价格', '入住日期', '离店日期']
        # Row 0 holds the header; data rows follow from row 1.
        for row_index, row in enumerate([header] + rows):
            for col_index, value in enumerate(row):
                sheet.write(row_index, col_index, value)
        workbook.save(output_path)
if __name__ == '__main__':
    # Interactive entry point: ask for the search parameters, then run the spider.
    spider = QunarSpider()
    city = input("请输入城市:")
    startdate = input("请输入入住日期(yyyy-mm-dd):")
    enddate = input("请输入离店日期(yyyy-mm-dd):")
    # input() returns a string; convert it here so a bad page count is
    # reported before the browser is started.
    try:
        totalpagenum = int(input("请输入爬取页数:"))
    except ValueError:
        raise SystemExit("爬取页数必须为整数") from None
    print("正在爬取中...")
    spider.theSpider(city, startdate, enddate, totalpagenum)