import requests
from bs4 import BeautifulSoup as bs
import re
import time
import pandas as pd
# Result container; nothing is appended to it in the visible part of the
# script -- presumably it collects the scraped hotel rows (TODO confirm).
info = list()

# Pages to fetch, visited in list order by the crawl loop.  The single entry
# is a placeholder whose Chinese text says: "use a list so the crawler can
# walk it to turn pages automatically; the URL entered here must be the one
# obtained after choosing region, currency, etc. on the site -- be sure to
# note this!"
url_list = [
    "用列表 好让爬虫自动遍历列表翻页。这里输入的url是在页面上选好地区 货币等信息后的url 这点一定注意!",
]

# HTTP headers passed to every request.  The Cookie value is a placeholder
# whose Chinese text says: "the cookie obtained after setting region,
# currency, etc."
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"
    ),
    "Cookie": "设置好地区 货币等信息后的cookie",
}
for ii in url_list:
time.sleep(2)
response = requests.get(ii,headers=headers)
#print(response.text)
# Extract the markup block holding each individual hotel's information
soup = bs(response.text,"lxml")
hotle_info = soup.select("#hotellist_inner div.sr_item_content.sr_item_content_slider_wrapper")
#print(len(hotle_info))
#print(hotle_info)
# Extract the link to the next page
try:
next_page = soup.select("#search_results_table di