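# NOTE: forum boilerplate from the original post ("this post was flagged and collapsed by the system"); not part of the script.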
import urllib.request
from bs4 import BeautifulSoup
import csv
import re
def openlink(link, max_tries=20, timeout=None):
    """Fetch *link* and return its body decoded as UTF-8, retrying on failure.

    Args:
        link: URL (or ``Request`` object) accepted by
            ``urllib.request.urlopen``.
        max_tries: Maximum number of attempts before giving up.
        timeout: Optional per-attempt timeout in seconds. ``None`` leaves
            urllib's own default in effect.

    Returns:
        The decoded response text, or ``None`` when every attempt failed
        (a failure message is printed to stdout in that case).
    """
    # Only forward a timeout when one was requested, so the default call is
    # exactly ``urlopen(link)``.
    open_kwargs = {} if timeout is None else {"timeout": timeout}
    last_error = None
    for _ in range(max_tries):
        try:
            # The context manager closes the response even if read() or
            # decode() raises.
            with urllib.request.urlopen(link, **open_kwargs) as response:
                return response.read().decode('utf-8')
        except Exception as exc:
            # Deliberately broad: any network, HTTP or decoding failure is
            # retried. Unlike a bare ``except:``, this lets
            # KeyboardInterrupt and SystemExit propagate so the retry loop
            # can be interrupted.
            last_error = exc
    print("Has tried %d times to access url %s, all failed! (last error: %r)"
          % (max_tries, link, last_error))
    return None
# Download the hotel index page and collect the href of every link found
# inside its <ul class="p_n_list grid_8"> lists.
cityurl='http://hotels.ctrip.com/jiudian/neimenggu'
cityhtml = openlink(cityurl)
if cityhtml is None:
    # openlink returns None once its retries are exhausted; stop with a clear
    # message instead of letting BeautifulSoup fail on a None document.
    raise SystemExit("Could not download the city list from %s" % cityurl)
citysoup = BeautifulSoup(cityhtml, 'html.parser')
city_list = citysoup.find_all('ul', attrs={"class":"p_n_list grid_8"})
# Re-parse the matched <ul> elements as one document so that a single
# find_all gathers the anchors from all of them.
citysoup1 = BeautifulSoup(str(city_list), 'html.parser')
city_list1 = citysoup1.find_all('a', attrs={"href":True})
city_id = [city['href'] for city in city_list1]
print(city_id)
# Iterate over a one-element slice of the collected hrefs (index 13 only).
for cityid in city_id[13:14]:
# Capture the lowercase letters that follow a '/' and precede the first digit.
# NOTE(review): re.findall returns a list, so str(cityname) below embeds the
# list's brackets and quotes in the file name -- confirm this is intended.
cityname=re.findall(r"/([a-z]+?)[0-9]",cityid)
# NOTE(review): no close() for this file is visible in this part of the file.
csvFile2 = open('Hotel'+str(cityname)+'.csv','w', newline='',encoding='utf-8-sig') # set newline='', otherwise a blank line appears between rows
writer = csv.writer(csvFile2)