import requests
import xlrd as xlrd
from xlutils.copy import copy
from lxml import etree
# Residential communities whose closed deals are searched on dl.ke.com.
_COMMUNITIES = (
    '枫丹丽城', '锦泉源', '金色阳光家园', '奥林园', '美域盛景',
    '富士庄园', '润泽园', '骏腾名苑', '泉水友好园', '泉水人家幸福里',
    '泉水家年华', '龙畔金泉三期', '龙畔金泉二期', '龙畔金泉一期',
    '龙畔金泉四期', '龙畔金泉五期K1区', '泉水N3区', '泉水N1区',
    '泉水N2区', '龙畔金泉五期',
)

# Existing .xls workbook that scraped rows are appended to.
_WORKBOOK_PATH = '贝壳成交.xls'

# Result pages 1.._LAST_PAGE are requested for every community.
_LAST_PAGE = 20

# Seconds to wait on a request before giving up instead of hanging forever.
_REQUEST_TIMEOUT = 30

# Built once: the headers are identical for every request.
_REQUEST_HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/96.0.4664.9 Safari/537.36'
    ),
    'Cookie': '',  # Fill in your own cookie here.
}


def _text_at(node, path, index=0):
    """Return the XPath text result at ``index`` for ``path``, or '' if absent."""
    matches = node.xpath(path)
    return matches[index] if len(matches) > index else ''


def _parse_listing(item):
    """Build one spreadsheet row (a list of 7 strings) from a listing element.

    Fields that are missing from the markup become empty strings instead of
    raising ``IndexError``.
    """
    total_price = _text_at(item, './/div[@class="totalPrice"]/span/text()')
    unit_price = _text_at(item, './/div[@class="unitPrice"]/span/text()')
    return [
        _text_at(item, './/div[@class="title"]/a/text()'),
        _text_at(item, './/div[@class="houseInfo"]/text()', 1).strip(),
        _text_at(item, './/div[@class="dealDate"]/text()').strip(),
        # Attach the unit only when a value was actually scraped.
        total_price + '万' if total_price else '',
        _text_at(item, './/div[@class="positionInfo"]/text()', 1).strip(),
        unit_price + '元/平' if unit_price else '',
        _text_at(item, './/span[@class="dealCycleTxt"]/span/text()'),
    ]


def _append_rows(rows, path=_WORKBOOK_PATH):
    """Append ``rows`` below the existing rows of the first sheet of ``path``."""
    workbook = xlrd.open_workbook(path)
    first_free_row = workbook.sheet_by_index(0).nrows
    # xlrd workbooks are read-only; xlutils.copy yields a writable copy.
    writable = copy(workbook)
    sheet = writable.get_sheet(0)
    for offset, row in enumerate(rows):
        for column, value in enumerate(row):
            sheet.write(first_free_row + offset, column, value)
    writable.save(path)


def getData():
    """Scrape closed-deal listings and append them to the .xls workbook.

    For every community in ``_COMMUNITIES``, result pages 1 through
    ``_LAST_PAGE`` are fetched from dl.ke.com. Each listing on a page becomes
    one row: title, house info, deal date, total price, position info, unit
    price and deal-cycle text. Rows are appended to the first sheet of
    ``_WORKBOOK_PATH`` after each page, so pages already saved survive a later
    failure.

    The workbook must already exist: it is opened for reading before every
    write. Pages that yield no listings are skipped without touching it.
    """
    for community in _COMMUNITIES:
        for page_no in range(1, _LAST_PAGE + 1):
            url = f'https://dl.ke.com/chengjiao/pg{page_no}rs{community}/'
            res = requests.get(
                url, headers=_REQUEST_HEADERS, timeout=_REQUEST_TIMEOUT
            )

            # Nothing to parse in an empty body; skip it rather than hand
            # lxml a blank document.
            doc = etree.HTML(res.text) if res.text.strip() else None
            if doc is None:
                continue

            listings = doc.xpath('//ul[@class="listContent"]/li')
            rows = [_parse_listing(item) for item in listings]
            # Drop list items that carried none of the expected fields.
            rows = [row for row in rows if any(row)]
            if not rows:
                continue

            _append_rows(rows)
            print("xls格式表格写入数据成功!")
# Start the scrape only when this file is executed directly, not on import.
if __name__ == "__main__":
    getData()