from selenium import webdriver
import time
from numpy import *
import pandas as pd
# Scrape listing prices from 58.com's "fangjiawang" pages for every
# (region id, year) pair, average the prices found on each page, and export
# the averages to an Excel workbook.

# Raw string: the Windows path contains backslashes that must not be parsed
# as escape sequences.
# NOTE(review): `executable_path` and `find_elements_by_xpath` are the legacy
# Selenium API; confirm the installed Selenium version still provides them.
driver = webdriver.Chrome(executable_path=r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver')
content_list = []
# Ids substituted into the "qy-{}" segment of the URL.
diqu = [1025, 1028, 1033, 1026, 1034, 1031, 1030, 1032,
        1024, 1023, 1037, 1027, 1035, 1029, 1038, 1036]
# Values substituted into the "shi-{}-100" segment of the URL; defined once
# because the list does not depend on the region being processed.
list_year = [2021, 2020, 2019, 2018, 2017, 2016, 2015,
             2014, 2013, 2012, 2011, 2010, 2009, 2008]

try:
    for di in diqu:
        for year in list_year:
            driver.get("https://www.58.com/fangjiawang/shi-{}-100/qy-{}/".format(year, di))
            # Presumably gives the page time to render before the DOM query.
            time.sleep(3)
            span = driver.find_elements_by_xpath("//*[@id='main']/div/div[4]/div[3]/div/div/div[1]/div[2]/div[2]/ul/li/a/span")

            qian = []
            for x in span:
                try:
                    # The last three characters are dropped before parsing
                    # (presumably a unit suffix -- verify against the page).
                    qian.append(int(x.text[0:-3]))
                except ValueError:
                    # Skip a span whose text is not a number instead of
                    # aborting the whole run.
                    continue

            # A page with no usable prices is recorded as NaN rather than
            # dividing by zero.
            avg = sum(qian) / len(qian) if qian else float("nan")

            item = {"di": di, "year": year, "avg": avg}
            print(item)
            content_list.append(item)
            # Presumably throttles requests between pages.
            time.sleep(3)
finally:
    # Always close the browser, even when a page load or parse step raises.
    driver.quit()

# One row per scraped page; column headers are the Chinese labels the
# workbook is expected to carry.
data = pd.DataFrame({
    '地区': [row["di"] for row in content_list],
    '年份': [row["year"] for row in content_list],
    '平均房价': [row["avg"] for row in content_list],
})
data.to_excel("1.xlsx")