from lxml import etree
from fake_useragent import UserAgent
import time
import json
import requests
import csv
import random
# Randomized desktop User-Agent per run to reduce the chance of being blocked.
headers = {
    'User-Agent': UserAgent().random,
}

# CSV columns written for each community row (see the scrape loop below):
# 小区名称, 单价, 物业类型, 物业费, 总建面积, 总户数, 竣工时间, 容积率, 绿化率, 开发商
# (community name, unit price, property type, property fee, total floor area,
#  total households, completion year, plot ratio, greening rate, developer)

# Single HTTP proxy for all requests.
# NOTE(review): hard-coded public proxy — almost certainly stale; verify or rotate.
proxies = {
    'http': '125.120.201.120:808',
}
def _clean(text):
    """Strip tab/newline noise that lxml text extraction leaves in field values."""
    return text.replace("\t", "").replace("\n", "")


def _parse_detail(detail_tree):
    """Extract the 8 basic-info fields from a community detail page.

    The dd values of the ``#basic-infos-box`` definition lists sit at fixed
    offsets in the flattened text-node list.  Returns a flat list of cleaned
    strings (empty when the box is absent or a field is missing).

    NOTE(review): the original code assigned offset 23 and immediately
    overwrote it with 27, so the effective offsets below intentionally skip 23.
    """
    fields = []
    for dl in detail_tree.xpath('//*[@id="basic-infos-box"]/dl'):
        texts = dl.xpath('.//text()')
        for idx in (3, 7, 11, 15, 19, 27, 31, 35):
            # Guard short pages: the original raised IndexError (or NameError
            # on an empty text list) instead of degrading gracefully.
            fields.append(_clean(texts[idx]) if idx < len(texts) else "")
    return fields


# Crawl listing pages 1..49 of the Weiyang district, follow each community
# link, and append one CSV row (name, price, 8 detail fields) per community.
for page in range(1, 50):
    list_url = 'https://xa.anjuke.com/community/weiyangq/p{}'.format(page)
    listing_resp = requests.get(url=list_url, headers=headers, proxies=proxies)
    listing_tree = etree.HTML(listing_resp.text)
    cards = listing_tree.xpath(
        '//*[@id="__layout"]/div/section/section[3]/section/div[2]/a')
    print(page, "ok")

    for card in cards:
        names = card.xpath('./div[2]/div[1]//text()')
        prices = card.xpath('./div[3]/div//text()')
        links = card.xpath('./@href')
        if not names or len(prices) < 2 or not links:
            continue  # malformed card: skip instead of crashing mid-crawl

        # Price text is split across two nodes; join and drop layout whitespace.
        # (Renamed from `sum`, which shadowed the builtin.)
        price_text = (prices[0] + prices[1]).replace("\n", "").replace(" ", "")
        row = [names[0], price_text]

        # Fetch the community detail page and append its basic-info fields.
        detail_resp = requests.get(url=links[0], headers=headers, proxies=proxies)
        detail_tree = etree.HTML(detail_resp.text)
        row.extend(_parse_detail(detail_tree))

        print(row)
        with open('未央区11.csv', 'a', newline='', encoding='utf-8-sig') as fp:
            csv.writer(fp).writerow(row)

        # Random politeness delay between detail requests.
        time.sleep(random.randint(4, 14))
# 安居客二手房 (Anjuke second-hand housing scraper)
# Copy-paste artifact from the source blog page: "最新推荐文章于 2024-05-20 11:39:23 发布"