目录
一、导入所需的库
import requests
from bs4 import BeautifulSoup
from lxml import etree
import pandas as pd
二、获取网站
这里以链家西安站长安区的新房楼盘列表为例。
# Download listing pages 2-6 and keep each page's decoded HTML in `datas`.
datas = []

# Built once, outside the loop: the header is identical for every request.
# The User-Agent is a single string assembled from adjacent literals, so no
# backslash or line break ends up inside the header value.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/104.0.0.0 Safari/537.36'
    )
}

for page in range(2, 7):
    # Put the page number into the URL; with a fixed "pg2" every iteration
    # would download the same page again.
    url = 'https://xa.fang.lianjia.com/loupan/changan7/pg{}/#changan7'.format(page)
    # A timeout keeps a stalled connection from blocking the script forever.
    res = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on an HTTP error instead of storing an error page as data.
    res.raise_for_status()
    data = res.content.decode('utf-8')
    datas.append(data)
三、使用 lxml（XPath）筛选字段，并用 pandas 汇总成表格
# Extract one row per listing from every downloaded page in `datas`, then
# gather the rows into a pandas DataFrame named `table`.
names = []
areas = []
Specific_locations = []
unit_prices = []
Total_prices = []

for page_html in datas:
    data1 = etree.HTML(page_html)
    # Listings are read from li[2] .. li[9] of the second <ul> under div[3].
    # A separate index name is used so it does not shadow the outer loop
    # variable.
    for idx in range(2, 10):
        item = '/html/body/div[3]/ul[2]/li[' + str(idx) + ']'
        name = data1.xpath(item + '/div/div[1]/a/text()')
        area = data1.xpath(item + '/div/div[2]/span[2]/text()')
        Specific_location = data1.xpath(item + '/div/div[2]/a/text()')
        unit_price = data1.xpath(item + '/div/div[6]/div[1]/span[1]/text()')
        Total_price = data1.xpath(item + '/div/div[6]/div[2]/text()')

        # xpath() returns an empty list when nothing matches. If every field
        # is empty the <li> is not a listing on this page, so skip it.
        if not (name or area or Specific_location or unit_price or Total_price):
            continue

        # A listing may lack individual fields; store None for those instead
        # of raising IndexError, so all five columns stay the same length.
        names.append(name[0] if name else None)
        areas.append(area[0] if area else None)
        Specific_locations.append(Specific_location[0] if Specific_location else None)
        unit_prices.append(unit_price[0] if unit_price else None)
        Total_prices.append(Total_price[0] if Total_price else None)

table = pd.DataFrame({
    '小区名字': names,
    '地区': areas,
    '具体位置': Specific_locations,
    '单价': unit_prices,
    '总价': Total_prices,
})
# Bare expression: shows the frame when run in a notebook cell or REPL.
table