最近在学习 Python,因为需要分析房价信息,所以学习了如何爬取网站信息。目前已经能够爬取到房价信息,在此做一个分享。
需要提前安装 requests、bs4(即 beautifulsoup4)和 pandas 库;time 是 Python 标准库,无需另行安装。
获取一页的信息
import requests as req
import time
import pandas as pd
#res = req.get("https://cd.esf.fang.com/house-a016418/?rfss=1-1048a9a524cd8a4e0b-76" )
#使用beautifulsoup解析res
from bs4 import BeautifulSoup
#soup = BeautifulSoup(res.text,"html.parser")#解析器
#houses = soup.select(".shop_list.shop_list_4 dl")
def getHouseInfo(url):
#print(url)
info={} #存储解析的信息
# print(req.post(url).text)
mid = req.get(url)#中转
soup1 = BeautifulSoup(req.get(url).text, "html.parser")
for k in soup1.find_all('a'):
#print(k)
fin = k.get('href')
#print(fin)
soup = BeautifulSoup(req.get(fin).text,'html.parser')
res = soup.select(".tab-cont-right .trl-item1&