用 Python 爬取二手房交易信息并分析
- 第一步:编写爬虫
爬取某平台上海市十个区共900条二手房的交易信息
#爬取上海十个区的二手房价信息
import requests
from bs4 import BeautifulSoup
import csv
def gethousetext(url, timeout=10):
    """Download a page and return its decoded text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so that a
            single stalled connection cannot hang the whole crawl.

    Returns:
        The response body as text, decoded with the encoding that requests
        detects from the content, or the string "error" when the request
        fails or the server answers with an HTTP error status.
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
    except requests.RequestException:
        # Keep the original "error" sentinel so existing callers still work,
        # but no longer swallow KeyboardInterrupt or programming errors.
        return "error"
    # Prefer the content-detected encoding over the one from the headers.
    r.encoding = r.apparent_encoding
    return r.text
def gethouseinfo(h_list, html, loc):
    """Extract listing rows from a page and append them to ``h_list``.

    Every ``<div class="listX">`` element is treated as one listing. For
    each listing a five-item row is built from the text of its 1st, 3rd,
    4th and 5th ``<p>`` children, with ``loc`` inserted as the second item.
    (Per the original comment these fields carry price, area and detail
    information; the exact mapping depends on the page layout, which is not
    visible here.)

    Args:
        h_list: List that receives the rows; it is modified in place.
        html: Markup of the page to parse.
        loc: Value stored as the second field of every row.
    """
    soup = BeautifulSoup(html, "html.parser")
    for listing in soup.find_all('div', attrs={'class': "listX"}):
        p = listing.find_all('p')
        if len(p) < 5:
            # An entry without the expected five paragraphs cannot be turned
            # into a row; skip it instead of aborting the whole crawl with
            # an IndexError.
            continue
        h_list.append([p[0].text, loc, p[2].text, p[3].text, p[4].text])
def storehouseinfo(h_list, fpath):
# 打开csv文件,写入数据
with open(fpath, 'w', encoding='utf-8') as f:
house_csv = csv.wr