import requests
from bs4 import BeautifulSoup
import pandas as pd
# Request headers: present a desktop-browser User-Agent so the site serves
# the normal HTML page instead of blocking the scraper.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4644.87 Safari/537.36'
}
# Price filter bounds (unit: 10,000 CNY), interpolated into the URL as bp/ep.
min_price = 200 # lowest total price
max_price = 400 # highest total price
# District slugs to crawl on sh.ke.com (each becomes one URL path segment).
regions = ['zsgy', 'caojiadu', 'dongwaitan', 'caohexi', 'xuhuibinjiang', 'xujiahui', 'huangpu', 'beiwaitan', 'xizangbeilu', 'nanjingxilu', 'jingansi']
# Column headers for the output table:
# price, title, house info, residential complex, district.
column_names=['价格', '标题', '房屋信息', '所在小区', '所属地区']
# Number of result pages to fetch per region.
pages = 3
# Crawl every region/page combination and accumulate all listings into one
# DataFrame, then export it as CSV.
#
# BUG FIXES vs. the original:
#  - `house_data = []` lived inside the per-page loop, so each new page wiped
#    the rows collected so far and the final DataFrame only held the last
#    page scraped (this is why "only one page of data" came out).
#  - Each row appended `regions` (the entire list) instead of the current
#    `region` string.
#  - `column_names` was defined but never passed to the DataFrame.
#  - `pages = 3` was defined but the loop hard-coded `range(1, 3)` (2 pages).
house_data = []  # accumulator for ALL regions and pages — must live outside both loops
for region in regions:
    for page in range(1, pages + 1):  # honor the configured page count
        # Beike URL pattern: pg<page> + bp<min>/ep<max> price filter.
        url = f'https://sh.ke.com/ershoufang/{region}/pg{page}bp{min_price}ep{max_price}'
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, 'html.parser')  # parse the result page
        # One 'info clear' div per listing card on the results page.
        house_list = soup.find_all('div', class_='info clear')
        for house in house_list:
            houseprice = house.find('div', class_='totalPrice').span.text  # total price
            housetitle = house.find('div', class_='title').a.text          # listing title
            houseinfo = house.find('div', class_='houseInfo').text         # layout/area/orientation blob
            houseposition = house.find('div', class_='positionInfo').a.text  # residential complex name
            # Append the CURRENT region slug (the original appended the whole list).
            house_data.append([houseprice, housetitle, houseinfo, houseposition, region])

# Build the table once, after every page has been collected.
data = pd.DataFrame(house_data, columns=column_names)
print(data)
# utf_8_sig adds a BOM so Excel on Windows opens the Chinese text correctly.
data.to_csv('d:/house_data.csv', encoding='utf_8_sig')
# Original question: why does the script only capture one page of data even
# though the loops are in place? Cause: `house_data` was re-created inside the
# per-page loop, so every page overwrote the rows gathered before it.