在掌握 Python 基础知识之后,动手做一个简单的实践:目标是爬取贝壳网上杭州地区的楼盘数据,只涉及楼盘名称、楼盘价格以及楼盘所在区域,并将爬取到的数据保存到本地的 CSV 文件中。
import requests
from bs4 import BeautifulSoup
import csv
# NOTE(review): this chunk appears with all leading indentation stripped, so it
# does not parse as Python as shown. The nesting described in the comments
# below is inferred from the two `for` headers -- confirm against the original.
#
# List of scraped listing records. Per the original comment it is written to
# the file in one go at the end (that code is outside this view).
resblock_info = []
# Crawl listing pages 1 through 50 (range(1, 51) stops before 51).
for i in range(1,51):
# Substitute the page number into the listing URL.
url = ("https://hz.fang.ke.com/loupan/pg{}/").format(i)
# NOTE(review): no timeout is passed to requests.get, so a stalled connection
# could block indefinitely -- consider adding one.
r_obj = requests.get(url)
# Print the HTTP status code of the response.
print(r_obj.status_code)
# Progress message showing which page is being crawled.
print('======' + '爬取第{}个网页'.format(i))
# Parse the raw response bytes with the lxml parser.
bs = BeautifulSoup(r_obj.content, "lxml")
# Find the listing description blocks: every <div class="resblock-desc-wrapper">.
resblock_desc_lst = bs.find_all('div', class_ = 'resblock-desc-wrapper')
for resblock_desc in resblock_desc_lst:
# Listing name: the `title` attribute of the <a class="name"> element.
# NOTE(review): find() returns None when nothing matches, so a block without
# this element would raise here -- TODO confirm the markup always has it.
resblock_name = resblock_desc.find('a', class_ = 'name')['title']
# Listing location: the text of the <a class="resblock-location"> element.
# NOTE(review): the variable name is misspelled ("loaction"); left unchanged
# because code outside this view may reference it.
resblock_loaction = resblock_desc.find('a', class_ = 'resblock-location').text
# Listing price (the extraction itself is outside this view).