# 某某客信息爬取 — scraper for housing sale listings on chengdu.anjuke.com
import csv
import requests
from bs4 import BeautifulSoup
def get_net(url: str, timeout: float = 10.0) -> str:
    """Download ``url`` with a desktop-browser User-Agent and return the body text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        The response body as decoded by requests (``Response.text``).

    Raises:
        requests.exceptions.RequestException: If the connection fails or the
            timeout expires.

    Note:
        The HTTP status code is not checked; the body of an error response is
        returned like any other page.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36'
    }
    res = requests.get(url=url, headers=headers, timeout=timeout)
    return res.text
def _text_at(nodes, index: int, default: str = '无') -> str:
    """Return the stripped text of ``nodes[index]``, or ``default`` if it is absent."""
    try:
        return nodes[index].text.strip()
    except IndexError:
        return default


def save_file(html: str) -> None:
    """Parse one listing page and write its rows to the module-level CSV ``writer``.

    Every ``div`` matched by ``#esfMain>section .list>div`` becomes one row:
    ``[title, home_type, size, towards, layers, put_time, address_end, money,
    unit_price]``.

    Args:
        html: Markup of a listing page.

    Raises:
        AttributeError: If a listing lacks an element that is read through
            ``select_one`` (title, community name, first address span, prices).
        IndexError: If a listing has fewer than two address ``span`` elements.
        NameError: If no module-level ``writer`` has been created before the call.
    """
    soup = BeautifulSoup(html, 'lxml')
    rows = []
    for div in soup.select('#esfMain>section .list>div'):
        title = div.select_one('.property-content-title>h3').text
        home_type = ''.join(
            span.text
            for span in div.select('.property-content-info>.property-content-info-text>span')
        )

        # Query the info lines once and index into the result; any line that a
        # listing omits falls back to '无' instead of aborting the whole page.
        info = div.select('.property-content-info-text')
        size = _text_at(info, 1)
        towards = _text_at(info, 2)
        layers = _text_at(info, 3)
        put_time = _text_at(info, 4)

        address = div.select_one('.property-content-info-comm-name').text
        address1 = div.select_one('.property-content-info-comm-address>span').text
        address2 = div.select('.property-content-info-comm-address>span')[1].text
        # NOTE(review): this is the text of the whole address element, which
        # contains the spans read above, so address1/address2 presumably appear
        # again inside it — confirm this duplication is intended.
        address3 = div.select_one('.property-content-info-comm-address').text
        address_end = f'{address} {address1}-{address2}-{address3}'

        money = div.select_one('.property-price-total').text.replace(' ', '')
        unit_price = div.select_one('.property-price-average').text
        rows.append([title, home_type, size, towards, layers, put_time, address_end, money, unit_price])

    # ``writer`` is not a parameter: it is looked up as a module-level name.
    writer.writerows(rows)
    print('保存完成')
if __name__ == '__main__':
    import os

    # open() in 'w' mode does not create missing parent directories.
    os.makedirs('files', exist_ok=True)

    # The context manager flushes and closes the CSV file even if fetching or
    # parsing one of the pages raises.
    with open('files/安居客.csv', 'w', encoding='utf-8', newline='') as f:
        # ``writer`` must stay a module-level name: save_file() looks it up globally.
        writer = csv.writer(f)
        writer.writerow(['标题', '房屋类型', '房屋大小', '房屋朝向', '楼层', '建造时间', '具体地址', '总价', '单价(每平方米)'])

        # Result pages 1 through 10.
        for i in range(1, 11):
            url = f'https://chengdu.anjuke.com/sale/p{i}/?from=HomePage_TopBar'
            res = get_net(url)
            save_file(res)