import request
import csv
from bs4 import BeautifulSoup
from tqdm import tqdm
# 2. Function that sends the request, receives the response, and returns the page's HTML source
def get_response(link: str, *, timeout: float = 10.0) -> str:
    """Request a page and return its HTML source.

    Args:
        link: URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The response body as text when the status code is 200, otherwise an
        empty string.

    Raises:
        Exceptions raised by ``requests.get`` (connection errors, timeouts)
        are not caught here and propagate to the caller.
    """
    # NOTE(review): the file-level import reads ``import request`` (likely a
    # typo for ``requests``); import locally so this function does not depend
    # on that line being corrected.
    import requests

    # Send a desktop-browser User-Agent header with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36'
    }
    response = requests.get(url=link, headers=headers, timeout=timeout)
    return response.text if response.status_code == 200 else ''
# 3. Function that extracts information from the page
def get_data(html_tree) -> list:
    """Extract title, unit price and total price for every listing on a page.

    Args:
        html_tree: Parsed page exposing ``select`` (e.g. a BeautifulSoup
            object); each selected item must expose ``select_one`` and
            ``select``, and matched nodes must expose ``text``.

    Returns:
        A list of ``[title, unit_price, total_price]`` rows, one per listing,
        where ``total_price`` has the suffix ``'万元'`` appended. Items that
        lack a title link or have fewer than two price spans are skipped.
    """
    # 1. Collect the <li> tag of every second-hand-house listing on the page.
    listings = html_tree.select('html > body > div.content ul.sellListContent > li')
    data = []
    # 2. Extract the details of each listing.
    for item in listings:
        # Listing title.
        title_node = item.select_one('li > div.info.clear > div.title > a')
        # Price spans: the first holds the total price, the second the unit price.
        price_spans = item.select('li > div.info.clear > div.priceInfo span')
        # Skip <li> items that are not complete listings instead of raising
        # AttributeError / IndexError on them.
        if title_node is None or len(price_spans) < 2:
            continue
        total_price = price_spans[0].text + '万元'
        unit_price = price_spans[1].text
        data.append([title_node.text, unit_price, total_price])
    return data