爬取58二手房源信息
具体代码如下:
# Goal: scrape second-hand housing listings (title + price) from 58.com
# and persist them as a JSON file.
import os
import requests
from lxml import etree
import json

# Request headers. 'User-Agent' and 'Cookie' must be two separate dict
# entries: adjacent string literals are concatenated by Python, which would
# silently glue the cookie onto the User-Agent value and send no cookie.
# NOTE(review): the cookie below is a captured browser session; it will
# expire and should not be treated as a long-lived credential.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/89.0.4389.90 Safari/537.36 Edg/89.0.774.63'
    ),
    'Cookie': (
        'aQQ_ajkguid=9FF736F9-3877-44CC-A02E-20939CAE451E; '
        'sessid=0E59B71C-E4E9-4632-8C99-15FC1E578F08; '
        'ajk-appVersion=; '
        'id58=CrIBZ2Jrw5uDNwpaDfW+Ag==; '
        '58tj_uuid=620b97db-8fc7-4e68-ab52-db12d42a0821; '
        '58home=xx; '
        'ctid=936; '
        'als=0; '
        'fzq_h=820c8468aad2a707bd1fb8a0bcd38d3e_1651375003851_'
        '35226f069a5148ecbe3a3aa44e29cc09_1973364391; '
        'new_uv=4; '
        'utm_source=; '
        'spm=; '
        'init_refer=https%253A%252F%252Fcallback.58.com%252F; '
        'JSESSIONID=C65773B591E415D1B23B8784545E8982; '
        'xxzl_cid=d0f8da6fb32f411aa8a41995a5c4fee2; '
        'xzuid=b4a99cd4-16a1-4b63-9d1c-26aa0f24feeb; '
        'new_session=0'
    ),
}
url = 'https://cn.58.com/ershoufang/'
output_path = 'D:/2007/58.json'

# Fetch the page source once and reuse the response for both the status
# print-out and the body; a timeout keeps the script from hanging forever.
response = requests.get(url=url, headers=headers, timeout=10)
print(response)
# Fail loudly on HTTP errors instead of overwriting the output with [].
response.raise_for_status()
page_text = response.text

# Parse the HTML; each child <div> of the "list" section is one listing.
tree = etree.HTML(page_text)
div_list = tree.xpath('//section[@class="list"]/div')

content = []
for div in div_list:
    # Paths are relative to the current listing card.
    titles = div.xpath('./a/div[2]/div/div/h3/text()')
    prices = div.xpath('./a/div[2]/div[2]/p[2]/text()')
    # Cards that lack a title or a price (e.g. ad slots) are skipped rather
    # than aborting the whole run with an IndexError.
    if not titles or not prices:
        continue
    title, price = titles[0], prices[0]
    print(title, price)
    content.append({'title': title, 'price': price})

# Persist as JSON. The file is rewritten ('w') so it always holds exactly one
# valid JSON document (appending a second array would make it unparseable),
# and UTF-8 is explicit because ensure_ascii=False emits raw Chinese text.
output_dir = os.path.dirname(output_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
with open(output_path, 'w', encoding='utf-8') as fp:
    json.dump(content, fp=fp, ensure_ascii=False, indent=2)