目录
第四步:循环遍历分页获取每一页的html代码, 并进行解析,存贮到安居客.csv表格
需要源码可以滴滴我
第一步:导入需要的库包
import csv

import parsel
import requests
第二步:创建存储数据的csv文件,并写入表头
# Create the output CSV (or reuse an existing one) and make sure it starts
# with the header row.
with open('安居客.csv', mode='a', encoding='utf-8', newline='') as f:
    csv_writer = csv.writer(f)
    # In append mode the stream is positioned at end-of-file, so position 0
    # means the file is new or empty. Writing the header only then keeps
    # re-runs from inserting duplicate header rows in the middle of the data.
    if f.tell() == 0:
        csv_writer.writerow(['标题', '户型', '面积', '朝向', '楼层', '年份', '小区名称', '小区地址', '小区标签', '总价', '单价'])


def is_null(str_):
    """Return ``str_`` stripped of surrounding whitespace, or ``''``.

    Args:
        str_: Usually a string; may be ``None`` (or any other object without
            a ``strip`` method) when a lookup found nothing.

    Returns:
        The stripped string, or an empty string when ``str_`` cannot be
        stripped.
    """
    try:
        return str_.strip()
    except AttributeError:
        # Only the "no strip method" case is treated as empty; a bare
        # ``except`` would also swallow KeyboardInterrupt and real bugs.
        return ''
第三步:伪装请求头(headers)信息
# Request headers sent with every page request so it looks like it comes
# from a desktop Chrome browser.
# NOTE(review): the cookie looks like it was captured from a browser session
# and presumably goes stale — confirm and refresh it before running.
headers = {
    'cookie': 'aQQ_ajkguid=65203682-0359-9930-CBBA-9734693ED2B2; id58=CgAEDWS418kpr7crCGcNAg==; 58tj_uuid=9f096fbf-5437-4fd0-9dde-5afe0f2b9ae0; _ga=GA1.2.425045402.1689835426; _ga_DYBJHZFBX2=GS1.2.1689835426.1.0.1689835426.0.0.0; als=0; ajk-appVersion=; seo_source_type=0; isp=true; sessid=51A863B6-13DF-4B7A-A2F1-3B55F2D4153E; new_uv=4; cmctid=37; ctid=14; fzq_h=d95bbf8558990a7685835a69c5fd152c_1693302379932_db99838b4918448e898fd34ced00d89a_47901745104115407541590108365192691714; twe=2; fzq_js_anjuke_ershoufang_pc=99da23648cdbf1c479abd3137316814c_1693302593410_25; obtain_by=1; xxzl_cid=b5eeff6c372648c78ccd85b4088015d4; xxzl_deviceid=P1bQtvM8/2iHmRuAHtW1Z9I/dFTG1TgKIuK/d3obmqPPWXdsh3bOzmHEtq2HVyiU',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
}
第四步:循环遍历分页获取每一页的html代码, 并进行解析,存贮到安居客.csv表格
def _parse_listing(div):
    """Extract the eleven CSV fields from one ``.property-content`` node.

    Args:
        div: A parsel selector for a single listing card.

    Returns:
        A list in CSV column order: title, layout, area, orientation, floor,
        year, community name, community address, community tags, total
        price, unit price. Fields whose selector matches nothing come back
        as empty strings.
    """
    # The first info row holds layout / area / orientation / floor / year,
    # one per child element.
    info_text = '.property-content-info:nth-child(1) .property-content-info-text'

    title = is_null(div.css('.property-content-title-name::text').get())
    # The layout is spread over several <span> elements; join their text.
    layout = ' '.join(div.css(f'{info_text}:nth-child(1) span::text').getall())
    area = is_null(div.css(f'{info_text}:nth-child(2)::text').get())
    orientation = is_null(div.css(f'{info_text}:nth-child(3)::text').get())
    floor = is_null(div.css(f'{info_text}:nth-child(4)::text').get())
    year = is_null(div.css(f'{info_text}:nth-child(5)::text').get())

    # The second info row describes the residential community.
    community_name = is_null(div.css('.property-content-info:nth-child(2) .property-content-info-comm-name::text').get())
    community_address = is_null(div.css('.property-content-info:nth-child(2) .property-content-info-comm-address::text').get())

    # The third info row is a list of tag <span> elements.
    community_tags = ' '.join(div.css('.property-content-info:nth-child(3) span::text').getall())

    total_price = is_null(div.css('.property-price .property-price-total .property-price-total-num::text').get())
    # Price per square metre.
    unit_price = is_null(div.css('.property-price .property-price-average::text').get())

    return [title, layout, area, orientation, floor, year,
            community_name, community_address, community_tags,
            total_price, unit_price]


# Walk result pages 1-50 and append one CSV row per listing. The file is
# opened once for the whole run instead of once per row.
with open('安居客.csv', mode='a', encoding='utf-8', newline='') as f:
    csv_writer = csv.writer(f)
    for page in range(1, 51):
        url = f'https://chongqing.anjuke.com/sale/p{page}/?from=navigation'

        # 1. Send the request. The timeout keeps an unresponsive server from
        #    hanging the script forever; a timeout raises instead.
        response = requests.get(url=url, headers=headers, timeout=10)

        # 2. Get the page HTML.
        html_data = response.text

        # 3. Parse it: each listing is a <div class="property-content">.
        #    (The debugging `print(html_data); break` that used to sit here
        #    left the page loop before any listing was parsed or saved.)
        select = parsel.Selector(html_data)
        divs = select.css('.property-content')

        for div in divs:
            row = _parse_listing(div)
            print(*row)
            # 4. Save the row.
            csv_writer.writerow(row)
以上就完成了安居客房产数据的爬取,下面是csv表格的数据信息:
如果我的原创文章对你有用,请关注点赞,谢谢