"""
爬取链家地址二手房信息
目标:爬取标题名称 地址 总价
"""
import requests
import urllib.request
import urllib.parse
import re
# Listing pages are addressed as https://bd.lianjia.com/ershoufang/pg1/, pg2/, ...
url = "http://bd.lianjia.com/ershoufang/pg"
headers = {"User-Agent":"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"}

# Raw-string patterns, compiled once and reused for every page.
# The region capture takes everything up to the closing tag, so names that
# contain digits are kept; the price capture also accepts a decimal part.
# Skipping either kind of listing would shift the two result lists against
# each other and pair prices with the wrong names.
REGION_RE = re.compile(r'data-el="region">([^<]+)</a>')
TOTAL_PRICE_RE = re.compile(r'class="totalPrice"><span>(\d+(?:\.\d+)?)</span>万</div>')


def parse_listings(html):
    """Return one formatted output line per listing found in *html*.

    Each line has the form ``"<region> <total price>万 "`` (no trailing
    newline). Regions and prices are paired in document order; if the two
    patterns match a different number of times, the surplus entries are
    dropped rather than raising ``IndexError``.
    """
    regions = REGION_RE.findall(html)
    prices = TOTAL_PRICE_RE.findall(html)
    return [
        region + " " + price + "万" + " "
        for region, price in zip(regions, prices)
    ]


def scrape(first_page=1, last_page=100, out_path="房.txt", timeout=10):
    """Fetch listing pages and append the parsed lines to *out_path*.

    Pages ``first_page`` through ``last_page`` (inclusive) are requested by
    appending the page number to ``url``. Numbering starts at 1, matching
    the example URL above. A page that cannot be fetched (network error,
    timeout, or HTTP error status) is reported and skipped so one bad page
    does not abort the whole run.
    """
    for page in range(first_page, last_page + 1):
        page_url = url + str(page)
        try:
            response = requests.get(page_url, headers=headers, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f"skipping {page_url}: {exc}")
            continue

        # Force UTF-8 decoding instead of relying on requests' guess.
        response.encoding = "utf-8"
        rows = parse_listings(response.text)
        if not rows:
            continue

        # Open the file once per page and write all rows in one batch.
        with open(out_path, "a", encoding="utf-8") as f:
            f.writelines(row + "\n" for row in rows)


if __name__ == "__main__":
    scrape()
# python爬虫技术 爬取链家二手房信息 保存到本地
# 最新推荐文章于 2024-06-16 10:39:57 发布