day16作业-爬取

qq_63449560

已于 2023-01-05 18:47:23 修改

阅读量82

点赞数

文章标签： python 前端 开发语言

于 2023-01-04 14:16:59 首次发布

本文链接：https://blog.csdn.net/qq_63449560/article/details/128547380

版权

day16作业-爬取

import requests

from re import findall

# Fetch the listing page. The header values and the URL are elided
# ({...} / '...') in this listing.
headers = {...}
response = requests.get('...', headers=headers)
response.encoding = 'utf-8'

# Decode the body once and reuse it for every pattern below.
html = response.text

# Each listing's location is spread over three consecutive <a> tags; grab the
# text of each kind of tag separately.
location1 = findall(r'<a target="_blank" href="/zufang/\w+/">(\w+)', html)
location2 = findall(r'<a href="/zufang/\w+/" target="_blank">(\w+)', html)
location3 = findall(r'<a title="\w+" href="/zufang/\w+/" target="_blank">(\w+)', html)

# Glue the three parts of every location together (no separator).
# zip() stops at the shortest of the three lists.
locations = [
    first + second + third
    for first, second, third in zip(location1, location2, location3)
]

# Area strings, unit included; only values with a decimal point match.
area = findall(r'(\d+\.\d+㎡)', html)

# Monthly price digits taken from the price <span>.
price = findall(r'<span class="content__list--item-price"><em>(\d+)', html)

# Combine the parallel lists into one dict per listing and show the result.
data = [
    {'位置': where, '面积': size, '价格': cost + '元/月'}
    for where, size, cost in zip(locations, area, price)
]
print(data)

#yt
import requests
from re import findall
from json import dumps
import csv

# Send the request. The header values and the URL are elided ({...} / '...')
# in this listing.
headers = {...}

response = requests.get('...', headers=headers)
# Get the response body and echo it for inspection.
result = response.text
print(result)

# Parse the data.
# Monthly price digits taken from the price <span>.
price = findall(r'<span class="content__list--item-price"><em>(\d+)</em>', result)

# Area figure without the unit; the decimal part is optional.
area = findall(r'(\d+\.?\d*)㎡', result)

# The pattern has three groups, so every match is a 3-tuple: the text of the
# first two <a> tags plus the title attribute of the third.
addreess = findall(r'<a target="_blank" href="/zufang/[a-z]+/">([\u4e00-\u9fa5]+)</a>-<a href="/zufang/[a-z]+/" target="_blank">([\u4e00-\u9fa5]+)</a>-<a title="(.+?)"', result)

# Build one row per listing. zip() stops at the shortest of the three lists.
# A real list (not a one-shot map iterator) lets the rows be reused.
data = [
    {'区': parts[0], '地址': '-'.join(parts), '价格': cost, '面积': size}
    for cost, size, parts in zip(price, area, addreess)
]

# Write the rows to CSV. The with-block guarantees the file is flushed and
# closed even if writing fails; newline='' is what the csv module expects.
with open('files/zf.csv', 'w', encoding='utf-8', newline='') as f:
    w = csv.DictWriter(f, ['区', '地址', '价格', '面积'])
    w.writeheader()
    w.writerows(data)