# Day 16 homework: web scraping
import requests
from re import findall
def parse_listings(html):
    """Extract rental listings from the HTML text of a listing page.

    Three separate regex passes collect the three anchor texts that make up
    a listing's location; they are concatenated without a separator. Areas
    and prices are collected by two further passes, and everything is
    paired up by position.

    Args:
        html: The page source as a string.

    Returns:
        A list of dicts with the keys '位置' (location), '面积' (area,
        including the '㎡' unit) and '价格' (price with '元/月' appended).
        The list is as long as the shortest of the matched lists; an input
        with no matches yields an empty list.
    """
    # NOTE(review): positional pairing assumes every listing produces exactly
    # one match for each pattern - verify against the real page markup.
    location1 = findall(r'<a target="_blank" href="/zufang/\w+/">(\w+)', html)
    location2 = findall(r'<a href="/zufang/\w+/" target="_blank">(\w+)', html)
    location3 = findall(r'<a title="\w+" href="/zufang/\w+/" target="_blank">(\w+)', html)
    locations = [a + b + c for a, b, c in zip(location1, location2, location3)]

    # The fractional part is optional: requiring it would skip whole-number
    # areas such as "65㎡" and shift every later area onto the wrong listing.
    area = findall(r'(\d+(?:\.\d+)?㎡)', html)
    price = findall(r'<span class="content__list--item-price"><em>(\d+)', html)

    return [
        {'位置': lo, '面积': ar, '价格': pr + '元/月'}
        for lo, ar, pr in zip(locations, area, price)
    ]


# Only hit the network when executed as a script, so the parser above can be
# imported and exercised on its own.
if __name__ == '__main__':
    headers = {...}
    # Without a timeout, requests may wait indefinitely on a stalled server.
    response = requests.get('...', headers=headers, timeout=10)
    response.encoding = 'utf-8'
    data = parse_listings(response.text)
    print(data)
#yt
import requests
from re import findall
from json import dumps
import csv
def parse_rentals(html):
    """Extract rental rows from the HTML text of a listing page.

    Prices, areas and three-part addresses are matched by three independent
    regex passes and paired up by position.

    Args:
        html: The page source as a string.

    Returns:
        A list of dicts with the keys '区' (first address component),
        '地址' (the three address components joined with '-'), '价格'
        (price digits) and '面积' (the number preceding '㎡'). The list is
        as long as the shortest of the matched lists; an input with no
        matches yields an empty list.
    """
    # NOTE(review): the area pattern matches any number directly followed by
    # '㎡' anywhere in the page, and the positional pairing assumes one match
    # per listing for every pattern - verify against the real page markup.
    prices = findall(r'<span class="content__list--item-price"><em>(\d+)</em>', html)
    areas = findall(r'(\d+\.?\d*)㎡', html)
    addresses = findall(r'<a target="_blank" href="/zufang/[a-z]+/">([\u4e00-\u9fa5]+)</a>-<a href="/zufang/[a-z]+/" target="_blank">([\u4e00-\u9fa5]+)</a>-<a title="(.+?)"', html)
    return [
        {'区': parts[0], '地址': '-'.join(parts), '价格': price, '面积': area}
        for price, area, parts in zip(prices, areas, addresses)
    ]


def write_rentals_csv(rows, path='files/zf.csv'):
    """Write rental rows to a CSV file with a header line.

    Args:
        rows: An iterable of dicts with the keys '区', '地址', '价格', '面积'.
        path: Destination file; its parent directory must already exist.
    """
    # The context manager guarantees the file is flushed and closed even if
    # writing fails part-way through.
    with open(path, 'w', encoding='utf-8', newline='') as f:
        w = csv.DictWriter(f, ['区', '地址', '价格', '面积'])
        w.writeheader()
        w.writerows(rows)


# Only hit the network when executed as a script, so the helpers above can be
# imported and exercised on their own.
if __name__ == '__main__':
    # Send the request.
    headers = {...}
    # Without a timeout, requests may wait indefinitely on a stalled server.
    response = requests.get('...', headers=headers, timeout=10)
    # Fetch the response body.
    result = response.text
    print(result)
    # Parse the data and save it.
    # To emit JSON instead, write dumps(rows) to 'files/租房信息.json'.
    rows = parse_rentals(result)
    write_rentals_csv(rows)