POST 请求,接口返回的 JSON 数据格式如下(示例):
{"code":0,"msg":"","count":20409,"data":[{"name":"张三"},{"SEX":"男"},{"age":"20"}]}
具体代码
# -*- coding = utf-8 -*-
# @Time: 2021/1/30
# @File: spiderOwner.py
# @Software: PyCharm
import requests
import json
import xlwt
# Endpoint that returns one page of resident records as JSON.
_BASE_URL = "http://www.behomesc.com/owner/listData"

# Cookie sent with every request.
# NOTE(review): this is a hard-coded credential-like value; consider loading
# it from the environment or a config file instead of committing it.
_COOKIES = {"custom.name": "65235d4a-c208-4364-82ed-505c61df22af"}

# (JSON key, spreadsheet header) pairs, in output column order:
# resident id, name, ID-card number, phone, birthday, ethnicity, photo.
_COLUMNS = (
    ("toId", "住户ID"),
    ("toName", "住户名字"),
    ("toCardNum", "身份证"),
    ("toPhone", "电话"),
    ("toBirthday", "生日"),
    ("toEthnic", "民族"),
    ("toPhoto", "照片"),
)


def _fetch_page(page, page_size, timeout):
    """POST for one 1-based page and return its list of record dicts.

    Returns an empty list when the response has no ``data`` entry (or it is
    null/empty). Raises ``requests.HTTPError`` on a 4xx/5xx response.
    """
    response = requests.post(
        _BASE_URL,
        data={"limit": page_size, "page": page},
        cookies=_COOKIES,
        timeout=timeout,  # without a timeout a stalled server blocks forever
    )
    response.raise_for_status()
    payload = json.loads(response.text)
    return payload.get("data") or []


def _save_rows(rows, output_path):
    """Write the header row plus every row in ``rows`` to an .xls workbook."""
    book = xlwt.Workbook(encoding="utf-8", style_compression=0)
    sheet = book.add_sheet("住户信息", cell_overwrite_ok=True)
    print("开始保存。。。")
    for col_index, (_, header) in enumerate(_COLUMNS):
        sheet.write(0, col_index, header)
    # Row 0 holds the headers, so data rows start at 1. Iterating the list
    # directly writes every record, including the last one.
    for row_index, row in enumerate(rows, start=1):
        print("第%d条" % row_index)
        for col_index, value in enumerate(row):
            sheet.write(row_index, col_index, value)
    book.save(output_path)


def main(pages=99, page_size=10, output_path="住户信息.xls", timeout=10):
    """Scrape resident records page by page and save them to a spreadsheet.

    Args:
        pages: Maximum number of pages to request (pages are 1-based).
        page_size: Number of records requested per page.
        output_path: Path of the .xls file to write.
        timeout: Per-request timeout in seconds.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
        ValueError: If a response body is not valid JSON.
    """
    rows = []
    for page in range(1, pages + 1):
        print("第%d页" % page)
        # 1. Fetch the page.
        records = _fetch_page(page, page_size, timeout)
        if not records:
            # An empty page means we ran past the last record (or the server
            # returned no data); further requests would be wasted.
            break
        # 2. Keep only the columns we export, in column order.
        rows.extend([record.get(key) for key, _ in _COLUMNS] for record in records)
    # 3. Persist everything that was collected.
    _save_rows(rows, output_path)
    print("爬取完毕。。。")
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()