import json
from openpyxl import Workbook
import time
import requests
# Paging state: start at page 1, fetch at most `maxpage` pages, and keep
# going while `conti` stays True.
pagecount, maxpage = 1, 4
conti = True

# Rows collected from every fetched page.
all_data = []

# Fill in the URL of the endpoint you want to scrape.
# The page number is left as the `{page}` placeholder so it can be filled in
# per request with str.format.
url = "http://xxx.xxx.xxx.xxxx:xxxx/xxxxx/xxx/project/page?page={page}&limit=10&search=&status=-1&orderField=&order="

# Request headers. Per the original author's note, User-Agent and
# Content-Type are required.
headers = {
    "User-Agent": "xxxxxxx",
    "token": "xxxxxxxx",
    "Content-Type": "application/json;charset=UTF-8",
}
# Fetch result pages one at a time until either an empty page is returned
# or `maxpage` pages have been requested. Each record on a page is flattened
# into a tuple and appended to `all_data`.
while conti and pagecount <= maxpage:
    # Pause before every request so the server is not hit in a tight loop.
    time.sleep(2)
    # A timeout keeps the script from hanging forever on a stalled server.
    r = requests.get(url.format(page=pagecount), headers=headers, timeout=30)
    # Stop with a clear HTTP error instead of failing later while parsing
    # an error page as if it were the expected JSON payload.
    r.raise_for_status()
    # Decode the JSON response body.
    data = r.json()
    # The records for this page live under data -> list.
    worklist = data['data']['list']
    # Report how many records this page returned.
    print(len(worklist))
    # print(data)
    if not worklist:
        # An empty page means there is nothing further to fetch.
        conti = False
    else:
        for i in worklist:
            # Pull out the fields of interest, in spreadsheet column order.
            all_data.append((
                i['name'],
                i['owner'],
                i['ownerEmail'],
                i['ownerName'],
                i['createDate'],
                i['description'],
            ))
        print(all_data)
    pagecount = pagecount + 1
# Header row for the sheet, kept as a one-element list of tuples.
biaotou = [('Project Name', 'Login Id', 'Email', 'Owner', 'Date', 'Description')]

# Create a workbook and take its active worksheet.
wb = Workbook()
ws = wb.active

# Write the header first, then one worksheet row per collected record.
for row in [*biaotou, *all_data]:
    ws.append(row)

# Save the workbook to disk.
wb.save('D:\\xxxxxxx\\pachong.xlsx')
本人是初学者,如果有不对的地方,希望帮忙指正,非常感谢!
Python学习(5):网页爬虫并写入表格(以get方法为例)
最新推荐文章于 2022-02-17 14:55:11 发布