Check the raw API response with curl (quote the URL, otherwise the shell treats `&` as a command separator):
curl 'http://www.kuaidi100.com/query?type=yuantong&postid=11111111111'
#!/usr/bin/python
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
from urllib.request import Request,urlopen
from urllib.parse import quote
import json
import openpyxl
def get_data(search_url):
    """Fetch *search_url* and return the response parsed with BeautifulSoup.

    The kuaidi100 endpoint returns JSON, so the parsed "HTML" is really just
    the raw JSON text wrapped by the parser; handle_data() extracts it again
    with get_text().
    """
    request = Request(search_url)
    # request.add_header('Cookie', cookie)
    # Use the response as a context manager so the underlying socket is
    # closed even if parsing raises (the original leaked the connection).
    with urlopen(request) as response:
        html = BeautifulSoup(response, 'html.parser', from_encoding='utf-8')
    return html
def handle_data(html):
    """Extract tracking records from the parsed API response.

    Parameters:
        html: a BeautifulSoup document (or any object with get_text())
              whose text content is the JSON payload from kuaidi100.

    Returns:
        list[list]: one [time, ftime, context, location] row per tracking
        event found under the JSON 'data' key.

    Raises:
        KeyError: if the payload has no 'data' key or an event lacks one of
        the expected fields (e.g. an API error response).
    """
    # The whole response body is the JSON payload.
    rawdata = html.get_text()
    print(rawdata)
    json1 = json.loads(rawdata)
    print(json1)
    json2 = json1['data']
    print(json2)
    # One row per tracking event, columns in fixed order.
    result = [[entry['time'], entry['ftime'], entry['context'], entry['location']]
              for entry in json2]
    print(result)
    return result
# Create a new Excel workbook
def creatwb(wbname):
    """Create an empty workbook at *wbname* and report success."""
    workbook = openpyxl.Workbook()
    workbook.save(filename=wbname)
    print("新建Excel:" + wbname + "成功")
def savetoexcel(result, fields, wbname):
    """Write *fields* as a header row and *result* as data rows to *wbname*.

    Parameters:
        result: list of rows, each a list of cell values.
        fields: column header labels, written to row 1.
        wbname: path of the .xlsx file to create.

    The workbook is created fresh via creatwb(), reopened, populated and
    saved; any existing file at *wbname* is overwritten.
    """
    creatwb(wbname)
    wb = openpyxl.load_workbook(filename=wbname)
    sheet1 = wb.active  # the default (active) worksheet
    # Header row (openpyxl rows/columns are 1-based).
    for col, field in enumerate(fields, start=1):
        sheet1.cell(row=1, column=col, value=str(field))
    # Data rows start at row 2, directly under the header.
    for row, record in enumerate(result, start=2):
        for col, value in enumerate(record, start=1):
            sheet1.cell(row=row, column=col, value=value)
    wb.save(filename=wbname)
    print("保存成功")
if __name__ == '__main__':
    # Same endpoint that can be checked manually with:
    #   curl 'http://www.kuaidi100.com/query?type=yuantong&postid=11111111111'
    query_url = 'http://www.kuaidi100.com/query?type=yuantong&postid=11111111111'
    page = get_data(query_url)
    records = handle_data(page)
    headers = ['time', 'ftime', 'context', 'location']
    output_path = './filename.xlsx'
    savetoexcel(records, headers, output_path)
Output at each stage of the pipeline:
rawdata: the raw JSON text extracted from the HTTP response
json1: the parsed JSON object
json2: the list of tracking events under the 'data' key
result: the nested list of [time, ftime, context, location] rows
存储到excel (saved to Excel): the rows written to filename.xlsx