Python爬虫v1-API接口数据获取&处理&保存到excel

curl查看:

curl 'http://www.kuaidi100.com/query?type=yuantong&postid=11111111111'

 

#!/usr/bin/python
# -*- coding: utf-8 -*-

from bs4 import BeautifulSoup
from urllib.request import Request,urlopen
from urllib.parse import quote
import json
import openpyxl

def get_data(search_url):
    """Fetch ``search_url`` and return the response parsed by BeautifulSoup.

    Args:
        search_url: URL to request (sent as a plain request with no extra
            headers).

    Returns:
        The ``BeautifulSoup`` object built from the response body using the
        ``html.parser`` backend with ``from_encoding='utf-8'``.

    Raises:
        Whatever ``urlopen`` raises for network/HTTP failures (for example
        ``urllib.error.URLError``) is propagated unchanged.
    """
    request = Request(search_url)
    # NOTE: if the endpoint needs extra headers (e.g. a Cookie), add them
    # here with request.add_header(name, value) before opening the URL.

    # Use the response as a context manager so the underlying connection is
    # closed even if parsing fails; the body is consumed by BeautifulSoup
    # while the response is still open.
    with urlopen(request) as response:
        return BeautifulSoup(response, 'html.parser', from_encoding='utf-8')

def handle_data(html):
    """Turn the fetched API response into a list of tracking rows.

    Args:
        html: Object exposing ``get_text()``; the returned text must be a
            JSON document with a top-level ``'data'`` entry.

    Returns:
        A list with one ``[time, ftime, context, location]`` list per entry
        of ``'data'``, in the order the entries appear.

    Raises:
        json.JSONDecodeError: If the text is not valid JSON.
        KeyError: If ``'data'`` or one of the four per-entry keys is missing.
    """
    # Keys copied from each entry, in output column order.
    columns = ('time', 'ftime', 'context', 'location')

    # Each intermediate stage is echoed to stdout for inspection.
    raw_text = html.get_text()
    print(raw_text)
    payload = json.loads(raw_text)
    print(payload)
    entries = payload['data']
    print(entries)

    rows = [[entry[key] for key in columns] for entry in entries]
    print(rows)
    return rows


def creatwb(wbname):
    """Create a new, empty Excel workbook and save it to ``wbname``.

    Args:
        wbname: Destination path of the ``.xlsx`` file.

    Side effects:
        Writes the file via ``Workbook.save`` and prints a confirmation
        message to stdout.
    """
    wb = openpyxl.Workbook()
    wb.save(filename=wbname)
    # format() instead of '+' concatenation: a non-str path (for example a
    # pathlib.Path) would otherwise raise TypeError here, after the file has
    # already been written. Output is unchanged for str paths.
    print("新建Excel:{}成功".format(wbname))


def savetoexcel(result, fields, wbname):
    """Write a header row and data rows to a fresh workbook at ``wbname``.

    Args:
        result: Sequence of rows; each row is a sequence of cell values.
        fields: Column titles; each is converted with ``str()`` and written
            to row 1.
        wbname: Path of the ``.xlsx`` file to create and fill.

    Side effects:
        Creates the file through ``creatwb``, reopens it, fills the active
        sheet, saves it again, and prints a confirmation to stdout.
    """
    # creatwb() saves an empty workbook to disk; it is reopened here so the
    # rows are written into that file's default (active) worksheet.
    creatwb(wbname)
    workbook = openpyxl.load_workbook(filename=wbname)
    sheet = workbook.active

    # Header goes in row 1; openpyxl rows and columns are 1-based.
    for column, title in enumerate(fields, start=1):
        sheet.cell(row=1, column=column, value=str(title))

    # Data rows start directly below the header, at row 2.
    for row_number, record in enumerate(result, start=2):
        for column, value in enumerate(record, start=1):
            sheet.cell(row=row_number, column=column, value=value)

    workbook.save(filename=wbname)
    print("保存成功")

if __name__ == '__main__':
    # The same endpoint can be inspected from a shell (quote the URL so the
    # shell does not interpret '&'):
    #   curl 'http://www.kuaidi100.com/query?type=yuantong&postid=11111111111'
    url = 'http://www.kuaidi100.com/query?type=yuantong&postid=11111111111'

    # Column titles for the sheet, in the order handle_data() emits values.
    fields = ['time', 'ftime', 'context', 'location']
    wbname = './filename.xlsx'

    # Pipeline: fetch -> extract rows -> write spreadsheet.
    savetoexcel(handle_data(get_data(url)), fields, wbname)





 

 

rawdata:

json1:

json2:

result:

 

存储到excel:

 

  • 0
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值