1、基础知识简介: excel有两种文件后缀: xlsx、xls;常用的3种保存数据的库: 1.openpyxl ---> 只能针对xlsx; 2.pandas; 3.xlutils ---> 只能针对xls
2、pandas保存成excel数据的具体流程:
# 1.导包(取别名)
import pandas as pd
# 2.获取数据(字典)===> 每个键值对都是一个字段
dic = {
    '姓名': ['张三', '李四', '王五', '赵六'],
    '年龄': [18, 19, 20, 21],
    '性别': ['男', '女', '男', '女']
}
# 3.构造一个数据帧(即数据格式),DataFrame
df = pd.DataFrame(dic)
# 4.保存数据
df.to_excel('第一个.xlsx', index=False)
测试链接:https://www.1234567.com.cn/
源代码为:
import pandas as pd
import requests


class FirstSpider(object):
    """Download a fund's historical unit net values and save them to Excel.

    Pages of JSON are requested from ``start_url``; for every record in
    ``Data.LSJZList`` the ``FSRQ`` value is stored under the '时间' column and
    the ``DWJZ`` value under the '单位净值' column. ``save_data`` writes the
    collected columns to an ``.xlsx`` file through pandas.

    Args:
        fund_code: Value sent as the ``fundCode`` query parameter.
        pages: Maximum number of pages to request (1-based, inclusive).
        page_size: Value sent as the ``pageSize`` query parameter.
        timeout: Seconds to wait for each HTTP request before giving up.
        output_path: Path of the Excel file written by ``save_data``.
    """

    def __init__(self, fund_code='000001', pages=20, page_size=20,
                 timeout=10, output_path='天天基金网.xlsx'):
        self.start_url = 'http://api.fund.eastmoney.com/f10/lsjz'
        self.fund_code = fund_code
        self.pages = pages
        self.page_size = page_size
        self.timeout = timeout
        self.output_path = output_path
        # Headers copied from a browser session.
        # NOTE(review): the Cookie is a captured session value and will go
        # stale; presumably the API mainly needs Referer — confirm.
        self.headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Host': 'api.fund.eastmoney.com',
            # Was 'no - cache' (with spaces), which is not a valid directive.
            'Pragma': 'no-cache',
            'Referer': 'http://fundf10.eastmoney.com/',
            'Cookie': 'EMFUND1=null; EMFUND2=null; EMFUND3=null; qgqp_b_id=ddfd70f91cca153271f0738c2d1e033e; HAList=ty-116-08112-%u57FA%u77F3%u91D1%u878D; EMFUND0=null; EMFUND4=06-02%2016%3A57%3A52@%23%24%u4E1C%u5434%u53CC%u4E09%u89D2%u80A1%u7968A@%23%24005209; EMFUND5=06-02%2017%3A00%3A40@%23%24%u5609%u5B9E%u8D44%u6E90%u7CBE%u9009%u80A1%u7968C@%23%24005661; EMFUND6=06-02%2017%3A01%3A13@%23%24%u4FE1%u6FB3%u65B0%u80FD%u6E90%u4EA7%u4E1A%u80A1%u7968@%23%24001410; EMFUND7=06-02%2017%3A22%3A43@%23%24%u6C38%u8D62%u4F4E%u78B3%u73AF%u4FDD%u667A%u9009%u6DF7%u5408%u53D1%u8D77A@%23%24016386; EMFUND8=06-02%2017%3A25%3A03@%23%24%u534E%u590F%u6210%u957F%u6DF7%u5408@%23%24000001; EMFUND9=06-02 17:26:18@#$%u4E2D%u6B27%u533B%u7597%u5065%u5EB7%u6DF7%u5408A@%23%24003095; st_si=51497540182544; st_asi=delete; st_pvi=28856337848570; st_sp=2024-06-02%2016%3A57%3A53; st_inirUrl=https%3A%2F%2Fwww.baidu.com%2Flink; st_sn=2; st_psi=20240623165351758-112200305283-1605677910',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
        }
        # Column-oriented storage: one list per output column, kept the same
        # length so pandas can build a DataFrame from it directly.
        self.dic = {'时间': [], '单位净值': []}

    def request_start_url(self, page):
        """Fetch one page of history and feed it to ``parse_response``.

        Args:
            page: 1-based page index sent as ``pageIndex``.

        Returns:
            The number of records parsed from this page.

        Raises:
            requests.RequestException: On connection errors, timeouts or a
                non-2xx HTTP status.
            ValueError: If the response body is not valid JSON.
        """
        params = {
            # 'callback' is deliberately omitted so that the body can be
            # decoded with .json() directly.
            'fundCode': self.fund_code,
            'pageIndex': str(page),
            'pageSize': str(self.page_size),
            'startDate': '',
            'endDate': '',
            # Value captured from a browser request; presumably a
            # cache-busting timestamp — confirm.
            '_': '1719132857884',
        }
        response = requests.get(
            self.start_url,
            headers=self.headers,
            params=params,
            timeout=self.timeout,  # never block forever on a stalled server
        )
        # Fail loudly on HTTP errors instead of trying to decode an error page.
        response.raise_for_status()
        return self.parse_response(response.json())

    def parse_response(self, response):
        """Append every record of a decoded page to ``self.dic``.

        Args:
            response: Decoded JSON object; records are read from
                ``response['Data']['LSJZList']``. A missing or null ``Data``
                or ``LSJZList`` is treated as an empty page.

        Returns:
            The number of records appended.
        """
        payload = response.get('Data') if isinstance(response, dict) else None
        records = payload.get('LSJZList') if isinstance(payload, dict) else None
        records = records or []
        for record in records:
            date = record['FSRQ']
            net_value = record['DWJZ']
            self.dic['时间'].append(date)
            self.dic['单位净值'].append(net_value)
            print(date, net_value)
        return len(records)

    def save_data(self):
        """Write the collected columns to ``self.output_path`` as Excel."""
        df = pd.DataFrame(self.dic)
        df.to_excel(self.output_path, index=False)

    def main(self):
        """Crawl up to ``self.pages`` pages, then save the result.

        Stops early at the first page that yields no records. Whatever was
        collected is saved even if a request fails part-way through; the
        error still propagates to the caller.
        """
        try:
            for page in range(1, self.pages + 1):
                if not self.request_start_url(page):
                    break
        finally:
            self.save_data()


if __name__ == '__main__':
    spider = FirstSpider()
    spider.main()
实现效果: