python爬取股市信息–千股千评
一切得从我的爬虫代码说起
- 那天室友突然找我要爬虫源代码,我说你要什么样子的,他说随便:
然后我就想起了我的lsp代码
- 哈哈哈,万万没想到他说这个不行,他说要做数据分析,只有图片还不够,不过还好我一年前写了几个python爬虫代码应该能派上用场。
- 本来用豆瓣信息的话刚好,能满足他的要求,但是又万万没想到的是,豆瓣的反爬虫升级了(直接就把我的ip给封了,麻了)
但是我又想起之前爬过股市信息,觉得这个挺合适,于是就有了今日的代码:爬取股市信息。据说他们做数据处理之前还要先对数据进行清洗,这个我没学过,就只能把数据存到excel文件中了。剩下的就交给他自己了,毕竟爸爸也不能一直帮他
import xlwt
import requests
from lxml import etree
import json
import demjson
# Paged listing endpoint (type=QGQP_LB) on dcfm.eastmoney.com. The single `{}`
# placeholder after `p=` is filled in with str.format by the main loop, which
# passes the page number. `ps=50` is presumably the page size -- the main loop
# advances 50 worksheet rows per page; confirm against the API.
url = "http://dcfm.eastmoney.com/em_mutisvcexpandinterface/api/js/get?callback=datatable5593272&type=QGQP_LB&token=70f12f2f4f091e459a279469fe49eca5&cmd=&st=Code&sr=1&p={}&ps=50&js=(%7Bpages%3A(tp)%2Cdata%3A(x)%2Cfont%3A(font)%7D)&filter=&_=1622642424445"
# Browser-style User-Agent header passed to requests.get by get_information.
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"}
def get_information(url, n):
    """Fetch one page of stock ratings and store its records in ``sheet1``.

    Every record of the page is printed to stdout field by field and then
    written to the module-level worksheet ``sheet1``, one record per row and
    one field per column (columns 0-12, in the order of the field table
    below).

    Args:
        url: Fully formatted page URL to request.
        n: Worksheet row index for the first record of this page; the
            following records go to rows ``n + 1``, ``n + 2``, ...

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        KeyError: If the decoded payload has no ``"data"`` entry or a record
            lacks one of the expected fields.

    Errors raised by ``demjson.decode`` for unparsable text propagate
    unchanged.
    """
    # (label printed to stdout, key in the record); tuple order == column order.
    fields = (
        ("时间", "TDate"),            # date
        ("代号", "Code"),             # stock code
        ("名字", "Name"),             # stock name
        ("最新价", "New"),            # latest price
        ("涨跌幅", "ChangePercent"),  # price change percentage
        ("换手率", "TurnoverRate"),   # turnover rate
        ("市盈率", "PERation"),       # price/earnings ratio
        ("主力成本", "ZLCB"),         # main-force cost
        ("机构参与度", "JGCYD"),      # institutional participation
        ("综合得分", "TotalScore"),   # overall score
        ("上升排名", "RankingUp"),    # rank change
        ("目前排名", "Ranking"),      # current rank
        ("关注指数", "Focus"),        # attention index
    )

    # requests never times out by default; without a timeout a single stalled
    # connection would hang the whole crawl.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail here with a clear HTTP error instead of a confusing parse error
    # further down when the server returns an error page.
    response.raise_for_status()

    text = response.content.decode()
    # NOTE(review): fixed offsets -- presumably they cut off the JSONP callback
    # prefix and the trailing font section; they depend on the exact response
    # layout, so re-check them if the endpoint or callback name changes.
    text = text[28:]
    text = text[:-58]
    text = "{" + text + "}"
    # Normalise single quotes to double quotes before decoding.
    text = text.replace("'", '"')
    records = demjson.decode(text, encoding='utf-8')["data"]

    for row, record in enumerate(records, start=n):
        # Read and echo every field first, so a record with a missing key
        # fails before anything of it is written to the sheet.
        values = []
        for label, key in fields:
            value = record[key]
            print("{}:{}".format(label, value))
            values.append(value)
        for column, value in enumerate(values):
            sheet1.write(row, column, value)
if __name__ == "__main__":
    # Script entry point: build the workbook, write the header row, crawl
    # pages 1..91 into it, then save everything to ./a.xls.
    file_path = "./a.xls"
    f = xlwt.Workbook()
    # get_information writes into this worksheet through the module-level
    # name `sheet1`, so the name must stay as it is.
    sheet1 = f.add_sheet("sheet1", cell_overwrite_ok=True)

    # Header row (row 0), one title per column.
    column_titles = (
        "时间",
        "代号",
        "名字",
        "最新价",
        "涨跌幅",
        "换手率",
        "市盈率",
        "主力成本",
        "机构参与度",
        "综合得分",
        "上升排名",
        "目前排名",
        "关注指数",
    )
    for column, title in enumerate(column_titles):
        sheet1.write(0, column, title)

    # Each page gets a block of 50 rows below the header:
    # page 1 starts at row 1, page 2 at row 51, and so on.
    for page in range(1, 92):
        first_row = (page - 1) * 50 + 1
        get_information(url.format(page), first_row)

    f.save(file_path)
    print("爬取完成,信息存储在{}中".format(file_path))
文件保存到当前目录下的a.xls中
需要用pip安装的依赖:pip install xlwt
pip install requests
pip install demjson
pip install lxml