python大数据股票分析

import  pandas as pd
import numpy as np
# 读取Excel文件,pd.read_excel(r'文件位置\文件名称.xlsx')
hq = pd.read_excel(r'D:\火狐下载\股票行情20230406.xlsx')  # hq=行情
print(hq)
print(hq.shape)  # (2807, 13)
hq["涨跌幅"] = np.zeros((2807,1))  # 每种方法用完运行一次这段代码以重置数据

# 计算涨跌幅并填充
# 方法一:用while循环
i = 0
while i < hq.shape[0]:  # hq.shape[0]=2807
    hq.loc[i,"涨跌幅"] = round((hq.loc[i,"今收"] / hq.loc[i,"前收"] - 1) * 100, 2)
    i += 1
print(hq)
# 方法二:用for循环
for i in range(0,hq.shape[0]):
    hq.loc[i,"涨跌幅"] = round((hq.loc[i,"今收"] / hq.loc[i,"前收"] - 1) * 100, 2)
print(hq)
# 方法三:直接进行列运算
hq["涨跌幅"] = round((hq["今收"] / hq["前收"] - 1) * 100,2)
print(hq)

#成交量分析
import  pandas as pd
import numpy as np
hq = pd.read_excel(r'D:\火狐下载\股票行情20230406.xlsx')  # hq=行情
hq["成交量分析"] = np.zeros((hq.shape[0],1))
#用while
i = 0
while i < hq.shape[0]:
    a = hq.iloc[i,9]
    a = float(a.replace(",",""))
    if a < 1000:
        hq.loc[i,"成交量分析"] = "不活跃"
    elif a < 2000:
        hq.loc[i, "成交量分析"] = "一般"
    else:
        hq.loc[i, "成交量分析"] = "活跃"
    i += 1
#用for
for i in range(hq.shape[0]):
    a = hq.iloc[i,9]
    a = float(a.replace(",",""))
    if a < 1000:
        hq.loc[i,"成交量分析"] = "不活跃"
    elif a < 2000:
        hq.loc[i, "成交量分析"] = "一般"
    else:
        hq.loc[i, "成交量分析"] = "活跃"

#统计各区间股票数量
#方法一
Small,Medium,Large = 0,0,0
for i in range(hq.shape[0]):
    a = hq.iloc[i,9]
    a = float(a.replace(",",""))
    if a < 1000:
        hq.loc[i,"成交量分析"] = "不活跃"
        Small += 1
    elif a < 2000:
        hq.loc[i, "成交量分析"] = "一般"
        Medium += 1
    else:
        hq.loc[i, "成交量分析"] = "活跃"
        Large += 1
print(Small,Medium,Large)
#方法二
a = []
for i in hq["成交量分析"]:
    a.append(i)
print(a.count("不活跃"),a.count("一般"),a.count("活跃"))
#方法三
print(list(hq["成交量分析"]).count("不活跃"))
print(list(hq["成交量分析"]).count("一般"))
print(list(hq["成交量分析"]).count("活跃"))
#整理成表格
dic = {"不活跃":[a.count("不活跃"),a.count("不活跃")/hq.shape[0]],
       "一般":[a.count("一般"),a.count("一般")/hq.shape[0]],
       "活跃":[a.count("活跃"),a.count("活跃")/hq.shape[0]]}
df = pd.DataFrame(dic)
df.index = ["数量","占比"]  #定义行索引

#打分:涨跌幅为正1分,为负0分;活跃3分,一般2分,不活跃0分。————————————————————————————————————
import  pandas as pd
import numpy as np
hq = pd.read_excel(r'D:\火狐下载\股票行情20230406.xlsx')  # hq=行情
hq["成交量分析"] = np.zeros((hq.shape[0],1))
for i in range(hq.shape[0]):
    a = hq.iloc[i,9]
    a = float(a.replace(",",""))
    if a < 1000:
        hq.loc[i,"成交量分析"] = "不活跃"
    elif a < 2000:
        hq.loc[i, "成交量分析"] = "一般"
    else:
        hq.loc[i, "成交量分析"] = "活跃"
#书接上回,两个if
hq["分数1"] = np.zeros((hq.shape[0],1))
for i in range(0,hq.shape[0]):
    if hq.loc[i,"涨跌幅(%)"] > 0:
         hq.loc[i,"分数1"] = 1
    else:
        hq.loc[i, "分数1"] = 0
hq["分数2"] = np.zeros((hq.shape[0],1))
for i in range(0,hq.shape[0]):
    if hq.loc[i,"成交量分析"] == "活跃":
         hq.loc[i,"分数2"] = 3
    elif hq.loc[i,"成交量分析"] == "一般":
        hq.loc[i, "分数2"] = 2
    else:
        hq.loc[i, "分数2"] = 0
hq["总分"] = hq["分数1"] + hq["分数2"]
#书接上回,if嵌套if
hq["分数"] = np.zeros((hq.shape[0],1))
for i in range(0,hq.shape[0]):
    if hq.loc[i,"涨跌幅(%)"] > 0:
        if hq.loc[i,"成交量分析"] == "活跃":
            hq.loc[i,"分数"] = 4
        elif hq.loc[i,"成交量分析"] == "一般":
            hq.loc[i, "分数"] = 3
        else:
            hq.loc[i, "分数"] = 1
    else:
        if hq.loc[i, "成交量分析"] == "活跃":
            hq.loc[i, "分数"] = 3
        elif hq.loc[i, "成交量分析"] == "一般":
            hq.loc[i, "分数"] = 2
        else:
            hq.loc[i, "分数"] = 0
#if简洁写法
hq["分数"] = np.zeros((hq.shape[0],1))
for i in range(0,hq.shape[0]):
    if hq.loc[i,"涨跌幅(%)"] > 0:
        hq.loc[i, "分数"] = 4 if hq.loc[i,"成交量分析"] == "活跃" else 3 if hq.loc[i,"成交量分析"] == "一般" else 1
    else:
        hq.loc[i, "分数"] = 3 if hq.loc[i, "成交量分析"] == "活跃" else 2 if hq.loc[i, "成交量分析"] == "一般" else 0
hq.sort_index(axis=1,ascending=False,inplace=True)#axis=1按行标签排序
hq.sort_index(axis=1,ascending=True,inplace=True)#ascending=True升序
hq.sort_index(axis=0,ascending=True,inplace=True)#inplace=True不拷贝,即在原有数据上排序
hq.sort_index(axis=0,ascending=False,inplace=True)
del hq["分数1"]
del hq["分数2"]
del hq["总分"]
del hq["分数"]
del hq
#排序和排名(1)————————————————————————————————————————————————————
import  pandas as pd
import numpy as np
hq = pd.read_excel(r'D:\火狐下载\股票行情20230406.xlsx')  # hq=行情
hq["排名"] = np.zeros((hq.shape[0],1))
l = hq["涨跌幅(%)"]
for i in range(hq.shape[0]):
    hq.loc[hq.index[hq["涨跌幅(%)"]==l.max()],"排名"] = i + 1
    l.drop(l.index[l==l.max()],inplace=True)
#排序和排名(2)
import  pandas as pd
import numpy as np
hq = pd.read_excel(r'D:\火狐下载\股票行情20230406.xlsx')  # hq=行情
hq["排名"] = np.zeros((hq.shape[0],1))
hq["复制"] = sorted(hq["涨跌幅(%)"],reverse=True)  #reverse=True降序


#测试一
dic = {"名字":["张三","李四","王五","赵六"],
       "成绩":[70,60,90,80]}
df = pd.DataFrame(dic)
df["排名"] = np.zeros((df.shape[0],1))
l = df["成绩"]
for i in range(df.shape[0]):
    df.loc[df.index[df["成绩"]==l.max()],"排名"] = i + 1
    l.drop(l.index[l==l.max()],inplace=True)
#测试二
dic = {"名字":["张三","李四","王五","赵六"],
       "成绩":[70,60,90,80]}
df = pd.DataFrame(dic)
df["复制"] = sorted(df["成绩"],reverse=True)
df["排名"] = np.zeros((df.shape[0],1))
i = 0
j = 0
rank = 1
while i < df.shape[0]:
    if df.loc[i,"复制"] <= df.loc[j,"成绩"]:
        df.loc[i, "排名"] = rank
        i += 1
        j += 1
        rank += 1
    else:
        j += 1
        rank += 1
#排序标准答案20230515————————————————————————————————————————————————————
import  pandas as pd
import numpy as np
hq = pd.read_excel(r'D:\火狐下载\股票行情20230406.xlsx')  # hq=行情
hq["涨跌幅排名"] = np.zeros((hq.shape[0],1))
l = hq["涨跌幅(%)"]
for i in range(hq.shape[0]):
    hq.loc[hq.index[hq["涨跌幅(%)"]==l.max()],"涨跌幅排名"] = i + 1
    l.drop(l.index[l==l.max()],inplace=True)
#直接使用rank函数
import  pandas as pd
import numpy as np
hq = pd.read_excel(r'D:\火狐下载\股票行情20230406.xlsx')  # hq=行情
hq["涨跌幅排名"] = hq["涨跌幅(%)"].rank(method="dense",ascending=False)  #涨跌幅高的排第一
hq["市盈率排名"] = hq["市盈率"].rank(method="dense",ascending=True)  #市盈率低的排第一
hq["成交量排名"] = hq["成交量(万股)"].rank(method="dense",ascending=False)  #成交量高的排第一
hq["临时"] =(hq["最高"] + hq["最低"]) / 2
hq["价格排名"] = hq["临时"].rank(method="dense",ascending=True)  #平均价格低的排第一
del hq["临时"]
hq["总排名"] = hq["涨跌幅排名"] + hq["市盈率排名"] + hq["成交量排名"] + hq["价格排名"]
hq["总排名"] = hq["总排名"] / hq["总排名"].max() * 100
#测试1
dic = {"名字":["张三","李四","王五","赵六","张三","李四","王五","赵六"],
        "测试":["测试一","测试一","测试一","测试一","测试二","测试二","测试二","测试二"],
        "成绩":[80,90,60,70,100,80,75,85]}
df = pd.DataFrame(dic)
df.sort_values(by="名字",axis=0,ascending=True,inplace=True)  #按名字列升序排序
#测试2
dic = {"名字":["张三","李四","王五","赵六","张三","王五","赵六","张三","赵六"],
        "测试":["测试一","测试一","测试一","测试一","测试二","测试二","测试二","测试三","测试三"],
        "成绩":[80,90,60,70,100,75,85,100,85]}
df = pd.DataFrame(dic)
df.sort_values(by="名字",axis=0,ascending=True,inplace=True)  #按名字列升序排序
#20230524
import  pandas as pd
import numpy as np
HQ = pd.read_excel(r'D:\火狐下载\HQ0508-0512.xlsx')  # HQ=行情
HQ.sort_values(by="证券代码",axis=0,ascending=True,inplace=True)  #排序,这一步可有可无
HQ5 = HQ.groupby(["证券代码","证券简称"])  #按证券代码和证券简称进行分组
HQ5 = HQ5["今收"].mean()  #今收的均值
HQ5 = pd.DataFrame(HQ5)
HQ5.to_excel(r'D:\火狐下载\HQ5.xlsx')

  • 0
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值