import pandas as pd
import numpy as np
# 读取Excel文件,pd.read_excel(r'文件位置\文件名称.xlsx')
hq = pd.read_excel(r'D:\火狐下载\股票行情20230406.xlsx') # hq=行情
print(hq)
print(hq.shape) # (2807, 13)
hq["涨跌幅"] = np.zeros((2807,1)) # 每种方法用完运行一次这段代码以重置数据
# 计算涨跌幅并填充
# 方法一:用while循环
i = 0
while i < hq.shape[0]: # hq.shape[0]=2807
hq.loc[i,"涨跌幅"] = round((hq.loc[i,"今收"] / hq.loc[i,"前收"] - 1) * 100, 2)
i += 1
print(hq)
# 方法二:用for循环
for i in range(0,hq.shape[0]):
hq.loc[i,"涨跌幅"] = round((hq.loc[i,"今收"] / hq.loc[i,"前收"] - 1) * 100, 2)
print(hq)
# 方法三:直接进行列运算
hq["涨跌幅"] = round((hq["今收"] / hq["前收"] - 1) * 100,2)
print(hq)
#成交量分析
import pandas as pd
import numpy as np
hq = pd.read_excel(r'D:\火狐下载\股票行情20230406.xlsx') # hq=行情
hq["成交量分析"] = np.zeros((hq.shape[0],1))
#用while
i = 0
while i < hq.shape[0]:
a = hq.iloc[i,9]
a = float(a.replace(",",""))
if a < 1000:
hq.loc[i,"成交量分析"] = "不活跃"
elif a < 2000:
hq.loc[i, "成交量分析"] = "一般"
else:
hq.loc[i, "成交量分析"] = "活跃"
i += 1
#用for
for i in range(hq.shape[0]):
a = hq.iloc[i,9]
a = float(a.replace(",",""))
if a < 1000:
hq.loc[i,"成交量分析"] = "不活跃"
elif a < 2000:
hq.loc[i, "成交量分析"] = "一般"
else:
hq.loc[i, "成交量分析"] = "活跃"
#统计各区间股票数量
#方法一
Small,Medium,Large = 0,0,0
for i in range(hq.shape[0]):
a = hq.iloc[i,9]
a = float(a.replace(",",""))
if a < 1000:
hq.loc[i,"成交量分析"] = "不活跃"
Small += 1
elif a < 2000:
hq.loc[i, "成交量分析"] = "一般"
Medium += 1
else:
hq.loc[i, "成交量分析"] = "活跃"
Large += 1
print(Small,Medium,Large)
#方法二
a = []
for i in hq["成交量分析"]:
a.append(i)
print(a.count("不活跃"),a.count("一般"),a.count("活跃"))
#方法三
print(list(hq["成交量分析"]).count("不活跃"))
print(list(hq["成交量分析"]).count("一般"))
print(list(hq["成交量分析"]).count("活跃"))
#整理成表格
dic = {"不活跃":[a.count("不活跃"),a.count("不活跃")/hq.shape[0]],
"一般":[a.count("一般"),a.count("一般")/hq.shape[0]],
"活跃":[a.count("活跃"),a.count("活跃")/hq.shape[0]]}
df = pd.DataFrame(dic)
df.index = ["数量","占比"] #定义行索引
#打分:涨跌幅为正1分,为负0分;活跃3分,一般2分,不活跃0分。————————————————————————————————————
import pandas as pd
import numpy as np
hq = pd.read_excel(r'D:\火狐下载\股票行情20230406.xlsx') # hq=行情
hq["成交量分析"] = np.zeros((hq.shape[0],1))
for i in range(hq.shape[0]):
a = hq.iloc[i,9]
a = float(a.replace(",",""))
if a < 1000:
hq.loc[i,"成交量分析"] = "不活跃"
elif a < 2000:
hq.loc[i, "成交量分析"] = "一般"
else:
hq.loc[i, "成交量分析"] = "活跃"
#书接上回,两个if
hq["分数1"] = np.zeros((hq.shape[0],1))
for i in range(0,hq.shape[0]):
if hq.loc[i,"涨跌幅(%)"] > 0:
hq.loc[i,"分数1"] = 1
else:
hq.loc[i, "分数1"] = 0
hq["分数2"] = np.zeros((hq.shape[0],1))
for i in range(0,hq.shape[0]):
if hq.loc[i,"成交量分析"] == "活跃":
hq.loc[i,"分数2"] = 3
elif hq.loc[i,"成交量分析"] == "一般":
hq.loc[i, "分数2"] = 2
else:
hq.loc[i, "分数2"] = 0
hq["总分"] = hq["分数1"] + hq["分数2"]
#书接上回,if嵌套if
hq["分数"] = np.zeros((hq.shape[0],1))
for i in range(0,hq.shape[0]):
if hq.loc[i,"涨跌幅(%)"] > 0:
if hq.loc[i,"成交量分析"] == "活跃":
hq.loc[i,"分数"] = 4
elif hq.loc[i,"成交量分析"] == "一般":
hq.loc[i, "分数"] = 3
else:
hq.loc[i, "分数"] = 1
else:
if hq.loc[i, "成交量分析"] == "活跃":
hq.loc[i, "分数"] = 3
elif hq.loc[i, "成交量分析"] == "一般":
hq.loc[i, "分数"] = 2
else:
hq.loc[i, "分数"] = 0
#if简洁写法
hq["分数"] = np.zeros((hq.shape[0],1))
for i in range(0,hq.shape[0]):
if hq.loc[i,"涨跌幅(%)"] > 0:
hq.loc[i, "分数"] = 4 if hq.loc[i,"成交量分析"] == "活跃" else 3 if hq.loc[i,"成交量分析"] == "一般" else 1
else:
hq.loc[i, "分数"] = 3 if hq.loc[i, "成交量分析"] == "活跃" else 2 if hq.loc[i, "成交量分析"] == "一般" else 0
hq.sort_index(axis=1,ascending=False,inplace=True)#axis=1按行标签排序
hq.sort_index(axis=1,ascending=True,inplace=True)#ascending=True升序
hq.sort_index(axis=0,ascending=True,inplace=True)#inplace=True不拷贝,即在原有数据上排序
hq.sort_index(axis=0,ascending=False,inplace=True)
del hq["分数1"]
del hq["分数2"]
del hq["总分"]
del hq["分数"]
del hq
#排序和排名(1)————————————————————————————————————————————————————
import pandas as pd
import numpy as np
hq = pd.read_excel(r'D:\火狐下载\股票行情20230406.xlsx') # hq=行情
hq["排名"] = np.zeros((hq.shape[0],1))
l = hq["涨跌幅(%)"]
for i in range(hq.shape[0]):
hq.loc[hq.index[hq["涨跌幅(%)"]==l.max()],"排名"] = i + 1
l.drop(l.index[l==l.max()],inplace=True)
#排序和排名(2)
import pandas as pd
import numpy as np
hq = pd.read_excel(r'D:\火狐下载\股票行情20230406.xlsx') # hq=行情
hq["排名"] = np.zeros((hq.shape[0],1))
hq["复制"] = sorted(hq["涨跌幅(%)"],reverse=True) #reverse=True降序
#测试一
dic = {"名字":["张三","李四","王五","赵六"],
"成绩":[70,60,90,80]}
df = pd.DataFrame(dic)
df["排名"] = np.zeros((df.shape[0],1))
l = df["成绩"]
for i in range(df.shape[0]):
df.loc[df.index[df["成绩"]==l.max()],"排名"] = i + 1
l.drop(l.index[l==l.max()],inplace=True)
#测试二
dic = {"名字":["张三","李四","王五","赵六"],
"成绩":[70,60,90,80]}
df = pd.DataFrame(dic)
df["复制"] = sorted(df["成绩"],reverse=True)
df["排名"] = np.zeros((df.shape[0],1))
i = 0
j = 0
rank = 1
while i < df.shape[0]:
if df.loc[i,"复制"] <= df.loc[j,"成绩"]:
df.loc[i, "排名"] = rank
i += 1
j += 1
rank += 1
else:
j += 1
rank += 1
#排序标准答案20230515————————————————————————————————————————————————————
import pandas as pd
import numpy as np
hq = pd.read_excel(r'D:\火狐下载\股票行情20230406.xlsx') # hq=行情
hq["涨跌幅排名"] = np.zeros((hq.shape[0],1))
l = hq["涨跌幅(%)"]
for i in range(hq.shape[0]):
hq.loc[hq.index[hq["涨跌幅(%)"]==l.max()],"涨跌幅排名"] = i + 1
l.drop(l.index[l==l.max()],inplace=True)
#直接使用rank函数
import pandas as pd
import numpy as np
hq = pd.read_excel(r'D:\火狐下载\股票行情20230406.xlsx') # hq=行情
hq["涨跌幅排名"] = hq["涨跌幅(%)"].rank(method="dense",ascending=False) #涨跌幅高的排第一
hq["市盈率排名"] = hq["市盈率"].rank(method="dense",ascending=True) #市盈率低的排第一
hq["成交量排名"] = hq["成交量(万股)"].rank(method="dense",ascending=False) #成交量高的排第一
hq["临时"] =(hq["最高"] + hq["最低"]) / 2
hq["价格排名"] = hq["临时"].rank(method="dense",ascending=True) #平均价格低的排第一
del hq["临时"]
hq["总排名"] = hq["涨跌幅排名"] + hq["市盈率排名"] + hq["成交量排名"] + hq["价格排名"]
hq["总排名"] = hq["总排名"] / hq["总排名"].max() * 100
#测试1
dic = {"名字":["张三","李四","王五","赵六","张三","李四","王五","赵六"],
"测试":["测试一","测试一","测试一","测试一","测试二","测试二","测试二","测试二"],
"成绩":[80,90,60,70,100,80,75,85]}
df = pd.DataFrame(dic)
df.sort_values(by="名字",axis=0,ascending=True,inplace=True) #按名字列升序排序
#测试2
dic = {"名字":["张三","李四","王五","赵六","张三","王五","赵六","张三","赵六"],
"测试":["测试一","测试一","测试一","测试一","测试二","测试二","测试二","测试三","测试三"],
"成绩":[80,90,60,70,100,75,85,100,85]}
df = pd.DataFrame(dic)
df.sort_values(by="名字",axis=0,ascending=True,inplace=True) #按名字列升序排序
#20230524
import pandas as pd
import numpy as np
HQ = pd.read_excel(r'D:\火狐下载\HQ0508-0512.xlsx') # HQ=行情
HQ.sort_values(by="证券代码",axis=0,ascending=True,inplace=True) #排序,这一步可有可无
HQ5 = HQ.groupby(["证券代码","证券简称"]) #按证券代码和证券简称进行分组
HQ5 = HQ5["今收"].mean() #今收的均值
HQ5 = pd.DataFrame(HQ5)
HQ5.to_excel(r'D:\火狐下载\HQ5.xlsx')
python大数据股票分析
最新推荐文章于 2024-06-15 11:24:04 发布