想了好久,不懂怎么取题目,哈哈,直接上数据和代码吧!
大概意思是:按 label 列中连续相同的标签划分区间,然后在“特征值”列的每个区间内取最大值。比如:在黄色背景的区间内取 3.2,在绿色背景的区间内取 24.2,以此类推!
import pandas as pd
import numpy as np
def find_segment_bounds(labels):
    """Return the half-open ``(start, stop)`` index range of every label run.

    A run is a maximal stretch of consecutive equal labels. A boundary is
    placed wherever two neighbouring labels differ; for data that only
    contains ``1`` and ``-1`` this is exactly the 1/-1 flip.

    Args:
        labels: Indexable sequence of labels supporting ``len()`` and ``!=``.

    Returns:
        A list of ``(start, stop)`` tuples covering ``labels`` from left to
        right without gaps. Empty input yields an empty list.
    """
    total = len(labels)
    if total == 0:
        return []

    bounds = []
    start = 0
    for pos in range(1, total):
        if labels[pos] != labels[pos - 1]:
            bounds.append((start, pos))
            start = pos
    # Close the final run explicitly instead of relying on an
    # out-of-range slice end being clamped.
    bounds.append((start, total))
    return bounds


def segment_maxima(values, labels):
    """Return the maximum of ``values`` inside each run of equal labels.

    Args:
        values: Sequence of comparable feature values.
        labels: Sequence of labels, aligned with ``values`` by position.

    Returns:
        A list with one maximum per label run, in order of appearance.

    Raises:
        ValueError: If ``values`` and ``labels`` differ in length.
    """
    if len(values) != len(labels):
        raise ValueError("values and labels must have the same length")
    return [max(values[start:stop]) for start, stop in find_segment_bounds(labels)]


def main():
    """Load the spreadsheet and print the maximum feature value of each run.

    The first column of the sheet is read as the feature values and the
    second column as the labels.
    """
    # Load the data.
    path = "stock_data.xlsx"
    data = pd.read_excel(path)

    # Convert the table to an array and the label column to a list of ints.
    stock_data = np.array(data)
    features = list(stock_data[:, 0])
    labels = [int(label) for label in stock_data[:, 1]]

    # Print the start index and the maximum of every label run.
    bounds = find_segment_bounds(labels)
    maxima = segment_maxima(features, labels)
    for (start, _stop), num_max in zip(bounds, maxima):
        print(start)
        print("the max num is: ", num_max)


if __name__ == "__main__":
    main()