import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Preset plotting style
from matplotlib.pylab import style # custom chart style
style.use('ggplot')
# Plot parameter settings
plt.rcParams['font.sans-serif'] = ['SimHei'] # Step 1: replace the sans-serif font
plt.rcParams['axes.unicode_minus'] = False # Step 2: fix how the minus sign of negative axis values is displayed
import warnings
warnings.filterwarnings("ignore")  # installs a global "ignore" filter for all warnings
pd.set_option('mode.chained_assignment', None) # turn the chained-assignment warning off outright
def equal_interval_sampling(data, mnum, col='SEL_MW'):
    """Thin out ``data`` to rows whose ``col`` values are roughly evenly spaced.

    The value range of ``col`` is divided into about ``mnum`` steps; for every
    grid point the row whose ``col`` value lies closest to it is selected, and
    duplicate selections are collapsed.

    Args:
        data: DataFrame containing a numeric column ``col``.
        mnum: Requested number of intervals (must be positive).
        col: Name of the column to sample on. Defaults to ``'SEL_MW'``.

    Returns:
        A new DataFrame with the selected rows in their original order and a
        fresh 0..k-1 index. Empty if ``col`` holds no usable values.

    Raises:
        ValueError: If ``mnum`` is not positive.
        KeyError: If ``col`` is not a column of ``data``.
    """
    if mnum <= 0:
        raise ValueError("mnum must be a positive number")

    # Work on the sampled column itself (not "whatever column is last") and
    # as a plain array, so every index below is positional.
    values = data[col].to_numpy(dtype=float)
    if values.size == 0 or np.isnan(values).all():
        return data.iloc[0:0].reset_index(drop=True)

    lo = np.nanmin(values)
    hi = np.nanmax(values)

    raw_step = (hi - lo) / mnum
    astep = np.round(raw_step, 1)  # step size, kept at one decimal when possible
    if astep <= 0:
        # Rounding collapsed a small step to 0.0, which np.arange cannot
        # handle; fall back to the exact step.
        astep = raw_step

    if astep > 0:
        # The "+ 1" keeps the upper end of the range covered by the grid.
        age_bins = np.round(np.arange(lo, hi + 1, astep), 3)
    else:
        # Constant column: a single grid point is all there is.
        age_bins = np.array([lo])
    print(f'步长:{astep}\n区间个数:{len(age_bins) - 1}')

    # Position of the nearest row for every grid point; NaN values are never
    # chosen. Only the minimum is needed, so no sort is required.
    ind_list = [int(np.nanargmin(np.abs(values - edge))) for edge in age_bins]
    print('排序前样本数目:', len(ind_list))

    end_row = sorted(set(ind_list))
    print('排序后样本数目:', len(end_row))

    return data.iloc[end_row, :].reset_index(drop=True)
# Driver: load a CSV, down-sample it, and report the resulting shape.
# NOTE(review): IntervalSampl comes from a project module that is not shown
# here; the call below goes through it rather than through the
# equal_interval_sampling function defined in this file — confirm which one
# is intended.
from Memorymatrix import IntervalSampl
mnum = 2000 # number of equal-interval samples requested
cdata = pd.read_csv(r"a.csv", encoding='gbk')  # file is decoded as GBK
sdata =IntervalSampl.equal_interval_sampling(cdata, mnum)
print(sdata.shape)
sdata.head(3)  # NOTE(review): bare expression; its result is only shown in an interactive/notebook session
# 等间距抽样 (equal-interval sampling)
# 最新推荐文章于 2022-08-23 11:22:18 发布