自抽样(Bootstrap)算法原理及 Python 实现
1. 算法原理介绍
后续补充
2. 例子
'''采用自抽样方式对数据进行选择'''
# coding=utf-8
# 引入数据库包
import pymysql
# 引入操作excel包
import xlrd
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
from matplotlib import rcParams
import numpy as np
from datetime import date, datetime
import os
# Plot configuration: select the SimHei font family and draw minus signs with
# the ASCII hyphen rather than the Unicode minus glyph.
matplotlib.rcParams.update({
    'font.family': 'simhei',
    'axes.unicode_minus': False,
})
def bootstrap_sample(frame, n_samples=None, random_state=None):
    """Draw a bootstrap sample (rows chosen uniformly, with replacement).

    Args:
        frame: pandas DataFrame to sample rows from.
        n_samples: Number of rows to draw. Defaults to ``len(frame)``.
        random_state: Optional integer seed for a reproducible draw. When
            ``None`` the global ``np.random`` generator is used.

    Returns:
        A new DataFrame with ``n_samples`` rows and a fresh 0..n-1 index.

    Raises:
        ValueError: If ``frame`` has no rows to sample from.
    """
    n_rows = len(frame)
    if n_rows == 0:
        raise ValueError("cannot draw a bootstrap sample from an empty DataFrame")
    if n_samples is None:
        n_samples = n_rows

    rng = np.random if random_state is None else np.random.RandomState(random_state)
    # The upper bound follows the actual row count, so every row is eligible
    # and no position can fall outside the frame.
    positions = rng.randint(0, n_rows, size=n_samples)
    # One positional lookup replaces row-by-row concatenation.
    return frame.iloc[positions].reset_index(drop=True)


if __name__ == "__main__":
    # Load the data.
    root = r'G:\Users\Administrator\PycharmProjects\train.xls'
    df = pd.read_excel(root)
    # Drop rows containing missing values, then renumber the index.
    df = df.dropna()
    df = df.reset_index(drop=True)

    # Draw exactly 1000 rows with replacement (the previous seed-row-plus-loop
    # approach produced 1001 rows).
    res = bootstrap_sample(df, n_samples=1000)
    print(res)
    df = res
    print(df)
    # sheet_name is passed by keyword: positional use is deprecated in pandas.
    df.to_excel(r'G:\Users\Administrator\PycharmProjects\数据处理' + '/bootstrapp.xlsx', sheet_name='Sheet0')