"""Demo: shuffle a small DataFrame with ``sample`` and then de-duplicate it."""

import pandas as pd

# Ten (X, Y) observations used as the demo data set.
df = pd.DataFrame(
    [
        [1, 5.56],
        [2, 5.7],
        [3, 5.91],
        [4, 6.4],
        [5, 6.8],
        [6, 7.05],
        [7, 8.9],
        [8, 8.7],
        [9, 9],
        [10, 9.05],
    ],
    columns=["X", "Y"],
)

# frac: sampling fraction, i.e. the share of the full data set to draw
#       (e.g. frac=0.3). Note that ``n`` and ``frac`` cannot be used together.
# replace: whether to sample with replacement. The default is without
#       replacement; pass replace=True to sample with replacement.
# With frac=1.0 and replace=False every row is drawn exactly once, so the
# result is simply a shuffled copy of ``df`` (original index labels kept).
dfsample = df.sample(frac=1.0, replace=False)
print(dfsample)
print(dfsample.shape)
# Example output (row order differs between runs: no random_state is given):
#     X     Y
# 9  10  9.05
# 0   1  5.56
# 1   2  5.70
# 5   6  7.05
# 3   4  6.40
# 6   7  8.90
# 2   3  5.91
# 4   5  6.80
# 7   8  8.70
# 8   9  9.00
# (10, 2)

# Remove duplicate rows. Sampling without replacement cannot produce
# duplicates from 10 distinct rows, so nothing is dropped here.
df_1 = dfsample.drop_duplicates()
print(df_1)
print(df_1.shape)
# Example output for the same shuffle as above:
#     X     Y
# 9  10  9.05
# 0   1  5.56
# 1   2  5.70
# 5   6  7.05
# 3   4  6.40
# 6   7  8.90
# 2   3  5.91
# 4   5  6.80
# 7   8  8.70
# 8   9  9.00
# (10, 2)