sklearn代码18 python自动化处理数据

import numpy as np

import pandas as pd

from pandas import Series,DataFrame

import matplotlib.pyplot as plt
%matplotlib inline
# Load the raw fitness-test results sheet into a DataFrame.
# NOTE: the preview printed below shows the sheet's header row repeated inside
# the data (e.g. at index 33 and 41), so those rows arrive as ordinary records.
data = pd.read_excel('./18级高一体测成绩汇总.xls')
data
班级性别姓名1000米50米跳远体前屈引体肺活量身高体重
01高孜阳06114'138.88195121278517072.6
11郝少杰10134'167.70225117313317452.7
21郝梓烨06194'098.45218141390116946.5
31何弘源10104'218.05206131494618379.7
41刘硕鹏12123'447.52210139353817154.7
51刘运硕03143'497.94190207397017566.4
61吕晓瑶03143'547.75186117371017353.9
71米孜聪06364'38.0619531557817883.1
81聂浩然27194'017.752201510382117566.5
91牛苗嘉12114'127.382451711442316753.9
101牛砚哲281347.822191311403117357.4
111齐子涵185x4'137.37228915435416354.6
121乔一甲06163'457.6620273223817961.1
131任晓波03113'467.6624537481117763.9
141戎小龙26330000000NaN
151桑淳熙06163'577.6019275414717459.2
161田晓龙24114'188.1421084424117961.9
171田玉聪27163'327.202552212532418363.4
181王晨宇06133'568.152071312436317360.5
191王家梁06303'478.152021316536417456
201王乐天33313'537.8521037344517756.9
211王一钊12133'577.8522092567017755.5
221王子天06343'427.232121215570918572.3
231王子鑫00124'37.68218153478017783.7
241未晓锟12144'148.30206151335817346.6
251张国瑞033x4'048.1520595349416948.3
261张皓天06324'047.55190125328616950.1
271张泽地03104'027.55240512448317158.4
281张智贤03183'577.89220911425416654.8
291赵博翰101x4'168.19212277349816968
....................................
46317王亚楠26364'158.36217202545217583.4
46417陈核涛26124'367.22267611555517962.2
46517曹佳尧213X3'487.372251712551917662.2
46617贾存生78123'587.372361211424616960.1
46717杨辰阳06314'028210187403416756.8
46817张雨康181X4'028196124573817266.5
46917刘帅怡03194'388.09223218516816978
47017张世荣0326NaNNaNNaNNaNNaNNaNNaNNaN
47117刘泽阳181X4'28.37208218567717263.7
47217王鹏鑫00144'267.89232218705218082.9
47317贾耀杰10344'098.46205157420817161
47417刘艺通06193'497.662321110589717556.1
47517段佳硕27374'367.772361120515817655.2
47617刘鼎03154'378.27208171631117795.6
47717张浩27343'448.27217157507517057.6
47817庞慧谦07073'557.982122010556416854.5
47917李垚泽06153'417.5722595559918174.8
48017胡德皓36145'299.02210120671218395.9
48117张博03164'117.512382114559017967.7
48217张育森30114'567.42252913515918070
48317吴宜凯00153'547.96229149525418264.1
48417左一萌1015NaNNaNNaNNaNNaNNaNNaNNaN
48517王鹏飞06154'48.0218081459218764.6
48617张泽琼18153'547.512381311557217659.5
48717张晓波061X4'588.76200129453316951.3
48817张乔楠03114'238.27208100464717669.5
48917郭泽森03335'199.55210156704217776
49017陈子龙061X3'257.52521313575518165
49117王丹龙06364'397.812081411568817251.7
49217王玉涵0636NaNNaNNaNNaNNaNNaNNaNNaN

493 rows × 11 columns

data[:45]
班级性别姓名1000米50米跳远体前屈引体肺活量身高体重
01高孜阳06114'138.88195121278517072.6
11郝少杰10134'167.70225117313317452.7
21郝梓烨06194'098.45218141390116946.5
31何弘源10104'218.05206131494618379.7
41刘硕鹏12123'447.52210139353817154.7
51刘运硕03143'497.94190207397017566.4
61吕晓瑶03143'547.75186117371017353.9
71米孜聪06364'38.0619531557817883.1
81聂浩然27194'017.752201510382117566.5
91牛苗嘉12114'127.382451711442316753.9
101牛砚哲281347.822191311403117357.4
111齐子涵185x4'137.37228915435416354.6
121乔一甲06163'457.6620273223817961.1
131任晓波03113'467.6624537481117763.9
141戎小龙26330000000NaN
151桑淳熙06163'577.6019275414717459.2
161田晓龙24114'188.1421084424117961.9
171田玉聪27163'327.202552212532418363.4
181王晨宇06133'568.152071312436317360.5
191王家梁06303'478.152021316536417456
201王乐天33313'537.8521037344517756.9
211王一钊12133'577.8522092567017755.5
221王子天06343'427.232121215570918572.3
231王子鑫00124'37.68218153478017783.7
241未晓锟12144'148.30206151335817346.6
251张国瑞033x4'048.1520595349416948.3
261张皓天06324'047.55190125328616950.1
271张泽地03104'027.55240512448317158.4
281张智贤03183'577.89220911425416654.8
291赵博翰101x4'168.19212277349816968
301赵泽凯03114'017.89213511432217455.9
311赵泽宇06164'088.212081920391716651.9
321左晶川12174'068.71206114397017247.8
33班级性别姓名800米50米跳远体前屈仰卧起坐肺活量身高体重
342贾和06334'227.9721599386517558.7
352李森06360000000NaN
362李一帆18124'468.7917271475017488.6
372李子阳06184'017.3721027471418262.5
382吕星繁03120000000NaN
392赵凌云105x4'137.7720887432717356
402赵鹏悦26124'278.8185105474516474.8
41班级性别姓名1000米50米跳远体前屈引体肺活量身高体重
423宫诚博06123'436.89276161252121.8473.1
433郭亚浩181X4'047.2524013847561.7672
443郝晓辰00133'387.36246221144331.8462.5
# Drop the header rows that are repeated inside the sheet: in those rows the
# '班级' cell holds the literal text '班级'.
cond = data['班级'] != '班级'
# Take an explicit copy so that later in-place edits and column assignments act
# on an independent frame instead of a slice of the original, which is what
# triggers pandas' SettingWithCopyWarning.
data = data[cond].copy()
data[:45]
班级性别姓名1000米50米跳远体前屈引体肺活量身高体重
01高孜阳06114'138.88195121278517072.6
11郝少杰10134'167.70225117313317452.7
21郝梓烨06194'098.45218141390116946.5
31何弘源10104'218.05206131494618379.7
41刘硕鹏12123'447.52210139353817154.7
51刘运硕03143'497.94190207397017566.4
61吕晓瑶03143'547.75186117371017353.9
71米孜聪06364'38.0619531557817883.1
81聂浩然27194'017.752201510382117566.5
91牛苗嘉12114'127.382451711442316753.9
101牛砚哲281347.822191311403117357.4
111齐子涵185x4'137.37228915435416354.6
121乔一甲06163'457.6620273223817961.1
131任晓波03113'467.6624537481117763.9
141戎小龙26330000000NaN
151桑淳熙06163'577.6019275414717459.2
161田晓龙24114'188.1421084424117961.9
171田玉聪27163'327.202552212532418363.4
181王晨宇06133'568.152071312436317360.5
191王家梁06303'478.152021316536417456
201王乐天33313'537.8521037344517756.9
211王一钊12133'577.8522092567017755.5
221王子天06343'427.232121215570918572.3
231王子鑫00124'37.68218153478017783.7
241未晓锟12144'148.30206151335817346.6
251张国瑞033x4'048.1520595349416948.3
261张皓天06324'047.55190125328616950.1
271张泽地03104'027.55240512448317158.4
281张智贤03183'577.89220911425416654.8
291赵博翰101x4'168.19212277349816968
301赵泽凯03114'017.89213511432217455.9
311赵泽宇06164'088.212081920391716651.9
321左晶川12174'068.71206114397017247.8
342贾和06334'227.9721599386517558.7
352李森06360000000NaN
362李一帆18124'468.7917271475017488.6
372李子阳06184'017.3721027471418262.5
382吕星繁03120000000NaN
392赵凌云105x4'137.7720887432717356
402赵鹏悦26124'278.8185105474516474.8
423宫诚博06123'436.89276161252121.8473.1
433郭亚浩181X4'047.2524013847561.7672
443郝晓辰00133'387.36246221144331.8462.5
453李国玺23104'198.1722018144381.7472.2
463李一帆12184'087.822715160331.7785.6
# Students who did not take part have empty cells; record them as 0.
# Reassigning the result (instead of inplace=True) avoids mutating what may be
# a slice of another frame, which is the source of SettingWithCopyWarning.
data = data.fillna(0)
C:\Users\LXQ\Anaconda3\lib\site-packages\pandas\core\frame.py:2754: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  downcast=downcast, **kwargs)
# 没有空数据了
data.isnull().any()
班级       False
性别       False
姓名       False
1000米    False
50米      False
跳远       False
体前屈      False
引体       False
肺活量      False
身高       False
体重       False
dtype: bool
data.head()
班级性别姓名1000米50米跳远体前屈引体肺活量身高体重
01高孜阳06114'138.88195.01212785170.072.6
11郝少杰10134'167.70225.01173133174.052.7
21郝梓烨06194'098.45218.01413901169.046.5
31何弘源10104'218.05206.01314946183.079.7
41刘硕鹏12123'447.52210.01393538171.054.7
def convert(x):
    """Turn a run time written as minutes'seconds into ``minutes + seconds/100``.

    The result is an ordering key rather than decimal minutes: 4'13 becomes
    4.13. It stays order-preserving as long as the seconds part is below 100.

    Args:
        x: A cell value. Strings are parsed; anything else (for example the
            0 used for students who did not take part) is returned unchanged.

    Returns:
        The numeric key for a string input, or ``x`` itself otherwise.

    Raises:
        ValueError: If a string input's minute or second part is not an
            integer.
    """
    if not isinstance(x, str):
        return x
    # Tolerate surrounding whitespace and a double-quote seconds mark, and
    # accept a time that has no seconds part at all (e.g. "4" or "4'").
    minute, _, second = x.strip().strip('"').partition("'")
    seconds = int(second) if second.strip() else 0
    return int(minute) + seconds / 100.0
    
# Replace the 1000 m column with its numeric form; convert() parses string
# cells and passes non-string cells through unchanged.
data['1000米'] = data['1000米'].map(convert)

data.head()
C:\Users\LXQ\Anaconda3\lib\site-packages\ipykernel_launcher.py:10: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
班级性别姓名1000米50米跳远体前屈引体肺活量身高体重
01高孜阳06114.138.88195.01212785170.072.6
11郝少杰10134.167.70225.01173133174.052.7
21郝梓烨06194.098.45218.01413901169.046.5
31何弘源10104.218.05206.01314946183.079.7
41刘硕鹏12123.447.52210.01393538171.054.7
# Load the scoring table. header=[0, 1] reads the first two rows as a
# two-level column header; in the preview below the top level is the event
# name and the second level is '成绩' (threshold) / '分数' (points).
score = pd.read_excel('体侧成绩评分表.xls',header = [0,1])
score
男肺活量男肺活量女肺活量男50米跑女50米跑男体前屈女体前屈...女跳远男引体女仰卧男1000女800
成绩分数成绩分数成绩分数成绩分数成绩分数成绩...成绩分数成绩分数成绩分数成绩分数成绩分数
454010031501007.11007.810023.610024.2...20410016.0100531003'30"1003'24"100
4420953100957.2957.99521.59522.5...1989515.09551953'35"953'30"95
4300903050907.3908.09019.49020.8...1929014.09049903'40"903'36"90
4050852900857.4858.38517.28519.1...1858513.08546853'47"853'43"85
3800802750807.5808.68015.08017.4...1788012.08043803'55"803'50"80
3680782650787.7788.87813.67816.1...17578NaN7841784'00"783'55"78
3560762550767.9769.07612.27614.8...1727611.07639764'05"764'00"76
3440742450748.1749.27410.87413.5...16974NaN7437744'10"744'05"74
3320722350728.3729.4729.47212.2...1667210.07235724'15"724'10"72
3200702250708.5709.6708.07010.9...16370NaN7033704'20"704'15"70
3080682150688.7689.8686.6689.6...160689.06831684'25"684'20"68
2960662050668.96610.0665.2668.3...15766NaN6629664'30"664'25"66
2840641950649.16410.2643.8647.0...154648.06427644'35"644'30"64
2720621850629.36210.4622.4625.7...15162NaN6225624'40"624'35"62
2600601750609.56010.6601.0604.4...148607.06023604'45"604'40"60
2470501710509.75010.8500.0503.6...143506.05021505'05"504'50"50
2340401670409.94011.040-1.0402.8...138405.04019405'25"405'00"40
22103016303010.13011.230-2.0302.0...133304.03017305'45"305'10"30
20802015902010.32011.420-3.0201.2...128203.02015206'05"205'20"20
19501015501010.51011.610-4.0100.4...123102.01013106'25"105'30"10

20 rows × 23 columns

# Convert the boys' run thresholds from text to numbers
def convert(item):
    """Parse a minutes'seconds threshold into ``minutes + seconds/100``.

    A double-quote mark at either end of the text is stripped first, so a
    threshold of 3 minutes 30 seconds becomes 3.30.
    """
    minutes_text, seconds_text = item.strip('"').split("'")
    whole = int(minutes_text)
    hundredths = int(seconds_text)
    return whole + hundredths / 100.0
# Convert the fourth-from-last column in place (selected by position).
# NOTE(review): the printed table suggests this is the 男1000 '成绩' column —
# confirm against the sheet, since a positional index breaks if columns move.
score.iloc[:,-4] = score.iloc[:,-4].map(convert)
# Convert the girls' run thresholds from text to numbers
def convert(item):
    """Return ``minutes + seconds/100`` for text written as minutes'seconds.

    Double-quote marks at the ends of the text are removed before parsing.
    """
    minute_part, second_part = item.strip('"').split("'")
    return int(minute_part) + int(second_part) / 100.0
# Convert the second-from-last column in place (selected by position).
# NOTE(review): the printed table suggests this is the 女800 '成绩' column —
# confirm against the sheet.
score.iloc[:,-2] = score.iloc[:,-2].map(convert)
score

男肺活量男肺活量女肺活量男50米跑女50米跑男体前屈女体前屈...女跳远男引体女仰卧男1000女800
成绩分数成绩分数成绩分数成绩分数成绩分数成绩...成绩分数成绩分数成绩分数成绩分数成绩分数
454010031501007.11007.810023.610024.2...20410016.0100531003.301003.24100
4420953100957.2957.99521.59522.5...1989515.09551953.35953.3095
4300903050907.3908.09019.49020.8...1929014.09049903.40903.3690
4050852900857.4858.38517.28519.1...1858513.08546853.47853.4385
3800802750807.5808.68015.08017.4...1788012.08043803.55803.5080
3680782650787.7788.87813.67816.1...17578NaN7841784.00783.5578
3560762550767.9769.07612.27614.8...1727611.07639764.05764.0076
3440742450748.1749.27410.87413.5...16974NaN7437744.10744.0574
3320722350728.3729.4729.47212.2...1667210.07235724.15724.1072
3200702250708.5709.6708.07010.9...16370NaN7033704.20704.1570
3080682150688.7689.8686.6689.6...160689.06831684.25684.2068
2960662050668.96610.0665.2668.3...15766NaN6629664.30664.2566
2840641950649.16410.2643.8647.0...154648.06427644.35644.3064
2720621850629.36210.4622.4625.7...15162NaN6225624.40624.3562
2600601750609.56010.6601.0604.4...148607.06023604.45604.4060
2470501710509.75010.8500.0503.6...143506.05021505.05504.5050
2340401670409.94011.040-1.0402.8...138405.04019405.25405.0040
22103016303010.13011.230-2.0302.0...133304.03017305.45305.1030
20802015902010.32011.420-3.0201.2...128203.02015206.05205.2020
19501015501010.51011.610-4.0100.4...123102.01013106.25105.3010

20 rows × 23 columns

data.columns
Index(['班级', '性别', '姓名', '1000米', '50米', '跳远', '体前屈', '引体', '肺活量', '身高', '体重'], dtype='object')
# Rename the two run columns to the event names used as keys of the scoring
# table (score['男1000'], score['男50米跑']) so they can be looked up by name.
data.columns = ['班级', '性别', '姓名', '男1000', '男50米跑', '跳远', '体前屈', '引体', '肺活量', '身高', '体重']
# The 50 m times are compared against numeric thresholds, so make them floats.
# Use the builtin float: the np.float alias was deprecated in NumPy 1.20 and
# removed in NumPy 1.24, where it raises AttributeError.
data['男50米跑'] = data['男50米跑'].astype(float)
C:\Users\LXQ\Anaconda3\lib\site-packages\ipykernel_launcher.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
score['男1000']
成绩成绩分数
45403.30100
44203.3595
43003.4090
40503.4785
38003.5580
36804.0078
35604.0576
34404.1074
33204.1572
32004.2070
30804.2568
29604.3066
28404.3564
27204.4062
26004.4560
24705.0550
23405.2540
22105.4530
20806.0520
19506.2510
for col in ['男1000', '男50米跑']:
    # Scoring table for this event: thresholds in '成绩', points in '分数'.
    s = score[col]
    # Read both columns into plain lists once, instead of doing positional
    # .iloc lookups for every value being scored.
    marks = s['成绩'].tolist()
    points = s['分数'].tolist()

    def convert(x):
        """Score a timed result (smaller is better) against ``marks``.

        Returns 0 for a result of exactly 0 (did not take part), 100 for a
        result at or below the first threshold, 0 for a result above the last
        threshold (too slow), and otherwise the points of the first bracket
        ``marks[i - 1] < x <= marks[i]`` that contains it. Returns None when
        nothing matches (e.g. x is NaN or the table is empty).
        """
        if not marks:
            return None
        # The two boundary cases do not depend on the bracket being examined,
        # so they are decided once, before the scan.
        if x <= marks[0]:
            return 0 if x == 0 else 100
        if x > marks[-1]:
            return 0
        # Pair each threshold with its predecessor; starting at the second row
        # avoids the i - 1 == -1 wrap-around of a positional scan.
        for lower, upper, awarded in zip(marks, marks[1:], points[1:]):
            if lower < x <= upper:
                return awarded
        return None

    data[col + '成绩'] = data[col].map(convert)
C:\Users\LXQ\Anaconda3\lib\site-packages\ipykernel_launcher.py:16: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
data.head()
班级性别姓名男1000男50米跑跳远体前屈引体肺活量身高体重男1000成绩男50米跑成绩
01高孜阳06114.138.88195.01212785170.072.67266
11郝少杰10134.167.70225.01173133174.052.77078
21郝梓烨06194.098.45218.01413901169.046.57470
31何弘源10104.218.05206.01314946183.079.76874
41刘硕鹏12123.447.52210.01393538171.054.78578
for col in ['跳远', '体前屈', '引体']:
    # Look up the boys' scoring table for this event.
    s = score['男' + col]
    # Read thresholds and points into plain lists once, instead of doing
    # positional .iloc lookups for every value being scored.
    marks = s['成绩'].tolist()
    points = s['分数'].tolist()

    def convert(x):
        """Return the points of the first threshold that x reaches, else 0.

        Rows are scanned in table order; a NaN threshold never matches because
        the comparison with NaN is false.
        """
        # NOTE(review): absent students were zero-filled earlier, and a 0 here
        # still earns points wherever the table has a threshold <= 0 (the
        # printed 男体前屈 table lists 0.0 -> 50). The timed events treat 0 as
        # "did not take part"; confirm whether the same rule is wanted here.
        for needed, awarded in zip(marks, points):
            if x >= needed:
                return awarded
        return 0

    data[col + '成绩'] = data[col].map(convert)
C:\Users\LXQ\Anaconda3\lib\site-packages\ipykernel_launcher.py:10: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
data.head()
班级性别姓名男1000男50米跑跳远体前屈引体肺活量身高体重男1000成绩男50米跑成绩跳远成绩体前屈成绩引体成绩
01高孜阳06114.138.88195.01212785170.072.6726660740
11郝少杰10134.167.70225.01173133174.052.77078747460
21郝梓烨06194.098.45218.01413901169.046.5747070780
31何弘源10104.218.05206.01314946183.079.7687464760
41刘硕鹏12123.447.52210.01393538171.054.78578667668
# Target column order: each scored event's raw result is immediately followed
# by its score column; the unscored measurements come last.
cols = ['班级', '性别', '姓名', '男1000','男1000成绩', '男50米跑', '男50米跑成绩',
 '跳远', '跳远成绩', '体前屈', '体前屈成绩',  '引体', '引体成绩','肺活量','身高','体重']
# Select the columns by label, in the order given by the list
data = data[cols]
data.head()
班级性别姓名男1000男1000成绩男50米跑男50米跑成绩跳远跳远成绩体前屈体前屈成绩引体引体成绩肺活量身高体重
01高孜阳06114.13728.8866195.0601274102785170.072.6
11郝少杰10134.16707.7078225.07411747603133174.052.7
21郝梓烨06194.09748.4570218.0701478103901169.046.5
31何弘源10104.21688.0574206.0641376104946183.079.7
41刘硕鹏12123.44857.5278210.06613769683538171.054.7
data
班级性别姓名男1000男1000成绩男50米跑男50米跑成绩跳远跳远成绩体前屈体前屈成绩引体引体成绩肺活量身高体重
01高孜阳06114.13728.8866195.0601274102785170.072.6
11郝少杰10134.16707.7078225.07411747603133174.052.7
21郝梓烨06194.09748.4570218.0701478103901169.046.5
31何弘源10104.21688.0574206.0641376104946183.079.7
41刘硕鹏12123.44857.5278210.06613769683538171.054.7
51刘运硕03143.49807.9474190.05020907603970175.066.4
61吕晓瑶03143.54807.7576186.04011747603710173.053.9
71米孜聪06364.03768.0674195.060362105578178.083.1
81聂浩然27194.01767.7576220.072158010723821175.066.5
91牛苗嘉12114.12727.3885245.085178011764423167.053.9
101牛砚哲28134.00787.8276219.072137611764031173.057.4
111齐子涵185x4.13727.3785228.07697015954354163.054.6
121乔一甲06163.45857.6678202.0627683202238179.061.1
131任晓波03113.46857.6678245.0853627604811177.063.9
141戎小龙26330.0000.0000.000500000.00.0
151桑淳熙06163.57787.6078192.0507685404147174.059.2
161田晓龙24114.18708.1472210.0668704304241179.061.9
171田玉聪27163.32957.2095255.090229512805324183.063.4
181王晨宇06133.56788.1572207.064137612804363173.060.5
191王家梁06303.47858.1572202.0621376161005364174.056.0
201王乐天33313.53807.8576210.0663627603445177.056.9
211王一钊12133.57787.8576220.0729702105670177.055.5
221王子天06343.42857.2390212.068127415955709185.072.3
231王子鑫00124.03767.6878218.07015803204780177.083.7
241未晓锟12144.14728.3072206.0641580103358173.046.6
251张国瑞033x4.04768.1572205.0649705403494169.048.3
261张皓天06324.04767.5578190.05012745403286169.050.1
271张泽地03104.02767.5578240.08056412804483171.058.4
281张智贤03183.57787.8976220.07297011764254166.054.8
291赵博翰101x4.16708.1972212.068271007603498169.068.0
...................................................
46317王亚楠26364.15728.3670217.07020902105452175.083.4
46417陈核涛26124.36627.2290267.010066611765555179.062.2
46517曹佳尧213X3.48807.3785225.074178012805519176.062.2
46617贾存生78123.58787.3785236.080127411764246169.060.1
46717杨辰阳06314.02768.0074210.06618857604034167.056.8
46817张雨康181X4.02768.0074196.06012744305738172.066.5
46917刘帅怡03194.38628.0974223.07421908645168169.078.0
47017张世荣03260.0000.0000.000500000.00.0
47117刘泽阳181X4.02768.3770208.06621908645677172.063.7
47217王鹏鑫00144.26667.8976232.07821908647052180.082.9
47317贾耀杰10344.09748.4670205.06415807604208171.061.0
47417刘艺通06193.49807.6678232.078117410725897175.056.1
47517段佳硕27374.36627.7776236.0801174201005158176.055.2
47617刘鼎03154.37628.2772208.0661780106311177.095.6
47717张浩27343.44858.2772217.07015807605075170.057.6
47817庞慧谦07073.55807.9874212.068209010725564168.054.5
47917李垚泽06153.41857.5778225.0749705405599181.074.8
48017胡德皓36145.29309.0264210.0661274006712183.095.9
48117张博03164.11727.5178238.080219014905590179.067.7
48217张育森30114.56507.4280252.09097013855159180.070.0
48317吴宜凯00153.54807.9674229.07614789685254182.064.1
48417左一萌10150.0000.0000.000500000.00.0
48517王鹏飞06154.04768.0274180.030870104592187.064.6
48617张泽琼18153.54807.5178238.080137611765572176.059.5
48717张晓波061X4.58508.7666200.06212749684533169.051.3
48817张乔楠03114.23688.2772208.0661072004647176.069.5
48917郭泽森03335.19409.5550210.06615806507042177.076.0
49017陈子龙061X3.251007.5080252.090137613855755181.065.0
49117王丹龙06364.39627.8176208.066147811765688172.051.7
49217王玉涵06360.0000.0000.000500000.00.0

477 rows × 16 columns

def convert(x):
    """Return ``x / 100`` when x is greater than 100, otherwise x unchanged.

    Values of exactly 100 or below are left as they are.
    """
    return x / 100 if x > 100 else x
# Bring heights onto one scale: the sheet mixes entries such as 170 and 1.84,
# and convert() divides anything above 100 by 100.
data['身高'] = data['身高'].map(convert)
# BMI = weight / height**2, rounded to one decimal place. Rows whose height and
# weight are both 0 come out as NaN (visible in the preview below).
data['BMI'] = (data['体重']/(data['身高'])**2).round(1)
'''≤16.4
23.3~26.3'''
def convert_bmi(x):
    """Map a BMI value to its score band.

    Bands, with BMI taken to one decimal place:
        26.4 and above  -> 60
        16.4 and below  -> 80
        16.5 to 23.2    -> 100
        23.3 to 26.3    -> 80

    Args:
        x: BMI as a number. It is rounded to one decimal place before
            banding, because the band edges are one-decimal values; without
            this a value such as 16.46 would fall between 16.4 and 16.5 and
            be scored 0 as though it were invalid.

    Returns:
        60, 80 or 100 according to the band, or 0 when x is NaN.
    """
    bmi = round(x, 1)
    if bmi >= 26.4:
        return 60
    if bmi <= 16.4:
        return 80
    # The remaining bands are contiguous, so no finite value can slip through.
    if bmi < 23.3:
        return 100
    if bmi < 26.4:
        return 80
    # Only NaN reaches this point: every comparison with NaN is false.
    return 0

# Score every student's BMI with convert_bmi and store it in a new column.
data['BMI_score'] = data['BMI'].map(convert_bmi)
data.head(50)
班级性别姓名男1000男1000成绩男50米跑男50米跑成绩跳远跳远成绩体前屈体前屈成绩引体引体成绩肺活量身高体重BMIBMI_score
01高孜阳06114.13728.8866195.06012741027851.7072.625.180
11郝少杰10134.16707.7078225.074117476031331.7452.717.4100
21郝梓烨06194.09748.4570218.07014781039011.6946.516.380
31何弘源10104.21688.0574206.06413761049461.8379.723.880
41刘硕鹏12123.44857.5278210.066137696835381.7154.718.7100
51刘运硕03143.49807.9474190.050209076039701.7566.421.7100
61吕晓瑶03143.54807.7576186.040117476037101.7353.918.0100
71米孜聪06364.03768.0674195.0603621055781.7883.126.280
81聂浩然27194.01767.7576220.0721580107238211.7566.521.7100
91牛苗嘉12114.12727.3885245.0851780117644231.6753.919.3100
101牛砚哲28134.00787.8276219.0721376117640311.7357.419.2100
111齐子涵185x4.13727.3785228.076970159543541.6354.620.6100
121乔一甲06163.45857.6678202.06276832022381.7961.119.1100
131任晓波03113.46857.6678245.08536276048111.7763.920.4100
141戎小龙26330.0000.0000.000500000.000.0NaN0
151桑淳熙06163.57787.6078192.05076854041471.7459.219.6100
161田晓龙24114.18708.1472210.06687043042411.7961.919.3100
171田玉聪27163.32957.2095255.0902295128053241.8363.418.9100
181王晨宇06133.56788.1572207.0641376128043631.7360.520.2100
191王家梁06303.47858.1572202.06213761610053641.7456.018.5100
201王乐天33313.53807.8576210.06636276034451.7756.918.2100
211王一钊12133.57787.8576220.07297021056701.7755.517.7100
221王子天06343.42857.2390212.0681274159557091.8572.321.1100
231王子鑫00124.03767.6878218.070158032047801.7783.726.760
241未晓锟12144.14728.3072206.06415801033581.7346.615.680
251张国瑞033x4.04768.1572205.06497054034941.6948.316.9100
261张皓天06324.04767.5578190.050127454032861.6950.117.5100
271张泽地03104.02767.5578240.080564128044831.7158.420.0100
281张智贤03183.57787.8976220.072970117642541.6654.819.9100
291赵博翰101x4.16708.1972212.0682710076034981.6968.023.880
301赵泽凯03114.01767.8976213.068564117643221.7455.918.5100
311赵泽宇06164.08748.2172208.06619852010039171.6651.918.8100
321左晶川12174.06748.7166206.064117443039701.7247.816.280
342贾和06334.22687.9774215.07097096838651.7558.719.2100
352李森06360.0000.0000.000500000.000.0NaN0
362李一帆18124.46508.7966172.0107681047501.7488.629.360
372李子阳06184.01767.3785210.06626076047141.8262.518.9100
382吕星繁03120.0000.0000.000500000.000.0NaN0
392赵凌云105x4.13727.7776208.06687076043271.7356.018.7100
402赵鹏悦26124.27668.8066185.040107254047451.6474.827.860
423宫诚博06123.43856.89100276.01001680128052121.8473.121.6100
433郭亚浩181X4.04767.2590240.080137686447561.7672.023.2100
443郝晓辰00133.38907.3685246.0852295117644331.8462.518.5100
453李国玺23104.19708.1772220.07218851044381.7472.223.880
463李一帆12184.08747.8076227.07615801060331.7785.627.360
473刘凡12184.09748.0674208.066107221041061.7068.723.880
483刘哲垚12174.09748.1672190.05026065042141.6760.721.8100
493米卓凡241X4.05768.1672200.062137696838571.7251.417.4100
503牛卓凡06144.02768.2772228.0761478128032661.6252.219.9100
513苏仕一12334.01768.5070215.07066696835781.6449.918.6100
# Statistical analysis
# Decide what to look at, plot it, and compare
# Pie chart of how many rows have each BMI score; autopct labels every slice
# with its percentage to two decimals.
(data['BMI_score'].value_counts()).plot(kind = 'pie',autopct = '%0.2f%%')
<matplotlib.axes._subplots.AxesSubplot at 0x1a2f4996b70>
# Same BMI-score counts as a bar chart.
(data['BMI_score'].value_counts()).plot(kind = 'bar')

请添加图片描述

<matplotlib.axes._subplots.AxesSubplot at 0x1a2f45e9080>
# Bar chart of the number of rows at each 1000 m score: count() tallies the
# non-null BMI_score values within each '男1000成绩' group.
data.groupby(['男1000成绩'])['BMI_score'].count().plot(kind = 'bar')

请添加图片描述

<matplotlib.axes._subplots.AxesSubplot at 0x1a2f4906b70>

请添加图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值