## 01数据导入
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Load the survey data from a GBK-encoded CSV file. A raw string keeps every
# backslash of the Windows path literal; the original mixed "\\" with a bare
# "\m", which only worked because "\m" is not a recognised escape sequence.
df = pd.read_csv(r'D:\pytest\mkc\singledata\data.csv', encoding='gbk')
# Function-call form of print behaves the same on Python 2 and Python 3.
print(df.columns)
# Preview the first two rows.
df.head(2)
Index([u'恋爱次数', u'年级', u'性别', u'追过人数', u'被追人数', u'每周自习时间', u'每周娱乐时间', u'每周睡觉时间', u'每周运动时间', u'每月话费', u'学生组织个数', u'班干部', u'党员', u'足球', u'篮球', u'乒乓球', u'羽毛球', u'跑步', u'台球', u'唱歌', u'主持', u'舞蹈', u'乐器', u'其他才艺', u'家乡', u'成绩水平', u'生活费_百元', u'寝室同学情况', u'身高', u'体重', u'眼镜', u'颜值'], dtype='object')
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
## 02 数据预处理
恋爱次数 | 年级 | 性别 | 追过人数 | 被追人数 | 每周自习时间 | 每周娱乐时间 | 每周睡觉时间 | 每周运动时间 | 每月话费 | … | 乐器 | 其他才艺 | 家乡 | 成绩水平 | 生活费_百元 | 寝室同学情况 | 身高 | 体重 | 眼镜 | 颜值 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 5 | 大三 | 女 | 2 | 4 | 37 | 24 | 55 | 10 | 45 | … | 否 | 是 | 1一线城市 | 1 | 25 | 1 | 168 | 56.0 | 不戴眼镜 | 4 |
1 | 4 | 大一 | 女 | 1 | 5 | 6 | 10 | 40 | 9 | 100 | … | 否 | 是 | 1一线城市 | 39 | 20 | 1 | 158 | 47.0 | 戴眼镜 | 9 |
2 rows × 32 columns
# Print pandas' summary of the DataFrame (DataFrame.info).
df.info()
### 值替换 为了便于研究,将汉字转为英文单词;其余的值等用到时再替换
# Recode the Chinese category labels of three columns as English words.
# Each column is reassigned instead of calling
# Series.replace(..., inplace=True) on df[col]: the in-place call operates on
# an intermediate object and is not guaranteed to write back into df (it does
# not under pandas Copy-on-Write).
df[u'性别'] = df[u'性别'].replace({u'女': 'female', u'男': 'male'})
df[u'年级'] = df[u'年级'].replace({
    u'大一': 'freshman',
    u'大二': 'sophomore',
    u'大三': 'junior',
    u'大四': 'senior',
})
df[u'眼镜'] = df[u'眼镜'].replace({u'戴眼镜': 'wear', u'不戴眼镜': 'not_wear'})
# Display the recoded frame.
df
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
## 03描述性分析 1,恋爱次数分析
恋爱次数 | 年级 | 性别 | 追过人数 | 被追人数 | 每周自习时间 | 每周娱乐时间 | 每周睡觉时间 | 每周运动时间 | 每月话费 | … | 乐器 | 其他才艺 | 家乡 | 成绩水平 | 生活费_百元 | 寝室同学情况 | 身高 | 体重 | 眼镜 | 颜值 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 5 | junior | female | 2 | 4 | 37 | 24 | 55 | 10 | 45 | … | 否 | 是 | 1一线城市 | 1 | 25 | 1 | 168 | 56.0 | not_wear | 4 |
1 | 4 | freshman | female | 1 | 5 | 6 | 10 | 40 | 9 | 100 | … | 否 | 是 | 1一线城市 | 39 | 20 | 1 | 158 | 47.0 | wear | 9 |
2 | 3 | senior | male | 1 | 3 | 14 | 16 | 50 | 20 | 100 | … | 否 | 是 | 3三线城市 | 85 | 20 | 1 | 180 | 78.0 | not_wear | 8 |
3 | 2 | senior | female | 0 | 3 | 10 | 14 | 60 | 20 | 100 | … | 否 | 是 | 5农村 | 30 | 10 | 1 | 168 | 54.0 | wear | 5 |
4 | 1 | junior | male | 0 | 3 | 21 | 5 | 42 | 20 | 100 | … | 否 | 是 | 1一线城市 | 50 | 45 | 1 | 182 | 85.0 | not_wear | 10 |
5 | 1 | sophomore | male | 1 | 2 | 27 | 8 | 49 | 7 | 38 | … | 否 | 是 | 4县级市 | 30 | 30 | 1 | 177 | 67.0 | wear | 7 |
6 | 1 | junior | female | 2 | 5 | 32 | 3 | 57 | 5 | 50 | … | 是 | 否 | 5农村 | 50 | 10 | 1 | 161 | 57.0 | wear | 7 |
7 | 5 | senior | female | 0 | 8 | 8 | 41 | 63 | 6 | 30 | … | 是 | 否 | 2二线城市 | 45 | 40 | 1 | 176 | 65.0 | wear | 5 |
8 | 4 | sophomore | male | 2 | 3 | 27 | 25 | 76 | 11 | 45 | … | 否 | 是 | 4县级市 | 26 | 21 | 1 | 175 | 77.0 | wear | 8 |
9 | 4 | senior | female | 1 | 7 | 35 | 22 | 57 | 10 | 36 | … | 是 | 否 | 5农村 | 35 | 15 | 1 | 165 | 53.0 | wear | 5 |
10 | 4 | senior | female | 3 | 10 | 50 | 7 | 44 | 9 | 40 | … | 否 | 是 | 2二线城市 | 50 | 20 | 1 | 168 | 54.0 | wear | 6 |
11 | 4 | senior | female | 1 | 6 | 20 | 4 | 60 | 3 | 49 | … | 是 | 否 | 4县级市 | 60 | 20 | 1 | 163 | 50.0 | not_wear | 6 |
12 | 4 | sophomore | female | 0 | 4 | 30 | 30 | 49 | 0 | 60 | … | 是 | 否 | 1一线城市 | 30 | 30 | 1 | 166 | 45.0 | wear | 10 |
13 | 4 | senior | female | 0 | 6 | 29 | 19 | 55 | 6 | 40 | … | 是 | 是 | 1一线城市 | 43 | 14 | 1 | 159 | 46.0 | not_wear | 8 |
14 | 3 | senior | male | 3 | 3 | 3 | 6 | 57 | 2 | 50 | … | 是 | 否 | 3三线城市 | 25 | 21 | 1 | 178 | 88.0 | wear | 10 |
15 | 3 | senior | male | 5 | 3 | 20 | 19 | 44 | 4 | 79 | … | 是 | 否 | 2二线城市 | 36 | 25 | 1 | 172 | 90.0 | wear | 5 |
16 | 3 | senior | female | 1 | 6 | 36 | 20 | 60 | 14 | 100 | … | 是 | 否 | 3三线城市 | 42 | 32 | 1 | 160 | 48.0 | not_wear | 5 |
17 | 3 | senior | male | 4 | 7 | 10 | 20 | 56 | 7 | 50 | … | 否 | 是 | 2二线城市 | 75 | 20 | 1 | 170 | 57.0 | not_wear | 5 |
18 | 3 | senior | female | 1 | 5 | 0 | 27 | 60 | 0 | 50 | … | 否 | 是 | 4县级市 | 13 | 20 | 1 | 158 | 53.0 | not_wear | 8 |
19 | 3 | senior | female | 0 | 5 | 2 | 4 | 80 | 5 | 60 | … | 否 | 是 | 2二线城市 | 30 | 100 | 1 | 160 | 50.0 | not_wear | 5 |
20 | 3 | senior | female | 1 | 1 | 13 | 50 | 60 | 4 | 67 | … | 是 | 否 | 3三线城市 | 40 | 20 | 1 | 157 | 50.0 | wear | 6 |
21 | 3 | senior | male | 3 | 5 | 15 | 32 | 63 | 4 | 71 | … | 否 | 是 | 2二线城市 | 21 | 14 | 1 | 179 | 68.0 | not_wear | 8 |
22 | 3 | sophomore | male | 6 | 2 | 20 | 20 | 60 | 10 | 100 | … | 否 | 否 | 2二线城市 | 91 | 25 | 1 | 183 | 93.0 | wear | 6 |
23 | 2 | senior | female | 0 | 4 | 17 | 10 | 58 | 7 | 60 | … | 是 | 否 | 3三线城市 | 10 | 22 | 1 | 166 | 57.0 | wear | 7 |
24 | 2 | senior | female | 0 | 2 | 20 | 9 | 49 | 6 | 100 | … | 是 | 否 | 3三线城市 | 15 | 20 | 1 | 160 | 57.5 | wear | 8 |
25 | 2 | junior | male | 2 | 4 | 5 | 4 | 70 | 18 | 15 | … | 是 | 否 | 4县级市 | 28 | 9 | 1 | 180 | 75.0 | wear | 4 |
26 | 2 | junior | female | 0 | 10 | 30 | 10 | 35 | 10 | 28 | … | 否 | 否 | 3三线城市 | 30 | 20 | 1 | 170 | 50.0 | wear | 5 |
27 | 2 | junior | female | 0 | 1 | 23 | 9 | 56 | 2 | 38 | … | 是 | 否 | 1一线城市 | 50 | 15 | 1 | 168 | 48.0 | not_wear | 6 |
28 | 2 | junior | male | 1 | 3 | 30 | 19 | 50 | 4 | 100 | … | 是 | 否 | 2二线城市 | 60 | 16 | 1 | 180 | 75.0 | wear | 5 |
29 | 2 | junior | male | 2 | 1 | 21 | 2 | 21 | 8 | 41 | … | 否 | 是 | 2二线城市 | 39 | 14 | 1 | 170 | 66.0 | wear | 7 |
… | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
263 | 0 | freshman | female | 0 | 5 | 43 | 5 | 49 | 3 | 31 | … | 是 | 否 | 2二线城市 | 100 | 30 | 0 | 163 | 50.0 | wear | 8 |
264 | 0 | freshman | female | 0 | 0 | 8 | 3 | 50 | 3 | 38 | … | 是 | 否 | 3三线城市 | 100 | 13 | 0 | 164 | 53.0 | wear | 10 |
265 | 0 | freshman | female | 1 | 1 | 7 | 2 | 44 | 1 | 38 | … | 否 | 否 | 3三线城市 | 31 | 15 | 0 | 159 | 52.5 | not_wear | 5 |
266 | 0 | sophomore | female | 0 | 3 | 10 | 10 | 60 | 0 | 50 | … | 否 | 是 | 3三线城市 | 0 | 15 | 0 | 153 | 48.0 | not_wear | 5 |
267 | 0 | senior | male | 4 | 3 | 20 | 12 | 50 | 16 | 80 | … | 否 | 是 | 5农村 | 60 | 7 | 0 | 165 | 55.0 | not_wear | 8 |
268 | 0 | junior | male | 0 | 0 | 31 | 25 | 38 | 14 | 37 | … | 是 | 否 | 2二线城市 | 71 | 0 | 0 | 173 | 60.0 | wear | 7 |
269 | 0 | freshman | male | 2 | 1 | 20 | 6 | 49 | 7 | 39 | … | 是 | 否 | 4县级市 | 47 | 20 | 0 | 155 | 49.0 | not_wear | 7 |
270 | 0 | freshman | male | 0 | 0 | 6 | 8 | 52 | 2 | 30 | … | 否 | 否 | 4县级市 | 20 | 15 | 0 | 175 | 52.0 | wear | 10 |
271 | 0 | senior | female | 0 | 0 | 14 | 20 | 50 | 3 | 40 | … | 是 | 否 | 3三线城市 | 50 | 15 | 0 | 156 | 50.0 | wear | 5 |
272 | 0 | senior | female | 1 | 3 | 34 | 17 | 50 | 5 | 63 | … | 否 | 否 | 5农村 | 30 | 9 | 0 | 156 | 49.0 | not_wear | 10 |
273 | 0 | freshman | male | 0 | 1 | 10 | 11 | 50 | 5 | 80 | … | 否 | 是 | 4县级市 | 100 | 15 | 0 | 171 | 60.0 | wear | 8 |
274 | 0 | sophomore | male | 0 | 0 | 36 | 3 | 56 | 1 | 21 | … | 否 | 是 | 5农村 | 66 | 10 | 0 | 155 | 49.0 | not_wear | 10 |
275 | 0 | sophomore | female | 0 | 3 | 14 | 14 | 55 | 7 | 100 | … | 否 | 是 | 4县级市 | 80 | 100 | 0 | 167 | 57.0 | not_wear | 8 |
276 | 0 | senior | female | 1 | 4 | 35 | 17 | 55 | 7 | 100 | … | 是 | 否 | 3三线城市 | 47 | 25 | 0 | 161 | 48.0 | not_wear | 9 |
277 | 0 | junior | female | 0 | 5 | 30 | 21 | 58 | 0 | 31 | … | 否 | 是 | 1一线城市 | 4 | 9 | 0 | 167 | 52.0 | wear | 5 |
278 | 0 | senior | female | 1 | 0 | 9 | 16 | 57 | 0 | 100 | … | 否 | 是 | 5农村 | 100 | 15 | 0 | 158 | 47.0 | wear | 2 |
279 | 0 | sophomore | female | 0 | 5 | 7 | 2 | 55 | 2 | 50 | … | 否 | 是 | 2二线城市 | 20 | 10 | 0 | 168 | 53.0 | not_wear | 5 |
280 | 0 | sophomore | male | 1 | 0 | 20 | 20 | 46 | 0 | 36 | … | 否 | 否 | 2二线城市 | 50 | 20 | 0 | 181 | 61.0 | not_wear | 2 |
281 | 0 | freshman | female | 0 | 5 | 10 | 3 | 56 | 8 | 20 | … | 是 | 是 | 2二线城市 | 1 | 34 | 0 | 165 | 45.0 | not_wear | 7 |
282 | 0 | sophomore | female | 1 | 3 | 2 | 8 | 55 | 5 | 20 | … | 否 | 是 | 3三线城市 | 50 | 22 | 0 | 170 | 53.0 | not_wear | 5 |
283 | 0 | sophomore | female | 0 | 4 | 20 | 1 | 45 | 2 | 20 | … | 是 | 否 | 1一线城市 | 50 | 8 | 0 | 160 | 55.0 | not_wear | 8 |
284 | 0 | junior | female | 6 | 0 | 32 | 14 | 62 | 3 | 47 | … | 是 | 否 | 4县级市 | 40 | 23 | 0 | 162 | 65.0 | wear | 7 |
285 | 0 | senior | female | 2 | 5 | 14 | 42 | 49 | 12 | 100 | … | 是 | 否 | 2二线城市 | 29 | 100 | 0 | 161 | 60.0 | wear | 7 |
286 | 0 | senior | male | 1 | 0 | 50 | 16 | 42 | 7 | 100 | … | 是 | 否 | 2二线城市 | 30 | 29 | 0 | 189 | 101.0 | wear | 5 |
287 | 0 | senior | female | 0 | 2 | 10 | 15 | 50 | 2 | 23 | … | 是 | 否 | 2二线城市 | 50 | 100 | 0 | 165 | 50.5 | wear | 7 |
288 | 0 | sophomore | male | 2 | 2 | 30 | 1 | 50 | 2 | 60 | … | 是 | 否 | 3三线城市 | 73 | 18 | 0 | 176 | 68.0 | wear | 8 |
289 | 0 | senior | female | 0 | 2 | 40 | 17 | 55 | 2 | 50 | … | 是 | 否 | 3三线城市 | 20 | 100 | 0 | 160 | 49.0 | wear | 4 |
290 | 0 | sophomore | male | 0 | 0 | 10 | 20 | 56 | 0 | 50 | … | 是 | 否 | 2二线城市 | 55 | 100 | 0 | 184 | 90.0 | wear | 3 |
291 | 0 | sophomore | male | 0 | 2 | 5 | 14 | 50 | 4 | 58 | … | 否 | 是 | 2二线城市 | 50 | 12 | 0 | 175 | 64.0 | not_wear | 5 |
292 | 0 | sophomore | female | 0 | 0 | 24 | 16 | 51 | 6 | 20 | … | 否 | 否 | 2二线城市 | 40 | 100 | 0 | 162 | 64.0 | wear | 5 |
293 rows × 32 columns
# Rows per distinct value of "恋爱次数": group on that column, then count the
# non-null "年级" entries inside each group.
grouped = df.groupby(u'恋爱次数')[u'年级'].count()
grouped
恋爱次数 0 82 1 110 2 54 3 27 4 18 5 2 Name: 年级, dtype: int64
# Report the mean and median of "恋爱次数". print() receives one pre-formatted
# string so the statement is valid on both Python 2 and Python 3 (the original
# print statement is a SyntaxError on Python 3).
print('平均恋爱次数: {}'.format(df[u'恋爱次数'].mean()))
print('恋爱次数的中位数: {}'.format(df[u'恋爱次数'].median()))
# Bar chart of the per-value counts held in `grouped`.
grouped.plot(kind='bar', color='gray', align='center')
plt.xlabel('Times')
plt.ylabel('counts(person)')
平均恋爱次数: 1.30034129693 恋爱次数的中位数: 1.0
# Rows per distinct value of "被追人数" (non-null "年级" entries per group).
grouped1 = df.groupby(u'被追人数')[u'年级'].count()
grouped1
被追人数 0 75 1 47 2 33 3 44 4 17 5 31 6 13 7 5 8 6 9 3 10 19 Name: 年级, dtype: int64
# Report the mean and median of "被追人数". print() receives one pre-formatted
# string so the statement is valid on both Python 2 and Python 3.
print("平均被追人数: {}".format(df[u'被追人数'].mean()))
print("被追人数的中位数: {}".format(df[u'被追人数'].median()))
# Bar chart of the per-value counts held in `grouped1`.
grouped1.plot(kind='bar', color='gray', align='center')
plt.xlabel('Times')
plt.ylabel('counts(person)')
平均被追人数: 2.88737201365 被追人数的中位数: 2.0
# Rows per distinct value of "追过人数" (non-null "年级" entries per group).
grouped2 = df.groupby(u'追过人数')[u'年级'].count()
grouped2
追过人数 0 134 1 82 2 44 3 18 4 6 5 3 6 4 7 1 10 1 Name: 年级, dtype: int64
# Report the mean and median of "追过人数". print() receives one pre-formatted
# string so the statement is valid on both Python 2 and Python 3.
print("平均追过人数: {}".format(df[u'追过人数'].mean()))
print("追过人数的中位数: {}".format(df[u'追过人数'].median()))
# Plot the "追过人数" counts (`grouped2`). The original plotted `grouped1`,
# i.e. the "被追人数" counts again, which did not match the statistics above.
grouped2.plot(kind='bar', color='gray', align='center')
plt.xlabel('Times')
plt.ylabel('counts(person)')
平均追过人数: 1.03754266212 追过人数的中位数: 1.0 ## 探索性分析(1) 1,恋爱次数与性别关系 先统计男女比例
# Rows per value of "性别" (non-null "年级" entries per group).
grouped0 = df.groupby(u'性别')[u'年级'].count()
grouped0
性别 female 149 male 144 Name: 年级, dtype: int64
# Red bar chart of the per-sex counts held in `grouped0`.
grouped0.plot.bar(color='r')
plt.ylabel('numbers')
# Row counts for every (恋爱次数, 性别) combination, as a Series with a
# two-level index.
grouped3 = df.groupby([u'恋爱次数', u'性别'])[u'年级'].count()
grouped3
恋爱次数 性别 0 female 38 male 44 1 female 54 male 56 2 female 29 male 25 3 female 11 male 16 4 female 15 male 3 5 female 2 Name: 年级, dtype: int64
# Stacked bar chart: one bar per "恋爱次数" value, split by sex.
# `grouped3` is a Series with a two-level (恋爱次数, 性别) index; plotting it
# directly draws one flat bar per index pair, so nothing can be stacked by
# sex. unstack() pivots the sex level into columns so each column becomes its
# own coloured segment; fill_value=0 covers combinations absent from the data.
# `stacked` is passed as a real boolean (the original passed the string
# 'True'), and the misleading single label 'female' is dropped because the
# legend entries now come from the column names.
grouped3.unstack(fill_value=0).plot(kind='bar', stacked=True, color=['g', 'b'])
# After the pivot the x axis carries only the times value; sex is in the legend.
plt.xlabel('Times')
plt.ylabel('counts(person)')
plt.legend()
# Row counts for every (被追人数, 性别) combination, as a Series with a
# two-level index.
grouped4 = df.groupby([u'被追人数', u'性别'])[u"年级"].count()
grouped4
被追人数 性别 0 female 25 male 50 1 female 12 male 35 2 female 15 male 18 3 female 20 male 24 4 female 12 male 5 5 female 27 male 4 6 female 11 male 2 7 female 4 male 1 8 female 5 male 1 9 female 3 10 female 15 male 4 Name: 年级, dtype: int64
# Stacked bar chart: one bar per "被追人数" value, split by sex.
# `grouped4` has a two-level (被追人数, 性别) index, so it is pivoted with
# unstack() before plotting; plotting the Series directly draws one flat bar
# per index pair and `stacked` cannot split anything. fill_value=0 covers
# combinations absent from the data. The single label "female" is dropped
# because the legend entries now come from the column names.
grouped4.unstack(fill_value=0).plot(kind="bar", stacked=True, color=['r', 'g'])
# After the pivot the x axis carries only the times value; sex is in the legend.
plt.xlabel('Times')
plt.ylabel('counts(person)')
plt.legend()
# Row counts for every (追过人数, 性别) combination, as a Series with a
# two-level index.
grouped5 = df.groupby([u'追过人数', u'性别'])[u"年级"].count()
grouped5
追过人数 性别 0 female 92 male 42 1 female 37 male 45 2 female 16 male 28 3 female 2 male 16 4 female 1 male 5 5 male 3 6 female 1 male 3 7 male 1 10 male 1 Name: 年级, dtype: int64
# Stacked bar chart: one bar per "追过人数" value, split by sex.
# At this point `grouped5` has a two-level (追过人数, 性别) index, so it is
# pivoted with unstack() before plotting; plotting the Series directly draws
# one flat bar per index pair and `stacked` cannot split anything.
# fill_value=0 covers combinations absent from the data. The single label
# "female" is dropped because the legend entries now come from the column names.
grouped5.unstack(fill_value=0).plot(kind="bar", stacked=True, color=['r', 'g'])
# After the pivot the x axis carries only the times value; sex is in the legend.
plt.xlabel('Times')
plt.ylabel('counts(person)')
plt.legend()
# Row counts for every (被追人数, 颜值) combination. Note that this rebinds
# the name `grouped5`.
grouped5 = df.groupby([u'被追人数', u'颜值'])[u"年级"].count()
grouped5
被追人数 颜值 0 0 8 1 3 2 6 3 4 4 3 5 20 6 8 7 7 8 4 9 4 10 8 1 2 1 4 7 5 16 6 6 7 9 8 4 9 3 10 1 2 2 1 4 4 5 7 6 6 7 5 8 5 9 4 10 1 3 3 1 4 2 5 12 .. 5 3 1 4 2 5 5 6 4 7 8 8 5 9 2 10 3 6 0 1 5 1 6 1 8 7 9 3 7 5 3 6 1 7 1 8 5 4 6 1 9 1 9 7 2 10 1 10 1 1 3 1 4 1 5 3 6 1 7 2 8 1 9 5 10 4 Name: 年级, Length: 74, dtype: int64
# Stacked bar chart: one bar per "被追人数" value, split by "颜值" score.
# At this point `grouped5` has a two-level (被追人数, 颜值) index, so it is
# pivoted with unstack() before plotting; fill_value=0 covers combinations
# absent from the data. The 'female' label and 'Times&sex' axis title of the
# original were leftovers from the sex plots and did not describe this data.
# A colormap replaces the two-colour list because the score takes more than
# two values. pandas draws the legend from the column names by default.
grouped5.unstack(fill_value=0).plot(kind="bar", stacked=True, colormap="viridis")
plt.xlabel('Times')
plt.ylabel('counts(person)')
## 04探索性分析(2) 那些没追过别人和没被别人追过的孩子又会有啥不一样的呢
# Keep only the rows whose "恋爱次数" equals 0, then preview the first five.
zero = df.loc[df[u"恋爱次数"] == 0]
zero.head()
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
恋爱次数 | 年级 | 性别 | 追过人数 | 被追人数 | 每周自习时间 | 每周娱乐时间 | 每周睡觉时间 | 每周运动时间 | 每月话费 | … | 乐器 | 其他才艺 | 家乡 | 成绩水平 | 生活费_百元 | 寝室同学情况 | 身高 | 体重 | 眼镜 | 颜值 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
76 | 0 | senior | male | 0 | 0 | 50 | 45 | 70 | 0 | 70 | … | 否 | 是 | 3三线城市 | 30 | 13 | 1 | 170 | 59.0 | wear | 0 |
77 | 0 | junior | male | 0 | 0 | 5 | 45 | 63 | 5 | 0 | … | 是 | 否 | 3三线城市 | 10 | 7 | 1 | 171 | 70.0 | wear | 0 |
78 | 0 | junior | female | 0 | 0 | 6 | 4 | 60 | 1 | 30 | … | 否 | 是 | 3三线城市 | 60 | 12 | 1 | 160 | 61.0 | wear | 5 |
79 | 0 | senior | female | 3 | 2 | 16 | 11 | 56 | 10 | 38 | … | 否 | 是 | 1一线城市 | 31 | 8 | 1 | 162 | 50.0 | wear | 6 |
80 | 0 | senior | male | 0 | 0 | 35 | 25 | 42 | 5 | 40 | … | 否 | 是 | 5农村 | 55 | 10 | 1 | 175 | 72.0 | wear | 5 |
5 rows × 32 columns
1、恋爱与男女比例关系
先统计没有恋爱经验的男女比例
# Per-sex row counts within `zero`, drawn as a gray bar chart.
grouped7 = zero.groupby(u"性别")[u"年级"].count()
grouped7.plot.bar(color="gray")
plt.xlabel('sex')
plt.ylabel('counts(people)')
# Per-grade row counts within `zero` (non-null "性别" entries per group).
grouped8 = zero.groupby(u'年级')[u'性别'].count()
grouped8
年级 freshman 19 junior 12 senior 33 sophomore 18 Name: 性别, dtype: int64
# Gray bar chart of the per-grade counts held in `grouped8`.
grouped8.plot(kind='bar', color='gray')
plt.xlabel('grade')
# Axis label spelling fixed ('peopel' -> 'people') to match the sex chart.
plt.ylabel('counts(people)')
### 统计男性身高
# Rows whose "性别" value is "male" (the label used after the recoding step).
grouped9 = df.loc[df[u"性别"] == "male"]
grouped9
.dataframe thead tr:only-child th { text-align: right; } .dataframe thead th { text-align: left; } .dataframe tbody tr th { vertical-align: top; }
恋爱次数 | 年级 | 性别 | 追过人数 | 被追人数 | 每周自习时间 | 每周娱乐时间 | 每周睡觉时间 | 每周运动时间 | 每月话费 | … | 乐器 | 其他才艺 | 家乡 | 成绩水平 | 生活费_百元 | 寝室同学情况 | 身高 | 体重 | 眼镜 | 颜值 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2 | 3 | senior | male | 1 | 3 | 14 | 16 | 50 | 20 | 100 | … | 否 | 是 | 3三线城市 | 85 | 20 | 1 | 180 | 78.0 | not_wear | 8 |
4 | 1 | junior | male | 0 | 3 | 21 | 5 | 42 | 20 | 100 | … | 否 | 是 | 1一线城市 | 50 | 45 | 1 | 182 | 85.0 | not_wear | 10 |
5 | 1 | sophomore | male | 1 | 2 | 27 | 8 | 49 | 7 | 38 | … | 否 | 是 | 4县级市 | 30 | 30 | 1 | 177 | 67.0 | wear | 7 |
8 | 4 | sophomore | male | 2 | 3 | 27 | 25 | 76 | 11 | 45 | … | 否 | 是 | 4县级市 | 26 | 21 | 1 | 175 | 77.0 | wear | 8 |
14 | 3 | senior | male | 3 | 3 | 3 | 6 | 57 | 2 | 50 | … | 是 | 否 | 3三线城市 | 25 | 21 | 1 | 178 | 88.0 | wear | 10 |
15 | 3 | senior | male | 5 | 3 | 20 | 19 | 44 | 4 | 79 | … | 是 | 否 | 2二线城市 | 36 | 25 | 1 | 172 | 90.0 | wear | 5 |
17 | 3 | senior | male | 4 | 7 | 10 | 20 | 56 | 7 | 50 | … | 否 | 是 | 2二线城市 | 75 | 20 | 1 | 170 | 57.0 | not_wear | 5 |
21 | 3 | senior | male | 3 | 5 | 15 | 32 | 63 | 4 | 71 | … | 否 | 是 | 2二线城市 | 21 | 14 | 1 | 179 | 68.0 | not_wear | 8 |
22 | 3 | sophomore | male | 6 | 2 | 20 | 20 | 60 | 10 | 100 | … | 否 | 否 | 2二线城市 | 91 | 25 | 1 | 183 | 93.0 | wear | 6 |
25 | 2 | junior | male | 2 | 4 | 5 | 4 | 70 | 18 | 15 | … | 是 | 否 | 4县级市 | 28 | 9 | 1 | 180 | 75.0 | wear | 4 |
28 | 2 | junior | male | 1 | 3 | 30 | 19 | 50 | 4 | 100 | … | 是 | 否 | 2二线城市 | 60 | 16 | 1 | 180 | 75.0 | wear | 5 |
29 | 2 | junior | male | 2 | 1 | 21 | 2 | 21 | 8 | 41 | … | 否 | 是 | 2二线城市 | 39 | 14 | 1 | 170 | 66.0 | wear | 7 |
31 | 2 | sophomore | male | 2 | 0 | 35 | 2 | 55 | 12 | 40 | … | 否 | 是 | 3三线城市 | 20 | 7 | 1 | 174 | 62.0 | not_wear | 4 |
33 | 2 | junior | male | 1 | 8 | 18 | 18 | 56 | 12 | 100 | … | 否 | 是 | 4县级市 | 52 | 20 | 1 | 175 | 66.0 | wear | 9 |
34 | 2 | senior | male | 1 | 1 | 7 | 24 | 51 | 6 | 100 | … | 是 | 否 | 1一线城市 | 51 | 27 | 1 | 169 | 62.0 | wear | 4 |
35 | 2 | senior | male | 3 | 3 | 0 | 3 | 56 | 5 | 60 | … | 否 | 是 | 2二线城市 | 40 | 100 | 1 | 180 | 73.0 | wear | 10 |
36 | 2 | senior | male | 2 | 2 | 20 | 2 | 49 | 1 | 41 | … | 否 | 是 | 5农村 | 51 | 9 | 1 | 172 | 58.0 | wear | 7 |
38 | 2 | senior | male | 2 | 0 | 8 | 30 | 50 | 1 | 100 | … | 否 | 否 | 3三线城市 | 18 | 22 | 1 | 183 | 68.0 | wear | 9 |
44 | 1 | senior | male | 2 | 6 | 50 | 5 | 54 | 7 | 54 | … | 否 | 是 | 2二线城市 | 2 | 14 | 1 | 187 | 80.0 | wear | 8 |
45 | 1 | junior | male | 2 | 1 | 16 | 5 | 63 | 4 | 75 | … | 否 | 是 | 4县级市 | 58 | 24 | 1 | 168 | 51.0 | wear | 4 |
48 | 1 | freshman | male | 1 | 0 | 2 | 1 | 59 | 4 | 25 | … | 是 | 否 | 4县级市 | 50 | 16 | 1 | 171 | 57.0 | wear | 5 |
49 | 1 | junior | male | 1 | 2 | 25 | 11 | 49 | 7 | 49 | … | 是 | 否 | 4县级市 | 70 | 8 | 1 | 174 | 65.0 | not_wear | 7 |
52 | 1 | freshman | male | 1 | 1 | 18 | 12 | 53 | 5 | 49 | … | 否 | 是 | 3三线城市 | 48 | 14 | 1 | 180 | 61.0 | wear | 6 |
55 | 1 | junior | male | 1 | 4 | 50 | 1 | 40 | 1 | 45 | … | 是 | 否 | 3三线城市 | 2 | 15 | 1 | 184 | 72.0 | wear | 4 |
57 | 1 | junior | male | 1 | 1 | 20 | 23 | 50 | 10 | 40 | … | 否 | 是 | 2二线城市 | 40 | 10 | 1 | 183 | 62.0 | not_wear | 7 |
60 | 1 | freshman | male | 1 | 0 | 20 | 10 | 40 | 6 | 100 | … | 是 | 是 | 3三线城市 | 80 | 10 | 1 | 182 | 90.0 | not_wear | 8 |
62 | 1 | junior | male | 0 | 1 | 28 | 8 | 53 | 8 | 50 | … | 是 | 是 | 3三线城市 | 62 | 19 | 1 | 168 | 58.0 | wear | 7 |
63 | 1 | senior | male | 3 | 3 | 9 | 30 | 56 | 6 | 25 | … | 否 | 是 | 5农村 | 100 | 9 | 1 | 176 | 75.0 | wear | 3 |
68 | 1 | sophomore | male | 1 | 2 | 9 | 22 | 70 | 4 | 51 | … | 否 | 是 | 2二线城市 | 63 | 40 | 1 | 170 | 52.0 | wear | 9 |
70 | 1 | sophomore | male | 1 | 3 | 40 | 6 | 42 | 3 | 50 | … | 否 | 是 | 3三线城市 | 45 | 20 | 1 | 183 | 80.0 | wear | 6 |
… | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
230 | 3 | junior | male | 6 | 1 | 10 | 8 | 63 | 6 | 30 | … | 否 | 是 | 1一线城市 | 100 | 20 | 0 | 175 | 55.0 | wear | 5 |
232 | 3 | senior | male | 10 | 1 | 1 | 40 | 50 | 10 | 100 | … | 否 | 否 | 2二线城市 | 50 | 60 | 0 | 173 | 64.0 | wear | 5 |
233 | 3 | senior | male | 1 | 3 | 50 | 40 | 54 | 0 | 62 | … | 否 | 是 | 2二线城市 | 1 | 11 | 0 | 185 | 72.0 | wear | 10 |
234 | 2 | junior | male | 2 | 0 | 24 | 36 | 64 | 14 | 40 | … | 是 | 否 | 3三线城市 | 50 | 12 | 0 | 184 | 62.0 | wear | 6 |
236 | 2 | freshman | male | 1 | 1 | 33 | 10 | 54 | 11 | 40 | … | 是 | 否 | 3三线城市 | 34 | 15 | 0 | 174 | 88.0 | wear | 4 |
238 | 1 | sophomore | male | 0 | 2 | 21 | 14 | 40 | 10 | 20 | … | 是 | 否 | 2二线城市 | 10 | 15 | 0 | 185 | 65.0 | wear | 5 |
241 | 1 | freshman | male | 0 | 3 | 6 | 18 | 43 | 7 | 20 | … | 否 | 是 | 2二线城市 | 74 | 10 | 0 | 181 | 72.0 | wear | 10 |
242 | 1 | freshman | male | 0 | 1 | 14 | 17 | 55 | 18 | 50 | … | 否 | 是 | 1一线城市 | 100 | 20 | 0 | 186 | 71.0 | wear | 5 |
243 | 1 | senior | male | 2 | 0 | 0 | 15 | 55 | 6 | 40 | … | 否 | 否 | 2二线城市 | 1 | 16 | 0 | 185 | 85.0 | wear | 4 |
244 | 1 | freshman | male | 2 | 2 | 20 | 31 | 40 | 1 | 30 | … | 否 | 否 | 1一线城市 | 98 | 24 | 0 | 180 | 65.0 | wear | 6 |
245 | 1 | senior | male | 2 | 1 | 15 | 49 | 59 | 10 | 35 | … | 是 | 否 | 3三线城市 | 58 | 10 | 0 | 181 | 86.0 | wear | 9 |
246 | 1 | junior | male | 1 | 3 | 10 | 7 | 56 | 1 | 36 | … | 否 | 否 | 3三线城市 | 50 | 20 | 0 | 170 | 65.0 | wear | 5 |
247 | 1 | junior | male | 3 | 4 | 11 | 33 | 70 | 4 | 16 | … | 是 | 否 | 4县级市 | 100 | 25 | 0 | 175 | 62.0 | wear | 10 |
249 | 1 | sophomore | male | 1 | 0 | 20 | 31 | 56 | 1 | 50 | … | 是 | 否 | 4县级市 | 50 | 20 | 0 | 181 | 60.0 | wear | 10 |
255 | 1 | senior | male | 1 | 0 | 50 | 2 | 56 | 0 | 30 | … | 否 | 是 | 3三线城市 | 55 | 12 | 0 | 176 | 79.0 | wear | 2 |
257 | 1 | senior | male | 1 | 1 | 7 | 28 | 42 | 1 | 30 | … | 是 | 否 | 3三线城市 | 30 | 10 | 0 | 180 | 73.0 | wear | 6 |
258 | 1 | senior | male | 1 | 2 | 10 | 50 | 55 | 0 | 30 | … | 否 | 是 | 2二线城市 | 80 | 15 | 0 | 179 | 85.0 | wear | 5 |
259 | 0 | freshman | male | 0 | 1 | 20 | 16 | 50 | 6 | 100 | … | 否 | 是 | 2二线城市 | 50 | 15 | 0 | 179 | 61.0 | not_wear | 8 |
262 | 0 | freshman | male | 0 | 0 | 20 | 27 | 54 | 8 | 36 | … | 否 | 是 | 3三线城市 | 100 | 6 | 0 | 172 | 59.0 | wear | 0 |
267 | 0 | senior | male | 4 | 3 | 20 | 12 | 50 | 16 | 80 | … | 否 | 是 | 5农村 | 60 | 7 | 0 | 165 | 55.0 | not_wear | 8 |
268 | 0 | junior | male | 0 | 0 | 31 | 25 | 38 | 14 | 37 | … | 是 | 否 | 2二线城市 | 71 | 0 | 0 | 173 | 60.0 | wear | 7 |
269 | 0 | freshman | male | 2 | 1 | 20 | 6 | 49 | 7 | 39 | … | 是 | 否 | 4县级市 | 47 | 20 | 0 | 155 | 49.0 | not_wear | 7 |
270 | 0 | freshman | male | 0 | 0 | 6 | 8 | 52 | 2 | 30 | … | 否 | 否 | 4县级市 | 20 | 15 | 0 | 175 | 52.0 | wear | 10 |
273 | 0 | freshman | male | 0 | 1 | 10 | 11 | 50 | 5 | 80 | … | 否 | 是 | 4县级市 | 100 | 15 | 0 | 171 | 60.0 | wear | 8 |
274 | 0 | sophomore | male | 0 | 0 | 36 | 3 | 56 | 1 | 21 | … | 否 | 是 | 5农村 | 66 | 10 | 0 | 155 | 49.0 | not_wear | 10 |
280 | 0 | sophomore | male | 1 | 0 | 20 | 20 | 46 | 0 | 36 | … | 否 | 否 | 2二线城市 | 50 | 20 | 0 | 181 | 61.0 | not_wear | 2 |
286 | 0 | senior | male | 1 | 0 | 50 | 16 | 42 | 7 | 100 | … | 是 | 否 | 2二线城市 | 30 | 29 | 0 | 189 | 101.0 | wear | 5 |
288 | 0 | sophomore | male | 2 | 2 | 30 | 1 | 50 | 2 | 60 | … | 是 | 否 | 3三线城市 | 73 | 18 | 0 | 176 | 68.0 | wear | 8 |
290 | 0 | sophomore | male | 0 | 0 | 10 | 20 | 56 | 0 | 50 | … | 是 | 否 | 2二线城市 | 55 | 100 | 0 | 184 | 90.0 | wear | 3 |
291 | 0 | sophomore | male | 0 | 2 | 5 | 14 | 50 | 4 | 58 | … | 否 | 是 | 2二线城市 | 50 | 12 | 0 | 175 | 64.0 | not_wear | 5 |
144 rows × 32 columns
# Heights ("身高") of the rows selected into `grouped9`.
age_train_p = grouped9[u"身高"]
age_train_p
# Entries below 170.
age_train_p.loc[age_train_p < 170]
34 169
45 168
62 168
81 168
99 169
121 130
122 168
138 163
173 169
202 164
213 167
267 165
269 155
274 155
Name: 身高, dtype: int64
# Bin edges 150, 155, ..., 195, i.e. one bin every 5 units of height.
# NOTE(review): with pd.cut's defaults, values outside the edges (e.g. the
# 130 entry) get no bin and are left out of the counts - confirm this is intended.
ages = np.arange(150, 200, 5)
# Assign every height to its bin (values to bin, bin edges).
age_cut = pd.cut(age_train_p, bins=ages)
# Count the heights in each bin and draw the result as a gray bar chart.
age_cut_grouped = age_train_p.groupby(age_cut).count()
age_cut_grouped.plot.bar(color="gray")
plt.xlabel('height')
plt.ylabel('counts(person)')
<matplotlib.text.Text at 0xbbf8c88>
!png
# Show the full record at integer position 121.
df.iloc[121]
恋爱次数 2
年级 大三
性别 男
追过人数 0
被追人数 10
每周自习时间 0
每周娱乐时间 4
每周睡觉时间 50
每周运动时间 5
每月话费 100
学生组织个数 1
班干部 是
党员 否
足球 否
篮球 否
乒乓球 否
羽毛球 否
跑步 否
台球 否
唱歌 是
主持 是
舞蹈 否
乐器 是
其他才艺 否
家乡 2二线城市
成绩水平 20
生活费_百元 100
寝室同学情况 0
身高 130
体重 30
眼镜 不戴眼镜
颜值 10
Name: 121, dtype: object
按身高分组
# Group the "性别" column by "身高" and count the entries for each height.
grouped11 = df.groupby(u'身高')[u'性别']
grouped11.count()
身高
130 2
150 1
153 1
154 4
155 4
156 9
157 3
158 9
159 3
160 18
161 10
162 11
163 13
164 6
165 12
166 7
167 5
168 24
169 4
170 19
171 4
172 15
173 8
174 6
175 20
176 6
177 6
178 7
179 10
180 9
181 10
182 4
183 11
184 4
185 3
186 1
187 2
189 1
192 1
Name: 性别, dtype: int64
按性别分组,统计不同性别的身高信息,
方法有mean: 平均
max: 最大值
min: 最小
count: 统计个数
# Group the "身高" column by "性别" and take the mean height of each group.
# Note that this rebinds the name `grouped11`.
grouped11 = df.groupby(u'性别')[u'身高']
grouped11.mean()
性别
女 163.006711
男 175.861111
Name: 身高, dtype: float64
扎心了 老铁