# coding: utf-8
# In[38]:
# 导入相关库
import numpy as np
import pandas as pd
# In[39]:
# Build a small demo table of four users; the row labels are the user
# names and the index itself is titled "name".
index = pd.Index(["Tom", "Bob", "Mary", "James"], name="name")
data = dict(
    age=[18, 30, 25, 40],
    city=["BeiJing", "ShangHai", "GuangZhou", "ShenZhen"],
    sex=["male", "male", "female", "male"],
)
user_info = pd.DataFrame(data, index=index)
# Bare expression: in a notebook this displays the frame; as a script it
# has no visible effect.
user_info
# In[40]:
# Print a structural summary of the frame to stdout.
user_info.info()
# In[41]:
# (rows, columns) of the frame.
user_info.shape
# In[42]:
# Transposed view: rows become columns and vice versa.
user_info.transpose()
# In[43]:
user_info
# To get at the raw data held by a DataFrame, use .values; what comes
# back is a NumPy ndarray.
# In[44]:
user_info.values
# In[45]:
# Largest value in the "age" column.
user_info["age"].max()
# In[46]:
# Summary statistics for the frame.
user_info.describe()
# In[47]:
# How many rows there are for each distinct "sex" value.
user_info["sex"].value_counts()
# In[48]:
# Disabled alternative: let pandas pick 3 equal-width bins automatically.
#pd.cut(user_info.age, 3)
# In[49]:
# Bin the ages using explicit edges and give each bin a readable label.
pd.cut(
    user_info["age"],
    bins=[1, 18, 30, 50],
    labels=["childhood", "youth", "middle"],
)
# In[50]:
user_info
# In[51]:
# Order the columns by label, descending (axis=1 sorts column labels).
user_info.sort_index(axis=1, ascending=False)
# In[52]:
# Element-wise mapping with a function: flag users aged 30 or more.
user_info["age"].map(lambda age: "yes" if age >= 30 else "no")
# In[53]:
# Lookup table from city name to the region label used in this demo.
city_map = dict(
    BeiJing="north",
    ShangHai="south",
    GuangZhou="south",
    ShenZhen="south",
)
# Series.map also accepts a dict: each city value is replaced by the
# entry stored under that key.
user_info["city"].map(city_map)
# In[54]:
user_info
# In[55]:
# rename() returns a new frame with relabelled columns; the result is not
# assigned, so user_info itself keeps its original column names.
user_info.rename(columns={"age": "Age", "city": "City", "sex": "Sex"})
# In[56]:
# Same idea for row labels: only the listed index entries are relabelled.
user_info.rename(index={"Tom": "tom", "Bob": "bob"})
# In[57]:
# Count how many columns there are of each dtype.
# DataFrame.get_dtype_counts() was deprecated in pandas 0.25 and removed
# in 1.0; dtypes.value_counts() is the documented replacement.
user_info.dtypes.value_counts()
# In[58]:
# astype() returns a converted copy; because the result is discarded, the
# "age" column of user_info is left as it was.
user_info["age"].astype(float)
user_info
# In[65]:
# Write the frame to data.csv in the working directory, space-separated.
user_info.to_csv("data.csv", sep=" ")
# In[ ]: