支持的类型
pandas dtype | Python类型 | numpy类型 | 描述 |
---|---|---|---|
object | str | string_, unicode_ | 文本 |
int64 | int | int_, int8, int16, int32, int64, uint8, uint16, uint32, uint64 | 整数 |
float64 | float | float_, float16, float32, float64 | 浮点数 |
bool | bool | bool_ | 布尔值 |
datetime64[ns] | datetime | datetime64[ns] | 日期时间 |
timedelta64[ns] | NA | NA | 时间差 |
category | NA | NA | 取值个数有限的文本值列表 |
# Show the dtype of every column in the DataFrame.
df.dtypes
# Show the dtype of one column; `dtype` exists on a Series, not on a DataFrame.
df['col'].dtype
# Return the column converted to the generic object dtype (df is not modified).
df['col'].astype('object')
# Run pd.to_numeric over each selected column; the result is a new DataFrame.
df[['col1', 'col2']].apply(pd.to_numeric)
# query() is a DataFrame method, so cast the column through the frame rather
# than calling it on the Series returned by df['col'].astype(...).
# The literal in the expression must match the stored value type: a quoted
# string when the column holds text ...
df.astype({'col': 'object'}).query('col == "1"')
# ... and a bare number once the column is int64.
df.astype({'col': 'int64'}).query('col == 1')
类别类型Categoricals
# Build a categorical Series directly by passing dtype='category'.
s = pd.Series(['a', 'b', 'b', 'a', 'a', 'e'], dtype='category')

# Option 1: create the categorical column while constructing the DataFrame.
df = pd.DataFrame({
    "id": [1, 2, 3, 4, 5, 6],
    "raw_grade": pd.Series(['a', 'b', 'b', 'a', 'a', 'e'], dtype='category'),
})
# Option 2: start from plain strings and convert afterwards with astype.
# This rebinds df, so every step below operates on this second frame.
df = pd.DataFrame({"id": [1, 2, 3, 4, 5, 6], "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']})
df["grade"] = df["raw_grade"].astype("category")

# Inspect the categorical column: summary, category labels, ordered flag.
df['grade'].describe()
df['grade'].cat.categories
df['grade'].cat.ordered

# Rename the categories positionally (a -> very good, b -> good, e -> very bad).
# Assigning to `.cat.categories` was removed in pandas 2.0, so use
# rename_categories and assign the result back.
df["grade"] = df["grade"].cat.rename_categories(["very good", "good", "very bad"])

# Replace the category set; 'bad' and 'medium' are added as unused categories.
df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", "medium", "good", "very good"])
# Same call, additionally marking the categories as ordered (list order = rank).
df['grade'] = df['grade'].cat.set_categories(["very bad", "bad", "medium", "good", "very good"], ordered=True)

# Rename with a list: one new label per existing category, in order.
df['grade'] = df['grade'].cat.rename_categories([f'new_{i}' for i in df['grade'].cat.categories])
# Rename with a mapping: only categories that appear as keys are renamed;
# keys that match no existing category are ignored.
df['grade'] = df['grade'].cat.rename_categories({'a1': 'new_a1', 'b1': 'new_b1', 'c1': 'new_c1'})

# Toggle the ordered flag without touching the categories themselves.
df['grade'] = df['grade'].cat.as_ordered()
df['grade'] = df['grade'].cat.as_unordered()

# reorder_categories must receive exactly the existing categories, so rebuild
# the column from the raw letters before imposing the order a < e < b.
df['grade'] = df['raw_grade'].astype('category').cat.reorder_categories(['a', 'e', 'b'], ordered=True)

# Sorting follows the category order, not alphabetical order.
df.sort_values(['grade'], ascending=False)
df.sort_values(by='grade')
# Count rows per category.
df.groupby('grade').size()