-
data.astype()
-
给series重新指定索引的时候,能够和原来对得上的取其值,对不上的值为NaN
- ndarray的很多方法适用于series,比如argmax,clip等,但是注意where的使用方法不一样
pandas里字符串方法¶
In [1]:
import pandas as pd
import numpy as np
import string
In [2]:
d1 = {"name":["xiaoming","xiaogang"],"age":[12,15],"tel":[10010,11186]}
t1 = pd.DataFrame(d1)
print(t1)
In [3]:
d2 = [{"name":"xiaoming","age":10,"tel":10010},{"name":"xiaogang","tel":10086},{"name":"xiaohong","age":13}]
t2 = pd.DataFrame(d2)
print(t2)
In [4]:
csv_data = pd.read_csv("./IMDB-Movie-Data.csv")
print(csv_data.columns)
In [5]:
#取需要的字段
csv_data = pd.DataFrame(csv_data,columns=["Rank","Title","Director","Actors","Votes"])
#按投票数排列
csv_data = csv_data.sort_values(by="Votes",ascending=False)
#loc根据index和column来取数据
print(csv_data.loc[:100,["Title","Votes"]]) #因为数据经过重新排序,所以取到的并非前100行,而是取到index==100为止
In [6]:
#iloc根据行数和列数来取数据
print(csv_data.iloc[:100,[1,4]])
In [7]:
d3 = pd.DataFrame(np.arange(12).reshape(3,4),index=list(string.ascii_uppercase[:3]),columns=list(string.ascii_uppercase[-4:]))
d3.loc["B":"C","W":"X"]=np.nan
print(d3)
In [8]:
print(pd.notnull(d3["W"]))
In [9]:
d3.dropna(axis=0)
Out[9]:
In [10]:
#当数据全为nan时才删除
d3.dropna(axis=0,how="all")
Out[10]:
In [11]:
#有一个nan时就删除
d3.dropna(axis=0,how="any")
Out[11]:
In [12]:
#inplace对原数据进行修改
d3.dropna(axis=1,inplace=True)
In [13]:
#pandas计算时会忽略nan
t2.fillna(t2.mean())
Out[13]:
In [14]:
t2["age"] = t2["age"].fillna(t2["age"].mean())
print(t2)