一、pandas
1、pandas常用的数据类型
(1)series:一维,带标签数组;(标签是指索引),也可以自己指定索引。
(2)DataFrame:二维,series容器。
pandas取行或者列的注意点:
方括号写数字,表示取行,对行进行操作df[:20] ;写字符串表示取列索引,对列进行操作df["row_labels"] 结果是个series类型 因为只有一列;若取了两列则还是DataFrame类型。
pandas还有更多的经过优化的选择方式:
df.loc通过标签索引取行数据
df.iloc通过位置取行数据
import pandas as pd

# (1) Series and DataFrame basics.
# Build a Series with a custom index; the element count must match the index
# length (the default index starts at 0).
t = pd.Series([1, 13, 21, 12, 3, 4], index=list("abcdef"))
print(t)
print(type(t))

# A Series built from a dict: the keys become the index, the values the data.
t1 = {"name": "xiaoyu", "age": 30, "tel": 10086}
t2 = pd.Series(t1)
print(t2)

# Changing dtype works like numpy's astype.
print(t.dtype)
t3 = t.astype(float)
print(t3.dtype)

# Slicing and indexing a Series.
# By label:
print(t2["name"])
print(t["f"])
# By position (0-based). Use .iloc explicitly: plain integer keys on a
# label-indexed Series are deprecated positional indexing in modern pandas.
print(t2.iloc[2])
# Contiguous and non-contiguous positional selections:
print("连续", t2.iloc[:2])
print("不连续", t2.iloc[[0, 2]])
print(t2[["name", "tel"]])
print(t[t > 10])

# The index is iterable.
for i in t2.index:
    print(i)
print(type(t2.index))
print(len(t2.index))
print(list(t2.index))
print(list(t2.index)[:2])
print(t2.values)
print(type(t2.values))
a 1
b 13
c 21
d 12
e 3
f 4
dtype: int64
<class 'pandas.core.series.Series'>
name xiaoyu
age 30
tel 10086
dtype: object
int64
float64
xiaoyu
4
10086
连续 name xiaoyu
age 30
dtype: object
不连续 name xiaoyu
tel 10086
dtype: object
name xiaoyu
tel 10086
dtype: object
b 13
c 21
d 12
dtype: int64
name
age
tel
<class 'pandas.core.indexes.base.Index'>
3
['name', 'age', 'tel']
['name', 'age']
['xiaoyu' 30 10086]
<class 'numpy.ndarray'>
import pandas as pd
import numpy as np

# Create a DataFrame from a 3x4 ndarray (default integer row/column labels).
t = pd.DataFrame(np.arange(12).reshape((3, 4)))
print(t)

# index labels the rows (axis 0, vertical); columns labels the columns
# (axis 1, horizontal). Both default to 0, 1, 2, ...
t1 = pd.DataFrame(
    np.arange(12).reshape((3, 4)),
    index=["a", "b", "c"],
    columns=["w", "x", "y", "z"],
)
print(t1)

# From a dict of lists: each key becomes a column name.
t2 = {"name": ["xiao", "yu"], "age": [12, 22], "tel": [100, 101]}
print(t2, type(t2))
t3 = pd.DataFrame(t2)
print(t3, type(t3))

# From a list of dicts: each dict becomes one row.
t4 = [{"name": "xiao", "age": 21, "tel": 10086}, {"name": "xzz", "age": 24, "tel": 10066}]
print(t4, type(t4))
t5 = pd.DataFrame(t4)
print(t5, type(t5))

# Keys missing from a row show up as NaN.
t6 = [{"name": "xiao", "age": 21, "tel": 10086}, {"name": "xzz", "age": 24}, {"age": 24, "tel": 10086}]
t7 = pd.DataFrame(t6)
print(t7)
0 1 2 3
0 0 1 2 3
1 4 5 6 7
2 8 9 10 11
w x y z
a 0 1 2 3
b 4 5 6 7
c 8 9 10 11
{'name': ['xiao', 'yu'], 'age': [12, 22], 'tel': [100, 101]} <class 'dict'>
name age tel
0 xiao 12 100
1 yu 22 101 <class 'pandas.core.frame.DataFrame'>
[{'name': 'xiao', 'age': 21, 'tel': 10086}, {'name': 'xzz', 'age': 24, 'tel': 10066}] <class 'list'>
name age tel
0 xiao 21 10086
1 xzz 24 10066 <class 'pandas.core.frame.DataFrame'>
name age tel
0 xiao 21 10086.0
1 xzz 24 NaN
2 NaN 24 10086.0
df.sort_values(by="列名", ascending=False) 按by指定的字段排序;ascending=True 为升序,False 为降序。
import pandas as pd
import numpy as np

t3 = pd.DataFrame(
    np.arange(12).reshape((3, 4)),
    index=["a", "b", "c"],
    columns=["W", "X", "Y", "Z"],
)
print(t3)

# .loc selects by label.
print(t3.loc["a", "Z"])               # single cell
print(type(t3.loc["a", "Z"]))
print(t3.loc["a"])                    # whole row
print(type(t3.loc["a"]))
print(t3.loc[:, "Y"])                 # whole column

# Several rows and/or columns at once.
print(t3.loc[["a", "c"]])
print(t3.loc[["a", "c"], :])
print(t3.loc[:, ["W", "Y"]])
print(t3.loc[["a", "b"], ["W", "Y"]])
print(type(t3.loc[["a", "b"], ["W", "Y"]]))
# Label slices with ':' also work, but a .loc slice is CLOSED on both
# ends -- row "c" IS included here.
print(t3.loc["a":"c", ["W", "Y"]])

# .iloc selects by integer position.
print(t3.iloc[1])                     # row
print(t3.iloc[:, 1])                  # column
print(t3.iloc[:, [0, 2]])             # non-contiguous columns
print(t3.iloc[1:, :2])
t3.iloc[1:, :2] = 30
print(t3)
# Assigning NaN here does not raise: pandas upcasts the affected int
# columns to float automatically (numpy would raise instead).
t3.iloc[1:, :2] = np.nan
print(t3)
W X Y Z
a 0 1 2 3
b 4 5 6 7
c 8 9 10 11
3
<class 'numpy.int32'>
W 0
X 1
Y 2
Z 3
Name: a, dtype: int32
<class 'pandas.core.series.Series'>
a 2
b 6
c 10
Name: Y, dtype: int32
W X Y Z
a 0 1 2 3
c 8 9 10 11
W X Y Z
a 0 1 2 3
c 8 9 10 11
W Y
a 0 2
b 4 6
c 8 10
W Y
a 0 2
b 4 6
<class 'pandas.core.frame.DataFrame'>
W Y
a 0 2
b 4 6
c 8 10
W 4
X 5
Y 6
Z 7
Name: b, dtype: int32
a 1
b 5
c 9
Name: X, dtype: int32
W Y
a 0 2
b 4 6
c 8 10
W X
b 4 5
c 8 9
W X Y Z
a 0 1 2 3
b 30 30 6 7
c 30 30 10 11
W X Y Z
a 0.0 1.0 2 3
b NaN NaN 6 7
c NaN NaN 10 11
2、pandas读取外部数据
读csv文件:pd.read_csv("./dogName.csv")
读数据库文件:pd.read_sql(sql_sentence,connection)
从MongoDB中取数据:
3、pandas索引和缺失值的处理
(1)pandas的布尔索引
df[df["count_aname"]>800]# count_aname大于800的会被取出
df[1000>df["count_aname"]>800]这种不行会报错,应该改成df[(df["count_aname"]>800)&(df["count_aname"]<1000)]
如果是这种多个条件就要每一个条件都用括号括起来 在使用符号连接& 且 | 或。
(2)pandas字符串方法
import pandas as pd
import numpy as np

# pandas string methods (the .str accessor).
t2 = {"name": ["xiao", "yu"], "age": [12, 22], "tel": [100, 101], "info": ["王伟/潘粤明/肖战", "肖战/王一博/李现"]}
df = pd.DataFrame(t2)
print(df.head(1))

# .str.split returns a Series whose elements are lists.
print(df["info"].str.split("/"))
# .tolist() turns that Series into a plain Python list.
print(df["info"].str.split("/").tolist())
'''
[['王伟', '潘粤明', '肖战'], ['肖战', '王一博', '李现']]
一个大列表 里边每一个部分都是一个列表
'''
name age tel info
0 xiao 12 100 王伟/潘粤明/肖战
0 [王伟, 潘粤明, 肖战]
1 [肖战, 王一博, 李现]
Name: info, dtype: object
[['王伟', '潘粤明', '肖战'], ['肖战', '王一博', '李现']]
(3)pandas缺失值处理
import pandas as pd
import numpy as np

# Handling missing values (NaN).
t3 = pd.DataFrame(np.arange(12).reshape((3, 4)), index=list("abc"), columns=list("WXYZ"))
t3.iloc[1:, :2] = np.nan
print(t3)
print(pd.isnull(t3))   # True where a value is NaN
print(pd.notnull(t3))  # exact complement of isnull
# Boolean indexing: keep only the rows where column W is not NaN.
print(t3[pd.notnull(t3["W"])])
print(pd.notnull(t3["W"]))
# how="all": drop a row only when EVERY value in it is NaN.
print(t3.dropna(axis=0, how="all"))
# Default how="any": drop a row as soon as it contains a single NaN.
print(t3.dropna(axis=0))
print(t3)
# inplace=True modifies t3 itself; without it (and without reassigning),
# dropna leaves the original t3 untouched.
# t3 = t3.dropna(axis=0) is equivalent to t3.dropna(axis=0, how="any", inplace=True)
# t3.dropna(axis=0, how="any", inplace=True)
# print(t3)
# Fill NaN with the mean of column Y.
print(t3["Y"].mean())
t4 = t3.fillna(t3["Y"].mean())
print(t4)

t2 = {"name": ["xiao", "yu"], "age": [12, 22], "tel": [100, 101], "info": ["王伟/潘粤明/肖战", "肖战/王一博/李现"]}
t2 = pd.DataFrame(t2)
# Use .loc for the assignment: the original chained form t2["age"][1] = np.nan
# writes through an intermediate object and is unreliable (SettingWithCopy;
# a hard error under pandas copy-on-write).
t2.loc[1, "age"] = np.nan
print(t2)
print(t2["age"].mean())    # mean skips NaN
print(t2["age"].median())  # median skips NaN
W X Y Z
a 0.0 1.0 2 3
b NaN NaN 6 7
c NaN NaN 10 11
W X Y Z
a False False False False
b True True False False
c True True False False
W X Y Z
a True True True True
b False False True True
c False False True True
W X Y Z
a 0.0 1.0 2 3
a True
b False
c False
Name: W, dtype: bool
W X Y Z
a 0.0 1.0 2 3
b NaN NaN 6 7
c NaN NaN 10 11
W X Y Z
a 0.0 1.0 2 3
W X Y Z
a 0.0 1.0 2 3
b NaN NaN 6 7
c NaN NaN 10 11
6.0
W X Y Z
a 0.0 1.0 2 3
b 6.0 6.0 6 7
c 6.0 6.0 10 11
name age tel info
0 xiao 12.0 100 王伟/潘粤明/肖战
1 yu NaN 101 肖战/王一博/李现
12.0
12.0
4、pandas常用统计方法
数据来源:https://www.kaggle.com/damianpanek/sunday-eda/data?select=IMDB-Movie-Data.csv
import pandas as pd
import numpy as np

# 1000 most popular movies 2006--2016: ratings, directors, actors, runtimes.
df = pd.read_csv("datasets_1474_2639_IMDB-Movie-Data.csv")
# print(df)
# print(df.info())
print(df.head(1))

# Average rating over all movies.
print(df["Rating"].mean())

# Number of distinct directors -- two equivalent ways.
# print(df["Director"].tolist())
print(len(set(df["Director"].tolist())))
# print(set(df["Director"].tolist()))
print(len(df["Director"].unique()))
# print(df["Director"].unique())

# Number of distinct actors.
print("原始数据")
print(df["Actors"].tolist())
print(",分割后数据")
temp_actors_list = df["Actors"].str.split(",").tolist()
print(temp_actors_list)
# Flatten the nested lists, stripping the whitespace that follows the commas
# in the raw "A, B, C" strings -- without strip() the same actor would be
# counted twice (e.g. "Vin Diesel" vs " Vin Diesel").
actors_list = [name.strip() for sub in temp_actors_list for name in sub]
# np.array(temp_actors_list).flatten() does NOT work here: the sublists can
# have different lengths, so the array stays nested (object dtype).
# print(actors_list)
actors_num = len(set(actors_list))
print(actors_num)

# Runtime extremes and the row positions where they occur.
print(df["Runtime (Minutes)"].max())
print(df["Runtime (Minutes)"].min())
print(df["Runtime (Minutes)"].argmax())
print(df["Runtime (Minutes)"].argmin())
import pandas as pd
import numpy as np
import matplotlib
from matplotlib import pyplot as plt

# Count movie genres. One movie can belong to several genres, so:
# build an all-zeros frame with one column per genre, set a cell to 1
# whenever that movie has that genre, then sum each column.
file_path = "./datasets_1474_2639_IMDB-Movie-Data.csv"
df = pd.read_csv(file_path)
print(df.head(1))
print(df["Genre"])

# Split the "A,B,C" genre strings into nested lists: [[...], [...], ...]
temp_list = df["Genre"].str.split(",").tolist()
print(temp_list)
# Deduplicate with a set to get the distinct genre names.
genre_list = list(set([i for j in temp_list for i in j]))
print(genre_list)

# All-zeros frame: one row per movie, one column per genre.
zeros_df = pd.DataFrame(np.zeros((df.shape[0], len(genre_list))), columns=genre_list)
print(zeros_df)
# Mark each movie's genres (assigns 1 to several columns of one row at once).
for i in range(df.shape[0]):
    zeros_df.loc[i, temp_list[i]] = 1
print(zeros_df.head(5))

# Per-genre totals: sum each column.
genre_count = zeros_df.sum(axis=0)
print(genre_count)
genre_count = genre_count.sort_values()

# Bar chart of the counts.
plt.figure(figsize=(20, 8), dpi=80)
_x = genre_count.index
_y = genre_count.values
plt.bar(range(len(_x)), _y, width=0.4, color="orange")
plt.xticks(range(len(_x)), _x)
plt.show()
Rank Title ... Revenue (Millions) Metascore
0 1 Guardians of the Galaxy ... 333.13 76.0
[1 rows x 12 columns]
0 Action,Adventure,Sci-Fi
1 Adventure,Mystery,Sci-Fi
2 Horror,Thriller
3 Animation,Comedy,Family
4 Action,Adventure,Fantasy
...
995 Crime,Drama,Mystery
996 Horror
997 Drama,Music,Romance
998 Adventure,Comedy
999 Comedy,Family,Fantasy
Name: Genre, Length: 1000, dtype: object
[['Action', 'Adventure', 'Sci-Fi'], ['Adventure', 'Mystery', 'Sci-Fi'], ['Horror', 'Thriller'], ['Animation', 'Comedy', 'Family'], ['Action', 'Adventure', 'Fantasy'], ['Action', 'Adventure', 'Fantasy'], ['Comedy', 'Drama', 'Music'], ['Comedy'], ['Action', 'Adventure', 'Biography'], ['Adventure', 'Drama', 'Romance'], ['Adventure', 'Family', 'Fantasy'], ['Biography', 'Drama', 'History'], ['Action', 'Adventure', 'Sci-Fi'], ['Animation', 'Adventure', 'Comedy'], ['Action', 'Comedy', 'Drama'], ['Animation', 'Adventure', 'Comedy'], ['Biography', 'Drama', 'History'], ['Action', 'Thriller'], ['Biography', 'Drama'], ['Drama', 'Mystery', 'Sci-Fi'], ['Adventure', 'Drama', 'Thriller'], ['Drama'], ['Crime', 'Drama', 'Horror'], ['Animation', 'Adventure', 'Comedy'], ['Action', 'Adventure', 'Sci-Fi'], ['Comedy'], ['Action', 'Adventure', 'Drama'], ['Horror', 'Thriller'], ['Comedy'], ['Action', 'Adventure', 'Drama'], ['Comedy'], ['Drama', 'Thriller'], ['Action', 'Adventure', 'Sci-Fi'], ['Action', 'Adventure', 'Comedy'], ['Action', 'Horror', 'Sci-Fi'], ['Action', 'Adventure', 'Sci-Fi'], ['Adventure', 'Drama', 'Sci-Fi'], ['Action', 'Adventure', 'Fantasy'], ['Action', 'Adventure', 'Western'], ['Comedy', 'Drama'], ['Animation', 'Adventure', 'Comedy'], ['Drama'], ['Horror'], ['Biography', 'Drama', 'History'], ['Drama'], ['Action', 'Adventure', 'Fantasy'], ['Drama', 'Thriller'], ['Adventure', 'Drama', 'Fantasy'], ['Action', 'Adventure', 'Sci-Fi'], ['Drama'], ['Action', 'Adventure', 'Fantasy'], ['Action', 'Adventure', 'Fantasy'], ['Comedy', 'Drama'], ['Action', 'Crime', 'Thriller'], ['Action', 'Crime', 'Drama'], ['Adventure', 'Drama', 'History'], ['Crime', 'Horror', 'Thriller'], ['Drama', 'Romance'], ['Comedy', 'Drama', 'Romance'], ['Biography', 'Drama'], ['Action', 'Adventure', 'Sci-Fi'], ['Horror', 'Mystery', 'Thriller'], ['Crime', 'Drama', 'Mystery'], ['Drama', 'Romance', 'Thriller'], ['Drama', 'Mystery', 'Sci-Fi'], ['Action', 'Adventure', 'Comedy'], ['Drama', 'History', 'Thriller'], 
['Action', 'Adventure', 'Sci-Fi'], ['Drama'], ['Action', 'Drama', 'Thriller'], ['Drama', 'History'], ['Action', 'Drama', 'Romance'], ['Drama', 'Fantasy'], ['Drama', 'Romance'], ['Animation', 'Adventure', 'Comedy'], ['Action', 'Adventure', 'Fantasy'], ['Action', 'Sci-Fi'], ['Adventure', 'Drama', 'War'], ['Action', 'Adventure', 'Fantasy'], ['Action', 'Comedy', 'Fantasy'], ['Action', 'Adventure', 'Sci-Fi'], ['Comedy', 'Drama'], ['Biography', 'Comedy', 'Crime'], ['Crime', 'Drama', 'Mystery'], ['Action', 'Crime', 'Thriller'], ['Action', 'Adventure', 'Sci-Fi'], ['Crime', 'Drama'], ['Action', 'Adventure', 'Fantasy'], ['Crime', 'Drama', 'Mystery'], ['Action', 'Crime', 'Drama'], ['Crime', 'Drama', 'Mystery'], ['Action', 'Adventure', 'Fantasy'], ['Drama'], ['Comedy', 'Crime', 'Drama'], ['Action', 'Adventure', 'Sci-Fi'], ['Action', 'Comedy', 'Crime'], ['Animation', 'Drama', 'Fantasy'], ['Horror', 'Mystery', 'Sci-Fi'], ['Drama', 'Mystery', 'Thriller'], ['Crime', 'Drama', 'Thriller'], ['Biography', 'Crime', 'Drama'], ['Action', 'Adventure', 'Fantasy'], ['Adventure', 'Drama', 'Sci-Fi'], ['Crime', 'Mystery', 'Thriller'], ['Action', 'Adventure', 'Comedy'], ['Crime', 'Drama', 'Thriller'], ['Comedy'], ['Action', 'Adventure', 'Drama'], ['Drama'], ['Drama', 'Mystery', 'Sci-Fi'], ['Action', 'Horror', 'Thriller'], ['Biography', 'Drama', 'History'], ['Romance', 'Sci-Fi'], ['Action', 'Fantasy', 'War'], ['Adventure', 'Drama', 'Fantasy'], ['Comedy'], ['Horror', 'Thriller'], ['Action', 'Biography', 'Drama'], ['Drama', 'Horror', 'Mystery'], ['Animation', 'Adventure', 'Comedy'], ['Adventure', 'Drama', 'Family'], ['Adventure', 'Mystery', 'Sci-Fi'], ['Adventure', 'Comedy', 'Romance'], ['Action'], ['Action', 'Thriller'], ['Adventure', 'Drama', 'Family'], ['Action', 'Adventure', 'Sci-Fi'], ['Adventure', 'Crime', 'Mystery'], ['Comedy', 'Family', 'Musical'], ['Adventure', 'Drama', 'Thriller'], ['Drama'], ['Adventure', 'Comedy', 'Drama'], ['Drama', 'Horror', 'Thriller'], ['Drama', 'Music'], 
['Action', 'Crime', 'Thriller'], ['Crime', 'Drama', 'Thriller'], ['Crime', 'Drama', 'Thriller'], ['Drama', 'Romance'], ['Mystery', 'Thriller'], ['Mystery', 'Thriller', 'Western'], ['Action', 'Adventure', 'Sci-Fi'], ['Comedy', 'Family'], ['Biography', 'Comedy', 'Drama'], ['Drama'], ['Drama', 'Western'], ['Drama', 'Mystery', 'Romance'], ['Comedy', 'Drama'], ['Action', 'Drama', 'Mystery'], ['Comedy'], ['Action', 'Adventure', 'Crime'], ['Adventure', 'Family', 'Fantasy'], ['Adventure', 'Sci-Fi', 'Thriller'], ['Drama'], ['Action', 'Crime', 'Drama'], ['Drama', 'Horror', 'Mystery'], ['Action', 'Horror', 'Sci-Fi'], ['Action', 'Adventure', 'Sci-Fi'], ['Comedy', 'Drama', 'Romance'], ['Action', 'Comedy', 'Fantasy'], ['Action', 'Comedy', 'Mystery'], ['Thriller', 'War'], ['Action', 'Comedy', 'Crime'], ['Action', 'Adventure', 'Sci-Fi'], ['Action', 'Adventure', 'Crime'], ['Action', 'Adventure', 'Thriller'], ['Drama', 'Fantasy', 'Romance'], ['Action', 'Adventure', 'Comedy'], ['Biography', 'Drama', 'History'], ['Action', 'Drama', 'History'], ['Action', 'Adventure', 'Thriller'], ['Crime', 'Drama', 'Thriller'], ['Animation', 'Adventure', 'Family'], ['Adventure', 'Horror'], ['Drama', 'Romance', 'Sci-Fi'], ['Animation', 'Adventure', 'Comedy'], ['Action', 'Adventure', 'Family'], ['Action', 'Adventure', 'Drama'], ['Action', 'Comedy'], ['Horror', 'Mystery', 'Thriller'], ['Action', 'Adventure', 'Comedy'], ['Comedy', 'Romance'], ['Horror', 'Mystery'], ['Drama', 'Family', 'Fantasy'], ['Sci-Fi'], ['Drama', 'Thriller'], ['Drama', 'Romance'], ['Drama', 'War'], ['Drama', 'Fantasy', 'Horror'], ['Crime', 'Drama'], ['Comedy', 'Drama', 'Romance'], ['Drama', 'Romance'], ['Drama'], ['Crime', 'Drama', 'History'], ['Horror', 'Sci-Fi', 'Thriller'], ['Action', 'Drama', 'Sport'], ['Action', 'Adventure', 'Sci-Fi'], ['Crime', 'Drama', 'Thriller'], ['Adventure', 'Biography', 'Drama'], ['Biography', 'Drama', 'Thriller'], ['Action', 'Comedy', 'Crime'], ['Action', 'Adventure', 'Sci-Fi'], ['Drama', 'Fantasy', 
'Horror'], ['Biography', 'Drama', 'Thriller'], ['Action', 'Adventure', 'Sci-Fi'], ['Action', 'Adventure', 'Mystery'], ['Action', 'Adventure', 'Sci-Fi'], ['Drama', 'Horror'], ['Comedy', 'Drama', 'Romance'], ['Comedy', 'Romance'], ['Drama', 'Horror', 'Thriller'], ['Action', 'Adventure', 'Drama'], ['Drama'], ['Action', 'Adventure', 'Sci-Fi'], ['Action', 'Drama', 'Mystery'], ['Action', 'Adventure', 'Fantasy'], ['Action', 'Adventure', 'Fantasy'], ['Action', 'Adventure', 'Sci-Fi'], ['Action', 'Adventure', 'Comedy'], ['Drama', 'Horror'], ['Action', 'Comedy'], ['Action', 'Adventure', 'Sci-Fi'], ['Animation', 'Adventure', 'Comedy'], ['Horror', 'Mystery'], ['Crime', 'Drama', 'Mystery'], ['Comedy', 'Crime'], ['Drama'], ['Comedy', 'Drama', 'Romance'], ['Action', 'Adventure', 'Sci-Fi'], ['Action', 'Adventure', 'Family'], ['Horror', 'Sci-Fi', 'Thriller'], ['Drama', 'Fantasy', 'War'], ['Crime', 'Drama', 'Thriller'], ['Action', 'Adventure', 'Drama'], ['Action', 'Adventure', 'Thriller'], ['Action', 'Adventure', 'Drama'], ['Drama', 'Romance'], ['Biography', 'Drama', 'History'], ['Drama', 'Horror', 'Thriller'], ['Adventure', 'Comedy', 'Drama'], ['Action', 'Adventure', 'Romance'], ['Action', 'Drama', 'War'], ['Animation', 'Adventure', 'Comedy'], ['Animation', 'Adventure', 'Comedy'], ['Action', 'Adventure', 'Sci-Fi'], ['Adventure', 'Family', 'Fantasy'], ['Drama', 'Musical', 'Romance'], ['Drama', 'Sci-Fi', 'Thriller'], ['Comedy', 'Drama'], ['Action', 'Comedy', 'Crime'], ['Biography', 'Comedy', 'Drama'], ['Comedy', 'Drama', 'Romance'], ['Drama', 'Thriller'], ['Biography', 'Drama', 'History'], ['Action', 'Adventure', 'Sci-Fi'], ['Horror', 'Mystery', 'Thriller'], ['Comedy'], ['Action', 'Adventure', 'Sci-Fi'], ['Action', 'Drama', 'Sci-Fi'], ['Horror'], ['Drama', 'Thriller'], ['Comedy', 'Drama', 'Romance'], ['Drama', 'Thriller'], ['Comedy', 'Drama'], ['Drama'], ['Action', 'Adventure', 'Comedy'], ['Drama', 'Horror', 'Thriller'], ['Comedy'], ['Drama', 'Sci-Fi'], ['Action', 'Adventure', 
'Sci-Fi'], ['Horror'], ['Action', 'Adventure', 'Thriller'], ['Adventure', 'Fantasy'], ['Action', 'Comedy', 'Crime'], ['Comedy', 'Drama', 'Music'], ['Animation', 'Adventure', 'Comedy'], ['Action', 'Adventure', 'Mystery'], ['Action', 'Comedy', 'Crime'], ['Crime', 'Drama', 'History'], ['Comedy'], ['Action', 'Adventure', 'Sci-Fi'], ['Crime', 'Mystery', 'Thriller'], ['Action', 'Adventure', 'Crime'], ['Thriller'], ['Biography', 'Drama', 'Romance'], ['Action', 'Adventure'], ['Action', 'Fantasy'], ['Action', 'Comedy'], ['Action', 'Adventure', 'Sci-Fi'], ['Action', 'Comedy', 'Crime'], ['Thriller'], ['Action', 'Drama', 'Horror'], ['Comedy', 'Music', 'Romance'], ['Comedy'], ['Drama'], ['Action', 'Adventure', 'Fantasy'], ['Drama', 'Romance'], ['Animation', 'Adventure', 'Comedy'], ['Comedy', 'Drama'], ['Biography', 'Crime', 'Drama'], ['Drama', 'History'], ['Action', 'Crime', 'Thriller'], ['Action', 'Biography', 'Drama'], ['Horror'], ['Comedy', 'Romance'], ['Comedy', 'Romance'], ['Comedy', 'Crime', 'Drama'], ['Adventure', 'Family', 'Fantasy'], ['Crime', 'Drama', 'Thriller'], ['Action', 'Crime', 'Thriller'], ['Comedy', 'Romance'], ['Biography', 'Drama', 'Sport'], ['Drama', 'Romance'], ['Drama', 'Horror'], ['Adventure', 'Fantasy'], ['Adventure', 'Family', 'Fantasy'], ['Action', 'Drama', 'Sci-Fi'], ['Action', 'Adventure', 'Sci-Fi'], ['Action', 'Horror'], ['Comedy', 'Horror', 'Thriller'], ['Action', 'Crime', 'Thriller'], ['Crime', 'Drama', 'Music'], ['Drama'], ['Action', 'Crime', 'Thriller'], ['Action', 'Sci-Fi', 'Thriller'], ['Biography', 'Drama'], ['Action', 'Adventure', 'Fantasy'], ['Drama', 'Horror', 'Sci-Fi'], ['Biography', 'Comedy', 'Drama'], ['Crime', 'Horror', 'Thriller'], ['Crime', 'Drama', 'Mystery'], ['Animation', 'Adventure', 'Comedy'], ['Action', 'Biography', 'Drama'], ['Biography', 'Drama'], ['Biography', 'Drama', 'History'], ['Action', 'Biography', 'Drama'], ['Drama', 'Fantasy', 'Horror'], ['Comedy', 'Drama', 'Romance'], ['Drama', 'Sport'], ['Drama', 'Romance'], 
['Comedy', 'Romance'], ['Action', 'Crime', 'Thriller'], ['Action', 'Crime', 'Drama'], ['Action', 'Drama', 'Thriller'], ['Adventure', 'Family', 'Fantasy'], ['Action', 'Adventure'], ['Action', 'Adventure', 'Romance'], ['Adventure', 'Family', 'Fantasy'], ['Crime', 'Drama'], ['Comedy', 'Horror'], ['Comedy', 'Fantasy', 'Romance'], ['Drama'], ['Drama'], ['Comedy', 'Drama'], ['Comedy', 'Drama', 'Romance'], ['Adventure', 'Sci-Fi', 'Thriller'], ['Action', 'Adventure', 'Fantasy'], ['Comedy', 'Drama'], ['Biography', 'Drama', 'Romance'], ['Comedy', 'Fantasy'], ['Comedy', 'Drama', 'Fantasy'], ['Comedy'], ['Horror', 'Thriller'], ['Action', 'Adventure', 'Sci-Fi'], ['Adventure', 'Comedy', 'Horror'], ['Comedy', 'Mystery'], ['Drama'], ['Adventure', 'Drama', 'Fantasy'], ['Drama', 'Sport'], ['Action', 'Adventure'], ['Action', 'Adventure', 'Drama'], ['Action', 'Drama', 'Sci-Fi'], ['Action', 'Mystery', 'Sci-Fi'], ['Action', 'Crime', 'Drama'], ['Action', 'Crime', 'Fantasy'], ['Biography', 'Comedy', 'Drama'], ['Action', 'Crime', 'Thriller'], ['Biography', 'Crime', 'Drama'], ['Drama', 'Sport'], ['Adventure', 'Comedy', 'Drama'], ['Action', 'Adventure', 'Thriller'], ['Comedy', 'Fantasy', 'Horror'], ['Drama', 'Sport'], ['Horror', 'Thriller'], ['Drama', 'History', 'Thriller'], ['Animation', 'Action', 'Adventure'], ['Action', 'Adventure', 'Drama'], ['Action', 'Comedy', 'Family'], ['Action', 'Adventure', 'Drama'], ['Action', 'Adventure', 'Sci-Fi'], ['Action', 'Adventure', 'Sci-Fi'], ['Action', 'Comedy'], ['Action', 'Crime', 'Drama'], ['Biography', 'Drama'], ['Comedy', 'Romance'], ['Comedy'], ['Drama', 'Fantasy', 'Romance'], ['Action', 'Adventure', 'Sci-Fi'], ['Comedy'], ['Comedy', 'Sci-Fi'], ['Comedy', 'Drama'], ['Animation', 'Action', 'Adventure'], ['Horror'], ['Action', 'Biography', 'Crime'], ['Animation', 'Adventure', 'Comedy'], ['Drama', 'Romance'], ['Drama', 'Mystery', 'Thriller'], ['Drama', 'History', 'Thriller'], ['Animation', 'Adventure', 'Comedy'], ['Action', 'Adventure', 'Sci-Fi'], 
['Adventure', 'Comedy'], ['Action', 'Thriller'], ['Comedy', 'Music'], ['Animation', 'Adventure', 'Comedy'], ['Crime', 'Drama', 'Thriller'], ['Action', 'Adventure', 'Crime'], ['Comedy', 'Drama', 'Horror'], ['Drama'], ['Drama', 'Mystery', 'Romance'], ['Adventure', 'Family', 'Fantasy'], ['Drama'], ['Action', 'Drama', 'Thriller'], ['Drama'], ['Action', 'Horror', 'Romance'], ['Action', 'Drama', 'Fantasy'], ['Action', 'Crime', 'Drama'], ['Drama', 'Fantasy', 'Romance'], ['Action', 'Crime', 'Thriller'], ['Action', 'Mystery', 'Thriller'], ['Horror', 'Mystery', 'Thriller'], ['Action', 'Horror', 'Sci-Fi'], ['Comedy', 'Drama'], ['Comedy'], ['Action', 'Adventure', 'Horror'], ['Action', 'Adventure', 'Thriller'], ['Action', 'Crime', 'Drama'], ['Comedy', 'Crime', 'Drama'], ['Drama', 'Romance'], ['Drama', 'Thriller'], ['Action', 'Comedy', 'Crime'], ['Comedy'], ['Adventure', 'Family', 'Fantasy'], ['Drama', 'Romance'], ['Animation', 'Family', 'Fantasy'], ['Drama', 'Romance'], ['Thriller'], ['Adventure', 'Horror', 'Mystery'], ['Action', 'Sci-Fi'], ['Adventure', 'Comedy', 'Drama'], ['Animation', 'Action', 'Adventure'], ['Drama', 'Horror'], ['Action', 'Adventure', 'Sci-Fi'], ['Comedy', 'Drama'], ['Action', 'Horror', 'Mystery'], ['Action', 'Thriller'], ['Action', 'Adventure', 'Sci-Fi'], ['Drama'], ['Comedy', 'Drama', 'Romance'], ['Comedy', 'Crime'], ['Comedy', 'Romance'], ['Drama', 'Romance'], ['Crime', 'Drama', 'Thriller'], ['Horror', 'Mystery', 'Thriller'], ['Biography', 'Drama'], ['Drama', 'Mystery', 'Sci-Fi'], ['Adventure', 'Comedy', 'Family'], ['Action', 'Adventure', 'Crime'], ['Action', 'Crime', 'Mystery'], ['Mystery', 'Thriller'], ['Action', 'Sci-Fi', 'Thriller'], ['Action', 'Comedy', 'Crime'], ['Biography', 'Crime', 'Drama'], ['Biography', 'Drama', 'History'], ['Action', 'Adventure', 'Sci-Fi'], ['Adventure', 'Family', 'Fantasy'], ['Biography', 'Drama', 'History'], ['Biography', 'Comedy', 'Drama'], ['Drama', 'Thriller'], ['Horror', 'Thriller'], ['Drama'], ['Drama', 'War'], 
['Comedy', 'Drama', 'Romance'], ['Drama', 'Romance', 'Sci-Fi'], ['Action', 'Crime', 'Drama'], ['Comedy', 'Drama'], ['Animation', 'Action', 'Adventure'], ['Adventure', 'Comedy', 'Drama'], ['Comedy', 'Drama', 'Family'], ['Drama', 'Romance', 'Thriller'], ['Comedy', 'Crime', 'Drama'], ['Animation', 'Comedy', 'Family'], ['Drama', 'Horror', 'Sci-Fi'], ['Action', 'Adventure', 'Drama'], ['Action', 'Horror', 'Sci-Fi'], ['Action', 'Crime', 'Sport'], ['Drama', 'Horror', 'Sci-Fi'], ['Drama', 'Horror', 'Sci-Fi'], ['Action', 'Adventure', 'Comedy'], ['Mystery', 'Sci-Fi', 'Thriller'], ['Crime', 'Drama', 'Thriller'], ['Animation', 'Adventure', 'Comedy'], ['Action', 'Sci-Fi', 'Thriller'], ['Drama', 'Romance'], ['Crime', 'Drama', 'Thriller'], ['Comedy', 'Drama', 'Music'], ['Drama', 'Fantasy', 'Romance'], ['Crime', 'Drama', 'Thriller'], ['Crime', 'Drama', 'Thriller'], ['Comedy', 'Drama', 'Romance'], ['Comedy', 'Romance'], ['Drama', 'Sci-Fi', 'Thriller'], ['Drama', 'War'], ['Action', 'Crime', 'Drama'], ['Sci-Fi', 'Thriller'], ['Adventure', 'Drama', 'Horror'], ['Comedy', 'Drama', 'Music'], ['Comedy', 'Drama', 'Romance'], ['Action', 'Adventure', 'Drama'], ['Action', 'Crime', 'Drama'], ['Adventure', 'Fantasy'], ['Drama', 'Romance'], ['Biography', 'History', 'Thriller'], ['Crime', 'Drama', 'Thriller'], ['Action', 'Drama', 'History'], ['Biography', 'Comedy', 'Drama'], ['Crime', 'Drama', 'Thriller'], ['Action', 'Biography', 'Drama'], ['Action', 'Drama', 'Sci-Fi'], ['Adventure', 'Horror'], ['Action', 'Adventure', 'Sci-Fi'], ['Action', 'Adventure', 'Mystery'], ['Comedy', 'Drama', 'Romance'], ['Horror', 'Thriller'], ['Action', 'Sci-Fi', 'Thriller'], ['Action', 'Sci-Fi', 'Thriller'], ['Biography', 'Drama'], ['Action', 'Crime', 'Drama'], ['Action', 'Crime', 'Mystery'], ['Action', 'Adventure', 'Comedy'], ['Crime', 'Drama', 'Thriller'], ['Crime', 'Drama'], ['Mystery', 'Thriller'], ['Mystery', 'Sci-Fi', 'Thriller'], ['Action', 'Mystery', 'Sci-Fi'], ['Drama', 'Romance'], ['Drama', 'Thriller'], 
['Drama', 'Mystery', 'Sci-Fi'], ['Comedy', 'Drama'], ['Adventure', 'Family', 'Fantasy'], ['Biography', 'Drama', 'Sport'], ['Drama'], ['Comedy', 'Drama', 'Romance'], ['Biography', 'Drama', 'Romance'], ['Action', 'Adventure', 'Sci-Fi'], ['Drama', 'Sci-Fi', 'Thriller'], ['Drama', 'Romance', 'Thriller'], ['Mystery', 'Thriller'], ['Mystery', 'Thriller'], ['Action', 'Drama', 'Fantasy'], ['Action', 'Adventure', 'Biography'], ['Adventure', 'Comedy', 'Sci-Fi'], ['Action', 'Adventure', 'Thriller'], ['Fantasy', 'Horror'], ['Horror', 'Mystery'], ['Animation', 'Adventure', 'Comedy'], ['Action', 'Adventure', 'Drama'], ['Adventure', 'Family', 'Fantasy'], ['Action', 'Adventure', 'Sci-Fi'], ['Comedy', 'Drama'], ['Comedy', 'Drama'], ['Crime', 'Drama', 'Thriller'], ['Comedy', 'Romance'], ['Animation', 'Comedy', 'Family'], ['Comedy', 'Drama'], ['Comedy', 'Drama'], ['Biography', 'Drama', 'Sport'], ['Action', 'Adventure', 'Fantasy'], ['Action', 'Drama', 'History'], ['Action', 'Adventure', 'Sci-Fi'], ['Action', 'Adventure', 'Mystery'], ['Crime', 'Drama', 'Mystery'], ['Action'], ['Action', 'Adventure', 'Family'], ['Comedy', 'Romance'], ['Comedy', 'Drama', 'Romance'], ['Biography', 'Drama', 'Sport'], ['Action', 'Fantasy', 'Thriller'], ['Biography', 'Drama', 'Sport'], ['Action', 'Drama', 'Fantasy'], ['Adventure', 'Sci-Fi', 'Thriller'], ['Animation', 'Adventure', 'Comedy'], ['Drama', 'Mystery', 'Thriller'], ['Drama', 'Romance'], ['Crime', 'Drama', 'Mystery'], ['Comedy', 'Romance', 'Sport'], ['Comedy', 'Family'], ['Drama', 'Horror', 'Mystery'], ['Action', 'Drama', 'Sport'], ['Action', 'Adventure', 'Comedy'], ['Drama', 'Mystery', 'Sci-Fi'], ['Animation', 'Action', 'Comedy'], ['Action', 'Crime', 'Drama'], ['Action', 'Crime', 'Drama'], ['Comedy', 'Drama', 'Romance'], ['Animation', 'Action', 'Adventure'], ['Crime', 'Drama'], ['Drama'], ['Drama'], ['Comedy', 'Crime'], ['Drama'], ['Action', 'Adventure', 'Fantasy'], ['Drama', 'Fantasy', 'Romance'], ['Comedy', 'Drama'], ['Drama', 'Fantasy', 
'Thriller'], ['Biography', 'Crime', 'Drama'], ['Comedy', 'Drama', 'Romance'], ['Action', 'Crime', 'Drama'], ['Sci-Fi'], ['Action', 'Biography', 'Drama'], ['Action', 'Comedy', 'Romance'], ['Adventure', 'Comedy', 'Drama'], ['Comedy', 'Crime', 'Drama'], ['Action', 'Fantasy', 'Horror'], ['Drama', 'Horror'], ['Horror'], ['Action', 'Thriller'], ['Action', 'Adventure', 'Mystery'], ['Action', 'Adventure', 'Fantasy'], ['Comedy', 'Drama', 'Romance'], ['Crime', 'Drama', 'Mystery'], ['Adventure', 'Comedy', 'Family'], ['Comedy', 'Drama', 'Romance'], ['Comedy'], ['Comedy', 'Drama', 'Horror'], ['Drama', 'Horror', 'Thriller'], ['Animation', 'Adventure', 'Family'], ['Comedy', 'Romance'], ['Mystery', 'Romance', 'Sci-Fi'], ['Crime', 'Drama'], ['Drama', 'Horror', 'Mystery'], ['Comedy'], ['Biography', 'Drama'], ['Comedy', 'Drama', 'Thriller'], ['Comedy', 'Western'], ['Drama', 'History', 'War'], ['Drama', 'Horror', 'Sci-Fi'], ['Drama'], ['Comedy', 'Drama'], ['Fantasy', 'Horror', 'Thriller'], ['Drama', 'Romance'], ['Action', 'Comedy', 'Fantasy'], ['Drama', 'Horror', 'Musical'], ['Crime', 'Drama', 'Mystery'], ['Horror', 'Mystery', 'Thriller'], ['Comedy', 'Music'], ['Drama'], ['Biography', 'Crime', 'Drama'], ['Drama'], ['Action', 'Adventure', 'Comedy'], ['Crime', 'Drama', 'Mystery'], ['Drama'], ['Action', 'Comedy', 'Crime'], ['Comedy', 'Drama', 'Romance'], ['Crime', 'Drama', 'Mystery'], ['Action', 'Comedy', 'Crime'], ['Drama'], ['Drama', 'Romance'], ['Crime', 'Drama', 'Mystery'], ['Adventure', 'Comedy', 'Romance'], ['Comedy', 'Crime', 'Drama'], ['Adventure', 'Drama', 'Thriller'], ['Biography', 'Crime', 'Drama'], ['Crime', 'Drama', 'Thriller'], ['Drama', 'History', 'Thriller'], ['Action', 'Adventure', 'Sci-Fi'], ['Action', 'Comedy'], ['Horror'], ['Action', 'Crime', 'Mystery'], ['Comedy', 'Romance'], ['Comedy'], ['Action', 'Drama', 'Thriller'], ['Action', 'Adventure', 'Sci-Fi'], ['Drama', 'Mystery', 'Thriller'], ['Comedy', 'Drama', 'Romance'], ['Action', 'Fantasy', 'Horror'], ['Drama', 
'Romance'], ['Biography', 'Drama'], ['Biography', 'Drama'], ['Action', 'Adventure', 'Sci-Fi'], ['Animation', 'Adventure', 'Comedy'], ['Drama', 'Mystery', 'Thriller'], ['Action', 'Horror', 'Sci-Fi'], ['Drama', 'Romance'], ['Biography', 'Drama'], ['Action', 'Adventure', 'Drama'], ['Adventure', 'Drama', 'Fantasy'], ['Drama', 'Family'], ['Comedy', 'Drama', 'Romance'], ['Drama', 'Romance', 'Sci-Fi'], ['Action', 'Adventure', 'Thriller'], ['Comedy', 'Romance'], ['Crime', 'Drama', 'Horror'], ['Comedy', 'Fantasy'], ['Action', 'Comedy', 'Crime'], ['Adventure', 'Drama', 'Romance'], ['Action', 'Crime', 'Drama'], ['Crime', 'Horror', 'Thriller'], ['Romance', 'Sci-Fi', 'Thriller'], ['Comedy', 'Drama', 'Romance'], ['Crime', 'Drama'], ['Crime', 'Drama', 'Mystery'], ['Action', 'Adventure', 'Sci-Fi'], ['Animation', 'Fantasy'], ['Animation', 'Adventure', 'Comedy'], ['Drama', 'Mystery', 'War'], ['Comedy', 'Romance'], ['Animation', 'Comedy', 'Family'], ['Comedy'], ['Horror', 'Mystery', 'Thriller'], ['Action', 'Adventure', 'Drama'], ['Comedy'], ['Drama'], ['Adventure', 'Biography', 'Drama'], ['Comedy'], ['Horror', 'Thriller'], ['Action', 'Drama', 'Family'], ['Comedy', 'Fantasy', 'Horror'], ['Comedy', 'Romance'], ['Drama', 'Mystery', 'Romance'], ['Action', 'Adventure', 'Comedy'], ['Thriller'], ['Comedy'], ['Adventure', 'Comedy', 'Sci-Fi'], ['Comedy', 'Drama', 'Fantasy'], ['Mystery', 'Thriller'], ['Comedy', 'Drama'], ['Adventure', 'Drama', 'Family'], ['Horror', 'Thriller'], ['Action', 'Drama', 'Romance'], ['Drama', 'Romance'], ['Action', 'Adventure', 'Fantasy'], ['Comedy'], ['Action', 'Biography', 'Drama'], ['Drama', 'Mystery', 'Romance'], ['Adventure', 'Drama', 'Western'], ['Drama', 'Music', 'Romance'], ['Comedy', 'Romance', 'Western'], ['Thriller'], ['Comedy', 'Drama', 'Romance'], ['Horror', 'Thriller'], ['Adventure', 'Family', 'Fantasy'], ['Crime', 'Drama', 'Mystery'], ['Horror', 'Mystery'], ['Comedy', 'Crime', 'Drama'], ['Action', 'Comedy', 'Romance'], ['Biography', 'Drama', 
'History'], ['Adventure', 'Drama'], ['Drama', 'Thriller'], ['Drama'], ['Action', 'Adventure', 'Fantasy'], ['Action', 'Biography', 'Drama'], ['Drama', 'Music'], ['Comedy', 'Drama'], ['Drama', 'Thriller', 'War'], ['Action', 'Mystery', 'Thriller'], ['Horror', 'Sci-Fi', 'Thriller'], ['Comedy', 'Drama', 'Romance'], ['Action', 'Sci-Fi'], ['Action', 'Adventure', 'Fantasy'], ['Drama', 'Mystery', 'Romance'], ['Drama'], ['Action', 'Adventure', 'Thriller'], ['Action', 'Crime', 'Thriller'], ['Animation', 'Action', 'Adventure'], ['Drama', 'Fantasy', 'Mystery'], ['Drama', 'Sci-Fi'], ['Animation', 'Adventure', 'Comedy'], ['Horror', 'Thriller'], ['Action', 'Thriller'], ['Comedy'], ['Biography', 'Drama'], ['Action', 'Mystery', 'Thriller'], ['Action', 'Mystery', 'Sci-Fi'], ['Crime', 'Drama', 'Thriller'], ['Comedy', 'Romance'], ['Comedy', 'Drama', 'Romance'], ['Biography', 'Drama', 'Thriller'], ['Drama'], ['Action', 'Adventure', 'Family'], ['Animation', 'Comedy', 'Family'], ['Action', 'Crime', 'Drama'], ['Comedy'], ['Comedy', 'Crime', 'Thriller'], ['Comedy', 'Romance'], ['Animation', 'Comedy', 'Drama'], ['Action', 'Crime', 'Thriller'], ['Comedy', 'Romance'], ['Adventure', 'Biography', 'Drama'], ['Animation', 'Adventure', 'Comedy'], ['Crime', 'Drama', 'Mystery'], ['Action', 'Comedy', 'Sci-Fi'], ['Comedy', 'Fantasy', 'Horror'], ['Comedy', 'Crime'], ['Animation', 'Action', 'Adventure'], ['Action', 'Drama', 'Thriller'], ['Fantasy', 'Horror'], ['Crime', 'Drama', 'Thriller'], ['Action', 'Adventure', 'Fantasy'], ['Comedy', 'Drama', 'Romance'], ['Biography', 'Drama', 'Romance'], ['Action', 'Drama', 'History'], ['Action', 'Adventure', 'Comedy'], ['Horror', 'Thriller'], ['Horror', 'Mystery', 'Thriller'], ['Comedy', 'Romance'], ['Animation', 'Adventure', 'Comedy'], ['Crime', 'Drama', 'Mystery'], ['Crime', 'Drama', 'Mystery'], ['Adventure', 'Biography', 'Drama'], ['Horror', 'Mystery', 'Thriller'], ['Horror', 'Thriller'], ['Drama', 'Romance', 'War'], ['Adventure', 'Fantasy', 'Mystery'], 
['Action', 'Adventure', 'Sci-Fi'], ['Biography', 'Drama'], ['Drama', 'Thriller'], ['Horror', 'Thriller'], ['Drama', 'Horror', 'Thriller'], ['Action', 'Adventure', 'Fantasy'], ['Action', 'Horror', 'Thriller'], ['Comedy'], ['Drama', 'Sport'], ['Comedy', 'Family'], ['Drama', 'Romance'], ['Action', 'Adventure', 'Comedy'], ['Comedy'], ['Mystery', 'Romance', 'Thriller'], ['Crime', 'Drama'], ['Action', 'Comedy'], ['Crime', 'Drama', 'Mystery'], ['Biography', 'Drama', 'Romance'], ['Comedy', 'Crime'], ['Drama', 'Thriller'], ['Drama'], ['Animation', 'Adventure', 'Comedy'], ['Action', 'Thriller'], ['Drama', 'Thriller'], ['Animation', 'Adventure', 'Comedy'], ['Crime', 'Drama', 'Mystery'], ['Thriller'], ['Biography', 'Drama', 'Sport'], ['Crime', 'Drama', 'Thriller'], ['Drama', 'Music'], ['Crime', 'Drama', 'Thriller'], ['Drama', 'Romance'], ['Animation', 'Action', 'Adventure'], ['Comedy', 'Drama'], ['Action', 'Adventure', 'Drama'], ['Biography', 'Crime', 'Drama'], ['Horror'], ['Biography', 'Drama', 'Mystery'], ['Drama', 'Romance'], ['Animation', 'Drama', 'Romance'], ['Comedy', 'Family'], ['Drama'], ['Mystery', 'Thriller'], ['Drama', 'Fantasy', 'Horror'], ['Drama', 'Romance'], ['Biography', 'Drama', 'History'], ['Comedy', 'Family'], ['Action', 'Adventure', 'Thriller'], ['Comedy', 'Drama'], ['Action', 'Adventure', 'Fantasy'], ['Action', 'Thriller'], ['Drama', 'Romance'], ['Comedy', 'Drama', 'Romance'], ['Drama', 'Horror', 'Sci-Fi'], ['Comedy', 'Horror', 'Romance'], ['Drama'], ['Action', 'Adventure', 'Sci-Fi'], ['Action', 'Adventure', 'Fantasy'], ['Action', 'Adventure', 'Drama'], ['Biography', 'Comedy', 'Drama'], ['Drama', 'Mystery', 'Romance'], ['Animation', 'Adventure', 'Comedy'], ['Drama', 'Romance', 'Sci-Fi'], ['Drama'], ['Drama', 'Fantasy'], ['Drama', 'Romance'], ['Comedy', 'Horror', 'Thriller'], ['Comedy', 'Drama', 'Romance'], ['Crime', 'Drama'], ['Comedy', 'Romance'], ['Action', 'Drama', 'Family'], ['Comedy', 'Drama', 'Romance'], ['Action', 'Thriller', 'War'], ['Action', 
'Comedy', 'Horror'], ['Biography', 'Drama', 'Sport'], ['Adventure', 'Comedy', 'Drama'], ['Comedy', 'Romance'], ['Comedy', 'Romance'], ['Comedy', 'Drama', 'Romance'], ['Action', 'Adventure', 'Crime'], ['Comedy', 'Romance'], ['Animation', 'Action', 'Adventure'], ['Action', 'Crime', 'Sci-Fi'], ['Drama'], ['Comedy', 'Drama', 'Romance'], ['Crime', 'Thriller'], ['Comedy', 'Horror', 'Sci-Fi'], ['Drama', 'Thriller'], ['Drama', 'Fantasy', 'Horror'], ['Thriller'], ['Adventure', 'Drama', 'Family'], ['Mystery', 'Sci-Fi', 'Thriller'], ['Biography', 'Crime', 'Drama'], ['Drama', 'Fantasy', 'Horror'], ['Action', 'Adventure', 'Thriller'], ['Crime', 'Drama', 'Horror'], ['Crime', 'Drama', 'Fantasy'], ['Adventure', 'Family', 'Fantasy'], ['Action', 'Adventure', 'Drama'], ['Action', 'Comedy', 'Horror'], ['Comedy', 'Drama', 'Family'], ['Action', 'Thriller'], ['Action', 'Adventure', 'Sci-Fi'], ['Adventure', 'Drama', 'Fantasy'], ['Drama'], ['Drama'], ['Comedy'], ['Drama'], ['Comedy', 'Drama', 'Music'], ['Drama', 'Fantasy', 'Music'], ['Drama'], ['Thriller'], ['Comedy', 'Horror'], ['Action', 'Comedy', 'Sport'], ['Horror'], ['Comedy', 'Drama'], ['Action', 'Drama', 'Thriller'], ['Drama', 'Romance'], ['Horror', 'Mystery'], ['Adventure', 'Drama', 'Fantasy'], ['Thriller'], ['Comedy', 'Romance'], ['Action', 'Sci-Fi', 'Thriller'], ['Fantasy', 'Mystery', 'Thriller'], ['Biography', 'Drama'], ['Crime', 'Drama'], ['Action', 'Adventure', 'Sci-Fi'], ['Adventure'], ['Comedy', 'Drama'], ['Comedy', 'Drama'], ['Comedy', 'Drama', 'Romance'], ['Adventure', 'Comedy', 'Drama'], ['Action', 'Sci-Fi', 'Thriller'], ['Comedy', 'Romance'], ['Action', 'Fantasy', 'Horror'], ['Crime', 'Drama', 'Thriller'], ['Action', 'Drama', 'Thriller'], ['Crime', 'Drama', 'Mystery'], ['Crime', 'Drama', 'Mystery'], ['Drama', 'Sci-Fi', 'Thriller'], ['Biography', 'Drama', 'History'], ['Crime', 'Horror', 'Thriller'], ['Drama'], ['Drama', 'Mystery', 'Thriller'], ['Adventure', 'Biography'], ['Adventure', 'Biography', 'Crime'], ['Action', 
'Horror', 'Thriller'], ['Action', 'Adventure', 'Western'], ['Horror', 'Thriller'], ['Drama', 'Mystery', 'Thriller'], ['Comedy', 'Drama', 'Musical'], ['Horror', 'Mystery'], ['Biography', 'Drama', 'Sport'], ['Comedy', 'Family', 'Romance'], ['Drama', 'Mystery', 'Thriller'], ['Comedy'], ['Drama'], ['Drama', 'Thriller'], ['Biography', 'Drama', 'Family'], ['Comedy', 'Drama', 'Family'], ['Drama', 'Fantasy', 'Musical'], ['Comedy'], ['Adventure', 'Family'], ['Adventure', 'Comedy', 'Fantasy'], ['Horror', 'Thriller'], ['Drama', 'Romance'], ['Horror'], ['Biography', 'Drama', 'History'], ['Action', 'Adventure', 'Fantasy'], ['Drama', 'Family', 'Music'], ['Comedy', 'Drama', 'Romance'], ['Action', 'Adventure', 'Horror'], ['Comedy'], ['Crime', 'Drama', 'Mystery'], ['Horror'], ['Drama', 'Music', 'Romance'], ['Adventure', 'Comedy'], ['Comedy', 'Family', 'Fantasy']]
['Mystery', 'Drama', 'Western', 'Horror', 'Romance', 'Animation', 'Crime', 'Fantasy', 'Music', 'Sport', 'Thriller', 'Family', 'War', 'Biography', 'Comedy', 'Adventure', 'Sci-Fi', 'History', 'Musical', 'Action']
Mystery Drama Western Horror ... Sci-Fi History Musical Action
0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
1 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
3 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
4 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
.. ... ... ... ... ... ... ... ... ...
995 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
996 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
997 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
998 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
999 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
[1000 rows x 20 columns]
Mystery Drama Western Horror ... Sci-Fi History Musical Action
0 0.0 0.0 0.0 0.0 ... 1.0 0.0 0.0 1.0
1 1.0 0.0 0.0 0.0 ... 1.0 0.0 0.0 0.0
2 0.0 0.0 0.0 1.0 ... 0.0 0.0 0.0 0.0
3 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
4 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 1.0
[5 rows x 20 columns]
Mystery 106.0
Drama 513.0
Western 7.0
Horror 119.0
Romance 141.0
Animation 49.0
Crime 150.0
Fantasy 101.0
Music 16.0
Sport 18.0
Thriller 195.0
Family 51.0
War 13.0
Biography 81.0
Comedy 279.0
Adventure 259.0
Sci-Fi 120.0
History 29.0
Musical 5.0
Action 303.0
dtype: float64
5、pandas中数据合并
join方法和merge方法
修正:图片中外连接是 outer
# Combining data: join() aligns rows on the index (row labels) by default.
df1 = pd.DataFrame(np.ones((2, 4)), index=list("AB"), columns=["a", "b", "c", "d"])
df2 = pd.DataFrame(np.zeros((3, 3)), index=["A", "B", "C"], columns=["x", "y", "z"])
print(df1)
print(df2)
dfhb1 = df1.join(df2)  # row set follows df1
dfhb2 = df2.join(df1)  # row set follows df2; unmatched cells become NaN
print(dfhb1)
print("..........")
print(dfhb2)
# merge() aligns on column values instead of the index.
df3 = pd.DataFrame(np.zeros((3, 3)), columns=["f", "a", "x"])
print(df3)
dfhb3 = df1.merge(df3, on="a")  # join on column "a"; the default is an inner join (intersection)
print(dfhb3)
df3 = pd.DataFrame(np.arange(9).reshape((3, 3)), columns=["f", "a", "x"])
print(df3)
dfhb4 = df1.merge(df3, on="a")
print(dfhb4)
# Change one key value so only one row of df1 still matches df3.
df1.loc["A", "a"] = 100
dfhb5 = df1.merge(df3, on="a")
print(dfhb5)
dfhb6 = df1.merge(df3, on="a", how="inner")  # explicit inner join (same as the default)
print(dfhb6)
dfhb7 = df1.merge(df3, on="a", how="outer")  # outer join: union of keys
print(dfhb7)
dfhb8 = df1.merge(df3, on="a", how="left")   # left join: keep every row of df1
print(dfhb8)
dfhb9 = df1.merge(df3, on="a", how="right")  # right join: keep every row of df3
print(dfhb9)
a b c d
A 1.0 1.0 1.0 1.0
B 1.0 1.0 1.0 1.0
x y z
A 0.0 0.0 0.0
B 0.0 0.0 0.0
C 0.0 0.0 0.0
a b c d x y z
A 1.0 1.0 1.0 1.0 0.0 0.0 0.0
B 1.0 1.0 1.0 1.0 0.0 0.0 0.0
..........
x y z a b c d
A 0.0 0.0 0.0 1.0 1.0 1.0 1.0
B 0.0 0.0 0.0 1.0 1.0 1.0 1.0
C 0.0 0.0 0.0 NaN NaN NaN NaN
f a x
0 0.0 0.0 0.0
1 0.0 0.0 0.0
2 0.0 0.0 0.0
Empty DataFrame
Columns: [a, b, c, d, f, x]
Index: []
f a x
0 0 1 2
1 3 4 5
2 6 7 8
a b c d f x
0 1.0 1.0 1.0 1.0 0 2
1 1.0 1.0 1.0 1.0 0 2
a b c d f x
0 1.0 1.0 1.0 1.0 0 2
a b c d f x
0 1.0 1.0 1.0 1.0 0 2
a b c d f x
0 100.0 1.0 1.0 1.0 NaN NaN
1 1.0 1.0 1.0 1.0 0.0 2.0
2 4.0 NaN NaN NaN 3.0 5.0
3 7.0 NaN NaN NaN 6.0 8.0
a b c d f x
0 100.0 1.0 1.0 1.0 NaN NaN
1 1.0 1.0 1.0 1.0 0.0 2.0
a b c d f x
0 1.0 1.0 1.0 1.0 0 2
1 4.0 NaN NaN NaN 3 5
2 7.0 NaN NaN NaN 6 8
如果 t1 和 t2 没有相同的列名,则用 t1.merge(t2, left_on="a", right_on="b", how="right")。
这里,left_on 指定左侧 DataFrame 按哪一列合并,right_on 指定右侧 DataFrame 按哪一列合并。
6、pandas中数据分组聚合
全球星巴克店铺的统计数据。数据来源:https://www.kaggle.com/starbucks/store-locations/data
两个索引,也就是复合索引,见下一节。
import pandas as pd
import numpy as np
import pymysql
pd.set_option('display.max_columns', None)
file_path="./directory.csv"
df=pd.read_csv(file_path)
print(df.head(1))
print(df.info())
#group the worldwide Starbucks store data by country
grouped=df.groupby(by="Country")
print(grouped)
print("-"*100)
#DataFrameGroupBy
# iterable: yields (group name, sub-DataFrame) tuples
# for i,j in grouped:
# print(i)
# print("*"*10)
# print(j,type(j))
# print("-"*100)
#print the rows whose Country is US
print(df[df["Country"]=="US"])
print("-"*100)
#aggregation methods can be called on the groups; count counts non-null entries
country_count=grouped["Brand"].count()
print(country_count)
print("-"*100)
#store counts for the US and for China
print(country_count["US"])
print(country_count["CN"])
print("-"*100)
#count Starbucks stores for each Chinese province
china_data=df[df["Country"]=="CN"]#select the Chinese rows
groupbyProvince=china_data.groupby(by="State/Province").count()["Brand"]#take the Brand column
print(groupbyProvince)
print("-"*100)
#group by several keys -> Series with a two-level index (MultiIndex)
grouped1=df["Brand"].groupby(by=[df["Country"],df["State/Province"]]).count()
print(grouped1)
print(type(grouped1))
print("-"*100)
#group by several keys -> DataFrame (note the double brackets selecting a one-column frame)
grouped2=df[["Brand"]].groupby(by=[df["Country"],df["State/Province"]]).count()
grouped3=df.groupby(by=[df["Country"],df["State/Province"]])[["Brand"]].count()
grouped4=df.groupby(by=[df["Country"],df["State/Province"]]).count()[["Brand"]]
#the three statements above are equivalent
print(grouped2,type(grouped2))
print("-"*100)
print(grouped3,type(grouped3))
print("-"*100)
print(grouped4,type(grouped4))
Brand Store Number Store Name Ownership Type Street Address \
0 Starbucks 47370-257954 Meritxell, 96 Licensed Av. Meritxell, 96
City State/Province Country Postcode Phone Number \
0 Andorra la Vella 7 AD AD500 376818720
Timezone Longitude Latitude
0 GMT+1:00 Europe/Andorra 1.53 42.51
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25600 entries, 0 to 25599
Data columns (total 13 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Brand 25600 non-null object
1 Store Number 25600 non-null object
2 Store Name 25600 non-null object
3 Ownership Type 25600 non-null object
4 Street Address 25598 non-null object
5 City 25585 non-null object
6 State/Province 25600 non-null object
7 Country 25600 non-null object
8 Postcode 24078 non-null object
9 Phone Number 18739 non-null object
10 Timezone 25600 non-null object
11 Longitude 25599 non-null float64
12 Latitude 25599 non-null float64
dtypes: float64(2), object(11)
memory usage: 2.5+ MB
None
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000022CED62E240>
----------------------------------------------------------------------------------------------------
Brand Store Number Store Name \
11964 Starbucks 3513-125945 Safeway-Anchorage #1809
11965 Starbucks 74352-84449 Safeway-Anchorage #2628
11966 Starbucks 12449-152385 Safeway - Anchorage #1813
11967 Starbucks 24936-233524 100th & C St - Anchorage
11968 Starbucks 8973-85630 Old Seward & Diamond
... ... ... ...
25567 Starbucks 74385-87621 Safeway-Laramie #2466
25568 Starbucks 73320-24375 Ridley's - Laramie #1131
25569 Starbucks 22425-219024 Laramie - Grand & 30th
25570 Starbucks 10849-103163 I-80 & Dewar Dr-Rock Springs
25571 Starbucks 10769-102454 Coffeen & Brundage Lane-Sheridan
Ownership Type Street Address \
11964 Licensed 5600 Debarr Rd Ste 9
11965 Licensed 1725 Abbott Rd
11966 Licensed 1501 Huffman Rd
11967 Company Owned 320 W. 100th Ave, 100, Southgate Shopping Ctr ...
11968 Company Owned 1005 E Dimond Blvd
... ... ...
25567 Licensed 554 N 3rd St
25568 Licensed 3112 E. Grand
25569 Company Owned 3021 Grand Ave
25570 Company Owned 118 Westland Way
25571 Company Owned 2208 Coffeen Ave
City State/Province Country Postcode Phone Number \
11964 Anchorage AK US 995042300 907-339-0900
11965 Anchorage AK US 995073444 907-339-2800
11966 Anchorage AK US 995153596 907-339-1300
11967 Anchorage AK US 99515 (907) 227-9631
11968 Anchorage AK US 995152050 907-344-4160
... ... ... ... ... ...
25567 Laramie WY US 820723012 307-721-5107
25568 Laramie WY US 820705141 307-742-8146
25569 Laramie WY US 82070 307-742-3262
25570 Rock Springs WY US 829015751 307-362-7145
25571 Sheridian WY US 828016213 307-672-5129
Timezone Longitude Latitude
11964 GMT-09:00 America/Anchorage -149.78 61.21
11965 GMT-09:00 America/Anchorage -149.84 61.14
11966 GMT-09:00 America/Anchorage -149.85 61.11
11967 GMT-09:00 America/Anchorage -149.89 61.13
11968 GMT-09:00 America/Anchorage -149.86 61.14
... ... ... ...
25567 GMT-07:00 America/Denver -105.59 41.32
25568 GMT-07:00 America/Denver -105.56 41.31
25569 GMT-07:00 America/Denver -105.56 41.31
25570 GMT-07:00 America/Denver -109.25 41.58
25571 GMT-07:00 America/Denver -106.94 44.77
[13608 rows x 13 columns]
----------------------------------------------------------------------------------------------------
Country
AD 1
AE 144
AR 108
AT 18
AU 22
...
TT 3
TW 394
US 13608
VN 25
ZA 3
Name: Brand, Length: 73, dtype: int64
----------------------------------------------------------------------------------------------------
13608
2734
----------------------------------------------------------------------------------------------------
State/Province
11 236
12 58
13 24
14 8
15 8
21 57
22 13
23 16
31 551
32 354
33 315
34 26
35 75
36 13
37 75
41 21
42 76
43 35
44 333
45 21
46 16
50 41
51 104
52 9
53 24
61 42
62 3
63 3
64 2
91 162
92 13
Name: Brand, dtype: int64
----------------------------------------------------------------------------------------------------
Country State/Province
AD 7 1
AE AJ 2
AZ 48
DU 82
FU 2
..
US WV 25
WY 23
VN HN 6
SG 19
ZA GT 3
Name: Brand, Length: 545, dtype: int64
<class 'pandas.core.series.Series'>
----------------------------------------------------------------------------------------------------
Brand
Country State/Province
AD 7 1
AE AJ 2
AZ 48
DU 82
FU 2
... ...
US WV 25
WY 23
VN HN 6
SG 19
ZA GT 3
[545 rows x 1 columns] <class 'pandas.core.frame.DataFrame'>
----------------------------------------------------------------------------------------------------
Brand
Country State/Province
AD 7 1
AE AJ 2
AZ 48
DU 82
FU 2
... ...
US WV 25
WY 23
VN HN 6
SG 19
ZA GT 3
[545 rows x 1 columns] <class 'pandas.core.frame.DataFrame'>
----------------------------------------------------------------------------------------------------
Brand
Country State/Province
AD 7 1
AE AJ 2
AZ 48
DU 82
FU 2
... ...
US WV 25
WY 23
VN HN 6
SG 19
ZA GT 3
[545 rows x 1 columns] <class 'pandas.core.frame.DataFrame'>
7、pandas中索引和复合索引
① df.swaplevel()交换内外层索引,可以从内层开始取索引;
修正:从里层索引来选时——交换里外层索引的位置
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)
file_path="./directory.csv"
df=pd.read_csv(file_path)
# print(df.head(1))
# print(df.info())
#index methods and attributes
grouped2=df[["Brand"]].groupby(by=[df["Country"],df["State/Province"]]).count()
print(grouped2.index)#composite (multi-level) index
df1=pd.DataFrame(np.ones((2,4)),index=["A","B"],columns=list("abcd"))
df2=pd.DataFrame(np.zeros((3,3)),index=["A","B","C"],columns=list("xyz"))
print(df1)
print(df2)
# df1.index=["a","b"]#assign index values directly
# print(df1)
# reindex selects rows of df1 by the given labels; unknown labels become NaN rows
print(df1.reindex(["a","f"]))
print(df1)
print(df1.set_index("a").index)
df3=df1.set_index("a",drop=True)#default: use column "a" as index; drop=True removes that column
print(df3)
df3=df1.set_index("a",drop=False)#use column "a" as index; drop=False keeps the column
print(df3)
print(df1["d"].unique())#unique values of that column
df4=df1.set_index("b",drop=True)#index values may repeat
print("-"*100)
print(df4)
print(df1.set_index("b").index.unique())#the distinct index values
print(len(df1.set_index("b").index))#length of the index
print(list(df1.set_index("b").index))#cast the index to a list; an index is iterable
print("-"*100)
df11=pd.DataFrame(np.ones((2,4)),index=["A","B"],columns=list("abcd"))
print(df11)
df11.loc["A","a"]=100
print(df11)
df5=df11.set_index(["a","b"])#set a two-level index; df11 itself is unchanged
print(df5)
print(df11.set_index(["a","b"]).index)
df6=df11.set_index(["a","b","c"],drop=False)#three index levels, columns kept; df11 itself is unchanged
print(df6)
MultiIndex([('AD', '7'),
('AE', 'AJ'),
('AE', 'AZ'),
('AE', 'DU'),
('AE', 'FU'),
('AE', 'RK'),
('AE', 'SH'),
('AE', 'UQ'),
('AR', 'B'),
('AR', 'C'),
...
('US', 'UT'),
('US', 'VA'),
('US', 'VT'),
('US', 'WA'),
('US', 'WI'),
('US', 'WV'),
('US', 'WY'),
('VN', 'HN'),
('VN', 'SG'),
('ZA', 'GT')],
names=['Country', 'State/Province'], length=545)
a b c d
A 1.0 1.0 1.0 1.0
B 1.0 1.0 1.0 1.0
x y z
A 0.0 0.0 0.0
B 0.0 0.0 0.0
C 0.0 0.0 0.0
a b c d
a NaN NaN NaN NaN
f NaN NaN NaN NaN
a b c d
A 1.0 1.0 1.0 1.0
B 1.0 1.0 1.0 1.0
Float64Index([1.0, 1.0], dtype='float64', name='a')
b c d
a
1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0
a b c d
a
1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0
[1.]
----------------------------------------------------------------------------------------------------
a c d
b
1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0
Float64Index([1.0], dtype='float64', name='b')
2
[1.0, 1.0]
----------------------------------------------------------------------------------------------------
a b c d
A 1.0 1.0 1.0 1.0
B 1.0 1.0 1.0 1.0
a b c d
A 100.0 1.0 1.0 1.0
B 1.0 1.0 1.0 1.0
c d
a b
100.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0
MultiIndex([(100.0, 1.0),
( 1.0, 1.0)],
names=['a', 'b'])
a b c d
a b c
100.0 1.0 1.0 100.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0
8、pandas中的时间序列
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
#generate a time series: start/end bound the range, freq="D" means daily
pd_time=pd.date_range(start="20171230",end="20180131",freq="D")
print(pd_time)
pd_time=pd.date_range(start="20171230",end="20180131",freq="10D")#every ten days
print(pd_time)
#periods gives the number of entries instead of an end date
pd_time=pd.date_range(start="20171230",periods=10,freq="10D")
print(pd_time)
# M: month-end frequency
pd_time=pd.date_range(start="20171230",periods=10,freq="M")
print(pd_time)
# H: hourly
pd_time=pd.date_range(start="20171230",periods=10,freq="H")
print(pd_time)
pd_time=pd.date_range(start="2017/12/30",periods=10,freq="H")
print(pd_time)
pd_time=pd.date_range(start="2017-12-30 10:30:45",periods=10,freq="H")
print(pd_time)
DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01', '2018-01-02',
'2018-01-03', '2018-01-04', '2018-01-05', '2018-01-06',
'2018-01-07', '2018-01-08', '2018-01-09', '2018-01-10',
'2018-01-11', '2018-01-12', '2018-01-13', '2018-01-14',
'2018-01-15', '2018-01-16', '2018-01-17', '2018-01-18',
'2018-01-19', '2018-01-20', '2018-01-21', '2018-01-22',
'2018-01-23', '2018-01-24', '2018-01-25', '2018-01-26',
'2018-01-27', '2018-01-28', '2018-01-29', '2018-01-30',
'2018-01-31'],
dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2017-12-30', '2018-01-09', '2018-01-19', '2018-01-29'], dtype='datetime64[ns]', freq='10D')
DatetimeIndex(['2017-12-30', '2018-01-09', '2018-01-19', '2018-01-29',
'2018-02-08', '2018-02-18', '2018-02-28', '2018-03-10',
'2018-03-20', '2018-03-30'],
dtype='datetime64[ns]', freq='10D')
DatetimeIndex(['2017-12-31', '2018-01-31', '2018-02-28', '2018-03-31',
'2018-04-30', '2018-05-31', '2018-06-30', '2018-07-31',
'2018-08-31', '2018-09-30'],
dtype='datetime64[ns]', freq='M')
DatetimeIndex(['2017-12-30 00:00:00', '2017-12-30 01:00:00',
'2017-12-30 02:00:00', '2017-12-30 03:00:00',
'2017-12-30 04:00:00', '2017-12-30 05:00:00',
'2017-12-30 06:00:00', '2017-12-30 07:00:00',
'2017-12-30 08:00:00', '2017-12-30 09:00:00'],
dtype='datetime64[ns]', freq='H')
DatetimeIndex(['2017-12-30 00:00:00', '2017-12-30 01:00:00',
'2017-12-30 02:00:00', '2017-12-30 03:00:00',
'2017-12-30 04:00:00', '2017-12-30 05:00:00',
'2017-12-30 06:00:00', '2017-12-30 07:00:00',
'2017-12-30 08:00:00', '2017-12-30 09:00:00'],
dtype='datetime64[ns]', freq='H')
DatetimeIndex(['2017-12-30 10:30:45', '2017-12-30 11:30:45',
'2017-12-30 12:30:45', '2017-12-30 13:30:45',
'2017-12-30 14:30:45', '2017-12-30 15:30:45',
'2017-12-30 16:30:45', '2017-12-30 17:30:45',
'2017-12-30 18:30:45', '2017-12-30 19:30:45'],
dtype='datetime64[ns]', freq='H')
9、pandas重采样
重采样:是指将时间序列从一个频率转化为另一个频率进行处理的过程,将高频率数据转化为低频率数据为降采样,低频率数据转化为高频率数据为升采样。pandas里提供了一个resample方法可以实现频率转化。
#Resampling: convert a time series from one frequency to another
t=pd.DataFrame(np.random.uniform(10,50,(100,1)),index=pd.date_range(start="20170101",periods=100))
print(t)
print(t.resample("M").mean())#downsample to month-end means
print(t.resample("10D").count())#downsample to 10-day counts
0
2017-01-01 31.238575
2017-01-02 22.248967
2017-01-03 17.524832
2017-01-04 44.035044
2017-01-05 29.195196
... ...
2017-04-06 30.878312
2017-04-07 20.670926
2017-04-08 43.626255
2017-04-09 49.956429
2017-04-10 48.605538
[100 rows x 1 columns]
0
2017-01-31 28.165566
2017-02-28 27.370483
2017-03-31 28.530910
2017-04-30 35.559494
0
2017-01-01 10
2017-01-11 10
2017-01-21 10
2017-01-31 10
2017-02-10 10
2017-02-20 10
2017-03-02 10
2017-03-12 10
2017-03-22 10
2017-04-01 10
10、一些练手小例子
(1)例1:统计中国各省份星巴克店铺排名前25并绘制图形。
数据来源:https://www.kaggle.com/starbucks/store-locations/data
import pandas as pd
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
#plot the ranking of Starbucks store counts for Chinese cities with matplotlib
file_path="./directory.csv"
df=pd.read_csv(file_path)
df=df[df["Country"]=="CN"]
#take the top 25 cities
data=df.groupby(by="City").count()["Brand"].sort_values(ascending=False)[:25]
_x=data.index
_y=data.values
#plotting; this font renders the Chinese city names
font = {'family' : 'MicroSoft YaHei',
'weight' : 'bold',
}
matplotlib.rc("font",**font)
plt.figure(figsize=(20,8),dpi=80)
plt.barh(range(len(_x)),_y,height=0.3,color="orange")
plt.yticks(range(len(_x)),_x)
plt.show()
(2)2015年至2017年911的紧急电话数据,统计不同类型的紧急情况的次数,统计不同月份不同类型的紧急电话次数的变化情况。
数据来源:https://www.kaggle.com/mchirico/montcoalert/data
①统计不同类型的紧急情况的次数
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
pd.set_option('display.max_columns', None)
df=pd.read_csv("./911.csv")
# print(df.head(10))
# print(df.info())
#extract the category from the "title" column ("EMS: ...", "Fire: ...", "Traffic: ...")
print(df["title"].str.split(":"))#Series of [category, detail] lists
time_list=df["title"].str.split(":").tolist()#Series.tolist() converts to a plain list
# print(time_list)
cate_list=list(set([i[0] for i in time_list]))#deduplicate to get the category names
print(cate_list)#there are only three categories
#build an all-zero indicator frame: one column per category, one row per call
zero_df=pd.DataFrame(np.zeros((df.shape[0],len(cate_list))),columns=cate_list)
#assignment
#method 1
for cate in cate_list:
    #mark rows whose title contains this category with 1 (boolean indexing).
    #Fixed: use .loc[mask, col] instead of chained indexing
    #(zero_df[cate][mask]=1), which raises SettingWithCopyWarning and may
    #silently write into a temporary copy instead of zero_df.
    zero_df.loc[df["title"].str.contains(cate),cate]=1
print(zero_df)
#method 2 — noticeably slower (one row at a time)
# for i in range(df.shape[0]):
# zero_df.loc[i,time_list[i][0]]=1
# print(zero_df)
#count the occurrences of each emergency category
sum_ret=zero_df.sum(axis=0)
print(sum_ret)
0 [EMS, BACK PAINS/INJURY]
1 [EMS, DIABETIC EMERGENCY]
2 [Fire, GAS-ODOR/LEAK]
3 [EMS, CARDIAC EMERGENCY]
4 [EMS, DIZZINESS]
...
663517 [Traffic, VEHICLE ACCIDENT -]
663518 [EMS, GENERAL WEAKNESS]
663519 [EMS, VEHICLE ACCIDENT]
663520 [Fire, BUILDING FIRE]
663521 [Traffic, VEHICLE ACCIDENT -]
Name: title, Length: 663522, dtype: object
['Fire', 'Traffic', 'EMS']
Fire Traffic EMS
0 0.0 0.0 1.0
1 0.0 0.0 1.0
2 1.0 0.0 0.0
3 0.0 0.0 1.0
4 0.0 0.0 1.0
... ... ... ...
663517 0.0 1.0 0.0
663518 0.0 0.0 1.0
663519 0.0 0.0 1.0
663520 1.0 0.0 0.0
663521 0.0 1.0 0.0
[663522 rows x 3 columns]
Fire 100622.0
Traffic 230208.0
EMS 332700.0
dtype: float64
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
pd.set_option('display.max_columns', None)
df=pd.read_csv("./911.csv")
# print(df.head(10))
# print(df.info())
#alternative approach: add a category column, then group by it and count
print(df["title"].str.split(":"))#Series of [category, detail] lists
time_list=df["title"].str.split(":").tolist()#Series.tolist() converts to a plain list
# print(time_list)
cate_list=[i[0] for i in time_list]
cate_df=pd.DataFrame(np.array(cate_list).reshape((df.shape[0],1)))
df["cate"]=cate_df
print(cate_df)
print(df)
print(df.groupby(by="cate").count())
print(df.groupby(by="cate").count()["title"])
0 [EMS, BACK PAINS/INJURY]
1 [EMS, DIABETIC EMERGENCY]
2 [Fire, GAS-ODOR/LEAK]
3 [EMS, CARDIAC EMERGENCY]
4 [EMS, DIZZINESS]
...
663517 [Traffic, VEHICLE ACCIDENT -]
663518 [EMS, GENERAL WEAKNESS]
663519 [EMS, VEHICLE ACCIDENT]
663520 [Fire, BUILDING FIRE]
663521 [Traffic, VEHICLE ACCIDENT -]
Name: title, Length: 663522, dtype: object
0
0 EMS
1 EMS
2 Fire
3 EMS
4 EMS
... ...
663517 Traffic
663518 EMS
663519 EMS
663520 Fire
663521 Traffic
[663522 rows x 1 columns]
lat lng \
0 40.297876 -75.581294
1 40.258061 -75.264680
2 40.121182 -75.351975
3 40.116153 -75.343513
4 40.251492 -75.603350
... ... ...
663517 40.157956 -75.348060
663518 40.136306 -75.428697
663519 40.013779 -75.300835
663520 40.121603 -75.351437
663521 40.015046 -75.299674
desc zip \
0 REINDEER CT & DEAD END; NEW HANOVER; Station ... 19525.0
1 BRIAR PATH & WHITEMARSH LN; HATFIELD TOWNSHIP... 19446.0
2 HAWS AVE; NORRISTOWN; 2015-12-10 @ 14:39:21-St... 19401.0
3 AIRY ST & SWEDE ST; NORRISTOWN; Station 308A;... 19401.0
4 CHERRYWOOD CT & DEAD END; LOWER POTTSGROVE; S... NaN
... ... ...
663517 SUNSET AVE & WOODLAND AVE; EAST NORRITON; 2020... 19403.0
663518 EAGLEVILLE RD & BUNTING CIR; LOWER PROVIDENCE... 19403.0
663519 HAVERFORD STATION RD; LOWER MERION; Station 3... 19041.0
663520 MARSHALL ST & HAWS AVE; NORRISTOWN; 2020-07-29... 19401.0
663521 HAVERFORD STATION RD & W MONTGOMERY AVE; LOWER... 19041.0
title timeStamp twp \
0 EMS: BACK PAINS/INJURY 2015-12-10 17:10:52 NEW HANOVER
1 EMS: DIABETIC EMERGENCY 2015-12-10 17:29:21 HATFIELD TOWNSHIP
2 Fire: GAS-ODOR/LEAK 2015-12-10 14:39:21 NORRISTOWN
3 EMS: CARDIAC EMERGENCY 2015-12-10 16:47:36 NORRISTOWN
4 EMS: DIZZINESS 2015-12-10 16:56:52 LOWER POTTSGROVE
... ... ... ...
663517 Traffic: VEHICLE ACCIDENT - 2020-07-29 15:46:51 EAST NORRITON
663518 EMS: GENERAL WEAKNESS 2020-07-29 15:52:19 LOWER PROVIDENCE
663519 EMS: VEHICLE ACCIDENT 2020-07-29 15:52:52 LOWER MERION
663520 Fire: BUILDING FIRE 2020-07-29 15:54:08 NORRISTOWN
663521 Traffic: VEHICLE ACCIDENT - 2020-07-29 15:52:46 LOWER MERION
addr e cate
0 REINDEER CT & DEAD END 1 EMS
1 BRIAR PATH & WHITEMARSH LN 1 EMS
2 HAWS AVE 1 Fire
3 AIRY ST & SWEDE ST 1 EMS
4 CHERRYWOOD CT & DEAD END 1 EMS
... ... .. ...
663517 SUNSET AVE & WOODLAND AVE 1 Traffic
663518 EAGLEVILLE RD & BUNTING CIR 1 EMS
663519 HAVERFORD STATION RD 1 EMS
663520 MARSHALL ST & HAWS AVE 1 Fire
663521 HAVERFORD STATION RD & W MONTGOMERY AVE 1 Traffic
[663522 rows x 10 columns]
lat lng desc zip title timeStamp twp addr \
cate
EMS 332692 332692 332692 304855 332692 332692 332480 332692
Fire 100622 100622 100622 88867 100622 100622 100545 100622
Traffic 230208 230208 230208 189601 230208 230208 230204 230208
e
cate
EMS 332692
Fire 100622
Traffic 230208
cate
EMS 332692
Fire 100622
Traffic 230208
Name: title, dtype: int64
②统计911数据中不同月份电话次数的变化情况
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
#monthly change of 911 call counts
#monthly change of 911 call counts per category (next snippet)
pd.set_option('display.max_columns', None)
df=pd.read_csv("./911.csv")
# print(df.head(10))
# print(df.info())
df["timeStamp"]=pd.to_datetime(df["timeStamp"])#parse the timestamp strings into datetimes
df.set_index("timeStamp",inplace=True)
print(df.head(10))
#monthly call counts via month-end resampling
print("统计911数据中不同月份电话次数的变化情况")
countByMonth=df.resample("M").count()["title"]
print(countByMonth.head())
#plot
_x=countByMonth.index
_y=countByMonth.values
_x=[i.strftime("%Y-%m-%d") for i in _x]#format the dates as strings
plt.figure(figsize=(20,8),dpi=80)
plt.plot(range(len(_x)),_y)
plt.xticks(range(len(_x)),_x,rotation=45)
plt.show()
lat lng \
timeStamp
2015-12-10 17:10:52 40.297876 -75.581294
2015-12-10 17:29:21 40.258061 -75.264680
2015-12-10 14:39:21 40.121182 -75.351975
2015-12-10 16:47:36 40.116153 -75.343513
2015-12-10 16:56:52 40.251492 -75.603350
2015-12-10 15:39:04 40.253473 -75.283245
2015-12-10 16:46:48 40.182111 -75.127795
2015-12-10 16:17:05 40.217286 -75.405182
2015-12-10 16:51:42 40.289027 -75.399590
2015-12-10 17:35:41 40.102398 -75.291458
desc \
timeStamp
2015-12-10 17:10:52 REINDEER CT & DEAD END; NEW HANOVER; Station ...
2015-12-10 17:29:21 BRIAR PATH & WHITEMARSH LN; HATFIELD TOWNSHIP...
2015-12-10 14:39:21 HAWS AVE; NORRISTOWN; 2015-12-10 @ 14:39:21-St...
2015-12-10 16:47:36 AIRY ST & SWEDE ST; NORRISTOWN; Station 308A;...
2015-12-10 16:56:52 CHERRYWOOD CT & DEAD END; LOWER POTTSGROVE; S...
2015-12-10 15:39:04 CANNON AVE & W 9TH ST; LANSDALE; Station 345;...
2015-12-10 16:46:48 LAUREL AVE & OAKDALE AVE; HORSHAM; Station 35...
2015-12-10 16:17:05 COLLEGEVILLE RD & LYWISKI RD; SKIPPACK; Stati...
2015-12-10 16:51:42 MAIN ST & OLD SUMNEYTOWN PIKE; LOWER SALFORD;...
2015-12-10 17:35:41 BLUEROUTE & RAMP I476 NB TO CHEMICAL RD; PLYM...
zip title twp \
timeStamp
2015-12-10 17:10:52 19525.0 EMS: BACK PAINS/INJURY NEW HANOVER
2015-12-10 17:29:21 19446.0 EMS: DIABETIC EMERGENCY HATFIELD TOWNSHIP
2015-12-10 14:39:21 19401.0 Fire: GAS-ODOR/LEAK NORRISTOWN
2015-12-10 16:47:36 19401.0 EMS: CARDIAC EMERGENCY NORRISTOWN
2015-12-10 16:56:52 NaN EMS: DIZZINESS LOWER POTTSGROVE
2015-12-10 15:39:04 19446.0 EMS: HEAD INJURY LANSDALE
2015-12-10 16:46:48 19044.0 EMS: NAUSEA/VOMITING HORSHAM
2015-12-10 16:17:05 19426.0 EMS: RESPIRATORY EMERGENCY SKIPPACK
2015-12-10 16:51:42 19438.0 EMS: SYNCOPAL EPISODE LOWER SALFORD
2015-12-10 17:35:41 19462.0 Traffic: VEHICLE ACCIDENT - PLYMOUTH
addr e
timeStamp
2015-12-10 17:10:52 REINDEER CT & DEAD END 1
2015-12-10 17:29:21 BRIAR PATH & WHITEMARSH LN 1
2015-12-10 14:39:21 HAWS AVE 1
2015-12-10 16:47:36 AIRY ST & SWEDE ST 1
2015-12-10 16:56:52 CHERRYWOOD CT & DEAD END 1
2015-12-10 15:39:04 CANNON AVE & W 9TH ST 1
2015-12-10 16:46:48 LAUREL AVE & OAKDALE AVE 1
2015-12-10 16:17:05 COLLEGEVILLE RD & LYWISKI RD 1
2015-12-10 16:51:42 MAIN ST & OLD SUMNEYTOWN PIKE 1
2015-12-10 17:35:41 BLUEROUTE & RAMP I476 NB TO CHEMICAL RD 1
统计911数据中不同月份电话次数的变化情况
timeStamp
2015-12-31 7916
2016-01-31 13096
2016-02-29 11396
2016-03-31 11059
2016-04-30 11287
Freq: M, Name: title, dtype: int64
③统计911数据中不同月份不同类型的电话的次数的变化情况
#helper that adds one labelled line (monthly call counts) to the current figure
def plot_img(df,label):
    """Resample *df* (indexed by timeStamp) to month-end counts and plot them as one labelled line."""
    countByMonth=df.resample("M").count()["title"]
    # print(countByMonth.head())
    #plot
    _x=countByMonth.index
    _y=countByMonth.values
    _x=[i.strftime("%Y-%m-%d") for i in _x]#format the dates as strings
    plt.plot(range(len(_x)),_y,label=label)
    plt.xticks(range(len(_x)),_x,rotation=45)
#monthly change of 911 call counts per category
print("统计911数据中不同月份不同类型的电话的次数的变化情况")
pd.set_option('display.max_columns', None)
df=pd.read_csv("./911.csv")
#convert the time strings to datetime; set as index later
df["timeStamp"]=pd.to_datetime(df["timeStamp"])
#add a column holding the event category
time_list=df["title"].str.split(":").tolist()
cate_list=[i[0] for i in time_list]
#note: the indexes must match, otherwise the assignment misaligns
cate_df=pd.DataFrame(np.array(cate_list).reshape((df.shape[0],1)))#this frame has the default 0..n-1 index
df["cate"]=cate_df
df.set_index("timeStamp",inplace=True)#now switch the index to the timestamp
plt.figure(figsize=(20, 8), dpi=80)
#a groupby object is iterable, yielding (name, sub-DataFrame) tuples
print(df.groupby(by="cate"))
for group_name,group_data in df.groupby(by="cate"):#draw one line per category
    plot_img(group_data,group_name)
plt.legend(loc="best")
plt.show()
统计911数据中不同月份不同类型的电话的次数的变化情况
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x00000265C8559358>
(3)北京、广州、上海、沈阳、成都5个城市空气质量数据,绘制这5个城市的PM2.5随时间的变化情况。数据来源:https://www.kaggle.com/uciml/pm25-data-for-five-chinese-cities
DatetimeIndex可以理解为时间戳,而PeriodIndex可以理解为时间段。以北京为例:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
pd.set_option('display.max_columns', None)
file_path="./BeijingPM20100101_20151231.csv"
file_path1="./ChengduPM20100101_20151231.csv"
file_path2="./GuangzhouPM20100101_20151231.csv"
file_path3="./ShanghaiPM20100101_20151231.csv"
file_path4="./ShenyangPM20100101_20151231.csv"
df=pd.read_csv(file_path)
print(df.head())
print(df.info())
#combine the separate year/month/day/hour columns into a pandas PeriodIndex
period=pd.PeriodIndex(year=df["year"],month=df["month"],day=df["day"],hour=df["hour"],freq="H")
print(period)
print(type(period))
#append the new time series as a column of the original data
df["datetime"]=period
print(df.head(10))
#use datetime as the index
df.set_index("datetime",inplace=True)
#downsample to 7-day means
df=df.resample("7D").mean()
#handle missing data by dropping it
print(df["PM_US Post"])
data=df["PM_US Post"].dropna()
data_china=df["PM_Dongsi"]
#plot
_x=data.index
_x=[i.strftime("%Y%m%d") for i in _x]#format the dates as strings
_x_china=[i.strftime("%Y%m%d") for i in data_china.index]
_y=data.values
_y_china=data_china.values
plt.figure(figsize=(20,8),dpi=80)
plt.plot(range(len(_x)),_y,label="PM_US Post")
plt.plot(range(len(_x_china)),_y_china,label="PM_CN Post")
plt.xticks(range(0,len(_x),10),list(_x)[::10],rotation=45)#show every 10th label
plt.legend(loc="best")
plt.show()
No year month day hour season PM_Dongsi PM_Dongsihuan \
0 1 2010 1 1 0 4 NaN NaN
1 2 2010 1 1 1 4 NaN NaN
2 3 2010 1 1 2 4 NaN NaN
3 4 2010 1 1 3 4 NaN NaN
4 5 2010 1 1 4 4 NaN NaN
PM_Nongzhanguan PM_US Post DEWP HUMI PRES TEMP cbwd Iws \
0 NaN NaN -21.0 43.0 1021.0 -11.0 NW 1.79
1 NaN NaN -21.0 47.0 1020.0 -12.0 NW 4.92
2 NaN NaN -21.0 43.0 1019.0 -11.0 NW 6.71
3 NaN NaN -21.0 55.0 1019.0 -14.0 NW 9.84
4 NaN NaN -20.0 51.0 1018.0 -12.0 NW 12.97
precipitation Iprec
0 0.0 0.0
1 0.0 0.0
2 0.0 0.0
3 0.0 0.0
4 0.0 0.0
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52584 entries, 0 to 52583
Data columns (total 18 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 No 52584 non-null int64
1 year 52584 non-null int64
2 month 52584 non-null int64
3 day 52584 non-null int64
4 hour 52584 non-null int64
5 season 52584 non-null int64
6 PM_Dongsi 25052 non-null float64
7 PM_Dongsihuan 20508 non-null float64
8 PM_Nongzhanguan 24931 non-null float64
9 PM_US Post 50387 non-null float64
10 DEWP 52579 non-null float64
11 HUMI 52245 non-null float64
12 PRES 52245 non-null float64
13 TEMP 52579 non-null float64
14 cbwd 52579 non-null object
15 Iws 52579 non-null float64
16 precipitation 52100 non-null float64
17 Iprec 52100 non-null float64
dtypes: float64(11), int64(6), object(1)
memory usage: 7.2+ MB
None
PeriodIndex(['2010-01-01 00:00', '2010-01-01 01:00', '2010-01-01 02:00',
'2010-01-01 03:00', '2010-01-01 04:00', '2010-01-01 05:00',
'2010-01-01 06:00', '2010-01-01 07:00', '2010-01-01 08:00',
'2010-01-01 09:00',
...
'2015-12-31 14:00', '2015-12-31 15:00', '2015-12-31 16:00',
'2015-12-31 17:00', '2015-12-31 18:00', '2015-12-31 19:00',
'2015-12-31 20:00', '2015-12-31 21:00', '2015-12-31 22:00',
'2015-12-31 23:00'],
dtype='period[H]', length=52584, freq='H')
<class 'pandas.core.indexes.period.PeriodIndex'>
No year month day hour season PM_Dongsi PM_Dongsihuan \
0 1 2010 1 1 0 4 NaN NaN
1 2 2010 1 1 1 4 NaN NaN
2 3 2010 1 1 2 4 NaN NaN
3 4 2010 1 1 3 4 NaN NaN
4 5 2010 1 1 4 4 NaN NaN
5 6 2010 1 1 5 4 NaN NaN
6 7 2010 1 1 6 4 NaN NaN
7 8 2010 1 1 7 4 NaN NaN
8 9 2010 1 1 8 4 NaN NaN
9 10 2010 1 1 9 4 NaN NaN
PM_Nongzhanguan PM_US Post DEWP HUMI PRES TEMP cbwd Iws \
0 NaN NaN -21.0 43.0 1021.0 -11.0 NW 1.79
1 NaN NaN -21.0 47.0 1020.0 -12.0 NW 4.92
2 NaN NaN -21.0 43.0 1019.0 -11.0 NW 6.71
3 NaN NaN -21.0 55.0 1019.0 -14.0 NW 9.84
4 NaN NaN -20.0 51.0 1018.0 -12.0 NW 12.97
5 NaN NaN -19.0 47.0 1017.0 -10.0 NW 16.10
6 NaN NaN -19.0 44.0 1017.0 -9.0 NW 19.23
7 NaN NaN -19.0 44.0 1017.0 -9.0 NW 21.02
8 NaN NaN -19.0 44.0 1017.0 -9.0 NW 24.15
9 NaN NaN -20.0 37.0 1017.0 -8.0 NW 27.28
precipitation Iprec datetime
0 0.0 0.0 2010-01-01 00:00
1 0.0 0.0 2010-01-01 01:00
2 0.0 0.0 2010-01-01 02:00
3 0.0 0.0 2010-01-01 03:00
4 0.0 0.0 2010-01-01 04:00
5 0.0 0.0 2010-01-01 05:00
6 0.0 0.0 2010-01-01 06:00
7 0.0 0.0 2010-01-01 07:00
8 0.0 0.0 2010-01-01 08:00
9 0.0 0.0 2010-01-01 09:00
datetime
2010-01-01 71.627586
2010-01-08 69.910714
2010-01-15 163.654762
2010-01-22 68.069307
2010-01-29 53.583333
...
2015-11-27 242.642857
2015-12-04 145.437500
2015-12-11 88.750000
2015-12-18 204.139241
2015-12-25 209.244048
Freq: 7D, Name: PM_US Post, Length: 313, dtype: float64