# 课时44
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
def split_category(titles, sep=": "):
    """Return the category prefix of every title.

    The category is the text before the first ``sep`` (for example
    ``"EMS: BACK PAINS"`` -> ``"EMS"``).  A title that does not contain
    ``sep`` is returned unchanged, and a missing title stays missing.

    Args:
        titles: pandas Series of title strings.
        sep: Separator between the category and the rest of the title.

    Returns:
        A Series with the same index as ``titles`` holding the categories.
    """
    return titles.str.split(sep).str[0]


def category_indicators(titles, cate_list):
    """Build a 0/1 indicator table with one column per category.

    A cell is 1 when the title of that row contains the category text,
    otherwise 0.

    Args:
        titles: pandas Series of title strings.
        cate_list: Category names; they become the column labels.

    Returns:
        A float DataFrame of shape ``(len(titles), len(cate_list))`` that
        shares its index with ``titles``.
    """
    # np.zeros takes the shape as ONE tuple; a second positional argument
    # would be interpreted as the dtype.
    zeros_df = pd.DataFrame(
        np.zeros((len(titles), len(cate_list))),
        index=titles.index,
        columns=cate_list,
    )
    for cate in cate_list:
        # regex=False: the category is literal text, not a pattern.
        # na=False: a missing title matches no category instead of putting
        # NaN into the boolean mask.
        mask = titles.str.contains(cate, regex=False, na=False)
        # Single .loc assignment instead of chained indexing
        # (zeros_df[cate][mask] = 1), which may write to a temporary copy.
        zeros_df.loc[mask, cate] = 1
    return zeros_df


if __name__ == "__main__":
    # NOTE(review): " " is a placeholder path; point it at the real CSV file.
    df = pd.read_csv(" ")

    # Add one extra column holding the category of every row.
    df["cate"] = split_category(df["title"])

    # Unique categories, sorted so the column order is the same on every run.
    cate_list = sorted(df["cate"].dropna().unique())

    # Indicator table, then the number of rows per category.
    zeros_df = category_indicators(df["title"], cate_list)
    sum_ret = zeros_df.sum(axis=0)
    print(sum_ret)
# 课时45 pandas时间序列