import pandas as pd
import json
### Load each country's CSV, tag its rows with the country code, and merge ###
# list_cou deliberately excludes 'CA'; it is reused later in the script.
list_cou = ['DE', 'FR', 'GB', 'US']
# Read every file first and concatenate once, instead of growing df inside
# the loop (each incremental concat would copy everything read so far).
# 'CA' stays first so the row order matches loading it before the others.
frames = [
    pd.read_csv(code + 'videos.csv').assign(country=code)
    for code in ['CA'] + list_cou
]
df = pd.concat(frames)
### Date format handling ###
# trending_date is parsed as 'yy.dd.mm' and reduced to a plain date.
trending_ts = pd.to_datetime(df['trending_date'], format='%y.%d.%m')
df['trending_date'] = trending_ts.dt.date
# publish_time is parsed once, then split into a new publish_date column
# and a time-of-day value that overwrites publish_time.
publish_ts = pd.to_datetime(df['publish_time'], format='%Y-%m-%dT%H:%M:%S.%fZ')
df = df.assign(publish_date=publish_ts.dt.date)
df['publish_time'] = publish_ts.dt.time
# The category names are stored separately in JSON files; they are read and added as follows:
### Load the category id -> name mapping ###
# Add a cat_name column with the placeholder value 'a'
# (presumably overwritten with real names later -- confirm).
df = df.assign(cat_name='a')
for name in list_cou:
    file = name + '_category_id.json'
    # JSON text is UTF-8 by convention; do not rely on the platform default
    # encoding, which differs between operating systems.
    with open(file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    # Rebuilt from scratch for each country, so after the loop it holds the
    # mapping read from the last file in list_cou.
    id_to_category = {
        category['id']: category['snippet']['title']
        for category in data['items']
    }
    print(id_to_category)
### In practice, every country's category id -> name dictionary is the same
df['category_id'] = df['category_id'