'''Hands-on analysis of political contributions in the 2012 US presidential election.'''
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the contribution records. The raw-string prefix keeps the backslashes
# in the Windows path from being treated as escape sequences.
data = pd.read_csv(r'E:\AI课程笔记\数据分析\usa_election.csv')
# DataFrame.info() prints its report itself and returns None, so it is called
# directly rather than wrapped in print() (which would emit a stray "None").
data.info()
print(data.describe())
print(data.head())

# Replace missing values with the placeholder 'not provide', but only in
# non-numeric columns: writing a string into a numeric column would turn it
# into object dtype and break numeric comparisons / aggregations below.
text_columns = data.select_dtypes(exclude='number').columns
data[text_columns] = data[text_columns].fillna('not provide')
data.info()

# Drop every row whose contribution amount is not strictly positive
# (this removes negative amounts and zero amounts alike).
indexs = data.loc[data['contb_receipt_amt'] <= 0].index
data.drop(indexs, axis=0, inplace=True)

# Add a column holding the party of each candidate.
# NOTE(review): only these two candidates are mapped; any other candidate
# name gets NaN in 'party' and is left out of the party-level groupbys below
# -- confirm that restricting the analysis to these two is intended.
parties = {'Obama, Barack': 'Democrat', 'Romney, Mitt': 'Republican'}
data['party'] = data['cand_nm'].map(parties)
print(data.head())

# Distinct values present in the 'party' column.
print(data['party'].unique())
# Number of contributions per party.
print(data['party'].value_counts())
# Total amount received by each party.
print(data.groupby('party')['contb_receipt_amt'].sum())

# Convert the receipt date to pandas datetimes *before* grouping by day, so
# the per-day totals are ordered chronologically instead of by the
# lexicographic order of the raw date strings.
data['contb_receipt_dt'] = pd.to_datetime(data['contb_receipt_dt'])
print(data.head())

# Total amount received per party per day, shown in both index orders.
print(data.groupby(['party', 'contb_receipt_dt'])['contb_receipt_amt'].sum())
print(data.groupby(['contb_receipt_dt', 'party'])['contb_receipt_amt'].sum())

# Which candidate do contributors whose occupation is DISABLED VETERAN mainly
# support? Select those rows, then total the amounts per candidate.
d = data.loc[data['contbr_occupation'] == 'DISABLED VETERAN']
print(d.head())
print(d.groupby('cand_nm')['contb_receipt_amt'].sum())