pandas_总统选举数据分析

pandas数据分析
http://www.fec.gov/disclosurep/PDownload.do 总统数据下载

  • fec.cand_nm.unique() 统计所有人名列表

  • fec['party'] = fec.cand_nm.map(parties) 把parties字典中与cand_nm对应的值放入新的一列party中(map操作)

  • fec['party'].value_counts() 统计列party中每个value出现的次数

  • fec.loc[:,['party','cand_nm']] 取指定两列

  • (fec.contb_receipt_amt > 0).value_counts() 判断返回布尔值,并对二值统计

  • fec = fec[fec.contb_receipt_amt > 0] 限定该数据集出资大于零的

  • fec_mrbo = fec[fec.cand_nm.isin(['Obama, Barack','Romney, Mitt'])]
    把指定列中的满足条件的数据重新生成新的dataframe

  • fec.contbr_occupation.value_counts() 按出资人职业统计出现次数,默认从大到小排序

  • f = lambda x: occ_mapping.get(x,x)
    存在字典中的返回字典的value,不存在返回原值
    fec.contbr_occupation = fec.contbr_occupation.map(f) series循环操作value
    map方法只能作用在series中

  • over_2mm = by_occupation[by_occupation.sum(1) > 2000000]#过滤出资金超两百万的

  • over_2mm.plot(kind='barh')#横向条形

  • grouped 分组

def get_top_amounts(group, key, n=5):
    """Return the n largest contribution totals in group, summed per key.

    Args:
        group: DataFrame that has a 'contb_receipt_amt' column.
        key: Column name (or other groupby key) to aggregate by.
        n: Number of largest totals to keep.

    Returns:
        Series of summed 'contb_receipt_amt' values indexed by key,
        largest first.
    """
    totals = group.groupby(key)['contb_receipt_amt'].sum()
    # nlargest keeps the n biggest totals, in descending order.
    return totals.nlargest(n)
grouped	=	fec_mrbo.groupby('cand_nm')  # group the rows by candidate name (cand_nm)
grouped.apply(get_top_amounts,'contbr_occupation',n=7)  # 7 largest occupation totals per candidate
  • 数据分组
    bins=np.array([0,1,10,100,1000,10000,100000,1000000,10000000])
    labels=pd.cut(fec_mrbo.contb_receipt_amt,bins)

  • cand_nm为行名,labels为列名显示
    grouped =fec_mrbo.groupby(['cand_nm',labels])
    grouped.size().unstack(0) 将层次化索引的结果转换为表格结构(第0层索引变为列)

  • bucket_sums = grouped.contb_receipt_amt.sum().unstack(0)
    normed_sums = bucket_sums.div(bucket_sums.sum(axis=1), axis=0)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the contribution records from a hard-coded local Windows path.
fec = pd.read_csv(r'C:\Users\Administrator\Desktop\pandas_01\fec\P00000001-ALL.csv')
unique_cands = fec.cand_nm.unique()# distinct values of cand_nm, i.e. all candidates
unique_cands
array(['Bachmann, Michelle', 'Romney, Mitt', 'Obama, Barack',
       "Roemer, Charles E. 'Buddy' III", 'Pawlenty, Timothy',
       'Johnson, Gary Earl', 'Paul, Ron', 'Santorum, Rick',
       'Cain, Herman', 'Gingrich, Newt', 'McCotter, Thaddeus G',
       'Huntsman, Jon', 'Perry, Rick'], dtype=object)
len(unique_cands)
13
parties = {'Bachmann, Michelle':'Republican',
'Cain, Herman':'Republican',
'Gingrich, Newt':'Republican',
'Huntsman, Jon':'Republican',
'Johnson, Gary Earl':'Republican',
'McCotter, Thaddeus G':'Republican',
'Obama, Barack':'Democrat',
'Paul, Ron':'Republican',
'Pawlenty, Timothy':'Republican',
'Perry, Rick':'Republican',
"Roemer, Charles E. 'Buddy' III":'Republican',
'Romney, Mitt':'Republican',
'Santorum, Rick':'Republican'}
parties
{'Bachmann, Michelle': 'Republican',
 'Cain, Herman': 'Republican',
 'Gingrich, Newt': 'Republican',
 'Huntsman, Jon': 'Republican',
 'Johnson, Gary Earl': 'Republican',
 'McCotter, Thaddeus G': 'Republican',
 'Obama, Barack': 'Democrat',
 'Paul, Ron': 'Republican',
 'Pawlenty, Timothy': 'Republican',
 'Perry, Rick': 'Republican',
 "Roemer, Charles E. 'Buddy' III": 'Republican',
 'Romney, Mitt': 'Republican',
 'Santorum, Rick': 'Republican'}
fec['party'] = fec.cand_nm.map(parties)

fec['party'].value_counts()
Democrat      593746
Republican    407985
Name: party, dtype: int64
fec.loc[:,['party','cand_nm']]
              party             cand_nm
0        Republican  Bachmann, Michelle
1        Republican  Bachmann, Michelle
2        Republican  Bachmann, Michelle
3        Republican  Bachmann, Michelle
4        Republican  Bachmann, Michelle
...             ...                 ...
1001726  Republican         Perry, Rick
1001727  Republican         Perry, Rick
1001728  Republican         Perry, Rick
1001729  Republican         Perry, Rick
1001730  Republican         Perry, Rick

1001731 rows × 2 columns

(fec.contb_receipt_amt > 0).value_counts()# count positive vs. non-positive contribution amounts (per the original note, the data includes refunds)
True     991475
False     10256
Name: contb_receipt_amt, dtype: int64
# Keep only rows with a positive contribution amount. The .copy() makes fec
# an independent frame, so assigning columns on it later does not trigger
# pandas' SettingWithCopyWarning for writing to a filtered slice.
fec = fec[fec.contb_receipt_amt > 0].copy()
# Subset containing only the Obama and Romney rows.
fec_mrbo = fec[fec.cand_nm.isin(['Obama, Barack', 'Romney, Mitt'])]
# Ten most frequent values of contbr_occupation.
fec.contbr_occupation.value_counts()[:10]
RETIRED                                   233990
INFORMATION REQUESTED                      35107
ATTORNEY                                   34286
HOMEMAKER                                  29931
PHYSICIAN                                  23432
INFORMATION REQUESTED PER BEST EFFORTS     21138
ENGINEER                                   14334
TEACHER                                    13990
CONSULTANT                                 13273
PROFESSOR                                  12555
Name: contbr_occupation, dtype: int64
# Normalize equivalent occupation labels. Keys must match the data exactly:
# the first key used to contain a tab between "INFORMATION" and "REQUESTED",
# so it never matched and those rows were left unmapped.
occ_mapping = {
    'INFORMATION REQUESTED PER BEST EFFORTS': 'NOT PROVIDED',
    'INFORMATION REQUESTED': 'NOT PROVIDED',
    'INFORMATION REQUESTED (BEST EFFORTS)': 'NOT PROVIDED',
    'C.E.O.': 'CEO',
}
# Return the mapped label when one exists, otherwise the original value.
f = lambda x: occ_mapping.get(x, x)
fec['contbr_occupation'] = fec.contbr_occupation.map(f)
# Normalize equivalent employer labels. The first key now uses plain spaces;
# with a tab between "INFORMATION" and "REQUESTED" it never matched the data.
emp_mapping = {
    'INFORMATION REQUESTED PER BEST EFFORTS': 'NOT PROVIDED',
    'INFORMATION REQUESTED': 'NOT PROVIDED',
    'SELF': 'SELF-EMPLOYED',
    'SELF EMPLOYED': 'SELF-EMPLOYED',
}
# Return the mapped label when one exists, otherwise the original value.
f = lambda x: emp_mapping.get(x, x)
fec['contbr_employer'] = fec.contbr_employer.map(f)
# Rebuild the two-candidate subset from the cleaned frame; a subset taken
# before the label cleanup keeps the raw labels (e.g. 'SELF', 'C.E.O.').
fec_mrbo = fec[fec.cand_nm.isin(['Obama, Barack', 'Romney, Mitt'])]
# Sum of contb_receipt_amt for every occupation (rows) and party (columns).
by_occupation = fec.pivot_table(
    values='contb_receipt_amt',
    index='contbr_occupation',
    columns='party',
    aggfunc='sum',
)
# Keep only the occupations whose total across both parties exceeds 2,000,000.
over_2mm = by_occupation[by_occupation.sum(axis=1) > 2000000]
over_2mm
party                                      Democrat    Republican
contbr_occupation
ATTORNEY                                11141982.97  7.477194e+06
CEO                                      2074974.79  4.211041e+06
CONSULTANT                               2459912.71  2.544725e+06
ENGINEER                                  951525.55  1.818374e+06
EXECUTIVE                                1355161.05  4.138850e+06
HOMEMAKER                                4248875.80  1.363428e+07
INFORMATION REQUESTED PER BEST EFFORTS          NaN  1.634053e+07
over_2mm.plot(kind='barh')# horizontal bar chart
<matplotlib.axes._subplots.AxesSubplot at 0x28a2e8604e0>

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-2yyw6mhe-1592225133002)(output_20_1.png)]

def get_top_amounts(group, key, n=5):
    """Sum 'contb_receipt_amt' per value of key and return the n largest sums.

    Args:
        group: DataFrame that has a 'contb_receipt_amt' column.
        key: Column name (or other groupby key) to aggregate by.
        n: How many of the largest totals to return.

    Returns:
        Series of totals indexed by key, ordered from largest to smallest.
    """
    return (
        group.groupby(key)['contb_receipt_amt']
        .sum()
        .nlargest(n)
    )
grouped	=	fec_mrbo.groupby('cand_nm')# group the rows by candidate name (cand_nm)
grouped.apply(get_top_amounts,'contbr_occupation',n=7)# 7 largest occupation totals per candidate
cand_nm        contbr_occupation                     
Obama, Barack  RETIRED                                   25305116.38
               ATTORNEY                                  11141982.97
               INFORMATION REQUESTED                      4866973.96
               HOMEMAKER                                  4248875.80
               PHYSICIAN                                  3735124.94
               LAWYER                                     3160478.87
               CONSULTANT                                 2459912.71
Romney, Mitt   RETIRED                                   11508473.59
               INFORMATION REQUESTED PER BEST EFFORTS    11396894.84
               HOMEMAKER                                  8147446.22
               ATTORNEY                                   5364718.82
               PRESIDENT                                  2491244.89
               EXECUTIVE                                  2300947.03
               C.E.O.                                     1968386.11
Name: contb_receipt_amt, dtype: float64
grouped.apply(get_top_amounts,'contbr_employer',n=10)
cand_nm        contbr_employer                       
Obama, Barack  RETIRED                                   22694358.85
               SELF-EMPLOYED                             17080985.96
               NOT EMPLOYED                               8586308.70
               INFORMATION REQUESTED                      5053480.37
               HOMEMAKER                                  2605408.54
               SELF                                       1076531.20
               SELF EMPLOYED                               469290.00
               STUDENT                                     318831.45
               VOLUNTEER                                   257104.00
               MICROSOFT                                   215585.36
Romney, Mitt   INFORMATION REQUESTED PER BEST EFFORTS    12059527.24
               RETIRED                                   11506225.71
               HOMEMAKER                                  8147196.22
               SELF-EMPLOYED                              7409860.98
               STUDENT                                     496490.94
               CREDIT SUISSE                               281150.00
               MORGAN STANLEY                              267266.00
               GOLDMAN SACH & CO.                          238250.00
               BARCLAYS CAPITAL                            162750.00
               H.I.G. CAPITAL                              139500.00
Name: contb_receipt_amt, dtype: float64
bins=np.array([0,1,10,100,1000,10000,100000,1000000,10000000])# bucket edges for grouping contribution amounts
labels=pd.cut(fec_mrbo.contb_receipt_amt,bins)# assign each contribution amount to its bucket
labels
411         (10, 100]
412       (100, 1000]
413       (100, 1000]
414         (10, 100]
415         (10, 100]
             ...     
701381      (10, 100]
701382    (100, 1000]
701383        (1, 10]
701384      (10, 100]
701385    (100, 1000]
Name: contb_receipt_amt, Length: 694282, dtype: category
Categories (8, interval[int64]): [(0, 1] < (1, 10] < (10, 100] < (100, 1000] < (1000, 10000] < (10000, 100000] < (100000, 1000000] < (1000000, 10000000]]
grouped	=fec_mrbo.groupby(['cand_nm',labels])
grouped.size().unstack(0)
cand_nm              Obama, Barack  Romney, Mitt
contb_receipt_amt
(0, 1]                         493            77
(1, 10]                      40070          3681
(10, 100]                   372280         31853
(100, 1000]                 153991         43357
(1000, 10000]                22284         26186
(10000, 100000]                  2             1
(100000, 1000000]                3             0
(1000000, 10000000]              4             0
bucket_sums	=	grouped.contb_receipt_amt.sum().unstack(0)# summed amount per group, with index level 0 moved to the columns
normed_sums	=	bucket_sums.div(bucket_sums.sum(axis=1),	axis=0)# divide every row by its row total
normed_sums
cand_nm              Obama, Barack  Romney, Mitt
contb_receipt_amt
(0, 1]                    0.805182      0.194818
(1, 10]                   0.918767      0.081233
(10, 100]                 0.910769      0.089231
(100, 1000]               0.710176      0.289824
(1000, 10000]             0.447326      0.552674
(10000, 100000]           0.823120      0.176880
(100000, 1000000]         1.000000           NaN
(1000000, 10000000]       1.000000           NaN
normed_sums[:-2].plot(kind='barh')# share of the total donated amount each of the two candidates received per donation size (last two rows left out)
<matplotlib.axes._subplots.AxesSubplot at 0x28a35730550>

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-J5IXppVJ-1592225133003)(output_28_1.png)]

grouped = fec_mrbo.groupby(['cand_nm','contbr_st'])
totals = grouped.contb_receipt_amt.sum().unstack(0).fillna(0)# contb_receipt_amt is the contribution amount; missing combinations become 0
totals = totals[totals.sum(1) > 100000]# keep rows whose row sum exceeds 100000
totals[:5]# contribution totals by state (first five rows)
cand_nm    Obama, Barack  Romney, Mitt
contbr_st
AK             281840.15      86204.24
AL             543123.48     527303.51
AR             359247.28     105556.00
AZ            1506476.98    1888436.23
CA           23824984.24   11237636.60
percent = totals.div(totals.sum(1),axis=0)# share within each row; sum(1) sums across the columns
percent[:5]# each candidate's share of the total contributions per state (first five rows)
cand_nm    Obama, Barack  Romney, Mitt
contbr_st
AK              0.765778      0.234222
AL              0.507390      0.492610
AR              0.772902      0.227098
AZ              0.443745      0.556255
CA              0.679498      0.320502
  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值