python学习记录(3)——数据分析实战

阿里天池-用Pandas揭秘美国选民的总统喜好
赛事地址https://tianchi.aliyun.com/competition/entrance/531837/forum

import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud,ImageColorGenerator
import os

candidates = pd.read_csv("weball20.txt", sep = '|',names=['CAND_ID','CAND_NAME','CAND_ICI','PTY_CD','CAND_PTY_AFFILIATION','TTL_RECEIPTS',
                                                          'TRANS_FROM_AUTH','TTL_DISB','TRANS_TO_AUTH','COH_BOP','COH_COP','CAND_CONTRIB',
                                                          'CAND_LOANS','OTHER_LOANS','CAND_LOAN_REPAY','OTHER_LOAN_REPAY','DEBTS_OWED_BY',
                                                          'TTL_INDIV_CONTRIB','CAND_OFFICE_ST','CAND_OFFICE_DISTRICT','SPEC_ELECTION','PRIM_ELECTION','RUN_ELECTION'
                                                          ,'GEN_ELECTION','GEN_ELECTION_PRECENT','OTHER_POL_CMTE_CONTRIB','POL_PTY_CONTRIB',
                                                          'CVG_END_DT','INDIV_REFUNDS','CMTE_REFUNDS'])
ccl = pd.read_csv("ccl.txt", sep="|", names=['CAND_ID','CAND_ELECTION_YR','FEC_ELECTION_YR','CMTE_ID','CMTE_TP','CMTE_DSGN','LINKAGE_ID'])

ccl = pd.merge(ccl,candidates)
ccl = pd.DataFrame(ccl,columns=['CMTE_ID','CAND_ID', 'CAND_NAME','CAND_PTY_AFFILIATION'])
ccl.head(20)

itcont = pd.read_csv("itcont_2020_20200722_20200820.txt",sep = '|',names=['CMTE_ID','AMNDT_IND','RPT_TP','TRANSACTION_PGI',
                                                                                  'IMAGE_NUM','TRANSACTION_TP','ENTITY_TP','NAME','CITY',
                                                                                  'STATE','ZIP_CODE','EMPLOYER','OCCUPATION','TRANSACTION_DT',
                                                                                  'TRANSACTION_AMT','OTHER_ID','TRAN_ID','FILE_NUM','MEMO_CD',
                                                                                  'MEMO_TEXT','SUB_ID'])
c_itcont = pd.merge(ccl,itcont)
c_itcont = pd.DataFrame(c_itcont,columns=[ 'CAND_NAME','NAME', 'STATE','EMPLOYER','OCCUPATION',
                                           'TRANSACTION_AMT', 'TRANSACTION_DT','CAND_PTY_AFFILIATION'])
##数据清洗,补充缺失的列,将时间,日期规范化
c_itcont['STATE'].fillna('NOT PROVIDED',inplace=True)
c_itcont['EMPLOYER'].fillna('NOT PROVIDED',inplace=True)
c_itcont['OCCUPATION'].fillna('NOT PROVIDED',inplace=True)

c_itcont['TRANSACTION_DT']=c_itcont['TRANSACTION_DT'].astype(str)
c_itcont['TRANSACTION_DT'] = [i[3:7]+i[0]+i[1:3] for i in c_itcont['TRANSACTION_DT'] ]

##数据分析,捐款总数最多的党派、候选人、职业、州
sum_AFFILIATION = c_itcont.groupby("CAND_PTY_AFFILIATION").sum().sort_values("TRANSACTION_AMT",ascending=False).head(10)

sum_CANDIDATE = c_itcont.groupby("CAND_NAME").sum().sort_values("TRANSACTION_AMT",ascending=False).head(10)

sum_OCCUPATION = c_itcont.groupby("OCCUPATION").sum().sort_values("TRANSACTION_AMT",ascending=False).head(10)

c_itcont['OCCUPATION'].value_counts().head(10)

sum_STATE = c_itcont.groupby("STATE").sum().sort_values("TRANSACTION_AMT",ascending=False).head(5)

c_itcont['STATE'].value_counts().head(5)

##数据可视化
st_amt = c_itcont.groupby('STATE').sum().sort_values("TRANSACTION_AMT",ascending=False)[:10]
st_amt = pd.DataFrame(st_amt,columns=["TRANSACTION_AMT"])
st_amt.plot(kind='bar')

st_amt = c_itcont.groupby('STATE').size().sort_values(ascending=False).head(10)
st_amt.plot(kind='bar')

biden = c_itcont[c_itcont['CAND_NAME']=='BIDEN, JOSEPH R JR']
biden_state = biden.groupby('STATE').sum().sort_values("TRANSACTION_AMT", ascending=False).head(10)
biden_state.plot.pie(figsize=(10, 10),autopct='%0.2f%%',subplots=True)
plt.show()

os.rename('TB10Jx4pBBh1e4jSZFhXXcC9VXa-689-390.jpg','biden.jpg')
data = ' '.join(biden['NAME'].tolist())
bg = plt.imread('biden.jpg')
wc = WordCloud(
    background_color='white',
    width=890,
    height=900,
    mask=bg,
    margin=10,
    max_font_size=100,
    random_state=20,
).generate_from_text(data)
big_color = ImageColorGenerator(bg)
plt.imshow(wc.recolor(color_func=big_color))
plt.axis('off')
wc.to_file("biden_wordcloud.png")
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值