数据分析-chen

爬虫部分

def keep_to_csv(rows, filename='movies_test_111.csv', encoding='gb18030'):
    """Write post records to a CSV file using a fixed column layout.

    Args:
        rows: Iterable of dicts, one per post. Keys missing from a row are
            written as empty cells; a key that is not one of the header
            columns makes ``csv.DictWriter`` raise ``ValueError``.
        filename: Destination path. The default is the path this function
            has always written to, so existing callers are unaffected.
        encoding: Text encoding of the output file (default ``gb18030``).
    """
    print('正在保存csv…………')
    headers = ['Id', 'PostId', 'RecruitPostId', 'RecruitPostName', "CountryName", "LocationName", "BGName",
               "ProductName", "CategoryName", "Responsibility", "LastUpdateTime", "PostURL", "SourceID", "IsCollect",
               "IsValid"]
    # newline='' leaves line-ending handling to the csv module.
    with open(filename, 'w', encoding=encoding, newline='') as f:
        f_csv = csv.DictWriter(f, headers)
        f_csv.writeheader()
        f_csv.writerows(rows)


# Query the Tencent careers post API (pageIndex=1, pageSize=200) and dump the
# returned posts to CSV.
timestamp = int(time.time()*1000)  # current time in milliseconds, sent as the `timestamp` query parameter
print(timestamp)
response = requests.get(
    url=f"https://careers.tencent.com/tencentcareer/api/post/Query?timestamp={timestamp}&countryId=&cityId=&bgIds=&productId=&categoryId=&parentCategoryId=&attrId=&keyword=&pageIndex=1&pageSize=200&language=zh-cn&area=cn",
    # Without a timeout, requests waits indefinitely on a stalled connection.
    timeout=30,
)
# Fail on an HTTP error status instead of trying to parse an error page as JSON.
response.raise_for_status()
json_data = json.loads(response.text)

keep_to_csv(rows=json_data["Data"]['Posts'])

导入的包

import csv
import json
import time
from collections import Counter

import matplotlib
import numpy as np
import pandas as pd
import requests
from matplotlib import pyplot as plt

读取文件

# Load the job listings from the gb18030-encoded CSV into a DataFrame,
# then echo the frame for a quick visual check.
with open("python_job .csv", encoding='gb18030') as csv_file:
    data = pd.read_csv(csv_file)
print(data)

设置可以显示中文字体

# Make matplotlib use a font that can render Chinese text in titles and labels.
font = dict(family="MicroSoft YaHei", weight='bold')
matplotlib.rc("font", **font)

统计各个城市的职位数量

# Tally how many rows belong to each city.
# Counter does this in a single pass and keeps cities in first-seen order,
# so the mapping (and anything plotted from it) is the same on every run;
# iterating a set of strings gives an order that can differ between runs.
area = list(data['area'])
dec = dict(Counter(area))

饼图(各城市职位数量占比)

# Pie chart of the per-city counts held in `dec`.
plt.figure(figsize=(10,10),dpi=80)
city_name_list = list(dec.keys())
city_name_num = list(dec.values())
# One offset per wedge: plt.pie requires `explode` to have the same length as
# the data, so size it from the data instead of assuming eight cities.
explode = [0.05] * len(city_name_num)
plt.pie(city_name_num,explode=explode,labels=city_name_list,shadow=True,autopct="%.f%%")
plt.axis("equal")  # equal aspect ratio so the pie is drawn as a circle
plt.show()

柱状图

# Bar chart of how many rows carry each education level.
education = list(data['education'])
# Single-pass tally in first-seen order, so the bar order is stable across runs.
dac = dict(Counter(education))
print(dac)
education_type = list(dac.keys())
education_num = list(dac.values())
plt.title("学历分布")
plt.bar(education_type,education_num,width=0.6)
# Annotate every bar with its count, centred horizontally at the bar top.
for a,b in zip(education_type,education_num):
    plt.text(a,b,s=str(b),ha="center")
plt.show()

各城市学历分布饼图(每个城市一张)

# One education-distribution pie chart per city in `city_name_list`.
drc = {}
# Rows of `data` split by city, in the same order as `city_name_list`.
llst = [data[data['area'] == city] for city in city_name_list]

for c,k in zip(llst,city_name_list):
    education = list(c["education"])
    # Rebuild the tally for every city. Reusing one dict across iterations
    # kept education levels from earlier cities (with their old counts) in
    # the chart of any later city that lacked those levels.
    drc = dict(Counter(education))
    type_list = list(drc.keys())
    num_list = list(drc.values())
    plt.title(f"{k}学历分布图")
    plt.pie(num_list,labels=type_list,shadow=True,autopct="%.f%%")
    plt.show()
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值