# Crawler section (爬虫部分)
def keep_to_csv(rows, filename='movies_test_111.csv'):
    """Write job-posting records to a CSV file.

    Args:
        rows: Iterable of dicts keyed by the column names in ``headers``.
            A key missing from a row is written as an empty cell; a key
            that is not in ``headers`` makes ``csv.DictWriter`` raise
            ``ValueError``.
        filename: Destination path. Defaults to the path this function
            has always written to, so existing callers are unaffected.
    """
    print('正在保存csv…………')
    headers = [
        'Id', 'PostId', 'RecruitPostId', 'RecruitPostName', "CountryName",
        "LocationName", "BGName", "ProductName", "CategoryName",
        "Responsibility", "LastUpdateTime", "PostURL", "SourceID",
        "IsCollect", "IsValid",
    ]
    # newline='' hands line-ending control to the csv module, as its
    # documentation requires for files passed to a writer.
    with open(filename, 'w', encoding='gb18030', newline='') as f:
        f_csv = csv.DictWriter(f, headers)
        f_csv.writeheader()
        f_csv.writerows(rows)
# Millisecond Unix timestamp, sent as the `timestamp` query parameter.
timestamp = int(time.time() * 1000)
print(timestamp)
# Request the first page (pageIndex=1, pageSize=200) of postings.
# Without a timeout requests waits indefinitely on a stalled server.
response = requests.get(
    url=f"https://careers.tencent.com/tencentcareer/api/post/Query?timestamp={timestamp}&countryId=&cityId=&bgIds=&productId=&categoryId=&parentCategoryId=&attrId=&keyword=&pageIndex=1&pageSize=200&language=zh-cn&area=cn",
    timeout=10,
)
# Fail here on an HTTP error instead of trying to parse an error page.
response.raise_for_status()
json_data = json.loads(response.text)
# The postings live under Data -> Posts in the response body.
keep_to_csv(rows=json_data["Data"]['Posts'])
# Imported packages (导入的包)
import csv
import json
import time
from collections import Counter

import matplotlib
import numpy as np
import pandas as pd
import requests
from matplotlib import pyplot as plt
# Read the data file (读取文件)
# Load the job listings into a DataFrame; the file is decoded as gb18030.
with open("python_job .csv", encoding='gb18030') as csv_file:
    data = pd.read_csv(csv_file)

# Dump the loaded table to stdout.
print(data)
# Configure a font that can display Chinese text (设置可以显示中文字体)
# Font properties applied to matplotlib's global "font" rc group.
font = dict(family="MicroSoft YaHei", weight='bold')
matplotlib.rc("font", **font)
# Count the number of postings per city (统计各个城市的数量)
# Map each distinct value of the 'area' column to its number of rows.
# Counter tallies in a single pass instead of rescanning the list once
# per distinct value, and keeps first-seen order so the result is stable.
area = list(data['area'])
dec = dict(Counter(area))
# Pie chart (扇形表)
# Pie chart of how the rows are distributed across cities.
plt.figure(figsize=(10, 10), dpi=80)
# Parallel lists: wedge labels and wedge sizes, in the same order.
city_name_list = list(dec.keys())
city_name_num = list(dec.values())
# One offset per wedge. plt.pie rejects an explode sequence whose length
# differs from the data, so derive it from the data instead of assuming
# exactly eight cities.
explode = [0.05] * len(city_name_num)
plt.pie(city_name_num, explode=explode, labels=city_name_list, shadow=True, autopct="%.f%%")
# Equal axis scaling keeps the pie circular.
plt.axis("equal")
plt.show()
# Bar chart (柱状图)
# Bar chart of how many rows fall under each value of 'education'.
education = list(data['education'])
# Single-pass tally; first-seen order keeps the bar order stable.
dac = dict(Counter(education))
print(dac)
# Parallel lists: bar labels and bar heights, in the same order.
education_type = list(dac.keys())
education_num = list(dac.values())
plt.title("学历分布")
plt.bar(education_type, education_num, width=0.6)
# Write each bar's count at the top of the bar, centred horizontally.
for a, b in zip(education_type, education_num):
    plt.text(a, b, s=str(b), ha="center")
plt.show()
# One pie chart per city (八个饼图)
# One education-distribution pie chart per city.
drc = {}
# Rows of `data` for each city, in the same order as city_name_list.
llst = [data[data['area'] == city] for city in city_name_list]
for c, k in zip(llst, city_name_list):
    education = list(c["education"])
    # Rebuild the counts from scratch for every city. Reusing one dict
    # across iterations would carry education levels from earlier cities
    # into the chart of a city that has no rows at that level.
    drc = dict(Counter(education))
    type_list = list(drc.keys())
    num_list = list(drc.values())
    plt.title(f"{k}学历分布图")
    plt.pie(num_list, labels=type_list, shadow=True, autopct="%.f%%")
    plt.show()