import requests
import json
import sqlite3
import xlwt
from urllib import parse
class Zl_zp(object):
    """Crawl Zhaopin job-search results into SQLite and an Excel workbook.

    Each job returned by the search API is flattened into a 24-value row
    that is inserted into the ``zlzp`` table of ``zlzp.db`` and appended to
    the ``job_data`` sheet of ``职位信息.xls``.
    """

    # Search endpoint; ``start`` is the result offset, ``kw`` the URL-quoted keyword.
    SEARCH_URL = ('https://fe-api.zhaopin.com/c/i/sou'
                  '?start={start}&pageSize={size}&cityId=489&kw={kw}&kt=3')
    PAGE_SIZE = 60
    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 10

    # Column order shared by the SQL insert, the Excel sheet and _extract_row.
    _COLUMNS = (
        'number', 'z_id', 'display', 'company_number', 'company_id',
        'company_url', 'company_name', 'company_size', 'company_type',
        'positionURL', 'workingExp', 'edulevel', 'salary', 'empltype',
        'jobname', 'geo_lat', 'geo_lon', 'city', 'update_time',
        'createdate', 'enddate', 'welfare', 'score', 'resumeCount',
    )
    _INSERT_SQL = 'insert into zlzp({}) values({})'.format(
        ','.join(_COLUMNS), ','.join('?' * len(_COLUMNS)))

    # Excel header titles, aligned index-for-index with _COLUMNS.
    # Columns 15/16 hold latitude then longitude, so the titles follow suit.
    _HEADERS = (
        '职位编号', '职位id', '职位种类', '公司编号', '公司id', '公司简介',
        '公司名称', '公司规模', '公司性质', '职位详情地址', '工作经验',
        '学历要求', '职位薪资', '职位类型', '职位名称', '纬度', '经度',
        '公司所在地', '更新日期', '创建日期', '结束日期', '公司福利',
        '职位评分', '简历个数',
    )

    def __init__(self):
        """Set up crawl state, the Excel header row and the SQLite table."""
        self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0'}
        self.html = ''
        self.page = 0
        self.connect = None
        self.cursor = None
        self.count = 0
        self.results = []
        self.page_num = 0
        # Number of data rows written to the sheet so far (row 0 is the header).
        self.count_l = 0
        self.workbook = xlwt.Workbook(encoding='utf-8')
        self.sheet = self.workbook.add_sheet('job_data')
        self.create_excel()
        self.create_table()

    def get_html(self, url):
        """Fetch ``url`` and store the decoded JSON payload in ``self.html``."""
        response = requests.get(url=url, headers=self.headers,
                                timeout=self.REQUEST_TIMEOUT)
        # json.loads no longer accepts an ``encoding`` keyword (removed in 3.9);
        # response.text is already a decoded str.
        self.html = json.loads(response.text)

    @staticmethod
    def _extract_row(job):
        """Flatten one API result dict into a tuple ordered like _COLUMNS."""
        company = job['company']
        geo = job['geo']
        return (
            job['number'],
            job['id'],
            job['jobType']['display'],
            company['number'],
            company['id'],
            company['url'],
            company['name'],
            company['size']['name'],
            company['type']['name'],
            job['positionURL'],
            job['workingExp']['name'],
            job['eduLevel']['name'],
            job['salary'],
            job['emplType'],
            job['jobName'],
            geo['lat'],
            geo['lon'],
            job['city']['display'],
            job['updateDate'],
            job['createDate'],
            job['endDate'],
            ','.join(job['welfare']),
            job['score'],
            job['resumeCount'],
        )

    def parse_html(self, url):
        """Download one result page and persist every job it contains.

        On a non-200 API code ``self.results`` is reset to an empty list so
        that ``run`` stops instead of looping on a stale or unset value.
        """
        self.get_html(url)
        if self.html.get('code') != 200:
            print('获取失败')
            self.results = []
            return
        self.results = self.html['data']['results']
        for job in self.results:
            self.count += 1
            print('*' * 40)
            print('正在爬取第{}个职位的相关信息.......'.format(self.count))
            row = self._extract_row(job)
            self.insert_sql(*row)
            self.save_data(*row)

    def con_sql(self):
        """Open ``zlzp.db`` and create a cursor on it."""
        self.connect = sqlite3.connect('zlzp.db')
        self.cursor = self.connect.cursor()

    def close_sql(self):
        """Commit pending work, then close the cursor and the connection."""
        self.connect.commit()
        self.cursor.close()
        self.connect.close()

    def create_table(self):
        """Create the ``zlzp`` table if it does not exist yet."""
        sql = "create table if not exists zlzp(id integer primary key,number char,z_id integer,display text,company_number char,company_id integer,company_url char,company_name text,company_size text,company_type text,positionURL char,workingExp char,edulevel text,salary char,empltype text,jobname char,geo_lat char,geo_lon char,city text,update_time text,createdate char,enddate char,welfare text,score char,resumeCount integer)"
        self.con_sql()
        try:
            self.cursor.execute(sql)
        finally:
            # Always release the connection, even if the statement fails.
            self.close_sql()

    def create_excel(self):
        """Write the header row (row 0) of the worksheet."""
        for col, title in enumerate(self._HEADERS):
            self.sheet.write(0, col, title)

    def save_data(self, *args):
        """Append ``args`` as the next worksheet row and save the workbook.

        Values are written left to right starting at column 0.
        """
        self.count_l += 1
        for idx, data in enumerate(args):
            self.write_into_excel(idx, data)
        self.workbook.save('职位信息.xls')

    def write_into_excel(self, idx, data):
        """Write ``data`` into column ``idx`` of the current data row."""
        self.sheet.write(self.count_l, idx, data)

    @staticmethod
    def _to_sql_value(value):
        """Return ``value`` unchanged if sqlite3 can bind it, else ``str(value)``."""
        if value is None or isinstance(value, (int, float, str, bytes)):
            return value
        return str(value)

    def insert_sql(self, *args):
        """Insert one row into ``zlzp``.

        ``args`` must supply one value per entry of ``_COLUMNS``, in that
        order. Values are bound as query parameters rather than formatted
        into the SQL text, so quotes inside scraped strings cannot break or
        alter the statement.

        Raises:
            ValueError: if the number of values does not match the columns.
        """
        if len(args) != len(self._COLUMNS):
            raise ValueError('insert_sql expects {} values, got {}'.format(
                len(self._COLUMNS), len(args)))
        params = tuple(self._to_sql_value(value) for value in args)
        self.con_sql()
        try:
            self.cursor.execute(self._INSERT_SQL, params)
        finally:
            # Always release the connection, even if the statement fails.
            self.close_sql()

    def run(self, name):
        """Crawl result pages for the URL-quoted keyword ``name``.

        Pages are requested in order until one yields no results (or the
        API reports a failure).
        """
        while True:
            self.page += 1
            print('$' * 40)
            print('正在爬取第{}页数据.....'.format(self.page))
            start = (self.page - 1) * self.PAGE_SIZE
            url = self.SEARCH_URL.format(start=start, size=self.PAGE_SIZE, kw=name)
            self.parse_html(url)
            if not self.results:
                print('数据爬取完毕.........')
                break
if __name__ == '__main__':
    # Prompt for the job keyword, percent-encode it for the query string,
    # then crawl every result page for it.
    keyword = input('请输入您要查找的工作名称:')
    crawler = Zl_zp()
    crawler.run(name=parse.quote(keyword))