# 拉勾网数据库代码-立哥开发 (Lagou job scraper with MySQL storage, developed by Li Ge)

import random
import time

import requests
from openpyxl import Workbook
import pymysql.cursors

def get_conn():
    """Open and return a new PyMySQL connection to the local `python` database.

    The connection uses ``DictCursor`` as its cursor class and the
    ``utf8mb4`` character set.

    Returns:
        The connection object returned by ``pymysql.connect``. The caller is
        responsible for closing it.
    """
    # NOTE(review): host and credentials are hard-coded; consider moving them
    # to configuration before using this outside a local sandbox.
    conn = pymysql.connect(
        host='localhost',
        user='root',
        password='root',
        db='python',
        charset='utf8mb4',
        cursorclass=pymysql.cursors.DictCursor,
    )
    return conn

def insert(conn, info):
    """Insert one job record into the `python` table and commit it.

    Args:
        conn: An open database connection, as returned by ``get_conn``.
        info: A sequence of seven values matching the column order
            (shortname, fullname, industryfield, companySize, salary,
            city, education).
    """
    sql = (
        "INSERT INTO `python` (`shortname`, `fullname`, `industryfield`, "
        "`companySize`, `salary`, `city`, `education`) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s)"
    )
    # Values are passed separately from the statement so the driver does the
    # parameter substitution instead of string formatting.
    with conn.cursor() as cursor:
        cursor.execute(sql, info)
    # Commit per row so already-inserted rows survive a later failure.
    conn.commit()

def get_json(url, page, lang_name):
    """Fetch one page of search results and flatten it into rows.

    Sends a form-encoded POST to ``url`` and reads the position list from
    ``content -> positionResult -> result`` in the JSON response.

    Args:
        url: Endpoint URL to POST to.
        page: Page number, sent as the ``pn`` form field.
        lang_name: Search keyword, sent as the ``kd`` form field.

    Returns:
        A list with one 7-item list per position, ordered as
        companyShortName, companyFullName, industryField, companySize,
        salary, city, education. A key missing from a position is
        replaced by '无'.

    Raises:
        requests.RequestException: On a network failure or timeout.
        ValueError: If the response body is not valid JSON.
        KeyError: If the JSON lacks the expected
            ``content/positionResult/result`` path.
    """
    # NOTE(review): 'Content-Length' is hard-coded here although the form
    # body varies with page/keyword — confirm whether it is still needed.
    headers = {
        'Host': 'www.lagou.com',
        'Connection': 'keep-alive',
        'Content-Length': '23',
        'Origin': 'https://www.lagou.com',
        'X-Anit-Forge-Code': '0',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'X-Requested-With': 'XMLHttpRequest',
        'X-Anit-Forge-Token': 'None',
        'Referer': 'https://www.lagou.com/jobs/list_python?city=%E5%85%A8%E5%9B%BD&cl=false&fromSearch=true&labelWords=&suginput=',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
    }
    data = {'first': 'false', 'pn': page, 'kd': lang_name}

    # A timeout keeps a stalled connection from blocking the crawl forever.
    response = requests.post(url, data=data, headers=headers, timeout=30)
    # Named `payload` rather than `json` to avoid shadowing the stdlib module.
    payload = response.json()
    positions = payload['content']['positionResult']['result']

    # Output column order; must stay in sync with the INSERT statement's
    # column list used when the rows are stored.
    fields = (
        'companyShortName',
        'companyFullName',
        'industryField',
        'companySize',
        'salary',
        'city',
        'education',
    )
    return [
        [position.get(field, '无') for field in fields]
        for position in positions
    ]

def main():
    """Crawl 30 result pages for each of five cities and store every row.

    Each row returned by ``get_json`` is inserted into the database via
    ``insert`` and appended to a single worksheet. The workbook is written
    to '<lang_name>职位信息.xlsx' and the database connection is closed when
    the crawl ends, including when it ends with an exception.
    """
    lang_name = 'python'
    wb = Workbook()
    # The same active sheet collects rows for every city, so set it up once.
    ws1 = wb.active
    ws1.title = lang_name
    conn = get_conn()
    try:
        for city in ['北京', '上海', '广州', '深圳', '杭州']:
            url = 'https://www.lagou.com/jobs/positionAjax.json?city={}&needAddtionalResult=false'.format(city)
            for page in range(1, 31):
                info = get_json(url, page, lang_name)
                # Report the page that was just fetched (not the next one).
                print(city, 'page', page)
                # Random 10-20 s pause between requests.
                time.sleep(random.randint(10, 20))
                for row in info:
                    insert(conn, tuple(row))
                    ws1.append(row)
    finally:
        # Release the connection and keep whatever was collected so far,
        # even if a request or insert failed part-way through.
        conn.close()
        wb.save('{}职位信息.xlsx'.format(lang_name))

# Run the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值