# 拉勾网数据爬取 (Lagou job-listing data scraper)

'''
first: false
pn: 4
kd: python
post_url = 'https://www.lagou.com/jobs/positionAjax.json?city=上海&needAddtionalResult=false'
'''
import json
import time
import urllib.request
import urllib.parse

import jsonpath
import pymysql

def create_lagou(page):
    """Build the POST request for one page of Lagou "python" job results.

    Args:
        page: Page number, sent as the ``pn`` form field.

    Returns:
        A ``urllib.request.Request`` carrying the browser-like headers and the
        URL-encoded form body. Nothing is sent over the network here.
    """
    # The city value is non-ASCII; it has to be percent-encoded because the
    # HTTP request line is encoded as ASCII when the request is sent.
    # NOTE: "needAddtionalResult" is kept exactly as the endpoint spells it.
    query = urllib.parse.urlencode({
        'city': '上海',
        'needAddtionalResult': 'false',
    })
    post_url = 'https://www.lagou.com/jobs/positionAjax.json?' + query

    # Headers copied from a browser session. NOTE(review): the Cookie is a
    # captured session value and has presumably expired -- refresh before use.
    headers = {
        'Cookie': 'JSESSIONID=ABAAABAAAFCAAEG12EEC0073FA18163B1C43975D71B94ED; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1542875288; _ga=GA1.2.275185600.1542875289; user_trace_token=20181122162808-8a90607b-ee30-11e8-8acd-5254005c3644; LGUID=20181122162808-8a9064c2-ee30-11e8-8acd-5254005c3644; _gid=GA1.2.791093737.1542875290; index_location_city=%E4%B8%8A%E6%B5%B7; _gat=1; LGSID=20181122170437-a32894a8-ee35-11e8-8acd-5254005c3644; PRE_UTM=; PRE_HOST=; PRE_SITE=; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2F; TG-TRACK-CODE=index_search; SEARCH_ID=28c951a5fc404f9ba1079d4334eb097f; LGRID=20181122170442-a6564d61-ee35-11e8-b44d-525400f775ce; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1542877483',
        'Host': 'www.lagou.com',
        'Origin': 'https://www.lagou.com',
        'Referer': 'https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput=',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36',
    }

    form = {
        'first': 'false',  # was misspelled 'flase'
        'pn': page,
        'kd': 'python',
    }
    # Supplying a bytes body makes urllib issue a POST instead of a GET.
    body = urllib.parse.urlencode(form).encode('utf-8')
    return urllib.request.Request(url=post_url, headers=headers, data=body)

def save_lagou(request, page):
    """Send ``request`` and save the JSON response to ``lagou<page>.json``.

    The response body is decoded as UTF-8, parsed as JSON and re-serialised
    with ``ensure_ascii=False`` so non-ASCII text is written as-is rather than
    as ``\\uXXXX`` escapes.

    Args:
        request: A ``urllib.request.Request`` (or URL) to open.
        page: Page number used to build the output file name.

    Raises:
        urllib.error.URLError: If the request fails.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # Close the response as soon as the body has been read.
    with urllib.request.urlopen(request) as response:
        content = response.read().decode('utf-8')

    # Round-trip through the parser: validates the payload and lets us write
    # it back with readable (unescaped) characters.
    payload = json.loads(content)
    text = json.dumps(payload, ensure_ascii=False)

    with open('lagou{}.json'.format(page), 'w', encoding='utf-8') as fp:
        fp.write(text)

def jsonpath_lagou(page):
    """Load ``lagou<page>.json`` and insert its job rows into MySQL table ``lg``.

    Five fields are pulled from under every ``result`` node with JSONPath
    (positionName, salary, workYear, education, city) and written to the
    columns ``name1, money, year1, education, city``.

    If any field has no matches, or the file holds no results, nothing is
    inserted. If the field lists differ in length, only complete rows are
    inserted.

    Args:
        page: Page number identifying the JSON file to read.
    """
    with open('lagou{}.json'.format(page), 'r', encoding='utf-8') as fp:
        obj = json.load(fp)

    fields = ('positionName', 'salary', 'workYear', 'education', 'city')
    # jsonpath.jsonpath returns False (not an empty list) when nothing
    # matches, so normalise that to [] before zipping.
    columns = [
        jsonpath.jsonpath(obj, '$..result..{}'.format(field)) or []
        for field in fields
    ]
    rows = list(zip(*columns))
    if not rows:
        return

    # Keyword arguments: PyMySQL 1.0+ no longer accepts positional ones.
    db = pymysql.connect(
        host='localhost',
        user='root',
        password='123456',
        database='lagou',
        charset='utf8',
    )
    try:
        with db.cursor() as cursor:
            # Parameterised query: the driver escapes the values, so quotes
            # in scraped text can neither break nor inject into the SQL.
            cursor.executemany(
                'insert into lg (name1, money, year1, education, city) '
                'values (%s, %s, %s, %s, %s)',
                rows,
            )
        db.commit()
    finally:
        db.close()

if __name__ == '__main__':
    # Fetch, save and import result pages 1 through 5, one at a time.
    for page in range(1, 6):
        # One-second pause before each request (presumably to throttle the
        # crawl).
        time.sleep(1)
        request = create_lagou(page)
        save_lagou(request, page)
        jsonpath_lagou(page)

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值