'''
first: false
pn: 4
kd: python
post_url = 'https://www.lagou.com/jobs/positionAjax.json?city=上海&needAddtionalResult=false'
'''
import json
import time
import urllib.request
import urllib.parse
import jsonpath
import pymysql
def create_lagou(page):
    """Build the POST request for one page of Lagou "python" job listings.

    Args:
        page: Page number, sent as the ``pn`` form field.

    Returns:
        A ``urllib.request.Request`` carrying the browser-like headers and the
        url-encoded form body. Because ``data`` is set, it is sent as a POST.
    """
    # The city name must be percent-encoded: http.client encodes the request
    # line as ASCII and raises UnicodeEncodeError on raw non-ASCII characters.
    post_url = (
        'https://www.lagou.com/jobs/positionAjax.json?city='
        + urllib.parse.quote('上海')
        + '&needAddtionalResult=false'
    )
    # NOTE(review): the Cookie below is a hard-coded session captured from a
    # browser; presumably it expires and has to be refreshed -- confirm.
    headers = {
        'Cookie': 'JSESSIONID=ABAAABAAAFCAAEG12EEC0073FA18163B1C43975D71B94ED; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1542875288; _ga=GA1.2.275185600.1542875289; user_trace_token=20181122162808-8a90607b-ee30-11e8-8acd-5254005c3644; LGUID=20181122162808-8a9064c2-ee30-11e8-8acd-5254005c3644; _gid=GA1.2.791093737.1542875290; index_location_city=%E4%B8%8A%E6%B5%B7; _gat=1; LGSID=20181122170437-a32894a8-ee35-11e8-8acd-5254005c3644; PRE_UTM=; PRE_HOST=; PRE_SITE=; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2F; TG-TRACK-CODE=index_search; SEARCH_ID=28c951a5fc404f9ba1079d4334eb097f; LGRID=20181122170442-a6564d61-ee35-11e8-b44d-525400f775ce; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1542877483',
        'Host': 'www.lagou.com',
        'Origin': 'https://www.lagou.com',
        'Referer': 'https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput=',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36',
    }
    form = {
        'first': 'false',  # was misspelled 'flase'
        'pn': page,
        'kd': 'python',
    }
    body = urllib.parse.urlencode(form).encode('utf-8')
    return urllib.request.Request(url=post_url, headers=headers, data=body)
def save_lagou(request, page):
    """Fetch *request* and store the JSON response as ``lagou<page>.json``.

    The response body is decoded as UTF-8, parsed, and re-serialized with
    ``ensure_ascii=False`` so non-ASCII text is written as readable UTF-8
    instead of ``\\uXXXX`` escapes.

    Args:
        request: A ``urllib.request.Request`` (or URL string) to open.
        page: Page number used to build the output file name.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # Use the response as a context manager so the connection is always closed.
    with urllib.request.urlopen(request) as response:
        content = response.read().decode('utf-8')

    # Round-trip through the parser: this validates the payload before it is
    # written and normalizes the escaping.
    payload = json.loads(content)
    serialized = json.dumps(payload, ensure_ascii=False)

    with open('lagou{}.json'.format(page), 'w', encoding='utf-8') as fp:
        fp.write(serialized)
def jsonpath_lagou(page):
    """Load ``lagou<page>.json`` and insert its job records into MySQL.

    Five fields are pulled from every entry under ``result`` in the saved
    response (position name, salary, work experience, education, city) and
    written to the ``lg`` table of the local ``lagou`` database.

    Args:
        page: Page number identifying which ``lagou<page>.json`` file to read.
    """
    with open('lagou{}.json'.format(page), 'r', encoding='utf-8') as fp:
        obj = json.load(fp)

    # Order matches the column list of the INSERT statement below.
    fields = ('positionName', 'salary', 'workYear', 'education', 'city')
    # jsonpath.jsonpath() returns False rather than an empty list when nothing
    # matches, so fall back to [] to keep the zip below well-defined.
    columns = [
        jsonpath.jsonpath(obj, '$..result..' + field) or []
        for field in fields
    ]
    # Pair the i-th value of every column into one row; zip stops at the
    # shortest column instead of raising IndexError.
    rows = list(zip(*columns))
    if not rows:
        return

    insert_sql = (
        "insert into lg (name1, money, year1, education, city) "
        "values (%s, %s, %s, %s, %s)"
    )
    # Keyword arguments: PyMySQL 1.x no longer accepts positional ones.
    db = pymysql.connect(
        host="localhost",
        user="root",
        password="123456",
        database="lagou",
        charset="utf8",
    )
    try:
        with db.cursor() as cursor:
            # Let the driver escape the values: string-built SQL breaks on
            # apostrophes in scraped text and is open to SQL injection.
            cursor.executemany(insert_sql, rows)
        db.commit()
    finally:
        db.close()
# Script entry point: fetch, save and import result pages 1-5.
if __name__ == '__main__':
    for page in range(1, 6):
        # Pause between requests to avoid hammering the server.
        time.sleep(1)
        request = create_lagou(page)
        save_lagou(request, page)
        jsonpath_lagou(page)