# Code listing (originally preceded by the blog editor's "write code snippet here" placeholder).
from urllib import request
import re
from lxml import etree
import pymysql
# Scrape Python job postings from Tencent's HR site and store them in MySQL.
#
# Walks three search-result pages (start=0, 10, 20), follows every posting
# link found on each page, extracts the posting's fields with XPath, and
# inserts one row per posting into the `tengxun` table.

# NOTE(review): connection credentials are hard-coded here; consider loading
# them from configuration or the environment instead.
db = pymysql.connect(host='127.0.0.1', user='root', password='123456', port=3306, database='xueqiu')
cursor = db.cursor()

# Request headers sent with every HTTP request (search pages and postings).
headers = {
    'Referer': 'https://hr.tencent.com/position.php?lid=&tid=&keywords=Python',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.5702.400 QQBrowser/10.2.1893.400',
}

# Captures the relative href of each posting link on a search-result page.
# Compiled once because it is reused for every page.
DETAIL_LINK_RE = re.compile(r'<td class="l square"><a target="_blank" href="(.*?)">')

# Common XPath prefix of the table that holds a posting's details.
DETAIL_TABLE_XPATH = '//div[@ id ="position_detail"]/div[1]/table'

# Parameterized statement: the driver escapes each value, so quotes or other
# special characters in scraped text can neither break nor inject into the SQL.
INSERT_SQL = (
    "insert into tengxun(title,location,category,people,job,experience) "
    "values(%s,%s,%s,%s,%s,%s)"
)

try:
    for start in range(0, 30, 10):
        list_url = 'https://hr.tencent.com/position.php?keywords=Python&start=' + str(start)
        list_req = request.Request(list_url, headers=headers)
        # The context manager closes the HTTP response even if decoding fails.
        with request.urlopen(list_req) as list_response:
            html = list_response.read().decode('utf-8')

        for link in DETAIL_LINK_RE.findall(html):
            detail_url = 'https://hr.tencent.com/' + link
            detail_req = request.Request(detail_url, headers=headers)
            with request.urlopen(detail_req) as detail_response:
                page = etree.HTML(detail_response.read())

            # Each `[0]` raises IndexError if the expected node is missing,
            # which stops the run (the `finally` below still cleans up).
            title = page.xpath('//*[@id="sharetitle"]')[0].text
            location = page.xpath(DETAIL_TABLE_XPATH + '/tr[2]/td/text()')[0]
            category = page.xpath(DETAIL_TABLE_XPATH + '/tr[2]/td[2]/text()')[0]
            people = page.xpath(DETAIL_TABLE_XPATH + '/tr[2]/td[3]/text()')[0]

            # The list items of rows 3 and 4 are flattened into single
            # ';'-separated strings for the `job` and `experience` columns.
            job = ';'.join(page.xpath(DETAIL_TABLE_XPATH + '/tr[3]/td/ul/li/text()'))
            experience = ';'.join(page.xpath(DETAIL_TABLE_XPATH + '/tr[4]/td/ul/li/text()'))

            cursor.execute(
                INSERT_SQL,
                (title, location, category, people, job, experience),
            )
            # Commit per posting so rows already scraped survive a later failure.
            db.commit()
finally:
    # Always release the cursor and connection, even when a request,
    # XPath lookup, or insert raises.
    cursor.close()
    db.close()