import pymysql
class MysqlHelper(object):
    """Small wrapper around one PyMySQL connection and one cursor.

    The instance opens the connection on construction, exposes
    ``execute_modify_sql`` for statements that must be committed, and
    closes the cursor and connection in ``close()`` (also invoked from
    ``__del__``).

    NOTE(review): the default credentials below are the values that were
    hard-coded originally; prefer passing real settings from configuration
    instead of relying on these defaults.
    """

    def __init__(self, host='127.0.0.1', port=3306, user='root',
                 password='123456', database='py1011', charset='utf8'):
        """Open the connection and create a cursor.

        All parameters are forwarded as keyword arguments to
        ``pymysql.connect``; calling ``MysqlHelper()`` with no arguments
        connects with the same settings as before.
        """
        # Set both attributes first so close()/__del__ stay safe even when
        # pymysql.connect() raises before they would otherwise exist.
        self.db = None
        self.cursor = None
        self.db = pymysql.connect(host=host, port=port, user=user,
                                  password=password, database=database,
                                  charset=charset)
        self.cursor = self.db.cursor()

    def execute_modify_sql(self, sql, data=None):
        """Execute ``sql`` with parameters ``data`` and commit.

        ``sql`` and ``data`` are passed straight to ``cursor.execute``.
        If executing or committing raises, the connection is rolled back
        and the original exception is re-raised to the caller.
        """
        try:
            self.cursor.execute(sql, data)
            self.db.commit()
        except Exception:
            # Do not leave a half-applied transaction open on the shared
            # connection; the caller still sees the failure.
            self.db.rollback()
            raise

    def close(self):
        """Close the cursor and the connection; safe to call repeatedly."""
        # getattr: the attributes are missing when __init__ never ran.
        cursor = getattr(self, 'cursor', None)
        db = getattr(self, 'db', None)
        # Clear the references before closing so a second call is a no-op.
        self.cursor = None
        self.db = None
        if cursor is not None:
            cursor.close()
        if db is not None:
            db.close()

    def __del__(self):
        """Release the cursor and connection when the helper is collected."""
        self.close()
if __name__ == '__main__':
    # When this module is run directly, just construct a helper, which
    # opens the database connection with the constructor's settings.
    conn = MysqlHelper()
import requests
import re
from lxml import etree
import mysqlhelper
# One helper (one DB connection) and one parameterized INSERT are reused for
# every scraped posting.
myhelper = mysqlhelper.MysqlHelper()
sql = 'INSERT INTO tengxun (title, location_t, type_t, renshu_t,zhize,yaoqiu) VALUES (%s, %s, %s, %s, %s, %s)'

# `{}` receives the paging offset. str.format is used instead of %-formatting
# because the URL itself contains percent-encoded bytes (%E8...), which the
# % operator would try to interpret as format specifiers.
LIST_URL = 'https://hr.tencent.com/position.php?lid=&tid=&keywords=%E8%AF%B7%E8%BE%93%E5%85%A5%E5%85%B3%E9%94%AE%E8%AF%8D&start={}#a'
DETAIL_URL_PREFIX = 'https://hr.tencent.com/'
# The same browser-like User-Agent is sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36'
}
# Seconds; without a timeout requests.get can block indefinitely.
REQUEST_TIMEOUT = 10
# Captures the href of every link on the listing page that opens in a new tab.
JOB_LINK_RE = re.compile(r'<a target="_blank" href="(.*?)">')
# Stored as the requirements text when a requirement <li> has no direct text.
DEFAULT_YAOQIU = '本科学历' + '\n' + '相关工作经验1年以上'


def _labelled_cell(html_ele, cell_xpath):
    """Return the cell's first <span> text joined with the cell's first text node."""
    label = html_ele.xpath(cell_xpath + '/span')[0].text
    value = html_ele.xpath(cell_xpath + '/text()')[0]
    return label + value


# Listing pages are requested at offsets 0, 10, 20 and 30.
for i in range(0, 40, 10):
    url = LIST_URL.format(i)
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    html = response.text
    zhiwei_url_list = JOB_LINK_RE.findall(html)

    for zhiwei_item in zhiwei_url_list:
        # The captured hrefs are treated as relative to the site root.
        zhiwei_url = DETAIL_URL_PREFIX + zhiwei_item
        response = requests.get(zhiwei_url, headers=headers, timeout=REQUEST_TIMEOUT)
        html_ele = etree.HTML(response.text)

        title = html_ele.xpath('//tr[@class="h"]/td')[0].text
        print(title)

        # Row 2 has three cells, each read as "<span> text + first text node".
        # Presumably location, job type and headcount (going by the names).
        location_t = _labelled_cell(html_ele, '//tr[2]/td[1]')
        print(location_t)
        type_t = _labelled_cell(html_ele, '//tr[2]/td[2]')
        print(type_t)
        renshu_t = _labelled_cell(html_ele, '//tr[2]/td[3]')
        print(renshu_t)

        # Row 3: a heading <div> (printed only) and a list of <li> lines.
        gongzuozhize = html_ele.xpath('//tr[3]/td/div')[0].text
        print(gongzuozhize)
        zhizeli_list = html_ele.xpath('//tr[3]/td/ul/li')
        # An <li> whose .text is None becomes an empty line instead of
        # aborting the whole crawl with a TypeError.
        zhize = ''.join((li.text or '') + '\n' for li in zhizeli_list)
        print(zhize)

        # Row 4: same layout as row 3.
        gongzuoyaoqiu = html_ele.xpath('//tr[4]/td/div')[0].text
        print(gongzuoyaoqiu)
        yaoqiu_texts = [li.text for li in html_ele.xpath('//tr[4]/td/ul/li')]
        if any(text is None for text in yaoqiu_texts):
            # Same fallback the previous bare `except:` produced when an
            # <li> had no direct text, now without hiding other errors.
            yaoqiu = DEFAULT_YAOQIU
        else:
            yaoqiu = ''.join(text + '\n' for text in yaoqiu_texts)
        print(yaoqiu)

        print('-' * 50)
        data = (title, location_t, type_t, renshu_t, zhize, yaoqiu)
        myhelper.execute_modify_sql(sql, data)