import requests
import re
from lxml import etree
from mysql import MysqlHelper
# Shared database helper from the project-local `mysql` module; the scraper
# below writes rows through its `execute_modify_sql` method.
# NOTE(review): presumably the constructor opens the MySQL connection at
# import time - confirm against MysqlHelper.
mc = MysqlHelper()
# Parameterized INSERT into the `tencent` table; the four %s placeholders are
# filled, in order, by the (didian, zhiwei, people, work) tuple built in
# tencent(). MySQL accepts VALUE as a synonym for VALUES.
# NOTE(review): column names look like pinyin for location / position /
# headcount / duties - confirm against the table schema.
sql = 'INSERT INTO tencent(didian,zhiwei,people,works) VALUE (%s,%s,%s,%s)'
def tencent(url, headers, start=0, stop=30, step=10, timeout=10):
    """Scrape job postings and store one database row per posting table.

    For every listing page, the function extracts the detail-page links,
    downloads each detail page, reads three summary cells plus the list
    items from every ``table.tablelist.textl`` element, and inserts the
    result through the module-level ``mc.execute_modify_sql(sql, data)``.

    Args:
        url: Listing URL template containing one ``{}`` placeholder that
            receives the paging offset.
        headers: HTTP headers passed to every ``requests.get`` call.
        start: First paging offset (inclusive).
        stop: Paging offset at which to stop (exclusive).
        step: Increment between paging offsets. The defaults reproduce the
            previously hard-coded offsets 0, 10 and 20.
        timeout: Seconds passed as ``timeout`` to ``requests.get`` so that a
            stalled connection cannot block the crawl forever.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` (including timeouts).
    """
    # Compiled once; captures the raw (still quoted) href of each detail link.
    link_pattern = re.compile(r'<a target="_blank" href=(.*?)>')
    # Template for the n-th cell of the summary row of a posting table.
    cell_xpath = './tr[@class="c bottomline"]/td[{}]/text()'

    for offset in range(start, stop, step):
        listing_html = requests.get(
            url.format(offset), headers=headers, timeout=timeout
        ).text

        for href in link_pattern.findall(listing_html):
            # The captured attribute value still carries its quotes.
            detail_url = 'https://hr.tencent.com/' + href.replace('"', '')
            detail_html = requests.get(
                detail_url, headers=headers, timeout=timeout
            ).text

            tree = etree.HTML(detail_html)
            if tree is None:
                # Defensive: nothing to query if the parser produced no root.
                continue

            for table in tree.xpath('//table[@class="tablelist textl"]'):
                cells = [table.xpath(cell_xpath.format(n)) for n in (1, 2, 3)]
                if not all(cells):
                    # A page without the expected cells used to abort the
                    # whole crawl with IndexError; skip just this table.
                    print('skipping {}: summary row is incomplete'.format(
                        detail_url))
                    continue
                didian, zhiwei, people = (cell[0] for cell in cells)

                work = table.xpath('./tr[@class="c"]/td/ul/li/text()')
                # Stored as the list's repr with every comma removed; kept
                # unchanged so new rows match the existing stored format.
                # NOTE(review): a plain join of the items may be what was
                # intended - confirm before changing.
                work = str(work).replace(',', '')
                print(work)

                mc.execute_modify_sql(sql, (didian, zhiwei, people, work))
if __name__ == '__main__':
    # Script entry point: crawl the listing pages with a desktop-browser
    # User-Agent header.
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
    }
    # The `{}` placeholder is filled with the paging offset by tencent().
    listing_url_template = 'https://hr.tencent.com/position.php?&start={}'
    tencent(listing_url_template, request_headers)