python3爬虫 selenium+pyquery爬取人才网简历实战

不废话,直接贴代码,各位看官要是觉得可以的话,麻烦点个赞~
# Log in to xmrc.com.cn with Selenium and prepare the résumé-search URL template.
# NOTE(review): the original post was copy-paste mangled (curly quotes, split
# string literals, `//*` XPaths collapsed to `//`); restored to valid Python here.
import requests, pymysql, pyquery, time
from bs4 import BeautifulSoup
from selenium import webdriver

url = 'https://www.xmrc.com.cn/'
# Headers captured from a logged-in browser session; the Cookie is session-bound
# and will expire — presumably kept only as a fallback for `requests` calls.
headers = {
    'Cookie': 'www.xmrc.com.cn=83593967; ASP.NET_SessionId=bv03pjak3tivsrsssk21fyjx; UM_distinctid=16ab96c7428597-06cdcf6387df49-3e385e0c-1fa400-16ab96c7429468; CNZZDATA3869267=cnzz_eid%3D1712410708-1557886698-https%253A%252F%252Fwww.baidu.com%252F%26ntime%3D1557886698; __utma=5198522.1984520062.1557889054.1557889054.1557889054.1; __utmc=5198522; __utmz=5198522.1557889054.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utmt=1; _temp=cid=E3D1166BF20D73011A243C31986F11D3C4F9F990675F7180160A3E9D525D0E67EB272594E2833B2F0AC8EC3AA00A186193864A4D2EDF2DDEEDA866F24F7AB4240FE374470D30B6BC2C12C69946E0A1D645DD54E32A9D21D34F1BBA502088B2CD4A03D1B4C6B4517B&agentId=977529CD953B5AADB8B153D173E4469655B7543882203307C492503BED8EEF8F60F2AE84D5B8B50A132E67A4FE6D87E8EFF53764AC08EDBC9E3074AE8830BDEA383386CC91A7510BC6DD9A0F8D91EB1E51411A237DE1ACFD&tag=5d49eccd38744fa7809d9fec61c6f0c3; _trail=cid=909235&cidn=59B8AD936D02C5AF09FB488AA0F9077C4CCE102DFEE65A0EC0224389A4C177C5&sip=137; _cid=6627E8830E488FC7; __utmb=5198522.57.10.1557889054',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
    'Referer': 'https://www.xmrc.com.cn/net/Enterprise/Resultg.aspx?a=a&g=g&searchtype=1&releasetime=60&ffunction=9927&sortby=updatetime&ascde',
}

browser = webdriver.Chrome()
browser.get(url)

# Click the login image link on the homepage.
login = browser.find_element_by_xpath('//*[@id="container"]/table[1]/tbody/tr[3]/td/table/tbody/tr[1]/td[3]/a/img')
login.click()
time.sleep(1)

# Fill in the username field ('xxxxx' is a placeholder — supply real credentials).
user = browser.find_element_by_xpath('//*[@id="ctl00_Body_ctl00_UsernameTextBox"]')
user.send_keys('xxxxx')
time.sleep(1)

# Fill in the password field (placeholder).
password = browser.find_element_by_xpath('//*[@id="ctl00_Body_ctl00_PasswordTextBox"]')
password.send_keys('xxxx')
time.sleep(1)

# Submit the login form.
button = browser.find_element_by_xpath('//*[@id="ctl00_Body_ctl00_LoginButton"]')
button.click()
time.sleep(1)

# Open the "search candidates" menu entry.
gangwei = browser.find_element_by_xpath('//*[@id="menuTable"]/tbody/tr[34]/td/a')
gangwei.click()
time.sleep(1)

# Paginated results URL template; {} is filled with the page index.
rep_ = 'https://www.xmrc.com.cn/net/Enterprise/Resultg.aspx?a=a&g=g&searchtype=1&releasetime=60&ffunction=9929&sortby=updatetime&ascdesc=Desc&PageIndex={}'
search_position = '客服及技术支持'
# Today's date stamp, e.g. "2019-0516". The original hard-coded the year
# ('2019-' + '%m%d'); '%Y-%m%d' produces the identical string without year rot.
data = time.strftime('%Y-%m%d', time.localtime())
for n in range(1, 68):
rep = rep_.format(n)
browser.get(rep)
text = browser.page_source
time.sleep(1.5)
doc = pyquery.PyQuery(text, parser=“html”) # parser=“html” 当无法提取标签时,加入这个
info = doc.find(‘div[id=“ctl00 B o d

  • 2
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值