使用 Selenium 无头(headless)模式爬取腾讯招聘的简单案例

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from lxml import etree


class Tencent():
    """Scrape job postings from one Tencent careers search page.

    The page at ``url`` is loaded in headless Chrome when the instance is
    created. ``driver()`` then extracts the title and info text of every
    posting found in the page source, appends them to ``tencent.doc`` and
    prints them.

    Call ``close()`` (or use the instance as a context manager) when
    finished so the Chrome process is shut down.
    """

    def __init__(self, url):
        """Start headless Chrome and load ``url``.

        Args:
            url: Address of the search-result page to scrape.
        """
        self.url = url
        self.option = Options()
        # Run Chrome without opening a visible browser window.
        self.option.add_argument('--headless')
        # ``options=`` is the supported keyword; ``chrome_options=`` was
        # deprecated and later removed from Selenium 4.
        self.diver = webdriver.Chrome(options=self.option)
        try:
            self.diver.get(url)
        except Exception:
            # Do not leave a browser process behind if the page load fails.
            self.diver.quit()
            raise

    def __enter__(self):
        """Return the instance so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Shut the browser down when the ``with`` block ends."""
        self.close()

    def close(self):
        """Quit the browser started by ``__init__``; safe to call twice."""
        diver = getattr(self, 'diver', None)
        if diver is not None:
            diver.quit()
            self.diver = None

    def save(self, content):
        """Append ``content`` plus a trailing newline to ``tencent.doc``.

        Args:
            content: Text to append (written as UTF-8).
        """
        with open('tencent.doc', 'a+', encoding='utf-8') as f:
            f.write(content + '\n')

    def driver(self):
        """Parse the loaded page, then save and print every posting found.

        Listing blocks that lack a title or an info paragraph are skipped
        instead of aborting the whole page with an ``IndexError``.
        """
        # NOTE(review): the page source is read right after ``get()``; if the
        # listings are rendered asynchronously they may not be present yet --
        # confirm against the live site.
        response = self.diver.page_source  # HTML of the loaded page
        it = etree.HTML(response)
        if it is None:
            # Nothing parseable came back, so there is nothing to extract.
            return
        for t in it.xpath('//div[@class="recruit-list"]'):
            titles = t.xpath('.//a/h4/text()')
            infos = t.xpath('.//a/p[2]/text()')
            if not titles or not infos:
                continue
            title = titles[0]
            info = infos[0]
            self.save('\n'.join([title, info]))
            print(title)
            print(info)


if __name__ == '__main__':
    # Scrape result pages 1 through 9 of the "python" keyword search.
    for i in range(1, 10):
        url = 'https://careers.tencent.com/search.html?index=%s&keyword=python' % str(i)
        main = Tencent(url)
        try:
            main.driver()
        finally:
            # Every Tencent instance starts its own Chrome process; quit it
            # here so nine browsers are not left running after the loop.
            main.diver.quit()
        print('**'*50)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值