使用 Selenium 无界面（headless）模式爬取腾讯招聘的简单案例

最新推荐文章于 2024-05-01 15:20:15 发布

weixin_45987650

最新推荐文章于 2024-05-01 15:20:15 发布

阅读量197

点赞数

分类专栏： python 文章标签： python

本文链接：https://blog.csdn.net/weixin_45987650/article/details/104659006

版权

python 专栏收录该内容

13 篇文章 0 订阅

订阅专栏

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from lxml import etree


class Tencent:
    """Scrape job postings from one search-results page using headless Chrome.

    The browser session is started in ``__init__`` and stays alive until
    ``close()`` is called (or the instance is used as a context manager).
    """

    def __init__(self, url):
        """Start a headless Chrome session and load *url*.

        No browser window is opened.

        Args:
            url: Address of the page to load.
        """
        self.url = url
        self.option = Options()
        self.option.add_argument('--headless')
        # ``chrome_options`` was deprecated and later removed from Selenium;
        # ``options`` is the supported keyword.
        self.diver = webdriver.Chrome(options=self.option)
        try:
            self.diver.get(url)
        except Exception:
            # Do not leave the browser process behind if the page load fails.
            self.diver.quit()
            raise

    def __enter__(self):
        """Return the instance so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Quit the browser when the ``with`` block ends; errors propagate."""
        self.close()

    def close(self):
        """Quit the browser session started in ``__init__``."""
        self.diver.quit()

    @staticmethod
    def _first_text(nodes):
        """Return the first item of *nodes*, or ``None`` when it is empty."""
        return nodes[0] if nodes else None

    def save(self, content):
        """Append *content* and a trailing newline to ``tencent.doc``.

        The file is opened relative to the current working directory and is
        written as UTF-8.

        Args:
            content: Text to append.
        """
        with open('tencent.doc', 'a', encoding='utf-8') as f:
            f.write(content + '\n')

    def driver(self):
        """Parse the loaded page, then save and print each posting found.

        For every ``div`` with class ``recruit-list`` the text of ``a/h4`` and
        of the second ``a/p`` is extracted, appended to the output file via
        ``save`` and printed. Entries missing either piece are skipped rather
        than raising ``IndexError``.
        """
        # NOTE(review): the source is read immediately after ``get()``; if the
        # listing is filled in by JavaScript later, an explicit wait may be
        # needed here -- confirm against the live page.
        page = etree.HTML(self.diver.page_source)
        for card in page.xpath('//div[@class="recruit-list"]'):
            title = self._first_text(card.xpath('.//a/h4/text()'))
            info = self._first_text(card.xpath('.//a/p[2]/text()'))
            if title is None or info is None:
                continue
            self.save('\n'.join([title, info]))
            print(title)
            print(info)


if __name__ == '__main__':
    # Scrape result pages 1 through 9 of the "python" keyword search.
    for i in range(1, 10):
        url = 'https://careers.tencent.com/search.html?index=%s&keyword=python' % i
        page = Tencent(url)
        try:
            page.driver()
        finally:
            # Each Tencent instance starts its own Chrome process; quit it
            # before moving on so browsers do not pile up across pages.
            page.diver.quit()
        print('**' * 50)