from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from lxml import etree


class Tencent():
    """Scrape job listings from one Tencent careers search page.

    Each instance starts its own headless Chrome session and loads ``url``
    in the constructor. Call :meth:`driver` to parse the loaded page, and
    :meth:`close` (or use the instance as a context manager) to shut the
    browser down afterwards.
    """

    def __init__(self, url):
        """Start headless Chrome and load ``url``.

        Headless mode lets the scrape run without opening a browser window.

        Args:
            url: Address of the search-results page to load.
        """
        self.url = url
        self.option = Options()
        self.option.add_argument('--headless')
        # ``options=`` is the supported keyword; the older ``chrome_options=``
        # spelling is deprecated and rejected by newer Selenium 4 releases.
        self.diver = webdriver.Chrome(options=self.option)
        try:
            self.diver.get(url)
        except Exception:
            # Do not leave an orphaned browser process behind if the page
            # load fails before the caller ever gets a handle on the instance.
            self.diver.quit()
            raise

    def __enter__(self):
        """Return the instance so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Shut the browser down when the ``with`` block exits."""
        self.close()
        return False

    def close(self):
        """Quit the browser session started by the constructor."""
        self.diver.quit()

    def save(self, content):
        """Append ``content`` plus a trailing newline to ``tencent.doc``.

        Args:
            content: Text to append to the output file.
        """
        with open('tencent.doc', 'a+', encoding='utf-8') as f:
            f.write(content + '\n')

    def driver(self):
        """Parse the loaded page; save and print every job listing found.

        For each ``div`` with class ``recruit-list`` the first ``a/h4`` text
        is taken as the title and the first text node of the second ``a/p``
        as the info line. Listings missing either piece are skipped instead
        of raising ``IndexError`` and aborting the rest of the page.
        """
        # Source of the page as currently rendered by the browser.
        response = self.diver.page_source
        tree = etree.HTML(response)
        for item in tree.xpath('//div[@class="recruit-list"]'):
            titles = item.xpath('.//a/h4/text()')
            infos = item.xpath('.//a/p[2]/text()')
            if not titles or not infos:
                continue
            title = titles[0]
            info = infos[0]
            self.save('\n'.join([title, info]))
            print(title)
            print(info)


if __name__ == '__main__':
    for i in range(1, 10):
        url = 'https://careers.tencent.com/search.html?index=%s&keyword=python' % str(i)
        main = Tencent(url)
        try:
            main.driver()
        finally:
            # One Chrome instance is started per page; always shut it down so
            # the loop does not accumulate browser processes.
            main.close()
        print('**'*50)
Selenium 无界面(headless)模式爬取腾讯招聘的简单案例
最新推荐文章于 2024-05-01 15:20:15 发布