"""Collect job-posting links from Lagou search results.

The script drives a Chrome browser through the paginated search results,
extracts the link of every listed position and appends one JSON object per
link (``{"joblink": ...}``) to ``url.json``.
"""
import json
import re
import time

import requests
from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By

# Search results page; the URL-encoded keyword decodes to "爬虫" (web crawler).
SEARCH_URL = 'https://www.lagou.com/jobs/list_%E7%88%AC%E8%99%AB?labelWords=&fromSearch=true&suginput='
# "Next page" control inside the pager at the bottom of the result list.
NEXT_BUTTON_SELECTOR = '#s_position_list > div.item_con_pager > div > span.pager_next'
# XPath selecting the href of every position link in the result list.
JOB_LINK_XPATH = (
    '//ul[@class="item_con_list"]/li[@class="con_list_item default_list"]'
    '//a[@class="position_link"]/@href'
)
SCROLL_TO_BOTTOM_JS = 'window.scrollTo(0,document.body.scrollHeight)'
OUTPUT_PATH = 'url.json'

browser = webdriver.Chrome()


def _extract_links(page_source):
    """Return the position-link hrefs found in the given page HTML."""
    return etree.HTML(page_source).xpath(JOB_LINK_XPATH)


def search(max_pages=25):
    """Yield ``{'joblink': href}`` for every position on the result pages.

    Uses the module-level ``browser``. Starting from the first result page,
    the links of the current page are yielded and then the "next" button is
    clicked, until ``max_pages`` pages have been processed.

    Args:
        max_pages: Number of result pages to scrape, counting the first one.

    Yields:
        dict: A single-key mapping ``{'joblink': <href string>}``.
    """
    browser.get(SEARCH_URL)
    # Fixed waits give the page time to render before it is read.
    time.sleep(3)
    time.sleep(2)

    for page in range(1, max_pages + 1):
        # Scrape before advancing so the first page is not skipped.
        for link in _extract_links(browser.page_source):
            yield {'joblink': link}

        if page == max_pages:
            break

        # Scroll down so the pager is in view before clicking it.
        browser.execute_script(SCROLL_TO_BOTTOM_JS)
        button = browser.find_element(By.CSS_SELECTOR, NEXT_BUTTON_SELECTOR)
        button.click()
        browser.execute_script(SCROLL_TO_BOTTOM_JS)
        time.sleep(2)


try:
    # Open the output once; UTF-8 because ensure_ascii=False may emit
    # non-ASCII characters verbatim.
    with open(OUTPUT_PATH, 'a', encoding='utf-8') as f:
        for url in search():
            f.write(json.dumps(url, ensure_ascii=False) + '\n')
finally:
    # Always shut the browser down, even if scraping fails midway.
    browser.quit()