"""Scrape job descriptions from the lagou.com search results for "爬虫".

A Selenium-driven Chrome session pages through the search results and
collects the posting links; each posting is then downloaded with
``requests`` and its description text is appended to ``1.txt``.
"""
import json
import random
import re
import time

import requests
from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By

SEARCH_URL = 'https://www.lagou.com/jobs/list_%E7%88%AC%E8%99%AB?labelWords=&fromSearch=true&suginput='
NEXT_PAGE_SELECTOR = '#s_position_list > div.item_con_pager > div > span.pager_next'
LINK_XPATH = (
    '//ul[@class="item_con_list"]/li[@class="con_list_item default_list"]'
    '//a[@class="position_link"]/@href'
)
SCROLL_TO_BOTTOM = 'window.scrollTo(0,document.body.scrollHeight)'
OUTPUT_PATH = '1.txt'
# Seconds to wait for a posting page before giving up instead of hanging.
REQUEST_TIMEOUT = 30

# Pool of User-Agent headers; one is picked at random for every request.
USER_AGENT_LIST = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
]

# Captures the text inside the first <div> after the "职位描述" heading.
# Compiled once at import time; raw string so that \s is a regex escape.
DESCRIPTION_PATTERN = re.compile(
    r'class="description">职位描述:</h3>.*?<div>\s+(.*?)\s+</div', re.S)

browser = webdriver.Chrome()


def search():
    """Yield job-posting URLs from the lagou.com search result pages.

    Opens the search page in the module-level ``browser``, then 24 times
    clicks the "next page" control and yields every posting link found on
    the page displayed afterwards.

    Yields:
        The ``href`` value of each matching posting link.
    """
    browser.get(SEARCH_URL)
    time.sleep(2)  # give the result list time to render
    # NOTE(review): "next" is clicked before the first read, so links on the
    # initial result page are never yielded - confirm this is intended.
    for _ in range(1, 25):
        # The pager sits at the bottom of the page; scroll it into view.
        browser.execute_script(SCROLL_TO_BOTTOM)
        browser.find_element(By.CSS_SELECTOR, NEXT_PAGE_SELECTOR).click()
        browser.execute_script(SCROLL_TO_BOTTOM)
        time.sleep(2)  # wait for the next page of results to load
        page = etree.HTML(browser.page_source)
        yield from page.xpath(LINK_XPATH)


def _extract_description(html):
    """Return the job description found in *html* with <p> tags and newlines removed.

    Returns an empty string when the description section is not found.
    """
    text = ''.join(DESCRIPTION_PATTERN.findall(html))
    for token in ('<p>', '</p>', '\n'):
        text = text.replace(token, '')
    return text


try:
    for link in search():
        time.sleep(3)
        headers = {'User-Agent': random.choice(USER_AGENT_LIST)}
        response = requests.get(link, headers=headers, timeout=REQUEST_TIMEOUT)
        content = _extract_description(response.text)
        # One line per posting, appended so earlier results are kept.
        with open(OUTPUT_PATH, 'a', encoding='utf-8') as f:
            f.write(content + '\n')
        time.sleep(10)  # throttle requests between postings
finally:
    # Always shut Chrome down, even if scraping fails part-way through.
    browser.quit()