直接上主代码
from bs4 import BeautifulSoup
import requests
import ip_proxy
from urllib import parse
# Default HTTP request headers for this module. The User-Agent string
# presents the client as desktop Chrome 68 on 64-bit Windows.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/68.0.3440.106 Safari/537.36'
    ),
}
def get_boss_info(my_ip,detailed_url):
#url = 'https://www.zhipin.com/job_detail/7e883f0c3a336cb51n142968FFM~.html?ka=search_list_1'
proxy = {
'http': 'http://' + my_ip.ip_proxy_str,
'https': 'http://' + my_ip.ip_proxy_str
}
response = requests.get(detailed_url, headers=headers, proxies = proxy, timeout=5)
soup = BeautifulSoup(response.text, 'lxml')
title = soup.find('h1').text
#div_ele = soup.find('div', class_="name")
#print(div_ele)
salary = soup.find('span', class_="badge