import requests
import re#引用正则匹配
from bs4 import BeautifulSoup
# Request headers sent with every HTTP call: a desktop Chrome User-Agent so
# the requests look like they come from a regular browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/61.0.3163.100 Safari/537.36'
    ),
}
def local():
    """Fetch the Lagou home page and extract the links matching a fixed pattern.

    Returns:
        A list of ``(href, text)`` tuples, one for every anchor in the page
        source that matches the pattern below (presumably the job-category
        links -- confirm against the live page). The list is empty when
        nothing matches.

    Raises:
        requests.exceptions.RequestException: if the request fails or does
            not complete within the timeout.
    """
    url = "https://www.lagou.com/"
    # A timeout keeps the script from blocking forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
    # Anchors are recognised by their data-lg-tj-* attributes; group 1 is the
    # href and group 2 is the anchor's inner text.
    pattern = r' <a href="(.*?)" data-lg-tj-id="4A00" data-lg-tj-no=".*?" data-lg-tj-cid="idnull">(.*?)</a>'
    return re.findall(pattern, response.text)
def postion(url):
    """Scrape one listing page and return its fields as a single flat list.

    Args:
        url: Address of the page to download.

    Returns:
        A flat list of strings. Every ``.default_list`` element on the page
        contributes seven consecutive entries, in this order:

        1. text of the first ``.add`` element (location, per the original
           variable name),
        2. text of the second ``<a>`` element (company name),
        3. text of the first ``.industry`` element with spaces removed,
        4. text of the first ``.li_b_r`` element,
        5. text of the first ``.money`` element (salary),
        6. text of the first ``.li_b_l`` element after its last ``'k'``
           (presumably drops a leading salary such as "10k-20k" -- confirm),
        7. ``href`` of the first ``.position_link`` element.

        The list is empty when the page has no ``.default_list`` elements.

    Raises:
        IndexError: if a listing lacks one of the expected child elements.
        KeyError: if the ``.position_link`` element has no ``href``.
        requests.exceptions.RequestException: if the request fails or does
            not complete within the timeout.
    """
    # A timeout keeps the script from blocking forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')

    fields = []
    for listing in soup.select('.default_list'):
        place = listing.find_all(class_='add')[0].text
        company_name = listing.select('a')[1].text
        company_class = listing.find_all(class_='industry')[0].text.replace(' ', '')
        company_speak = listing.find_all(class_='li_b_r')[0].text
        work_money = listing.find_all(class_='money')[0].text
        work_need = listing.find_all(class_='li_b_l')[0].text.split('k')[-1]
        # Named ``link`` so the ``url`` parameter is not shadowed.
        link = listing.find_all(class_='position_link')[0]['href']
        fields.extend([
            place,
            company_name,
            company_class,
            company_speak,
            work_money,
            work_need,
            link,
        ])
    # NOTE(review): the return sits after the loop so that every listing is
    # collected; confirm this matches the intended behaviour.
    return fields
# Driver: visit every link found on the home page and print each scraped
# field on its own line. ``title`` (the anchor text) is unpacked but not used.
for url, title in local():
    for item in postion(url):
        print(item)
# There are still some shortcomings here that I will work on improving; this is provided for reference only.