# 用Python爬取智联招聘网站“数据分析”相关岗位信息
# _*_ coding: utf-8 _*_
from bs4 import BeautifulSoup
import requests
import csv
import json
import pandas as pd
import numpy as np
#定义函数:请求下载页面源代码
def download(url, timeout=10):
    """Request a page and return its source as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, requests waits indefinitely on a stalled connection,
            which would freeze the whole crawl.

    Returns:
        The response body as a ``str`` (``Response.text``).

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            server does not answer within ``timeout`` seconds.
    """
    # A browser-like User-Agent is sent with every request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0'}
    req = requests.get(url, headers=headers, timeout=timeout)
    return req.text
#定义函数:解析网页内容,抓取职位信息
def get_content(html):
    """Parse a listings page and extract one record per job table.

    Args:
        html: HTML source of the page to parse.

    Returns:
        A list of ``[zwmc, gsmc, zwyx, gzdd, brief]`` lists, one per job
        table after the first. The first four values are the text of the
        first-row cells 0, 2, 3 and 4 (presumably job title, company name,
        salary and location -- confirm against the page); ``brief`` is the
        text of the ``newlist_deatil_two`` item in the detail row. An empty
        list is returned when the page has no ``<body>`` or no
        ``newlist_list_content`` container.

    Raises:
        AttributeError, IndexError: If a job table does not have the
            expected row/cell/link structure.
    """
    soup = BeautifulSoup(html, 'lxml')
    body = soup.body
    if body is None:
        return []

    # Locate the div that holds all the listing tables.
    data_main = body.find('div', {'class': 'newlist_list_content'})
    if data_main is None:
        return []

    zw_list = []
    # The first table is skipped, exactly as the original loop did
    # (presumably it is the column-header table -- TODO confirm).
    for table in data_main.find_all('table')[1:]:
        tbs = table.find('tr').find_all('td')  # find_all yields a list
        zwmc = tbs[0].find('a').get_text()
        gsmc = tbs[2].find('a').get_text()
        zwyx = tbs[3].get_text()
        gzdd = tbs[4].get_text()

        bt_brief = table.find('tr', {'class': 'newlist_tr_detail'})
        # NOTE(review): 'deatil' looks misspelled but may be the page's
        # real class name; the selector is kept exactly as written.
        brief = bt_brief.find('li', {'class': 'newlist_deatil_two'}).get_text()

        # The original appended an undefined name `xl` here (NameError);
        # `brief` was extracted but never stored, so it fills that slot.
        zw_list.append([zwmc, gsmc, zwyx, gzdd, brief])
    return zw_list