导入爬虫库
import requests
from bs4 import BeautifulSoup
导入操作 Excel 的库
import xlwt
import xlrd
import xlutils
网页页面
代码
import requests
from bs4 import BeautifulSoup
import xlwt
import xlrd
import xlutils
#得到excle对象
book=xlwt.Workbook()
sheet=book.add_sheet('sheet1')
listall=[]#用于存储爬去的职位名称和薪资
#爬取1到29页所有的信息
for i in range(1,30):
URL='https://www.jobui.com/jobs?jobKw=linux&cityKw=%E5%8C%97%E4%BA%AC&n='+str(i)
#wb_data = requests.get(URL)
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'}
wb_data = requests.get(URL, headers=headers)
print(wb_data)#请求成功200
soup = BeautifulSoup(wb_data.content, 'lxml')
#print(soup)
names=soup.select('div.astruct.cfix > div.aleft > div.m-box.j-jobInfo > div.j-recommendJob > div > div.job-content-box > div.job-content > div > a > h3')
#print(names)
prices=soup.select("div.astruct.cfix > div.aleft > div.m-box.j-jobInfo > div.j-recommendJob > div > div.job-content-box > div.job-content > div > div > span.job-pay-text")
#print(prices)
for n,p in zip(names,prices):
listall.append([n.text,p.text])#信息存入listall列表中
#print("招聘岗位为:"+n.text+",薪资为:"+p.text)
#写入excle
line=0 #控制的是行
for stu in listall:
col=0
for s in stu:
sheet.write(line,col,s)#写入excle
col+=1
line+=1
book.save('linux.xls')#保存,和本python文件同目录
结果: