代码
import requests
import bs4
# Crawl the paginated job listings and collect one row per posting in ``ll``.
# ``ll`` begins with the header row so it can be written to a sheet as-is.
ll = []
list__ = ["职位", "公司", "规模", "经验", "学历", "工资"]
ll.append(list__)

# The request headers are identical for every page, so build them once.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.113 Safari/537.36'}

for page in range(1, 68):
    # Progress/debug output: the page number and everything collected so far.
    print(page)
    print(ll)

    url = f"需要爬的URLn={page}"
    try:
        # Without a timeout a stalled connection would hang the crawl forever.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as exc:
        # Skip this page rather than lose the rows gathered from earlier pages.
        print(f"page {page} skipped: {exc}")
        continue
    response.encoding = 'utf-8'
    bs = bs4.BeautifulSoup(response.text, 'html.parser')

    # First column: link text of every <a class="job-name">, newlines removed.
    titles = [
        str(tag.text).replace('\n', '')
        for tag in bs.find_all('a', class_='job-name')
    ]

    # Second column: link text of every <a class="job-company-name">.
    companies = [tag.text for tag in bs.find_all('a', class_='job-company-name')]

    # Fourth to sixth columns: the text of each <div class="job-desc"> split
    # on "|" into three fields, newlines removed.
    desc_first = []
    desc_second = []
    desc_third = []
    for tag in bs.find_all('div', class_='job-desc'):
        # Pad with empty strings so a description with fewer than three
        # fields produces blank cells instead of raising IndexError, while
        # keeping the three lists positionally aligned.
        first, second, third = (tag.text.split("|") + ['', ''])[:3]
        desc_first.append(str(first).replace('\n', ''))
        desc_second.append(str(second).replace('\n', ''))
        desc_third.append(str(third).replace('\n', ''))

    # Third column: the first line of the second "|"-separated field of each
    # <span class="job-desc">; blank when that field is absent.
    scales = []
    for tag in bs.find_all('span', class_='job-desc'):
        parts = tag.text.split('|')
        second = parts[1] if len(parts) > 1 else ''
        scales.append(str(second).split("\n")[0])

    # Combine the per-column lists into rows in header order. zip() stops at
    # the shortest list, so a page whose selectors return different counts
    # contributes only as many rows as the shortest column.
    ll.extend(
        list(row)
        for row in zip(titles, companies, scales,
                       desc_first, desc_second, desc_third)
    )
import openpyxl
# Dump every collected row from ``ll`` into one worksheet and save the
# workbook to disk.
workbook = openpyxl.Workbook()
worksheet = workbook.active
worksheet.title = '广州'
for row in ll:
    worksheet.append(row)
workbook.save("F://爬虫资料/工作职位.xlsx")
workbook.close()
爬取的部分资料