# -- BeautifulSoup: the find() and findAll()/find_all() methods
from bs4 import BeautifulSoup as bs
import requests as res
import time
# f = open(r'E:\PythonData\非代码数据\前程无忧源码.txt',encoding = 'gbk').read()
# Column accumulators: each scraped job row appends exactly one entry to every
# list, so the four lists always stay the same length.
(post1, comp1, site1, salary1) = ([], [], [], [])

# 51job search-result URL; `{}` is filled with the page number. The keyword
# segment is the double URL-encoded form of "大数据分析" (big data analysis).
# NOTE: the query string must contain `&degreefrom=99`; an HTML-entity decode
# of `&deg` had previously corrupted it into `°reefrom=99`.
URL_TEMPLATE = (
    'https://search.51job.com/list/000000,000000,0000,00,9,99,'
    '%25E5%25A4%25A7%25E6%2595%25B0%25E6%258D%25AE%25E5%2588%2586%25E6%259E%2590,2,{}.html'
    '?lang=c&stype=&postchannel=0000&workyear=99&cotype=99&degreefrom=99'
    '&jobterm=99&companysize=99&providesalary=99&lonlat=0%2C0&radius=-1'
    '&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line='
    '&specialarea=00&from=&welfare='
)

# Browser-like User-Agent sent with every request (loop-invariant, built once).
head = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0'}


def _field_text(row, name, **attrs):
    """Return the stripped text of the first ``name`` tag found under ``row``.

    ``attrs`` are forwarded to ``row.find`` as attribute filters (for example
    ``class_="t2"``). An empty string is returned when no tag matches or when
    the matched tag has no single string child (``.string`` is ``None``), so a
    malformed row yields a blank cell instead of aborting the scrape. Unlike a
    bare ``except:``, this does not swallow KeyboardInterrupt or real bugs.
    """
    tag = row.find(name, **attrs)
    if tag is None:
        return ''
    text = tag.string
    if text is None:
        return ''
    return text.strip()


for page in range(1, 3):
    url = URL_TEMPLATE.format(page)
    # A timeout keeps a stalled connection from hanging the script forever.
    html = res.get(url, headers=head, timeout=30)
    html.encoding = 'gbk'  # decode the response body as GBK
    soup = bs(html.text, 'html.parser')
    time.sleep(3)  # pause between page requests

    table = soup.find('div', class_="dw_table", id="resultList")
    if table is None:
        # Layout change or blocked request: report it and move on rather than
        # crashing with an AttributeError on None.
        print('page %d: result table not found, skipped' % page)
        continue

    # The first "el" element is skipped (presumably the table header row).
    for row in table.find_all(class_="el")[1:]:
        post1.append(_field_text(row, 'a', target="_blank"))
        comp1.append(_field_text(row, 'span', class_="t2"))
        site1.append(_field_text(row, 'span', class_="t3"))
        salary1.append(_field_text(row, 'span', class_="t4"))
    print('第%d页' % page)

# Sanity check: all four columns should report the same length.
print(len(post1), len(comp1), len(site1), len(salary1))
import pandas as pd
# Pair each output column name with its scraped list, in column order.
dict1 = dict(zip(
    ('post', 'company', 'site', 'salary'),
    (post1, comp1, site1, salary1),
))
# Build the table and write it out as a GBK-encoded CSV file.
df = pd.DataFrame(data=dict1)
df.to_csv(path_or_buf=r'E:\PythonData\Homework\全程无忧2.csv', encoding='gbk')
# Trailing bare expression: displays the frame when run as a notebook cell.
df