import bs4
from selenium import webdriver
import time
import pymysql
import csv
def getWebDriver(url):
    """Start a maximized Chrome driver with a Chinese locale and a fixed
    desktop user-agent, navigate to *url*, and return the driver.

    :param url: page to open once the browser is up
    :return: a ready ``selenium.webdriver.Chrome`` instance
    """
    # Browser settings.
    options = webdriver.ChromeOptions()
    # Request Chinese page content.
    options.add_argument('lang=zh_CN.UTF-8')
    # Spoof a regular desktop browser so the site serves normal markup.
    options.add_argument(
        'user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"')
    # ``chrome_options=`` was deprecated and removed in selenium 4;
    # ``options=`` has been accepted since selenium 3.8.
    wd = webdriver.Chrome(options=options)
    # Implicit wait so slow-loading elements do not raise immediately.
    # NOTE(review): 100 s is a very long ceiling — confirm it is intentional.
    wd.implicitly_wait(100)
    wd.maximize_window()
    wd.get(url)
    return wd
def getjoplist(wd):
    """Scrape the currently loaded 51job results page into a list of rows.

    Each row is ``[name, salary, city, requirements, company_type, industry]``;
    empty salary/company fields are recorded as the literal string "null".

    :param wd: selenium driver currently on a 51job search-results page
    :return: list of 6-element lists, one per job posting
    """
    jobs = []  # renamed from ``list`` — do not shadow the builtin
    # Grab the job-list container's HTML and hand it to BeautifulSoup.
    # "class name" is the locator-strategy string behind By.CLASS_NAME; the
    # old find_element_by_class_name helper was removed in selenium 4.
    text = wd.find_element("class name", "j_joblist").get_attribute("outerHTML")
    soup = bs4.BeautifulSoup(text, "html.parser")
    for item in soup.find_all("div", class_="e"):
        link = item.find("a")
        jobname = link.find("p", class_="t").find_all("span")[0].attrs["title"]
        jobsalary = link.find("p", class_="info").find_all("span")[0].text
        if not jobsalary:
            jobsalary = "null"
        # The info line looks like "南京 | 3-4年经验 | 本科 | 招1人":
        # city is the part before the first "|", the rest are requirements.
        info = link.find("p", class_="info").find_all("span")[1].text
        jobcity = str(info).split("|")[0].replace(" ", "")
        # NOTE(review): this slice mixes the de-spaced city length with the
        # original (spaced) string — kept byte-identical to preserve output.
        jobneed = str(info)[len(jobcity) + 2:-1].replace(" ", "")
        company = item.find("div", class_="er")
        companyclassify = str(company.find("p", class_="dc at").text).replace(" ", "")
        if not companyclassify:
            companyclassify = "null"
        companyclassify2 = str(company.find("p", class_="int at").text).replace(" ", "")
        if not companyclassify2:
            companyclassify2 = "null"
        jobs.append([jobname, jobsalary, jobcity, jobneed,
                     companyclassify, companyclassify2])
    return jobs
def changePage(wd):
    """Advance to the next 51job results page by clicking the pager's
    "next" button.

    Raises selenium's NoSuchElementException when the pager or the next
    button is absent (e.g. on the last page) — callers retry on failure.

    :param wd: selenium driver currently on a results page
    """
    # "css selector" is the locator-strategy string behind By.CSS_SELECTOR;
    # the old find_element_by_css_selector helper was removed in selenium 4.
    pager = wd.find_element("css selector", 'div[class="j_page"]')
    # Renamed from ``next`` — do not shadow the builtin.
    next_button = pager.find_element("css selector", 'li[class="next"]')
    next_button.click()
# 51job search URL. BUGFIX: the original query string contained "°reefrom"
# — "&deg" in "&degreefrom" had been HTML-entity-decoded to "°" somewhere
# upstream; restored to the intended "&degreefrom=99" parameter.
url = ("https://search.51job.com/list/000000,000000,0000,00,9,99,+,2,1.html"
       "?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99"
       "&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare=")
wd = getWebDriver(url)

file_path = "d:/dst/爬虫课程设计3.csv"
# NOTE(review): plain utf-8 without BOM — Excel may misrender Chinese text;
# consider encoding="utf-8-sig" if the CSV is meant to be opened in Excel.
with open(file_path, 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    for page in range(2000):
        print(page)
        # Scrape the current page and stream each row straight to the CSV.
        for row in getjoplist(wd):
            writer.writerow(row)
            print(row)
        # Paging is flaky: retry the "next" click up to three times before
        # letting the exception propagate (replaces the original nested
        # bare try/except chain, which silently swallowed all errors).
        for attempt in range(3):
            try:
                changePage(wd)
                break
            except Exception:
                if attempt == 2:
                    raise
# 爬虫课程设计(爬虫代码)  [scraper course project — blog-post title residue]
# 最新推荐文章于 2024-07-18 12:06:32 发布  [blog publish-date residue; commented out so the file parses]