# 爬虫课程设计(爬虫代码)  — Web-scraper course project (crawler code)

import bs4
from selenium import webdriver
import time
import pymysql
import csv


def getWebDriver(url):
    """Launch a maximized Chrome browser, navigate to *url*, and return the driver.

    The browser is configured with a Chinese (zh_CN, UTF-8) locale, a desktop
    Chrome user-agent string, and a 100-second implicit wait for element lookups.
    """
    chrome_options = webdriver.ChromeOptions()
    # Force a Chinese locale so the target site renders in Chinese.
    chrome_options.add_argument('lang=zh_CN.UTF-8')
    # Spoof a regular desktop Chrome user-agent to look less like a bot.
    chrome_options.add_argument(
            'user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"')

    driver = webdriver.Chrome(chrome_options=chrome_options)
    # Element lookups wait up to 100 s before raising NoSuchElementException.
    driver.implicitly_wait(100)
    driver.maximize_window()
    driver.get(url)
    return driver



def getjoplist(wd):
    """Scrape all job postings visible on the current 51job results page.

    Args:
        wd: a selenium WebDriver currently showing a 51job search-result page.

    Returns:
        A list of rows, one per posting:
        [job name, salary, city, requirements, company type, company industry].
        Empty salary / company fields are recorded as the string "null".
    """
    rows = []
    html = wd.find_element_by_class_name("j_joblist").get_attribute("outerHTML")
    soup = bs4.BeautifulSoup(html, "html.parser")
    for job in soup.find_all("div", class_="e"):
        link = job.find("a")
        jobname = link.find("p", class_="t").find_all("span")[0].attrs["title"]

        info_spans = link.find("p", class_="info").find_all("span")
        jobsalary = info_spans[0].text or "null"

        # info_spans[1] looks like: "南京  |  3-4年经验  |  本科  |  招1人"
        raw_info = str(info_spans[1].text)
        jobcity = raw_info.split("|")[0].replace(" ", "")
        # Everything after the city, separators kept.  NOTE(review): the
        # original [:-1] slice drops the final character of the string —
        # kept as-is to preserve output, but verify it is not truncating
        # real data (e.g. the last character of "招1人").
        jobneed = raw_info[len(jobcity) + 2:-1].replace(" ", "")

        company = job.find("div", class_="er")
        companyclassify = str(company.find("p", class_="dc at").text).replace(" ", "") or "null"
        companyclassify2 = str(company.find("p", class_="int at").text).replace(" ", "") or "null"

        rows.append([jobname, jobsalary, jobcity, jobneed,
                     companyclassify, companyclassify2])
    return rows

def changePage(wd):
    """Advance to the next results page by clicking the pager's "next" button.

    Propagates whatever selenium raises (e.g. NoSuchElementException) when
    the pager or the next-button cannot be found — the caller relies on
    that to detect/handle the last page.
    """
    # Scope the lookup to the pager container so an unrelated "next"
    # element elsewhere on the page cannot be clicked by mistake.
    pager = wd.find_element_by_css_selector('div[class="j_page"]')
    next_button = pager.find_element_by_css_selector('li[class="next"]')
    next_button.click()


# ---- scraping driver script --------------------------------------------
# Starting point: first page of a 51job search over all regions/categories.
url = "https://search.51job.com/list/000000,000000,0000,00,9,99,+,2,1.html?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare="
wd = getWebDriver(url)

file_path = "d:/dst/爬虫课程设计3.csv"

# `with` guarantees the CSV file is flushed and closed even if scraping
# dies part-way through; newline='' is required by the csv module so rows
# are not double-spaced on Windows.
with open(file_path, 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)

    for page in range(2000):
        print(page)
        for row in getjoplist(wd):
            writer.writerow(row)
            print(row)

        # The "next" click occasionally fails (slow render / stale pager);
        # retry up to three times — same retry budget as the original's
        # nested try blocks — then let the error propagate.
        for attempt in range(3):
            try:
                changePage(wd)
                break
            except Exception:
                if attempt == 2:
                    raise
# (CSDN page residue — comment/paywall widget text from the blog extraction — removed.)