Straight to the code:
import requests
from bs4 import BeautifulSoup
import time
import pymysql
def get_info(URL):
    # Fetch one job detail page and pull out every field we store
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0'}
    try:
        wb_data = requests.get(URL, headers=headers)
        soup = BeautifulSoup(wb_data.content, 'html.parser')
        name = soup.select('div.pos_base_info > span.pos_title')[0].text.strip()  # job title
        last = soup.select('span.pos_name')[0].text.strip()                       # position name
        hy = soup.select('p.comp_baseInfo_belong > a')[0].text.strip()            # industry the company belongs to
        gs = soup.select('div.baseInfo_link > a')[0].text.strip()                 # company name
        # First six welfare tags, joined with '|'
        bq = '|'.join(s.text.strip() for s in soup.select('div.pos_welfare > span.pos_welfare_item')[:6])
        # First three requirement items, joined with '|'
        yq = '|'.join(s.text.strip() for s in soup.select('div.pos_base_condition > span.item_condition')[:3])
        jg = soup.select('span.pos_salary')[0].text.strip()                       # salary
        # Work address: area and detail, joined with '-'
        address = '-'.join(s.text.strip() for s in soup.select('div.pos-area > span')[:2])
        ms = soup.select('div.des')[0].text                                       # job description
        js = soup.select('div.shiji > p')[0].text                                 # detail text from div.shiji
        job_all = [name, last, hy, gs, bq, yq, jg, address, ms, js]
        print('Data fetched successfully!')
        return job_all
    except Exception:
        # Any missing selector or failed request lands here; the caller filters out the None
        print('Scrape failed for', URL)
        return None
def database_found():
    # Create the database and table if they do not exist yet
    conn = pymysql.connect(host='localhost', user='root', password='root')
    cursor = conn.cursor()
    cursor.execute('CREATE DATABASE IF NOT EXISTS 58_job DEFAULT CHARSET utf8;')
    conn.select_db('58_job')
    sql = """CREATE TABLE IF NOT EXISTS `job` (
        `name` varchar(255),
        `last` varchar(255),
        `hy` varchar(255),
        `gs` varchar(255),
        `bq` varchar(255),
        `yq` varchar(255),
        `jg` varchar(255),
        `address` varchar(255),
        `ms` varchar(1000),
        `js` varchar(1000)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8"""
    cursor.execute(sql)
    cursor.close()
    conn.close()
def save_page_data(data: list):
    conn = pymysql.connect(host='localhost', user='root', password='root')
    conn.select_db('58_job')
    cur = conn.cursor()
    # Parameterized INSERT: the driver escapes each value, which plain string
    # concatenation did not (it broke on values containing quotes and invited SQL injection)
    sql_str = 'INSERT INTO job VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'
    if cur.execute(sql_str, data) == 0:
        print('Insert failed!')
    else:
        print('Row inserted successfully!')
    cur.close()
    conn.commit()
    conn.close()
'''
The function below collects every posting link from one list page, then
requests each detail page individually and extracts its data.
'''
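# Note: the selector 'div.job_name > a' returns every anchor in the listing;
# only hrefs containing 'yewu' point at detail pages that get_info() can parse,
# so the loop below skips anything else.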
def get_all_info():
    addr_code = '0d30364d-0015-60bd-6f5c-6b4f6a48df00'
    page_num = '1'
    start_time = time.time()
    res = []
    url = f'https://zz.58.com/yewu/pn{page_num}/?PGTID={addr_code}&ClickID=3'
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0'}
    try:
        wb_data = requests.get(url, headers=headers)
        soup = BeautifulSoup(wb_data.text, 'html.parser')
        GetLink = soup.select('div.job_name > a')
    except Exception:
        # Without the link list there is nothing to crawl, so bail out here
        # (the original fell through and crashed on an undefined GetLink)
        print('List page could not be parsed; the page layout may have changed')
        return
    for i in GetLink:
        link = i.get('href')
        if link and 'yewu' in link:
            res.append(get_info(link))
    # Drop postings whose detail page failed to parse (get_info returned None)
    new_res = [i for i in res if i is not None]
    database_found()
    all_num = len(new_res)
    while new_res:
        print(len(new_res))
        save_page_data(new_res.pop())
    print(f'Crawled {all_num} records in {time.time() - start_time:.1f}s')

get_all_info()
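The script above only walks the first list page (page_num is hard-coded to '1'). Below is a minimal sketch of crawling several pages with a polite delay between requests; the crawl_pages name, the page range, and the 2-second delay are illustrative assumptions, not values from the original site.

def crawl_pages(first_page=1, last_page=3, delay=2.0):
    # Hypothetical multi-page variant reusing get_info/database_found/save_page_data
    addr_code = '0d30364d-0015-60bd-6f5c-6b4f6a48df00'
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0'}
    database_found()
    total = 0
    for page_num in range(first_page, last_page + 1):
        url = f'https://zz.58.com/yewu/pn{page_num}/?PGTID={addr_code}&ClickID=3'
        try:
            soup = BeautifulSoup(requests.get(url, headers=headers).text, 'html.parser')
        except requests.RequestException:
            print(f'List page {page_num} could not be fetched, skipping')
            continue
        for a in soup.select('div.job_name > a'):
            link = a.get('href')
            if link and 'yewu' in link:
                row = get_info(link)
                if row is not None:
                    save_page_data(row)
                    total += 1
                time.sleep(delay)  # space out detail-page requests
        time.sleep(delay)          # space out list-page requests
    print(f'Crawled {total} records across {last_page - first_page + 1} pages')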