Python crawler demo

Without further ado, here's the example:
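The script targets Python 3 and relies on requests, beautifulsoup4 (with the lxml parser), and PyMySQL; if they are not already installed, pip install requests beautifulsoup4 lxml pymysql pulls everything in.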

# -*- coding: UTF-8 -*-
import random
import time

import pymysql
import requests
from bs4 import BeautifulSoup
from requests.packages.urllib3.exceptions import InsecureRequestWarning

# Silence the certificate warning triggered by the verify=False requests below.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)


def get_info(URL):
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.104 Safari/537.36'}

    wb_data = requests.get(URL, headers=headers, verify=False)
    soup = BeautifulSoup(wb_data.content, 'lxml')
    JobNameList = soup.select('span.pos_name')
    JobName = JobNameList[0].text.strip() if JobNameList else ''
    JobSalarList = soup.select('span.pos_salary')
    JobSalar = JobSalarList[0].text.strip() if JobSalarList else ''
    JobUpdateList = soup.select('span.pos_base_update')
    JobUpdate = JobUpdateList[0].text.strip() if JobUpdateList else ''
    JobTitleList = soup.select('span.pos_title')
    JobTitle = JobTitleList[0].text.strip() if JobTitleList else ''
    JobWelfareList = soup.select('span.pos_welfare_item')
    JobWelfare = ''.join(welfare.text.strip() for welfare in JobWelfareList)
    JobDesList = soup.select('div.des')
    JobDes = JobDesList[0].text.strip() if JobDesList else ''
    CompanyNameList = soup.select('div.baseInfo_link > a')
    if CompanyNameList:
        CompanyNameUrl = CompanyNameList[0].get('href')
        CompanyName = CompanyNameList[0].text.strip()
    else:
        CompanyNameUrl = ''
        CompanyName = ''
    # Concatenate up to the first three condition spans, guarding against
    # pages that expose fewer than three.
    ShuliangList = soup.select('span.item_condition')
    Shuliang = ''.join(item.text.strip() for item in ShuliangList[:3])
    print(JobName)
    print(JobSalar)
    print(JobUpdate)
    print(JobTitle)
    print(JobWelfare)
    print(JobDes)
    print(CompanyName)
    print(CompanyNameUrl)
    print(Shuliang)
    # Connect to the database.
    conn = pymysql.connect(host="localhost", user="root", passwd="root",
                           db="wuba", port=3306, charset="utf8")
    cur = conn.cursor()  # create a cursor object
    insert_mysql = ("INSERT INTO wu_job (JobName, JobSalar, JobUpdate, JobTitle, JobWelfare, JobDes, "
                    "CompanyName, CompanyNameUrl, Shuliang, url) "
                    "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")

    cur.execute(insert_mysql,
                (JobName, JobSalar, JobUpdate, JobTitle, JobWelfare, JobDes,
                 CompanyName, CompanyNameUrl, Shuliang, URL))

    conn.commit()
    print('ok')
    cur.close()  # close the cursor before the connection
    conn.close()


def get_all_info():
    url = 'https://bz.58.com/songcanyuan/?PGTID=0d202408-003b-0c4c-00d4-d1e7471f2361&ClickID=1'
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.104 Safari/537.36'}
    wb_data = requests.get(url, headers=headers, verify=False)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    GetLink = soup.select('div.job_name > a')

    for i in GetLink:
        link = i.get('href')
        get_info(link)
        # Pause a random 0-6 seconds between requests to avoid hammering the site.
        time.sleep(random.random() * 6)


get_all_info()
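The select-then-guard pattern in get_info repeats for every field, so it can be collapsed into a small helper. A minimal sketch (select_text is a name I made up for illustration, not part of the original script):

from bs4 import BeautifulSoup

def select_text(soup, selector, default=''):
    """Return the stripped text of the first match for selector, or default."""
    nodes = soup.select(selector)
    return nodes[0].text.strip() if nodes else default

# Quick check against a snippet of fake markup:
demo = BeautifulSoup('<span class="pos_name">Cook</span>', 'lxml')
print(select_text(demo, 'span.pos_name'))    # -> Cook
print(select_text(demo, 'span.pos_salary'))  # -> '' (selector missing)

With it, each field in get_info shrinks to one line, e.g. JobName = select_text(soup, 'span.pos_name').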
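The INSERT statement assumes a wu_job table already exists in the wuba database, but the post never shows its DDL. A plausible one-off setup sketch, with column names taken from the INSERT and column types that are my assumption:

import pymysql

ddl = """
CREATE TABLE IF NOT EXISTS wu_job (
    id INT AUTO_INCREMENT PRIMARY KEY,
    JobName VARCHAR(255),
    JobSalar VARCHAR(64),
    JobUpdate VARCHAR(64),
    JobTitle VARCHAR(255),
    JobWelfare VARCHAR(255),
    JobDes TEXT,
    CompanyName VARCHAR(255),
    CompanyNameUrl VARCHAR(512),
    Shuliang VARCHAR(255),
    url VARCHAR(512)
) DEFAULT CHARSET=utf8
"""

conn = pymysql.connect(host="localhost", user="root", passwd="root",
                       db="wuba", port=3306, charset="utf8")
cur = conn.cursor()
cur.execute(ddl)   # create the table once before running the crawler
conn.commit()
cur.close()
conn.close()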

 
