Using the Scrapy framework to scrape every position name from the first page of Python internship listings on 实习僧 (shixiseng.com)

import scrapy
# PositionItem lives in the project's items.py; a relative import avoids shadowing the scrapy package name
from ..items import PositionItem
from bs4 import BeautifulSoup
import requests



class ShixisengSpider(scrapy.Spider):
    name = 'shixiseng'
    allowed_domains = ['shixiseng.com']  # allowed_domains expects bare domains, not full URLs
    start_urls = ['https://www.shixiseng.com/interns?page=1&keyword=Python&type=intern&area=&months=&days=&degree=&official=&enterprise=&salary=-0&publishTime=&sortType=&city=%E8%BF%90%E5%9F%8E&internExtend=']

    def parse(self, response):
        # Each listing entry contains an <a> whose href points at the position's detail page
        for j, link in enumerate(response.xpath('//*[@id="__layout"]/div/div[2]/div[2]/div[1]/div[1]/div[1]//div/div[1]/div[1]/p[1]/a/@href'), start=1):
            position = PositionItem()  # a fresh item per position, so each yielded item is independent
            position['url_cur'] = response.urljoin(link.extract())  # urljoin handles relative hrefs
            # Fetch the detail page synchronously and read the job title out of it
            detail_html = requests.get(position['url_cur']).text
            soup = BeautifulSoup(detail_html, 'html.parser')
            name_tag = soup.find(attrs={'class': 'new_job_name'}).find('span')
            position['name'] = name_tag.get_text()
            print(j, ': ', position['name'])
            yield position  # hand the item to the pipeline instead of discarding it
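
The spider imports PositionItem from the project's items.py, which the post does not show. Below is a minimal sketch of an item definition that would satisfy the two fields used above (url_cur and name); it assumes nothing else is stored.

# items.py -- minimal item definition assumed by the spider above
import scrapy

class PositionItem(scrapy.Item):
    url_cur = scrapy.Field()   # URL of the position's detail page
    name = scrapy.Field()      # position title read from the detail page

With the item defined, the spider runs from the project root with scrapy crawl shixiseng.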


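Calling requests.get inside parse blocks Scrapy's downloader while each detail page is fetched. A non-blocking alternative (a sketch, not part of the original post) is to hand the detail URLs back to Scrapy with response.follow and extract the title with Scrapy's own selectors; the XPath below targets the same new_job_name element the BeautifulSoup version looks for.

    # Alternative sketch: let Scrapy schedule the detail pages itself
    def parse(self, response):
        for href in response.xpath('//*[@id="__layout"]/div/div[2]/div[2]/div[1]/div[1]/div[1]//div/div[1]/div[1]/p[1]/a/@href').getall():
            yield response.follow(href, callback=self.parse_detail)

    def parse_detail(self, response):
        position = PositionItem()
        position['url_cur'] = response.url
        position['name'] = response.xpath('//*[contains(@class, "new_job_name")]/span/text()').get()
        yield position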