# -*- coding: utf-8 -*-
import scrapy
import re
from scrapy import Request
import json
def convert(s):
    """Convert a numeric string such as ``"1,234"`` to an ``int``.

    Thousands separators (``,``) are removed before the value is parsed.

    Args:
        s: Value to convert, typically text scraped from a page.

    Returns:
        The parsed non-negative integer, or ``0`` when ``s`` is not a
        string or does not consist solely of decimal digits once the
        commas have been removed.
    """
    if not isinstance(s, str):
        return 0
    digits = s.replace(',', '')
    # isdecimal() (rather than isdigit()) only accepts characters that
    # int() can parse, so inputs like superscript digits never raise.
    return int(digits) if digits.isdecimal() else 0
class XpcSpider(scrapy.Spider):
    """Spider that collects video post links from xinpianchang.com.

    Crawling starts at the channel index page in ``start_urls``; one
    request is scheduled per listed video and handled by ``parse_post``.
    """

    name = 'xpc'
    allowed_domains = ['xinpianchang.com', 'openapi-vtom.vmovier.com']
    start_urls = ['https://www.xinpianchang.com/channel/index/sort-like?from=tabArticle']

    def parse(self, response):
        """Yield a request for every video listed on the index page.

        Args:
            response: The downloaded listing page.

        Yields:
            One request per video, with ``parse_post`` as its callback and
            the video's article id stored in ``request.meta['pid']``.
        """
        # Each list item exposes its article id via ``data-articleid``.
        pid_list = response.xpath(
            '//ul[@class="video-list"]/li[@class="enter-filmplay"]/@data-articleid'
        ).extract()
        for pid in pid_list:
            url = 'https://www.xinpianchang.com/a%s?from=ArticleList' % pid
            # Pass the id along so the callback knows which video it parses.
            yield response.follow(url, self.parse_post, meta={'pid': pid})
        # TODO: pagination is currently not followed. An earlier draft fed
        # the links under div.page-wrap/div.page back into parse() together
        # with an Authorization cookie; if that is restored, load the cookie
        # value from settings instead of hard-coding it in the source.
#解析單個視頻信息
def parse_post(self, response):
pid
scrapy爬虫爬取新片场信息
最新推荐文章于 2022-06-01 14:50:59 发布