# -*- coding: utf-8 -*-
import scrapy
import json
from scrapy_project.items import BilibiliItem
class BilibiliSpider(scrapy.Spider):
    """Fetch one Bilibili member profile and emit it as a ``BilibiliItem``.

    The spider sends a single form request to the ``GetInfo`` endpoint and
    copies the ``name``, ``sex`` and ``sign`` fields of the JSON reply into
    the item.
    """

    name = 'bilibili'
    allowed_domains = ['bilibili.com']
    # Scrapy documents ``start_urls`` as a list, even when it holds one URL.
    start_urls = ['https://space.bilibili.com/ajax/member/GetInfo']
    # Form fields sent with the request. ``mid`` is also used to build the
    # Referer header so the two can never disagree.
    data_dict = {
        'mid': '116683',
        'csrf': '',
    }

    def start_requests(self):
        """Build the single form request that asks the API for the profile.

        No callback is set, so Scrapy routes the response to ``parse``.

        Returns:
            list: One ``scrapy.FormRequest`` carrying ``data_dict`` as its
            form data, plus browser-like Referer and User-Agent headers.
        """
        headers = {
            'Referer': 'https://space.bilibili.com/{}/'.format(
                self.data_dict['mid']
            ),
            'User-Agent': (
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/67.0.3396.87 Safari/537.36'
            ),
        }
        return [
            scrapy.FormRequest(
                url=self.start_urls[0],
                formdata=self.data_dict,
                headers=headers,
            ),
        ]

    def parse(self, response):
        """Turn the JSON reply into a ``BilibiliItem``.

        Args:
            response: The response to the request built in
                ``start_requests``; its body is decoded as UTF-8 JSON.

        Yields:
            BilibiliItem: Populated with ``name``, ``sex`` and ``sign``.
            Nothing is yielded when the payload has no ``data`` object.

        Raises:
            ValueError: If the body is not valid UTF-8 encoded JSON.
            KeyError: If ``data`` lacks one of the expected fields.
        """
        payload = json.loads(response.body.decode('utf-8'))
        profile = payload.get('data') if isinstance(payload, dict) else None
        if not isinstance(profile, dict):
            # Without a dict under ``data`` there is nothing to extract;
            # report it instead of failing with an opaque TypeError.
            self.logger.warning(
                'GetInfo reply has no profile data: %r', payload
            )
            return

        item = BilibiliItem()
        for field in ('name', 'sex', 'sign'):
            item[field] = profile[field]
        self.logger.debug('Scraped member profile: %s', item)
        # Yielding hands the item over to the item pipeline.
        yield item