从GitHub得到完整项目(https://github.com/daleyzou/douban.git)
1、成果展示
数据库
本地海报图片
2、环境
(1)已安装Scrapy的PyCharm
(2)MySQL
(3)连上网络的电脑
3、实体类设计
4、代码
items.py
class DoubanItem(scrapy.Item):
    """Scrapy item declaring the fields collected by the Douban crawler.

    Every attribute is a plain ``scrapy.Field()`` with no metadata, so the
    item behaves like a dict restricted to the six keys declared below.
    """

    # NOTE(review): this snippet relies on ``scrapy`` already being imported
    # at module level in items.py; the import is not part of the listing.

    # Field meanings below are inferred from the names only -- TODO confirm
    # against the spider and the pipelines that populate/consume them.
    title = scrapy.Field()     # presumably the movie title
    bd = scrapy.Field()        # presumably the descriptive "bd" text block
    star = scrapy.Field()      # presumably the rating score
    quote = scrapy.Field()     # presumably the one-line quote
    img_url = scrapy.Field()   # presumably the remote poster image URL
    pic_path = scrapy.Field()  # presumably the local path of the saved poster
doubanmovie.py(爬虫类)
1 # -*- coding: utf-8 -*- 2 import scrapy 3 4 # noinspection PyUnresolvedReferences 5 from douban.items import DoubanItem 6 import sys 7 reload(sys) 8 sys.setdefaultencoding('utf-8') 9 10 11 class DoubanmovieSpider(scrapy.Spider): 12 name = 'doubanmovie' 13 allowed_domains = ['douban.com'] 14 offset = 0 15 url = "https://movie.douban.com/top250?start=" 16 start_urls = [url + str(offset),] 17 18 def parse(self, response): 19 item = DoubanItem() 20 movies = response.xpath("//div[ @class ='info']") 21 links = response.xpath("//div[ @class =