爬虫实践

最新推荐文章于 2024-06-19 17:27:45 发布

Road_to_programmers

最新推荐文章于 2024-06-19 17:27:45 发布

阅读量275

点赞数

分类专栏： Python

本文链接：https://blog.csdn.net/HongKong_Python/article/details/80570744

版权

Python 专栏收录该内容

41 篇文章 0 订阅

订阅专栏

#蜘蛛：spider

from kgc_demo.items import *

class KgcKe(scrapy.Spider):
    """Spider that walks the kgc.cn course listing and yields a KeItem per course."""

    name = 'ke'
    allowed_domains = ['kgc.cn']
    start_urls = ['http://www.kgc.cn/list/230-1-6-9-9-0.shtml']

    def parse(self, response):
        """Yield one KeItem per ``li.course_detail`` node, then follow the next page.

        The extracted title, price and count texts are printed as they are
        read. Any of them may be ``None`` when the selector matches nothing.
        """
        for course in response.css("li.course_detail"):
            title = course.css('a.course-title-a::text').extract_first()
            print(title)
            price_text = course.css('span.view0-price::text').extract_first()
            print(price_text)
            count_text = course.css('span.course-pepo::text').extract_first()
            print(count_text)
            yield KeItem(name=title, price=price_text, count=count_text)

        # Pagination: stop when the page has no "next" link.
        next_href = response.css("li.next a::attr('href')").extract_first()
        if next_href is None:
            return
        yield response.follow(next_href, self.parse)

#管道：pipelines

import pymysql

class DBPipeline(object):
    """Scrapy item pipeline that inserts each item as a row of the ``tb_kegc`` table."""

    def open_spider(self, spider):
        """Open the MySQL connection when the spider starts."""
        # NOTE(review): host and credentials are hard-coded here; consider
        # reading them from the project settings instead.
        self.db = pymysql.connect(host='localhost', port=3306, user='root', passwd='123456', db='kgc', charset='utf8')

    def close_spider(self, spider):
        """Close the MySQL connection when the spider stops."""
        print('关闭数据库')
        self.db.close()

    def process_item(self, item, spider):
        """Insert ``item`` into ``tb_kegc`` and hand it on to the next pipeline.

        The price text has its first character dropped before conversion to
        float (presumably a currency symbol -- confirm against the page); a
        missing price is stored as 0. Failures are reported and rolled back
        rather than raised, so a bad row does not stop the crawl.

        Returns:
            The same ``item``, always, so later pipelines and feed exporters
            still receive it.
        """
        cursor = None
        try:
            cursor = self.db.cursor()
            if item['price'] is not None:
                price = float(item['price'][1:])
                print(price)
            else:
                price = 0
            cursor.execute(
                "insert into tb_kegc(name,price,t_count) values (%s,%s,%s)",
                (item["name"], price, int(item["count"])),
            )
            self.db.commit()
            print('成功添加一条数据!')
        except Exception as exc:
            # Catch Exception (not a bare except) so KeyboardInterrupt and
            # SystemExit still propagate, and show what actually went wrong.
            print('error......', repr(exc))
            try:
                self.db.rollback()
            except Exception as rollback_exc:
                print('error......', repr(rollback_exc))
        finally:
            if cursor is not None:
                cursor.close()
        return item

#数据存储：items

import scrapy


class KgcDemoItem(scrapy.Item):
    """Empty item that declares no fields."""
    # Fields would be declared here, e.g.:
    # name = scrapy.Field()
class FirstDemoItem(scrapy.Item):
    """Item with a ``name`` and a ``job`` field."""
    # NOTE(review): no code in this file populates this item -- confirm it is still used.
    name = scrapy.Field()
    job = scrapy.Field()

class KeItem(scrapy.Item):
    """Item holding the name, price and count fields scraped for one course entry."""
    # Text of the 'a.course-title-a' node.
    name = scrapy.Field()
    # Raw text of the 'span.view0-price' node; DBPipeline converts it to a float.
    price = scrapy.Field()
    # Raw text of the 'span.course-pepo' node; DBPipeline converts it to an int.
    count = scrapy.Field()

#start：启动蜘蛛

import scrapy.cmdline as cmdline

if __name__ == '__main__':
    # Run the 'ke' spider exactly as `scrapy crawl ke` would from a shell.
    # To choose the spider interactively instead, read its name with input()
    # and pass that as the last argument.
    cmdline.execute(['scrapy', 'crawl', 'ke'])

#settings：配置文件

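配置管道：在 settings.py 中通过 ITEM_PIPELINES 启用上面的 DBPipeline，例如 ITEM_PIPELINES = {'kgc_demo.pipelines.DBPipeline': 300}（数值越小越先执行）。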

Road_to_programmers

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
爬虫实践

#蜘蛛：spiderfrom kgc_demo.items import *class KgcKe(scrapy.Spider): name = 'ke' allowed_domains = ['kgc.cn'] start_urls = ['http://www.kgc.cn/list/230-1-6-9-9-0.shtml'] def parse(self, ...
复制链接

扫一扫

专栏目录