Scrapy & Django项目

需求: 将Scrapy爬虫项目与Django项目结合, 把爬取到的数据展示到前端页面上

# spider编写:
import scrapy
from dl.items import DlItem
class PSpider(scrapy.Spider):
    """Spider that turns each row of the ``#list`` table into a ``DlItem``.

    The start page is fetched once; every ``<tr>`` found under the table
    body yields one item built from the text of its first five cells.
    """

    name = 'p'
    start_urls = ['https://www.kuaidaili.com/free/']

    def parse(self, response):
        """Yield one populated ``DlItem`` per table row in *response*."""
        # Item field name paired with the row-relative XPath of its cell.
        cell_paths = (
            ('ip', './td[1]/text()'),
            ('port', './td[2]/text()'),
            ('typ', './td[3]/text()'),
            ('protocal', './td[4]/text()'),
            ('position', './td[5]/text()'),
        )
        for row in response.xpath('//*[@id="list"]/table/tbody/tr'):
            item = DlItem()
            for field, path in cell_paths:
                item[field] = row.xpath(path).extract_first()
            print(item)
            yield item
# items编码
import scrapy
class DlItem(scrapy.Item):
    """Item carrying the five values the spider reads from one table row."""
    # The spider fills these from the row's cells td[1]..td[5], in this order.
    ip = scrapy.Field()
    port = scrapy.Field()
    typ = scrapy.Field()
    protocal = scrapy.Field()
    position = scrapy.Field()
# Django项目创建与所有配置:
1.models创建:
from django.db import models

# Create your models here.

class Proxy(models.Model):
    """Database model for one scraped proxy entry.

    The field names match the keys the spider sets on its item
    (ip, port, typ, protocal, position); every value is stored as text
    of at most 50 characters.
    """
    ip = models.CharField(max_length=50)
    port = models.CharField(max_length=50)
    typ = models.CharField(max_length=50)
    protocal = models.CharField(max_length=50)
    position = models.CharField(max_length=50)
    
2.在scrapy框架项目中嵌入django
import os
import sys
# Append the parent of the current working directory to the import path.
# NOTE(review): abspath('.') depends on where the process is started;
# presumably the Django project sits in that parent directory - confirm.
sys.path.append(os.path.dirname(os.path.abspath('.')))
# Point Django at the settings module of the 'proxyscan' project.
os.environ['DJANGO_SETTINGS_MODULE'] = 'proxyscan.settings'
# Initialize Django manually:
import django
django.setup()

3.修改爬虫item:
import scrapy
from scrapy_djangoitem import DjangoItem
from proxy import models
class DlItem(DjangoItem):
    """Scrapy item bound to the ``Proxy`` model of the ``proxy`` app."""
    django_model = models.Proxy
    
4.pipeline编码:
class DlPipeline(object):
    """Item pipeline that persists every item by calling its ``save()``."""

    def process_item(self, item, spider):
        """Save *item*, printing a message before and after, and return it.

        *spider* is accepted to satisfy the pipeline signature but is not
        used here.
        """
        opening_msg, closing_msg = '开启数据库, 进行数据存储', '关闭数据库'
        print(opening_msg)
        item.save()
        print(closing_msg)
        return item
    
5.Django项目迁移数据库与admin后台配置
python manage.py makemigrations
python manage.py migrate

# admin.py: register the Proxy model so it shows up in the Django admin site.
# `admin` must be imported explicitly; without it the register call below
# raises NameError when this snippet is used on its own.
from django.contrib import admin

from proxy.models import Proxy

admin.site.register(Proxy)

# 创建超级用户:
python manage.py createsuperuser

# 路由:
from django.conf.urls import url
from django.contrib import admin
from proxy.views import index

# Routing table: the admin site plus the proxy list page.
urlpatterns = [
    # No trailing `$` here: admin.site.urls supplies the sub-routes.
    url(r'^admin/', admin.site.urls),
    # Anchored with `$` so only /index/ matches, not /index/<anything>.
    url(r'^index/$', index),
]

# 视图函数:
from django.shortcuts import render
from proxy.models import Proxy
def index(requests):
    """Render ``index.html`` with all ``Proxy`` rows under the key ``p``."""
    context = {"p": Proxy.objects.all()}
    return render(requests, 'index.html', context)

# 前端代码:
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
    <script src="https://cdn.bootcss.com/jquery/3.4.1/jquery.min.js"></script>
    <link href="https://cdn.bootcss.com/twitter-bootstrap/4.3.1/css/bootstrap.min.css" rel="stylesheet">
</head>
<body>
{# NOTE(review): panel-* and col-md-offset-* are Bootstrap 3 class names, but the 4.3.1 stylesheet is linked above - confirm the intended version. #}
<div class="container">
    <div class="row" >
        <div class="col-md-10 col-md-offset-2" style="margin:0 auto">
            <div class="panel panel-primary">
                <div class="panel-heading" style="margin-top:50px">
                    <h3 class="panel-title">代理IP一览表</h3>
                </div>
                <div class="panel-body">
                    <table class="table table-striped">
                        <thead>
                        <tr>
                            <th>IP</th>
                            <th>Port</th>
                            <th>Type</th>
                            <th>Protocol</th>
                            <th>Position</th>
                        </tr>
                        </thead>
                        <tbody>
                        {# One table row per Proxy object passed in as "p" by the view. #}
                        {% for i in p %}
                            <tr>
                                <th>{{ i.ip }}</th>
                                <td>{{ i.port }}</td>
                                <td>{{ i.typ }}</td>
                                <td>{{ i.protocal }}</td>
                                <td>{{ i.position }}</td>
                            </tr>
                        {% endfor %}
                        </tbody>
                    </table>
                </div>
            </div>
        </div>
    </div>
</div>

</body>
</html>
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值