Django第一个项目总结

第一个项目总结
本项目是编写一个基于django的系统,实现分布式爬虫的对接以及爬取数据的可视化。

Django配置

数据库使用磁盘数据库sqlite

# The project uses a single SQLite database stored in a file on local disk.
DATABASES = dict(
    default=dict(
        ENGINE='django.db.backends.sqlite3',
        NAME='D:/sqlitedb/finally_data.db',
    ),
)

静态文件的使用image、html、css、js

# URL prefix for static assets (images, html, css, js).
STATIC_URL = '/static/'
# The "static" directory directly under BASE_DIR.
STATIC_ROOT = os.path.join(BASE_DIR, 'static')
# (prefix, directory) pairs: each sub-folder of STATIC_ROOT is registered
# under a prefix equal to its own name.
# NOTE(review): these directories live inside STATIC_ROOT itself; confirm this
# is intended before relying on `collectstatic`.
STATICFILES_DIRS = [
    (folder, os.path.join(STATIC_ROOT, folder))
    for folder in ('css', 'img', 'dynamic photo', 'js')
]

在与manage.py同级的目录下创建如下文件:
在这里插入图片描述

功能实现

用户的注册、登录
建立名为user的app并编写视图函数,使用内置User模型。
前端代码中对应的input要加上name属性,views.py中对应地使用request.POST.get('username', '')和request.POST.get('password', '')取值。

# urls.py

# Routes of the `user` app: login and registration form targets.
urlpatterns = [
    path(route='login.html', view=loginView, name="login"),
    path(route='register.html', view=registerView, name="register"),
]

# views.py

import re

from django.contrib import messages
from django.contrib.auth import login, authenticate
from django.contrib.auth.models import User
from django.shortcuts import render, redirect


def sub_config_file(username, password, path=''):
    """Replace the default basic-auth credentials in SpiderKeeper's config.py.

    The lines ``BASIC_AUTH_USERNAME = 'admin'`` and
    ``BASIC_AUTH_PASSWORD = 'admin'`` are rewritten with the given values;
    everything else in the file is left untouched.

    :param username: new basic-auth user name
    :param password: new basic-auth password
    :param path: location of the config file to edit; the default is the
        empty placeholder the original code used, so a real path must be
        supplied for the call to succeed
    :return: None
    :raises OSError: if the file cannot be opened for reading and writing
    """
    with open(path, 'r+', encoding='utf8') as f:
        text = f.read()
        # The original passed re.S as the 4th positional argument, which is
        # `count`, not `flags`. No flag is needed for these literal patterns.
        # A callable replacement keeps backslashes in the credentials from
        # being read as group references, and repr() always yields a valid
        # Python string literal, even when the value contains quotes.
        text = re.sub(
            r"BASIC_AUTH_USERNAME = 'admin'",
            lambda _match: "BASIC_AUTH_USERNAME = {!r}".format(username),
            text,
        )
        text = re.sub(
            r"BASIC_AUTH_PASSWORD = 'admin'",
            lambda _match: "BASIC_AUTH_PASSWORD = {!r}".format(password),
            text,
        )
        # Truncate only after the new content exists, so a failure above
        # cannot leave the config file empty.
        f.seek(0)
        f.truncate()
        f.write(text)


def loginView(request):
    """Handle the login form.

    Non-POST requests are redirected to ``/``. For a POST, the ``username``
    and ``password`` form fields are checked:

    * unknown user name: an error message is queued and ``index.html`` is
      rendered again;
    * wrong password: same, with a different message;
    * success: a flag file is written, the user is logged in when the
      account is active, and the browser is redirected to ``trackBack``.

    :param request: the incoming HttpRequest
    :return: an HttpResponse (redirect or rendered ``index.html``)
    """
    if request.method != 'POST':
        return redirect('/')

    # The original printed the user name and plaintext password to stdout;
    # credentials must never end up in logs, so those prints are removed.
    username = request.POST.get('username', '')
    password = request.POST.get('password', '')

    # exists() asks the database for a yes/no answer instead of loading rows.
    if not User.objects.filter(username=username).exists():
        tips = '用户不存在,请注册'
        messages.error(request, tips)
        # locals() is kept as the template context, as in the original.
        return render(request, 'index.html', locals())

    user = authenticate(username=username, password=password)
    if not user:
        tips = '账号密码错误,请重新输入'
        messages.error(request, tips)
        return render(request, 'index.html', locals())

    # Write the flag file on every successful authentication.
    # NOTE(review): presumably another process watches this file; confirm.
    import pickle
    with open('D:/the_flag.pk', 'wb') as fp:
        pickle.dump('ex', fp)
    if user.is_active:
        login(request, user)
    return redirect('trackBack')


# 登录直接跳5000了
def registerView(request):
    """Handle the registration form.

    For a POST, a new ``User`` is created from the ``username`` and
    ``password`` form fields unless a field is empty or the name is already
    taken; in those cases a message is queued instead. ``index.html`` is
    rendered in every case.

    :param request: the incoming HttpRequest
    :return: the rendered ``index.html`` response
    """
    if request.method == 'POST':
        # The original printed the plaintext password here; removed.
        username = request.POST.get('username', '')
        password = request.POST.get('password', '')
        if not username or not password:
            # create_user() raises ValueError for an empty user name, which
            # previously surfaced as a server error.
            tips = '用户名和密码不能为空'
            messages.error(request, tips)
        elif User.objects.filter(username=username).exists():
            tips = '用户已经存在'
            messages.info(request, tips)
        else:
            # create_user() already saves the new row; no extra save() needed.
            User.objects.create_user(username=username, password=password)
            return render(request, 'index.html')
    # locals() is kept as the template context, as in the original.
    return render(request, 'index.html', locals())

html中加这个才会弹窗,执行message的提示。

{# Show every queued Django message in a JavaScript alert box. #}
        {% if messages %}
            <script>
                {% for msg in messages %}
                    {# escapejs keeps quotes, backslashes and newlines in the message from breaking the JS string literal #}
                    alert('{{ msg.message|escapejs }}');
                {% endfor %}
            </script>
        {% endif %}
        <script>

实现右上角显示用户的部分

{# Navbar items: a greeting and a logout entry for authenticated users, otherwise links that open the register / login modals. #}
{% if request.user.is_authenticated %}
                                <li><a href="#">你好!{{ request.user.username}},欢迎使用本系统</a></li>
                                <li><a href="#">退出</a></li>
                            {% else %}
                                <li><a href="#" data-toggle="modal" data-target=".bs-example-modal-sm1">注册</a></li>
                                <li><a href="#" data-toggle="modal" data-target=".bs-example-modal-sm">登录</a></li>
                            {% endif %}

当用户登录以后,会给管理员发送邮件
使用smtplib库

import smtplib
from email.mime.text import MIMEText
from email.utils import formataddr

def sent_email():
    """Send the fixed "start crawling" notice over QQ Mail's SMTP-over-SSL service.

    Prints whether sending succeeded and also returns that result.

    :return: True when the message was sent, False when connecting, logging
        in or sending failed
    """
    my_sender = '@qq.com'  # sender's QQ mailbox
    my_pass = ''  # SMTP authorization code generated in the QQ Mail settings
    my_user = '@qq.com'  # recipient's QQ mailbox

    msg = MIMEText('请开始爬取', 'plain', 'utf-8')
    # (display name, address) of the sender
    msg['From'] = formataddr(["FromRunoob", my_sender])
    # (display name, address) of the recipient
    msg['To'] = formataddr(["FK", my_user])
    msg['Subject'] = "客户端"  # mail subject

    try:
        # SSL connection on port 465; the context manager sends QUIT and
        # closes the socket even when login or sending fails.
        with smtplib.SMTP_SSL("smtp.qq.com", 465) as server:
            server.login(my_sender, my_pass)
            server.sendmail(my_sender, [my_user], msg.as_string())
    except (smtplib.SMTPException, OSError) as exc:
        print("邮件发送失败")
        # Show the reason instead of hiding it.
        print(repr(exc))
        return False
    print("邮件发送成功")
    return True
        

数据库迁移,把数据从redis迁移至sqlite
要注意的是此处写入数据库时候的优化,不是写一条SQL语句就提交一次。先把多条语句写入内存,再提交,大概比一条一条快几十倍。最大内存写入瓶颈是200条左右。

import time
import json
import redis
import sqlite3

def _book_row(item):
    """Convert one scraped item dict into the value tuple for ``detail_book``.

    Raises TypeError or ValueError when a numeric field is missing or not
    numeric.
    """
    goods_popular = item.get("goods_popular")
    # An empty popularity string means "no rating"; store 0.
    rate = 0 if goods_popular == "" else int(goods_popular)
    return (
        # str() is kept from the original: a missing field is stored as the
        # text 'None', exactly as the old string-formatted SQL did.
        str(item.get("goods_description")),   # title
        int(item.get("goods_review")),        # comments
        str(item.get("goods_author")),        # author
        str(item.get("classify_kind_name")),  # large_category
        str(item.get("kind_detail")),         # small_category
        str(item.get("goods_publish")),       # publisher
        str(item.get("goods_img_url")),       # img
        float(item.get("goods_now_price")),   # price
        rate,                                 # rate
    )


def exchange_data(count=10000, batch_size=200, rediscli=None, conn=None):
    """Move scraped items from the Redis list ``jh:items`` into SQLite.

    Items are popped one at a time with a blocking ``blpop``, decoded as
    UTF-8 JSON and inserted into the ``detail_book`` table. Inserts are
    committed in batches, and once more at the end, rather than once per
    row.

    Compared with the original implementation:

    * every item is inserted (previously each 200th item was popped from
      Redis and then dropped);
    * the final partial batch is committed;
    * the SQL statement is parameterised, so quotes in titles cannot break
      or alter it;
    * malformed items and failed inserts are reported instead of being
      silently swallowed.

    :param count: number of items to pop from Redis
    :param batch_size: number of inserted rows between commits
    :param rediscli: object with a ``blpop(keys)`` method; when None a
        ``redis.StrictRedis`` client is created with the original settings
    :param conn: open ``sqlite3`` connection; when None the project
        database file is opened, and closed again before returning
    :return: number of rows inserted
    """
    if rediscli is None:
        rediscli = redis.StrictRedis(host='', port=6379, db=0)
    owns_conn = conn is None
    if owns_conn:
        conn = sqlite3.connect('D:/sqlitedb/finally_data.db')

    sql = (
        "insert into detail_book"
        "(title,comments,author,large_category,small_category,publisher,img,price,rate) "
        "values(?,?,?,?,?,?,?,?,?)"
    )
    inserted = 0
    try:
        for _ in range(count):
            # blpop takes from the head of the list (FIFO); brpop would be LIFO.
            _source, data = rediscli.blpop(["jh:items"])
            item = json.loads(data.decode('utf-8'))
            try:
                row = _book_row(item)
            except (TypeError, ValueError) as exc:
                print('skipping malformed item {!r}: {}'.format(item, exc))
                continue
            try:
                conn.execute(sql, row)
            except sqlite3.Error as exc:
                print('insert failed for item {!r}: {}'.format(item, exc))
                continue
            inserted += 1
            if inserted % batch_size == 0:
                conn.commit()
        # Flush whatever is left of the last, partial batch.
        conn.commit()
    finally:
        if owns_conn:
            conn.close()
    return inserted

实现一个大分类下出现小分类(淘宝、京东那种)

先对所有的大分类进行过滤去重,再遍历这个去重后的列表,对每一个大分类查询数据库,找出它下面的小分类并去重生成列表;最后生成一个字典,以大分类名为键,对应的小分类列表为值,传入到模板中。

另一点要注意的是QuerySet是惰性求值的:它可以直接遍历,但每个新的QuerySet在求值时都会查询一次数据库,所以如果结果要被多次使用,最好先list()一下,把结果固定下来。

def compare(request):
    """Render ``analysis.html`` with large categories and their small categories.

    At most 35 distinct large categories are used. For each one, a queryset
    of its distinct small categories is stored in a dict keyed by the large
    category name.
    """
    large_categories = Book.objects.values('large_category').distinct()[:35]
    small_category_dict = {}
    # list() evaluates the queryset once so the loop runs over plain dicts.
    for row in list(large_categories)[:35]:
        large_name = row['large_category']
        small_category_dict[large_name] = (
            Book.objects.filter(large_category=large_name)
            .values_list('small_category')
            .distinct()
        )
    return render(
        request,
        'analysis.html',
        context={
            'category_list': large_categories,
            'small_category_dict': small_category_dict,
        },
    )
    

商品列表的展示


# urls.py
# Routes of the book-listing app. The category (or search keyword) travels
# inside the path itself, e.g. "category=<name>".
urlpatterns = [
    path(route='search', view=search, name='search'),
    # TODO: pass a flag telling whether the current page is a large or a small category.
    path(route='category=<category_name>', view=get_category, name='show1'),
    path(route='ccategory=<category_name>', view=get_small_category, name='show2'),
    path(route='callback=<category_name>', view=callback, name='callback'),
    # TODO: large and small categories use separate URL prefixes
    # ("category=" and "ccategory="), so they are told apart by URL.
    path(route='sort_by_price=<category_name>', view=sort_by_price, name='sort_by_price'),
    path(route='sort_by_comments=<category_name>', view=sort_by_comments, name='sort_by_comments'),
    path(route='go!!!<page>', view=get_input_page, name='input_page'),
]

# views.py
from django.core.paginator import Paginator, EmptyPage, PageNotAnInteger
from django.db.models import Q
from django.shortcuts import render, redirect

from detail.photo_crawler import PhotoCrawler

from .models import *
import pickle

# 执行某一页对应的图片的爬取
def download(books):
    """Fetch the cover images for one page of books.

    For every book whose ``img`` is not the string ``'None'``, the URL is
    queued for download and the last 16 characters of the URL are used as
    the file name. Books without an image get ``''`` and ``'NA'``.

    :param books: iterable of book objects with an ``img`` attribute
    :return: ``(rows, books)``, where ``rows`` is a list of
        ``(book, 'img/<name>' or '', '<name>' or 'NA')`` tuples
    """
    remote_urls = []
    static_paths = []
    file_names = []
    for book in books:
        if book.img == 'None':
            static_paths.append('')
            file_names.append('NA')
            continue
        tail = book.img[-16:]
        remote_urls.append(book.img)
        static_paths.append('img/' + tail)
        file_names.append(tail)
    PhotoCrawler().more_processing(remote_urls)
    return list(zip(books, static_paths, file_names)), books


# 商品展示
def listing(request):
    """Render one page (12 books) of the complete catalogue in ``details.html``."""
    paginator = Paginator(Book.objects.all(), 12)  # 12 books per page
    # get_page() shows page 1 when ?page= is missing or not an integer and
    # the last page when it is out of range, like the explicit
    # PageNotAnInteger / EmptyPage handling it replaces.
    current_page = paginator.get_page(request.GET.get('page'))
    # Download the cover images for the books on this page.
    new_list, books = download(current_page)
    return render(request, 'details.html', {'books': books, 'new_list': new_list})


# 大分类页和详情页的搜索功能
def search(request):
    """Search books by title, author or publisher and render one result page.

    The search URL is also stored in ``D:/referer_url.pk`` so the
    ``callback`` view can return to the unsorted result list.

    :param request: the incoming HttpRequest; reads ``keyword`` and ``page``
        from the query string
    :return: the rendered ``details.html`` response
    """
    from urllib.parse import urlencode

    # Default to '' so a request without ?keyword= no longer fails with
    # "str + None"; an empty keyword matches every book.
    keyword = request.GET.get('keyword', '')

    # Remember where to come back to. The keyword is URL-encoded so that
    # characters such as '&' or spaces survive the later redirect.
    with open('D:/referer_url.pk', 'wb') as fp:
        pickle.dump(request.path + '?' + urlencode({'keyword': keyword}), fp)

    book_list = Book.objects.filter(
        Q(title__icontains=keyword)
        | Q(author__icontains=keyword)
        | Q(publisher__icontains=keyword)
    )
    # get_page() falls back to page 1 for a missing or non-integer page and
    # to the last page when it is out of range.
    books = Paginator(book_list, 12).get_page(request.GET.get('page'))
    # Download the cover images for the books on this page.
    new_list, books = download(books)
    # category_name carries the keyword plus a 'flagshere' marker so the
    # sort views can recognise a search result.
    return render(request, 'details.html',
                  {'books': books, 'category_name': keyword + 'flagshere', 'new_list': new_list})


# 从外面的大分类点击 “查看更多”的时候进入的分页
def get_category(request, category_name):
    """Render one page (12 books) of the books in a large category.

    The request path is stored in ``D:/referer_url.pk`` so the ``callback``
    view can return to this unsorted listing.
    """
    with open('D:/referer_url.pk', 'wb') as fp:
        pickle.dump(request.path, fp)

    matching_books = Book.objects.filter(large_category=category_name)
    # get_page(): page 1 for a missing or non-integer page, last page when out of range.
    current_page = Paginator(matching_books, 12).get_page(request.GET.get('page'))
    # Download the cover images for the books on this page.
    new_list, books = download(current_page)
    context = {'new_list': new_list, 'books': books, 'category_name': category_name}
    return render(request, 'details.html', context)

# 通过小分类进行商品的筛选
def get_small_category(request, category_name):
    """Render one page (12 books) of the books in a small category.

    The request path is stored in ``D:/referer_url.pk`` so the ``callback``
    view can return to this unsorted listing.
    """
    with open('D:/referer_url.pk', 'wb') as fp:
        pickle.dump(request.path, fp)

    matching_books = Book.objects.filter(small_category=category_name)
    # get_page(): page 1 for a missing or non-integer page, last page when out of range.
    current_page = Paginator(matching_books, 12).get_page(request.GET.get('page'))
    # Download the cover images for the books on this page.
    new_list, books = download(current_page)
    context = {'new_list': new_list, 'books': books, 'category_name': category_name}
    return render(request, 'details.html', context)

'''问题:实现  搜索后的排序,分类后的排序'''


# 再次点击综合排序的时候,返回最初的排序
def callback(request, category_name):
    """Return to the unsorted listing the user was on before sorting.

    The target URL is read from ``D:/referer_url.pk``. When that file is
    missing or unreadable, the user is sent to the site root instead of
    getting a server error.

    :param request: the incoming HttpRequest
    :param category_name: captured from the URL; not used here
    :return: a redirect response
    """
    # NOTE(review): the file is one shared location, not per-user state, so
    # concurrent visitors overwrite each other's entry; the session may be a
    # better place for it. Confirm before changing.
    try:
        # The pickle is written by this application's own views; never point
        # this at a file that other parties can write.
        with open('D:/referer_url.pk', 'rb') as fp:
            go_to = pickle.load(fp)
    except (OSError, EOFError, pickle.UnpicklingError):
        return redirect('/')
    return redirect(go_to)



# 通过价格排序
def _render_sorted(request, category_name, ordering, witch):
    """Render one page of the current listing ordered by *ordering*.

    A ``category_name`` ending in ``'flagshere'`` marks a search result: the
    text before the marker is the search keyword. Anything else is looked up
    as a large category first and as a small category when no large category
    matches.
    """
    marker = 'flagshere'
    if category_name.endswith(marker):
        # Strip only the trailing marker; str.replace() would also remove the
        # marker text from the middle of a keyword.
        keyword = category_name[:-len(marker)]
        book_list = Book.objects.filter(
            Q(title__icontains=keyword)
            | Q(author__icontains=keyword)
            | Q(publisher__icontains=keyword)
        ).order_by(ordering)
    else:
        book_list = Book.objects.filter(large_category=category_name).order_by(ordering)
        # exists() checks for a match without loading every row.
        if not book_list.exists():
            book_list = Book.objects.filter(small_category=category_name).order_by(ordering)

    # get_page(): page 1 for a missing or non-integer page, last page when out of range.
    books = Paginator(book_list, 12).get_page(request.GET.get('page'))
    # Download the cover images for the books on this page.
    new_list, books = download(books)
    return render(request, 'details.html', {
        'new_list': new_list,
        'books': books,
        # Passed back unchanged (marker included) so later sort and page
        # links keep working.
        'category_name': category_name,
        'witch': witch,
    })


# Sort by price
def sort_by_price(request, category_name):
    """Render the current listing ordered by price, highest first."""
    return _render_sorted(request, category_name, '-price', 'pri')


# Sort by number of comments
def sort_by_comments(request, category_name):
    """Render the current listing ordered by comment count, highest first."""
    return _render_sorted(request, category_name, '-comments', 'coms')

def get_input_page(request, page):
    """Jump to the page number typed into the "go to page" box.

    The target is the referring page with its ``page`` query parameter set
    to the requested number; all other query parameters (for example
    ``keyword``) are kept.

    :param request: the incoming HttpRequest; reads ``want_to_got_page``
        from the query string and the ``Referer`` header
    :param page: captured from the URL; no longer needed because the URL is
        rebuilt from its parsed parts
    :return: a redirect response
    """
    from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

    referer = request.META.get('HTTP_REFERER')
    if not referer:
        # No Referer header: there is nothing to go back to.
        return redirect('/')
    page_number = request.GET.get('want_to_got_page')
    if not page_number:
        return redirect(referer)

    # Rebuild the query string instead of cutting the URL at the text
    # 'page', which broke URLs containing that text elsewhere and produced
    # "?keyword=x?page=2" on search results.
    parts = urlsplit(referer)
    query = [
        (key, value)
        for key, value in parse_qsl(parts.query, keep_blank_values=True)
        if key != 'page'
    ]
    query.append(('page', page_number))
    return redirect(urlunsplit(parts._replace(query=urlencode(query))))

注意,如果要跳转到不在自己服务器上的任意网页
可以使用href的静态链接,也可以用HttpResponseRedirect

from django.shortcuts import HttpResponseRedirect

视图函数下
return HttpResponseRedirect('想要跳转的url')

对图书进行快速爬取,此处有每一本书的url,使用多线程爬取。大概是普通爬的5倍。

import os
from multiprocessing.dummy import Pool
from urllib.request import urlretrieve

class PhotoCrawler:
    """Download book cover images into ``./static/img`` using a thread pool."""

    def get_photo(self, url):
        """Download one image; the last 16 characters of *url* are the file name.

        Failures are reported and otherwise ignored, so one bad URL does not
        abort the rest of the batch.

        :param url: address of the image
        :return: None
        """
        # Built from components so the path also works where '\' is not a separator.
        path = os.path.join('.', 'static', 'img', url[-16:])
        try:
            urlretrieve(url, path)
        except Exception as exc:
            # Best effort: a missing cover must not break the page, but the
            # failure should at least be visible.
            print('image download failed: {} ({})'.format(url, exc))

    def more_processing(self, url_list):
        """Download every URL in *url_list* using 8 worker threads.

        :param url_list: iterable of image URLs
        :return: None
        """
        pool = Pool(8)
        try:
            pool.map(self.get_photo, url_list)
        finally:
            # Always release the worker threads, even if map() raises.
            pool.close()
            pool.join()

自定义过滤器的使用

在与manage.py同级的目录下创建文件,并添加至settings
在这里插入图片描述
在里面建立myfilter.py编写过滤器

from django import template

register = template.Library()


# 自定义过滤器实现模板变量的过滤
@register.filter
def myRepalce(value, args):
    """Template filter: replace text inside *value*.

    *args* has the form ``"old:new"``; every occurrence of ``old`` in
    *value* is replaced by ``new``.
    """
    target, replacement = args.split(':')
    return value.replace(target, replacement)

# # 返回对应的字符串的出版社
@register.filter
def get_real_publisher(value):
    """Template filter: return the publisher from a stringified list.

    *value* is expected to be the text form of a Python list such as
    ``"['a', 'b', 'publisher']"``; the last element is returned. Anything
    that cannot be parsed that way is returned unchanged.
    """
    import ast

    try:
        # The text comes from scraped data, so it is parsed with
        # ast.literal_eval(), which accepts only literals, instead of
        # eval(), which would execute arbitrary expressions.
        parsed = ast.literal_eval(value)
        # [-1] also covers the single-element case.
        return parsed[-1]
    except (ValueError, SyntaxError, TypeError, IndexError, KeyError,
            MemoryError, RecursionError):
        return value


# 返回对应字符串的作者
@register.filter
def get_real_author(value):
    """Template filter: return the author(s) from a stringified list.

    *value* is expected to be the text form of a Python list. A
    single-element list yields that element; otherwise the last two
    elements are dropped and the rest joined with ``','``. Anything that
    cannot be parsed or joined is returned unchanged.
    """
    import ast

    try:
        # The text comes from scraped data, so it is parsed with
        # ast.literal_eval(), which accepts only literals, instead of
        # eval(), which would execute arbitrary expressions.
        parsed = ast.literal_eval(value)
        if len(parsed) == 1:
            return parsed[0]
        return ','.join(parsed[:-2])
    except (ValueError, SyntaxError, TypeError, IndexError, KeyError,
            MemoryError, RecursionError):
        return value

# 在分页的按钮实现数字的减1
@register.filter
def get_sub(value):
    """Template filter: return *value* minus one (used by the pagination buttons)."""
    return value - 1

@register.filter
def clean_rate(value):
    """Template filter: convert *value* to an int and multiply it by ten."""
    whole = int(value)
    return whole * 10
    

html中的使用

{% load myfilter %}

<span>{{ book.publisher|get_real_publisher }}</span>

搜索

{# Search box: submits the text as the "keyword" GET parameter to the URL named 'search'. #}
<div class="search-block">
					<form action="{% url 'search' %}" method="get">
						<input type="text" name="keyword" />
						<button type="submit">搜索</button>
					</form>
				</div>

对应的views.py里面使用request.GET.get('keyword')取值

搜索后的分页
1.照常使用分页模型
2.使用js处理跳转的url

# 分页模型
# 商品展示
def listing(request):
    """Render one page (12 books) of the complete catalogue in ``details.html``."""
    paginator = Paginator(Book.objects.all(), 12)  # 12 books per page
    # get_page() shows page 1 when ?page= is missing or not an integer and
    # the last page when it is out of range, like the explicit
    # PageNotAnInteger / EmptyPage handling it replaces.
    current_page = paginator.get_page(request.GET.get('page'))
    # Download the cover images for the books on this page.
    new_list, books = download(current_page)
    return render(request, 'details.html', {'books': books, 'new_list': new_list})

html中

{# Pagination bar: previous arrow, current page, the next four page numbers, last page, next arrow. Each link gets its href from the paging script when clicked. #}
<ul class="pagination pull-right">
					{% if books.has_previous %}
					<li>
						{# was button_page(this, current page), which only reloaded the current page #}
						<a onclick="previousPage(this)">
							<span>&laquo;</span>
						</a>
					</li>
					{% endif %}
                    <li><a onclick="button_page(this,{{ books.number }})"  title="{{ books.number }}">{{ books.number }}</a></li>
					{# the four links below were missing the onclick= attribute name, so clicking them did nothing #}
					<li><a onclick="button_page(this,{{ books.number|add:"1" }})" class="" title="{{ books.number|add:"1" }}">{{ books.number|add:"1" }}</a></li>
				    <li><a onclick="button_page(this,{{ books.number|add:"2" }})" class="" title="{{ books.number|add:"2" }}">{{ books.number|add:"2" }}</a></li>
				    <li><a onclick="button_page(this,{{ books.number|add:"3" }})" class="" title="{{ books.number|add:"3" }}">{{ books.number|add:"3" }}</a></li>
				    <li><a onclick="button_page(this,{{ books.number|add:"4" }})" class="" title="{{ books.number|add:"4" }}">{{ books.number|add:"4" }}</a></li>
					<li><a href="">...</a></li>
                    <li><a onclick="button_page(this,{{ books.paginator.num_pages }})"  title="{{ books.paginator.num_pages }}">{{ books.paginator.num_pages }}</a></li>

{#                    {{ books.paginator.num_pages }}#}
					{% if books.has_next %}
					<li>
						<a onclick="nextPage(this)" title="点击此处可以翻到下一页哦!">
							<span>&raquo;</span>
						</a>
					</li>
					{% endif %}
				</ul>
			</nav>

处理url的js

//翻页js
    /**
     * Build the URL prefix used by the paging links.
     *
     * Returns the current URL without its "page" parameter and without any
     * fragment, ending in "?" or "&", so callers can append "page=N".
     * All other query parameters (for example "keyword") are kept.
     */
    function getUrl() {
        var currentUrl = window.location.href.split('#')[0];
        var queryStart = currentUrl.indexOf('?');
        // No query string yet: "page=N" becomes the first parameter.
        if (queryStart === -1) {
            return currentUrl + '?';
        }
        var base = currentUrl.substring(0, queryStart);
        var pairs = currentUrl.substring(queryStart + 1).split('&');
        var kept = [];
        // Drop only the old "page" parameter. The previous version appended
        // the base URL twice and discarded every parameter sharing a "?"
        // segment with "page".
        for (var i = 0; i < pairs.length; i++) {
            if (pairs[i] !== '' && pairs[i].split('=')[0] !== 'page') {
                kept.push(pairs[i]);
            }
        }
        return base + '?' + (kept.length ? kept.join('&') + '&' : '');
    }
    // Next page: point the clicked link at the following page.
    // The page number is filled in by the Django template when the page is
    // rendered; on the last page the link targets the current page.
    function nextPage(node) {
        var url = getUrl()
        {% if books.has_next %}
            var href = url + "page={{ books.next_page_number}}"
        {% else %}
            var href = url + "page={{ books.number }}"
        {% endif %}
        node.href = href
    }
    // Previous page: point the clicked link at the preceding page.
    // The page number is filled in by the Django template when the page is
    // rendered; on the first page the link targets the current page.
    function previousPage(node) {
        var url = getUrl()
        {% if books.has_previous %}
            var href = url + "page={{ books.previous_page_number}}"
        {% else %}
            var href = url + "page={{ books.number }}"
        {% endif %}
        node.href = href
    }
    // Clicking a page-number button jumps to that page.
    // (This function was written by the backend developer, tongue in cheek.)
    // node: the clicked <a> element; mynumber: the page number to open.
    function button_page(node,mynumber) {
        var url = getUrl()
        var href = url + "page=" + mynumber
        node.href = href
    }
</script>
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值