ElasticSearch for Django

ElasticSearch for Django

创建django项目并配置settings.py

settings.py 配置

# Apps to register: the local app01 app, Django REST framework and
# django-cors-headers.
# NOTE(review): presumably these entries are appended to the INSTALLED_APPS
# list generated by startproject rather than replacing it -- confirm.
INSTALLED_APPS = [
    'app01.apps.App01Config',
    'rest_framework',
    'corsheaders',
]

# NOTE(review): presumably an addition to the generated MIDDLEWARE list, not
# the complete list -- confirm.
MIDDLEWARE = [
    'corsheaders.middleware.CorsMiddleware',  # third line (presumably its position in the full MIDDLEWARE list)
]

# django-cors-headers switch that accepts cross-origin requests from any origin.
CORS_ORIGIN_ALLOW_ALL = True

项目下urls.py配置

from django.contrib import admin
from django.urls import path, include

# Project-level routes: the Django admin site, plus everything under
# 'app01/' delegated to the URLconf in app01.urls.
urlpatterns = [
    path('admin/', admin.site.urls),
    path('app01/', include('app01.urls')),
]

app下路由配置

from django.urls import path
from . import views

# App-level routes, one class-based view per endpoint:
#   auto_home/ -> AutoHomeSpider (scrape news into the database)
#   write_es/  -> WriteDataEs    (copy database rows into Elasticsearch)
#   search/    -> SearchView     (query Elasticsearch)
# NOTE(review): presumably mounted under the project's 'app01/' prefix -- confirm.
urlpatterns = [
    path('auto_home/', views.AutoHomeSpider.as_view()),
    path('write_es/', views.WriteDataEs.as_view()),
    path('search/', views.SearchView.as_view())
]

views.py

from django.shortcuts import render
from django.views import View
from django.http import JsonResponse


from .models import News

from elasticsearch import Elasticsearch, helpers

import requests
from bs4 import BeautifulSoup

# Module-level Elasticsearch client shared by the views below. It is created
# with no arguments, so the library's default connection settings apply.
# NOTE(review): presumably a local node on the default port -- confirm.
es = Elasticsearch()

# Request headers sent with every scraping request: a desktop Chrome
# User-Agent string. The value is built from adjacent string literals so that
# no source-code indentation leaks into the header (a backslash continuation
# inside the literal would embed the next line's leading spaces).
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/80.0.3987.149 Safari/537.36'
    ),
}

# News listing page URLs to scrape: page numbers 2 through 101 inclusive.
auto_home_url = list(map('https://www.autohome.com.cn/news/{}/#liststart'.format, range(2, 102)))


# Data preparation: to demonstrate Elasticsearch well we need a fair amount of
# data. About 1500 rows were prepared here; more is fine.
class AutoHomeSpider(View):
    """Scrape the autohome news listing pages and store the entries as News rows."""

    # Seconds to wait for each listing page; without a timeout a stalled
    # connection would block the request forever.
    REQUEST_TIMEOUT = 10

    def get(self, request):
        """Fetch every URL in ``auto_home_url`` and bulk-insert the parsed articles.

        Each page is parsed independently and its articles are inserted with
        one ``bulk_create`` call. Pages without the article list and list
        items missing any expected element are skipped instead of raising.

        Returns:
            JsonResponse with ``{'msg': 'OK', 'code': 200}`` once all pages
            have been processed.

        Raises:
            requests.RequestException: if a page cannot be fetched (including
                a timeout); rows from earlier pages are already saved.
        """
        for url in auto_home_url:
            res = requests.get(url=url, headers=headers, timeout=self.REQUEST_TIMEOUT)
            # Let requests guess the charset from the body before decoding.
            res.encoding = res.apparent_encoding
            html_obj = BeautifulSoup(res.text, features='lxml')
            article_list = html_obj.find('ul', {'class': 'article'})
            if article_list is None:
                # Page layout differs from what we expect; nothing to parse.
                continue
            bulk_list = []
            for li in article_list.find_all('li'):
                news = self._build_news(li)
                if news is not None:
                    bulk_list.append(news)
            News.objects.bulk_create(bulk_list)
        return JsonResponse({'msg': 'OK', 'code': 200})

    @staticmethod
    def _build_news(li):
        """Return an unsaved News built from one ``<li>``, or None if it is incomplete."""
        link = li.find('a')
        title = li.find('h3')
        summary = li.find('p')
        image = li.find('img')
        if any(tag is None for tag in (link, title, summary, image)):
            return None
        a_href = link.get('href')
        if not a_href:
            return None
        href_parts = a_href.split('/')
        if len(href_parts) <= 3:
            return None
        return News(title=title.text,
                    summary=summary.text,
                    a_href=a_href,
                    img_url=image.get('src'),
                    # Fourth '/'-separated piece of the link is used as the tag.
                    # NOTE(review): presumably the site section in links shaped
                    # like //host/section/... -- confirm.
                    tags=href_parts[3])


# Write the database rows into Elasticsearch
class WriteDataEs(View):
    """Copy every News row from the database into the ``news`` index."""

    def get(self, request):
        """Bulk-index all News rows and report success.

        Each row becomes one document in index ``news`` (type ``doc``) with
        the fields title, a_href, img_url, tags and summary. The row's primary
        key is used as the document id, so calling this endpoint again
        overwrites the existing documents instead of duplicating them.

        Returns:
            JsonResponse with ``{'msg': 'OK', 'code': 200}``.
        """
        # iterator() streams rows instead of caching the whole queryset; the
        # generator below lets helpers.bulk consume them lazily.
        news_rows = News.objects.all().iterator()
        actions = (
            {
                '_index': 'news',
                '_type': 'doc',
                '_id': row.pk,
                '_source': {
                    'title': row.title,
                    'a_href': row.a_href,
                    'img_url': row.img_url,
                    'tags': row.tags,
                    'summary': row.summary,
                },
            }
            for row in news_rows
        )
        helpers.bulk(es, actions)

        return JsonResponse({'msg': 'OK', 'code': 200})


# Search the data with Elasticsearch
class SearchView(View):
    """Search news titles in the ``news`` index."""

    def post(self, request):
        """Run a ``match`` query on ``title`` for the posted ``search_msg``.

        Reads the ``search_msg`` form field, asks Elasticsearch for up to 10
        hits, and has matched title fragments wrapped in a red ``<b>`` tag.

        Returns:
            JsonResponse with ``{'msg': 'OK', 'code': 200, 'data': <raw
            search response>}`` on success, or a 400 response with an error
            message when ``search_msg`` is missing or empty.
        """
        search_msg = request.POST.get('search_msg')
        if not search_msg:
            # A null/empty match query is rejected by Elasticsearch; fail
            # early with a clear client error instead.
            return JsonResponse({'msg': 'search_msg is required', 'code': 400}, status=400)
        body = {
            'size': 10,
            'query': {
                'match': {
                    'title': search_msg
                }
            },
            'highlight': {
                'pre_tags': '<b style="color: red">',
                'post_tags': '</b>',
                'fields': {
                    'title': {}
                }
            }
        }
        res = es.search(index='news', body=body)
        return JsonResponse({'msg': 'OK', 'code': 200, 'data': res})


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值