Elasticsearch for Django
创建django项目并配置settings.py
settings.py配置
# settings.py entries used by this walkthrough.
# NOTE(review): presumably an excerpt to merge into the generated settings
# rather than a complete replacement of them; confirm against the project.
INSTALLED_APPS = [
    'app01.apps.App01Config',
    'rest_framework',
    'corsheaders',
]

MIDDLEWARE = [
    # The original note reads "third line", i.e. the position this entry
    # is meant to take inside the full MIDDLEWARE list.
    'corsheaders.middleware.CorsMiddleware',
]

# django-cors-headers switch: allow requests from every origin.
CORS_ORIGIN_ALLOW_ALL = True
项目下urls.py配置
from django.contrib import admin
from django.urls import path, include
# Project-level routes: the Django admin plus everything declared in
# app01/urls.py, mounted under the "app01/" prefix.
urlpatterns = [
    path('admin/', admin.site.urls),
    path('app01/', include('app01.urls')),
]
app下路由配置
from django.urls import path
from . import views
# App-level routes, one per class-based view defined in views.py.
urlpatterns = [
    # Crawl the news list pages into the database.
    path('auto_home/', views.AutoHomeSpider.as_view()),
    # Copy the database rows into Elasticsearch.
    path('write_es/', views.WriteDataEs.as_view()),
    # Search the Elasticsearch index.
    path('search/', views.SearchView.as_view()),
]
views.py
from django.shortcuts import render
from django.views import View
from django.http import JsonResponse
from .models import News
from elasticsearch import Elasticsearch, helpers
import requests
from bs4 import BeautifulSoup
# Module-wide Elasticsearch client shared by the views below. No hosts are
# passed, so the client library's default connection settings are used.
# NOTE(review): confirm the address of the target cluster.
es = Elasticsearch()
# Request headers sent with every crawl request; the user-agent value is a
# desktop Chrome 80 string.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/80.0.3987.149 Safari/537.36'
    ),
}

# News list pages 2 through 101 inclusive (100 URLs).
auto_home_url = [
    'https://www.autohome.com.cn/news/{}/#liststart'.format(page)
    for page in range(2, 102)
]
# Data preparation: a larger data set shows off Elasticsearch better. The
# original note mentions about 1500 rows were collected; more can be added.
class AutoHomeSpider(View):
    """Crawl the Autohome news list pages and save the articles as ``News`` rows."""

    # Seconds to wait for each list page, so that one stalled connection
    # cannot block the request forever.
    request_timeout = 10

    @staticmethod
    def _build_news(li):
        """Turn one ``<li>`` tag into an unsaved ``News``, or ``None`` if unusable.

        An entry is unusable when it lacks the ``<a>``, ``<h3>``, ``<p>`` or
        ``<img>`` tag, has no ``href``, or its ``href`` has fewer than four
        ``/``-separated parts (the fourth part is stored as ``tags``).
        """
        link = li.find('a')
        title = li.find('h3')
        summary = li.find('p')
        image = li.find('img')
        if link is None or title is None or summary is None or image is None:
            return None
        a_href = link.get('href')
        if not a_href:
            return None
        href_parts = a_href.split('/')
        if len(href_parts) < 4:
            return None
        return News(title=title.text,
                    summary=summary.text,
                    a_href=a_href,
                    img_url=image.get('src'),
                    tags=href_parts[3])

    def get(self, request):
        """Fetch every URL in ``auto_home_url`` and bulk-insert the parsed articles.

        Pages without a ``<ul class="article">`` element and list entries
        that cannot be parsed are skipped instead of aborting the crawl.

        Returns:
            A ``JsonResponse`` carrying ``{'msg': 'OK', 'code': 200}``.

        Raises:
            requests.RequestException: if a page cannot be fetched, including
                when it does not answer within ``request_timeout`` seconds.
        """
        for url in auto_home_url:
            res = requests.get(url=url, headers=headers, timeout=self.request_timeout)
            # Let requests guess the charset from the body before decoding it.
            res.encoding = res.apparent_encoding
            html_obj = BeautifulSoup(res.text, features='lxml')
            article_list = html_obj.find('ul', {'class': 'article'})
            if article_list is None:
                continue
            parsed = (self._build_news(li) for li in article_list.find_all('li'))
            bulk_list = [news for news in parsed if news is not None]
            # One INSERT batch per list page.
            News.objects.bulk_create(bulk_list)
        return JsonResponse({'msg': 'OK', 'code': 200})
# Write the stored data into Elasticsearch.
class WriteDataEs(View):
    """Copy the ``News`` rows from the database into the ``news`` index."""

    def get(self, request):
        """Bulk-index every ``News`` row and answer with an OK payload.

        NOTE(review): no ``_id`` is supplied for the documents, so calling
        this endpoint repeatedly presumably stores the same rows again;
        confirm whether that is intended.
        """
        rows = News.objects.all()

        def build_actions():
            # Yield one bulk action per row so that helpers.bulk can stream them.
            for row in rows:
                source = {
                    'title': row.title,
                    'a_href': row.a_href,
                    'img_url': row.img_url,
                    'tags': row.tags,
                    'summary': row.summary,
                }
                yield {'_index': 'news', '_type': 'doc', '_source': source}

        helpers.bulk(es, build_actions())
        return JsonResponse({'msg': 'OK', 'code': 200})
# Search the indexed data with Elasticsearch.
class SearchView(View):
    """Search the ``news`` index by title."""

    def post(self, request):
        """Run a ``match`` query on ``title`` using the ``search_msg`` form field.

        At most 10 hits are requested, and matched terms in the title are
        wrapped in ``<b style="color: red">`` ... ``</b>`` by the highlighter.

        Returns:
            A ``JsonResponse`` with ``{'msg': 'OK', 'code': 200, 'data': <raw
            search response>}``, or an HTTP 400 response with ``code`` 400
            when the ``search_msg`` field is absent from the form data.
        """
        search_msg = request.POST.get('search_msg')
        if search_msg is None:
            # Without the field the match query would carry a null value;
            # reject the request here instead of forwarding it.
            return JsonResponse({'msg': 'search_msg is required', 'code': 400}, status=400)
        body = {
            'size': 10,
            'query': {
                'match': {
                    'title': search_msg
                }
            },
            'highlight': {
                'pre_tags': '<b style="color: red">',
                'post_tags': '</b>',
                'fields': {
                    'title': {}
                }
            }
        }
        res = es.search(index='news', body=body)
        return JsonResponse({'msg': 'OK', 'code': 200, 'data': res})