我做的只是个 demo,加上前端知识不精,界面会比较丑。
一、需求
1、实现用户上传文件,然后根据文件自动在Elasticsearch中创建一个为期3天的索引
2、需要自动识别用户ID,实现一个用户在有效期内只能创建一个索引
3、用户可以选择批量上传或单个文件上传
4、创建成功后需要给用户体验全文检索的功能,并对关键词进行高亮
根据以上需求,可知需要创建两个前端界面
二、建立文件
构建三个文件如下图所示
其中 UploadText.html 用于实现用户上传文件的功能,FullTextSearch.html 用于给用户体验全文检索,esAPI.py 则是作为后端控制自动创建索引的功能。
2.1 关于 UploadText.html
主要内容如下
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>ESAPI</title>
<script src="https://code.jquery.com/jquery-3.6.0.js"
integrity="sha256-H+K7U5CnXl1h5ywQfKtSj8PCmoN9aaq30gDh27Xc0jk=" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/uuid/8.3.2/uuid.min.js"></script>
<!-- 设置一个空的图标,以免报错-->
<link rel="icon" href="data:;base64,iVBORw0KGgo=">
<script type="text/javascript">
/**
 * Create a fresh identifier for this browser.
 *
 * Uses the v4 generator of the `uuid` global (provided by the uuid script
 * loaded in this page's <head>) and logs the value before returning it.
 *
 * @returns {string} The newly generated identifier.
 */
function generateUniqueId() {
    const freshId = uuid.v4();
    console.log("uuid", freshId);
    return freshId;
}
/**
 * Return the identifier persisted under the "myUUID" localStorage key.
 *
 * On first use (nothing stored yet) a new identifier is generated, written
 * to localStorage, and returned.
 *
 * @returns {string} The stored or newly created identifier.
 */
function getUUID() {
    const storedId = localStorage.getItem("myUUID");
    if (storedId) {
        return storedId;
    }
    const createdId = generateUniqueId();
    localStorage.setItem("myUUID", createdId);
    return createdId;
}
// Resolve the per-browser user id once. Declared explicitly instead of relying
// on an implicit global assignment, which throws in strict mode.
var userId = getUUID();

/* Store the id in a cookie so it is sent along with requests to the backend. */
const expirationDate = new Date();
expirationDate.setMonth(expirationDate.getMonth() + 12); // the cookie stays valid for 12 months
// path=/ makes the cookie visible to every route of this site.
document.cookie = `userId=${userId}; expires=${expirationDate.toUTCString()}; path=/`;
/**
 * Look up a cookie by name in document.cookie.
 *
 * The previous version ignored `name` and always matched "userId"; the name
 * is now honoured, and only the first "=" of each pair is treated as the
 * separator so values that themselves contain "=" are returned intact.
 *
 * @param {string} name - Name of the cookie to read.
 * @returns {string|null} The URI-decoded cookie value, or null when no cookie
 *     with that name exists.
 */
function getCookie(name) {
    var cookiePairs = document.cookie.split(";");
    for (var i = 0; i < cookiePairs.length; i++) {
        var pair = cookiePairs[i];
        var separatorAt = pair.indexOf("=");
        if (separatorAt === -1) {
            continue; // entry without a value
        }
        if (pair.slice(0, separatorAt).trim() === name) {
            return decodeURIComponent(pair.slice(separatorAt + 1));
        }
    }
    return null;
}
// Read back the user id cookie. The lookup name must be the cookie's real
// name ("userId"); the old call passed 'myCookies' and only worked because
// the name was ignored.
var Cookie = getCookie('userId');
console.log("Cookie", Cookie);
// Upload mode flag; false means single-file upload, which is the default.
var isMultiple = false;
$(function () {
    // React to the user switching between single-file and batch upload.
    $('#chooseUploadWay').change(function () {
        const chosenMode = $(this).val();
        isMultiple = (chosenMode == 'multiple'); // true only for batch upload
        if (!isMultiple) {
            // Back to single-file mode: drop the multi/folder selection attributes.
            uploadInput.removeAttribute('multiple');
            uploadInput.removeAttribute('webkitdirectory');
            return;
        }
        // Batch mode: allow several files and whole-folder selection.
        uploadInput.setAttribute('multiple', 'multiple');
        uploadInput.setAttribute('webkitdirectory', 'webkitdirectory');
        console.log("mul", isMultiple)
    });
});
/**
 * Wire the visible upload button to the hidden file input and submit the
 * form once a selection has been made.
 *
 * The previous version also filled a FormData object that was never sent,
 * contained a stray `fetch` expression, and wrapped setTimeout() in a
 * try/catch that could not observe errors thrown inside the deferred
 * callback. Those have been removed / corrected; the submission itself is
 * unchanged.
 */
$(function () {
    // The real <input type="file"> is hidden; the visible button forwards clicks to it.
    $("#Upload").click(function () {
        $("#uploadInput").click();
    });

    // Fires once the user has picked a file (or a folder in batch mode).
    $("#uploadInput").change(function () {
        var files = this.files;
        console.log("files", files);
        if (files.length === 0) {
            alert("请选择要上传的文件");
            return;
        }
        // The selected files are sent by the native submission of #form1 (the
        // file input lives inside that form), so nothing has to be assembled
        // by hand here.
        // The short delay is kept from the original implementation; nothing
        // in this handler depends on it.
        setTimeout(() => {
            try {
                $("#form1").submit();
            } catch (error) {
                alert("请检查上传的文件格式是否正确!");
            }
        }, 1001);
    });
});
</script>
<style>
/* Visible upload button: fixed width, shifted right by 28% of the container, large label. */
#Upload {
width: 15em;
position: relative;
left: 28%;
font-size: 30px;
}
/* Red hint text, aligned with the button and kept on a single line. */
.prompt {
position: relative;
left: 28%;
top: 1em !important;
color: red;
white-space: nowrap;
margin: 0%;
}
/* Upload-mode selector, aligned with the button. */
#chooseUploadWay {
position: relative;
left: 28%;
}
/* Fixed page height. */
body {
height: 15em;
}
</style>
</head>
<body>
<select name="上传选择" id="chooseUploadWay">
<option value="single">单个文件上传</option>
<option value="multiple">多个文件上传</option>
</select>
<input type="button" value="请上传文件" id="Upload">
<form style="display: none;" id="form1" action="/esAPI" method="post" enctype="multipart/form-data"
target="myframe">
<input type="file" name="file" id="uploadInput">
</form>
<p class="prompt">文件夹中的内容只支持.doc、.docx、.xlsx、.sql、.txt、.log、.json格式</p>
<!-- iframe is not a void element: it needs an explicit closing tag, otherwise
     the rest of the document is parsed as the iframe's content.
     NOTE(review): the upload form's target attribute refers to a frame *name*,
     while this iframe only declares an id; confirm where the response is
     meant to be displayed before adding a name here. -->
<iframe style="display: none;" id="myframe"></iframe>
</body>
</html>
界面效果如下图所示
2.2 打开 ES 和 Kibana建立索引模板并设置索引有效期为3天
在 Kibana 中输入
PUT _ilm/policy/my_policy2
{
"policy": {
"phases": {
"delete": {
"min_age": "3d",
"actions": {
"delete": {}
}
}
}
}
}
PUT _template/my_template
{
"index_patterns": ["data-*"],
"settings": {
"index": {
"lifecycle.name": "my_policy2"
}
}
}
运行代码,如下图所示
由图可知运行成功
2.3 关于esAPI.py
其主要内容如下
# -*- coding: utf-8 -*-
from openpyxl import load_workbook
from flask import Flask, request, send_file, jsonify
import os
from docx import Document
from elasticsearch import Elasticsearch
from bs4 import BeautifulSoup
import sys
import io
# Name of the Elasticsearch index currently in use. It is assigned by
# ensure_index_exists() and read by the indexing and search code.
# (The former module-level ``global row, body`` statement was a no-op and
# has been removed.)
indexName = ''

# Re-wrap stdout so that printed text is encoded as UTF-8.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

# Create the Elasticsearch client.
# NOTE(review): host and credentials are hard-coded; consider moving them to
# configuration.
try:
    es = Elasticsearch(
        hosts=["http://127.0.0.1:9200"],
        basic_auth=("es", "root"),
    )
except Exception as e:
    # Keep the name bound so later code fails with a clear error instead of a
    # NameError, and report why the client could not be created.
    es = None
    print("ES未链接!", e)

app = Flask(__name__)
# NOTE: this constant is not referenced by the code in this module; the
# directory actually used for uploads is the app.config value set below.
UPLOAD_FOLDER = 'uploads'
app.config['UPLOAD_FOLDER'] = './files'  # directory where uploaded documents are stored
"""设置根目录"""
@app.route('/')
def file_page():
    """Serve the file-upload page as the site's landing page."""
    landing_page = r'./UploadText.html'
    return send_file(landing_page)
"""读取文件内容并将其索引到 Elasticsearch 中"""
def index_file(file_path):
    """Read one file and index its contents into the current index.

    The target index is the module-level ``indexName``. How the file is read
    depends on its extension (matched case-insensitively):

    * ``.doc`` / ``.docx``: all paragraph texts joined by newlines are stored
      as one document under the ``content`` field.
      NOTE(review): python-docx is documented for ``.docx`` only; legacy
      ``.doc`` files will probably fail inside ``Document()`` - confirm.
    * ``.xlsx``: the first row of the active sheet supplies the field names;
      every following row is stored as its own document.
    * anything else: the file is read as UTF-8 text and stored under
      ``content``.

    Args:
        file_path: Path of the file to index, or ``None``.

    Returns:
        A small HTML snippet announcing success, or the upload page response
        when ``file_path`` is ``None``.
    """
    if file_path is None:
        return send_file(r'./UploadText.html')

    extension = os.path.splitext(file_path)[1].lower()
    if extension in ('.doc', '.docx'):
        document = Document(file_path)
        content = "\n".join(paragraph.text for paragraph in document.paragraphs)
        es.index(index=indexName, body={"content": content})
    elif extension == '.xlsx':
        # Only genuine .xlsx workbooks are supported, not .xls files or .xls
        # files that were merely renamed.
        workbook = load_workbook(filename=file_path, read_only=True)
        try:
            rows = workbook.active.iter_rows(values_only=True)
            header = next(rows, None)  # first row holds the field names
            if header is not None:
                for row in rows:
                    es.index(index=indexName, body=dict(zip(header, row)))
        finally:
            # Read-only workbooks keep the file open until closed explicitly.
            workbook.close()
    else:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        es.index(index=indexName, body={"content": content})
    return '''
<!doctype html>
<p>上传文件成功</p>
'''
"""检查索引是否存在并自动创建索引"""
def ensure_index_exists(index_name, user_id):
    """Select the index for ``user_id`` and create it if it does not exist.

    The index name is ``<prefix>-<user_id>``, where ``<prefix>`` is the part
    of ``index_name`` before its first ``-`` (``"data-*"`` gives ``"data"``).
    The resulting name is stored in the module-level ``indexName``.

    The previous version re-registered the shared ``my_template`` index
    template on every call, replacing its index pattern with this single
    index name. The same settings are now applied directly to the new index,
    so the shared template is left untouched.

    Args:
        index_name: Pattern or name whose leading segment is used as prefix.
        user_id: Identifier appended to the prefix.
    """
    global indexName
    prefix = index_name.split('-')[0]
    indexName = "{index_name}-{data}".format(index_name=prefix, data=user_id)
    if es.indices.exists(index=indexName):
        return
    es.indices.create(index=indexName, body={
        "settings": {
            "index": {
                "number_of_shards": 1,
                "number_of_replicas": 0,
                "lifecycle.name": "my_policy2"  # attach the lifecycle policy
            }
        }
    })
"""在上传文件时调用此函数,调用data-*的模板"""
def createES(file_path, user_id):
    """Make sure the user's ``data-*`` index exists, then index one file into it.

    Args:
        file_path: Path of the saved upload to index.
        user_id: Identifier of the uploading user.
    """
    index_pattern = "data-*"
    ensure_index_exists(index_pattern, user_id)
    index_file(file_path)
"""上传文件接口,函数用于将上传的文件保存到本地并调用文件创建索引"""
def _resolve_upload_path(upload_root, filename):
    """Return the absolute save path for ``filename``, or None if it would fall outside ``upload_root``."""
    target = os.path.abspath(os.path.join(upload_root, filename))
    try:
        inside = os.path.commonpath([upload_root, target]) == upload_root
    except ValueError:  # e.g. paths on different drives
        inside = False
    return target if inside else None


@app.route('/esAPI', methods=['POST'])
def upload_file():
    """Save every uploaded file and index it for the requesting user.

    The user is identified by the ``userId`` cookie. Each file is stored
    below ``app.config['UPLOAD_FOLDER']`` (sub-directories contained in the
    file name are created as needed) and then passed to ``createES``.

    Compared with the previous version, single-file and folder uploads now
    share one code path, so a folder containing exactly one file no longer
    fails on a missing sub-directory. The upload directory is created on
    demand, files from every form field are processed, and file names that
    would escape the upload directory are rejected.

    NOTE(review): presumably the browser sends the folder-relative path as
    the file name for folder uploads (the original code created directories
    from it) - verify.

    Returns:
        The full-text search page on success, or a JSON error with status
        400 when the cookie is missing, nothing was uploaded, or a file name
        is invalid.
    """
    user_id = request.cookies.get('userId')
    if not user_id:
        # Without an id every anonymous client would share one index.
        return jsonify({"error": "missing userId cookie"}), 400

    uploads = [
        storage
        for _field, storages in request.files.lists()
        for storage in storages
        if storage.filename
    ]
    if not uploads:
        return jsonify({"error": "no file uploaded"}), 400

    upload_root = os.path.abspath(app.config['UPLOAD_FOLDER'])
    # Validate every name before writing anything to disk.
    planned = []
    for storage in uploads:
        target = _resolve_upload_path(upload_root, storage.filename)
        if target is None:
            return jsonify({"error": "invalid file name"}), 400
        planned.append((storage, target))

    for storage, target in planned:
        os.makedirs(os.path.dirname(target), exist_ok=True)
        storage.save(target)
        createES(target, user_id)
    return send_file("./FullTextSearch.html")  # continue to the search page
"""全文检索接口"""
@app.route('/data/query', methods=['GET'])
def get_data():
    """Run a phrase search on the ``content`` field and return highlighted hits.

    The index searched is the caller's own ``data-<userId>`` index, derived
    from the ``userId`` cookie. When no cookie is sent, the module-level
    ``indexName`` is used, as before. Previously the module-level name was
    always used, which pointed at the index of whoever uploaded last and was
    empty after a restart.

    Query parameters:
        value: The phrase to search for. An absent or empty value yields an
            empty result list instead of an Elasticsearch error.

    Returns:
        A JSON array with one object per hit that has a highlight, holding
        ``id``, ``highlighted_text`` (first fragment, markup removed) and
        ``indexName``.
    """
    value = request.args.get('value')
    if not value:
        return jsonify([])

    user_id = request.cookies.get('userId')
    search_index = "data-{}".format(user_id) if user_id else indexName

    params = {
        "query": {
            "match_phrase": {
                "content": value
            }
        },
        "highlight": {
            "fields": {
                "content": {
                    "type": "plain",             # plain-text highlighter
                    "fragment_size": 100,        # maximum length of one fragment
                    "number_of_fragments": 100   # maximum number of fragments returned
                }
            },
            "max_analyzed_offset": 900000  # avoid failures on very long fields
        }
    }
    hits = es.search(index=search_index, body=params)['hits']['hits']

    result = []
    for hit in hits:
        fragments = hit.get('highlight', {}).get('content')
        if not fragments:
            continue
        # Remove the highlighter's markup; the page adds its own highlighting.
        plain_text = BeautifulSoup(fragments[0], 'html.parser').get_text()
        result.append({
            'id': hit['_id'],
            'highlighted_text': plain_text,
            'indexName': search_index
        })
    return jsonify(result)
if __name__ == '__main__':
    # The interactive debugger lets anyone who can reach the server execute
    # arbitrary code, so it is no longer switched on unconditionally while
    # listening on all interfaces. Set ESAPI_DEBUG=1 to enable it explicitly.
    debug_enabled = os.environ.get('ESAPI_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)
2.4 关于 FullTextSearch.html
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>全文检索</title>
<link rel="icon" href="data:;base64,iVBORw0KGgo=">
<script src="https://code.jquery.com/jquery-3.6.0.js"
integrity="sha256-H+K7U5CnXl1h5ywQfKtSj8PCmoN9aaq30gDh27Xc0jk=" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/axios/dist/axios.min.js"></script>
</head>
<style>
/* Paragraphs: shifted right by 30% of the container. */
p {
position: relative;
left: 30%;
}
/* Search box (id "input"): aligned with the paragraphs, large text. */
#input {
position: relative;
left: 30%;
font-size: 30px;
}
/* Result list (class "input"): shifted slightly further right than the search box. */
.input {
position: relative;
left: 35%;
font-size: 30px;
}
/* Small red hint text. */
.prompt{
position: relative;
left: 30%;
color: red;
font-size:10px
}
</style>
<body>
<p>上传文件成功!</p>
<span class="prompt">在搜索栏搜索,接口地址会自动出现注意索引有效期为3天</span>
<p>es外部调用全文索引接口地址为:<span id = "address"></span></p>
<input id="input" type="text" placeholder="开始进行全文检索" onblur="hideData()" onfocus="readySearch()">
<ul class="input"></ul>
</body>
<script type="text/javascript">
// 数据
var keywords = "";
var result = [];
var isShow = false;
var idList = [];
/**
 * Focus handler of the search box: flags the results as visible and empties
 * the `.input` list element.
 */
function readySearch() {
    this.isShow = true;
    document.querySelector('.input').innerHTML = '';
}
// Live search: react to every change of the keyword in the search box.

// Element that shows the externally callable search endpoint.
var addressElement = document.getElementById("address");
// The search box itself. (Not named `event`, which would clobber window.event.)
var searchInput = document.getElementById("input");
// Sequence number of the most recent request; used to drop stale responses.
var latestQueryId = 0;

/** Remove every previously rendered result list from the page. */
function clearResults() {
    document.querySelectorAll('.results').forEach(function (node) {
        node.remove();
    });
}

/** Escape regular-expression metacharacters so the keyword is matched literally. */
function escapeRegExp(text) {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Build the DOM node for one hit, with every occurrence of the keyword
 * wrapped in a yellow, bold <mark>.
 *
 * The text is inserted through text nodes only, so content that originates
 * from uploaded files can never be interpreted as markup.
 *
 * @param {{id: string, highlighted_text: string}} hit - One entry of the backend response.
 * @param {string} keyword - The (non-empty) search keyword.
 * @returns {HTMLDivElement} The rendered result row.
 */
function buildResultItem(hit, keyword) {
    const item = document.createElement('div');
    item.classList.add('result-item');
    item.id = hit.id;
    // Splitting on a capturing group yields the matches at the odd positions.
    const pieces = String(hit.highlighted_text)
        .split(new RegExp(`(${escapeRegExp(keyword)})`, 'gi'));
    pieces.forEach(function (piece, position) {
        if (position % 2 === 1) {
            const mark = document.createElement('mark');
            mark.style.backgroundColor = 'yellow';
            const bold = document.createElement('b');
            bold.textContent = piece;
            mark.appendChild(bold);
            item.appendChild(mark);
        } else if (piece) {
            item.appendChild(document.createTextNode(piece));
        }
    });
    return item;
}

searchInput.addEventListener("input", function () {
    const text = searchInput.value.trim();
    const queryId = ++latestQueryId;
    if (text.length == 0) {
        // Empty keyword: reset the state and clear what is displayed.
        isShow = false;
        idList = [];
        clearResults();
        return;
    }
    // The keyword is URL-encoded so characters such as "&" or "#" survive.
    fetch(`/data/query?value=${encodeURIComponent(text)}`)
        .then(response => response.json())
        .then(data => {
            if (queryId !== latestQueryId) {
                return; // a newer keystroke has superseded this response
            }
            // Replace the previous result list instead of stacking a new one on top.
            clearResults();
            const results = document.createElement('div');
            results.classList.add('results');
            data.forEach(hit => {
                addressElement.textContent =
                    "http://127.0.0.1:9200/" + hit.indexName + "/_search";
                results.appendChild(buildResultItem(hit, text));
            });
            // Show the results directly in front of the `.input` list element.
            const container = document.querySelector('.input');
            container.parentNode.insertBefore(results, container);
        })
        .catch(error => console.error(error));
});
/**
 * Blur handler of the search box: flags the results as hidden and empties
 * the `.input` list element.
 */
function hideData() {
    const listElement = document.querySelector('.input');
    this.isShow = false;
    listElement.innerHTML = '';
}
</script>
</html>
三、效果图
索引信息