manage.py
from flask_script import Manager,Server
from flask_migrate import Migrate,MigrateCommand
from app import db,app
# Create the Flask-Script manager object that wraps the application.
manager = Manager(app)

# Register the "runserver" command: serve on 127.0.0.1 with the debugger on.
manager.add_command(
    "runserver",
    Server(host='127.0.0.1', use_debugger=True),
)

# Database migration support is currently disabled:
# Migrate(app,db)
# manager.add_command("db",MigrateCommand)

# Start the manager only when this file is executed as the main script.
if __name__ == '__main__':
    manager.run()
app.py
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
# Put the "application" directory on sys.path so that modules under views can
# import the models package by its top-level name.
# The directory is resolved relative to this file instead of the current
# working directory, so the imports work wherever the process is started.
# Assumes "application/" sits next to this file, as the
# `application.views...` import below implies.
import os
import sys

_APPLICATION_DIR = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "application"
)
if _APPLICATION_DIR not in sys.path:
    sys.path.append(_APPLICATION_DIR)

app = Flask(__name__)
# Keep non-ASCII characters readable in JSON responses.
app.config['JSON_AS_ASCII'] = False

# Bind SQLAlchemy to the application.
# NOTE(review): no SQLALCHEMY_DATABASE_URI is set in the visible code —
# confirm the database configuration is loaded somewhere before this line.
db = SQLAlchemy(app)

# The blueprint import must stay below the creation of `app` and `db`;
# moving it to the top of the file causes a circular import.
# This is the main blueprint.
from application.views.data_clean_process import route_index

# Register the blueprint's routes under the root prefix.
app.register_blueprint(route_index, url_prefix='/')
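需要安装的包:pip install flask==1.1.4(上面的代码还导入了 flask_script、flask_migrate、flask_sqlalchemy,这些包也需要安装)
如果安装时报错,先升级 pip:python.exe -m pip install --upgrade pip
安装完成后设置运行参数(例如 runserver)
主程序代码(application/views/data_clean_process.py):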
from flask import Blueprint,request
# Blueprint that collects the routes defined in this module.
route_index = Blueprint(name='index_page', import_name=__name__)
import json
import os
import shutil
from pathlib import Path
# from config.config import data_dir, output_file_path
from tools import txt_process
# from tools import excel, html, docs_similarity, txt_process,ppt,docx
def data_clean():
    """Call the four pipeline steps in order.

    The steps are ``data_converting``, ``data_drop_duplicates``,
    ``drop_emoji_and_privacy`` and ``dataline_deduplicate``. Their return
    values are ignored and nothing is returned.
    """
    # NOTE(review): `data_dir` is not defined in the visible code (the config
    # import that named it is commented out), and `data_converting` is
    # declared without parameters — this call looks like it cannot succeed;
    # confirm how this function is meant to be invoked.
    data_converting(data_dir)
    data_drop_duplicates()
    drop_emoji_and_privacy()
    dataline_deduplicate()
@route_index.route("/data_converting", methods=["GET", "POST"])
def data_converting():
    """Scan a client-supplied directory and report the extraction result.

    The POST body must be a JSON object whose ``data_dir`` entry is a string
    naming the directory to scan. Every entry of that directory must carry
    the ``.doc`` suffix.

    Returns:
        dict: ``{'result': 'success', 'msg': ..., 'url': ...}`` when every
        entry is a ``.doc`` file (an empty directory also succeeds, with an
        empty ``url``). ``{'result': 'error', 'msg': ...}`` for a GET
        request or an unrecognised suffix, and the same with an empty
        ``url`` when the body cannot be parsed, ``data_dir`` is missing or
        not a string, or the directory cannot be listed.
    """
    # Only POST carries the request body this endpoint needs.
    if request.method == "GET":
        return {'result':"error",'msg':'这个需要post请求!'}

    resp = {'result':"success",'msg':'提取文档成功!','url':""}

    # Parse the body defensively: malformed JSON, a non-object payload or a
    # missing key is reported to the client instead of raising.
    try:
        req_dic = json.loads(request.get_data())
        data_dir = req_dic["data_dir"]
    except (ValueError, TypeError, KeyError) as e:
        resp['result'] = 'error'
        resp['msg'] = "提取失败!" + str(e)
        return resp

    # os.listdir(None) would silently list the working directory, so only a
    # real string path is accepted.
    if not isinstance(data_dir, str):
        resp['result'] = 'error'
        resp['msg'] = "提取失败!data_dir 必须是字符串路径"
        return resp

    # NOTE(review): data_dir comes straight from the client and is listed
    # without restriction — consider limiting it to an allowed base directory.
    try:
        file_list = os.listdir(data_dir)
    except (OSError, ValueError) as e:
        # ValueError covers paths with an embedded null character.
        resp['result'] = 'error'
        resp['msg'] = "提取失败!" + str(e)
        return resp

    # TODO: an earlier, commented-out draft dispatched on the suffix
    # (.xlsx -> excel.excel_clean, .html -> html.html_get, .txt -> copy to
    # output_file_path, .dox -> docx.doc_clean, .ppt -> ppt.get_text) and
    # raised ValueError for anything else. Only ".doc" is handled now.
    for f in file_list:
        file_path = os.path.join(data_dir, f)
        if Path(file_path).suffix != ".doc":
            return {'result': "error", 'msg': '后缀无法识别!'}
        # Placeholder: the real extraction (docx.doc_clean(file_path)) is not
        # wired in yet, so the url is a fixed text built from data_dir.
        resp['url'] = f'传入的路径是{data_dir}。提取后。txt'
    return resp
def data_drop_duplicates():
    """Call ``docs_similarity.drop_file()``.

    The module-level import that would provide ``docs_similarity`` is
    commented out in this file, so the name is imported here; without it the
    call raises ``NameError``.
    """
    # assumes tools.docs_similarity exists next to tools.txt_process, as the
    # commented-out module-level import suggests — TODO confirm
    from tools import docs_similarity

    docs_similarity.drop_file()
@route_index.route("/drop_emoji_and_privacy", methods=["GET", "POST"])
def drop_emoji_and_privacy():
    """Run ``txt_process.data_clean_worker`` on every entry of the data directory.

    Returns:
        dict: ``{'result': 'success', 'msg': ...}`` after all entries were
        processed, or ``{'result': 'error', 'msg': ...}`` when the directory
        cannot be listed. A dict is always returned because Flask rejects a
        view that returns ``None``.
    """
    # NOTE(review): hard-coded, machine-specific directory — consider moving
    # it to configuration.
    output_file_path = "D:\\PY\\Flask网页\\改运行模式\\pythonprj\\data"
    try:
        file_list = os.listdir(output_file_path)
    except OSError as e:
        return {'result': "error", 'msg': "清洗失败!" + str(e)}

    for f in file_list:
        file_path = os.path.join(output_file_path, f)
        print(file_path)
        # The same path is passed as both arguments; presumably the worker
        # reads the first and writes the second, i.e. cleans the file in
        # place — verify against txt_process.
        txt_process.data_clean_worker(file_path, file_path)
    return {'result': "success", 'msg': '清洗完成!'}
def dataline_deduplicate():
    """Call ``docs_similarity.drop_file()``.

    The module-level import that would provide ``docs_similarity`` is
    commented out in this file, so the name is imported here; without it the
    call raises ``NameError``.
    """
    # assumes tools.docs_similarity exists next to tools.txt_process, as the
    # commented-out module-level import suggests — TODO confirm
    from tools import docs_similarity

    docs_similarity.drop_file()
# if __name__=='__main__':
# data_converting(data_dir)
# drop_emoji_and_privacy()
#