Flask 部署数据处理

manage.py

from flask_script import Manager,Server
from flask_migrate import Migrate,MigrateCommand
from app import db,app
# Create the management tool object that wraps the Flask app
manager = Manager(app)
# Register the "runserver" command: serves on 127.0.0.1 with the debugger enabled
manager.add_command("runserver",Server(host='127.0.0.1',use_debugger=True))

# # Database migration
# Migrate(app,db)
# # Register the database management commands
# manager.add_command("db",MigrateCommand)

# Run the manager only when this script is executed as the main program
if __name__ == '__main__':
    manager.run()

app.py

from flask import Flask
from flask_sqlalchemy import SQLAlchemy

# Add the "application" directory to the import path so that modules under
# views can import the model classes under models without failing to find the
# models module.
# The directory is resolved relative to this file instead of the current
# working directory, so the imports keep working when the server is started
# from another directory. This assumes the "application" package sits next to
# this file, as the blueprint import below implies.
import os
import sys

_APPLICATION_DIR = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "application"
)
if _APPLICATION_DIR not in sys.path:
    sys.path.append(_APPLICATION_DIR)

app = Flask(__name__)
# Emit non-ASCII text (e.g. Chinese messages) in JSON responses as-is
app.config['JSON_AS_ASCII']=False

# Bind the SQLAlchemy extension to the app.
# NOTE(review): no database URI is configured anywhere in this file — confirm
# where the database settings are supposed to be loaded.
db = SQLAlchemy(app)



# Do not move the blueprint import to the top of the file: it must run after
# app and db exist, otherwise a circular import occurs.
# This one acts as the main blueprint.
from application.views.data_clean_process import route_index



# Register the routes
app.register_blueprint(route_index,url_prefix='/')


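需要装的包:pip install flask==1.1.4 flask-script "flask-sqlalchemy<3.0" "flask-migrate<3.0"(后三个是 manage.py 和 app.py 中导入的扩展;flask-migrate 从 3.0 起移除了 MigrateCommand,flask-sqlalchemy 3.0 起不再支持 Flask 1.x,所以需要限制版本)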

如果安装时报错,先升级 pip:python.exe -m pip install --upgrade pip,然后重新执行上面的安装命令

装好后设置参数

主程序代码(application/views/data_clean_process.py,即 app.py 中导入 route_index 的模块)

from flask import Blueprint,request
# Blueprint that the route decorators in this module attach to; it is named
# 'index_page' inside Flask and exported as route_index.
route_index = Blueprint('index_page',__name__ )
import json

import os
import shutil
from pathlib import Path
# NOTE(review): this config import is commented out, yet data_clean() still
# references data_dir — confirm where that value should come from.
# from config.config import data_dir, output_file_path
from tools import txt_process
# NOTE(review): docs_similarity is only imported in the commented-out line
# below, but data_drop_duplicates() and dataline_deduplicate() still call it.
# from tools import excel, html, docs_similarity, txt_process,ppt,docx

def data_clean():
    """Run the four cleaning stages in order.

    Calls data_converting, data_drop_duplicates, drop_emoji_and_privacy and
    dataline_deduplicate one after another and returns nothing.
    """
    # NOTE(review): data_converting() is declared without parameters in this
    # file, and data_dir is not defined at module level (its import is
    # commented out), so this call looks like it fails at runtime — confirm
    # the intended wiring before relying on this function.
    data_converting(data_dir)
    data_drop_duplicates()
    drop_emoji_and_privacy()
    dataline_deduplicate()

@route_index.route("/data_converting",methods=["GET","POST"])
def data_converting():
    """Handle a document-extraction request for the files in a directory.

    The request must be a POST whose body is a JSON object containing a
    string ``data_dir``: the directory whose files should be processed.
    The actual extraction is still a placeholder: for each ``.doc`` file the
    ``url`` field is set to a fixed descriptive string, and the first file
    with any other suffix ends the request with an error.

    Returns:
        dict: ``result`` is ``"success"`` or ``"error"`` and ``msg`` explains
        the outcome; responses built from the success template also carry
        ``url``. Malformed requests and unreadable directories are reported
        through this payload instead of an unhandled exception.
    """
    # Only POST carries the JSON body; GET is answered with an error payload.
    if request.method == "GET":
        return  {'result':"error",'msg':'这个需要post请求!'}

    resp = {'result':"success",'msg':'提取文档成功!','url':""}

    # Parse the raw body directly so the request's Content-Type header is not
    # checked, exactly as before. ValueError covers both invalid JSON and
    # undecodable bytes.
    try:
        req_dic = json.loads(request.get_data())
    except ValueError as e:
        resp['result'] = 'error'
        resp['msg'] = "提取失败!" + str(e)
        return resp

    # Reject bodies that are not an object with a string data_dir. Without
    # this check a JSON null would make os.listdir() list the current
    # directory, and a missing key would surface as a server error.
    data_dir = req_dic.get("data_dir") if isinstance(req_dic, dict) else None
    if not isinstance(data_dir, str):
        resp['result'] = 'error'
        resp['msg'] = "提取失败!" + "缺少参数data_dir"
        return resp

    # NOTE(security): data_dir comes straight from the client and is not
    # restricted to any base directory; the error text below also echoes the
    # OS error message back to the caller.
    try:
        file_list = os.listdir(data_dir)
    except (OSError, ValueError) as e:
        resp['result'] = 'error'
        resp['msg'] = "提取失败!" + str(e)
        return resp

    for f in file_list:
        file_path = os.path.join(data_dir, f)
        suffix = Path(file_path).suffix
        # TODO: dispatch on suffix to the real extractors (excel, html, txt,
        # docx, ppt) once the tools imports are restored.
        print(suffix)
        if suffix == ".doc":
            # Extraction should run here and return the path of the extracted
            # file; that part must be filled in by the integrator.
            # resp['url'] = docx.doc_clean(file_path)
            # For now pretend the extraction has already finished.
            resp['url'] = f'传入的路径是{data_dir}。提取后。txt'
        else:
            return {'result': "error", 'msg': '后缀无法识别!'}
    return resp


def data_drop_duplicates():
    """Delegate to docs_similarity.drop_file() and return nothing."""
    # NOTE(review): docs_similarity is not imported in this module (the only
    # import that names it is commented out), so this call looks like it
    # raises NameError — confirm.
    docs_similarity.drop_file()

@route_index.route("/drop_emoji_and_privacy",methods=["GET","POST"])
def drop_emoji_and_privacy():
    """Run txt_process.data_clean_worker on every entry of the output directory.

    Each entry is passed to the worker as both of its arguments. Judging by
    the function name the worker strips emoji and private data in place —
    TODO confirm against tools.txt_process.

    Returns:
        dict: ``result`` is ``"success"`` or ``"error"`` and ``msg`` explains
        the outcome. A payload is always returned because Flask rejects a
        view function that returns None.
    """
    # NOTE(review): hard-coded, machine-specific directory; it probably
    # belongs in configuration.
    output_file_path="D:\\PY\\Flask网页\\改运行模式\\pythonprj\\data"

    resp = {'result':"success",'msg':'清洗完成!'}
    # Report an unreadable directory through the payload, in the same style
    # as data_converting.
    try:
        file_list = os.listdir(output_file_path)
    except OSError as e:
        resp['result'] = 'error'
        resp['msg'] = "清洗失败!" + str(e)
        return resp

    for f in file_list:
        file_path = os.path.join(output_file_path, f)
        print(file_path)
        # The same path is passed as both arguments.
        txt_process.data_clean_worker(file_path, file_path)
    return resp

def dataline_deduplicate():
    """Delegate to docs_similarity.drop_file() and return nothing."""
    # NOTE(review): this body is identical to data_drop_duplicates(), and
    # docs_similarity is not imported in this module — confirm which helper
    # line-level deduplication is meant to call.
    docs_similarity.drop_file()

# if __name__=='__main__':
#     data_converting(data_dir)
#     drop_emoji_and_privacy()
#

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值