做badcase分析,页面化查询
脚本
# coding:utf-8
import collections
import sys
from flask import Flask, render_template, request
import jieba
import leveldb
import pandas as pd
import time
import pickle
# Flask application object; the route handler defined later in this script
# registers itself on it via @app.route.
app = Flask(__name__)
# Module-wide LevelDb instance used by the request handler. It stays None
# until the __main__ block at the bottom of the script assigns it.
level_obj = None
class LevelDb(object):
    """Lookup table behind the badcase query page.

    ``self.db`` is whatever object :meth:`load_db` unpickles.  :meth:`find`
    iterates it and indexes it by key, unpacking each value into two items.
    :meth:`find_term_nums` and :meth:`find_term_values` instead call
    ``self.db.Get(bytes)`` and therefore need a store object exposing that
    method.
    NOTE(review): presumably the ``Get`` methods are leftovers from an
    earlier LevelDB-backed version -- confirm before relying on them.
    """

    def __init__(self, path):
        """Store ``path`` and start with no data loaded.

        Args:
            path: Location supplied by the caller.  It is kept on the
                instance, but no method of this class reads it.
        """
        self.path = path
        self.db = None

    def load_db(self, pickle_path='whole_data.pkl'):
        """Unpickle the lookup table into ``self.db`` and print the load time.

        Args:
            pickle_path: File to unpickle.  Defaults to ``'whole_data.pkl'``
                (resolved against the current working directory), which is
                the file this method has always loaded.

        Raises:
            OSError: If the file cannot be opened.
        """
        st = time.time()
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file, so only ever point this at a trusted, locally produced dump.
        # The context manager closes the handle even if unpickling fails.
        with open(pickle_path, 'rb') as fh:
            self.db = pickle.load(fh)
        print('load over time out: {}'.format(time.time() - st))

    def find_term_nums(self, term):
        """Return the leading integer stored for ``term``, or 0 on failure.

        The stored value is decoded as UTF-8 and split on ``"\\x01\\x01"``;
        the first field is parsed as an int.

        Args:
            term: Key to look up; it is UTF-8 encoded before the lookup.

        Returns:
            The parsed count, or 0 when the lookup or the parsing fails.
        """
        num = 0
        try:
            values = self.db.Get(term.encode("utf-8")).decode("utf-8")
            num = int(values.split("\x01\x01")[0])
        except Exception:
            # Best-effort lookup: a missing key, a store without ``Get`` or a
            # malformed value all yield 0.  Unlike a bare ``except:`` this
            # lets KeyboardInterrupt / SystemExit propagate.
            pass
        return num

    def find_term_values(self, term, req_term):
        """Return the records stored for ``term`` whose source contains ``req_term``.

        The stored value is decoded as UTF-8; the part after the first
        ``"\\x01\\x01"`` is split on ``"\\x01"`` into records, each made of
        four tab-separated fields: label, source name, target name, freq.

        Args:
            term: Key to look up; it is UTF-8 encoded before the lookup.
            req_term: Substring that a record's source name must contain.

        Returns:
            A list of ``[source_sku_name, target_sku_name, label]`` lists,
            each matching record repeated ``freq`` times, sorted by label.
            If the lookup or the parsing fails part-way, the records
            collected so far are returned.
        """
        res = []
        try:
            values = self.db.Get(term.encode("utf-8")).decode("utf-8")
            values = values.split("\x01\x01")[1]
            for value in values.split("\x01"):
                label, source_sku_name, target_sku_name, freq = value.split("\t")
                if req_term not in source_sku_name:
                    continue
                # One output row per occurrence, so len(res) reflects freq.
                for _ in range(int(freq)):
                    res.append([source_sku_name, target_sku_name, label])
        except Exception:
            # Best-effort: keep whatever was parsed before the failure, but
            # do not swallow KeyboardInterrupt / SystemExit.
            pass
        res.sort(key=lambda x: x[2])
        return res

    def find(self, term):
        """Return every entry of ``self.db`` whose key contains ``term``.

        Args:
            term: Substring tested against each key with ``in``.

        Returns:
            A tuple ``(res, cnt_dic)``: ``res`` is a list of
            ``[key, cid, cna]`` lists in iteration order, and ``cnt_dic`` is
            a ``defaultdict(int)`` counting how many matches carry each
            ``cna`` value.
        """
        res = []
        cnt_dic = collections.defaultdict(int)
        for k in self.db:
            if term in k:
                cid, cna = self.db[k]
                cnt_dic[cna] += 1
                res.append([k, cid, cna])
        return res, cnt_dic
@app.route('/', methods=['POST', 'GET'])
def get_sample():
    """Render ``web.html``; on POST, first search for the submitted term.

    A GET request renders the template with an empty ``result`` dict.  A
    POST reads the ``Term`` form field, runs ``level_obj.find`` on it and
    passes the template a dict with three keys: ``res`` (the matching
    rows), ``dic`` (the per-name counts as ``[name, count]`` pairs) and
    ``input`` (the submitted term).  The term, the number of matches and
    the elapsed time are printed to stdout.
    """
    started_at = time.time()
    page_data = {}
    if request.method != 'POST':
        return render_template('web.html', result=page_data)

    query = request.form['Term']
    print(query)
    matches, category_counts = level_obj.find(query)
    print(len(matches))
    page_data['res'] = matches
    page_data['dic'] = [[name, count] for name, count in category_counts.items()]
    page_data['input'] = query
    print('time out: {}'.format(time.time() - started_at))
    return render_template('web.html', result=page_data)
if __name__ == '__main__':
    # The path argument is mandatory; exit with a usage hint rather than
    # letting sys.argv[1] raise a bare IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit('usage: {} <db_path>'.format(sys.argv[0]))
    db_path = sys.argv[1]
    # Build the shared instance read by the request handler and load its
    # data before the server starts accepting requests.
    level_obj = LevelDb(db_path)
    level_obj.load_db()
    # Listen on all interfaces, port 8080, with Flask debug mode off.
    app.run(host='0.0.0.0', port=8080, debug=False)
页面
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Title</title>
</head>
<body>
<p>sku_match_sku_sample_debug</p>
<p>contact with youfei02@meituan.com</p>
<hr />
<!-- Search form: POSTs the "Term" text field; check() runs on submit. -->
<form method="POST" onsubmit="return check(this)">
  <p>**输入检索片段,检索source_sku_name**</p>
  <p>**检索是利用分词在leveldb上建索引:输入为词粒度不然检索不出来;输入最好多个词,单个词展示结果巨多**</p>
  <p>
    检索片段 <input type="text" name="Term" value="" />
    <input type="submit" value="search" />
  </p>
</form>
<!-- Summary table: one row per [name, count] pair in result['dic']. -->
<form method="GET">
  <table border="1">
    <tr>
      <th>name</th>
      <th>count</th>
    </tr>
    {% for name, count in result['dic'] %}
    <tr>
      <th> {{ name }} </th>
      <th> {{ count }} </th>
    </tr>
    {% endfor %}
  </table>
</form>
<!-- Result table: one row per three-item entry in result['res']. -->
<form method="GET">
  <table border="1">
    <tr>
      <th>normalized_name</th>
      <th>category_id</th>
      <th>category_name</th>
    </tr>
    {% for normalized_name, category_id, category_name in result['res'] %}
    <tr>
      <th> {{ normalized_name }} </th>
      <th> {{ category_id }} </th>
      <th> {{ category_name }} </th>
    </tr>
    {% endfor %}
  </table>
</form>
<script language="JavaScript">
// All <input> elements on the page. check() saves their values by position
// and the restore step below writes them back by the same position.
var inputArr = document.getElementsByTagName("input");

/**
 * Form onsubmit handler: store the current value of every <input> in
 * window.name so the restore step below can put them back after the page
 * is reloaded by the form submit.
 *
 * Values are stored as a JSON array rather than a ";"-joined string, so a
 * value that itself contains ";" no longer shifts the positions of the
 * values after it.
 *
 * Returns nothing, so `return check(this)` does not cancel the submit.
 *
 * @param {HTMLFormElement} o  The submitting form (unused).
 */
function check(o)
{
    var values = [];
    for (var i = 0; i < inputArr.length; i++)
    {
        // Always read .value; concatenating the element itself would store
        // the text "[object HTMLInputElement]".
        values.push(inputArr[i].value);
    }
    window.name = JSON.stringify(values);
}

if (window.name)
{
    // Values recovered from window.name; left empty when the content is not
    // a JSON array (for example when window.name was set by something else).
    var nameArr = [];
    try
    {
        var parsed = JSON.parse(window.name);
        if (Array.isArray(parsed))
        {
            nameArr = parsed;
        }
    }
    catch (e)
    {
        nameArr = [];
    }
    // Never index past the inputs that actually exist on the page.
    var limit = Math.min(nameArr.length, inputArr.length);
    for (var j = 0; j < limit; j++)
    {
        // Only text fields are restored; buttons keep their own labels.
        if (inputArr[j].type == "text" && typeof nameArr[j] == "string")
        {
            inputArr[j].value = nameArr[j];
        }
    }
}
</script>
</body>
</html>