数据预处理和二叉树构建
build.py
# 基于 Annoy 二叉树的近邻搜索版本：构建
import json
import numpy as np
from collections import OrderedDict
from annoy import AnnoyIndex
def build(word_path='../../res/res/word1.txt', vec_path='../../res/res/vector1.npy', build_trees=1, annoy_deep=200):
keywords = getKeyword(word_path)
vectors = getVectors(vec_path)
word_index = OrderedDict()
# 关键词->索引 表
for count, word in enumerate(keywords)