import math
import sys
def read_file(filename):
    """Read a text file and return its lines.

    Args:
        filename: Path of the file to read.

    Returns:
        A list of strings, one per line of the file, exactly as produced
        by ``readlines`` (line terminators are kept).

    If the file cannot be opened or read, an error message is printed and
    the program is terminated through ``sys.exit()``.
    """
    try:
        # The context manager closes the handle even when reading fails.
        with open(filename) as fp:
            return fp.readlines()
    except IOError:
        print("error opening or reading input file:", filename)
        sys.exit()
def get_words_from_string(line):
    """Split one line of text into lowercase words.

    A word is a maximal run of alphanumeric characters (``str.isalnum``);
    every other character acts as a separator.

    Args:
        line: A single line of text.

    Returns:
        The words of ``line`` in order of appearance, each converted to
        lowercase. An empty list if the line contains no alphanumeric
        characters.
    """
    # Turn every separator into a space so that str.split() yields exactly
    # the alphanumeric runs; this also handles a word that ends the line
    # without needing a separate "flush" step after the loop.
    normalized = "".join(c if c.isalnum() else " " for c in line)
    return [word.lower() for word in normalized.split()]
def get_words_from_line_list(L):
    """Collect the words of a whole document.

    Args:
        L: The document as a list of strings, one entry per line.

    Returns:
        A single list with the words of every line, in document order,
        as produced by ``get_words_from_string`` for each line.
    """
    word_list = []
    for line in L:
        # extend() appends in place; rebuilding the list with "+" would
        # copy all previously collected words for every line.
        word_list.extend(get_words_from_string(line))
    return word_list
def count_ferquency(word_list):
    """Count how many times each word occurs.

    The function name keeps its original (misspelled) form so existing
    callers continue to work.

    Args:
        word_list: A list of words (hashable values, normally strings).

    Returns:
        A list of ``[word, count]`` pairs, one per distinct word, ordered
        by the first appearance of each word in ``word_list``.
    """
    # Dicts preserve insertion order, so the result keeps first-appearance
    # order while each lookup is O(1) instead of a scan of the result list.
    counts = {}
    for word in word_list:
        counts[word] = counts.get(word, 0) + 1
    return [[word, count] for word, count in counts.items()]
def inner_product(L1, L2):
    """Compute the inner product of two word-frequency vectors.

    Args:
        L1: A sequence of ``(word, count)`` pairs.
        L2: A sequence of ``(word, count)`` pairs.

    Returns:
        The sum of ``count1 * count2`` over every pair of entries, one
        from each vector, that share the same word. ``0`` if the vectors
        have no word in common.
    """
    # Index the second vector by word. Counts of repeated words are added
    # up so that the result equals the sum over all matching pairs.
    totals = {}
    for word, count in L2:
        totals[word] = totals.get(word, 0) + count
    return sum(count * totals[word] for word, count in L1 if word in totals)
def vector_angle(L1, L2):
    """Compute the angle between two word-frequency vectors.

    Args:
        L1: A sequence of ``(word, count)`` pairs.
        L2: A sequence of ``(word, count)`` pairs.

    Returns:
        The angle in radians, ``acos(L1.L2 / (|L1| * |L2|))``.

    Raises:
        ZeroDivisionError: If either vector has zero length (for example
            an empty vector), because the angle is then undefined.
    """
    numerator = inner_product(L1, L2)
    denominator = math.sqrt(inner_product(L1, L1) * inner_product(L2, L2))
    if denominator == 0:
        raise ZeroDivisionError(
            "vector_angle is undefined for a zero-length vector"
        )
    # Rounding in sqrt/division can push the ratio marginally outside
    # [-1, 1], which math.acos rejects with a domain error; clamp it.
    cosine = max(-1.0, min(1.0, numerator / denominator))
    return math.acos(cosine)
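# Algorithm Design and Analysis - document comparison - code walkthrough
# (Web page footer: "latest recommended article published 2023-05-07 17:30:46")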