# NOTE: You have not yet said which data is to be passed to the coded function.
# Document Frequency
# Sample positional posting for the term "to".
# "info" holds one entry per document containing the term: the document id,
# how many times the term occurs there, and the positions of those
# occurrences (occurrence_count equals len(positions) in every entry here).
# Entries are listed by ascending document_id and positions are ascending,
# which is the order pos_intersect walks them in.
# "frequency" is not read by pos_intersect; presumably a collection-wide
# count of the term -- TODO confirm.
document_frequecy_data_1 = {
    "word": "to",
    "frequency": 993427,
    "info": [
        {"document_id": 1, "occurrence_count": 6, "positions": [7, 18, 33, 72, 86, 231]},
        {"document_id": 2, "occurrence_count": 5, "positions": [1, 17, 74, 222, 255]},
        {"document_id": 4, "occurrence_count": 5, "positions": [8, 16, 190, 429, 433]},
        {"document_id": 5, "occurrence_count": 2, "positions": [363, 367]},
        {"document_id": 7, "occurrence_count": 3, "positions": [13, 23, 191]},
    ],
}
# Sample positional posting for the term "be".
# Same layout as the posting for "to": "info" holds one entry per document
# with the document id, the number of occurrences, and the ascending
# positions of those occurrences; entries are listed by ascending
# document_id.
# "frequency" is not read by pos_intersect; presumably a collection-wide
# count of the term -- TODO confirm.
document_frequecy_data_2 = {
    "word": "be",
    "frequency": 178239,
    "info": [
        {"document_id": 1, "occurrence_count": 2, "positions": [17, 25]},
        {"document_id": 4, "occurrence_count": 5, "positions": [17, 191, 291, 430, 434]},
        {"document_id": 5, "occurrence_count": 3, "positions": [14, 19, 101]},
    ],
}
def pos_intersect(data_1, data_2, k=1):
    """Find positions where two terms occur within ``k`` of each other.

    Both arguments are positional postings: a dict whose ``"info"`` key holds
    a list of per-document entries, each with a ``"document_id"`` and a
    ``"positions"`` list. Entries must be sorted by ascending document id and
    each ``"positions"`` list must be ascending; the merge and the early exit
    below depend on that order.

    Args:
        data_1: Posting of the first term.
        data_2: Posting of the second term.
        k: Maximum allowed absolute distance between the two positions.

    Returns:
        A list of dicts, one per matching pair, with the keys
        ``"document_id"``, ``"position_data_1"`` and ``"position_data_2"``.
        Pairs are ordered by document, then by position in ``data_1``, then
        by position in ``data_2``. Each pair appears exactly once.
    """
    answer = []
    postings_1 = data_1["info"]
    postings_2 = data_2["info"]
    i = 0
    j = 0
    # Merge-walk the two document lists; only documents present in both
    # postings can contribute matches.
    while i < len(postings_1) and j < len(postings_2):
        doc_id_1 = postings_1[i]["document_id"]
        doc_id_2 = postings_2[j]["document_id"]
        if doc_id_1 == doc_id_2:
            positions_2 = postings_2[j]["positions"]
            # Dedicated loop variables keep the caller's window ``k`` intact.
            for pos_1 in postings_1[i]["positions"]:
                for pos_2 in positions_2:
                    if abs(pos_1 - pos_2) <= k:
                        answer.append({
                            "document_id": doc_id_1,
                            "position_data_1": pos_1,
                            "position_data_2": pos_2,
                        })
                    elif pos_2 > pos_1:
                        # Positions are ascending, so every later pos_2 is
                        # even further past the window.
                        break
            i += 1
            j += 1
        elif doc_id_1 < doc_id_2:
            i += 1
        else:
            j += 1
    return answer
# Demo run: list every place where the two sample terms occur within four
# positions of each other in the same document.
results = pos_intersect(document_frequecy_data_1, document_frequecy_data_2, 4)
# Single-argument print(...) calls emit the same text under Python 2 and 3.
print("Results : ")
for res in results:
    # "Position 2" reports the second term's position (position_data_2).
    print("Document id : %s  Position 1:  %s  Position 2 : %s" % (
        res["document_id"], res["position_data_1"], res["position_data_2"]))