在 Python 中实现位置交集(Positional Intersect)

您还没有提到要将哪些数据传递给已编码的函数。下面是一个带有示例数据的完整实现:

# Document Frequency

# Sample positional data for the word "to": "info" holds one entry per
# document, each listing the positions at which the word occurs there
# ("occurrence_count" equals the number of listed positions).
document_frequecy_data_1 = {
    "word": "to",
    "frequency": 993427,
    "info": [
        {
            "document_id": 1,
            "occurrence_count": 6,
            "positions": [7, 18, 33, 72, 86, 231],
        },
        {
            "document_id": 2,
            "occurrence_count": 5,
            "positions": [1, 17, 74, 222, 255],
        },
        {
            "document_id": 4,
            "occurrence_count": 5,
            "positions": [8, 16, 190, 429, 433],
        },
        {
            "document_id": 5,
            "occurrence_count": 2,
            "positions": [363, 367],
        },
        {
            "document_id": 7,
            "occurrence_count": 3,
            "positions": [13, 23, 191],
        },
    ],
}

# Sample positional data for the word "be", in the same layout as the
# first sample: "info" holds one entry per document with the positions at
# which the word occurs there.
document_frequecy_data_2 = {
    "word": "be",
    "frequency": 178239,
    "info": [
        {
            "document_id": 1,
            "occurrence_count": 2,
            "positions": [17, 25],
        },
        {
            "document_id": 4,
            "occurrence_count": 5,
            "positions": [17, 191, 291, 430, 434],
        },
        {
            "document_id": 5,
            "occurrence_count": 3,
            "positions": [14, 19, 101],
        },
    ],
}

def pos_intersect(data_1, data_2, k=1):
    """Find positions where two words occur within ``k`` of each other.

    Walks the two "info" lists in lockstep by ``document_id`` and, for each
    document present in both, reports every pair of positions (one from
    each word) whose absolute difference is at most ``k``.

    Args:
        data_1: Dict with an "info" list; each entry has a "document_id"
            and a "positions" list.
        data_2: Same layout as ``data_1``, for the second word.
        k: Maximum allowed distance between the two positions (inclusive).

    Returns:
        A list of dicts with keys "document_id", "position_data_1" and
        "position_data_2", ordered by document, then by the first word's
        position, then by the second word's position.

    Note:
        The merge requires "info" to be sorted by ascending "document_id"
        and each "positions" list to be sorted ascending.
    """
    answer = []
    postings_1 = data_1["info"]
    postings_2 = data_2["info"]

    i = 0
    j = 0
    while i < len(postings_1) and j < len(postings_2):
        doc_id_1 = postings_1[i]["document_id"]
        doc_id_2 = postings_2[j]["document_id"]

        if doc_id_1 < doc_id_2:
            i += 1
            continue
        if doc_id_1 > doc_id_2:
            j += 1
            continue

        positions_2 = postings_2[j]["positions"]
        # Index of the first second-word position that can still be within
        # range; it only ever moves forward because both lists are sorted.
        window_start = 0
        for pos_1 in postings_1[i]["positions"]:
            # Drop positions that are too far to the left of pos_1.
            while (window_start < len(positions_2)
                   and positions_2[window_start] < pos_1 - k):
                window_start += 1

            # Everything from window_start up to pos_1 + k is a match.
            idx = window_start
            while idx < len(positions_2) and positions_2[idx] <= pos_1 + k:
                answer.append({
                    "document_id": doc_id_1,
                    "position_data_1": pos_1,
                    "position_data_2": positions_2[idx],
                })
                idx += 1

        i += 1
        j += 1

    return answer

# Run the positional intersection on the two sample words with a maximum
# distance of 4 and print every matching pair of positions.
results = pos_intersect(document_frequecy_data_1, document_frequecy_data_2, 4)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("Results : ")

for res in results:
    # Joining with a single space reproduces the spacing of a
    # comma-separated print; "Position 2" reports the second word's position.
    print(" ".join(str(part) for part in (
        "Document id :", res["document_id"],
        " Position 1: ", res["position_data_1"],
        " Position 2 :", res["position_data_2"],
    )))

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值