python遍历列表指定部分_Python 遍历 Hadoop 数据,与指定列表对比,取出包含列表中值的记录。...

import sys

import tstree

# NOTE(review): this file is two scripts pasted together with their whitespace
# lost: a stdin aggregation script that uses ``tstree.TernarySearchTrie`` and
# the ``tstree`` module itself (TSTNode / TernarySearchTrie plus a small
# lookup demo).  The code below is the reconstructed, runnable form.  The
# aggregation used to run unguarded at module level; it now lives under the
# ``__main__`` guard so that importing the file has no side effects.


class TSTNode(object):
    """One node of a ternary search trie.

    Attributes are plain fields set by ``TernarySearchTrie``:
    ``splitchar`` is the character this node compares against, ``data`` is
    the stored word when a word ends at this node (otherwise ``None``), and
    ``loNode`` / ``eqNode`` / ``hiNode`` are the children followed when the
    probed character is smaller than, equal to, or greater than ``splitchar``.
    """

    def __init__(self, splitchar):
        self.splitchar = splitchar
        self.data = None
        self.loNode = None
        self.eqNode = None
        self.hiNode = None


class TernarySearchTrie(object):
    """Ternary search trie supporting longest-stored-prefix lookup."""

    def __init__(self):
        self.rootNode = None

    def loadData(self, fname):
        """Insert every non-empty, stripped line of the file *fname*.

        Each inserted word is stored as the ``data`` of its terminal node.
        Lines that are empty after stripping are skipped (``addWord`` returns
        ``None`` for them).
        """
        # ``with`` closes the file even if reading or inserting raises.
        with open(fname) as f:
            for line in f:
                line = line.strip()
                node = self.addWord(line)
                if node:
                    node.data = line

    def addWord(self, word):
        """Create the path for *word* and return its terminal node.

        Returns ``None`` for an empty *word*.  The caller is responsible for
        setting ``data`` on the returned node (see ``loadData``).
        """
        if not word:
            return None

        charIndex = 0
        if not self.rootNode:
            self.rootNode = TSTNode(word[0])

        currentNode = self.rootNode
        while True:
            charComp = ord(word[charIndex]) - ord(currentNode.splitchar)
            if charComp == 0:
                # Character matched: advance in the word and descend "equal".
                charIndex += 1
                if charIndex == len(word):
                    return currentNode
                if not currentNode.eqNode:
                    currentNode.eqNode = TSTNode(word[charIndex])
                currentNode = currentNode.eqNode
            elif charComp < 0:
                if not currentNode.loNode:
                    currentNode.loNode = TSTNode(word[charIndex])
                currentNode = currentNode.loNode
            else:
                if not currentNode.hiNode:
                    currentNode.hiNode = TSTNode(word[charIndex])
                currentNode = currentNode.hiNode

    def maxMatch(self, url):
        """Return the longest stored word that is a prefix of *url*.

        Returns ``None`` when no stored word is a prefix of *url* (including
        when the trie is empty or *url* is empty).
        """
        ret = None
        currentNode = self.rootNode
        charIndex = 0
        while currentNode:
            if charIndex >= len(url):
                break
            charComp = ord(url[charIndex]) - ord(currentNode.splitchar)
            if charComp == 0:
                charIndex += 1
                # A word ends here; remember it as the best match so far.
                if currentNode.data:
                    ret = currentNode.data
                if charIndex == len(url):
                    return ret
                currentNode = currentNode.eqNode
            elif charComp < 0:
                currentNode = currentNode.loNode
            else:
                currentNode = currentNode.hiNode
        return ret


def _format_group(tree, url, total, earliest):
    """Return the output row for one url group, or None if no site matches."""
    site = tree.maxMatch(url)
    if not site:
        return None
    return '%s\t%s\t%s\t%s' % (site, url, total, earliest)


def aggregate_by_url(lines, tree):
    """Yield one tab-separated row per run of adjacent rows sharing a url.

    *lines* is an iterable of text lines of the form ``url count posttime``
    (whitespace separated); lines that do not have exactly three fields are
    skipped.  Only adjacent rows with the same url are merged, so the input
    is presumably sorted/grouped by url (e.g. Hadoop streaming) -- TODO
    confirm with the producer of the data.

    For each group the row is ``site \\t url \\t summed count \\t smallest
    posttime``, where ``site`` is ``tree.maxMatch(url)``; groups whose url
    has no match in *tree* are dropped.

    Raises ``ValueError`` if a count or posttime field is not an integer.
    """
    current_url = None
    total = 0
    earliest = None

    for line in lines:
        fields = line.split()
        if len(fields) != 3:
            continue
        url = fields[0]
        posttime = int(fields[2])
        count = int(fields[1])

        if url != current_url:
            if current_url is not None:
                row = _format_group(tree, current_url, total, earliest)
                if row is not None:
                    yield row
            # Start the new group WITH the current row's posttime.  The
            # original reset the posttime list to empty here, which dropped
            # single-row groups and ignored each group's first posttime.
            current_url = url
            total = 0
            earliest = posttime

        total += count
        if posttime < earliest:
            earliest = posttime

    # Flush the final group.
    if current_url is not None:
        row = _format_group(tree, current_url, total, earliest)
        if row is not None:
            yield row


if __name__ == '__main__':
    import sys

    fname = 'high_freq_site.list'

    # Script 1: aggregate "url count posttime" rows read from stdin.
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    tree = tstree.TernarySearchTrie()
    tree.loadData(fname)
    for row in aggregate_by_url(sys.stdin, tree):
        print(row)

    # Script 2 (the tstree module's own demo): print the best match for each
    # line on stdin.  Statement order is kept from the source, so this only
    # sees whatever input script 1 left unread.
    tree = TernarySearchTrie()
    tree.loadData(fname)
    for url in sys.stdin:
        url = url.strip()
        ret = tree.maxMatch(url)
        print(ret)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值