将 Java 的 ClassId 转换成 Python 版本

import re
import sys
###########ClassId start##########################
class ClassId:
    """Classify a page URL (plus its referrer) into a class id / child class id.

    Classification first consults ``mapUrl``, a table loaded by
    ``configure``; URLs that are not in the table fall back to a fixed list
    of URL-prefix rules, some of which read the child class id out of the
    referrer's ``category=...`` parameter.
    """

    # Pattern text is kept exactly as in the original port (dots unescaped).
    _SHOP_URL_RE = re.compile("http://shop[0-9]*.taobao.com.*")

    def __init__(self):
        # Query-less URL -> 'child_classid"classid' (the two ids joined by a
        # double quote, as written by configure()).
        self.mapUrl = {}

    def configure(self, strApUrlPath):
        """Load the URL table from the file at *strApUrlPath*.

        Every line is split on the double-quote character. Lines with fewer
        than six fields are ignored. For the others, field 5 (stripped) is
        the URL key and fields 1 and 2 (stripped) are stored, joined by a
        double quote, as its value.

        Errors from opening or reading the file propagate to the caller.
        """
        # The context manager closes the file even if a line fails to parse.
        with open(strApUrlPath, 'r') as f:
            for line in f:
                fields = line.split("\"")
                if len(fields) < 6:
                    continue
                self.mapUrl[fields[5].strip()] = (
                    fields[1].strip() + "\"" + fields[2].strip()
                )

    def getClassId(self, url, refer, classId):
        """Classify *url* and append the result to the list *classId*.

        On success two strings are appended to *classId* -- the class id
        followed by the child class id -- and 0 is returned. If the inputs
        cannot be classified (non-string arguments, a malformed table entry,
        or a non-numeric forum id in *refer*), -1 is returned and *classId*
        is left untouched.
        """
        try:
            classid, child_classid = self._classify(url, refer)
        except (AttributeError, IndexError, TypeError, ValueError):
            return -1
        classId.append(classid)
        classId.append(child_classid)
        return 0

    def _classify(self, url, refer):
        """Return the ``(classid, child_classid)`` string pair for *url*."""
        # The table is keyed by the URL without its query string.
        pure_url = url.split("?", 1)[0]

        # .get() instead of [] so that unmapped URLs reach the prefix rules
        # below rather than raising KeyError.
        mapped = self.mapUrl.get(pure_url)
        if mapped is not None:
            parts = mapped.split("\"")
            return parts[1], parts[0]

        if url.startswith((
                "https://taobao.alipay.com/trade/trade_payment.htm",
                "http://strade1.taobao.com/auction/buy_item.jhtml")):
            return "1", "0"
        if url.startswith("http://list.taobao.com"):
            return self._fromRefer("3", refer, "category=search_", "&")
        if url.startswith("http://search1.taobao.com"):
            return self._fromRefer("4", refer, "category=search_", "&")
        if url.startswith("http://member1.taobao.com/member/user-profile-"):
            return "5", "1"
        if url.startswith("http://my.taobao.com/mytaobao/user-rate-"):
            return "5", "2"
        if url.startswith(("http://forum.taobao.com",
                           "http://info.taobao.com")):
            classid, child = self._fromRefer(
                "6", refer, "category=forum_", "_")
            if classid == "6":
                # Forum ids are scaled by 10000; a non-numeric id raises
                # ValueError, which getClassId reports as -1.
                child = str(int(child) * 10000)
            return classid, child
        if url.startswith(("http://item.taobao.com",
                           "http://archive.taobao.com")):
            return self._fromRefer("9", refer, "category=item_", "&")
        if self._SHOP_URL_RE.match(url) is not None:
            return "5", "0"
        return "0", "0"

    @staticmethod
    def _fromRefer(classid, refer, marker, terminator):
        """Return ``(classid, text after marker)`` or ``("0", "0")``.

        The child id is the part of *refer* that follows *marker*, up to the
        next *terminator* or the end of the string. When *refer* does not
        contain *marker* the URL is left unclassified.
        """
        start = refer.find(marker)
        if start < 0:
            return "0", "0"
        start += len(marker)
        end = refer.find(terminator, start)
        if end == -1:
            end = len(refer)
        return classid, refer[start:end]

###########ClassId end##########################

def main(argv):
    """Classify log records read from stdin and write the results to stdout.

    Each non-empty input line must consist of four fields separated by a
    double quote: referrer, user id, page URL and channel id. For every line
    that can be classified, one output line is printed with the fields
    channel id, pv type, class id, child class id and page URL, again joined
    by a double quote. Lines that are malformed or cannot be classified are
    skipped.

    *argv* is accepted for the usual entry-point signature but is not used.
    """
    myClassId = ClassId()
    myClassId.configure("/home/alex/examples/LogChannelA_Base/ap_url.csv")

    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue

        # Skip records with the wrong field count instead of letting the
        # tuple unpacking abort the whole stream.
        fields = line.split("\"")
        if len(fields) != 4:
            continue
        refer, uid, purl, channelid = fields

        ids = []
        if myClassId.getClassId(purl, refer, ids) != 0:
            continue
        try:
            classid = int(ids[0].strip())
            childclassid = int(ids[1].strip())
        except ValueError:
            # Non-numeric ids (e.g. free text taken from the referrer).
            continue

        # pv type: 0/1 when a user id is present, 2/3 when it is empty;
        # the even value of each pair is used for class id 9.
        if len(uid) == 0:
            pvtype = 2 if classid == 9 else 3
        else:
            pvtype = 0 if classid == 9 else 1

        # Parenthesised single-argument print behaves the same on Python 2
        # and Python 3.
        print('\"'.join([channelid, str(pvtype), str(classid),
                         str(childclassid), purl]))

# Run the stdin-to-stdout conversion only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main(sys.argv)

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值