import re
import sys
###########ClassId start##########################
class ClassId:
    """Resolve a page URL (and its referrer) to a class id / child class id.

    Exact URL matches come from a lookup table filled by ``configure``.  URLs
    that are not in the table fall back to hard-coded URL-prefix rules, some
    of which read the child class id from a ``category=...`` marker found in
    the referrer.
    """

    # Shop pages look like "http://shop<digits>.taobao.com...".  The dots are
    # escaped so that they only match a literal ".".
    _SHOP_URL = re.compile(r"http://shop[0-9]*\.taobao\.com")

    def __init__(self):
        # Maps a query-less URL to the string 'child_classid"classid'.
        self.mapUrl = {}

    def configure(self, strApUrlPath):
        """Load the URL lookup table from the file at ``strApUrlPath``.

        Every line is split on the double-quote character.  Lines with fewer
        than six fields are ignored.  For the others, field 5 becomes the key
        (it is what ``getClassId`` looks up by URL) and fields 1 and 2,
        re-joined with a double quote, become the value.

        Errors from ``open`` (for example a missing file) propagate to the
        caller.
        """
        # "with" guarantees the file is closed even if parsing raises.
        with open(strApUrlPath, 'r') as f:
            for line in f:
                strList = line.split("\"")
                if len(strList) < 6:
                    continue
                self.mapUrl[strList[5].strip()] = (
                    strList[1].strip() + "\"" + strList[2].strip())

    def getClassId(self, url, refer, classId):
        """Classify ``url`` and append the result to the list ``classId``.

        On success the class id and then the child class id (both strings)
        are appended to ``classId`` and 0 is returned.  On failure -1 is
        returned and ``classId`` is left untouched.
        """
        try:
            classid, child_classid = self._classify(url, refer)
        except (AttributeError, TypeError, ValueError, IndexError):
            # Non-string arguments, a non-numeric forum id in the referrer,
            # or a table value without a double quote: keep the historical
            # "return -1" contract instead of propagating the error.
            return -1
        classId.append(classid)
        classId.append(child_classid)
        return 0

    def _classify(self, url, refer):
        """Return ``(classid, child_classid)`` for ``url`` / ``refer``."""
        # The lookup table is keyed by the URL without its query string.
        pureUrl = url
        nIndex = pureUrl.find("?")
        if nIndex >= 0:
            pureUrl = pureUrl[:nIndex]

        # .get() lets an unknown URL fall through to the prefix rules below
        # instead of raising KeyError.
        tmp = self.mapUrl.get(pureUrl)
        if tmp is not None:
            parts = tmp.split("\"")
            return parts[1], parts[0]

        if url.startswith(("https://taobao.alipay.com/trade/trade_payment.htm",
                           "http://strade1.taobao.com/auction/buy_item.jhtml")):
            return "1", "0"
        if url.startswith("http://list.taobao.com"):
            return self._refer_category(refer, "category=search_", "&", "3")
        if url.startswith("http://search1.taobao.com"):
            return self._refer_category(refer, "category=search_", "&", "4")
        if url.startswith("http://member1.taobao.com/member/user-profile-"):
            return "5", "1"
        if url.startswith("http://my.taobao.com/mytaobao/user-rate-"):
            return "5", "2"
        if url.startswith(("http://forum.taobao.com",
                           "http://info.taobao.com")):
            classid, child_classid = self._refer_category(
                refer, "category=forum_", "_", "6")
            if classid == "6":
                # Forum child ids are scaled by 10000; int() raises
                # ValueError for a non-numeric id, which getClassId reports
                # as -1.
                child_classid = str(int(child_classid) * 10000)
            return classid, child_classid
        if url.startswith(("http://item.taobao.com",
                           "http://archive.taobao.com")):
            return self._refer_category(refer, "category=item_", "&", "9")
        if self._SHOP_URL.match(url) is not None:
            return "5", "0"
        return "0", "0"

    @staticmethod
    def _refer_category(refer, marker, terminator, classid):
        """Extract the text that follows ``marker`` in ``refer``.

        The text runs up to the next ``terminator`` or to the end of
        ``refer``.  Returns ``(classid, text)`` when ``marker`` is present
        and ``("0", "0")`` otherwise.
        """
        start = refer.find(marker)
        if start < 0:
            return "0", "0"
        start += len(marker)
        end = refer.find(terminator, start)
        if end == -1:
            end = len(refer)
        return classid, refer[start:end]
###########ClassId end##########################
def main(argv):
    """Classify the records read from stdin and print one line per result.

    Each non-blank input line is expected to hold four fields separated by a
    double quote: refer, uid, purl, channelid.  For every record that can be
    classified, the line ``channelid"pvtype"classid"childclassid"purl`` is
    printed.  Records that are malformed, cannot be classified, or yield
    non-integer ids are skipped.

    ``argv`` is accepted but not used.
    """
    myClassId = ClassId()
    myClassId.configure("/home/alex/examples/LogChannelA_Base/ap_url.csv")
    for line in sys.stdin:
        line = line.strip()
        if line == "":
            continue
        fields = line.split("\"")
        if len(fields) != 4:
            # Malformed record: skip it like the other unusable lines rather
            # than aborting the whole run with an unpacking ValueError.
            continue
        refer, uid, purl, channelid = fields
        ids = []
        if myClassId.getClassId(purl, refer, ids) != 0:
            continue
        try:
            classid = int(ids[0].strip())
            childclassid = int(ids[1].strip())
        except ValueError:
            # Ids that are not integers cannot be reported; drop the record.
            continue
        # pvtype is chosen from two facts: whether uid is empty, and whether
        # the record has class id 9.
        if len(uid) == 0:
            pvtype = 2 if classid == 9 else 3
        else:
            pvtype = 0 if classid == 9 else 1
        # Single-argument print(...) produces the same output under the
        # Python 2 print statement and the Python 3 print function.
        print('\"'.join([channelid, str(pvtype), str(classid), str(childclassid), purl]))
    return
# Script entry point: run main() only when this file is executed directly.
if __name__ == "__main__":
    main(sys.argv)