import requests
import re
# Site root; prefixed to each entry of urlList before that page is requested.
url = "http://zuimeia.com"
# Download the page at the "hot" community apps URL (platform=2).
r = requests.get('http://zuimeia.com/community/app/hot/?platform=2')
# NOTE(review): the pattern below is empty, so findall() can only return
# empty strings (one per position in the page). The original link-matching
# regex appears to have been lost -- restore it before relying on urlList.
pattern = re.compile(r'')
urlList = pattern.findall(r.content)
# NOTE(review): this span is corrupted and is not valid Python as it stands.
# - The raw-string regex literals on the `title`, `describe` and `downloadUrl`
#   lines are unterminated; their contents appear to have been stripped.
# - The `category` pattern '(.*?)' has presumably lost its surrounding
#   markup as well -- confirm against the original file.
# - The end of requestsUrl (including its return statement) is missing.
# - The `def` line and the start of the `listReplace` list of the helper
#   called above as `srtReplace` are missing; only that helper's replace loop
#   and `return string` survive at the bottom of the span.
# Restore this span from the original file rather than guessing the patterns.
# The module-level loop indexes requestsUrl's result as [0][0], [1], [2]
# and [3][0].
def requestsUrl(url):
r = requests.get(url)
title = re.findall(r'"app-title">
(.*?)
',r.content)#print title
category = re.findall(r'(.*?)',r.content)
#print category
describe = re.findall(r'
#print type(describe[0])
strdescribe = srtReplace(describe[0])
#print strdescribe
downloadUrl = re.findall(r'
', '
', '', '', '', '', '',
'', '','','', '']
for eachListReplace in listReplace:
string = string.replace(str(eachListReplace),'\n')
string = string.replace('\n\n','')
return string
def categornFinal(category):
    """Concatenate the category entries, each followed by '-->'.

    category: iterable of values; every entry is converted with str().

    Returns a single string such as 'a-->b-->'. Note that the separator
    also follows the last entry. An empty iterable yields ''.
    """
    # join() builds the result in one pass instead of re-copying the
    # accumulated string on every iteration.
    return ''.join(str(eachCategory) + '-->' for eachCategory in category)
def urlReplace(url):
    """Return url with HTML-escaped ampersands ('&amp;') turned into '&'.

    url: a URL string as it appears inside HTML source, where '&' in the
    query string is written as '&amp;'.

    Plain '&' characters and all other text are returned unchanged.
    """
    # The previous replace('&', '&') was a no-op; the entity form is what
    # has to be rewritten for the link to be usable.
    return url.replace('&amp;', '&')
# NOTE(review): the return value of this call is discarded. It looks like a
# leftover smoke test and costs one extra HTTP request per run -- confirm it
# is not needed before removing it.
requestsUrl("http://zuimeia.com/community/app/27369/?platform=2")

# Visit every link collected in urlList and append one record per page to the
# output file.
for eachUrl in urlList:
    # Prefix the site root to the collected link before requesting it.
    eachUrl = url + eachUrl
    content = requestsUrl(eachUrl)
    # content is indexed positionally: [0] title matches, [1] category
    # matches, [2] description text, [3] download-link matches.
    title = content[0][0]
    category = categornFinal(content[1])
    strdescribe = content[2]
    downloadUrl = urlReplace(content[3][0])
    # 'a+' appends, so records from earlier iterations and runs are kept.
    with open('c:/wqa.txt', 'a+') as fd:
        fd.write('title:'+title+'\n'+'category:'+category+'\n'+'strdescribe:'+strdescribe+'\n'+'downloadUrl:'+downloadUrl+'\n\n\n-----------------------------------------------------------------------------------------------------------------------------\n\n\n')