Python爬虫的制作：用Python制作“最美应用”的爬虫

最新推荐文章于 2021-10-23 15:47:34 发布

weixin_39856208

最新推荐文章于 2021-10-23 15:47:34 发布

阅读量46

点赞数

文章标签： python爬虫的制作

import requests

import re

# Site base URL; it is prefixed to each entry of urlList further down.
url = "http://zuimeia.com"

# Download the "hot" community app listing page (platform=2).
r = requests.get('http://zuimeia.com/community/app/hot/?platform=2')

# NOTE(review): the pattern is empty as shown. The link-matching regex was
# presumably lost when the page markup was stripped from this listing —
# restore it before relying on urlList.
pattern = re.compile(r'')

# Every match of the pattern in the raw response body.
urlList = pattern.findall(r.content)

# Download the page at `url` and extract four pieces of data from the raw
# response body with regular expressions.
# Returns the tuple (title, category, strdescribe, downloadUrl), where title,
# category and downloadUrl are re.findall result lists and strdescribe is the
# first description match after srtReplace clean-up.
# NOTE(review): the body has lost its indentation and the regex literals have
# lost part of their content (presumably HTML tags stripped during
# extraction), so this block does not parse as shown — restore the patterns
# from the original script before running.
def requestsUrl(url):

r = requests.get(url)

title = re.findall(r'"app-title">

(.*?)

',r.content)

#print title

# NOTE(review): as shown this pattern has no anchoring text; the surrounding
# markup it matched is presumably missing — confirm against the original.
category = re.findall(r'(.*?)',r.content)

#print category

describe = re.findall(r'

(.*?)

',r.content)

#print type(describe[0])

# Only the first description match is used; an empty match list raises
# IndexError here.
strdescribe = srtReplace(describe[0])

#print strdescribe

# NOTE(review): this call is truncated — the pattern and the closing
# ",r.content)" are missing from the listing.
downloadUrl = re.findall(r'

#print downloadUrl

return title,category,strdescribe,downloadUrl

# Clean up a description string: every entry of listReplace found in `string`
# is replaced by '\n', and '\n\n' sequences are removed; the cleaned string is
# returned.
# NOTE(review): the entries of listReplace appear as empty strings or bare
# line breaks here — presumably they were HTML tags that were stripped during
# extraction. As shown, replacing '' with '\n' would insert a newline between
# every character, so restore the original entries before use. Indentation is
# also missing, so it is not visible whether the '\n\n' removal runs inside or
# after the loop.
def srtReplace(string):

listReplace = ['

', '
', '

', '','','

', '
', '', '', '', '', '',

'', '','','', '']

# Swap each listed fragment for a newline.
for eachListReplace in listReplace:

string = string.replace(str(eachListReplace),'\n')

string = string.replace('\n\n','')

return string

def categornFinal(category):
    """Join category names into a single arrow-separated string.

    Each item is converted with ``str`` and followed by the separator
    ``'-->'``, so the result keeps a trailing separator
    (``['a', 'b']`` gives ``'a-->b-->'``) and an empty input gives ``''``.

    :param category: iterable of category names.
    :return: the concatenated string.
    """
    # join builds the string in one pass instead of repeated concatenation.
    return ''.join(str(eachCategory) + '-->' for eachCategory in category)

def urlReplace(url):
    """Turn HTML-escaped ampersands in a URL back into plain ``&``.

    Links copied out of page markup carry ``&amp;`` between query
    parameters; the listing showed ``replace('&', '&')``, which changes
    nothing, so the escaped form is restored as the search string.

    :param url: URL string as it appeared in the HTML source.
    :return: the URL with every ``&amp;`` replaced by ``&``.
    """
    return url.replace('&amp;', '&')

# One-off fetch of a single app page; the result is discarded. It looks like a
# leftover smoke test and is kept so the script issues the same requests.
requestsUrl("http://zuimeia.com/community/app/27369/?platform=2")

# Visit every link collected from the listing page and append one record per
# app to the output file.
for eachUrl in urlList:
    # Entries of urlList are joined onto the site base URL.
    eachUrl = url + eachUrl

    content = requestsUrl(eachUrl)

    # content is (title list, category list, description string, download list);
    # the first title and first download link are used, so an empty match list
    # raises IndexError.
    title = content[0][0]
    category = categornFinal(content[1])
    strdescribe = content[2]
    downloadUrl = urlReplace(content[3][0])

    # 'a+' appends, so records accumulate across iterations and across runs.
    with open('c:/wqa.txt', 'a+') as fd:
        fd.write(
            'title:' + title + '\n'
            + 'category:' + category + '\n'
            + 'strdescribe:' + strdescribe + '\n'
            + 'downloadUrl:' + downloadUrl
            + '\n\n\n-----------------------------------------------------------------------------------------------------------------------------\n\n\n'
        )