Python 爬取手机 App
这段代码参考了网上的实现,基本差不多;我在本地运行时发现原代码有 bug,于是用自己的账号实验了一下,并修复了这个 bug。这是我第一次爬取 App,感觉还可以,挺成功的。
# -*-coding:utf-8 -*-
import urllib2
from cookielib import CookieJar
import sys
import json
"""
超级课程表话题抓取
"""
def fetch_data(json_data):
    """Extract the paging cursor and the topic posts from one API response.

    Every message with a non-empty ``content`` is printed (school name, a
    space, then the content) and collected into the returned list.  Messages
    without content are skipped.

    Args:
        json_data: Decoded JSON response (a dict).  It must contain a
            ``data`` object holding ``timestampLong`` and ``messageBOs``.

    Returns:
        A ``(timestampLong, topicList)`` tuple.  ``topicList`` is a list of
        dicts, each with a ``content`` key and, when the message carries a
        non-empty one, a ``schoolName`` key.

    Raises:
        KeyError: If ``data``, ``timestampLong`` or ``messageBOs`` is missing.
    """
    data = json_data['data']
    timestamp_long = data['timestampLong']
    topic_list = []
    for message in data['messageBOs']:
        content = message.get('content')
        if not content:
            continue
        topic = {'content': content}
        school_name = message.get('schoolName')
        if school_name:
            topic['schoolName'] = school_name
        # A missing school name prints as an empty string instead of raising
        # KeyError.  Single-argument print(...) is valid in Python 2 and 3.
        print('%s %s' % (school_name or '', content))
        topic_list.append(topic)
    return timestamp_long, topic_list
def load(timestamp, headers, url):
    """Keep requesting further pages of topic messages until a request fails.

    Starting from ``timestamp`` (the ``timestampLong`` cursor of the page
    already fetched), POST to ``url`` repeatedly.  Each response whose
    ``status`` is 1 is handed to ``fetch_data``, which prints the messages
    and returns the cursor for the next page.  Uses the module-level
    ``opener`` so the login cookies are sent with every request.

    Args:
        timestamp: Cursor returned by the previous page.
        headers: Base HTTP headers; a copy is used, the dict is not modified.
        url: Endpoint serving the topic messages.

    Returns:
        Always ``False`` (paging only stops on a failed or repeated page).
    """
    # NOTE(review): the query mirrors the first-page topic request with the
    # cursor substituted for ``timestamp`` -- confirm the parameter set
    # against captured app traffic.
    body_prefix = 'topicId=15&selectType=0&timestamp='
    body_suffix = ('&genderType=-1&versionNumber=7.3.0&platform=2'
                   '&channel=AppStore&phoneVersion=7.1.2'
                   '&phoneModel=iPhone%204%28GSM%29%20%28A1332%29'
                   '&phoneBrand=Apple')

    request_headers = dict(headers)
    # A preset length would not match this body; with the header absent,
    # urllib2 fills in the correct Content-Length itself.
    request_headers.pop('Content-Length', None)

    # Iterate instead of recursing so long topics cannot hit the recursion limit.
    while True:
        load_data = body_prefix + str(timestamp) + body_suffix
        req = urllib2.Request(url, load_data, request_headers)
        load_result = opener.open(req).read()
        load_json = json.loads(load_result)
        if load_json.get('status', False) != 1:
            print('load fail')
            print(load_result)
            return False
        print('load successful!!!!!!!!!!!')
        next_timestamp, _ = fetch_data(load_json)
        if next_timestamp == timestamp:
            # The cursor did not advance; asking again would repeat the
            # same page forever.
            return False
        timestamp = next_timestamp
# --- Endpoints and shared request headers ---
loginUrl = 'http://120.55.151.61:80/V2/StudentSkip/loginCheckV4.action'
topicUrl = 'http://120.55.151.61/V2/Treehole/Message/getMessageByTopicIdV3.action'
# Content-Length is deliberately not preset: urllib2 derives it from each
# request body, so it cannot disagree with the payload actually sent.
headers = {
    'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 7_1_2 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) Mobile/11D257 -SuperFriday_7.3.0',
    'Host': '120.55.151.61:80',
    # NOTE(review): gzip is advertised but responses are parsed without
    # decompression -- confirm the server never compresses these replies.
    'Accept-Encoding': 'gzip',
    'Connection': 'close',
}

# --- Login ---
# SECURITY: the account/password hashes are hard-coded; move them to
# configuration or environment variables before sharing this script.
loginData = 'registrationId=&account=5a51b60ab8341101faf5abd77e2d8219&password=426cd0f5d48f7c7cd058b6404bd63a11&mac=&ifa=64BAFF6C-4E0E-4867-AF1A-F42F19E4824E&ifv=4BD688C9-B11D-499C-A3DA-AA9D9E108C90&versionNumber=7.3.0&platform=2&channel=AppStore&phoneVersion=7.1.2&phoneModel=iPhone%204%28GSM%29%20%28A1332%29&phoneBrand=Apple'
cookieJar = CookieJar()
# The opener keeps the session cookies so later requests are authenticated.
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookieJar))
req = urllib2.Request(loginUrl, loginData, headers)
loginResult = opener.open(req).read()
loginStatus = json.loads(loginResult).get('data', False)
# Test the parsed ``data`` field, not the raw response text (which is
# non-empty, hence truthy, even when the login is rejected).
if loginStatus:
    print('login successful!')
else:
    print('login fail')
    print(loginResult)

# --- Fetch the first page of the topic ---
# ``timestamp=0`` asks for the newest page.
topicData = 'topicId=15&selectType=0&timestamp=0&genderType=-1&versionNumber=7.3.0&platform=2&channel=AppStore&phoneVersion=7.1.2&phoneModel=iPhone%204%28GSM%29%20%28A1332%29&phoneBrand=Apple'
topicRequest = urllib2.Request(topicUrl, topicData, headers)
topicHtml = opener.open(topicRequest).read()
topicJson = json.loads(topicHtml)
topicStatus = topicJson.get('status', False)
print(topicJson)
if topicStatus == 1:
    print('fetch topic success!')
    timestamp, topiclist = fetch_data(topicJson)
    # Continue with older pages, starting from the returned cursor.
    load(timestamp, headers, topicUrl)