Python爬取app

Python爬取手机app


这段代码参考了网上的代码,基本差不多。我在实际操作中发现原代码在我这里运行有bug,于是用自己的账号实验了一下,并修复了bug。这是我第一次爬取app,感觉还可以,挺成功的。

奋斗

# -*-coding:utf-8 -*-
import urllib2
from cookielib import CookieJar
import sys
import json
"""
    超级课程表话题抓取
"""

def fetch_data(json_data):
    """Extract the paging timestamp and the topic entries from a response.

    Args:
        json_data: Decoded JSON response (a dict). It must contain a ``data``
            object holding ``timestampLong`` and a ``messageBOs`` list.

    Returns:
        A ``(timestampLong, topicList)`` tuple. ``topicList`` holds one dict
        per message that has a non-empty ``schoolName``; each dict carries
        ``schoolName`` and, when the message has one, ``content``.

    Raises:
        KeyError: If ``data``, ``timestampLong`` or ``messageBOs`` is missing.
    """
    data = json_data['data']
    timestampLong = data['timestampLong']
    topicList = []
    for each in data['messageBOs']:
        school = each.get('schoolName')
        if not school:
            # Messages without a school name are skipped entirely.
            continue
        topicDict = {'schoolName': school}
        content = each.get('content')
        if content:
            topicDict['content'] = content
        # Single-argument print keeps the "school content" output while
        # tolerating messages that have no content field.
        print('%s %s' % (school, content or ''))
        topicList.append(topicDict)
    return timestampLong, topicList



def load(timestamp, headers, url):
    """Request further pages of the topic until the server stops serving them.

    Each successful page is handed to ``fetch_data`` and the timestamp it
    returns is used as the cursor for the next request.

    Args:
        timestamp: ``timestampLong`` value of the previously fetched page.
        headers: Base HTTP headers. The dict is copied for every request and
            is never modified.
        url: Endpoint the paging request is POSTed to.

    Returns:
        False when a response carries a ``status`` other than 1; True when
        the cursor returned by the server stops advancing.
    """
    while True:
        # NOTE(review): the form fields mirror the first-page topic request
        # made by this script, with only the timestamp substituted; confirm
        # against captured app traffic.
        loadData = ('topicId=15&selectType=0&timestamp=' + str(timestamp) +
                    '&genderType=-1&versionNumber=7.3.0&platform=2'
                    '&channel=AppStore&phoneVersion=7.1.2'
                    '&phoneModel=iPhone%204%28GSM%29%20%28A1332%29'
                    '&phoneBrand=Apple')
        # Work on a copy so the caller's headers stay untouched, and declare
        # the real body length instead of a hard-coded value.
        requestHeaders = dict(headers)
        requestHeaders['Content-Length'] = str(len(loadData))
        req = urllib2.Request(url, loadData, requestHeaders)
        loadResult = opener.open(req).read()
        loadJson = json.loads(loadResult)
        if loadJson.get('status', False) != 1:
            print('load fail')
            print(loadResult)
            return False
        print('load successful!!!!!!!!!!!')
        nextTimestamp, topicList = fetch_data(loadJson)
        if nextTimestamp == timestamp:
            # The cursor did not move; stop instead of requesting the same
            # page forever.
            return True
        timestamp = nextTimestamp
# Backend endpoints used by the script: login check and topic message list.
loginUrl = 'http://120.55.151.61:80/V2/StudentSkip/loginCheckV4.action'
topicUrl = 'http://120.55.151.61/V2/Treehole/Message/getMessageByTopicIdV3.action'
# HTTP headers shared by every request the script sends.
headers = {
    'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 7_1_2 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) Mobile/11D257 -SuperFriday_7.3.0',
    'Host': '120.55.151.61:80',
    'Accept-Encoding': 'gzip',
    # Length of the login form body; later requests overwrite this entry.
    'Content-Length': '314',
    'Connection': 'close',
}

# --- Login ---
# NOTE(review): the account and password values are embedded in the source
# (they appear to be hashed values); move them to configuration before
# sharing this script.
loginData = 'registrationId=&account=5a51b60ab8341101faf5abd77e2d8219&password=426cd0f5d48f7c7cd058b6404bd63a11&mac=&ifa=64BAFF6C-4E0E-4867-AF1A-F42F19E4824E&ifv=4BD688C9-B11D-499C-A3DA-AA9D9E108C90&versionNumber=7.3.0&platform=2&channel=AppStore&phoneVersion=7.1.2&phoneModel=iPhone%204%28GSM%29%20%28A1332%29&phoneBrand=Apple'
# The opener keeps cookies set by the login response so that later requests
# made through it reuse the same session.
cookieJar = CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookieJar))
req = urllib2.Request(loginUrl, loginData, headers)
loginResult = opener.open(req).read()
# A truthy "data" field in the decoded response is treated as a successful
# login; the raw response text is non-empty even on failure, so it must not
# be used for this check.
loginStatus = json.loads(loginResult).get('data', False)
if loginStatus:
    print('login successful!')
else:
    print('login fail')
    print(loginResult)

# --- Fetch the first page of the topic ---
# "timestamp=0" requests the first page; the "&timestamp" field separator had
# been mangled into a multiplication sign, which also made the body shorter
# than the declared Content-Length.
topicData = 'topicId=15&selectType=0&timestamp=0&genderType=-1&versionNumber=7.3.0&platform=2&channel=AppStore&phoneVersion=7.1.2&phoneModel=iPhone%204%28GSM%29%20%28A1332%29&phoneBrand=Apple'
# Derive the length from the body so the two can never drift apart.
headers['Content-Length'] = str(len(topicData))
topicRequest = urllib2.Request(topicUrl, topicData, headers)
topicHtml = opener.open(topicRequest).read()
topicJson = json.loads(topicHtml)
topicStatus = topicJson.get('status', False)
print(topicJson)
if topicStatus == 1:
    print('fetch topic success!')
    timestamp, topiclist = fetch_data(topicJson)
    # Continue with the following pages, starting from the returned cursor.
    load(timestamp, headers, topicUrl)
else:
    print('fetch topic fail')
    print(topicHtml)




评论 5
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值