Python登录百度网盘并读取自己的所有文件，输出到文本文件中

最新推荐文章于 2023-03-31 22:42:51 发布

冻云-FrozenCloud

最新推荐文章于 2023-03-31 22:42:51 发布

阅读量9.5k

点赞数 5

分类专栏： python 文章标签： python 百度网盘模拟登陆遍历目录

本文链接：https://blog.csdn.net/ioiol/article/details/51126654

版权

python 专栏收录该内容

29 篇文章 1 订阅

订阅专栏

这两天帮一个朋友写了一个小程序，需求是导出他自己的百度网盘所有文件的目录结构。

以前注册过一个百度开发者，知道有PCS服务，里面有提供类似的接口，所以就一口答应下来，但是我too naive。

去查文档的时候发现百度已经关闭了pcs服务。

此时，我的心情是崩溃的，因为已经有段日子没写Python了。但既然已经答应了，总得实现；百度不提供接口，只好自己来。

按照需求，需要先登录百度账户，然后调用目录列表接口，递归遍历所有目录。

登录的代码网上很多，随便找了两个，其中一个居然还是登录云盘的，简直是柳暗花明。

下面直接上代码：

# coding=utf8
import urllib2, requests, time, re
import simplejson as json
import sys

reload(sys)
sys.setdefaultencoding("utf-8")


class getId():
    """Log in to Baidu Pan through a ``requests`` session and list its files.

    Typical use: construct the object (this performs the first login POST and,
    if a captcha is demanded, saves the image to ``code.jpg``), call
    ``GetResult`` with the captcha text, then call ``getList`` with a starting
    directory to dump the tree into the output text file.

    NOTE(review): every request is sent with ``verify=False``, i.e. TLS
    certificate checking is disabled while the account password is being
    posted. The surrounding article describes this as a deliberate workaround
    for an SSL error; it is kept as-is here but should be revisited.
    """

    # Pattern used to pull the numeric error code out of a failed login reply.
    _ERR_NO_PATTERN = r"err_no=(?P<err_no>\d+?)&"

    @staticmethod
    def _first_match(pattern, text, default=None):
        """Return the first capture of *pattern* in *text*, or *default* if none."""
        found = re.findall(pattern, text)
        return found[0] if found else default

    @staticmethod
    def _parse_err_no(text):
        """Return ``(match, err_no)`` for *text*; ``(None, -1)`` if no code is present."""
        match = re.search(getId._ERR_NO_PATTERN, text)
        if match is None:
            return None, -1
        return match, int(match.group("err_no"))

    def __init__(self, name, password):
        """Open the session, fetch the login token and send the first login POST.

        When the first response carries a non-empty ``codeString`` the captcha
        image is downloaded to ``code.jpg`` in the working directory.

        Args:
            name: account name posted as ``username``.
            password: account password posted as ``password``.

        Raises:
            ValueError: if no login token can be found in the getapi response.
        """
        self.name = name
        self.password = password
        # Visit the home pages first so the session collects cookies.
        self.s = requests.Session()
        self.s.get('http://pan.baidu.com', verify=False)
        self.s.get('https://passport.baidu.com/v2/api/?login', verify=False)
        # Fetch the token value.
        self.cook = self.s.get("https://passport.baidu.com/v2/api/?getapi&class=login&tpl=mn&tangram=true",
                               verify=False)
        self.data = self.cook.text
        self.token = self._first_match(r"bdPass.api.params.login_token='(.*?)'", self.data)
        if self.token is None:
            # Previously surfaced as a bare IndexError with no explanation.
            raise ValueError("login token not found in the passport getapi response")

        # Request headers used for the login POSTs.
        self.headers = {
            'Host': 'passport.baidu.com',
            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 9_2 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13C75 Safari/601.1',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate',
            'Referer': 'http://pan.baidu.com/',
            'X-Forwarded-For': '112.224.21.186',
            'Content-Type': 'application/x-www-form-urlencoded'
        }
        self.s.get("https://passport.baidu.com/v2/api/?login", headers=self.headers, verify=False)

        # Form data for the first POST (no captcha yet).
        self.payload = {
            'staticpage': 'http://pan.baidu.com/res/static/thirdparty/pass_v3_jump.html',
            'charset': 'utf-8',
            'token': self.token,
            'tpl': 'netdisk',
            'apiver': 'v3',
            'tt': '1454225544967',
            'codestring': '',
            'safeflg': '0',
            'u': 'http://pan.baidu.com/',
            'isPhone': 'false',
            'gid': 'A1936DC-EE05-488C-9D66-364C8632781C',
            'quick_user': '0',
            'loginmerge': 'true',
            'splogin': 'rate',
            'logintype': 'dialogLogin',
            'logLoginType': 'ios_loginDialog',
            'username': self.name,
            'password': self.password,
            'verifycode': '',
            'mem_pass': 'on',
            'ppui_logintime': '8466',
            'callback': 'parent.bd__pcbs__hksq59'
        }
        # First POST: its response tells us whether a captcha is required.
        self.login = self.s.post("https://passport.baidu.com/v2/api/?login", data=self.payload,
                                 headers=self.headers, verify=False)
        # A response without any codeString is treated like an empty one
        # (no captcha) instead of crashing with IndexError.
        self.get_code = self._first_match(r'codeString=(.*?)&userName', self.login.text, '')

        if self.get_code:
            # Download the captcha image; the code string is sent as the raw query string.
            self.code = self.s.get("https://passport.baidu.com/cgi-bin/genimage", params=self.get_code,
                                   stream=True, verify=False)
            self.path = "code.jpg"  # change this path as needed
            if self.code.status_code == 200:
                with open(self.path, 'wb') as f:
                    for chunk in self.code.iter_content(chunk_size=1024):
                        f.write(chunk)

    def GetResult(self, verifycode):
        """Send the second login POST with the captcha text and report the outcome.

        Args:
            verifycode: captcha text typed by the user (may be empty).

        Returns:
            0 when the session holds a ``BDUSS`` cookie afterwards (the quota is
            printed as well); otherwise the ``err_no`` parsed from the response,
            or -1 when the response contains no ``err_no``.
        """
        self.verifycode = verifycode
        # Form data for the second POST, now carrying the captcha.
        self.payload = {
            'staticpage': 'http://pan.baidu.com/res/static/thirdparty/pass_v3_jump.html',
            'charset': 'utf-8',
            'token': self.token,
            'tpl': 'netdisk',
            'apiver': 'v3',
            'tt': '1454225548817',
            'gid': 'A1936DC-EE05-488C-9D66-364C8632781C',
            'codestring': self.get_code,
            'safeflg': '0',
            'u': 'http://pan.baidu.com/',
            'isPhone': 'true',
            'quick_user': '0',
            'loginmerge': 'true',
            'logintype': 'dialogLogin',
            'logLoginType': 'ios_loginDialog',
            'username': self.name,
            'password': self.password,
            'verifycode': self.verifycode,
            'ppui_logintime': '8466',
            'callback': 'parent.bd__pcbs__hksq59'
        }
        self.login2 = self.s.post("https://passport.baidu.com/v2/api/?login", data=self.payload,
                                  headers=self.headers, verify=False)

        # Login succeeded if the session cookies now contain 'BDUSS'.
        if 'BDUSS' in self.s.cookies:
            print("SUCCESS")
            self.getQuota()
            return 0

        # The match may be None for an unexpected response; report -1 then
        # rather than failing with AttributeError.
        self.error_no, err_no = self._parse_err_no(self.login2.text)
        print("error_no = : %d" % err_no)
        print("FAILED")
        return err_no

    def getQuota(self):
        """Query the quota endpoint, print the used space and return it in GiB."""
        payload = {
            'checkexpire': '1',
            'checkfree': '1',
            'bdstoken': self.token,
            'channel': 'chunlei',
            'clienttype': '0',
            'web': '1',
            'app_id': '250528'
        }
        self.quota = self.s.get("http://pan.baidu.com/api/quota", params=payload)
        quota_info = json.loads(self.quota.text)
        # 'used' is divided by 1024 three times: bytes -> GiB.
        size = quota_info['used'] / 1024.0 / 1024.0 / 1024.0
        print("%.2fG" % size)
        return size

    def getCode(self):
        """Return the captcha code string captured during construction ('' if none)."""
        return self.get_code

    def getList(self, path):
        """Recursively list *path*, printing every entry and appending it to the output file.

        Files are recorded by ``server_filename`` and directories by ``path``;
        each directory is descended into right after it is recorded. The
        listing is requested page by page (100 entries per page) so that
        directories holding more than one page are not cut off.

        Args:
            path: directory to list, e.g. ``u'/'``.
        """
        page_size = 100
        page = 1
        while True:
            payload = {
                'order': 'time',
                'desc': '1',
                'showempty': '0',
                'web': '1',
                'page': str(page),
                'num': str(page_size),
                'dir': path,
                't': '0.844042636686936',
                'bdstoken': self.token,
                'channel': 'chunlei',
                'clienttype': '0',
                'app_id': '250528'
            }
            self.Path = self.s.get("http://pan.baidu.com/api/list", params=payload, verify=False)
            print(self.Path.text)
            entries = json.loads(self.Path.text).get('list')
            if entries is None:
                # No 'list' in the reply: report it and keep traversing the
                # rest of the tree instead of aborting with KeyError.
                print("getList: response for %s has no 'list', skipping" % path)
                return

            for entry in entries:
                if entry['isdir'] == 0:
                    # JSON strings are already decoded text; no .decode() needed.
                    print(entry['server_filename'])
                    self.write2File(entry['server_filename'])
                elif entry['isdir'] == 1:
                    print(entry['path'])
                    self.write2File(entry['path'])
                    self.getList(entry['path'] + '/')

            # A short page means this directory has been fully read.
            if len(entries) < page_size:
                return
            page += 1

    def write2File(self, result):
        """Append *result* plus a newline to the output text file."""
        # 'with' guarantees the handle is closed even if a write fails.
        with open(u'云盘目录.txt', 'a') as f:
            f.write(result)
            f.write('\n')

if __name__ == '__main__':
    # Build the login session (cookies are kept across requests).
    # Fill in your own account name and password here.
    baiduId = getId('', '')

    # Only ask for a captcha when the first login attempt actually issued one
    # (in that case the image has been saved by the constructor).
    verifycode = ''
    if baiduId.getCode():
        while not verifycode:
            verifycode = raw_input(u"输入验证码:")

    # GetResult returns 0 on success and already prints the quota, so the
    # tree is only walked after a successful login.
    if baiduId.GetResult(verifycode) == 0:
        # Starting directory; pass a deeper directory here if needed.
        baiduId.getList(u'/')

登录部分基本是照搬网上的方法，没怎么改；遍历用的getList方法（递归）是我自己写的，另外还加了一个写文件的方法。

这个整体没什么太大难度，但是还是有几个坑的。

1.在执行s.get的时候会出现SSL相关的报错（原文此处的具体报错信息已缺失），查找原因说是python2的系统bug。

一种临时的解决办法是在s.get方法中传参verify=False，从而跳过ssl检查。

我这里采用了这个方式。

2.在没有采用这种登录方式之前，我自己用urllib2实现了一个登录，但是由于我没有做验证码的处理，从而导致了一个现象：我一个经常使用的账号可以登录成功并获得目录结构，但是其他账号都不行。我开始以为是cookie的问题，然后一个大牛朋友用了1分钟思考后告诉我：你是不是没处理验证码……

3.网盘目录、文件过多的时候，程序会发生未知错误而终止运行。所以后来是分四次执行的。

以上。