python123 百度网盘_python爬虫:登录百度账户,并上传文件到百度云盘

这段代码展示了如何使用Python进行百度网盘的自动化登录,并上传文件到云端。首先,通过执行JavaScript代码获取必要的gid和callback,接着获取token和RSA密钥,对密码进行RSA加密,然后进行登录操作。最后,实现了一个进度条功能的上传函数,利用requests库分块上传文件。
摘要由CSDN通过智能技术生成

# -*- coding: utf-8 -*-

__author__ = 'Administrator'

import base64
import json
import os
import re
import sys
import time
from contextlib import closing
from hashlib import md5
from urllib.parse import urlencode
from zlib import crc32

import execjs
import requests
from Crypto.Cipher import PKCS1_v1_5
from Crypto.PublicKey import RSA
from requests_toolbelt import MultipartEncoder
# import progressbar

try:
    # The PCS calls below pass verify=False; silence the resulting urllib3 warnings.
    requests.packages.urllib3.disable_warnings()
except Exception:
    # Best effort only: if the bundled urllib3 is not exposed, keep going.
    pass

# Default browser-like headers. get_token() later adds Referer/Accept/Connection/Host
# to this same dict, so the passport requests made afterwards share them.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36'
                         '(KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
           }

# Module-wide session: every request in this file goes through it so that
# cookies (e.g. BDUSS) are shared. The initial GET is issued at import time.
session = requests.session()
session.get('https://pan.baidu.com', headers=headers)

35

class BufferReader(MultipartEncoder):
    """Streaming multipart/form-data encoder that reports read progress.

    Every ``read()`` forwards to ``MultipartEncoder.read`` and then invokes the
    optional callback with ``size`` (total encoded length) and ``progress``
    (bytes handed out so far) as keyword arguments.
    """

    def __init__(self, fields, boundary=None, callback=None, cb_args=(), cb_kwargs=None):
        """Store the callback configuration and initialise the encoder.

        :param fields: multipart fields, passed through to ``MultipartEncoder``
        :param boundary: multipart boundary, passed through to ``MultipartEncoder``
        :param callback: optional callable invoked after every ``read()``
        :param cb_args: positional arguments forwarded to ``callback``
        :param cb_kwargs: extra keyword arguments forwarded to ``callback``
        """
        self._callback = callback
        self._progress = 0
        self._cb_args = cb_args
        # Copy so that the per-read ``update`` below never mutates the caller's dict.
        self._cb_kwargs = dict(cb_kwargs or {})
        super(BufferReader, self).__init__(fields, boundary)

    def read(self, size=None):
        """Read up to ``size`` bytes of the encoded body and report progress.

        :return: the chunk returned by ``MultipartEncoder.read``
        """
        chunk = super(BufferReader, self).read(size)
        self._progress += len(chunk)
        self._cb_kwargs.update({
            # Use the public ``len`` property rather than the private ``_len``
            # attribute, which may still be unset when read() is called directly.
            'size': self.len,
            'progress': self._progress
        })
        if self._callback:
            try:
                self._callback(*self._cb_args, **self._cb_kwargs)
            except Exception:
                # A failing progress callback must not abort the upload.
                pass
        return chunk

class ProgressBar():
    """Callable progress reporter built on the third-party ``progressbar`` package.

    Instances are meant to be passed as a progress callback and are called with
    the keyword arguments ``size`` (total bytes) and ``progress`` (bytes done).
    """

    def __init__(self):
        # The bar itself is created lazily on the first call, when ``size`` is known.
        self.first_call = True

    def __call__(self, *args, **kwargs):
        """Create the bar on first use, then update or finish it.

        :param kwargs: must contain ``size`` and ``progress``
        """
        # Imported here under an alias: the module-level import is commented out
        # and this file defines a function that is also named ``progressbar``.
        import progressbar as progressbar_lib

        if self.first_call:
            self.widgets = [progressbar_lib.Percentage(), ' ',
                            progressbar_lib.Bar(marker=progressbar_lib.RotatingMarker('>')),
                            ' ', progressbar_lib.FileTransferSpeed()]
            self.pbar = progressbar_lib.ProgressBar(widgets=self.widgets, maxval=kwargs['size']).start()
            self.first_call = False

        if kwargs['size'] <= kwargs['progress']:
            self.pbar.finish()
        else:
            self.pbar.update(kwargs['progress'])

82

def _get_runntime(path='login.js'):
    """Compile the login JavaScript helper with PyExecJS.

    :param path: path of the JavaScript file to compile. The original author
        notes that the file should not contain Chinese characters because
        PyExecJS appeared to mishandle them.
    :return: the compiled JS context returned by ``execjs``'s ``compile``
    """
    # The original author notes that a PhantomJS runtime has to be discoverable
    # (environment variable / PATH) for execjs.get() to pick it up.
    phantom = execjs.get()
    with open(path, 'r', encoding='utf-8') as f:
        source = f.read()
    return phantom.compile(source)

def get_gid():
    """Return the result of calling ``getGid`` in the compiled login script."""
    return _get_runntime().call('getGid')

def get_callback():
    """Return the result of calling ``getCallback`` in the compiled login script."""
    return _get_runntime().call('getCallback')

def _get_curtime():
    """Return the current Unix time in milliseconds as an int."""
    return int(time.time() * 1000)

# Packet captures are not fully reliable here: "?getapi" has to be written
# directly after https://passport.baidu.com/v2/api/ to reach the right route.
def get_token(gid, callback):
    """Request a login token from the passport ``getapi`` endpoint.

    Side effect: adds Referer/Accept/Connection/Host entries to the
    module-level ``headers`` dict, which later passport requests reuse.

    :param gid: gid value sent as the ``gid`` query parameter
    :param callback: JSONP callback name; it must appear in the response text
    :return: the token found under ``data.token`` in the response, or ``None``
        when the request fails or the payload cannot be located
    """
    cur_time = _get_curtime()
    get_data = {
        'tpl': 'netdisk',
        'subpro': 'netdisk_web',
        'apiver': 'v3',
        'tt': cur_time,
        'class': 'login',
        'gid': gid,
        'logintype': 'basicLogin',
        'callback': callback
    }
    headers.update(dict(Referer='http://pan.baidu.com/', Accept='*/*', Connection='keep-alive', Host='passport.baidu.com'))
    resp = session.get(url='https://passport.baidu.com/v2/api/?getapi', params=get_data, headers=headers)
    if resp.status_code == 200 and callback in resp.text:
        # The payload is wrapped as callback(...); extract the text between the parentheses.
        match = re.search(r'.*?\((.*)\)', resp.text)
        if match:
            # The payload uses single quotes, which json.loads rejects, so
            # normalise them to double quotes before parsing.
            data = json.loads(match.group(1).replace("'", '"'))
            return (data.get('data') or {}).get('token')
    print('获取token失败')
    return None

def get_rsa_key(token, gid, callback):
    """Fetch the RSA public key used to encrypt the login password.

    :param token: token previously obtained from ``get_token``
    :param gid: gid value sent as the ``gid`` query parameter
    :param callback: JSONP callback name; it must appear in the response text
    :return: ``(pubkey, key)`` taken from the response payload on success,
        ``None`` on failure (callers must check before unpacking)
    """
    cur_time = _get_curtime()
    get_data = {
        'token': token,
        'tpl': 'netdisk',
        'subpro': 'netdisk_web',
        'apiver': 'v3',
        'tt': cur_time,
        'gid': gid,
        'callback': callback,
    }
    resp = session.get(url='https://passport.baidu.com/v2/getpublickey', headers=headers, params=get_data)
    if resp.status_code == 200 and callback in resp.text:
        # The payload is wrapped as callback(...); extract the text between the parentheses.
        match = re.search(r'.*?\((.*)\)', resp.text)
        if match:
            # Single quotes are normalised to double quotes so json.loads accepts the payload.
            data = json.loads(match.group(1).replace("'", '"'))
            return data.get('pubkey'), data.get('key')
    print('获取rsa key失败')
    return None

def encript_password(password, pubkey):
    """Encrypt ``password`` with the PEM public key and return it base64-encoded.

    An equivalent approach noted by the original author uses the ``rsa``
    package instead::

        import rsa
        pub = rsa.PublicKey.load_pkcs1_openssl_pem(pubkey.encode('utf-8'))
        encript_passwd = rsa.encrypt(password.encode('utf-8'), pub)
        return base64.b64encode(encript_passwd).decode('utf-8')

    :param password: plain-text password (str)
    :param pubkey: public key text (str) as returned by ``get_rsa_key``
    :return: base64 text of the encrypted password
    """
    # importKey needs bytes, hence the encode.
    pub = RSA.importKey(pubkey.encode('utf-8'))
    # Build the PKCS1_v1_5 cipher object around the key.
    encryptor = PKCS1_v1_5.new(pub)
    # The plaintext has to be bytes as well.
    encript_passwd = encryptor.encrypt(password.encode('utf-8'))
    return base64.b64encode(encript_passwd).decode('utf-8')

def login(token, gid, callback, rsakey, username, password):
    """Post the login form to the passport ``?login`` endpoint.

    Prints the outcome and also returns it, so callers can react to a failure.

    :param token: token from ``get_token``
    :param gid: gid value sent with the form
    :param callback: JSONP callback name; sent as ``'parent.' + callback``
    :param rsakey: the ``key`` value returned together with the public key
    :param username: account name
    :param password: password already encrypted by ``encript_password``
    :return: ``True`` when the response text contains ``err_no=0``, else ``False``
    """
    post_data = {
        'staticpage': 'http://pan.baidu.com/res/static/thirdparty/pass_v3_jump.html',
        'charset': 'utf-8',
        'token': token,
        'tpl': 'netdisk',
        'subpro': 'netdisk_web',
        'apiver': 'v3',
        'tt': _get_curtime(),
        'codestring': '',
        'safeflg': 0,
        'u': 'http://pan.baidu.com/disk/home',
        'isPhone': '',
        'detect': 1,
        'gid': gid,
        'quick_user': 0,
        'logintype': 'basicLogin',
        'logLoginType': 'pc_loginBasic',
        'idc': '',
        'loginmerge': 'true',
        'foreignusername': '',
        'username': username,
        'password': password,
        'mem_pass': 'on',
        # The key returned alongside the public key.
        'rsakey': rsakey,
        'crypttype': 12,
        'ppui_logintime': 33554,
        'countrycode': '',
        'callback': 'parent.' + callback
    }
    resp = session.post(url='https://passport.baidu.com/v2/api/?login', data=post_data, headers=headers)
    succeeded = 'err_no=0' in resp.text
    if succeeded:
        print('登录成功')
    else:
        print('登录失败')
    return succeeded


def progressbar(size=None, progress=None, progress_title="已完成", finish_title="全部完成"):
    """Write a one-line textual progress indicator to stdout.

    NOTE(review): the body of this function was destroyed when the listing was
    extracted from HTML; only the ``progress < size`` test, the
    ``sys.stdout.write`` / ``sys.stdout.flush`` calls and the ``else`` branch
    survived. The output format below is a reconstruction - confirm it against
    the original script.

    :param size: total number of bytes
    :param progress: number of bytes transferred so far
    :param progress_title: label printed while the transfer is running
    :param finish_title: label printed once the transfer is complete
    """
    if size and progress is not None and progress < size:
        percent = progress * 100.0 / size
        # "\r" returns to the start of the line so the next call overwrites it.
        sys.stdout.write("\r%s %.2f%%" % (progress_title, percent))
    else:
        # Also reached when size is 0/None, which avoids a division by zero.
        sys.stdout.write("\r%s 100.00%%\n" % finish_title)
    sys.stdout.flush()

def upload(dest_path, file_handle, token, callback=None):
    """Upload a file, trying ``rapidupload`` first and falling back to a full upload.

    :param dest_path: destination path on the server, including the file name
    :param file_handle: seekable binary file object, e.g. ``open(path, 'rb')``
    :param token: value sent as ``bdstoken``
    :param callback: optional progress callback for the full upload; called with
        ``size`` and ``progress`` keyword arguments (see ``BufferReader``)
    :return: ``requests.Response`` of whichever request produced the final result
    """
    res = rapidupload(dest_path, file_handle, token)
    result = json.loads(res.content.decode('utf-8'))
    # NOTE(review): error_code 31079 is treated as "rapid upload not possible";
    # the exact meaning of the code is not visible here - confirm.
    if result.get("error_code", -1) != 31079:
        print("using rapidupload....")
        return res

    print("using upload....")
    params = {
        'method': 'upload',
        'app_id': "250528",
        'BDUSS': session.cookies['BDUSS'],
        't': str(int(time.time())),
        'bdstoken': token,
        'path': dest_path,
        'ondup': "newcopy"
    }
    # rapidupload() read the handle to EOF while hashing; rewind it, otherwise
    # the multipart body below would contain no file data.
    file_handle.seek(0)
    files = {'file': (str(int(time.time())), file_handle)}
    url = 'https://{0}/rest/2.0/pcs/file'.format('pcs.baidu.com')
    api = '%s?%s' % (url, urlencode(params))
    body = BufferReader(files, callback=callback)
    header = {"Referer": "http://pan.baidu.com/disk/home",
              "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia"}
    # The multipart boundary lives in the encoder's content type.
    header.update({"Content-Type": body.content_type})
    response = session.post(api, data=body, verify=False, headers=header)
    return response

def _rapidupload_digests(file_handler, callback=None):
    """Hash the whole file and return the form fields that rapidupload posts."""
    block_size = 2 ** 20  # read in 1 MB blocks
    slice_size = 256 * 1024  # the verification slice is the first 256 KB

    # Determine the total length, then rewind to the start.
    file_handler.seek(0, 2)
    content_length = file_handler.tell()
    file_handler.seek(0)

    first_slice = file_handler.read(slice_size)
    slice_md5 = md5(first_slice).hexdigest()
    # The whole-file digests start from the slice and are extended block by block.
    content_md5 = md5(first_slice)
    content_crc32 = crc32(first_slice)

    bytes_done = len(first_slice)
    if callback:
        callback(size=content_length, progress=bytes_done)
    while True:
        block = file_handler.read(block_size)
        if not block:
            break
        content_crc32 = crc32(block, content_crc32)
        content_md5.update(block)
        bytes_done += len(block)
        if callback:
            # Report the real number of bytes hashed so far.
            callback(size=content_length, progress=bytes_done)

    return {
        'content-length': content_length,
        'content-md5': content_md5.hexdigest(),
        'slice-md5': slice_md5,
        'content-crc32': '%d' % (content_crc32 & 0xFFFFFFFF)
    }


def rapidupload(dest_path, file_handler, token, callback=None):
    """Try to "rapid upload" a file by sending only its length and checksums.

    The file is read completely to compute the digests, so the handle is left
    at end-of-file when this function returns.

    :param dest_path: destination path on the server, including the file name
    :type dest_path: str
    :param file_handler: seekable binary file object, e.g. ``open('file', 'rb')``
    :param token: value sent as ``bdstoken``
    :param callback: optional callable invoked with ``size`` and ``progress``
        keyword arguments while the file is being hashed
    :return: requests.Response

    .. note::
        Response examples recorded by the original author:

        * file already known to the server, nothing uploaded::

            {"path": "/apps/album/1.jpg", "size": 372121,
             "ctime": 1234567890, "mtime": 1234567890,
             "md5": "cb123afcc12453543ef", "fs_id": 12345,
             "isdir": 0, "request_id": 12314124}

        * file not known, a real upload is needed:
          ``{"errno":404,"info":[],"request_id":XXX}``
        * file smaller than 256 KB (slice-md5 == content-md5):
          ``{"errno":2,"info":[],"request_id":XXX}``
        * remote file already exists:
          ``{"errno":-8,"info":[],"request_id":XXX}``
    """
    params = {
        'method': 'rapidupload',
        'app_id': "250528",
        'BDUSS': session.cookies['BDUSS'],
        't': str(int(time.time())),
        'bdstoken': token,
        'path': dest_path,
        'ondup': "newcopy"
    }
    header = {"Referer": "http://pan.baidu.com/disk/home",
              "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia"}
    url = 'https://{0}/rest/2.0/pcs/file'.format('pcs.baidu.com')
    api = '%s?%s' % (url, urlencode(params))

    data = _rapidupload_digests(file_handler, callback)
    response = session.post(api, data=data, verify=False, headers=header)
    return response

def download(remote_path, file_path, token):
    """Stream a single remote file to ``file_path``, showing progress on stdout.

    The original author notes that the download interface honours the standard
    HTTP ``Range`` header (e.g. ``Range: bytes=0-99`` for the first 100 bytes),
    so a resumable variant could request consecutive ranges in a loop and
    append each response body to the local file until an empty body comes
    back. This implementation instead streams the whole body in one request.

    :param remote_path: path of the file on the net disk, including the file
        name; per the original notes it must start with ``/``, is limited to
        1000 characters, must not contain any of ``\\ ? | " > < : *`` and no
        path component may start or end with ``.`` or whitespace
    :param file_path: local path the content is written to (opened with ``'wb'``)
    :param token: value sent as ``bdstoken``
    :return: ``None``; the content is written to ``file_path``
    """
    params = {
        'method': 'download',
        'app_id': "250528",
        'BDUSS': session.cookies['BDUSS'],
        't': str(int(time.time())),
        'bdstoken': token,
        'path': remote_path
    }
    # pcs.baidu.com still works; per the original author, d.pcs.baidu.com is
    # the newer domain with faster and more stable downloads.
    url = 'https://{0}/rest/2.0/pcs/file'.format('d.pcs.baidu.com')
    header = {"Referer": "http://pan.baidu.com/disk/home",
              "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia"}
    with closing(session.get(url, params=params, verify=False, headers=header, stream=True)) as response:
        chunk_size = 1024  # upper bound for a single chunk
        # The header may be missing (e.g. chunked responses); 0 means "unknown".
        total_size = int(response.headers.get('content-length', 0))
        received = 0
        with open(file_path, 'wb') as out_file:
            for data in response.iter_content(chunk_size=chunk_size):
                out_file.write(data)
                # Count what was actually received instead of assuming full chunks.
                received += len(data)
                if total_size:
                    progressbar(size=total_size, progress=received, progress_title="正在下载", finish_title="下载完成")

394

def get_filesize(rote_path, token):
    """Request the meta information of a remote file (PCS ``method=meta``).

    :param rote_path: remote file path, e.g. ``'/aaa.txt'``
    :param token: value sent as ``bdstoken``
    :return: requests.Response
    """
    params = {
        'method': 'meta',
        'app_id': "250528",
        'BDUSS': session.cookies['BDUSS'],
        't': str(int(time.time())),
        'bdstoken': token,
        'path': rote_path
    }
    url = 'https://{0}/rest/2.0/pcs/file'.format('pcs.baidu.com')
    header = {"Referer": "http://pan.baidu.com/disk/home",
              "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia"}
    response = session.get(url, params=params, verify=False, headers=header)
    return response

def meta(file_list, token):
    r"""Request meta information for one or more remote files.

    :param file_list: list of remote paths, e.g. ``['/aaa.txt']``; a single
        non-list value is wrapped into a one-element list
    :type file_list: list
    :param token: value sent as ``bdstoken``
    :return: requests.Response

    .. note::
        Response examples recorded by the original author:

        * file does not exist::

            {"errno":12,"info":[{"errno":-9}],"request_id":3294861771}

        * file exists (abridged)::

            {"errno": 0,
             "info": [{"fs_id": <file id>,
                       "server_filename": "...",
                       "size": 8292134,
                       "server_mtime": 1391274570,
                       "server_ctime": 1391274570,
                       "local_mtime": 1391274570,
                       "local_ctime": 1391274570,
                       "isdir": 0,
                       "category": 6,
                       "path_md5": 279827390796736883,
                       "delete_fs_id": 0,
                       "object_key": "84221121-2193956150-1391274570512754",
                       "block_list": ["76b469302a02b42fd0a548f1a50dd8ac"],
                       "md5": "76b469302a02b42fd0a548f1a50dd8ac",
                       "errno": 0}],
             "request_id": 2964868977}
    """
    if not isinstance(file_list, list):
        file_list = [file_list]
    data = {'target': json.dumps(file_list)}
    params = {
        'method': 'filemetas',
        'app_id': "250528",
        'BDUSS': session.cookies['BDUSS'],
        't': str(int(time.time())),
        'bdstoken': token
    }
    header = {"Referer": "http://pan.baidu.com/disk/home",
              "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia"}
    url = 'http://pan.baidu.com/api/{0}'.format('filemetas?blocks=0&dlink=1')
    # The URL already carries a query string, so the extra parameters are
    # appended with '&'; '?' is only used when no query string is present.
    separator = '&' if '?' in url else '?'
    api = '%s%s%s' % (url, separator, urlencode(params))
    # The former debug prints were dropped on purpose: they wrote the token and
    # the BDUSS cookie (embedded in the URL) to stdout.
    response = session.post(api, data=data, verify=False, headers=header)
    return response

508

if __name__ == '__main__':
    # Demo flow: fetch gid/callback, token and RSA key, log in, then upload a file.
    user = 'xxx'
    password = 'xxx'

    cur_gid = get_gid()
    cur_callback = get_callback()
    cur_token = get_token(cur_gid, cur_callback)
    if cur_token is None:
        # get_token() already printed the failure message.
        raise SystemExit(1)

    rsa_result = get_rsa_key(cur_token, cur_gid, cur_callback)
    if rsa_result is None:
        # get_rsa_key() already printed the failure message.
        raise SystemExit(1)
    cur_pubkey, cur_key = rsa_result

    # Use a distinct name so the encript_password() function is not rebound.
    encrypted_password = encript_password(password, cur_pubkey)
    login(cur_token, cur_gid, cur_callback, cur_key, user, encrypted_password)

    # Close the local file deterministically once the upload has finished.
    with open("test_BaiduPan.py", 'rb') as local_file:
        res = upload("/hello/word.py", local_file, cur_token, callback=progressbar)
    print(res.content.decode('utf-8'))

    # Further usage examples left by the original author:
    #
    # res = rapidupload("/hello/traindata.js", open("login.js", 'rb'), cur_token, callback=progressbar)
    # print(json.loads(res.content.decode('utf-8')))
    #
    # download("/hello/words.txt", "word.txt", cur_token)
    #
    # res = get_filesize("/hello/words", cur_token)
    # print(res.content.decode('utf-8'))
    #
    # res = meta("/hello/words.txt", cur_token)
    # print(res.content)

202>
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值