代码:
1 #coding: utf-8
2
3 import requests
4 from HTMLParser import HTMLParser
5
6
class DoubanClient(object):
    """Small Douban web client built on one persistent ``requests`` session.

    The session carries browser-like default headers and keeps whatever
    cookies the login request sets, so later requests made through the same
    client reuse them.
    """

    def __init__(self):
        object.__init__(self)

        # Default headers copied from a browser's "Request Headers" panel.
        default_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/54.0.2840.71 Safari/537.36',
            'Origin': 'https://accounts.douban.com',
        }

        self.session = requests.session()
        self.session.headers.update(default_headers)

    def login(self, username, password, source='None',
              redir='https://www.douban.com/', login='登录'):
        """Submit the login form, prompting for a captcha when one is shown.

        The login page is fetched first and scanned for a captcha. If one is
        present, the user is asked on the console to open the image URL and
        type its solution, which is then sent along with the credentials.
        The session's cookies are printed after the POST.

        Args:
            username: Value sent as ``form_email``.
            password: Value sent as ``form_password``.
            source: Value sent as the ``source`` form field.
            redir: Value sent as the ``redir`` form field.
            login: Value sent as the ``login`` form field.
        """
        login_url = 'https://accounts.douban.com/login'
        page = self.session.get(login_url)
        captcha_id, captcha_url = _get_captcha(page.content)

        # Field names mirror the browser's "Form Data" panel.
        form = {
            'form_email': username,
            'form_password': password,
            'source': source,
            'redir': redir,
            'login': login,
        }

        if captcha_id:
            # raw_input: this script targets Python 2.
            solution = raw_input(
                'please input solution for captcha [%s]:' % captcha_url)
            form['captcha-id'] = captcha_id
            form['captcha-solution'] = solution

        # Per-request headers; the referer is the login page itself.
        request_headers = {
            'referer': 'https://accounts.douban.com/login',
            'host': 'accounts.douban.com',
        }

        self.session.post(login_url, data=form, headers=request_headers)
        print(self.session.cookies.items())

    def edit_signature(self, username, signature):
        """Change the signature shown on a user's profile page.

        The profile page is fetched to read its hidden ``ck`` form token,
        which is posted together with the new signature. The response body
        is printed.

        Args:
            username: Profile identifier used in the profile URLs.
            signature: New signature text.
        """
        profile_url = 'https://www.douban.com/people/%s/' % username
        profile = self.session.get(profile_url)

        form = {
            'ck': _get_ck(profile.content),
            'signature': signature,
        }
        endpoint = ('https://www.douban.com/j/people/%s/edit_signature'
                    % username)
        request_headers = {
            'referer': profile_url,
            'host': 'www.douban.com',
            'x-requested-with': 'XMLHttpRequest',
        }

        response = self.session.post(endpoint, data=form,
                                     headers=request_headers)
        print(response.content)
58
59
60 def _attr(attrs, attrname):
61 for attr in attrs:
62 if attr[0] == attrname:
63 return attr[1]
64 return None
65
66
67 def _get_captcha(content):
68 #获取验证码的id和url
69 class CaptchaParser(HTMLParser): #继承父类HTMLParser
70 def __init__(self):
71 HTMLParser.__init__(self)
72 self.captcha_id = None #默认值设为None
73 self.captcha_url = None
74
75 def handle_starttag(self, tag, attrs):
76 if tag == 'img' and _attr(attrs, 'id') == 'captcha_image' and _attr(attrs, 'class') == 'captcha_image':
77 #根据网页框架进行条件限定,定位至验证码图片
78 self.captcha_url = _attr(attrs, 'src') #得到验证码图片的url
79
80 if tag == 'input' and _attr(attrs, 'type') == 'hidden' and _attr(attrs, 'name') == 'captcha-id':
81 #条件限定,定位至验证码id
82 self.captcha_id = _attr(attrs, 'value') #得到验证码的id value
83
84 p = CaptchaParser()
85 p.feed(content) #feed()向解析器喂数据
86 return p.captcha_id, p.captcha_url
87
88
89 def _get_ck(content):
90
91 class CKParser(HTMLParser):
92 def __init__(self):
93 HTMLParser.__init__(self)
94 self.ck = None
95
96 def handle_starttag(self, tag, attrs):
97 if tag == 'input' and _attr(attrs, 'type') == 'hidden' and _attr(attrs, 'name') == 'ck':
98 #条件限定,定位至签名框
99 self.ck = _attr(attrs, 'value')
100
101 p = CKParser() #实例化类
102 p.feed(content) #feed()向解析器喂数据
103 return p.ck
104
105
if __name__ == '__main__':
    # Demo run: log in, then change the account's signature.
    # The password and signature literals below are placeholders that must
    # be replaced with real values before running.
    c = DoubanClient()
    c.login('791368726@qq.com', '**此处为密码**')
    c.edit_signature('162101126', '**此处为签名**')
执行:
Google Chrome: