# 最新的豆瓣自动回复脚本
# 废话不多说,直接上代码。
import requests
import re
import time
#from chaojiying import *
from random import choice , randint
# Candidate reply texts; parase_groups() picks one at random for every comment.
# The entry below is only a placeholder -- replace it with the real replies
# before running the script.
ran = ['自动回复的内容']
# Shared session so the login cookies are kept across requests.
s = requests.Session()
# Headers used by get_user_data() when requesting www.douban.com.
headers = {
    'Host': 'www.douban.com',
    'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'
}
# Log in
def login():
    """Log in through the Douban mobile login endpoint and return the cookies.

    The request is sent with the shared session ``s`` so that the session
    keeps the login for the requests made afterwards.

    Returns:
        dict: the cookies set by the login response.

    Raises:
        RuntimeError: if the request fails, the response body is not JSON,
            or its ``status`` field is not ``'success'``.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'
    }
    url = 'https://accounts.douban.com/j/mobile/login/basic'
    # 'name' and 'password' are placeholders for the real account credentials.
    data = {
        'ck': '',
        'name': '豆瓣账号',
        'password': '豆瓣密码',
        'remember': 'false',
        'ticket': '',
    }
    try:
        html = s.post(url, headers=headers, data=data, timeout=10)
        result = html.json()
    except (requests.RequestException, ValueError) as err:
        # ValueError covers a response body that is not valid JSON.
        raise RuntimeError('登录失败: %s' % err) from err
    if not isinstance(result, dict) or result.get('status') != 'success':
        # Fail here with the server's answer instead of letting a later
        # cookie lookup fail with an unrelated error.
        raise RuntimeError('登录失败: %s' % (result,))
    print('登录成功')
    login_cookies = html.cookies.get_dict()
    print(login_cookies)
    return login_cookies
# Fetch the groups joined by the logged-in user and the ck token
def get_user_data():
    """Fetch the user's joined-groups page and extract group links and ``ck``.

    Reads the module-level ``cookies`` (assigned in the ``__main__`` block
    from ``login()``) and the module-level ``headers``.

    Returns:
        tuple: ``(groups, ck)`` where ``groups`` is the list of ``href``
        values captured from the ``<a title=... href="...">`` anchors on the
        page and ``ck`` is the first ``ck`` query value found in a link.

    Raises:
        RuntimeError: if the ``dbcl2`` cookie is missing or no ``ck`` value
            can be found on the page.
    """
    try:
        # The user id is the part of dbcl2 before ':' minus its first
        # character (presumably a leading quote -- verify against a real cookie).
        user_id = cookies['dbcl2'].split(':')[0][1:]
    except (KeyError, TypeError) as err:
        raise RuntimeError('未找到登录 cookie dbcl2, 请先登录') from err
    groups_url = 'https://www.douban.com/group/people/' + user_id + '/joins'
    html = s.get(groups_url, headers=headers, timeout=10).text
    ck_match = re.search(r'<a href=".*?&ck=(.*?)".*?</a>', html, re.S)
    if ck_match is None:
        raise RuntimeError('未能从小组页面解析出 ck, 登录可能已失效')
    ck = ck_match.group(1)  # ck value taken from the first matching link
    groups = re.findall(r'<a title=.*?href="(.*?)".*?</a>', html, re.S)
    print(ck)
    return groups, ck
# Fetch the latest discussions of a group and reply to them automatically.
def _select_topics(html):
    """Return the topic URLs on a discussion-list page that have at most 5 replies.

    A topic whose reply-count cell is blank is selected as well; a cell that
    is neither blank nor an integer is skipped.
    """
    row_re = re.compile(
        r'<td class="title">.*?<a href="(.*?)".*?title=.*?<td.*?<td nowrap="nowrap" class="">(.*?)</td>',
        re.S)
    selected = []
    for topic_url, reply_count in row_re.findall(html):
        reply_count = reply_count.strip()
        if reply_count:
            try:
                if int(reply_count) > 5:
                    continue
            except ValueError:
                # Unexpected cell content: skip the topic instead of crashing.
                continue
        selected.append(topic_url)
    return selected


def _reply_to_topic(topic_url, referer):
    """Post one reply, chosen at random from ``ran``, to the topic at *topic_url*.

    Failures (network error, missing ``ck``) are printed and the topic is
    skipped; nothing is raised for them.
    """
    headers = {
        'Host': 'www.douban.com',
        'Referer': referer,
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
    }
    try:
        html = s.get(topic_url, headers=headers, timeout=5).text
    except requests.RequestException as err:
        print('获取帖子失败: %s' % err)
        return
    ck_match = re.search(r'<a href=".*?&ck=(.*?)".*?</a>', html, re.S)
    if ck_match is None:
        print('未找到ck, 跳过: %s' % topic_url)
        return
    captcha_urls = re.findall(r'captcha.*?src="(.*?)"', html)
    reply_url = topic_url + 'add_comment#last'
    param = {
        'ck': ck_match.group(1),
        'rv_comment': choice(ran),
    }
    print('评论了:%s' % param['rv_comment'])
    if captcha_urls:
        captcha_url = captcha_urls[0]
        id_match = re.search('id=(.*?)&', captcha_url)
        if id_match is not None:
            param['captcha-id'] = id_match.group(1)
        print('有验证码\n验证码')
        # NOTE(review): the file name is the first character of the captcha
        # URL, so every captcha is written to the same file -- confirm
        # whether the captcha id was intended here.
        image_path = captcha_url[0] + '.jpg'
        try:
            image = requests.get(captcha_url, headers=headers, timeout=5).content
        except requests.RequestException as err:
            print('验证码下载失败: %s' % err)
        else:
            with open(image_path, 'wb') as f:
                f.write(image)
            print("%s下载完成" % captcha_urls)
        # No captcha solver is wired in: the original had a commented-out
        # Chaojiying client call that read the saved image and stored the
        # answer in param['captcha-solution']. Without it the reply is still
        # posted, carrying only 'captcha-id'.
    else:
        print('无验证码')
    try:
        response = s.post(reply_url, headers=headers, data=param, timeout=5)
    except requests.RequestException:
        print('发言失败.')
        return
    print(response.status_code)
    if response.status_code == 403:
        # Print the time at which the server started refusing replies.
        print(time.asctime())


def parase_groups(group, m):
    """Reply to the low-traffic topics on one discussion-list page of a group.

    Args:
        group: group URL; ``'discussion?start='`` is appended to it directly.
        m: page offset as a string (the ``__main__`` block passes
            ``'0'``, ``'25'`` and ``'50'``).

    Every topic with at most 5 replies (or a blank reply count) gets one
    random reply from ``ran``, followed by a random 10-30 second pause.
    """
    print(1)
    headers = {
        'Connection': 'keep-alive',
        'Host': 'www.douban.com',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
    }
    try:
        html = s.get(group + 'discussion?start=' + m, headers=headers, timeout=5).text
    except requests.RequestException as err:
        print('获取讨论列表失败: %s' % err)
        return
    for topic_url in _select_topics(html):
        print(topic_url)
        _reply_to_topic(topic_url, group + 'discussion?start=0')
        # Random pause between replies.
        time.sleep(randint(10, 30))
if __name__ == '__main__':
    # `cookies` has to be a module-level name: get_user_data() reads it as a
    # global rather than taking it as an argument.
    cookies = login()
    groups, ck = get_user_data()  # ck is not used again at this level
    time.sleep(2)
    if not groups:
        # Without this guard the endless loop below would have nothing to
        # iterate over and would never pause.
        print('未找到任何小组, 退出.')
    else:
        try:
            while True:
                for group in groups:
                    # Walk the first three discussion-list pages of each group.
                    for m in ('0', '25', '50'):
                        parase_groups(group, m)
                        # NOTE(review): the original indentation was lost;
                        # pausing after every page is assumed -- confirm.
                        time.sleep(20)
        except KeyboardInterrupt:
            # Allow stopping the endless loop with Ctrl-C without a traceback.
            print('已停止.')
# 如需定制爬虫或定制自动回复,请联系 QQ:940755193