Python实现微信机器人斗图

最新推荐文章于 2024-07-23 17:35:12 发布

sika0819

最新推荐文章于 2024-07-23 17:35:12 发布

阅读量1.8k

点赞数 2

分类专栏： python weixin 文章标签： python

本文链接：https://blog.csdn.net/lisa0626/article/details/86075553

版权

python 同时被 2 个专栏收录

2 篇文章 0 订阅

订阅专栏

weixin

2 篇文章 0 订阅

订阅专栏

前情提要

以前接了图灵机器人的api做过一个微信小机器人,但是这个机器人只会尬聊也就算了，竟然连斗图都不能！！！虽说图灵机器人官网上有这个api,可是由于我是用python手动接入而不是直接授权，发送表情包给他依然会回复尬聊。我深信没有斗图的机器人是没有灵魂的，于是想动手自己写一个。

参考：用python爬取斗图网

打印从用户那里获得的消息，会发现，接收表情包时返回的是一行文本信息：【收到不支持的消息类型，暂无法显示】

于是我们稍微改一改之前的代码，对这行文本进行判断，只要接收到这行消息，参考爬取代码从斗图网随机爬取一个表情包下载下来，再传入临时素材库，获取mediaid,再生成图片消息发送给用户就可以了。

效果：因为又要下载又要上传显得稍微有点卡顿，但是还可以。

![效果图](https://sika0819.top//images/pasted-48.png)

核心代码：

handler.py

# -*- coding=utf-8 -*-
import hashlib
import reply
import receive
import web
import robot
import getemoticon
from basic import Basic
from media import Media
import json
class Handle(object):
    """web.py handler for the WeChat official-account callback URL.

    ``GET`` answers the server-verification handshake; ``POST`` answers
    incoming user messages with either a chat-bot text reply or a freshly
    uploaded emoticon image.
    """

    def GET(self):
        """Answer the signature-verification handshake.

        Returns:
            A greeting when the request carries no query parameters, the
            ``echostr`` parameter when the SHA-1 of the sorted
            token/timestamp/nonce equals ``signature``, an empty string when
            it does not, or the error text if anything raises.
        """
        try:
            data = web.input()
            if len(data) == 0:
                return "hello, this is handle view"
            signature = data.signature
            timestamp = data.timestamp
            nonce = data.nonce
            echostr = data.echostr
            token = "yourtoken"
            parts = sorted([token, timestamp, nonce])
            # Hash the concatenation directly: feeding the pieces through
            # map() is a no-op on Python 3, where map() is lazy.
            hashcode = hashlib.sha1("".join(parts).encode("utf-8")).hexdigest()
            if hashcode == signature:
                return echostr
            return ""
        except Exception as error:
            # Report the caught instance, not the Exception class attribute.
            return str(error)

    def POST(self):
        """Answer one incoming message.

        Returns:
            The reply XML for text and image messages, ``"success"`` for
            anything this handler does not answer, or ``"fail"`` when an
            exception is raised while building the reply.
        """
        import time  # local import: only needed to stamp image replies

        try:
            webData = web.data()
            recMsg = receive.parse_xml(webData)
            if not isinstance(recMsg, receive.Msg):
                print("none handler yet")
                return "success"

            # The reply goes back to the sender, so the two ids swap roles.
            toUser = recMsg.FromUserName
            fromUser = recMsg.ToUserName

            if recMsg.MsgType == 'text':
                content = recMsg.Content
                print(content)
                # The platform delivers a sticker as this fixed placeholder
                # text; treat it as "the user sent an emoticon".
                if content == "【收到不支持的消息类型，暂无法显示】":
                    path = getemoticon.getRandomEmoticon()
                    print(path)
                    myMedia = Media()
                    accessToken = Basic().get_access_token()
                    mediaType = "image"
                    callbackjson = myMedia.upload(accessToken, path, mediaType)
                    callback = json.loads(callbackjson)
                    mediaId = callback[u'media_id']
                    createTime = callback[u'created_at']
                    replyMsg = reply.ImageMsg(toUser, fromUser, createTime, mediaType, mediaId)
                    return replyMsg.send()
                # NOTE(review): the bot is keyed on fromUser (this account's
                # id), not the sender's id -- confirm that is intended.
                rpyMsg = robot.get_response(content, fromUser)
                replyMsg = reply.TextMsg(toUser, fromUser, rpyMsg)
                return replyMsg.send()

            if recMsg.MsgType == 'image':
                # Echo the received picture back. All five constructor
                # arguments are passed explicitly so the call matches the
                # ImageMsg signature that is actually in effect.
                replyMsg = reply.ImageMsg(
                    toUser, fromUser, int(time.time()), 'image', recMsg.MediaId)
                return replyMsg.send()

            # Message types without a dedicated reply are acknowledged
            # instead of falling through and returning None.
            return "success"
        except Exception as error:
            print(error)
            return "fail"

media.py

# -*- coding: utf-8 -*-
# filename: media.py
from basic import Basic
import urllib2
import poster.encode
from poster.streaminghttp import register_openers

class Media(object):
    """Uploads local files to the WeChat temporary-media endpoint."""

    def __init__(self):
        # Install poster's openers into urllib2 so the multipart body
        # produced in upload() can be sent.
        register_openers()

    def upload(self, accessToken, filePath, mediaType):
        """Upload one file and return the raw response body.

        Args:
            accessToken: access token inserted into the request URL.
            filePath: path of the local file to send.
            mediaType: value of the ``type`` query parameter (e.g. "image").

        Returns:
            The response body exactly as read from the server.
        """
        postUrl = "https://api.weixin.qq.com/cgi-bin/media/upload?access_token=%s&type=%s" % (accessToken, mediaType)
        # Keep the file open for the whole request: the encoded body reads
        # from it while it is being sent. The with-block then guarantees the
        # handle is released even when the request fails.
        with open(filePath, "rb") as openFile:
            param = {'media': openFile}
            postData, postHeaders = poster.encode.multipart_encode(param)
            request = urllib2.Request(postUrl, postData, postHeaders)
            urlResp = urllib2.urlopen(request)
            try:
                return urlResp.read()
            finally:
                urlResp.close()

# if __name__ == '__main__':
	# myMedia = Media()
	# accessToken = Basic().get_access_token()
	# filePath = "img/帽冷汗.jpg"   #请按实际填写
	# mediaType = "image"
	# myMedia.upload(accessToken, filePath, mediaType)

reply.py

# -*- coding=utf-8 -*-
import time
class Msg(object):
    """Base reply whose ``send`` result is the plain acknowledgement string."""

    def __init__(self):
        """Create the reply; the base class keeps no state."""

    def send(self):
        """Return the literal acknowledgement ``"success"``."""
        acknowledgement = "success"
        return acknowledgement
class TextMsg(Msg):
    """Text reply rendered as the XML document returned by ``send``."""

    def __init__(self, toUserName, fromUserName, content):
        """Store the template fields; CreateTime is the current epoch second."""
        self.__dict = {
            'ToUserName': toUserName,
            'FromUserName': fromUserName,
            'CreateTime': int(time.time()),
            'Content': content,
        }

    def send(self):
        """Fill the text-message XML template with the stored fields."""
        template = """
        <xml>
        <ToUserName><![CDATA[{ToUserName}]]></ToUserName>
        <FromUserName><![CDATA[{FromUserName}]]></FromUserName>
        <CreateTime>{CreateTime}</CreateTime>
        <MsgType><![CDATA[text]]></MsgType>
        <Content><![CDATA[{Content}]]></Content>
        </xml>
        """
        fields = self.__dict
        return template.format(**fields)
class ImageMsg(Msg):
    """Image reply rendered as the XML document returned by ``send``.

    Python keeps only the last ``__init__`` defined in a class body, so the
    two constructors that used to live here are merged into one that accepts
    both call shapes:

    * ``ImageMsg(toUserName, fromUserName, mediaId)``
    * ``ImageMsg(toUserName, fromUserName, createTime, msgType, mediaId)``
    """

    def __init__(self, toUserName, fromUserName, createTime=None,
                 msgType='image', mediaId=None):
        """Store the template fields.

        Args:
            toUserName: recipient id.
            fromUserName: sender id.
            createTime: creation timestamp; defaults to the current epoch
                second. In the three-argument form this position carries
                the media id instead.
            msgType: stored under ``MsgType``; the XML template itself
                always emits ``image``.
            mediaId: id of the media item to send.

        Raises:
            TypeError: when no media id is supplied in either form.
        """
        if mediaId is None:
            # Three-argument form: the third positional value is the media id.
            mediaId, createTime = createTime, None
        if mediaId is None:
            raise TypeError("ImageMsg requires a mediaId")
        if createTime is None:
            createTime = int(time.time())
        self.__dict = {
            'ToUserName': toUserName,
            'FromUserName': fromUserName,
            'CreateTime': createTime,
            'MsgType': msgType,
            'MediaId': mediaId,
        }

    def send(self):
        """Fill the image-message XML template with the stored fields."""
        XmlForm = """
        <xml>
        <ToUserName><![CDATA[{ToUserName}]]></ToUserName>
        <FromUserName><![CDATA[{FromUserName}]]></FromUserName>
        <CreateTime>{CreateTime}</CreateTime>
        <MsgType><![CDATA[image]]></MsgType>
        <Image>
        <MediaId><![CDATA[{MediaId}]]></MediaId>
        </Image>
        </xml>
        """
        return XmlForm.format(**self.__dict)

随机爬取表情包
getemoticon.py

# -*- coding=utf-8 -*-
 
#导入模块
import random
import requests
import re
from bs4 import BeautifulSoup
import bs4
import os

# Pool of browser User-Agent strings; getHTMLText() picks one at random per
# request so the crawler looks like an ordinary browser.
my_headers = [
	"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36",
	"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14",
	"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)",
]
 
# Fixed header dict; not referenced by any function in this listing.
kv = {"User-Agent": "Mozilla/5.0"}
 
def getHTMLText(url, headers):
    """Fetch ``url`` and return the decoded page text.

    Args:
        url: address of the page to download.
        headers: sequence of User-Agent strings; one is chosen at random for
            the request. When it is empty no custom User-Agent is sent.

    Returns:
        The response text, or ``None`` when the request fails or the server
        answers with an error status (a notice is printed in that case).
    """
    request_headers = {"User-Agent": random.choice(headers)} if headers else {}
    try:
        r = requests.get(url, headers=request_headers, timeout=30)
        # Turn HTTP error statuses into exceptions so they take the same
        # failure path as connection errors.
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are treated as "fetch failed"; anything
        # else (including KeyboardInterrupt) propagates.
        print("爬取失败")
        return None
    r.encoding = r.apparent_encoding
    return r.text
 
def getImgList(Ilist, html):
    """Append ``[url, title]`` pairs found in ``html`` to ``Ilist``.

    URLs are the values of ``data-original="..."`` attributes and titles are
    the values of ``alt="..."`` attributes, paired by their order of
    appearance in the page. Titles are stored UTF-8 encoded.

    Args:
        Ilist: list that receives the pairs (modified in place).
        html: page source to scan.

    Returns:
        The same ``Ilist`` object, after the pairs have been appended.
    """
    imgList = re.findall(r'data-original="(.+?)"', html)
    titleList = re.findall(r'alt="(.+?)"', html)
    # zip() stops at the shorter list, so a page with fewer alt texts than
    # image URLs no longer raises IndexError.
    Ilist.extend(
        [url, title.encode('utf-8')] for url, title in zip(imgList, titleList)
    )
    return Ilist
 
#判断是否存在指定的文件夹，然后创建文件夹
def mkdir():
    """Make sure the ``img`` directory exists in the working directory.

    Raises:
        OSError: when the directory cannot be created, including when
            ``img`` already exists but is not a directory.
    """
    try:
        os.mkdir('img')
    except OSError:
        # Creating first and checking afterwards avoids the race between an
        # existence test and the creation; an already present directory is
        # fine, anything else is a real error.
        if not os.path.isdir('img'):
            raise

def saveImg(Ilistcontent):
    """Download one emoticon into ``img/`` and return the path of the file.

    Args:
        Ilistcontent: ``[url, title]`` pair; the title may be UTF-8 bytes or
            text and becomes the file name.

    Returns:
        Path of the saved file. An existing file with the same name is
        reused without downloading again.

    Raises:
        ValueError: when the URL does not end in a supported image extension.
        requests.RequestException: when the download fails.
    """
    url, title = Ilistcontent[0], Ilistcontent[1]
    if isinstance(title, bytes):
        title = title.decode('utf-8')
    # Path separators in a title would place the file outside img/.
    title = title.replace('/', '_').replace('\\', '_')

    extension = os.path.splitext(url.split('?', 1)[0])[1].lower()
    if extension not in ('.jpg', '.jpeg', '.png', '.gif'):
        # Previously this case ended in open("") with an unhelpful error.
        raise ValueError("unsupported image type: %s" % url)

    mkdir()
    img_path = 'img/%s%s' % (title, extension)
    # Check the cache before touching the network.
    if os.path.exists(img_path):
        return img_path

    response = requests.get(url, timeout=30)
    # Do not store an HTTP error page as if it were an image.
    response.raise_for_status()
    with open(img_path, 'wb') as f:
        f.write(response.content)
    return img_path
def download(page):
    """Return the ``[url, title]`` pairs listed on one page of the site.

    Args:
        page: page number substituted into the listing URL.

    Returns:
        List of ``[url, title]`` pairs; empty when the page could not be
        fetched.
    """
    url = "https://www.doutula.com/photo/list/?page=%d" % page
    html = getHTMLText(url, my_headers)
    if not html:
        # getHTMLText returns None on failure; handing that to the parser
        # would raise TypeError.
        return []
    return getImgList([], html)

#page = 1
def getRandomEmoticon(page=1):
    """Download a randomly chosen emoticon and return its local path.

    Args:
        page: listing page to pick from; defaults to the first page.

    Returns:
        Path of the saved image file.

    Raises:
        ValueError: when the page yields no emoticons.
    """
    print("下载图片")
    print("第%d页" % page)
    Ilist = download(page)
    if not Ilist:
        # Same exception type random.randint(0, -1) used to raise, but with
        # a message that says what actually went wrong.
        raise ValueError("no emoticons found on page %d" % page)
    i = random.randrange(len(Ilist))
    print("第%d张图" % i)
    return saveImg(Ilist[i])